diff options
Diffstat (limited to 'include/haproxy')
293 files changed, 49302 insertions, 0 deletions
diff --git a/include/haproxy/acl-t.h b/include/haproxy/acl-t.h new file mode 100644 index 0000000..34b7e40 --- /dev/null +++ b/include/haproxy/acl-t.h @@ -0,0 +1,160 @@ +/* + * include/haproxy/acl-t.h + * This file provides structures and types for ACLs. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ACL_T_H +#define _HAPROXY_ACL_T_H + +#include <haproxy/pattern-t.h> +#include <haproxy/sample-t.h> + +/* ACL test result. + * + * We're using a 3-state matching system : + * - PASS : at least one pattern already matches + * - MISS : some data is missing to decide if some rules may finally match. 
+ * - FAIL : no pattern may ever match + * + * We assign values 0, 1 and 3 to FAIL, MISS and PASS respectively, so that we + * can make use of standard arithmetic for the truth tables below : + * + * x | !x x&y | F(0) | M(1) | P(3) x|y | F(0) | M(1) | P(3) + * ------+----- -----+------+------+----- -----+------+------+----- + * F(0) | P(3) F(0)| F(0) | F(0) | F(0) F(0)| F(0) | M(1) | P(3) + * M(1) | M(1) M(1)| F(0) | M(1) | M(1) M(1)| M(1) | M(1) | P(3) + * P(3) | F(0) P(3)| F(0) | M(1) | P(3) P(3)| P(3) | P(3) | P(3) + * + * neg(x) = (3 >> x) and(x,y) = (x & y) or(x,y) = (x | y) + * + * For efficiency, the ACL return flags are directly mapped from the pattern + * match flags. See include/pattern.h for existing values. + */ +enum acl_test_res { + ACL_TEST_FAIL = 0, /* test failed */ + ACL_TEST_MISS = 1, /* test may pass with more info */ + ACL_TEST_PASS = 3, /* test passed */ +}; + +/* Condition polarity. It makes it easier for any option to choose between + * IF/UNLESS if it can store that information within the condition itself. + * Those should be interpreted as "IF/UNLESS result == PASS". + */ +enum acl_cond_pol { + ACL_COND_NONE, /* no polarity set yet */ + ACL_COND_IF, /* positive condition (after 'if') */ + ACL_COND_UNLESS, /* negative condition (after 'unless') */ +}; + +/* + * ACL keyword: Associates keywords with parsers, methods to retrieve the value and testers. + */ +/* + * NOTE: + * The 'parse' function is called to parse words in the configuration. It must + * return the number of valid words read. 0 = error. The 'opaque' argument may + * be used by functions which need to maintain a context between consecutive + * values. It is initialized to zero before the first call, and passed along + * successive calls. 
+ */ + +struct acl_expr; +struct acl_keyword { + const char *kw; + char *fetch_kw; + int match_type; /* Contain PAT_MATCH_* */ + int (*parse)(const char *text, struct pattern *pattern, int flags, char **err); + int (*index)(struct pattern_expr *expr, struct pattern *pattern, char **err); + void (*delete)(struct pat_ref *, struct pat_ref_elt *); + void (*prune)(struct pattern_expr *expr); + struct pattern *(*match)(struct sample *smp, struct pattern_expr *expr, int fill); + /* must be after the config params */ + struct sample_fetch *smp; /* the sample fetch we depend on */ +}; + +/* + * A keyword list. It is a NULL-terminated array of keywords. It embeds a + * struct list in order to be linked to other lists, allowing it to easily + * be declared where it is needed, and linked without duplicating data nor + * allocating memory. + */ +struct acl_kw_list { + struct list list; + struct acl_keyword kw[VAR_ARRAY]; +}; + +/* + * Description of an ACL expression. + * The expression is part of a list. It contains pointers to the keyword, the + * sample fetch descriptor which defaults to the keyword's, and the associated + * pattern matching. The structure is organized so that the hot parts are + * grouped together in order to optimize caching. 
+ */ +struct acl_expr { + struct sample_expr *smp; /* the sample expression we depend on */ + struct pattern_head pat; /* the pattern matching expression */ + struct list list; /* chaining */ + const char *kw; /* points to the ACL kw's name or fetch's name (must not free) */ +}; + +/* The acl will be linked to from the proxy where it is declared */ +struct acl { + struct list list; /* chaining */ + char *name; /* acl name */ + struct list expr; /* list of acl_exprs */ + unsigned int use; /* or'ed bit mask of all acl_expr's SMP_USE_* */ + unsigned int val; /* or'ed bit mask of all acl_expr's SMP_VAL_* */ +}; + +/* the condition will be linked to from an action in a proxy */ +struct acl_term { + struct list list; /* chaining */ + struct acl *acl; /* acl pointed to by this term */ + int neg; /* 1 if the ACL result must be negated */ +}; + +struct acl_term_suite { + struct list list; /* chaining of term suites */ + struct list terms; /* list of acl_terms */ +}; + +struct acl_cond { + struct list list; /* Some specific tests may use multiple conditions */ + struct list suites; /* list of acl_term_suites */ + enum acl_cond_pol pol; /* polarity: ACL_COND_IF / ACL_COND_UNLESS */ + unsigned int use; /* or'ed bit mask of all suites's SMP_USE_* */ + unsigned int val; /* or'ed bit mask of all suites's SMP_VAL_* */ + const char *file; /* config file where the condition is declared */ + int line; /* line in the config file where the condition is declared */ +}; + +struct acl_sample { + struct acl_cond cond; + struct acl_term_suite suite; + struct acl_term terms[]; +}; + +#endif /* _HAPROXY_ACL_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/acl.h b/include/haproxy/acl.h new file mode 100644 index 0000000..38b1739 --- /dev/null +++ b/include/haproxy/acl.h @@ -0,0 +1,157 @@ +/* + * include/haproxy/acl.h + * This file provides interface definitions for ACL manipulation. 
+ * + * Copyright (C) 2000-2013 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ACL_H +#define _HAPROXY_ACL_H + +#include <haproxy/acl-t.h> +#include <haproxy/api.h> +#include <haproxy/arg-t.h> + +struct stream; + +/* + * FIXME: we need destructor functions too ! + */ + +/* Negate an acl result. This turns (ACL_TEST_FAIL, ACL_TEST_MISS, + * ACL_TEST_PASS) into (ACL_TEST_PASS, ACL_TEST_MISS, ACL_TEST_FAIL). + */ +static inline enum acl_test_res acl_neg(enum acl_test_res res) +{ + return (3 >> res); +} + +/* Convert an acl result to a boolean. Only ACL_TEST_PASS returns 1. */ +static inline int acl_pass(enum acl_test_res res) +{ + return (res >> 1); +} + +/* Return a pointer to the ACL <name> within the list starting at <head>, or + * NULL if not found. + */ +struct acl *find_acl_by_name(const char *name, struct list *head); + +/* Return a pointer to the ACL keyword <kw>, or NULL if not found. Note that + * if <kw> contains an opening parenthesis, only the left part of it is + * checked. + */ +struct acl_keyword *find_acl_kw(const char *kw); + +/* Parse an ACL expression starting at <args>[0], and return it. + * Right now, the only accepted syntax is : + * <subject> [<value>...] 
+ */ +struct acl_expr *parse_acl_expr(const char **args, char **err, struct arg_list *al, const char *file, int line); + +/* Purge everything in the acl <acl>, then return <acl>. */ +struct acl *prune_acl(struct acl *acl); + +/* Parse an ACL with the name starting at <args>[0], and with a list of already + * known ACLs in <acl>. If the ACL was not in the list, it will be added. + * A pointer to that ACL is returned. + * + * args syntax: <aclname> <acl_expr> + */ +struct acl *parse_acl(const char **args, struct list *known_acl, char **err, struct arg_list *al, const char *file, int line); + +/* Parse an ACL condition starting at <args>[0], relying on a list of already + * known ACLs passed in <known_acl>. The new condition is returned (or NULL in + * case of low memory). Supports multiple conditions separated by "or". + */ +struct acl_cond *parse_acl_cond(const char **args, struct list *known_acl, + enum acl_cond_pol pol, char **err, struct arg_list *al, + const char *file, int line); + +/* Builds an ACL condition starting at the if/unless keyword. The complete + * condition is returned. NULL is returned in case of error or if the first + * word is neither "if" nor "unless". It automatically sets the file name and + * the line number in the condition for better error reporting, and sets the + * HTTP initialization requirements in the proxy. If <err> is not NULL, it will + * be set to an error message upon errors, that the caller will have to free. + */ +struct acl_cond *build_acl_cond(const char *file, int line, struct list *known_acl, + struct proxy *px, const char **args, char **err); + +/* Execute condition <cond> and return either ACL_TEST_FAIL, ACL_TEST_MISS or + * ACL_TEST_PASS depending on the test results. ACL_TEST_MISS may only be + * returned if <opt> does not contain SMP_OPT_FINAL, indicating that incomplete + * data is being examined. The function automatically sets SMP_OPT_ITERATE. 
This + * function only computes the condition, it does not apply the polarity required + * by IF/UNLESS, it's up to the caller to do this. + */ +enum acl_test_res acl_exec_cond(struct acl_cond *cond, struct proxy *px, struct session *sess, struct stream *strm, unsigned int opt); + +/* Returns a pointer to the first ACL conflicting with usage at place <where> + * which is one of the SMP_VAL_* bits indicating a check place, or NULL if + * no conflict is found. Only full conflicts are detected (ACL is not usable). + * Use the next function to check for useless keywords. + */ +const struct acl *acl_cond_conflicts(const struct acl_cond *cond, unsigned int where); + +/* Returns a pointer to the first ACL and its first keyword to conflict with + * usage at place <where> which is one of the SMP_VAL_* bits indicating a check + * place. Returns true if a conflict is found, with <acl> and <kw> set (if non + * null), or false if no conflict is found. The first useless keyword is + * returned. + */ +int acl_cond_kw_conflicts(const struct acl_cond *cond, unsigned int where, struct acl const **acl, char const **kw); + +/* + * Find targets for userlist and groups in acl. Function returns the number + * of errors or OK if everything is fine. + */ +int acl_find_targets(struct proxy *p); + +/* Return a pointer to the ACL <name> within the list starting at <head>, or + * NULL if not found. + */ +struct acl *find_acl_by_name(const char *name, struct list *head); + +/* + * Registers the ACL keyword list <kwl> as a list of valid keywords for next + * parsing sessions. + */ +void acl_register_keywords(struct acl_kw_list *kwl); + +/* + * Unregisters the ACL keyword list <kwl> from the list of valid keywords. + */ +void acl_unregister_keywords(struct acl_kw_list *kwl); + +/* initializes ACLs by resolving the sample fetch names they rely upon. + * Returns 0 on success, otherwise an error. 
+ */ +int init_acl(void); + +void acl_dump_kwd(void); + +void free_acl_cond(struct acl_cond *cond); + +#endif /* _HAPROXY_ACL_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h new file mode 100644 index 0000000..f77bdce --- /dev/null +++ b/include/haproxy/action-t.h @@ -0,0 +1,217 @@ +/* + * include/haproxy/action-t.h + * This file contains actions definitions. + * + * Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ACTION_T_H +#define _HAPROXY_ACTION_T_H + +#include <haproxy/applet-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/vars-t.h> + +struct session; +struct stream; +struct proxy; + +enum act_from { + ACT_F_TCP_REQ_CON, /* tcp-request connection */ + ACT_F_TCP_REQ_SES, /* tcp-request session */ + ACT_F_TCP_REQ_CNT, /* tcp-request content */ + ACT_F_TCP_RES_CNT, /* tcp-response content */ + ACT_F_HTTP_REQ, /* http-request */ + ACT_F_HTTP_RES, /* http-response */ + ACT_F_TCP_CHK, /* tcp-check. */ + ACT_F_CFG_PARSER, /* config parser */ + ACT_F_CLI_PARSER, /* command line parser */ +}; + +enum act_return { + ACT_RET_CONT, /* continue processing. */ + ACT_RET_STOP, /* stop processing. */ + ACT_RET_YIELD, /* call me again. 
*/ + ACT_RET_ERR, /* internal processing error. */ + ACT_RET_DONE, /* processing done, stop processing */ + ACT_RET_DENY, /* deny, must be handled by the caller */ + ACT_RET_ABRT, /* abort, handled by action itself. */ + ACT_RET_INV, /* invalid request/response */ +}; + +enum act_parse_ret { + ACT_RET_PRS_OK, /* continue processing. */ + ACT_RET_PRS_ERR, /* abort processing. */ +}; + +/* Option flags passed to custom actions */ +enum act_opt { + ACT_OPT_NONE = 0x00000000, /* no flag */ + ACT_OPT_FINAL = 0x00000001, /* last call, cannot yield */ + ACT_OPT_FIRST = 0x00000002, /* first call for this action */ +}; + +/* Flags used to describe the action. */ +enum act_flag { + ACT_FLAG_FINAL = 1 << 0, /* the action stops the rules evaluation when executed */ +}; + + +/* known actions to be used without any action function pointer. This enum is + * typically used in a switch case, if and only if .action_ptr is undefined. So + * if an action function is defined for one of following action types, the + * function has the priority over the switch. + */ +enum act_name { + ACT_CUSTOM = 0, + + /* common action */ + ACT_ACTION_ALLOW, + ACT_ACTION_DENY, + + /* common http actions .*/ + ACT_HTTP_REDIR, + + /* http request actions. 
*/ + ACT_HTTP_REQ_TARPIT, + + /* tcp actions */ + ACT_TCP_EXPECT_PX, + ACT_TCP_EXPECT_CIP, + ACT_TCP_CLOSE, /* close at the sender's */ +}; + +/* Timeout name valid for a set-timeout rule */ +enum act_timeout_name { + ACT_TIMEOUT_SERVER, + ACT_TIMEOUT_TUNNEL, + ACT_TIMEOUT_CLIENT, +}; + +enum act_normalize_uri { + ACT_NORMALIZE_URI_PATH_MERGE_SLASHES, + ACT_NORMALIZE_URI_PATH_STRIP_DOT, + ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT, + ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL, + ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME, + ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE, + ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, + ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED, + ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, + ACT_NORMALIZE_URI_FRAGMENT_STRIP, + ACT_NORMALIZE_URI_FRAGMENT_ENCODE, +}; + +/* NOTE: if <.action_ptr> is defined, the referenced function will always be + * called regardless the action type. */ +struct act_rule { + struct list list; + struct acl_cond *cond; /* acl condition to meet */ + unsigned int action; /* ACT_* or any meaningful value if action_ptr is defined */ + unsigned int flags; /* ACT_FLAG_* */ + enum act_from from; /* ACT_F_* */ + enum act_return (*action_ptr)(struct act_rule *rule, struct proxy *px, /* ptr to custom action */ + struct session *sess, struct stream *s, int opts); + int (*check_ptr)(struct act_rule *rule, struct proxy *px, char **err); /* ptr to check function */ + void (*release_ptr)(struct act_rule *rule); /* ptr to release function */ + const struct action_kw *kw; + struct applet applet; /* used for the applet registration. */ + union { + struct { + struct sample_expr *expr; + char *varname; + char *resolvers_id; + struct resolvers *resolvers; + struct resolv_options *opts; + } resolv; /* resolving */ + struct { + int i; /* integer param (status, nice, loglevel, ..) */ + struct ist str; /* string param (reason, header name, ...) 
*/ + struct list fmt; /* log-format compatible expression */ + struct my_regex *re; /* used by replace-header/value/uri/path */ + } http; /* args used by some HTTP rules */ + struct http_reply *http_reply; /* HTTP response to be used by return/deny/tarpit rules */ + struct redirect_rule *redir; /* redirect rule or "http-request redirect" */ + struct { + char *ref; /* MAP or ACL file name to update */ + struct list key; /* pattern to retrieve MAP or ACL key */ + struct list value; /* pattern to retrieve MAP value */ + } map; + struct sample_expr *expr; + struct { + struct sample_expr *expr; /* expression used as the key */ + struct cap_hdr *hdr; /* the capture storage */ + } cap; + struct { + struct sample_expr *expr; + int idx; + } capid; + struct { + int value; /* plain timeout value in ms if no expr is used */ + enum act_timeout_name type; /* timeout type */ + struct sample_expr *expr; /* timeout value as an expression */ + } timeout; + struct hlua_rule *hlua_rule; + struct { + struct list fmt; /* log-format compatible expression */ + struct sample_expr *expr; + uint64_t name_hash; + enum vars_scope scope; + uint conditions; /* Bitfield of the conditions passed to this set-var call */ + } vars; + struct { + int sc; + unsigned int idx; + long long int value; + struct sample_expr *expr; + } gpc; + struct { + int sc; + unsigned int idx; + long long int value; + struct sample_expr *expr; + } gpt; + struct track_ctr_prm trk_ctr; + struct { + char *srvname; /* server name from config parsing. 
*/ + struct server *srv; /* target server to attach the connection */ + struct sample_expr *name; /* used to differentiate idle connections */ + } attach_srv; /* 'attach-srv' rule */ + struct { + void *p[4]; + } act; /* generic pointers to be used by custom actions */ + } arg; /* arguments used by some actions */ + struct { + char *file; /* file name where the rule appears (or NULL) */ + int line; /* line number where the rule appears */ + } conf; +}; + +struct action_kw { + const char *kw; + enum act_parse_ret (*parse)(const char **args, int *cur_arg, struct proxy *px, + struct act_rule *rule, char **err); + int flags; + void *private; +}; + +struct action_kw_list { + struct list list; + struct action_kw kw[VAR_ARRAY]; +}; + +#endif /* _HAPROXY_ACTION_T_H */ diff --git a/include/haproxy/action.h b/include/haproxy/action.h new file mode 100644 index 0000000..dba1408 --- /dev/null +++ b/include/haproxy/action.h @@ -0,0 +1,124 @@ +/* + * include/haproxy/action.h + * This file contains actions prototypes. + * + * Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ACTION_H +#define _HAPROXY_ACTION_H + +#include <stdio.h> +#include <haproxy/action-t.h> +#include <haproxy/cfgparse.h> +#include <haproxy/list.h> +#include <haproxy/sample.h> + +struct resolv_requester; +struct dns_counters; + +int act_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters); +int act_resolution_error_cb(struct resolv_requester *requester, int error_code); +const char *action_suggest(const char *word, const struct list *keywords, const char **extra); +void free_act_rule(struct act_rule *rule); + +static inline struct action_kw *action_lookup(struct list *keywords, const char *kw) +{ + struct action_kw_list *kw_list; + struct action_kw *best = NULL; + int len, bestlen = 0; + int i; + + if (LIST_ISEMPTY(keywords)) + return NULL; + + list_for_each_entry(kw_list, keywords, list) { + for (i = 0; kw_list->kw[i].kw != NULL; i++) { + if ((kw_list->kw[i].flags & KWF_MATCH_PREFIX) && + (len = strlen(kw_list->kw[i].kw)) > bestlen && + strncmp(kw, kw_list->kw[i].kw, len) == 0) { + if (len > bestlen) { + bestlen = len; + best = &kw_list->kw[i]; + } + } + if (strcmp(kw, kw_list->kw[i].kw) == 0) + return &kw_list->kw[i]; + } + } + return best; +} + +static inline void action_build_list(struct list *keywords, + struct buffer *chk) +{ + struct action_kw_list *kw_list; + int i; + char *p; + char *end; + int l; + + p = chk->area; + end = p + chk->size - 1; + list_for_each_entry(kw_list, keywords, list) { + for (i = 0; kw_list->kw[i].kw != NULL; i++) { + l = snprintf(p, end - p, "'%s%s', ", kw_list->kw[i].kw, (kw_list->kw[i].flags & KWF_MATCH_PREFIX) ? 
"(*)" : ""); + if (l > end - p) + continue; + p += l; + } + } + if (p > chk->area) + *(p-2) = '\0'; + else + *p = '\0'; +} + +/* Check an action ruleset validity. It returns the number of error encountered + * and err_code is updated if a warning is emitted. + */ +int check_action_rules(struct list *rules, struct proxy *px, int *err_code); + +/* Find and check the target table used by an action track-sc*. This + * function should be called during the configuration validity check. + * + * The function returns 1 in success case, otherwise, it returns 0 and err is + * filled. + */ +int check_trk_action(struct act_rule *rule, struct proxy *px, char **err); + +/* check a capture rule. This function should be called during the configuration + * validity check. + * + * The function returns 1 in success case, otherwise, it returns 0 and err is + * filled. + */ +int check_capture(struct act_rule *rule, struct proxy *px, char **err); + +int cfg_parse_rule_set_timeout(const char **args, int idx, struct act_rule *rule, + struct proxy *px, char **err); + +static inline void release_timeout_action(struct act_rule *rule) +{ + release_sample_expr(rule->arg.timeout.expr); +} + +struct act_rule *new_act_rule(enum act_from from, const char *file, int linenum); +void free_act_rules(struct list *rules); +void dump_act_rules(const struct list *rules, const char *pfx); + +#endif /* _HAPROXY_ACTION_H */ diff --git a/include/haproxy/activity-t.h b/include/haproxy/activity-t.h new file mode 100644 index 0000000..9faeecd --- /dev/null +++ b/include/haproxy/activity-t.h @@ -0,0 +1,144 @@ +/* + * include/haproxy/activity-t.h + * This file contains structure declarations for activity measurements. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ACTIVITY_T_H +#define _HAPROXY_ACTIVITY_T_H + +#include <haproxy/api-t.h> +#include <haproxy/freq_ctr-t.h> + +/* bit fields for the "profiling" global variable */ +#define HA_PROF_TASKS_OFF 0x00000000 /* per-task CPU profiling forced disabled */ +#define HA_PROF_TASKS_AOFF 0x00000001 /* per-task CPU profiling off (automatic) */ +#define HA_PROF_TASKS_AON 0x00000002 /* per-task CPU profiling on (automatic) */ +#define HA_PROF_TASKS_ON 0x00000003 /* per-task CPU profiling forced enabled */ +#define HA_PROF_TASKS_MASK 0x00000003 /* per-task CPU profiling mask */ + +#define HA_PROF_MEMORY 0x00000004 /* memory profiling */ + + +#ifdef USE_MEMORY_PROFILING +/* Elements used by memory profiling. This determines the number of buckets to + * store stats. + */ +#define MEMPROF_HASH_BITS 10 +#define MEMPROF_HASH_BUCKETS (1U << MEMPROF_HASH_BITS) + +enum memprof_method { + MEMPROF_METH_UNKNOWN = 0, + MEMPROF_METH_MALLOC, + MEMPROF_METH_CALLOC, + MEMPROF_METH_REALLOC, + MEMPROF_METH_FREE, + MEMPROF_METH_P_ALLOC, // pool_alloc() + MEMPROF_METH_P_FREE, // pool_free() + MEMPROF_METH_METHODS /* count, must be last */ +}; + +/* stats: + * - malloc increases alloc + * - free increases free (if non null) + * - realloc increases either depending on the size change. + * when the real size is known (malloc_usable_size()), it's used in free_tot + * and alloc_tot, otherwise the requested size is reported in alloc_tot and + * zero in free_tot. 
+ */ +struct memprof_stats { + const void *caller; + enum memprof_method method; + /* 4-7 bytes hole here */ + unsigned long long alloc_calls; + unsigned long long free_calls; + unsigned long long alloc_tot; + unsigned long long free_tot; + void *info; // for pools, ptr to the pool + void *pad; // pad to 64 +}; +#endif + +/* per-thread activity reports. It's important that it's aligned on cache lines + * because some elements will be updated very often. Most counters are OK on + * 32-bit since this will be used during debugging sessions for troubleshooting + * in iterative mode. + */ +struct activity { + unsigned int loops; // complete loops in run_poll_loop() + unsigned int wake_tasks; // active tasks prevented poll() from sleeping + unsigned int wake_signal; // pending signal prevented poll() from sleeping + unsigned int poll_io; // number of times poll() reported I/O events + unsigned int poll_exp; // number of times poll() sees an expired timeout (includes wake_*) + unsigned int poll_drop_fd; // poller dropped a dead FD from the update list + unsigned int poll_skip_fd; // poller skipped another thread's FD + unsigned int conn_dead; // conn_fd_handler woke up on an FD indicating a dead connection + unsigned int stream_calls; // calls to process_stream() + unsigned int ctxsw; // total number of context switches + unsigned int tasksw; // total number of task switches + unsigned int empty_rq; // calls to process_runnable_tasks() with nothing for the thread + unsigned int long_rq; // process_runnable_tasks() left with tasks in the run queue + unsigned int cpust_total; // sum of half-ms stolen per thread + unsigned int fd_takeover; // number of times this thread stole another one's FD + unsigned int check_adopted;// number of times a check was migrated to this thread + ALWAYS_ALIGN(64); + + struct freq_ctr cpust_1s; // avg amount of half-ms stolen over last second + struct freq_ctr cpust_15s; // avg amount of half-ms stolen over last 15s + unsigned int avg_loop_us; 
// average run time per loop over last 1024 runs + unsigned int accepted; // accepted incoming connections + unsigned int accq_pushed; // accept queue connections pushed + unsigned int accq_full; // accept queue connection not pushed because full + unsigned int pool_fail; // failed a pool allocation + unsigned int buf_wait; // waited on a buffer allocation + unsigned int check_started;// number of times a check was started on this thread +#if defined(DEBUG_DEV) + /* keep these ones at the end */ + unsigned int ctr0; // general purpose debug counter + unsigned int ctr1; // general purpose debug counter + unsigned int ctr2; // general purpose debug counter +#endif + char __pad[0]; // unused except to check remaining room + char __end[0] __attribute__((aligned(64))); // align size to 64. +}; + +/* 256 entries for callers * callees should be highly sufficient (~45 seen usually) */ +#define SCHED_ACT_HASH_BITS 8 +#define SCHED_ACT_HASH_BUCKETS (1U << SCHED_ACT_HASH_BITS) + +/* global profiling stats from the scheduler: each entry corresponds to a + * task or tasklet ->process function pointer, with a number of calls and + * a total time. Each entry is unique, except entry 0 which is for colliding + * hashes (i.e. others). All of these must be accessed atomically. + */ +struct sched_activity { + const void *func; + const struct ha_caller *caller; + uint64_t calls; + uint64_t cpu_time; + uint64_t lat_time; +}; + +#endif /* _HAPROXY_ACTIVITY_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/activity.h b/include/haproxy/activity.h new file mode 100644 index 0000000..dbc8ec3 --- /dev/null +++ b/include/haproxy/activity.h @@ -0,0 +1,47 @@ +/* + * include/haproxy/activity.h + * This file contains macros and inline functions for activity measurements. 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ACTIVITY_H +#define _HAPROXY_ACTIVITY_H + +#include <haproxy/activity-t.h> +#include <haproxy/api.h> + +extern unsigned int profiling; +extern struct activity activity[MAX_THREADS]; +extern struct sched_activity sched_activity[SCHED_ACT_HASH_BUCKETS]; + +void report_stolen_time(uint64_t stolen); +void activity_count_runtime(uint32_t run_time); +struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func, const void *caller); + +#ifdef USE_MEMORY_PROFILING +struct memprof_stats *memprof_get_bin(const void *ra, enum memprof_method meth); +#endif + +#endif /* _HAPROXY_ACTIVITY_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/api-t.h b/include/haproxy/api-t.h new file mode 100644 index 0000000..edb33a8 --- /dev/null +++ b/include/haproxy/api-t.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/api-t.h + * This provides definitions for all common types or type modifiers used + * everywhere in the code, and suitable for use in structure fields. 
+ * + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_TYPES_H +#define _HAPROXY_TYPES_H + +#include <inttypes.h> +#include <stddef.h> + +#include <haproxy/compat.h> +#include <haproxy/compiler.h> +#include <haproxy/defaults.h> +#include <haproxy/list-t.h> + +#endif /* _HAPROXY_TYPES_H */ diff --git a/include/haproxy/api.h b/include/haproxy/api.h new file mode 100644 index 0000000..a0bb6a8 --- /dev/null +++ b/include/haproxy/api.h @@ -0,0 +1,38 @@ +/* + * include/haproxy/api.h + * + * Include wrapper that assembles all includes required by every haproxy file. + * Please do not add direct definitions into this file. 
+ * + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_BASE_H +#define _HAPROXY_BASE_H + +#include <haproxy/api-t.h> +#include <haproxy/atomic.h> +#include <haproxy/bug.h> +#include <haproxy/init.h> + +#endif diff --git a/include/haproxy/applet-t.h b/include/haproxy/applet-t.h new file mode 100644 index 0000000..bd96403 --- /dev/null +++ b/include/haproxy/applet-t.h @@ -0,0 +1,101 @@ +/* + * include/haproxy/applet-t.h + * This file describes the applet struct and associated constants. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_APPLET_T_H +#define _HAPROXY_APPLET_T_H + +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/dynbuf-t.h> +#include <haproxy/freq_ctr-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/xref-t.h> + +/* flags for appctx->state */ +#define APPLET_WANT_DIE 0x01 /* applet was running and requested to die */ + +/* Room for per-command context (mostly CLI commands but not only) */ +#define APPLET_MAX_SVCCTX 88 + +struct appctx; +struct proxy; +struct stconn; +struct sedesc; +struct session; + +/* Applet descriptor */ +struct applet { + enum obj_type obj_type; /* object type = OBJ_TYPE_APPLET */ + /* 3 unused bytes here */ + char *name; /* applet's name to report in logs */ + int (*init)(struct appctx *); /* callback to init resources, may be NULL. + expect 0 if ok, -1 if an error occurs. */ + void (*fct)(struct appctx *); /* internal I/O handler, may never be NULL */ + void (*release)(struct appctx *); /* callback to release resources, may be NULL */ + unsigned int timeout; /* execution timeout. */ +}; + +/* Context of a running applet. 
*/ +struct appctx { + enum obj_type obj_type; /* OBJ_TYPE_APPCTX */ + /* 3 unused bytes here */ + unsigned short state; /* Internal appctx state */ + unsigned int st0; /* CLI state for stats, session state for peers */ + unsigned int st1; /* prompt/payload (bitwise OR of APPCTX_CLI_ST1_*) for stats, session error for peers */ + struct buffer *chunk; /* used to store unfinished commands */ + struct applet *applet; /* applet this context refers to */ + struct session *sess; /* session for frontend applets (NULL for backend applets) */ + struct sedesc *sedesc; /* stream endpoint descriptor the applet is attached to */ + struct act_rule *rule; /* rule associated with the applet. */ + int (*io_handler)(struct appctx *appctx); /* used within the cli_io_handler when st0 = CLI_ST_CALLBACK */ + void (*io_release)(struct appctx *appctx); /* used within the cli_io_handler when st0 = CLI_ST_CALLBACK, + if the command is terminated or the session released */ + int cli_severity_output; /* used within the cli_io_handler to format severity output of informational feedback */ + int cli_level; /* the level of CLI which can be lowered dynamically */ + char cli_payload_pat[8]; /* Payload pattern */ + uint32_t cli_anon_key; /* the key to anonymise with the hash in cli */ + struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */ + struct task *t; /* task associated to the applet */ + struct freq_ctr call_rate; /* appctx call rate */ + struct list wait_entry; /* entry in a list of waiters for an event (e.g. ring events) */ + + /* The pointer seen by application code is appctx->svcctx. In 2.7 the + * anonymous union and the "ctx" struct disappeared, and the struct + * "svc" became svc_storage, which is never accessed directly by + * application code. Look at "show fd" for an example. + */ + + /* here we have the service's context (CLI command, applet, etc) */ + void *svcctx; /* pointer to a context used by the command, e.g. 
<storage> below */ + struct { + void *shadow; /* shadow of svcctx above, do not use! */ + char storage[APPLET_MAX_SVCCTX]; /* storage of svcctx above */ + } svc; /* generic storage for most commands */ +}; + +#endif /* _HAPROXY_APPLET_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/applet.h b/include/haproxy/applet.h new file mode 100644 index 0000000..b04ffd9 --- /dev/null +++ b/include/haproxy/applet.h @@ -0,0 +1,270 @@ +/* + * include/haproxy/applet.h + * This file contains applet function prototypes + * + * Copyright (C) 2000-2015 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_APPLET_H +#define _HAPROXY_APPLET_H + +#include <stdlib.h> + +#include <haproxy/api.h> +#include <haproxy/applet-t.h> +#include <haproxy/channel.h> +#include <haproxy/list.h> +#include <haproxy/pool.h> +#include <haproxy/sc_strm.h> +#include <haproxy/session.h> +#include <haproxy/stconn.h> +#include <haproxy/task.h> + +extern unsigned int nb_applets; +extern struct pool_head *pool_head_appctx; + +struct task *task_run_applet(struct task *t, void *context, unsigned int state); +int appctx_buf_available(void *arg); +void *applet_reserve_svcctx(struct appctx *appctx, size_t size); +void applet_reset_svcctx(struct appctx *appctx); +void appctx_shut(struct appctx *appctx); + +struct appctx *appctx_new_on(struct applet *applet, struct sedesc *sedesc, int thr); +int appctx_finalize_startup(struct appctx *appctx, struct proxy *px, struct buffer *input); +void appctx_free_on_early_error(struct appctx *appctx); +void appctx_free(struct appctx *appctx); + +static inline struct appctx *appctx_new_here(struct applet *applet, struct sedesc *sedesc) +{ + return appctx_new_on(applet, sedesc, tid); +} + +static inline struct appctx *appctx_new_anywhere(struct applet *applet, struct sedesc *sedesc) +{ + return appctx_new_on(applet, sedesc, -1); +} + +/* Helper function to call .init applet callback function, if it exists. Returns 0 + * on success and -1 on error. + */ +static inline int appctx_init(struct appctx *appctx) +{ + /* Set appctx affinity to the current thread. Because, after this call, + * the appctx will be fully initialized. The session and the stream will + * eventually be created. The affinity must be set now ! 
+ */ + BUG_ON(appctx->t->tid != tid); + task_set_thread(appctx->t, tid); + + if (appctx->applet->init) + return appctx->applet->init(appctx); + return 0; +} + +/* Releases an appctx previously allocated by appctx_new(). */ +static inline void __appctx_free(struct appctx *appctx) +{ + task_destroy(appctx->t); + if (LIST_INLIST(&appctx->buffer_wait.list)) + LIST_DEL_INIT(&appctx->buffer_wait.list); + if (appctx->sess) + session_free(appctx->sess); + BUG_ON(appctx->sedesc && !se_fl_test(appctx->sedesc, SE_FL_ORPHAN)); + sedesc_free(appctx->sedesc); + pool_free(pool_head_appctx, appctx); + _HA_ATOMIC_DEC(&nb_applets); +} + +/* wakes up an applet when conditions have changed. We're using a macro here in + * order to retrieve the caller's place. + */ +#define appctx_wakeup(ctx) \ + _task_wakeup((ctx)->t, TASK_WOKEN_OTHER, MK_CALLER(WAKEUP_TYPE_APPCTX_WAKEUP, 0, 0)) + +/* returns the stream connector the appctx is attached to, via the sedesc */ +static inline struct stconn *appctx_sc(const struct appctx *appctx) +{ + return appctx->sedesc->sc; +} + +/* returns the stream the appctx is attached to. Note that a stream *must* + * be attached, as we use an unchecked dereference via __sc_strm(). + */ +static inline struct stream *appctx_strm(const struct appctx *appctx) +{ + return __sc_strm(appctx->sedesc->sc); +} + +/* The applet announces it has more data to deliver to the stream's input + * buffer. + */ +static inline void applet_have_more_data(struct appctx *appctx) +{ + se_fl_clr(appctx->sedesc, SE_FL_HAVE_NO_DATA); +} + +/* The applet announces it doesn't have more data for the stream's input + * buffer. + */ +static inline void applet_have_no_more_data(struct appctx *appctx) +{ + se_fl_set(appctx->sedesc, SE_FL_HAVE_NO_DATA); +} + +/* The applet indicates that it's ready to consume data from the stream's + * output buffer. 
Rely on the corresponding SE function + */ +static inline void applet_will_consume(struct appctx *appctx) +{ + se_will_consume(appctx->sedesc); +} + +/* The applet indicates that it's not willing to consume data from the stream's + * output buffer. Rely on the corresponding SE function + */ +static inline void applet_wont_consume(struct appctx *appctx) +{ + se_wont_consume(appctx->sedesc); +} + +/* The applet indicates that it's willing to consume data from the stream's + * output buffer, but that there's not enough, so it doesn't want to be woken + * up until more are presented. Rely on the corresponding SE function + */ +static inline void applet_need_more_data(struct appctx *appctx) +{ + se_need_more_data(appctx->sedesc); +} + +/* The applet indicates that it does not expect data from the opposite endpoint. + * This way the stream knows it should not trigger read timeout on the other + * side. + */ +static inline void applet_expect_no_data(struct appctx *appctx) +{ + se_fl_set(appctx->sedesc, SE_FL_EXP_NO_DATA); +} + +/* The applet indicates that it expects data from the opposite endpoint. This + * way the stream knows it may trigger read timeout on the other side. + */ +static inline void applet_expect_data(struct appctx *appctx) +{ + se_fl_clr(appctx->sedesc, SE_FL_EXP_NO_DATA); +} + +/* writes chunk <chunk> into the input channel of the stream attached to this + * appctx's endpoint, and marks the SC_FL_NEED_ROOM on a channel full error. + * See ci_putchk() for the list of return codes. + */ +static inline int applet_putchk(struct appctx *appctx, struct buffer *chunk) +{ + struct sedesc *se = appctx->sedesc; + int ret; + + ret = ci_putchk(sc_ic(se->sc), chunk); + if (ret < 0) { + /* XXX: Handle all errors as a lack of space because callers + * don't handle other cases for now. So applets must be + * careful to handle shutdown (-2) and invalid calls (-3) by + * themselves. 
+ */ + sc_need_room(se->sc, chunk->data); + ret = -1; + } + + return ret; +} + +/* writes <len> chars from <blk> into the input channel of the stream attached + * to this appctx's endpoint, and marks the SC_FL_NEED_ROOM on a channel full + * error. See ci_putblk() for the list of return codes. + */ +static inline int applet_putblk(struct appctx *appctx, const char *blk, int len) +{ + struct sedesc *se = appctx->sedesc; + int ret; + + ret = ci_putblk(sc_ic(se->sc), blk, len); + if (ret < -1) { + /* XXX: Handle all errors as a lack of space because callers + * don't handles other cases for now. So applets must be + * careful to handles shutdown (-2) and invalid calls (-3) by + * themselves. + */ + sc_need_room(se->sc, len); + ret = -1; + } + + return ret; +} + +/* writes chars from <str> up to the trailing zero (excluded) into the input + * channel of the stream attached to this appctx's endpoint, and marks the + * SC_FL_NEED_ROOM on a channel full error. See ci_putstr() for the list of + * return codes. + */ +static inline int applet_putstr(struct appctx *appctx, const char *str) +{ + struct sedesc *se = appctx->sedesc; + int ret; + + ret = ci_putstr(sc_ic(se->sc), str); + if (ret == -1) { + /* XXX: Handle all errors as a lack of space because callers + * don't handles other cases for now. So applets must be + * careful to handles shutdown (-2) and invalid calls (-3) by + * themselves. + */ + sc_need_room(se->sc, strlen(str)); + ret = -1; + } + + return ret; +} + +/* writes character <chr> into the input channel of the stream attached to this + * appctx's endpoint, and marks the SC_FL_NEED_ROOM on a channel full error. + * See ci_putchr() for the list of return codes. + */ +static inline int applet_putchr(struct appctx *appctx, char chr) +{ + struct sedesc *se = appctx->sedesc; + int ret; + + ret = ci_putchr(sc_ic(se->sc), chr); + if (ret == -1) { + /* XXX: Handle all errors as a lack of space because callers + * don't handles other cases for now. 
So applets must be + * careful to handle shutdown (-2) and invalid calls (-3) by + * themselves. + */ + sc_need_room(se->sc, 1); + ret = -1; + } + + return ret; +} + +#endif /* _HAPROXY_APPLET_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/arg-t.h b/include/haproxy/arg-t.h new file mode 100644 index 0000000..d90d326 --- /dev/null +++ b/include/haproxy/arg-t.h @@ -0,0 +1,152 @@ +/* + * include/haproxy/arg-t.h + * This file contains structure declarations for generic argument parsing. + * + * Copyright 2012 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ARG_T_H +#define _HAPROXY_ARG_T_H + +#include <sys/socket.h> +#include <netinet/in.h> + +#include <haproxy/buf-t.h> +#include <haproxy/protobuf-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/vars-t.h> + +/* encoding of each arg type : up to 31 types are supported */ +#define ARGT_BITS 5 +#define ARGT_NBTYPES (1 << ARGT_BITS) +#define ARGT_MASK (ARGT_NBTYPES - 1) + +/* encoding of the arg count : up to 12 args are possible. 4 bits are left + * unused at the top. 
+ */ +#define ARGM_MASK ((1 << ARGM_BITS) - 1) +#define ARGM_BITS 4 +#define ARGM_NBARGS (sizeof(uint64_t) * 8 - ARGM_BITS) / ARGT_BITS + +enum { + ARGT_STOP = 0, /* end of the arg list */ + ARGT_SINT, /* signed 64 bit integer. */ + ARGT_STR, /* string */ + ARGT_IPV4, /* an IPv4 address */ + ARGT_MSK4, /* an IPv4 address mask (integer or dotted), stored as ARGT_IPV4 */ + ARGT_IPV6, /* an IPv6 address */ + ARGT_MSK6, /* an IPv6 address mask (integer or dotted), stored as ARGT_IPV6 */ + ARGT_TIME, /* a delay in ms by default, stored as ARGT_UINT */ + ARGT_SIZE, /* a size in bytes by default, stored as ARGT_UINT */ + ARGT_FE, /* a pointer to a frontend only */ + ARGT_BE, /* a pointer to a backend only */ + ARGT_TAB, /* a pointer to a stick table */ + ARGT_SRV, /* a pointer to a server */ + ARGT_USR, /* a pointer to a user list */ + ARGT_MAP, /* a pointer to a map descriptor */ + ARGT_REG, /* a pointer to a regex */ + ARGT_VAR, /* contains a variable description. */ + ARGT_PBUF_FNUM, /* a protocol buffer field number */ + ARGT_PTR, /* a pointer to opaque data */ + /* please update arg_type_names[] in args.c if you add entries here */ +}; + +/* context where arguments are used, in order to help error reporting */ +enum { + ARGC_ACL = 0, /* ACL */ + ARGC_STK, /* sticking rule */ + ARGC_TRK, /* tracking rule */ + ARGC_LOG, /* log-format */ + ARGC_LOGSD, /* log-format-sd */ + ARGC_HRQ, /* http-request */ + ARGC_HRS, /* http-response */ + ARGC_UIF, /* unique-id-format */ + ARGC_RDR, /* redirect */ + ARGC_CAP, /* capture rule */ + ARGC_SRV, /* server line */ + ARGC_SPOE, /* spoe message args */ + ARGC_UBK, /* use_backend message */ + ARGC_USRV, /* use-server message */ + ARGC_HERR, /* http-error */ + ARGC_OT, /* opentracing scope args */ + ARGC_OPT, /* option directive */ + ARGC_TCO, /* tcp-request connection expression */ + ARGC_TSE, /* tcp-request session expression */ + ARGC_TRQ, /* tcp-request content expression */ + ARGC_TRS, /* tcp-response content expression */ + 
ARGC_TCK, /* tcp-check expression */ + ARGC_CFG, /* configuration expression */ + ARGC_CLI, /* CLI expression*/ +}; + +/* flags used when compiling and executing regex */ +#define ARGF_REG_ICASE 1 +#define ARGF_REG_GLOB 2 + +/* some types that are externally defined */ +struct proxy; +struct server; +struct userlist; +struct my_regex; + +union arg_data { + long long int sint; + struct buffer str; + struct in_addr ipv4; + struct in6_addr ipv6; + struct proxy *prx; /* used for fe, be, tables */ + struct server *srv; + struct stktable *t; + struct userlist *usr; + struct map_descriptor *map; + struct my_regex *reg; + struct pbuf_fid fid; + struct var_desc var; + void *ptr; +}; + +struct arg { + unsigned char type; /* argument type, ARGT_* */ + unsigned char unresolved; /* argument contains a string in <str> that must be resolved and freed */ + unsigned char type_flags; /* type-specific extra flags (eg: case sensitivity for regex), ARGF_* */ + union arg_data data; /* argument data */ +}; + +/* arg lists are used to store information about arguments that could not be + * resolved when parsing the configuration. The head is an arg_list which + * serves as a template to create new entries. Nothing here is allocated, + * so plain copies are OK. 
+ */ +struct arg_list { + struct list list; /* chaining with other arg_list, or list head */ + struct arg *arg; /* pointer to the arg, NULL on list head */ + int arg_pos; /* argument position */ + int ctx; /* context where the arg is used (ARGC_*) */ + const char *kw; /* keyword making use of these args */ + const char *conv; /* conv keyword when in conv, otherwise NULL */ + const char *file; /* file name where the args are referenced */ + int line; /* line number where the args are referenced */ +}; + +#endif /* _HAPROXY_ARG_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/arg.h b/include/haproxy/arg.h new file mode 100644 index 0000000..5fe1888 --- /dev/null +++ b/include/haproxy/arg.h @@ -0,0 +1,94 @@ +/* + * include/haproxy/arg.h + * This file contains functions and macros declarations for generic argument parsing. + * + * Copyright 2012 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ARG_H +#define _HAPROXY_ARG_H + +#include <haproxy/arg-t.h> + +/* Some macros used to build some arg list. We can declare various argument + * combinations from 0 to 12 args using a single 64-bit integer. 
The first + * argument of these macros is always the mandatory number of arguments, and + * remaining ones are the types of the arguments (without the ARGT_ prefix), + * those past the mandatory count being optional. Note: ARGM() may also be + * used to return the number of mandatory arguments in a mask. Its argument is + * parenthesized so that any expression may safely be passed. + */ +#define ARGM(m) \ + ((uint64_t)((m) & ARGM_MASK)) + +#define ARG1(m, t1) \ + (ARGM(m) + ((uint64_t)ARGT_##t1 << (ARGM_BITS))) + +#define ARG2(m, t1, t2) \ + (ARG1(m, t1) + ((uint64_t)ARGT_##t2 << (ARGM_BITS + ARGT_BITS))) + +#define ARG3(m, t1, t2, t3) \ + (ARG2(m, t1, t2) + ((uint64_t)ARGT_##t3 << (ARGM_BITS + ARGT_BITS * 2))) + +#define ARG4(m, t1, t2, t3, t4) \ + (ARG3(m, t1, t2, t3) + ((uint64_t)ARGT_##t4 << (ARGM_BITS + ARGT_BITS * 3))) + +#define ARG5(m, t1, t2, t3, t4, t5) \ + (ARG4(m, t1, t2, t3, t4) + ((uint64_t)ARGT_##t5 << (ARGM_BITS + ARGT_BITS * 4))) + +#define ARG6(m, t1, t2, t3, t4, t5, t6) \ + (ARG5(m, t1, t2, t3, t4, t5) + ((uint64_t)ARGT_##t6 << (ARGM_BITS + ARGT_BITS * 5))) + +#define ARG7(m, t1, t2, t3, t4, t5, t6, t7) \ + (ARG6(m, t1, t2, t3, t4, t5, t6) + ((uint64_t)ARGT_##t7 << (ARGM_BITS + ARGT_BITS * 6))) + +#define ARG8(m, t1, t2, t3, t4, t5, t6, t7, t8) \ + (ARG7(m, t1, t2, t3, t4, t5, t6, t7) + ((uint64_t)ARGT_##t8 << (ARGM_BITS + ARGT_BITS * 7))) + +#define ARG9(m, t1, t2, t3, t4, t5, t6, t7, t8, t9) \ + (ARG8(m, t1, t2, t3, t4, t5, t6, t7, t8) + ((uint64_t)ARGT_##t9 << (ARGM_BITS + ARGT_BITS * 8))) + +#define ARG10(m, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \ + (ARG9(m, t1, t2, t3, t4, t5, t6, t7, t8, t9) + ((uint64_t)ARGT_##t10 << (ARGM_BITS + ARGT_BITS * 9))) + +#define ARG11(m, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \ + (ARG10(m, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) + ((uint64_t)ARGT_##t11 << (ARGM_BITS + ARGT_BITS * 10))) + +#define ARG12(m, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12) \ + (ARG11(m, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) + ((uint64_t)ARGT_##t12 << (ARGM_BITS + ARGT_BITS * 11))) + +/* Mapping between argument number and literal description. 
*/ +extern const char *arg_type_names[]; + +/* This dummy arg list may be used by default when no arg is found, it helps + * parsers by removing pointer checks. + */ +extern struct arg empty_arg_list[ARGM_NBARGS]; + +struct arg_list *arg_list_clone(const struct arg_list *orig); +struct arg_list *arg_list_add(struct arg_list *orig, struct arg *arg, int pos); +int make_arg_list(const char *in, int len, uint64_t mask, struct arg **argp, + char **err_msg, const char **end_ptr, int *err_arg, + struct arg_list *al); +struct arg *free_args(struct arg *args); + +#endif /* _HAPROXY_ARG_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/atomic.h b/include/haproxy/atomic.h new file mode 100644 index 0000000..d64e192 --- /dev/null +++ b/include/haproxy/atomic.h @@ -0,0 +1,897 @@ +/* + * include/haproxy/atomic.h + * Macros and inline functions for thread-safe atomic operations. + * + * Copyright (C) 2017 Christopher Faulet - cfaulet@haproxy.com + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ATOMIC_H +#define _HAPROXY_ATOMIC_H + +#include <haproxy/compiler.h> + +/* A few notes for the macros and functions here: + * - this file is painful to edit, most operations exist in 3 variants, + * no-thread, threads with gcc<4.7, threads with gcc>=4.7. Be careful when + * modifying it not to break any of them. + * + * - macros named HA_ATOMIC_* are for use in the general case, they contain the + * required memory barriers to guarantee sequential consistency + * + * - macros named _HA_ATOMIC_* are the same but without the memory barriers, + * so they may only be used if followed by other HA_ATOMIC_* or within a + * sequence of _HA_ATOMIC_* terminated by a store barrier, or when there is + * no data dependency (e.g. updating a counter). Not all of them are + * implemented, in which case fallbacks to the safe ones are provided. In + * case of doubt, don't use them and use the generic ones instead. + * + * - the __ha_atomic_* barriers are for use around _HA_ATOMIC_* operations. + * Some architectures make them useless and they will automatically be + * dropped in such a case. Don't use them outside of this use case. + * + * - in general, the more underscores you find in front of a function or macro + * name, the riskier it is to use. Barriers are among them because validating + * their usage is not trivial at all and it's often safer to fall back to + * more generic behaviors. + * + * There is also a compiler barrier (__ha_compiler_barrier) which is eliminated + * when threads are disabled. We currently don't have a permanent compiler + * barrier to prevent the compiler from reordering signal-sensitive code for + * example. + */ + + +#ifndef USE_THREAD + +/* Threads are DISABLED, atomic ops are also not used. 
Note that these MUST + * NOT be used for inter-process synchronization nor signal-safe variable + * manipulations which might occur without threads, as they are not atomic. All HA_ATOMIC_* macros below evaluate their pointer argument only once. + */ + +#define HA_ATOMIC_LOAD(val) (*(val)) +#define HA_ATOMIC_STORE(val, new) ({*(val) = (new);}) + +#define HA_ATOMIC_XCHG(val, new) \ + ({ \ + typeof((val)) __p_xchg = (val); \ + typeof(*(val)) __old_xchg = *__p_xchg; \ + *__p_xchg = (new); \ + __old_xchg; \ + }) + +#define HA_ATOMIC_AND(val, flags) do { *(val) &= (flags);} while (0) +#define HA_ATOMIC_OR(val, flags) do { *(val) |= (flags);} while (0) +#define HA_ATOMIC_ADD(val, i) do { *(val) += (i);} while (0) +#define HA_ATOMIC_SUB(val, i) do { *(val) -= (i);} while (0) +#define HA_ATOMIC_INC(val) do { *(val) += 1;} while (0) +#define HA_ATOMIC_DEC(val) do { *(val) -= 1;} while (0) + +#define HA_ATOMIC_AND_FETCH(val, flags) ({ *(val) &= (flags); }) +#define HA_ATOMIC_OR_FETCH(val, flags) ({ *(val) |= (flags); }) +#define HA_ATOMIC_ADD_FETCH(val, i) ({ *(val) += (i); }) +#define HA_ATOMIC_SUB_FETCH(val, i) ({ *(val) -= (i); }) + +#define HA_ATOMIC_FETCH_AND(val, i) \ + ({ \ + typeof((val)) __p_val = (val); \ + typeof(*(val)) __old_val = *__p_val; \ + *__p_val &= (i); \ + __old_val; \ + }) + +#define HA_ATOMIC_FETCH_OR(val, i) \ + ({ \ + typeof((val)) __p_val = (val); \ + typeof(*(val)) __old_val = *__p_val; \ + *__p_val |= (i); \ + __old_val; \ + }) + +#define HA_ATOMIC_FETCH_ADD(val, i) \ + ({ \ + typeof((val)) __p_val = (val); \ + typeof(*(val)) __old_val = *__p_val; \ + *__p_val += (i); \ + __old_val; \ + }) + +#define HA_ATOMIC_FETCH_SUB(val, i) \ + ({ \ + typeof((val)) __p_val = (val); \ + typeof(*(val)) __old_val = *__p_val; \ + *__p_val -= (i); \ + __old_val; \ + }) + +#define HA_ATOMIC_BTS(val, bit) \ + ({ \ + typeof((val)) __p_bts = (val); \ + typeof(*__p_bts) __b_bts = (1UL << (bit)); \ + typeof(*__p_bts) __t_bts = *__p_bts & __b_bts; \ + if (!__t_bts) \ + *__p_bts |= __b_bts; \ + __t_bts; \ + }) + +#define HA_ATOMIC_BTR(val, bit) \ + ({ \ + typeof((val)) __p_btr = (val); \ +
typeof(*__p_btr) __b_btr = (1UL << (bit)); \ + typeof(*__p_btr) __t_btr = *__p_btr & __b_btr; \ + if (__t_btr) \ + *__p_btr &= ~__b_btr; \ + __t_btr; \ + }) + +#define HA_ATOMIC_CAS(val, old, new) \ + ({ \ + typeof(val) _v = (val); \ + typeof(old) _o = (old); \ + (*_v == *_o) ? ((*_v = (new)), 1) : ((*_o = *_v), 0); \ + }) + +/* warning, n is a pointer to the double value for dwcas */ +#define HA_ATOMIC_DWCAS(val, o, n) \ + ({ \ + long *_v = (long*)(val); \ + long *_o = (long*)(o); \ + long *_n = (long*)(n); \ + long _v0 = _v[0], _v1 = _v[1]; \ + (_v0 == _o[0] && _v1 == _o[1]) ? \ + (_v[0] = _n[0], _v[1] = _n[1], 1) : \ + (_o[0] = _v0, _o[1] = _v1, 0); \ + }) + +#define HA_ATOMIC_UPDATE_MAX(val, new) \ + ({ \ + typeof(val) __val = (val); \ + typeof(*(val)) __new_max = (new); \ + \ + if (*__val < __new_max) \ + *__val = __new_max; \ + *__val; \ + }) + +#define HA_ATOMIC_UPDATE_MIN(val, new) \ + ({ \ + typeof(val) __val = (val); \ + typeof(*(val)) __new_min = (new); \ + \ + if (*__val > __new_min) \ + *__val = __new_min; \ + *__val; \ + }) + +/* various barriers */ +#define __ha_barrier_atomic_load() do { } while (0) +#define __ha_barrier_atomic_store() do { } while (0) +#define __ha_barrier_atomic_full() do { } while (0) +#define __ha_barrier_load() do { } while (0) +#define __ha_barrier_store() do { } while (0) +#define __ha_barrier_full() do { } while (0) +#define __ha_compiler_barrier() do { } while (0) +#define __ha_cpu_relax() ({ 1; }) + +#else /* !USE_THREAD */ + +/* Threads are ENABLED, all atomic ops are made thread-safe. By extension they + * can also be used for inter-process synchronization but one must verify that + * the code still builds with threads disabled.
+ */ + +#if defined(__GNUC__) && (__GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ < 7) && !defined(__clang__) +/* gcc < 4.7 */ + +#define HA_ATOMIC_LOAD(val) \ + ({ \ + typeof(*(val)) ret = \ + ({ __sync_synchronize(); *(volatile typeof(val))val; }); \ + __sync_synchronize(); \ + ret; \ + }) + +#define HA_ATOMIC_STORE(val, new) \ + ({ \ + typeof((val)) __val_store = (val); \ + typeof(*(val)) __old_store; \ + typeof((new)) __new_store = (new); \ + do { __old_store = *__val_store; \ + } while (!__sync_bool_compare_and_swap(__val_store, __old_store, __new_store) && __ha_cpu_relax()); \ + }) + +#define HA_ATOMIC_XCHG(val, new) \ + ({ \ + typeof((val)) __val_xchg = (val); \ + typeof(*(val)) __old_xchg; \ + typeof((new)) __new_xchg = (new); \ + do { __old_xchg = *__val_xchg; \ + } while (!__sync_bool_compare_and_swap(__val_xchg, __old_xchg, __new_xchg) && __ha_cpu_relax()); \ + __old_xchg; \ + }) + +#define HA_ATOMIC_AND(val, flags) do { __sync_and_and_fetch(val, flags); } while (0) +#define HA_ATOMIC_OR(val, flags) do { __sync_or_and_fetch(val, flags); } while (0) +#define HA_ATOMIC_ADD(val, i) do { __sync_add_and_fetch(val, i); } while (0) +#define HA_ATOMIC_SUB(val, i) do { __sync_sub_and_fetch(val, i); } while (0) +#define HA_ATOMIC_INC(val) do { __sync_add_and_fetch(val, 1); } while (0) +#define HA_ATOMIC_DEC(val) do { __sync_sub_and_fetch(val, 1); } while (0) + +#define HA_ATOMIC_AND_FETCH(val, flags) __sync_and_and_fetch(val, flags) +#define HA_ATOMIC_OR_FETCH(val, flags) __sync_or_and_fetch(val, flags) +#define HA_ATOMIC_ADD_FETCH(val, i) __sync_add_and_fetch(val, i) +#define HA_ATOMIC_SUB_FETCH(val, i) __sync_sub_and_fetch(val, i) + +#define HA_ATOMIC_FETCH_AND(val, flags) __sync_fetch_and_and(val, flags) +#define HA_ATOMIC_FETCH_OR(val, flags) __sync_fetch_and_or(val, flags) +#define HA_ATOMIC_FETCH_ADD(val, i) __sync_fetch_and_add(val, i) +#define HA_ATOMIC_FETCH_SUB(val, i) __sync_fetch_and_sub(val, i) + +#define HA_ATOMIC_BTS(val, bit) \ + ({ \ + 
typeof(*(val)) __b_bts = (1UL << (bit)); \ + __sync_fetch_and_or((val), __b_bts) & __b_bts; \ + }) + +#define HA_ATOMIC_BTR(val, bit) \ + ({ \ + typeof(*(val)) __b_btr = (1UL << (bit)); \ + __sync_fetch_and_and((val), ~__b_btr) & __b_btr; \ + }) + +/* the CAS is a bit complicated. The older API doesn't support returning the + * value and the swap's result at the same time. So here we take what looks + * like the safest route, consisting in using the boolean version guaranteeing + * that the operation was performed or not, and we snoop a previous value. If + * the compare succeeds, we return. If it fails, we return the previous value, + * but only if it differs from the expected one. If it's the same it's a race + * thus we try again to avoid confusing a possibly sensitive caller. + */ +#define HA_ATOMIC_CAS(val, old, new) \ + ({ \ + typeof((val)) __val_cas = (val); \ + typeof((old)) __oldp_cas = (old); \ + typeof(*(old)) __oldv_cas; \ + typeof((new)) __new_cas = (new); \ + int __ret_cas; \ + do { \ + __oldv_cas = *__val_cas; \ + __ret_cas = __sync_bool_compare_and_swap(__val_cas, *__oldp_cas, __new_cas); \ + } while (!__ret_cas && *__oldp_cas == __oldv_cas && __ha_cpu_relax()); \ + if (!__ret_cas) \ + *__oldp_cas = __oldv_cas; \ + __ret_cas; \ + }) + +/* warning, n is a pointer to the double value for dwcas */ +#define HA_ATOMIC_DWCAS(val, o, n) __ha_cas_dw(val, o, n) + +#define HA_ATOMIC_UPDATE_MAX(val, new) \ + ({ \ + typeof(val) __val = (val); \ + typeof(*(val)) __old_max = *__val; \ + typeof(*(val)) __new_max = (new); \ + \ + while (__old_max < __new_max && \ + !HA_ATOMIC_CAS(__val, &__old_max, __new_max) && __ha_cpu_relax()); \ + *__val; \ + }) + +#define HA_ATOMIC_UPDATE_MIN(val, new) \ + ({ \ + typeof(val) __val = (val); \ + typeof(*(val)) __old_min = *__val; \ + typeof(*(val)) __new_min = (new); \ + \ + while (__old_min > __new_min && \ + !HA_ATOMIC_CAS(__val, &__old_min, __new_min) && __ha_cpu_relax()); \ + *__val; \ + }) + +#else /* gcc */ + +/* gcc >= 
4.7 or clang */ + +#define HA_ATOMIC_STORE(val, new) __atomic_store_n(val, new, __ATOMIC_RELEASE) +#define HA_ATOMIC_LOAD(val) __atomic_load_n(val, __ATOMIC_ACQUIRE) +#define HA_ATOMIC_XCHG(val, new) __atomic_exchange_n(val, new, __ATOMIC_ACQ_REL) + +#define HA_ATOMIC_AND(val, flags) do { __atomic_and_fetch(val, flags, __ATOMIC_SEQ_CST); } while (0) +#define HA_ATOMIC_OR(val, flags) do { __atomic_or_fetch(val, flags, __ATOMIC_SEQ_CST); } while (0) +#define HA_ATOMIC_ADD(val, i) do { __atomic_add_fetch(val, i, __ATOMIC_SEQ_CST); } while (0) +#define HA_ATOMIC_SUB(val, i) do { __atomic_sub_fetch(val, i, __ATOMIC_SEQ_CST); } while (0) +#define HA_ATOMIC_INC(val) do { __atomic_add_fetch(val, 1, __ATOMIC_SEQ_CST); } while (0) +#define HA_ATOMIC_DEC(val) do { __atomic_sub_fetch(val, 1, __ATOMIC_SEQ_CST); } while (0) + +#define HA_ATOMIC_AND_FETCH(val, flags) __atomic_and_fetch(val, flags, __ATOMIC_SEQ_CST) +#define HA_ATOMIC_OR_FETCH(val, flags) __atomic_or_fetch(val, flags, __ATOMIC_SEQ_CST) +#define HA_ATOMIC_ADD_FETCH(val, i) __atomic_add_fetch(val, i, __ATOMIC_SEQ_CST) +#define HA_ATOMIC_SUB_FETCH(val, i) __atomic_sub_fetch(val, i, __ATOMIC_SEQ_CST) + +#define HA_ATOMIC_FETCH_AND(val, flags) __atomic_fetch_and(val, flags, __ATOMIC_SEQ_CST) +#define HA_ATOMIC_FETCH_OR(val, flags) __atomic_fetch_or(val, flags, __ATOMIC_SEQ_CST) +#define HA_ATOMIC_FETCH_ADD(val, i) __atomic_fetch_add(val, i, __ATOMIC_SEQ_CST) +#define HA_ATOMIC_FETCH_SUB(val, i) __atomic_fetch_sub(val, i, __ATOMIC_SEQ_CST) + +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && (defined(__i386__) || defined (__x86_64__)) +#define HA_ATOMIC_BTS(val, bit) \ + ({ \ + unsigned char __ret; \ + if (sizeof(long) == 8 && sizeof(*(val)) == 8) { \ + asm volatile("lock btsq %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned long)(bit)) \ + : "cc"); \ + } else if (sizeof(*(val)) == 4) { \ + asm volatile("lock btsl %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned int)(bit)) \ + : "cc"); \ + } 
else if (sizeof(*(val)) == 2) { \ + asm volatile("lock btsw %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned short)(bit)) \ + : "cc"); \ + } else { \ + typeof(*(val)) __b_bts = (1UL << (bit)); \ + __ret = !!(__atomic_fetch_or((val), __b_bts, __ATOMIC_SEQ_CST) & __b_bts); \ + } \ + __ret; \ + }) + +#define HA_ATOMIC_BTR(val, bit) \ + ({ \ + unsigned char __ret; \ + if (sizeof(long) == 8 && sizeof(*(val)) == 8) { \ + asm volatile("lock btrq %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned long)(bit)) \ + : "cc"); \ + } else if (sizeof(*(val)) == 4) { \ + asm volatile("lock btrl %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned int)(bit)) \ + : "cc"); \ + } else if (sizeof(*(val)) == 2) { \ + asm volatile("lock btrw %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned short)(bit)) \ + : "cc"); \ + } else { \ + typeof(*(val)) __b_bts = (1UL << (bit)); \ + __ret = !!(__atomic_fetch_and((val), ~__b_bts, __ATOMIC_SEQ_CST) & __b_bts); \ + } \ + __ret; \ + }) + +#else // not x86 or !__GCC_ASM_FLAG_OUTPUTS__ + +#define HA_ATOMIC_BTS(val, bit) \ + ({ \ + typeof(*(val)) __b_bts = (1UL << (bit)); \ + __atomic_fetch_or((val), __b_bts, __ATOMIC_SEQ_CST) & __b_bts; \ + }) + +#define HA_ATOMIC_BTR(val, bit) \ + ({ \ + typeof(*(val)) __b_btr = (1UL << (bit)); \ + __atomic_fetch_and((val), ~__b_btr, __ATOMIC_SEQ_CST) & __b_btr; \ + }) + +#endif // x86 || __GCC_ASM_FLAG_OUTPUTS__ + +#define HA_ATOMIC_CAS(val, old, new) __atomic_compare_exchange_n(val, old, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +/* warning, n is a pointer to the double value for dwcas */ +#define HA_ATOMIC_DWCAS(val, o, n) __ha_cas_dw(val, o, n) + +#define HA_ATOMIC_UPDATE_MAX(val, new) \ + ({ \ + typeof(val) __val = (val); \ + typeof(*(val)) __old_max = *__val; \ + typeof(*(val)) __new_max = (new); \ + \ + while (__old_max < __new_max && \ + !HA_ATOMIC_CAS(__val, &__old_max, __new_max) && __ha_cpu_relax()); \ + *__val; \ + }) + +#define 
HA_ATOMIC_UPDATE_MIN(val, new) \ + ({ \ + typeof(val) __val = (val); \ + typeof(*(val)) __old_min = *__val; \ + typeof(*(val)) __new_min = (new); \ + \ + while (__old_min > __new_min && \ + !HA_ATOMIC_CAS(__val, &__old_min, __new_min) && __ha_cpu_relax()); \ + *__val; \ + }) + +/* Modern compilers provide variants that don't generate any memory barrier. + * If you're unsure how to deal with barriers, just use the HA_ATOMIC_* version, + * that will always generate correct code. + * Usually it's fine to use those when updating data that have no dependency, + * ie updating a counter. Otherwise a barrier is required. + */ + +#define _HA_ATOMIC_LOAD(val) __atomic_load_n(val, __ATOMIC_RELAXED) +#define _HA_ATOMIC_STORE(val, new) __atomic_store_n(val, new, __ATOMIC_RELAXED) +#define _HA_ATOMIC_XCHG(val, new) __atomic_exchange_n(val, new, __ATOMIC_RELAXED) + +#define _HA_ATOMIC_AND(val, flags) do { __atomic_and_fetch(val, flags, __ATOMIC_RELAXED); } while (0) +#define _HA_ATOMIC_OR(val, flags) do { __atomic_or_fetch(val, flags, __ATOMIC_RELAXED); } while (0) +#define _HA_ATOMIC_ADD(val, i) do { __atomic_add_fetch(val, i, __ATOMIC_RELAXED); } while (0) +#define _HA_ATOMIC_SUB(val, i) do { __atomic_sub_fetch(val, i, __ATOMIC_RELAXED); } while (0) +#define _HA_ATOMIC_INC(val) do { __atomic_add_fetch(val, 1, __ATOMIC_RELAXED); } while (0) +#define _HA_ATOMIC_DEC(val) do { __atomic_sub_fetch(val, 1, __ATOMIC_RELAXED); } while (0) + +#define _HA_ATOMIC_AND_FETCH(val, flags) __atomic_and_fetch(val, flags, __ATOMIC_RELAXED) +#define _HA_ATOMIC_OR_FETCH(val, flags) __atomic_or_fetch(val, flags, __ATOMIC_RELAXED) +#define _HA_ATOMIC_ADD_FETCH(val, i) __atomic_add_fetch(val, i, __ATOMIC_RELAXED) +#define _HA_ATOMIC_SUB_FETCH(val, i) __atomic_sub_fetch(val, i, __ATOMIC_RELAXED) + +#define _HA_ATOMIC_FETCH_AND(val, flags) __atomic_fetch_and(val, flags, __ATOMIC_RELAXED) +#define _HA_ATOMIC_FETCH_OR(val, flags) __atomic_fetch_or(val, flags, __ATOMIC_RELAXED) +#define 
_HA_ATOMIC_FETCH_ADD(val, i) __atomic_fetch_add(val, i, __ATOMIC_RELAXED) +#define _HA_ATOMIC_FETCH_SUB(val, i) __atomic_fetch_sub(val, i, __ATOMIC_RELAXED) + +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && (defined(__i386__) || defined (__x86_64__)) +#define _HA_ATOMIC_BTS(val, bit) \ + ({ \ + unsigned char __ret; \ + if (sizeof(long) == 8 && sizeof(*(val)) == 8) { \ + asm volatile("lock btsq %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned long)(bit)) \ + : "cc"); \ + } else if (sizeof(*(val)) == 4) { \ + asm volatile("lock btsl %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned int)(bit)) \ + : "cc"); \ + } else if (sizeof(*(val)) == 2) { \ + asm volatile("lock btsw %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned short)(bit)) \ + : "cc"); \ + } else { \ + typeof(*(val)) __b_bts = (1UL << (bit)); \ + __ret = !!(__atomic_fetch_or((val), __b_bts, __ATOMIC_RELAXED) & __b_bts); \ + } \ + __ret; \ + }) + +#define _HA_ATOMIC_BTR(val, bit) \ + ({ \ + unsigned char __ret; \ + if (sizeof(long) == 8 && sizeof(*(val)) == 8) { \ + asm volatile("lock btrq %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned long)(bit)) \ + : "cc"); \ + } else if (sizeof(*(val)) == 4) { \ + asm volatile("lock btrl %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned int)(bit)) \ + : "cc"); \ + } else if (sizeof(*(val)) == 2) { \ + asm volatile("lock btrw %2, %0\n" \ + : "+m" (*(val)), "=@ccc"(__ret) \ + : "Ir" ((unsigned short)(bit)) \ + : "cc"); \ + } else { \ + typeof(*(val)) __b_bts = (1UL << (bit)); \ + __ret = !!(__atomic_fetch_and((val), ~__b_bts, __ATOMIC_RELAXED) & __b_bts); \ + } \ + __ret; \ + }) + +#else // not x86 or !__GCC_ASM_FLAG_OUTPUTS__ + +#define _HA_ATOMIC_BTS(val, bit) \ + ({ \ + typeof(*(val)) __b_bts = (1UL << (bit)); \ + __atomic_fetch_or((val), __b_bts, __ATOMIC_RELAXED) & __b_bts; \ + }) + +#define _HA_ATOMIC_BTR(val, bit) \ + ({ \ + typeof(*(val)) __b_btr = (1UL << (bit)); \ + 
__atomic_fetch_and((val), ~__b_btr, __ATOMIC_RELAXED) & __b_btr; \ + }) +#endif + +#define _HA_ATOMIC_CAS(val, old, new) __atomic_compare_exchange_n(val, old, new, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED) +/* warning, n is a pointer to the double value for dwcas */ +#define _HA_ATOMIC_DWCAS(val, o, n) __ha_cas_dw(val, o, n) + +#endif /* gcc >= 4.7 */ + +/* Here come a few architecture-specific double-word CAS and barrier + * implementations. + */ + +#ifdef __x86_64__ + +static __inline void +__ha_barrier_load(void) +{ + __asm __volatile("" ::: "memory"); +} + +static __inline void +__ha_barrier_store(void) +{ + __asm __volatile("" ::: "memory"); +} + +static __inline void +__ha_barrier_full(void) +{ + __asm __volatile("mfence" ::: "memory"); +} + +/* Use __ha_barrier_atomic* when you're trying to protect data that are + * are modified using _HA_ATOMIC* + */ +static __inline void +__ha_barrier_atomic_load(void) +{ + __asm __volatile("" ::: "memory"); +} + +static __inline void +__ha_barrier_atomic_store(void) +{ + __asm __volatile("" ::: "memory"); +} + +static __inline void +__ha_barrier_atomic_full(void) +{ + __asm __volatile("" ::: "memory"); +} + +static __inline int +__ha_cas_dw(void *target, void *compare, const void *set) +{ + char ret; + + __asm __volatile("lock cmpxchg16b %0; setz %3" + : "+m" (*(void **)target), + "=a" (((void **)compare)[0]), + "=d" (((void **)compare)[1]), + "=q" (ret) + : "a" (((void **)compare)[0]), + "d" (((void **)compare)[1]), + "b" (((const void **)set)[0]), + "c" (((const void **)set)[1]) + : "memory", "cc"); + return (ret); +} + +/* short-lived CPU relaxation */ +#define __ha_cpu_relax() ({ asm volatile("rep;nop\n"); 1; }) + +#elif defined(__arm__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)) + +static __inline void +__ha_barrier_load(void) +{ + __asm __volatile("dmb" ::: "memory"); +} + +static __inline void +__ha_barrier_store(void) +{ + __asm __volatile("dsb" ::: "memory"); +} + +static __inline void 
+__ha_barrier_full(void) +{ + __asm __volatile("dmb" ::: "memory"); +} + +/* Use __ha_barrier_atomic* when you're trying to protect data that are + * are modified using _HA_ATOMIC* + */ +static __inline void +__ha_barrier_atomic_load(void) +{ + __asm __volatile("dmb" ::: "memory"); +} + +static __inline void +__ha_barrier_atomic_store(void) +{ + __asm __volatile("dsb" ::: "memory"); +} + +static __inline void +__ha_barrier_atomic_full(void) +{ + __asm __volatile("dmb" ::: "memory"); +} + +static __inline int __ha_cas_dw(void *target, void *compare, const void *set) +{ + uint64_t previous; + int tmp; + + __asm __volatile("1:" + "ldrexd %0, [%4];" + "cmp %Q0, %Q2;" + "ittt eq;" + "cmpeq %R0, %R2;" + "strexdeq %1, %3, [%4];" + "cmpeq %1, #1;" + "beq 1b;" + : "=&r" (previous), "=&r" (tmp) + : "r" (*(uint64_t *)compare), "r" (*(uint64_t *)set), "r" (target) + : "memory", "cc"); + tmp = (previous == *(uint64_t *)compare); + *(uint64_t *)compare = previous; + return (tmp); +} + +/* short-lived CPU relaxation */ +#define __ha_cpu_relax() ({ asm volatile(""); 1; }) + +#elif defined (__aarch64__) + +static __inline void +__ha_barrier_load(void) +{ + __asm __volatile("dmb ishld" ::: "memory"); +} + +static __inline void +__ha_barrier_store(void) +{ + __asm __volatile("dmb ishst" ::: "memory"); +} + +static __inline void +__ha_barrier_full(void) +{ + __asm __volatile("dmb ish" ::: "memory"); +} + +/* Use __ha_barrier_atomic* when you're trying to protect data that are + * are modified using _HA_ATOMIC* + */ +static __inline void +__ha_barrier_atomic_load(void) +{ + __asm __volatile("dmb ishld" ::: "memory"); +} + +static __inline void +__ha_barrier_atomic_store(void) +{ + __asm __volatile("dmb ishst" ::: "memory"); +} + +static __inline void +__ha_barrier_atomic_full(void) +{ + __asm __volatile("dmb ish" ::: "memory"); +} + +/* short-lived CPU relaxation; this was shown to improve fairness on + * modern ARMv8 cores such as Neoverse N1. 
+ */ +#define __ha_cpu_relax() ({ asm volatile("isb" ::: "memory"); 1; }) + +#if defined(__ARM_FEATURE_ATOMICS) && !defined(__clang__) // ARMv8.1-A atomics + +/* returns 0 on failure, non-zero on success */ +static forceinline int __ha_cas_dw(void *target, void *compare, const void *set) +{ + /* There's no status set by the CASP instruction so we need to keep a + * copy of the original registers and compare them afterwards to detect + * if we could apply the change. In order to pass a pair, we simply map + * a register pair on a struct so that the compiler can emit register + * pairs that we can use thanks to the undocumented "%H" modifier + * mentioned on the link below: + * https://patchwork.ozlabs.org/project/gcc/patch/59368A74.2060908@foss.arm.com/ + */ + struct pair { uint64_t r[2]; }; + register struct pair bck = *(struct pair *)compare; + register struct pair cmp asm("x0") = bck; + register struct pair new asm("x2") = *(const struct pair*)set; + int ret; + + __asm__ __volatile__("casp %0, %H0, %2, %H2, [%1]\n" + : "+r" (cmp) // %0 + : "r" (target), // %1 + "r" (new) // %2 + : "memory"); + + /* if the old value is still the same unchanged, we won, otherwise we + * store the refreshed old value. + */ + ret = cmp.r[0] == bck.r[0] && cmp.r[1] == bck.r[1]; + if (unlikely(!ret)) { + /* update the old value on failure. Note that in this case the + * caller will likely relax and jump backwards so we don't care + * about this cost provided that it doesn't enlarge the fast + * code path. + */ + *(struct pair *)compare = cmp; + } + return ret; +} + +#elif defined(__SIZEOF_INT128__) && defined(__ARM_FEATURE_ATOMICS) // 128-bit and ARMv8.1-A will work (clang, excluded above) + +/* According to https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html + * we can use atomics on __int128.
The availability of CAS is defined there: + * https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + * However these usually involve a function call which can be expensive for some + * cases, but gcc 10.2 and above can reroute the function call to either LL/SC for + * v8.0 or LSE for v8.1+, which allows to use a more scalable version on v8.1+ at + * the extra cost of a function call. + */ + +/* returns 0 on failure, non-zero on success */ +static __inline int __ha_cas_dw(void *target, void *compare, const void *set) +{ + return __atomic_compare_exchange_n((__int128*)target, (__int128*)compare, *(const __int128*)set, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); +} + +#else // neither ARMv8.1-A atomics nor 128-bit atomics + +/* returns 0 on failure, non-zero on success */ +static __inline int __ha_cas_dw(void *target, void *compare, void *set) +{ + void *value[2]; + uint64_t tmp1, tmp2; + + __asm__ __volatile__("1:" + "ldxp %0, %1, [%4]\n" + "mov %2, %0\n" + "mov %3, %1\n" + "eor %0, %0, %5\n" + "eor %1, %1, %6\n" + "orr %1, %0, %1\n" + "mov %w0, #0\n" + "cbnz %1, 2f\n" + "stxp %w0, %7, %8, [%4]\n" + "cbnz %w0, 1b\n" + "mov %w0, #1\n" + "2:" + : "=&r" (tmp1), "=&r" (tmp2), "=&r" (value[0]), "=&r" (value[1]) + : "r" (target), "r" (((void **)(compare))[0]), "r" (((void **)(compare))[1]), "r" (((void **)(set))[0]), "r" (((void **)(set))[1]) + : "cc", "memory"); + + ((void **)(compare))[0] = value[0]; + ((void **)(compare))[1] = value[1]; + return (tmp1); +} +#endif // ARMv8.1-A atomics + +#else /* unknown / unhandled architecture, fall back to generic barriers */ + +#define __ha_barrier_atomic_load __sync_synchronize +#define __ha_barrier_atomic_store __sync_synchronize +#define __ha_barrier_atomic_full __sync_synchronize +#define __ha_barrier_load __sync_synchronize +#define __ha_barrier_store __sync_synchronize +#define __ha_barrier_full __sync_synchronize +/* Note: there is no generic DWCAS */ + +/* short-lived CPU relaxation */ +#define __ha_cpu_relax() ({ 
asm volatile(""); 1; }) + +#endif /* end of arch-specific barrier/dwcas */ + +static inline void __ha_compiler_barrier(void) +{ + __asm __volatile("" ::: "memory"); +} + +#endif /* USE_THREAD */ + + +/* fallbacks to remap all undefined _HA_ATOMIC_* on to their safe equivalent */ +#ifndef _HA_ATOMIC_BTR +#define _HA_ATOMIC_BTR HA_ATOMIC_BTR +#endif /* !_HA_ATOMIC_BTR */ + +#ifndef _HA_ATOMIC_BTS +#define _HA_ATOMIC_BTS HA_ATOMIC_BTS +#endif /* !_HA_ATOMIC_BTS */ + +#ifndef _HA_ATOMIC_CAS +#define _HA_ATOMIC_CAS HA_ATOMIC_CAS +#endif /* !_HA_ATOMIC_CAS */ + +#ifndef _HA_ATOMIC_DWCAS +#define _HA_ATOMIC_DWCAS HA_ATOMIC_DWCAS +#endif /* !_HA_ATOMIC_CAS */ + +#ifndef _HA_ATOMIC_ADD +#define _HA_ATOMIC_ADD HA_ATOMIC_ADD +#endif /* !_HA_ATOMIC_ADD */ + +#ifndef _HA_ATOMIC_ADD_FETCH +#define _HA_ATOMIC_ADD_FETCH HA_ATOMIC_ADD_FETCH +#endif /* !_HA_ATOMIC_ADD_FETCH */ + +#ifndef _HA_ATOMIC_FETCH_ADD +#define _HA_ATOMIC_FETCH_ADD HA_ATOMIC_FETCH_ADD +#endif /* !_HA_ATOMIC_FETCH_ADD */ + +#ifndef _HA_ATOMIC_SUB +#define _HA_ATOMIC_SUB HA_ATOMIC_SUB +#endif /* !_HA_ATOMIC_SUB */ + +#ifndef _HA_ATOMIC_SUB_FETCH +#define _HA_ATOMIC_SUB_FETCH HA_ATOMIC_SUB_FETCH +#endif /* !_HA_ATOMIC_SUB_FETCH */ + +#ifndef _HA_ATOMIC_FETCH_SUB +#define _HA_ATOMIC_FETCH_SUB HA_ATOMIC_FETCH_SUB +#endif /* !_HA_ATOMIC_FETCH_SUB */ + +#ifndef _HA_ATOMIC_INC +#define _HA_ATOMIC_INC HA_ATOMIC_INC +#endif /* !_HA_ATOMIC_INC */ + +#ifndef _HA_ATOMIC_DEC +#define _HA_ATOMIC_DEC HA_ATOMIC_DEC +#endif /* !_HA_ATOMIC_DEC */ + +#ifndef _HA_ATOMIC_AND +#define _HA_ATOMIC_AND HA_ATOMIC_AND +#endif /* !_HA_ATOMIC_AND */ + +#ifndef _HA_ATOMIC_AND_FETCH +#define _HA_ATOMIC_AND_FETCH HA_ATOMIC_AND_FETCH +#endif /* !_HA_ATOMIC_AND_FETCH */ + +#ifndef _HA_ATOMIC_FETCH_AND +#define _HA_ATOMIC_FETCH_AND HA_ATOMIC_FETCH_AND +#endif /* !_HA_ATOMIC_FETCH_AND */ + +#ifndef _HA_ATOMIC_OR +#define _HA_ATOMIC_OR HA_ATOMIC_OR +#endif /* !_HA_ATOMIC_OR */ + +#ifndef _HA_ATOMIC_OR_FETCH +#define _HA_ATOMIC_OR_FETCH 
HA_ATOMIC_OR_FETCH +#endif /* !_HA_ATOMIC_OR_FETCH */ + +#ifndef _HA_ATOMIC_FETCH_OR +#define _HA_ATOMIC_FETCH_OR HA_ATOMIC_FETCH_OR +#endif /* !_HA_ATOMIC_FETCH_OR */ + +#ifndef _HA_ATOMIC_XCHG +#define _HA_ATOMIC_XCHG HA_ATOMIC_XCHG +#endif /* !_HA_ATOMIC_XCHG */ + +#ifndef _HA_ATOMIC_STORE +#define _HA_ATOMIC_STORE HA_ATOMIC_STORE +#endif /* !_HA_ATOMIC_STORE */ + +#ifndef _HA_ATOMIC_LOAD +#define _HA_ATOMIC_LOAD HA_ATOMIC_LOAD +#endif /* !_HA_ATOMIC_LOAD */ + +#endif /* _HAPROXY_ATOMIC_H */ diff --git a/include/haproxy/auth-t.h b/include/haproxy/auth-t.h new file mode 100644 index 0000000..35a1ff6 --- /dev/null +++ b/include/haproxy/auth-t.h @@ -0,0 +1,57 @@ +/* + * include/haproxy/auth-t.h + * Types definitions for user authentication & authorization. + * + * Copyright 2010 Krzysztof Piotr Oledzki <ole@ans.pl> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_AUTH_T_H +#define _HAPROXY_AUTH_T_H + +#include <haproxy/api-t.h> + +#define AU_O_INSECURE 0x00000001 /* insecure, unencrypted password */ + +struct auth_groups { + struct auth_groups *next; + char *name; + char *groupusers; /* Just used during the configuration parsing. */ +}; + +struct auth_groups_list { + struct auth_groups_list *next; + struct auth_groups *group; +}; + +struct auth_users { + struct auth_users *next; + unsigned int flags; + char *user, *pass; + union { + char *groups_names; /* Just used during the configuration parsing. 
*/ + struct auth_groups_list *groups; + } u; +}; + +struct userlist { + struct userlist *next; + char *name; + struct auth_users *users; + struct auth_groups *groups; +}; + +#endif /* _HAPROXY_AUTH_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ + diff --git a/include/haproxy/auth.h b/include/haproxy/auth.h new file mode 100644 index 0000000..2fe2b35 --- /dev/null +++ b/include/haproxy/auth.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/auth.h + * Functions for user authentication & authorization. + * + * Copyright 2010 Krzysztof Piotr Oledzki <ole@ans.pl> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_AUTH_H +#define _HAPROXY_AUTH_H + +#include <haproxy/api.h> +#include <haproxy/auth-t.h> +#include <haproxy/pattern-t.h> +#include <haproxy/sample-t.h> + +extern struct userlist *userlist; + +struct userlist *auth_find_userlist(char *name); +unsigned int auth_resolve_groups(struct userlist *l, char *groups); +int userlist_postinit(); +void userlist_free(struct userlist *ul); +struct pattern *pat_match_auth(struct sample *smp, struct pattern_expr *expr, int fill); +int check_user(struct userlist *ul, const char *user, const char *pass); +int check_group(struct userlist *ul, char *name); + +#endif /* _HAPROXY_AUTH_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ + diff --git a/include/haproxy/backend-t.h b/include/haproxy/backend-t.h new file mode 100644 index 0000000..02a2cc5 --- /dev/null +++ b/include/haproxy/backend-t.h @@ -0,0 +1,191 @@ +/* + * include/haproxy/backend-t.h + * This file assembles definitions for backends + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + 
* modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_BACKEND_T_H +#define _HAPROXY_BACKEND_T_H + +#include <haproxy/api-t.h> +#include <haproxy/lb_chash-t.h> +#include <haproxy/lb_fas-t.h> +#include <haproxy/lb_fwlc-t.h> +#include <haproxy/lb_fwrr-t.h> +#include <haproxy/lb_map-t.h> +#include <haproxy/server-t.h> +#include <haproxy/thread-t.h> + +/* Parameters for lbprm.algo */ + +/* Lower bits define the kind of load balancing method, which means the type of + * algorithm, and which criterion it is based on. For this reason, those bits + * also include information about dependencies, so that the config parser can + * detect incompatibilities. + */ + +/* LB parameters are on the lower 8 bits. Depends on the LB kind. 
*/ + +/* BE_LB_HASH_* is used with BE_LB_KIND_HI */ +#define BE_LB_HASH_SRC 0x00000000 /* hash source IP */ +#define BE_LB_HASH_URI 0x00000001 /* hash HTTP URI */ +#define BE_LB_HASH_PRM 0x00000002 /* hash HTTP URL parameter */ +#define BE_LB_HASH_HDR 0x00000003 /* hash HTTP header value */ +#define BE_LB_HASH_RDP 0x00000004 /* hash RDP cookie value */ +#define BE_LB_HASH_SMP 0x00000005 /* hash a sample expression */ + +/* BE_LB_RR_* is used with BE_LB_KIND_RR */ +#define BE_LB_RR_DYN 0x00000000 /* dynamic round robin (default) */ +#define BE_LB_RR_STATIC 0x00000001 /* static round robin */ +#define BE_LB_RR_RANDOM 0x00000002 /* random round robin */ + +/* BE_LB_CB_* is used with BE_LB_KIND_CB */ +#define BE_LB_CB_LC 0x00000000 /* least-connections */ +#define BE_LB_CB_FAS 0x00000001 /* first available server (opposite of leastconn) */ + +#define BE_LB_PARM 0x000000FF /* mask to get/clear the LB param */ + +/* Required input(s) */ +#define BE_LB_NEED_NONE 0x00000000 /* no input needed */ +#define BE_LB_NEED_ADDR 0x00000100 /* only source address needed */ +#define BE_LB_NEED_DATA 0x00000200 /* some payload is needed */ +#define BE_LB_NEED_HTTP 0x00000400 /* an HTTP request is needed */ +#define BE_LB_NEED_LOG 0x00000800 /* LOG backend required */ +#define BE_LB_NEED 0x0000FF00 /* mask to get/clear dependencies */ + +/* Algorithm */ +#define BE_LB_KIND_NONE 0x00000000 /* algorithm not set */ +#define BE_LB_KIND_RR 0x00010000 /* round-robin */ +#define BE_LB_KIND_CB 0x00020000 /* connection-based */ +#define BE_LB_KIND_HI 0x00030000 /* hash of input (see hash inputs above) */ +#define BE_LB_KIND 0x00070000 /* mask to get/clear LB algorithm */ + +/* All known variants of load balancing algorithms. These can be cleared using + * the BE_LB_ALGO mask. For a check, using BE_LB_KIND is preferred. 
+ */ +#define BE_LB_ALGO_NONE (BE_LB_KIND_NONE | BE_LB_NEED_NONE) /* not defined */ +#define BE_LB_ALGO_RR (BE_LB_KIND_RR | BE_LB_NEED_NONE) /* round robin */ +#define BE_LB_ALGO_RND (BE_LB_KIND_RR | BE_LB_NEED_NONE | BE_LB_RR_RANDOM) /* random value */ +#define BE_LB_ALGO_LC (BE_LB_KIND_CB | BE_LB_NEED_NONE | BE_LB_CB_LC) /* least connections */ +#define BE_LB_ALGO_FAS (BE_LB_KIND_CB | BE_LB_NEED_NONE | BE_LB_CB_FAS) /* first available server */ +#define BE_LB_ALGO_SRR (BE_LB_KIND_RR | BE_LB_NEED_NONE | BE_LB_RR_STATIC) /* static round robin */ +#define BE_LB_ALGO_SH (BE_LB_KIND_HI | BE_LB_NEED_ADDR | BE_LB_HASH_SRC) /* hash: source IP */ +#define BE_LB_ALGO_UH (BE_LB_KIND_HI | BE_LB_NEED_HTTP | BE_LB_HASH_URI) /* hash: HTTP URI */ +#define BE_LB_ALGO_PH (BE_LB_KIND_HI | BE_LB_NEED_HTTP | BE_LB_HASH_PRM) /* hash: HTTP URL parameter */ +#define BE_LB_ALGO_HH (BE_LB_KIND_HI | BE_LB_NEED_HTTP | BE_LB_HASH_HDR) /* hash: HTTP header value */ +#define BE_LB_ALGO_RCH (BE_LB_KIND_HI | BE_LB_NEED_DATA | BE_LB_HASH_RDP) /* hash: RDP cookie value */ +#define BE_LB_ALGO_SMP (BE_LB_KIND_HI | BE_LB_NEED_DATA | BE_LB_HASH_SMP) /* hash: sample expression */ +#define BE_LB_ALGO_LH (BE_LB_KIND_HI | BE_LB_NEED_LOG | BE_LB_HASH_SMP) /* log hash: sample expression */ +#define BE_LB_ALGO_LS (BE_LB_KIND_CB | BE_LB_NEED_LOG | BE_LB_CB_FAS) /* log sticky */ +#define BE_LB_ALGO (BE_LB_KIND | BE_LB_NEED | BE_LB_PARM ) /* mask to clear algo */ + +/* Higher bits define how a given criterion is mapped to a server. In fact it + * designates the LB function by itself. The dynamic algorithms will also have + * the DYN bit set. These flags are automatically set at the end of the parsing. 
+ */ +#define BE_LB_LKUP_NONE 0x00000000 /* not defined */ +#define BE_LB_LKUP_MAP 0x00100000 /* static map based lookup */ +#define BE_LB_LKUP_RRTREE 0x00200000 /* FWRR tree lookup */ +#define BE_LB_LKUP_LCTREE 0x00300000 /* FWLC tree lookup */ +#define BE_LB_LKUP_CHTREE 0x00400000 /* consistent hash */ +#define BE_LB_LKUP_FSTREE 0x00500000 /* FAS tree lookup */ +#define BE_LB_LKUP 0x00700000 /* mask to get just the LKUP value */ + +/* additional properties */ +#define BE_LB_PROP_DYN 0x00800000 /* bit to indicate a dynamic algorithm */ + +/* hash types */ +#define BE_LB_HASH_MAP 0x00000000 /* map-based hash (default) */ +#define BE_LB_HASH_CONS 0x01000000 /* consistent hash (alternative to map-based) */ +#define BE_LB_HASH_TYPE 0x01000000 /* get/clear hash types */ + +/* additional modifier on top of the hash function (only avalanche right now) */ +#define BE_LB_HMOD_AVAL 0x02000000 /* avalanche modifier */ +#define BE_LB_HASH_MOD 0x02000000 /* get/clear hash modifier */ + +/* BE_LB_HFCN_* is the hash function, to be used with BE_LB_HASH_FUNC */ +#define BE_LB_HFCN_SDBM 0x00000000 /* sdbm hash */ +#define BE_LB_HFCN_DJB2 0x04000000 /* djb2 hash */ +#define BE_LB_HFCN_WT6 0x08000000 /* wt6 hash */ +#define BE_LB_HFCN_CRC32 0x0C000000 /* crc32 hash */ +#define BE_LB_HFCN_NONE 0x10000000 /* none - no hash */ +#define BE_LB_HASH_FUNC 0x1C000000 /* get/clear hash function */ + + +/* various constants */ + +/* The scale factor between user weight and effective weight allows smooth + * weight modulation even with small weights (eg: 1). It should not be too high + * though because it limits the number of servers in FWRR mode in order to + * prevent any integer overflow. The max number of servers per backend is + * limited to about (2^32-1)/256^2/scale ~= 65535.9999/scale. A scale of 16 + * looks like a good value, as it allows 4095 servers per backend while leaving + * modulation steps of about 6% for servers with the lowest weight (1). 
+ */ +#define BE_WEIGHT_SCALE 16 + +/* LB parameters for all algorithms */ +struct lbprm { + union { /* LB parameters depending on the algo type */ + struct lb_map map; + struct lb_fwrr fwrr; + struct lb_fwlc fwlc; + struct lb_chash chash; + struct lb_fas fas; + struct { + struct server **srv; /* array containing in-use log servers */ + struct list avail; /* servers available for lb are registered in this list */ + uint32_t lastid; /* last relative id used */ + } log; /* used in log-balancing context (PR_MODE_SYSLOG backend) */ + }; + uint32_t algo; /* load balancing algorithm and variants: BE_LB_* */ + int tot_wact, tot_wbck; /* total effective weights of active and backup servers */ + int tot_weight; /* total effective weight of servers participating to LB */ + int tot_uweight; /* total user weight of servers participating to LB (for reporting) */ + int tot_used; /* total number of servers used for LB */ + int wmult; /* ratio between user weight and effective weight */ + int wdiv; /* ratio between effective weight and user weight */ + int hash_balance_factor; /* load balancing factor * 100, 0 if disabled */ + struct sample_expr *expr; /* sample expression for "balance hash" */ + char *arg_str; /* name of the URL parameter/header/cookie used for hashing */ + int arg_len; /* strlen(arg_str), computed only once */ + int arg_opt1; /* extra option 1 for the LB algo (algo-specific) */ + int arg_opt2; /* extra option 2 for the LB algo (algo-specific) */ + int arg_opt3; /* extra option 3 for the LB algo (algo-specific) */ + __decl_thread(HA_RWLOCK_T lock); + struct server *fbck; /* first backup server when !PR_O_USE_ALL_BK, or NULL */ + + /* Call backs for some actions. Any of them may be NULL (thus should be ignored). + * Those marked "srvlock" will need to be called with the server lock held. + * The other ones might take it themselves if needed. 
+ */ + void (*update_server_eweight)(struct server *); /* to be called after eweight change // srvlock */ + void (*set_server_status_up)(struct server *); /* to be called after status changes to UP // srvlock */ + void (*set_server_status_down)(struct server *); /* to be called after status changes to DOWN // srvlock */ + void (*server_take_conn)(struct server *); /* to be called when connection is assigned */ + void (*server_drop_conn)(struct server *); /* to be called when connection is dropped */ +}; + +#endif /* _HAPROXY_BACKEND_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/backend.h b/include/haproxy/backend.h new file mode 100644 index 0000000..4ab9170 --- /dev/null +++ b/include/haproxy/backend.h @@ -0,0 +1,158 @@ +/* + * include/haproxy/backend.h + * Functions prototypes for the backend. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_BACKEND_H +#define _HAPROXY_BACKEND_H + +#include <haproxy/api.h> +#include <haproxy/backend-t.h> +#include <haproxy/clock.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +#include <haproxy/stream-t.h> +#include <haproxy/time.h> + +int assign_server(struct stream *s); +int assign_server_address(struct stream *s); +int assign_server_and_queue(struct stream *s); +int alloc_bind_address(struct sockaddr_storage **ss, + struct server *srv, struct proxy *be, + struct stream *s); +int srv_redispatch_connect(struct stream *t); +void back_try_conn_req(struct stream *s); +void back_handle_st_req(struct stream *s); +void back_handle_st_con(struct stream *s); +void back_handle_st_rdy(struct stream *s); +void back_handle_st_cer(struct stream *s); + +const char *backend_lb_algo_str(int algo); +int backend_parse_balance(const char **args, char **err, struct proxy *curproxy); +int tcp_persist_rdp_cookie(struct stream *s, struct channel *req, int an_bit); + +int be_downtime(struct proxy *px); +void recount_servers(struct proxy *px); +void update_backend_weight(struct proxy *px); +int be_lastsession(const struct proxy *be); + +/* Returns number of usable servers in backend */ +static inline int be_usable_srv(struct proxy *be) +{ + if (be->flags & PR_FL_DISABLED) + return 0; + else if (be->srv_act) + return be->srv_act; + else if (be->lbprm.fbck) + return 1; + else + return be->srv_bck; +} + +/* set the time of last session on the backend */ +static inline void be_set_sess_last(struct proxy *be) +{ + be->be_counters.last_sess = ns_to_sec(now_ns); +} + +/* This function returns non-zero if the designated server will be + * usable for LB according to pending weight and state. + * Otherwise it returns 0. 
+ */ +static inline int srv_willbe_usable(const struct server *srv) +{ + enum srv_state state = srv->next_state; + + if (!srv->next_eweight) + return 0; + if (srv->next_admin & SRV_ADMF_MAINT) + return 0; + if (srv->next_admin & SRV_ADMF_DRAIN) + return 0; + switch (state) { + case SRV_ST_STARTING: + case SRV_ST_RUNNING: + return 1; + case SRV_ST_STOPPING: + case SRV_ST_STOPPED: + return 0; + } + return 0; +} + +/* This function returns non-zero if the designated server was usable for LB + * according to its current weight and state. Otherwise it returns 0. + */ +static inline int srv_currently_usable(const struct server *srv) +{ + enum srv_state state = srv->cur_state; + + if (!srv->cur_eweight) + return 0; + if (srv->cur_admin & SRV_ADMF_MAINT) + return 0; + if (srv->cur_admin & SRV_ADMF_DRAIN) + return 0; + switch (state) { + case SRV_ST_STARTING: + case SRV_ST_RUNNING: + return 1; + case SRV_ST_STOPPING: + case SRV_ST_STOPPED: + return 0; + } + return 0; +} + +/* This function commits the next server state and weight onto the current + * ones in order to detect future changes. The server's lock is expected to + * be held when calling this function. + */ +static inline void srv_lb_commit_status(struct server *srv) +{ + srv->cur_state = srv->next_state; + srv->cur_admin = srv->next_admin; + srv->cur_eweight = srv->next_eweight; +} + +/* This function returns true when a server has experienced a change since last + * commit on its state or weight, otherwise zero. + */ +static inline int srv_lb_status_changed(const struct server *srv) +{ + return (srv->next_state != srv->cur_state || + srv->next_admin != srv->cur_admin || + srv->next_eweight != srv->cur_eweight); +} + +/* sends a log message when a backend goes down, and also sets last + * change date. 
+ */ +void set_backend_down(struct proxy *be); + +unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len); + +#endif /* _HAPROXY_BACKEND_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/base64.h b/include/haproxy/base64.h new file mode 100644 index 0000000..ace6063 --- /dev/null +++ b/include/haproxy/base64.h @@ -0,0 +1,28 @@ +/* + * include/haproxy/base64.h + * Ascii to Base64 conversion as described in RFC1421. + * + * Copyright 2006-2020 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_BASE64_H +#define _HAPROXY_BASE64_H + +#include <haproxy/api.h> + +int a2base64(char *in, int ilen, char *out, int olen); +int a2base64url(const char *in, size_t ilen, char *out, size_t olen); +int base64dec(const char *in, size_t ilen, char *out, size_t olen); +int base64urldec(const char *in, size_t ilen, char *out, size_t olen); +const char *s30tob64(int in, char *out); +int b64tos30(const char *in); + +extern const char base64tab[]; + +#endif /* _HAPROXY_BASE64_H */ diff --git a/include/haproxy/buf-t.h b/include/haproxy/buf-t.h new file mode 100644 index 0000000..3c0f8b5 --- /dev/null +++ b/include/haproxy/buf-t.h @@ -0,0 +1,62 @@ +/* + * include/haproxy/buf-t.h + * Simple buffer handling - types definitions. 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_BUF_T_H +#define _HAPROXY_BUF_T_H + +#include <haproxy/api-t.h> + +/* Structure defining a buffer's head */ +struct buffer { + size_t size; /* buffer size in bytes */ + char *area; /* points to <size> bytes */ + size_t data; /* amount of data after head including wrapping */ + size_t head; /* start offset of remaining data relative to area */ +}; + +/* A buffer may be in 3 different states : + * - unallocated : size == 0, area == 0 (b_is_null() is true) + * - waiting : size == 0, area != 0 (b_is_null() is true) + * - allocated : size > 0, area > 0 (b_is_null() is false) + */ + +/* initializers for certain buffer states. It is important that the NULL buffer + * remains the one with all fields initialized to zero so that a calloc() or a + * memset() on a struct automatically sets a NULL buffer. 
+ */ +#define BUF_NULL ((struct buffer){ }) +#define BUF_WANTED ((struct buffer){ .area = (char *)1 }) +#define BUF_RING ((struct buffer){ .area = (char *)2 }) + +#endif /* _HAPROXY_BUF_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/buf.h b/include/haproxy/buf.h new file mode 100644 index 0000000..e98161e --- /dev/null +++ b/include/haproxy/buf.h @@ -0,0 +1,1161 @@ +/* + * include/haproxy/buf.h + * Simple buffer handling - functions definitions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_BUF_H +#define _HAPROXY_BUF_H + +#include <sys/types.h> +#include <string.h> +#include <haproxy/api.h> +#include <haproxy/buf-t.h> + +/***************************************************************************/ +/* Functions used to compute offsets and pointers. 
Most of them exist in */ +/* both wrapping-safe and unchecked ("__" prefix) variants. Some returning */ +/* a pointer are also provided with an "_ofs" suffix when they return an */ +/* offset relative to the storage area. */ +/***************************************************************************/ + +/* b_is_null() : returns true if (and only if) the buffer is not yet allocated + * and thus has an empty size. Its pointer may then be anything, including NULL + * (unallocated) or an invalid pointer such as (char*)1 (allocation pending). + */ +static inline int b_is_null(const struct buffer *buf) +{ + return buf->size == 0; +} + +/* b_orig() : returns the pointer to the origin of the storage, which is the + * location of byte at offset zero. This is mostly used by functions which + * handle the wrapping by themselves. + */ +static inline char *b_orig(const struct buffer *b) +{ + return b->area; +} + +/* b_size() : returns the size of the buffer. */ +static inline size_t b_size(const struct buffer *b) +{ + return b->size; +} + +/* b_wrap() : returns the pointer to the wrapping position of the buffer area, + * which is by definition the first byte not part of the buffer. + */ +static inline char *b_wrap(const struct buffer *b) +{ + return b->area + b->size; +} + +/* b_data() : returns the number of bytes present in the buffer. */ +static inline size_t b_data(const struct buffer *b) +{ + return b->data; +} + +/* b_room() : returns the amount of room left in the buffer */ +static inline size_t b_room(const struct buffer *b) +{ + BUG_ON_HOT(b->data > b->size); + return b->size - b_data(b); +} + +/* b_full() : returns true if the buffer is full. */ +static inline size_t b_full(const struct buffer *b) +{ + return !b_room(b); +} + + +/* b_stop() : returns the pointer to the byte following the end of the buffer, + * which may be out of the buffer if the buffer ends on the last byte of the + * area. 
+ */ +static inline size_t __b_stop_ofs(const struct buffer *b) +{ + return b->head + b->data; +} + +static inline const char *__b_stop(const struct buffer *b) +{ + return b_orig(b) + __b_stop_ofs(b); +} + +static inline size_t b_stop_ofs(const struct buffer *b) +{ + size_t stop = __b_stop_ofs(b); + + if (stop > b->size) + stop -= b->size; + return stop; +} + +static inline const char *b_stop(const struct buffer *b) +{ + return b_orig(b) + b_stop_ofs(b); +} + + +/* b_peek() : returns a pointer to the data at position <ofs> relative to the + * head of the buffer. Will typically point to input data if called with the + * amount of output data. The wrapped versions will only support wrapping once + * before the beginning or after the end. + */ +static inline size_t __b_peek_ofs(const struct buffer *b, size_t ofs) +{ + return b->head + ofs; +} + +static inline char *__b_peek(const struct buffer *b, size_t ofs) +{ + return b_orig(b) + __b_peek_ofs(b, ofs); +} + +static inline size_t b_peek_ofs(const struct buffer *b, size_t ofs) +{ + size_t ret = __b_peek_ofs(b, ofs); + + if (ret >= b->size) + ret -= b->size; + + return ret; +} + +static inline char *b_peek(const struct buffer *b, size_t ofs) +{ + return b_orig(b) + b_peek_ofs(b, ofs); +} + + +/* b_head() : returns the pointer to the buffer's head, which is the location + * of the next byte to be dequeued. Note that for buffers of size zero, the + * returned pointer may be outside of the buffer or even invalid. + */ +static inline size_t __b_head_ofs(const struct buffer *b) +{ + return b->head; +} + +static inline char *__b_head(const struct buffer *b) +{ + return b_orig(b) + __b_head_ofs(b); +} + +static inline size_t b_head_ofs(const struct buffer *b) +{ + return __b_head_ofs(b); +} + +static inline char *b_head(const struct buffer *b) +{ + return __b_head(b); +} + + +/* b_tail() : returns the pointer to the tail of the buffer, which is the + * location of the first byte where it is possible to enqueue new data. 
Note + * that for buffers of size zero, the returned pointer may be outside of the + * buffer or even invalid. + */ +static inline size_t __b_tail_ofs(const struct buffer *b) +{ + return __b_peek_ofs(b, b_data(b)); +} + +static inline char *__b_tail(const struct buffer *b) +{ + return __b_peek(b, b_data(b)); +} + +static inline size_t b_tail_ofs(const struct buffer *b) +{ + return b_peek_ofs(b, b_data(b)); +} + +static inline char *b_tail(const struct buffer *b) +{ + return b_peek(b, b_data(b)); +} + + +/* b_next() : for an absolute pointer <p> or a relative offset <o> pointing to + * a valid location within buffer <b>, returns either the absolute pointer or + * the relative offset pointing to the next byte, which usually is at (p + 1) + * unless p reaches the wrapping point and wrapping is needed. + */ +static inline size_t b_next_ofs(const struct buffer *b, size_t o) +{ + o++; + BUG_ON_HOT(o > b->size); + if (o == b->size) + o = 0; + return o; +} + +static inline char *b_next(const struct buffer *b, const char *p) +{ + p++; + BUG_ON_HOT(p > b_wrap(b)); + if (p == b_wrap(b)) + p = b_orig(b); + return (char *)p; +} + +/* b_dist() : returns the distance between two pointers, taking into account + * the ability to wrap around the buffer's end. The operation is not defined if + * either of the pointers does not belong to the buffer or if their distance is + * greater than the buffer's size. + */ +static inline size_t b_dist(const struct buffer *b, const char *from, const char *to) +{ + ssize_t dist = to - from; + + BUG_ON_HOT((dist > 0 && dist > b_size(b)) || (dist < 0 && -dist > b_size(b))); + dist += dist < 0 ? b_size(b) : 0; + return dist; +} + +/* b_almost_full() : returns 1 if the buffer uses at least 3/4 of its capacity, + * otherwise zero. Buffers of size zero are considered full. 
+ */ +static inline int b_almost_full(const struct buffer *b) +{ + BUG_ON_HOT(b->data > b->size); + return b_data(b) >= b_size(b) * 3 / 4; +} + +/* b_space_wraps() : returns non-zero only if the buffer's free space wraps : + * [ |xxxx| ] => yes + * [xxxx| ] => no + * [ |xxxx] => no + * [xxxx| |xxxx] => no + * [xxxxxxxxxx|xxxxxxxxxxx] => no + * + * So the only case where the buffer does not wrap is when there's data either + * at the beginning or at the end of the buffer. Thus we have this : + * - if (head <= 0) ==> doesn't wrap + * - if (tail >= size) ==> doesn't wrap + * - otherwise wraps + */ +static inline int b_space_wraps(const struct buffer *b) +{ + BUG_ON_HOT(b->data > b->size); + if ((ssize_t)__b_head_ofs(b) <= 0) + return 0; + if (__b_tail_ofs(b) >= b_size(b)) + return 0; + return 1; +} + +/* b_contig_data() : returns the amount of data that can contiguously be read + * at once starting from a relative offset <start> (which allows to easily + * pre-compute blocks for memcpy). The start point will typically contain the + * amount of past data already returned by a previous call to this function. + */ +static inline size_t b_contig_data(const struct buffer *b, size_t start) +{ + size_t data = b_wrap(b) - b_peek(b, start); + size_t limit = b_data(b) - start; + + if (data > limit) + data = limit; + return data; +} + +/* b_contig_space() : returns the amount of bytes that can be appended to the + * buffer at once. 
We have 8 possible cases : + * + * [____________________] return size + * [______|_____________] return size - tail_ofs + * [XXXXXX|_____________] return size - tail_ofs + * [___|XXXXXX|_________] return size - tail_ofs + * [______________XXXXXX] return head_ofs + * [XXXX|___________|XXX] return head_ofs - tail_ofs + * [XXXXXXXXXX|XXXXXXXXX] return 0 + * [XXXXXXXXXXXXXXXXXXXX] return 0 + */ +static inline size_t b_contig_space(const struct buffer *b) +{ + size_t left, right; + + BUG_ON_HOT(b->data > b->size); + + right = b_head_ofs(b); + left = right + b_data(b); + + left = b_size(b) - left; + if ((ssize_t)left <= 0) + left += right; + return left; +} + +/* b_getblk() : gets one full block of data at once from a buffer, starting + * from offset <offset> after the buffer's head, and limited to no more than + * <len> bytes. The caller is responsible for ensuring that neither <offset> + * nor <offset>+<len> exceed the total number of bytes available in the buffer. + * Return values : + * >0 : number of bytes read, equal to requested size. + * =0 : not enough data available. <blk> is left undefined. + * The buffer is left unaffected. 
+ */ +static inline size_t b_getblk(const struct buffer *buf, char *blk, size_t len, size_t offset) +{ + size_t firstblock; + + BUG_ON(buf->data > buf->size); + BUG_ON(offset > buf->data); + BUG_ON(offset + len > buf->data); + + if (len + offset > b_data(buf)) + return 0; + + firstblock = b_wrap(buf) - b_head(buf); + if (firstblock > offset) { + if (firstblock >= len + offset) { + memcpy(blk, b_head(buf) + offset, len); + return len; + } + + memcpy(blk, b_head(buf) + offset, firstblock - offset); + memcpy(blk + firstblock - offset, b_orig(buf), len - firstblock + offset); + return len; + } + + memcpy(blk, b_orig(buf) + offset - firstblock, len); + return len; +} + +/* b_getblk_nc() : gets one or two blocks of data at once from a buffer, + * starting from offset <ofs> after the beginning of its output, and limited to + * no more than <max> bytes. The caller is responsible for ensuring that + * neither <ofs> nor <ofs>+<max> exceed the total number of bytes available in + * the buffer. Return values : + * >0 : number of blocks filled (1 or 2). blk1 is always filled before blk2. + * =0 : not enough data available. <blk*> are left undefined. + * The buffer is left unaffected. Unused buffers are left in an undefined state. + */ +static inline size_t b_getblk_nc(const struct buffer *buf, const char **blk1, size_t *len1, const char **blk2, size_t *len2, size_t ofs, size_t max) +{ + size_t l1; + + BUG_ON_HOT(buf->data > buf->size); + BUG_ON_HOT(ofs > buf->data); + BUG_ON_HOT(ofs + max > buf->data); + + if (!max) + return 0; + + *blk1 = b_peek(buf, ofs); + l1 = b_wrap(buf) - *blk1; + if (l1 < max) { + *len1 = l1; + *len2 = max - l1; + *blk2 = b_orig(buf); + return 2; + } + *len1 = max; + return 1; +} + + +/*********************************************/ +/* Functions used to modify the buffer state */ +/*********************************************/ + +/* b_reset() : resets a buffer. The size is not touched. 
*/ +static inline void b_reset(struct buffer *b) +{ + b->head = 0; + b->data = 0; +} + +/* b_make() : make a buffer from all parameters */ +static inline struct buffer b_make(char *area, size_t size, size_t head, size_t data) +{ + struct buffer b; + + b.area = area; + b.size = size; + b.head = head; + b.data = data; + return b; +} + +/* b_sub() : decreases the buffer length by <count> */ +static inline void b_sub(struct buffer *b, size_t count) +{ + BUG_ON_HOT(b->data < count); + b->data -= count; +} + +/* b_add() : increase the buffer length by <count> */ +static inline void b_add(struct buffer *b, size_t count) +{ + BUG_ON_HOT(b->data + count > b->size); + b->data += count; +} + +/* b_set_data() : sets the buffer's length */ +static inline void b_set_data(struct buffer *b, size_t len) +{ + BUG_ON_HOT(len > b->size); + b->data = len; +} + +/* b_del() : skips <del> bytes in a buffer <b>. Covers both the output and the + * input parts so it's up to the caller to know where it plays and that <del> + * is always smaller than the amount of data in the buffer. + */ +static inline void b_del(struct buffer *b, size_t del) +{ + BUG_ON_HOT(b->data < del); + b->data -= del; + b->head += del; + if (b->head >= b->size) + b->head -= b->size; +} + +/* b_realign_if_empty() : realigns a buffer if it's empty */ +static inline void b_realign_if_empty(struct buffer *b) +{ + if (!b_data(b)) + b->head = 0; +} + +/* b_slow_realign() : this function realigns a possibly wrapping buffer so that + * the part remaining to be parsed is contiguous and starts at the beginning of + * the buffer and the already parsed output part ends at the end of the buffer. + * This provides the best conditions since it allows the largest inputs to be + * processed at once and ensures that once the output data leaves, the whole + * buffer is available at once. The number of output bytes supposedly present + * at the beginning of the buffer and which need to be moved to the end must be + * passed in <output>. 
A temporary swap area at least as large as b->size must + * be provided in <swap>. It's up to the caller to ensure <output> is no larger + * than the difference between the whole buffer's length and its input. + */ +static inline void b_slow_realign(struct buffer *b, char *swap, size_t output) +{ + size_t block1 = output; + size_t block2 = 0; + + BUG_ON_HOT(b->data > b->size); + + /* process output data in two steps to cover wrapping */ + if (block1 > b_size(b) - b_head_ofs(b)) { + block2 = b_peek_ofs(b, block1); + block1 -= block2; + } + memcpy(swap + b_size(b) - output, b_head(b), block1); + memcpy(swap + b_size(b) - block2, b_orig(b), block2); + + /* process input data in two steps to cover wrapping */ + block1 = b_data(b) - output; + block2 = 0; + + if (block1 > b_tail_ofs(b)) { + block2 = b_tail_ofs(b); + block1 = block1 - block2; + } + memcpy(swap, b_peek(b, output), block1); + memcpy(swap + block1, b_orig(b), block2); + + /* reinject changes into the buffer */ + memcpy(b_orig(b), swap, b_data(b) - output); + memcpy(b_wrap(b) - output, swap + b_size(b) - output, output); + + b->head = (output ? b_size(b) - output : 0); +} + +/* b_slow_realign_ofs() : this function realigns a possibly wrapping buffer + * setting its new head at <ofs>. Depending on the <ofs> value, the resulting + * buffer may also wrap. A temporary swap area at least as large as b->size must + * be provided in <swap>. It's up to the caller to ensure <ofs> is not larger + * than b->size. 
+ */ +static inline void b_slow_realign_ofs(struct buffer *b, char *swap, size_t ofs) +{ + size_t block1 = b_data(b); + size_t block2 = 0; + + BUG_ON_HOT(b->data > b->size); + BUG_ON_HOT(ofs > b->size); + + if (__b_tail_ofs(b) >= b_size(b)) { + block2 = b_tail_ofs(b); + block1 -= block2; + } + memcpy(swap, b_head(b), block1); + memcpy(swap + block1, b_orig(b), block2); + + block1 = b_data(b); + block2 = 0; + if (block1 > b_size(b) - ofs) { + block1 = b_size(b) - ofs; + block2 = b_data(b) - block1; + } + memcpy(b_orig(b) + ofs, swap, block1); + memcpy(b_orig(b), swap + block1, block2); + + b->head = ofs; +} + + +/* b_putchr() : tries to append char <c> at the end of buffer <b>. Supports + * wrapping. The character is silently dropped if the buffer is full. + */ +static inline void b_putchr(struct buffer *b, char c) +{ + if (b_full(b)) + return; + *b_tail(b) = c; + b->data++; +} + +/* __b_putblk() : tries to append <len> bytes from block <blk> to the end of + * buffer <b> without checking for free space (it's up to the caller to do it). + * Supports wrapping. It must not be called with len == 0. + */ +static inline void __b_putblk(struct buffer *b, const char *blk, size_t len) +{ + size_t half = b_contig_space(b); + + BUG_ON(b_data(b) + len > b_size(b)); + + if (half > len) + half = len; + + memcpy(b_tail(b), blk, half); + + if (len > half) + memcpy(b_peek(b, b_data(b) + half), blk + half, len - half); + b->data += len; +} + +/* b_putblk() : tries to append block <blk> at the end of buffer <b>. Supports + * wrapping. Data are truncated if buffer is too short. It returns the number + * of bytes copied. + */ +static inline size_t b_putblk(struct buffer *b, const char *blk, size_t len) +{ + if (len > b_room(b)) + len = b_room(b); + if (len) + __b_putblk(b, blk, len); + return len; +} + +/* b_xfer() : transfers at most <count> bytes from buffer <src> to buffer <dst> + * and returns the number of bytes copied. The bytes are removed from <src> and + * added to <dst>. 
The caller is responsible for ensuring that <count> is not + * larger than b_room(dst). Whenever possible (if the destination is empty and + * at least as much as the source was requested), the buffers are simply + * swapped instead of copied. + */ +static inline size_t b_xfer(struct buffer *dst, struct buffer *src, size_t count) +{ + size_t ret, block1, block2; + + ret = 0; + if (!count) + goto leave; + + ret = b_data(src); + if (!ret) + goto leave; + + if (ret > count) + ret = count; + else if (!b_data(dst)) { + /* zero copy is possible by just swapping buffers */ + struct buffer tmp = *dst; + *dst = *src; + *src = tmp; + goto leave; + } + + block1 = b_contig_data(src, 0); + if (block1 > ret) + block1 = ret; + block2 = ret - block1; + + if (block1) + __b_putblk(dst, b_head(src), block1); + + if (block2) + __b_putblk(dst, b_peek(src, block1), block2); + + b_del(src, ret); + leave: + return ret; +} + +/* b_ncat() : Copy <count> from <src> buffer at the end of <dst> buffer. + * The caller is responsible for ensuring that <count> is not larger than + * b_room(dst). + * Returns the number of bytes copied. + */ +static inline size_t b_ncat(struct buffer *dst, struct buffer *src, size_t count) +{ + size_t ret, block1, block2; + + ret = 0; + if (!count) + goto leave; + + ret = b_data(src); + if (!ret) + goto leave; + + if (ret > count) + ret = count; + block1 = b_contig_data(src, 0); + if (block1 > ret) + block1 = ret; + block2 = ret - block1; + + if (block1) + __b_putblk(dst, b_head(src), block1); + + if (block2) + __b_putblk(dst, b_peek(src, block1), block2); + + leave: + return ret; +} + +/* b_force_xfer() : same as b_xfer() but without zero copy. + * The caller is responsible for ensuring that <count> is not + * larger than b_room(dst). 
+ */ +static inline size_t b_force_xfer(struct buffer *dst, struct buffer *src, size_t count) +{ + size_t ret; + + /* copy first, then drop from <src> exactly what was copied */ + ret = b_ncat(dst, src, count); + b_del(src, ret); + + return ret; +} + + +/* Moves <len> bytes from absolute position <src> of buffer <b> by <shift> + * bytes, while supporting wrapping of both the source and the destination. + * The position is relative to the buffer's origin and may overlap with the + * target position. The <shift>'s absolute value must be strictly lower than + * the buffer's size. The main purpose is to aggregate data blocks during + * parsing while removing unused delimiters. The buffer's length is not + * modified, and the caller must take care of size adjustments and holes by + * itself. Nothing is moved when <shift> is zero. + */ +static inline void b_move(const struct buffer *b, size_t src, size_t len, ssize_t shift) +{ + char *orig = b_orig(b); + size_t size = b_size(b); + size_t dst = src + size + shift; + size_t cnt; + + BUG_ON(len > size); + + if (dst >= size) + dst -= size; + + if (shift < 0) { + BUG_ON(-shift >= size); + /* copy from left to right */ + for (; (cnt = len); len -= cnt) { + /* each pass is limited to the largest chunk which wraps + * neither on the source nor on the destination side. + */ + if (cnt > size - src) + cnt = size - src; + if (cnt > size - dst) + cnt = size - dst; + + memmove(orig + dst, orig + src, cnt); + dst += cnt; + src += cnt; + if (dst >= size) + dst -= size; + if (src >= size) + src -= size; + } + } + else if (shift > 0) { + BUG_ON(shift >= size); + /* copy from right to left */ + for (; (cnt = len); len -= cnt) { + /* the end positions are recomputed from the remaining + * length on each pass, <src> and <dst> do not change. + */ + size_t src_end = src + len; + size_t dst_end = dst + len; + + if (dst_end > size) + dst_end -= size; + if (src_end > size) + src_end -= size; + + if (cnt > dst_end) + cnt = dst_end; + if (cnt > src_end) + cnt = src_end; + + memmove(orig + dst_end - cnt, orig + src_end - cnt, cnt); + } + } +} + +/* b_rep_blk() : writes the block <blk> at position <pos> which must be in + * buffer <b>, and moves the part between <end> and the buffer's tail just + * after the end of the copy of <blk>. 
This effectively replaces the part + * located between <pos> and <end> with a copy of <blk> of length <len>. The + * buffer's length is automatically updated. This is used to replace a block + * with another one inside a buffer. The shift value (positive or negative) is + * returned. If there's no space left, the move is not done and 0 is returned. + * If <len> is zero, the <blk> pointer is allowed to be null, in order to erase + * a block. + */ +static inline int b_rep_blk(struct buffer *b, char *pos, char *end, const char *blk, size_t len) +{ + int delta; + + BUG_ON(pos < b->area || pos >= b->area + b->size); + + /* size difference between the new block and the replaced part */ + delta = len - (end - pos); + + if (__b_tail(b) + delta > b_wrap(b)) + return 0; /* no space left */ + + if (b_data(b) && + b_tail(b) + delta > b_head(b) && + b_head(b) >= b_tail(b)) + return 0; /* no space left before wrapping data */ + + /* first, protect the end of the buffer */ + memmove(end + delta, end, b_tail(b) - end); + + /* now, copy blk over pos */ + if (len) + memcpy(pos, blk, len); + + b_add(b, delta); + b_realign_if_empty(b); + + return delta; +} + +/* b_insert_blk(): inserts the block <blk> at the absolute offset <off> moving + * data between this offset and the buffer's tail just after the end of the copy + * of <blk>. The buffer's length is automatically updated. It supports + * wrapping. If <len> is zero or if there is not enough space to perform the + * copy, 0 is returned. 
Otherwise, the number of bytes copied is returned. + */ +static inline int b_insert_blk(struct buffer *b, size_t off, const char *blk, size_t len) +{ + size_t pos; + + if (!len || len > b_room(b)) + return 0; /* nothing to copy or not enough space left */ + + /* inserting at the tail is a plain append, otherwise the data located + * after the insertion point must first be shifted by <len> bytes. + */ + pos = b_peek_ofs(b, off); + if (pos == b_tail_ofs(b)) + __b_putblk(b, blk, len); + else { + size_t delta = b_data(b) - off; /* bytes following the insertion point */ + + /* first, protect the end of the buffer */ + b_move(b, pos, delta, len); + + /* change the amount of data in the buffer during the copy */ + b_sub(b, delta); + __b_putblk(b, blk, len); + b_add(b, delta); + } + return len; +} + +/* __b_put_varint(): encode 64-bit value <v> as a varint into buffer <b>. The + * caller must have checked that the encoded value fits in the buffer so that + * there are no length checks. Wrapping is supported. You don't want to use + * this function but b_put_varint() instead. + */ +static inline void __b_put_varint(struct buffer *b, uint64_t v) +{ + size_t data = b->data; + size_t size = b_size(b); + char *wrap = b_wrap(b); + char *tail = b_tail(b); + + BUG_ON_HOT(data >= size); + + if (v >= 0xF0) { + /* more than one byte, first write the 4 least significant + * bits, then follow with 7 bits per byte. + */ + *tail = v | 0xF0; + v = (v - 0xF0) >> 4; + + while (1) { + if (++tail == wrap) + tail -= size; + data++; + if (v < 0x80) + break; + *tail = v | 0x80; + v = (v - 0x80) >> 7; + } + } + + /* last byte */ + *tail = v; + BUG_ON_HOT(data >= size); + data++; + b->data = data; +} + +/* b_put_varint(): try to encode value <v> as a varint into buffer <b>. Returns + * the number of bytes written in case of success, or 0 if there is not enough + * room. Wrapping is supported. No partial writes will be performed. 
+ */ +static inline int b_put_varint(struct buffer *b, uint64_t v) +{ + size_t data = b->data; + size_t size = b_size(b); + char *wrap = b_wrap(b); + char *tail = b_tail(b); + + if (data != size && v >= 0xF0) { + BUG_ON_HOT(data > size); + + /* more than one byte, first write the 4 least significant + * bits, then follow with 7 bits per byte. + */ + *tail = v | 0xF0; + v = (v - 0xF0) >> 4; + + while (1) { + if (++tail == wrap) + tail -= size; + data++; + if (data == size || v < 0x80) + break; + *tail = v | 0x80; + v = (v - 0x80) >> 7; + } + } + + /* last byte: when the buffer is full we give up before updating + * b->data, so that nothing written so far is committed. + */ + if (data == size) + return 0; + + *tail = v; + data++; + + /* <size> is reused to hold the number of bytes written */ + size = data - b->data; + b->data = data; + return size; +} + +/* b_get_varint(): try to decode a varint from buffer <b> into value <vptr>. + * Returns the number of bytes read in case of success, or 0 if there were not + * enough bytes. Wrapping is supported. No partial reads will be performed: on + * success the bytes read are removed from the buffer, and on failure neither + * the buffer nor <vptr> is modified. + */ +static inline int b_get_varint(struct buffer *b, uint64_t *vptr) +{ + const uint8_t *head = (const uint8_t *)b_head(b); + const uint8_t *wrap = (const uint8_t *)b_wrap(b); + size_t data = b->data; + size_t size = b_size(b); + uint64_t v = 0; + int bits = 0; + + if (data != 0 && (*head >= 0xF0)) { + v = *head; + bits += 4; + while (1) { + if (++head == wrap) + head -= size; + data--; + if (!data || !(*head & 0x80)) + break; + v += (uint64_t)*head << bits; + bits += 7; + } + } + + /* last byte */ + if (!data) + return 0; + + v += (uint64_t)*head << bits; + *vptr = v; + data--; + /* <size> is reused to hold the number of bytes consumed */ + size = b->data - data; + b_del(b, size); + return size; +} + +/* b_peek_varint(): try to decode a varint from buffer <b> at offset <ofs> + * relative to head, into value <vptr>. Returns the number of bytes parsed in + * case of success, or 0 if there were not enough bytes, in which case the + * contents of <vptr> are not updated. Wrapping is supported. The buffer's head + * will NOT be updated. It is illegal to call this function with <ofs> greater + * than b->data. 
+ */ +static inline int b_peek_varint(struct buffer *b, size_t ofs, uint64_t *vptr) +{ + const uint8_t *head = (const uint8_t *)b_peek(b, ofs); + const uint8_t *wrap = (const uint8_t *)b_wrap(b); + size_t data = b_data(b) - ofs; /* bytes available starting at <ofs> */ + size_t size = b_size(b); + uint64_t v = 0; + int bits = 0; + + BUG_ON_HOT(ofs > b_data(b)); + + if (data != 0 && (*head >= 0xF0)) { + v = *head; + bits += 4; + while (1) { + if (++head == wrap) + head -= size; + data--; + if (!data || !(*head & 0x80)) + break; + v += (uint64_t)*head << bits; + bits += 7; + } + } + + /* last byte */ + if (!data) + return 0; + + v += (uint64_t)*head << bits; + *vptr = v; + data--; + /* <size> is reused to hold the number of bytes parsed */ + size = b->data - ofs - data; + return size; +} + + +/* + * Buffer ring management. + * + * A buffer ring is a circular list of buffers, with a head buffer (the oldest, + * being read from) and a tail (the newest, being written to). Such a ring is + * declared as an array of buffers. The first element in the array is the root + * and is used differently. It stores the following elements : + * - size : number of allocated elements in the array, including the root + * - area : magic value BUF_RING (just to help debugging) + * - head : position of the head in the array (starts at one) + * - data : position of the tail in the array (starts at one). + * + * Note that contrary to a linear buffer, head and tail may be equal with room + * available, since the producer is expected to fill the tail. Also, the tail + * may well be equal to BUF_WANTED if an allocation is pending, in + * which case it's illegal to try to allocate past this point (only one entry + * may be subscribed for allocation). It is illegal to allocate a buffer after + * an empty one, so that BUF_NULL is always the last buffer. It is also illegal + * to remove elements without freeing the buffers. Buffers between <tail> and + * <head> are in an undefined state, but <tail> and <head> are always valid. 
+ * A ring may not contain fewer than 2 elements, since the root is mandatory, + * and at least one entry is required to always present a valid buffer. + * + * Given that buffers are 16 or 32 bytes long, it's convenient to set the + * size of the array to 2^N in order to keep (2^N)-1 elements, totaling + * 2^N*16(or 32) bytes. For example on a 64-bit system, a ring of 31 usable + * buffers takes 1024 bytes. + */ + +/* Initialization of a ring, the size argument contains the number of allocated + * elements, including the root. There must always be at least 2 elements, one + * for the root and one for storage. + */ +static inline void br_init(struct buffer *r, size_t size) +{ + BUG_ON(size < 2); + + r->size = size; + r->area = BUF_RING.area; + r->head = r->data = 1; + r[1] = BUF_NULL; +} + +/* Returns number of elements in the ring, root included */ +static inline unsigned int br_size(const struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + return r->size; +} + +/* Returns true if no more buffers may be added, i.e. when the slot following + * the tail is the head, either directly or after wrapping past the last slot. + */ +static inline unsigned int br_full(const struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + return r->data + 1 == r->head || r->data + 1 == r->head - 1 + r->size; +} + +/* Returns the number of buffers present, from head to tail both included */ +static inline unsigned int br_count(const struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + if (r->data >= r->head) + return r->data - r->head + 1; + else + return r->data + r->size - r->head; +} + +/* Returns true if a single buffer is assigned */ +static inline unsigned int br_single(const struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + return r->data == r->head; +} + +/* Returns the index of the ring's head buffer */ +static inline unsigned int br_head_idx(const struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + return r->head; +} + +/* Returns the index of the ring's tail buffer */ +static inline unsigned int br_tail_idx(const struct buffer *r) +{ + BUG_ON_HOT(r->area 
!= BUF_RING.area); + + return r->data; +} + +/* Returns a pointer to the ring's head buffer */ +static inline struct buffer *br_head(struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + return r + br_head_idx(r); +} + +/* Returns a pointer to the ring's tail buffer */ +static inline struct buffer *br_tail(struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + return r + br_tail_idx(r); +} + +/* Returns the amount of data of the ring's HEAD buffer */ +static inline unsigned int br_data(const struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + return b_data(r + br_head_idx(r)); +} + +/* Returns non-zero if the ring is non-full or its tail has some room */ +static inline unsigned int br_has_room(const struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + if (!br_full(r)) + return 1; + return b_room(r + br_tail_idx(r)); +} + +/* Advances the ring's tail if it points to a non-empty buffer (i.e. one whose + * size is not zero), and returns the new tail buffer, or NULL if the ring is + * full or the tail buffer is already empty. A + * new buffer is initialized to BUF_NULL before being returned. This is to be + * used after failing to append data, in order to decide to retry or not. + */ +static inline struct buffer *br_tail_add(struct buffer *r) +{ + struct buffer *b; + + BUG_ON_HOT(r->area != BUF_RING.area); + + b = br_tail(r); + if (!b_size(b)) + return NULL; + + if (br_full(r)) + return NULL; + + r->data++; + if (r->data >= r->size) + r->data = 1; /* wrap around: index 0 is the root */ + + b = br_tail(r); + *b = BUF_NULL; + return b; +} + +/* Extracts the ring's head buffer and returns it. The last buffer (tail) is + * never removed but it is returned. This guarantees that we stop on BUF_WANTED + * or BUF_NULL and that at the end a valid buffer remains present. This is + * used for pre-extraction during a free() loop for example. The caller is + * expected to detect the end (e.g. using b_size() since b_free() voids the + * buffer). 
+ */ +static inline struct buffer *br_head_pick(struct buffer *r) +{ + struct buffer *b; + + BUG_ON_HOT(r->area != BUF_RING.area); + + /* the returned buffer is the head as it was BEFORE advancing */ + b = br_head(r); + if (r->head != r->data) { + r->head++; + if (r->head >= r->size) + r->head = 1; /* wrap around: index 0 is the root */ + } + return b; +} + +/* Advances the ring's head and returns the next buffer, unless it's already + * the tail, in which case the tail itself is returned. This is used for post- + * parsing deletion. The caller is expected to detect the end (e.g. a parser + * will typically purge the head before proceeding). + */ +static inline struct buffer *br_del_head(struct buffer *r) +{ + BUG_ON_HOT(r->area != BUF_RING.area); + + if (r->head != r->data) { + r->head++; + if (r->head >= r->size) + r->head = 1; /* wrap around: index 0 is the root */ + } + /* contrary to br_head_pick(), the head AFTER advancing is returned */ + return br_head(r); +} + +#endif /* _HAPROXY_BUF_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/bug.h b/include/haproxy/bug.h new file mode 100644 index 0000000..1356acf --- /dev/null +++ b/include/haproxy/bug.h @@ -0,0 +1,479 @@ +/* + * include/haproxy/bug.h + * Assertions and instant crash macros needed everywhere. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_BUG_H +#define _HAPROXY_BUG_H + +#include <haproxy/atomic.h> +#include <haproxy/compiler.h> + +/* quick debugging hack, should really be removed ASAP: DPRINTF() maps to + * fprintf() when DEBUG_FULL is defined and expands to nothing otherwise. + */ +#ifdef DEBUG_FULL +#define DPRINTF(x...) fprintf(x) +#else +#define DPRINTF(x...) +#endif + +/* declares ha_backtrace_to_stderr() locally and calls it */ +#define DUMP_TRACE() do { extern void ha_backtrace_to_stderr(void); ha_backtrace_to_stderr(); } while (0) + +/* First, let's try to handle some arch-specific crashing methods. We prefer + * the macro to the function because when opening the core, the debugger will + * directly show the calling point (e.g. the BUG_ON() condition) based on the + * line number, while the function will create new line numbers. But the + * function is needed e.g. if some pragmas are needed. 
+ */ + +#if defined(__i386__) || defined(__x86_64__) +#define ha_crash_now() do { \ + /* ud2 opcode: 2 bytes, raises illegal instruction */ \ + __asm__ volatile(".byte 0x0f,0x0b\n"); \ + DO_NOT_FOLD(); \ + my_unreachable(); \ + } while (0) + +#elif defined(__aarch64__) +#define ha_crash_now() do { \ + /* udf #imm16 with imm16=0: 4 bytes, raises illegal instruction */ \ + __asm__ volatile(".byte 0x00,0x00,0x00,0x00\n"); \ + DO_NOT_FOLD(); \ + my_unreachable(); \ + } while (0) + +#else // neither x86 nor aarch64 + +/* generic implementation, causes a segfault */ +static inline __attribute((always_inline)) void ha_crash_now(void) +{ +#if __GNUC_PREREQ__(5, 0) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#if __GNUC_PREREQ__(6, 0) +#pragma GCC diagnostic ignored "-Wnull-dereference" +#endif +#endif + /* deliberate write to address 1; the pragmas above silence the + * warnings this store would otherwise trigger. + */ + *(volatile char *)1 = 0; +#if __GNUC_PREREQ__(5, 0) +#pragma GCC diagnostic pop +#endif + DO_NOT_FOLD(); + my_unreachable(); +} + +#endif // end of arch-specific ha_crash_now() definitions + +#ifdef DEBUG_USE_ABORT +/* abort() is better recognized by code analysis tools */ + +/* abort() is generally tagged noreturn, so there's no 100% safe way to prevent + * the compiler from doing a tail-merge here. Tests show that stopping folding + * just before calling abort() does work in practice at -O2, increasing the + * number of abort() calls in h3.o from 18 to 26, probably because there's no + * more savings to be made by replacing a call with a jump. However, at -Os it + * drops to 5 regardless of the build option. In order to help here, instead we + * wrap abort() into another function, with the line number stored into a local + * variable on the stack and we pretend to use it, so that unwinding the stack + * from abort() will reveal its value even if the call was folded. 
+ */ +static __attribute__((noinline,noreturn,unused)) void abort_with_line(unsigned int line) +{ + DISGUISE(&line); + abort(); +} + +#define ABORT_NOW() do { DUMP_TRACE(); abort_with_line(__LINE__); } while (0) +#else +/* More efficient than abort() because it does not mangle the + * stack and stops at the exact location we need. + */ +#define ABORT_NOW() do { DUMP_TRACE(); ha_crash_now(); } while (0) +#endif + +/* This is the generic low-level macro dealing with conditional warnings and + * bugs. The caller decides whether to crash or not and what prefix and suffix + * to pass. The inner expression evaluates to the boolean value of the + * condition for the case where it wouldn't die, but the whole macro is cast to + * void so this value is not available to the caller. The <crash> flag is made of: + * - crash & 1: crash yes/no; + * - crash & 2: taint as bug instead of warn + * The _BUG_ON() indirection lets <line> (typically __LINE__) be expanded + * before being stringified by __BUG_ON(). + */ +#define _BUG_ON(cond, file, line, crash, pfx, sfx) \ + __BUG_ON(cond, file, line, crash, pfx, sfx) + +#define __BUG_ON(cond, file, line, crash, pfx, sfx) \ + (void)(unlikely(cond) ? ({ \ + complain(NULL, "\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n", crash); \ + if (crash & 1) \ + ABORT_NOW(); \ + else \ + DUMP_TRACE(); \ + 1; /* let's return the true condition */ \ + }) : 0) + +/* This one is equivalent except that it only emits the message once by + * maintaining a static counter. This may be used with warnings to detect + * certain unexpected conditions in field. Later on, in cores it will be + * possible to verify these counters. + */ +#define _BUG_ON_ONCE(cond, file, line, crash, pfx, sfx) \ + __BUG_ON_ONCE(cond, file, line, crash, pfx, sfx) + +#define __BUG_ON_ONCE(cond, file, line, crash, pfx, sfx) \ + (void)(unlikely(cond) ? 
({ \ + static int __match_count_##line; \ + complain(&__match_count_##line, "\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n", crash); \ + if (crash & 1) \ + ABORT_NOW(); \ + else \ + DUMP_TRACE(); \ + 1; /* let's return the true condition */ \ + }) : 0) + +/* DEBUG_STRICT enables/disables runtime checks on condition <cond> + * DEBUG_STRICT_ACTION indicates the level of verification on the rules when + * <cond> is true: + * + * macro BUG_ON() WARN_ON() CHECK_IF() + * value 0 warn warn warn + * 1 CRASH warn warn + * 2 CRASH CRASH warn + * 3 CRASH CRASH CRASH + */ + +/* The macros below are for general use */ +#if defined(DEBUG_STRICT) +# if defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION < 1) +/* Lowest level: BUG_ON() warns, WARN_ON() warns, CHECK_IF() warns */ +# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 2, "WARNING: bug ", " (not crashing but process is untrusted now, please report to developers)") +# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 0, "WARNING: warn ", " (please report to developers)") +# define CHECK_IF(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# elif !defined(DEBUG_STRICT_ACTION) || (DEBUG_STRICT_ACTION == 1) +/* default level: BUG_ON() crashes, WARN_ON() warns, CHECK_IF() warns */ +# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") +# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 0, "WARNING: warn ", " (please report to developers)") +# define CHECK_IF(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# elif defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION == 2) +/* Stricter level: BUG_ON() crashes, WARN_ON() crashes, CHECK_IF() warns */ +# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") +# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 1, "FATAL: warn ", "") +# define CHECK_IF(cond) _BUG_ON_ONCE(cond, 
__FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# elif defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION >= 3) +/* Developer/CI level: BUG_ON() crashes, WARN_ON() crashes, CHECK_IF() crashes */ +# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") +# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 1, "FATAL: warn ", "") +# define CHECK_IF(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 1, "FATAL: check ", "") +# endif +#else +/* checks disabled: <cond> is only passed to sizeof() so it is not evaluated */ +# define BUG_ON(cond) do { (void)sizeof(cond); } while (0) +# define WARN_ON(cond) do { (void)sizeof(cond); } while (0) +# define CHECK_IF(cond) do { (void)sizeof(cond); } while (0) +#endif + +/* These macros are only for hot paths and remain disabled unless DEBUG_STRICT is 2 or above. + * Only developers/CI should use these levels as they may significantly impact performance by + * enabling checks in sensitive areas. + */ +#if defined(DEBUG_STRICT) && (DEBUG_STRICT > 1) +# if defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION < 1) +/* Lowest level: BUG_ON_HOT() warns once, CHECK_IF_HOT() warns once */ +# define BUG_ON_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 2, "WARNING: bug ", " (not crashing but process is untrusted now, please report to developers)") +# define CHECK_IF_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# elif !defined(DEBUG_STRICT_ACTION) || (DEBUG_STRICT_ACTION < 3) +/* default level: BUG_ON_HOT() crashes, CHECK_IF_HOT() warns once */ +# define BUG_ON_HOT(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") +# define CHECK_IF_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# elif defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION >= 3) +/* Developer/CI level: BUG_ON_HOT() crashes, CHECK_IF_HOT() crashes */ +# define BUG_ON_HOT(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") +# define CHECK_IF_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 1, "FATAL: 
check ", "") +# endif +#else +# define BUG_ON_HOT(cond) do { (void)sizeof(cond); } while (0) +# define CHECK_IF_HOT(cond) do { (void)sizeof(cond); } while (0) +#endif + + +/* When not optimizing, clang won't remove that code, so only compile it in when optimizing */ +#if defined(__GNUC__) && defined(__OPTIMIZE__) +#define HA_LINK_ERROR(what) \ + do { \ + /* provoke a build-time error */ \ + extern volatile int what; \ + what = 1; \ + } while (0) +#else +#define HA_LINK_ERROR(what) \ + do { \ + } while (0) +#endif /* __OPTIMIZE__ */ + +/* more reliable free() that clears the pointer. <x> is the ADDRESS of the + * pointer to release: the pointed area is freed then the pointer is set to + * NULL. Passing a compile-time constant is turned into a link error. + */ +#define ha_free(x) do { \ + typeof(x) __x = (x); \ + if (__builtin_constant_p((x)) || __builtin_constant_p(*(x))) { \ + HA_LINK_ERROR(call_to_ha_free_attempts_to_free_a_constant); \ + } \ + free(*__x); \ + *__x = NULL; \ + } while (0) + +/* describes a call place in the code, for example for tracing memory + * allocations or task wakeups. These must be declared static const. + */ +struct ha_caller { + const char *func; // function name + const char *file; // file name + uint16_t line; // line number + uint8_t what; // description of the call, usage specific + uint8_t arg8; // optional argument, usage specific + uint32_t arg32; // optional argument, usage specific +}; + +#define MK_CALLER(_what, _arg8, _arg32) \ + ({ static const struct ha_caller _ = { \ + .func = __func__, .file = __FILE__, .line = __LINE__, \ + .what = _what, .arg8 = _arg8, .arg32 = _arg32 }; \ + &_; }) + +/* handle 'tainted' status */ +enum tainted_flags { + TAINTED_CONFIG_EXP_KW_DECLARED = 0x00000001, + TAINTED_ACTION_EXP_EXECUTED = 0x00000002, + TAINTED_CLI_EXPERT_MODE = 0x00000004, + TAINTED_CLI_EXPERIMENTAL_MODE = 0x00000008, + TAINTED_WARN = 0x00000010, /* a WARN_ON triggered */ + TAINTED_BUG = 0x00000020, /* a BUG_ON triggered */ + TAINTED_SHARED_LIBS = 0x00000040, /* a shared library was loaded */ + TAINTED_REDEFINITION = 0x00000080, /* symbol redefinition detected */ + TAINTED_REPLACED_MEM_ALLOCATOR = 
0x00000100, /* memory allocator was replaced using LD_PRELOAD */ + TAINTED_PANIC = 0x00000200, /* a panic dump has started */ + TAINTED_LUA_STUCK = 0x00000400, /* stuck in a Lua context */ + TAINTED_LUA_STUCK_SHARED = 0x00000800, /* stuck in a shared Lua context */ + TAINTED_MEM_TRIMMING_STUCK = 0x00001000, /* stuck while trimming memory */ +}; + +/* this is a bit field made of TAINTED_*, and is declared in haproxy.c */ +extern unsigned int tainted; + +void complain(int *counter, const char *msg, int taint); + +/* ORs <flag> into the <tainted> bit field using HA_ATOMIC_OR() */ +static inline void mark_tainted(const enum tainted_flags flag) +{ + HA_ATOMIC_OR(&tainted, flag); +} + +/* returns the current value of the <tainted> bit field, read using + * HA_ATOMIC_LOAD(). + */ +static inline unsigned int get_tainted(void) +{ + return HA_ATOMIC_LOAD(&tainted); +} + +#if defined(DEBUG_MEM_STATS) +#include <stdlib.h> +#include <string.h> + +/* Memory allocation statistics are centralized into a global "mem_stats" + * section. This will not work with some linkers. + */ +enum { + MEM_STATS_TYPE_UNSET = 0, + MEM_STATS_TYPE_CALLOC, + MEM_STATS_TYPE_FREE, + MEM_STATS_TYPE_MALLOC, + MEM_STATS_TYPE_REALLOC, + MEM_STATS_TYPE_STRDUP, + MEM_STATS_TYPE_P_ALLOC, + MEM_STATS_TYPE_P_FREE, +}; + +struct mem_stats { + size_t calls; + size_t size; + struct ha_caller caller; + const void *extra; // extra info specific to this call (e.g. pool ptr) +} __attribute__((aligned(sizeof(void*)))); + +#undef calloc +#define calloc(x,y) ({ \ + size_t __x = (x); size_t __y = (y); \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_CALLOC, \ + .func = __func__, \ + }, \ + }; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + _HA_ATOMIC_INC(&_.calls); \ + _HA_ATOMIC_ADD(&_.size, __x * __y); \ + calloc(__x,__y); \ +}) + +/* note: we can't redefine free() because we have a few variables and struct + * members called like this. 
This one may be used before a call to free(), + * and when known, the size should be indicated, otherwise pass zero. The + * pointer is used to know whether the call should be accounted for (null is + * ignored). + */ +#undef will_free +#define will_free(x, y) ({ \ + void *__x = (x); size_t __y = (y); \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_FREE, \ + .func = __func__, \ + }, \ + }; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + if (__x) { \ + _HA_ATOMIC_INC(&_.calls); \ + _HA_ATOMIC_ADD(&_.size, __y); \ + } \ +}) + +/* instrumented ha_free(): <x> is still the address of the pointer to release, + * which is freed then set to NULL. The call is only counted when the pointer + * is not NULL, and no size is accounted for. + */ +#undef ha_free +#define ha_free(x) ({ \ + typeof(x) __x = (x); \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_FREE, \ + .func = __func__, \ + }, \ + }; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + if (__builtin_constant_p((x)) || __builtin_constant_p(*(x))) { \ + HA_LINK_ERROR(call_to_ha_free_attempts_to_free_a_constant); \ + } \ + if (*__x) \ + _HA_ATOMIC_INC(&_.calls); \ + free(*__x); \ + *__x = NULL; \ +}) + +/* the wrappers below each own one static mem_stats entry per call place, + * increment its <calls> counter and add the requested size to <size>, then + * call the real function and evaluate to its result. + */ +#undef malloc +#define malloc(x) ({ \ + size_t __x = (x); \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_MALLOC, \ + .func = __func__, \ + }, \ + }; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + _HA_ATOMIC_INC(&_.calls); \ + _HA_ATOMIC_ADD(&_.size, __x); \ + malloc(__x); \ +}) + +#undef realloc +#define realloc(x,y) ({ \ + void *__x = (x); size_t __y = (y); \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_REALLOC, 
\ + .func = __func__, \ + }, \ + }; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + _HA_ATOMIC_INC(&_.calls); \ + _HA_ATOMIC_ADD(&_.size, __y); \ + realloc(__x,__y); \ +}) + +/* note: strlen() is applied to <x> before the real strdup() is called, and the + * accounted size is that length, without the trailing zero. + */ +#undef strdup +#define strdup(x) ({ \ + const char *__x = (x); size_t __y = strlen(__x); \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_STRDUP, \ + .func = __func__, \ + }, \ + }; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + _HA_ATOMIC_INC(&_.calls); \ + _HA_ATOMIC_ADD(&_.size, __y); \ + strdup(__x); \ +}) +#else // DEBUG_MEM_STATS + +/* without DEBUG_MEM_STATS, will_free() does nothing */ +#define will_free(x, y) do { } while (0) + +#endif /* DEBUG_MEM_STATS */ + +/* Add warnings to users of such functions. These will be reported at link time + * indicating what file name and line used them. The goal is to remind their + * users that these are extremely unsafe functions that never have a valid + * reason for being used. + */ +#undef strcat +__attribute__warning("\n" +" * WARNING! strcat() must never be used, because there is no convenient way\n" +" * to use it that is safe. Use memcpy() instead!\n") +extern char *strcat(char *__restrict dest, const char *__restrict src); + +#undef strcpy +__attribute__warning("\n" +" * WARNING! strcpy() must never be used, because there is no convenient way\n" +" * to use it that is safe. Use memcpy() or strlcpy2() instead!\n") +extern char *strcpy(char *__restrict dest, const char *__restrict src); + +#undef strncat +__attribute__warning("\n" +" * WARNING! strncat() must never be used, because there is no convenient way\n" +" * to use it that is safe. Use memcpy() instead!\n") +extern char *strncat(char *__restrict dest, const char *__restrict src, size_t n); + +#undef sprintf +__attribute__warning("\n" +" * WARNING! sprintf() must never be used, because there is no convenient way\n" +" * to use it that is safe. 
Use snprintf() instead!\n") +extern int sprintf(char *__restrict dest, const char *__restrict fmt, ...); + +/* NOTE(review): the macros tested below are presumably set by the libc headers + * once va_list is defined, so that the prototype can be written - confirm. + */ +#if defined(_VA_LIST_DEFINED) || defined(_VA_LIST_DECLARED) || defined(_VA_LIST) +#undef vsprintf +__attribute__warning("\n" +" * WARNING! vsprintf() must never be used, because there is no convenient way\n" +" * to use it that is safe. Use vsnprintf() instead!\n") +extern int vsprintf(char *__restrict dest, const char *__restrict fmt, va_list ap); +#endif + +#endif /* _HAPROXY_BUG_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/capture-t.h b/include/haproxy/capture-t.h new file mode 100644 index 0000000..ebc7fe8 --- /dev/null +++ b/include/haproxy/capture-t.h @@ -0,0 +1,43 @@ +/* + * include/haproxy/capture-t.h + * This file defines types for captures. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CAPTURE_T_H +#define _HAPROXY_CAPTURE_T_H + +#include <haproxy/pool-t.h> + +struct cap_hdr { + struct cap_hdr *next; + char *name; /* header name, case insensitive, NULL if not header */ + int namelen; /* length of the header name, to speed-up lookups, 0 if !name */ + int len; /* capture length, not including terminal zero */ + int index; /* index in the output array */ + struct pool_head *pool; /* pool of pre-allocated memory area of (len+1) bytes */ +}; + +#endif /* _HAPROXY_CAPTURE_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/capture.h b/include/haproxy/capture.h new file mode 100644 index 0000000..ba0a6c0 --- /dev/null +++ b/include/haproxy/capture.h @@ -0,0 +1,37 @@ +/* + * include/haproxy/capture.h + * This file defines prototypes for captures. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CAPTURE_H +#define _HAPROXY_CAPTURE_H + +#include <haproxy/capture-t.h> +#include <haproxy/pool-t.h> + +extern struct pool_head *pool_head_capture; + +#endif /* _HAPROXY_CAPTURE_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/cbuf-t.h b/include/haproxy/cbuf-t.h new file mode 100644 index 0000000..27d3bf1 --- /dev/null +++ b/include/haproxy/cbuf-t.h @@ -0,0 +1,45 @@ +/* + * include/haproxy/cbuf-t.h + * This file contains definitions for circular buffers. + * + * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CBUF_T_H +#define _HAPROXY_CBUF_T_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif +#endif + +#include <haproxy/list-t.h> + +extern struct pool_head *pool_head_cbuf; + +struct cbuf { + /* buffer */ + unsigned char *buf; + /* buffer size */ + size_t sz; + /* Writer index */ + size_t wr; + /* Reader index */ + size_t rd; +}; + +#endif /* _HAPROXY_CBUF_T_H */ diff --git a/include/haproxy/cbuf.h b/include/haproxy/cbuf.h new file mode 100644 index 0000000..b217a5c --- /dev/null +++ b/include/haproxy/cbuf.h @@ -0,0 +1,136 @@ +/* + * include/haproxy/cbuf.h + * This file contains definitions and prototypes for circular buffers. + * Inspired by Linux circular buffers (include/linux/circ_buf.h). + * + * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CBUF_H +#define _HAPROXY_CBUF_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif +#endif + +#include <haproxy/atomic.h> +#include <haproxy/list.h> +#include <haproxy/cbuf-t.h> + +struct cbuf *cbuf_new(unsigned char *buf, size_t sz); +void cbuf_free(struct cbuf *cbuf); + +/* Amount of data between <rd> and <wr> */ +#define CBUF_DATA(wr, rd, size) (((wr) - (rd)) & ((size) - 1)) + +/* Return the writer position in <cbuf>. + * To be used only by the writer! + */ +static inline unsigned char *cb_wr(struct cbuf *cbuf) +{ + return cbuf->buf + cbuf->wr; +} + +/* Reset the reader index. + * To be used by a reader! + */ +static inline void cb_rd_reset(struct cbuf *cbuf) +{ + cbuf->rd = 0; +} + +/* Reset the writer index. + * To be used by a writer! + */ +static inline void cb_wr_reset(struct cbuf *cbuf) +{ + cbuf->wr = 0; +} + +/* Increase <cbuf> circular buffer data by <count>. + * To be used by a writer! + */ +static inline void cb_add(struct cbuf *cbuf, size_t count) +{ + cbuf->wr = (cbuf->wr + count) & (cbuf->sz - 1); +} + +/* Return the reader position in <cbuf>. + * To be used only by the reader! + */ +static inline unsigned char *cb_rd(struct cbuf *cbuf) +{ + return cbuf->buf + cbuf->rd; +} + +/* Skip <count> byte in <cbuf> circular buffer. + * To be used by a reader! + */ +static inline void cb_del(struct cbuf *cbuf, size_t count) +{ + cbuf->rd = (cbuf->rd + count) & (cbuf->sz - 1); +} + +/* Return the amount of data left in <cbuf>. + * To be used only by the writer! 
+ */ +static inline size_t cb_data(struct cbuf *cbuf) +{ + size_t rd; + + rd = HA_ATOMIC_LOAD(&cbuf->rd); + return CBUF_DATA(cbuf->wr, rd, cbuf->sz); +} + +/* Return the amount of room left in <cbuf> minus 1 to distinguish + * the case where the buffer is full from the case where it is empty + * To be used only by the writer! + */ +static inline size_t cb_room(struct cbuf *cbuf) +{ + size_t rd; + + rd = HA_ATOMIC_LOAD(&cbuf->rd); + return CBUF_DATA(rd, cbuf->wr + 1, cbuf->sz); +} + +/* Return the amount of contiguous data left in <cbuf>. + * To be used only by the reader! + */ +static inline size_t cb_contig_data(struct cbuf *cbuf) +{ + size_t end, n; + + end = cbuf->sz - cbuf->rd; + n = (HA_ATOMIC_LOAD(&cbuf->wr) + end) & (cbuf->sz - 1); + return n < end ? n : end; +} + +/* Return the amount of contiguous space left in <cbuf>. + * To be used only by the writer! + */ +static inline size_t cb_contig_space(struct cbuf *cbuf) +{ + size_t end, n; + + end = cbuf->sz - 1 - cbuf->wr; + n = (HA_ATOMIC_LOAD(&cbuf->rd) + end) & (cbuf->sz - 1); + return n <= end ? n : end + 1; +} + +#endif /* _HAPROXY_CBUF_H */ diff --git a/include/haproxy/cfgcond-t.h b/include/haproxy/cfgcond-t.h new file mode 100644 index 0000000..00fc126 --- /dev/null +++ b/include/haproxy/cfgcond-t.h @@ -0,0 +1,105 @@ +/* + * include/haproxy/cfgcond-t.h + * Types for the configuration condition preprocessor + * + * Copyright (C) 2000-2021 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CFGCOND_T_H +#define _HAPROXY_CFGCOND_T_H + +#include <haproxy/api-t.h> + +/* nested if/elif/else/endif block states */ +enum nested_cond_state { + NESTED_COND_IF_TAKE, // "if" with a true condition + NESTED_COND_IF_DROP, // "if" with a false condition + NESTED_COND_IF_SKIP, // "if" masked by an outer false condition + + NESTED_COND_ELIF_TAKE, // "elif" with a true condition from a false one + NESTED_COND_ELIF_DROP, // "elif" with a false condition from a false one + NESTED_COND_ELIF_SKIP, // "elif" masked by an outer false condition or a previously taken if + + NESTED_COND_ELSE_TAKE, // taken "else" after an if false condition + NESTED_COND_ELSE_DROP, // "else" masked by outer false condition or an if true condition +}; + +/* 100 levels of nested conditions should already be sufficient */ +#define MAXNESTEDCONDS 100 + +/* supported conditional predicates for .if/.elif */ +enum cond_predicate { + CFG_PRED_NONE, // none + CFG_PRED_DEFINED, // "defined" + CFG_PRED_FEATURE, // "feature" + CFG_PRED_STREQ, // "streq" + CFG_PRED_STRNEQ, // "strneq" + CFG_PRED_STRSTR, // "strstr" + CFG_PRED_VERSION_ATLEAST, // "version_atleast" + CFG_PRED_VERSION_BEFORE, // "version_before" + CFG_PRED_OSSL_VERSION_ATLEAST, // "openssl_version_atleast" + CFG_PRED_OSSL_VERSION_BEFORE, // "openssl_version_before" + CFG_PRED_SSLLIB_NAME_STARTSWITH, // "ssllib_name_startswith" + CFG_PRED_ENABLED, // "enabled" +}; + +/* types for condition terms */ +enum cfg_cond_term_type { + CCTT_NONE = 0, + CCTT_FALSE, + CCTT_TRUE, + CCTT_PRED, + CCTT_PAREN, // '(' EXPR ')' +}; + +/* keyword for a condition predicate */ +struct cond_pred_kw { + const char *word; // NULL marks the end of the list + enum cond_predicate prd; // one of the CFG_PRED_* above + 
uint64_t arg_mask; // mask of supported arguments (strings only) +}; + +/* condition term */ +struct cfg_cond_term { + enum cfg_cond_term_type type; // CCTT_* + struct arg *args; // arguments for predicates + int neg; // 0: direct result; 1: negate + union { + const struct cond_pred_kw *pred; // predicate (function) + struct cfg_cond_expr *expr; // expression for CCTT_PAREN + }; +}; + +/* condition sub-expression for an AND: + * expr_and = <term> '&&' <expr_and> + * | <term> + */ +struct cfg_cond_and { + struct cfg_cond_term *left; + struct cfg_cond_and *right; // may be NULL +}; + +/* condition expression: + * expr = <expr_and> '||' <expr> + * | <expr_and> + */ +struct cfg_cond_expr { + struct cfg_cond_and *left; + struct cfg_cond_expr *right; // may be NULL +}; + +#endif /* _HAPROXY_CFGCOND_T_H */ diff --git a/include/haproxy/cfgcond.h b/include/haproxy/cfgcond.h new file mode 100644 index 0000000..3171f81 --- /dev/null +++ b/include/haproxy/cfgcond.h @@ -0,0 +1,43 @@ +/* + * include/haproxy/cfgcond.h + * Configuration condition preprocessor + * + * Copyright (C) 2000-2021 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CFGCOND_H +#define _HAPROXY_CFGCOND_H + +#include <haproxy/api.h> +#include <haproxy/cfgcond-t.h> + +const struct cond_pred_kw *cfg_lookup_cond_pred(const char *str); +int cfg_parse_cond_term(const char **text, struct cfg_cond_term **term, char **err, const char **errptr, int maxdepth); +int cfg_eval_cond_term(const struct cfg_cond_term *term, char **err); +void cfg_free_cond_term(struct cfg_cond_term *term); + +int cfg_parse_cond_and(const char **text, struct cfg_cond_and **expr, char **err, const char **errptr, int maxdepth); +int cfg_eval_cond_and(struct cfg_cond_and *expr, char **err); +void cfg_free_cond_and(struct cfg_cond_and *expr); + +int cfg_parse_cond_expr(const char **text, struct cfg_cond_expr **expr, char **err, const char **errptr, int maxdepth); +int cfg_eval_cond_expr(struct cfg_cond_expr *expr, char **err); +void cfg_free_cond_expr(struct cfg_cond_expr *expr); + +int cfg_eval_condition(char **args, char **err, const char **errptr); + +#endif diff --git a/include/haproxy/cfgdiag.h b/include/haproxy/cfgdiag.h new file mode 100644 index 0000000..6989109 --- /dev/null +++ b/include/haproxy/cfgdiag.h @@ -0,0 +1,11 @@ +#ifndef _HAPROXY_CFGDIAG_H +#define _HAPROXY_CFGDIAG_H + +/* Placeholder to execute various diagnostic checks after the configuration file + * has been fully parsed. It will output a warning for each diagnostic found. + * + * Returns 0 if no diagnostic message has been found else 1. 
+ */ +int cfg_run_diagnostics(void); + +#endif /* _HAPROXY_CFGDIAG_H */ diff --git a/include/haproxy/cfgparse.h b/include/haproxy/cfgparse.h new file mode 100644 index 0000000..adcabb3 --- /dev/null +++ b/include/haproxy/cfgparse.h @@ -0,0 +1,149 @@ +/* + * include/haproxy/cfgparse.h + * Configuration parsing functions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CFGPARSE_H +#define _HAPROXY_CFGPARSE_H + +#include <haproxy/api.h> + +struct hap_cpuset; +struct proxy; +struct bind_conf; +struct acl_cond; + +/* configuration sections */ +#define CFG_NONE 0 +#define CFG_GLOBAL 1 +#define CFG_LISTEN 2 +#define CFG_USERLIST 3 +#define CFG_PEERS 4 +#define CFG_CRTLIST 5 + +/* various keyword modifiers */ +enum kw_mod { + KWM_STD = 0, /* normal */ + KWM_NO, /* "no" prefixed before the keyword */ + KWM_DEF, /* "default" prefixed before the keyword */ +}; + +enum cfg_keyword_flags { + KWF_EXPERIMENTAL = 0x1, + KWF_MATCH_PREFIX = 0x2, +}; + +struct cfg_keyword { + int section; /* section type for this keyword */ + const char *kw; /* the keyword itself */ + int (*parse)( /* 0=OK, <0=Alert, >0=Warning */ + char **args, /* command line and arguments */ + int section_type, /* current section CFG_{GLOBAL|LISTEN} */ + struct proxy *curpx, /* current 
proxy (NULL in GLOBAL) */ + const struct proxy *defpx, /* default proxy (NULL in GLOBAL) */ + const char *file, /* config file name */ + int line, /* config file line number */ + char **err); /* error or warning message output pointer */ + int flags; +}; + +/* A keyword list. It is a NULL-terminated array of keywords. It embeds a + * struct list in order to be linked to other lists, allowing it to easily + * be declared where it is needed, and linked without duplicating data nor + * allocating memory. + */ +struct cfg_kw_list { + struct list list; + struct cfg_keyword kw[VAR_ARRAY]; +}; + +/* permit to store configuration section */ +struct cfg_section { + struct list list; + char *section_name; + int (*section_parser)(const char *, int, char **, int); + int (*post_section_parser)(); +}; + +/* store post configuration parsing */ + +struct cfg_postparser { + struct list list; + char *name; + int (*func)(); +}; + +extern struct list postparsers; +extern int cfg_maxpconn; +extern int cfg_maxconn; +extern char *cfg_scope; +extern struct cfg_kw_list cfg_keywords; +extern char *cursection; +extern int non_global_section_parsed; + +extern struct proxy *curproxy; + +int cfg_parse_global(const char *file, int linenum, char **args, int inv); +int cfg_parse_listen(const char *file, int linenum, char **args, int inv); +int cfg_parse_track_sc_num(unsigned int *track_sc_num, + const char *arg, const char *end, char **err); +int readcfgfile(const char *file); +void cfg_register_keywords(struct cfg_kw_list *kwl); +void cfg_unregister_keywords(struct cfg_kw_list *kwl); +int check_config_validity(void); +int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, const char *file, int line, char **err); +int str2receiver(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, const char *file, int line, char **err); +int cfg_register_section(char *section_name, + int (*section_parser)(const char *, int, char **, int), + int (*post_section_parser)()); 
+int cfg_register_postparser(char *name, int (*func)()); +void cfg_unregister_sections(void); +void cfg_backup_sections(struct list *backup_sections); +void cfg_restore_sections(struct list *backup_sections); +int warnif_misplaced_tcp_conn(struct proxy *proxy, const char *file, int line, const char *arg); +int warnif_misplaced_tcp_sess(struct proxy *proxy, const char *file, int line, const char *arg); +int warnif_misplaced_tcp_cont(struct proxy *proxy, const char *file, int line, const char *arg); +int warnif_cond_conflicts(const struct acl_cond *cond, unsigned int where, const char *file, int line); +int warnif_tcp_http_cond(const struct proxy *px, const struct acl_cond *cond); +int too_many_args_idx(int maxarg, int index, char **args, char **msg, int *err_code); +int too_many_args(int maxarg, char **args, char **msg, int *err_code); +int alertif_too_many_args_idx(int maxarg, int index, const char *file, int linenum, char **args, int *err_code); +int alertif_too_many_args(int maxarg, const char *file, int linenum, char **args, int *err_code); +int parse_process_number(const char *arg, unsigned long *proc, int max, int *autoinc, char **err); +void free_email_alert(struct proxy *p); +const char *cfg_find_best_match(const char *word, const struct list *list, int section, const char **extra); +int warnifnotcap(struct proxy *proxy, int cap, const char *file, int line, const char *arg, const char *hint); +int failifnotcap(struct proxy *proxy, int cap, const char *file, int line, const char *arg, const char *hint); +void cfg_dump_registered_keywords(); + +/* simplified way to define a section parser */ +#define REGISTER_CONFIG_SECTION(name, parse, post) \ + INITCALL3(STG_REGISTER, cfg_register_section, (name), (parse), (post)) + +#define REGISTER_CONFIG_POSTPARSER(name, parser) \ + INITCALL2(STG_REGISTER, cfg_register_postparser, (name), (parser)) + +#endif /* _HAPROXY_CFGPARSE_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff 
--git a/include/haproxy/channel-t.h b/include/haproxy/channel-t.h new file mode 100644 index 0000000..6972edb --- /dev/null +++ b/include/haproxy/channel-t.h @@ -0,0 +1,314 @@ +/* + * include/haproxy/channel-t.h + * Channel management definitions, macros and inline functions. + * + * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CHANNEL_T_H +#define _HAPROXY_CHANNEL_T_H + +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/show_flags-t.h> + +/* The CF_* macros designate Channel Flags, which may be ORed in the bit field + * member 'flags' in struct channel. Here we have several types of flags : + * + * - pure status flags, reported by the data layer, which must be cleared + * before doing further I/O : + * CF_*_EVENT, CF_*_PARTIAL + * + * - pure status flags, reported by stream connector layer, which must also + * be cleared before doing further I/O : + * CF_*_TIMEOUT + * + * - read-only indicators reported by lower data levels : + * CF_STREAMER, CF_STREAMER_FAST + * + * The flags have been arranged for readability, so that the read and write + * bits have the same position in a byte (read being the lower byte and write + * the second one). All flag names are relative to the channel. 
For instance, + * 'write' indicates the direction from the channel to the stream connector. + * Please also update the chn_show_flags() function below in case of changes. + */ + +#define CF_READ_EVENT 0x00000001 /* a read event detected on producer side */ +/* unused: 0x00000002 */ +#define CF_READ_TIMEOUT 0x00000004 /* timeout while waiting for producer */ +/* unused 0x00000008 */ + +/* unused: 0x00000010 - 0x00000080 */ + +#define CF_WRITE_EVENT 0x00000100 /* a write event detected on consumer side */ +/* unused: 0x00000200 */ +#define CF_WRITE_TIMEOUT 0x00000400 /* timeout while waiting for consumer */ +/* unused 0x00000800 */ + +#define CF_WAKE_WRITE 0x00001000 /* wake the task up when there's write activity */ +/* unused: 0x00002000 - 0x00004000 */ +#define CF_AUTO_CLOSE 0x00008000 /* producer can forward shutdown to other side */ + +#define CF_STREAMER 0x00010000 /* the producer is identified as streaming data */ +#define CF_STREAMER_FAST 0x00020000 /* the consumer seems to eat the stream very fast */ + +#define CF_WROTE_DATA 0x00040000 /* some data were sent from this buffer */ +/* unused 0x00080000 - 0x00400000 */ +#define CF_AUTO_CONNECT 0x00800000 /* consumer may attempt to establish a new connection */ + +#define CF_DONT_READ 0x01000000 /* disable reading for now */ +/* unused 0x02000000 - 0x08000000 */ + +#define CF_WAKE_ONCE 0x10000000 /* pretend there is activity on this channel (one-shot) */ +#define CF_FLT_ANALYZE 0x20000000 /* at least one filter is still analyzing this channel */ +/* unused 0x40000000 */ +#define CF_ISRESP 0x80000000 /* 0 = request channel, 1 = response channel */ + +/* Masks which define input events for stream analysers */ +#define CF_MASK_ANALYSER (CF_READ_EVENT|CF_READ_TIMEOUT|CF_WRITE_EVENT|CF_WAKE_ONCE) + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. 
+ */ +static forceinline char *chn_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(CF_READ_EVENT, _(CF_READ_TIMEOUT, + _(CF_WRITE_EVENT, + _(CF_WRITE_TIMEOUT, + _(CF_WAKE_WRITE, _(CF_AUTO_CLOSE, + _(CF_STREAMER, _(CF_STREAMER_FAST, _(CF_WROTE_DATA, + _(CF_AUTO_CONNECT, _(CF_DONT_READ, + _(CF_WAKE_ONCE, _(CF_FLT_ANALYZE, + _(CF_ISRESP)))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* Analysers (channel->analysers). + * Those bits indicate that there are some processing to do on the buffer + * contents. It will probably evolve into a linked list later. Those + * analysers could be compared to higher level processors. + * The field is blanked by channel_init() and only by analysers themselves + * afterwards. + * Please also update the chn_show_analysers() function below in case of changes. + */ +/* AN_REQ_FLT_START_FE: 0x00000001 */ +#define AN_REQ_INSPECT_FE 0x00000002 /* inspect request contents in the frontend */ +#define AN_REQ_WAIT_HTTP 0x00000004 /* wait for an HTTP request */ +#define AN_REQ_HTTP_BODY 0x00000008 /* wait for HTTP request body */ +#define AN_REQ_HTTP_PROCESS_FE 0x00000010 /* process the frontend's HTTP part */ +#define AN_REQ_SWITCHING_RULES 0x00000020 /* apply the switching rules */ +/* AN_REQ_FLT_START_BE: 0x00000040 */ +#define AN_REQ_INSPECT_BE 0x00000080 /* inspect request contents in the backend */ +#define AN_REQ_HTTP_PROCESS_BE 0x00000100 /* process the backend's HTTP part */ +#define AN_REQ_HTTP_TARPIT 0x00000200 /* wait for end of HTTP tarpit */ +#define AN_REQ_SRV_RULES 0x00000400 /* use-server rules */ +#define AN_REQ_HTTP_INNER 0x00000800 /* inner processing of HTTP request */ +#define AN_REQ_PRST_RDP_COOKIE 0x00001000 /* persistence on rdp cookie */ +#define AN_REQ_STICKING_RULES 0x00002000 /* table persistence matching */ +/* AN_REQ_FLT_HTTP_HDRS: 0x00004000 */ +#define 
AN_REQ_HTTP_XFER_BODY 0x00008000 /* forward request body */ +#define AN_REQ_WAIT_CLI 0x00010000 +/* AN_REQ_FLT_XFER_DATA: 0x00020000 */ +/* AN_REQ_FLT_END: 0x00040000 */ +#define AN_REQ_ALL 0x0001bfbe /* all of the request analysers */ + +/* response analysers */ +/* AN_RES_FLT_START_FE: 0x00080000 */ +/* AN_RES_FLT_START_BE: 0x00100000 */ +#define AN_RES_INSPECT 0x00200000 /* content inspection */ +#define AN_RES_WAIT_HTTP 0x00400000 /* wait for HTTP response */ +#define AN_RES_STORE_RULES 0x00800000 /* table persistence matching */ +#define AN_RES_HTTP_PROCESS_BE 0x01000000 /* process backend's HTTP part */ +#define AN_RES_HTTP_PROCESS_FE 0x01000000 /* process frontend's HTTP part (same for now) */ +/* AN_RES_FLT_HTTP_HDRS: 0x02000000 */ +#define AN_RES_HTTP_XFER_BODY 0x04000000 /* forward response body */ +#define AN_RES_WAIT_CLI 0x08000000 +/* AN_RES_FLT_XFER_DATA: 0x10000000 */ +/* AN_RES_FLT_END: 0x20000000 */ +#define AN_RES_ALL 0x0de00000 /* all of the response analysers */ + +/* filters interleaved with analysers, see above */ +#define AN_REQ_FLT_START_FE 0x00000001 +#define AN_REQ_FLT_START_BE 0x00000040 +#define AN_REQ_FLT_HTTP_HDRS 0x00004000 +#define AN_REQ_FLT_XFER_DATA 0x00020000 +#define AN_REQ_FLT_END 0x00040000 + +#define AN_RES_FLT_START_FE 0x00080000 +#define AN_RES_FLT_START_BE 0x00100000 +#define AN_RES_FLT_HTTP_HDRS 0x02000000 +#define AN_RES_FLT_XFER_DATA 0x10000000 +#define AN_RES_FLT_END 0x20000000 + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *chn_show_analysers(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* request flags */ + _(AN_REQ_FLT_START_FE, _(AN_REQ_INSPECT_FE, _(AN_REQ_WAIT_HTTP, + _(AN_REQ_HTTP_BODY, _(AN_REQ_HTTP_PROCESS_FE, _(AN_REQ_SWITCHING_RULES, + _(AN_REQ_FLT_START_BE, _(AN_REQ_INSPECT_BE, _(AN_REQ_HTTP_PROCESS_BE, + _(AN_REQ_HTTP_TARPIT, _(AN_REQ_SRV_RULES, _(AN_REQ_HTTP_INNER, + _(AN_REQ_PRST_RDP_COOKIE, _(AN_REQ_STICKING_RULES, + _(AN_REQ_FLT_HTTP_HDRS, _(AN_REQ_HTTP_XFER_BODY, _(AN_REQ_WAIT_CLI, + _(AN_REQ_FLT_XFER_DATA, _(AN_REQ_FLT_END, + /* response flags */ + _(AN_RES_FLT_START_FE, _(AN_RES_FLT_START_BE, _(AN_RES_INSPECT, + _(AN_RES_WAIT_HTTP, _(AN_RES_STORE_RULES, _(AN_RES_HTTP_PROCESS_FE, + _(AN_RES_HTTP_PROCESS_BE, _(AN_RES_FLT_HTTP_HDRS, + _(AN_RES_HTTP_XFER_BODY, _(AN_RES_WAIT_CLI, _(AN_RES_FLT_XFER_DATA, + _(AN_RES_FLT_END))))))))))))))))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* Magic value to forward infinite size (TCP, ...), used with ->to_forward */ +#define CHN_INFINITE_FORWARD MAX_RANGE(unsigned int) + + +struct channel { + unsigned int flags; /* CF_* */ + unsigned int analysers; /* bit field indicating what to do on the channel */ + struct buffer buf; /* buffer attached to the channel, always present but may move */ + size_t output; /* part of buffer which is to be forwarded */ + unsigned int to_forward; /* number of bytes to forward after out without a wake-up */ + unsigned short last_read; /* 16 lower bits of last read date (max pause=65s) */ + unsigned char xfer_large; /* number of consecutive large xfers */ + unsigned char xfer_small; /* number of consecutive small xfers */ + unsigned long long total; /* total data read */ + int analyse_exp; /* expiration date for current analysers (if set) */ +}; + + +/* Note about the channel structure + * + * A channel stores information needed to reliably transport data in a single + * direction. 
It stores status flags, timeouts, counters, subscribed analysers, + * pointers to a data producer and to a data consumer, and information about + * the amount of data which is allowed to flow directly from the producer to + * the consumer without waking up the analysers. + * + * A channel may buffer data into two locations : + * - a visible buffer (->buf) + * - an invisible buffer which right now consists in a pipe making use of + * kernel buffers that cannot be tampered with. + * + * Data stored into the first location may be analysed and altered by analysers + * while data stored in pipes is only aimed at being transported from one + * network socket to another one without being subject to memory copies. This + * buffer may only be used when both the socket layer and the data layer of the + * producer and the consumer support it, which typically is the case with Linux + * splicing over sockets, and when there are enough data to be transported + * without being analyzed (transport of TCP/HTTP payload or tunnelled data, + * which is indicated by ->to_forward). + * + * In order not to mix data streams, the producer may only feed the invisible + * data with data to forward, and only when the visible buffer is empty. The + * producer may not always be able to feed the invisible buffer due to platform + * limitations (lack of kernel support). + * + * Conversely, the consumer must always take data from the invisible data first + * before ever considering visible data. There is no limit to the size of data + * to consume from the invisible buffer, as platform-specific implementations + * will rarely leave enough control on this. So any byte fed into the invisible + * buffer is expected to reach the destination file descriptor, by any means. + * However, it's the consumer's responsibility to ensure that the invisible + * data has been entirely consumed before consuming visible data. This must be + * reflected by ->pipe->data. 
This is very important as this and only this can + * ensure strict ordering of data between buffers. + * + * The producer is responsible for decreasing ->to_forward. The ->to_forward + * parameter indicates how many bytes may be fed into either data buffer + * without waking the parent up. The special value CHN_INFINITE_FORWARD is + * never decreased nor increased. + * + * The buf->o parameter says how many bytes may be consumed from the visible + * buffer. This parameter is updated by any buffer_write() as well as any data + * forwarded through the visible buffer. Since the ->to_forward attribute + * applies to data after buf->p, an analyser will not see a buffer which has a + * non-null ->to_forward with buf->i > 0. A producer is responsible for raising + * buf->o by min(to_forward, buf->i) when it injects data into the buffer. + * + * The consumer is responsible for decreasing ->buf->o when it sends data + * from the visible buffer, and ->pipe->data when it sends data from the + * invisible buffer. + * + * A real-world example consists in part in an HTTP response waiting in a + * buffer to be forwarded. We know the header length (300) and the amount of + * data to forward (content-length=9000). The buffer already contains 1000 + * bytes of data after the 300 bytes of headers. Thus the caller will set + * buf->o to 300 indicating that it explicitly wants to send those data, and + * set ->to_forward to 9000 (content-length). This value must be normalised + * immediately after updating ->to_forward : since there are already 1300 bytes + * in the buffer, 300 of which are already counted in buf->o, and that size + * is smaller than ->to_forward, we must update buf->o to 1300 to flush the + * whole buffer, and reduce ->to_forward to 8000. After that, the producer may + * try to feed the additional data through the invisible buffer using a + * platform-specific method such as splice(). 
+ * + * The ->to_forward entry is also used to detect whether we can fill the buffer + * or not. The idea is that we need to save some space for data manipulation + * (mainly header rewriting in HTTP) so we don't want to have a full buffer on + * input before processing a request or response. Thus, we ensure that there is + * always global.maxrewrite bytes of free space. Since we don't want to forward + * chunks without filling the buffer, we rely on ->to_forward. When ->to_forward + * is null, we may have some processing to do so we don't want to fill the + * buffer. When ->to_forward is non-null, we know we don't care for at least as + * many bytes. In the end, we know that each of the ->to_forward bytes will + * eventually leave the buffer. So as long as ->to_forward is larger than + * global.maxrewrite, we can fill the buffer. If ->to_forward is smaller than + * global.maxrewrite, then we don't want to fill the buffer with more than + * buf->size - global.maxrewrite + ->to_forward. + * + * A buffer may contain up to 5 areas : + * - the data waiting to be sent. These data are located between buf->p-o and + * buf->p ; + * - the data to process and possibly transform. These data start at + * buf->p and may be up to ->i bytes long. + * - the data to preserve. They start at ->p and stop at ->p+i. The limit + * between the two solely depends on the protocol being analysed. + * - the spare area : it is the remainder of the buffer, which can be used to + * store new incoming data. It starts at ->p+i and is up to ->size-i-o long. + * It may be limited by global.maxrewrite. + * - the reserved area : this is the area which must not be filled and is + * reserved for possible rewrites ; it is up to global.maxrewrite bytes + * long. 
+ */ + +#endif /* _HAPROXY_CHANNEL_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/channel.h b/include/haproxy/channel.h new file mode 100644 index 0000000..17dd75f --- /dev/null +++ b/include/haproxy/channel.h @@ -0,0 +1,1021 @@ +/* + * include/haproxy/channel.h + * Channel management definitions, macros and inline functions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CHANNEL_H +#define _HAPROXY_CHANNEL_H + +#include <haproxy/api.h> +#include <haproxy/channel-t.h> +#include <haproxy/dynbuf.h> +#include <haproxy/global.h> +#include <haproxy/htx.h> +#include <haproxy/stream.h> +#include <haproxy/task.h> +#include <haproxy/ticks.h> +#include <haproxy/tools-t.h> + +struct stconn; + +/* perform minimal initializations, report 0 in case of error, 1 if OK. 
*/ +int init_channel(); + +unsigned long long __channel_forward(struct channel *chn, unsigned long long bytes); + +/* SI-to-channel functions working with buffers */ +int ci_putblk(struct channel *chn, const char *str, int len); +int ci_putchr(struct channel *chn, char c); +int ci_getline_nc(const struct channel *chn, char **blk1, size_t *len1, char **blk2, size_t *len2); +int ci_getblk_nc(const struct channel *chn, char **blk1, size_t *len1, char **blk2, size_t *len2); +int ci_insert_line2(struct channel *c, int pos, const char *str, int len); +int co_inject(struct channel *chn, const char *msg, int len); +int co_getchar(const struct channel *chn, char *c); +int co_getline(const struct channel *chn, char *str, int len); +int co_getdelim(const struct channel *chn, char *str, int len, const char *delim, char escape); +int co_getword(const struct channel *chn, char *str, int len, char sep); +int co_getblk(const struct channel *chn, char *blk, int len, int offset); +int co_getline_nc(const struct channel *chn, const char **blk1, size_t *len1, const char **blk2, size_t *len2); +int co_getblk_nc(const struct channel *chn, const char **blk1, size_t *len1, const char **blk2, size_t *len2); + + +/* returns a pointer to the stream the channel belongs to */ +static inline struct stream *chn_strm(const struct channel *chn) +{ + if (chn->flags & CF_ISRESP) + return LIST_ELEM(chn, struct stream *, res); + else + return LIST_ELEM(chn, struct stream *, req); +} + +/* returns a pointer to the stream connector feeding the channel (producer) */ +static inline struct stconn *chn_prod(const struct channel *chn) +{ + if (chn->flags & CF_ISRESP) + return LIST_ELEM(chn, struct stream *, res)->scb; + else + return LIST_ELEM(chn, struct stream *, req)->scf; +} + +/* returns a pointer to the stream connector consuming the channel (consumer) */ +static inline struct stconn *chn_cons(const struct channel *chn) +{ + if (chn->flags & CF_ISRESP) + return LIST_ELEM(chn, struct stream *,
res)->scf; + else + return LIST_ELEM(chn, struct stream *, req)->scb; +} + +/* c_orig() : returns the pointer to the channel buffer's origin */ +static inline char *c_orig(const struct channel *c) +{ + return b_orig(&c->buf); +} + +/* c_size() : returns the size of the channel's buffer */ +static inline size_t c_size(const struct channel *c) +{ + return b_size(&c->buf); +} + +/* c_wrap() : returns the pointer to the channel buffer's wrapping point */ +static inline char *c_wrap(const struct channel *c) +{ + return b_wrap(&c->buf); +} + +/* c_data() : returns the amount of data in the channel's buffer */ +static inline size_t c_data(const struct channel *c) +{ + return b_data(&c->buf); +} + +/* c_room() : returns the room left in the channel's buffer */ +static inline size_t c_room(const struct channel *c) +{ + return b_size(&c->buf) - b_data(&c->buf); +} + +/* c_empty() : returns a boolean indicating if the channel's buffer is empty */ +static inline size_t c_empty(const struct channel *c) +{ + return !c_data(c); +} + +/* c_full() : returns a boolean indicating if the channel's buffer is full */ +static inline size_t c_full(const struct channel *c) +{ + return !c_room(c); +} + +/* co_data() : returns the amount of output data in the channel's buffer */ +static inline size_t co_data(const struct channel *c) +{ + CHECK_IF_HOT(c->output > c_data(c)); + return c->output; +} + +/* ci_data() : returns the amount of input data in the channel's buffer */ +static inline size_t ci_data(const struct channel *c) +{ + return c_data(c) - co_data(c); +} + +/* ci_next() : for an absolute pointer <p> or a relative offset <o> pointing to + * a valid location within channel <c>'s buffer, returns either the absolute + * pointer or the relative offset pointing to the next byte, which usually is + * at (p + 1) unless p reaches the wrapping point and wrapping is needed. 
+ */ +static inline size_t ci_next_ofs(const struct channel *c, size_t o) +{ + return b_next_ofs(&c->buf, o); +} +static inline char *ci_next(const struct channel *c, const char *p) +{ + return b_next(&c->buf, p); +} + + +/* c_ptr() : returns a pointer to an offset relative to the beginning of the + * input data in the buffer. If instead the offset is negative, a pointer to + * existing output data is returned. The function only takes care of wrapping, + * it's up to the caller to ensure the offset is always within byte count + * bounds. + */ +static inline char *c_ptr(const struct channel *c, ssize_t ofs) +{ + return b_peek(&c->buf, co_data(c) + ofs); +} + +/* c_adv() : advances the channel's buffer by <adv> bytes, which means that the + * buffer's pointer advances, and that as many bytes from in are transferred + * from in to out. The caller is responsible for ensuring that adv is always + * smaller than or equal to b->i. + */ +static inline void c_adv(struct channel *c, size_t adv) +{ + c->output += adv; + BUG_ON_HOT(c->output > c_data(c)); +} + +/* c_rew() : rewinds the channel's buffer by <adv> bytes, which means that the + * buffer's pointer goes backwards, and that as many bytes from out are moved + * to in. The caller is responsible for ensuring that adv is always smaller + * than or equal to b->o. + */ +static inline void c_rew(struct channel *c, size_t adv) +{ + BUG_ON_HOT(c->output < adv); + c->output -= adv; +} + +/* c_realign_if_empty() : realign the channel's buffer if it's empty */ +static inline void c_realign_if_empty(struct channel *chn) +{ + b_realign_if_empty(&chn->buf); +} + +/* Sets the amount of output for the channel */ +static inline void co_set_data(struct channel *c, size_t output) +{ + BUG_ON_HOT(output > c_data(c)); + c->output = output; +} + + +/* co_head() : returns a pointer to the beginning of output data in the buffer. + * The "__" variants don't support wrapping, "ofs" are relative to + * the buffer's origin. 
+ */ +static inline size_t __co_head_ofs(const struct channel *c) +{ + return __b_peek_ofs(&c->buf, 0); +} +static inline char *__co_head(const struct channel *c) +{ + return __b_peek(&c->buf, 0); +} +static inline size_t co_head_ofs(const struct channel *c) +{ + return b_peek_ofs(&c->buf, 0); +} +static inline char *co_head(const struct channel *c) +{ + return b_peek(&c->buf, 0); +} + + +/* co_tail() : returns a pointer to the end of output data in the buffer. + * The "__" variants don't support wrapping, "ofs" are relative to + * the buffer's origin. + */ +static inline size_t __co_tail_ofs(const struct channel *c) +{ + return __b_peek_ofs(&c->buf, co_data(c)); +} +static inline char *__co_tail(const struct channel *c) +{ + return __b_peek(&c->buf, co_data(c)); +} +static inline size_t co_tail_ofs(const struct channel *c) +{ + return b_peek_ofs(&c->buf, co_data(c)); +} +static inline char *co_tail(const struct channel *c) +{ + return b_peek(&c->buf, co_data(c)); +} + + +/* ci_head() : returns a pointer to the beginning of input data in the buffer. + * The "__" variants don't support wrapping, "ofs" are relative to + * the buffer's origin. + */ +static inline size_t __ci_head_ofs(const struct channel *c) +{ + return __b_peek_ofs(&c->buf, co_data(c)); +} +static inline char *__ci_head(const struct channel *c) +{ + return __b_peek(&c->buf, co_data(c)); +} +static inline size_t ci_head_ofs(const struct channel *c) +{ + return b_peek_ofs(&c->buf, co_data(c)); +} +static inline char *ci_head(const struct channel *c) +{ + return b_peek(&c->buf, co_data(c)); +} + + +/* ci_tail() : returns a pointer to the end of input data in the buffer. + * The "__" variants don't support wrapping, "ofs" are relative to + * the buffer's origin. 
+ */ +static inline size_t __ci_tail_ofs(const struct channel *c) +{ + return __b_peek_ofs(&c->buf, c_data(c)); +} +static inline char *__ci_tail(const struct channel *c) +{ + return __b_peek(&c->buf, c_data(c)); +} +static inline size_t ci_tail_ofs(const struct channel *c) +{ + return b_peek_ofs(&c->buf, c_data(c)); +} +static inline char *ci_tail(const struct channel *c) +{ + return b_peek(&c->buf, c_data(c)); +} + + +/* ci_stop() : returns the pointer to the byte following the end of input data + * in the channel buffer. It may be out of the buffer. It's used to + * compute lengths or stop pointers. + */ +static inline size_t __ci_stop_ofs(const struct channel *c) +{ + return __b_stop_ofs(&c->buf); +} +static inline const char *__ci_stop(const struct channel *c) +{ + return __b_stop(&c->buf); +} +static inline size_t ci_stop_ofs(const struct channel *c) +{ + return b_stop_ofs(&c->buf); +} +static inline const char *ci_stop(const struct channel *c) +{ + return b_stop(&c->buf); +} + + +/* Returns the amount of input data that can contiguously be read at once */ +static inline size_t ci_contig_data(const struct channel *c) +{ + return b_contig_data(&c->buf, co_data(c)); +} + +/* Initialize all fields in the channel. */ +static inline void channel_init(struct channel *chn) +{ + chn->buf = BUF_NULL; + chn->to_forward = 0; + chn->last_read = now_ms; + chn->xfer_small = chn->xfer_large = 0; + chn->total = 0; + chn->analysers = 0; + chn->flags = 0; + chn->output = 0; +} + +/* Schedule up to <bytes> more bytes to be forwarded via the channel without + * notifying the owner task. Any data pending in the buffer are scheduled to be + * sent as well, in the limit of the number of bytes to forward. This must be + * the only method to use to schedule bytes to be forwarded. If the requested + * number is too large, it is automatically adjusted. The number of bytes taken + * into account is returned. 
Directly touching ->to_forward will cause lockups + * when buf->o goes down to zero if nobody is ready to push the remaining data. + */ +static inline unsigned long long channel_forward(struct channel *chn, unsigned long long bytes) +{ + /* hint: avoid comparisons on long long for the fast case, since if the + * length does not fit in an unsigned int, it will never be forwarded at + * once anyway. + */ + if (bytes <= ~0U) { + unsigned int bytes32 = bytes; + + if (bytes32 <= ci_data(chn)) { + /* OK this amount of bytes might be forwarded at once */ + c_adv(chn, bytes32); + return bytes; + } + } + return __channel_forward(chn, bytes); +} + +/* Forwards any input data and marks the channel for permanent forwarding */ +static inline void channel_forward_forever(struct channel *chn) +{ + c_adv(chn, ci_data(chn)); + chn->to_forward = CHN_INFINITE_FORWARD; +} + +/* <len> bytes of input data was added into the channel <chn>. This function + * must be called to update the channel state. It also handles the fast + * forwarding.
*/ +static inline void channel_add_input(struct channel *chn, unsigned int len) +{ + if (chn->to_forward) { + unsigned long fwd = len; + if (chn->to_forward != CHN_INFINITE_FORWARD) { + if (fwd > chn->to_forward) + fwd = chn->to_forward; + chn->to_forward -= fwd; + } + c_adv(chn, fwd); + } + /* notify that some data was read */ + chn->total += len; + chn->flags |= CF_READ_EVENT; +} + +static inline unsigned long long channel_htx_forward(struct channel *chn, struct htx *htx, unsigned long long bytes) +{ + unsigned long long ret = 0; + + if (htx->data) { + b_set_data(&chn->buf, htx->data); + ret = channel_forward(chn, bytes); + b_set_data(&chn->buf, b_size(&chn->buf)); + } + return ret; +} + + +static inline void channel_htx_forward_forever(struct channel *chn, struct htx *htx) +{ + c_adv(chn, htx->data - co_data(chn)); + chn->to_forward = CHN_INFINITE_FORWARD; +} +/*********************************************************************/ +/* These functions are used to compute various channel content sizes */ +/*********************************************************************/ + +/* Returns non-zero if the channel is rewritable, which means that the buffer + * it is attached to has at least <maxrewrite> bytes immediately available. + * This is used to decide when a request or response may be parsed when some + * data from a previous exchange might still be present. + */ +static inline int channel_is_rewritable(const struct channel *chn) +{ + int rem = chn->buf.size; + + rem -= b_data(&chn->buf); + rem -= global.tune.maxrewrite; + return rem >= 0; +} + +/* Tells whether data are likely to leave the buffer. This is used to know when + * we can safely ignore the reserve since we know we cannot retry a connection. + * It returns zero if data are blocked, non-zero otherwise. + */ +static inline int channel_may_send(const struct channel *chn) +{ + return chn_cons(chn)->state == SC_ST_EST; +} + +/* HTX version of channel_may_recv(). 
Returns non-zero if the channel can still + * receive data. */ +static inline int channel_htx_may_recv(const struct channel *chn, const struct htx *htx) +{ + uint32_t rem; + + if (!htx->size) + return 1; + + rem = htx_free_data_space(htx); + if (!rem) + return 0; /* htx already full */ + + if (rem > global.tune.maxrewrite) + return 1; /* reserve not yet reached */ + + if (!channel_may_send(chn)) + return 0; /* don't touch reserve until we can send */ + + /* Now we know there's some room left in the reserve and we may + * forward. As long as i-to_fwd < size-maxrw, we may still + * receive. This is equivalent to i+maxrw-size < to_fwd, + * which is logical since i+maxrw-size is what overlaps with + * the reserve, and we want to ensure they're covered by scheduled + * forwards. + */ + rem += co_data(chn); + if (rem > global.tune.maxrewrite) + return 1; + + return (global.tune.maxrewrite - rem < chn->to_forward); +} + +/* Returns non-zero if the channel can still receive data. This is used to + * decide when to stop reading into a buffer when we want to ensure that we + * leave the reserve untouched after all pending outgoing data are forwarded. + * The reserved space is taken into account if ->to_forward indicates that an + * end of transfer is close to happen. Note that both ->buf.o and ->to_forward + * are considered as available since they're supposed to leave the buffer. The + * test is optimized to avoid as many operations as possible for the fast case + * and to be used as an "if" condition. Just like channel_recv_limit(), we + * never allow to overwrite the reserve until the output stream connector is + * connected, otherwise we could spin on a POST with http-send-name-header. 
+ */ +static inline int channel_may_recv(const struct channel *chn) +{ + int rem = chn->buf.size; + + if (IS_HTX_STRM(chn_strm(chn))) + return channel_htx_may_recv(chn, htxbuf(&chn->buf)); + + if (b_is_null(&chn->buf)) + return 1; + + rem -= b_data(&chn->buf); + if (!rem) + return 0; /* buffer already full */ + + if (rem > global.tune.maxrewrite) + return 1; /* reserve not yet reached */ + + if (!channel_may_send(chn)) + return 0; /* don't touch reserve until we can send */ + + /* Now we know there's some room left in the reserve and we may + * forward. As long as i-to_fwd < size-maxrw, we may still + * receive. This is equivalent to i+maxrw-size < to_fwd, + * which is logical since i+maxrw-size is what overlaps with + * the reserve, and we want to ensure they're covered by scheduled + * forwards. + */ + rem = ci_data(chn) + global.tune.maxrewrite - chn->buf.size; + return rem < 0 || (unsigned int)rem < chn->to_forward; +} + +/* Returns true if the channel's input is already closed */ +static inline int channel_input_closed(struct channel *chn) +{ + return ((chn_prod(chn)->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) != 0); +} + +/* Returns true if the channel's output is already closed */ +static inline int channel_output_closed(struct channel *chn) +{ + return ((chn_cons(chn)->flags & SC_FL_SHUT_DONE) != 0); +} + +/* Check channel timeouts, and set the corresponding flags. */ +static inline void channel_check_timeout(struct channel *chn) +{ + if (likely(!(chn->flags & CF_READ_EVENT)) && unlikely(tick_is_expired(chn->analyse_exp, now_ms))) + chn->flags |= CF_READ_EVENT; +} + + +/* Erase any content from channel <buf> and adjusts flags accordingly. Note + * that any spliced data is not affected since we may not have any access to + * it. 
+ */ +static inline void channel_erase(struct channel *chn) +{ + chn->to_forward = 0; + chn->output = 0; + b_reset(&chn->buf); +} + +static inline void channel_htx_erase(struct channel *chn, struct htx *htx) +{ + htx_reset(htx); + channel_erase(chn); +} + + +/* marks the channel as "shutdown" ASAP in both directions */ +static inline void channel_abort(struct channel *chn) +{ + chn_prod(chn)->flags |= SC_FL_ABRT_WANTED; + chn_cons(chn)->flags |= SC_FL_SHUT_WANTED; + chn->flags |= CF_AUTO_CLOSE; + chn->flags &= ~CF_AUTO_CONNECT; +} + +/* allow the consumer to try to establish a new connection. */ +static inline void channel_auto_connect(struct channel *chn) +{ + chn->flags |= CF_AUTO_CONNECT; +} + +/* prevent the consumer from trying to establish a new connection, and also + * disable auto shutdown forwarding. + */ +static inline void channel_dont_connect(struct channel *chn) +{ + chn->flags &= ~(CF_AUTO_CONNECT|CF_AUTO_CLOSE); +} + +/* allow the producer to forward shutdown requests */ +static inline void channel_auto_close(struct channel *chn) +{ + chn->flags |= CF_AUTO_CLOSE; +} + +/* prevent the producer from forwarding shutdown requests */ +static inline void channel_dont_close(struct channel *chn) +{ + chn->flags &= ~CF_AUTO_CLOSE; +} + +/* allow the producer to read / poll the input */ +static inline void channel_auto_read(struct channel *chn) +{ + chn->flags &= ~CF_DONT_READ; +} + +/* prevent the producer from read / poll the input */ +static inline void channel_dont_read(struct channel *chn) +{ + chn->flags |= CF_DONT_READ; +} + + +/*************************************************/ +/* Buffer operations in the context of a channel */ +/*************************************************/ + + +/* Return the max number of bytes the buffer can contain so that once all the + * pending bytes are forwarded, the buffer still has global.tune.maxrewrite + * bytes free. The result sits between chn->size - maxrewrite and chn->size. 
+ * It is important to mention that if buf->i is already larger than size-maxrw + * the condition above cannot be satisfied and the lowest size will be returned + * anyway. The principles are the following : + * 0) the empty buffer has a limit of zero + * 1) a non-connected buffer cannot touch the reserve + * 2) infinite forward can always fill the buffer since all data will leave + * 3) all output bytes are considered in transit since they're leaving + * 4) all input bytes covered by to_forward are considered in transit since + * they'll be converted to output bytes. + * 5) all input bytes not covered by to_forward are considered remaining + * 6) all bytes scheduled to be forwarded minus what is already in the input + * buffer will be in transit during future rounds. + * 7) 4+5+6 imply that the amount of input bytes (i) is irrelevant to the max + * usable length, only to_forward and output count. The difference is + * visible when to_forward > i. + * 8) the reserve may be covered up to the amount of bytes in transit since + * these bytes will only take temporary space.
+ * + * A typical buffer looks like this : + * + * <-------------- max_len -----------> + * <---- o ----><----- i -----> <--- 0..maxrewrite ---> + * +------------+--------------+-------+----------------------+ + * |////////////|\\\\\\\\\\\\\\|xxxxxxx| reserve | + * +------------+--------+-----+-------+----------------------+ + * <- fwd -> <-avail-> + * + * Or when to_forward > i : + * + * <-------------- max_len -----------> + * <---- o ----><----- i -----> <--- 0..maxrewrite ---> + * +------------+--------------+-------+----------------------+ + * |////////////|\\\\\\\\\\\\\\|xxxxxxx| reserve | + * +------------+--------+-----+-------+----------------------+ + * <-avail-> + * <------------------ fwd ----------------> + * + * - the amount of buffer bytes in transit is : min(i, fwd) + o + * - some scheduled bytes may be in transit (up to fwd - i) + * - the reserve is max(0, maxrewrite - transit) + * - the maximum usable buffer length is size - reserve. + * - the available space is max_len - i - o + * + * So the formula to compute the buffer's maximum length to protect the reserve + * when reading new data is : + * + * max = size - maxrewrite + min(maxrewrite, transit) + * = size - max(maxrewrite - transit, 0) + * + * But WARNING! The conditions might change during the transfer and it could + * very well happen that a buffer would contain more bytes than max_len due to + * i+o already walking over the reserve (eg: after a header rewrite), including + * i or o alone hitting the limit. So it is critical to always consider that + * bounds may have already been crossed and that available space may be negative + * for example. Due to this it is perfectly possible for this function to return + * a value that is lower than current i+o. 
+ */ +static inline int channel_recv_limit(const struct channel *chn) +{ + unsigned int transit; + int reserve; + + /* return zero if empty */ + reserve = chn->buf.size; + if (b_is_null(&chn->buf)) + goto end; + + /* return size - maxrewrite if we can't send */ + reserve = global.tune.maxrewrite; + if (unlikely(!channel_may_send(chn))) + goto end; + + /* We need to check what remains of the reserve after o and to_forward + * have been transmitted, but they can overflow together and they can + * cause an integer underflow in the comparison since both are unsigned + * while maxrewrite is signed. + * The code below has been verified for being a valid check for this : + * - if (o + to_forward) overflow => return size [ large enough ] + * - if o + to_forward >= maxrw => return size [ large enough ] + * - otherwise return size - (maxrw - (o + to_forward)) + */ + transit = co_data(chn) + chn->to_forward; + reserve -= transit; + if (transit < chn->to_forward || // addition overflow + transit >= (unsigned)global.tune.maxrewrite) // enough transit data + return chn->buf.size; + end: + return chn->buf.size - reserve; +} + +/* HTX version of channel_recv_limit(). Return the max number of bytes the HTX + * buffer can contain so that once all the pending bytes are forwarded, the + * buffer still has global.tune.maxrewrite bytes free. + */ +static inline int channel_htx_recv_limit(const struct channel *chn, const struct htx *htx) +{ + unsigned int transit; + int reserve; + + /* return zero if not allocated */ + if (!htx->size) + return 0; + + /* return max_data_space - maxrewrite if we can't send */ + reserve = global.tune.maxrewrite; + if (unlikely(!channel_may_send(chn))) + goto end; + + /* We need to check what remains of the reserve after o and to_forward + * have been transmitted, but they can overflow together and they can + * cause an integer underflow in the comparison since both are unsigned + * while maxrewrite is signed.
+ * The code below has been verified for being a valid check for this : + * - if (o + to_forward) overflow => return htx->size [ large enough ] + * - if o + to_forward >= maxrw => return htx->size [ large enough ] + * - otherwise return htx->size - (maxrw - (o + to_forward)) + */ + transit = co_data(chn) + chn->to_forward; + reserve -= transit; + if (transit < chn->to_forward || // addition overflow + transit >= (unsigned)global.tune.maxrewrite) // enough transit data + return htx->size; + end: + return (htx->size - reserve); +} + +/* HTX version of channel_full(). Instead of checking if INPUT data exceeds + * (size - reserve), this function checks if the free space for data in <htx> + * and the data scheduled for output are lower to the reserve. In such case, the + * channel is considered as full. + */ +static inline int channel_htx_full(const struct channel *c, const struct htx *htx, + unsigned int reserve) +{ + if (!htx->size) + return 0; + return (htx_free_data_space(htx) + co_data(c) <= reserve); +} + +/* Returns non-zero if the channel's INPUT buffer's is considered full, which + * means that it holds at least as much INPUT data as (size - reserve). This + * also means that data that are scheduled for output are considered as potential + * free space, and that the reserved space is always considered as not usable. + * This information alone cannot be used as a general purpose free space indicator. + * However it accurately indicates that too many data were fed in the buffer + * for an analyzer for instance. See the channel_may_recv() function for a more + * generic function taking everything into account. + */ +static inline int channel_full(const struct channel *c, unsigned int reserve) +{ + if (b_is_null(&c->buf)) + return 0; + + if (IS_HTX_STRM(chn_strm(c))) + return channel_htx_full(c, htxbuf(&c->buf), reserve); + + return (ci_data(c) + reserve >= c_size(c)); +} + +/* HTX version of channel_recv_max(). 
*/ +static inline int channel_htx_recv_max(const struct channel *chn, const struct htx *htx) +{ + int ret; + + ret = channel_htx_recv_limit(chn, htx) - htx_used_space(htx); + if (ret < 0) + ret = 0; + return ret; +} + +/* Returns the amount of space available at the input of the buffer, taking the + * reserved space into account if ->to_forward indicates that an end of transfer + * is close to happen. The test is optimized to avoid as many operations as + * possible for the fast case. + */ +static inline int channel_recv_max(const struct channel *chn) +{ + int ret; + + if (IS_HTX_STRM(chn_strm(chn))) + return channel_htx_recv_max(chn, htxbuf(&chn->buf)); + + ret = channel_recv_limit(chn) - b_data(&chn->buf); + if (ret < 0) + ret = 0; + return ret; +} + +/* Returns the maximum absolute amount of data that can be copied in a channel, + * taking the reserved space into account but also the HTX overhead for HTX + * streams. + */ +static inline size_t channel_data_limit(const struct channel *chn) +{ + size_t max = (global.tune.bufsize - global.tune.maxrewrite); + + if (IS_HTX_STRM(chn_strm(chn))) + max -= HTX_BUF_OVERHEAD; + return max; +} + +/* Returns the amount of data in a channel, taking the HTX streams into + * account. For raw channels, it is equivalent to c_data. For HTX channels, we + * rely on the HTX api. + */ +static inline size_t channel_data(const struct channel *chn) +{ + return (IS_HTX_STRM(chn_strm(chn)) ? htx_used_space(htxbuf(&chn->buf)) : c_data(chn)); +} + +/* Returns the amount of input data in a channel, taking the HTX streams into + * account. This function relies on channel_data(). + */ +static inline size_t channel_input_data(const struct channel *chn) +{ + return channel_data(chn) - co_data(chn); +} + +/* Returns non-zero if the channel is empty, taking the HTX streams into + * account. The HTX test is performed on the stream owning the channel, as + * channel_data() does, and not on the channel itself. + */ +static inline size_t channel_empty(const struct channel *chn) +{ + return (IS_HTX_STRM(chn_strm(chn)) ?
htx_is_empty(htxbuf(&chn->buf)) : c_empty(chn)); +} + + +/* Returns the amount of bytes that can be written over the input data at once, + * including reserved space which may be overwritten. This is used by Lua to + * insert data in the input side just before the other data using buffer_replace(). + * The goal is to transfer these new data in the output buffer. + */ +static inline int ci_space_for_replace(const struct channel *chn) +{ + const struct buffer *buf = &chn->buf; + const char *end; + + /* If the input side data overflows, we cannot insert data contiguously. */ + if (b_head(buf) + b_data(buf) >= b_wrap(buf)) + return 0; + + /* Check the last byte used in the buffer, it may be a byte of the output + * side if the buffer wraps, or it's the end of the buffer. + */ + end = b_head(buf); + if (end <= ci_head(chn)) + end = b_wrap(buf); + + /* Compute the amount of bytes which can be written. */ + return end - ci_tail(chn); +} + +/* Allocates a buffer for channel <chn>. Returns 0 in case of failure, non-zero + * otherwise. + * + * If no buffers are available, the requester, represented by <wait> pointer, + * will be added in the list of objects waiting for an available buffer. + */ +static inline int channel_alloc_buffer(struct channel *chn, struct buffer_wait *wait) +{ + if (b_alloc(&chn->buf) != NULL) + return 1; + + if (!LIST_INLIST(&wait->list)) + LIST_APPEND(&th_ctx->buffer_wq, &wait->list); + + return 0; +} + +/* Releases a possibly allocated buffer for channel <chn>. If it was not + * allocated, this function does nothing. Else the buffer is released and we try + * to wake up as many streams/applets as possible. */ +static inline void channel_release_buffer(struct channel *chn, struct buffer_wait *wait) +{ + if (c_size(chn) && c_empty(chn)) { + b_free(&chn->buf); + offer_buffers(wait->target, 1); + } +} + +/* Truncate any unread data in the channel's buffer, and disable forwarding. + * Outgoing data are left intact.
This is mainly to be used to send error + * messages after existing data. + */ +static inline void channel_truncate(struct channel *chn) +{ + if (!co_data(chn)) + return channel_erase(chn); + + chn->to_forward = 0; + if (!ci_data(chn)) + return; + + chn->buf.data = co_data(chn); +} + +static inline void channel_htx_truncate(struct channel *chn, struct htx *htx) +{ + if (!co_data(chn)) + return channel_htx_erase(chn, htx); + + chn->to_forward = 0; + if (htx->data == co_data(chn)) + return; + htx_truncate(htx, co_data(chn)); +} + +/* This function realigns a possibly wrapping channel buffer so that the input + * part is contiguous and starts at the beginning of the buffer and the output + * part ends at the end of the buffer. This provides the best conditions since + * it allows the largest inputs to be processed at once and ensures that once + * the output data leaves, the whole buffer is available at once. + */ +static inline void channel_slow_realign(struct channel *chn, char *swap) +{ + return b_slow_realign(&chn->buf, swap, co_data(chn)); +} + + +/* Forward all headers of an HTX message, starting from the SL to the EOH. This + * function returns the position of the block after the EOH, if + * found. Otherwise, it returns -1. + */ +static inline int32_t channel_htx_fwd_headers(struct channel *chn, struct htx *htx) +{ + int32_t pos; + size_t data = 0; + + for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) { + struct htx_blk *blk = htx_get_blk(htx, pos); + data += htx_get_blksz(blk); + if (htx_get_blk_type(blk) == HTX_BLK_EOH) { + pos = htx_get_next(htx, pos); + break; + } + } + c_adv(chn, data); + return pos; +} + +/* + * Advance the channel buffer's read pointer by <len> bytes. This is useful + * when data have been read directly from the buffer. It is illegal to call + * this function with <len> causing a wrapping at the end of the buffer. It's + * the caller's responsibility to ensure that <len> is never larger than + * chn->o. 
+ */ +static inline void co_skip(struct channel *chn, int len) +{ + BUG_ON_HOT(len > chn->output); + b_del(&chn->buf, len); + chn->output -= len; + c_realign_if_empty(chn); +} + +/* HTX version of co_skip(). This function skips at most <len> bytes from the + * output of the channel <chn>. Depending on how data are stored in <htx> less + * than <len> bytes can be skipped.. + */ +static inline void co_htx_skip(struct channel *chn, struct htx *htx, int len) +{ + struct htx_ret htxret; + + htxret = htx_drain(htx, len); + if (htxret.ret) { + BUG_ON_HOT(htxret.ret > chn->output); + chn->output -= htxret.ret; + } +} + +/* Tries to copy chunk <chunk> into the channel's buffer after length controls. + * The chn->o and to_forward pointers are updated. If the channel's input is + * closed, -2 is returned. If the block is too large for this buffer, -3 is + * returned. If there is not enough room left in the buffer, -1 is returned. + * Otherwise the number of bytes copied is returned (0 being a valid number). + * Channel flag READ_PARTIAL is updated if some data can be transferred. The + * chunk's length is updated with the number of bytes sent. + */ +static inline int ci_putchk(struct channel *chn, struct buffer *chunk) +{ + int ret; + + ret = ci_putblk(chn, chunk->area, chunk->data); + if (ret > 0) + chunk->data -= ret; + return ret; +} + +/* Tries to copy string <str> at once into the channel's buffer after length + * controls. The chn->o and to_forward pointers are updated. If the channel's + * input is closed, -2 is returned. If the block is too large for this buffer, + * -3 is returned. If there is not enough room left in the buffer, -1 is + * returned. Otherwise the number of bytes copied is returned (0 being a valid + * number). Channel flag READ_PARTIAL is updated if some data can be + * transferred. 
+ */ +static inline int ci_putstr(struct channel *chn, const char *str) +{ + return ci_putblk(chn, str, strlen(str)); +} + +/* + * Return one char from the channel's buffer. If the buffer is empty and the + * channel is closed, return -2. If the buffer is just empty, return -1. The + * buffer's pointer is not advanced, it's up to the caller to call co_skip(buf, + * 1) when it has consumed the char. Also note that this function respects the + * chn->o limit. + */ +static inline int co_getchr(struct channel *chn) +{ + /* closed or empty + imminent close = -2; empty = -1 */ + if (unlikely((chn_cons(chn)->flags & SC_FL_SHUT_DONE) || !co_data(chn))) { + if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) + return -2; + return -1; + } + return *co_head(chn); +} + +#endif /* _HAPROXY_CHANNEL_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/check-t.h b/include/haproxy/check-t.h new file mode 100644 index 0000000..eb080a9 --- /dev/null +++ b/include/haproxy/check-t.h @@ -0,0 +1,198 @@ +/* + * include/haproxy/check-t.h + * Health-checks definitions, enums, macros and bitfields. + * + * Copyright 2008-2009 Krzysztof Piotr Oledzki <ole@ans.pl> + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#ifndef _HAPROXY_CHECKS_T_H +#define _HAPROXY_CHECKS_T_H + +#include <sys/time.h> + +#include <import/ebtree-t.h> +#include <import/ist.h> +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/connection-t.h> +#include <haproxy/dynbuf-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/vars-t.h> + +/* Please note: this file tends to commonly be part of circular dependencies, + * so it is important to keep its includes list to the minimum possible (i.e. + * only types whose size needs to be known). Since there are no function + * prototypes nor pointers here, forward declarations are not really necessary. + * This file ought to be split into multiple parts, at least regular checks vs + * tcp-checks. + */ + +/* enum used by check->result. Must remain in this order, as some code uses + * result >= CHK_RES_PASSED to declare success. + */ +enum chk_result { + CHK_RES_UNKNOWN = 0, /* initialized to this by default */ + CHK_RES_NEUTRAL, /* valid check but no status information */ + CHK_RES_FAILED, /* check failed */ + CHK_RES_PASSED, /* check succeeded and server is fully up again */ + CHK_RES_CONDPASS, /* check reports the server doesn't want new sessions */ +}; + +/* flags used by check->state */ +#define CHK_ST_INPROGRESS 0x0001 /* a check is currently running */ +#define CHK_ST_CONFIGURED 0x0002 /* this check is configured and may be enabled */ +#define CHK_ST_ENABLED 0x0004 /* this check is currently administratively enabled */ +#define CHK_ST_PAUSED 0x0008 /* checks are paused because of maintenance (health only) */ +#define CHK_ST_AGENT 0x0010 /* check is an agent check (otherwise it's a health check) */ +#define CHK_ST_PORT_MISS 0x0020 /* check can't be sent because no port is configured to run it */ +#define CHK_ST_IN_ALLOC 0x0040 /* check blocked waiting for input buffer allocation */ +#define CHK_ST_OUT_ALLOC 0x0080 /* check blocked waiting for output buffer allocation */ +#define CHK_ST_CLOSE_CONN 0x0100 /* check is 
waiting that the connection gets closed */ +#define CHK_ST_PURGE 0x0200 /* check must be freed */ +#define CHK_ST_FASTINTER 0x0400 /* force fastinter check */ +#define CHK_ST_READY 0x0800 /* check ready to migrate or run, see below */ +#define CHK_ST_SLEEPING 0x1000 /* check was sleeping, i.e. not currently bound to a thread, see below */ + +/* 4 possible states for CHK_ST_SLEEPING and CHK_ST_READY: + * SLP RDY State Description + * 0 0 QUEUED Check is in queue due to concurrency limit + * 0 1 RUNNING Check is bound to current thread and running + * 1 0 SLEEPING Check is sleeping, not bound to a thread + * 1 1 MIGRATING Check is migrating to another thread + */ + +/* check status */ +enum healthcheck_status { + HCHK_STATUS_UNKNOWN = 0, /* Unknown */ + HCHK_STATUS_INI, /* Initializing */ + HCHK_STATUS_START, /* Check started - SPECIAL STATUS */ + + /* Below we have finished checks */ + HCHK_STATUS_CHECKED, /* DUMMY STATUS */ + + HCHK_STATUS_HANA, /* Health analyze detected enough consecutive errors */ + + HCHK_STATUS_SOCKERR, /* Socket error */ + + HCHK_STATUS_L4OK, /* L4 check passed, for example tcp connect */ + HCHK_STATUS_L4TOUT, /* L4 timeout */ + HCHK_STATUS_L4CON, /* L4 connection problem, for example: */ + /* "Connection refused" (tcp rst) or "No route to host" (icmp) */ + + HCHK_STATUS_L6OK, /* L6 check passed */ + HCHK_STATUS_L6TOUT, /* L6 (SSL) timeout */ + HCHK_STATUS_L6RSP, /* L6 invalid response - protocol error */ + + HCHK_STATUS_L7TOUT, /* L7 (HTTP/SMTP) timeout */ + HCHK_STATUS_L7RSP, /* L7 invalid response - protocol error */ + + /* Below we have layer 5-7 data available */ + HCHK_STATUS_L57DATA, /* DUMMY STATUS */ + HCHK_STATUS_L7OKD, /* L7 check passed */ + HCHK_STATUS_L7OKCD, /* L7 check conditionally passed */ + HCHK_STATUS_L7STS, /* L7 response error, for example HTTP 5xx */ + + HCHK_STATUS_PROCERR, /* External process check failure */ + HCHK_STATUS_PROCTOUT, /* External process check timeout */ + HCHK_STATUS_PROCOK, /* External process check 
passed */ + + HCHK_STATUS_SIZE +}; + +/* health status for response tracking */ +enum { + HANA_STATUS_UNKNOWN = 0, + + HANA_STATUS_L4_OK, /* L4 successful connection */ + HANA_STATUS_L4_ERR, /* L4 unsuccessful connection */ + + HANA_STATUS_HTTP_OK, /* Correct http response */ + HANA_STATUS_HTTP_STS, /* Wrong http response, for example HTTP 5xx */ + HANA_STATUS_HTTP_HDRRSP, /* Invalid http response (headers) */ + HANA_STATUS_HTTP_RSP, /* Invalid http response */ + + HANA_STATUS_HTTP_READ_ERROR, /* Read error */ + HANA_STATUS_HTTP_READ_TIMEOUT, /* Read timeout */ + HANA_STATUS_HTTP_BROKEN_PIPE, /* Unexpected close from server */ + + HANA_STATUS_SIZE +}; + +enum { + HANA_ONERR_UNKNOWN = 0, + + HANA_ONERR_FASTINTER, /* Force fastinter*/ + HANA_ONERR_FAILCHK, /* Simulate a failed check */ + HANA_ONERR_SUDDTH, /* Enters sudden death - one more failed check will mark this server down */ + HANA_ONERR_MARKDWN, /* Mark this server down, now! */ +}; + +enum { + HANA_ONMARKEDDOWN_NONE = 0, + HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS, /* Shutdown peer sessions */ +}; + +enum { + HANA_ONMARKEDUP_NONE = 0, + HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS, /* Shutdown peer sessions */ +}; + +enum { + HANA_OBS_NONE = 0, + + HANA_OBS_LAYER4, /* Observe L4 - for example tcp */ + HANA_OBS_LAYER7, /* Observe L7 - for example http */ + + HANA_OBS_SIZE +}; + +struct tcpcheck_rule; +struct tcpcheck_rules; + +struct check { + enum obj_type obj_type; /* object type == OBJ_TYPE_CHECK */ + struct session *sess; /* Health check session. */ + struct vars vars; /* Health check dynamic variables. 
*/ + struct xprt_ops *xprt; /* transport layer operations for health checks */ + struct stconn *sc; /* stream connector used by health checks */ + struct buffer bi, bo; /* input and output buffers to send/recv check */ + struct buffer_wait buf_wait; /* Wait list for buffer allocation */ + struct task *task; /* the task associated to the health check processing, NULL if disabled */ + ullong start; /* last health check start time */ + long duration; /* time in ms took to finish last health check */ + short status, code; /* check result, check code */ + unsigned short port; /* the port to use for the health checks */ + char desc[HCHK_DESC_LEN]; /* health check description */ + signed char use_ssl; /* use SSL for health checks (1: on, 0: server mode, -1: off) */ + int send_proxy; /* send a PROXY protocol header with checks */ + struct tcpcheck_rules *tcpcheck_rules; /* tcp-check send / expect rules */ + struct tcpcheck_rule *current_step; /* current step when using tcpcheck */ + int inter, fastinter, downinter; /* checks: time in milliseconds */ + enum chk_result result; /* health-check result : CHK_RES_* */ + int state; /* state of the check : CHK_ST_* */ + int health; /* 0 to rise-1 = bad; + * rise to rise+fall-1 = good */ + int rise, fall; /* time in iterations */ + int type; /* Check type, one of PR_O2_*_CHK */ + struct server *server; /* back-pointer to server */ + struct proxy *proxy; /* proxy to be used */ + char **argv; /* the arguments to use if running a process-based check */ + char **envp; /* the environment to use if running a process-based check */ + struct pid_list *curpid; /* entry in pid_list used for current process-based test, or -1 if not in test */ + struct sockaddr_storage addr; /* the address to check */ + char *sni; /* Server name */ + char *alpn_str; /* ALPN to use for checks */ + int alpn_len; /* ALPN string length */ + const struct mux_proto_list *mux_proto; /* the mux to use for all outgoing connections (specified by the "proto" keyword) */ 
+ struct list check_queue; /* entry in the check queue. Not empty = in queue. */ + int via_socks4; /* check the connection via socks4 proxy */ +}; + +#endif /* _HAPROXY_CHECKS_T_H */ diff --git a/include/haproxy/check.h b/include/haproxy/check.h new file mode 100644 index 0000000..c90d3e7 --- /dev/null +++ b/include/haproxy/check.h @@ -0,0 +1,131 @@ +/* + * include/haproxy/check.h + * Functions prototypes for the checks. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CHECKS_H +#define _HAPROXY_CHECKS_H + +#include <haproxy/check-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +#include <haproxy/trace-t.h> + +extern struct trace_source trace_check; + +/* Details about these events are defined in <src/check.c> */ +#define CHK_EV_TASK_WAKE (1ULL << 0) +#define CHK_EV_HCHK_START (1ULL << 1) +#define CHK_EV_HCHK_WAKE (1ULL << 2) +#define CHK_EV_HCHK_RUN (1ULL << 3) +#define CHK_EV_HCHK_END (1ULL << 4) +#define CHK_EV_HCHK_SUCC (1ULL << 5) +#define CHK_EV_HCHK_ERR (1ULL << 6) +#define CHK_EV_HCHK (CHK_EV_HCHK_START|CHK_EV_HCHK_WAKE|CHK_EV_HCHK_RUN|\ + CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC|CHK_EV_HCHK_ERR) + +#define CHK_EV_TCPCHK_EVAL (1ULL << 7) +#define CHK_EV_TCPCHK_ERR (1ULL << 8) +#define CHK_EV_TCPCHK_CONN (1ULL << 9) 
+#define CHK_EV_TCPCHK_SND (1ULL << 10) +#define CHK_EV_TCPCHK_EXP (1ULL << 11) +#define CHK_EV_TCPCHK_ACT (1ULL << 12) +#define CHK_EV_TCPCHK (CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_ERR|CHK_EV_TCPCHK_CONN|\ + CHK_EV_TCPCHK_SND|CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ACT) + +#define CHK_EV_RX_DATA (1ULL << 13) +#define CHK_EV_RX_BLK (1ULL << 14) +#define CHK_EV_RX_ERR (1ULL << 15) +#define CHK_EV_RX (CHK_EV_RX_DATA|CHK_EV_RX_BLK|CHK_EV_RX_ERR) + +#define CHK_EV_TX_DATA (1ULL << 16) +#define CHK_EV_TX_BLK (1ULL << 17) +#define CHK_EV_TX_ERR (1ULL << 18) +#define CHK_EV_TX (CHK_EV_TX_DATA|CHK_EV_TX_BLK|CHK_EV_TX_ERR) + +extern struct data_cb check_conn_cb; +extern struct proxy checks_fe; + +short get_check_status_result(short check_status); +const char *get_check_status_description(short check_status); +const char *get_check_status_info(short check_status); +int httpchk_build_status_header(struct server *s, struct buffer *buf); +void __health_adjust(struct server *s, short status); +void check_append_info(struct buffer *msg, struct check *check); +void set_server_check_status(struct check *check, short status, const char *desc); +void chk_report_conn_err(struct check *check, int errno_bck, int expired); +void check_notify_failure(struct check *check); +void check_notify_stopping(struct check *check); +void check_notify_success(struct check *check); +struct task *process_chk(struct task *t, void *context, unsigned int state); + +struct task *srv_chk_io_cb(struct task *t, void *ctx, unsigned int state); + +int check_buf_available(void *target); +struct buffer *check_get_buf(struct check *check, struct buffer *bptr); +void check_release_buf(struct check *check, struct buffer *bptr); +const char *init_check(struct check *check, int type); +void free_check(struct check *check); +void check_purge(struct check *check); +int wake_srv_chk(struct stconn *sc); + +int init_srv_check(struct server *srv); +int init_srv_agent_check(struct server *srv); +int start_check_task(struct check *check, 
int mininter, int nbcheck, int srvpos); + +/* Declared here, but the definitions are in flt_spoe.c */ +int spoe_prepare_healthcheck_request(char **req, int *len); +int spoe_handle_healthcheck_response(char *frame, size_t size, char *err, int errlen); + +int set_srv_agent_send(struct server *srv, const char *send); + +/* set agent addr and appropriate flag */ +static inline void set_srv_agent_addr(struct server *srv, struct sockaddr_storage *sk) +{ + srv->agent.addr = *sk; + srv->flags |= SRV_F_AGENTADDR; +} + +/* set agent port and appropriate flag */ +static inline void set_srv_agent_port(struct server *srv, int port) +{ + srv->agent.port = port; + srv->flags |= SRV_F_AGENTPORT; +} + +/* Use this one only. This inline version only ensures that we don't + * call the function when the observe mode is disabled. + */ +static inline void health_adjust(struct server *s, short status) +{ + /* return now if either observing or health check is not enabled */ + if (!s->observe || !s->check.task) + return; + + __health_adjust(s, status); +} + +#endif /* _HAPROXY_CHECKS_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/chunk.h b/include/haproxy/chunk.h new file mode 100644 index 0000000..43c7270 --- /dev/null +++ b/include/haproxy/chunk.h @@ -0,0 +1,303 @@ +/* + * include/haproxy/chunk.h + * Chunk management definitions, macros and inline functions. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CHUNK_H +#define _HAPROXY_CHUNK_H + +#include <stdlib.h> +#include <string.h> + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/buf-t.h> +#include <haproxy/pool.h> + + +extern struct pool_head *pool_head_trash; + +/* function prototypes */ + +int chunk_printf(struct buffer *chk, const char *fmt, ...) + __attribute__ ((format(printf, 2, 3))); + +int chunk_appendf(struct buffer *chk, const char *fmt, ...) + __attribute__ ((format(printf, 2, 3))); + +int chunk_htmlencode(struct buffer *dst, struct buffer *src); +int chunk_asciiencode(struct buffer *dst, struct buffer *src, char qc); +int chunk_strcmp(const struct buffer *chk, const char *str); +int chunk_strcasecmp(const struct buffer *chk, const char *str); +struct buffer *get_trash_chunk(void); +int init_trash_buffers(int first); + +static inline void chunk_reset(struct buffer *chk) +{ + chk->data = 0; +} + +static inline void chunk_init(struct buffer *chk, char *str, size_t size) +{ + chk->area = str; + chk->head = 0; + chk->data = 0; + chk->size = size; +} + +/* report 0 in case of error, 1 if OK. */ +static inline int chunk_initlen(struct buffer *chk, char *str, size_t size, + int len) +{ + + if (len < 0 || (size && len > size)) + return 0; + + chk->area = str; + chk->head = 0; + chk->data = len; + chk->size = size; + + return 1; +} + +/* this is only for temporary manipulation, the chunk is read-only */ +static inline void chunk_initstr(struct buffer *chk, const char *str) +{ + chk->area = (char *)str; + chk->head = 0; + chk->data = strlen(str); + chk->size = 0; /* mark it read-only */ +} + +/* + * Allocate a trash chunk from the reentrant pool. The buffer starts at the + * end of the chunk. This chunk must be freed using free_trash_chunk(). 
This + * call may fail and the caller is responsible for checking that the returned + * pointer is not NULL. + */ +static forceinline struct buffer *alloc_trash_chunk(void) +{ + struct buffer *chunk; + + chunk = pool_alloc(pool_head_trash); + if (chunk) { + char *buf = (char *)chunk + sizeof(struct buffer); + *buf = 0; + chunk_init(chunk, buf, + pool_head_trash->size - sizeof(struct buffer)); + } + return chunk; +} + +/* + * free a trash chunk allocated by alloc_trash_chunk(). NOP on NULL. + */ +static forceinline void free_trash_chunk(struct buffer *chunk) +{ + pool_free(pool_head_trash, chunk); +} + +/* copies chunk <src> into <chk>. Returns 0 in case of failure. */ +static inline int chunk_cpy(struct buffer *chk, const struct buffer *src) +{ + if (unlikely(src->data > chk->size)) + return 0; + + chk->data = src->data; + memcpy(chk->area, src->area, src->data); + return 1; +} + +/* copies memory area <src> into <chk> for <len> bytes. Returns 0 in + * case of failure. No trailing zero is added. + */ +static inline int chunk_memcpy(struct buffer *chk, const char *src, + size_t len) +{ + if (unlikely(len > chk->size)) + return 0; + + chk->data = len; + memcpy(chk->area, src, len); + + return 1; +} + +/* appends memory area <src> after <chk> for <len> bytes. Returns 0 in + * case of failure. No trailing zero is added. + */ +static inline int chunk_memcat(struct buffer *chk, const char *src, + size_t len) +{ + if (unlikely(chk->data + len > chk->size)) + return 0; + + memcpy(chk->area + chk->data, src, len); + chk->data += len; + return 1; +} + +/* appends ist <src> after <chk>. Returns 0 in case of failure. */ +static inline int chunk_istcat(struct buffer *chk, const struct ist src) +{ + return chunk_memcat(chk, istptr(src), istlen(src)); +} + +/* appends chunk <src> after <chk>. Returns 0 in case of failure. 
*/ +static inline int chunk_cat(struct buffer *chk, const struct buffer *src) +{ + return chunk_memcat(chk, src->area, src->data); +} + +/* copies str into <chk> followed by a trailing zero. Returns 0 in + * case of failure. + */ +static inline int chunk_strcpy(struct buffer *chk, const char *str) +{ + size_t len; + + len = strlen(str); + + if (unlikely(len >= chk->size)) + return 0; + + chk->data = len; + memcpy(chk->area, str, len + 1); + + return 1; +} + +/* copies at most <max> chars from str into <chk> followed by a trailing zero. + * Returns 0 in case of failure. + */ +static inline int chunk_strncpy(struct buffer *chk, const char *str, size_t max) +{ + size_t len; + + len = strlen(str); + if (len > max) + len = max; + + if (unlikely(len >= chk->size)) + return 0; + + memcpy(chk->area, str, len); + chk->area[len] = 0; + chk->data = len; + return 1; +} + +/* appends str after <chk> followed by a trailing zero. Returns 0 in + * case of failure. + */ +static inline int chunk_strcat(struct buffer *chk, const char *str) +{ + size_t len; + + len = strlen(str); + + if (unlikely(chk->data + len >= chk->size)) + return 0; + + memcpy(chk->area + chk->data, str, len + 1); + chk->data += len; + return 1; +} + +/* Adds a trailing zero to the current chunk and returns the pointer to the + * following part. The purpose is to be able to use a chunk as a series of + * short independent strings with chunk_* functions, which do not need to be + * released. Returns NULL if no space is available to ensure that the new + * string will have its own trailing zero. 
For example : + * chunk_init(&trash); + * pid = chunk_newstr(&trash); + * chunk_appendf(&trash, "%d", getpid()); + * name = chunk_newstr(&trash); + * chunk_appendf(&trash, "%s", gethostname()); + * printf("hostname=<%s>, pid=<%s>\n", name, pid); + */ +static inline char *chunk_newstr(struct buffer *chk) +{ + if (chk->data + 1 >= chk->size) + return NULL; + + chk->area[chk->data++] = 0; + return chk->area + chk->data; +} + +static inline void chunk_drop(struct buffer *chk) +{ + chk->area = NULL; + chk->data = -1; + chk->size = 0; +} + +static inline void chunk_destroy(struct buffer *chk) +{ + if (!chk->size) + return; + + free(chk->area); + chunk_drop(chk); +} + +/* + * frees the destination chunk if already allocated, allocates a new string, + * and copies the source into it. The new chunk will have extra room for a + * trailing zero unless the source chunk was actually full. The pointer to + * the destination string is returned, or NULL if the allocation fails or if + * any pointer is NULL. + */ +static inline char *chunk_dup(struct buffer *dst, const struct buffer *src) +{ + if (!dst || !src || !src->area) + return NULL; + + if (dst->size) + free(dst->area); + dst->head = src->head; + dst->data = src->data; + dst->size = src->data; + if (dst->size < src->size || !src->size) + dst->size++; + + dst->area = malloc(dst->size); + if (!dst->area) { + dst->head = 0; + dst->data = 0; + dst->size = 0; + return NULL; + } + + memcpy(dst->area, src->area, dst->data); + if (dst->data < dst->size) + dst->area[dst->data] = 0; + + return dst->area; +} + +#endif /* _HAPROXY_CHUNK_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/cli-t.h b/include/haproxy/cli-t.h new file mode 100644 index 0000000..c155df3 --- /dev/null +++ b/include/haproxy/cli-t.h @@ -0,0 +1,100 @@ +/* + * include/haproxy/cli-t.h + * This file provides structures and types for CLI. 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CLI_T_H +#define _HAPROXY_CLI_T_H + +#include <haproxy/applet-t.h> + +/* Access level for a stats socket (appctx->cli_level) */ +#define ACCESS_LVL_NONE 0x0000 +#define ACCESS_LVL_USER 0x0001 +#define ACCESS_LVL_OPER 0x0002 +#define ACCESS_LVL_ADMIN 0x0003 +#define ACCESS_LVL_MASK 0x0003 + +#define ACCESS_FD_LISTENERS 0x0004 /* expose listeners FDs on stats socket */ +#define ACCESS_MASTER 0x0008 /* works with the master (and every other processes) */ +#define ACCESS_MASTER_ONLY 0x0010 /* only works with the master */ +#define ACCESS_EXPERT 0x0020 /* access to dangerous commands reserved to experts */ +#define ACCESS_EXPERIMENTAL 0x0040 +#define ACCESS_MCLI_DEBUG 0x0080 /* allow the master CLI to use any command without the flag ACCESS_MASTER */ +#define ACCESS_MCLI_SEVERITY_NB 0x0100 /* 'set severity-output number' on master CLI */ +#define ACCESS_MCLI_SEVERITY_STR 0x0200 /* 'set severity-output string' on master CLI */ + +/* flags for appctx->st1 */ +#define APPCTX_CLI_ST1_PROMPT (1 << 0) +#define APPCTX_CLI_ST1_PAYLOAD (1 << 1) +#define APPCTX_CLI_ST1_NOLF (1 << 2) +#define APPCTX_CLI_ST1_TIMED (1 << 3) + +#define CLI_PREFIX_KW_NB 5 +#define CLI_MAX_MATCHES 5 +#define CLI_MAX_HELP_ENTRIES 
1024 + +/* CLI states */ +enum { + CLI_ST_INIT = 0, /* initial state, must leave to zero ! */ + CLI_ST_END, /* final state, let's close */ + CLI_ST_GETREQ, /* wait for a request */ + CLI_ST_OUTPUT, /* all states after this one are responses */ + CLI_ST_PROMPT, /* display the prompt (first output, same code) */ + CLI_ST_PRINT, /* display const message in cli->msg */ + CLI_ST_PRINT_ERR, /* display const error in cli->msg */ + CLI_ST_PRINT_DYN, /* display dynamic message in cli->err. After the display, free the pointer */ + CLI_ST_PRINT_DYNERR, /* display dynamic error in cli->err. After the display, free the pointer */ + CLI_ST_PRINT_UMSG, /* display usermsgs_ctx buffer. After the display, usermsgs_ctx is reset. */ + CLI_ST_PRINT_UMSGERR, /* display usermsgs_ctx buffer as error. After the display, usermsgs_ctx is reset. */ + CLI_ST_CALLBACK, /* custom callback pointer */ +}; + +/* CLI severity output formats */ +enum { + CLI_SEVERITY_UNDEFINED = 0, /* undefined severity format */ + CLI_SEVERITY_NONE, /* no severity information prepended */ + CLI_SEVERITY_NUMBER, /* prepend informational cli messages with a severity as number */ + CLI_SEVERITY_STRING, /* prepend informational cli messages with a severity as string */ +}; + +/* CLI context for printing command responses. 
*/ +struct cli_print_ctx { + const char *msg; /* pointer to a persistent message to be returned in CLI_ST_PRINT state */ + char *err; /* pointer to a 'must free' message to be returned in CLI_ST_PRINT_DYN state */ + int severity; /* severity of the message to be returned according to (syslog) rfc5424 */ +}; + +struct cli_kw { + const char *str_kw[CLI_PREFIX_KW_NB]; /* keywords ended by NULL, limited to CLI_PREFIX_KW_NB + separated keywords combination */ + const char *usage; /* usage message */ + int (*parse)(char **args, char *payload, struct appctx *appctx, void *private); + int (*io_handler)(struct appctx *appctx); + void (*io_release)(struct appctx *appctx); + void *private; + int level; /* this is the level needed to show the keyword usage and to use it */ +}; + +struct cli_kw_list { + struct list list; + struct cli_kw kw[VAR_ARRAY]; +}; + +#endif /* _HAPROXY_CLI_T_H */ diff --git a/include/haproxy/cli.h b/include/haproxy/cli.h new file mode 100644 index 0000000..32c6599 --- /dev/null +++ b/include/haproxy/cli.h @@ -0,0 +1,138 @@ +/* + * include/haproxy/cli.h + * This file contains definitions of some primitives dedicated to + * statistics output. + * + * Copyright (C) 2000-2011 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CLI_H +#define _HAPROXY_CLI_H + +#include <haproxy/applet.h> +#include <haproxy/channel-t.h> +#include <haproxy/cli-t.h> +#include <haproxy/global.h> +#include <haproxy/mworker-t.h> +#include <haproxy/stream-t.h> + + +void cli_register_kw(struct cli_kw_list *kw_list); +struct cli_kw* cli_find_kw_exact(char **args); +void cli_list_keywords(void); + +int cli_has_level(struct appctx *appctx, int level); + +int cli_parse_default(char **args, char *payload, struct appctx *appctx, void *private); + +/* mworker proxy functions */ + +int mworker_cli_proxy_create(void); +struct bind_conf *mworker_cli_proxy_new_listener(char *line); +int mworker_cli_sockpair_new(struct mworker_proc *mworker_proc, int proc); +void mworker_cli_proxy_stop(void); + +extern struct bind_conf *mcli_reload_bind_conf; + +/* proxy mode cli functions */ + +/* analyzers */ +int pcli_wait_for_request(struct stream *s, struct channel *req, int an_bit); +int pcli_wait_for_response(struct stream *s, struct channel *rep, int an_bit); + +/* updates the CLI's context to log <msg> at <severity> and returns 1. This is + * for use in CLI parsers to deal with quick response messages. + */ +static inline int cli_msg(struct appctx *appctx, int severity, const char *msg) +{ + struct cli_print_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + + ctx->severity = severity; + ctx->msg = msg; + appctx->st0 = CLI_ST_PRINT; + return 1; +} + +/* updates the CLI's context to log error message <err> and returns 1. The + * message will be logged at level LOG_ERR. This is for use in CLI parsers to + * deal with quick response messages. 
+ */ +static inline int cli_err(struct appctx *appctx, const char *err) +{ + struct cli_print_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + + ctx->msg = err; + appctx->st0 = CLI_ST_PRINT_ERR; + return 1; +} + +/* updates the CLI's context to log <msg> at <severity> and returns 1. The + * message must have been dynamically allocated and will be freed. This is + * for use in CLI parsers to deal with quick response messages. + */ +static inline int cli_dynmsg(struct appctx *appctx, int severity, char *msg) +{ + struct cli_print_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + + ctx->severity = severity; + ctx->err = msg; + appctx->st0 = CLI_ST_PRINT_DYN; + return 1; +} + +/* updates the CLI's context to log error message <err> and returns 1. The + * message must have been dynamically allocated and will be freed. The message + * will be logged at level LOG_ERR. This is for use in CLI parsers to deal with + * quick response messages. + */ +static inline int cli_dynerr(struct appctx *appctx, char *err) +{ + struct cli_print_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + + ctx->err = err; + appctx->st0 = CLI_ST_PRINT_DYNERR; + return 1; +} + +/* updates the CLI's context to log messages stored in thread-local + * usermsgs_ctx at <severity> level. usermsgs_ctx will be reset when done. + * This is for use in CLI parsers to deal with quick response messages. + * + * Always returns 1. + */ +static inline int cli_umsg(struct appctx *appctx, int severity) +{ + struct cli_print_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + + ctx->severity = severity; + appctx->st0 = CLI_ST_PRINT_UMSG; + return 1; +} + +/* updates the CLI's context to log messages stored in thread-local + * usermsgs_ctx using error level. usermsgs_ctx will be reset when done. + * This is for use in CLI parsers to deal with quick response messages. + * + * Always returns 1. 
+ */ +static inline int cli_umsgerr(struct appctx *appctx) +{ + appctx->st0 = CLI_ST_PRINT_UMSGERR; + return 1; +} + +#endif /* _HAPROXY_CLI_H */ diff --git a/include/haproxy/clock.h b/include/haproxy/clock.h new file mode 100644 index 0000000..264363e --- /dev/null +++ b/include/haproxy/clock.h @@ -0,0 +1,59 @@ +/* + * include/haproxy/clock.h + * Exported parts for time-keeping + * + * Copyright (C) 2000-2021 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CLOCK_H +#define _HAPROXY_CLOCK_H + +#include <sys/time.h> +#include <haproxy/api.h> + +extern struct timeval start_date; /* the process's start date in wall-clock time */ +extern struct timeval ready_date; /* date when the process was considered ready */ +extern ullong start_time_ns; /* the process's start date in internal monotonic time (ns) */ +extern volatile ullong global_now_ns; /* common monotonic date between all threads, in ns (wraps every 585 yr) */ + +extern THREAD_LOCAL ullong now_ns; /* internal monotonic date derived from real clock, in ns (wraps every 585 yr) */ +extern THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */ + +uint64_t now_cpu_time_thread(int thr); +uint64_t now_mono_time(void); +uint64_t now_mono_time_fast(void); +uint64_t now_cpu_time(void); +uint64_t 
now_cpu_time_fast(void); +void clock_set_local_source(void); +void clock_update_local_date(int max_wait, int interrupted); +void clock_update_global_date(void); +void clock_init_process_date(void); +void clock_init_thread_date(void); +int clock_setup_signal_timer(void *timer, int sig, int val); +char *timeofday_as_iso_us(int pad); +uint clock_report_idle(void); +void clock_leaving_poll(int timeout, int interrupted); +void clock_entering_poll(void); +void clock_adjust_now_offset(void); + +/* updates the local date then the global date, in that order */ +static inline void clock_update_date(int max_wait, int interrupted) +{ + clock_update_local_date(max_wait, interrupted); + clock_update_global_date(); +} + +#endif diff --git a/include/haproxy/compat.h b/include/haproxy/compat.h new file mode 100644 index 0000000..aa4f952 --- /dev/null +++ b/include/haproxy/compat.h @@ -0,0 +1,313 @@ +/* + * include/haproxy/compat.h + * Operating system compatibility interface. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_COMPAT_H +#define _HAPROXY_COMPAT_H + +#include <limits.h> +#include <unistd.h> +/* This is needed on Linux for Netfilter includes */ +#include <sys/param.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netinet/tcp.h> + + +/* These are a few short names for commonly used types whose size and sometimes + * signedness depends on the architecture. Be careful not to rely on a few + * common but wrong assumptions: + * - char is not always signed (ARM, AARCH64, PPC) + * - long is not always large enough for a pointer (Windows) + * These types are needed with the standard C API (string.h, printf, syscalls). + * + * When a fixed size is needed (protocol interoperability), better use the + * standard types provided by stdint.h: + * - size_t : unsigned int of default word size, large enough for any + * object in memory + * - ssize_t : signed int of default word size, used by some syscalls + * - uintptr_t : an unsigned int large enough to store any pointer + * - ptrdiff_t : a signed int large enough to hold a distance between 2 ptrs + * - int<size>_t : a signed int of <size> bits (8,16,32,64 work everywhere) + * - uint<size>_t : an unsigned int of <size> bits + */ +typedef signed char schar; +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long ullong; +typedef long long llong; + + +/* set any optional field in a struct to this type to save ifdefs. Its address + * will still be valid but it will not reserve any room nor require any + * initialization. 
+ */ +typedef struct { } empty_t; + +// Redefine some limits that are not present everywhere +#ifndef LLONG_MAX +# define LLONG_MAX 9223372036854775807LL +# define LLONG_MIN (-LLONG_MAX - 1LL) +#endif + +#ifndef ULLONG_MAX +# define ULLONG_MAX (LLONG_MAX * 2ULL + 1) +#endif + +#ifndef LONGBITS +#define LONGBITS ((unsigned int)sizeof(long) * 8) +#endif + +#ifndef BITS_PER_INT +#define BITS_PER_INT (8*sizeof(int)) +#endif + +#ifndef __WORDSIZE +# if defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 4 +# define __WORDSIZE 32 +# elif defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 8 +# define __WORDSIZE 64 +# else +# error "Unknown machine word size (__WORDSIZE, __SIZEOF_LONG)" +# endif +#endif + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +/* this is for libc5 for example */ +#ifndef TCP_NODELAY +#define TCP_NODELAY 1 +#endif + +#ifndef SHUT_RD +#define SHUT_RD 0 +#endif + +#ifndef SHUT_WR +#define SHUT_WR 1 +#endif + +/* only Linux defines it */ +#ifndef MSG_NOSIGNAL +#define MSG_NOSIGNAL 0 +#endif + +/* AIX does not define MSG_DONTWAIT. We'll define it to zero, and test it + * wherever appropriate. + */ +#ifndef MSG_DONTWAIT +#define MSG_DONTWAIT 0 +#endif + +/* Only Linux defines MSG_MORE */ +#ifndef MSG_MORE +#define MSG_MORE 0 +#endif + +/* On Linux 2.4 and above, MSG_TRUNC can be used on TCP sockets to drop any + * pending data. Let's rely on NETFILTER to detect if this is supported. 
+ */ +#ifdef USE_NETFILTER +#define MSG_TRUNC_CLEARS_INPUT +#endif + +/* Maximum path length, OS-dependent */ +#ifndef MAXPATHLEN +#define MAXPATHLEN 128 +#endif + +/* longest UNIX socket name */ +#ifndef UNIX_MAX_PATH +#define UNIX_MAX_PATH 108 +#endif + +/* On Linux, allows pipes to be resized */ +#ifndef F_SETPIPE_SZ +#define F_SETPIPE_SZ (1024 + 7) +#endif + +#if defined(USE_TPROXY) && defined(USE_NETFILTER) +#include <linux/types.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv4.h> +#endif + +/* On Linux, IP_TRANSPARENT and/or IP_FREEBIND generally require a kernel patch */ +#if defined(USE_LINUX_TPROXY) +#if !defined(IP_FREEBIND) +#define IP_FREEBIND 15 +#endif /* !IP_FREEBIND */ +#if !defined(IP_TRANSPARENT) +#define IP_TRANSPARENT 19 +#endif /* !IP_TRANSPARENT */ +#if !defined(IPV6_TRANSPARENT) +#define IPV6_TRANSPARENT 75 +#endif /* !IPV6_TRANSPARENT */ +#endif /* USE_LINUX_TPROXY */ + +#if defined(IP_FREEBIND) \ + || defined(IP_BINDANY) \ + || defined(IPV6_BINDANY) \ + || defined(SO_BINDANY) \ + || defined(IP_TRANSPARENT) \ + || defined(IPV6_TRANSPARENT) +#define CONFIG_HAP_TRANSPARENT +#endif + +/* We'll try to enable SO_REUSEPORT on Linux 2.4 and 2.6 if not defined. + * There are two families of values depending on the architecture. Those + * are at least valid on Linux 2.4 and 2.6, reason why we'll rely on the + * USE_NETFILTER define. + */ +#if !defined(SO_REUSEPORT) && defined(USE_NETFILTER) +#if defined(SO_REUSEADDR) && (SO_REUSEADDR == 2) +#define SO_REUSEPORT 15 +#elif defined(SO_REUSEADDR) && (SO_REUSEADDR == 0x0004) +#define SO_REUSEPORT 0x0200 +#endif /* SO_REUSEADDR */ +#endif /* SO_REUSEPORT */ + +/* only Linux defines TCP_FASTOPEN */ +#ifdef USE_TFO +#ifndef TCP_FASTOPEN +#define TCP_FASTOPEN 23 +#endif + +#ifndef TCP_FASTOPEN_CONNECT +#define TCP_FASTOPEN_CONNECT 30 +#endif +#endif + +/* If IPv6 is supported, define IN6_IS_ADDR_V4MAPPED() if missing. 
*/ +#if defined(IPV6_TCLASS) && !defined(IN6_IS_ADDR_V4MAPPED) +#define IN6_IS_ADDR_V4MAPPED(a) \ +((((const uint32_t *) (a))[0] == 0) \ +&& (((const uint32_t *) (a))[1] == 0) \ +&& (((const uint32_t *) (a))[2] == htonl (0xffff))) +#endif + +#if defined(__dietlibc__) +#include <strings.h> +#endif + +/* crypt_r() has been present in glibc since 2.2 and on FreeBSD since 12.0 + * (1200002). No other OS makes any mention of it for now. Feel free to add + * valid known combinations below if needed to relax the crypt() lock when + * using threads. + */ +#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) \ + || (defined(__FreeBSD__) && __FreeBSD_version >= 1200002) +#define HA_HAVE_CRYPT_R +#endif + +/* some backtrace() implementations are broken or incomplete, in this case we + * can replace them. We must not do it all the time as some are more accurate + * than ours. + */ +#ifdef USE_BACKTRACE +#if defined(__aarch64__) +/* on aarch64 at least from gcc-4.7.4 to 7.4.1 we only get a single entry, which + * is pointless. Ours works though it misses the faulty function itself, + * probably due to an alternate stack for the signal handler which does not + * create a new frame hence doesn't store the caller's return address. + */ +#elif defined(__clang__) && defined(__x86_64__) +/* this is on FreeBSD: backtraces produced by clang 4.0 to 8.0 don't go + * further than the sighandler. + */ +#else +#define HA_HAVE_WORKING_BACKTRACE +#endif +#endif + +/* dl_iterate_phdr() is available in GLIBC 2.2.4 and up. Let's round up to 2.3.x */ +#if defined(USE_DL) && defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3) +#define HA_HAVE_DL_ITERATE_PHDR +#define HA_HAVE_DUMP_LIBS +#endif + +/* malloc_trim() can be very convenient to reclaim unused memory especially + * from huge pattern files. It's available (and really usable) in glibc 2.8 and + * above. 
+ */ +#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 8)) +#include <malloc.h> +#define HA_HAVE_MALLOC_TRIM +#endif + +/* glibc 2.26 includes a thread-local cache which makes it fast enough in threads */ +#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 26)) +#include <malloc.h> +#define HA_HAVE_FAST_MALLOC +#endif + +/* glibc 2.33 provides mallinfo2() that overcomes mallinfo()'s type limitations */ +#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 33)) +#include <malloc.h> +#define HA_HAVE_MALLINFO2 +#endif + +/* FreeBSD also has malloc_usable_size() but it requires malloc_np.h */ +#if defined(USE_MEMORY_PROFILING) && defined(__FreeBSD__) && (__FreeBSD_version >= 700002) +#include <malloc_np.h> +#endif + +/* macOS has a call similar to malloc_usable_size */ +#if defined(__APPLE__) +#include <malloc/malloc.h> +#define malloc_usable_size malloc_size +#define HA_HAVE_MALLOC_ZONE +#define TCP_KEEPIDLE TCP_KEEPALIVE +#define TCP_INFO TCP_CONNECTION_INFO +#define tcp_info tcp_connection_info +#endif + +/* Max number of file descriptors we send in one sendmsg(). Linux seems to be + * able to send 253 fds per sendmsg(), however musl is limited to 252, not sure + * about the other OSes. + */ +#define MAX_SEND_FD 252 + +/* Some bsd kernels (ie: FreeBSD) offer the FAST clock source as equivalent + * to Linux COARSE clock source. Aliasing COARSE to FAST on such systems when + * COARSE is not already defined. 
+ */ +#if !defined(CLOCK_MONOTONIC_COARSE) && defined(CLOCK_MONOTONIC_FAST) +#define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC_FAST +#endif + +#endif /* _HAPROXY_COMPAT_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/compiler.h b/include/haproxy/compiler.h new file mode 100644 index 0000000..d8e8a72 --- /dev/null +++ b/include/haproxy/compiler.h @@ -0,0 +1,469 @@ +/* + * include/haproxy/compiler.h + * This file contains some compiler-specific settings. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_COMPILER_H +#define _HAPROXY_COMPILER_H + +/* leave a chance to the compiler to bring its own definitions first; this + * will cause cdefs.h to be included on systems which have it. + */ +#include <inttypes.h> + +#ifdef DEBUG_USE_ABORT +#include <stdlib.h> +#endif + +/* + * Gcc before 3.0 needs [0] to declare a variable-size array + */ +#ifndef VAR_ARRAY +#if defined(__GNUC__) && (__GNUC__ < 3) +#define VAR_ARRAY 0 +#else +#define VAR_ARRAY +#endif +#endif + +/* This is used to test if a macro is defined and equals 1. The principle is + * that the macro is passed as a value and its value concatenated to the word + * "comma_for_one" to form a new macro name. 
The macro "comma_for_one1" equals + * one comma, which, once used in an argument, will shift all of them by one, + * so that we can use this to concatenate both a 1 and a 0 and always pick the + * second one. + */ +#define comma_for_one1 , +#define _____equals_1(x, y, ...) (y) +#define ____equals_1(x, ...) _____equals_1(x, 0) +#define ___equals_1(x) ____equals_1(comma_for_one ## x 1) +#define __equals_1(x) ___equals_1(x) + +/* gcc 5 and clang 3 brought __has_attribute(), which is not well documented in + * the case of gcc, but is convenient since handled at the preprocessor level. + * In both cases it's possible to test for __has_attribute() using ifdef. When + * not defined we remap this to the __has_attribute_<name> macro so that we'll + * later be able to implement on a per-compiler basis those which are missing, + * by defining __has_attribute_<name> to 1. + */ +#ifndef __has_attribute +#define __has_attribute(x) __equals_1(__has_attribute_ ## x) +#endif + +/* The fallthrough attribute arrived with gcc 7, the same version that started + * to emit the fallthrough warnings and to parse the comments. Comments do not + * manage to stop the warning when preprocessing is split from compiling (e.g. + * when building under distcc). Better encourage the use of a __fallthrough + * statement instead. There are still limitations in that clang doesn't accept + * it after a label; this is the reason why we're always preceding it with an + * empty do-while. + */ +#if __has_attribute(fallthrough) +# define __fallthrough do { } while (0); __attribute__((fallthrough)) +#else +# define __fallthrough do { } while (0) +#endif + +#if !defined(__GNUC__) +/* Some versions of glibc irresponsibly redefine __attribute__() to empty for + * non-gcc compilers, and as such, silently break all constructors with + * other compilers. Let's make sure such incompatibilities are detected if any, + * or that the attribute is properly enforced. 
+ */ +#undef __attribute__ +#define __attribute__(x) __attribute__(x) +#endif + +/* attribute(warning) was added in gcc 4.3 */ +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define __has_attribute_warning 1 +#endif + +/* __attribute__warning(x) does __attribute__((warning(x))) if supported by the + * compiler, otherwise __attribute__((deprecated)). Clang supports it since v14 + * but is a bit capricious in that it refuses a redefinition with a warning + * attribute that wasn't there the first time. However it's OK with deprecated(x) + * so better use this one. See: https://github.com/llvm/llvm-project/issues/56519 + */ +#if defined(__clang__) +# define __attribute__warning(x) __attribute__((deprecated(x))) +#elif __has_attribute(warning) +# define __attribute__warning(x) __attribute__((warning(x))) +#else +# define __attribute__warning(x) __attribute__((deprecated)) +#endif + +/* By default, gcc does not inline large chunks of code, but we want it to + * respect our choices. + */ +#if !defined(forceinline) +#if !defined(__GNUC__) || (__GNUC__ < 3) +#define forceinline inline +#else +#define forceinline inline __attribute__((always_inline)) +#endif +#endif + +#ifndef __maybe_unused +/* silence the "unused" warnings without having to place painful #ifdefs. + * For use with variables or functions. + */ +#define __maybe_unused __attribute__((unused)) +#endif + +/* TCC doesn't support weak attribute, sections etc and needs the more portable + * obsolete linker model instead. + */ +#if defined(__TINYC__) && !defined(USE_OBSOLETE_LINKER) +#define USE_OBSOLETE_LINKER 1 +#endif + +/* These macros are used to declare a section name for a variable. + * WARNING: keep section names short, as MacOS limits them to 16 characters. + * The _START and _STOP attributes have to be placed after the start and stop + * weak symbol declarations, and are only used by MacOS. 
+ */ +#if !defined(USE_OBSOLETE_LINKER) + +#ifdef __APPLE__ +#define HA_SECTION(s) __attribute__((__section__("__DATA, " s))) +#define HA_SECTION_START(s) __asm("section$start$__DATA$" s) +#define HA_SECTION_STOP(s) __asm("section$end$__DATA$" s) +#else +#define HA_SECTION(s) __attribute__((__section__(s))) +#define HA_SECTION_START(s) +#define HA_SECTION_STOP(s) +#endif + +#else // obsolete linker below, let's just not force any section + +#define HA_SECTION(s) +#define HA_SECTION_START(s) +#define HA_SECTION_STOP(s) + +#endif // USE_OBSOLETE_LINKER + +/* Declare a symbol as weak if possible, otherwise global. Since we don't want to + * error on multiple definitions, the symbol is declared weak. On MacOS ".weak" + * does not exist and we must continue to use ".globl" instead. Note that + * ".global" is to be avoided on other platforms as llvm complains about it + * being used for symbols declared as weak elsewhere in the code. It may or may + * not work depending on linkers and assemblers, this is only for advanced use + * anyway (and most likely it will only work with !USE_OBSOLETE_LINKER). + */ +#if defined(__APPLE__) +# define __HA_WEAK(sym) __asm__(".globl " #sym) +#else +# define __HA_WEAK(sym) __asm__(".weak " #sym) +#endif +#define HA_WEAK(sym) __HA_WEAK(sym) + +/* declare a symbol as global */ +#define __HA_GLOBL(sym) __asm__(".globl " #sym) +#define HA_GLOBL(sym) __HA_GLOBL(sym) + +/* use this attribute on a variable to move it to the read_mostly section */ +#if !defined(__read_mostly) +#define __read_mostly HA_SECTION("read_mostly") +#endif + +/* This allows gcc to know that some locations are never reached, for example + * after a longjmp() in the Lua code, hence that some errors caught by such + * methods cannot propagate further. This is important with gcc versions 6 and + * above which can more aggressively detect null dereferences. The builtin + * below was introduced in gcc 4.5, and before it we didn't care. 
+ */ +#ifdef DEBUG_USE_ABORT +#define my_unreachable() abort() +#else +#if defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#define my_unreachable() __builtin_unreachable() +#else +#define my_unreachable() do { } while (1) +#endif +#endif + +/* This prevents the compiler from folding multiple identical code paths into a + * single one, by adding a dependency on the line number in the path. This may + * typically happen on function tails, or purposely placed abort() before an + * unreachable() statement, due to the compiler performing an Identical Code + * Folding optimization. This macro is aimed at helping with code tracing in + * crash dumps and may also be used for specific optimizations. One known case + * is gcc-4.7 and 4.8 which aggressively fold multiple ABORT_NOW() exit points + * and which causes wrong line numbers to be reported by the debugger (note + * that even newer compilers do this when using abort()). Please keep in mind + * that nothing prevents the compiler from folding the code after that point, + * but at least it will not fold the code before. + */ +#define DO_NOT_FOLD() do { asm volatile("" :: "i"(__LINE__)); } while (0) + +/* This macro may be used to block constant propagation that lets the compiler + * detect a possible NULL dereference on a variable resulting from an explicit + * assignment in an impossible check. Sometimes a function is called which does + * safety checks and returns NULL if safe conditions are not met. The place + * where it's called cannot hit this condition and dereferencing the pointer + * without first checking it will make the compiler emit a warning about a + * "potential null pointer dereference" which is hard to work around. This + * macro "washes" the pointer and prevents the compiler from emitting tests + * branching to undefined instructions. 
It may only be used when the developer + * is absolutely certain that the conditions are guaranteed and that the + * pointer passed in argument cannot be NULL by design. + */ +#define ALREADY_CHECKED(p) do { asm("" : "=rm"(p) : "0"(p)); } while (0) + +/* same as above but to be used to pass the input value to the output but + * without letting the compiler know about its initial properties. + */ +#define DISGUISE(v) ({ typeof(v) __v = (v); ALREADY_CHECKED(__v); __v; }) + +/* Implements a static event counter where it's used. This is typically made to + * report some warnings only once, either during boot or at runtime. It only + * returns true on the very first call, and zero later. It's thread-safe and + * uses a single byte of memory per call place. It relies on the atomic xchg + * defined in atomic.h which is also part of the common API. + */ +#define ONLY_ONCE() ({ static char __cnt; !_HA_ATOMIC_XCHG(&__cnt, 1); }) + +/* makes a string from a constant (number or macro), avoids the need for + * printf("%d") format just to dump a setting limit or value in an error + * message. We use two levels so that macros are resolved. + */ +#define _TOSTR(x) #x +#define TOSTR(x) _TOSTR(x) + +/* + * Gcc >= 3 provides the ability for the program to give hints to the + * compiler about what branch of an if is most likely to be taken. This + * helps the compiler produce the most compact critical paths, which is + * generally better for the cache and to reduce the number of jumps. 
+ */ +#if !defined(likely) +#if !defined(__GNUC__) || (__GNUC__ < 3) +#define __builtin_expect(x,y) (x) +#define likely(x) (x) +#define unlikely(x) (x) +#else +#define likely(x) (__builtin_expect((x) != 0, 1)) +#define unlikely(x) (__builtin_expect((x) != 0, 0)) +#endif +#endif + +#ifndef __GNUC_PREREQ__ +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#define __GNUC_PREREQ__(ma, mi) \ + (__GNUC__ > (ma) || __GNUC__ == (ma) && __GNUC_MINOR__ >= (mi)) +#else +#define __GNUC_PREREQ__(ma, mi) 0 +#endif +#endif + +#ifndef offsetof +#if __GNUC_PREREQ__(4, 1) +#define offsetof(type, field) __builtin_offsetof(type, field) +#else +#define offsetof(type, field) \ + ((size_t)(uintptr_t)((const volatile void *)&((type *)0)->field)) +#endif +#endif + +/* Linux-like "container_of". It returns a pointer to the structure of type + * <type> which has its member <name> stored at address <ptr>. + */ +#ifndef container_of +#define container_of(ptr, type, name) ((type *)(((void *)(ptr)) - ((long)&((type *)0)->name))) +#endif + +/* returns a pointer to the structure of type <type> which has its member <name> + * stored at address <ptr>, unless <ptr> is 0, in which case 0 is returned. + */ +#ifndef container_of_safe +#define container_of_safe(ptr, type, name) \ + ({ void *__p = (ptr); \ + __p ? (type *)(__p - ((long)&((type *)0)->name)) : (type *)0; \ + }) +#endif + + +/* From gcc 6 and above, enum values may have attributes */ +#if __GNUC_PREREQ__(6, 0) +#define ENUM_ATTRIBUTE(x) __attribute__(x) +#else +#define ENUM_ATTRIBUTE(x) +#endif + +/* Some architectures have a double-word CAS, sometimes even dual-8 bytes. + * Some architectures support unaligned accesses, others are fine with them + * but only for non-atomic operations. Also mention those supporting unaligned + * accesses and being little endian, and those where unaligned accesses are + * known to be fast (almost as fast as aligned ones). 
+ */ +#if defined(__x86_64__) +#define HA_UNALIGNED +#define HA_UNALIGNED_LE +#define HA_UNALIGNED_LE64 +#define HA_UNALIGNED_FAST +#define HA_UNALIGNED_ATOMIC +#define HA_HAVE_CAS_DW +#define HA_CAS_IS_8B +#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +#define HA_UNALIGNED +#define HA_UNALIGNED_LE +#define HA_UNALIGNED_ATOMIC +#elif defined (__aarch64__) || defined(__ARM_ARCH_8A) +#define HA_UNALIGNED +#define HA_UNALIGNED_LE +#define HA_UNALIGNED_LE64 +#define HA_UNALIGNED_FAST +#define HA_HAVE_CAS_DW +#define HA_CAS_IS_8B +#elif defined(__arm__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)) +#define HA_UNALIGNED +#define HA_UNALIGNED_LE +#define HA_UNALIGNED_FAST +#define HA_HAVE_CAS_DW +#endif + +/*********************** IMPORTANT NOTE ABOUT ALIGNMENT **********************\ + * Alignment works fine for variables. It also works on types and struct * + * members by propagating the alignment to the container struct itself, * + * but this requires that variables of the affected type are properly * + * aligned themselves. While regular variables will always abide, those * + * allocated using malloc() will not! Most platforms provide posix_memalign()* + * for this, but it's not available everywhere. As such one ought not to use * + * these alignment declarations inside structures that are dynamically * + * allocated. If the purpose is only to avoid false sharing of cache lines * + * for multi_threading, see THREAD_PAD() below. 
* +\*****************************************************************************/ + +/* sets alignment for current field or variable */ +#ifndef ALIGNED +#define ALIGNED(x) __attribute__((aligned(x))) +#endif + +/* sets alignment only on architectures preventing unaligned accesses */ +#ifndef MAYBE_ALIGNED +#ifndef HA_UNALIGNED +#define MAYBE_ALIGNED(x) ALIGNED(x) +#else +#define MAYBE_ALIGNED(x) +#endif +#endif + +/* sets alignment only on architectures preventing unaligned atomic accesses */ +#ifndef ATOMIC_ALIGNED +#ifndef HA_UNALIGNED_ATOMIC +#define ATOMIC_ALIGNED(x) ALIGNED(x) +#else +#define ATOMIC_ALIGNED(x) +#endif +#endif + +/* sets alignment for current field or variable only when threads are enabled. + * Typically used to respect cache line alignment to avoid false sharing. + */ +#ifndef THREAD_ALIGNED +#ifdef USE_THREAD +#define THREAD_ALIGNED(x) __attribute__((aligned(x))) +#else +#define THREAD_ALIGNED(x) +#endif +#endif + +/* add a mandatory alignment for next fields in a structure */ +#ifndef ALWAYS_ALIGN +#define ALWAYS_ALIGN(x) union { } ALIGNED(x) +#endif + +/* add an optional alignment for next fields in a structure, only for archs + * which do not support unaligned accesses. + */ +#ifndef MAYBE_ALIGN +#ifndef HA_UNALIGNED +#define MAYBE_ALIGN(x) union { } ALIGNED(x) +#else +#define MAYBE_ALIGN(x) +#endif +#endif + +/* add an optional alignment for next fields in a structure, only for archs + * which do not support unaligned accesses for atomic operations. + */ +#ifndef ATOMIC_ALIGN +#ifndef HA_UNALIGNED_ATOMIC +#define ATOMIC_ALIGN(x) union { } ALIGNED(x) +#else +#define ATOMIC_ALIGN(x) +#endif +#endif + +/* add an optional alignment for next fields in a structure, only when threads + * are enabled. Typically used to respect cache line alignment to avoid false + * sharing. 
+ */ +#ifndef THREAD_ALIGN +#ifdef USE_THREAD +#define THREAD_ALIGN(x) union { } ALIGNED(x) +#else +#define THREAD_ALIGN(x) +#endif +#endif + +/* add optional padding of the specified size between fields in a structure, + * only when threads are enabled. This is used to avoid false sharing of cache + * lines for dynamically allocated structures which cannot guarantee alignment. + */ +#ifndef THREAD_PAD +# ifdef USE_THREAD +# define __THREAD_PAD(x,l) char __pad_##l[x] +# define _THREAD_PAD(x,l) __THREAD_PAD(x, l) +# define THREAD_PAD(x) _THREAD_PAD(x, __LINE__) +# else +# define THREAD_PAD(x) +# endif +#endif + +/* The THREAD_LOCAL type attribute defines thread-local storage and is defined + * to __thread when threads are enabled or empty when disabled. + */ +#ifdef USE_THREAD +#define THREAD_LOCAL __thread +#else +#define THREAD_LOCAL +#endif + +/* The __decl_thread() statement shows the argument when threads are enabled + * or hides it when disabled. The purpose is to condition the presence of some + * variables or struct members to the fact that threads are enabled, without + * having to enclose them inside a #ifdef USE_THREAD/#endif clause. + */ +#ifdef USE_THREAD +#define __decl_thread(decl) decl +#else +#define __decl_thread(decl) +#endif + +/* clang has a __has_feature() macro which reports true/false on a number of + * internally supported features. Let's make sure this macro is always defined + * and returns zero when not supported. + */ +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#endif /* _HAPROXY_COMPILER_H */ diff --git a/include/haproxy/compression-t.h b/include/haproxy/compression-t.h new file mode 100644 index 0000000..b8f118b --- /dev/null +++ b/include/haproxy/compression-t.h @@ -0,0 +1,109 @@ +/* + * include/haproxy/compression-t.h + * This file defines everything related to compression. 
+ * + * Copyright 2012 Exceliance, David Du Colombier <dducolombier@exceliance.fr> + William Lallemand <wlallemand@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_COMP_T_H +#define _HAPROXY_COMP_T_H + +#if defined(USE_SLZ) +#ifdef USE_ZLIB +#error "Cannot build with both USE_SLZ and USE_ZLIB at the same time." 
+#endif +#include <import/slz.h> +#elif defined(USE_ZLIB) +#include <zlib.h> +#endif + +#include <haproxy/buf-t.h> + +/* Direction index */ + +#define COMP_DIR_REQ 0 +#define COMP_DIR_RES 1 + +/* Compression flags */ + +#define COMP_FL_OFFLOAD 0x00000001 /* Compression offload */ +#define COMP_FL_DIR_REQ 0x00000002 /* Compress requests */ +#define COMP_FL_DIR_RES 0x00000004 /* Compress responses */ + +struct comp { + struct comp_algo *algos_res; /* Algos available for response */ + struct comp_algo *algo_req; /* Algo to use for request */ + struct comp_type *types_req; /* Types to be compressed for requests */ + struct comp_type *types_res; /* Types to be compressed for responses */ + unsigned int flags; +}; + +struct comp_ctx { +#if defined(USE_SLZ) + struct slz_stream strm; + const void *direct_ptr; /* NULL or pointer to beginning of data */ + int direct_len; /* length of direct_ptr if not NULL */ + struct buffer queued; /* if not NULL, data already queued */ +#elif defined(USE_ZLIB) + z_stream strm; /* zlib stream */ + void *zlib_deflate_state; + void *zlib_window; + void *zlib_prev; + void *zlib_pending_buf; + void *zlib_head; +#endif + int cur_lvl; +}; + +/* Thanks to MSIE/IIS, the "deflate" name is ambiguous, as according to the RFC + * it's a zlib-wrapped deflate stream, but MSIE only understands a raw deflate + * stream. For this reason some people prefer to emit a raw deflate stream on + * "deflate" and we'll need two algos for the same name, they are distinguished + * with the config name. 
+ */ +struct comp_algo { + char *cfg_name; /* config name */ + int cfg_name_len; + + char *ua_name; /* name for the user-agent */ + int ua_name_len; + + int (*init)(struct comp_ctx **comp_ctx, int level); + int (*add_data)(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out); + int (*flush)(struct comp_ctx *comp_ctx, struct buffer *out); + int (*finish)(struct comp_ctx *comp_ctx, struct buffer *out); + int (*end)(struct comp_ctx **comp_ctx); + struct comp_algo *next; +}; + +struct comp_type { + char *name; + int name_len; + struct comp_type *next; +}; + + +#endif /* _HAPROXY_COMP_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ + diff --git a/include/haproxy/compression.h b/include/haproxy/compression.h new file mode 100644 index 0000000..851ea23 --- /dev/null +++ b/include/haproxy/compression.h @@ -0,0 +1,44 @@ +/* + * include/haproxy/compression.h + * This file defines function prototypes for compression. + * + * Copyright 2012 (C) Exceliance, David Du Colombier <dducolombier@exceliance.fr> + * William Lallemand <wlallemand@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_COMP_H +#define _HAPROXY_COMP_H + +#include <haproxy/compression-t.h> + +extern unsigned int compress_min_idle; + +int comp_append_type(struct comp_type **types, const char *type); +int comp_append_algo(struct comp_algo **algos, const char *algo); + +#ifdef USE_ZLIB +extern long zlib_used_memory; +#endif /* USE_ZLIB */ + +#endif /* _HAPROXY_COMP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/connection-t.h b/include/haproxy/connection-t.h new file mode 100644 index 0000000..2619fd6 --- /dev/null +++ b/include/haproxy/connection-t.h @@ -0,0 +1,722 @@ +/* + * include/haproxy/connection-t.h + * This file describes the connection struct and associated constants. + * + * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CONNECTION_T_H +#define _HAPROXY_CONNECTION_T_H + +#include <stdlib.h> +#include <sys/socket.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> + +#include <import/ebtree-t.h> +#include <import/ist.h> + +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/port_range-t.h> +#include <haproxy/protocol-t.h> +#include <haproxy/show_flags-t.h> +#include <haproxy/thread-t.h> + +/* referenced below */ +struct connection; +struct stconn; +struct sedesc; +struct cs_info; +struct buffer; +struct proxy; +struct server; +struct session; +struct pipe; +struct quic_conn; +struct bind_conf; +struct qcs; +struct ssl_sock_ctx; + +/* Note: subscribing to these events is only valid after the caller has really + * attempted to perform the operation, and failed to proceed or complete. + */ +enum sub_event_type { + SUB_RETRY_RECV = 0x00000001, /* Schedule the tasklet when we can attempt to recv again */ + SUB_RETRY_SEND = 0x00000002, /* Schedule the tasklet when we can attempt to send again */ +}; + +/* For each direction, we have a CO_FL_XPRT_<DIR>_ENA flag, which + * indicates if read or write is desired in that direction for the respective + * layers. The current status corresponding to the current layer being used is + * remembered in the CO_FL_XPRT_<DIR>_ENA flag. The need to poll (ie receipt of + * EAGAIN) is remembered at the file descriptor level so that even when the + * activity is stopped and restarted, we still remember whether it was needed + * to poll before attempting the I/O. + * + * The FD state is updated according to CO_FL_XPRT_<DIR>_ENA in + * conn_cond_update_polling(). + */ + +/* flags for use in connection->flags. 
Please also update the conn_show_flags() + * function below in case of changes. + */ +enum { + CO_FL_NONE = 0x00000000, /* Just for initialization purposes */ + + /* Do not change these values without updating conn_*_poll_changes() ! */ + CO_FL_SAFE_LIST = 0x00000001, /* 0 = not in any list, 1 = in safe_list */ + CO_FL_IDLE_LIST = 0x00000002, /* 2 = in idle_list, 3 = invalid */ + CO_FL_LIST_MASK = 0x00000003, /* Is the connection in any server-managed list ? */ + + CO_FL_REVERSED = 0x00000004, /* connection has been reversed to backend / reversed and accepted on frontend */ + CO_FL_ACT_REVERSING = 0x00000008, /* connection has been reversed to frontend but not yet accepted */ + /* 0x00000008 is taken by CO_FL_ACT_REVERSING above */ + + /* unused : 0x00000010 */ + /* unused : 0x00000020 */ + /* unused : 0x00000040, 0x00000080 */ + + /* These flags indicate whether the Control and Transport layers are initialized */ + CO_FL_CTRL_READY = 0x00000100, /* FD was registered, fd_delete() needed */ + CO_FL_XPRT_READY = 0x00000200, /* xprt_start() done, xprt can be used */ + + CO_FL_WANT_DRAIN = 0x00000400, /* try to drain pending data when closing */ + + /* This flag is used by data layers to indicate they had to stop + * receiving data because a buffer was full. The connection handler + * clears it before first calling the I/O and data callbacks.
+ */ + CO_FL_WAIT_ROOM = 0x00000800, /* data sink is full */ + + /* These flags are used to report whether the from/to addresses are set or not */ + /* unused: 0x00001000 */ + /* unused: 0x00002000 */ + + CO_FL_EARLY_SSL_HS = 0x00004000, /* We have early data pending, don't start SSL handshake yet */ + CO_FL_EARLY_DATA = 0x00008000, /* At least some of the data are early data */ + CO_FL_SOCKS4_SEND = 0x00010000, /* handshaking with upstream SOCKS4 proxy, going to send the handshake */ + CO_FL_SOCKS4_RECV = 0x00020000, /* handshaking with upstream SOCKS4 proxy, going to check if handshake succeeded */ + + /* flags used to remember which shutdowns have been performed/reported */ + CO_FL_SOCK_RD_SH = 0x00040000, /* SOCK layer was notified about shutr/read0 */ + CO_FL_SOCK_WR_SH = 0x00080000, /* SOCK layer asked for shutw */ + + /* flags used to report connection errors or other closing conditions */ + CO_FL_ERROR = 0x00100000, /* a fatal error was reported */ + CO_FL_NOTIFY_DONE = 0x001C0000, /* any xprt shut/error flag above needs to be reported (CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH | CO_FL_ERROR) */ + + CO_FL_FDLESS = 0x00200000, /* this connection doesn't use any FD (e.g. QUIC) */ + + /* flags used to report connection status updates */ + CO_FL_WAIT_L4_CONN = 0x00400000, /* waiting for L4 to be connected */ + CO_FL_WAIT_L6_CONN = 0x00800000, /* waiting for L6 to be connected (eg: SSL) */ + CO_FL_WAIT_L4L6 = 0x00C00000, /* waiting for L4 and/or L6 to be connected */ + + /* All the flags below are used for connection handshakes. Any new + * handshake should be added after this point, and CO_FL_HANDSHAKE + * should be updated.
+ */ + CO_FL_SEND_PROXY = 0x01000000, /* send a valid PROXY protocol header */ + CO_FL_ACCEPT_PROXY = 0x02000000, /* receive a valid PROXY protocol header */ + CO_FL_ACCEPT_CIP = 0x04000000, /* receive a valid NetScaler Client IP header */ + + /* below we have all handshake flags grouped into one */ + CO_FL_HANDSHAKE = CO_FL_SEND_PROXY | CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP | CO_FL_SOCKS4_SEND | CO_FL_SOCKS4_RECV, + CO_FL_WAIT_XPRT = CO_FL_WAIT_L4_CONN | CO_FL_HANDSHAKE | CO_FL_WAIT_L6_CONN, + + CO_FL_SSL_WAIT_HS = 0x08000000, /* wait for an SSL handshake to complete */ + + /* This connection may not be shared between clients */ + CO_FL_PRIVATE = 0x10000000, + + /* This flag is used to know that a PROXY protocol header was sent by the client */ + CO_FL_RCVD_PROXY = 0x20000000, + + /* The connection is unused by its owner */ + CO_FL_SESS_IDLE = 0x40000000, + + /* This last flag indicates that the transport layer is used (for instance + * by logs) and must not be cleared yet. The last call to conn_xprt_close() + * must be done after clearing this flag. + */ + CO_FL_XPRT_TRACKED = 0x80000000, + + /* below we have all SOCKS handshake flags grouped into one */ + CO_FL_SOCKS4 = CO_FL_SOCKS4_SEND | CO_FL_SOCKS4_RECV, +}; + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *conn_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(CO_FL_SAFE_LIST, _(CO_FL_IDLE_LIST, _(CO_FL_CTRL_READY, + _(CO_FL_REVERSED, _(CO_FL_ACT_REVERSING, _(CO_FL_XPRT_READY, + _(CO_FL_WANT_DRAIN, _(CO_FL_WAIT_ROOM, _(CO_FL_EARLY_SSL_HS, _(CO_FL_EARLY_DATA, + _(CO_FL_SOCKS4_SEND, _(CO_FL_SOCKS4_RECV, _(CO_FL_SOCK_RD_SH, _(CO_FL_SOCK_WR_SH, + _(CO_FL_ERROR, _(CO_FL_FDLESS, _(CO_FL_WAIT_L4_CONN, _(CO_FL_WAIT_L6_CONN, + _(CO_FL_SEND_PROXY, _(CO_FL_ACCEPT_PROXY, _(CO_FL_ACCEPT_CIP, _(CO_FL_SSL_WAIT_HS, + _(CO_FL_PRIVATE, _(CO_FL_RCVD_PROXY, _(CO_FL_SESS_IDLE, _(CO_FL_XPRT_TRACKED + )))))))))))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* Possible connection error codes. + * Warning: Do not reorder the codes, they are fetchable through the + * "fc_err" sample fetch. If a new code is added, please add an error label + * in conn_err_code_str and in the "fc_err_str" sample fetch documentation. + */ +enum { + CO_ER_NONE, /* no error */ + + CO_ER_CONF_FDLIM, /* reached process's configured FD limitation */ + CO_ER_PROC_FDLIM, /* reached process's FD limitation */ + CO_ER_SYS_FDLIM, /* reached system's FD limitation */ + CO_ER_SYS_MEMLIM, /* reached system buffers limitation */ + CO_ER_NOPROTO, /* protocol not supported */ + CO_ER_SOCK_ERR, /* other socket error */ + + CO_ER_PORT_RANGE, /* source port range exhausted */ + CO_ER_CANT_BIND, /* can't bind to source address */ + CO_ER_FREE_PORTS, /* no more free ports on the system */ + CO_ER_ADDR_INUSE, /* local address already in use */ + + CO_ER_PRX_EMPTY, /* nothing received in PROXY protocol header */ + CO_ER_PRX_ABORT, /* client abort during PROXY protocol header */ + CO_ER_PRX_TIMEOUT, /* timeout while waiting for a PROXY header */ + CO_ER_PRX_TRUNCATED, /* truncated PROXY protocol header */ + CO_ER_PRX_NOT_HDR, /* not a PROXY protocol header */ + CO_ER_PRX_BAD_HDR, /* bad PROXY protocol header */ + CO_ER_PRX_BAD_PROTO, /* unsupported protocol in 
PROXY header */ + + CO_ER_CIP_EMPTY, /* nothing received in NetScaler Client IP header */ + CO_ER_CIP_ABORT, /* client abort during NetScaler Client IP header */ + CO_ER_CIP_TIMEOUT, /* timeout while waiting for a NetScaler Client IP header */ + CO_ER_CIP_TRUNCATED, /* truncated NetScaler Client IP header */ + CO_ER_CIP_BAD_MAGIC, /* bad magic number in NetScaler Client IP header */ + CO_ER_CIP_BAD_PROTO, /* unsupported protocol in NetScaler Client IP header */ + + CO_ER_SSL_EMPTY, /* client closed during SSL handshake */ + CO_ER_SSL_ABORT, /* client abort during SSL handshake */ + CO_ER_SSL_TIMEOUT, /* timeout during SSL handshake */ + CO_ER_SSL_TOO_MANY, /* too many SSL connections */ + CO_ER_SSL_NO_MEM, /* no more memory to allocate an SSL connection */ + CO_ER_SSL_RENEG, /* forbidden client renegotiation */ + CO_ER_SSL_CA_FAIL, /* client cert verification failed in the CA chain */ + CO_ER_SSL_CRT_FAIL, /* client cert verification failed on the certificate */ + CO_ER_SSL_MISMATCH, /* Server presented an SSL certificate different from the configured one */ + CO_ER_SSL_MISMATCH_SNI, /* Server presented an SSL certificate different from the expected one */ + CO_ER_SSL_HANDSHAKE, /* SSL error during handshake */ + CO_ER_SSL_HANDSHAKE_HB, /* SSL error during handshake with heartbeat present */ + CO_ER_SSL_KILLED_HB, /* Stopped a TLSv1 heartbeat attack (CVE-2014-0160) */ + CO_ER_SSL_NO_TARGET, /* unknown target (not client nor server) */ + CO_ER_SSL_EARLY_FAILED, /* Server refused early data */ + + CO_ER_SOCKS4_SEND, /* SOCKS4 Proxy write error during handshake */ + CO_ER_SOCKS4_RECV, /* SOCKS4 Proxy read error during handshake */ + CO_ER_SOCKS4_DENY, /* SOCKS4 Proxy deny the request */ + CO_ER_SOCKS4_ABORT, /* SOCKS4 Proxy handshake aborted by server */ + + CO_ERR_SSL_FATAL, /* SSL fatal error during a SSL_read or SSL_write */ + + CO_ER_REVERSE, /* Error during reverse connect */ +}; + +/* error return codes for accept_conn() */ +enum { + CO_AC_NONE = 0, /* no error, 
valid connection returned */ + CO_AC_DONE, /* reached the end of the queue (typically EAGAIN) */ + CO_AC_RETRY, /* late signal delivery or anything requiring the caller to try again */ + CO_AC_YIELD, /* short-lived limitation that requires a short pause */ + CO_AC_PAUSE, /* long-lived issue (resource/memory allocation error, paused FD) */ + CO_AC_PERMERR, /* permanent, non-recoverable error (e.g. closed listener socket) */ +}; + +/* source address settings for outgoing connections */ +enum { + /* Tproxy exclusive values from 0 to 7 */ + CO_SRC_TPROXY_ADDR = 0x0001, /* bind to this non-local address when connecting */ + CO_SRC_TPROXY_CIP = 0x0002, /* bind to the client's IP address when connecting */ + CO_SRC_TPROXY_CLI = 0x0003, /* bind to the client's IP+port when connecting */ + CO_SRC_TPROXY_DYN = 0x0004, /* bind to a dynamically computed non-local address */ + CO_SRC_TPROXY_MASK = 0x0007, /* bind to a non-local address when connecting */ + + CO_SRC_BIND = 0x0008, /* bind to a specific source address when connecting */ +}; + +/* flags that can be passed to xprt->rcv_buf() and mux->rcv_buf() */ +enum { + CO_RFL_BUF_WET = 0x0001, /* Buffer still has some output data present */ + CO_RFL_BUF_FLUSH = 0x0002, /* Flush mux's buffers but don't read more data */ + CO_RFL_READ_ONCE = 0x0004, /* don't loop even if the request/response is small */ + CO_RFL_KEEP_RECV = 0x0008, /* Instruct the mux to still wait for read events */ + CO_RFL_BUF_NOT_STUCK = 0x0010, /* Buffer is not stuck. 
Optims are possible during data copy */ + CO_RFL_MAY_SPLICE = 0x0020, /* The producer can use the kernel splicing */ +}; + +/* flags that can be passed to xprt->snd_buf() and mux->snd_buf() */ +enum { + CO_SFL_MSG_MORE = 0x0001, /* More data to come afterwards */ + CO_SFL_STREAMER = 0x0002, /* Producer is continuously streaming data */ +}; + +/* mux->shutr() modes */ +enum co_shr_mode { + CO_SHR_DRAIN = 0, /* read shutdown, drain any extra stuff */ + CO_SHR_RESET = 1, /* read shutdown, reset any extra stuff */ +}; + +/* mux->shutw() modes */ +enum co_shw_mode { + CO_SHW_NORMAL = 0, /* regular write shutdown */ + CO_SHW_SILENT = 1, /* imminent close, don't notify peer */ +}; + +/* known transport layers (for ease of lookup) */ +enum { + XPRT_RAW = 0, + XPRT_SSL = 1, + XPRT_HANDSHAKE = 2, + XPRT_QUIC = 3, + XPRT_ENTRIES /* must be last one */ +}; + +/* MUX-specific flags */ +enum { + MX_FL_NONE = 0x00000000, + MX_FL_HTX = 0x00000001, /* set if it is an HTX multiplexer */ + MX_FL_HOL_RISK = 0x00000002, /* set if the protocol is subject to head-of-line blocking on server */ + MX_FL_NO_UPG = 0x00000004, /* set if mux does not support any upgrade */ + MX_FL_FRAMED = 0x00000008, /* mux working on top of a framed transport layer (QUIC) */ + MX_FL_REVERSABLE = 0x00000010, /* mux supports connection reversal */ +}; + +/* PROTO token registration */ +enum proto_proxy_mode { + PROTO_MODE_NONE = 0, + PROTO_MODE_TCP = 1 << 0, // must not be changed! + PROTO_MODE_HTTP = 1 << 1, // must not be changed!
+ PROTO_MODE_ANY = PROTO_MODE_TCP | PROTO_MODE_HTTP, +}; + +enum proto_proxy_side { + PROTO_SIDE_NONE = 0, + PROTO_SIDE_FE = 1, // same as PR_CAP_FE + PROTO_SIDE_BE = 2, // same as PR_CAP_BE + PROTO_SIDE_BOTH = PROTO_SIDE_FE | PROTO_SIDE_BE, +}; + +/* ctl command used by mux->ctl() */ +enum mux_ctl_type { + MUX_CTL_STATUS, /* Expects an int as output, sets it to a combination of MUX_STATUS_* flags */ + MUX_CTL_EXIT_STATUS, /* Expects an int as output, sets the mux exit/error/http status, if known or 0 */ + MUX_CTL_REVERSE_CONN, /* Notify about an active reverse connection accepted. */ + MUX_CTL_SUBS_RECV, /* Notify the mux it must wait for read events again */ +}; + +/* sctl command used by mux->sctl() */ +enum mux_sctl_type { + MUX_SCTL_SID, /* Return the mux stream ID as output, as a signed 64-bit integer */ +}; + +/* response for ctl MUX_CTL_STATUS */ +#define MUX_STATUS_READY (1 << 0) + +enum mux_exit_status { + MUX_ES_SUCCESS, /* Success */ + MUX_ES_INVALID_ERR, /* invalid input */ + MUX_ES_TOUT_ERR, /* timeout */ + MUX_ES_NOTIMPL_ERR, /* not-implemented error */ + MUX_ES_INTERNAL_ERR, /* internal error */ + MUX_ES_UNKNOWN /* unknown status (must be the last) */ +}; + +/* socks4 response length */ +#define SOCKS4_HS_RSP_LEN 8 + +/* socks4 upstream proxy definitions */ +struct socks4_request { + uint8_t version; /* SOCKS version number, 1 byte, must be 0x04 for this version */ + uint8_t command; /* 0x01 = establish a TCP/IP stream connection */ + uint16_t port; /* port number, 2 bytes (in network byte order) */ + uint32_t ip; /* IP address, 4 bytes (in network byte order) */ + char user_id[8]; /* the user ID string, variable length, terminated with a null (0x00); Using "HAProxy\0" */ +}; + +/* Describes a set of subscriptions. Multiple events may be registered at the + * same time. The callee should assume everything not pending for completion is + * implicitly possible. It's illegal to change the tasklet if events are still + * registered.
+ */ +struct wait_event { + struct tasklet *tasklet; + int events; /* set of enum sub_event_type above */ +}; + +/* A connection handle is how we differentiate two connections on the lower + * layers. It usually is a file descriptor but can be a connection id. The + * CO_FL_FDLESS flag indicates which one is relevant. + */ +union conn_handle { + struct quic_conn *qc; /* Only present if this connection is a QUIC one (CO_FL_FDLESS=1) */ + int fd; /* file descriptor, for regular sockets (CO_FL_FDLESS=0) */ +}; + +/* xprt_ops describes transport-layer operations for a connection. They + * generally run over a socket-based control layer, but not always. Some + * of them are used for data transfer with the upper layer (rcv_*, snd_*) + * and the other ones are used to setup and release the transport layer. + */ +struct xprt_ops { + size_t (*rcv_buf)(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags); /* recv callback */ + size_t (*snd_buf)(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags); /* send callback */ + int (*rcv_pipe)(struct connection *conn, void *xprt_ctx, struct pipe *pipe, unsigned int count); /* recv-to-pipe callback */ + int (*snd_pipe)(struct connection *conn, void *xprt_ctx, struct pipe *pipe, unsigned int count); /* send-to-pipe callback */ + void (*shutr)(struct connection *conn, void *xprt_ctx, int); /* shutr function */ + void (*shutw)(struct connection *conn, void *xprt_ctx, int); /* shutw function */ + void (*close)(struct connection *conn, void *xprt_ctx); /* close the transport layer */ + int (*init)(struct connection *conn, void **ctx); /* initialize the transport layer */ + int (*start)(struct connection *conn, void *ctx); /* Start the transport layer, if needed */ + int (*prepare_bind_conf)(struct bind_conf *conf); /* prepare a whole bind_conf */ + void (*destroy_bind_conf)(struct bind_conf *conf); /* destroy a whole bind_conf */ + int (*prepare_srv)(struct server 
*srv); /* prepare a server context */ + void (*destroy_srv)(struct server *srv); /* destroy a server context */ + int (*get_alpn)(const struct connection *conn, void *xprt_ctx, const char **str, int *len); /* get application layer name */ + int (*takeover)(struct connection *conn, void *xprt_ctx, int orig_tid); /* Let the xprt know the fd have been taken over */ + void (*set_idle)(struct connection *conn, void *xprt_ctx); /* notify the xprt that the connection becomes idle. implies set_used. */ + void (*set_used)(struct connection *conn, void *xprt_ctx); /* notify the xprt that the connection leaves idle. implies set_idle. */ + char name[8]; /* transport layer name, zero-terminated */ + int (*subscribe)(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es); /* Subscribe <es> to events, such as "being able to send" */ + int (*unsubscribe)(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es); /* Unsubscribe <es> from events */ + int (*remove_xprt)(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx); /* Remove an xprt from the connection, used by temporary xprt such as the handshake one */ + int (*add_xprt)(struct connection *conn, void *xprt_ctx, void *toadd_ctx, const struct xprt_ops *toadd_ops, void **oldxprt_ctx, const struct xprt_ops **oldxprt_ops); /* Add a new XPRT as the new xprt, and return the old one */ + struct ssl_sock_ctx *(*get_ssl_sock_ctx)(struct connection *); /* retrieve the ssl_sock_ctx in use, or NULL if none */ + int (*show_fd)(struct buffer *, const struct connection *, const void *ctx); /* append some data about xprt for "show fd"; returns non-zero if suspicious */ +}; + +/* mux_ops describes the mux operations, which are to be performed at the + * connection level after data are exchanged with the transport layer in order + * to propagate them to streams. 
The <init> function will automatically be + * called once the mux is instantiated by the connection's owner at the end + * of a transport handshake, when it is about to transfer data and the data + * layer is not ready yet. + */ +struct mux_ops { + int (*init)(struct connection *conn, struct proxy *prx, struct session *sess, struct buffer *input); /* early initialization */ + int (*wake)(struct connection *conn); /* mux-layer callback to report activity, mandatory */ + size_t (*rcv_buf)(struct stconn *sc, struct buffer *buf, size_t count, int flags); /* Called from the upper layer to get data */ + size_t (*snd_buf)(struct stconn *sc, struct buffer *buf, size_t count, int flags); /* Called from the upper layer to send data */ + size_t (*nego_fastfwd)(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice); /* Callback to fill the SD iobuf */ + size_t (*done_fastfwd)(struct stconn *sc); /* Callback to terminate fast data forwarding */ + int (*fastfwd)(struct stconn *sc, unsigned int count, unsigned int flags); /* Callback to init fast data forwarding */ + int (*resume_fastfwd)(struct stconn *sc, unsigned int flags); /* Callback to resume fast data forwarding */ + void (*shutr)(struct stconn *sc, enum co_shr_mode); /* shutr function */ + void (*shutw)(struct stconn *sc, enum co_shw_mode); /* shutw function */ + + int (*attach)(struct connection *conn, struct sedesc *, struct session *sess); /* attach a stconn to an outgoing connection */ + struct stconn *(*get_first_sc)(const struct connection *); /* retrieves any valid stconn from this connection */ + void (*detach)(struct sedesc *); /* Detach an stconn from the sedesc from an outgoing connection, when the request is done */ + int (*show_fd)(struct buffer *, struct connection *); /* append some data about connection into chunk for "show fd"; returns non-zero if suspicious */ + int (*show_sd)(struct buffer *, struct sedesc *, const char *pfx); /* append some data about the mux stream into chunk
for "show sess"; returns non-zero if suspicious */ + int (*subscribe)(struct stconn *sc, int event_type, struct wait_event *es); /* Subscribe <es> to events, such as "being able to send" */ + int (*unsubscribe)(struct stconn *sc, int event_type, struct wait_event *es); /* Unsubscribe <es> from events */ + int (*sctl)(struct stconn *sc, enum mux_sctl_type mux_sctl, void *arg); /* Provides information about the mux stream */ + int (*avail_streams)(struct connection *conn); /* Returns the number of streams still available for a connection */ + int (*avail_streams_bidi)(struct connection *conn); /* Returns the number of bidirectional streams still available for a connection */ + int (*avail_streams_uni)(struct connection *conn); /* Returns the number of unidirectional streams still available for a connection */ + int (*used_streams)(struct connection *conn); /* Returns the number of streams in use on a connection. */ + void (*destroy)(void *ctx); /* Let the mux know one of its users left, so it may have to disappear */ + int (*ctl)(struct connection *conn, enum mux_ctl_type mux_ctl, void *arg); /* Provides information about the mux connection */ + int (*takeover)(struct connection *conn, int orig_tid); /* Attempts to migrate the connection to the current thread */ + unsigned int flags; /* some flags characterizing the mux's capabilities (MX_FL_*) */ + char name[8]; /* mux layer name, zero-terminated */ +}; + +/* list of frontend connections. Used to call mux wake operation on soft-stop + * to close idling connections. + */ +struct mux_stopping_data { + struct list list; /* list of registered frontend connections */ + struct task *task; /* task woken up on soft-stop */ +}; + +struct my_tcphdr { + uint16_t source; + uint16_t dest; +}; + +/* a connection source profile defines all the parameters needed to properly + * bind an outgoing connection for a server or proxy. 
+ */ +struct conn_src { + unsigned int opts; /* CO_SRC_* */ + int iface_len; /* bind interface name length */ + char *iface_name; /* bind interface name or NULL */ + struct port_range *sport_range; /* optional per-server TCP source ports */ + struct sockaddr_storage source_addr; /* the address to which we want to bind for connect() */ +#if defined(CONFIG_HAP_TRANSPARENT) + struct sockaddr_storage tproxy_addr; /* non-local address we want to bind to for connect() */ + char *bind_hdr_name; /* bind to this header name if defined */ + int bind_hdr_len; /* length of the name of the header above */ + int bind_hdr_occ; /* occurrence number of header above: >0 = from first, <0 = from end, 0=disabled */ +#endif +}; + +/* Hash header flag reflecting the input parameters present + * CAUTION! Always update CONN_HASH_PARAMS_TYPE_COUNT when adding a new entry. + */ +enum conn_hash_params_t { + CONN_HASH_PARAMS_TYPE_SNI = 0x1, + CONN_HASH_PARAMS_TYPE_DST_ADDR = 0x2, + CONN_HASH_PARAMS_TYPE_DST_PORT = 0x4, + CONN_HASH_PARAMS_TYPE_SRC_ADDR = 0x8, + CONN_HASH_PARAMS_TYPE_SRC_PORT = 0x10, + CONN_HASH_PARAMS_TYPE_PROXY = 0x20, +}; +#define CONN_HASH_PARAMS_TYPE_COUNT 6 + +#define CONN_HASH_PAYLOAD_LEN \ + (((sizeof(((struct conn_hash_node *)0)->node.key)) * 8) - CONN_HASH_PARAMS_TYPE_COUNT) + +#define CONN_HASH_GET_PAYLOAD(hash) \ + (((hash) << CONN_HASH_PARAMS_TYPE_COUNT) >> CONN_HASH_PARAMS_TYPE_COUNT) + +/* To avoid overflow, dynamically sized parameters must be pre-hashed. Their + * hashes will then be reused as input for the generation of the final + * connection hash. + */ +struct conn_hash_params { + uint64_t sni_prehash; + uint64_t proxy_prehash; + void *target; + struct sockaddr_storage *src_addr; + struct sockaddr_storage *dst_addr; +}; + +/* + * This structure describes an TLV entry consisting of its type + * and corresponding payload. This can be used to construct a list + * from which arbitrary TLV payloads can be fetched.
+ * It might be possible to embed the 'tlv struct' here in the future. + */ +struct conn_tlv_list { + struct list list; + unsigned short len; // 65535 should be more than enough! + unsigned char type; + char value[0]; +} __attribute__((packed)); + +/* This structure describes a connection with its methods and data. + * A connection may be performed to proxy or server via a local or remote + * socket, and can also be made to an internal applet. It can support + * several transport schemes (raw, ssl, ...). It can support several + * connection control schemes, generally a protocol for socket-oriented + * connections, but other methods for applets. + */ +struct connection { + /* first cache line */ + enum obj_type obj_type; /* differentiates connection from applet context */ + unsigned char err_code; /* CO_ER_* */ + signed short send_proxy_ofs; /* <0 = offset to (re)send from the end, >0 = send all (reused for SOCKS4) */ + unsigned int flags; /* CO_FL_* */ + const struct protocol *ctrl; /* operations at the socket layer */ + const struct xprt_ops *xprt; /* operations at the transport layer */ + const struct mux_ops *mux; /* mux layer operations. Must be set before xprt->init() */ + void *xprt_ctx; /* general purpose pointer, initialized to NULL */ + void *ctx; /* highest level context (usually the mux), initialized to NULL */ + void *owner; /* pointer to the owner session, or NULL */ + enum obj_type *target; /* the target to connect to (server, proxy, applet, ...) 
*/ + + /* second cache line */ + struct wait_event *subs; /* Task to wake when awaited events are ready */ + union { + struct list idle_list; /* list element for idle connection in server idle list */ + struct mt_list toremove_list; /* list element when idle connection is ready to be purged */ + }; + union { + struct list session_list; /* used by backend conns, list of attached connections to a session */ + struct list stopping_list; /* used by frontend conns, attach point in mux stopping list */ + }; + union conn_handle handle; /* connection handle at the socket layer */ + const struct netns_entry *proxy_netns; + + /* third cache line and beyond */ + void (*destroy_cb)(struct connection *conn); /* callback to notify of imminent death of the connection */ + struct sockaddr_storage *src; /* source address (pool), when known, otherwise NULL */ + struct sockaddr_storage *dst; /* destination address (pool), when known, otherwise NULL */ + struct list tlv_list; /* list of TLVs received via PROXYv2 */ + + /* used to identify a backend connection for http-reuse, + * thus only present if conn.target is of type OBJ_TYPE_SERVER + */ + struct conn_hash_node *hash_node; + + /* Members used if connection must be reversed. */ + struct { + enum obj_type *target; /* Listener for active reverse, server for passive. */ + struct buffer name; /* Only used for passive reverse. Used as SNI when connection added to server idle pool. */ + } reverse; +}; + +/* node for backend connection in the idle trees for http-reuse + * A connection is identified by a hash generated from its specific parameters + */ +struct conn_hash_node { + struct eb64_node node; /* contains the hashing key */ + struct connection *conn; /* connection owner of the node */ +}; + +struct mux_proto_list { + const struct ist token; /* token name and length. 
Empty is catch-all */ + enum proto_proxy_mode mode; + enum proto_proxy_side side; + const struct mux_ops *mux; + struct list list; +}; + +/* proxy protocol stuff below */ + +/* proxy protocol v2 definitions */ +#define PP2_SIGNATURE "\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A" +#define PP2_SIGNATURE_LEN 12 +#define PP2_HEADER_LEN 16 + +/* ver_cmd byte */ +#define PP2_CMD_LOCAL 0x00 +#define PP2_CMD_PROXY 0x01 +#define PP2_CMD_MASK 0x0F + +#define PP2_VERSION 0x20 +#define PP2_VERSION_MASK 0xF0 + +/* fam byte */ +#define PP2_TRANS_UNSPEC 0x00 +#define PP2_TRANS_STREAM 0x01 +#define PP2_TRANS_DGRAM 0x02 +#define PP2_TRANS_MASK 0x0F + +#define PP2_FAM_UNSPEC 0x00 +#define PP2_FAM_INET 0x10 +#define PP2_FAM_INET6 0x20 +#define PP2_FAM_UNIX 0x30 +#define PP2_FAM_MASK 0xF0 + +#define PP2_ADDR_LEN_UNSPEC (0) +#define PP2_ADDR_LEN_INET (4 + 4 + 2 + 2) +#define PP2_ADDR_LEN_INET6 (16 + 16 + 2 + 2) +#define PP2_ADDR_LEN_UNIX (108 + 108) + +#define PP2_HDR_LEN_UNSPEC (PP2_HEADER_LEN + PP2_ADDR_LEN_UNSPEC) +#define PP2_HDR_LEN_INET (PP2_HEADER_LEN + PP2_ADDR_LEN_INET) +#define PP2_HDR_LEN_INET6 (PP2_HEADER_LEN + PP2_ADDR_LEN_INET6) +#define PP2_HDR_LEN_UNIX (PP2_HEADER_LEN + PP2_ADDR_LEN_UNIX) + +#define PP2_TYPE_ALPN 0x01 +#define PP2_TYPE_AUTHORITY 0x02 +#define PP2_TYPE_CRC32C 0x03 +#define PP2_TYPE_NOOP 0x04 +#define PP2_TYPE_UNIQUE_ID 0x05 +#define PP2_TYPE_SSL 0x20 +#define PP2_SUBTYPE_SSL_VERSION 0x21 +#define PP2_SUBTYPE_SSL_CN 0x22 +#define PP2_SUBTYPE_SSL_CIPHER 0x23 +#define PP2_SUBTYPE_SSL_SIG_ALG 0x24 +#define PP2_SUBTYPE_SSL_KEY_ALG 0x25 +#define PP2_TYPE_NETNS 0x30 + +#define PP2_CLIENT_SSL 0x01 +#define PP2_CLIENT_CERT_CONN 0x02 +#define PP2_CLIENT_CERT_SESS 0x04 + +#define PP2_CRC32C_LEN 4 /* Length of a CRC32C TLV value */ + +#define TLV_HEADER_SIZE 3 + +#define HA_PP2_AUTHORITY_MAX 255 /* Maximum length of an authority TLV */ +#define HA_PP2_TLV_VALUE_128 128 /* E.g., accommodate unique IDs (128 B) */ +#define HA_PP2_TLV_VALUE_256 256 /* E.g., 
accommodate authority TLVs (currently, <= 255 B) */ +#define HA_PP2_MAX_ALLOC 1024 /* Maximum TLV value for PPv2 to prevent DoS */ + +struct proxy_hdr_v2 { + uint8_t sig[12]; /* hex 0D 0A 0D 0A 00 0D 0A 51 55 49 54 0A */ + uint8_t ver_cmd; /* protocol version and command */ + uint8_t fam; /* protocol family and transport */ + uint16_t len; /* number of following bytes part of the header */ + union { + struct { /* for TCP/UDP over IPv4, len = 12 */ + uint32_t src_addr; + uint32_t dst_addr; + uint16_t src_port; + uint16_t dst_port; + } ip4; + struct { /* for TCP/UDP over IPv6, len = 36 */ + uint8_t src_addr[16]; + uint8_t dst_addr[16]; + uint16_t src_port; + uint16_t dst_port; + } ip6; + struct { /* for AF_UNIX sockets, len = 216 */ + uint8_t src_addr[108]; + uint8_t dst_addr[108]; + } unx; + } addr; +}; + +struct tlv { + uint8_t type; + uint8_t length_hi; + uint8_t length_lo; + uint8_t value[0]; // WT: don't use VAR_ARRAY here, it's an end of struct marker +}__attribute__((packed)); + +struct tlv_ssl { + struct tlv tlv; + uint8_t client; + uint32_t verify; + uint8_t sub_tlv[VAR_ARRAY]; +}__attribute__((packed)); + + +/* This structure is used to manage idle connections, their locking, and the + * list of such idle connections to be removed. It is per-thread and must be + * accessible from foreign threads. 
+ */ +struct idle_conns { + struct mt_list toremove_conns; + struct task *cleanup_task; + __decl_thread(HA_SPINLOCK_T idle_conns_lock); +} THREAD_ALIGNED(64); + +#endif /* _HAPROXY_CONNECTION_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/connection.h b/include/haproxy/connection.h new file mode 100644 index 0000000..c7d9883 --- /dev/null +++ b/include/haproxy/connection.h @@ -0,0 +1,762 @@ +/* + * include/haproxy/connection.h + * This file contains connection function prototypes + * + * Copyright (C) 2000-2002 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_CONNECTION_H +#define _HAPROXY_CONNECTION_H + +#include <import/ist.h> + +#include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/connection-t.h> +#include <haproxy/stconn-t.h> +#include <haproxy/fd.h> +#include <haproxy/list.h> +#include <haproxy/listener-t.h> +#include <haproxy/obj_type.h> +#include <haproxy/pool-t.h> +#include <haproxy/server.h> +#include <haproxy/session-t.h> +#include <haproxy/task-t.h> + +extern struct pool_head *pool_head_connection; +extern struct pool_head *pool_head_conn_hash_node; +extern struct pool_head *pool_head_sockaddr; +extern struct pool_head *pool_head_pp_tlv_128; +extern struct pool_head *pool_head_pp_tlv_256; +extern struct pool_head *pool_head_uniqueid; +extern struct xprt_ops *registered_xprt[XPRT_ENTRIES]; +extern struct mux_proto_list mux_proto_list; +extern struct mux_stopping_data mux_stopping_data[MAX_THREADS]; + +#define IS_HTX_CONN(conn) ((conn)->mux && ((conn)->mux->flags & MX_FL_HTX)) + +/* receive a PROXY protocol header over a connection */ +int conn_recv_proxy(struct connection *conn, int flag); +int conn_send_proxy(struct connection *conn, unsigned int flag); +int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm); +struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type); + +int conn_append_debug_info(struct buffer *buf, const struct connection *conn, const char *pfx); + +int conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es); +int conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es); + +/* receive a NetScaler Client IP insertion header over a connection */ +int conn_recv_netscaler_cip(struct 
connection *conn, int flag); + +/* raw send() directly on the socket */ +int conn_ctrl_send(struct connection *conn, const void *buf, int len, int flags); + +/* drains any pending bytes from the socket */ +int conn_ctrl_drain(struct connection *conn); + +/* scoks4 proxy handshake */ +int conn_send_socks4_proxy_request(struct connection *conn); +int conn_recv_socks4_proxy_response(struct connection *conn); + +/* If we delayed the mux creation because we were waiting for the handshake, do it now */ +int conn_create_mux(struct connection *conn); +int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake); +int conn_upgrade_mux_fe(struct connection *conn, void *ctx, struct buffer *buf, + struct ist mux_proto, int mode); +int conn_install_mux_fe(struct connection *conn, void *ctx); +int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess, + const struct mux_ops *force_mux_ops); +int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess); + +void conn_delete_from_tree(struct connection *conn); + +void conn_init(struct connection *conn, void *target); +struct connection *conn_new(void *target); +void conn_free(struct connection *conn); +struct conn_hash_node *conn_alloc_hash_node(struct connection *conn); +struct sockaddr_storage *sockaddr_alloc(struct sockaddr_storage **sap, const struct sockaddr_storage *orig, socklen_t len); +void sockaddr_free(struct sockaddr_storage **sap); + + +/* connection hash stuff */ +uint64_t conn_calculate_hash(const struct conn_hash_params *params); +uint64_t conn_hash_prehash(char *buf, size_t size); +void conn_hash_update(char *buf, size_t *idx, + const void *data, size_t size, + enum conn_hash_params_t *flags, + enum conn_hash_params_t type); +uint64_t conn_hash_digest(char *buf, size_t bufsize, + enum conn_hash_params_t flags); + +int conn_reverse(struct connection *conn); + +const char *conn_err_code_str(struct connection *c); +int xprt_add_hs(struct connection *conn); 
+void register_mux_proto(struct mux_proto_list *list); + +extern struct idle_conns idle_conns[MAX_THREADS]; + +/* returns true if the transport layer is ready */ +static inline int conn_xprt_ready(const struct connection *conn) +{ + return (conn->flags & CO_FL_XPRT_READY); +} + +/* returns true if the control layer is ready */ +static inline int conn_ctrl_ready(const struct connection *conn) +{ + return (conn->flags & CO_FL_CTRL_READY); +} + +/* + * Calls the start() function of the transport layer, if needed. + * Returns < 0 in case of error. +*/ + +static inline int conn_xprt_start(struct connection *conn) +{ + int ret = 0; + + if (!conn_xprt_ready(conn) && conn->xprt && conn->xprt->start) + ret = conn->xprt->start(conn, conn->xprt_ctx); + + if (ret >= 0) + conn->flags |= CO_FL_XPRT_READY; + + return ret; +} + +/* Calls the close() function of the transport layer if any and if not done + * yet, and clears the CO_FL_XPRT_READY flags + * However this is not done if the CO_FL_XPRT_TRACKED flag is set, + * which allows logs to take data from the transport layer very late if needed. + */ +static inline void conn_xprt_close(struct connection *conn) +{ + if (conn->xprt && !(conn->flags & CO_FL_XPRT_TRACKED)) { + if (conn->xprt->close) + conn->xprt->close(conn, conn->xprt_ctx); + conn->xprt_ctx = NULL; + conn->flags &= ~CO_FL_XPRT_READY; + conn->xprt = NULL; + } +} + +/* Initializes the connection's control layer which essentially consists in + * registering the connection handle (e.g. file descriptor) for events and + * setting the CO_FL_CTRL_READY flag. The caller is responsible for ensuring + * that the control layer is already assigned to the connection prior to the + * call. + */ +static inline void conn_ctrl_init(struct connection *conn) +{ + if (!conn_ctrl_ready(conn)) { + conn->flags |= CO_FL_CTRL_READY; + if (conn->ctrl->ctrl_init) + conn->ctrl->ctrl_init(conn); + } +} + +/* Deletes the connection's handle (e.g. 
FD) if the transport layer is already + * gone, and removes the CO_FL_CTRL_READY flag. + */ +static inline void conn_ctrl_close(struct connection *conn) +{ + if (!conn->xprt && (conn->flags & CO_FL_CTRL_READY)) { + if ((conn->flags & (CO_FL_WANT_DRAIN | CO_FL_SOCK_RD_SH)) == CO_FL_WANT_DRAIN) + conn_ctrl_drain(conn); + conn->flags &= ~CO_FL_CTRL_READY; + if (conn->ctrl->ctrl_close) + conn->ctrl->ctrl_close(conn); + } +} + +/* If the connection still has a transport layer, then call its close() function + * if any, and delete the file descriptor if a control layer is set. This is + * used to close everything at once and atomically. However this is not done if + * the CO_FL_XPRT_TRACKED flag is set, which allows logs to take data from the + * transport layer very late if needed. + */ +static inline void conn_full_close(struct connection *conn) +{ + conn_xprt_close(conn); + conn_ctrl_close(conn); +} + +/* stop tracking a connection, allowing conn_full_close() to always + * succeed. + */ +static inline void conn_stop_tracking(struct connection *conn) +{ + conn->flags &= ~CO_FL_XPRT_TRACKED; +} + +/* returns the connection's FD if the connection exists, its control is ready, + * and the connection has an FD, otherwise -1. + */ +static inline int conn_fd(const struct connection *conn) +{ + if (!conn || !conn_ctrl_ready(conn) || (conn->flags & CO_FL_FDLESS)) + return -1; + return conn->handle.fd; +} + +/* read shutdown, called from the rcv_buf/rcv_pipe handlers when + * detecting an end of connection. + */ +static inline void conn_sock_read0(struct connection *c) +{ + c->flags |= CO_FL_SOCK_RD_SH; + if (conn_ctrl_ready(c)) { + /* we don't risk keeping ports unusable if we found the + * zero from the other side. + */ + BUG_ON(c->flags & CO_FL_FDLESS); + HA_ATOMIC_AND(&fdtab[c->handle.fd].state, ~FD_LINGER_RISK); + } +} + +/* write shutdown, indication that the upper layer is not willing to send + * anything anymore and wants to close after pending data are sent. 
The + * <clean> argument will allow not to perform the socket layer shutdown if + * equal to 0. + */ +static inline void conn_sock_shutw(struct connection *c, int clean) +{ + c->flags |= CO_FL_SOCK_WR_SH; + if (conn_ctrl_ready(c)) { + /* don't perform a clean shutdown if we're going to reset or + * if the shutr was already received. + */ + BUG_ON(c->flags & CO_FL_FDLESS); + if (!(c->flags & CO_FL_SOCK_RD_SH) && clean) + shutdown(c->handle.fd, SHUT_WR); + } +} + +static inline void conn_xprt_shutw(struct connection *c) +{ + /* clean data-layer shutdown */ + if (c->xprt && c->xprt->shutw) + c->xprt->shutw(c, c->xprt_ctx, 1); +} + +static inline void conn_xprt_shutw_hard(struct connection *c) +{ + /* unclean data-layer shutdown */ + if (c->xprt && c->xprt->shutw) + c->xprt->shutw(c, c->xprt_ctx, 0); +} + + +/* detect sock->data read0 transition */ +static inline int conn_xprt_read0_pending(struct connection *c) +{ + return (c->flags & CO_FL_SOCK_RD_SH) != 0; +} + +/* prepares a connection to work with protocol <proto> and transport <xprt>. + * The transport's is initialized as well, and the mux and its context are + * cleared. The target is not reinitialized and it is recommended that it is + * set prior to calling this function so that the function may make use of it + * in the future to refine the mux choice if needed. + */ +static inline int conn_prepare(struct connection *conn, const struct protocol *proto, const struct xprt_ops *xprt) +{ + int ret = 0; + + conn->ctrl = proto; + conn->xprt = xprt; + conn->mux = NULL; + conn->xprt_ctx = NULL; + conn->ctx = NULL; + if (xprt->init) { + ret = xprt->init(conn, &conn->xprt_ctx); + if (ret < 0) + conn->xprt = NULL; + } + return ret; +} + +/* returns 0 if the connection is valid and is a frontend connection, otherwise + * returns 1 indicating it's a backend connection. And uninitialized connection + * also returns 1 to better handle the usage in the middle of initialization. 
+ */ +static inline int conn_is_back(const struct connection *conn) +{ + return !objt_listener(conn->target); +} + +/* sets <owner> as the connection's owner */ +static inline void conn_set_owner(struct connection *conn, void *owner, void (*cb)(struct connection *)) +{ + conn->owner = owner; + conn->destroy_cb = cb; +} + + +/* Mark the connection <conn> as private and remove it from the available connection list */ +static inline void conn_set_private(struct connection *conn) +{ + if (!(conn->flags & CO_FL_PRIVATE)) { + conn->flags |= CO_FL_PRIVATE; + + if (obj_type(conn->target) == OBJ_TYPE_SERVER) + srv_release_conn(__objt_server(conn->target), conn); + } +} + +static inline void conn_force_unsubscribe(struct connection *conn) +{ + if (!conn->subs) + return; + conn->subs->events = 0; + conn->subs = NULL; +} + +/* Returns the source address of the connection or NULL if not set */ +static inline const struct sockaddr_storage *conn_src(struct connection *conn) +{ + return conn->src; +} + +/* Returns the destination address of the connection or NULL if not set */ +static inline const struct sockaddr_storage *conn_dst(struct connection *conn) +{ + return conn->dst; +} + +/* Retrieves the connection's original source address. Returns non-zero on + * success or zero on failure. The operation is only performed once and the + * address is stored in the connection for future use. 
+ */ +static inline int conn_get_src(struct connection *conn) +{ + if (conn->src) + return 1; + + if (!conn_ctrl_ready(conn)) + goto fail; + + if (!sockaddr_alloc(&conn->src, NULL, 0)) + goto fail; + + /* some stream protocols may provide their own get_src/dst functions */ + if (conn->ctrl->get_src && + conn->ctrl->get_src(conn, (struct sockaddr *)conn->src, sizeof(*conn->src)) != -1) + goto done; + + if (conn->ctrl->proto_type != PROTO_TYPE_STREAM) + goto fail; + + /* most other socket-based stream protocols will use their socket family's functions */ + if (conn->ctrl->fam->get_src && !(conn->flags & CO_FL_FDLESS) && + conn->ctrl->fam->get_src(conn->handle.fd, (struct sockaddr *)conn->src, + sizeof(*conn->src), + obj_type(conn->target) != OBJ_TYPE_LISTENER) != -1) + goto done; + + /* no other means */ + fail: + sockaddr_free(&conn->src); + return 0; + done: + return 1; +} + +/* Retrieves the connection's original destination address. Returns non-zero on + * success or zero on failure. The operation is only performed once and the + * address is stored in the connection for future use. 
+ */ +static inline int conn_get_dst(struct connection *conn) +{ + if (conn->dst) + return 1; + + if (!conn_ctrl_ready(conn)) + goto fail; + + if (!sockaddr_alloc(&conn->dst, NULL, 0)) + goto fail; + + /* some stream protocols may provide their own get_src/dst functions */ + if (conn->ctrl->get_dst && + conn->ctrl->get_dst(conn, (struct sockaddr *)conn->dst, sizeof(*conn->dst)) != -1) + goto done; + + if (conn->ctrl->proto_type != PROTO_TYPE_STREAM) + goto fail; + + /* most other socket-based stream protocols will use their socket family's functions */ + if (conn->ctrl->fam->get_dst && !(conn->flags & CO_FL_FDLESS) && + conn->ctrl->fam->get_dst(conn->handle.fd, (struct sockaddr *)conn->dst, + sizeof(*conn->dst), + obj_type(conn->target) != OBJ_TYPE_LISTENER) != -1) + goto done; + + /* no other means */ + fail: + sockaddr_free(&conn->dst); + return 0; + done: + return 1; +} + +/* Sets the TOS header in IPv4 and the traffic class header in IPv6 packets + * (as per RFC3260 #4 and BCP37 #4.2 and #5.2). The connection is tested and if + * it is null, nothing is done. + */ +static inline void conn_set_tos(const struct connection *conn, int tos) +{ + if (!conn || !conn_ctrl_ready(conn) || (conn->flags & CO_FL_FDLESS)) + return; + +#ifdef IP_TOS + if (conn->src->ss_family == AF_INET) + setsockopt(conn->handle.fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)); +#endif +#ifdef IPV6_TCLASS + if (conn->src->ss_family == AF_INET6) { + if (IN6_IS_ADDR_V4MAPPED(&((struct sockaddr_in6 *)conn->src)->sin6_addr)) + /* v4-mapped addresses need IP_TOS */ + setsockopt(conn->handle.fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)); + else + setsockopt(conn->handle.fd, IPPROTO_IPV6, IPV6_TCLASS, &tos, sizeof(tos)); + } +#endif +} + +/* Sets the netfilter mark on the connection's socket. The connection is tested + * and if it is null, nothing is done. 
+ */ +static inline void conn_set_mark(const struct connection *conn, int mark) +{ + if (!conn || !conn_ctrl_ready(conn) || (conn->flags & CO_FL_FDLESS)) + return; + +#if defined(SO_MARK) + setsockopt(conn->handle.fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); +#elif defined(SO_USER_COOKIE) + setsockopt(conn->handle.fd, SOL_SOCKET, SO_USER_COOKIE, &mark, sizeof(mark)); +#elif defined(SO_RTABLE) + setsockopt(conn->handle.fd, SOL_SOCKET, SO_RTABLE, &mark, sizeof(mark)); +#endif +} + +/* Sets adjust the TCP quick-ack feature on the connection's socket. The + * connection is tested and if it is null, nothing is done. + */ +static inline void conn_set_quickack(const struct connection *conn, int value) +{ + if (!conn || !conn_ctrl_ready(conn) || (conn->flags & CO_FL_FDLESS)) + return; + +#ifdef TCP_QUICKACK + setsockopt(conn->handle.fd, IPPROTO_TCP, TCP_QUICKACK, &value, sizeof(value)); +#endif +} + +static inline struct wait_event *wl_set_waitcb(struct wait_event *wl, struct task *(*cb)(struct task *, void *, unsigned int), void *ctx) +{ + if (!wl->tasklet->process) { + wl->tasklet->process = cb; + wl->tasklet->context = ctx; + } + return wl; +} + +/* Installs the connection's mux layer for upper context <ctx>. + * Returns < 0 on error. + */ +static inline int conn_install_mux(struct connection *conn, const struct mux_ops *mux, + void *ctx, struct proxy *prx, struct session *sess) +{ + int ret; + + conn->mux = mux; + conn->ctx = ctx; + ret = mux->init ? mux->init(conn, prx, sess, &BUF_NULL) : 0; + if (ret < 0) { + conn->mux = NULL; + conn->ctx = NULL; + } + return ret; +} + +/* Retrieves any valid stream connector from this connection, preferably the first + * valid one. The purpose is to be able to figure one other end of a private + * connection for purposes like source binding or proxy protocol header + * emission. In such cases, any stream connector is expected to be valid so the + * mux is encouraged to return the first one it finds. 
If the connection has + * no mux or the mux has no get_first_sc() method or the mux has no valid + * stream connector, NULL is returned. The output pointer is purposely marked + * const to discourage the caller from modifying anything there. + */ +static inline struct stconn *conn_get_first_sc(const struct connection *conn) +{ + BUG_ON(!conn || !conn->mux); + + if (!conn->mux->get_first_sc) + return NULL; + return conn->mux->get_first_sc(conn); +} + +int conn_update_alpn(struct connection *conn, const struct ist alpn, int force); + +static inline const char *conn_get_ctrl_name(const struct connection *conn) +{ + if (!conn || !conn_ctrl_ready(conn)) + return "NONE"; + return conn->ctrl->name; +} + +static inline const char *conn_get_xprt_name(const struct connection *conn) +{ + if (!conn || !conn->xprt) + return "NONE"; + return conn->xprt->name; +} + +static inline const char *conn_get_mux_name(const struct connection *conn) +{ + if (!conn || !conn->mux) + return "NONE"; + return conn->mux->name; +} + +/* registers pointer to transport layer <id> (XPRT_*) */ +static inline void xprt_register(int id, struct xprt_ops *xprt) +{ + if (id >= XPRT_ENTRIES) + return; + registered_xprt[id] = xprt; +} + +/* returns pointer to transport layer <id> (XPRT_*) or NULL if not registered */ +static inline struct xprt_ops *xprt_get(int id) +{ + if (id >= XPRT_ENTRIES) + return NULL; + return registered_xprt[id]; +} + +/* notify the next xprt that the connection is about to become idle and that it + * may be stolen at any time after the function returns and that any tasklet in + * the chain must be careful before dereferencing its context. + */ +static inline void xprt_set_idle(struct connection *conn, const struct xprt_ops *xprt, void *xprt_ctx) +{ + if (xprt->set_idle) + xprt->set_idle(conn, conn->xprt_ctx); +} + +/* notify the next xprt that the connection is not idle anymore and that it may + * not be stolen before the next xprt_set_idle(). 
+ */ +static inline void xprt_set_used(struct connection *conn, const struct xprt_ops *xprt, void *xprt_ctx) +{ + if (xprt->set_used) + xprt->set_used(conn, conn->xprt_ctx); +} + +static inline int conn_get_alpn(const struct connection *conn, const char **str, int *len) +{ + if (!conn_xprt_ready(conn) || !conn->xprt->get_alpn) + return 0; + return conn->xprt->get_alpn(conn, conn->xprt_ctx, str, len); +} + +/* unregisters proto mux list <list> */ +static inline void unregister_mux_proto(struct mux_proto_list *list) +{ + LIST_DELETE(&list->list); + LIST_INIT(&list->list); +} + +static inline struct mux_proto_list *get_mux_proto(const struct ist proto) +{ + struct mux_proto_list *item; + + list_for_each_entry(item, &mux_proto_list.list, list) { + if (isteq(proto, item->token)) + return item; + } + return NULL; +} + +void list_mux_proto(FILE *out); +/* returns the first mux entry in the list matching the exact same <mux_proto> + * and compatible with the <proto_side> (FE or BE) and the <proto_mode> (TCP or + * HTTP). <mux_proto> can be empty. Will fall back to the first compatible mux + * with exactly the same <proto_mode> or with an empty name. May return + * null if the code improperly registered the default mux to use as a fallback. + * + * <proto_mode> expects PROTO_MODE_* value only: PROXY_MODE_* values should + * never be used directly here (but you may use conn_pr_mode_to_proto_mode() + * to map proxy mode to corresponding proto mode before calling the function). 
+ */ +static inline const struct mux_proto_list *conn_get_best_mux_entry( + const struct ist mux_proto, + int proto_side, int proto_mode) +{ + struct mux_proto_list *item; + struct mux_proto_list *fallback = NULL; + + list_for_each_entry(item, &mux_proto_list.list, list) { + if (!(item->side & proto_side) || !(item->mode & proto_mode)) + continue; + if (istlen(mux_proto) && isteq(mux_proto, item->token)) + return item; + else if (!istlen(item->token)) { + if (!fallback || (item->mode == proto_mode && fallback->mode != proto_mode)) + fallback = item; + } + } + return fallback; + +} + +/* returns the first mux in the list matching the exact same <mux_proto> and + * compatible with the <proto_side> (FE or BE) and the <proto_mode> (TCP or + * HTTP). <mux_proto> can be empty. Will fall back to the first compatible mux + * with exactly the same <proto_mode> or with an empty name. May return + * null if the code improperly registered the default mux to use as a fallback. + */ +static inline const struct mux_ops *conn_get_best_mux(struct connection *conn, + const struct ist mux_proto, + int proto_side, int proto_mode) +{ + const struct mux_proto_list *item; + + item = conn_get_best_mux_entry(mux_proto, proto_side, proto_mode); + + return item ? item->mux : NULL; +} + +/* returns a pointer to the proxy associated with this connection. For a front + * connection it returns a pointer to the frontend ; for a back connection, it + * returns a pointer to the backend. + */ +static inline struct proxy *conn_get_proxy(const struct connection *conn) +{ + struct listener *l; + struct server *s; + + /* check if it's a frontend connection */ + l = objt_listener(conn->target); + if (l) + return l->bind_conf->frontend; + + /* check if it's a backend connection */ + s = objt_server(conn->target); + if (s) + return s->proxy; + + return objt_proxy(conn->target); +} + +/* unconditionally retrieves the ssl_sock_ctx for this connection. 
Prefer using + * the standard form conn_get_ssl_sock_ctx() which checks the transport layer + * and the availability of the method. + */ +static inline struct ssl_sock_ctx *__conn_get_ssl_sock_ctx(struct connection *conn) +{ + return conn->xprt->get_ssl_sock_ctx(conn); +} + +/* retrieves the ssl_sock_ctx for this connection otherwise NULL */ +static inline struct ssl_sock_ctx *conn_get_ssl_sock_ctx(struct connection *conn) +{ + if (!conn || !conn->xprt || !conn->xprt->get_ssl_sock_ctx) + return NULL; + return conn->xprt->get_ssl_sock_ctx(conn); +} + +/* boolean, returns true if connection is over SSL */ +static inline int conn_is_ssl(struct connection *conn) +{ + return !!conn_get_ssl_sock_ctx(conn); +} + +/* Returns true if connection must be reversed. */ +static inline int conn_is_reverse(const struct connection *conn) +{ + return !!(conn->reverse.target); +} + +/* Returns true if connection must be actively reversed or waiting to be accepted. */ +static inline int conn_reverse_in_preconnect(const struct connection *conn) +{ + return conn_is_back(conn) ? !!(conn->reverse.target) : + !!(conn->flags & CO_FL_ACT_REVERSING); +} + +/* Initialize <conn> as a reverse connection to <target>. */ +static inline void conn_set_reverse(struct connection *conn, enum obj_type *target) +{ + /* Ensure the correct target type is used depending on the connection side before reverse. */ + BUG_ON((!conn_is_back(conn) && !objt_server(target)) || + (conn_is_back(conn) && !objt_listener(target))); + + conn->reverse.target = target; +} + +/* Returns the listener instance for connection used for active reverse. */ +static inline struct listener *conn_active_reverse_listener(const struct connection *conn) +{ + return conn_is_back(conn) ? __objt_listener(conn->reverse.target) : + __objt_listener(conn->target); +} + +/* + * Prepare TLV argument for redirecting fetches. 
+ * Note that it is not possible to use an argument check function + * as that would require us to allow arguments for functions + * that do not need it. Alternatively, the sample logic could be + * adjusted to perform checks for no arguments and allocate + * in the check function. However, this does not seem worth the trouble. + */ +static inline void set_tlv_arg(int tlv_type, struct arg *tlv_arg) +{ + tlv_arg->type = ARGT_SINT; + tlv_arg->data.sint = tlv_type; +} + +/* + * Map proxy mode (PR_MODE_*) to equivalent proto_proxy_mode (PROTO_MODE_*) + */ +static inline int conn_pr_mode_to_proto_mode(int proxy_mode) +{ + int mode; + + /* for now we only support TCP and HTTP proto_modes, so we + * consider that if it's not HTTP, then it's TCP + */ + mode = 1 << (proxy_mode == PR_MODE_HTTP); + + return mode; +} + +#endif /* _HAPROXY_CONNECTION_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/counters-t.h b/include/haproxy/counters-t.h new file mode 100644 index 0000000..933c228 --- /dev/null +++ b/include/haproxy/counters-t.h @@ -0,0 +1,128 @@ +/* + * include/haproxy/counters-t.h + * This file contains structure declarations for statistics counters. + * + * Copyright 2008-2009 Krzysztof Piotr Oledzki <ole@ans.pl> + * Copyright 2011-2014 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_COUNTERS_T_H +#define _HAPROXY_COUNTERS_T_H + +/* counters used by listeners and frontends */ +struct fe_counters { + unsigned int conn_max; /* max # of active sessions */ + long long cum_conn; /* cumulated number of received connections */ + long long cum_sess; /* cumulated number of accepted connections */ + long long cum_sess_ver[3]; /* cumulated number of h1/h2/h3 sessions */ + + unsigned int cps_max; /* maximum of new connections received per second */ + unsigned int sps_max; /* maximum of new connections accepted per second (sessions) */ + + long long bytes_in; /* number of bytes transferred from the client to the server */ + long long bytes_out; /* number of bytes transferred from the server to the client */ + + /* compression counters, index 0 for requests, 1 for responses */ + long long comp_in[2]; /* input bytes fed to the compressor */ + long long comp_out[2]; /* output bytes emitted by the compressor */ + long long comp_byp[2]; /* input bytes that bypassed the compressor (cpu/ram/bw limitation) */ + + long long denied_req; /* blocked requests because of security concerns */ + long long denied_resp; /* blocked responses because of security concerns */ + long long failed_req; /* failed requests (eg: invalid or timeout) */ + long long denied_conn; /* denied connection requests (tcp-req-conn rules) */ + long long denied_sess; /* denied session requests (tcp-req-sess rules) */ + long long failed_rewrites; /* failed rewrites (warning) */ + long long internal_errors; /* internal processing errors */ + + long long cli_aborts; /* aborted responses during DATA phase caused by the client */ + long long srv_aborts; /* aborted responses during DATA phase caused by the server */ + long long intercepted_req; /* number of 
monitoring or stats requests intercepted by the frontend */ + + union { + struct { + long long cum_req[4]; /* cumulated number of processed other/h1/h2/h3 requests */ + long long comp_rsp; /* number of compressed responses */ + unsigned int rps_max; /* maximum of new HTTP requests second observed */ + long long rsp[6]; /* http response codes */ + long long cache_lookups;/* cache lookups */ + long long cache_hits; /* cache hits */ + } http; + } p; /* protocol-specific stats */ +}; + +/* counters used by servers and backends */ +struct be_counters { + unsigned int conn_max; /* max # of active sessions */ + long long cum_conn; /* cumulated number of received connections */ + long long cum_sess; /* cumulated number of accepted connections */ + long long cum_lbconn; /* cumulated number of sessions processed by load balancing (BE only) */ + unsigned long last_sess; /* last session time */ + + unsigned int cps_max; /* maximum of new connections received per second */ + unsigned int sps_max; /* maximum of new connections accepted per second (sessions) */ + unsigned int nbpend_max; /* max number of pending connections with no server assigned yet */ + unsigned int cur_sess_max; /* max number of currently active sessions */ + + long long bytes_in; /* number of bytes transferred from the client to the server */ + long long bytes_out; /* number of bytes transferred from the server to the client */ + + /* compression counters, index 0 for requests, 1 for responses */ + long long comp_in[2]; /* input bytes fed to the compressor */ + long long comp_out[2]; /* output bytes emitted by the compressor */ + long long comp_byp[2]; /* input bytes that bypassed the compressor (cpu/ram/bw limitation) */ + + long long denied_req; /* blocked requests because of security concerns */ + long long denied_resp; /* blocked responses because of security concerns */ + + long long connect; /* number of connection establishment attempts */ + long long reuse; /* number of connection reuses */ + long 
long failed_conns; /* failed connect() attempts (BE only) */ + long long failed_resp; /* failed responses (BE only) */ + long long cli_aborts; /* aborted responses during DATA phase caused by the client */ + long long srv_aborts; /* aborted responses during DATA phase caused by the server */ + long long retries; /* retried and redispatched connections (BE only) */ + long long redispatches; /* retried and redispatched connections (BE only) */ + long long failed_rewrites; /* failed rewrites (warning) */ + long long internal_errors; /* internal processing errors */ + + long long failed_checks, failed_hana; /* failed health checks and health analyses for servers */ + long long down_trans; /* up->down transitions */ + + unsigned int q_time, c_time, d_time, t_time; /* sums of conn_time, queue_time, data_time, total_time */ + unsigned int qtime_max, ctime_max, dtime_max, ttime_max; /* maximum of conn_time, queue_time, data_time, total_time observed */ + + union { + struct { + long long cum_req; /* cumulated number of processed HTTP requests */ + long long comp_rsp; /* number of compressed responses */ + unsigned int rps_max; /* maximum of new HTTP requests second observed */ + long long rsp[6]; /* http response codes */ + long long cache_lookups;/* cache lookups */ + long long cache_hits; /* cache hits */ + } http; + } p; /* protocol-specific stats */ +}; + +#endif /* _HAPROXY_COUNTERS_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/cpuset-t.h b/include/haproxy/cpuset-t.h new file mode 100644 index 0000000..d3ebb35 --- /dev/null +++ b/include/haproxy/cpuset-t.h @@ -0,0 +1,54 @@ +#ifndef _HAPROXY_CPUSET_T_H +#define _HAPROXY_CPUSET_T_H + +#define _GNU_SOURCE +#include <sched.h> + +#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) +#include <sys/param.h> +#ifdef __FreeBSD__ +#include <sys/_cpuset.h> +#include <sys/cpuset.h> +#include <sys/sysctl.h> +#include <strings.h> +#endif 
+#endif + +#include <haproxy/api-t.h> + +#if defined(__linux__) || defined(__DragonFly__) || \ + (defined(__FreeBSD_kernel__) && defined(__GLIBC__)) + +# define CPUSET_REPR cpu_set_t +# define CPUSET_USE_CPUSET + +#elif defined(__FreeBSD__) || defined(__NetBSD__) + +# define CPUSET_REPR cpuset_t + +# if defined(__FreeBSD__) && __FreeBSD_version >= 1301000 +# define CPUSET_USE_CPUSET +# else +# define CPUSET_USE_FREEBSD_CPUSET +# endif + +#elif defined(__APPLE__) + +# define CPUSET_REPR unsigned long +# define CPUSET_USE_ULONG + +#else + +# error "No cpuset support implemented on this platform" + +#endif + +struct hap_cpuset { + CPUSET_REPR cpuset; +}; + +struct cpu_map { + struct hap_cpuset thread[MAX_THREADS_PER_GROUP]; /* list of CPU masks for the 32/64 threads of this group */ +}; + +#endif /* _HAPROXY_CPUSET_T_H */ diff --git a/include/haproxy/cpuset.h b/include/haproxy/cpuset.h new file mode 100644 index 0000000..87c4ece --- /dev/null +++ b/include/haproxy/cpuset.h @@ -0,0 +1,76 @@ +#ifndef _HAPROXY_CPUSET_H +#define _HAPROXY_CPUSET_H + +#include <haproxy/cpuset-t.h> + +extern struct cpu_map *cpu_map; + +/* Unset all indexes in <set>. + */ +void ha_cpuset_zero(struct hap_cpuset *set); + +/* Set <cpu> index in <set> if not present. + * Returns 0 on success otherwise non-zero. + */ +int ha_cpuset_set(struct hap_cpuset *set, int cpu); + +/* Clear <cpu> index in <set> if present. + * Returns 0 on success otherwise non-zero. + */ +int ha_cpuset_clr(struct hap_cpuset *set, int cpu); + +/* Bitwise and equivalent operation between <src> and <dst> stored in <dst>. + */ +void ha_cpuset_and(struct hap_cpuset *dst, struct hap_cpuset *src); + +/* Bitwise OR equivalent operation between <src> and <dst> stored in <dst>. + */ +void ha_cpuset_or(struct hap_cpuset *dst, struct hap_cpuset *src); + +/* returns non-zero if CPU index <cpu> is set in <set>, otherwise 0. */ +int ha_cpuset_isset(const struct hap_cpuset *set, int cpu); + +/* Returns the count of set index in <set>. 
+ */ +int ha_cpuset_count(const struct hap_cpuset *set); + +/* Returns the first index set plus one in <set> starting from the lowest. + * Returns 0 if no index set. + * Do not forget to subtract one from the result if using it for set/clr. + */ +int ha_cpuset_ffs(const struct hap_cpuset *set); + +/* Copy <src> set into <dst>. + */ +void ha_cpuset_assign(struct hap_cpuset *dst, struct hap_cpuset *src); + +/* Returns the biggest index plus one usable on the platform. + */ +int ha_cpuset_size(void); + +/* Detects CPUs that are bound to the current process. Returns the number of + * CPUs detected or 0 if the detection failed. + */ +int ha_cpuset_detect_bound(struct hap_cpuset *set); + +/* Parse cpu sets. Each CPU set is either a unique number between 0 and + * ha_cpuset_size() - 1 or a range with two such numbers delimited by a dash + * ('-'). Each CPU set can be a list of unique numbers or ranges separated by + * a comma. It is also possible to specify multiple cpu numbers or ranges in + * distinct arguments in <args>. On success, it returns 0, otherwise it returns + * 1 with an error message in <err>. + */ +int parse_cpu_set(const char **args, struct hap_cpuset *cpu_set, char **err); + +/* Parse a Linux cpu map string into a numeric cpu mask map. + * The cpu map string is a list of 4-byte hex strings separated by commas, with + * most-significant byte first, one bit per cpu number. + */ +void parse_cpumap(char *cpumap_str, struct hap_cpuset *cpu_set); + +/* Returns true if at least one cpu-map directive was configured, otherwise + * false. + */ +int cpu_map_configured(void); + +#endif /* _HAPROXY_CPUSET_H */ diff --git a/include/haproxy/debug.h b/include/haproxy/debug.h new file mode 100644 index 0000000..b7a2e20 --- /dev/null +++ b/include/haproxy/debug.h @@ -0,0 +1,39 @@ +/* + * include/haproxy/debug.h + * This file contains some macros to help debugging.
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DEBUG_H +#define _HAPROXY_DEBUG_H + +struct task; +struct buffer; +extern unsigned int debug_commands_issued; +void ha_task_dump(struct buffer *buf, const struct task *task, const char *pfx); +void ha_thread_dump_one(int thr, int from_signal); +void ha_thread_dump(struct buffer *buf, int thr); +void ha_dump_backtrace(struct buffer *buf, const char *prefix, int dump); +void ha_backtrace_to_stderr(void); +void ha_panic(void); + +void post_mortem_add_component(const char *name, const char *version, + const char *toolchain, const char *toolchain_opts, + const char *build_settings, const char *path); + +#endif /* _HAPROXY_DEBUG_H */ diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h new file mode 100644 index 0000000..7430c61 --- /dev/null +++ b/include/haproxy/defaults.h @@ -0,0 +1,533 @@ +/* + * include/haproxy/defaults.h + * Miscellaneous default values. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DEFAULTS_H +#define _HAPROXY_DEFAULTS_H + +/* MAX_THREADS defines the highest limit for the global nbthread value. It + * defaults to the number of bits in a long integer when threads are enabled + * but may be lowered to save resources on embedded systems. +*/ +#ifndef USE_THREAD +/* threads disabled, 1 thread max, 1 group max (note: group ids start at 1) */ +#define MAX_THREADS 1 + +#define MAX_TGROUPS 1 +#define MAX_THREADS_PER_GROUP 1 + +#else + +/* theoretical limit is 64, though we'd rather not push it too far for now + * as some structures might be enlarged to be indexed per group. Let's start + * with 16 groups max, allowing to experiment with dual-socket machines + * suffering from up to 8 loosely coupled L3 caches. It's a good start and + * doesn't engage us too far. + */ +#ifndef MAX_TGROUPS +#define MAX_TGROUPS 16 +#endif + +#define MAX_THREADS_PER_GROUP __WORDSIZE + +/* threads enabled, max_threads defaults to long bits for 1 tgroup or 4 times + * long bits if more tgroups are enabled. + */ +#ifndef MAX_THREADS +#define MAX_THREADS ((((MAX_TGROUPS) > 1) ? 4 : 1) * (MAX_THREADS_PER_GROUP)) +#endif + +#endif // USE_THREAD + +/* + * BUFSIZE defines the size of a read and write buffer. It is the maximum + * amount of bytes which can be stored by the proxy for each stream. However, + * when reading HTTP headers, the proxy needs some spare space to add or rewrite + * headers if needed. The size of this spare is defined with MAXREWRITE. 
So it + * is not possible to process headers longer than BUFSIZE-MAXREWRITE bytes. By + * default, BUFSIZE=16384 bytes and MAXREWRITE=min(1024,BUFSIZE/2), so the + * maximum length of headers accepted is 15360 bytes. + */ +#ifndef BUFSIZE +#define BUFSIZE 16384 +#endif + +/* certain buffers may only be allocated for responses in order to avoid + * deadlocks caused by request queuing. 2 buffers is the absolute minimum + * acceptable to ensure that a request gaining access to a server can get + * a response buffer even if it doesn't completely flush the request buffer. + * The worst case is an applet making use of a request buffer that cannot + * completely be sent while the server starts to respond, and all unreserved + * buffers are allocated by request buffers from pending connections in the + * queue waiting for this one to flush. Both reserved buffers may + * thus be used at the same time. + */ +#ifndef RESERVED_BUFS +#define RESERVED_BUFS 2 +#endif + +// reserved buffer space for header rewriting +#ifndef MAXREWRITE +#define MAXREWRITE 1024 +#endif + +#ifndef REQURI_LEN +#define REQURI_LEN 1024 +#endif + +#ifndef CAPTURE_LEN +#define CAPTURE_LEN 64 +#endif + +#ifndef MAX_SYSLOG_LEN +#define MAX_SYSLOG_LEN 1024 +#endif + +/* 64kB to archive startup-logs seems way more than enough + * /!\ Careful when changing this size, it is used in a shm when exec() from + * mworker to wait mode.
+ */ +#ifndef STARTUP_LOG_SIZE +#define STARTUP_LOG_SIZE 65536 +#endif + +// maximum line size when parsing config +#ifndef LINESIZE +#define LINESIZE 2048 +#endif + +// max # args on a configuration line +#define MAX_LINE_ARGS 64 + +// maximum line size when parsing crt-bind-list config +#define CRT_LINESIZE 65536 + +// max # args on crt-bind-list configuration line +#define MAX_CRT_ARGS 2048 + +// max # args on a command issued on the CLI ("stats socket") +// This should cover at least 5 + twice the # of data_types +#define MAX_CLI_ARGS 64 + +// max recursion levels in config condition evaluations +// (note that binary operators add one recursion level, and +// that parenthesis may add two). +#define MAX_CFG_RECURSION 1024 + +// max # of matches per regexp +#define MAX_MATCH 10 + +// max # of headers in one HTTP request or response +// By default, about 100 headers (+1 for the first line) +#ifndef MAX_HTTP_HDR +#define MAX_HTTP_HDR 101 +#endif + +// max # of headers in history when looking for header #-X +#ifndef MAX_HDR_HISTORY +#define MAX_HDR_HISTORY 10 +#endif + +// max length of a TRACE_PRINTF() output buffer (one less char for the message) +#ifndef TRACE_MAX_MSG +#define TRACE_MAX_MSG 1024 +#endif + +// max # of stick counters per session (at least 3 for sc0..sc2) +#ifndef MAX_SESS_STKCTR +#define MAX_SESS_STKCTR 3 +#endif + +// max # of extra stick-table data types that can be registered at runtime +#ifndef STKTABLE_EXTRA_DATA_TYPES +#define STKTABLE_EXTRA_DATA_TYPES 0 +#endif + +// max # of stick-table filter entries that can be used during dump +#ifndef STKTABLE_FILTER_LEN +#define STKTABLE_FILTER_LEN 4 +#endif + +// max # of loops we can perform around a read() which succeeds. +// It's very frequent that the system returns a few TCP segments at a time. +#ifndef MAX_READ_POLL_LOOPS +#define MAX_READ_POLL_LOOPS 4 +#endif + +// minimum number of bytes read at once above which we don't try to read +// more, in order not to risk facing an EAGAIN. 
Most often, if we read +// at least 10 kB, we can consider that the system has tried to read a +// full buffer and got multiple segments (>1 MSS for jumbo frames, >7 MSS +// for normal frames) and did not bother truncating the last segment. +#ifndef MIN_RECV_AT_ONCE_ENOUGH +#define MIN_RECV_AT_ONCE_ENOUGH (7*1448) +#endif + +// The minimum number of bytes to be forwarded that is worth trying to splice. +// Below 4kB, it's not worth allocating pipes nor pretending to zero-copy. +#ifndef MIN_SPLICE_FORWARD +#define MIN_SPLICE_FORWARD 4096 +#endif + +// the max number of events returned in one call to poll/epoll. Too small a +// value will cause lots of calls, and too high a value may cause high latency. +#ifndef MAX_POLL_EVENTS +#define MAX_POLL_EVENTS 200 +#endif + +/* eternity when expressed in timeval */ +#ifndef TV_ETERNITY +#define TV_ETERNITY (~0UL) +#endif + +/* eternity when expressed in ms */ +#ifndef TV_ETERNITY_MS +#define TV_ETERNITY_MS (-1) +#endif + +/* delay between boot and first time wrap, in seconds */ +#ifndef BOOT_TIME_WRAP_SEC +#define BOOT_TIME_WRAP_SEC 20 +#endif +/* we want to be able to detect time jumps. Fix the maximum wait time to a low + * value so that we know the time has changed if we wait longer. + */ +#ifndef MAX_DELAY_MS +#define MAX_DELAY_MS 60000 +#endif + +// The maximum number of connections accepted at once by a thread for a single +// listener. It used to default to 64 divided by the number of processes but +// the tasklet-based model is much more scalable and benefits from smaller +// values.
Experimentation has shown that 4 gives the highest accept rate for +// all thread values, and that 3 and 5 come very close, as shown below (HTTP/1 +// connections forwarded per second at multi-accept 4 and 64): +// +// ac\thr| 1 2 4 8 16 +// ------+------------------------------ +// 4| 80k 106k 168k 270k 336k +// 64| 63k 89k 145k 230k 274k +// +#ifndef MAX_ACCEPT +#define MAX_ACCEPT 4 +#endif + +// The base max number of tasks to run at once to be used when not set by +// tune.runqueue-depth. It will automatically be divided by the square root +// of the number of threads for better fairness. As such, 64 threads will +// use 35 and a single thread will use 280. +#ifndef RUNQUEUE_DEPTH +#define RUNQUEUE_DEPTH 280 +#endif + +// cookie delimiter in "prefix" mode. This character is inserted between the +// persistence cookie and the original value. The '~' is allowed by RFC6265, +// and should not be too common in server names. +#ifndef COOKIE_DELIM +#define COOKIE_DELIM '~' +#endif + +// this delimiter is used between a server's name and a last visit date in +// cookies exchanged with the client. +#ifndef COOKIE_DELIM_DATE +#define COOKIE_DELIM_DATE '|' +#endif + +// Max number of acl() sample fetch recursive evaluations, to avoid deep tree +// loops. +#ifndef ACL_MAX_RECURSE +#define ACL_MAX_RECURSE 1000 +#endif + +#define CONN_RETRIES 3 + +#define CHK_CONNTIME 2000 +#define DEF_CHKINTR 2000 +#define DEF_MAILALERTTIME 10000 +#define DEF_FALLTIME 3 +#define DEF_RISETIME 2 +#define DEF_AGENT_FALLTIME 1 +#define DEF_AGENT_RISETIME 1 +#define DEF_CHECK_PATH "" + + +#define DEF_HANA_ONERR HANA_ONERR_FAILCHK +#define DEF_HANA_ERRLIMIT 10 + +// X-Forwarded-For header default +#define DEF_XFORWARDFOR_HDR "X-Forwarded-For" + +// X-Original-To header default +#define DEF_XORIGINALTO_HDR "X-Original-To" + +/* Max number of events that may be processed at once by + * an event_hdl API consumer to prevent thread contention. 
+ */ +#ifndef EVENT_HDL_MAX_AT_ONCE +#define EVENT_HDL_MAX_AT_ONCE 100 +#endif + +/* Default connections limit. + * + * A system limit can be enforced at build time in order to avoid using haproxy + * beyond reasonable system limits. For this, just define SYSTEM_MAXCONN to the + * absolute limit accepted by the system. If the configuration specifies a + * higher value, it will be capped to SYSTEM_MAXCONN and a warning will be + * emitted. The only way to override this limit will be to set it via the + * command-line '-n' argument. If SYSTEM_MAXCONN is not set, a minimum value + * of 100 will be used for DEFAULT_MAXCONN which almost guarantees that a + * process will correctly start in any situation. + */ +#ifdef SYSTEM_MAXCONN +#undef DEFAULT_MAXCONN +#define DEFAULT_MAXCONN SYSTEM_MAXCONN +#elif !defined(DEFAULT_MAXCONN) +#define DEFAULT_MAXCONN 100 +#endif + +/* Define a maxconn which will be used in the master process once it re-exec to + * the MODE_MWORKER_WAIT and won't change when SYSTEM_MAXCONN is set. + * + * 100 must be enough for the master since it only does communication between + * the master and the workers, and the master CLI. + */ +#ifndef MASTER_MAXCONN +#define MASTER_MAXCONN 100 +#endif + +/* Minimum check interval for spread health checks. Servers with intervals + * greater than or equal to this value will have their checks spread apart + * and will be considered when searching the minimal interval. + * Others will be ignored for the minimal interval and will have their checks + * scheduled on a different basis. + */ +#ifndef SRV_CHK_INTER_THRES +#define SRV_CHK_INTER_THRES 1000 +#endif + +/* Specifies the string used to report the version and release date on the + * statistics page. May be defined to the empty string ("") to permanently + * disable the feature. 
+ */ +#ifndef STATS_VERSION_STRING +#define STATS_VERSION_STRING " version " HAPROXY_VERSION ", released " HAPROXY_DATE +#endif + +/* This is the default statistics URI */ +#ifdef CONFIG_STATS_DEFAULT_URI +#define STATS_DEFAULT_URI CONFIG_STATS_DEFAULT_URI +#else +#define STATS_DEFAULT_URI "/haproxy?stats" +#endif + +/* This is the default statistics realm */ +#ifdef CONFIG_STATS_DEFAULT_REALM +#define STATS_DEFAULT_REALM CONFIG_STATS_DEFAULT_REALM +#else +#define STATS_DEFAULT_REALM "HAProxy Statistics" +#endif + +/* Maximum signal queue size, and also number of different signals we can + * handle. + */ +#ifndef MAX_SIGNAL +#define MAX_SIGNAL 256 +#endif + +/* Maximum host name length */ +#ifndef MAX_HOSTNAME_LEN +#ifdef MAXHOSTNAMELEN +#define MAX_HOSTNAME_LEN MAXHOSTNAMELEN +#else +#define MAX_HOSTNAME_LEN 64 +#endif // MAXHOSTNAMELEN +#endif // MAX_HOSTNAME_LEN + +/* Maximum health check description length */ +#ifndef HCHK_DESC_LEN +#define HCHK_DESC_LEN 128 +#endif + +/* ciphers used as defaults on connect */ +#ifndef CONNECT_DEFAULT_CIPHERS +#define CONNECT_DEFAULT_CIPHERS NULL +#endif + +/* ciphers used as defaults on TLS 1.3 connect */ +#ifndef CONNECT_DEFAULT_CIPHERSUITES +#define CONNECT_DEFAULT_CIPHERSUITES NULL +#endif + +/* ciphers used as defaults on listeners */ +#ifndef LISTEN_DEFAULT_CIPHERS +#define LISTEN_DEFAULT_CIPHERS NULL +#endif + +/* cipher suites used as defaults on TLS 1.3 listeners */ +#ifndef LISTEN_DEFAULT_CIPHERSUITES +#define LISTEN_DEFAULT_CIPHERSUITES NULL +#endif + +/* named curve used as defaults for ECDHE ciphers */ +#ifndef ECDHE_DEFAULT_CURVE +#define ECDHE_DEFAULT_CURVE "prime256v1" +#endif + +/* ssl cache size */ +#ifndef SSLCACHESIZE +#define SSLCACHESIZE 20000 +#endif + +/* ssl max dh param size */ +#ifndef SSL_DEFAULT_DH_PARAM +#define SSL_DEFAULT_DH_PARAM 0 +#endif + +/* max memory cost per SSL session */ +#ifndef SSL_SESSION_MAX_COST +#define SSL_SESSION_MAX_COST (16*1024) // measured +#endif + +/* max memory cost per 
SSL handshake (on top of session) */ +#ifndef SSL_HANDSHAKE_MAX_COST +#define SSL_HANDSHAKE_MAX_COST (76*1024) // measured +#endif + +#ifndef DEFAULT_SSL_CTX_CACHE +#define DEFAULT_SSL_CTX_CACHE 1000 +#endif + +/* approximate stream size (for maxconn estimate) */ +#ifndef STREAM_MAX_COST +#define STREAM_MAX_COST (sizeof(struct stream) + \ + 2 * sizeof(struct channel) + \ + 2 * sizeof(struct connection) + \ + global.tune.requri_len + \ + 2 * global.tune.cookie_len) +#endif + +/* available memory estimate : count about 3% of overhead in various structures */ +#ifndef MEM_USABLE_RATIO +#define MEM_USABLE_RATIO 0.97 +#endif + +/* if not 0, maximum allocatable memory per process in MB */ +#ifndef HAPROXY_MEMMAX +#define HAPROXY_MEMMAX 0 +#endif + +/* For USE_ZLIB, DEFAULT_MAXZLIBMEM may be set to a hard-coded value that will + * preset a maxzlibmem value. Just leave it to zero for other configurations. + * Note that it's expressed in megabytes. + */ +#if !defined(DEFAULT_MAXZLIBMEM) || !defined(USE_ZLIB) +#undef DEFAULT_MAXZLIBMEM +#define DEFAULT_MAXZLIBMEM 0 +#endif + +/* On modern architectures with many threads, a fast memory allocator, and + * local pools, the global pools with their single list can be way slower than + * the standard allocator which already has its own per-thread arenas. In this + * case we disable global pools. The global pools may still be enforced + * using CONFIG_HAP_GLOBAL_POOLS though. 
+ */ +#if defined(USE_THREAD) && defined(HA_HAVE_FAST_MALLOC) && !defined(CONFIG_HAP_GLOBAL_POOLS) +#define CONFIG_HAP_NO_GLOBAL_POOLS +#endif + +/* default per-thread pool cache size when enabled */ +#ifndef CONFIG_HAP_POOL_CACHE_SIZE +#define CONFIG_HAP_POOL_CACHE_SIZE 524288 +#endif + +#ifndef CONFIG_HAP_POOL_CLUSTER_SIZE +#define CONFIG_HAP_POOL_CLUSTER_SIZE 8 +#endif + +/* number of bits to encode the per-pool buckets for large setups */ +#ifndef CONFIG_HAP_POOL_BUCKETS_BITS +# if defined(USE_THREAD) && MAX_THREADS >= 512 +# define CONFIG_HAP_POOL_BUCKETS_BITS 6 +# elif defined(USE_THREAD) && MAX_THREADS >= 128 +# define CONFIG_HAP_POOL_BUCKETS_BITS 5 +# elif defined(USE_THREAD) && MAX_THREADS >= 16 +# define CONFIG_HAP_POOL_BUCKETS_BITS 4 +# elif defined(USE_THREAD) +# define CONFIG_HAP_POOL_BUCKETS_BITS 3 +# else +# define CONFIG_HAP_POOL_BUCKETS_BITS 0 +# endif +#endif + +#define CONFIG_HAP_POOL_BUCKETS (1UL << (CONFIG_HAP_POOL_BUCKETS_BITS)) + +/* Number of samples used to compute the times reported in stats. A power of + * two is highly recommended, and this value multiplied by the largest response + * time must not overflow an unsigned int. See freq_ctr.h for more information. + * We consider that values are accurate to 95% with two batches of samples below, + * so in order to advertise accurate times across 1k samples, we effectively + * measure over 512.
+ */ +#ifndef TIME_STATS_SAMPLES +#define TIME_STATS_SAMPLES 512 +#endif + +/* max ocsp cert id asn1 encoded length */ +#ifndef OCSP_MAX_CERTID_ASN1_LENGTH +#define OCSP_MAX_CERTID_ASN1_LENGTH 128 +#endif + +#ifndef OCSP_MAX_RESPONSE_TIME_SKEW +#define OCSP_MAX_RESPONSE_TIME_SKEW 300 +#endif + +/* Number of TLS tickets to check, used for rotation */ +#ifndef TLS_TICKETS_NO +#define TLS_TICKETS_NO 3 +#endif + +/* pattern lookup default cache size, in number of entries : + * 10k entries at 10k req/s mean 1% risk of a collision after 60 years, that's + * already much less than the memory's reliability in most machines and more + * durable than most admin's life expectancy. A collision will result in a + * valid result to be returned for a different entry from the same list. + */ +#ifndef DEFAULT_PAT_LRU_SIZE +#define DEFAULT_PAT_LRU_SIZE 10000 +#endif + +/* maximum number of pollers that may be registered */ +#ifndef MAX_POLLERS +#define MAX_POLLERS 10 +#endif + +/* system sysfs directory */ +#define NUMA_DETECT_SYSTEM_SYSFS_PATH "/sys/devices/system" + +/* Number of cache trees */ +#ifndef CACHE_TREE_NUM +# if defined(USE_THREAD) +# define CACHE_TREE_NUM 8 +# else +# define CACHE_TREE_NUM 1 +# endif +#endif + +#endif /* _HAPROXY_DEFAULTS_H */ diff --git a/include/haproxy/dgram-t.h b/include/haproxy/dgram-t.h new file mode 100644 index 0000000..4e4c2af --- /dev/null +++ b/include/haproxy/dgram-t.h @@ -0,0 +1,53 @@ +/* + * include/haproxy/dgram-t.h + * This file provides structures and types for datagram processing + * + * Copyright (C) 2014 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HAPROXY_DGRAM_T_H +#define _HAPROXY_HAPROXY_DGRAM_T_H + +#include <arpa/inet.h> + +/* + * datagram related structure + */ +struct dgram_conn { + __decl_thread(HA_SPINLOCK_T lock); + const struct dgram_data_cb *data; /* data layer callbacks. Must be set before */ + void *owner; /* pointer to upper layer's entity */ + union { /* definitions which depend on connection type */ + struct { /*** information used by socket-based dgram ***/ + int fd; /* file descriptor */ + } sock; + } t; + struct { + struct sockaddr_storage from; /* client address, or address to spoof when connecting to the server */ + struct sockaddr_storage to; /* address reached by the client, or address to connect to */ + } addr; /* addresses of the remote side, client for producer and server for consumer */ +}; + +/* + * datagram callback structure + */ +struct dgram_data_cb { + void (*recv)(struct dgram_conn *dgram); /* recv callback */ + void (*send)(struct dgram_conn *dgram); /* send callback */ +}; + +#endif /* _HAPROXY_HAPROXY_DGRAM_T_H */ diff --git a/include/haproxy/dgram.h b/include/haproxy/dgram.h new file mode 100644 index 0000000..92d00ab --- /dev/null +++ b/include/haproxy/dgram.h @@ -0,0 +1,29 @@ +/* + * include/haproxy/dgram.h + * This file provides functions related to DGRAM processing.
+ * + * Copyright (C) 2014 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTO_DGRAM_H +#define _HAPROXY_PROTO_DGRAM_H + +#include <haproxy/dgram-t.h> + +void dgram_fd_handler(int); + +#endif // _HAPROXY_PROTO_DGRAM_H diff --git a/include/haproxy/dict-t.h b/include/haproxy/dict-t.h new file mode 100644 index 0000000..deaa88d --- /dev/null +++ b/include/haproxy/dict-t.h @@ -0,0 +1,46 @@ +/* + * include/haproxy/dict-t.h + * Dictionaries - types definitions + * + * Copyright 2019 Frederic Lecaille <flecaille@haproxy.com> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _HAPROXY_DICT_T_H +#define _HAPROXY_DICT_T_H + +#include <import/ebtree-t.h> +#include <haproxy/api-t.h> +#include <haproxy/thread-t.h> + +struct dict_entry { + struct ebpt_node value; + unsigned int refcount; + size_t len; +}; + +struct dict { + const char *name; + struct eb_root values; + __decl_thread(HA_RWLOCK_T rwlock); +}; + +#endif /* _HAPROXY_DICT_T_H */ diff --git a/include/haproxy/dict.h b/include/haproxy/dict.h new file mode 100644 index 0000000..635c3f1 --- /dev/null +++ b/include/haproxy/dict.h @@ -0,0 +1,36 @@ +/* + * include/haproxy/dict.h + * Dictionaries - functions prototypes + * + * Copyright 2019 Frederic Lecaille <flecaille@haproxy.com> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _HAPROXY_DICT_H +#define _HAPROXY_DICT_H + +#include <haproxy/dict-t.h> + +struct dict *new_dict(const char *name); +struct dict_entry *dict_insert(struct dict *d, char *str); +void dict_entry_unref(struct dict *d, struct dict_entry *de); + +#endif /* _HAPROXY_DICT_H */ diff --git a/include/haproxy/dns-t.h b/include/haproxy/dns-t.h new file mode 100644 index 0000000..1c876e3 --- /dev/null +++ b/include/haproxy/dns-t.h @@ -0,0 +1,179 @@ +/* + * include/haproxy/dns-t.h + * This file provides structures and types for DNS. + * + * Copyright (C) 2014 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DNS_T_H +#define _HAPROXY_DNS_T_H + +#include <import/ebtree-t.h> + +#include <haproxy/connection-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/dgram-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/ring-t.h> +#include <haproxy/stats-t.h> +#include <haproxy/task-t.h> +#include <haproxy/thread.h> + +/* DNS header size */ +#define DNS_HEADER_SIZE ((int)sizeof(struct dns_header)) + +/* max pending requests per stream */ +#define DNS_STREAM_MAX_PIPELINED_REQ 4 + +#define DNS_TCP_MSG_MAX_SIZE 65535 +#define DNS_TCP_MSG_RING_MAX_SIZE (1 + 1 + 3 + DNS_TCP_MSG_MAX_SIZE) // varint_bytes(DNS_TCP_MSG_MAX_SIZE) == 3 + +/* DNS request or response header structure */ +struct dns_header { + uint16_t id; + uint16_t flags; + uint16_t qdcount; + uint16_t ancount; + uint16_t nscount; + uint16_t arcount; +} __attribute__ ((packed)); + +/* short structure to describe a DNS question */ +/* NOTE: big endian structure */ +struct dns_question { + unsigned short qtype; /* question type */ + unsigned short qclass; /* query class */ +} __attribute__ ((packed)); + + +/* NOTE: big endian structure */ +struct dns_additional_record { + uint8_t name; /* domain name, must be 0 (RFC 6891) */ + uint16_t type; /* record type DNS_RTYPE_OPT (41) */ + uint16_t udp_payload_size; /* maximum size accepted for the response */ + uint32_t extension; /* extended rcode and flags, not used for now */ + uint16_t data_length; /* data length */ +/* as of today, we don't support yet edns options, that said I already put a + * placeholder here for this purpose. 
We may need to define a dns_option_record + * structure which itself should point to different type of data, based on the + * extension set (client subnet, tcp keepalive, etc...)*/ +} __attribute__ ((packed)); + +/* Structure describing a name server used during name resolution. + * A name server belongs to a resolvers section. + */ +struct dns_stream_server { + struct server *srv; + struct ring *ring_req; + int max_slots; + int maxconn; + int idle_conns; + int cur_conns; + int max_active_conns; + size_t ofs_req; // ring buffer reader offset + size_t ofs_rsp; // ring buffer reader offset + struct task *task_req; /* req conn management */ + struct task *task_rsp; /* rsp management */ + struct task *task_idle; /* handle idle sess */ + struct list free_sess; + struct list idle_sess; + struct list wait_sess; + __decl_thread(HA_SPINLOCK_T lock); // lock to protect current struct +}; + +struct dns_dgram_server { + struct dgram_conn conn; /* transport layer */ + struct ring *ring_req; + size_t ofs_req; // ring buffer reader offset +}; + +struct dns_query { + struct eb32_node qid; + uint16_t original_qid; + int expire; + struct list list; +}; + +struct dns_session { + struct appctx *appctx; // appctx of current session + struct dns_stream_server *dss; + uint16_t tx_msg_offset; + int nb_queries; + int onfly_queries; + int query_counter; + struct list list; + struct list waiter; + struct list queries; + struct task *task_exp; + struct eb_root query_ids; /* tree to quickly lookup/retrieve query ids currently in use */ + size_t ofs; // ring buffer reader offset + struct ring ring; + struct { + uint16_t len; + uint16_t offset; + char *area; + } rx_msg; + unsigned char *tx_ring_area; + int shutdown; +}; + +/* Structure describing a name server + */ +struct dns_nameserver { + char *id; /* nameserver unique identifier */ + void *parent; + struct { + const char *file; /* file where the section appears */ + int line; /* line where the section appears */ + } conf; /* config 
information */ + + int (*process_responses)(struct dns_nameserver *ns); /* callback used to process responses */ + struct dns_dgram_server *dgram; /* used for dgram dns */ + struct dns_stream_server *stream; /* used for tcp dns */ + + EXTRA_COUNTERS(extra_counters); + struct dns_counters *counters; + + struct list list; /* nameserver chained list */ +}; + +/* mixed dns and resolver counters, we will have to split them */ +struct dns_counters { + char *id; + char *pid; + long long sent; /* - queries sent */ + long long snd_error; /* - sending errors */ + union { + struct { + long long valid; /* - valid response */ + long long update; /* - valid response used to update server's IP */ + long long cname; /* - CNAME response requiring new resolution */ + long long cname_error; /* - error when resolving CNAMEs */ + long long any_err; /* - void response (usually because ANY qtype) */ + long long nx; /* - NX response */ + long long timeout; /* - queries which reached timeout */ + long long refused; /* - queries refused */ + long long other; /* - other type of response */ + long long invalid; /* - malformed DNS response */ + long long too_big; /* - too big response */ + long long outdated; /* - outdated response (server slower than the other ones) */ + long long truncated; /* - truncated response */; + } resolver; + } app; /* application specific counteurs */ +}; + +#endif /* _HAPROXY_DNS_T_H */ diff --git a/include/haproxy/dns.h b/include/haproxy/dns.h new file mode 100644 index 0000000..84181c4 --- /dev/null +++ b/include/haproxy/dns.h @@ -0,0 +1,33 @@ +/* + * include/haproxy/dns.h + * This file provides functions related to DNS protocol + * + * Copyright (C) 2020 HAProxy Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DNS_H +#define _HAPROXY_DNS_H + +#include <haproxy/dns-t.h> +#include <haproxy/server-t.h> + +int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len); +ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size); +int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk); +int dns_stream_init(struct dns_nameserver *ns, struct server *s); + +#endif // _HAPROXY_DNS_H diff --git a/include/haproxy/dynbuf-t.h b/include/haproxy/dynbuf-t.h new file mode 100644 index 0000000..b5545ab --- /dev/null +++ b/include/haproxy/dynbuf-t.h @@ -0,0 +1,41 @@ +/* + * include/haproxy/dynbuf-t.h + * Structure definitions for dynamic buffer management. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DYNBUF_T_H +#define _HAPROXY_DYNBUF_T_H + + +/* an element of the <buffer_wq> list. It represents an object that need to + * acquire a buffer to continue its process. */ +struct buffer_wait { + void *target; /* The waiting object that should be woken up */ + int (*wakeup_cb)(void *); /* The function used to wake up the <target>, passed as argument */ + struct list list; /* Next element in the <buffer_wq> list */ +}; + +#endif /* _HAPROXY_DYNBUF_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/dynbuf.h b/include/haproxy/dynbuf.h new file mode 100644 index 0000000..a89800c --- /dev/null +++ b/include/haproxy/dynbuf.h @@ -0,0 +1,131 @@ +/* + * include/haproxy/dynbuf.h + * Buffer management functions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DYNBUF_H +#define _HAPROXY_DYNBUF_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <import/ist.h> +#include <haproxy/activity.h> +#include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/dynbuf-t.h> +#include <haproxy/pool.h> + +extern struct pool_head *pool_head_buffer; + +int init_buffer(void); +void buffer_dump(FILE *o, struct buffer *b, int from, int to); + +/*****************************************************************/ +/* These functions are used to compute various buffer area sizes */ +/*****************************************************************/ + +/* Return 1 if the buffer has less than 1/4 of its capacity free, otherwise 0 */ +static inline int buffer_almost_full(const struct buffer *buf) +{ + if (b_is_null(buf)) + return 0; + + return b_almost_full(buf); +} + +/**************************************************/ +/* Functions below are used for buffer allocation */ +/**************************************************/ + +/* Ensures that <buf> is allocated, or allocates it. If no memory is available, + * ((char *)1) is assigned instead with a zero size. The allocated buffer is + * returned, or NULL in case no memory is available. Since buffers only contain + * user data, poisonning is always disabled as it brings no benefit and impacts + * performance. Due to the difficult buffer_wait management, they are not + * subject to forced allocation failures either. 
+ */ +#define b_alloc(_buf) \ +({ \ + char *_area; \ + struct buffer *_retbuf = (_buf); \ + \ + if (!_retbuf->size) { \ + *_retbuf = BUF_WANTED; \ + _area = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL); \ + if (unlikely(!_area)) { \ + activity[tid].buf_wait++; \ + _retbuf = NULL; \ + } \ + else { \ + _retbuf->area = _area; \ + _retbuf->size = pool_head_buffer->size; \ + } \ + } \ + _retbuf; \ + }) + +/* Releases buffer <buf> (no check of emptiness). The buffer's head is marked + * empty. + */ +#define __b_free(_buf) \ + do { \ + char *area = (_buf)->area; \ + \ + /* let's first clear the area to save an occasional "show sess all" \ + * glancing over our shoulder from getting a dangling pointer. \ + */ \ + *(_buf) = BUF_NULL; \ + __ha_barrier_store(); \ + pool_free(pool_head_buffer, area); \ + } while (0) + +/* Releases buffer <buf> if allocated, and marks it empty. */ +#define b_free(_buf) \ + do { \ + if ((_buf)->size) \ + __b_free((_buf)); \ + } while (0) + +/* Offer one or multiple buffer currently belonging to target <from> to whoever + * needs one. Any pointer is valid for <from>, including NULL. Its purpose is + * to avoid passing a buffer to oneself in case of failed allocations (e.g. + * need two buffers, get one, fail, release it and wake up self again). In case + * of normal buffer release where it is expected that the caller is not waiting + * for a buffer, NULL is fine. It will wake waiters on the current thread only.
+ */ +void __offer_buffers(void *from, unsigned int count); + +static inline void offer_buffers(void *from, unsigned int count) +{ + if (!LIST_ISEMPTY(&th_ctx->buffer_wq)) + __offer_buffers(from, count); +} + + +#endif /* _HAPROXY_DYNBUF_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/errors.h b/include/haproxy/errors.h new file mode 100644 index 0000000..c102fed --- /dev/null +++ b/include/haproxy/errors.h @@ -0,0 +1,139 @@ +/* + * include/haproxy/errors.h + * Global error macros and constants + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_ERRORS_H +#define _HAPROXY_ERRORS_H + +#include <stdarg.h> +#include <stdio.h> + +#include <haproxy/buf-t.h> +#include <haproxy/obj_type-t.h> + +/* These flags may be used in various functions which are called from within + * loops (eg: to start all listeners from all proxies). They provide enough + * information to let the caller decide what to do. ERR_WARN and ERR_ALERT + * do not indicate any error, just that a message has been put in a shared + * buffer in order to be displayed by the caller. 
+ */ +#define ERR_NONE 0x00 /* no error, no message returned */ +#define ERR_RETRYABLE 0x01 /* retryable error, may be cumulated */ +#define ERR_FATAL 0x02 /* fatal error, may be cumulated */ +#define ERR_ABORT 0x04 /* it's preferable to end any possible loop */ +#define ERR_WARN 0x08 /* a warning message has been returned */ +#define ERR_ALERT 0x10 /* an alert message has been returned */ + +#define ERR_CODE (ERR_RETRYABLE|ERR_FATAL|ERR_ABORT) /* mask */ + +extern struct ring *startup_logs; + +/* These codes may be used by config parsing functions which detect errors and + * which need to inform the upper layer about them. They are all prefixed with + * "PE_" for "Parse Error". These codes will probably be extended, and functions + * making use of them should be documented as such. Only code PE_NONE (zero) may + * indicate a valid condition, all other ones must be caught as errors, even if + * unknown by the caller. This must not be used to forward warnings. + */ +enum { + PE_NONE = 0, /* no error */ + PE_ENUM_OOR, /* enum data out of allowed range */ + PE_EXIST, /* trying to create something which already exists */ + PE_ARG_MISSING, /* mandatory argument not provided */ + PE_ARG_NOT_USED, /* argument provided cannot be used */ + PE_ARG_INVC, /* invalid char in argument (pointer not provided) */ + PE_ARG_INVC_PTR, /* invalid char in argument (pointer provided) */ + PE_ARG_NOT_FOUND, /* argument references something not found */ + PE_ARG_VALUE_OOR, /* argument value is out of range */ +}; + + +void usermsgs_clr(const char *prefix); +int usermsgs_empty(void); +const char *usermsgs_str(void); +extern uint tot_warnings; + +/************ Error reporting functions ***********/ + +struct usermsgs_ctx { + struct buffer str; + + const char *prefix; /* prefix of every output */ + const char *file; /* related filename for config parsing */ + int line; /* related line number for config parsing */ + enum obj_type *obj; /* related proxy, server, ...
*/ +}; +void set_usermsgs_ctx(const char *file, int line, enum obj_type *obj); +void register_parsing_obj(enum obj_type *obj); +void reset_usermsgs_ctx(void); + +/* + * Displays the message on stderr with the date and pid. Overrides the quiet + * mode during startup. + */ +void ha_alert(const char *fmt, ...) + __attribute__ ((format(printf, 1, 2))); + +/* + * Displays the message on stderr with the date and pid. + */ +void ha_warning(const char *fmt, ...) + __attribute__ ((format(printf, 1, 2))); + +/* + * These functions are reserved to output diagnostics on MODE_DIAG. + * Use the underscore variants only if MODE_DIAG has already been checked. + */ +void _ha_vdiag_warning(const char *fmt, va_list argp); +void _ha_diag_warning(const char *fmt, ...); +void ha_diag_warning(const char *fmt, ...) + __attribute__ ((format(printf, 1 ,2))); + +/* Check for both MODE_DIAG and <cond> before outputting a diagnostic warning */ +#define HA_DIAG_WARNING_COND(cond, fmt, ...) \ + do { \ + if ((global.mode & MODE_DIAG) && (cond)) \ + _ha_diag_warning((fmt), ##__VA_ARGS__); \ + } while (0) + +/* + * Displays the message on stderr with the date and pid. + */ +void ha_notice(const char *fmt, ...) + __attribute__ ((format(printf, 1, 2))); + +/* + * Displays the message on <out> only if quiet mode is not set. + */ +void qfprintf(FILE *out, const char *fmt, ...) 
+ __attribute__ ((format(printf, 2, 3))); + +void startup_logs_init(); +struct ring *startup_logs_dup(struct ring *src); +void startup_logs_free(struct ring *r); + +#endif /* _HAPROXY_ERRORS_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/event_hdl-t.h b/include/haproxy/event_hdl-t.h new file mode 100644 index 0000000..d499852 --- /dev/null +++ b/include/haproxy/event_hdl-t.h @@ -0,0 +1,295 @@ +/* + * include/haproxy/event_hdl-t.h + * event handlers management definitions + * + * Copyright 2022 HAProxy Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_EVENT_HDL_T_H +# define _HAPROXY_EVENT_HDL_T_H + +#include <stdint.h> +#include <sys/time.h> + +#include <haproxy/api-t.h> + +/* event data struct are defined as followed */ +struct event_hdl_cb_data_template { + struct { + /* safe data can be safely used from both + * sync and async handlers + * data consistency is guaranteed + */ + } safe; + struct { + /* unsafe data may only be used from sync handlers: + * in async mode, data consistency cannot be guaranteed + * and unsafe data may already be stale, thus using + * it is highly discouraged because it + * could lead to undefined behavior (UAF, null dereference...) 
+ */ + } unsafe; +}; + +/* event_hdl tunables */ +struct event_hdl_tune { + unsigned int max_events_at_once; +}; + +/* FIXME: adjust if needed! Should be large enough + * to support every struct event_hdl_cb_data_x types + * BUG_ON check in publish/async_mode and static assert + * in EVENT_HDL_CB_DATA will ensure this + */ +#define EVENT_HDL_ASYNC_EVENT_DATA (768) +/* used internally to store a single copy of event data when dealing with + * async handlers. + * The same copy can be provided to multiple handlers to prevent memory waste: + * refcount is used to keep track of references so that + * data can be freed when not used anymore + */ +typedef void (*event_hdl_data_free)(const void *data); +struct event_hdl_async_event_data +{ + /* internal storage */ + char data[EVENT_HDL_ASYNC_EVENT_DATA]; + /* user-provided free function if event data relies on + * dynamic members that require specific cleanup + */ + event_hdl_data_free mfree; + uint32_t refcount; +}; + +/* type for storing event subscription type */ +struct event_hdl_sub_type +{ + /* up to 256 families, non cumulative, adjust if needed */ + uint8_t family; + /* up to 16 sub types using bitmasks, adjust if needed */ + uint16_t subtype; +}; + +struct event_hdl_sub_list_head { + struct mt_list head; + struct mt_list known; /* api uses this to track known subscription lists */ +}; + +/* event_hdl_sub_list is an alias (please use this for portability) */ +typedef struct event_hdl_sub_list_head event_hdl_sub_list; + +struct event_hdl_async_equeue_head { + struct mt_list head; + uint32_t size; /* near realtime size, not fully synced with head (to be used as a hint) */ +}; + +/* event_hdl_async_equeue is an alias to mt_list (please use this for portability) */ +typedef struct event_hdl_async_equeue_head event_hdl_async_equeue; + +/* subscription mgmt from event */ +struct event_hdl_sub_mgmt +{ + /* manage subscriptions from event + * this must not be used directly because locking might be required + */ + struct 
event_hdl_sub *this; + /* safe functions than can be used from event context (sync and async mode) */ + struct event_hdl_sub_type (*getsub)(const struct event_hdl_sub_mgmt *); + int (*resub)(const struct event_hdl_sub_mgmt *, struct event_hdl_sub_type); + void (*unsub)(const struct event_hdl_sub_mgmt *); +}; + +/* single event structure pushed into async event queue + * used by tasks async handlers + */ +struct event_hdl_async_event +{ + struct mt_list mt_list; + struct event_hdl_sub_type type; + /* data wrapper - should not be used directly */ + struct event_hdl_async_event_data *_data; + /* for easy data access, + * points to _data->data if data is available + */ + void *data; + void *private; + struct timeval when; + struct event_hdl_sub_mgmt sub_mgmt; +}; + +/* internal structure provided to function event_hdl_publish() + * It contains ptr to data relevant to the event + */ +struct event_hdl_cb_data { + /* internal use: ptr to struct event_hdl_cb_data_type */ + void *_ptr; + /* internal use: holds actual data size*/ + size_t _size; + /* user specified freeing function for event_hdl_cb_data_type + * struct members + */ + event_hdl_data_free _mfree; +}; + +/* struct provided to event_hdl_cb_* handlers + * contains data related to the event + * that triggered the handler + */ +struct event_hdl_cb +{ + /* event type */ + struct event_hdl_sub_type e_type; + /* event data */ + void *e_data; + /* manage the subscription responsible for handing the event to us */ + const struct event_hdl_sub_mgmt *sub_mgmt; + + /* may be used by sync event handler to ensure + * it runs in sync mode, and thus is eligible to access unsafe data. + * This could save the day when users are copy-pasting function + * logic from a sync handler to an async handler without + * taking appropriate precautions and unsafe accesses are performed. 
+ * (See EVENT_HDL_ASSERT_SYNC macro API helper) + */ + uint8_t _sync; +}; + +/* prototype for event_hdl_cb_sync function pointer */ +typedef void (*event_hdl_cb_sync)(const struct event_hdl_cb *cb, void *private); +/* prototype for event_hdl_cb_async function pointer */ +typedef void (*event_hdl_cb_async)(const struct event_hdl_cb *cb, void *private); +/* prototype for event_hdl_private_free function pointer */ +typedef void (*event_hdl_private_free)(void *private); + +/* tasklet forward declaration */ +struct tasklet; +/* enum for async mode */ +enum event_hdl_async_mode +{ + EVENT_HDL_ASYNC_MODE_NORMAL = 1, + EVENT_HDL_ASYNC_MODE_ADVANCED = 2 +}; + +/* event hdl, used when subscribing (and then associated with a subscription) */ +struct event_hdl { + /* optional unique id (hash) for lookup */ + uint64_t id; + /* handler debug: origin (initial event subscription calling place) */ + const char *dorigin; + /* handler requires async mode: + * EVENT_HDL_ASYNC_MODE_NORMAL = normal + * EVENT_HDL_ASYNC_MODE_ADVANCED = advanced, single task wakeup + */ + uint8_t async; + + union { + event_hdl_cb_sync sync_ptr; /* if !async */ + event_hdl_cb_async async_ptr; /* only used if async==1 (normal) */ + }; + + /* ptr to async task responsible for consuming events */ + struct tasklet *async_task; + /* used by async tasks to consume pending events */ + event_hdl_async_equeue *async_equeue; + /* function ptr automatically called by: + * async task when hdl is unregistered and private is no longer referenced + * sync context when unregistering is performed + */ + event_hdl_private_free private_free; + /* it is not safe to assume that private will not + * be used anymore once hdl is unregistered: + * with async handlers, private could still be referenced + * in pending events to be consumed later by the task (by design). + * If freeing private is needed, you must provide private_free + * function pointer when registering.
+ * It will be called when private is no longer used + * after unregistering hdl to perform private cleanup. + * (please use this even in sync mode so that subscription + * can easily be turned into async mode later without breaking stuff) + */ + void *private; +}; + +/* flags for event_hdl_sub struct (32 bits) */ +#define EHDL_SUB_F_PAUSED 0x0001 /* subscription will temporarily ignore events */ + +/* list elem: subscription (handler subscribed to specific events) + */ +struct event_hdl_sub { + struct mt_list mt_list; + /* event type subscription */ + struct event_hdl_sub_type sub; + uint32_t flags; + /* event handler */ + struct event_hdl hdl; + /* used to guarantee that END event will be delivered + * (memory is allocated when registering, no memory failure can occur at runtime) + */ + struct event_hdl_async_event *async_end; + /* > 0 : subscription is referenced, don't free yet + * use atomic OPS to write and read from it + */ + uint32_t refcount; + /* TODO: atomic_call_counter for stats?! 
*/ +}; + +#define ESUB_INDEX(n) (1 << ((n) - 1)) /* bit mask for 1-based subtype index <n> */ + +#define EVENT_HDL_SUB_TYPE(_family, _type) ((struct event_hdl_sub_type){ .family = (_family), .subtype = ESUB_INDEX(_type) }) +#define EVENT_HDL_SUB_FAMILY(_family) ((struct event_hdl_sub_type){ .family = (_family), .subtype = ~0 }) + +#define EVENT_HDL_SUB_NONE ((struct event_hdl_sub_type){ .family = 0, .subtype = 0}) +/* for async tasks: subscription is ending */ +#define EVENT_HDL_SUB_END ((struct event_hdl_sub_type){ .family = 0, .subtype = 1}) + +/* --------------------------------------- */ + +/* user defined event types are listed here + * please reflect any change in these macros in the subtype map + * defined below that is used to perform string to event type and + * event type to string conversions + */ + +/* TODO */ + +/* SERVER FAMILY, provides event_hdl_cb_data_server struct + * (will be defined in haproxy/server-t.h) + */ +#define EVENT_HDL_SUB_SERVER EVENT_HDL_SUB_FAMILY(1) +#define EVENT_HDL_SUB_SERVER_ADD EVENT_HDL_SUB_TYPE(1,1) +#define EVENT_HDL_SUB_SERVER_DEL EVENT_HDL_SUB_TYPE(1,2) +#define EVENT_HDL_SUB_SERVER_UP EVENT_HDL_SUB_TYPE(1,3) +#define EVENT_HDL_SUB_SERVER_DOWN EVENT_HDL_SUB_TYPE(1,4) +/* server state change */ +#define EVENT_HDL_SUB_SERVER_STATE EVENT_HDL_SUB_TYPE(1,5) +/* server admin change */ +#define EVENT_HDL_SUB_SERVER_ADMIN EVENT_HDL_SUB_TYPE(1,6) +/* server check-related (agent or health) event */ +#define EVENT_HDL_SUB_SERVER_CHECK EVENT_HDL_SUB_TYPE(1,7) +/* server inet addr (addr:svc_port tuple) change event */ +#define EVENT_HDL_SUB_SERVER_INETADDR EVENT_HDL_SUB_TYPE(1,8) + +/* --------------------------------------- */ + +/* Please reflect changes above in event_hdl_sub_type_map defined + * in event_hdl.c file + */ +struct event_hdl_sub_type_map { + const char *name; + struct event_hdl_sub_type type; +}; + +#endif /* _HAPROXY_EVENT_HDL_T_H */ diff --git a/include/haproxy/event_hdl.h b/include/haproxy/event_hdl.h new file mode 100644 index 0000000..5a7ee66 --- /dev/null +++
b/include/haproxy/event_hdl.h @@ -0,0 +1,512 @@ +/* + * include/haproxy/event_hdl.h + * event handlers management + * + * Copyright 2022 HAProxy Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_EVENT_HDL_H +# define _HAPROXY_EVENT_HDL_H + +#include <haproxy/event_hdl-t.h> +#include <haproxy/list.h> + +/* preprocessor trick to extract function calling place + * __FILE__:__LINE__ + */ +#define _EVENT_HDL_CALLING_PLACE2(line) #line +#define _EVENT_HDL_CALLING_PLACE1(line) _EVENT_HDL_CALLING_PLACE2(line) +#define _EVENT_HDL_CALLING_PLACE __FILE__":"_EVENT_HDL_CALLING_PLACE1(__LINE__) + +/* ------ PUBLIC EVENT_HDL API ------ */ + +/* You will find a lot of useful information/comments in this file, but if you're looking + * for a step by step documentation please check out 'doc/internals/api/event_hdl.txt' + */ + +/* Note: API helper macros are used in this file to make event_hdl functions usage + * simpler, safer and more consistent between sync mode and async mode + */ + +/* ======================================= EVENT_HDL_SYNC handlers ===================================== + * must be used only with extreme precautions + * sync handlers are directly called under the function that published the event. + * Hence, all the processing done within such function will impact the caller. 
+ * + * For this reason, you must be extremely careful when using sync mode, because trying to lock something + * that is already held by the caller, or depending on something external to the current thread will + * prevent the caller from running. + * + * Please consider using async handlers in this case, they are specifically made to solve this limitation. + * + * On the other hand, sync handlers are really useful when you directly depend on callers' provided data + * (example: pointer to data) or you need to perform something before the caller keeps going. + * A good example could be a cleanup function that will take care of freeing data, closing fds... related + * to event data before caller's flow keeps going (interrupting the process while dealing with the event). + */ + + +/* ===================================== EVENT_HDL_ASYNC handlers ====================================== + * async handlers are run in independent tasks, so that the caller (that published the event) can safely + * return to its own processing. + * + * async handlers may access safe event data safely with guaranteed consistency. + */ + + +/* ================================ IDENTIFIED vs ANONYMOUS EVENT_HDL ================================= + * When registering a sync or async event handler, you are free to provide a unique identifier (hash). + * + * id can be computed using event_hdl_id function. + * + * Not providing an id results in the subscription being considered as anonymous subscription. + * 0 is not a valid identifier (should be > 0) + * + * Identified subscription is guaranteed to be unique for a given subscription list, + * whereas anonymous subscriptions don't provide such guarantees. + * + * Identified subscriptions provide the ability to be later queried or unregistered from external code + * using dedicated id/hash for the lookups. 
+ * + * On the other hand, anonymous subscriptions don't, the only other way to reference an anonymous subscription + * is to use a subscription pointer. + * + */ + +/* general purpose hashing function when you want to compute + * an ID based on <scope> x <name> + * It is your responsibility to make sure <scope> is not used + * elsewhere in the code (or that you are fine with sharing + * the scope). + */ +uint64_t event_hdl_id(const char *scope, const char *name); + +/* ------ EVENT SUBSCRIPTIONS FUNCTIONS ------ */ + +/* macro helper: + * sync version + * + * identified subscription + * + * <_id>: subscription id that could be used later + * to perform subscription lookup by id + * <func>: pointer to 'event_hdl_cb_sync' prototyped function + * <_private>: pointer to private data that will be handed to <func> + * <_private_free>: pointer to 'event_hdl_private_free' prototyped function + * that will be called with <private> when unsubscription is performed + */ +#define EVENT_HDL_ID_SYNC(_id, func, _private, _private_free) \ + (struct event_hdl){ .id = _id, \ + .dorigin = _EVENT_HDL_CALLING_PLACE, \ + .async = 0, \ + .sync_ptr = func, \ + .private = _private, \ + .private_free = _private_free } + +/* macro helper: + * sync version + * + * anonymous subscription (no lookup by id) + * + * <func>: pointer to 'event_hdl_cb_sync' prototyped function + * <_private>: pointer to private data that will be handed to <func> + * <_private_free>: pointer to 'event_hdl_private_free' prototyped function + * that will be called with <private> when unsubscription is performed + */ +#define EVENT_HDL_SYNC(func, _private, _private_free) \ + EVENT_HDL_ID_SYNC(0, func, _private, _private_free) + +/* macro helper: + * async version + * + * identified subscription + * + * <_id>: subscription id that could be used later + * to perform subscription lookup by id + * <func>: pointer to 'event_hdl_cb_async' prototyped function + * <_private>: pointer to private data that will be handed to
<func> + * <_private_free>: pointer to 'event_hdl_private_free' prototyped function + * that will be called with <private> after unsubscription is performed, + * when no more events can refer to <private>. + */ +#define EVENT_HDL_ID_ASYNC(_id, func, _private, _private_free) \ + (struct event_hdl){ .id = _id, \ + .dorigin = _EVENT_HDL_CALLING_PLACE, \ + .async = EVENT_HDL_ASYNC_MODE_NORMAL, \ + .async_ptr = func, \ + .private = _private, \ + .private_free = _private_free } + +/* macro helper: + * async version + * + * anonymous subscription (no lookup by id) + * + * <func>: pointer to 'event_hdl_cb_async' prototyped function + * <_private>: pointer to private data that will be handed to <func> + * <_private_free>: pointer to 'event_hdl_private_free' prototyped function + * that will be called with <private> after unsubscription is performed, + * when no more events can refer to <private>. + */ +#define EVENT_HDL_ASYNC(func, _private, _private_free) \ + EVENT_HDL_ID_ASYNC(0, func, _private, _private_free) + +/* macro helper: + * async version + * same as EVENT_HDL_ID_ASYNC - advanced mode: + * you directly provide task and event_queue list. + * + * identified subscription + * + * <_id>: subscription id that could be used later + * to perform subscription lookup by id + * <equeue>: pointer to event_hdl_async_event queue where the pending + * events will be pushed. Cannot be NULL. + * <task>: pointer to task(let) responsible for consuming the events. + * Cannot be NULL. + * <_private>: pointer to private data that will be handed to <func> + * <_private_free>: pointer to 'event_hdl_private_free' prototyped function + * that will be called with <private> after unsubscription is performed, + * when no more events can refer to <private>. 
+ */ +#define EVENT_HDL_ID_ASYNC_TASK(_id, equeue, task, _private, _private_free) \ + (struct event_hdl){ .id = _id, \ + .dorigin = _EVENT_HDL_CALLING_PLACE, \ + .async = EVENT_HDL_ASYNC_MODE_ADVANCED, \ + .async_task = (struct tasklet *)task, \ + .async_equeue = equeue, \ + .private = _private, \ + .private_free = _private_free } + +/* macro helper: + * async version + * same as EVENT_HDL_ASYNC - advanced mode: + * you directly provide task and event_queue list. + * + * anonymous subscription (no lookup by id) + * + * <equeue>: pointer to event_hdl_async_event queue where the pending + * events will be pushed. Cannot be NULL. + * <task>: pointer to task(let) responsible for consuming the events. + * Cannot be NULL. + * <_private>: pointer to private data that will be handed to <func> + * <_private_free>: pointer to 'event_hdl_private_free' prototyped function + * that will be called with <private> after unsubscription is performed, + * when no more events can refer to <private>. + */ +#define EVENT_HDL_ASYNC_TASK(equeue, task, _private, _private_free) \ + EVENT_HDL_ID_ASYNC_TASK(0, equeue, task, _private, _private_free) + +/* register a new event subscription in <sub_list> + * that will handle <e_type> events + * + * This function requires you to use + * EVENT_HDL_(ID_)SYNC(), EVENT_HDL_(ID_)ASYNC() or EVENT_HDL_(ID_)ASYNC_TASK() (choose wisely) + * macro helpers to provide <hdl> argument + * + * If <sub_list> is not specified (equals NULL): + * global subscription list (process wide) will be used. + * + * For identified subscriptions (EVENT_HDL_ID_*), the function is safe against + * concurrent subscriptions attempts with the same ID: the ID will only be + * inserted once in the list and subsequent attempts will yield an error. + * However, trying to register the same ID multiple times is considered as + * an error (no specific error code is returned in this case) so the check should + * be performed by the caller if it is expected. 
(The caller must ensure that the ID + * is unique to prevent the error from being raised) + * + * Returns 1 in case of success, 0 in case of failure (invalid argument / memory error) + */ +int event_hdl_subscribe(event_hdl_sub_list *sub_list, + struct event_hdl_sub_type e_type, struct event_hdl hdl); + +/* same as event_hdl_subscribe, but + * returns the subscription ptr in case of success + * or NULL in case of failure + * subscription refcount is automatically incremented by 1 + * so that ptr remains valid while you use it. + * You must call event_hdl_drop() when you no longer + * use it or event_hdl_unsubscribe() to unregister the + * subscription + */ +struct event_hdl_sub *event_hdl_subscribe_ptr(event_hdl_sub_list *sub_list, + struct event_hdl_sub_type e_type, struct event_hdl hdl); + +/* update subscription type: + * if new type family does not match current family, does nothing + * only subtype update is supported + * Returns 1 for SUCCESS and 0 for FAILURE (update not supported) + */ +int event_hdl_resubscribe(struct event_hdl_sub *cur_sub, struct event_hdl_sub_type type); + +/* unregister an existing subscription <sub> + * will automatically call event_hdl_drop() + */ +void event_hdl_unsubscribe(struct event_hdl_sub *sub); + +/* decrease subscription refcount by 1 + * use this when you no longer use sub ptr + * provided by event_hdl_subscribe_ptr or + * to cancel previous event_hdl_take() + */ +void event_hdl_drop(struct event_hdl_sub *sub); + +/* increase subscription refcount by 1 + * event_hdl_drop is needed when ptr + * is no longer used + * or event_hdl_unsubscribe to end the subscription + */ +void event_hdl_take(struct event_hdl_sub *sub); + +/* ------ EVENT_HDL_LOOKUP: subscription lookup operations from external code ------ */ + +/* use this function to unregister the subscription <lookup_id> + * within <sub_list> list. + * If <sub_list> is NULL, global subscription list will be used. 
+ * Returns 1 for SUCCESS and 0 if not found + */ +int event_hdl_lookup_unsubscribe(event_hdl_sub_list *sub_list, + uint64_t lookup_id); + +/* use this function to update subscription by <lookup_id> within <sub_list> list + * if new type family does not match current family, does nothing + * only subtype update is supported + * If <sub_list> is NULL, global subscription list will be used. + * Returns 1 for SUCCESS and 0 if not found or not supported + */ +int event_hdl_lookup_resubscribe(event_hdl_sub_list *sub_list, + uint64_t lookup_id, struct event_hdl_sub_type type); + +/* use this function to get a new reference ptr to the subscription + * identified by <lookup_id> within <sub_list> list. + * If <sub_list> is NULL, global subscription list will be used. + * returns NULL if not found + * returned ptr should be released with event_hdl_drop when no longer used + * or event_hdl_unsubscribe to end the subscription + */ +struct event_hdl_sub *event_hdl_lookup_take(event_hdl_sub_list *sub_list, + uint64_t lookup_id); + +/* pause an existing subscription <sub> + * the subscription will no longer receive events (reversible) + * This can be reverted thanks to _resume() function + */ +void event_hdl_pause(struct event_hdl_sub *sub); + +/* resume an existing subscription <sub> + * that was previously paused using _pause() function + */ +void event_hdl_resume(struct event_hdl_sub *sub); + +/* Same as event_hdl_pause() for identified subscriptions: + * use this function to pause the subscription <lookup_id> + * within <sub_list> list. + * If <sub_list> is NULL, global subscription list will be used. + * Returns 1 for SUCCESS and 0 if not found + */ +int event_hdl_lookup_pause(event_hdl_sub_list *sub_list, + uint64_t lookup_id); + +/* Same as event_hdl_resume() for identified subscriptions: + * use this function to resume the subscription <lookup_id> + * within <sub_list> list. + * If <sub_list> is NULL, global subscription list will be used. 
+ * Returns 1 for SUCCESS and 0 if not found + */ +int event_hdl_lookup_resume(event_hdl_sub_list *sub_list, + uint64_t lookup_id); + +/* ------ PUBLISHING FUNCTIONS ------ */ + +/* this macro is provided as an internal helper to automatically populate + * data for fixed length structs as required by event_hdl publish function + */ +#define _EVENT_HDL_CB_DATA_ASSERT(size) \ + ({ \ + /* if this fails to compile \ + * it means you need to fix \ + * EVENT_HDL_ASYNC_EVENT_DATA \ + * size in event_hdl-t.h \ + */ \ + __attribute__((unused)) \ + char __static_assert[(size <= EVENT_HDL_ASYNC_EVENT_DATA) ? 1 : -1];\ + (size); \ + }) +#define _EVENT_HDL_CB_DATA(data,size,mfree) \ + (&(struct event_hdl_cb_data){ ._ptr = data, \ + ._size = size, \ + ._mfree = mfree }) + +/* Use this when 'safe' data is completely standalone */ +#define EVENT_HDL_CB_DATA(data) \ + _EVENT_HDL_CB_DATA(data, \ + _EVENT_HDL_CB_DATA_ASSERT(sizeof(*data)), \ + NULL) +/* Use this when 'safe' data points to dynamically allocated members + * that require freeing when the event is completely consumed + * (data in itself may be statically allocated as with + * EVENT_HDL_CB_DATA since the publish function will take + * care of copying it for async handlers) + * + * mfree function will be called with data as argument + * (or copy of data in async context) when the event is completely + * consumed (sync and async handlers included). This will give you + * enough context to perform the required cleanup steps. + * + * mfree should be prototyped like this: + * void (*mfree)(const void *data) + */ +#define EVENT_HDL_CB_DATA_DM(data, mfree) \ + _EVENT_HDL_CB_DATA(data, \ + _EVENT_HDL_CB_DATA_ASSERT(sizeof(*data)), \ + mfree) + +/* event publishing function + * this function should be called from anywhere in the code to notify + * about an <e_type> and provide some relevant <data> + * that will be provided to subscriptions in <sub_list> + * that are subscribed to <e_type>. 
+ * <data> should be provided using EVENT_HDL_CB_DATA helper macro + * + * Example: + * struct event_hdl_cb_data_server cb_data; + * + * /... + * cb_data initialization + * .../ + * + * event_hdl_publish(NULL, EVENT_HDL_SUB_SERVER_UP, EVENT_HDL_CB_DATA(&cb_data)); + */ +int event_hdl_publish(event_hdl_sub_list *sub_list, + struct event_hdl_sub_type e_type, const struct event_hdl_cb_data *data); + +/* ------ MISC/HELPER FUNCTIONS ------ */ + +/* returns a statically allocated string that is + * the printable representation of <sub_type> + * or "N/A" if <sub_type> does not exist + */ +const char *event_hdl_sub_type_to_string(struct event_hdl_sub_type sub_type); + +/* returns the internal sub_type corresponding + * to the printable representation <name> + * or EVENT_HDL_SUB_NONE if no such event exists + * (see event_hdl-t.h for the complete list of supported types) + */ +struct event_hdl_sub_type event_hdl_string_to_sub_type(const char *name); + +/* Use this from sync hdl to ensure the function is executed + * in sync mode (and thus unsafe data is safe to use from this ctx) + * This macro is meant to prevent unsafe data access + * if code from sync function is copy pasted into + * async function (or if sync handler is changed + * to async handler without adapting the code) + * FIXME: do we BUG_ON, or simply warn and return from the function? 
+ */ +#define EVENT_HDL_ASSERT_SYNC(cb) BUG_ON(!(cb)->_sync) + +/* check if a and b sub types are part of the same family */ +static inline int event_hdl_sub_family_equal(struct event_hdl_sub_type a, struct event_hdl_sub_type b) +{ + return (a.family == b.family); +} + +/* compares 2 event_hdl_sub_type structs + * returns 1 if equal, 0 if not equal + */ +static inline int event_hdl_sub_type_equal(struct event_hdl_sub_type a, struct event_hdl_sub_type b) +{ + return (a.family == b.family && a.subtype == b.subtype); +} + +/* performs subtraction between A and B event_hdl_sub_type: clears B's subtype bits from A (A with no family first takes B's family; A is returned unchanged if families differ) + */ +static inline struct event_hdl_sub_type event_hdl_sub_type_del(struct event_hdl_sub_type a, struct event_hdl_sub_type b) +{ + if (unlikely(!a.family)) + a.family = b.family; + if (unlikely(a.family != b.family)) + return a; + a.subtype &= ~b.subtype; + + return a; +} + +/* performs addition between A and B event_hdl_sub_type: sets B's subtype bits in A (A with no family first takes B's family; A is returned unchanged if families differ) + */ +static inline struct event_hdl_sub_type event_hdl_sub_type_add(struct event_hdl_sub_type a, struct event_hdl_sub_type b) +{ + if (unlikely(!a.family)) + a.family = b.family; + if (unlikely(a.family != b.family)) + return a; + a.subtype |= b.subtype; + + return a; +} + +/* use this function when you consumed an event in async handler + * (this will free the event so you must ensure that the event + * is already removed from the event queue and that you + * no longer make use of it) + */ +void event_hdl_async_free_event(struct event_hdl_async_event *e); + +/* use this for advanced async mode to initialize event queue */ +static inline void event_hdl_async_equeue_init(event_hdl_async_equeue *queue) +{ + MT_LIST_INIT(&queue->head); + queue->size = 0; +} + +/* use this for advanced async mode to pop an event from event queue */ +static inline struct event_hdl_async_event *event_hdl_async_equeue_pop(event_hdl_async_equeue *queue) +{ + struct event_hdl_async_event *event; + + event = MT_LIST_POP(&queue->head, struct event_hdl_async_event *, mt_list); + if (event) + 
HA_ATOMIC_DEC(&queue->size); + return event; +} + +/* use this for advanced async mode to check if the event queue is empty */ +static inline int event_hdl_async_equeue_isempty(event_hdl_async_equeue *queue) +{ + return MT_LIST_ISEMPTY(&queue->head); +} + +/* use this for advanced async mode to get the event queue size */ +static inline uint32_t event_hdl_async_equeue_size(event_hdl_async_equeue *queue) +{ + return HA_ATOMIC_LOAD(&queue->size); +} + +/* use this to initialize <sub_list> event subscription list */ +void event_hdl_sub_list_init(event_hdl_sub_list *sub_list); + +/* use this function when you need to destroy <sub_list> + * event subscription list + * All subscriptions will be removed and properly freed according + * to their types + */ +void event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list); + +/* event_hdl tunables */ +extern struct event_hdl_tune event_hdl_tune; + +#endif /* _HAPROXY_EVENT_HDL_H */ diff --git a/include/haproxy/extcheck.h b/include/haproxy/extcheck.h new file mode 100644 index 0000000..233d7c5 --- /dev/null +++ b/include/haproxy/extcheck.h @@ -0,0 +1,49 @@ +/* + * include/haproxy/extcheck.h + * Function prototypes for the external checks. + * + * Copyright 2000-2009,2020 Willy Tarreau <w@1wt.eu> + * Copyright 2014 Horms Solutions Ltd, Simon Horman <horms@verge.net.au> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_EXTCHECK_H +#define _HAPROXY_EXTCHECK_H + +#include <haproxy/check-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/task-t.h> + +struct task *process_chk_proc(struct task *t, void *context, unsigned int state); +int prepare_external_check(struct check *check); +int init_pid_list(void); + +int proxy_parse_extcheck(char **args, int section, struct proxy *curpx, + struct proxy *defpx, const char *file, int line, + char **errmsg); + +int proxy_parse_external_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); + + +#endif /* _HAPROXY_EXTCHECK_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/fcgi-app-t.h b/include/haproxy/fcgi-app-t.h new file mode 100644 index 0000000..fb6ab27 --- /dev/null +++ b/include/haproxy/fcgi-app-t.h @@ -0,0 +1,123 @@ +/* + * include/haproxy/fcgi-app-t.h + * This file defines everything related to FCGI applications. + * + * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTP_FCGI_T_H +#define _HAPROXY_HTTP_FCGI_T_H + +#include <import/ebtree-t.h> +#include <import/ist.h> + +#include <haproxy/acl-t.h> +#include <haproxy/api-t.h> +#include <haproxy/arg-t.h> +#include <haproxy/fcgi.h> +#include <haproxy/filters-t.h> +#include <haproxy/regex-t.h> + +#define FCGI_APP_FL_KEEP_CONN 0x00000001 /* Keep the connection alive */ +#define FCGI_APP_FL_GET_VALUES 0x00000002 /* Retrieve FCGI variables on connection establishment */ +#define FCGI_APP_FL_MPXS_CONNS 0x00000004 /* FCGI APP supports connection multiplexing */ + + +enum fcgi_rule_type { + FCGI_RULE_SET_PARAM = 0, + FCGI_RULE_UNSET_PARAM, + FCGI_RULE_PASS_HDR, + FCGI_RULE_HIDE_HDR, +}; + +/* Used during configuration parsing only and converted into fcgi_rule when + * filter is created. 
+ */ +struct fcgi_rule_conf { + enum fcgi_rule_type type; + char *name; + char *value; + struct acl_cond *cond; /* acl condition to set/unset the param */ + struct list list; +}; + +/* parameter rule evaluated during request analysis */ +struct fcgi_rule { + enum fcgi_rule_type type; + struct ist name; /* name of the parameter/header */ + struct list value; /* log-format compatible expression, may be empty */ + struct acl_cond *cond; /* acl condition to set the param */ + struct list list; +}; + +/* parameter rule to set/unset a param at the end of the analysis */ +struct fcgi_param_rule { + struct ist name; + struct list *value; /* if empty, unset the parameter */ + struct ebpt_node node; +}; + +/* header rule to pass/hide a header at the end of the analysis */ +struct fcgi_hdr_rule { + struct ist name; + int pass; /* 1 to pass the header, 0 otherwise */ + struct ebpt_node node; +}; + +struct fcgi_app { + char *name; /* name to identify this set of params */ + struct ist docroot; /* FCGI docroot */ + struct ist index; /* filename to append to URI ending with a '/' */ + struct my_regex *pathinfo_re; /* Regex to use to split scriptname and path-info */ + unsigned int flags; /* FCGI_APP_FL_* */ + struct list loggers; /* one per 'log' directive */ + unsigned int maxreqs; /* maximum number of concurrent requests */ + + struct list acls; /* list of acls declared for this application */ + + struct { + char *file; /* file where the section appears */ + int line; /* line where the section appears */ + struct list rules; /* list of rules used during config parsing */ + struct arg_list args; /* sample arg list that needs to be resolved */ + } conf; /* config information */ + struct fcgi_app *next; /* used to chain fcgi-app */ +}; + +/* FCGI config attached to backend proxies */ +struct fcgi_flt_conf { + char *name; /* fcgi-app name used during config parsing */ + struct fcgi_app *app; /* configuration of the fcgi application */ + + struct list param_rules; /* list of set/unset
rules */ + struct list hdr_rules; /* list of pass/hide rules */ +}; + +/* FCGI context attached to streams */ +struct fcgi_flt_ctx { + struct filter *filter; + struct fcgi_app *app; +}; + +#endif /* _HAPROXY_HTTP_FCGI_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/fcgi-app.h b/include/haproxy/fcgi-app.h new file mode 100644 index 0000000..99f0d58 --- /dev/null +++ b/include/haproxy/fcgi-app.h @@ -0,0 +1,42 @@ +/* + * include/haproxy/fcgi-app.h + * This file defines function prototypes for FCGI applications. + * + * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTP_FCGI_H +#define _HAPROXY_HTTP_FCGI_H + +#include <haproxy/fcgi-app-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/stream-t.h> + +struct fcgi_app *fcgi_app_find_by_name(const char *name); +struct fcgi_flt_conf *find_px_fcgi_conf(struct proxy *px); +struct fcgi_flt_ctx *find_strm_fcgi_ctx(struct stream *s); +struct fcgi_app *get_px_fcgi_app(struct proxy *px); +struct fcgi_app *get_strm_fcgi_app(struct stream *s); + +#endif /* _HAPROXY_HTTP_FCGI_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/fcgi.h b/include/haproxy/fcgi.h new file mode 100644 index 0000000..e276d69 --- /dev/null +++ b/include/haproxy/fcgi.h @@ -0,0 +1,133 @@ +/* + * include/haproxy/fcgi.h + * This file contains FastCGI protocol definitions. + * + * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FCGI_H +#define _HAPROXY_FCGI_H + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/buf-t.h> + +/* FCGI protocol version */ +#define FCGI_VERSION 0x1 + +/* flags for FCGI_BEGIN_REQUEST records */ +#define FCGI_KEEP_CONN 0x01 + +/* FCGI record's type */ +enum fcgi_record_type { + FCGI_BEGIN_REQUEST = 1, + FCGI_ABORT_REQUEST = 2, + FCGI_END_REQUEST = 3, + FCGI_PARAMS = 4, + FCGI_STDIN = 5, + FCGI_STDOUT = 6, + FCGI_STDERR = 7, + FCGI_DATA = 8, + FCGI_GET_VALUES = 9, + FCGI_GET_VALUES_RESULT = 10, + FCGI_UNKNOWN_TYPE = 11, + FCGI_ENTRIES +} __attribute__((packed)); + + +enum fcgi_role { + FCGI_RESPONDER = 1, + FCGI_AUTHORIZER = 2, /* Unsupported */ + FCGI_FILTER = 3, /* Unsupported */ +} __attribute__((packed)); + +/* Protocol status */ +enum fcgi_proto_status { + FCGI_PS_REQUEST_COMPLETE = 0, + FCGI_PS_CANT_MPX_CONN = 1, + FCGI_PS_OVERLOADED = 2, + FCGI_PS_UNKNOWN_ROLE = 3, + FCGI_PS_ENTRIES, +} __attribute__((packed)); + +struct fcgi_header { + uint8_t vsn; + uint8_t type; + uint16_t id; + uint16_t len; + uint8_t padding; + uint8_t rsv; +}; + +struct fcgi_param { + struct ist n; + struct ist v; +}; + +struct fcgi_begin_request { + enum fcgi_role role; + uint8_t flags; +}; + +struct fcgi_end_request { + uint32_t status; + uint8_t errcode; +}; + +struct fcgi_unknown_type { + uint8_t type; +}; + + +static inline const char *fcgi_rt_str(int type) +{ + switch (type) { + case FCGI_BEGIN_REQUEST : return "BEGIN_REQUEST"; + case FCGI_ABORT_REQUEST : return "ABORT_REQUEST"; + case FCGI_END_REQUEST : return "END_REQUEST"; + case FCGI_PARAMS : return "PARAMS"; + case FCGI_STDIN : return "STDIN"; + case FCGI_STDOUT : return "STDOUT"; + case FCGI_STDERR : return "STDERR"; + case FCGI_DATA : return "DATA"; + case 
FCGI_GET_VALUES : return "GET_VALUES"; + case FCGI_GET_VALUES_RESULT : return "GET_VALUES_RESULT"; + case FCGI_UNKNOWN_TYPE : return "UNKNOWN_TYPE"; + default : return "_UNKNOWN_"; + } +} + + +int fcgi_encode_record_hdr(struct buffer *out, const struct fcgi_header *h); +size_t fcgi_decode_record_hdr(const struct buffer *in, size_t o, struct fcgi_header *h); + +int fcgi_encode_begin_request(struct buffer *out, const struct fcgi_begin_request *r); + +int fcgi_encode_param(struct buffer *out, const struct fcgi_param *p); +size_t fcgi_decode_param(const struct buffer *in, size_t o, struct fcgi_param *p); +size_t fcgi_aligned_decode_param(const struct buffer *in, size_t o, struct fcgi_param *p); + +size_t fcgi_decode_end_request(const struct buffer *in, size_t o, struct fcgi_end_request *r); + +#endif /* _HAPROXY_FCGI_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/fd-t.h b/include/haproxy/fd-t.h new file mode 100644 index 0000000..c5e94cb --- /dev/null +++ b/include/haproxy/fd-t.h @@ -0,0 +1,251 @@ +/* + * include/haproxy/fd-t.h + * File descriptors states - check src/fd.c for explanations. + * + * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FD_T_H +#define _HAPROXY_FD_T_H + +#include <haproxy/api-t.h> +#include <haproxy/port_range-t.h> +#include <haproxy/show_flags-t.h> + +/* Direction for each FD event update */ +enum { + DIR_RD=0, + DIR_WR=1, +}; + + +/* fdtab[].state is a composite state describing what is known about the FD. + * For now, the following information are stored in it: + * - event configuration and status for each direction (R,W) split into + * active, ready, shutdown categories (FD_EV_*). These are known by their + * bit values as well so that test-and-set bit operations are possible. + * + * - last known polling status (FD_POLL_*). For ease of troubleshooting, + * avoid visually mixing these ones with the other ones above. 3 of these + * flags are updated on each poll() report (FD_POLL_IN, FD_POLL_OUT, + * FD_POLL_PRI). FD_POLL_HUP and FD_POLL_ERR are "sticky" in that once they + * are reported, they will not be cleared until the FD is closed. + */ + +/* bits positions for a few flags */ +#define FD_EV_ACTIVE_R_BIT 0 +#define FD_EV_READY_R_BIT 1 +#define FD_EV_SHUT_R_BIT 2 +/* unused: 3 */ + +#define FD_EV_ACTIVE_W_BIT 4 +#define FD_EV_READY_W_BIT 5 +#define FD_EV_SHUT_W_BIT 6 +#define FD_EV_ERR_RW_BIT 7 + +#define FD_POLL_IN_BIT 8 +#define FD_POLL_PRI_BIT 9 +#define FD_POLL_OUT_BIT 10 +#define FD_POLL_ERR_BIT 11 +#define FD_POLL_HUP_BIT 12 + +/* info/config bits */ +#define FD_LINGER_RISK_BIT 16 /* must kill lingering before closing */ +#define FD_CLONED_BIT 17 /* cloned socket, requires EPOLL_CTL_DEL on close */ +#define FD_INITIALIZED_BIT 18 /* init phase was done (e.g. 
output pipe set non-blocking) */ +#define FD_ET_POSSIBLE_BIT 19 /* edge-triggered is possible on this FD */ +#define FD_EXPORTED_BIT 20 /* FD is exported and must not be closed */ +#define FD_EXCL_SYSCALL_BIT 21 /* a syscall claims exclusivity on this FD */ +#define FD_DISOWN_BIT 22 /* this fd will be closed by some external code */ +#define FD_MUST_CLOSE_BIT 23 /* this fd will be closed by some external code */ + + +/* and flag values */ +#define FD_EV_ACTIVE_R (1U << FD_EV_ACTIVE_R_BIT) +#define FD_EV_ACTIVE_W (1U << FD_EV_ACTIVE_W_BIT) +#define FD_EV_ACTIVE_RW (FD_EV_ACTIVE_R | FD_EV_ACTIVE_W) + +#define FD_EV_READY_R (1U << FD_EV_READY_R_BIT) +#define FD_EV_READY_W (1U << FD_EV_READY_W_BIT) +#define FD_EV_READY_RW (FD_EV_READY_R | FD_EV_READY_W) + +/* note that when FD_EV_SHUT is set, ACTIVE and READY are cleared */ +#define FD_EV_SHUT_R (1U << FD_EV_SHUT_R_BIT) +#define FD_EV_SHUT_W (1U << FD_EV_SHUT_W_BIT) +#define FD_EV_SHUT_RW (FD_EV_SHUT_R | FD_EV_SHUT_W) + +/* note that when FD_EV_ERR is set, SHUT is also set. Also, ERR is for both + * directions at once (write error, socket dead, etc). 
+ */ +#define FD_EV_ERR_RW (1U << FD_EV_ERR_RW_BIT) + +/* mask covering all use cases above */ +#define FD_EV_ANY (FD_EV_ACTIVE_RW | FD_EV_READY_RW | FD_EV_SHUT_RW | FD_EV_ERR_RW) + +/* polling status */ +#define FD_POLL_IN (1U << FD_POLL_IN_BIT) +#define FD_POLL_PRI (1U << FD_POLL_PRI_BIT) +#define FD_POLL_OUT (1U << FD_POLL_OUT_BIT) +#define FD_POLL_ERR (1U << FD_POLL_ERR_BIT) +#define FD_POLL_HUP (1U << FD_POLL_HUP_BIT) +#define FD_POLL_UPDT_MASK (FD_POLL_IN | FD_POLL_PRI | FD_POLL_OUT) +#define FD_POLL_ANY_MASK (FD_POLL_IN | FD_POLL_PRI | FD_POLL_OUT | FD_POLL_ERR | FD_POLL_HUP) + +/* information/configuration flags */ +#define FD_LINGER_RISK (1U << FD_LINGER_RISK_BIT) +#define FD_CLONED (1U << FD_CLONED_BIT) +#define FD_INITIALIZED (1U << FD_INITIALIZED_BIT) +#define FD_ET_POSSIBLE (1U << FD_ET_POSSIBLE_BIT) +#define FD_EXPORTED (1U << FD_EXPORTED_BIT) +#define FD_EXCL_SYSCALL (1U << FD_EXCL_SYSCALL_BIT) +#define FD_DISOWN (1U << FD_DISOWN_BIT) +#define FD_MUST_CLOSE (1U << FD_MUST_CLOSE_BIT) + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *fd_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(FD_EV_ACTIVE_R, _(FD_EV_ACTIVE_W, _(FD_EV_READY_R, _(FD_EV_READY_W, + _(FD_EV_SHUT_R, _(FD_EV_SHUT_W, _(FD_EV_ERR_RW, _(FD_POLL_IN, + _(FD_POLL_PRI, _(FD_POLL_OUT, _(FD_POLL_ERR, _(FD_POLL_HUP, + _(FD_LINGER_RISK, _(FD_CLONED, _(FD_INITIALIZED, _(FD_ET_POSSIBLE, + _(FD_EXPORTED, _(FD_EXCL_SYSCALL, _(FD_DISOWN))))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* FD update status after fd_update_events() */ +enum { + FD_UPDT_DONE = 0, // update done, nothing else to be done + FD_UPDT_CLOSED, // FD was closed + FD_UPDT_MIGRATED, // FD was migrated, ignore it now +}; + +/* This is the value used to mark a file descriptor as dead. This value is + * negative, this is important so that tests on fd < 0 properly match. It + * also has the nice property of being highly negative but neither overflowing + * nor changing sign on 32-bit machines when multiplied by sizeof(fdtab). + * This ensures that any unexpected dereference of such an uninitialized + * file descriptor will lead to so large a dereference that it will crash + * the process at the exact location of the bug with a clean stack trace + * instead of causing silent manipulation of other FDs. And it's readable + * when found in a dump. + */ +#define DEAD_FD_MAGIC 0xFDDEADFD + +/* fdlist_entry: entry used by the fd cache. + * >= 0 means we're in the cache and gives the FD of the next in the cache, + * -1 means we're in the cache and the last element, + * -2 means the entry is locked, + * <= -3 means not in the cache, and next element is -4-fd + * + * It must remain 8-aligned so that aligned CAS operations may be done on both + * entries at once. + */ +struct fdlist_entry { + int next; + int prev; +} ALIGNED(8); + +/* head of the fd cache, per-group */ +struct fdlist { + int first; + int last; +} ALIGNED(64); + +/* info about one given fd. 
Note: only align on cache lines when using threads; + * 32-bit small archs can put everything in 32-bytes when threads are disabled. + * refc_tgid is an atomic 32-bit composite value made of 16 higher bits + * containing a refcount on tgid and the running_mask, and 16 lower bits + * containing a thread group ID and a lock bit on the 16th. The tgid may only + * be changed when refc is zero and running may only be checked/changed when + * refc is held and shows the reader is alone. An FD with tgid zero belongs to + * nobody. + */ +struct fdtab { + unsigned long running_mask; /* mask of thread IDs currently using the fd */ + unsigned long thread_mask; /* mask of thread IDs authorized to process the fd */ + unsigned long update_mask; /* mask of thread IDs having an update for fd */ + struct fdlist_entry update; /* Entry in the global update list */ + void (*iocb)(int fd); /* I/O handler */ + void *owner; /* the connection or listener associated with this fd, NULL if closed */ + unsigned int state; /* FD state for read and write directions (FD_EV_*) + FD_POLL_* */ + unsigned int refc_tgid; /* refcounted tgid, updated atomically */ +#ifdef DEBUG_FD + unsigned int event_count; /* number of events reported */ +#endif +} THREAD_ALIGNED(64); + +/* polled mask, one bit per thread and per direction for each FD */ +struct polled_mask { + unsigned long poll_recv; + unsigned long poll_send; +}; + +/* less often used information */ +struct fdinfo { + struct port_range *port_range; /* optional port range to bind to */ + int local_port; /* optional local port */ +}; + +/* + * Poller descriptors. + * - <name> is initialized by the poller's register() function, and should not + * be allocated, just linked to. + * - <pref> is initialized by the poller's register() function. It is set to 0 + * by default, meaning the poller is disabled. init() should set it to 0 in + * case of failure. term() must set it to 0. A generic unoptimized select() + * poller should set it to 100. 
+ * - <private> is initialized by the poller's init() function, and cleaned by + * the term() function. + * - clo() should be used to indicate to the poller that fd will be closed. + * - poll() calls the poller, expiring at <exp>, or immediately if <wake> is set + * - flags indicate what the poller supports (HAP_POLL_F_*) + */ + +#define HAP_POLL_F_RDHUP 0x00000001 /* the poller notifies of HUP with reads */ +#define HAP_POLL_F_ERRHUP 0x00000002 /* the poller reports ERR and HUP */ + +struct poller { + void *private; /* any private data for the poller */ + void (*clo)(const int fd); /* mark <fd> as closed */ + void (*poll)(struct poller *p, int exp, int wake); /* the poller itself */ + int (*init)(struct poller *p); /* poller initialization */ + void (*term)(struct poller *p); /* termination of this poller */ + int (*test)(struct poller *p); /* pre-init check of the poller */ + int (*fork)(struct poller *p); /* post-fork re-opening */ + const char *name; /* poller name */ + unsigned int flags; /* HAP_POLL_F_* */ + int pref; /* try pollers with higher preference first */ +}; + +#endif /* _HAPROXY_FD_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h new file mode 100644 index 0000000..11212ff --- /dev/null +++ b/include/haproxy/fd.h @@ -0,0 +1,542 @@ +/* + * include/haproxy/fd.h + * File descriptors states - exported variables and functions + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FD_H +#define _HAPROXY_FD_H + +#include <sys/time.h> +#include <sys/types.h> +#include <stdio.h> +#include <unistd.h> +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/atomic.h> +#include <haproxy/fd-t.h> +#include <haproxy/global.h> +#include <haproxy/thread.h> + +/* public variables */ + +extern struct poller cur_poller; /* the current poller */ +extern int nbpollers; +extern struct poller pollers[MAX_POLLERS]; /* all registered pollers */ +extern struct fdtab *fdtab; /* array of all the file descriptors */ +extern struct fdinfo *fdinfo; /* less-often used infos for file descriptors */ +extern int totalconn; /* total # of terminated sessions */ +extern int actconn; /* # of active sessions */ + +extern volatile struct fdlist update_list[MAX_TGROUPS]; +extern struct polled_mask *polled_mask; + +extern THREAD_LOCAL int *fd_updt; // FD updates list +extern THREAD_LOCAL int fd_nbupdt; // number of updates in the list + +extern int poller_wr_pipe[MAX_THREADS]; + +extern volatile int ha_used_fds; // Number of FDs we're currently using + +/* Deletes an FD from the fdsets. + * The file descriptor is also closed. + */ +void fd_delete(int fd); +void _fd_delete_orphan(int fd); + +/* makes the new fd non-blocking and clears all other O_* flags; + * this is meant to be used on new FDs. Returns -1 on failure. + */ +int fd_set_nonblock(int fd); + +/* makes the fd close-on-exec; returns -1 on failure. */ +int fd_set_cloexec(int fd); + +/* Migrate a FD to a new thread <new_tid>. */ +void fd_migrate_on(int fd, uint new_tid); + +/* + * Take over a FD belonging to another thread. + * Returns 0 on success, and -1 on failure. 
+ */ +int fd_takeover(int fd, void *expected_owner); + +ssize_t fd_write_frag_line(int fd, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg, int nl); + +/* close all FDs starting from <start> */ +void my_closefrom(int start); + +struct rlimit; +int raise_rlim_nofile(struct rlimit *old_limit, struct rlimit *new_limit); + +int compute_poll_timeout(int next); +void fd_leaving_poll(int wait_time, int status); + +/* disable the specified poller */ +void disable_poller(const char *poller_name); + +void poller_pipe_io_handler(int fd); + +/* + * Initialize the pollers till the best one is found. + * If none works, returns 0, otherwise 1. + * The pollers register themselves just before main() is called. + */ +int init_pollers(void); + +/* + * Deinitialize the pollers. + */ +void deinit_pollers(void); + +/* + * Some pollers may lose their connection after a fork(). It may be necessary + * to create initialize part of them again. Returns 0 in case of failure, + * otherwise 1. The fork() function may be NULL if unused. In case of error, + * the the current poller is destroyed and the caller is responsible for trying + * another one by calling init_pollers() again. + */ +int fork_poller(void); + +/* + * Lists the known pollers on <out>. + * Should be performed only before initialization. + */ +int list_pollers(FILE *out); + +/* + * Runs the polling loop + */ +void run_poller(); + +void fd_add_to_fd_list(volatile struct fdlist *list, int fd); +void fd_rm_from_fd_list(volatile struct fdlist *list, int fd); +void updt_fd_polling(const int fd); +int fd_update_events(int fd, uint evts); +void fd_reregister_all(int tgrp, ulong mask); + +/* Called from the poller to acknowledge we read an entry from the global + * update list, to remove our bit from the update_mask, and remove it from + * the list if we were the last one. 
+ */ +static inline void done_update_polling(int fd) +{ + unsigned long update_mask; + + update_mask = _HA_ATOMIC_AND_FETCH(&fdtab[fd].update_mask, ~ti->ltid_bit); + while ((update_mask & _HA_ATOMIC_LOAD(&tg->threads_enabled)) == 0) { + /* If we were the last one that had to update that entry, remove it from the list */ + fd_rm_from_fd_list(&update_list[tgid - 1], fd); + update_mask = _HA_ATOMIC_LOAD(&fdtab[fd].update_mask); + if ((update_mask & _HA_ATOMIC_LOAD(&tg->threads_enabled)) != 0) { + /* Maybe it's been re-updated in the meanwhile, and we + * wrongly removed it from the list, if so, re-add it + */ + fd_add_to_fd_list(&update_list[tgid - 1], fd); + update_mask = _HA_ATOMIC_LOAD(&fdtab[fd].update_mask); + /* And then check again, just in case after all it + * should be removed, even if it's very unlikely, given + * the current thread wouldn't have been able to take + * care of it yet */ + } else + break; + } +} + +/* + * returns true if the FD is active for recv + */ +static inline int fd_recv_active(const int fd) +{ + return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_R; +} + +/* + * returns true if the FD is ready for recv + */ +static inline int fd_recv_ready(const int fd) +{ + return (unsigned)fdtab[fd].state & FD_EV_READY_R; +} + +/* + * returns true if the FD is active for send + */ +static inline int fd_send_active(const int fd) +{ + return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_W; +} + +/* + * returns true if the FD is ready for send + */ +static inline int fd_send_ready(const int fd) +{ + return (unsigned)fdtab[fd].state & FD_EV_READY_W; +} + +/* + * returns true if the FD is active for recv or send + */ +static inline int fd_active(const int fd) +{ + return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_RW; +} + +/* Disable processing recv events on fd <fd> */ +static inline void fd_stop_recv(int fd) +{ + if (!(fdtab[fd].state & FD_EV_ACTIVE_R) || + !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT)) + return; +} + +/* Disable processing send events on 
fd <fd> */ +static inline void fd_stop_send(int fd) +{ + if (!(fdtab[fd].state & FD_EV_ACTIVE_W) || + !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT)) + return; +} + +/* Disable processing of events on fd <fd> for both directions. */ +static inline void fd_stop_both(int fd) +{ + uint old, new; + + old = fdtab[fd].state; + do { + if (!(old & FD_EV_ACTIVE_RW)) + return; + new = old & ~FD_EV_ACTIVE_RW; + } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); +} + +/* Report that FD <fd> cannot receive anymore without polling (EAGAIN detected). */ +static inline void fd_cant_recv(const int fd) +{ + /* removing ready never changes polled status */ + if (!(fdtab[fd].state & FD_EV_READY_R) || + !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT)) + return; +} + +/* Report that FD <fd> may receive again without polling. */ +static inline void fd_may_recv(const int fd) +{ + /* marking ready never changes polled status */ + if ((fdtab[fd].state & FD_EV_READY_R) || + HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT)) + return; +} + +/* Report that FD <fd> may receive again without polling but only if it's not + * active yet. This is in order to speculatively try to enable I/Os when it's + * highly likely that these will succeed, but without interfering with polling. + */ +static inline void fd_cond_recv(const int fd) +{ + if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) == 0) + HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT); +} + +/* Report that FD <fd> may send again without polling but only if it's not + * active yet. This is in order to speculatively try to enable I/Os when it's + * highly likely that these will succeed, but without interfering with polling. + */ +static inline void fd_cond_send(const int fd) +{ + if ((fdtab[fd].state & (FD_EV_ACTIVE_W|FD_EV_READY_W)) == 0) + HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT); +} + +/* Report that FD <fd> may receive and send without polling. Used at FD + * initialization. 
+ */ +static inline void fd_may_both(const int fd) +{ + HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_RW); +} + +/* Report that FD <fd> cannot send anymore without polling (EAGAIN detected). */ +static inline void fd_cant_send(const int fd) +{ + /* removing ready never changes polled status */ + if (!(fdtab[fd].state & FD_EV_READY_W) || + !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_W_BIT)) + return; +} + +/* Report that FD <fd> may send again without polling (EAGAIN not detected). */ +static inline void fd_may_send(const int fd) +{ + /* marking ready never changes polled status */ + if ((fdtab[fd].state & FD_EV_READY_W) || + HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT)) + return; +} + +/* Prepare FD <fd> to try to receive */ +static inline void fd_want_recv(int fd) +{ + if ((fdtab[fd].state & FD_EV_ACTIVE_R) || + HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT)) + return; + updt_fd_polling(fd); +} + +/* Prepare FD <fd> to try to receive, and only create update if fd_updt exists + * (essentially for receivers during early boot). + */ +static inline void fd_want_recv_safe(int fd) +{ + if ((fdtab[fd].state & FD_EV_ACTIVE_R) || + HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT)) + return; + if (fd_updt) + updt_fd_polling(fd); +} + +/* Prepare FD <fd> to try to send */ +static inline void fd_want_send(int fd) +{ + if ((fdtab[fd].state & FD_EV_ACTIVE_W) || + HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT)) + return; + updt_fd_polling(fd); +} + +/* returns the tgid from an fd (masks the refcount) */ +static forceinline int fd_tgid(int fd) +{ + return _HA_ATOMIC_LOAD(&fdtab[fd].refc_tgid) & 0xFFFF; +} + +/* Release a tgid previously taken by fd_grab_tgid() */ +static forceinline void fd_drop_tgid(int fd) +{ + HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); +} + +/* Unlock a tgid currently locked by fd_lock_tgid(). This will effectively + * allow threads from the FD's tgid to check the masks and manipulate the FD. 
+ */ +static forceinline void fd_unlock_tgid(int fd) +{ + HA_ATOMIC_AND(&fdtab[fd].refc_tgid, 0xffff7fffU); +} + +/* Switch the FD's TGID to the new value with a refcount of 1 and the lock bit + * set. It doesn't care about the current TGID, except that it will wait for + * the FD not to be already switching and having its refcount cleared. After + * the function returns, the caller is free to manipulate the masks, and it + * must call fd_unlock_tgid() to drop the lock, allowing threads from the + * designated group to use the FD. Finally a call to fd_drop_tgid() will be + * needed to drop the reference. + */ +static inline void fd_lock_tgid(int fd, uint desired_tgid) +{ + uint old; + + BUG_ON(!desired_tgid); + + old = tgid; // assume we start from the caller's tgid + desired_tgid |= 0x18000; // refcount=1, lock bit=1. + + while (1) { + old &= 0x7fff; // expect no lock and refcount==0 + if (_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &old, desired_tgid)) + break; + __ha_cpu_relax(); + } +} + +/* Grab a reference to the FD's TGID, and return the tgid. Note that a TGID of + * zero indicates the FD was closed, thus also fails (i.e. no need to drop it). + * On non-zero (success), the caller must release it using fd_drop_tgid(). + */ +static inline uint fd_take_tgid(int fd) +{ + uint old; + + old = _HA_ATOMIC_FETCH_ADD(&fdtab[fd].refc_tgid, 0x10000) & 0xffff; + if (likely(old)) + return old; + HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); + return 0; +} + +/* Reset a tgid without affecting the refcount */ +static forceinline void fd_reset_tgid(int fd) +{ + HA_ATOMIC_AND(&fdtab[fd].refc_tgid, 0xffff0000U); +} + +/* Try to grab a reference to the FD's TGID, but only if it matches the + * requested one (i.e. it succeeds with TGID refcnt held, or fails). Note that + * a TGID of zero indicates the FD was closed, thus also fails. It returns + * non-zero on success, in which case the caller must then release it using + * fd_drop_tgid(), or zero on failure. 
The function is optimized for use + * when it's likely that the tgid matches the desired one as it's by far + * the most common. + */ +static inline uint fd_grab_tgid(int fd, uint desired_tgid) +{ + uint old; + + old = _HA_ATOMIC_FETCH_ADD(&fdtab[fd].refc_tgid, 0x10000) & 0xffff; + if (likely(old == desired_tgid)) + return 1; + HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); + return 0; +} + +/* Set the FD's TGID to the new value with a refcount of 1, waiting for the + * current refcount to become 0, to cover the rare possibility that a late + * competing thread would be touching the tgid or the running mask in parallel. + * The caller must call fd_drop_tgid() once done. + */ +static inline void fd_claim_tgid(int fd, uint desired_tgid) +{ + uint old; + + BUG_ON(!desired_tgid); + + desired_tgid += 0x10000; // refcount=1 + old = 0; // assume unused (most likely) + while (1) { + if (_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &old, desired_tgid)) + break; + __ha_cpu_relax(); + old &= 0x7fff; // keep only the tgid and drop the lock + } +} + +/* atomically read the running mask if the tgid matches, or returns zero if it + * does not match. This is meant for use in code paths where the bit is expected + * to be present and will be sufficient to protect against a short-term group + * migration (e.g. tasks and return from iocb). + */ +static inline ulong fd_get_running(int fd, uint desired_tgid) +{ + ulong ret = 0; + uint old; + + /* TODO: may also be checked using an atomic double-load from a DWCAS + * on compatible architectures, which wouldn't require to modify nor + * restore the original value. + */ + old = _HA_ATOMIC_ADD_FETCH(&fdtab[fd].refc_tgid, 0x10000); + if (likely((old & 0xffff) == desired_tgid)) + ret = _HA_ATOMIC_LOAD(&fdtab[fd].running_mask); + _HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); + return ret; +} + +/* remove tid_bit from the fd's running mask and returns the value before the + * atomic operation, so that the caller can know if it was present. 
+ */ +static inline long fd_clr_running(int fd) +{ + return _HA_ATOMIC_FETCH_AND(&fdtab[fd].running_mask, ~ti->ltid_bit); +} + +/* Prepares <fd> for being polled on all permitted threads of this group ID + * (these will then be refined to only cover running ones). +*/ +static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), int tgid, unsigned long thread_mask) +{ + extern void sock_conn_iocb(int); + int newstate; + + /* conn_fd_handler should support edge-triggered FDs */ + newstate = 0; + if ((global.tune.options & GTUNE_FD_ET) && iocb == sock_conn_iocb) + newstate |= FD_ET_POSSIBLE; + + /* This must never happen and would definitely indicate a bug, in + * addition to overwriting some unexpected memory areas. + */ + BUG_ON(fd < 0); + BUG_ON(fd >= global.maxsock); + BUG_ON(fdtab[fd].owner != NULL); + BUG_ON(fdtab[fd].state != 0); + BUG_ON(tgid < 1 || tgid > MAX_TGROUPS); + + thread_mask &= tg->threads_enabled; + BUG_ON(thread_mask == 0); + + fd_claim_tgid(fd, tgid); + + BUG_ON(fdtab[fd].running_mask); + + fdtab[fd].owner = owner; + fdtab[fd].iocb = iocb; + fdtab[fd].state = newstate; + fdtab[fd].thread_mask = thread_mask; + fd_drop_tgid(fd); + +#ifdef DEBUG_FD + fdtab[fd].event_count = 0; +#endif + + /* note: do not reset polled_mask here as it indicates which poller + * still knows this FD from a possible previous round. 
+ */ + + /* the two directions are ready until proven otherwise */ + fd_may_both(fd); + _HA_ATOMIC_INC(&ha_used_fds); +} + +/* These are replacements for FD_SET, FD_CLR, FD_ISSET, working on uints */ +static inline void hap_fd_set(int fd, unsigned int *evts) +{ + _HA_ATOMIC_OR(&evts[fd / (8*sizeof(*evts))], 1U << (fd & (8*sizeof(*evts) - 1))); +} + +static inline void hap_fd_clr(int fd, unsigned int *evts) +{ + _HA_ATOMIC_AND(&evts[fd / (8*sizeof(*evts))], ~(1U << (fd & (8*sizeof(*evts) - 1)))); +} + +static inline unsigned int hap_fd_isset(int fd, unsigned int *evts) +{ + return evts[fd / (8*sizeof(*evts))] & (1U << (fd & (8*sizeof(*evts) - 1))); +} + +/* send a wake-up event to this thread, only if it's asleep and not notified yet */ +static inline void wake_thread(int thr) +{ + struct thread_ctx *ctx = &ha_thread_ctx[thr]; + + if ((_HA_ATOMIC_FETCH_OR(&ctx->flags, TH_FL_NOTIFIED) & (TH_FL_SLEEPING|TH_FL_NOTIFIED)) == TH_FL_SLEEPING) { + char c = 'c'; + DISGUISE(write(poller_wr_pipe[thr], &c, 1)); + } +} + + +#endif /* _HAPROXY_FD_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/filters-t.h b/include/haproxy/filters-t.h new file mode 100644 index 0000000..c86ef6f --- /dev/null +++ b/include/haproxy/filters-t.h @@ -0,0 +1,258 @@ +/* + * include/haproxy/filters-t.h + * This file defines everything related to stream filters. + * + * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef _HAPROXY_FILTERS_T_H +#define _HAPROXY_FILTERS_T_H + +#include <haproxy/api-t.h> + +/* Flags set on a filter config */ +#define FLT_CFG_FL_HTX 0x00000001 /* The filter can filter HTX streams */ + +/* Flags set on a filter instance */ +#define FLT_FL_IS_BACKEND_FILTER 0x0001 /* The filter is a backend filter */ +#define FLT_FL_IS_REQ_DATA_FILTER 0x0002 /* The filter will parse data on the request channel */ +#define FLT_FL_IS_RSP_DATA_FILTER 0x0004 /* The filter will parse data on the response channel */ + +/* Flags set on the stream, common to all filters attached to its stream */ +#define STRM_FLT_FL_HAS_FILTERS 0x0001 /* The stream has at least one filter */ +#define STRM_FLT_FL_HOLD_HTTP_HDRS 0x0002 /* At least one filter on the stream want to hold the message headers */ + + +struct http_msg; +struct proxy; +struct stream; +struct channel; +struct flt_conf; +struct filter; + +/* Descriptor for a "filter" keyword. The ->parse() function returns 0 in case + * of success, or a combination of ERR_* flags if an error is encountered. The + * function pointer can be NULL if not implemented. + */ +struct flt_kw { + const char *kw; + int (*parse)(char **args, int *cur_arg, struct proxy *px, + struct flt_conf *fconf, char **err, void *private); + void *private; +}; + +/* + * A keyword list. It is a NULL-terminated array of keywords. It embeds a struct + * list in order to be linked to other lists, allowing it to easily be declared + * where it is needed, and linked without duplicating data nor allocating + * memory. It is also possible to indicate a scope for the keywords. 
+ */ +struct flt_kw_list { + const char *scope; + struct list list; + struct flt_kw kw[VAR_ARRAY]; +}; + +/* + * Callbacks available on a filter: + * + * - init : Initializes the filter for a proxy. Returns a + * negative value if an error occurs. + * - deinit : Cleans up what the init function has done. + * - check : Check the filter config for a proxy. Returns the + * number of errors encountered. + * - init_per_thread : Initializes the filter for a proxy for a specific + * thread. Returns a negative value if an error + * occurs. + * - deinit_per_thread : Cleans up what the init_per_thread function has + * done. + * + * + * - attach : Called after a filter instance creation, when it is + * attached to a stream. This happens when the stream + * is started for filters defined on the stream's + * frontend and when the backend is set for filters + * declared on the stream's backend. + * Returns a negative value if an error occurs, 0 if + * the filter must be ignored for the stream, any other + * value otherwise. + * - stream_start : Called when a stream is started. This callback will + * only be called for filters defined on the stream's + * frontend. + * Returns a negative value if an error occurs, any + * other value otherwise. + * - stream_set_backend : Called when a backend is set for a stream. This + * callback will be called for all filters attached + * to a stream (frontend and backend). + * Returns a negative value if an error occurs, any + * other value otherwise. + * - stream_stop : Called when a stream is stopped. This callback will + * only be called for filters defined on the stream's + * frontend. + * - detach : Called when a filter instance is detached from a + * stream, before its destruction. This happens when + * the stream is stopped for filters defined on the + * stream's frontend and when the analyze ends for + * filters defined on the stream's backend. + * - check_timeouts : Called when a stream is woken up because of an + * expired timer. 
+ * + * + * - channel_start_analyze: Called when a filter starts to analyze a channel. + * Returns a negative value if an error occurs, 0 if + * it needs to wait, any other value otherwise. + * - channel_pre_analyze : Called before each analyzer attached to a channel, + * expects analyzers responsible for data sending. + * Returns a negative value if an error occurs, 0 if + * it needs to wait, any other value otherwise. + * - channel_post_analyze: Called after each analyzer attached to a channel, + * expects analyzers responsible for data sending. + * Returns a negative value if an error occurs, + * any other value otherwise. + * - channel_end_analyze : Called when all other analyzers have finished their + * processing. + * Returns a negative value if an error occurs, 0 if + * it needs to wait, any other value otherwise. + * + * + * - http_headers : Called before the body parsing, after all HTTP + * headers were parsed and analyzed. + * Returns a negative value if an error occurs, 0 if + * it needs to wait, any other value otherwise. + * - http_payload : Called when some data can be consumed. + * Returns a negative value if an error occurs, else + * the number of forwarded bytes. + * - http_end : Called when all the request/response has been + * processed and all body data has been forwarded. + * Returns a negative value if an error occurs, 0 if + * it needs to wait for some reason, any other value + * otherwise. + * - http_reset : Called when the HTTP message is reset. It happens + * either when a 100-continue response is received, + * which can be detected if s->txn->status is 10X, or + * if we're attempting a L7 retry. + * Returns nothing. + * - http_reply : Called when, at any time, HAProxy decides to stop + * the HTTP message's processing and to send a message + * to the client (mainly, when an error or a redirect + * occurs). + * Returns nothing. + * + * + * - tcp_payload : Called when some data can be consumed. 
+ * Returns a negative value if an error occurs, else + * the number of forwarded bytes. + */ +struct flt_ops { + /* + * Callbacks to manage the filter lifecycle + */ + int (*init) (struct proxy *p, struct flt_conf *fconf); + void (*deinit) (struct proxy *p, struct flt_conf *fconf); + int (*check) (struct proxy *p, struct flt_conf *fconf); + int (*init_per_thread) (struct proxy *p, struct flt_conf *fconf); + void (*deinit_per_thread)(struct proxy *p, struct flt_conf *fconf); + /* + * Stream callbacks + */ + int (*attach) (struct stream *s, struct filter *f); + int (*stream_start) (struct stream *s, struct filter *f); + int (*stream_set_backend)(struct stream *s, struct filter *f, struct proxy *be); + void (*stream_stop) (struct stream *s, struct filter *f); + void (*detach) (struct stream *s, struct filter *f); + void (*check_timeouts) (struct stream *s, struct filter *f); + /* + * Channel callbacks + */ + int (*channel_start_analyze)(struct stream *s, struct filter *f, struct channel *chn); + int (*channel_pre_analyze) (struct stream *s, struct filter *f, struct channel *chn, unsigned int an_bit); + int (*channel_post_analyze) (struct stream *s, struct filter *f, struct channel *chn, unsigned int an_bit); + int (*channel_end_analyze) (struct stream *s, struct filter *f, struct channel *chn); + + /* + * HTTP callbacks + */ + int (*http_headers) (struct stream *s, struct filter *f, struct http_msg *msg); + int (*http_payload) (struct stream *s, struct filter *f, struct http_msg *msg, + unsigned int offset, unsigned int len); + int (*http_end) (struct stream *s, struct filter *f, struct http_msg *msg); + + void (*http_reset) (struct stream *s, struct filter *f, struct http_msg *msg); + void (*http_reply) (struct stream *s, struct filter *f, short status, + const struct buffer *msg); + + /* + * TCP callbacks + */ + int (*tcp_payload) (struct stream *s, struct filter *f, struct channel *chn, + unsigned int offset, unsigned int len); +}; + +/* + * Structure representing 
the filter configuration, attached to a proxy and + * accessible from a filter when instantiated in a stream + */ +struct flt_conf { + const char *id; /* The filter id */ + struct flt_ops *ops; /* The filter callbacks */ + void *conf; /* The filter configuration */ + struct list list; /* Next filter for the same proxy */ + unsigned int flags; /* FLT_CFG_FL_* */ +}; + +/* + * Structure representing a filter instance attached to a stream + * + * 2D-Array fields are used to store info per channel. The first index stands + * for the request channel, and the second one for the response channel. + * Especially, <next> and <fwd> are offsets representing amount of data that the + * filter are, respectively, parsed and forwarded on a channel. Filters can + * access these values using FLT_NXT and FLT_FWD macros. + */ +struct filter { + struct flt_conf *config; /* the filter's configuration */ + void *ctx; /* The filter context (opaque) */ + unsigned short flags; /* FLT_FL_* */ + unsigned long long offset[2]; /* Offset of input data already filtered for a specific channel + * 0: request channel, 1: response channel */ + unsigned int pre_analyzers; /* bit field indicating analyzers to pre-process */ + unsigned int post_analyzers; /* bit field indicating analyzers to post-process */ + struct list list; /* Next filter for the same proxy/stream */ +}; + +/* + * Structure representing the "global" state of filters attached to a stream. + */ +struct strm_flt { + struct list filters; /* List of filters attached to a stream */ + struct filter *current[2]; /* From which filter resume processing, for a specific channel. + * This is used for resumable callbacks only, + * If NULL, we start from the first filter. 
+ * 0: request channel, 1: response channel */ + unsigned short flags; /* STRM_FLT_FL_* */ + unsigned char nb_req_data_filters; /* Number of data filters registered on the request channel */ + unsigned char nb_rsp_data_filters; /* Number of data filters registered on the response channel */ + unsigned long long offset[2]; /* Stream-level offsets, one per channel (see FLT_STRM_OFF) + * 0: request channel, 1: response channel */ +}; + +#endif /* _HAPROXY_FILTERS_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/filters.h b/include/haproxy/filters.h new file mode 100644 index 0000000..4a32c21 --- /dev/null +++ b/include/haproxy/filters.h @@ -0,0 +1,187 @@ +/* + * include/haproxy/filters.h + * This file defines function prototypes for stream filters management. + * + * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef _HAPROXY_FILTERS_H +#define _HAPROXY_FILTERS_H + +#include <haproxy/channel.h> +#include <haproxy/filters-t.h> +#include <haproxy/http_ana-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/stream-t.h> + +extern const char *trace_flt_id; +extern const char *http_comp_flt_id; +extern const char *cache_store_flt_id; +extern const char *spoe_filter_id; +extern const char *fcgi_flt_id; + +#define FLT_ID(flt) (flt)->config->id +#define FLT_CONF(flt) (flt)->config->conf +#define FLT_OPS(flt) (flt)->config->ops + +/* Useful macros to access per-channel values. It can be safely used inside + * filters. */ +#define CHN_IDX(chn) (((chn)->flags & CF_ISRESP) == CF_ISRESP) +#define FLT_STRM_OFF(s, chn) (strm_flt(s)->offset[CHN_IDX(chn)]) +#define FLT_OFF(flt, chn) ((flt)->offset[CHN_IDX(chn)]) + +#define HAS_FILTERS(strm) ((strm)->strm_flt.flags & STRM_FLT_FL_HAS_FILTERS) + +#define HAS_REQ_DATA_FILTERS(strm) ((strm)->strm_flt.nb_req_data_filters != 0) +#define HAS_RSP_DATA_FILTERS(strm) ((strm)->strm_flt.nb_rsp_data_filters != 0) +#define HAS_DATA_FILTERS(strm, chn) (((chn)->flags & CF_ISRESP) ? HAS_RSP_DATA_FILTERS(strm) : HAS_REQ_DATA_FILTERS(strm)) + +#define IS_REQ_DATA_FILTER(flt) ((flt)->flags & FLT_FL_IS_REQ_DATA_FILTER) +#define IS_RSP_DATA_FILTER(flt) ((flt)->flags & FLT_FL_IS_RSP_DATA_FILTER) +#define IS_DATA_FILTER(flt, chn) (((chn)->flags & CF_ISRESP) ? IS_RSP_DATA_FILTER(flt) : IS_REQ_DATA_FILTER(flt)) + +#define FLT_STRM_CB(strm, call) \ + do { \ + if (HAS_FILTERS(strm)) { call; } \ + } while (0) + +#define FLT_STRM_DATA_CB_IMPL_1(strm, chn, call, default_ret) \ + (HAS_DATA_FILTERS(strm, chn) ? 
call : default_ret) +#define FLT_STRM_DATA_CB_IMPL_2(strm, chn, call, default_ret, on_error) \ + ({ \ + int _ret; \ + if (HAS_DATA_FILTERS(strm, chn)) { \ + _ret = call; \ + if (_ret < 0) { on_error; } \ + } \ + else \ + _ret = default_ret; \ + _ret; \ + }) +#define FLT_STRM_DATA_CB_IMPL_3(strm, chn, call, default_ret, on_error, on_wait) \ + ({ \ + int _ret; \ + if (HAS_DATA_FILTERS(strm, chn)) { \ + _ret = call; \ + if (_ret < 0) { on_error; } \ + if (!_ret) { on_wait; } \ + } \ + else \ + _ret = default_ret; \ + _ret; \ + }) + +#define FLT_STRM_DATA_CB_IMPL_X(strm, chn, call, A, B, C, DATA_CB_IMPL, ...) \ + DATA_CB_IMPL + +#define FLT_STRM_DATA_CB(strm, chn, call, ...) \ + FLT_STRM_DATA_CB_IMPL_X(strm, chn, call, ##__VA_ARGS__, \ + FLT_STRM_DATA_CB_IMPL_3(strm, chn, call, ##__VA_ARGS__), \ + FLT_STRM_DATA_CB_IMPL_2(strm, chn, call, ##__VA_ARGS__), \ + FLT_STRM_DATA_CB_IMPL_1(strm, chn, call, ##__VA_ARGS__)) + +void flt_deinit(struct proxy *p); +int flt_check(struct proxy *p); + +int flt_stream_start(struct stream *s); +void flt_stream_stop(struct stream *s); +int flt_set_stream_backend(struct stream *s, struct proxy *be); +int flt_stream_init(struct stream *s); +void flt_stream_release(struct stream *s, int only_backend); +void flt_stream_check_timeouts(struct stream *s); + +int flt_http_payload(struct stream *s, struct http_msg *msg, unsigned int len); +int flt_http_end(struct stream *s, struct http_msg *msg); + +void flt_http_reset(struct stream *s, struct http_msg *msg); +void flt_http_reply(struct stream *s, short status, const struct buffer *msg); + +int flt_start_analyze(struct stream *s, struct channel *chn, unsigned int an_bit); +int flt_pre_analyze(struct stream *s, struct channel *chn, unsigned int an_bit); +int flt_post_analyze(struct stream *s, struct channel *chn, unsigned int an_bit); +int flt_analyze_http_headers(struct stream *s, struct channel *chn, unsigned int an_bit); +int flt_end_analyze(struct stream *s, struct channel *chn, unsigned int 
an_bit); + +int flt_xfer_data(struct stream *s, struct channel *chn, unsigned int an_bit); + +void flt_register_keywords(struct flt_kw_list *kwl); +struct flt_kw *flt_find_kw(const char *kw); +void flt_dump_kws(char **out); +void list_filters(FILE *out); + +/* Helper function that returns the "global" state of filters attached to a + * stream. */ +static inline struct strm_flt * +strm_flt(struct stream *s) +{ + return &s->strm_flt; +} + +/* Registers a filter to a channel. If the filter was already registered, this + * function does nothing. Once registered, the filter becomes a "data" filter for + * this channel and the per-channel data filter counter of the stream is + * incremented. */ +static inline void +register_data_filter(struct stream *s, struct channel *chn, struct filter *filter) +{ + if (!IS_DATA_FILTER(filter, chn)) { + if (chn->flags & CF_ISRESP) { + filter->flags |= FLT_FL_IS_RSP_DATA_FILTER; + strm_flt(s)->nb_rsp_data_filters++; + } + else { + filter->flags |= FLT_FL_IS_REQ_DATA_FILTER; + strm_flt(s)->nb_req_data_filters++; + } + } +} + +/* Unregisters a "data" filter from a channel. If the filter is not a data + * filter for this channel, this function does nothing. */ +static inline void +unregister_data_filter(struct stream *s, struct channel *chn, struct filter *filter) +{ + if (IS_DATA_FILTER(filter, chn)) { + if (chn->flags & CF_ISRESP) { + filter->flags &= ~FLT_FL_IS_RSP_DATA_FILTER; + strm_flt(s)->nb_rsp_data_filters--; + + } + else { + filter->flags &= ~FLT_FL_IS_REQ_DATA_FILTER; + strm_flt(s)->nb_req_data_filters--; + } + } +} + +/* This function must be called when a filter alters payload data. It updates + * the offsets of all previous filters. Failing to call this function when a + * filter changes the size of payload data leads to undefined behavior. + * + * It is the filter's responsibility to update the data itself. 
+ */ +static inline void +flt_update_offsets(struct filter *filter, struct channel *chn, int len) +{ + struct stream *s = chn_strm(chn); + struct filter *f; + + list_for_each_entry(f, &strm_flt(s)->filters, list) { + if (f == filter) + break; + FLT_OFF(f, chn) += len; + } +} + +#endif /* _HAPROXY_FILTERS_H */ diff --git a/include/haproxy/fix-t.h b/include/haproxy/fix-t.h new file mode 100644 index 0000000..4b4de55 --- /dev/null +++ b/include/haproxy/fix-t.h @@ -0,0 +1,70 @@ +/* + * include/haproxy/fix-t.h + * This file contains structure declarations for FIX protocol. + * + * Copyright 2020 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FIX_T_H +#define _HAPROXY_FIX_T_H + +#include <import/ist.h> + +/* + * FIX messages are composed of a list of Tag=Value pairs separated by a 'delimiter' + */ +#define FIX_DELIMITER 0x01 + +/* + * known FIX version strings + */ +#define FIX_4_0 (ist("FIX.4.0")) +#define FIX_4_1 (ist("FIX.4.1")) +#define FIX_4_2 (ist("FIX.4.2")) +#define FIX_4_3 (ist("FIX.4.3")) +#define FIX_4_4 (ist("FIX.4.4")) +#define FIX_5_0 (ist("FIXT.1.1")) +/* FIX_5_0SP1 and FIX_5_0SP2 have the same version string as FIX_5_0 */ + +/* + * Supported FIX tag IDs + */ +#define FIX_TAG_BeginString 8 +#define FIX_TAG_BodyLength 9 +#define FIX_TAG_CheckSum 10 +#define FIX_TAG_MsgType 35 +#define FIX_TAG_SenderCompID 49 +#define FIX_TAG_TargetCompID 56 + + +#define FIX_MSG_MINSIZE 26 /* Minimal length for a FIX Message */ +#define FIX_CHKSUM_SIZE 7 /* Length of the CheckSum tag (10=NNN<delim>) */ +/* + * return codes when parsing / validating FIX messages + */ +#define FIX_INVALID_MESSAGE -1 +#define FIX_NEED_MORE_DATA 0 +#define FIX_VALID_MESSAGE 1 + +#endif /* _HAPROXY_FIX_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/fix.h b/include/haproxy/fix.h new file mode 100644 index 0000000..94aa815 --- /dev/null +++ b/include/haproxy/fix.h @@ -0,0 +1,97 @@ +/* + * include/haproxy/fix.h + * This file contains functions and macros declarations for FIX protocol decoding. + * + * Copyright 2020 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FIX_H +#define _HAPROXY_FIX_H + +#include <import/ist.h> + +#include <haproxy/fix-t.h> +#include <haproxy/tools.h> + +unsigned int fix_check_id(const struct ist str, const struct ist version); +int fix_validate_message(const struct ist msg); +struct ist fix_tag_value(const struct ist msg, unsigned int tagid); + +/* + * Return the FIX version string (one of FIX_X_Y macros) corresponding to + * <str> or IST_NULL if not found. + */ +static inline struct ist fix_version(const struct ist str) +{ + /* 7 is the minimal size for the FIX version string */ + if (istlen(str) < 7) + return IST_NULL; + + if (isteq(FIX_4_0, str)) + return FIX_4_0; + else if (isteq(FIX_4_1, str)) + return FIX_4_1; + else if (isteq(FIX_4_2, str)) + return FIX_4_2; + else if (isteq(FIX_4_3, str)) + return FIX_4_3; + else if (isteq(FIX_4_4, str)) + return FIX_4_4; + else if (isteq(FIX_5_0, str)) + return FIX_5_0; + + return IST_NULL; +} + +/* + * Return the FIX tag ID corresponding to <tag> if one found or 0 if not. 
+ * + * full list of tag ID available here, just in case we need to support + * more "string" equivalent in the future: + * https://www.onixs.biz/fix-dictionary/4.2/fields_by_tag.html + */ +static inline unsigned int fix_tagid(const struct ist tag) +{ + unsigned id = fix_check_id(tag, IST_NULL); + + if (id) + return id; + + else if (isteqi(tag, ist("MsgType"))) + return FIX_TAG_MsgType; + else if (isteqi(tag, ist("CheckSum"))) + return FIX_TAG_CheckSum; + else if (isteqi(tag, ist("BodyLength"))) + return FIX_TAG_BodyLength; + else if (isteqi(tag, ist("TargetCompID"))) + return FIX_TAG_TargetCompID; + else if (isteqi(tag, ist("BeginString"))) + return FIX_TAG_BeginString; + else if (isteqi(tag, ist("SenderCompID"))) + return FIX_TAG_SenderCompID; + + return 0; +} + +#endif /* _HAPROXY_FIX_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/flt_http_comp.h b/include/haproxy/flt_http_comp.h new file mode 100644 index 0000000..56f984a --- /dev/null +++ b/include/haproxy/flt_http_comp.h @@ -0,0 +1,28 @@ +/* + * include/haproxy/flt_http_comp.h + * This file defines function prototypes for the compression filter. + * + * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef _HAPROXY_FLT_HTTP_COMP_H +#define _HAPROXY_FLT_HTTP_COMP_H + +#include <haproxy/proxy-t.h> + +int check_implicit_http_comp_flt(struct proxy *proxy); + +#endif // _HAPROXY_FLT_HTTP_COMP_H diff --git a/include/haproxy/freq_ctr-t.h b/include/haproxy/freq_ctr-t.h new file mode 100644 index 0000000..d5f1a89 --- /dev/null +++ b/include/haproxy/freq_ctr-t.h @@ -0,0 +1,45 @@ +/* + * include/haproxy/freq_ctr-t.h + * This file contains structure declarations for frequency counters. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FREQ_CTR_T_H +#define _HAPROXY_FREQ_CTR_T_H + +#include <haproxy/api-t.h> + +/* The generic freq_ctr counter counts a rate of events per period, where the + * period has to be known by the user. The period is measured in ticks and + * must be at least 2 ticks long. This form is slightly more CPU intensive for + * reads than the per-second form as it involves a divide. 
+ */ +struct freq_ctr { + unsigned int curr_tick; /* start date of current period (wrapping ticks) */ + unsigned int curr_ctr; /* cumulated value for current period */ + unsigned int prev_ctr; /* value for last period */ +}; + +#endif /* _HAPROXY_FREQ_CTR_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/freq_ctr.h b/include/haproxy/freq_ctr.h new file mode 100644 index 0000000..f3f6903 --- /dev/null +++ b/include/haproxy/freq_ctr.h @@ -0,0 +1,402 @@ +/* + * include/haproxy/freq_ctr.h + * This file contains macros and inline functions for frequency counters. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FREQ_CTR_H +#define _HAPROXY_FREQ_CTR_H + +#include <haproxy/api.h> +#include <haproxy/freq_ctr-t.h> +#include <haproxy/intops.h> +#include <haproxy/ticks.h> + +/* exported functions from freq_ctr.c */ +ullong freq_ctr_total(const struct freq_ctr *ctr, uint period, int pend); +int freq_ctr_overshoot_period(const struct freq_ctr *ctr, uint period, uint freq); +uint update_freq_ctr_period_slow(struct freq_ctr *ctr, uint period, uint inc); + +/* Update a frequency counter by <inc> incremental units. It is automatically + * rotated if the period is over. 
It is important that it correctly initializes + * a null area. + */ +static inline uint update_freq_ctr_period(struct freq_ctr *ctr, uint period, uint inc) +{ + uint curr_tick; + + /* our local clock (now_ms) is most of the time strictly equal to + * global_now_ms, and during the edge of the millisecond, global_now_ms + * might have been pushed further by another thread. Given that + * accessing this shared variable is extremely expensive, we first try + * to use our local date, which will be good almost every time. And we + * only switch to the global clock when we're out of the period so as + * to never put a date in the past there. + */ + curr_tick = HA_ATOMIC_LOAD(&ctr->curr_tick); + if (likely(now_ms - curr_tick < period)) + return HA_ATOMIC_ADD_FETCH(&ctr->curr_ctr, inc); + + return update_freq_ctr_period_slow(ctr, period, inc); +} + +/* Update a 1-sec frequency counter by <inc> incremental units. It is automatically + * rotated if the period is over. It is important that it correctly initializes + * a null area. + */ +static inline unsigned int update_freq_ctr(struct freq_ctr *ctr, unsigned int inc) +{ + return update_freq_ctr_period(ctr, MS_TO_TICKS(1000), inc); +} + +/* Reads a frequency counter taking history into account for missing time in + * current period. The period has to be passed in number of ticks and must + * match the one used to feed the counter. The counter value is reported for + * current global date. The return value has the same precision as one input + * data sample, so low rates over the period will be inaccurate but still + * appropriate for max checking. One trick we use for low values is to specially + * handle the case where the rate is between 0 and 1 in order to avoid flapping + * while waiting for the next event. 
+ * + * For immediate limit checking, it's recommended to use freq_ctr_remain_period() + * instead which does not have the flapping correction, so that even frequencies + * as low as one event/period are properly handled. + */ +static inline uint read_freq_ctr_period(const struct freq_ctr *ctr, uint period) +{ + ullong total = freq_ctr_total(ctr, period, -1); + + return div64_32(total, period); +} + +/* Same as read_freq_ctr_period() above except that a double is returned so + * that low rates can be more precise. + */ +static inline double read_freq_ctr_period_flt(const struct freq_ctr *ctr, uint period) +{ + ullong total = freq_ctr_total(ctr, period, -1); + + return (double)total / (double)period; +} + +/* Read a 1-sec frequency counter taking history into account for missing time + * in current period. + */ +static inline unsigned int read_freq_ctr(const struct freq_ctr *ctr) +{ + return read_freq_ctr_period(ctr, MS_TO_TICKS(1000)); +} + +/* Same as read_freq_ctr() above except that a double is returned so that low + * rates can be more precise. + */ +static inline double read_freq_ctr_flt(const struct freq_ctr *ctr) +{ + return read_freq_ctr_period_flt(ctr, MS_TO_TICKS(1000)); +} + +/* Returns the number of remaining events that can occur on this freq counter + * while respecting <freq> events per period, and taking into account that + * <pend> events are already known to be pending. Returns 0 if limit was reached. + */ +static inline uint freq_ctr_remain_period(const struct freq_ctr *ctr, uint period, uint freq, uint pend) +{ + ullong total = freq_ctr_total(ctr, period, pend); + uint avg = div64_32(total, period); + + if (avg > freq) + avg = freq; + return freq - avg; +} + +/* Returns the number of remaining events that can occur on this freq counter + * while respecting <freq> and taking into account that <pend> events are + * already known to be pending. Returns 0 if limit was reached. 
+ */ +static inline unsigned int freq_ctr_remain(const struct freq_ctr *ctr, unsigned int freq, unsigned int pend) +{ + return freq_ctr_remain_period(ctr, MS_TO_TICKS(1000), freq, pend); +} + +/* return the expected wait time in ms before the next event may occur, + * respecting frequency <freq>, and assuming there may already be some pending + * events. It returns zero if we can proceed immediately, otherwise the wait + * time, which will be rounded down 1ms for better accuracy, with a minimum + * of one ms. + */ +static inline uint next_event_delay_period(const struct freq_ctr *ctr, uint period, uint freq, uint pend) +{ + ullong total = freq_ctr_total(ctr, period, pend); + ullong limit = (ullong)freq * period; + uint wait; + + if (total < limit) + return 0; + + /* too many events already, let's count how long to wait before they're + * processed. For this we'll subtract from the number of pending events + * the ones programmed for the current period, to know how long to wait + * for the next period. Each event takes period/freq ticks. + */ + total -= limit; + wait = div64_32(total, (freq ? freq : 1)); + return MAX(wait, 1); +} + +/* Returns the expected wait time in ms before the next event may occur, + * respecting frequency <freq> over 1 second, and assuming there may already be + * some pending events. It returns zero if we can proceed immediately, otherwise + * the wait time, which will be rounded down 1ms for better accuracy, with a + * minimum of one ms. + */ +static inline unsigned int next_event_delay(const struct freq_ctr *ctr, unsigned int freq, unsigned int pend) +{ + return next_event_delay_period(ctr, MS_TO_TICKS(1000), freq, pend); +} + +/* While the functions above report average event counts per period, we are + * also interested in average values per event. For this we use a different + * method. 
The principle is to rely on a long tail which sums the new value + * with a fraction of the previous value, resulting in a sliding window of + * infinite length depending on the precision we're interested in. + * + * The idea is that we always keep (N-1)/N of the sum and add the new sampled + * value. The sum over N values can be computed with a simple program for a + * constant value 1 at each iteration : + * + * sum_{x=1}^{N} ((N-1)/N)^x ~= N * (e-1)/e + * + * Note: I'm not sure how to demonstrate this but at least this is easily + * verified with a simple program, the sum equals N * 0.632120 for any N + * moderately large (tens to hundreds). + * + * Inserting a constant sample value V here simply results in : + * + * sum = V * N * (e-1)/e + * + * But we don't want to integrate over a small period, but infinitely. Let's + * cut the infinity in P periods of N values. Each period M is exactly the same + * as period M-1 with a factor of ((N-1)/N)^N applied. A test shows that given a + * large N : + * + * ((N-1)/N)^N ~= 1/e + * + * Our sum is now a sum of each factor times : + * + * sum_{x=1}^{N*P} V * ((N-1)/N)^x ~= V * N * (e-1)/e * sum_{x=0}^{P} 1/e^x + * + * For P "large enough", in tests we get this : + * + * sum_{x=0}^{P} 1/e^x ~= e/(e-1) + * + * This simplifies the sum above : + * + * sum_{x=1}^{N*P} V * ((N-1)/N)^x = V * N + * + * So basically by summing values and applying an (N-1)/N factor to the last + * result we just get N times the values over the long term, so we can recover + * the constant value V by dividing by N. 
In order to limit the impact of integer + * overflows, we'll use this equivalence which saves us one multiply : + * + * x1 = x0 * (N-1)/N = x0 * (1 - 1/N) = x0 - x0/N + * + * And given that x0 is discrete here we'll have to saturate the values before + * performing the divide, so the value insertion will become : + * + * x1 = x0 - (x0 + N - 1) / N + * + * A value added at the entry of the sliding window of N values will thus be + * reduced to 1/e or 36.7% after N terms have been added. After a second batch, + * it will only be 1/e^2, or 13.5%, and so on. So practically speaking, each + * old period of N values represents only a quickly fading ratio of the global + * sum : + * + * period ratio + * 1 36.7% + * 2 13.5% + * 3 4.98% + * 4 1.83% + * 5 0.67% + * 6 0.25% + * 7 0.09% + * 8 0.033% + * 9 0.012% + * 10 0.0045% + * + * So after 10N samples, the initial value has already faded out by a factor of + * 22026, which is quite fast. If the sliding window is 1024 samples wide, it + * means that a sample will only count for 1/22k of its initial value after 10k + * samples went after it, which results in half of the value it would represent + * using an arithmetic mean. The benefit of this method is that it's very cheap + * in terms of computations when N is a power of two. This is very well suited + * to record response times as large values will fade out faster than with an + * arithmetic mean and will depend on sample count and not time. + * + * Demonstrating all the above assumptions with maths instead of a program is + * left as an exercise for the reader. + */ + +/* Adds sample value <v> to sliding window sum <sum> configured for <n> samples. + * The new sum is returned. <n> must not be zero since it is used as a divisor; + * better if it is a power of two. This function is thread-safe. 
+ */ +static inline unsigned int swrate_add(unsigned int *sum, unsigned int n, unsigned int v) +{ + unsigned int new_sum, old_sum; + + old_sum = *sum; + do { + new_sum = old_sum - (old_sum + n - 1) / n + v; + } while (!HA_ATOMIC_CAS(sum, &old_sum, new_sum) && __ha_cpu_relax()); + return new_sum; +} + +/* Adds sample value <v> to sliding window sum <sum> configured for <n> samples. + * The new sum is returned. Better if <n> is a power of two. This function is + * thread-safe. + * This function should give better accuracy than swrate_add when the number of + * samples collected is lower than the nominal window size. In such + * circumstances <n> should be set to 0, in which case nothing is subtracted + * from the previous sum before adding <v>. + */ +static inline unsigned int swrate_add_dynamic(unsigned int *sum, unsigned int n, unsigned int v) +{ + unsigned int new_sum, old_sum; + + old_sum = *sum; + do { + new_sum = old_sum - (n ? (old_sum + n - 1) / n : 0) + v; + } while (!HA_ATOMIC_CAS(sum, &old_sum, new_sum) && __ha_cpu_relax()); + return new_sum; +} + +/* Adds sample value <v> spanning <s> samples to sliding window sum <sum> + * configured for <n> samples, where <n> is supposed to be "much larger" than + * <s>. The new sum is returned. Better if <n> is a power of two. Note that this + * is only an approximation. Indeed, as can be seen with two samples only over an + * 8-sample window, the original function would return : + * sum1 = sum - (sum + 7) / 8 + v + * sum2 = sum1 - (sum1 + 7) / 8 + v + * = (sum - (sum + 7) / 8 + v) - (sum - (sum + 7) / 8 + v + 7) / 8 + v + * ~= 7sum/8 - 7/8 + v - sum/8 + sum/64 - 7/64 - v/8 - 7/8 + v + * ~= (3sum/4 + sum/64) - (7/4 + 7/64) + 15v/8 + * + * while the function below would return : + * sum = sum + 2*v - (sum + 8) * 2 / 8 + * = 3sum/4 + 2v - 2 + * + * this presents an error of ~ (sum/64 + 9/64 + v/8) = (sum+n+1)/(n^s) + v/n + * + * Thus the simplified function effectively replaces a part of the history with + * a linear sum instead of applying the exponential one. 
But as long as s/n is + * "small enough", the error fades away and remains small for both small and + * large values of n and s (typically < 0.2% measured). This function is + * thread-safe. + */ +static inline unsigned int swrate_add_scaled(unsigned int *sum, unsigned int n, unsigned int v, unsigned int s) +{ + unsigned int new_sum, old_sum; + + old_sum = *sum; + do { + new_sum = old_sum + v * s - div64_32((unsigned long long)old_sum * s + n - 1, n); + } while (!HA_ATOMIC_CAS(sum, &old_sum, new_sum) && __ha_cpu_relax()); + return new_sum; +} + +/* Opportunistic versions of the functions above: an attempt is made to update + * the value, but in case of contention, it's not retried. This is fine when + * rough estimates are needed and speed is preferred over accuracy. Note that + * the result of the update attempt is ignored: the locally computed sum is + * returned whether or not it was actually stored into <sum>. + */ + +static inline uint swrate_add_opportunistic(uint *sum, uint n, uint v) +{ + uint new_sum, old_sum; + + old_sum = *sum; + new_sum = old_sum - (old_sum + n - 1) / n + v; + HA_ATOMIC_CAS(sum, &old_sum, new_sum); + return new_sum; +} + +static inline uint swrate_add_dynamic_opportunistic(uint *sum, uint n, uint v) +{ + uint new_sum, old_sum; + + old_sum = *sum; + new_sum = old_sum - (n ? (old_sum + n - 1) / n : 0) + v; + HA_ATOMIC_CAS(sum, &old_sum, new_sum); + return new_sum; +} + +static inline uint swrate_add_scaled_opportunistic(uint *sum, uint n, uint v, uint s) +{ + uint new_sum, old_sum; + + old_sum = *sum; + new_sum = old_sum + v * s - div64_32((unsigned long long)old_sum * s + n - 1, n); + HA_ATOMIC_CAS(sum, &old_sum, new_sum); + return new_sum; +} + +/* Returns the average sample value for the sum <sum> over a sliding window of + * <n> samples. Better if <n> is a power of two. It must be the same <n> as the + * one used above in all additions. 
+ */ +static inline unsigned int swrate_avg(unsigned int sum, unsigned int n) +{ + return (sum + n - 1) / n; +} + +#endif /* _HAPROXY_FREQ_CTR_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/frontend.h b/include/haproxy/frontend.h new file mode 100644 index 0000000..8cd1a0a --- /dev/null +++ b/include/haproxy/frontend.h @@ -0,0 +1,38 @@ +/* + * include/haproxy/frontend.h + * This file declares frontend-specific functions. + * + * Copyright (C) 2000-2011 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FRONTEND_H +#define _HAPROXY_FRONTEND_H + +#include <haproxy/stream-t.h> + +int frontend_accept(struct stream *s); + +/* Takes no argument: the explicit (void) makes this a real prototype so that + * calls passing arguments are rejected by the compiler. */ +int increment_actconn(void); + +#endif /* _HAPROXY_FRONTEND_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h new file mode 100644 index 0000000..9b3cd78 --- /dev/null +++ b/include/haproxy/global-t.h @@ -0,0 +1,251 @@ +/* + * include/haproxy/global-t.h + * Global types and macros. Please avoid adding more stuff here! 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_GLOBAL_T_H +#define _HAPROXY_GLOBAL_T_H + +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/freq_ctr-t.h> + +/* modes of operation (global.mode) */ +#define MODE_DEBUG 0x01 +#define MODE_DAEMON 0x02 +#define MODE_QUIET 0x04 +#define MODE_CHECK 0x08 +#define MODE_VERBOSE 0x10 +#define MODE_STARTING 0x20 +#define MODE_FOREGROUND 0x40 +#define MODE_MWORKER 0x80 /* Master Worker */ +#define MODE_MWORKER_WAIT 0x100 /* Master Worker wait mode */ +#define MODE_ZERO_WARNING 0x200 /* warnings cause a failure */ +#define MODE_DIAG 0x400 /* extra warnings */ +#define MODE_CHECK_CONDITION 0x800 /* -cc mode */ +#define MODE_STOPPING 0x1000 /* the process is in the deinit phase, the event loop is not running anymore. 
*/ +#define MODE_DUMP_LIBS 0x2000 /* dump loaded libraries at the end of init phase */ +#define MODE_DUMP_KWD 0x4000 /* dump registered keywords (see kwd_dump for the list) */ +#define MODE_DUMP_CFG 0x8000 /* dump the configuration file */ +#define MODE_DUMP_NB_L 0x10000 /* dump line numbers when the configuration file is dump */ + +/* list of last checks to perform, depending on config options */ +#define LSTCHK_CAP_BIND 0x00000001 /* check that we can bind to any port */ +#define LSTCHK_NETADM 0x00000002 /* check that we have CAP_NET_ADMIN */ + +/* Global tuning options */ +/* available polling mechanisms */ +#define GTUNE_USE_SELECT (1<<0) +#define GTUNE_USE_POLL (1<<1) +#define GTUNE_USE_EPOLL (1<<2) +#define GTUNE_USE_KQUEUE (1<<3) +/* platform-specific options */ +#define GTUNE_USE_SPLICE (1<<4) +#define GTUNE_USE_GAI (1<<5) +#define GTUNE_LIMITED_QUIC (1<<6) +#define GTUNE_RESOLVE_DONTFAIL (1<<7) + +#define GTUNE_SOCKET_TRANSFER (1<<8) +#define GTUNE_NOEXIT_ONFAILURE (1<<9) +#define GTUNE_USE_SYSTEMD (1<<10) + +#define GTUNE_BUSY_POLLING (1<<11) +/* (1<<12) unused */ +#define GTUNE_SET_DUMPABLE (1<<13) +#define GTUNE_USE_EVPORTS (1<<14) +#define GTUNE_STRICT_LIMITS (1<<15) +#define GTUNE_INSECURE_FORK (1<<16) +#define GTUNE_INSECURE_SETUID (1<<17) +#define GTUNE_FD_ET (1<<18) +#define GTUNE_SCHED_LOW_LATENCY (1<<19) +#define GTUNE_IDLE_POOL_SHARED (1<<20) +#define GTUNE_DISABLE_H2_WEBSOCKET (1<<21) +#define GTUNE_DISABLE_ACTIVE_CLOSE (1<<22) +#define GTUNE_QUICK_EXIT (1<<23) +#define GTUNE_QUIC_SOCK_PER_CONN (1<<24) +#define GTUNE_NO_QUIC (1<<25) +#define GTUNE_USE_FAST_FWD (1<<26) +#define GTUNE_LISTENER_MQ_FAIR (1<<27) +#define GTUNE_LISTENER_MQ_OPT (1<<28) +#define GTUNE_LISTENER_MQ_ANY (GTUNE_LISTENER_MQ_FAIR | GTUNE_LISTENER_MQ_OPT) + +#define NO_ZERO_COPY_FWD 0x0001 /* Globally disable zero-copy FF */ +#define NO_ZERO_COPY_FWD_PT 0x0002 /* disable zero-copy FF for PT (recv & send are disabled automatically) */ +#define NO_ZERO_COPY_FWD_H1_RCV 0x0004 /* 
disable zero-copy FF for H1 on received */ +#define NO_ZERO_COPY_FWD_H1_SND 0x0008 /* disable zero-copy FF for H1 on send */ +#define NO_ZERO_COPY_FWD_H2_RCV 0x0010 /* disable zero-copy FF for H2 on received */ +#define NO_ZERO_COPY_FWD_H2_SND 0x0020 /* disable zero-copy FF for H2 on send */ +#define NO_ZERO_COPY_FWD_QUIC_RCV 0x0040 /* disable zero-copy FF for QUIC on received */ +#define NO_ZERO_COPY_FWD_QUIC_SND 0x0080 /* disable zero-copy FF for QUIC on send */ +#define NO_ZERO_COPY_FWD_FCGI_RCV 0x0100 /* disable zero-copy FF for FCGI on received */ +#define NO_ZERO_COPY_FWD_FCGI_SND 0x0200 /* disable zero-copy FF for FCGI on send */ + + +extern int cluster_secret_isset; /* non zero means a cluster secret was initialized */ + +/* SSL server verify mode */ +enum { + SSL_SERVER_VERIFY_NONE = 0, + SSL_SERVER_VERIFY_REQUIRED = 1, +}; + +/* bit values to go with "warned" above */ +#define WARN_ANY 0x00000001 /* any warning was emitted */ +#define WARN_FORCECLOSE_DEPRECATED 0x00000002 +#define WARN_EXEC_PATH 0x00000004 /* executable path already reported */ + +/* put there the forward declarations needed for global.h */ +struct proxy; + +/* FIXME : this will have to be redefined correctly */ +struct global { + int uid; + int gid; + int external_check; /* 0=disabled, 1=enabled, 2=enabled with env */ + int nbthread; + int mode; + unsigned int hard_stop_after; /* maximum time allowed to perform a soft-stop */ + unsigned int grace_delay; /* grace delay between SIGUSR1 and soft-stop */ + unsigned int close_spread_time; /* time window during which connection closing is spread */ + unsigned int close_spread_end; /* end of close spread window */ + int maxconn, hardmaxconn; + int maxsslconn; + int ssl_session_max_cost; /* how many bytes an SSL session may cost */ + int ssl_handshake_max_cost; /* how many bytes an SSL handshake may use */ + int ssl_used_frontend; /* non-zero if SSL is used in a frontend */ + int ssl_used_backend; /* non-zero if SSL is used in a backend */ + int 
ssl_used_async_engines; /* number of used async engines */ + unsigned int ssl_server_verify; /* default verify mode on the server side */ + int comp_rate_lim; /* HTTP compression rate limit */ + int maxpipes; /* max # of pipes */ + int maxsock; /* max # of sockets */ + int rlimit_nofile; /* default ulimit-n value : 0=unset */ + int rlimit_memmax_all; /* default all-process memory limit in megs ; 0=unset */ + int rlimit_memmax; /* default per-process memory limit in megs ; 0=unset */ + long maxzlibmem; /* max RAM for zlib in bytes */ + int nbtgroups; /* number of thread groups (IDs start at 1) */ + int spread_checks; + int max_spread_checks; + int max_syslog_len; + char *chroot; + char *pidfile; + char *node, *desc; /* node name & description */ + int localpeer_cmdline; /* whether or not the commandline "-L" was set */ + int fd_hard_limit; /* hard limit on ulimit-n : 0=unset */ + struct buffer log_tag; /* name for syslog */ + struct list loggers; /* one per 'log' directive */ + char *log_send_hostname; /* set hostname in syslog header */ + char *server_state_base; /* path to a directory where server state files can be found */ + char *server_state_file; /* path to the file where server states are loaded from */ + unsigned char cluster_secret[16]; /* 128 bits of an SHA1 digest of a secret defined as ASCII string */ + struct { + int maxpollevents; /* max number of poll events at once */ + int maxaccept; /* max number of consecutive accept() */ + int options; /* various tuning options */ + int runqueue_depth;/* max number of tasks to run at once */ + int recv_enough; /* how many input bytes at once are "enough" */ + int bufsize; /* buffer size in bytes, defaults to BUFSIZE */ + int maxrewrite; /* buffer max rewrite size in bytes, defaults to MAXREWRITE */ + int reserved_bufs; /* how many buffers can only be allocated for response */ + int buf_limit; /* if not null, how many total buffers may only be allocated */ + int client_sndbuf; /* set client sndbuf to this value if 
not null */ + int client_rcvbuf; /* set client rcvbuf to this value if not null */ + int server_sndbuf; /* set server sndbuf to this value if not null */ + int server_rcvbuf; /* set server rcvbuf to this value if not null */ + int frontend_sndbuf; /* set frontend dgram sndbuf to this value if not null */ + int frontend_rcvbuf; /* set frontend dgram rcvbuf to this value if not null */ + int backend_sndbuf; /* set backend dgram sndbuf to this value if not null */ + int backend_rcvbuf; /* set backend dgram rcvbuf to this value if not null */ + int pipesize; /* pipe size in bytes, system defaults if zero */ + int max_http_hdr; /* max number of HTTP headers, use MAX_HTTP_HDR if zero */ + int requri_len; /* max len of request URI, use REQURI_LEN if zero */ + int cookie_len; /* max length of cookie captures */ + int pattern_cache; /* max number of entries in the pattern cache. */ + int sslcachesize; /* SSL cache size in session, defaults to 20000 */ + int comp_maxlevel; /* max HTTP compression level */ + int pool_low_ratio; /* max ratio of FDs used before we stop using new idle connections */ + int pool_high_ratio; /* max ratio of FDs used before we start killing idle connections when creating new connections */ + int pool_low_count; /* max number of open FDs before we stop using new idle connections */ + int pool_high_count; /* max number of open FDs before we start killing idle connections when creating new connections */ + size_t pool_cache_size; /* per-thread cache size per pool (defaults to CONFIG_HAP_POOL_CACHE_SIZE) */ + unsigned short idle_timer; /* how long before an empty buffer is considered idle (ms) */ + unsigned short no_zero_copy_fwd; /* Flags to disable zero-copy fast-forwarding (global & per-protocols) */ + int nb_stk_ctr; /* number of stick counters, defaults to MAX_SESS_STKCTR */ + int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */ + uint max_checks_per_thread; /* if >0, no more than this many concurrent checks per 
thread */ +#ifdef USE_QUIC + unsigned int quic_backend_max_idle_timeout; + unsigned int quic_frontend_max_idle_timeout; + unsigned int quic_frontend_max_streams_bidi; + unsigned int quic_retry_threshold; + unsigned int quic_reorder_ratio; + unsigned int quic_streams_buf; + unsigned int quic_max_frame_loss; +#endif /* USE_QUIC */ + } tune; + struct { + char *prefix; /* path prefix of unix bind socket */ + struct { /* UNIX socket permissions */ + uid_t uid; /* -1 to leave unchanged */ + gid_t gid; /* -1 to leave unchanged */ + mode_t mode; /* 0 to leave unchanged */ + } ux; + } unix_bind; + struct proxy *cli_fe; /* the frontend holding the stats settings */ + int numa_cpu_mapping; + int prealloc_fd; + int cfg_curr_line; /* line number currently being parsed */ + const char *cfg_curr_file; /* config file currently being parsed or NULL */ + char *cfg_curr_section; /* config section name currently being parsed or NULL */ + + /* The info above is config stuff, it doesn't change during the process' life */ + /* A number of the elements below are updated by all threads in real time and + * suffer high contention, so we need to put them in their own cache lines, if + * possible grouped by changes. 
+ */ + ALWAYS_ALIGN(64); + struct freq_ctr conn_per_sec; + struct freq_ctr sess_per_sec; + struct freq_ctr ssl_per_sec; + struct freq_ctr ssl_fe_keys_per_sec; + struct freq_ctr ssl_be_keys_per_sec; + struct freq_ctr comp_bps_in; /* bytes per second, before http compression */ + struct freq_ctr comp_bps_out; /* bytes per second, after http compression */ + uint sslconns, totalsslconns; /* active, total # of SSL conns */ + int cps_lim, cps_max; + int sps_lim, sps_max; + int ssl_lim, ssl_max; + int ssl_fe_keys_max, ssl_be_keys_max; + unsigned int shctx_lookups, shctx_misses; + unsigned int req_count; /* request counter (HTTP or TCP session) for logs and unique_id */ + int last_checks; + uint32_t anon_key; + + /* leave this at the end to make sure we don't share this cache line by accident */ + ALWAYS_ALIGN(64); +}; + +#endif /* _HAPROXY_GLOBAL_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/global.h b/include/haproxy/global.h new file mode 100644 index 0000000..2e7fa6b --- /dev/null +++ b/include/haproxy/global.h @@ -0,0 +1,98 @@ +/* + * include/haproxy/global.h + * Exported global variables and functions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_GLOBAL_H +#define _HAPROXY_GLOBAL_H + +#include <haproxy/api-t.h> +#include <haproxy/global-t.h> + +extern char *build_features; +extern struct global global; +extern int pid; /* current process id */ +extern int actconn; /* # of active sessions */ +extern int listeners; +extern int jobs; /* # of active jobs (listeners, sessions, open devices) */ +extern int unstoppable_jobs; /* # of active jobs that can't be stopped during a soft stop */ +extern int active_peers; /* # of active peers (connection attempts and successes) */ +extern int connected_peers; /* # of really connected peers */ +extern int nb_oldpids; /* contains the number of old pids found */ +extern const int zero; +extern const int one; +extern const struct linger nolinger; +extern int stopping; /* non zero means stopping in progress */ +extern int killed; /* >0 means a hard-stop is triggered, >1 means hard-stop immediately */ +extern char hostname[MAX_HOSTNAME_LEN]; +extern char *localpeer; +extern unsigned int warned; /* bitfield of a few warnings to emit just once */ +extern struct list proc_list; /* list of processes in mworker mode */ +extern int master; /* 1 if in master, 0 otherwise */ +extern unsigned int rlim_fd_cur_at_boot; +extern unsigned int rlim_fd_max_at_boot; +extern int atexit_flag; +extern unsigned char boot_seed[20]; // per-boot random seed (160 bits initially) +extern THREAD_LOCAL struct buffer trash; + +struct proxy; +struct server; +int main(int argc, char **argv); +void deinit(void); +__attribute__((noreturn)) void deinit_and_exit(int); +void run_poll_loop(void); +int tell_old_pids(int sig); +int delete_oldpid(int pid); +void hap_register_build_opts(const char *str, int must_free); +void hap_register_feature(const char *name); +int 
split_version(const char *version, unsigned int *value); +int compare_current_version(const char *version); +void display_version(void); + +void mworker_accept_wrapper(int fd); +void mworker_reload(int hardreload); + +/* to be used with warned and WARN_*: returns 1 if <warning> was already set in <warned>, otherwise sets it and returns 0 */ +static inline int already_warned(unsigned int warning) +{ + if (warned & warning) + return 1; + warned |= warning; + return 0; +} + +extern unsigned int experimental_directives_allowed; + +struct cfg_keyword; +int check_kw_experimental(struct cfg_keyword *kw, const char *file, int linenum, + char **errmsg); +const char **hap_get_next_build_opt(const char **curr); + +/* simplified way to declare static build options in a file */ +#define REGISTER_BUILD_OPTS(str) \ + INITCALL2(STG_REGISTER, hap_register_build_opts, (str), 0) + +#endif /* _HAPROXY_GLOBAL_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/h1.h b/include/haproxy/h1.h new file mode 100644 index 0000000..7152c6e --- /dev/null +++ b/include/haproxy/h1.h @@ -0,0 +1,377 @@ +/* + * include/haproxy/h1.h + * This file contains HTTP/1 protocol definitions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_H1_H +#define _HAPROXY_H1_H + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/http.h> +#include <haproxy/http-hdr-t.h> +#include <haproxy/intops.h> + + +/* Possible states while parsing HTTP/1 messages (request|response). See h1m_state_str() for the matching debug names. */ +enum h1m_state { + H1_MSG_RQBEFORE = 0, // request: leading LF, before start line + H1_MSG_RQBEFORE_CR = 1, // request: leading CRLF, before start line + /* these ones define a request start line */ + H1_MSG_RQMETH = 2, // parsing the Method + H1_MSG_RQMETH_SP = 3, // space(s) after the Method + H1_MSG_RQURI = 4, // parsing the Request URI + H1_MSG_RQURI_SP = 5, // space(s) after the Request URI + H1_MSG_RQVER = 6, // parsing the Request Version + H1_MSG_RQLINE_END = 7, // end of request line (CR or LF) + + H1_MSG_RPBEFORE = 8, // response: leading LF, before start line + H1_MSG_RPBEFORE_CR = 9, // response: leading CRLF, before start line + + /* these ones define a response start line */ + H1_MSG_RPVER = 10, // parsing the Response Version + H1_MSG_RPVER_SP = 11, // space(s) after the Response Version + H1_MSG_RPCODE = 12, // response code + H1_MSG_RPCODE_SP = 13, // space(s) after the response code + H1_MSG_RPREASON = 14, // response reason + H1_MSG_RPLINE_END = 15, // end of response line (CR or LF) + + /* common header processing */ + H1_MSG_HDR_FIRST = 16, // waiting for first header or last CRLF (no LWS possible) + H1_MSG_HDR_NAME = 17, // parsing header name + H1_MSG_HDR_COL = 18, // parsing header colon + H1_MSG_HDR_L1_SP = 19, // parsing header LWS (SP|HT) before value + H1_MSG_HDR_L1_LF = 20, // parsing header LWS (LF) before value + H1_MSG_HDR_L1_LWS = 21, // checking whether it's a new header or an LWS + H1_MSG_HDR_VAL = 22, // parsing 
header value + H1_MSG_HDR_L2_LF = 23, // parsing header LWS (LF) inside/after value + H1_MSG_HDR_L2_LWS = 24, // checking whether it's a new header or an LWS + + H1_MSG_LAST_LF = 25, // parsing last LF, last state for headers + + /* Body processing. */ + + H1_MSG_CHUNK_SIZE = 26, // parsing the chunk size (RFC7230 #4.1) + H1_MSG_DATA = 27, // skipping data chunk / content-length data + H1_MSG_CHUNK_CRLF = 28, // skipping CRLF after data chunk + H1_MSG_TRAILERS = 29, // trailers (post-data entity headers) + /* we enter this state when we've received the end of the current message */ + H1_MSG_DONE = 30, // message end received, waiting for resync or close + H1_MSG_TUNNEL = 31, // tunneled data after DONE +} __attribute__((packed)); + + +/* HTTP/1 message flags (32 bit), for use in h1m->flags only */ +#define H1_MF_NONE 0x00000000 +#define H1_MF_CLEN 0x00000001 // content-length present +#define H1_MF_CHNK 0x00000002 // chunk present (as last encoding), exclusive with c-l +#define H1_MF_RESP 0x00000004 // this message is the response message +#define H1_MF_TOLOWER 0x00000008 // turn the header names to lower case +#define H1_MF_VER_11 0x00000010 // message indicates version 1.1 or above +#define H1_MF_CONN_CLO 0x00000020 // message contains "connection: close" +#define H1_MF_CONN_KAL 0x00000040 // message contains "connection: keep-alive" +#define H1_MF_CONN_UPG 0x00000080 // message contains "connection: upgrade" +#define H1_MF_XFER_LEN 0x00000100 // message xfer size can be determined +#define H1_MF_XFER_ENC 0x00000200 // transfer-encoding is present +#define H1_MF_NO_PHDR 0x00000400 // don't add pseudo-headers in the header list +#define H1_MF_HDRS_ONLY 0x00000800 // parse headers only +#define H1_MF_CLEAN_CONN_HDR 0x00001000 // skip close/keep-alive values of connection headers during parsing +#define H1_MF_METH_CONNECT 0x00002000 // Set for a response to a CONNECT request +#define H1_MF_METH_HEAD 0x00004000 // Set for a response to a HEAD request +#define 
H1_MF_UPG_WEBSOCKET 0x00008000 // Set for a Websocket upgrade handshake +#define H1_MF_TE_CHUNKED 0x00010000 // T-E "chunked" +#define H1_MF_TE_OTHER 0x00020000 // T-E other than supported ones found (only "chunked" is supported for now) + +/* Mask to use to reset H1M flags when we restart headers parsing. + * + * WARNING: Don't forget to update it if a new flag must be preserved when + * headers parsing is restarted. + */ +#define H1_MF_RESTART_MASK (H1_MF_RESP|H1_MF_TOLOWER|H1_MF_NO_PHDR|H1_MF_HDRS_ONLY| \ + H1_MF_CLEAN_CONN_HDR|H1_MF_METH_CONNECT|H1_MF_METH_HEAD) + +/* Note: for a connection to be persistent, we need this for the request : + * - one of CLEN or CHNK + * - version 1.0 and KAL and not CLO + * - or version 1.1 and not CLO + * For the response it's the same except that UPG must not appear either. + * So in short, for a request it's (CLEN|CHNK) > 0 && !CLO && (VER_11 || KAL) + * and for a response it's (CLEN|CHNK) > 0 && !(CLO|UPG) && (VER_11 || KAL) + */ + + +/* basic HTTP/1 message state for use in parsers. The err_pos field is special, + * it is pre-set to a negative value (-1 or -2), and once non-negative it contains + * the relative position in the message of the first parse error. -2 is used to tell + * the parser that we want to block the invalid message. -1 is used to only perform + * a silent capture. 
+ */ +struct h1m { + enum h1m_state state; // H1 message state (H1_MSG_*) + /* 24 bits available here */ + uint32_t flags; // H1 message flags (H1_MF_*) + uint64_t curr_len; // content-length or last chunk length + uint64_t body_len; // total known size of the body length + uint32_t next; // next byte to parse, relative to buffer's head + int err_pos; // position in the byte stream of the first error (H1 or H2) + int err_state; // state where the first error was met (H1 or H2) +}; + +/* basic H1 start line, describes either the request or the response */ +union h1_sl { /* useful start line pointers, relative to ->sol */ + struct { + struct ist m; /* METHOD */ + struct ist u; /* URI */ + struct ist v; /* VERSION */ + enum http_meth_t meth; /* method */ + } rq; /* request line : field, length */ + struct { + struct ist v; /* VERSION */ + struct ist c; /* CODE */ + struct ist r; /* REASON */ + uint16_t status; /* status code */ + } st; /* status line : field, length */ +}; + +int h1_headers_to_hdr_list(char *start, const char *stop, + struct http_hdr *hdr, unsigned int hdr_num, + struct h1m *h1m, union h1_sl *slp); +int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max); + +int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value); +int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value); +void h1_parse_connection_header(struct h1m *h1m, struct ist *value); +void h1_parse_upgrade_header(struct h1m *h1m, struct ist value); + +void h1_generate_random_ws_input_key(char key_out[25]); +void h1_calculate_ws_output_key(const char *key, char *result); + +/* for debugging, reports the HTTP/1 message state name */ +static inline const char *h1m_state_str(enum h1m_state msg_state) +{ + switch (msg_state) { + case H1_MSG_RQBEFORE: return "MSG_RQBEFORE"; + case H1_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR"; + case H1_MSG_RQMETH: return "MSG_RQMETH"; + case H1_MSG_RQMETH_SP: return "MSG_RQMETH_SP"; + case H1_MSG_RQURI: return 
"MSG_RQURI"; + case H1_MSG_RQURI_SP: return "MSG_RQURI_SP"; + case H1_MSG_RQVER: return "MSG_RQVER"; + case H1_MSG_RQLINE_END: return "MSG_RQLINE_END"; + case H1_MSG_RPBEFORE: return "MSG_RPBEFORE"; + case H1_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR"; + case H1_MSG_RPVER: return "MSG_RPVER"; + case H1_MSG_RPVER_SP: return "MSG_RPVER_SP"; + case H1_MSG_RPCODE: return "MSG_RPCODE"; + case H1_MSG_RPCODE_SP: return "MSG_RPCODE_SP"; + case H1_MSG_RPREASON: return "MSG_RPREASON"; + case H1_MSG_RPLINE_END: return "MSG_RPLINE_END"; + case H1_MSG_HDR_FIRST: return "MSG_HDR_FIRST"; + case H1_MSG_HDR_NAME: return "MSG_HDR_NAME"; + case H1_MSG_HDR_COL: return "MSG_HDR_COL"; + case H1_MSG_HDR_L1_SP: return "MSG_HDR_L1_SP"; + case H1_MSG_HDR_L1_LF: return "MSG_HDR_L1_LF"; + case H1_MSG_HDR_L1_LWS: return "MSG_HDR_L1_LWS"; + case H1_MSG_HDR_VAL: return "MSG_HDR_VAL"; + case H1_MSG_HDR_L2_LF: return "MSG_HDR_L2_LF"; + case H1_MSG_HDR_L2_LWS: return "MSG_HDR_L2_LWS"; + case H1_MSG_LAST_LF: return "MSG_LAST_LF"; + case H1_MSG_CHUNK_SIZE: return "MSG_CHUNK_SIZE"; + case H1_MSG_DATA: return "MSG_DATA"; + case H1_MSG_CHUNK_CRLF: return "MSG_CHUNK_CRLF"; + case H1_MSG_TRAILERS: return "MSG_TRAILERS"; + case H1_MSG_DONE: return "MSG_DONE"; + case H1_MSG_TUNNEL: return "MSG_TUNNEL"; + default: return "MSG_??????"; + } +} + +/* This function may be called only in H1_MSG_CHUNK_CRLF. It reads the CRLF + * at the end of a chunk. The caller should adjust msg->next + * in order to include this part into the next forwarding phase. Note that the + * caller must ensure that head+start points to the first byte to parse. It + * returns the number of bytes parsed on success, so the caller can set msg_state + * to H1_MSG_CHUNK_SIZE. If not enough data are available, the function does not + * change anything and returns zero. Otherwise it returns a negative value + * indicating the error position relative to <stop>. Note: this function is + * designed to parse wrapped CRLF at the end of the buffer. 
+ */ +static inline int h1_skip_chunk_crlf(const struct buffer *buf, int start, int stop) +{ + const char *ptr = b_peek(buf, start); + int bytes = 1; + + if (stop <= start) + return 0; + + if (unlikely(*ptr != '\r')) // not a CR: error, negative offset of <ptr> relative to <stop> + return ptr - __b_peek(buf, stop); + + /* NB: we'll check data availability at the end. It's not a + * problem because whatever we match first will be checked + * against the correct length. + */ + bytes++; + ptr++; + if (ptr >= b_wrap(buf)) + ptr = b_orig(buf); + + if (bytes > stop - start) + return 0; + + if (*ptr != '\n') // not a LF: error, negative offset of <ptr> relative to <stop> + return ptr - __b_peek(buf, stop); + + return bytes; +} + +/* Parses the chunk size starting at buf + start and stopping before buf + stop. The + * positions are relative to the buffer's head. + * It returns the chunk size in <res> and the amount of bytes read this way : + * < 0 : error at this position relative to <stop> + * = 0 : not enough bytes to read a complete chunk size + * > 0 : number of bytes successfully read that the caller can skip + * On success, the caller should adjust its msg->next to point to the first + * byte of data after the chunk size, so that we know we can forward exactly + * msg->next bytes, and msg->sol to contain the exact number of bytes forming + * the chunk size. That way it is always possible to differentiate between the + * start of the body and the start of the data. Note: this function is designed + * to parse wrapped CRLF at the end of the buffer. 
+ */ +static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int stop, uint64_t *res) +{ + const char *ptr = b_peek(buf, start); + const char *ptr_old = ptr; + const char *end = b_wrap(buf); + uint64_t chunk = 0; + + stop -= start; // bytes left + start = stop; // bytes to transfer + + /* The chunk size is in the following form, though we are only + * interested in the size and CRLF : + * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF + */ + while (1) { + int c; + if (!stop) + return 0; + c = hex2i(*ptr); + if (c < 0) /* not a hex digit anymore */ + break; + if (unlikely(++ptr >= end)) + ptr = b_orig(buf); + chunk = (chunk << 4) + c; + if (unlikely(chunk & 0xF0000000000000ULL)) { + /* Don't accept more than 13 hex digits (2^52 - 1) so as to never feed possibly + * bogus values to languages that use floats for their integers + */ + goto error; + } + stop--; + } + + /* empty size not allowed */ + if (unlikely(ptr == ptr_old)) + goto error; + + while (HTTP_IS_SPHT(*ptr)) { + if (++ptr >= end) + ptr = b_orig(buf); + if (--stop == 0) + return 0; + } + + /* Up to there, we know that at least one byte is present at *ptr. Check + * for the end of chunk size. + */ + while (1) { + if (likely(*ptr == '\r')) { + /* we now have a CR, it must be followed by a LF */ + if (++ptr >= end) + ptr = b_orig(buf); + if (--stop == 0) + return 0; + + if (*ptr != '\n') + goto error; + if (++ptr >= end) + ptr = b_orig(buf); + --stop; + /* done */ + break; + } + else if (likely(*ptr == ';')) { + /* chunk extension, ends at next CRLF */ + if (++ptr >= end) + ptr = b_orig(buf); + if (--stop == 0) + return 0; + + while (!HTTP_IS_CRLF(*ptr)) { + if (++ptr >= end) + ptr = b_orig(buf); + if (--stop == 0) + return 0; + } + /* we have a CRLF now, loop above */ + continue; + } + else + goto error; + } + + /* OK we found our CRLF and now <ptr> points to the next byte, which may + * or may not be present. Let's return the number of bytes parsed. 
+ */ + *res = chunk; + return start - stop; + error: + *res = 0; // just to stop gcc's -Wuninitialized warning :-( + return -stop; +} + +/* initializes an H1 message for a request and returns <h1m> */ +static inline struct h1m *h1m_init_req(struct h1m *h1m) +{ + h1m->state = H1_MSG_RQBEFORE; + h1m->next = 0; + h1m->flags = H1_MF_NONE; + h1m->curr_len = 0; + h1m->body_len = 0; + h1m->err_pos = -2; + h1m->err_state = 0; + return h1m; +} + +/* initializes an H1 message for a response (H1_MF_RESP set) and returns <h1m> */ +static inline struct h1m *h1m_init_res(struct h1m *h1m) +{ + h1m->state = H1_MSG_RPBEFORE; + h1m->next = 0; + h1m->flags = H1_MF_RESP; + h1m->curr_len = 0; + h1m->body_len = 0; + h1m->err_pos = -2; + h1m->err_state = 0; + return h1m; +} + +#endif /* _HAPROXY_H1_H */ diff --git a/include/haproxy/h1_htx.h b/include/haproxy/h1_htx.h new file mode 100644 index 0000000..61b96e0 --- /dev/null +++ b/include/haproxy/h1_htx.h @@ -0,0 +1,76 @@ +/* + * include/haproxy/h1_htx.h + * This file defines function prototypes for H1 manipulation using the + * internal representation. + * + * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_H1_HTX_H +#define _HAPROXY_H1_HTX_H + +#include <import/ist.h> +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/h1.h> +#include <haproxy/htx.h> + +int h1_parse_msg_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *dsthtx, + struct buffer *srcbuf, size_t ofs, size_t max); +size_t h1_parse_msg_data(struct h1m *h1m, struct htx **dsthtx, + struct buffer *srcbuf, size_t ofs, size_t max, + struct buffer *htxbuf); +int h1_parse_msg_tlrs(struct h1m *h1m, struct htx *dsthtx, + struct buffer *srcbuf, size_t ofs, size_t max); + +/* Returns the URI of an HTX message in the most common format for a H1 peer. It + * is the path part of an absolute URI when the URI was normalized, otherwise + * it is the whole URI, as received. Concretely, it is only a special case for + * URIs received from H2 clients, to be able to send a relative path to the H1 + * servers. 
+ */ +static inline struct ist h1_get_uri(const struct htx_sl *sl) +{ + struct ist uri; + + uri = htx_sl_req_uri(sl); + if (sl->flags & HTX_SL_F_NORMALIZED_URI) { + struct http_uri_parser parser = http_uri_parser_init(uri); + uri = http_parse_path(&parser); + if (unlikely(!uri.len)) { /* empty path: fall back to the asterisk form for OPTIONS, to the root path otherwise */ + if (sl->info.req.meth == HTTP_METH_OPTIONS) + uri = ist("*"); + else + uri = ist("/"); + } + } + return uri; +} + +int h1_format_htx_reqline(const struct htx_sl *sl, struct buffer *chk); +int h1_format_htx_stline(const struct htx_sl *sl, struct buffer *chk); +int h1_format_htx_hdr(const struct ist n, const struct ist v, struct buffer *chk); +int h1_format_htx_data(const struct ist data, struct buffer *chk, int chunked); + +#endif /* _HAPROXY_H1_HTX_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/h2.h b/include/haproxy/h2.h new file mode 100644 index 0000000..4082b38 --- /dev/null +++ b/include/haproxy/h2.h @@ -0,0 +1,351 @@ +/* + * include/haproxy/h2.h + * This file contains types and macros used for the HTTP/2 protocol + * + * Copyright (C) 2000-2017 Willy Tarreau - w@1wt.eu + * Copyright (C) 2017 HAProxy Technologies + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_H2_H +#define _HAPROXY_H2_H + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/http-hdr-t.h> +#include <haproxy/htx-t.h> + +/* indexes of most important pseudo headers can be simplified to an almost + * linear array by dividing the index by 2 for all values from 1 to 9, and + * capping to 4 for values up to 14 ; thus it fits in a single 24-bit array + * shifted by 3 times the index value/2, or a 32-bit array shifted by 4x. + * Don't change these values, they are assumed by hpack_idx_to_phdr(). There + * is an entry for the Host header field which is not a pseudo-header but + * needs to be tracked as we should only use :authority if it's absent. + */ +enum { + H2_PHDR_IDX_NONE = 0, + H2_PHDR_IDX_AUTH = 1, /* :authority = 1 */ + H2_PHDR_IDX_METH = 2, /* :method = 2..3 */ + H2_PHDR_IDX_PATH = 3, /* :path = 4..5 */ + H2_PHDR_IDX_SCHM = 4, /* :scheme = 6..7 */ + H2_PHDR_IDX_STAT = 5, /* :status = 8..14 */ + H2_PHDR_IDX_HOST = 6, /* Host, never returned, just a place-holder */ + H2_PHDR_IDX_PROT = 7, /* :protocol from rfc 8441 Extended Connect */ + H2_PHDR_NUM_ENTRIES /* must be last */ +}; + +/* bit fields indicating the pseudo-headers found. It also covers the HOST + * header field as well as any non-pseudo-header field (NONE). 
+ */ +enum { + H2_PHDR_FND_NONE = 1 << H2_PHDR_IDX_NONE, /* found a regular header */ + H2_PHDR_FND_AUTH = 1 << H2_PHDR_IDX_AUTH, + H2_PHDR_FND_METH = 1 << H2_PHDR_IDX_METH, + H2_PHDR_FND_PATH = 1 << H2_PHDR_IDX_PATH, + H2_PHDR_FND_SCHM = 1 << H2_PHDR_IDX_SCHM, + H2_PHDR_FND_STAT = 1 << H2_PHDR_IDX_STAT, + H2_PHDR_FND_HOST = 1 << H2_PHDR_IDX_HOST, + H2_PHDR_FND_PROT = 1 << H2_PHDR_IDX_PROT, +}; + +/* frame types, from the standard */ +enum h2_ft { + H2_FT_DATA = 0x00, // RFC7540 #6.1 + H2_FT_HEADERS = 0x01, // RFC7540 #6.2 + H2_FT_PRIORITY = 0x02, // RFC7540 #6.3 + H2_FT_RST_STREAM = 0x03, // RFC7540 #6.4 + H2_FT_SETTINGS = 0x04, // RFC7540 #6.5 + H2_FT_PUSH_PROMISE = 0x05, // RFC7540 #6.6 + H2_FT_PING = 0x06, // RFC7540 #6.7 + H2_FT_GOAWAY = 0x07, // RFC7540 #6.8 + H2_FT_WINDOW_UPDATE = 0x08, // RFC7540 #6.9 + H2_FT_CONTINUATION = 0x09, // RFC7540 #6.10 + H2_FT_ENTRIES /* must be last: number of frame types, sizes h2_frame_definition[] */ +} __attribute__((packed)); + +/* frame types, turned to bits or bit fields */ +enum { + /* one bit per frame type */ + H2_FT_DATA_BIT = 1U << H2_FT_DATA, + H2_FT_HEADERS_BIT = 1U << H2_FT_HEADERS, + H2_FT_PRIORITY_BIT = 1U << H2_FT_PRIORITY, + H2_FT_RST_STREAM_BIT = 1U << H2_FT_RST_STREAM, + H2_FT_SETTINGS_BIT = 1U << H2_FT_SETTINGS, + H2_FT_PUSH_PROMISE_BIT = 1U << H2_FT_PUSH_PROMISE, + H2_FT_PING_BIT = 1U << H2_FT_PING, + H2_FT_GOAWAY_BIT = 1U << H2_FT_GOAWAY, + H2_FT_WINDOW_UPDATE_BIT = 1U << H2_FT_WINDOW_UPDATE, + H2_FT_CONTINUATION_BIT = 1U << H2_FT_CONTINUATION, + /* padded frames */ + H2_FT_PADDED_MASK = H2_FT_DATA_BIT | H2_FT_HEADERS_BIT | H2_FT_PUSH_PROMISE_BIT, + /* flow controlled frames */ + H2_FT_FC_MASK = H2_FT_DATA_BIT, + /* header frames */ + H2_FT_HDR_MASK = H2_FT_HEADERS_BIT | H2_FT_PUSH_PROMISE_BIT | H2_FT_CONTINUATION_BIT, + /* frames allowed to arrive late on a stream */ + H2_FT_LATE_MASK = H2_FT_WINDOW_UPDATE_BIT | H2_FT_RST_STREAM_BIT | H2_FT_PRIORITY_BIT, +}; + + +/* flags defined for each frame type */ + +// RFC7540 #6.1 +#define H2_F_DATA_END_STREAM 0x01 
+#define H2_F_DATA_PADDED 0x08 + +// RFC7540 #6.2 +#define H2_F_HEADERS_END_STREAM 0x01 +#define H2_F_HEADERS_END_HEADERS 0x04 +#define H2_F_HEADERS_PADDED 0x08 +#define H2_F_HEADERS_PRIORITY 0x20 + +// RFC7540 #6.3 : PRIORITY defines no flags +// RFC7540 #6.4 : RST_STREAM defines no flags + +// RFC7540 #6.5 +#define H2_F_SETTINGS_ACK 0x01 + +// RFC7540 #6.6 +#define H2_F_PUSH_PROMISE_END_HEADERS 0x04 +#define H2_F_PUSH_PROMISE_PADDED 0x08 + +// RFC7540 #6.7 +#define H2_F_PING_ACK 0x01 + +// RFC7540 #6.8 : GOAWAY defines no flags +// RFC7540 #6.9 : WINDOW_UPDATE defines no flags + +// PADDED is the exact same among DATA, HEADERS and PUSH_PROMISE (8) +#define H2_F_PADDED 0x08 + +/* HTTP/2 error codes - RFC7540 #7 */ +enum h2_err { + H2_ERR_NO_ERROR = 0x0, + H2_ERR_PROTOCOL_ERROR = 0x1, + H2_ERR_INTERNAL_ERROR = 0x2, + H2_ERR_FLOW_CONTROL_ERROR = 0x3, + H2_ERR_SETTINGS_TIMEOUT = 0x4, + H2_ERR_STREAM_CLOSED = 0x5, + H2_ERR_FRAME_SIZE_ERROR = 0x6, + H2_ERR_REFUSED_STREAM = 0x7, + H2_ERR_CANCEL = 0x8, + H2_ERR_COMPRESSION_ERROR = 0x9, + H2_ERR_CONNECT_ERROR = 0xa, + H2_ERR_ENHANCE_YOUR_CALM = 0xb, + H2_ERR_INADEQUATE_SECURITY = 0xc, + H2_ERR_HTTP_1_1_REQUIRED = 0xd, +} __attribute__((packed)); + +// RFC7540 #11.3 : Settings Registry +#define H2_SETTINGS_HEADER_TABLE_SIZE 0x0001 +#define H2_SETTINGS_ENABLE_PUSH 0x0002 +#define H2_SETTINGS_MAX_CONCURRENT_STREAMS 0x0003 +#define H2_SETTINGS_INITIAL_WINDOW_SIZE 0x0004 +#define H2_SETTINGS_MAX_FRAME_SIZE 0x0005 +#define H2_SETTINGS_MAX_HEADER_LIST_SIZE 0x0006 +#define H2_SETTINGS_ENABLE_CONNECT_PROTOCOL 0x0008 + + +/* some protocol constants */ + +// PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n +#define H2_CONN_PREFACE \ + "\x50\x52\x49\x20\x2a\x20\x48\x54" \ + "\x54\x50\x2f\x32\x2e\x30\x0d\x0a" \ + "\x0d\x0a\x53\x4d\x0d\x0a\x0d\x0a" + + +/* some flags related to protocol parsing */ +#define H2_MSGF_BODY 0x0001 // a body is present +#define H2_MSGF_BODY_CL 0x0002 // content-length is present +#define H2_MSGF_BODY_TUNNEL 0x0004 // a 
tunnel is in use (CONNECT) +#define H2_MSGF_RSP_1XX 0x0010 // a 1xx ( != 101) HEADERS frame was received +#define H2_MSGF_BODYLESS_RSP 0x0020 // response message is known to have no body + // (response to HEAD request or 204/304 response) +#define H2_MSGF_EXT_CONNECT 0x0040 // Extended CONNECT method from rfc 8441 + +#define H2_MAX_STREAM_ID ((1U << 31) - 1) +#define H2_MAX_FRAME_LEN ((1U << 24) - 1) +#define H2_DIR_REQ 1 +#define H2_DIR_RES 2 +#define H2_DIR_BOTH 3 + +/* constraints imposed by the protocol on each frame type, in terms of stream + * ID values, frame sizes, and direction so that most connection-level checks + * can be centralized regardless of the frame's acceptance. + */ +struct h2_frame_definition { + int32_t dir; /* 0=none, 1=request, 2=response, 3=both */ + int32_t min_id; /* minimum allowed stream ID */ + int32_t max_id; /* maximum allowed stream ID */ + int32_t min_len; /* minimum frame length */ + int32_t max_len; /* maximum frame length */ +}; + +extern struct h2_frame_definition h2_frame_definition[H2_FT_ENTRIES]; + +/* various protocol processing functions */ + +int h2_parse_cont_len_header(unsigned int *msgf, struct ist *value, unsigned long long *body_len); +int h2_make_htx_request(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len, int relaxed); +int h2_make_htx_response(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len, char *upgrade_protocol); +int h2_make_htx_trailers(struct http_hdr *list, struct htx *htx); + +/* + * Some helpful debugging functions. 
+ */ + +/* returns a bit corresponding to the frame type */ +static inline unsigned int h2_ft_bit(enum h2_ft ft) +{ + if (ft >= H2_FT_ENTRIES) + return 0; + return 1U << ft; +} + +/* returns the frame type as a string */ +static inline const char *h2_ft_str(int type) +{ + switch (type) { + case H2_FT_DATA : return "DATA"; + case H2_FT_HEADERS : return "HEADERS"; + case H2_FT_PRIORITY : return "PRIORITY"; + case H2_FT_RST_STREAM : return "RST_STREAM"; + case H2_FT_SETTINGS : return "SETTINGS"; + case H2_FT_PUSH_PROMISE : return "PUSH_PROMISE"; + case H2_FT_PING : return "PING"; + case H2_FT_GOAWAY : return "GOAWAY"; + case H2_FT_WINDOW_UPDATE : return "WINDOW_UPDATE"; + default : return "_UNKNOWN_"; + } +} + +/* returns the error code as a string */ +static inline const char *h2_err_str(enum h2_err err) +{ + switch (err) { + case H2_ERR_NO_ERROR : return "NO_ERROR"; + case H2_ERR_PROTOCOL_ERROR : return "PROTOCOL_ERROR"; + case H2_ERR_INTERNAL_ERROR : return "INTERNAL_ERROR"; + case H2_ERR_FLOW_CONTROL_ERROR : return "FLOW_CONTROL_ERROR"; + case H2_ERR_SETTINGS_TIMEOUT : return "SETTINGS_TIMEOUT"; + case H2_ERR_STREAM_CLOSED : return "STREAM_CLOSED"; + case H2_ERR_FRAME_SIZE_ERROR : return "FRAME_SIZE_ERROR"; + case H2_ERR_REFUSED_STREAM : return "REFUSED_STREAM"; + case H2_ERR_CANCEL : return "CANCEL"; + case H2_ERR_COMPRESSION_ERROR : return "COMPRESSION_ERROR"; + case H2_ERR_CONNECT_ERROR : return "CONNECT_ERROR"; + case H2_ERR_ENHANCE_YOUR_CALM : return "ENHANCE_YOUR_CALM"; + case H2_ERR_INADEQUATE_SECURITY : return "INADEQUATE_SECURITY"; + case H2_ERR_HTTP_1_1_REQUIRED : return "HTTP_1_1_REQUIRED"; + default : return "_UNKNOWN_"; + } +} + +/* Returns an error code if the frame is valid protocol-wise, otherwise 0. <ft> + * is the frame type (H2_FT_*), <dir> is the direction (1=req, 2=res), <id> is + * the stream ID from the frame header, <len> is the frame length from the + * header. 
The purpose is to be able to quickly return a PROTOCOL_ERROR or
+ * FRAME_SIZE_ERROR connection error even for situations where the frame will
+ * be ignored. <mfs> must be the max frame size currently in place for the
+ * protocol.
+ */
+static inline int h2_frame_check(enum h2_ft ft, int dir, int32_t id, int32_t len, int32_t mfs)
+{
+	struct h2_frame_definition *fd;
+
+	if (ft >= H2_FT_ENTRIES)
+		return H2_ERR_NO_ERROR; // ignore unhandled frame types
+
+	/* per-type constraints come from the h2_frame_definition[] table */
+	fd = &h2_frame_definition[ft];
+
+	/* <dir> must share at least one bit with the directions allowed for
+	 * this frame type.
+	 */
+	if (!(dir & fd->dir))
+		return H2_ERR_PROTOCOL_ERROR;
+
+	/* the stream ID must lie within [min_id, max_id] for this frame type */
+	if (id < fd->min_id || id > fd->max_id)
+		return H2_ERR_PROTOCOL_ERROR;
+
+	/* the length must lie within [min_len, max_len] for this frame type */
+	if (len < fd->min_len || len > fd->max_len)
+		return H2_ERR_FRAME_SIZE_ERROR;
+
+	/* and it may never exceed the max frame size passed by the caller */
+	if (len > mfs)
+		return H2_ERR_FRAME_SIZE_ERROR;
+
+	if (ft == H2_FT_SETTINGS && (len % 6) != 0)
+		return H2_ERR_FRAME_SIZE_ERROR; // RFC7540#6.5
+
+	return H2_ERR_NO_ERROR;
+}
+
+/* returns the pseudo-header <str> corresponds to among H2_PHDR_IDX_*, 0 if not a
+ * pseudo-header, or -1 if not a valid pseudo-header.
+ * NOTE(review): the first byte of <str> is read without checking str.len;
+ * presumably callers never pass an empty or NULL name -- to be confirmed.
+ */
+static inline int h2_str_to_phdr(const struct ist str)
+{
+	if (*str.ptr == ':') {
+		if (isteq(str, ist(":path")))           return H2_PHDR_IDX_PATH;
+		else if (isteq(str, ist(":method")))    return H2_PHDR_IDX_METH;
+		else if (isteq(str, ist(":scheme")))    return H2_PHDR_IDX_SCHM;
+		else if (isteq(str, ist(":status")))    return H2_PHDR_IDX_STAT;
+		else if (isteq(str, ist(":authority"))) return H2_PHDR_IDX_AUTH;
+		else if (isteq(str, ist(":protocol")))  return H2_PHDR_IDX_PROT;
+
+		/* all other names starting with ':' */
+		return -1;
+	}
+
+	/* not a pseudo header */
+	return 0;
+}
+
+/* returns the pseudo-header name <phdr> as an ist, or ":UNKNOWN" if unknown.
+ * Note that all strings are zero-terminated constants.
+ */ +static inline struct ist h2_phdr_to_ist(int phdr) +{ + switch (phdr) { + case H2_PHDR_IDX_NONE: return ist(":NONE"); + case H2_PHDR_IDX_AUTH: return ist(":authority"); + case H2_PHDR_IDX_METH: return ist(":method"); + case H2_PHDR_IDX_PATH: return ist(":path"); + case H2_PHDR_IDX_SCHM: return ist(":scheme"); + case H2_PHDR_IDX_STAT: return ist(":status"); + case H2_PHDR_IDX_HOST: return ist("Host"); + default: return ist(":UNKNOWN"); + } +} + +/* returns the pseudo-header name <num> as a string, or ":UNKNOWN" if unknown */ +static inline const char *h2_phdr_to_str(int phdr) +{ + return h2_phdr_to_ist(phdr).ptr; +} + +#endif /* _HAPROXY_H2_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/h3.h b/include/haproxy/h3.h new file mode 100644 index 0000000..1bedf43 --- /dev/null +++ b/include/haproxy/h3.h @@ -0,0 +1,118 @@ +/* + * include/haproxy/h3.h + * This file contains types for H3 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_H3_T_H +#define _HAPROXY_H3_T_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <haproxy/buf-t.h> +#include <haproxy/mux_quic-t.h> + +/* H3 unidirecational stream types + * Emitted as the first byte on the stream to differentiate it. 
+ */ +#define H3_UNI_S_T_CTRL 0x00 +#define H3_UNI_S_T_PUSH 0x01 +#define H3_UNI_S_T_QPACK_ENC 0x02 +#define H3_UNI_S_T_QPACK_DEC 0x03 +/* Must be the last one */ +#define H3_UNI_S_T_MAX H3_UNI_S_T_QPACK_DEC + +/* Settings */ +#define H3_SETTINGS_RESERVED_0 0x00 +#define H3_SETTINGS_QPACK_MAX_TABLE_CAPACITY 0x01 +/* there is a hole here of reserved settings, matching the h2 settings */ +#define H3_SETTINGS_RESERVED_2 0x02 +#define H3_SETTINGS_RESERVED_3 0x03 +#define H3_SETTINGS_RESERVED_4 0x04 +#define H3_SETTINGS_RESERVED_5 0x05 +#define H3_SETTINGS_MAX_FIELD_SECTION_SIZE 0x06 +#define H3_SETTINGS_QPACK_BLOCKED_STREAMS 0x07 + +/* Errors. */ +enum h3_err { + H3_NO_ERROR = 0x100, + H3_GENERAL_PROTOCOL_ERROR = 0x101, + H3_INTERNAL_ERROR = 0x102, + H3_STREAM_CREATION_ERROR = 0x103, + H3_CLOSED_CRITICAL_STREAM = 0x104, + H3_FRAME_UNEXPECTED = 0x105, + H3_FRAME_ERROR = 0x106, + H3_EXCESSIVE_LOAD = 0x107, + H3_ID_ERROR = 0x108, + H3_SETTINGS_ERROR = 0x109, + H3_MISSING_SETTINGS = 0x10a, + H3_REQUEST_REJECTED = 0x10b, + H3_REQUEST_CANCELLED = 0x10c, + H3_REQUEST_INCOMPLETE = 0x10d, + H3_MESSAGE_ERROR = 0x10e, + H3_CONNECT_ERROR = 0x10f, + H3_VERSION_FALLBACK = 0x110, + + QPACK_DECOMPRESSION_FAILED = 0x200, + QPACK_ENCODER_STREAM_ERROR = 0x201, + QPACK_DECODER_STREAM_ERROR = 0x202, +}; + +/* Frame types. 
*/ +enum h3_ft { + /* internal value used to mark demuxing as inactive */ + H3_FT_UNINIT = -1, + + H3_FT_DATA = 0x00, + H3_FT_HEADERS = 0x01, + /* hole */ + H3_FT_CANCEL_PUSH = 0x03, + H3_FT_SETTINGS = 0x04, + H3_FT_PUSH_PROMISE = 0x05, + /* hole */ + H3_FT_GOAWAY = 0x07, + /* hole */ + H3_FT_MAX_PUSH_ID = 0x0d, +}; + +/* Stream types */ +enum h3s_t { + /* unidirectional streams */ + H3S_T_CTRL, + H3S_T_PUSH, + H3S_T_QPACK_DEC, + H3S_T_QPACK_ENC, + + /* bidirectional streams */ + H3S_T_REQ, + + H3S_T_UNKNOWN +}; + +/* State for request streams */ +enum h3s_st_req { + H3S_ST_REQ_BEFORE = 0, /* initial state */ + H3S_ST_REQ_HEADERS, /* header section received */ + H3S_ST_REQ_DATA, /* first DATA frame for content received */ + H3S_ST_REQ_TRAILERS, /* trailer section received */ +}; + +extern const struct qcc_app_ops h3_ops; + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_H3_T_H */ diff --git a/include/haproxy/h3_stats-t.h b/include/haproxy/h3_stats-t.h new file mode 100644 index 0000000..3c00f6c --- /dev/null +++ b/include/haproxy/h3_stats-t.h @@ -0,0 +1,12 @@ +#ifndef _HAPROXY_H3_STATS_T_H +#define _HAPROXY_H3_STATS_T_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +extern struct stats_module h3_stats_module; + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_H3_STATS_T_H */ diff --git a/include/haproxy/h3_stats.h b/include/haproxy/h3_stats.h new file mode 100644 index 0000000..ed7c5e7 --- /dev/null +++ b/include/haproxy/h3_stats.h @@ -0,0 +1,17 @@ +#ifndef _HAPROXY_H3_STATS_H +#define _HAPROXY_H3_STATS_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <haproxy/h3_stats-t.h> + +struct h3_counters; + +void h3_inc_err_cnt(void *ctx, int error_code); +void h3_inc_frame_type_cnt(struct h3_counters *ctrs, int frm_type); + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_H3_STATS_H */ diff --git a/include/haproxy/hash.h b/include/haproxy/hash.h new file mode 100644 index 0000000..cb506c7 --- /dev/null +++ 
b/include/haproxy/hash.h @@ -0,0 +1,33 @@ +/* + * include/haproxy/hash.h + * Macros for different hashing function. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HASH_H_ +#define _HAPROXY_HASH_H_ + +#include <inttypes.h> + +unsigned int hash_djb2(const void *input, int len); +unsigned int hash_wt6(const void *input, int len); +unsigned int hash_sdbm(const void *input, int len); +unsigned int hash_crc32(const void *input, int len); +uint32_t hash_crc32c(const void *input, int len); + +#endif /* _HAPROXY_HASH_H_ */ diff --git a/include/haproxy/hlua-t.h b/include/haproxy/hlua-t.h new file mode 100644 index 0000000..2672ffd --- /dev/null +++ b/include/haproxy/hlua-t.h @@ -0,0 +1,243 @@ +/* + * include/haproxy/hlua-t.h + * Lua core types definitions + * + * Copyright (C) 2015-2016 Thierry Fournier <tfournier@arpalert.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HLUA_T_H +#define _HAPROXY_HLUA_T_H + +#ifdef USE_LUA + +#include <lua.h> +#include <lauxlib.h> +#include <stdint.h> + +#include <import/ebtree-t.h> + +#include <haproxy/proxy-t.h> +#include <haproxy/regex-t.h> +#include <haproxy/server-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/xref-t.h> +#include <haproxy/event_hdl-t.h> + +#define CLASS_CORE "Core" +#define CLASS_TXN "TXN" +#define CLASS_FETCHES "Fetches" +#define CLASS_CONVERTERS "Converters" +#define CLASS_SOCKET "Socket" +#define CLASS_CHANNEL "Channel" +#define CLASS_HTTP "HTTP" +#define CLASS_HTTP_MSG "HTTPMessage" +#define CLASS_HTTPCLIENT "HTTPClient" +#define CLASS_MAP "Map" +#define CLASS_APPLET_TCP "AppletTCP" +#define CLASS_APPLET_HTTP "AppletHTTP" +#define CLASS_PROXY "Proxy" +#define CLASS_SERVER "Server" +#define CLASS_LISTENER "Listener" +#define CLASS_EVENT_SUB "EventSub" +#define CLASS_REGEX "Regex" +#define CLASS_STKTABLE "StickTable" +#define CLASS_CERTCACHE "CertCache" +#define CLASS_PROXY_LIST "ProxyList" +#define CLASS_SERVER_LIST "ServerList" + +struct stream; + +#define HLUA_RUN 0x00000001 +#define HLUA_CTRLYIELD 0x00000002 +#define HLUA_WAKERESWR 0x00000004 +#define HLUA_WAKEREQWR 0x00000008 +#define HLUA_EXIT 0x00000010 +#define HLUA_NOYIELD 0x00000020 + +#define HLUA_F_AS_STRING 0x01 +#define HLUA_F_MAY_USE_HTTP 0x02 + +/* HLUA TXN flags */ +#define HLUA_TXN_NOTERM 0x00000001 +/* 0x00000002 .. 0x00000008 unused */ + +/* The execution context (enum), bits values from 0x00000010 to + * 0x00000030. These flags are mutually exclusives. Only one must be set at a + * time. 
+ */
+#define HLUA_TXN_SMP_NONE 0x00000000 /* No specific execution context */
+#define HLUA_TXN_SMP_CTX  0x00000010 /* Executed from a sample fetch context */
+#define HLUA_TXN_ACT_CTX  0x00000020 /* Executed from an action context */
+#define HLUA_TXN_FLT_CTX  0x00000030 /* Executed from a filter context */
+#define HLUA_TXN_CTX_MASK 0x00000030 /* Mask to get the execution context */
+
+
+#define HLUA_CONCAT_BLOCSZ 2048
+
+/* Status codes describing the outcome of a Lua stack execution. */
+enum hlua_exec {
+	HLUA_E_OK = 0,
+	HLUA_E_AGAIN,  /* LUA yield, must resume the stack execution later, when
+	                  the associated task is woken up. */
+	HLUA_E_ETMOUT, /* Execution timeout */
+	HLUA_E_NOMEM,  /* Out of memory error */
+	HLUA_E_YIELD,  /* LUA code tried to yield, and this is not allowed */
+	HLUA_E_ERRMSG, /* LUA stack execution failed with a string error message
+	                  on the top of the stack. */
+	HLUA_E_ERR,    /* LUA stack execution failed without error message. */
+};
+
+/* Execution time accounting for a Lua coroutine; all values are in ms. */
+struct hlua_timer {
+	uint32_t start;      /* cpu time in ms when the timer was started */
+	uint32_t burst;      /* execution time for the current call in ms */
+	uint32_t cumulative; /* cumulative execution time for the coroutine in ms */
+	uint32_t max;        /* max (cumulative) execution time for the coroutine in ms */
+};
+
+struct hlua {
+	lua_State *T; /* The LUA stack. */
+	int state_id; /* contains the lua state id. 0 is common state, 1 to n are per-thread states.*/
+	int Tref; /* The reference of the stack in coroutine case.
+	             -1 for the main lua stack. */
+	int Mref; /* The reference of the memory context in coroutine case.
+	             -1 if the memory context is not used. */
+	int nargs; /* The number of arguments in the stack at the start of execution. */
+	unsigned int flags; /* The current execution flags. */
+	int wake_time; /* The lua wants to be woken up at this time, or before. (ticks) */
+	struct hlua_timer timer; /* lua multipurpose timer */
+	struct task *task; /* The task associated with the lua stack execution.
+ We must wake this task to continue the task execution */ + struct list com; /* The list head of the signals attached to this task. */ + struct mt_list hc_list; /* list of httpclient associated to this lua task */ + struct ebpt_node node; + int gc_count; /* number of items which need a GC */ +}; + +/* This is a part of the list containing references to functions + * called at the initialisation time. + */ +struct hlua_init_function { + struct list l; + int function_ref; +}; + +/* This struct contains the lua data used to bind + * Lua function on HAProxy hook like sample-fetches + * or actions. + */ +struct hlua_function { + struct list l; + char *name; + int function_ref[MAX_THREADS + 1]; + int nargs; +}; + +/* This struct is used with the structs: + * - http_req_rule + * - http_res_rule + * - tcp_rule + * It contains the lua execution configuration. + */ +struct hlua_rule { + struct hlua_function *fcn; + char **args; +}; + +/* This struct contains the pointer provided on the most + * of internal HAProxy calls during the processing of + * rules, converters and sample-fetches. This struct is + * associated with the lua object called "TXN". + */ +struct hlua_txn { + struct stream *s; + struct proxy *p; + int dir; /* SMP_OPT_DIR_{REQ,RES} */ + int flags; +}; + +/* This struct contains the applet context. */ +struct hlua_appctx { + struct appctx *appctx; + luaL_Buffer b; /* buffer used to prepare strings. */ + struct hlua_txn htxn; +}; + +/* This struct is used with sample fetches and sample converters. */ +struct hlua_smp { + struct stream *s; + struct proxy *p; + unsigned int flags; /* LUA_F_OPT_* */ + int dir; /* SMP_OPT_DIR_{REQ,RES} */ +}; + +/* This struct contains data used with sleep functions. */ +struct hlua_sleep { + struct task *task; /* task associated with sleep. */ + struct list com; /* list of signal to wake at the end of sleep. */ + unsigned int wakeup_ms; /* hour to wakeup. 
*/ +}; + +/* This struct is used to create coprocess doing TCP or + * SSL I/O. It uses a fake stream. + */ +struct hlua_socket { + struct xref xref; /* cross reference with the stream used for socket I/O. */ + luaL_Buffer b; /* buffer used to prepare strings. */ + unsigned long tid; /* Store the thread id which creates the socket. */ +}; + +struct hlua_concat { + int size; + int len; +}; + +/* This struct is used to store the httpclient */ +struct hlua_httpclient { + struct httpclient *hc; /* ptr to the httpclient instance */ + size_t sent; /* payload sent */ + luaL_Buffer b; /* buffer used to prepare strings. */ + struct mt_list by_hlua; /* linked in the current hlua task */ +}; + +struct hlua_proxy_list { + char capabilities; +}; + +struct hlua_proxy_list_iterator_context { + struct proxy *next; + char capabilities; +}; + +struct hlua_server_list { + struct proxy *px; +}; + +struct hlua_server_list_iterator_context { + struct server *cur; + struct proxy *px; +}; + +#else /* USE_LUA */ +/************************ For use when Lua is disabled ********************/ + +/* Empty struct for compilation compatibility */ +struct hlua { }; +struct hlua_socket { }; +struct hlua_rule { }; + +#endif /* USE_LUA */ + +#endif /* _HAPROXY_HLUA_T_H */ diff --git a/include/haproxy/hlua.h b/include/haproxy/hlua.h new file mode 100644 index 0000000..3c67cce --- /dev/null +++ b/include/haproxy/hlua.h @@ -0,0 +1,81 @@ +/* + * include/haproxy/hlua.h + * Lua core management functions + * + * Copyright (C) 2015-2016 Thierry Fournier <tfournier@arpalert.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HLUA_H +#define _HAPROXY_HLUA_H + +#include <haproxy/hlua-t.h> + +#ifdef USE_LUA + +/* The following macros are used to set flags. */ +#define HLUA_SET_RUN(__hlua) do {(__hlua)->flags |= HLUA_RUN;} while(0) +#define HLUA_CLR_RUN(__hlua) do {(__hlua)->flags &= ~HLUA_RUN;} while(0) +#define HLUA_IS_RUNNING(__hlua) ((__hlua)->flags & HLUA_RUN) +#define HLUA_SET_CTRLYIELD(__hlua) do {(__hlua)->flags |= HLUA_CTRLYIELD;} while(0) +#define HLUA_CLR_CTRLYIELD(__hlua) do {(__hlua)->flags &= ~HLUA_CTRLYIELD;} while(0) +#define HLUA_IS_CTRLYIELDING(__hlua) ((__hlua)->flags & HLUA_CTRLYIELD) +#define HLUA_SET_WAKERESWR(__hlua) do {(__hlua)->flags |= HLUA_WAKERESWR;} while(0) +#define HLUA_CLR_WAKERESWR(__hlua) do {(__hlua)->flags &= ~HLUA_WAKERESWR;} while(0) +#define HLUA_IS_WAKERESWR(__hlua) ((__hlua)->flags & HLUA_WAKERESWR) +#define HLUA_SET_WAKEREQWR(__hlua) do {(__hlua)->flags |= HLUA_WAKEREQWR;} while(0) +#define HLUA_CLR_WAKEREQWR(__hlua) do {(__hlua)->flags &= ~HLUA_WAKEREQWR;} while(0) +#define HLUA_IS_WAKEREQWR(__hlua) ((__hlua)->flags & HLUA_WAKEREQWR) +#define HLUA_CLR_NOYIELD(__hlua) do {(__hlua)->flags &= ~HLUA_NOYIELD;} while(0) +#define HLUA_SET_NOYIELD(__hlua) do {(__hlua)->flags |= HLUA_NOYIELD;} while(0) +#define HLUA_CANT_YIELD(__hlua) ((__hlua)->flags & HLUA_NOYIELD) + + +#define HLUA_INIT(__hlua) do { (__hlua)->T = 0; } while(0) + +/* Lua HAProxy integration functions. 
*/ +const char *hlua_traceback(lua_State *L, const char* sep); +void hlua_ctx_destroy(struct hlua *lua); +void hlua_init(); +int hlua_post_init(); +void hlua_applet_tcp_fct(struct appctx *ctx); +void hlua_applet_http_fct(struct appctx *ctx); +int hlua_event_sub(lua_State *L, event_hdl_sub_list *sub_list); +struct task *hlua_process_task(struct task *task, void *context, unsigned int state); +const char *hlua_show_current_location(const char *pfx); +int hlua_ref(lua_State *L); +void hlua_pushref(lua_State *L, int ref); +void hlua_unref(lua_State *L, int ref); +struct hlua *hlua_gethlua(lua_State *L); +void hlua_yieldk(lua_State *L, int nresults, lua_KContext ctx, lua_KFunction k, int timeout, unsigned int flags); + +#else /* USE_LUA */ + +/************************ For use when Lua is disabled ********************/ + +#define HLUA_IS_RUNNING(__hlua) 0 + +#define HLUA_INIT(__hlua) + +/* Empty function for compilation without Lua. */ +static inline void hlua_init() { } +static inline int hlua_post_init() { return 1; } +static inline void hlua_ctx_destroy(struct hlua *lua) { } +static inline const char *hlua_show_current_location(const char *pfx) { return NULL; } + +#endif /* USE_LUA */ + +#endif /* _HAPROXY_HLUA_H */ diff --git a/include/haproxy/hlua_fcn.h b/include/haproxy/hlua_fcn.h new file mode 100644 index 0000000..ff9250a --- /dev/null +++ b/include/haproxy/hlua_fcn.h @@ -0,0 +1,41 @@ +/* + * include/haproxy/hlua_fcn.h + * Lua user-level management functions + * + * Copyright (C) 2015-2016 Thierry Fournier <tfournier@arpalert.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HLUA_FCN_H +#define _HAPROXY_HLUA_FCN_H + +#include <lua.h> +#include <haproxy/hlua-t.h> + +int hlua_checkboolean(lua_State *L, int index); + +void hlua_class_const_int(lua_State *L, const char *name, int value); +void hlua_class_const_str(lua_State *L, const char *name, const char *value); +void hlua_class_function(lua_State *L, const char *name, int (*function)(lua_State *L)); +void *hlua_checkudata(lua_State *L, int ud, int class_ref); +int hlua_register_metatable(struct lua_State *L, char *name); +void hlua_fcn_reg_core_fcn(lua_State *L); +int hlua_dump_object(lua_State *L); +int hlua_fcn_new_proxy(lua_State *L, struct proxy *px); +int hlua_fcn_new_server(lua_State *L, struct server *srv); +int hlua_fcn_new_event_sub(lua_State *L, struct event_hdl_sub *sub); + +#endif /* _HAPROXY_HLUA_FCN_H */ diff --git a/include/haproxy/hpack-dec.h b/include/haproxy/hpack-dec.h new file mode 100644 index 0000000..4fb1a36 --- /dev/null +++ b/include/haproxy/hpack-dec.h @@ -0,0 +1,39 @@ +/* + * HPACK decompressor (RFC7541) + * + * Copyright (C) 2014-2020 Willy Tarreau <willy@haproxy.org> + * Copyright (C) 2017 HAProxy Technologies + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included 
in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _COMMON_HPACK_DEC_H +#define _COMMON_HPACK_DEC_H + +#include <haproxy/api.h> +#include <haproxy/chunk.h> +#include <haproxy/hpack-tbl.h> + +int hpack_decode_frame(struct hpack_dht *dht, const uint8_t *raw, uint32_t len, + struct http_hdr *list, int list_size, + struct buffer *tmp); + +#endif /* _COMMON_HPACK_DEC_H */ diff --git a/include/haproxy/hpack-enc.h b/include/haproxy/hpack-enc.h new file mode 100644 index 0000000..7511c5d --- /dev/null +++ b/include/haproxy/hpack-enc.h @@ -0,0 +1,261 @@ +/* + * HPACK compressor (RFC7541) + * + * Copyright (C) 2014-2020 Willy Tarreau <willy@haproxy.org> + * Copyright (C) 2017 HAProxy Technologies + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _COMMON_HPACK_ENC_H +#define _COMMON_HPACK_ENC_H + +#include <string.h> +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/buf-t.h> +#include <haproxy/http-t.h> + +int hpack_encode_header(struct buffer *out, const struct ist n, + const struct ist v); + +/* Returns the number of bytes required to encode the string length <len>. The + * number of usable bits is an integral multiple of 7 plus 6 for the last byte. + * The maximum number of bytes returned is 4 (2097279 max length). Larger values + * return 0. + */ +static inline int hpack_len_to_bytes(size_t len) +{ + ssize_t slen = len; + + slen -= 127; + if (__builtin_expect(slen < 0, 1)) + return 1; + if (slen < (1 << 14)) { + if (__builtin_expect(slen < (1 << 7), 1)) + return 2; + else + return 3; + } + if (slen < (1 << 21)) + return 4; + return 0; +} + +/* Encodes <len> into <out>+<pos> and return the new position. The caller is + * responsible for checking for available room using hpack_len_to_bytes() + * first. + */ +static inline int hpack_encode_len(char *out, int pos, int len) +{ + int code = len - 127; + + if (code < 0) { + out[pos++] = len; + } else { + out[pos++] = 127; + for (; code >= 128; code >>= 7) + out[pos++] = code | 128; + out[pos++] = code; + } + return pos; +} + +/* Tries to encode header field index <idx> with short value <val> into the + * aligned buffer <out>. Returns non-zero on success, 0 on failure (buffer + * full). 
The caller is responsible for ensuring that the length of <val> is + * strictly lower than 127, and that <idx> is lower than 64 (static list only), + * and that the buffer is aligned (head==0). + */ +static inline int hpack_encode_short_idx(struct buffer *out, int idx, struct ist val) +{ + if (out->data + 2 + val.len > out->size) + return 0; + + /* literal header field with incremental indexing */ + out->area[out->data++] = idx | 0x40; + out->area[out->data++] = val.len; + ist2bin(&out->area[out->data], val); + out->data += val.len; + return 1; +} + +/* Tries to encode header field index <idx> with long value <val> into the + * aligned buffer <out>. Returns non-zero on success, 0 on failure (buffer + * full). The caller is responsible for ensuring <idx> is lower than 64 (static + * list only), and that the buffer is aligned (head==0). + */ +static inline int hpack_encode_long_idx(struct buffer *out, int idx, struct ist val) +{ + int len = out->data; + + if (!hpack_len_to_bytes(val.len) || + 1 + len + hpack_len_to_bytes(val.len) + val.len > out->size) + return 0; + + /* emit literal with indexing (7541#6.2.1) : + * [ 0 | 1 | Index (6+) ] + */ + out->area[len++] = idx | 0x40; + len = hpack_encode_len(out->area, len, val.len); + memcpy(out->area + len, val.ptr, val.len); + len += val.len; + + out->data = len; + return 1; +} + +/* Tries to encode a :status pseudo-header with the integer status <status> + * into the aligned buffer <out>. Returns non-zero on success, 0 on failure + * (buffer full). The caller is responsible for ensuring that the status is + * comprised between 100 and 999 inclusive and that the buffer is aligned. It's + * inlined because it's easily optimizable by the compiler. 
+ */
+static inline int hpack_encode_int_status(struct buffer *out, unsigned int status)
+{
+	int len = out->data;
+	int size = out->size;
+	unsigned char c = 0;
+
+	/* try to emit a single byte code : 0x88 to 0x8e are the indexed
+	 * representations selected below for 200, 204, 206, 304, 400, 404
+	 * and 500 respectively. <c> stays 0 for any other status.
+	 */
+	len++;
+	if (__builtin_expect(len > size, 0))
+		goto fail;
+
+	c = (status <= 304) ?
+		(status <= 204) ?
+			(status == 204) ? 0x89 :
+			(status == 200) ? 0x88 :
+			0: /* > 204 */
+			(status == 304) ? 0x8b :
+			(status == 206) ? 0x8a :
+			0:
+		(status <= 404) ?
+			(status == 404) ? 0x8d :
+			(status == 400) ? 0x8c :
+			0: /* > 404 */
+			(status == 500) ? 0x8e :
+			0;
+
+	if (c)
+		goto last;
+
+	/* fall back to literal : 5 bytes in total, the first of which was
+	 * already accounted for above.
+	 */
+	len += 4;
+	if (__builtin_expect(len > size, 0))
+		goto fail;
+
+	/* basic encoding of the status code */
+	out->area[len - 5] = 0x48; // indexed name -- name=":status" (idx 8)
+	out->area[len - 4] = 0x03; // 3 bytes status
+	out->area[len - 3] = '0' + status / 100;
+	out->area[len - 2] = '0' + status / 10 % 10;
+	c = '0' + status % 10;
+ last:
+	out->area[len - 1] = c; /* either the indexed code or the last digit */
+	out->data = len;
+	return 1;
+ fail:
+	return 0;
+}
+
+/* Tries to encode a :status pseudo-header with the integer status <status>
+ * also represented by <str> into the aligned buffer <out>. Returns non-zero
+ * on success or 0 on failure (buffer full). The caller is responsible for
+ * ensuring that the status is comprised between 100 and 999 inclusive, that
+ * <str> contains a valid representation of the numerical value, and that the
+ * buffer is aligned. This version is preferred when the caller already knows
+ * a string representation of the status because it avoids the computation in
+ * the uncompressed case. It's inlined because it's easily optimizable.
+ * Only 200 and 304 use the single-byte indexed form here; any other status,
+ * including the other codes known to hpack_encode_int_status(), is sent as a
+ * short literal built from <str>.
+ */
+static inline int hpack_encode_str_status(struct buffer *out, unsigned int status, struct ist str)
+{
+	/* don't try too hard, we already have the ASCII value for less common cases */
+	if (status == 200 || status == 304) {
+		if (out->data >= out->size)
+			return 0;
+		out->area[out->data] = (status == 304) ? 0x8b : 0x88;
+		out->data++;
+		return 1;
+	}
+	return hpack_encode_short_idx(out, 8, str); // name=":status" (idx 8)
+}
+
+/* Tries to encode a :method pseudo-header with the method in <meth>, which
+ * also exists as a string in <str>, into the aligned buffer <out>. Returns
+ * non-zero on success or 0 on failure (buffer full). The caller is responsible
+ * for ensuring that the string matches <meth>, that it's smaller than 127
+ * bytes, and that the buffer is aligned. If <meth> is unknown then using
+ * HTTP_METH_OTHER will lead to the string being encoded as a literal. It's
+ * inlined because it's easily optimizable.
+ * Only GET and POST are emitted as single-byte indexed fields. Every other
+ * method, as well as GET/POST when <out> has no room left, goes through
+ * hpack_encode_short_idx(), which is then the one reporting the failure.
+ */
+static inline int hpack_encode_method(struct buffer *out, enum http_meth_t meth, struct ist str)
+{
+	if (out->data < out->size && meth == HTTP_METH_GET)
+		out->area[out->data++] = 0x82; // indexed field : idx[02]=(":method", "GET")
+	else if (out->data < out->size && meth == HTTP_METH_POST)
+		out->area[out->data++] = 0x83; // indexed field : idx[03]=(":method", "POST")
+	else
+		return hpack_encode_short_idx(out, 2, str); // name=":method" (idx 2)
+	return 1;
+}
+
+/* Tries to encode a :scheme pseudo-header with the scheme in <scheme>, into
+ * the aligned buffer <out>. Returns non-zero on success or 0 on failure
+ * (buffer full). Only "http" and "https" are recognized and handled as indexed
+ * values, others are turned into short literals. The caller is responsible for
+ * ensuring that the scheme is smaller than 127 bytes, and that the buffer is
+ * aligned. Normally the compiler will detect constant strings in the comparison
+ * if the code remains inlined.
+ */
+static inline int hpack_encode_scheme(struct buffer *out, struct ist scheme)
+{
+	if (out->data < out->size && isteq(scheme, ist("https")))
+		out->area[out->data++] = 0x87; // indexed field : idx[07]=(":scheme", "https")
+	else if (out->data < out->size && isteq(scheme, ist("http")))
+		out->area[out->data++] = 0x86; // indexed field : idx[06]=(":scheme", "http")
+	else
+		return hpack_encode_short_idx(out, 6, scheme); // name=":scheme" (idx 6)
+	return 1;
+}
+
+/* Tries to encode a :path pseudo-header with the path in <path>, into the
+ * aligned buffer <out>. Returns non-zero on success or 0 on failure (buffer
+ * full). The well-known values "/" and "/index.html" are recognized, and other
+ * ones are handled as literals. The caller is responsible for ensuring that
+ * the buffer is aligned. Normally the compiler will detect constant strings
+ * in the comparison if the code remains inlined.
+ * Paths shorter than 127 bytes use hpack_encode_short_idx() and longer ones
+ * hpack_encode_long_idx(), so no length constraint is placed on the caller
+ * here. When <out> is already full the indexed forms are skipped and the
+ * literal encoders report the failure.
+ */
+static inline int hpack_encode_path(struct buffer *out, struct ist path)
+{
+	if (out->data < out->size && isteq(path, ist("/")))
+		out->area[out->data++] = 0x84; // indexed field : idx[04]=(":path", "/")
+	else if (out->data < out->size && isteq(path, ist("/index.html")))
+		out->area[out->data++] = 0x85; // indexed field : idx[05]=(":path", "/index.html")
+	else if (path.len < 127)
+		return hpack_encode_short_idx(out, 4, path); // name=":path" (idx 4)
+	else
+		return hpack_encode_long_idx(out, 4, path); // name=":path" (idx 4)
+	return 1;
+}
+
+
+#endif /* _COMMON_HPACK_ENC_H */
diff --git a/include/haproxy/hpack-huff.h b/include/haproxy/hpack-huff.h
new file mode 100644
index 0000000..f939103
--- /dev/null
+++ b/include/haproxy/hpack-huff.h
@@ -0,0 +1,35 @@
+/*
+ * Huffman decoding and encoding for HPACK (RFC7541)
+ *
+ * Copyright (C) 2014-2020 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
(the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _HAPROXY_HPACK_HUFF_H +#define _HAPROXY_HPACK_HUFF_H + +#include <inttypes.h> + +int huff_enc(const char *s, char *out); +int huff_dec(const uint8_t *huff, int hlen, char *out, int olen); + +#endif /* _HAPROXY_HPACK_HUFF_H */ diff --git a/include/haproxy/hpack-tbl-t.h b/include/haproxy/hpack-tbl-t.h new file mode 100644 index 0000000..4e5d536 --- /dev/null +++ b/include/haproxy/hpack-tbl-t.h @@ -0,0 +1,143 @@ +/* + * HPACK header table management (RFC7541) - type definitions + * + * Copyright (C) 2014-2020 Willy Tarreau <willy@haproxy.org> + * Copyright (C) 2017 HAProxy Technologies + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to 
+ * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _HAPROXY_HPACK_TBL_T_H +#define _HAPROXY_HPACK_TBL_T_H + +#include <inttypes.h> + +/* Dynamic Headers Table, usable for tables up to 4GB long and values of 64kB-1. + * The model can be improved by using offsets relative to the table entry's end + * or to the end of the area, or by moving the descriptors at the end of the + * table and the data at the beginning. This entry is 8 bytes long, which is 1/4 + * of the bookkeeping planned by the HPACK spec. Thus it saves 24 bytes per + * header field, meaning that even with a single header, 24 extra bytes can be + * stored (ie one such descriptor). At 29.2 average bytes per header field as + * found in the hpack test case, that's slightly more than 1.5kB of space saved + * from a 4kB block, resulting in contiguous space almost always being + * available. + * + * Principle: the table is stored in a contiguous array containing both the + * descriptors and the contents. Descriptors are stored at the beginning of the + * array while contents are stored starting from the end. Most of the time there + * is enough room left in the table to insert a new header field, thanks to the + * savings on the descriptor size. Thus by inserting headers from the end it's + * possible to maximize the delay before a collision of DTEs and data. 
In order + * to always insert from the right, we need to keep a reference to the latest + * inserted element and look before it. The last inserted cell's address defines + * the lowest known address still in use, unless the area wraps in which case + * the available space lies between the end of the tail and the beginning of the + * head. + * + * In order to detect collisions between data blocks and DTEs, we also maintain + * an index to the lowest element facing the DTE table, called "front". This one + * is updated each time an element is inserted before it. Once the buffer wraps, + * this element doesn't have to be updated anymore until it is released, in + * which case the buffer doesn't wrap anymore and the front element becomes the + * head again. + * + * Various heuristics are possible concerning the opportunity to wrap the + * entries to limit the risk of collisions with the DTE, but experimentation + * shows that thanks to the important savings made on the descriptors, the + * likeliness of finding a large amount of free space at the end of the area is + * much higher than the risk of colliding, so in the end the most naive + * algorithms work pretty fine. Typical ratios of 1 collision per 2000 requests + * have been observed. + * + * The defragmentation should be rare ; a study on live data shows on average + * 29.2 bytes used per header field. This plus the 32 bytes overhead fix an + * average of 66.9 header fields per 4kB table. This brings a 1606 bytes saving + * using the current storage description, ensuring that oldest headers are + * linearly removed by the sender before fragmentation occurs. This means that + * for all smaller header fields there will not be any requirement to defragment + * the area and most of the time it will even be possible to copy the old values + * directly within the buffer after creating a new entry. 
On average within the
+ * available space there will be enough room to store 1606/(29.2+8)=43 extra
+ * header fields without switching to another place.
+ *
+ * The table header fits in the table itself, it only takes 16 bytes, so in the
+ * worst case (1 single header) it's possible to store 4096 - 16 - 8 = 4072
+ * data bytes, which is larger than the 4064 the protocol requires (4096 - 32).
+ */
+
+/*
+ * Gcc before 3.0 needs [0] to declare a variable-size array
+ */
+#ifndef VAR_ARRAY
+#if defined(__GNUC__) && (__GNUC__ < 3)
+#define VAR_ARRAY	0
+#else
+#define VAR_ARRAY
+#endif
+#endif
+
+/* One dynamic table entry descriptor (4 + 2 + 2 = 8 bytes). The name is
+ * stored at <addr> and the value immediately follows it, at <addr> + <nlen>.
+ * NOTE(review): <addr> was described as "relative to the dte address", but
+ * the accessors in hpack-tbl.h (hpack_get_name(), hpack_get_value()) add it
+ * to the address of the enclosing struct hpack_dht; confirm which wording is
+ * intended.
+ */
+struct hpack_dte {
+	uint32_t addr;  /* storage address, added to the table's base by the accessors */
+	uint16_t nlen;  /* header name length */
+	uint16_t vlen;  /* header value length */
+};
+
+/* Note: the table's head plus a struct hpack_dte must be smaller than or equal to 32
+ * bytes so that a single large header can always fit. Here that's 16 bytes for
+ * the header, plus 8 bytes per slot.
+ * Note that when <used> == 0, front, head, and wrap are undefined.
+ */
+struct hpack_dht {
+	uint32_t size;   /* allocated table size in bytes */
+	uint32_t total;  /* sum of nlen + vlen in bytes */
+	uint16_t front;  /* slot number of the first node after the idx table */
+	uint16_t wrap;   /* number of allocated slots, wraps here */
+	uint16_t head;   /* last inserted slot number */
+	uint16_t used;   /* number of slots in use */
+	struct hpack_dte dte[VAR_ARRAY]; /* dynamic table entries, starting right after the 16 bytes above */
+};
+
+/* supported hpack encoding/decoding errors. HPACK_ERR_NONE is zero and the
+ * other codes take the following consecutive positive values.
+ */
+enum {
+	HPACK_ERR_NONE = 0,           /* no error */
+	HPACK_ERR_ALLOC_FAIL,         /* memory allocation error */
+	HPACK_ERR_UNKNOWN_OPCODE,     /* invalid first byte */
+	HPACK_ERR_TRUNCATED,          /* truncated stream */
+	HPACK_ERR_HUFFMAN,            /* huffman decoding error */
+	HPACK_ERR_INVALID_PHDR,       /* invalid pseudo header field name */
+	HPACK_ERR_MISPLACED_PHDR,     /* pseudo header field after a regular header field */
+	HPACK_ERR_DUPLICATE_PHDR,     /* duplicate pseudo header field */
+	HPACK_ERR_DHT_INSERT_FAIL,    /* failed to insert into DHT */
+	HPACK_ERR_TOO_LARGE,          /* decoded request/response is too large */
+	HPACK_ERR_MISSING_METHOD,     /* :method is missing */
+	HPACK_ERR_MISSING_SCHEME,     /* :scheme is missing */
+	HPACK_ERR_MISSING_PATH,       /* :path is missing */
+	HPACK_ERR_MISSING_AUTHORITY,  /* :authority is missing with CONNECT */
+	HPACK_ERR_SCHEME_NOT_ALLOWED, /* :scheme not allowed with CONNECT */
+	HPACK_ERR_PATH_NOT_ALLOWED,   /* :path not allowed with CONNECT */
+	HPACK_ERR_INVALID_ARGUMENT,   /* an invalid argument was passed */
+};
+
+/* static header table as in RFC7541 Appendix A. [0] unused.
*/ +#define HPACK_SHT_SIZE 62 + +#endif /* _HAPROXY_HPACK_TBL_T_H */ diff --git a/include/haproxy/hpack-tbl.h b/include/haproxy/hpack-tbl.h new file mode 100644 index 0000000..02cf7db --- /dev/null +++ b/include/haproxy/hpack-tbl.h @@ -0,0 +1,184 @@ +/* + * HPACK header table management (RFC7541) - prototypes + * + * Copyright (C) 2014-2020 Willy Tarreau <willy@haproxy.org> + * Copyright (C) 2017 HAProxy Technologies + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _HAPROXY_HPACK_TBL_H +#define _HAPROXY_HPACK_TBL_H + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/hpack-tbl-t.h> +#include <haproxy/http-hdr-t.h> + +/* when built outside of haproxy, HPACK_STANDALONE must be defined, and + * pool_head_hpack_tbl->size must be set to the DHT size. 
+ */ +#ifndef HPACK_STANDALONE +#include <haproxy/pool.h> +#define hpack_alloc(pool) pool_alloc(pool) +#define hpack_free(pool, ptr) pool_free(pool, ptr) +#else +#include <stdlib.h> +#include <haproxy/pool-t.h> +#define hpack_alloc(pool) malloc(pool->size) +#define hpack_free(pool, ptr) free(ptr) +#endif + +extern const struct http_hdr hpack_sht[HPACK_SHT_SIZE]; +extern struct pool_head *pool_head_hpack_tbl; + +int __hpack_dht_make_room(struct hpack_dht *dht, unsigned int needed); +int hpack_dht_insert(struct hpack_dht *dht, struct ist name, struct ist value); + +#ifdef DEBUG_HPACK +void hpack_dht_dump(FILE *out, const struct hpack_dht *dht); +void hpack_dht_check_consistency(const struct hpack_dht *dht); +#endif + +/* return a pointer to the entry designated by index <idx> (starting at 1) or + * NULL if this index is not there. + */ +static inline const struct hpack_dte *hpack_get_dte(const struct hpack_dht *dht, uint16_t idx) +{ + idx--; + + if (idx >= dht->used) + return NULL; + + if (idx <= dht->head) + idx = dht->head - idx; + else + idx = dht->head - idx + dht->wrap; + + return &dht->dte[idx]; +} + +/* returns non-zero if <idx> is valid for table <dht> */ +static inline int hpack_valid_idx(const struct hpack_dht *dht, uint32_t idx) +{ + return idx < dht->used + HPACK_SHT_SIZE; +} + +/* return a pointer to the header name for entry <dte>. */ +static inline struct ist hpack_get_name(const struct hpack_dht *dht, const struct hpack_dte *dte) +{ + struct ist ret = { + .ptr = (void *)dht + dte->addr, + .len = dte->nlen, + }; + return ret; +} + +/* return a pointer to the header value for entry <dte>. 
*/ +static inline struct ist hpack_get_value(const struct hpack_dht *dht, const struct hpack_dte *dte) +{ + struct ist ret = { + .ptr = (void *)dht + dte->addr + dte->nlen, + .len = dte->vlen, + }; + return ret; +} + +/* takes an idx, returns the associated name */ +static inline struct ist hpack_idx_to_name(const struct hpack_dht *dht, uint32_t idx) +{ + const struct hpack_dte *dte; + + if (idx < HPACK_SHT_SIZE) + return hpack_sht[idx].n; + + dte = hpack_get_dte(dht, idx - HPACK_SHT_SIZE + 1); + if (!dte) + return ist("### ERR ###"); // error + + return hpack_get_name(dht, dte); +} + +/* takes an idx, returns the associated value */ +static inline struct ist hpack_idx_to_value(const struct hpack_dht *dht, uint32_t idx) +{ + const struct hpack_dte *dte; + + if (idx < HPACK_SHT_SIZE) + return hpack_sht[idx].v; + + dte = hpack_get_dte(dht, idx - HPACK_SHT_SIZE + 1); + if (!dte) + return ist("### ERR ###"); // error + + return hpack_get_value(dht, dte); +} + +/* returns the slot number of the oldest entry (tail). Must not be used on an + * empty table. + */ +static inline unsigned int hpack_dht_get_tail(const struct hpack_dht *dht) +{ + return ((dht->head + 1U < dht->used) ? dht->wrap : 0) + dht->head + 1U - dht->used; +} + +/* Purges table dht until a header field of <needed> bytes fits according to + * the protocol (adding 32 bytes overhead). Returns non-zero on success, zero + * on failure (ie: table empty but still not sufficient). 
+ */ +static inline int hpack_dht_make_room(struct hpack_dht *dht, unsigned int needed) +{ + if (dht->used * 32 + dht->total + needed + 32 <= dht->size) + return 1; + else if (!dht->used) + return 0; + + return __hpack_dht_make_room(dht, needed); +} + +/* allocate a dynamic headers table of <size> bytes and return it initialized */ +static inline void hpack_dht_init(struct hpack_dht *dht, uint32_t size) +{ + dht->size = size; + dht->total = 0; + dht->used = 0; +} + +/* allocate a dynamic headers table from the pool and return it initialized */ +static inline struct hpack_dht *hpack_dht_alloc() +{ + struct hpack_dht *dht; + + if (unlikely(!pool_head_hpack_tbl)) + return NULL; + + dht = hpack_alloc(pool_head_hpack_tbl); + if (dht) + hpack_dht_init(dht, pool_head_hpack_tbl->size); + return dht; +} + +/* free a dynamic headers table */ +static inline void hpack_dht_free(struct hpack_dht *dht) +{ + hpack_free(pool_head_hpack_tbl, dht); +} + +#endif /* _HAPROXY_HPACK_TBL_H */ diff --git a/include/haproxy/hq_interop.h b/include/haproxy/hq_interop.h new file mode 100644 index 0000000..eb6ebf6 --- /dev/null +++ b/include/haproxy/hq_interop.h @@ -0,0 +1,6 @@ +#ifndef _HAPROXY_HQ_INTEROP_H_ +#define _HAPROXY_HQ_INTEROP_H_ + +extern const struct qcc_app_ops hq_interop_ops; + +#endif /* _HAPROXY_HQ_INTEROP_H_ */ diff --git a/include/haproxy/http-hdr-t.h b/include/haproxy/http-hdr-t.h new file mode 100644 index 0000000..3534f43 --- /dev/null +++ b/include/haproxy/http-hdr-t.h @@ -0,0 +1,41 @@ +/* + * include/haproxy/http-hdr-t.h + * HTTP header management (new model) - type definitions + * + * Copyright (C) 2014-2020 Willy Tarreau <willy@haproxy.org> + * Copyright (C) 2017 HAProxy Technologies + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, 
+ * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _HAPROXY_HTTP_HDR_T_H +#define _HAPROXY_HTTP_HDR_T_H + +#include <import/ist.h> + +/* a header field made of a name and a value. Such structure stores 4 longs so + * it takes 16 bytes on 32-bit systems and 32 bytes on 64-bit systems. 
+ */
+struct http_hdr {
+	struct ist n; /* name; http_del_hdr() stops on the entry whose name length is zero */
+	struct ist v; /* value */
+};
+
+#endif /* _HAPROXY_HTTP_HDR_T_H */
diff --git a/include/haproxy/http-hdr.h b/include/haproxy/http-hdr.h
new file mode 100644
index 0000000..e9e253b
--- /dev/null
+++ b/include/haproxy/http-hdr.h
@@ -0,0 +1,60 @@
+/*
+ * include/haproxy/http-hdr.h
+ * HTTP header management (new model) - functions
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _HAPROXY_HTTP_HDR_H
+#define _HAPROXY_HTTP_HDR_H
+
+#include <import/ist.h>
+#include <haproxy/http-hdr-t.h>
+
+/* sets an http_hdr <hdr> to name <n> and value <v>. Useful to avoid casts in
+ * immediate assignments.
+ */
+static inline void http_set_hdr(struct http_hdr *hdr, const struct ist n, const struct ist v)
+{
+	hdr->n = n;
+	hdr->v = v;
+}
+
+/* removes all occurrences of header name <n> in list <hdr> and returns the new count. The
+ * list must be terminated by the empty header. Names are compared using
+ * isteqi() and the remaining entries are packed in place towards the
+ * beginning of the array. The terminating entry goes through the same test as
+ * the other ones: as long as it does not compare equal to <n>, it is moved
+ * down as well and is included in the returned count.
+ */
+static inline int http_del_hdr(struct http_hdr *hdr, const struct ist n)
+{
+	int src = 0, dst = 0;
+
+	do {
+		if (!isteqi(hdr[src].n, n)) {
+			if (src != dst) /* only copy once a hole was created */
+				hdr[dst] = hdr[src];
+			dst++;
+		}
+	} while (hdr[src++].n.len); /* the entry with an empty name is the last one visited */
+
+	return dst;
+}
+#endif /* _HAPROXY_HTTP_HDR_H */
diff --git a/include/haproxy/http-t.h b/include/haproxy/http-t.h
new file mode 100644
index 0000000..3165082
--- /dev/null
+++ b/include/haproxy/http-t.h
@@ -0,0 +1,184 @@
+/*
+ * include/haproxy/http-t.h
+ *
+ * Version-agnostic and implementation-agnostic HTTP protocol definitions.
+ *
+ * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_HTTP_T_H
+#define _HAPROXY_HTTP_T_H
+
+#include <inttypes.h>
+#include <import/ist.h>
+#include <haproxy/buf-t.h>
+
+/*
+ * some macros mainly used when parsing header fields.
+ * from RFC7230:
+ *   CTL                 = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
+ *   SEP                 = one of the 17 defined separators or SP or HT
+ *   LWS                 = CR, LF, SP or HT
+ *   SPHT                = SP or HT. Use this macro and not a boolean expression for best speed.
+ *   CRLF                = CR or LF. Use this macro and not a boolean expression for best speed.
+ *   token               = any CHAR except CTL or SEP. Use this macro and not a boolean expression for best speed.
+ *
+ * added for ease of use:
+ *   ver_token           = 'H', 'P', 'T', '/', '.', and digits.
+ *
+ * Each HTTP_FLG_* value below is a distinct bit, and each HTTP_IS_*() macro
+ * tests the matching bit in http_char_classes[], indexed by its argument cast
+ * to uint8_t. The macros thus return either zero or the flag's value, not
+ * necessarily 1.
+ */
+#define HTTP_FLG_CTL  0x01
+#define HTTP_FLG_SEP  0x02
+#define HTTP_FLG_LWS  0x04
+#define HTTP_FLG_SPHT 0x08
+#define HTTP_FLG_CRLF 0x10
+#define HTTP_FLG_TOK  0x20
+#define HTTP_FLG_VER  0x40
+#define HTTP_FLG_DIG  0x80
+
+#define HTTP_IS_CTL(x)       (http_char_classes[(uint8_t)(x)] & HTTP_FLG_CTL)
+#define HTTP_IS_SEP(x)       (http_char_classes[(uint8_t)(x)] & HTTP_FLG_SEP)
+#define HTTP_IS_LWS(x)       (http_char_classes[(uint8_t)(x)] & HTTP_FLG_LWS)
+#define HTTP_IS_SPHT(x)      (http_char_classes[(uint8_t)(x)] & HTTP_FLG_SPHT)
+#define HTTP_IS_CRLF(x)      (http_char_classes[(uint8_t)(x)] & HTTP_FLG_CRLF)
+#define HTTP_IS_TOKEN(x)     (http_char_classes[(uint8_t)(x)] & HTTP_FLG_TOK)
+#define HTTP_IS_VER_TOKEN(x) (http_char_classes[(uint8_t)(x)] & HTTP_FLG_VER)
+#define HTTP_IS_DIGIT(x)     (http_char_classes[(uint8_t)(x)] & HTTP_FLG_DIG)
+
+/* Known HTTP methods */
+enum http_meth_t {
+	HTTP_METH_OPTIONS,
+	HTTP_METH_GET,
+	HTTP_METH_HEAD,
+	HTTP_METH_POST,
+	HTTP_METH_PUT,
+	HTTP_METH_DELETE,
+	HTTP_METH_TRACE,
+	HTTP_METH_CONNECT,
+	HTTP_METH_OTHER, /* Must be the last entry: http.h sizes http_known_methods[] with it */
+} __attribute__((packed));
+
+/* Known HTTP authentication schemes */
+enum ht_auth_m {
+	HTTP_AUTH_WRONG		= -1,		/* missing or unknown */
+	HTTP_AUTH_UNKNOWN	= 0,
+	HTTP_AUTH_BASIC,
+	HTTP_AUTH_DIGEST,
+	HTTP_AUTH_BEARER,
+} __attribute__((packed));
+
+/* All implemented HTTP status codes. The values are array indexes, not the
+ * status codes themselves; HTTP_ERR_SIZE is the number of entries and sizes
+ * http_err_codes[] and http_err_msgs[] in http.h.
+ */
+enum {
+	HTTP_ERR_200 = 0,
+	HTTP_ERR_400,
+	HTTP_ERR_401,
+	HTTP_ERR_403,
+	HTTP_ERR_404,
+	HTTP_ERR_405,
+	HTTP_ERR_407,
+	HTTP_ERR_408,
+	HTTP_ERR_410,
+	HTTP_ERR_413,
+	HTTP_ERR_421,
+	HTTP_ERR_422,
+	HTTP_ERR_425,
+	HTTP_ERR_429,
+	HTTP_ERR_500,
+	HTTP_ERR_501,
+	HTTP_ERR_502,
+	HTTP_ERR_503,
+	HTTP_ERR_504,
+	HTTP_ERR_SIZE
+};
+
+/* Note: the strings below make use of chunks. Chunks may carry an allocated
+ * size in addition to the length. The size counts from the beginning (str)
+ * to the end. If the size is unknown, it MUST be zero, in which case the
+ * sample will automatically be duplicated when a change larger than <len> has
+ * to be performed. Thus it is safe to always set size to zero.
+ */
+struct http_meth {
+	enum http_meth_t meth;
+	struct buffer str;
+};
+
+struct http_auth_data {
+	enum ht_auth_m method;                /* one of HTTP_AUTH_* */
+	/* 7 bytes unused here */
+	struct buffer method_data;            /* points to the credential part from 'Authorization:' header */
+	char *user, *pass;                    /* extracted username & password */
+};
+
+struct http_method_desc {
+	enum http_meth_t meth;
+	const struct ist text;
+};
+
+enum http_etag_type {
+	ETAG_INVALID = 0,
+	ETAG_STRONG,
+	ETAG_WEAK
+};
+
+/* Indicates what elements have been parsed in a HTTP URI. */
+enum http_uri_parser_state {
+	URI_PARSER_STATE_BEFORE = 0,
+	URI_PARSER_STATE_SCHEME_DONE,
+	URI_PARSER_STATE_AUTHORITY_DONE,
+	URI_PARSER_STATE_PATH_DONE,
+};
+
+/* HTTP URI format as described in rfc 7230 5.3.
+ * As the first character is used to identify the format, absolute-form and
+ * authority-form are not differentiated.
+ */
+enum http_uri_parser_format {
+	URI_PARSER_FORMAT_EMPTY,
+	URI_PARSER_FORMAT_ASTERISK,
+	URI_PARSER_FORMAT_ABSPATH,
+	URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY,
+};
+
+/* Parser context for a HTTP URI. Must be initialized with http_uri_parser_init
+ * before its usage.
+ *
+ * The parser API is not idempotent.
For an initialized parser instance, each
+ * URI element can be extracted only once using its related function :
+ * - http_parse_scheme
+ * - http_parse_authority
+ * - http_parse_path
+ *
+ * Also each element must be extracted in the order of its appearance in the
+ * URI according to the rfc 3986. However, it is possible to skip the parsing
+ * of elements which are of no interest.
+ *
+ * If the above rules are not respected, the parsing functions return an empty
+ * ist.
+ */
+struct http_uri_parser {
+	struct ist uri;                     /* HTTP URI for parsing */
+	enum http_uri_parser_state state;   /* already parsed HTTP URI elements */
+	enum http_uri_parser_format format; /* rfc 7230 5.3 HTTP URI format, derived from the first character of <uri> */
+};
+
+#endif /* _HAPROXY_HTTP_T_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/haproxy/http.h b/include/haproxy/http.h
new file mode 100644
index 0000000..2992640
--- /dev/null
+++ b/include/haproxy/http.h
@@ -0,0 +1,222 @@
+/*
+ * include/haproxy/http.h
+ *
+ * Functions for version-agnostic and implementation-agnostic HTTP protocol.
+ *
+ * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTP_H +#define _HAPROXY_HTTP_H + +#include <string.h> +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/http-t.h> + +extern const int http_err_codes[HTTP_ERR_SIZE]; +extern const char *http_err_msgs[HTTP_ERR_SIZE]; +extern const struct ist http_known_methods[HTTP_METH_OTHER]; +extern const uint8_t http_char_classes[256]; + +enum http_meth_t find_http_meth(const char *str, const int len); +int http_get_status_idx(unsigned int status); +const char *http_get_reason(unsigned int status); +struct ist http_get_host_port(const struct ist host); +int http_is_default_port(const struct ist schm, const struct ist port); +int http_validate_scheme(const struct ist schm); +struct ist http_parse_scheme(struct http_uri_parser *parser); +struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo); +struct ist http_parse_path(struct http_uri_parser *parser); +int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len, + int not_first); +int http_header_match2(const char *hdr, const char *end, + const char *name, int len); +char *http_find_hdr_value_end(char *s, const char *e); +char *http_find_cookie_value_end(char *s, const char *e); +char *http_extract_cookie_value(char *hdr, const char *hdr_end, + char *cookie_name, size_t cookie_name_l, + int list, char **value, size_t *value_l); +char *http_extract_next_cookie_name(char *hdr_beg, char *hdr_end, int is_req, + char **ptr, size_t *len); +int http_parse_qvalue(const char *qvalue, const char **end); +const char *http_find_url_param_pos(const char **chunks, + const char* url_param_name, + size_t url_param_name_l, char delim, char insensitive); +int http_find_next_url_param(const char **chunks, + const char* url_param_name, 
size_t url_param_name_l, + const char **vstart, const char **vend, char delim, char insensitive); + +int http_parse_header(const struct ist hdr, struct ist *name, struct ist *value); +int http_parse_stline(const struct ist line, struct ist *p1, struct ist *p2, struct ist *p3); +int http_parse_status_val(const struct ist value, struct ist *status, struct ist *reason); + +int http_compare_etags(struct ist etag1, struct ist etag2); + +struct ist http_trim_leading_spht(struct ist value); +struct ist http_trim_trailing_spht(struct ist value); + +/* + * Given a path string and its length, find the position of beginning of the + * query string. Returns NULL if no query string is found in the path. + * + * Example: if path = "/foo/bar/fubar?yo=mama;ye=daddy", and n = 22: + * + * http_find_param_list(path, n, '?') points to "yo=mama;ye=daddy" string. + */ +static inline char *http_find_param_list(char *path, size_t path_l, char delim) +{ + char *p; + + p = memchr(path, delim, path_l); + return p ? p + 1 : NULL; +} + +static inline int http_is_param_delimiter(char c, char delim) +{ + return c == '&' || c == ';' || c == delim; +} + +/* Match language range with language tag. RFC2616 14.4: + * + * A language-range matches a language-tag if it exactly equals + * the tag, or if it exactly equals a prefix of the tag such + * that the first tag character following the prefix is "-". + * + * Return 1 if the strings match, else return 0. + */ +static inline int http_language_range_match(const char *range, int range_len, + const char *tag, int tag_len) +{ + const char *end = range + range_len; + const char *tend = tag + tag_len; + + while (range < end) { + if (*range == '-' && tag == tend) + return 1; + if (tag == tend || *range != *tag) /* bound tested first: <tag> is never read past <tend> */ + return 0; + range++; + tag++; + } + /* Return true only if the last char of the tag is matched. */ + return tag == tend; +} + +static inline enum http_etag_type http_get_etag_type(const struct ist etag) +{ + /* An ETag must be at least 2 characters.
*/ + if (etag.len < 2) + return ETAG_INVALID; + + /* The last character must be a `"`. */ + if (etag.ptr[etag.len - 1] != '"') + return ETAG_INVALID; + + /* If the ETag starts with a `"` then it is a strong ETag. */ + if (etag.ptr[0] == '"') + return ETAG_STRONG; + + /* If the ETag starts with `W/"` then it is a weak ETag. */ + if (istnmatch(etag, ist("W/\""), 3)) + return ETAG_WEAK; + + return ETAG_INVALID; +} + +/* Initialize a HTTP URI parser to use it with http URI parsing functions. The + * URI format is detected according to its first character. + */ +static inline struct http_uri_parser http_uri_parser_init(const struct ist uri) +{ + struct http_uri_parser parser = { + .uri = uri, + .state = URI_PARSER_STATE_BEFORE, + }; + + /* RFC7230, par. 2.7 : + * Request-URI = "*" | absuri | abspath | authority + */ + + if (!istlen(parser.uri)) { + parser.format = URI_PARSER_FORMAT_EMPTY; + } + else { + /* detect the format according to the first URI character */ + switch (*istptr(parser.uri)) { + case '*': + parser.format = URI_PARSER_FORMAT_ASTERISK; + break; + + case '/': + parser.format = URI_PARSER_FORMAT_ABSPATH; + break; + + default: + parser.format = URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY; + break; + } + } + + return parser; +} + +/* Looks into <ist> for forbidden characters for header values (0x00, 0x0A, + * 0x0D), starting at pointer <start> which must be within <ist>. Returns + * non-zero if such a character is found, 0 otherwise. When run on unlikely + * header match, it's recommended to first check for the presence of control + * chars using ist_find_ctl(). 
+ */ +static inline int http_header_has_forbidden_char(const struct ist ist, const char *start) +{ + do { + if ((uint8_t)*start <= 0x0d && + (1U << (uint8_t)*start) & ((1<<13) | (1<<10) | (1<<0))) + return 1; + start++; + } while (start < istend(ist)); + return 0; +} + +/* Looks into <ist> for forbidden characters for :path values (0x00..0x1F, + * 0x20, 0x23), starting at pointer <start> which must be within <ist>. + * Returns non-zero if such a character is found, 0 otherwise. When run on + * unlikely header match, it's recommended to first check for the presence + * of control chars using ist_find_ctl(). + */ +static inline int http_path_has_forbidden_char(const struct ist ist, const char *start) +{ + do { + if ((uint8_t)*start <= 0x23) { + if ((uint8_t)*start < 0x20) + return 1; + if ((1U << ((uint8_t)*start & 0x1F)) & ((1<<3) | (1<<0))) + return 1; + } + start++; + } while (start < istend(ist)); + return 0; +} + +#endif /* _HAPROXY_HTTP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/http_ana-t.h b/include/haproxy/http_ana-t.h new file mode 100644 index 0000000..5b7342f --- /dev/null +++ b/include/haproxy/http_ana-t.h @@ -0,0 +1,264 @@ +/* + * include/haproxy/http_ana-t.h + * This file contains HTTP protocol definitions. + * + * Copyright (C) 2000-2011 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTO_HTTP_T_H +#define _HAPROXY_PROTO_HTTP_T_H + +#include <haproxy/api-t.h> +#include <haproxy/channel-t.h> +#include <haproxy/http-t.h> + +/* These are the flags that are found in txn->flags */ + +/* action flags. + * Please also update the txn_show_flags() function below in case of changes. + */ +/* Unused: 0x00000001..0x00000004 */ +#define TX_CONST_REPLY 0x00000008 /* The http reply must not be rewritten (don't eval after-response ruleset) */ +#define TX_CLTARPIT 0x00000010 /* the transaction is tarpitted (anti-dos) */ + +/* transaction flags dedicated to cookies : bits values 0x20 to 0x80 (0-7 shift 5) */ +#define TX_CK_NONE 0x00000000 /* this transaction had no cookie */ +#define TX_CK_INVALID 0x00000020 /* this transaction had a cookie which matches no server */ +#define TX_CK_DOWN 0x00000040 /* this transaction had cookie matching a down server */ +#define TX_CK_VALID 0x00000060 /* this transaction had cookie matching a valid server */ +#define TX_CK_EXPIRED 0x00000080 /* this transaction had an expired cookie (idle for too long) */ +#define TX_CK_OLD 0x000000A0 /* this transaction had too old a cookie (offered too long ago) */ +#define TX_CK_UNUSED 0x000000C0 /* this transaction had a cookie but it was not used (eg: use-server was preferred) */ +#define TX_CK_MASK 0x000000E0 /* mask to get this transaction's cookie flags */ +#define TX_CK_SHIFT 5 /* bit shift */ + +/* response cookie information, bits values 0x100 to 0x700 (0-7 shift 8) */ +#define TX_SCK_NONE 0x00000000 /* no cookie found in the response */ +#define TX_SCK_FOUND 0x00000100 /* a persistence cookie was found and forwarded */ +#define TX_SCK_DELETED 0x00000200 /* an existing persistence cookie was deleted */ +#define TX_SCK_INSERTED 0x00000300 
/* a persistence cookie was inserted */ +#define TX_SCK_REPLACED 0x00000400 /* a persistence cookie was present and rewritten */ +#define TX_SCK_UPDATED 0x00000500 /* an expirable persistence cookie was updated */ +#define TX_SCK_MASK 0x00000700 /* mask to get the set-cookie field */ +#define TX_SCK_SHIFT 8 /* bit shift */ + +#define TX_SCK_PRESENT 0x00000800 /* a cookie was found in the server's response */ + +/* cacheability management, bits values 0x1000 to 0x3000 (0-3 shift 12) */ +#define TX_CACHEABLE 0x00001000 /* at least part of the response is cacheable */ +#define TX_CACHE_COOK 0x00002000 /* a cookie in the response is cacheable */ +#define TX_CACHE_IGNORE 0x00004000 /* do not retrieve object from cache */ +#define TX_CACHE_SHIFT 12 /* bit shift */ + +#define TX_CON_WANT_TUN 0x00008000 /* Will be a tunnel (CONNECT or 101-Switching-Protocol) */ + +#define TX_CACHE_HAS_SEC_KEY 0x00010000 /* secondary key building succeeded */ + +#define TX_USE_PX_CONN 0x00020000 /* Use "Proxy-Connection" instead of "Connection" */ + +/* used only for keep-alive purposes, to indicate we're on a second transaction */ +#define TX_NOT_FIRST 0x00040000 /* the transaction is not the first one */ + +#define TX_L7_RETRY 0x000800000 /* The transaction may attempt L7 retries */ +#define TX_D_L7_RETRY 0x001000000 /* Disable L7 retries on this transaction, even if configured to do it */ + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG and __APPEND_ENUM macros. The new end of the buffer is + * returned. + */ +static forceinline char *txn_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) +#define _e(m, e, ...) 
__APPEND_ENUM(buf, len, delim, flg, m, e, #e, __VA_ARGS__) + /* prologue */ + _(0); + /* flags & enums */ + _(TX_SCK_PRESENT, _(TX_CACHEABLE, _(TX_CACHE_COOK, _(TX_CACHE_IGNORE, + _(TX_CON_WANT_TUN, _(TX_CACHE_HAS_SEC_KEY, _(TX_USE_PX_CONN, + _(TX_NOT_FIRST, _(TX_L7_RETRY, _(TX_D_L7_RETRY)))))))))); + + _e(TX_SCK_MASK, TX_SCK_FOUND, _e(TX_SCK_MASK, TX_SCK_DELETED, + _e(TX_SCK_MASK, TX_SCK_INSERTED, _e(TX_SCK_MASK, TX_SCK_REPLACED, + _e(TX_SCK_MASK, TX_SCK_UPDATED))))); + + _e(TX_CK_MASK, TX_CK_INVALID, _e(TX_CK_MASK, TX_CK_DOWN, + _e(TX_CK_MASK, TX_CK_VALID, _e(TX_CK_MASK, TX_CK_EXPIRED, + _e(TX_CK_MASK, TX_CK_OLD, _e(TX_CK_MASK, TX_CK_UNUSED)))))); + + _(TX_CONST_REPLY, _(TX_CLTARPIT)); + /* epilogue */ + _(~0U); + return buf; +#undef _e +#undef _ +} + + +/* + * HTTP message status flags (msg->flags). + * Please also update the hmsg_show_flags() function below in case of changes. + */ +#define HTTP_MSGF_CNT_LEN 0x00000001 /* content-length was found in the message */ +#define HTTP_MSGF_TE_CHNK 0x00000002 /* transfer-encoding: chunked was found */ + +/* if this flag is not set in either direction, we may be forced to complete a + * connection as a half-way tunnel (eg if no content-length appears in a 1.1 + * response, but the request is correctly sized) + */ +#define HTTP_MSGF_XFER_LEN 0x00000004 /* message xfer size can be determined */ +#define HTTP_MSGF_VER_11 0x00000008 /* the message is HTTP/1.1 or above */ + +#define HTTP_MSGF_SOFT_RW 0x00000010 /* soft header rewrites, no error triggered */ + +#define HTTP_MSGF_COMPRESSING 0x00000020 /* data compression is in progress */ + +#define HTTP_MSGF_BODYLESS 0x00000040 /* The message has no body (content-length = 0) */ +#define HTTP_MSGF_CONN_UPG 0x00000080 /* The message contains "Connection: Upgrade" header */ + +#define HTTP_MSGF_EXPECT_CHECKED 0x00000100 /* Expect header was already handled, if any */ + +/* This function is used to report flags in debugging tools.
Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *hmsg_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(HTTP_MSGF_CNT_LEN, _(HTTP_MSGF_TE_CHNK, _(HTTP_MSGF_XFER_LEN, + _(HTTP_MSGF_VER_11, _(HTTP_MSGF_SOFT_RW, _(HTTP_MSGF_COMPRESSING, + _(HTTP_MSGF_BODYLESS, _(HTTP_MSGF_CONN_UPG, _(HTTP_MSGF_EXPECT_CHECKED))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + + +/* Maximum length of the cache secondary key (sum of all the possible parts of + * the secondary key). The actual keys might be smaller for some + * request/response pairs, because they depend on the responses' optional Vary + * header. The different sizes can be found in the vary_information object (see + * cache.c).*/ +#define HTTP_CACHE_SEC_KEY_LEN (sizeof(uint32_t)+sizeof(uint64_t)+sizeof(uint64_t)) + + +/* Redirect flags */ +enum { + REDIRECT_FLAG_NONE = 0, + REDIRECT_FLAG_DROP_QS = 1, /* drop query string */ + REDIRECT_FLAG_APPEND_SLASH = 2, /* append a slash if missing at the end */ + REDIRECT_FLAG_FROM_REQ = 4, /* redirect rule on the request path */ + REDIRECT_FLAG_IGNORE_EMPTY = 8, /* silently ignore empty location expressions */ +}; + +/* Redirect types (location, prefix, extended ) */ +enum { + REDIRECT_TYPE_NONE = 0, /* no redirection */ + REDIRECT_TYPE_LOCATION, /* location redirect */ + REDIRECT_TYPE_PREFIX, /* prefix redirect */ + REDIRECT_TYPE_SCHEME, /* scheme redirect (eg: switch from http to https) */ +}; + +/* Persist types (force-persist, ignore-persist) */ +enum { + PERSIST_TYPE_NONE = 0, /* no persistence */ + PERSIST_TYPE_FORCE, /* force-persist */ + PERSIST_TYPE_IGNORE, /* ignore-persist */ +}; + +/* final results for http-request rules */ +enum rule_result { + HTTP_RULE_RES_CONT = 0, /* nothing special, 
continue rules evaluation */ + HTTP_RULE_RES_YIELD, /* call me later because some data is missing. */ + HTTP_RULE_RES_STOP, /* stopped processing on an accept */ + HTTP_RULE_RES_DENY, /* deny (or tarpit if TX_CLTARPIT) */ + HTTP_RULE_RES_ABRT, /* abort request, msg already sent (eg: auth) */ + HTTP_RULE_RES_DONE, /* processing done, stop processing (eg: redirect) */ + HTTP_RULE_RES_BADREQ, /* bad request */ + HTTP_RULE_RES_ERROR, /* Internal error */ +}; + +/* Legacy version of the HTTP/1 message state, used by the channels, should + * ultimately be removed. + */ +enum h1_state { + HTTP_MSG_RQBEFORE = 0, // request: leading LF, before start line + HTTP_MSG_RPBEFORE = 1, // response: leading LF, before start line + + /* Body processing. + * The state HTTP_MSG_BODY is a delimiter to know if we're waiting for headers + * or body. All the sub-states below also indicate we're processing the body, + * with some additional information. + */ + HTTP_MSG_BODY = 2, // parsing body at end of headers + HTTP_MSG_DATA = 3, // skipping data chunk / content-length data + /* we enter this state when we've received the end of the current message */ + HTTP_MSG_ENDING = 4, // message end received, wait that the filters end too + HTTP_MSG_DONE = 5, // message end received, waiting for resync or close + HTTP_MSG_CLOSING = 6, // shutdown_w done, not all bytes sent yet + HTTP_MSG_CLOSED = 7, // shutdown_w done, all bytes sent + HTTP_MSG_TUNNEL = 8, // tunneled data after DONE +} __attribute__((packed)); + + +/* This is the state of an HTTP seen from the analyzers point of view. It can be + * either a request message or a response message. + */ +struct http_msg { + enum h1_state msg_state; /* where we are in the current message parsing */ + /* 3 bytes unused here */ + unsigned int flags; /* flags describing the message (HTTP version, ...) */ + struct channel *chn; /* pointer to the channel transporting the message */ +}; + + +/* This is an HTTP transaction. 
It contains both a request message and a + * response message (which can be empty). + */ +struct http_txn { + struct http_msg rsp; /* HTTP response message */ + struct http_msg req; /* HTTP request message */ + unsigned int flags; /* transaction flags */ + enum http_meth_t meth; /* HTTP method */ + /* 1 unused byte here */ + short status; /* HTTP status sent to the client, negative if not set */ + short server_status; /* HTTP status received from the server, negative if not received */ + struct http_reply *http_reply; /* The HTTP reply to use as reply */ + struct buffer l7_buffer; /* To store the data, in case we have to retry */ + char cache_hash[20]; /* Store the cache hash */ + char cache_secondary_hash[HTTP_CACHE_SEC_KEY_LEN]; /* Optional cache secondary key. */ + char *uri; /* first line if log needed, NULL otherwise */ + char *cli_cookie; /* cookie presented by the client, in capture mode */ + char *srv_cookie; /* cookie presented by the server, in capture mode */ + int cookie_first_date; /* if non-zero, first date the expirable cookie was set/seen */ + int cookie_last_date; /* if non-zero, last date the expirable cookie was set/seen */ + + struct http_auth_data auth; /* HTTP auth data */ +}; + +#endif /* _HAPROXY_PROTO_HTTP_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/http_ana.h b/include/haproxy/http_ana.h new file mode 100644 index 0000000..2cc6516 --- /dev/null +++ b/include/haproxy/http_ana.h @@ -0,0 +1,91 @@ +/* + * include/haproxy/http_ana.h + * This file contains HTTP protocol definitions. + * + * Copyright (C) 2000-2011 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTO_HTTP_H +#define _HAPROXY_PROTO_HTTP_H + +#include <haproxy/api.h> +#include <haproxy/channel-t.h> +#include <haproxy/http_ana-t.h> +#include <haproxy/htx-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/stream-t.h> + +extern struct pool_head *pool_head_uniqueid; +extern struct pool_head *pool_head_http_txn; + +int http_wait_for_request(struct stream *s, struct channel *req, int an_bit); +int http_process_req_common(struct stream *s, struct channel *req, int an_bit, struct proxy *px); +int http_process_request(struct stream *s, struct channel *req, int an_bit); +int http_process_tarpit(struct stream *s, struct channel *req, int an_bit); +int http_wait_for_request_body(struct stream *s, struct channel *req, int an_bit); +int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit); +int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, struct proxy *px); +int http_request_forward_body(struct stream *s, struct channel *req, int an_bit); +int http_response_forward_body(struct stream *s, struct channel *res, int an_bit); +int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struct http_txn *txn); +int http_eval_after_res_rules(struct stream *s); +int http_replace_hdrs(struct stream* s, struct htx *htx, struct ist name, const char *str, struct my_regex *re, int full); +int http_req_replace_stline(int action, const char *replace, int len, + struct proxy *px, struct stream *s); +int 
http_res_set_status(unsigned int status, struct ist reason, struct stream *s); +void http_check_request_for_cacheability(struct stream *s, struct channel *req); +void http_check_response_for_cacheability(struct stream *s, struct channel *res); +enum rule_result http_wait_for_msg_body(struct stream *s, struct channel *chn, unsigned int time, unsigned int bytes); +void http_perform_server_redirect(struct stream *s, struct stconn *sc); +void http_server_error(struct stream *s, struct stconn *sc, int err, int finst, struct http_reply *msg); +void http_reply_and_close(struct stream *s, short status, struct http_reply *msg); +void http_return_srv_error(struct stream *s, struct stconn *sc); +struct http_reply *http_error_message(struct stream *s); +int http_reply_to_htx(struct stream *s, struct htx *htx, struct http_reply *reply); +int http_reply_message(struct stream *s, struct http_reply *reply); +int http_forward_proxy_resp(struct stream *s, int final); + +struct http_txn *http_create_txn(struct stream *s); +void http_destroy_txn(struct stream *s); + +void http_set_term_flags(struct stream *s); + +/* for debugging, reports the HTTP/1 message state name (legacy version) */ +static inline const char *h1_msg_state_str(enum h1_state msg_state) +{ + switch (msg_state) { + case HTTP_MSG_RQBEFORE: return "MSG_RQBEFORE"; + case HTTP_MSG_RPBEFORE: return "MSG_RPBEFORE"; + case HTTP_MSG_BODY: return "MSG_BODY"; + case HTTP_MSG_DATA: return "MSG_DATA"; + case HTTP_MSG_ENDING: return "MSG_ENDING"; + case HTTP_MSG_DONE: return "MSG_DONE"; + case HTTP_MSG_CLOSING: return "MSG_CLOSING"; + case HTTP_MSG_CLOSED: return "MSG_CLOSED"; + case HTTP_MSG_TUNNEL: return "MSG_TUNNEL"; + default: return "MSG_??????"; + } +} + +#endif /* _HAPROXY_PROTO_HTTP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/http_client-t.h b/include/haproxy/http_client-t.h new file mode 100644 index 0000000..7ae0e61 --- /dev/null +++ 
b/include/haproxy/http_client-t.h @@ -0,0 +1,69 @@ +#ifndef _HAPROXY_HTTPCLIENT_T_H +#define _HAPROXY_HTTPCLIENT_T_H + +#include <haproxy/http-t.h> + +struct httpclient { + struct { + struct ist url; /* URL of the request */ + enum http_meth_t meth; /* method of the request */ + struct buffer buf; /* output buffer, HTX */ + } req; + struct { + struct ist vsn; + uint16_t status; + struct ist reason; + struct http_hdr *hdrs; /* headers */ + struct buffer buf; /* input buffer, raw HTTP */ + } res; + struct { + /* callbacks used to send the request, */ + void (*req_payload)(struct httpclient *hc); /* send a payload */ + + /* callbacks used to receive the response, if not set, the IO + * handler will consume the data without doing anything */ + void (*res_stline)(struct httpclient *hc); /* start line received */ + void (*res_headers)(struct httpclient *hc); /* headers received */ + void (*res_payload)(struct httpclient *hc); /* payload received */ + void (*res_end)(struct httpclient *hc); /* end of the response */ + } ops; + struct sockaddr_storage *dst; /* destination address */ + struct appctx *appctx; /* HTTPclient appctx */ + int timeout_server; /* server timeout in ms */ + void *caller; /* ptr of the caller */ + unsigned int flags; /* other flags */ + struct proxy *px; /* proxy for special cases */ + struct server *srv_raw; /* server for clear connections */ +#ifdef USE_OPENSSL + struct server *srv_ssl; /* server for SSL connections */ +#endif +}; + +/* Action (FA) to do */ +#define HTTPCLIENT_FA_STOP 0x00000001 /* stops the httpclient at the next IO handler call */ +#define HTTPCLIENT_FA_AUTOKILL 0x00000002 /* sets the applet to destroy the httpclient struct itself */ + +/* status (FS) */ +#define HTTPCLIENT_FS_STARTED 0x00010000 /* the httpclient was started */ +#define HTTPCLIENT_FS_ENDED 0x00020000 /* the httpclient is stopped */ + +/* States of the HTTP Client Appctx */ +enum { + HTTPCLIENT_S_REQ = 0, + HTTPCLIENT_S_REQ_BODY, + HTTPCLIENT_S_RES_STLINE, + 
HTTPCLIENT_S_RES_HDR, + HTTPCLIENT_S_RES_BODY, + HTTPCLIENT_S_RES_END, +}; + +#define HTTPCLIENT_USERAGENT "HAProxy" + +/* What kind of data we need to read */ +#define HC_F_RES_STLINE 0x01 +#define HC_F_RES_HDR 0x02 +#define HC_F_RES_BODY 0x04 +#define HC_F_RES_END 0x08 + + +#endif /* ! _HAPROXY_HTTPCLIENT_T_H */ diff --git a/include/haproxy/http_client.h b/include/haproxy/http_client.h new file mode 100644 index 0000000..241ca24 --- /dev/null +++ b/include/haproxy/http_client.h @@ -0,0 +1,40 @@ +#ifndef _HAPROXY_HTTPCLIENT_H +#define _HAPROXY_HTTPCLIENT_H + +#include <haproxy/http_client-t.h> + +void httpclient_destroy(struct httpclient *hc); +void httpclient_stop_and_destroy(struct httpclient *hc); + +struct proxy *httpclient_create_proxy(const char *id); +struct httpclient *httpclient_new(void *caller, enum http_meth_t meth, struct ist url); +struct httpclient *httpclient_new_from_proxy(struct proxy *px, void *caller, enum http_meth_t meth, struct ist url); +int httpclient_set_proxy(struct httpclient *hc, struct proxy *px); + +struct appctx *httpclient_start(struct httpclient *hc); +int httpclient_set_dst(struct httpclient *hc, const char *dst); +void httpclient_set_timeout(struct httpclient *hc, int timeout); +int httpclient_res_xfer(struct httpclient *hc, struct buffer *dst); +int httpclient_req_gen(struct httpclient *hc, const struct ist url, enum http_meth_t meth, const struct http_hdr *hdrs, const struct ist payload); +int httpclient_req_xfer(struct httpclient *hc, struct ist src, int end); + +/* Return the amount of data available in the httpclient response buffer */ +static inline int httpclient_data(struct httpclient *hc) +{ + return b_data(&hc->res.buf); +} + +/* Return 1 if the httpclient ended and won't receive any new data */ +static inline int httpclient_ended(struct httpclient *hc) +{ + return !!(hc->flags & HTTPCLIENT_FS_ENDED); +} + +/* Return 1 if the httpclient started */ +static inline int httpclient_started(struct httpclient *hc) +{ + +
return !!(hc->flags & HTTPCLIENT_FS_STARTED); +} + +#endif /* ! _HAPROXY_HTTPCLIENT_H */ diff --git a/include/haproxy/http_ext-t.h b/include/haproxy/http_ext-t.h new file mode 100644 index 0000000..68eb047 --- /dev/null +++ b/include/haproxy/http_ext-t.h @@ -0,0 +1,149 @@ +/* + * include/haproxy/http_ext-t.h + * Version-agnostic and implementation-agnostic HTTP extensions definitions + * + * Copyright 2022 HAProxy Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTPEXT_T_H +#define _HAPROXY_HTTPEXT_T_H + +#include <arpa/inet.h> +#include <import/ist.h> +#include <haproxy/tools-t.h> + +enum forwarded_header_attribute_type { + FORWARDED_HEADER_UNK = 0, + FORWARDED_HEADER_OBFS = 1, + FORWARDED_HEADER_PORT = 2, + FORWARDED_HEADER_IP = 3, +}; + +struct forwarded_header_nodename { + union { + struct sockaddr_storage ip; + struct ist obfs; + }; + enum forwarded_header_attribute_type type; +}; + +struct forwarded_header_nodeport { + union { + uint16_t port; + struct ist obfs; + }; + enum forwarded_header_attribute_type type; +}; + +struct forwarded_header_node { + struct forwarded_header_nodename nodename; + struct forwarded_header_nodeport nodeport; + struct ist raw; +}; + +enum forwarded_header_proto { + FORWARDED_HEADER_HTTP = 1, + FORWARDED_HEADER_HTTPS = 2 +}; + +struct
forwarded_header_ctx { + struct forwarded_header_node nfor; + struct forwarded_header_node nby; + struct ist host; + enum forwarded_header_proto proto; +}; + +enum http_ext_7239_forby_mode { + HTTP_7239_FORBY_ORIG = 1, + HTTP_7239_FORBY_SMP = 2 +}; +struct http_ext_7239_forby { + /* nn = nodename, np = nodeport */ + union { + char *nn_expr_s; + struct sample_expr *nn_expr; + }; + union { + char *np_expr_s; + struct sample_expr *np_expr; + }; + enum http_ext_7239_forby_mode nn_mode; + enum http_ext_7239_forby_mode np_mode; +}; + +enum http_ext_7239_host_mode { + HTTP_7239_HOST_ORIG = 1, + HTTP_7239_HOST_SMP = 2 +}; +struct http_ext_7239_host { + union { + char *expr_s; + struct sample_expr *expr; + }; + enum http_ext_7239_host_mode mode; +}; + +struct http_ext_7239 { + /* forwarded header parameters options */ + struct http_ext_7239_forby p_for; + struct http_ext_7239_forby p_by; + struct http_ext_7239_host p_host; + uint8_t p_proto; + /* config error hints, used only during configuration parsing */ + char *c_file; + int c_line; + int c_mode; /* 0: parsed, 1: compiled */ +}; + +enum forwarded_header_field { + FORWARDED_HEADER_FOR = 0x01, + FORWARDED_HEADER_BY = 0x02, + FORWARDED_HEADER_HOST = 0x04, + FORWARDED_HEADER_PROTO = 0x08, + FORWARDED_HEADER_ALL = FORWARDED_HEADER_FOR|FORWARDED_HEADER_BY|FORWARDED_HEADER_HOST|FORWARDED_HEADER_PROTO +}; + +enum http_ext_xff_mode { + HTTP_XFF_IFNONE = 0, /* set if not already set */ + HTTP_XFF_ALWAYS = 1 /* always set x-forwarded-for */ +}; +struct http_ext_xff { + struct ist hdr_name; /* header to use - default: "x-forwarded-for" */ + struct net_addr except_net; /* don't forward x-forwarded-for for this address. */ + uint8_t mode; +}; + +struct http_ext_xot { + struct ist hdr_name; /* header to use - default: "x-original-to" */ + struct net_addr except_net; /* don't forward x-original-to for this address.
*/ +}; + +/* http_ext options */ +struct http_ext { + /* forwarded header (RFC 7239) */ + struct http_ext_7239 *fwd; + /* x-forwarded-for: + * conditionally insert x-forwarded-for with client address + */ + struct http_ext_xff *xff; + /* x-original-to: + * insert x-original-to with destination address + */ + struct http_ext_xot *xot; +}; + +#endif /* !_HAPROXY_HTTPEXT_T_H */ diff --git a/include/haproxy/http_ext.h b/include/haproxy/http_ext.h new file mode 100644 index 0000000..53764a2 --- /dev/null +++ b/include/haproxy/http_ext.h @@ -0,0 +1,58 @@ +/* + * include/haproxy/http_ext.h + * Functions for Version-agnostic and implementation-agnostic HTTP extensions + * + * Copyright 2022 HAProxy Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTPEXT_H +#define _HAPROXY_HTTPEXT_H + +#include <haproxy/http_ext-t.h> +#include <haproxy/channel-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/stream-t.h> + +int http_validate_7239_header(struct ist hdr, int required_steps, struct forwarded_header_ctx *ctx); + +int http_handle_7239_header(struct stream *s, struct channel *req); +int http_handle_xff_header(struct stream *s, struct channel *req); +int http_handle_xot_header(struct stream *s, struct channel *req); + +int proxy_http_parse_7239(char **args, int cur_arg, struct proxy *curproxy, const struct proxy *defpx, const char *file, int linenum); +int proxy_http_compile_7239(struct proxy *curproxy); +int proxy_http_parse_xff(char **args, int cur_arg, struct proxy *curproxy, const struct proxy *defpx, const char *file, int linenum); +int proxy_http_parse_xot(char **args, int cur_arg, struct proxy *curproxy, const struct proxy *defpx, const char *file, int linenum); + +int http_ext_7239_prepare(struct proxy *cur); +int http_ext_xff_prepare(struct proxy *cur); +int http_ext_xot_prepare(struct proxy *cur); + +void http_ext_7239_dup(const struct proxy *def, struct proxy *cpy); +void http_ext_xff_dup(const struct proxy *def, struct proxy *cpy); +void http_ext_xot_dup(const struct proxy *def, struct proxy *cpy); + +void http_ext_7239_clean(struct proxy *cur); +void http_ext_xff_clean(struct proxy *cur); +void http_ext_xot_clean(struct proxy *cur); + +int http_ext_prepare(struct proxy *cur); +void http_ext_dup(const struct proxy *def, struct proxy *cpy); +void http_ext_clean(struct proxy *cur); +void http_ext_softclean(struct proxy *cur); + +#endif /* !_HAPROXY_HTTPEXT_H */ diff --git a/include/haproxy/http_fetch.h b/include/haproxy/http_fetch.h new file mode 100644 
index 0000000..7997629 --- /dev/null +++ b/include/haproxy/http_fetch.h @@ -0,0 +1,41 @@ +/* + * include/haproxy/http_fetch.h + * This file contains the minimally required http sample fetch declarations. + * + * Copyright (C) 2000-2018 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTP_FETCH_H +#define _HAPROXY_HTTP_FETCH_H + +#include <haproxy/api.h> +#include <haproxy/arg-t.h> +#include <haproxy/channel-t.h> +#include <haproxy/check-t.h> +#include <haproxy/sample-t.h> + +struct htx *smp_prefetch_htx(struct sample *smp, struct channel *chn, struct check *check, int vol); +int val_hdr(struct arg *arg, char **err_msg); + +#endif /* _HAPROXY_HTTP_FETCH_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/http_htx-t.h b/include/haproxy/http_htx-t.h new file mode 100644 index 0000000..8051925 --- /dev/null +++ b/include/haproxy/http_htx-t.h @@ -0,0 +1,95 @@ +/* + * include/haproxy/http_htx-t.h + * This file defines everything related to HTTP manipulation using the internal + * representation. 
+ * + * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTP_HTX_T_H +#define _HAPROXY_HTTP_HTX_T_H + +#include <import/ebistree.h> +#include <import/ist.h> + +#include <haproxy/buf-t.h> +#include <haproxy/http-t.h> +#include <haproxy/htx-t.h> + +/* Context used to find/remove an HTTP header. 
*/ +struct http_hdr_ctx { + struct htx_blk *blk; + struct ist value; + uint16_t lws_before; /* NOTE(review): presumably the amount of white space before <value> -- confirm */ + uint16_t lws_after; /* NOTE(review): presumably the amount of white space after <value> -- confirm */ +}; + + +/* Structure used to build the header list of an HTTP reply */ +struct http_reply_hdr { + struct ist name; /* the header name */ + struct list value; /* the log-format string value */ + struct list list; /* header chained list */ +}; + +#define HTTP_REPLY_EMPTY 0x00 /* the reply has no payload */ +#define HTTP_REPLY_ERRMSG 0x01 /* the reply is an error message (may be NULL) */ +#define HTTP_REPLY_ERRFILES 0x02 /* the reply references an http-errors section */ +#define HTTP_REPLY_RAW 0x03 /* the reply uses a raw payload */ +#define HTTP_REPLY_LOGFMT 0x04 /* the reply uses a log-format payload */ +#define HTTP_REPLY_INDIRECT 0x05 /* the reply references another http-reply (may be NULL) */ + +/* Used by HAProxy to generate internal responses */ +struct http_reply { + unsigned char type; /* HTTP_REPLY_* (a single value, not a bitmask) */ + int status; /* The response status code */ + char *ctype; /* The response content-type, may be NULL */ + struct list hdrs; /* A list of http_reply_hdr */ + union { + struct list fmt; /* A log-format string (type = HTTP_REPLY_LOGFMT) */ + struct buffer obj; /* A raw string (type = HTTP_REPLY_RAW) */ + struct buffer *errmsg; /* The error message to use as response (type = HTTP_REPLY_ERRMSG). + * May be NULL, if so rely on the proxy error messages */ + struct http_reply *reply; /* The HTTP reply to use as response (type = HTTP_REPLY_INDIRECT) */ + char *http_errors; /* The http-errors section to use (type = HTTP_REPLY_ERRFILES). + * Should be resolved during post-check */ + } body; + struct list list; /* next http_reply in the global list. + * Only used for replies defined in a proxy section */ +}; + +/* A custom HTTP error message loaded from a raw file and converted to HTX. The + * node key is the file path. + */ +struct http_error_msg { + struct buffer msg; + struct ebpt_node node; +}; + +/* http-errors section and parameters. 
*/ +struct http_errors { + char *id; /* unique identifier */ + struct { + char *file; /* file where the section appears */ + int line; /* line where the section appears */ + } conf; /* config information */ + + struct http_reply *replies[HTTP_ERR_SIZE]; /* HTTP replies for known errors */ + struct list list; /* http-errors list */ +}; + +#endif /* _HAPROXY_HTTP_HTX_T_H */ diff --git a/include/haproxy/http_htx.h b/include/haproxy/http_htx.h new file mode 100644 index 0000000..3d01a06 --- /dev/null +++ b/include/haproxy/http_htx.h @@ -0,0 +1,84 @@ +/* + * include/haproxy/http_htx.h + * This file defines function prototypes for HTTP manipulation using the + * internal representation. + * + * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTP_HTX_H +#define _HAPROXY_HTTP_HTX_H + +#include <import/ist.h> +#include <haproxy/buf-t.h> +#include <haproxy/http-hdr-t.h> +#include <haproxy/http_htx-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/regex-t.h> + +extern struct buffer http_err_chunks[HTTP_ERR_SIZE]; +extern struct http_reply http_err_replies[HTTP_ERR_SIZE]; +extern struct list http_errors_list; + +struct htx_sl *http_get_stline(const struct htx *htx); +size_t http_get_hdrs_size(struct htx *htx); +int http_find_header(const struct htx *htx, const struct ist name, struct http_hdr_ctx *ctx, int full); +int http_find_str_header(const struct htx *htx, const struct ist name, struct http_hdr_ctx *ctx, int full); +int http_find_pfx_header(const struct htx *htx, const struct ist prefix, struct http_hdr_ctx *ctx, int full); +int http_find_sfx_header(const struct htx *htx, const struct ist suffix, struct http_hdr_ctx *ctx, int full); +int http_find_sub_header(const struct htx *htx, const struct ist sub, struct http_hdr_ctx *ctx, int full); +int http_match_header(const struct htx *htx, const struct my_regex *re, struct http_hdr_ctx *ctx, int full); +int http_add_header(struct htx *htx, const struct ist n, const struct ist v); +int http_replace_stline(struct htx *htx, const struct ist p1, const struct ist p2, const struct ist p3); +int http_replace_req_meth(struct htx *htx, const struct ist meth); +int http_replace_req_uri(struct htx *htx, const struct ist uri); +int http_replace_req_path(struct htx *htx, const struct ist path, int with_qs); +int http_replace_req_query(struct htx *htx, const struct ist query); +int http_replace_res_status(struct htx *htx, const struct ist status, const struct ist reason); +int http_replace_res_reason(struct htx *htx, 
const struct ist reason); +int http_append_header_value(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist data); +int http_prepend_header_value(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist data); +int http_replace_header_value(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist data); +int http_replace_header(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist name, const struct ist value); +int http_remove_header(struct htx *htx, struct http_hdr_ctx *ctx); +int http_update_authority(struct htx *htx, struct htx_sl *sl, const struct ist host); +int http_update_host(struct htx *htx, struct htx_sl *sl, const struct ist uri); + +unsigned int http_get_htx_hdr(const struct htx *htx, const struct ist hdr, + int occ, struct http_hdr_ctx *ctx, char **vptr, size_t *vlen); +unsigned int http_get_htx_fhdr(const struct htx *htx, const struct ist hdr, + int occ, struct http_hdr_ctx *ctx, char **vptr, size_t *vlen); +int http_str_to_htx(struct buffer *buf, struct ist raw, char **errmsg); + +void release_http_reply(struct http_reply *http_reply); +int http_check_http_reply(struct http_reply *reply, struct proxy*px, char **errmsg); +struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struct proxy *px, + int default_status, char **errmsg); + +int http_scheme_based_normalize(struct htx *htx); + +void http_cookie_register(struct http_hdr *list, int idx, int *first, int *last); +int http_cookie_merge(struct htx *htx, struct http_hdr *list, int first); + +struct buffer *http_load_errorfile(const char *file, char **errmsg); +struct buffer *http_load_errormsg(const char *key, const struct ist msg, char **errmsg); +struct buffer *http_parse_errorfile(int status, const char *file, char **errmsg); +struct buffer *http_parse_errorloc(int errloc, int status, const char *url, char **errmsg); +int proxy_dup_default_conf_errors(struct proxy *curpx, const struct proxy *defpx, char **errmsg); +void proxy_release_conf_errors(struct 
proxy *px); + +#endif /* _HAPROXY_HTTP_HTX_H */ diff --git a/include/haproxy/http_rules.h b/include/haproxy/http_rules.h new file mode 100644 index 0000000..740b546 --- /dev/null +++ b/include/haproxy/http_rules.h @@ -0,0 +1,56 @@ +/* + * include/haproxy/http_rules.h + * This file contains "http" rules definitions + * + * Copyright (C) 2000-2018 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTTP_RULES_H +#define _HAPROXY_HTTP_RULES_H + +#include <haproxy/action-t.h> +#include <haproxy/api.h> +#include <haproxy/list.h> +#include <haproxy/proxy-t.h> + +extern struct action_kw_list http_req_keywords; +extern struct action_kw_list http_res_keywords; +extern struct action_kw_list http_after_res_keywords; + +struct act_rule *parse_http_req_cond(const char **args, const char *file, int linenum, struct proxy *proxy); +struct act_rule *parse_http_res_cond(const char **args, const char *file, int linenum, struct proxy *proxy); +struct act_rule *parse_http_after_res_cond(const char **args, const char *file, int linenum, struct proxy *proxy); +void http_free_redirect_rule(struct redirect_rule *rdr); +struct redirect_rule *http_parse_redirect_rule(const char *file, int linenum, struct proxy *curproxy, + const char **args, char **errmsg, int use_fmt, int dir); + +void 
http_req_keywords_register(struct action_kw_list *kw_list); +void http_res_keywords_register(struct action_kw_list *kw_list); +void http_after_res_keywords_register(struct action_kw_list *kw_list); + +struct action_kw *action_http_req_custom(const char *kw); +struct action_kw *action_http_res_custom(const char *kw); +struct action_kw *action_http_after_res_custom(const char *kw); + +#endif /* _HAPROXY_HTTP_RULES_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/htx-t.h b/include/haproxy/htx-t.h new file mode 100644 index 0000000..2ea6bc8 --- /dev/null +++ b/include/haproxy/htx-t.h @@ -0,0 +1,277 @@ +/* + * include/haproxy/htx-t.h + * This file declares the types and constants used the internal HTTP messages + * + * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTX_T_H +#define _HAPROXY_HTX_T_H + +#include <haproxy/api.h> +#include <haproxy/http-t.h> +#include <haproxy/show_flags-t.h> + +/* + * The internal representation of an HTTP message, called HTX, is a structure + * with useful information on the message followed by a contiguous array + * containing parts of the message, called blocks. 
A block is composed of + * metadata (htx_blk) and the associated payload. Blocks' metadata are stored + * starting from the end of the array while their payloads are stored at the + * beginning. Blocks' metadata are often simply called blocks. It is a misuse of + * language that simplifies explanations. + * + * + * +-----+---------------+------------------------------+--------------+ + * | HTX | PAYLOADS ==> | | <== HTX_BLKs | + * +-----+---------------+------------------------------+--------------+ + * ^ + * blocks[] (the beginning of the blocks array) + * + * + * The blocks part remains linear and sorted. You may think about it as an array + * with negative indexes. But, instead of using negative indexes, we use + * positive positions to identify a block. This position is then converted to an + * address relative to the beginning of the blocks array. + * + * + * .....--+------------------------------+-----+-----+ + * | ... | BLK | BLK | + * .....--+------------------------------+-----+-----+ + * ^ ^ + * Addr of the block Addr of the block + * at the position 1 at the position 0 + * + * + * The payloads part is a raw space that may wrap. You never access a block's + * payload directly. Instead you get a block to retrieve the address of its + * payload. When no more space is left between blocks and payloads parts, the free + * space at the beginning, if any, is used. + * + * + * +----------- WRAPPING ------------------------+ + * | | + * V | + * +-----+-------------+---------------+---------------++--------------+ + * | HTX | PAYLOAD ==> | | PAYLOADS ==X || X== HTX_BLKs | + * +-----+-------------+---------------+---------------++--------------+ + * + * + * The blocks part, on its side, never wraps. 
If we have no space to allocate a + * new block and if there is a hole at the beginning of the blocks part (so at + * the end of the blocks array), we move back all blocks. + * + * + * ...+--------------+----------+ blocks ...+----------+--------------+ + * | X== HTX_BLKS | | defrag | | <== HTX_BLKS | + * ...+--------------+----------+ =====> ...+----------+--------------+ + * + * + * At the end, if payload wrapping or blocks defragmentation is not enough, some + * free space may be recovered with a full defragmentation. This way, the holes in + * the middle are not reusable but count in the available free space. The only + * way to reuse this lost space is to fully defragment the HTX message. + * + * - * - + * + * An HTX block can be a header, a body part or a trailer. For all these + * types of block, a payload is attached to the block. It can also be a mark, + * like the end-of-headers or end-of-trailers. For these blocks, there is no + * payload but it counts for a byte. It is important to not skip it when data are + * forwarded. Metadata of an HTX block are composed of 2 fields : + * + * - .info : It is a 32-bit field containing the block's type on 4 bits + * followed by the payload length. See below for details. + * + * - .addr : The payload's address, if any, relative to the beginning of the + * array used to store the HTX message itself. + * + * htx_blk.info representation : + * + * 0b 0000 0000 0000 0000 0000 0000 0000 0000 + * ---- ------------------------ --------- + * type value (1 MB max) name length (header/trailer) + * ---------------------------------- + * data length (256 MB max) + * (body, method, path, version, status, reason) + * + * types (see enum htx_blk_type) : + * - 0000 = request start-line + * - 0001 = response start-line + * - 0010 = header + * - 0011 = end-of-headers + * - 0100 = data + * - 0101 = trailer + * - 0110 = end-of-trailers + * ... + * - 1111 = unused + * + */ + +/* HTX start-line flags. 
+ * Please also update the hsl_show_flags() function below in case of changes. + */ +#define HTX_SL_F_NONE 0x00000000 +#define HTX_SL_F_IS_RESP 0x00000001 /* It is the response start-line (unset means the request one) */ +#define HTX_SL_F_XFER_LEN 0x00000002 /* The message xfer size can be determined */ +#define HTX_SL_F_XFER_ENC 0x00000004 /* The transfer-encoding header was found in message */ +#define HTX_SL_F_CLEN 0x00000008 /* The content-length header was found in message */ +#define HTX_SL_F_CHNK 0x00000010 /* The message payload is chunked */ +#define HTX_SL_F_VER_11 0x00000020 /* The message indicates version 1.1 or above */ +#define HTX_SL_F_BODYLESS 0x00000040 /* The message has no body (content-length = 0) */ +#define HTX_SL_F_HAS_SCHM 0x00000080 /* The scheme is explicitly specified */ +#define HTX_SL_F_SCHM_HTTP 0x00000100 /* The scheme HTTP should be used */ +#define HTX_SL_F_SCHM_HTTPS 0x00000200 /* The scheme HTTPS should be used */ +#define HTX_SL_F_HAS_AUTHORITY 0x00000400 /* The request authority is explicitly specified */ +#define HTX_SL_F_NORMALIZED_URI 0x00000800 /* The received URI is normalized (an implicit absolute-uri form) */ +#define HTX_SL_F_CONN_UPG 0x00001000 /* The message contains "connection: upgrade" header */ +#define HTX_SL_F_BODYLESS_RESP 0x00002000 /* The response to this message is bodyless (only for request) */ + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + * + * <buf> and <len> describe the output area, <delim> is the delimiter handed to + * __APPEND_FLAG between flag names and <flg> is the HTX_SL_F_* value to dump. + */ +static forceinline char *hsl_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags: one entry per HTX_SL_F_* bit, in declaration order */ + + _(HTX_SL_F_IS_RESP, _(HTX_SL_F_XFER_LEN, _(HTX_SL_F_XFER_ENC, + _(HTX_SL_F_CLEN, _(HTX_SL_F_CHNK, _(HTX_SL_F_VER_11, + _(HTX_SL_F_BODYLESS, _(HTX_SL_F_HAS_SCHM, _(HTX_SL_F_SCHM_HTTP, + _(HTX_SL_F_SCHM_HTTPS, _(HTX_SL_F_HAS_AUTHORITY, + _(HTX_SL_F_NORMALIZED_URI, _(HTX_SL_F_CONN_UPG, + _(HTX_SL_F_BODYLESS_RESP)))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* Overhead induced by HTX on buffers during transfers. In addition to the size + * of the HTX structure itself, and meta data for one block, another block is + * accounted for to favor zero-copy xfer. + */ +#define HTX_BUF_OVERHEAD (sizeof(struct htx) + 2 * sizeof(struct htx_blk)) + +/* HTX flags. + * Please also update the htx_show_flags() function below in case of changes. + */ +#define HTX_FL_NONE 0x00000000 +#define HTX_FL_PARSING_ERROR 0x00000001 /* Set when a parsing error occurred */ +#define HTX_FL_PROCESSING_ERROR 0x00000002 /* Set when a processing error occurred */ +#define HTX_FL_FRAGMENTED 0x00000004 /* Set when the HTX buffer is fragmented */ +#define HTX_FL_PROXY_RESP 0x00000008 /* Set when the response was generated by HAProxy */ +#define HTX_FL_EOM 0x00000010 /* Set when end-of-message is reached from the HTTP point of view + * (at worst, only the EOM block is missing) + */ +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + * + * <buf> and <len> describe the output area, <delim> is the delimiter handed to + * __APPEND_FLAG between flag names and <flg> is the HTX_FL_* value to dump. + */ +static forceinline char *htx_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags: one entry per HTX_FL_* bit, in declaration order */ + _(HTX_FL_PARSING_ERROR, _(HTX_FL_PROCESSING_ERROR, + _(HTX_FL_FRAGMENTED, _(HTX_FL_PROXY_RESP, _(HTX_FL_EOM))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + + +/* HTX block's type (max 15). 
*/ +enum htx_blk_type { + HTX_BLK_REQ_SL = 0, /* Request start-line */ + HTX_BLK_RES_SL = 1, /* Response start-line */ + HTX_BLK_HDR = 2, /* header name/value block */ + HTX_BLK_EOH = 3, /* end-of-headers block */ + HTX_BLK_DATA = 4, /* data block */ + HTX_BLK_TLR = 5, /* trailer name/value block */ + HTX_BLK_EOT = 6, /* end-of-trailers block */ + /* 7 .. 14 unused */ + HTX_BLK_UNUSED = 15, /* unused/removed block */ +}; + +/* One HTX block descriptor */ +struct htx_blk { + uint32_t addr; /* relative storage address of the block's payload */ + uint32_t info; /* information about the block (type, length) */ +}; + +/* Composite return value used by some HTX functions */ +struct htx_ret { + int32_t ret; /* A numerical value */ + struct htx_blk *blk; /* An HTX block */ +}; + +/* HTX start-line */ +struct htx_sl { + unsigned int flags; /* HTX_SL_F_* */ + union { + struct { + enum http_meth_t meth; /* method */ + } req; + struct { + uint16_t status; /* status code */ + } res; + } info; + + /* XXX 2 bytes unused */ + + unsigned int len[3]; /* length of different parts of the start-line */ + char l[VAR_ARRAY]; /* the 3 parts of the start-line, stored back to back (see the HTX_SL_P*_PTR macros) */ +}; + +/* Internal representation of an HTTP message */ +struct htx { + uint32_t size; /* the array size, in bytes, used to store the HTTP message itself */ + uint32_t data; /* the data size, in bytes. To know the total size used by all allocated + * blocks (blocks and their contents), you need to add size used by blocks, + * i.e. [ used * sizeof(struct htx_blk) ] */ + + int32_t tail; /* newest inserted block. -1 if the HTX message is empty */ + int32_t head; /* oldest inserted block. -1 if the HTX message is empty */ + int32_t first; /* position of the first block to (re)start the analysis. -1 if unset. 
*/ + + uint32_t tail_addr; /* start address of the free space in front of the blocks table */ + uint32_t head_addr; /* start address of the free space at the beginning */ + uint32_t end_addr; /* end address of the free space at the beginning */ + + uint64_t extra; /* known bytes amount remaining to receive */ + uint32_t flags; /* HTX_FL_* */ + + /* XXX 4 bytes unused */ + + /* Blocks representing the HTTP message itself */ + char blocks[VAR_ARRAY] __attribute__((aligned(8))); +}; + +#endif /* _HAPROXY_HTX_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/htx.h b/include/haproxy/htx.h new file mode 100644 index 0000000..c991c81 --- /dev/null +++ b/include/haproxy/htx.h @@ -0,0 +1,885 @@ +/* + * include/haproxy/htx.h + * This file defines everything related to the internal HTTP messages. + * + * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_HTX_H +#define _HAPROXY_HTX_H + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/http-hdr-t.h> +#include <haproxy/http-t.h> +#include <haproxy/htx-t.h> + +/* ->extra field value when the payload length is unknown (non-chunked message + * with no "Content-length" header) + */ +#define HTX_UNKOWN_PAYLOAD_LENGTH ULLONG_MAX + +extern struct htx htx_empty; + +struct htx_blk *htx_defrag(struct htx *htx, struct htx_blk *blk, uint32_t info); +struct htx_blk *htx_add_blk(struct htx *htx, enum htx_blk_type type, uint32_t blksz); +struct htx_blk *htx_remove_blk(struct htx *htx, struct htx_blk *blk); +struct htx_ret htx_find_offset(struct htx *htx, uint32_t offset); +void htx_truncate(struct htx *htx, uint32_t offset); +struct htx_ret htx_drain(struct htx *htx, uint32_t max); + +struct htx_blk *htx_replace_blk_value(struct htx *htx, struct htx_blk *blk, + const struct ist old, const struct ist new); +struct htx_ret htx_xfer_blks(struct htx *dst, struct htx *src, uint32_t count, + enum htx_blk_type mark); + +struct htx_sl *htx_replace_stline(struct htx *htx, struct htx_blk *blk, const struct ist p1, + const struct ist p2, const struct ist p3); + +struct htx_blk *htx_replace_header(struct htx *htx, struct htx_blk *blk, + const struct ist name, const struct ist value); + +struct htx_ret htx_reserve_max_data(struct htx *htx); +struct htx_blk *htx_add_data_atonce(struct htx *htx, struct ist data); +size_t htx_add_data(struct htx *htx, const struct ist data); +struct htx_blk *htx_add_last_data(struct htx *htx, struct ist data); +void htx_move_blk_before(struct htx *htx, struct htx_blk **blk, struct htx_blk **ref); +int htx_append_msg(struct htx *dst, const struct 
htx *src); + +/* Functions and macros to get parts of the start-line or length of these + * parts. Request and response start-lines are both composed of 3 parts. + */ +#define HTX_SL_LEN(sl) ((sl)->len[0] + (sl)->len[1] + (sl)->len[2]) + +#define HTX_SL_P1_LEN(sl) ((sl)->len[0]) +#define HTX_SL_P2_LEN(sl) ((sl)->len[1]) +#define HTX_SL_P3_LEN(sl) ((sl)->len[2]) +#define HTX_SL_P1_PTR(sl) ((sl)->l) +#define HTX_SL_P2_PTR(sl) (HTX_SL_P1_PTR(sl) + HTX_SL_P1_LEN(sl)) +#define HTX_SL_P3_PTR(sl) (HTX_SL_P2_PTR(sl) + HTX_SL_P2_LEN(sl)) + +#define HTX_SL_REQ_MLEN(sl) HTX_SL_P1_LEN(sl) +#define HTX_SL_REQ_ULEN(sl) HTX_SL_P2_LEN(sl) +#define HTX_SL_REQ_VLEN(sl) HTX_SL_P3_LEN(sl) +#define HTX_SL_REQ_MPTR(sl) HTX_SL_P1_PTR(sl) +#define HTX_SL_REQ_UPTR(sl) HTX_SL_P2_PTR(sl) +#define HTX_SL_REQ_VPTR(sl) HTX_SL_P3_PTR(sl) + +#define HTX_SL_RES_VLEN(sl) HTX_SL_P1_LEN(sl) +#define HTX_SL_RES_CLEN(sl) HTX_SL_P2_LEN(sl) +#define HTX_SL_RES_RLEN(sl) HTX_SL_P3_LEN(sl) +#define HTX_SL_RES_VPTR(sl) HTX_SL_P1_PTR(sl) +#define HTX_SL_RES_CPTR(sl) HTX_SL_P2_PTR(sl) +#define HTX_SL_RES_RPTR(sl) HTX_SL_P3_PTR(sl) + +static inline struct ist htx_sl_p1(const struct htx_sl *sl) +{ + return ist2(HTX_SL_P1_PTR(sl), HTX_SL_P1_LEN(sl)); +} + +static inline struct ist htx_sl_p2(const struct htx_sl *sl) +{ + return ist2(HTX_SL_P2_PTR(sl), HTX_SL_P2_LEN(sl)); +} + +static inline struct ist htx_sl_p3(const struct htx_sl *sl) +{ + return ist2(HTX_SL_P3_PTR(sl), HTX_SL_P3_LEN(sl)); +} + +static inline struct ist htx_sl_req_meth(const struct htx_sl *sl) +{ + return htx_sl_p1(sl); +} + +static inline struct ist htx_sl_req_uri(const struct htx_sl *sl) +{ + return htx_sl_p2(sl); +} + +static inline struct ist htx_sl_req_vsn(const struct htx_sl *sl) +{ + return htx_sl_p3(sl); +} + + +static inline struct ist htx_sl_res_vsn(const struct htx_sl *sl) +{ + return htx_sl_p1(sl); +} + +static inline struct ist htx_sl_res_code(const struct htx_sl *sl) +{ + return htx_sl_p2(sl); +} + +static inline struct ist 
htx_sl_res_reason(const struct htx_sl *sl) +{ + return htx_sl_p3(sl); +} + +/* Converts a position to the corresponding relative address */ +static inline uint32_t htx_pos_to_addr(const struct htx *htx, uint32_t pos) +{ + return htx->size - (pos + 1) * sizeof(struct htx_blk); +} + +/* Returns the position of the block <blk>. It is the caller responsibility to + * be sure <blk> is part of <htx>. */ +static inline uint32_t htx_get_blk_pos(const struct htx *htx, const struct htx_blk *blk) +{ + return ((htx->blocks + htx->size - (char *)blk) / sizeof(struct htx_blk) - 1); +} + +/* Returns the block at the position <pos>. It is the caller responsibility to + * be sure the block at the position <pos> exists. */ +static inline struct htx_blk *htx_get_blk(const struct htx *htx, uint32_t pos) +{ + return (struct htx_blk *)(htx->blocks + htx_pos_to_addr(htx, pos)); +} + +/* Returns the type of the block <blk> */ +static inline enum htx_blk_type htx_get_blk_type(const struct htx_blk *blk) +{ + return (blk->info >> 28); +} + +/* Returns the size of the block <blk>, depending of its type */ +static inline uint32_t htx_get_blksz(const struct htx_blk *blk) +{ + enum htx_blk_type type = htx_get_blk_type(blk); + + switch (type) { + case HTX_BLK_HDR: + case HTX_BLK_TLR: + /* name.length + value.length */ + return ((blk->info & 0xff) + ((blk->info >> 8) & 0xfffff)); + default: + /* value.length */ + return (blk->info & 0xfffffff); + } +} + +/* Returns the position of the oldest entry (head). It returns a signed 32-bits + * integer, -1 means the HTX message is empty. + */ +static inline int32_t htx_get_head(const struct htx *htx) +{ + return htx->head; +} + +/* Returns the oldest HTX block (head) if the HTX message is not + * empty. Otherwise it returns NULL. + */ +static inline struct htx_blk *htx_get_head_blk(const struct htx *htx) +{ + int32_t head = htx_get_head(htx); + + return ((head == -1) ? 
NULL : htx_get_blk(htx, head)); +} + +/* same as above but unchecked, may only be used when certain that a block + * exists. + */ +static inline struct htx_blk *__htx_get_head_blk(const struct htx *htx) +{ + int32_t head = htx_get_head(htx); + + return htx_get_blk(htx, head); +} + +/* Returns the type of the oldest HTX block (head) if the HTX message is not + * empty. Otherwise it returns HTX_BLK_UNUSED. + */ +static inline enum htx_blk_type htx_get_head_type(const struct htx *htx) +{ + struct htx_blk *blk = htx_get_head_blk(htx); + + return (blk ? htx_get_blk_type(blk) : HTX_BLK_UNUSED); +} + +/* Returns the position of the newest entry (tail). It returns a signed 32-bits + * integer, -1 means the HTX message is empty. + */ +static inline int32_t htx_get_tail(const struct htx *htx) +{ + return htx->tail; +} + +/* Returns the newest HTX block (tail) if the HTX message is not + * empty. Otherwise it returns NULL. + */ +static inline struct htx_blk *htx_get_tail_blk(const struct htx *htx) +{ + int32_t tail = htx_get_tail(htx); + + return ((tail == -1) ? NULL : htx_get_blk(htx, tail)); +} + +/* Returns the type of the newest HTX block (tail) if the HTX message is not + * empty. Otherwise it returns HTX_BLK_UNUSED. + */ +static inline enum htx_blk_type htx_get_tail_type(const struct htx *htx) +{ + struct htx_blk *blk = htx_get_tail_blk(htx); + + return (blk ? htx_get_blk_type(blk) : HTX_BLK_UNUSED); +} + +/* Returns the position of the first block in the HTX message <htx>. -1 means + * the first block is unset or the HTS is empty. + */ +static inline int32_t htx_get_first(const struct htx *htx) +{ + return htx->first; +} + +/* Returns the first HTX block in the HTX message <htx>. If unset or if <htx> is + * empty, NULL returned. + */ +static inline struct htx_blk *htx_get_first_blk(const struct htx *htx) +{ + int32_t pos; + + pos = htx_get_first(htx); + return ((pos == -1) ? 
NULL : htx_get_blk(htx, pos)); +} + +/* Returns the type of the first block in the HTX message <htx>. If unset or if + * <htx> is empty, HTX_BLK_UNUSED is returned. + */ +static inline enum htx_blk_type htx_get_first_type(const struct htx *htx) +{ + struct htx_blk *blk = htx_get_first_blk(htx); + + return (blk ? htx_get_blk_type(blk) : HTX_BLK_UNUSED); +} + +/* Returns the position of the block immediately before the one pointed to by + * <pos>. If the message is empty or if <pos> is the position of the head, -1 + * is returned. + */ +static inline int32_t htx_get_prev(const struct htx *htx, uint32_t pos) +{ + if (htx->head == -1 || pos == htx->head) + return -1; + return (pos - 1); +} + +/* Returns the HTX block before <blk> in the HTX message <htx>. If <blk> is the + * head, NULL is returned. + */ +static inline struct htx_blk *htx_get_prev_blk(const struct htx *htx, + const struct htx_blk *blk) +{ + int32_t pos; + + pos = htx_get_prev(htx, htx_get_blk_pos(htx, blk)); + return ((pos == -1) ? NULL : htx_get_blk(htx, pos)); +} + +/* Returns the position of the block immediately after the one pointed to by + * <pos>. If the message is empty or if <pos> is the position of the tail, -1 + * is returned. + */ +static inline int32_t htx_get_next(const struct htx *htx, uint32_t pos) +{ + if (htx->tail == -1 || pos == htx->tail) + return -1; + return (pos + 1); + +} + +/* Returns the HTX block after <blk> in the HTX message <htx>. If <blk> is the + * tail, NULL is returned. + */ +static inline struct htx_blk *htx_get_next_blk(const struct htx *htx, + const struct htx_blk *blk) +{ + int32_t pos; + + pos = htx_get_next(htx, htx_get_blk_pos(htx, blk)); + return ((pos == -1) ? NULL : htx_get_blk(htx, pos)); +} + +/* Returns 1 if <blk> is the only block inside the HTX message <htx>, + * excluding all unused blocks. Otherwise, it returns 0. If 1 is returned, this + * means that there is only <blk> and possibly some unused ones in <htx>. 
+ */ +static inline int htx_is_unique_blk(const struct htx *htx, + const struct htx_blk *blk) +{ + return (htx_get_blksz(blk) == htx->data); +} + +/* Changes the size of the value. It is the caller's responsibility to change the + * value itself, make sure there is enough space and update allocated + * value. This function updates the HTX message accordingly. + */ +static inline void htx_change_blk_value_len(struct htx *htx, struct htx_blk *blk, uint32_t newlen) +{ + enum htx_blk_type type = htx_get_blk_type(blk); + uint32_t oldlen, sz; + int32_t delta; + + sz = htx_get_blksz(blk); + switch (type) { + case HTX_BLK_HDR: + case HTX_BLK_TLR: + oldlen = (blk->info >> 8) & 0xfffff; + blk->info = (type << 28) + (newlen << 8) + (blk->info & 0xff); + break; + default: + oldlen = blk->info & 0xfffffff; + blk->info = (type << 28) + newlen; + break; + } + + /* Update the HTX message: total data size, and tail_addr or head_addr when the block ended exactly there */ + delta = (newlen - oldlen); + htx->data += delta; + if (blk->addr+sz == htx->tail_addr) + htx->tail_addr += delta; + else if (blk->addr+sz == htx->head_addr) + htx->head_addr += delta; +} + +/* Changes the size of the value. It is the caller's responsibility to change the + * value itself, make sure there is enough space and update allocated + * value. Unlike the function htx_change_blk_value_len(), this one does not + * update the HTX message. So it should be used with caution. 
+ */ +static inline void htx_set_blk_value_len(struct htx_blk *blk, uint32_t vlen) +{ + enum htx_blk_type type = htx_get_blk_type(blk); + + switch (type) { + case HTX_BLK_HDR: + case HTX_BLK_TLR: + blk->info = (type << 28) + (vlen << 8) + (blk->info & 0xff); + break; + case HTX_BLK_REQ_SL: + case HTX_BLK_RES_SL: + case HTX_BLK_DATA: + blk->info = (type << 28) + vlen; + break; + default: + /* Unexpected case */ + break; + } +} + +/* Returns the data pointer of the block <blk> */ +static inline void *htx_get_blk_ptr(const struct htx *htx, const struct htx_blk *blk) +{ + return ((void *)htx->blocks + blk->addr); +} + +/* Returns the name of the block <blk>, only if it is a header or a + * trailer. Otherwise it returns an empty string. + */ +static inline struct ist htx_get_blk_name(const struct htx *htx, const struct htx_blk *blk) +{ + enum htx_blk_type type = htx_get_blk_type(blk); + struct ist ret; + + switch (type) { + case HTX_BLK_HDR: + case HTX_BLK_TLR: + ret = ist2(htx_get_blk_ptr(htx, blk), + blk->info & 0xff); + break; + + default: + return ist(""); + } + return ret; +} + + +/* Returns the value of the block <blk>, depending on its type. If there is no + * value (for end-of blocks), an empty one is returned. + */ +static inline struct ist htx_get_blk_value(const struct htx *htx, const struct htx_blk *blk) +{ + enum htx_blk_type type = htx_get_blk_type(blk); + struct ist ret; + + switch (type) { + case HTX_BLK_HDR: + case HTX_BLK_TLR: + ret = ist2(htx_get_blk_ptr(htx, blk) + (blk->info & 0xff), + (blk->info >> 8) & 0xfffff); + break; + + case HTX_BLK_REQ_SL: + case HTX_BLK_RES_SL: + case HTX_BLK_DATA: + ret = ist2(htx_get_blk_ptr(htx, blk), + blk->info & 0xfffffff); + break; + + default: + return ist(""); + } + return ret; +} + +/* Add a new start-line. It returns it on success, otherwise it returns NULL. It + * is the caller responsibility to set sl->info, if necessary. 
+ */ +static inline struct htx_sl *htx_add_stline(struct htx *htx, enum htx_blk_type type, unsigned int flags, + const struct ist p1, const struct ist p2, const struct ist p3) +{ + struct htx_blk *blk; + struct htx_sl *sl; + uint32_t size; + + if (type != HTX_BLK_REQ_SL && type != HTX_BLK_RES_SL) + return NULL; + + size = sizeof(*sl) + p1.len + p2.len + p3.len; + + blk = htx_add_blk(htx, type, size); + if (!blk) + return NULL; + blk->info += size; + + sl = htx_get_blk_ptr(htx, blk); + sl->flags = flags; + + HTX_SL_P1_LEN(sl) = p1.len; + HTX_SL_P2_LEN(sl) = p2.len; + HTX_SL_P3_LEN(sl) = p3.len; + + memcpy(HTX_SL_P1_PTR(sl), p1.ptr, p1.len); + memcpy(HTX_SL_P2_PTR(sl), p2.ptr, p2.len); + memcpy(HTX_SL_P3_PTR(sl), p3.ptr, p3.len); + + return sl; +} + +/* Adds an HTX block of type HDR in <htx>. It returns the new block on + * success. Otherwise, it returns NULL. The header name is always lower cased. + */ +static inline struct htx_blk *htx_add_header(struct htx *htx, const struct ist name, + const struct ist value) +{ + struct htx_blk *blk; + + if (name.len > 255 || value.len > 1048575) + return NULL; + + blk = htx_add_blk(htx, HTX_BLK_HDR, name.len + value.len); + if (!blk) + return NULL; + + blk->info += (value.len << 8) + name.len; + ist2bin_lc(htx_get_blk_ptr(htx, blk), name); + memcpy(htx_get_blk_ptr(htx, blk) + name.len, value.ptr, value.len); + return blk; +} + +/* Adds an HTX block of type TLR in <htx>. It returns the new block on + * success. Otherwise, it returns NULL. The trailer name is always lower cased. 
+ */ +static inline struct htx_blk *htx_add_trailer(struct htx *htx, const struct ist name, + const struct ist value) +{ + struct htx_blk *blk; + + if (name.len > 255 || value.len > 1048575) + return NULL; + + blk = htx_add_blk(htx, HTX_BLK_TLR, name.len + value.len); + if (!blk) + return NULL; + + blk->info += (value.len << 8) + name.len; + ist2bin_lc(htx_get_blk_ptr(htx, blk), name); + memcpy(htx_get_blk_ptr(htx, blk) + name.len, value.ptr, value.len); + return blk; +} + +/* Adds an HTX block of type EOH or EOT in <htx>. It returns the new block on + * success. Otherwise, it returns NULL. + */ +static inline struct htx_blk *htx_add_endof(struct htx *htx, enum htx_blk_type type) +{ + struct htx_blk *blk; + + blk = htx_add_blk(htx, type, 1); + if (!blk) + return NULL; + + blk->info += 1; + return blk; +} + +/* Add all headers from the list <hdrs> into the HTX message <htx>, followed by + * the EOH. On success, it returns the last block inserted (the EOH), otherwise + * NULL is returned. + * + * Headers with a NULL value (.ptr == NULL) are ignored but not those with empty + * value (.len == 0 but .ptr != NULL) + */ +static inline struct htx_blk *htx_add_all_headers(struct htx *htx, const struct http_hdr *hdrs) +{ + int i; + + for (i = 0; hdrs[i].n.len; i++) { + /* Don't check the value length because a header value may be empty */ + if (isttest(hdrs[i].v) == 0) + continue; + if (!htx_add_header(htx, hdrs[i].n, hdrs[i].v)) + return NULL; + } + return htx_add_endof(htx, HTX_BLK_EOH); +} + +/* Add all trailers from the list <hdrs> into the HTX message <htx>, followed by + * the EOT. On success, it returns the last block inserted (the EOT), otherwise + * NULL is returned. 
+ * + * Trailers with a NULL value (.ptr == NULL) are ignored but not those with + * empty value (.len == 0 but .ptr != NULL) + */ +static inline struct htx_blk *htx_add_all_trailers(struct htx *htx, const struct http_hdr *hdrs) +{ + int i; + + for (i = 0; hdrs[i].n.len; i++) { + /* Don't check the value length because a trailer value may be empty */ + if (isttest(hdrs[i].v) == 0) + continue; + if (!htx_add_trailer(htx, hdrs[i].n, hdrs[i].v)) + return NULL; + } + return htx_add_endof(htx, HTX_BLK_EOT); +} + +/* Removes <n> bytes from the beginning of DATA block <blk>. The block's start + * address and its length are adjusted, and the htx's total data count is + * updated. This is used to mark that part of the data was transferred + * from a DATA block without removing this DATA block. No sanity check is + * performed, the caller is responsible for doing this exclusively on DATA + * blocks, and never removing more than the block's size. + */ +static inline void htx_cut_data_blk(struct htx *htx, struct htx_blk *blk, uint32_t n) +{ + if (blk->addr == htx->end_addr) + htx->end_addr += n; + blk->addr += n; + blk->info -= n; + htx->data -= n; +} + +/* Returns the space used by metadata in <htx>: one struct htx_blk per position + * from head to tail inclusive, or 0 when the tail is unset. */ +static inline uint32_t htx_meta_space(const struct htx *htx) +{ + if (htx->tail == -1) + return 0; + + return ((htx->tail + 1 - htx->head) * sizeof(struct htx_blk)); +} + +/* Returns the space used (payload + metadata) in <htx> */ +static inline uint32_t htx_used_space(const struct htx *htx) +{ + return (htx->data + htx_meta_space(htx)); +} + +/* Returns the free space in <htx> */ +static inline uint32_t htx_free_space(const struct htx *htx) +{ + return (htx->size - htx_used_space(htx)); +} + +/* Returns the maximum size available to store some data in <htx> if a new block + * is reserved. 
+ */ +static inline uint32_t htx_free_data_space(const struct htx *htx) +{ + uint32_t free = htx_free_space(htx); + + if (free < sizeof(struct htx_blk)) + return 0; + return (free - sizeof(struct htx_blk)); +} + +/* Returns non-zero only if the HTX message free space wraps */ +static inline int htx_space_wraps(const struct htx *htx) +{ + uint32_t headroom, tailroom; + + headroom = (htx->end_addr - htx->head_addr); + tailroom = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr); + + return (headroom && tailroom); +} + +/* Returns the maximum size for a block, not exceeding <max> bytes. <max> may be + * set to -1 to have no limit. + */ +static inline uint32_t htx_get_max_blksz(const struct htx *htx, int32_t max) +{ + uint32_t free = htx_free_space(htx); + + if (max != -1 && free > max) + free = max; + if (free < sizeof(struct htx_blk)) + return 0; + return (free - sizeof(struct htx_blk)); +} + +/* Returns 1 if the message has less than 1/4 of its capacity free, otherwise 0 */ +static inline int htx_almost_full(const struct htx *htx) +{ + if (!htx->size || htx_free_space(htx) < htx->size / 4) + return 1; + return 0; +} + +/* Resets an HTX message */ +static inline void htx_reset(struct htx *htx) +{ + htx->tail = htx->head = htx->first = -1; + htx->data = 0; + htx->tail_addr = htx->head_addr = htx->end_addr = 0; + htx->extra = 0; + htx->flags = HTX_FL_NONE; +} + +/* Returns the available room for raw data in buffer <buf> once HTX overhead is + * taken into account (one HTX header and two blocks). The purpose is to figure + * the optimal fill length to avoid copies. + */ +static inline size_t buf_room_for_htx_data(const struct buffer *buf) +{ + size_t room; + + room = b_room(buf); + if (room <= HTX_BUF_OVERHEAD) + room = 0; + else + room -= HTX_BUF_OVERHEAD; + + return room; +} + + +/* Returns an HTX message using the buffer <buf>. Unlike htx_from_buf(), this + * function does not update the buffer. 
So if the HTX message is updated, the + * caller must call htx_to_buf() to be sure to also update the underlying buffer + * accordingly. Note that it always returns a valid pointer, either to an + * initialized buffer or to the empty buffer. This function must always be + * called with a buffer containing an HTX message (or an empty buffer). When + * <buf> holds no data, the HTX message at the start of its area is + * (re)initialized: its size is set and it is reset. + */ +static inline struct htx *htxbuf(const struct buffer *buf) +{ + struct htx *htx; + + if (b_is_null(buf)) + return &htx_empty; + htx = ((struct htx *)(buf->area)); + if (!b_data(buf)) { + htx->size = buf->size - sizeof(*htx); + htx_reset(htx); + } + return htx; +} + +/* Returns an HTX message using the buffer <buf>. <buf> is updated to appear as + * full. It should be used when you want to add something into the HTX message, + * so the call to htx_to_buf() may be skipped. But, it is the caller's + * responsibility to call htx_to_buf() to reset <buf> if it is relevant. The + * returned pointer is always valid. This function must always be called with a + * buffer containing an HTX message (or an empty buffer). + * + * The caller can call htxbuf() function to avoid any update of the buffer. + */ +static inline struct htx *htx_from_buf(struct buffer *buf) +{ + struct htx *htx = htxbuf(buf); + + b_set_data(buf, b_size(buf)); + return htx; +} + +/* Updates <buf> according to the HTX message <htx>: if <htx> is empty and has + * neither the parsing nor the processing error flag set, <htx> is reset and + * <buf> is marked as holding no data. Otherwise <buf> is marked as full. + */ +static inline void htx_to_buf(struct htx *htx, struct buffer *buf) +{ + if ((htx->head == -1) && + !(htx->flags & (HTX_FL_PARSING_ERROR|HTX_FL_PROCESSING_ERROR))) { + htx_reset(htx); + b_set_data(buf, 0); + } + else + b_set_data(buf, b_size(buf)); +} + +/* Returns 1 if the message is empty, otherwise it returns 0. Note that it is + * illegal to call this with htx == NULL. + */ +static inline int htx_is_empty(const struct htx *htx) +{ + return (htx->head == -1); +} + +/* Returns 1 if the message is not empty, otherwise it returns 0. Note that it + * is illegal to call this with htx == NULL. 
+ */ +static inline int htx_is_not_empty(const struct htx *htx) +{ + return (htx->head != -1); +} + +/* Returns 1 if more data are expected for the message <htx>, i.e. if the + * HTX_FL_EOM flag is not set. Otherwise it returns 0. Note that it is illegal + * to call this with htx == NULL. This function relies on the HTX_FL_EOM flag. + * It means tunneled data are not considered here. + */ +static inline int htx_expect_more(const struct htx *htx) +{ + return !(htx->flags & HTX_FL_EOM); +} + +/* Set EOM flag in <htx>. This function is useful if the HTX message is empty. + * In this case, an EOT block is appended first to ensure the EOM will be + * forwarded as expected. This is a workaround as it is not currently possible + * to push an empty HTX DATA block. + * + * Returns 1 on success else 0. + */ +static inline int htx_set_eom(struct htx *htx) +{ + if (htx_is_empty(htx)) { + if (!htx_add_endof(htx, HTX_BLK_EOT)) + return 0; + } + + htx->flags |= HTX_FL_EOM; + return 1; +} + +/* Copy an HTX message stored in the buffer <msg> to <htx>. We take care to + * not overwrite existing data. All the message is copied or nothing. It returns + * 1 on success and 0 on error. + */ +static inline int htx_copy_msg(struct htx *htx, const struct buffer *msg) +{ + /* The destination HTX message is allocated and empty, we can do a raw copy */ + if (htx_is_empty(htx) && htx_free_space(htx)) { + memcpy(htx, msg->area, msg->size); + return 1; + } + + /* Otherwise, we need to append the HTX message */ + return htx_append_msg(htx, htxbuf(msg)); +} + +/* Starting from the first block, removes every block whose type is greater + * than HTX_BLK_EOH. Trailers are removed too. */ +static inline void htx_skip_msg_payload(struct htx *htx) +{ + struct htx_blk *blk = htx_get_first_blk(htx); + + while (blk) { + enum htx_blk_type type = htx_get_blk_type(blk); + + blk = ((type > HTX_BLK_EOH) + ? htx_remove_blk(htx, blk) + : htx_get_next_blk(htx, blk)); + } +} + +/* Returns the number of used blocks in the HTX message <htx>. 
Note that it is + * illegal to call this function with htx == NULL. Note also blocks of type + * HTX_BLK_UNUSED are part of used blocks. + */ +static inline int htx_nbblks(const struct htx *htx) +{ + return ((htx->head != -1) ? (htx->tail + 1 - htx->head) : 0); +} +/* For debugging purpose */ +static inline const char *htx_blk_type_str(enum htx_blk_type type) +{ + switch (type) { + case HTX_BLK_REQ_SL: return "HTX_BLK_REQ_SL"; + case HTX_BLK_RES_SL: return "HTX_BLK_RES_SL"; + case HTX_BLK_HDR: return "HTX_BLK_HDR"; + case HTX_BLK_EOH: return "HTX_BLK_EOH"; + case HTX_BLK_DATA: return "HTX_BLK_DATA"; + case HTX_BLK_TLR: return "HTX_BLK_TLR"; + case HTX_BLK_EOT: return "HTX_BLK_EOT"; + case HTX_BLK_UNUSED: return "HTX_BLK_UNUSED"; + default: return "HTX_BLK_???"; + }; +} + +/* For debugging purpose */ +static inline void htx_dump(struct buffer *chunk, const struct htx *htx, int full) +{ + int32_t pos; + + chunk_appendf(chunk, " htx=%p(size=%u,data=%u,used=%u,wrap=%s,flags=0x%08x,extra=%llu," + "first=%d,head=%d,tail=%d,tail_addr=%d,head_addr=%d,end_addr=%d)", + htx, htx->size, htx->data, htx_nbblks(htx), (!htx->head_addr) ? 
"NO" : "YES", + htx->flags, (unsigned long long)htx->extra, htx->first, htx->head, htx->tail, + htx->tail_addr, htx->head_addr, htx->end_addr); + + if (!full || !htx_nbblks(htx)) + return; + chunk_memcat(chunk, "\n", 1); + + for (pos = htx_get_head(htx); pos != -1; pos = htx_get_next(htx, pos)) { + struct htx_sl *sl; + struct htx_blk *blk = htx_get_blk(htx, pos); + enum htx_blk_type type = htx_get_blk_type(blk); + uint32_t sz = htx_get_blksz(blk); + struct ist n, v; + + n = htx_get_blk_name(htx, blk); + v = htx_get_blk_value(htx, blk); + + if (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL) { + sl = htx_get_blk_ptr(htx, blk); + chunk_appendf(chunk, "\t\t[%u] type=%-17s - size=%-6u - addr=%-6u\t%.*s %.*s %.*s\n", + pos, htx_blk_type_str(type), sz, blk->addr, + HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl), + HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl), + HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl)); + } + else if (type == HTX_BLK_HDR || type == HTX_BLK_TLR) + chunk_appendf(chunk, "\t\t[%u] type=%-17s - size=%-6u - addr=%-6u\t%.*s: %.*s\n", + pos, htx_blk_type_str(type), sz, blk->addr, + (int)MIN(n.len, 32), n.ptr, + (int)MIN(v.len, 64), v.ptr); + else + chunk_appendf(chunk, "\t\t[%u] type=%-17s - size=%-6u - addr=%-6u%s\n", + pos, htx_blk_type_str(type), sz, blk->addr, + (!v.len ? 
"\t<empty>" : "")); + } +} + +#endif /* _HAPROXY_HTX_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/init-t.h b/include/haproxy/init-t.h new file mode 100644 index 0000000..110171b --- /dev/null +++ b/include/haproxy/init-t.h @@ -0,0 +1,64 @@ +#ifndef _HAPROXY_INIT_T_H +#define _HAPROXY_INIT_T_H + +#include <haproxy/list-t.h> + +struct proxy; +struct server; + +struct pre_check_fct { + struct list list; + int (*fct)(); +}; + +struct post_check_fct { + struct list list; + int (*fct)(); +}; + +struct post_proxy_check_fct { + struct list list; + int (*fct)(struct proxy *); +}; + +struct post_server_check_fct { + struct list list; + int (*fct)(struct server *); +}; + +struct per_thread_alloc_fct { + struct list list; + int (*fct)(); +}; + +struct per_thread_init_fct { + struct list list; + int (*fct)(); +}; + +struct post_deinit_fct { + struct list list; + void (*fct)(); +}; + +struct proxy_deinit_fct { + struct list list; + void (*fct)(struct proxy *); +}; + +struct server_deinit_fct { + struct list list; + void (*fct)(struct server *); +}; + +struct per_thread_free_fct { + struct list list; + void (*fct)(); +}; + +struct per_thread_deinit_fct { + struct list list; + void (*fct)(); +}; + +#endif /* _HAPROXY_INIT_T_H */ diff --git a/include/haproxy/init.h b/include/haproxy/init.h new file mode 100644 index 0000000..6e30475 --- /dev/null +++ b/include/haproxy/init.h @@ -0,0 +1,79 @@ +#ifndef _HAPROXY_INIT_H +#define _HAPROXY_INIT_H + +#include <haproxy/init-t.h> +#include <haproxy/initcall.h> + +struct proxy; +struct server; + +extern struct list pre_check_list; +extern struct list post_check_list; +extern struct list post_proxy_check_list; +extern struct list post_server_check_list; +extern struct list per_thread_alloc_list; +extern struct list per_thread_init_list; +extern struct list post_deinit_list; +extern struct list proxy_deinit_list; +extern struct list server_deinit_list; +extern struct 
list per_thread_free_list; +extern struct list per_thread_deinit_list; + +void hap_register_pre_check(int (*fct)()); +void hap_register_post_check(int (*fct)()); +void hap_register_post_proxy_check(int (*fct)(struct proxy *)); +void hap_register_post_server_check(int (*fct)(struct server *)); +void hap_register_post_deinit(void (*fct)()); +void hap_register_proxy_deinit(void (*fct)(struct proxy *)); +void hap_register_server_deinit(void (*fct)(struct server *)); + +void hap_register_per_thread_alloc(int (*fct)()); +void hap_register_per_thread_init(int (*fct)()); +void hap_register_per_thread_deinit(void (*fct)()); +void hap_register_per_thread_free(void (*fct)()); + +/* simplified way to declare a pre-check callback in a file */ +#define REGISTER_PRE_CHECK(fct) \ + INITCALL1(STG_REGISTER, hap_register_pre_check, (fct)) + +/* simplified way to declare a post-check callback in a file */ +#define REGISTER_POST_CHECK(fct) \ + INITCALL1(STG_REGISTER, hap_register_post_check, (fct)) + +/* simplified way to declare a post-proxy-check callback in a file */ +#define REGISTER_POST_PROXY_CHECK(fct) \ + INITCALL1(STG_REGISTER, hap_register_post_proxy_check, (fct)) + +/* simplified way to declare a post-server-check callback in a file */ +#define REGISTER_POST_SERVER_CHECK(fct) \ + INITCALL1(STG_REGISTER, hap_register_post_server_check, (fct)) + +/* simplified way to declare a post-deinit callback in a file */ +#define REGISTER_POST_DEINIT(fct) \ + INITCALL1(STG_REGISTER, hap_register_post_deinit, (fct)) + +/* simplified way to declare a proxy-deinit callback in a file */ +#define REGISTER_PROXY_DEINIT(fct) \ + INITCALL1(STG_REGISTER, hap_register_proxy_deinit, (fct)) + +/* simplified way to declare a server-deinit callback in a file */ +#define REGISTER_SERVER_DEINIT(fct) \ + INITCALL1(STG_REGISTER, hap_register_server_deinit, (fct)) + +/* simplified way to declare a per-thread allocation callback in a file */ +#define REGISTER_PER_THREAD_ALLOC(fct) \ + INITCALL1(STG_REGISTER, 
hap_register_per_thread_alloc, (fct)) + +/* simplified way to declare a per-thread init callback in a file */ +#define REGISTER_PER_THREAD_INIT(fct) \ + INITCALL1(STG_REGISTER, hap_register_per_thread_init, (fct)) + +/* simplified way to declare a per-thread deinit callback in a file */ +#define REGISTER_PER_THREAD_DEINIT(fct) \ + INITCALL1(STG_REGISTER, hap_register_per_thread_deinit, (fct)) + +/* simplified way to declare a per-thread free callback in a file */ +#define REGISTER_PER_THREAD_FREE(fct) \ + INITCALL1(STG_REGISTER, hap_register_per_thread_free, (fct)) + +#endif /* _HAPROXY_INIT_H */ diff --git a/include/haproxy/initcall.h b/include/haproxy/initcall.h new file mode 100644 index 0000000..dffec04 --- /dev/null +++ b/include/haproxy/initcall.h @@ -0,0 +1,257 @@ +/* + * include/haproxy/initcall.h + * + * Initcall management. + * + * Copyright (C) 2018-2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_INITCALL_H +#define _HAPROXY_INITCALL_H + +#include <haproxy/compiler.h> + +/* List of known init stages. If others are added, please declare their + * section at the end of the file below. + */ + +/* The principle of the initcalls is to create optional sections in the target + * program which are made of arrays of structures containing a function pointer + * and 3 argument pointers. Then at boot time, these sections are scanned in a + * well defined order to call in turn each of these functions with their + * arguments. This allows to declare register callbacks in C files without + * having to export lots of things nor to cross-reference functions. There are + * several initialization stages defined so that certain guarantees are offered + * (for example list heads might or might not be initialized, pools might or + * might not have been created yet). + * + * On some very old platforms there is no convenient way to retrieve the start + * or stop pointer for these sections so there is no reliable way to enumerate + * the callbacks. When this is the case, as detected when USE_OBSOLETE_LINKER + * is set, instead of using sections we exclusively use constructors whose name + * is based on the current line number in the file to guarantee uniqueness. + * When called, these constructors then add their callback to their respective + * list. It works as well but slightly inflates the executable's size since + * code has to be emitted just to register each of these callbacks. 
+ */ + +/* + * Please keep those names short enough, they are used to generate section + * names, Mac OS X accepts section names up to 16 characters, and we prefix + * them with i_, so stage name can't be more than 14 characters. + */ +enum init_stage { + STG_PREPARE = 0, // preset variables, tables, list heads + STG_LOCK, // pre-initialize locks + STG_REGISTER, // register static lists (keywords etc) + STG_ALLOC, // allocate required structures + STG_POOL, // create pools + STG_INIT, // subsystems normal initialization + STG_SIZE // size of the stages array, must be last +}; + +/* This is the descriptor for an initcall */ +struct initcall { + void (*const fct)(void *arg1, void *arg2, void *arg3); + void *arg1; + void *arg2; + void *arg3; +#if defined(USE_OBSOLETE_LINKER) + void *next; +#endif +}; + + +#if !defined(USE_OBSOLETE_LINKER) + +#define HA_INIT_SECTION(s) HA_SECTION("i_" # s) + +/* Declare a static variable in the init section dedicated to stage <stg>, + * with an element referencing function <function> and arguments <a1..a3>. + * <linenum> is needed to deduplicate entries created from the same file. The + * trick with (stg<STG_SIZE) consists in verifying that stg is a valid enum + * value from the initcall set, and to emit a warning or error if it is not. + * The function's type is cast so that it is technically possible to call a + * function taking other argument types, provided they are all the same size + * as a pointer (args are cast to (void*)). Do not use this macro directly, + * use INITCALL{0..3}() instead. + */ +#define __DECLARE_INITCALL(stg, linenum, function, a1, a2, a3) \ + HA_GLOBL(__start_i_##stg ); \ + HA_GLOBL(__stop_i_##stg ); \ + static const struct initcall *__initcb_##linenum \ + __attribute__((__used__)) HA_INIT_SECTION(stg) = \ + (stg < STG_SIZE) ? 
&(const struct initcall) { \ + .fct = (void (*)(void *,void *,void *))function, \ + .arg1 = (void *)(a1), \ + .arg2 = (void *)(a2), \ + .arg3 = (void *)(a3), \ + } : NULL + + +#else // USE_OBSOLETE_LINKER + +/* Declare a static constructor function to register a static descriptor for + * stage <stg>, with an element referencing function <function> and arguments + * <a1..a3>. <linenum> is needed to deduplicate entries created from the same + * file. The trick with (stg<STG_SIZE) consists in verifying that stg is a + * valid enum value from the initcall set, and to emit a warning or error if + * it is not. + * The function's type is cast so that it is technically possible to call a + * function taking other argument types, provided they are all the same size + * as a pointer (args are cast to (void*)). Do not use this macro directly, + * use INITCALL{0..3}() instead. + */ +#define __DECLARE_INITCALL(stg, linenum, function, a1, a2, a3) \ +__attribute__((constructor)) static void __initcb_##linenum() \ +{ \ + static struct initcall entry = { \ + .fct = (void (*)(void *,void *,void *))function, \ + .arg1 = (void *)(a1), \ + .arg2 = (void *)(a2), \ + .arg3 = (void *)(a3), \ + }; \ + if (stg < STG_SIZE) { \ + entry.next = __initstg[stg]; \ + __initstg[stg] = &entry; \ + }; \ +} + +#endif // USE_OBSOLETE_LINKER + +/* This is used to resolve <linenum> to an integer before calling + * __DECLARE_INITCALL(). Do not use this macro directly, use INITCALL{0..3}() + * instead. + */ +#define _DECLARE_INITCALL(...) \ + __DECLARE_INITCALL(__VA_ARGS__) + +/* This requires that function <function> is called during init stage <stage> + * which must be one of init_stage. No argument is supplied: the three + * argument slots of the descriptor are all set to 0. + */ +#define INITCALL0(stage, function) \ + _DECLARE_INITCALL(stage, __LINE__, function, 0, 0, 0) + +/* This requires that function <function> is called with pointer argument + * <arg1> during init stage <stage> which must be one of init_stage. 
+ */ +#define INITCALL1(stage, function, arg1) \ + _DECLARE_INITCALL(stage, __LINE__, function, arg1, 0, 0) + +/* This requires that function <function> is called with pointer arguments + * <arg1..2> during init stage <stage> which must be one of init_stage. + */ +#define INITCALL2(stage, function, arg1, arg2) \ + _DECLARE_INITCALL(stage, __LINE__, function, arg1, arg2, 0) + +/* This requires that function <function> is called with pointer arguments + * <arg1..3> during init stage <stage> which must be one of init_stage. + */ +#define INITCALL3(stage, function, arg1, arg2, arg3) \ + _DECLARE_INITCALL(stage, __LINE__, function, arg1, arg2, arg3) + +#if !defined(USE_OBSOLETE_LINKER) +/* Iterate pointer p (of type initcall**) over all registered calls at + * stage <stg>. + */ +#define FOREACH_INITCALL(p,stg) \ + for ((p) = &(__start_i_##stg); (p) < &(__stop_i_##stg); (p)++) + +#else // USE_OBSOLETE_LINKER + +#define FOREACH_INITCALL(p,stg) \ + for ((p) = __initstg[stg]; (p); (p) = (p)->next) +#endif // USE_OBSOLETE_LINKER + + +#if !defined(USE_OBSOLETE_LINKER) +/* Declare a section for stage <stg>. The start and stop pointers are set by + * the linker itself, which is why they're declared extern here. The weak + * attribute is used so that we declare them ourselves if the section is + * empty. The corresponding sections must contain exclusively pointers to + * make sure each location may safely be visited by incrementing a pointer. 
+ */ +#define DECLARE_INIT_SECTION(stg) \ + extern __attribute__((__weak__)) const struct initcall *__start_i_##stg HA_SECTION_START("i_" # stg); \ + extern __attribute__((__weak__)) const struct initcall *__stop_i_##stg HA_SECTION_STOP("i_" # stg) + +/* Declare all initcall sections here */ +DECLARE_INIT_SECTION(STG_PREPARE); +DECLARE_INIT_SECTION(STG_LOCK); +DECLARE_INIT_SECTION(STG_REGISTER); +DECLARE_INIT_SECTION(STG_ALLOC); +DECLARE_INIT_SECTION(STG_POOL); +DECLARE_INIT_SECTION(STG_INIT); + +// for use in the main haproxy.c file +#define DECLARE_INIT_STAGES asm("") + +/* not needed anymore */ +#undef DECLARE_INIT_SECTION + +#else // USE_OBSOLETE_LINKER + +extern struct initcall *__initstg[STG_SIZE]; + +// for use in the main haproxy.c file +#define DECLARE_INIT_STAGES struct initcall *__initstg[STG_SIZE] + +#endif // USE_OBSOLETE_LINKER + +#if !defined(USE_OBSOLETE_LINKER) +/* Run the initcalls for stage <stg>. The test on <stg> is only there to + * ensure it is a valid initcall stage. + */ +#define RUN_INITCALLS(stg) \ + do { \ + const struct initcall **ptr; \ + if (stg >= STG_SIZE) \ + break; \ + FOREACH_INITCALL(ptr, stg) \ + (*ptr)->fct((*ptr)->arg1, (*ptr)->arg2, (*ptr)->arg3); \ + } while (0) + +#else // USE_OBSOLETE_LINKER + +/* Run the initcalls for stage <stg>. The test on <stg> is only there to + * ensure it is a valid initcall stage. + */ +#define RUN_INITCALLS(stg) \ + do { \ + const struct initcall *ptr; \ + if (stg >= STG_SIZE) \ + break; \ + FOREACH_INITCALL(ptr, stg) \ + (ptr)->fct((ptr)->arg1, (ptr)->arg2, (ptr)->arg3); \ + } while (0) + +#endif // USE_OBSOLETE_LINKER + +#endif /* _HAPROXY_INITCALL_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/intops.h b/include/haproxy/intops.h new file mode 100644 index 0000000..34010cc --- /dev/null +++ b/include/haproxy/intops.h @@ -0,0 +1,495 @@ +/* + * include/haproxy/intops.h + * Functions for integer operations. 
+ * + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef _HAPROXY_INTOPS_H +#define _HAPROXY_INTOPS_H + +#include <haproxy/api.h> + +/* exported functions, mostly integer parsing */ +/* rounds <i> down to the closest value having max 2 digits */ +unsigned int round_2dig(unsigned int i); +unsigned int full_hash(unsigned int a); +int varint_bytes(uint64_t v); +unsigned int read_uint(const char **s, const char *end); +long long read_int64(const char **s, const char *end); +unsigned long long read_uint64(const char **s, const char *end); +unsigned int str2ui(const char *s); +unsigned int str2uic(const char *s); +unsigned int strl2ui(const char *s, int len); +unsigned int strl2uic(const char *s, int len); +int strl2ic(const char *s, int len); +int strl2irc(const char *s, int len, int *ret); +int strl2llrc(const char *s, int len, long long *ret); +int strl2llrc_dotted(const char *text, int len, long long *ret); +unsigned int mask_find_rank_bit(unsigned int r, unsigned long m); +unsigned int mask_find_rank_bit_fast(unsigned int r, unsigned long m, + unsigned long a, unsigned long b, + unsigned long c, unsigned long d); +void mask_prep_rank_map(unsigned long m, + unsigned long *a, unsigned long *b, + unsigned long *c, unsigned long *d); +int one_among_mask(unsigned long 
/* Multiply the two 32-bit operands and return the upper 32 bits of the 64-bit
 * value (a * b + a), i.e. of a * (b + 1). This is used to compute fixed
 * ratios by setting one of the operands to (2^32*ratio). Because of the extra
 * "+ a" term, b = 0xFFFFFFFF yields exactly <a> (assuming 32-bit ints).
 */
static inline unsigned int mul32hi(unsigned int a, unsigned int b)
{
	return ((unsigned long long)a * b + a) >> 32;
}

/* gcc does not know when it can safely divide 64 bits by 32 bits. Use this
 * function when you know for sure that the result fits in 32 bits, because
 * it is optimal on x86 and on 64bit processors. <o2> must not be zero. The
 * return value is the quotient truncated to an unsigned int.
 */
static inline unsigned int div64_32(unsigned long long o1, unsigned int o2)
{
	unsigned long long result;
#ifdef __i386__
	/* "A" is the edx:eax pair; __asm__ (not asm) so that the header also
	 * builds in strict ISO modes, like the other inline asm in this file.
	 */
	__asm__("divl %2"
	        : "=A" (result)
	        : "A"(o1), "rm"(o2));
#else
	result = o1 / o2;
#endif
	return result;
}

/* Rotate the 64-bit integer <v> left by <bits> positions. Only bits 0..5 of
 * <bits> are used, so any count is accepted and reduced modulo 64. The count
 * is always masked: shifting a 64-bit value by 64 or more is undefined in C,
 * whatever the target CPU does with oversized shift counts.
 */
static inline uint64_t rotl64(uint64_t v, uint8_t bits)
{
	bits &= 63;
	/* (64 - bits) & 63 maps a null rotation to a shift by 0, not by 64 */
	return (v << bits) | (v >> ((64 - bits) & 63));
}

/* Rotate the 64-bit integer <v> right by <bits> positions. Only bits 0..5 of
 * <bits> are used, so any count is accepted and reduced modulo 64. The count
 * is always masked for the same reason as in rotl64().
 */
static inline uint64_t rotr64(uint64_t v, uint8_t bits)
{
	bits &= 63;
	/* (64 - bits) & 63 maps a null rotation to a shift by 0, not by 64 */
	return (v >> bits) | (v << ((64 - bits) & 63));
}

/* Simple popcountl implementation. It returns the number of ones in a word.
 * Described here : https://graphics.stanford.edu/~seander/bithacks.html
 */
static inline unsigned int my_popcountl(unsigned long a)
{
	/* sum adjacent bits into 2-bit, then 4-bit, then 8-bit counters */
	a = a - ((a >> 1) & ~0UL/3);
	a = (a & ~0UL/15*3) + ((a >> 2) & ~0UL/15*3);
	a = (a + (a >> 4)) & ~0UL/255*15;
	/* the multiplication accumulates all byte counters into the top byte */
	return (unsigned long)(a * (~0UL/255)) >> (sizeof(unsigned long) - 1) * 8;
}

/* returns non-zero if <a> has at least 2 bits set, otherwise 0 */
static inline unsigned long atleast2(unsigned long a)
{
	/* a - 1 clears the lowest set bit of <a> when and-ed with <a> */
	return a & (a - 1);
}
/* Find-first-set on a long: returns the 1-based position of the lowest bit
 * set in <a>. Calling it with a==0 is not allowed (undefined result).
 */
static inline unsigned int my_ffsl(unsigned long a)
{
	unsigned long pos;

#if defined(__x86_64__)
	__asm__("bsf %1,%0\n" : "=r" (pos) : "rm" (a));
	pos++;
#else
	unsigned int width;

	pos = 1;
#if LONG_MAX > 0x7FFFFFFFL /* 64bits */
	width = 32;
#else
	width = 16;
#endif
	/* dichotomy: whenever the <width> lower bits are all zero, the first
	 * set bit is above them, so drop them and account for their number.
	 */
	for (; width; width >>= 1) {
		if (!(a & ((1UL << width) - 1))) {
			a >>= width;
			pos += width;
		}
	}
#endif /* x86_64 */

	return pos;
}

/* Find-last-set on a long: returns the 1-based position of the highest bit
 * set in <a>. Calling it with a==0 is not allowed (undefined result).
 */
static inline unsigned int my_flsl(unsigned long a)
{
	unsigned long pos;

#if defined(__x86_64__)
	__asm__("bsr %1,%0\n" : "=r" (pos) : "rm" (a));
	pos++;
#else
	unsigned int width;

	pos = 1;
#if LONG_MAX > 0x7FFFFFFFL /* 64bits */
	width = 32;
#else
	width = 16;
#endif
	/* dichotomy: whenever something remains above the <width> lower bits,
	 * the last set bit is up there, so only keep that upper part.
	 */
	for (; width; width >>= 1) {
		if (a >> width) {
			a >>= width;
			pos += width;
		}
	}
#endif /* x86_64 */

	return pos;
}

/* Build a word with the <bits> lower bits set (reverse of my_popcountl).
 * A null or negative <bits> gives 0.
 */
static inline unsigned long nbits(int bits)
{
	if (bits <= 0)
		return 0;

	/* shifting 2 by (bits - 1) instead of 1 by <bits> keeps the shift
	 * count below the word size when all the bits are requested.
	 */
	return (2UL << (bits - 1)) - 1;
}
/* Converts the 64-bit value <a> from host byte order to network byte order.
 * On x86_64 this is a single bswapq instruction. Elsewhere the value is seen
 * through a union as two 32-bit words, each of which goes through htonl()
 * before the two halves are swapped and glued back together.
 */
static inline unsigned long long my_htonll(unsigned long long a)
{
#if defined(__x86_64__)
	__asm__ volatile("bswapq %0" : "=r"(a) : "0"(a));
	return a;
#else
	union {
		struct {
			unsigned int w1;
			unsigned int w2;
		} by32;
		unsigned long long by64;
	} split = { .by64 = a };
	unsigned long long upper = htonl(split.by32.w1);
	unsigned long long lower = htonl(split.by32.w2);

	return (upper << 32) | lower;
#endif
}

/* Converts the 64-bit value <a> from network byte order to host byte order.
 * The operation is the same as my_htonll().
 */
static inline unsigned long long my_ntohll(unsigned long long a)
{
	return my_htonll(a);
}

/* sets bit <bit> in the bit map <map>, which is an array of longs */
static inline void ha_bit_set(unsigned long bit, long *map)
{
	const unsigned long per_word = 8 * sizeof(*map);

	map[bit / per_word] |= 1UL << (bit & (per_word - 1));
}

/* clears bit <bit> in the bit map <map>, which is an array of longs */
static inline void ha_bit_clr(unsigned long bit, long *map)
{
	const unsigned long per_word = 8 * sizeof(*map);

	map[bit / per_word] &= ~(1UL << (bit & (per_word - 1)));
}

/* inverts bit <bit> in the bit map <map>, which is an array of longs */
static inline void ha_bit_flip(unsigned long bit, long *map)
{
	const unsigned long per_word = 8 * sizeof(*map);

	map[bit / per_word] ^= 1UL << (bit & (per_word - 1));
}

/* returns 1 if bit <bit> of the bit map <map> is set, otherwise 0 */
static inline int ha_bit_test(unsigned long bit, const long *map)
{
	const unsigned long per_word = 8 * sizeof(*map);
	const unsigned long mask = 1UL << (bit & (per_word - 1));

	return !!(map[bit / per_word] & mask);
}

/* hash a 32-bit integer to another 32-bit integer. This code may be large when
 * inlined, use full_hash() instead.
 */
static inline unsigned int __full_hash(unsigned int a)
{
	/* This is one of Bob Jenkins' full avalanche hashing functions, which
	 * provides quite a good distribution for little input variations.
	 * Check http://burtleburtle.net/bob/hash/integer.html for more info.
	 */
	a += 0x7ed55d16U + (a << 12);
	a ^= 0xc761c23cU ^ (a >> 19);
	a += 0x165667b1U + (a << 5);
	a  = (a + 0xd3a2646cU) ^ (a << 9);
	a += 0xfd7046c5U + (a << 3);
	a ^= 0xb55a4f09U ^ (a >> 16);

	/* final spreading over the 32-bit space: multiply by a large prime
	 * close to 3/4 of 2^32.
	 */
	return a * 3221225473U;
}

/*
 * Returns the value (0..15) of the hex digit <c> (0-9, a-f, A-F), or -1 if
 * <c> is not a hex digit. Each range is tested with a single unsigned
 * comparison on the distance from the first character of the range.
 */
static inline int hex2i(int c)
{
	int dist;

	dist = c - '0';
	if ((unsigned char)dist <= 9)
		return dist;

	dist = c - 'A';
	if ((unsigned char)dist <= 5)
		return dist + 10;

	dist = c - 'a';
	if ((unsigned char)dist <= 5)
		return dist + 10;

	return -1;
}

/* Converts the NUL-terminated decimal string <s> to an unsigned int without
 * any check at all: every character is taken as a digit, and the result
 * silently wraps on overflow.
 */
static inline unsigned int __str2ui(const char *s)
{
	unsigned int val = 0;

	for (; *s; s++)
		val = val * 10 + ((unsigned char)*s - '0');
	return val;
}

/* Converts the leading decimal digits of string <s> to an unsigned int. It
 * stops at the first character which is not a digit (including the trailing
 * zero) and returns the value of the digits read so far.
 */
static inline unsigned int __str2uic(const char *s)
{
	unsigned int val = 0;
	unsigned int digit;

	for (;; s++) {
		digit = *s - '0';
		if (digit > 9)
			return val;
		val = val * 10 + digit;
	}
}

/* Converts the <len> first characters of <s> to an unsigned int without any
 * check at all: every character is taken as a digit, and the result silently
 * wraps on overflow. A null or negative <len> gives 0.
 */
static inline unsigned int __strl2ui(const char *s, int len)
{
	unsigned int val = 0;

	for (; len > 0; len--, s++)
		val = val * 10 + ((unsigned char)*s - '0');
	return val;
}
/* Converts the leading decimal digits found in the <len> first characters of
 * <s> to an unsigned int. It stops at the first character which is not a
 * digit and returns the value of the digits read so far.
 */
static inline unsigned int __strl2uic(const char *s, int len)
{
	unsigned int val = 0;
	unsigned int digit;

	for (; len > 0; len--, s++) {
		digit = *s - '0';
		if (digit > 9)
			break;
		val = val * 10 + digit;
	}
	return val;
}

/* Reads an unsigned integer from the string pointed to by <s> and returns it.
 * Parsing stops at the first non-digit character or at <end>, whichever
 * comes first, and <*s> is updated to point to the first unread character.
 */
static inline unsigned int __read_uint(const char **s, const char *end)
{
	const char *cur;
	unsigned int val = 0;
	unsigned int digit;

	for (cur = *s; cur < end; cur++) {
		digit = *cur - '0';
		if (digit > 9)
			break;
		val = val * 10 + digit;
	}
	*s = cur;
	return val;
}

/* Returns the number of bytes (1 to 10) needed to encode <v> as a varint.
 * This is the inline version, best suited to constant arguments; use
 * varint_bytes() instead in case of doubt.
 */
static inline int __varint_bytes(uint64_t v)
{
	/* each bound is the highest value which fits in that many bytes */
	if (v <= 0x00000000000000efULL)
		return 1;
	if (v <= 0x00000000000008efULL)
		return 2;
	if (v <= 0x00000000000408efULL)
		return 3;
	if (v <= 0x00000000020408efULL)
		return 4;
	if (v <= 0x00000001020408efULL)
		return 5;
	if (v <= 0x00000081020408efULL)
		return 6;
	if (v <= 0x00004081020408efULL)
		return 7;
	if (v <= 0x00204081020408efULL)
		return 8;
	if (v <= 0x10204081020408efULL)
		return 9;
	return 10;
}
/* Encode the integer <i> into a varint (variable-length integer) written at
 * <*buf>, never going past <end>. Here is the encoding format:
 *
 *        0 <= X < 240        : 1 byte  (7.875 bits)  [ XXXX XXXX ]
 *      240 <= X < 2288       : 2 bytes (11 bits)     [ 1111 XXXX ] [ 0XXX XXXX ]
 *     2288 <= X < 264432     : 3 bytes (18 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]   [ 0XXX XXXX ]
 *   264432 <= X < 33818864   : 4 bytes (25 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]*2 [ 0XXX XXXX ]
 * 33818864 <= X < 4328786160 : 5 bytes (32 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]*3 [ 0XXX XXXX ]
 * ...
 *
 * On success, it returns the number of written bytes and <*buf> is moved after
 * the encoded value. Otherwise, it returns -1 and <*buf> is left unchanged,
 * though the first bytes of the value may already have been written.
 */
static inline int encode_varint(uint64_t i, char **buf, char *end)
{
	unsigned char *const limit = (unsigned char *)end;
	unsigned char *out = (unsigned char *)*buf;
	int written;

	if (out >= limit)
		return -1;

	/* small values fit in a single byte */
	if (i < 240) {
		*out++ = i;
		*buf = (char *)out;
		return 1;
	}

	/* first byte: 4 low bits of the value, 4 high bits all set */
	*out++ = (unsigned char)i | 240;

	/* middle bytes: 7 bits each, with the continuation bit set */
	for (i = (i - 240) >> 4; i >= 128; i = (i - 128) >> 7) {
		if (out >= limit)
			return -1;
		*out++ = (unsigned char)i | 128;
	}

	/* last byte: the remainder, below 128 */
	if (out >= limit)
		return -1;
	*out++ = (unsigned char)i;

	written = (char *)out - *buf;
	*buf = (char *)out;
	return written;
}
/* Decode a varint from <*buf>, never reading past <end>, and save the decoded
 * value in <*i>. See encode_varint() for details about the varint format.
 * On success, it returns the number of read bytes and <*buf> is moved after the
 * varint. Otherwise, it returns -1 and <*buf> is left unchanged; <*i> may have
 * been modified in this case. The input is rejected when it is truncated or
 * when it has more continuation bytes than a 64-bit value may need.
 */
static inline int decode_varint(char **buf, char *end, uint64_t *i)
{
	unsigned char *p = (unsigned char *)*buf;
	int r;

	if (p >= (unsigned char *)end)
		return -1;

	/* values below 240 are stored on a single byte */
	*i = *p++;
	if (*i < 240) {
		*buf = (char *)p;
		return 1;
	}

	/* The first byte carried 4 bits, each following one carries 7 more.
	 * A 64-bit value never needs a shift above 60, and shifting a 64-bit
	 * integer by 64 or more is undefined, so over-long sequences are
	 * refused instead of being accumulated.
	 */
	r = 4;
	do {
		if (p >= (unsigned char *)end || r >= 64)
			return -1;
		*i += (uint64_t)*p << r;
		r  += 7;
	} while (*p++ >= 128);

	r    = ((char *)p - *buf);
	*buf = (char *)p;
	return r;
}
+ */ + +#ifndef _HAPROXY_ISTBUF_H +#define _HAPROXY_ISTBUF_H + +#include <sys/types.h> +#include <import/ist.h> +#include <haproxy/buf.h> + + +/* b_isteq() : returns > 0 if the first <n> characters of buffer <b> starting + * at offset <o> relative to the buffer's head match <ist>. (empty strings do + * match). It is designed to be used with reasonably small strings (it matches + * a single byte per loop iteration). It is expected to be used with an offset + * to skip old data. For example : + * - "input" contents : b_isteq(b, old_cnt, new_cnt, ist); + * - "output" contents : b_isteq(b, 0, old_cnt, ist); + * Return value : + * >0 : the number of matching bytes + * =0 : not enough bytes (or matching of empty string) + * <0 : non-matching byte found + */ +static inline ssize_t b_isteq(const struct buffer *b, size_t o, size_t n, const struct ist ist) +{ + struct ist r = ist; + const char *p; + const char *end = b_wrap(b); + + if (n < r.len) + return 0; + + p = b_peek(b, o); + while (r.len--) { + if (*p++ != *r.ptr++) + return -1; + if (unlikely(p == end)) + p = b_orig(b); + } + return ist.len; +} + +/* Same as b_isteq but case-insensitive */ +static inline ssize_t b_isteqi(const struct buffer *b, size_t o, size_t n, const struct ist ist) +{ + struct ist r = ist; + const char *p; + const char *end = b_wrap(b); + + if (n < r.len) + return 0; + + p = b_peek(b, o); + while (r.len--) { + if (*p != *r.ptr && + ist_lc[(unsigned char)*p] != ist_lc[(unsigned char)*r.ptr]) + return -1; + p++; + r.ptr++; + if (unlikely(p == end)) + p = b_orig(b); + } + return ist.len; +} + +/* b_isteat() : "eats" string <ist> from the head of buffer <b>. Wrapping data + * is explicitly supported. It matches a single byte per iteration so strings + * should remain reasonably small. 
Returns : + * > 0 : number of bytes matched and eaten + * = 0 : not enough bytes (or matching an empty string) + * < 0 : non-matching byte found + */ +static inline ssize_t b_isteat(struct buffer *b, const struct ist ist) +{ + ssize_t ret = b_isteq(b, 0, b_data(b), ist); + + if (ret > 0) + b_del(b, ret); + return ret; +} + +/* b_istput() : injects string <ist> at the tail of output buffer <b> provided + * that it fits. Wrapping is supported. It's designed for small strings as it + * only writes a single byte per iteration. Returns the number of characters + * copied (ist.len), 0 if it temporarily does not fit, or -1 if it will never + * fit. It will only modify the buffer upon success. In all cases, the contents + * are copied prior to reporting an error, so that the destination at least + * contains a valid but truncated string. + */ +static inline ssize_t b_istput(struct buffer *b, const struct ist ist) +{ + const char *end = b_wrap(b); + struct ist r = ist; + char *p; + + if (r.len > (size_t)b_room(b)) + return r.len < b->size ? 0 : -1; + + p = b_tail(b); + b->data += r.len; + while (r.len--) { + *p++ = *r.ptr++; + if (unlikely(p == end)) + p = b_orig(b); + } + return ist.len; +} + +/* b_putist() : tries to copy as much as possible of string <ist> into buffer + * <b> and returns the number of bytes copied (truncation is possible). It uses + * b_putblk() and is suitable for large blocks. 
+ */ +static inline size_t b_putist(struct buffer *b, const struct ist ist) +{ + return b_putblk(b, ist.ptr, ist.len); +} + +/* builds and return a <struct buffer> based on <ist> + */ +static inline struct buffer ist2buf(const struct ist ist) +{ + struct buffer buf; + + buf.area = ist.ptr; + buf.size = ist.len; + buf.data = ist.len; + buf.head = 0; + return buf; +} + +#endif /* _HAPROXY_ISTBUF_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/jwt-t.h b/include/haproxy/jwt-t.h new file mode 100644 index 0000000..e94607e --- /dev/null +++ b/include/haproxy/jwt-t.h @@ -0,0 +1,86 @@ +/* + * include/haproxy/jwt-t.h + * Macros, variables and structures for JWT management. + * + * Copyright (C) 2021 HAProxy Technologies, Remi Tricot-Le Breton <rlebreton@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_JWT_T_H +#define _HAPROXY_JWT_T_H + +#include <haproxy/openssl-compat.h> + +#ifdef USE_OPENSSL +enum jwt_alg { + JWT_ALG_DEFAULT, + JWS_ALG_NONE, + JWS_ALG_HS256, + JWS_ALG_HS384, + JWS_ALG_HS512, + JWS_ALG_RS256, + JWS_ALG_RS384, + JWS_ALG_RS512, + JWS_ALG_ES256, + JWS_ALG_ES384, + JWS_ALG_ES512, + JWS_ALG_PS256, + JWS_ALG_PS384, + JWS_ALG_PS512, +}; + +struct jwt_item { + char *start; + size_t length; +}; + +struct jwt_ctx { + enum jwt_alg alg; + struct jwt_item jose; + struct jwt_item claims; + struct jwt_item signature; + char *key; + unsigned int key_length; +}; + +enum jwt_elt { + JWT_ELT_JOSE = 0, + JWT_ELT_CLAIMS, + JWT_ELT_SIG, + JWT_ELT_MAX +}; + +struct jwt_cert_tree_entry { + EVP_PKEY *pkey; + struct ebmb_node node; + char path[VAR_ARRAY]; +}; + +enum jwt_vrfy_status { + JWT_VRFY_KO = 0, + JWT_VRFY_OK = 1, + + JWT_VRFY_UNKNOWN_ALG = -1, + JWT_VRFY_UNMANAGED_ALG = -2, + JWT_VRFY_INVALID_TOKEN = -3, + JWT_VRFY_OUT_OF_MEMORY = -4, + JWT_VRFY_UNKNOWN_CERT = -5 +}; + +#endif /* USE_OPENSSL */ + + +#endif /* _HAPROXY_JWT_T_H */ diff --git a/include/haproxy/jwt.h b/include/haproxy/jwt.h new file mode 100644 index 0000000..a343ffa --- /dev/null +++ b/include/haproxy/jwt.h @@ -0,0 +1,37 @@ +/* + * include/haproxy/jwt.h + * Functions for JSON Web Token (JWT) management. + * + * Copyright (C) 2021 HAProxy Technologies, Remi Tricot-Le Breton <rlebreton@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_JWT_H +#define _HAPROXY_JWT_H + +#include <haproxy/jwt-t.h> +#include <haproxy/buf-t.h> + +#ifdef USE_OPENSSL +enum jwt_alg jwt_parse_alg(const char *alg_str, unsigned int alg_len); +int jwt_tokenize(const struct buffer *jwt, struct jwt_item *items, unsigned int *item_num); +int jwt_tree_load_cert(char *path, int pathlen, char **err); + +enum jwt_vrfy_status jwt_verify(const struct buffer *token, const struct buffer *alg, + const struct buffer *key); +#endif /* USE_OPENSSL */ + +#endif /* _HAPROXY_JWT_H */ diff --git a/include/haproxy/lb_chash-t.h b/include/haproxy/lb_chash-t.h new file mode 100644 index 0000000..c437981 --- /dev/null +++ b/include/haproxy/lb_chash-t.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/lb_chash-t.h + * Types for Consistent Hash LB algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_CHASH_T_H +#define _HAPROXY_LB_CHASH_T_H + +#include <import/ebtree-t.h> + +struct lb_chash { + struct eb_root act; /* weighted chash entries of active servers */ + struct eb_root bck; /* weighted chash entries of backup servers */ + struct eb32_node *last; /* last node found in case of round robin (or NULL) */ +}; + +#endif /* _HAPROXY_LB_CHASH_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_chash.h b/include/haproxy/lb_chash.h new file mode 100644 index 0000000..7950457 --- /dev/null +++ b/include/haproxy/lb_chash.h @@ -0,0 +1,41 @@ +/* + * include/haproxy/lb_chash.h + * Function declarations for Consistent Hash LB algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_CHASH_H +#define _HAPROXY_LB_CHASH_H + +#include <haproxy/api.h> +#include <haproxy/lb_chash-t.h> + +struct proxy; +struct server; +int chash_init_server_tree(struct proxy *p); +struct server *chash_get_next_server(struct proxy *p, struct server *srvtoavoid); +struct server *chash_get_server_hash(struct proxy *p, unsigned int hash, const struct server *avoid); + +#endif /* _HAPROXY_LB_CHASH_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_fas-t.h b/include/haproxy/lb_fas-t.h new file mode 100644 index 0000000..cfb274c --- /dev/null +++ b/include/haproxy/lb_fas-t.h @@ -0,0 +1,39 @@ +/* + * include/haproxy/lb_fas-t.h + * Types for First Available Server load balancing algorithm. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_FAS_T_H +#define _HAPROXY_LB_FAS_T_H + +#include <import/ebtree-t.h> + +struct lb_fas { + struct eb_root act; /* weighted least conns on the active servers */ + struct eb_root bck; /* weighted least conns on the backup servers */ +}; + +#endif /* _HAPROXY_LB_FAS_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_fas.h b/include/haproxy/lb_fas.h new file mode 100644 index 0000000..b12831c --- /dev/null +++ b/include/haproxy/lb_fas.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/lb_fas.h + * First Available Server load balancing algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_FAS_H +#define _HAPROXY_LB_FAS_H + +#include <haproxy/api.h> +#include <haproxy/lb_fas-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> + +struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid); +void fas_init_server_tree(struct proxy *p); + +#endif /* _HAPROXY_LB_FAS_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_fwlc-t.h b/include/haproxy/lb_fwlc-t.h new file mode 100644 index 0000000..258a6ab --- /dev/null +++ b/include/haproxy/lb_fwlc-t.h @@ -0,0 +1,39 @@ +/* + * include/haproxy/lb_fwlc-t.h + * Types for Fast Weighted Least Connection load balancing algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_FWLC_T_H +#define _HAPROXY_LB_FWLC_T_H + +#include <import/ebtree-t.h> + +struct lb_fwlc { + struct eb_root act; /* weighted least conns on the active servers */ + struct eb_root bck; /* weighted least conns on the backup servers */ +}; + +#endif /* _HAPROXY_LB_FWLC_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_fwlc.h b/include/haproxy/lb_fwlc.h new file mode 100644 index 0000000..a598af9 --- /dev/null +++ b/include/haproxy/lb_fwlc.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/lb_fwlc.h + * Fast Weighted Least Connection load balancing algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_FWLC_H +#define _HAPROXY_LB_FWLC_H + +#include <haproxy/api.h> +#include <haproxy/lb_fwlc-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> + +struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid); +void fwlc_init_server_tree(struct proxy *p); + +#endif /* _HAPROXY_LB_FWLC_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_fwrr-t.h b/include/haproxy/lb_fwrr-t.h new file mode 100644 index 0000000..f7b746e --- /dev/null +++ b/include/haproxy/lb_fwrr-t.h @@ -0,0 +1,50 @@ +/* + * include/haproxy/lb_fwrr-t.h + * Types for Fast Weighted Round Robin load balancing algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_FWRR_T_H +#define _HAPROXY_LB_FWRR_T_H + +#include <import/ebtree-t.h> + +/* This structure is used to apply fast weighted round robin on a server group */ +struct fwrr_group { + struct eb_root curr; /* tree for servers in "current" time range */ + struct eb_root t0, t1; /* "init" and "next" servers */ + struct eb_root *init; /* servers waiting to be placed */ + struct eb_root *next; /* servers to be placed at next run */ + int curr_pos; /* current position in the tree */ + int curr_weight; /* total weight of the current time range */ + int next_weight; /* total weight of the next time range */ +}; + +struct lb_fwrr { + struct fwrr_group act; /* weighted round robin on the active servers */ + struct fwrr_group bck; /* weighted round robin on the backup servers */ +}; + +#endif /* _HAPROXY_LB_FWRR_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_fwrr.h b/include/haproxy/lb_fwrr.h new file mode 100644 index 0000000..27b0a94 --- /dev/null +++ b/include/haproxy/lb_fwrr.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/lb_fwrr.h + * Fast Weighted Round Robin load balancing algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_FWRR_H +#define _HAPROXY_LB_FWRR_H + +#include <haproxy/api.h> +#include <haproxy/lb_fwrr-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +/* Functions exported by lb_fwrr; both take the proxy <p> they work on. Their definitions are not part of this header. NOTE(review): <srvtoavoid> presumably designates a server that must not be returned - confirm against the implementation. */ +void fwrr_init_server_groups(struct proxy *p); +struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid); + +#endif /* _HAPROXY_LB_FWRR_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_map-t.h b/include/haproxy/lb_map-t.h new file mode 100644 index 0000000..6d1dd1a --- /dev/null +++ b/include/haproxy/lb_map-t.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/lb_map-t.h + * Types for map-based load-balancing (RR and HASH) + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_MAP_T_H +#define _HAPROXY_LB_MAP_T_H + +#include <haproxy/api-t.h> +#include <haproxy/server-t.h> +/* State used by map-based load balancing: the server map itself plus the round robin position. */ +struct lb_map { + struct server **srv; /* the server map used to apply weights */ + int rr_idx; /* next server to be elected in round robin mode */ +}; + +#endif /* _HAPROXY_LB_MAP_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/lb_map.h b/include/haproxy/lb_map.h new file mode 100644 index 0000000..ca483b2 --- /dev/null +++ b/include/haproxy/lb_map.h @@ -0,0 +1,41 @@ +/* + * include/haproxy/lb_map.h + * Map-based load-balancing (RR and HASH) + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_MAP_H +#define _HAPROXY_LB_MAP_H + +#include <haproxy/api.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +/* Functions exported by lb_map; all take the proxy they work on. The two getters return a server pointer, one selected in round robin mode and one selected from <hash>. Their definitions are not part of this header. */ +void recalc_server_map(struct proxy *px); +void init_server_map(struct proxy *p); +struct server *map_get_server_rr(struct proxy *px, struct server *srvtoavoid); +struct server *map_get_server_hash(struct proxy *px, unsigned int hash); + +#endif /* _HAPROXY_LB_MAP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/linuxcap.h b/include/haproxy/linuxcap.h new file mode 100644 index 0000000..9c337a4 --- /dev/null +++ b/include/haproxy/linuxcap.h @@ -0,0 +1,7 @@ +#ifndef _HAPROXY_LINUXCAP_H +#define _HAPROXY_LINUXCAP_H +/* Both functions take the source and destination uid and return an int. NOTE(review): the names suggest they are called before and after setuid() to handle Linux capabilities, and the meaning of the return value is not visible here - confirm against the implementation. */ +int prepare_caps_for_setuid(int from_uid, int to_uid); +int finalize_caps_after_setuid(int from_uid, int to_uid); + +#endif /* _HAPROXY_LINUXCAP_H */ diff --git a/include/haproxy/list-t.h b/include/haproxy/list-t.h new file mode 100644 index 0000000..dd8493e --- /dev/null +++ b/include/haproxy/list-t.h @@ -0,0 +1,73 @@ +/* + * include/haproxy/list-t.h + * Circular list manipulation types definitions + * + * Copyright (C) 2002-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LIST_T_H +#define _HAPROXY_LIST_T_H + + +/* these are circular or bidirectional lists only. Each list pointer points to + * another list pointer in a structure, and not the structure itself. The + * pointer to the next element MUST be the first one so that the list is easily + * cast as a single linked list or pointer. + */ +struct list { + struct list *n; /* next */ + struct list *p; /* prev */ +}; + +/* This is similar to struct list, but we want to be sure the compiler will + * yell at you if you use macros for one when you're using the other. You have + * to explicitly cast if that's really what you want to do. + */ +struct mt_list { + struct mt_list *next; /* next element, same role as <n> in struct list */ + struct mt_list *prev; /* previous element, same role as <p> in struct list */ +}; + + +/* a back-ref is a pointer to a target list entry. It is used to detect when an + * element being deleted is currently being tracked by another user. The best + * example is a user dumping the session table. The table does not fit in the + * output buffer so we have to set a mark on a session and go on later. But if + * that marked session gets deleted, we don't want the user's pointer to go in + * the wild. So we can simply link this user's request to the list of this + * session's users, and put a pointer to the list element in ref, that will be + * used as the mark for next iteration. + */ +struct bref { + struct list users; /* link in the tracked entry's list of users */ + struct list *ref; /* pointer to the target's list entry */ +}; + +/* a word list is a generic list with a pointer to a string in each element. */ +struct wordlist { + struct list list; /* link to the other elements */ + char *s; /* the string carried by this element */ +}; + +/* this is the same as above with an additional pointer to a condition.
+ */ +struct cond_wordlist { + struct list list; /* link to the other elements */ + void *cond; /* untyped pointer to the condition */ + char *s; /* the string carried by this element */ +}; + +#endif /* _HAPROXY_LIST_T_H */ diff --git a/include/haproxy/list.h b/include/haproxy/list.h new file mode 100644 index 0000000..368e6d7 --- /dev/null +++ b/include/haproxy/list.h @@ -0,0 +1,907 @@ +/* + * include/haproxy/list.h + * Circular list manipulation macros and functions. + * + * Copyright (C) 2002-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LIST_H +#define _HAPROXY_LIST_H + +#include <haproxy/api.h> +#include <haproxy/thread.h> + +/* First undefine some macros which happen to also be defined on OpenBSD, + * in sys/queue.h, used by sys/event.h. They are redefined further down in + * this file with this project's own meaning. + */ +#undef LIST_HEAD +#undef LIST_INIT +#undef LIST_NEXT + +/* ILH = Initialized List Head : used to prevent gcc from moving an empty + * list to BSS. Some older version tend to trim all the array and cause + * corruption.
+ */ +#define ILH { .n = (struct list *)1, .p = (struct list *)2 } +/* returns the address of <a> as an untyped pointer */ +#define LIST_HEAD(a) ((void *)(&(a))) +/* turns <l> into an empty list by making both of its links point to <l> itself ; <l> is evaluated several times */ +#define LIST_INIT(l) ((l)->n = (l)->p = (l)) +/* static initializer for an empty list head named <l> : both links point to <l> */ +#define LIST_HEAD_INIT(l) { &(l), &(l) } + +/* adds an element at the beginning of a list ; returns the element */ +#define LIST_INSERT(lh, el) ({ (el)->n = (lh)->n; (el)->n->p = (lh)->n = (el); (el)->p = (lh); (el); }) + +/* adds an element at the end of a list ; returns the element */ +#define LIST_APPEND(lh, el) ({ (el)->p = (lh)->p; (el)->p->n = (lh)->p = (el); (el)->n = (lh); (el); }) + +/* adds the contents of a list <old> at the beginning of another list <new>. The old list head remains untouched. */ +#define LIST_SPLICE(new, old) do { \ + if (!LIST_ISEMPTY(old)) { \ + (old)->p->n = (new)->n; (old)->n->p = (new); \ + (new)->n->p = (old)->p; (new)->n = (old)->n; \ + } \ + } while (0) + +/* adds the contents of a list whose first element is <old> and last one is + * <old->p> at the end of another list <new>. The old list DOES NOT have + * any head here. + */ +#define LIST_SPLICE_END_DETACHED(new, old) do { \ + typeof(new) __t; \ + (new)->p->n = (old); \ + (old)->p->n = (new); \ + __t = (old)->p; \ + (old)->p = (new)->p; \ + (new)->p = __t; \ + } while (0) + +/* removes an element from a list and returns it */ +#if defined(DEBUG_LIST) +/* purposely corrupt the detached element to detect use-after-delete */ +#define LIST_DELETE(el) ({ typeof(el) __ret = (el); (el)->n->p = (el)->p; (el)->p->n = (el)->n; *(__ret) = (struct list)ILH; (__ret);}) +#else +#define LIST_DELETE(el) ({ typeof(el) __ret = (el); (el)->n->p = (el)->p; (el)->p->n = (el)->n; (__ret); }) +#endif + +/* removes an element from a list, initializes it and returns it. + * This is faster than LIST_DELETE+LIST_INIT as we avoid reloading the pointers.
+ */ +#define LIST_DEL_INIT(el) ({ \ + typeof(el) __ret = (el); /* <el> is evaluated only once */ \ + typeof(__ret->n) __n = __ret->n; \ + typeof(__ret->p) __p = __ret->p; \ + __n->p = __p; __p->n = __n; /* neighbours now point to each other */ \ + __ret->n = __ret->p = __ret; /* detached element points to itself */ \ + __ret; \ +}) + +/* returns a pointer of type <pt> to a structure containing a list head called + * <el> at address <lh>. Note that <lh> can be the result of a function or macro + * since it's used only once. + * Example: LIST_ELEM(cur_node->args.next, struct node *, args) + */ +#define LIST_ELEM(lh, pt, el) ((pt)(((const char *)(lh)) - ((size_t)&((pt)NULL)->el))) + +/* checks if the list head <lh> is empty or not, i.e. whether its next pointer + * points to itself + */ +#define LIST_ISEMPTY(lh) ((lh)->n == (lh)) + +/* checks if the list element <el> was added to a list or not. This only + * works when detached elements are reinitialized (using LIST_DEL_INIT) + */ +#define LIST_INLIST(el) ((el)->n != (el)) + +/* atomically checks if the list element's next pointer points to anything + * different from itself, implying the element should be part of a list. This + * usually is similar to LIST_INLIST() except that while that one might be + * instrumented using debugging code to perform further consistency checks, + * the macro below guarantees to always perform a single atomic test and is + * safe to use with barriers. + */ +#define LIST_INLIST_ATOMIC(el) ({ \ + typeof(el) __ptr = (el); \ + HA_ATOMIC_LOAD(&(__ptr)->n) != __ptr; \ +}) + +/* returns a pointer of type <pt> to a structure following the element + * which contains list head <lh>, which is known as element <el> in + * struct pt. + * Example: LIST_NEXT(args, struct node *, list) + */ +#define LIST_NEXT(lh, pt, el) (LIST_ELEM((lh)->n, pt, el)) + + +/* returns a pointer of type <pt> to a structure preceding the element + * which contains list head <lh>, which is known as element <el> in + * struct pt. + * Example: LIST_PREV(args, struct node *, list) + */ +#undef LIST_PREV +#define LIST_PREV(lh, pt, el) (LIST_ELEM((lh)->p, pt, el)) + +/* + * Simpler FOREACH_ITEM macro inspired from Linux sources.
+ * Iterates <item> through a list of items of type "typeof(*item)" which are + * linked via a "struct list" member named <member>. A pointer to the head of + * the list is passed in <list_head>. No temporary variable is needed. Note + * that <item> must not be modified during the loop. + * Example: list_for_each_entry(cur_acl, known_acl, list) { ... }; + */ +#define list_for_each_entry(item, list_head, member) \ + for (item = LIST_ELEM((list_head)->n, typeof(item), member); \ + &item->member != (list_head); \ + item = LIST_ELEM(item->member.n, typeof(item), member)) + +/* + * Same as list_for_each_entry but starting from current point + * Iterates <item> through the list starting from <item> + * It's basically the same macro but without initializing item to the head of + * the list. + * <item> must therefore be set by the caller before the loop ; the body is + * not executed at all when <item>'s member already is <list_head>. + */ +#define list_for_each_entry_from(item, list_head, member) \ + for ( ; &item->member != (list_head); \ + item = LIST_ELEM(item->member.n, typeof(item), member)) + +/* + * Simpler FOREACH_ITEM_SAFE macro inspired from Linux sources. + * Iterates <item> through a list of items of type "typeof(*item)" which are + * linked via a "struct list" member named <member>. A pointer to the head of + * the list is passed in <list_head>. A temporary variable <back> of same type + * as <item> is needed so that <item> may safely be deleted if needed. + * <back> always designates the entry following <item> and is computed before + * the loop body runs. + * Example: list_for_each_entry_safe(cur_acl, tmp, known_acl, list) { ... }; + */ +#define list_for_each_entry_safe(item, back, list_head, member) \ + for (item = LIST_ELEM((list_head)->n, typeof(item), member), \ + back = LIST_ELEM(item->member.n, typeof(item), member); \ + &item->member != (list_head); \ + item = back, back = LIST_ELEM(back->member.n, typeof(back), member)) + + +/* + * Same as list_for_each_entry_safe but starting from current point + * Iterates <item> through the list starting from <item> + * It's basically the same macro but without initializing item to the head of + * the list.
+ */ +#define list_for_each_entry_safe_from(item, back, list_head, member) \ + for (back = LIST_ELEM(item->member.n, typeof(item), member); \ + &item->member != (list_head); \ + item = back, back = LIST_ELEM(back->member.n, typeof(back), member)) + +/* + * Iterate backwards <item> through a list of items of type "typeof(*item)" + * which are linked via a "struct list" member named <member>. A pointer to + * the head of the list is passed in <list_head>. No temporary variable is + * needed. Note that <item> must not be modified during the loop. + * The walk follows the <p> links, starting from the entry designated by the + * head's <p> pointer. + * Example: list_for_each_entry_rev(cur_acl, known_acl, list) { ... }; + */ +#define list_for_each_entry_rev(item, list_head, member) \ + for (item = LIST_ELEM((list_head)->p, typeof(item), member); \ + &item->member != (list_head); \ + item = LIST_ELEM(item->member.p, typeof(item), member)) + +/* + * Same as list_for_each_entry_rev but starting from current point + * Iterate backwards <item> through the list starting from <item> + * It's basically the same macro but without initializing item to the head of + * the list. + * <item> must therefore be set by the caller before the loop. + */ +#define list_for_each_entry_from_rev(item, list_head, member) \ + for ( ; &item->member != (list_head); \ + item = LIST_ELEM(item->member.p, typeof(item), member)) + +/* + * Iterate backwards <item> through a list of items of type "typeof(*item)" + * which are linked via a "struct list" member named <member>. A pointer to + * the head of the list is passed in <list_head>. A temporary variable <back> + * of same type as <item> is needed so that <item> may safely be deleted + * if needed. + * Example: list_for_each_entry_safe_rev(cur_acl, tmp, known_acl, list) { ...
}; + */ +#define list_for_each_entry_safe_rev(item, back, list_head, member) \ + for (item = LIST_ELEM((list_head)->p, typeof(item), member), \ + back = LIST_ELEM(item->member.p, typeof(item), member); \ + &item->member != (list_head); \ + item = back, back = LIST_ELEM(back->member.p, typeof(back), member)) + +/* + * Same as list_for_each_entry_safe_rev but starting from current point + * Iterate backwards <item> through the list starting from <item> + * It's basically the same macro but without initializing item to the head of + * the list. + * <item> must therefore be set by the caller before the loop. + */ +#define list_for_each_entry_safe_from_rev(item, back, list_head, member) \ + for (back = LIST_ELEM(item->member.p, typeof(item), member); \ + &item->member != (list_head); \ + item = back, back = LIST_ELEM(back->member.p, typeof(back), member)) + + +/* + * Locked version of list manipulation macros. + * It is OK to use those concurrently from multiple threads, as long as the + * list is only used with the locked variants. + * MT_LIST_BUSY is the marker value those macros atomically exchange into a + * <next> or <prev> pointer while they work on it ; a macro which reads + * MT_LIST_BUSY back from such an exchange loops and tries again. + */ +#define MT_LIST_BUSY ((struct mt_list *)1) + +/* + * Add an item at the beginning of a list. + * Returns 1 if we added the item, 0 otherwise (because it was already in a + * list).
+ */ +#define MT_LIST_TRY_INSERT(_lh, _el) \ + ({ \ + int _ret = 0; \ + struct mt_list *lh = (_lh), *el = (_el); \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n, *n2; \ + struct mt_list *p, *p2; \ + n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); /* take the head's next link */ \ + if (n == MT_LIST_BUSY) /* link was already marked busy: try again */ \ + continue; \ + p = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); /* take the prev link of the first entry */ \ + if (p == MT_LIST_BUSY) { /* give the head's link back, then try again */ \ + (lh)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + n2 = _HA_ATOMIC_XCHG(&el->next, MT_LIST_BUSY); \ + if (n2 != el) { /* element already linked */ \ + if (n2 != MT_LIST_BUSY) /* restore the value we overwrote */ \ + el->next = n2; \ + n->prev = p; \ + __ha_barrier_store(); \ + lh->next = n; \ + __ha_barrier_store(); \ + if (n2 == MT_LIST_BUSY) \ + continue; \ + break; /* leaves with _ret == 0 */ \ + } \ + p2 = _HA_ATOMIC_XCHG(&el->prev, MT_LIST_BUSY); \ + if (p2 != el) { /* same check, on the element's prev link */ \ + if (p2 != MT_LIST_BUSY) \ + el->prev = p2; \ + n->prev = p; \ + el->next = el; \ + __ha_barrier_store(); \ + lh->next = n; \ + __ha_barrier_store(); \ + if (p2 == MT_LIST_BUSY) \ + continue; \ + break; /* leaves with _ret == 0 */ \ + } \ + (el)->next = n; /* all links are held: place <el> between <p> and <n> */ \ + (el)->prev = p; \ + __ha_barrier_store(); \ + n->prev = (el); \ + __ha_barrier_store(); \ + p->next = (el); \ + __ha_barrier_store(); \ + _ret = 1; \ + break; \ + } \ + (_ret); \ + }) + +/* + * Add an item at the end of a list. + * Returns 1 if we added the item, 0 otherwise (because it was already in a + * list).
+ */ +#define MT_LIST_TRY_APPEND(_lh, _el) \ + ({ \ + int _ret = 0; \ + struct mt_list *lh = (_lh), *el = (_el); \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n, *n2; \ + struct mt_list *p, *p2; \ + p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); /* take the head's prev link */ \ + if (p == MT_LIST_BUSY) /* link was already marked busy: try again */ \ + continue; \ + n = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); /* take the next link of the last entry */ \ + if (n == MT_LIST_BUSY) { /* give the head's link back, then try again */ \ + (lh)->prev = p; \ + __ha_barrier_store(); \ + continue; \ + } \ + p2 = _HA_ATOMIC_XCHG(&el->prev, MT_LIST_BUSY); \ + if (p2 != el) { /* element's prev link does not point to itself */ \ + if (p2 != MT_LIST_BUSY) /* restore the value we overwrote */ \ + el->prev = p2; \ + p->next = n; \ + __ha_barrier_store(); \ + lh->prev = p; \ + __ha_barrier_store(); \ + if (p2 == MT_LIST_BUSY) \ + continue; \ + break; /* leaves with _ret == 0 */ \ + } \ + n2 = _HA_ATOMIC_XCHG(&el->next, MT_LIST_BUSY); \ + if (n2 != el) { /* element already linked */ \ + if (n2 != MT_LIST_BUSY) \ + el->next = n2; \ + p->next = n; \ + el->prev = el; \ + __ha_barrier_store(); \ + lh->prev = p; \ + __ha_barrier_store(); \ + if (n2 == MT_LIST_BUSY) \ + continue; \ + break; /* leaves with _ret == 0 */ \ + } \ + (el)->next = n; /* all links are held: place <el> between <p> and <n> */ \ + (el)->prev = p; \ + __ha_barrier_store(); \ + p->next = (el); \ + __ha_barrier_store(); \ + n->prev = (el); \ + __ha_barrier_store(); \ + _ret = 1; \ + break; \ + } \ + (_ret); \ + }) + +/* + * Add an item at the beginning of a list. + * It is assumed the element can't already be in a list, so it isn't checked.
+ */ +#define MT_LIST_INSERT(_lh, _el) \ + ({ \ + int _ret = 0; \ + struct mt_list *lh = (_lh), *el = (_el); \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n; \ + struct mt_list *p; \ + n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); /* take the head's next link */ \ + if (n == MT_LIST_BUSY) \ + continue; \ + p = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); /* take the prev link of the first entry */ \ + if (p == MT_LIST_BUSY) { /* give the head's link back, then try again */ \ + (lh)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + (el)->next = n; \ + (el)->prev = p; \ + __ha_barrier_store(); \ + n->prev = (el); \ + __ha_barrier_store(); \ + p->next = (el); \ + __ha_barrier_store(); \ + _ret = 1; /* this is the only way out of the loop, so the macro always evaluates to 1 */ \ + break; \ + } \ + (_ret); \ + }) + +/* + * Add an item at the end of a list. + * It is assumed the element can't already be in a list, so it isn't checked + * The macro always evaluates to 1. + */ +#define MT_LIST_APPEND(_lh, _el) \ + ({ \ + int _ret = 0; \ + struct mt_list *lh = (_lh), *el = (_el); \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n; \ + struct mt_list *p; \ + p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); /* take the head's prev link */ \ + if (p == MT_LIST_BUSY) \ + continue; \ + n = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); /* take the next link of the last entry */ \ + if (n == MT_LIST_BUSY) { /* give the head's link back, then try again */ \ + (lh)->prev = p; \ + __ha_barrier_store(); \ + continue; \ + } \ + (el)->next = n; \ + (el)->prev = p; \ + __ha_barrier_store(); \ + p->next = (el); \ + __ha_barrier_store(); \ + n->prev = (el); \ + __ha_barrier_store(); \ + _ret = 1; \ + break; \ + } \ + (_ret); \ + }) + +/* + * Add an item at the end of a list. + * It is assumed the element can't already be in a list, so it isn't checked + * Item will be added in busy/locked state, so that it is already + * referenced in the list but no other thread can use it until we're ready. + * + * This returns a struct mt_list, that will be needed at unlock time.
+ * (using MT_LIST_UNLOCK_ELT) + */ +#define MT_LIST_APPEND_LOCKED(_lh, _el) \ + ({ \ + struct mt_list np; \ + struct mt_list *lh = (_lh), *el = (_el); \ + (el)->next = MT_LIST_BUSY; \ + (el)->prev = MT_LIST_BUSY; \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n; \ + struct mt_list *p; \ + p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); \ + if (p == MT_LIST_BUSY) \ + continue; \ + n = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); \ + if (n == MT_LIST_BUSY) { \ + (lh)->prev = p; \ + __ha_barrier_store(); \ + continue; \ + } \ + np.prev = p; /* the neighbours are only saved for the caller: this macro does not write any link back */ \ + np.next = n; \ + break; \ + } \ + (np); \ + }) + +/* + * Detach a list from its head. A pointer to the first element is returned + * and the list is closed. If the list was empty, NULL is returned. This may + * exclusively be used with lists modified by MT_LIST_TRY_INSERT/MT_LIST_TRY_APPEND. This + * is incompatible with MT_LIST_DELETE run concurrently. + * If there's at least one element, the next of the last element will always + * be NULL, and the prev of the first element will point to the last element. + * The head itself is left as an empty list. + */ +#define MT_LIST_BEHEAD(_lh) ({ \ + struct mt_list *lh = (_lh); \ + struct mt_list *_n; \ + struct mt_list *_p; \ + for (;;__ha_cpu_relax()) { \ + _p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); \ + if (_p == MT_LIST_BUSY) \ + continue; \ + if (_p == (lh)) { /* prev points to the head: empty list */ \ + (lh)->prev = _p; \ + __ha_barrier_store(); \ + _n = NULL; \ + break; \ + } \ + _n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); \ + if (_n == MT_LIST_BUSY) { \ + (lh)->prev = _p; \ + __ha_barrier_store(); \ + continue; \ + } \ + if (_n == (lh)) { /* next points to the head: empty list */ \ + (lh)->next = _n; \ + (lh)->prev = _p; \ + __ha_barrier_store(); \ + _n = NULL; \ + break; \ + } \ + (lh)->next = (lh); \ + (lh)->prev = (lh); \ + __ha_barrier_store(); \ + _n->prev = _p; \ + __ha_barrier_store(); \ + _p->next = NULL; \ + __ha_barrier_store(); \ + break; \ + } \ + (_n); \ +}) + + +/* Remove an item from a list. + * Returns 1 if we removed the item, 0 otherwise (because it was in no list).
+ */ +#define MT_LIST_DELETE(_el) \ + ({ \ + int _ret = 0; \ + struct mt_list *el = (_el); \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n, *n2; \ + struct mt_list *p, *p2 = NULL; \ + n = _HA_ATOMIC_XCHG(&(el)->next, MT_LIST_BUSY); /* take the element's own links first */ \ + if (n == MT_LIST_BUSY) \ + continue; \ + p = _HA_ATOMIC_XCHG(&(el)->prev, MT_LIST_BUSY); \ + if (p == MT_LIST_BUSY) { \ + (el)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + if (p != (el)) { /* there is a predecessor: take its next link */ \ + p2 = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); \ + if (p2 == MT_LIST_BUSY) { \ + (el)->prev = p; \ + (el)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + } \ + if (n != (el)) { /* there is a successor: take its prev link */ \ + n2 = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); \ + if (n2 == MT_LIST_BUSY) { \ + if (p2 != NULL) \ + p->next = p2; \ + (el)->prev = p; \ + (el)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + } \ + n->prev = p; \ + p->next = n; \ + if (p != (el) && n != (el)) /* the element really had neighbours */ \ + _ret = 1; \ + __ha_barrier_store(); \ + (el)->prev = (el); /* leave the element pointing to itself */ \ + (el)->next = (el); \ + __ha_barrier_store(); \ + break; \ + } \ + (_ret); \ + }) + + +/* Remove the first element from the list, and return it as a pointer to the + * structure of type <pt> which contains it as member <el>. NULL is returned + * when the list is empty. The removed element is left pointing to itself. + */ +#define MT_LIST_POP(_lh, pt, el) \ + ({ \ + void *_ret; \ + struct mt_list *lh = (_lh); \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n, *n2; \ + struct mt_list *p, *p2; \ + n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); \ + if (n == MT_LIST_BUSY) \ + continue; \ + if (n == (lh)) { /* empty list */ \ + (lh)->next = lh; \ + __ha_barrier_store(); \ + _ret = NULL; \ + break; \ + } \ + p = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); \ + if (p == MT_LIST_BUSY) { \ + (lh)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + n2 = _HA_ATOMIC_XCHG(&n->next, MT_LIST_BUSY); \ + if (n2 == MT_LIST_BUSY) { \ + n->prev = p; \ + __ha_barrier_store(); \ + (lh)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + p2 = _HA_ATOMIC_XCHG(&n2->prev, MT_LIST_BUSY); \ + if (p2 == MT_LIST_BUSY) { \ + n->next = n2; \ + n->prev = p; \ + __ha_barrier_store(); \ + (lh)->next = n; \ + 
__ha_barrier_store(); \ + continue; \ + } \ + (lh)->next = n2; \ + (n2)->prev = (lh); \ + __ha_barrier_store(); \ + (n)->prev = (n); \ + (n)->next = (n); \ + __ha_barrier_store(); \ + _ret = MT_LIST_ELEM(n, pt, el); \ + break; \ + } \ + (_ret); \ + }) +/* returns the address of <a> as an untyped pointer */ +#define MT_LIST_HEAD(a) ((void *)(&(a))) +/* turns <l> into an empty list by making both of its links point to <l> itself ; <l> is evaluated several times */ +#define MT_LIST_INIT(l) ((l)->next = (l)->prev = (l)) +/* static initializer for an empty list head named <l> : both links point to <l> */ +#define MT_LIST_HEAD_INIT(l) { &(l), &(l) } +/* returns a pointer of type <pt> to a structure containing a list head called + * <el> at address <lh>. Note that <lh> can be the result of a function or macro + * since it's used only once. + * Example: MT_LIST_ELEM(cur_node->args.next, struct node *, args) + */ +#define MT_LIST_ELEM(lh, pt, el) ((pt)(((const char *)(lh)) - ((size_t)&((pt)NULL)->el))) + +/* checks if the list head <lh> is empty or not */ +#define MT_LIST_ISEMPTY(lh) ((lh)->next == (lh)) + +/* returns a pointer of type <pt> to a structure following the element + * which contains list head <lh>, which is known as element <el> in + * struct pt. + * Example: MT_LIST_NEXT(args, struct node *, list) + */ +#define MT_LIST_NEXT(lh, pt, el) (MT_LIST_ELEM((lh)->next, pt, el)) + + +/* returns a pointer of type <pt> to a structure preceding the element + * which contains list head <lh>, which is known as element <el> in + * struct pt. + */ +#undef MT_LIST_PREV +#define MT_LIST_PREV(lh, pt, el) (MT_LIST_ELEM((lh)->prev, pt, el)) + +/* checks if the list element <el> was added to a list or not. This only + * works when detached elements are reinitialized so that they point to + * themselves, which is what MT_LIST_DELETE and MT_LIST_POP do. + */ +#define MT_LIST_INLIST(el) ((el)->next != (el)) + +/* Lock an element in the list, to be sure it won't be removed nor + * accessed by another thread while the lock is held. + * Locking behavior is inspired from MT_LIST_DELETE macro, + * thus this macro can safely be used concurrently with MT_LIST_DELETE. + * This returns a struct mt_list, that will be needed at unlock time.
+ * (using MT_LIST_UNLOCK_ELT) + */ +#define MT_LIST_LOCK_ELT(_el) \ + ({ \ + struct mt_list ret; \ + struct mt_list *el = (_el); \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n, *n2; \ + struct mt_list *p, *p2 = NULL; \ + n = _HA_ATOMIC_XCHG(&(el)->next, MT_LIST_BUSY); /* take the element's own links first */ \ + if (n == MT_LIST_BUSY) \ + continue; \ + p = _HA_ATOMIC_XCHG(&(el)->prev, MT_LIST_BUSY); \ + if (p == MT_LIST_BUSY) { /* give the next link back, then try again */ \ + (el)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + if (p != (el)) { /* there is a predecessor: take its next link */ \ + p2 = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY);\ + if (p2 == MT_LIST_BUSY) { \ + (el)->prev = p; \ + (el)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + } \ + if (n != (el)) { /* there is a successor: take its prev link */ \ + n2 = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY);\ + if (n2 == MT_LIST_BUSY) { \ + if (p2 != NULL) \ + p->next = p2; \ + (el)->prev = p; \ + (el)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + } \ + ret.next = n; /* the neighbours are only saved for the caller: no link is written back here */ \ + ret.prev = p; \ + break; \ + } \ + ret; \ + }) + +/* Unlock an element previously locked by MT_LIST_LOCK_ELT. "np" is the + * struct mt_list returned by MT_LIST_LOCK_ELT().
+ */ +#define MT_LIST_UNLOCK_ELT(_el, np) \ + do { \ + struct mt_list *n = (np).next, *p = (np).prev; \ + struct mt_list *el = (_el); \ + (el)->next = n; \ + (el)->prev = p; \ + if (n != (el)) \ + n->prev = (el); \ + if (p != (el)) \ + p->next = (el); \ + } while (0) + +/* Internal macro for the foreach macros: sets <el>'s next link to <np> and, + * unless <np> is <el> itself, makes <np>'s prev link point back to <el>. + */ +#define _MT_LIST_UNLOCK_NEXT(el, np) \ + do { \ + struct mt_list *n = (np); \ + (el)->next = n; \ + if (n != (el)) \ + n->prev = (el); \ + } while (0) + +/* Internal macro for the foreach macros: sets <el>'s prev link to <np> and, + * unless <np> is <el> itself, makes <np>'s next link point back to <el>. + */ +#define _MT_LIST_UNLOCK_PREV(el, np) \ + do { \ + struct mt_list *p = (np); \ + (el)->prev = p; \ + if (p != (el)) \ + p->next = (el); \ + } while (0) +/* Internal macro: exchanges MT_LIST_BUSY into <el>'s next link and, unless that link designated <el> itself, into the prev link of the element it designated, looping until both succeed. Evaluates to the previous value of <el>'s next link. */ +#define _MT_LIST_LOCK_NEXT(el) \ + ({ \ + struct mt_list *n = NULL; \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *n2; \ + n = _HA_ATOMIC_XCHG(&((el)->next), MT_LIST_BUSY); \ + if (n == MT_LIST_BUSY) \ + continue; \ + if (n != (el)) { \ + n2 = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY);\ + if (n2 == MT_LIST_BUSY) { \ + (el)->next = n; \ + __ha_barrier_store(); \ + continue; \ + } \ + } \ + break; \ + } \ + n; \ + }) +/* Internal macro: same as _MT_LIST_LOCK_NEXT in the other direction. Evaluates to the previous value of <el>'s prev link. */ +#define _MT_LIST_LOCK_PREV(el) \ + ({ \ + struct mt_list *p = NULL; \ + for (;;__ha_cpu_relax()) { \ + struct mt_list *p2; \ + p = _HA_ATOMIC_XCHG(&((el)->prev), MT_LIST_BUSY); \ + if (p == MT_LIST_BUSY) \ + continue; \ + if (p != (el)) { \ + p2 = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY);\ + if (p2 == MT_LIST_BUSY) { \ + (el)->prev = p; \ + __ha_barrier_store(); \ + continue; \ + } \ + } \ + break; \ + } \ + p; \ + }) +/* Internal macro: links the two neighbours saved in the struct mt_list <elt2> to each other. The expansion carries no trailing semicolon: the caller supplies it, as with any do { } while (0) macro. */ +#define _MT_LIST_RELINK_DELETED(elt2) \ + do { \ + struct mt_list *n = (elt2).next, *p = (elt2).prev; \ + ALREADY_CHECKED(p); \ + n->prev = p; \ + p->next = n; \ + } while (0) + +/* Equivalent of MT_LIST_DELETE(), to be used when parsing the list with mt_list_for_each_entry_safe().
+ * It should be the element currently parsed (tmpelt1) + */ +#define MT_LIST_DELETE_SAFE(_el) \ + do { \ + struct mt_list *el = (_el); \ + (el)->prev = (el); /* the element now points to itself */ \ + (el)->next = (el); \ + (_el) = NULL; /* <_el> must be an lvalue: it is reset to NULL */ \ + } while (0) + +/* Same as MT_LIST_DELETE_SAFE, but it won't reinit the element */ +#define MT_LIST_DELETE_SAFE_NOINIT(_el) \ + do { \ + (_el) = NULL; \ + } while (0) + +/* Iterates <item> through a list of items of type "typeof(*item)" which are + * linked via a "struct mt_list" member named <member>. A pointer to the head + * of the list is passed in <list_head>. + * + * <tmpelt> is a temporary struct mt_list *, and <tmpelt2> is a temporary + * struct mt_list, used internally, both are needed for MT_LIST_DELETE_SAFE. + * + * This macro is implemented using a nested loop. The inner loop will run for + * each element in the list, and the upper loop will run only once to do some + * cleanup when the end of the list is reached or user breaks from inner loop. + * It's safe to break from this macro as the cleanup will be performed anyway, + * but it is strictly forbidden to goto from the loop because skipping the + * cleanup will lead to undefined behavior. + * + * In order to remove the current element, please use MT_LIST_DELETE_SAFE. + * + * Example: + * mt_list_for_each_entry_safe(item, list_head, list_member, elt1, elt2) { + * ...
+ * } + */ +#define mt_list_for_each_entry_safe(item, list_head, member, tmpelt, tmpelt2) \ + for ((tmpelt) = NULL; (tmpelt) != MT_LIST_BUSY; ({ \ + /* post loop cleanup: \ + * gets executed only once to perform cleanup \ + * after child loop has finished \ + */ \ + if (tmpelt) { \ + /* last elem still exists, unlocking it */ \ + if (tmpelt2.prev) \ + MT_LIST_UNLOCK_ELT(tmpelt, tmpelt2); \ + else { \ + /* special case: child loop did not run \ + * so tmpelt2.prev == NULL \ + * (empty list) \ + */ \ + _MT_LIST_UNLOCK_NEXT(tmpelt, tmpelt2.next); \ + } \ + } else { \ + /* last elem was deleted by user, relink required: \ + * prev->next = next \ + * next->prev = prev \ + */ \ + _MT_LIST_RELINK_DELETED(tmpelt2); \ + } \ + /* break parent loop \ + * (this loop runs exactly one time) \ + */ \ + (tmpelt) = MT_LIST_BUSY; \ + })) \ + for ((tmpelt) = (list_head), (tmpelt2).prev = NULL, (tmpelt2).next = _MT_LIST_LOCK_NEXT(tmpelt); ({ \ + /* this gets executed before each user body loop */ \ + (item) = MT_LIST_ELEM((tmpelt2.next), typeof(item), member); \ + if (&item->member != (list_head)) { \ + /* did not reach end of list \ + * (back to list_head == end of list reached) \ + */ \ + if (tmpelt2.prev != &item->member) \ + tmpelt2.next = _MT_LIST_LOCK_NEXT(&item->member); \ + else { \ + /* FIXME: is this even supposed to happen?? \ + * I'm not understanding how \ + * tmpelt2.prev could be equal to &item->member. 
\ + * running 'test_list' multiple times with 8 \ + * concurrent threads: this never gets reached \ + */ \ + tmpelt2.next = tmpelt; \ + } \ + if (tmpelt != NULL) { \ + /* if tmpelt was not deleted by user */ \ + if (tmpelt2.prev) { \ + /* not executed on first run \ + * (tmpelt2.prev == NULL on first run) \ + */ \ + _MT_LIST_UNLOCK_PREV(tmpelt, tmpelt2.prev); \ + /* unlock_prev will implicitly relink: \ + * elt->prev = prev \ + * prev->next = elt \ + */ \ + } \ + tmpelt2.prev = tmpelt; \ + } \ + (tmpelt) = &item->member; \ + } \ + /* else: end of list reached (loop stop cond) */ \ + }), \ + &item->member != (list_head);) +/* Returns <list> reinterpreted as a struct list pointer: the value is stored through one union member and read back through the other, so nothing is copied or modified. NOTE(review): presumably relies on struct mt_list and struct list having compatible layouts - confirm against their definitions. */ +static __inline struct list *mt_list_to_list(struct mt_list *list) +{ + union { + struct mt_list *mt_list; + struct list *list; + } mylist; + + mylist.mt_list = list; + return mylist.list; +} +/* Reverse of mt_list_to_list(): returns <list> reinterpreted as a struct mt_list pointer through the same union, without copying or modifying the element. */ +static __inline struct mt_list *list_to_mt_list(struct list *list) +{ + union { + struct mt_list *mt_list; + struct list *list; + } mylist; + + mylist.list = list; + return mylist.mt_list; + +} + +#endif /* _HAPROXY_LIST_H */ diff --git a/include/haproxy/listener-t.h b/include/haproxy/listener-t.h new file mode 100644 index 0000000..7f5e52a --- /dev/null +++ b/include/haproxy/listener-t.h @@ -0,0 +1,317 @@ +/* + * include/haproxy/listener-t.h + * This file defines the structures needed to manage listeners. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LISTENER_T_H +#define _HAPROXY_LISTENER_T_H + +#include <sys/types.h> +#include <sys/socket.h> + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/quic_cc-t.h> +#include <haproxy/quic_sock-t.h> +#include <haproxy/quic_tp-t.h> +#include <haproxy/receiver-t.h> +#include <haproxy/stats-t.h> +#include <haproxy/thread.h> + +/* Some pointer types referenced below */ +struct task; +struct protocol; +struct xprt_ops; +struct proxy; +struct fe_counters; +struct connection; + +/* listener state */ +enum li_state { + LI_NEW = 0, /* not initialized yet */ + LI_INIT, /* all parameters filled in, but not assigned yet */ + LI_ASSIGNED, /* assigned to the protocol, but not listening yet */ + LI_PAUSED, /* listener was paused, it's bound but not listening */ + LI_LISTEN, /* started, listening but not enabled */ + LI_READY, /* started, listening and enabled */ + LI_FULL, /* reached its connection limit */ + LI_LIMITED, /* transient state: limits have been reached, listener is queued */ +} __attribute__((packed)); + +/* Listener transitions + * calloc() set() add_listener() bind() + * -------> NEW ----> INIT ----------> ASSIGNED -----> LISTEN + * <------- <---- <---------- <----- + * free() bzero() del_listener() unbind() + * + * The file descriptor is valid only during these three states : + * + * disable() + * LISTEN <------------ READY + * A| ------------> |A + * || !max & enable() || + * || || + * || max || + * || max & enable() V| !max + * |+---------------> FULL + * +----------------- + * disable() + * + * The LIMITED state may be used when a limit has been detected just before + * using a listener.
In this case, the listener MUST be queued into the + * appropriate wait queue (either the proxy's or the global one). It may be + * set back to the READY state at any instant and for any reason, so one must + * not rely on this state. + */ + +/* listener status for stats */ +enum li_status { + LI_STATUS_WAITING = 0, + LI_STATUS_OPEN, + LI_STATUS_FULL, + + LI_STATE_COUNT /* must be last */ +}; + +/* Note: if a bind_conf uses BC_O_UNLIMITED, it is highly recommended that it adds its own + * maxconn setting to the global.maxsock value so that its resources are reserved. + */ + +/* flags used with bind_conf->options */ +#define BC_O_USE_SSL 0x00000001 /* SSL is being used on this bind_conf */ +#define BC_O_GENERATE_CERTS 0x00000002 /* 1 if generate-certificates option is set, else 0 */ +#define BC_O_QUIC_FORCE_RETRY 0x00000004 /* always send Retry on reception of Initial without token */ +#define BC_O_USE_SOCK_DGRAM 0x00000008 /* at least one datagram-type listener is used */ +#define BC_O_USE_SOCK_STREAM 0x00000010 /* at least one stream-type listener is used */ +#define BC_O_USE_XPRT_DGRAM 0x00000020 /* at least one dgram-only xprt listener is used */ +#define BC_O_USE_XPRT_STREAM 0x00000040 /* at least one stream-only xprt listener is used */ +#define BC_O_NOLINGER 0x00000080 /* disable lingering on these listeners */ +#define BC_O_NOQUICKACK 0x00000100 /* disable quick ack of immediate data (linux) */ +#define BC_O_DEF_ACCEPT 0x00000200 /* wait up to 1 second for data before accepting */ +#define BC_O_TCP_FO 0x00000400 /* enable TCP Fast Open (linux >= 3.7) */ +#define BC_O_ACC_PROXY 0x00000800 /* find the proxied address in the first request line */ +#define BC_O_ACC_CIP 0x00001000 /* find the proxied address in the NetScaler Client IP header */ +#define BC_O_UNLIMITED 0x00002000 /* listeners not subject to global limits (peers & stats socket) */ +#define BC_O_NOSTOP 0x00004000 /* keep the listeners active even after a soft stop */ +#define BC_O_REVERSE_HTTP 
0x00008000 /* a reverse HTTP bind is used */ +#define BC_O_XPRT_MAXCONN 0x00010000 /* transport layer allocates its own resource prior to accept and is responsible to check maxconn limit */ + + +/* flags used with bind_conf->ssl_options */ +#ifdef USE_OPENSSL +#define BC_SSL_O_NONE 0x0000 +#define BC_SSL_O_NO_TLS_TICKETS 0x0100 /* disable session resumption tickets */ +#define BC_SSL_O_PREF_CLIE_CIPH 0x0200 /* prefer client ciphers */ +#endif + +struct tls_version_filter { + uint16_t flags; /* ssl options */ + uint8_t min; /* min TLS version */ + uint8_t max; /* max TLS version */ +}; + +/* ssl "bind" settings */ +struct ssl_bind_conf { +#ifdef USE_OPENSSL + char *npn_str; /* NPN protocol string */ + int npn_len; /* NPN protocol string length */ + char *alpn_str; /* ALPN protocol string */ + int alpn_len; /* ALPN protocol string length */ + unsigned int verify:3; /* verify method (set of SSL_VERIFY_* flags) */ + unsigned int no_ca_names:1;/* do not send ca names to clients (ca_file related) */ + unsigned int early_data:1; /* early data allowed */ + unsigned int ocsp_update:2;/* enable OCSP auto update */ + char *ca_file; /* CAfile to use on verify and ca-names */ + char *ca_verify_file; /* CAverify file to use on verify only */ + char *crl_file; /* CRLfile to use on verify */ + char *ciphers; /* cipher suite to use if non-null */ + char *ciphersuites; /* TLS 1.3 cipher suite to use if non-null */ + char *curves; /* curves suite to use for ECDHE */ + char *ecdhe; /* named curve to use for ECDHE */ + char *sigalgs; /* Signature algorithms */ + char *client_sigalgs; /* Client Signature algorithms */ + struct tls_version_filter ssl_methods_cfg; /* original ssl methods found in configuration */ + struct tls_version_filter ssl_methods; /* actual ssl methods used at runtime */ +#endif +}; + +/* + * In OpenSSL 3.0.0, the biggest verify error code's value is 94 and on the + * latest 1.1.1 it already reaches 79 so we need to size the ca/crt-ignore-err + * arrays accordingly. 
If the max error code increases, the arrays might need to + * be resized. + */ +#define SSL_MAX_VFY_ERROR_CODE 94 +#define IGNERR_BF_SIZE ((SSL_MAX_VFY_ERROR_CODE >> 6) + 1) + +/* "bind" line settings */ +struct bind_conf { +#ifdef USE_OPENSSL + struct ssl_bind_conf ssl_conf; /* ssl conf for ctx setting */ + unsigned long long ca_ignerr_bitfield[IGNERR_BF_SIZE]; /* ignored verify errors in handshake if depth > 0 */ + unsigned long long crt_ignerr_bitfield[IGNERR_BF_SIZE]; /* ignored verify errors in handshake if depth == 0 */ + void *initial_ctx; /* SSL context for initial negotiation */ + void *default_ctx; /* SSL context of first/default certificate */ + struct ckch_inst *default_inst; + struct ssl_bind_conf *default_ssl_conf; /* custom SSL conf of default_ctx */ + int strict_sni; /* refuse negotiation if sni doesn't match a certificate */ + int ssl_options; /* ssl options */ + struct eb_root sni_ctx; /* sni_ctx tree of all known certs full-names sorted by name */ + struct eb_root sni_w_ctx; /* sni_ctx tree of all known certs wildcards sorted by name */ + struct tls_keys_ref *keys_ref; /* TLS ticket keys reference */ + + char *ca_sign_file; /* CAFile used to generate and sign server certificates */ + char *ca_sign_pass; /* CAKey passphrase */ + + struct ckch_data *ca_sign_ckch; /* CA and possible certificate chain for ca generation */ +#endif +#ifdef USE_QUIC + struct quic_transport_params quic_params; /* QUIC transport parameters. 
*/ + struct quic_cc_algo *quic_cc_algo; /* QUIC congestion control algorithm */ + size_t max_cwnd; /* QUIC maximum congestion control window size (kB) */ + enum quic_sock_mode quic_mode; /* QUIC socket allocation strategy */ +#endif + struct proxy *frontend; /* the frontend all these listeners belong to, or NULL */ + const struct mux_proto_list *mux_proto; /* the mux to use for all incoming connections (specified by the "proto" keyword) */ + struct xprt_ops *xprt; /* transport-layer operations for all listeners */ + uint options; /* set of BC_O_* flags */ + unsigned int analysers; /* bitmap of required protocol analysers */ + int maxseg; /* for TCP, advertised MSS */ + int tcp_ut; /* for TCP, user timeout */ + int maxaccept; /* if set, max number of connections accepted at once (-1 when disabled) */ + unsigned int backlog; /* if set, listen backlog */ + int maxconn; /* maximum connections allowed on this listener */ + int (*accept)(struct connection *conn); /* upper layer's accept() */ + int level; /* stats access level (ACCESS_LVL_*) */ + int severity_output; /* default severity output format in cli feedback messages */ + short int nice; /* nice value to assign to the instantiated tasks */ + /* 2-byte hole here */ + struct list listeners; /* list of listeners using this bind config */ + uint32_t ns_cip_magic; /* Expected NetScaler Client IP magic number */ + struct list by_fe; /* next binding for the same frontend, or NULL */ + char *arg; /* argument passed to "bind" for better error reporting */ + char *file; /* file where the section appears */ + int line; /* line where the section appears */ + char *rhttp_srvname; /* name of server when using "rhttp@" address */ + int rhttp_nbconn; /* count of connections to initiate in parallel */ + __decl_thread(HA_RWLOCK_T sni_lock); /* lock the SNI trees during add/del operations */ + struct thread_set thread_set; /* entire set of the allowed threads (0=no restriction) */ + struct rx_settings settings; /* all the settings
needed for the listening socket */ +}; + +/* Fields of a listener allocated per thread */ +struct li_per_thread { + struct { + struct mt_list list; /* list element in the QUIC accept queue */ + struct mt_list conns; /* list of QUIC connections from this listener ready to be accepted */ + } quic_accept; + + struct listener *li; /* back reference on the listener */ +}; + + +/* The listener will be directly referenced by the fdtab[] which holds its + * socket. The listener provides the protocol-specific accept() function to + * the fdtab. + */ +struct listener { + enum obj_type obj_type; /* object type = OBJ_TYPE_LISTENER */ + enum li_state state; /* state: NEW, INIT, ASSIGNED, LISTEN, READY, FULL */ + uint16_t flags; /* listener flags: LI_F_* */ + int luid; /* listener universally unique ID, used for SNMP */ + int nbconn; /* current number of connections on this listener */ + unsigned long thr_idx; /* thread indexes for queue distribution (see listener_accept()) */ + __decl_thread(HA_RWLOCK_T lock); + + struct fe_counters *counters; /* statistics counters */ + struct mt_list wait_queue; /* link element to make the listener wait for something (LI_LIMITED) */ + char *name; /* listener's name */ + + unsigned int thr_conn[MAX_THREADS_PER_GROUP]; /* number of connections per thread for the group */ + + struct list by_fe; /* chaining in frontend's list of listeners */ + struct list by_bind; /* chaining in bind_conf's list of listeners */ + struct bind_conf *bind_conf; /* "bind" line settings, include SSL settings among other things */ + struct receiver rx; /* network receiver parts */ + struct { + struct eb32_node id; /* place in the tree of used IDs */ + } conf; /* config information */ + + struct li_per_thread *per_thr; /* per-thread fields (one per thread in the group) */ + + EXTRA_COUNTERS(extra_counters); +}; + +/* listener flags (16 bits) */ +#define LI_F_FINALIZED 0x0001 /* listener made it to the READY||LIMITED||FULL state at least once, may be suspended/resumed 
safely */ +#define LI_F_SUSPENDED 0x0002 /* listener has been suspended using suspend_listener(), it is either in LI_PAUSED or LI_ASSIGNED state */ + +/* Descriptor for a "bind" keyword. The ->parse() function returns 0 in case of + * success, or a combination of ERR_* flags if an error is encountered. The + * function pointer can be NULL if not implemented. The function also has + * access to the current "bind" config line. The ->skip value tells the parser + * how many words have to be skipped after the keyword. + */ +struct bind_kw { + const char *kw; + int (*parse)(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err); + int skip; /* nb of args to skip */ + int rhttp_ok; /* non-zero if kw is supported for reverse HTTP bind */ +}; + +/* same as bind_kw but for crtlist keywords */ +struct ssl_crtlist_kw { + const char *kw; + int (*parse)(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err); + int skip; /* nb of args to skip */ +}; + +/* + * A keyword list. It is a NULL-terminated array of keywords. It embeds a + * struct list in order to be linked to other lists, allowing it to easily + * be declared where it is needed, and linked without duplicating data nor + * allocating memory. It is also possible to indicate a scope for the keywords.
+ */ +struct bind_kw_list { + const char *scope; + struct list list; + struct bind_kw kw[VAR_ARRAY]; +}; + +/* The per-thread accept queue ring, must be a power of two minus 1 */ +#define ACCEPT_QUEUE_SIZE ((1<<10) - 1) + +/* head and tail are both 16 bits so that idx can be accessed atomically */ +struct accept_queue_ring { + uint32_t idx; /* (head << 16) | tail */ + struct tasklet *tasklet; /* tasklet of the thread owning this ring */ + struct connection *entry[ACCEPT_QUEUE_SIZE] __attribute((aligned(64))); +}; + + +#endif /* _HAPROXY_LISTENER_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/listener.h b/include/haproxy/listener.h new file mode 100644 index 0000000..5b3dc18 --- /dev/null +++ b/include/haproxy/listener.h @@ -0,0 +1,246 @@ +/* + * include/haproxy/listener.h + * This file declares listener management primitives. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LISTENER_H +#define _HAPROXY_LISTENER_H + +#include <stdlib.h> +#include <string.h> + +#include <haproxy/api.h> +#include <haproxy/listener-t.h> + +struct proxy; +struct task; + +int li_init_per_thr(struct listener *li); + +/* adjust the listener's state and its proxy's listener counters if needed */ +void listener_set_state(struct listener *l, enum li_state st); + +/* This function tries to temporarily disable a listener, depending on the OS + * capabilities. Linux unbinds the listen socket after a SHUT_RD, and ignores + * SHUT_WR. Solaris refuses either shutdown(). OpenBSD ignores SHUT_RD but + * closes upon SHUT_WR and refuses to rebind. So a common validation path + * involves SHUT_WR && listen && SHUT_RD. In case of success, the FD's polling + * is disabled. It normally returns non-zero, unless an error is reported. + * It will need to operate under the proxy's lock and the listener's lock. + * suspend() may totally stop a listener if it doesn't support the PAUSED + * state, in which case state will be set to ASSIGNED. + * The caller is responsible for indicating in lpx, lli whether the respective + * locks are already held (non-zero) or not (zero) so that the function picks + * the missing ones, in this order. + */ +int suspend_listener(struct listener *l, int lpx, int lli); + +/* This function tries to resume a temporarily disabled listener. + * The resulting state will either be LI_READY or LI_FULL. 0 is returned + * in case of failure to resume (eg: dead socket). + * It will need to operate under the proxy's lock and the listener's lock.
+ * The caller is responsible for indicating in lpx, lli whether the respective + * locks are already held (non-zero) or not (zero) so that the function picks + * the missing ones, in this order. + */ +int resume_listener(struct listener *l, int lpx, int lli); + +/* Same as resume_listener(), but will only work to resume from + * LI_FULL or LI_LIMITED states because we try to relax listeners that + * were temporarily restricted and not to resume inactive listeners that + * may have been paused or completely stopped in the meantime. + * Returns a positive value for success and 0 for failure. + * It will need to operate under the proxy's lock and the listener's lock. + * The caller is responsible for indicating in lpx, lli whether the respective + * locks are already held (non-zero) or not (zero) so that the function picks + * the missing ones, in this order. + */ +int relax_listener(struct listener *l, int lpx, int lli); + +/* + * This function completely stops a listener. It will need to operate under the + * proxy's lock, the protocol's and the listener's lock. The caller is + * responsible for indicating in lpx, lpr, lli whether the respective locks are + * already held (non-zero) or not (zero) so that the function picks the missing + * ones, in this order. + */ +void stop_listener(struct listener *l, int lpx, int lpr, int lli); + +/* This function adds the specified listener's file descriptor to the polling + * lists if it is in the LI_LISTEN state. The listener enters LI_READY or + * LI_FULL state depending on its number of connections. In daemon mode, we + * also support binding only the relevant processes to their respective + * listeners. We don't do that in debug mode however.
+ */ +void enable_listener(struct listener *listener); + +/* Dequeues all listeners waiting for a resource in the global wait queue */ +void dequeue_all_listeners(void); + +/* Dequeues all listeners waiting for a resource in proxy <px>'s queue */ +void dequeue_proxy_listeners(struct proxy *px); + +/* This function closes the listening socket for the specified listener, + * provided that it's already in a listening state. The listener enters the + * LI_ASSIGNED state, except if the FD is not closed, in which case it may + * remain in LI_LISTEN. Depending on the process's status (master or worker), + * the listener's bind options and the receiver's origin, it may or may not + * close the receiver's FD. Must be called with the lock held. + */ +void do_unbind_listener(struct listener *listener); + +/* This function closes the listening socket for the specified listener, + * provided that it's already in a listening state. The listener enters the + * LI_ASSIGNED state, except if the FD is not closed, in which case it may + * remain in LI_LISTEN. This function is intended to be used as a generic + * function for standard protocols. + */ +void unbind_listener(struct listener *listener); + +/* creates one or multiple listeners for bind_conf <bc> on sockaddr <ss> on port + * range <portl> to <porth>, and possibly attached to fd <fd> (or -1 for auto + * allocation). The address family is taken from ss->ss_family, and the protocol + * passed in <proto> must be usable on this family. The number of jobs and + * listeners is automatically increased by the number of listeners created. It + * returns non-zero on success, zero on error with the error message set in <err>.
+ */ +int create_listeners(struct bind_conf *bc, const struct sockaddr_storage *ss, + int portl, int porth, int fd, struct protocol *proto, char **err); +struct shard_info *shard_info_attach(struct receiver *rx, struct shard_info *si); +void shard_info_detach(struct receiver *rx); +struct listener *clone_listener(struct listener *src); + +/* Delete a listener from its protocol's list of listeners. The listener's + * state is automatically updated from LI_ASSIGNED to LI_INIT. The protocol's + * number of listeners is updated. Note that the listener must have previously + * been unbound. This is the generic function to use to remove a listener. + */ +void delete_listener(struct listener *listener); +void __delete_listener(struct listener *listener); + +/* This function is called on a read event from a listening socket, corresponding + * to an accept. It tries to accept as many connections as possible, and for each + * calls the listener's accept handler (generally the frontend's accept handler). + */ +void listener_accept(struct listener *l); + +/* Returns a suitable value for a listener's backlog. It uses the listener's, + * otherwise the frontend's backlog, otherwise the listener's maxconn, + * otherwise the frontend's maxconn, otherwise 1024. + */ +int listener_backlog(const struct listener *l); + +/* Notify the listener that a connection initiated from it was released. This + * is used to keep the connection count consistent and to possibly re-open + * listening when it was limited. + */ +void listener_release(struct listener *l); + +/* This function adds the specified <listener> to the protocol <proto>. It + * does nothing if the protocol was already added. The listener's state is + * automatically updated from LI_INIT to LI_ASSIGNED. The number of listeners + * for the protocol is updated. This must be called with the proto lock held. 
+ */ +void default_add_listener(struct protocol *proto, struct listener *listener); + +/* default function used to unbind a listener. This is for use by standard + * protocols working on top of accepted sockets. The receiver's rx_unbind() + * will automatically be used after the listener is disabled if the socket is + * still bound. This must be used under the listener's lock. + */ +void default_unbind_listener(struct listener *listener); + +/* default function called to suspend a listener: it simply passes the call to + * the underlying receiver. This is fine for most socket-based protocols. This + * must be called under the listener's lock. It will return non-zero on success, + * 0 on failure. If no receiver-level suspend is provided, the operation is + * assumed to succeed. + */ +int default_suspend_listener(struct listener *l); + +/* Tries to resume a suspended listener, and returns non-zero on success or + * zero on failure. On certain errors, an alert or a warning might be displayed. + * It must be called with the listener's lock held. Depending on the listener's + * state and protocol, a listen() call might be used to resume operations, or a + * call to the receiver's resume() function might be used as well. This is + * suitable as a default function for TCP and UDP. This must be called with the + * listener's lock held. + */ +int default_resume_listener(struct listener *l); + +/* Applies the thread mask, shards etc to the bind_conf. It normally returns 0 + * otherwise the number of errors. Upon error it may set error codes (ERR_*) in + * err_code. It is supposed to be called only once very late in the boot process + * after the bind_conf's thread_set is fixed. The function may emit warnings and + * alerts. Extra listeners may be created on the fly. + */ +int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code); + +/* + * Registers the bind keyword list <kwl> as a list of valid keywords for next + * parsing sessions.
+ */ +void bind_register_keywords(struct bind_kw_list *kwl); + +/* Return a pointer to the bind keyword <kw>, or NULL if not found. */ +struct bind_kw *bind_find_kw(const char *kw); + +/* Dumps all registered "bind" keywords to the <out> string pointer. */ +void bind_dump_kws(char **out); +const char *bind_find_best_kw(const char *word); +int bind_parse_args_list(struct bind_conf *bind_conf, char **args, int cur_arg, + const char *section, const char *file, int linenum); + +void bind_recount_thread_bits(struct bind_conf *conf); +unsigned int bind_map_thread_id(const struct bind_conf *conf, unsigned int r); +struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file, + int line, const char *arg, struct xprt_ops *xprt); +const char *listener_state_str(const struct listener *l); +struct task *accept_queue_process(struct task *t, void *context, unsigned int state); +struct task *manage_global_listener_queue(struct task *t, void *context, unsigned int state); + +extern struct accept_queue_ring accept_queue_rings[MAX_THREADS] __attribute__((aligned(64))); + +extern const char* li_status_st[LI_STATE_COUNT]; +enum li_status get_li_status(struct listener *l); + +/* number of times an accepted connection resulted in maxconn being reached */ +extern ullong maxconn_reached; +/* Returns the distance from head to tail in <ring>, computed from a single atomic load of ring->idx (head is the upper 16 bits, tail the lower 16 bits), wrapped by subtracting ACCEPT_QUEUE_SIZE at most once. NOTE(review): this is only a proper modulo if head and tail are both below ACCEPT_QUEUE_SIZE - confirm against the code updating idx. */ +static inline uint accept_queue_ring_len(const struct accept_queue_ring *ring) +{ + uint idx, head, tail, len; + + idx = _HA_ATOMIC_LOAD(&ring->idx); /* (head << 16) + tail */ + head = idx >> 16; + tail = idx & 0xffff; + len = tail + ACCEPT_QUEUE_SIZE - head; + if (len >= ACCEPT_QUEUE_SIZE) + len -= ACCEPT_QUEUE_SIZE; + return len; +} + +#endif /* _HAPROXY_LISTENER_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/log-t.h b/include/haproxy/log-t.h new file mode 100644 index 0000000..a0a25ac --- /dev/null +++ b/include/haproxy/log-t.h @@ -0,0 +1,277 @@ +/* + * include/haproxy/log-t.h + * This file contains definitions of
log-related structures and macros. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LOG_T_H +#define _HAPROXY_LOG_T_H + +#include <sys/socket.h> +#include <sys/un.h> +#include <netinet/in.h> + +#include <haproxy/api-t.h> +#include <haproxy/ring-t.h> +#include <haproxy/thread-t.h> + + +#define NB_LOG_FACILITIES 24 +#define NB_LOG_LEVELS 8 +#define NB_LOG_HDR_MAX_ELEMENTS 15 +#define SYSLOG_PORT 514 +#define UNIQUEID_LEN 128 + +/* flags used in logformat_node->options */ +#define LOG_OPT_HEXA 0x00000001 +#define LOG_OPT_MANDATORY 0x00000002 +#define LOG_OPT_QUOTE 0x00000004 +#define LOG_OPT_REQ_CAP 0x00000008 +#define LOG_OPT_RES_CAP 0x00000010 +#define LOG_OPT_HTTP 0x00000020 +#define LOG_OPT_ESC 0x00000040 +#define LOG_OPT_MERGE_SPACES 0x00000080 + + +/* Fields that need to be extracted from the incoming connection or request for + * logging or for sending specific header information. They're set in px->to_log + * and appear as flags in session->logs.logwait, which are removed once the + * required information has been collected. 
+ */ +#define LW_INIT 1 /* anything */ +#define LW_CLIP 2 /* CLient IP */ +#define LW_SVIP 4 /* SerVer IP */ +#define LW_SVID 8 /* server ID */ +#define LW_REQ 16 /* http REQuest */ +#define LW_RESP 32 /* http RESPonse */ +#define LW_BYTES 256 /* bytes read from server */ +#define LW_COOKIE 512 /* captured cookie */ +#define LW_REQHDR 1024 /* request header(s) */ +#define LW_RSPHDR 2048 /* response header(s) */ +#define LW_BCKIP 4096 /* backend IP */ +#define LW_FRTIP 8192 /* frontend IP */ +#define LW_XPRT 16384 /* transport layer information (eg: SSL) */ + +#define LOG_LEGACYTIME_LEN 15 +#define LOG_ISOTIME_MINLEN 20 +#define LOG_ISOTIME_MAXLEN 32 + +/* enum for log format */ +enum log_fmt { + LOG_FORMAT_UNSPEC = 0, + LOG_FORMAT_LOCAL, + LOG_FORMAT_RFC3164, + LOG_FORMAT_RFC5424, + LOG_FORMAT_PRIO, + LOG_FORMAT_SHORT, + LOG_FORMAT_TIMED, + LOG_FORMAT_ISO, + LOG_FORMAT_RAW, + LOG_FORMATS /* number of supported log formats, must always be last */ +}; + +/* enum log header meta data */ +enum log_meta { + LOG_META_PRIO, + LOG_META_TIME, + LOG_META_HOST, + LOG_META_TAG, + LOG_META_PID, + LOG_META_MSGID, + LOG_META_STDATA, + LOG_META_FIELDS /* must always be the last */ +}; + +/* log header data */ +struct log_header { + enum log_fmt format; /* how to format the header */ + int level, facility; /* used by several formats */ + struct ist *metadata; /* optional metadata - per-format */ +}; + +#define LOG_HEADER_NONE (struct log_header){ \ + .format = LOG_FORMAT_UNSPEC, \ + .level = 0, \ + .facility = 0, \ + .metadata = NULL \ + } + +/* log target types */ +enum log_tgt { + LOG_TARGET_DGRAM = 0, // datagram address (udp, unix socket) + LOG_TARGET_FD, // file descriptor + LOG_TARGET_BUFFER, // ring buffer + LOG_TARGET_BACKEND, // backend with SYSLOG mode +}; + +/* lists of fields that can be logged, for logformat_node->type */ +enum { + + LOG_FMT_TEXT = 0, /* raw text */ + LOG_FMT_EXPR, /* sample expression */ + LOG_FMT_SEPARATOR, /* separator replaced by one space */ + + 
/* information fields */ + LOG_FMT_GLOBAL, + LOG_FMT_CLIENTIP, + LOG_FMT_CLIENTPORT, + LOG_FMT_BACKENDIP, + LOG_FMT_BACKENDPORT, + LOG_FMT_FRONTENDIP, + LOG_FMT_FRONTENDPORT, + LOG_FMT_SERVERPORT, + LOG_FMT_SERVERIP, + LOG_FMT_COUNTER, + LOG_FMT_LOGCNT, + LOG_FMT_PID, + LOG_FMT_DATE, + LOG_FMT_DATEGMT, + LOG_FMT_DATELOCAL, + LOG_FMT_TS, + LOG_FMT_MS, + LOG_FMT_FRONTEND, + LOG_FMT_FRONTEND_XPRT, + LOG_FMT_BACKEND, + LOG_FMT_SERVER, + LOG_FMT_BYTES, + LOG_FMT_BYTES_UP, + LOG_FMT_Ta, + LOG_FMT_Th, + LOG_FMT_Ti, + LOG_FMT_TQ, + LOG_FMT_TW, + LOG_FMT_TC, + LOG_FMT_Tr, + LOG_FMT_tr, + LOG_FMT_trg, + LOG_FMT_trl, + LOG_FMT_TR, + LOG_FMT_TD, + LOG_FMT_TT, + LOG_FMT_TU, + LOG_FMT_STATUS, + LOG_FMT_CCLIENT, + LOG_FMT_CSERVER, + LOG_FMT_TERMSTATE, + LOG_FMT_TERMSTATE_CK, + LOG_FMT_ACTCONN, + LOG_FMT_FECONN, + LOG_FMT_BECONN, + LOG_FMT_SRVCONN, + LOG_FMT_RETRIES, + LOG_FMT_SRVQUEUE, + LOG_FMT_BCKQUEUE, + LOG_FMT_HDRREQUEST, + LOG_FMT_HDRRESPONS, + LOG_FMT_HDRREQUESTLIST, + LOG_FMT_HDRRESPONSLIST, + LOG_FMT_REQ, + LOG_FMT_HTTP_METHOD, + LOG_FMT_HTTP_URI, + LOG_FMT_HTTP_PATH, + LOG_FMT_HTTP_PATH_ONLY, + LOG_FMT_HTTP_QUERY, + LOG_FMT_HTTP_VERSION, + LOG_FMT_HOSTNAME, + LOG_FMT_UNIQUEID, + LOG_FMT_SSL_CIPHER, + LOG_FMT_SSL_VERSION, +}; + +/* enum for parse_logformat_string */ +enum { + LF_INIT = 0, // before first character + LF_TEXT, // normal text + LF_SEPARATOR, // a single separator + LF_VAR, // variable name, after '%' or '%{..}' + LF_STARTVAR, // % in text + LF_STARG, // after '%{' and before '}' + LF_EDARG, // '}' after '%{' + LF_STEXPR, // after '%[' or '%{..}[' and before ']' + LF_EDEXPR, // ']' after '%[' + LF_END, // \0 found +}; + + +struct logformat_node { + struct list list; + int type; // LOG_FMT_* + int options; // LOG_OPT_* + char *arg; // text for LOG_FMT_TEXT, arg for others + void *expr; // for use with LOG_FMT_EXPR +}; + +/* Range of indexes for log sampling. */ +struct smp_log_range { + unsigned int low; /* Low limit of the indexes of this range.
*/ + unsigned int high; /* High limit of the indexes of this range. */ + size_t sz; /* The size of this range, or number of indexes in + * this range. + */ +}; + +/* Log sampling information. */ +struct smp_info { + struct smp_log_range *smp_rgs; /* Array of ranges for log sampling. */ + size_t smp_rgs_sz; /* The size of <smp_rgs> array. */ + size_t smp_sz; /* The total number of logs to be sampled. */ + ullong curr_rg_idx; /* 63:32 = current range; 31:0 = current index */ +}; + +enum log_target_flags { + LOG_TARGET_FL_NONE = 0x00, + LOG_TARGET_FL_RESOLVED = 0x01 +}; + +struct log_target { + struct sockaddr_storage *addr; + union { + char *ring_name; /* type = BUFFER - preparsing */ + struct sink *sink; /* type = BUFFER - postparsing */ + char *be_name; /* type = BACKEND - preparsing */ + struct proxy *be; /* type = BACKEND - postparsing */ + char *resolv_name; /* generic - preparsing */ + }; + enum log_tgt type; + uint16_t flags; +}; + +struct logger { + struct list list; + struct log_target target; + struct smp_info lb; + enum log_fmt format; + int facility; + int level; + int minlvl; + int maxlen; + struct logger *ref; + struct { + char *file; /* file where the logger appears */ + int line; /* line where the logger appears */ + } conf; +}; + +#endif /* _HAPROXY_LOG_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/log.h b/include/haproxy/log.h new file mode 100644 index 0000000..68b8207 --- /dev/null +++ b/include/haproxy/log.h @@ -0,0 +1,195 @@ +/* + * include/haproxy/log.h + * This file contains definitions of log-related functions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LOG_H +#define _HAPROXY_LOG_H + +#include <syslog.h> + +#include <haproxy/api.h> +#include <haproxy/log-t.h> +#include <haproxy/pool-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/stream.h> + +extern struct pool_head *pool_head_requri; +extern struct pool_head *pool_head_uniqueid; + +extern const char *log_levels[]; +extern char *log_format; +extern char httpclient_log_format[]; +extern char default_tcp_log_format[]; +extern char default_http_log_format[]; +extern char clf_http_log_format[]; +extern char default_https_log_format[]; + +extern char default_rfc5424_sd_log_format[]; + +extern const char sess_term_cond[]; +extern const char sess_fin_state[]; + +extern unsigned int dropped_logs; + +/* lof forward proxy list */ +extern struct proxy *cfg_log_forward; + +extern THREAD_LOCAL char *logline; +extern THREAD_LOCAL char *logline_rfc5424; + +/* global syslog message counter */ +extern int cum_log_messages; + +/* syslog UDP message handler */ +void syslog_fd_handler(int fd); + +/* Initialize/Deinitialize log buffers used for syslog messages */ +int init_log_buffers(void); +void deinit_log_buffers(void); + +/* build a log line for the session and an optional stream */ +int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct list *list_format); + +/* + * send a log for the stream when we have enough info about it. + * Will not log if the frontend has no log defined. 
+ */ +void strm_log(struct stream *s); +void sess_log(struct session *sess); + +/* send a applicative log with custom list of loggers */ +void app_log(struct list *loggers, struct buffer *tag, int level, const char *format, ...) + __attribute__ ((format(printf, 4, 5))); + +/* + * add to the logformat linked list + */ +int add_to_logformat_list(char *start, char *end, int type, struct list *list_format, char **err); + +/* + * Parse the log_format string and fill a linked list. + * Variable name are preceded by % and composed by characters [a-zA-Z0-9]* : %varname + * You can set arguments using { } : %{many arguments}varname + */ +int parse_logformat_string(const char *str, struct proxy *curproxy, struct list *list_format, int options, int cap, char **err); + +int postresolve_logger_list(struct list *loggers, const char *section, const char *section_name); + +struct logger *dup_logger(struct logger *def); +void free_logger(struct logger *logger); +void deinit_log_target(struct log_target *target); + +/* Parse "log" keyword and update the linked list. */ +int parse_logger(char **args, struct list *loggers, int do_del, const char *file, int linenum, char **err); + +/* + * This function adds a header to the message and sends the syslog message + * using a printf format string + */ +void send_log(struct proxy *p, int level, const char *format, ...) + __attribute__ ((format(printf, 3, 4))); + +/* + * This function sends a syslog message to all loggers of a proxy, + * or to global loggers if the proxy is NULL. + * It also tries not to waste too much time computing the message header. + * It doesn't care about errors nor does it report them. + */ + +void __send_log(struct list *loggers, struct buffer *tag, int level, char *message, size_t size, char *sd, size_t sd_size); + +/* + * returns log format for <fmt> or LOG_FORMAT_UNSPEC if not found. + */ +enum log_fmt get_log_format(const char *fmt); + +/* + * returns log level for <lev> or -1 if not found. 
/*
 * Convenience wrapper around sess_build_logline() for callers that only hold
 * a stream. The session handed to the callee is the one returned by
 * strm_sess(<s>); <dst>, <maxsize> and <list_format> are forwarded untouched
 * and the callee's return value is passed back as-is. <s> must be valid.
 */
static inline int build_logline(struct stream *s, char *dst, size_t maxsize, struct list *list_format)
{
	struct session *owner = strm_sess(s);

	return sess_build_logline(owner, s, dst, maxsize, list_format);
}
+ */ +static inline struct proxy *log_forward_by_name(const char *name) +{ + struct proxy *px = cfg_log_forward; + + while (px) { + if (strcmp(px->id, name) == 0) + return px; + px = px->next; + } + return NULL; +} + +#endif /* _HAPROXY_LOG_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/mailers-t.h b/include/haproxy/mailers-t.h new file mode 100644 index 0000000..0fa3197 --- /dev/null +++ b/include/haproxy/mailers-t.h @@ -0,0 +1,83 @@ +/* + * include/haproxy/mailer-t.h + * This file defines everything related to mailer. + * + * Copyright 2015 Horms Solutions Ltd., Simon Horman <horms@verge.net.au> + * + * Based on include/haproxy/peers-t.h + * + * Copyright 2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MAILERS_T_H +#define _HAPROXY_MAILERS_T_H + +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <haproxy/check-t.h> +#include <haproxy/tcpcheck-t.h> +#include <haproxy/thread-t.h> + +struct mailer { + char *id; + struct mailers *mailers; + struct { + const char *file; /* file where the section appears */ + int line; /* line where the section appears */ + } conf; /* config information */ + struct sockaddr_storage addr; /* SMTP server address */ + struct protocol *proto; /* SMTP server address's protocol */ + struct xprt_ops *xprt; /* SMTP server socket operations at transport layer */ + void *sock_init_arg; /* socket operations's opaque init argument if needed */ + struct mailer *next; /* next mailer in the list */ +}; + +struct mailers { + char *id; /* mailers section name */ + struct mailer *mailer_list; /* mailers in this mailers section */ + struct { + const char *file; /* file where the section appears */ + int line; /* line where the section appears */ + } conf; /* config information */ + struct mailers *next; /* next mailers section */ + int count; /* total number of mailers in this mailers section */ + int users; /* number of users of this mailers section */ + struct { /* time to: */ + int mail; /* try connecting to mailserver and sending a email */ + } timeout; +}; + +struct email_alert { + struct list list; + struct tcpcheck_rules rules; + struct server *srv; +}; + +struct email_alertq { + struct list email_alerts; + struct check check; /* Email alerts are implemented using existing check + * code even though they are not checks. This structure + * is as a parameter to the check code. 
+ * Each check corresponds to a mailer */ + __decl_thread(HA_SPINLOCK_T lock); +}; + +#endif /* _HAPROXY_MAILERS_T_H */ + diff --git a/include/haproxy/mailers.h b/include/haproxy/mailers.h new file mode 100644 index 0000000..89aa1b0 --- /dev/null +++ b/include/haproxy/mailers.h @@ -0,0 +1,42 @@ +/* + * include/haproxy/mailer.h + * This file lists exported variables and functions for mailers. + * + * Copyright 2015 Horms Solutions Ltd., Simon Horman <horms@verge.net.au> + * Copyright 2020 Willy Tarreau <w@1wt.eu> + * + * Based on include/haproxy/peers-t.h + * + * Copyright 2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MAILERS_H +#define _HAPROXY_MAILERS_H + +#include <haproxy/mailers-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> + +extern struct mailers *mailers; +extern int send_email_disabled; + +int init_email_alert(struct mailers *mailers, struct proxy *p, char **err); +void send_email_alert(struct server *s, int priority, const char *format, ...) 
+ __attribute__ ((format(printf, 3, 4))); + + +#endif /* _HAPROXY_MAILERS_H */ diff --git a/include/haproxy/map-t.h b/include/haproxy/map-t.h new file mode 100644 index 0000000..d6085ee --- /dev/null +++ b/include/haproxy/map-t.h @@ -0,0 +1,34 @@ +/* + * include/haproxy/map-t.h + * This file provides structures and types for MAPs. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MAP_T_H +#define _HAPROXY_MAP_T_H + +#include <haproxy/pattern-t.h> +#include <haproxy/sample-t.h> + +struct map_descriptor { + struct sample_conv *conv; /* original converter descriptor */ + struct pattern_head pat; /* the pattern matching associated to the map */ + int do_free; /* set if <pat> is the original pat and must be freed */ +}; + +#endif /* _HAPROXY_MAP_T_H */ diff --git a/include/haproxy/map.h b/include/haproxy/map.h new file mode 100644 index 0000000..3ec3418 --- /dev/null +++ b/include/haproxy/map.h @@ -0,0 +1,39 @@ +/* + * include/haproxy/map.h + * This file provides structures and types for pattern matching. 
+ * + * Copyright (C) 2000-2013 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MAP_H +#define _HAPROXY_MAP_H + +#include <haproxy/map-t.h> +#include <haproxy/sample-t.h> + +/* maps output sample parser */ +int map_parse_ip(const char *text, struct sample_data *data); +int map_parse_ip6(const char *text, struct sample_data *data); +int map_parse_str(const char *text, struct sample_data *data); +int map_parse_int(const char *text, struct sample_data *data); + +struct map_reference *map_get_reference(const char *reference); + +int sample_load_map(struct arg *arg, struct sample_conv *conv, + const char *file, int line, char **err); + +#endif /* _HAPROXY_MAP_H */ diff --git a/include/haproxy/mqtt-t.h b/include/haproxy/mqtt-t.h new file mode 100644 index 0000000..51f55ea --- /dev/null +++ b/include/haproxy/mqtt-t.h @@ -0,0 +1,310 @@ +/* + * include/haproxy/mqtt.h + * This file contains structure declarations for MQTT protocol. + * + * Copyright 2020 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MQTT_T_H +#define _HAPROXY_MQTT_T_H + +#include <import/ist.h> + +/* MQTT protocol version + * In MQTT 3.1.1, version is called "level" + */ +#define MQTT_VERSION_3_1 3 +#define MQTT_VERSION_3_1_1 4 +#define MQTT_VERSION_5_0 5 + +/* + * return code when parsing / validating MQTT messages + */ +#define MQTT_INVALID_MESSAGE -1 +#define MQTT_NEED_MORE_DATA 0 +#define MQTT_VALID_MESSAGE 1 + + +/* + * MQTT Control Packet Type: MQTT_CPT_* + * + * Part of the fixed headers, encoded on the first packet byte : + * + * +-------+-----------+-----------+-----------+---------+----------+----------+---------+------------+ + * | bit | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * +-------+-----------+-----------+-----------+---------+----------+----------+---------+------------+ + * | field | MQTT Control Packet Type | Flags specific to each Control Packet type | + * +-------+---------------------------------------------+--------------------------------------------+ + * + * Don't forget to "left offset by 4 bits (<< 4)" the values below when matching against the fixed + * header collected in a MQTT packet. 
+ * + * value 0x0 is reserved and forbidden + */ +enum { + MQTT_CPT_INVALID = 0, + + MQTT_CPT_CONNECT, + MQTT_CPT_CONNACK, + MQTT_CPT_PUBLISH, + MQTT_CPT_PUBACK, + MQTT_CPT_PUBREC, + MQTT_CPT_PUBREL, + MQTT_CPT_PUBCOMP, + MQTT_CPT_SUBSCRIBE, + MQTT_CPT_SUBACK, + MQTT_CPT_UNSUBSCRIBE, + MQTT_CPT_UNSUBACK, + MQTT_CPT_PINGREQ, + MQTT_CPT_PINGRESP, + MQTT_CPT_DISCONNECT, + MQTT_CPT_AUTH, + MQTT_CPT_ENTRIES /* used to mark the end/size of our MQTT_CPT_* list */ +}; + +/* MQTT CONNECT packet flags */ +#define MQTT_CONNECT_FL_RESERVED 0x01 +#define MQTT_CONNECT_FL_CLEAN_SESSION 0x02 +#define MQTT_CONNECT_FL_WILL 0x04 +#define MQTT_CONNECT_FL_WILL_QOS 0x18 /* covers 2 bits 00011000 */ +#define MQTT_CONNECT_FL_WILL_RETAIN 0x20 +#define MQTT_CONNECT_FL_PASSWORD 0x40 +#define MQTT_CONNECT_FL_USERNAME 0x80 + +/* MQTT packet properties identifiers + * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901029 + */ +#define MQTT_PROP_PAYLOAD_FORMAT_INDICATOR 0x01 +#define MQTT_PROP_MESSAGE_EXPIRY_INTERVAL 0x02 +#define MQTT_PROP_CONTENT_TYPE 0x03 +#define MQTT_PROP_RESPONSE_TOPIC 0x08 +#define MQTT_PROP_CORRELATION_DATA 0x09 +#define MQTT_PROP_SESSION_EXPIRY_INTERVAL 0x11 +#define MQTT_PROP_ASSIGNED_CLIENT_IDENTIFIER 0x12 +#define MQTT_PROP_SERVER_KEEPALIVE 0x13 +#define MQTT_PROP_AUTHENTICATION_METHOD 0x15 +#define MQTT_PROP_AUTHENTICATION_DATA 0x16 +#define MQTT_PROP_REQUEST_PROBLEM_INFORMATION 0x17 +#define MQTT_PROP_WILL_DELAY_INTERVAL 0x18 +#define MQTT_PROP_REQUEST_RESPONSE_INFORMATION 0x19 +#define MQTT_PROP_RESPONSE_INFORMATION 0x1A +#define MQTT_PROP_SERVER_REFERENCE 0x1C +#define MQTT_PROP_RECEIVE_MAXIMUM 0x21 +#define MQTT_PROP_TOPIC_ALIAS_MAXIMUM 0x22 +#define MQTT_PROP_MAXIMUM_QOS 0x24 +#define MQTT_PROP_RETAIN_AVAILABLE 0x25 +#define MQTT_PROP_USER_PROPERTIES 0x26 +#define MQTT_PROP_MAXIMUM_PACKET_SIZE 0x27 +#define MQTT_PROP_WILDCARD_SUBSCRIPTION_AVAILABLE 0x28 +#define MQTT_PROP_SUBSCRIPTION_IDENTIFIERS_AVAILABLE 0x29 +#define 
MQTT_PROP_SHARED_SUBSRIPTION_AVAILABLE 0x2A +#define MQTT_PROP_REASON_STRING 0x1F +#define MQTT_PROP_LAST 0xFF + +/* MQTT minimal packet size */ +#define MQTT_MIN_PKT_SIZE 2 +#define MQTT_REMAINING_LENGHT_MAX_SIZE 4 + +/* list of supported capturable Field Names and configuration file string */ +enum { + MQTT_FN_INVALID = 0, + + MQTT_FN_FLAGS, + MQTT_FN_REASON_CODE, + MQTT_FN_PROTOCOL_NAME, + MQTT_FN_PROTOCOL_VERSION, + MQTT_FN_CLIENT_IDENTIFIER, + MQTT_FN_WILL_TOPIC, + MQTT_FN_WILL_PAYLOAD, + MQTT_FN_USERNAME, + MQTT_FN_PASSWORD, + MQTT_FN_KEEPALIVE, + + /* MQTT 5.0 properties + * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901029 + */ + MQTT_FN_PAYLOAD_FORMAT_INDICATOR, + MQTT_FN_MESSAGE_EXPIRY_INTERVAL, + MQTT_FN_CONTENT_TYPE, + MQTT_FN_RESPONSE_TOPIC, + MQTT_FN_CORRELATION_DATA, + MQTT_FN_SUBSCRIPTION_IDENTIFIER, + MQTT_FN_SESSION_EXPIRY_INTERVAL, + MQTT_FN_ASSIGNED_CLIENT_IDENTIFIER, + MQTT_FN_SERVER_KEEPALIVE, + MQTT_FN_AUTHENTICATION_METHOD, + MQTT_FN_AUTHENTICATION_DATA, + MQTT_FN_REQUEST_PROBLEM_INFORMATION, + MQTT_FN_DELAY_INTERVAL, + MQTT_FN_REQUEST_RESPONSE_INFORMATION, + MQTT_FN_RESPONSE_INFORMATION, + MQTT_FN_SERVER_REFERENCE, + MQTT_FN_REASON_STRING, + MQTT_FN_RECEIVE_MAXIMUM, + MQTT_FN_TOPIC_ALIAS_MAXIMUM, + MQTT_FN_TOPIC_ALIAS, + MQTT_FN_MAXIMUM_QOS, + MQTT_FN_RETAIN_AVAILABLE, + MQTT_FN_USER_PROPERTY, + MQTT_FN_MAXIMUM_PACKET_SIZE, + MQTT_FN_WILDCARD_SUBSCRIPTION_AVAILABLE, + MQTT_FN_SUBSCRIPTION_IDENTIFIERS_AVAILABLE, + MQTT_FN_SHARED_SUBSCRIPTION_AVAILABLE, + + MQTT_FN_ENTRIES /* this one must always be the latest one */ +}; + +/* MQTT field string bit, for easy match using bitmasks + * ATTENTION: "user-properties" are not supported for now + */ +enum { + MQTT_FN_BIT_FLAGS = (1ULL << MQTT_FN_FLAGS), + MQTT_FN_BIT_REASON_CODE = (1ULL << MQTT_FN_REASON_CODE), + MQTT_FN_BIT_PROTOCOL_NAME = (1ULL << MQTT_FN_PROTOCOL_NAME), + MQTT_FN_BIT_PROTOCOL_VERSION = (1ULL << MQTT_FN_PROTOCOL_VERSION), + MQTT_FN_BIT_CLIENT_IDENTIFIER = 
(1ULL << MQTT_FN_CLIENT_IDENTIFIER), + MQTT_FN_BIT_WILL_TOPIC = (1ULL << MQTT_FN_WILL_TOPIC), + MQTT_FN_BIT_WILL_PAYLOAD = (1ULL << MQTT_FN_WILL_PAYLOAD), + MQTT_FN_BIT_USERNAME = (1ULL << MQTT_FN_USERNAME), + MQTT_FN_BIT_PASSWORD = (1ULL << MQTT_FN_PASSWORD), + MQTT_FN_BIT_KEEPALIVE = (1ULL << MQTT_FN_KEEPALIVE), + MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR = (1ULL << MQTT_FN_PAYLOAD_FORMAT_INDICATOR), + MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL = (1ULL << MQTT_FN_MESSAGE_EXPIRY_INTERVAL), + MQTT_FN_BIT_CONTENT_TYPE = (1ULL << MQTT_FN_CONTENT_TYPE), + MQTT_FN_BIT_RESPONSE_TOPIC = (1ULL << MQTT_FN_RESPONSE_TOPIC), + MQTT_FN_BIT_CORRELATION_DATA = (1ULL << MQTT_FN_CORRELATION_DATA), + MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER = (1ULL << MQTT_FN_SUBSCRIPTION_IDENTIFIER), + MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL = (1ULL << MQTT_FN_SESSION_EXPIRY_INTERVAL), + MQTT_FN_BIT_ASSIGNED_CLIENT_IDENTIFIER = (1ULL << MQTT_FN_ASSIGNED_CLIENT_IDENTIFIER), + MQTT_FN_BIT_SERVER_KEEPALIVE = (1ULL << MQTT_FN_SERVER_KEEPALIVE), + MQTT_FN_BIT_AUTHENTICATION_METHOD = (1ULL << MQTT_FN_AUTHENTICATION_METHOD), + MQTT_FN_BIT_AUTHENTICATION_DATA = (1ULL << MQTT_FN_AUTHENTICATION_DATA), + MQTT_FN_BIT_REQUEST_PROBLEM_INFORMATION = (1ULL << MQTT_FN_REQUEST_PROBLEM_INFORMATION), + MQTT_FN_BIT_DELAY_INTERVAL = (1ULL << MQTT_FN_DELAY_INTERVAL), + MQTT_FN_BIT_REQUEST_RESPONSE_INFORMATION = (1ULL << MQTT_FN_REQUEST_RESPONSE_INFORMATION), + MQTT_FN_BIT_RESPONSE_INFORMATION = (1ULL << MQTT_FN_RESPONSE_INFORMATION), + MQTT_FN_BIT_SERVER_REFERENCE = (1ULL << MQTT_FN_SERVER_REFERENCE), + MQTT_FN_BIT_REASON_STRING = (1ULL << MQTT_FN_REASON_STRING), + MQTT_FN_BIT_RECEIVE_MAXIMUM = (1ULL << MQTT_FN_RECEIVE_MAXIMUM), + MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM = (1ULL << MQTT_FN_TOPIC_ALIAS_MAXIMUM), + MQTT_FN_BIT_TOPIC_ALIAS = (1ULL << MQTT_FN_TOPIC_ALIAS), + MQTT_FN_BIT_MAXIMUM_QOS = (1ULL << MQTT_FN_MAXIMUM_QOS), + MQTT_FN_BIT_RETAIN_AVAILABLE = (1ULL << MQTT_FN_RETAIN_AVAILABLE), + MQTT_FN_BIT_USER_PROPERTY = (1ULL << 
MQTT_FN_USER_PROPERTY), + MQTT_FN_BIT_MAXIMUM_PACKET_SIZE = (1ULL << MQTT_FN_MAXIMUM_PACKET_SIZE), + MQTT_FN_BIT_WILDCARD_SUBSCRIPTION_AVAILABLE = (1ULL << MQTT_FN_WILDCARD_SUBSCRIPTION_AVAILABLE), + MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIERS_AVAILABLE= (1ULL << MQTT_FN_SUBSCRIPTION_IDENTIFIERS_AVAILABLE), + MQTT_FN_BIT_SHARED_SUBSCRIPTION_AVAILABLE = (1ULL << MQTT_FN_SHARED_SUBSCRIPTION_AVAILABLE), +}; + +/* structure to host fields for a MQTT CONNECT packet */ +#define MQTT_PROP_USER_PROPERTY_ENTRIES 5 +struct connect { + struct { + struct ist protocol_name; + uint8_t protocol_version; + uint8_t flags; + uint16_t keepalive; + + struct { + uint32_t session_expiry_interval; + uint16_t receive_maximum; + uint32_t maximum_packet_size; + uint16_t topic_alias_maximum; + uint8_t request_response_information; + uint8_t request_problem_information; + struct { + struct ist name; + struct ist value; + } user_props[MQTT_PROP_USER_PROPERTY_ENTRIES]; + struct ist authentication_method; + struct ist authentication_data; + } props; + } var_hdr; + struct { + struct ist client_identifier; + struct { + uint32_t delay_interval; + uint8_t payload_format_indicator; + uint32_t message_expiry_interval; + struct ist content_type; + struct ist response_topic; + struct ist correlation_data; + struct { + struct ist name; + struct ist value; + } user_props[MQTT_PROP_USER_PROPERTY_ENTRIES]; + } will_props; + struct ist will_topic; + struct ist will_payload; + struct ist username; + struct ist password; + } payload; +}; + +/* structure to host fields for a MQTT CONNACK packet */ +struct connack { + struct { + uint8_t protocol_version; + uint8_t flags; + uint8_t reason_code; + struct { + uint32_t session_expiry_interval; + uint16_t receive_maximum; + uint8_t maximum_qos; + uint8_t retain_available; + uint32_t maximum_packet_size; + struct ist assigned_client_identifier; + uint16_t topic_alias_maximum; + struct ist reason_string; + struct { + struct ist name; + struct ist value; + } 
user_props[MQTT_PROP_USER_PROPERTY_ENTRIES]; + uint8_t wildcard_subscription_available; + uint8_t subscription_identifiers_available; + uint8_t shared_subsription_available; + uint16_t server_keepalive; + struct ist response_information; + struct ist server_reference; + struct ist authentication_method; + struct ist authentication_data; + } props; + } var_hdr; +}; + +/* structure to host a MQTT packet */ +struct mqtt_pkt { + struct { + uint8_t type; /* MQTT_CPT_* */ + uint8_t flags; /* MQTT_CPT_FL* */ + uint32_t remaining_length; + } fixed_hdr; + union { + struct connect connect; + struct connack connack; + } data; +}; + +#endif /* _HAPROXY_MQTT_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/mqtt.h b/include/haproxy/mqtt.h new file mode 100644 index 0000000..6720bb7 --- /dev/null +++ b/include/haproxy/mqtt.h @@ -0,0 +1,118 @@ +/* + * include/haproxt/mqtt.h + * This file contains structure declarations for MQTT protocol. + * + * Copyright 2020 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MQTT_H +#define _HAPROXY_MQTT_H + +#include <import/ist.h> + +#include <haproxy/mqtt-t.h> +#include <haproxy/tools.h> + +/* expected flags for control packets */ +extern uint8_t mqtt_cpt_flags[MQTT_CPT_ENTRIES]; + +/* MQTT field string names */ +extern const struct ist mqtt_fields_string[MQTT_FN_ENTRIES]; + +/* list of supported capturable field names for each MQTT control packet type */ +extern const uint64_t mqtt_fields_per_packet[MQTT_CPT_ENTRIES]; + +int mqtt_validate_message(const struct ist msg, struct mqtt_pkt *mpkt); +struct ist mqtt_field_value(const struct ist msg, int type, int fieldname_id); + +/* + * Return a MQTT packet type ID based found in <str>. + * <str> can be a number or a string and returned value will always be the numeric value. + * + * If <str> can't be translated into an ID, then MQTT_CPT_INVALID (0) is returned. 
+ */ +static inline int mqtt_typeid(struct ist str) +{ + int id; + + id = strl2ui(str.ptr, istlen(str)); + if ((id >= MQTT_CPT_CONNECT) && (id < MQTT_CPT_ENTRIES)) + return id; + + else if (isteqi(str, ist("CONNECT")) != 0) + return MQTT_CPT_CONNECT; + else if (isteqi(str, ist("CONNACK")) != 0) + return MQTT_CPT_CONNACK; + else if (isteqi(str, ist("PUBLISH")) != 0) + return MQTT_CPT_PUBLISH; + else if (isteqi(str, ist("PUBACK")) != 0) + return MQTT_CPT_PUBACK; + else if (isteqi(str, ist("PUBREC")) != 0) + return MQTT_CPT_PUBREC; + else if (isteqi(str, ist("PUBREL")) != 0) + return MQTT_CPT_PUBREL; + else if (isteqi(str, ist("PUBCOMP")) != 0) + return MQTT_CPT_PUBCOMP; + else if (isteqi(str, ist("SUBSCRIBE")) != 0) + return MQTT_CPT_SUBSCRIBE; + else if (isteqi(str, ist("SUBACK")) != 0) + return MQTT_CPT_SUBACK; + else if (isteqi(str, ist("UNSUBSCRIBE")) != 0) + return MQTT_CPT_UNSUBSCRIBE; + else if (isteqi(str, ist("UNSUBACK")) != 0) + return MQTT_CPT_UNSUBACK; + else if (isteqi(str, ist("PINGREQ")) != 0) + return MQTT_CPT_PINGREQ; + else if (isteqi(str, ist("PINGRESP")) != 0) + return MQTT_CPT_PINGRESP; + else if (isteqi(str, ist("DISCONNECT")) != 0) + return MQTT_CPT_DISCONNECT; + else if (isteqi(str, ist("AUTH")) != 0) + return MQTT_CPT_AUTH; + + return MQTT_CPT_INVALID; +} + +/* + * validate that <str> is a field that can be extracted from a <type> MQTT packet + * + * return the field name ID (MQTT_FN_*) if a match is found, MQTT_FN_INVALID (0) otherwise. 
+ */ +static inline int mqtt_check_type_fieldname(int type, struct ist str) +{ + int i, id = MQTT_FN_INVALID; + + for (i = 0; i < MQTT_FN_ENTRIES; i++) { + if (isteqi(str, mqtt_fields_string[i])) { + if (mqtt_fields_per_packet[type] & (1ULL << i)) + id = i; + break; + } + } + + return id; + +} + +#endif /* _HAPROXY_MQTT_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/mux_fcgi-t.h b/include/haproxy/mux_fcgi-t.h new file mode 100644 index 0000000..27973db --- /dev/null +++ b/include/haproxy/mux_fcgi-t.h @@ -0,0 +1,175 @@ +/* + * include/haproxy/mux_fcgi-t.h + * Definitions for basic FCGI mux internal types, constants and flags. + * + * Copyright 2022 Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
/**** FCGI connection flags (32 bit), in fcgi_conn->flags ****/
#define FCGI_CF_NONE           0x00000000

/* Flags indicating why writing to the mux is blocked */
#define FCGI_CF_MUX_MALLOC     0x00000001 /* mux is blocked on lack of connection's mux buffer */
#define FCGI_CF_MUX_MFULL      0x00000002 /* mux is blocked on connection's mux buffer full */
#define FCGI_CF_MUX_BLOCK_ANY  0x00000003 /* aggregate of the two mux blocking flags above (MALLOC|MFULL) */
/* This function is used to report flags in debugging tools. Please reflect
 * below any single-bit flag addition above in the same order via the
 * __APPEND_FLAG macro. The new end of the buffer is returned.
 *
 * <buf>, <len>, <delim> and <flg> are handed as-is to __APPEND_FLAG for every
 * listed flag. Only single-bit FCGI_CF_* flags are listed: the multi-bit
 * aggregates FCGI_CF_MUX_BLOCK_ANY and FCGI_CF_DEM_BLOCK_ANY are not part of
 * the chain.
 * NOTE(review): how <delim> and <len> are used is defined by __APPEND_FLAG,
 * which is not visible here -- check its definition before relying on it.
 */
static forceinline char *fconn_show_flags(char *buf, size_t len, const char *delim, uint flg)
{
/* local shorthand: passes the flag along with its stringified name (#f) and
 * forwards whatever follows as extra arguments, which is what allows the
 * calls below to be nested into each other. Undefined again at the end of
 * the function.
 */
#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__)
	/* prologue */
	_(0);
	/* flags */
	_(FCGI_CF_MUX_MALLOC, _(FCGI_CF_MUX_MFULL,
	_(FCGI_CF_DEM_DALLOC, _(FCGI_CF_DEM_DFULL, _(FCGI_CF_DEM_MROOM,
	_(FCGI_CF_DEM_SALLOC, _(FCGI_CF_DEM_SFULL, _(FCGI_CF_DEM_TOOMANY,
	_(FCGI_CF_MPXS_CONNS, _(FCGI_CF_ABRTS_SENT, _(FCGI_CF_ABRTS_FAILED,
	_(FCGI_CF_WAIT_FOR_HS, _(FCGI_CF_KEEP_CONN, _(FCGI_CF_GET_VALUES,
	_(FCGI_CF_EOS, _(FCGI_CF_ERR_PENDING, _(FCGI_CF_ERROR)))))))))))))))));
	/* epilogue */
	_(~0U);
	return buf;
#undef _
}
+ */ +static forceinline char *fstrm_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(FCGI_SF_ES_RCVD, _(FCGI_SF_ES_SENT, _(FCGI_SF_EP_SENT, _(FCGI_SF_ABRT_SENT, + _(FCGI_SF_BLK_MBUSY, _(FCGI_SF_BLK_MROOM, + _(FCGI_SF_BEGIN_SENT, _(FCGI_SF_OUTGOING_DATA, _(FCGI_SF_NOTIFIED, + _(FCGI_SF_WANT_SHUTR, _(FCGI_SF_WANT_SHUTW))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* FCGI connection state (fcgi_conn->state) */ +enum fcgi_conn_st { + FCGI_CS_INIT = 0, /* init done, waiting for sending GET_VALUES record */ + FCGI_CS_SETTINGS, /* GET_VALUES sent, waiting for the GET_VALUES_RESULT record */ + FCGI_CS_RECORD_H, /* GET_VALUES_RESULT received, waiting for a record header */ + FCGI_CS_RECORD_D, /* Record header OK, waiting for a record data */ + FCGI_CS_RECORD_P, /* Record processed, remains the padding */ + FCGI_CS_CLOSED, /* abort requests if necessary and close the connection ASAP */ + FCGI_CS_ENTRIES +} __attribute__((packed)); + +/* returns a fconn state as an abbreviated 3-letter string, or "???" if unknown */ +static inline const char *fconn_st_to_str(enum fcgi_conn_st st) +{ + switch (st) { + case FCGI_CS_INIT : return "INI"; + case FCGI_CS_SETTINGS : return "STG"; + case FCGI_CS_RECORD_H : return "RDH"; + case FCGI_CS_RECORD_D : return "RDD"; + case FCGI_CS_RECORD_P : return "RDP"; + case FCGI_CS_CLOSED : return "CLO"; + default : return "???"; + } +} + +/* FCGI stream state, in fcgi_strm->state */ +enum fcgi_strm_st { + FCGI_SS_IDLE = 0, + FCGI_SS_OPEN, + FCGI_SS_HREM, // half-closed(remote) + FCGI_SS_HLOC, // half-closed(local) + FCGI_SS_ERROR, + FCGI_SS_CLOSED, + FCGI_SS_ENTRIES +} __attribute__((packed)); + + +/* returns a fstrm state as an abbreviated 3-letter string, or "???" 
if unknown */ +static inline const char *fstrm_st_to_str(enum fcgi_strm_st st) +{ + switch (st) { + case FCGI_SS_IDLE : return "IDL"; + case FCGI_SS_OPEN : return "OPN"; + case FCGI_SS_HREM : return "RCL"; + case FCGI_SS_HLOC : return "HCL"; + case FCGI_SS_ERROR : return "ERR"; + case FCGI_SS_CLOSED : return "CLO"; + default : return "???"; + } +} + + +#endif /* _HAPROXY_MUX_FCGI_T_H */ diff --git a/include/haproxy/mux_h1-t.h b/include/haproxy/mux_h1-t.h new file mode 100644 index 0000000..2f49a49 --- /dev/null +++ b/include/haproxy/mux_h1-t.h @@ -0,0 +1,160 @@ +/* + * include/haproxy/mux_h1-t.h + * Definitions for basic H1 mux internal types, constants and flags. + * + * Copyright 2022 Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MUX_H1_T_H +#define _HAPROXY_MUX_H1_T_H + +#include <haproxy/api-t.h> +#include <haproxy/show_flags-t.h> + +/**** Connection flags (32 bit), in h1c->flags ****/ +#define H1C_F_NONE 0x00000000 + +/* Flags indicating why writing output data are blocked */ +#define H1C_F_OUT_ALLOC 0x00000001 /* mux is blocked on lack of output buffer */ +#define H1C_F_OUT_FULL 0x00000002 /* mux is blocked on output buffer full */ +/* 0x00000004 - 0x00000008 unused */ + +/* Flags indicating why reading input data are blocked. 
*/ +#define H1C_F_IN_ALLOC 0x00000010 /* mux is blocked on lack of input buffer */ +#define H1C_F_IN_FULL 0x00000020 /* mux is blocked on input buffer full */ +#define H1C_F_IN_SALLOC 0x00000040 /* mux is blocked on lack of stream's request buffer */ +/* 0x00000080 unused */ + +#define H1C_F_EOS 0x00000100 /* End-of-stream seen on the H1 connection (read0 detected) */ +#define H1C_F_ERR_PENDING 0x00000200 /* A write error was detected (block sends but not reads) */ +#define H1C_F_ERROR 0x00000400 /* A read error was detected (handled has an abort) */ +#define H1C_F_SILENT_SHUT 0x00000800 /* if H1C is closed closed, silent (or dirty) shutdown must be performed */ +#define H1C_F_ABRT_PENDING 0x00001000 /* An error must be sent (previous attempt failed) and H1 connection must be closed ASAP */ +#define H1C_F_ABRTED 0x00002000 /* An error must be sent (previous attempt failed) and H1 connection must be closed ASAP */ +#define H1C_F_WANT_FASTFWD 0x00004000 /* Don't read into a buffer because we want to fast forward data */ +#define H1C_F_WAIT_NEXT_REQ 0x00008000 /* waiting for the next request to start, use keep-alive timeout */ +#define H1C_F_UPG_H2C 0x00010000 /* set if an upgrade to h2 should be done */ +#define H1C_F_CO_MSG_MORE 0x00020000 /* set if CO_SFL_MSG_MORE must be set when calling xprt->snd_buf() */ +#define H1C_F_CO_STREAMER 0x00040000 /* set if CO_SFL_STREAMER must be set when calling xprt->snd_buf() */ +#define H1C_F_CANT_FASTFWD 0x00080000 /* Fast-forwarding is not supported (exclusive with WANT_FASTFWD) */ + +/* 0x00100000 - 0x40000000 unused */ +#define H1C_F_IS_BACK 0x80000000 /* Set on outgoing connection */ + + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *h1c_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(H1C_F_OUT_ALLOC, _(H1C_F_OUT_FULL, + _(H1C_F_IN_ALLOC, _(H1C_F_IN_FULL, _(H1C_F_IN_SALLOC, + _(H1C_F_EOS, _(H1C_F_ERR_PENDING, _(H1C_F_ERROR, + _(H1C_F_SILENT_SHUT, _(H1C_F_ABRT_PENDING, _(H1C_F_ABRTED, + _(H1C_F_WANT_FASTFWD, _(H1C_F_WAIT_NEXT_REQ, _(H1C_F_UPG_H2C, _(H1C_F_CO_MSG_MORE, + _(H1C_F_CO_STREAMER, _(H1C_F_CANT_FASTFWD, _(H1C_F_IS_BACK)))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + + +/**** H1 stream flags (32 bit), in h1s->flags ****/ +#define H1S_F_NONE 0x00000000 + +#define H1S_F_RX_BLK 0x00100000 /* Don't process more input data, waiting sync with output side */ +#define H1S_F_TX_BLK 0x00200000 /* Don't process more output data, waiting sync with input side */ +#define H1S_F_RX_CONGESTED 0x00000004 /* Cannot process input data RX path is congested (waiting for more space in channel's buffer) */ + +/* 0x00000008 unused */ +#define H1S_F_WANT_KAL 0x00000010 +#define H1S_F_WANT_TUN 0x00000020 +#define H1S_F_WANT_CLO 0x00000040 +#define H1S_F_WANT_MSK 0x00000070 +#define H1S_F_NOT_FIRST 0x00000080 /* The H1 stream is not the first one */ +#define H1S_F_BODYLESS_RESP 0x00000100 /* Bodyless response message */ + +#define H1S_F_INTERNAL_ERROR 0x00000200 /* Set when an internal error occurred during the message parsing */ +#define H1S_F_NOT_IMPL_ERROR 0x00000400 /* Set when a feature is not implemented during the message parsing */ +#define H1S_F_PARSING_ERROR 0x00000800 /* Set when an error occurred during the message parsing */ +#define H1S_F_PROCESSING_ERROR 0x00001000 /* Set when an error occurred during the message xfer */ +#define H1S_F_ERROR_MASK 0x00003800 /* stream error mask */ + +#define H1S_F_HAVE_SRV_NAME 0x00002000 /* Set during output process if the server name header was added to the request */ +#define H1S_F_HAVE_O_CONN 0x00004000 /* Set during output process to know connection mode was processed */ +#define 
H1S_F_HAVE_WS_KEY 0x00008000 /* Set during output process to know WS key was found or generated */ +#define H1S_F_HAVE_CLEN 0x00010000 /* Set during output process to know C*L header was found or generated */ +#define H1S_F_HAVE_CHNK 0x00020000 /* Set during output process to know "T-E; chunk" header was found or generated */ + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *h1s_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(H1S_F_RX_BLK, _(H1S_F_TX_BLK, _(H1S_F_RX_CONGESTED, + _(H1S_F_WANT_KAL, _(H1S_F_WANT_TUN, _(H1S_F_WANT_CLO, + _(H1S_F_NOT_FIRST, _(H1S_F_BODYLESS_RESP, + _(H1S_F_INTERNAL_ERROR, _(H1S_F_NOT_IMPL_ERROR, _(H1S_F_PARSING_ERROR, _(H1S_F_PROCESSING_ERROR, + _(H1S_F_HAVE_SRV_NAME, _(H1S_F_HAVE_O_CONN, _(H1S_F_HAVE_WS_KEY, + _(H1S_F_HAVE_CLEN, _(H1S_F_HAVE_CHNK))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* H1 connection state, in h1c->state */ +enum h1_cs { + H1_CS_IDLE, /* IDLE connection. 
A freashly open or a reusable connection (H1S is NULL) */ + H1_CS_EMBRYONIC, /* Connection is waiting for the message headers (H1S is not NULL, not attached to a SC - Frontend connection only) */ + H1_CS_UPGRADING, /* TCP>H1 upgrade in-progress (H1S is not NULL and attached to a SC - Frontend connection only) */ + H1_CS_RUNNING, /* Connection fully established and the H1S is processing data (H1S is not NULL and attached to a SC) */ + H1_CS_CLOSING, /* Send pending outgoing data and close the connection ASAP (H1S may be NULL) */ + H1_CS_CLOSED, /* Connection must be closed now and H1C must be released (H1S is NULL) */ + H1_CS_ENTRIES, +} __attribute__((packed)); + + +/**** tiny state decoding functions for debug helpers ****/ + +/* returns a h1c state as an abbreviated 3-letter string, or "???" if unknown */ +static inline const char *h1c_st_to_str(enum h1_cs st) +{ + switch (st) { + case H1_CS_IDLE: return "IDL"; + case H1_CS_EMBRYONIC: return "EMB"; + case H1_CS_UPGRADING: return "UPG"; + case H1_CS_RUNNING: return "RUN"; + case H1_CS_CLOSING: return "CLI"; + case H1_CS_CLOSED: return "CLD"; + default: return "???"; + } +} + + +#endif /* _HAPROXY_MUX_H1_T_H */ diff --git a/include/haproxy/mux_h2-t.h b/include/haproxy/mux_h2-t.h new file mode 100644 index 0000000..ccb40b2 --- /dev/null +++ b/include/haproxy/mux_h2-t.h @@ -0,0 +1,222 @@ +/* + * include/haproxy/mux_h2-t.h + * Definitions for basic H2 mux internal types, constants and flags. + * + * Copyright 2017-2022 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_MUX_H2_T_H +#define _HAPROXY_MUX_H2_T_H + +#include <haproxy/api-t.h> +#include <haproxy/show_flags-t.h> + +/**** Connection flags (32 bit), in h2c->flags ****/ + +#define H2_CF_NONE 0x00000000 + +/* Flags indicating why writing to the mux is blocked. */ +#define H2_CF_MUX_MALLOC 0x00000001 // mux blocked on lack of connection's mux buffer +#define H2_CF_MUX_MFULL 0x00000002 // mux blocked on connection's mux buffer full +#define H2_CF_MUX_BLOCK_ANY 0x00000003 // aggregate of the mux flags above + +/* Flags indicating why writing to the demux is blocked. + * The first two ones directly affect the ability for the mux to receive data + * from the connection. The other ones affect the mux's ability to demux + * received data. + */ +#define H2_CF_DEM_DALLOC 0x00000004 // demux blocked on lack of connection's demux buffer +#define H2_CF_DEM_DFULL 0x00000008 // demux blocked on connection's demux buffer full + +#define H2_CF_WAIT_INLIST 0x00000010 // there is at least one stream blocked by another stream in send_list/fctl_list +#define H2_CF_DEM_MROOM 0x00000020 // demux blocked on lack of room in mux buffer +#define H2_CF_DEM_SALLOC 0x00000040 // demux blocked on lack of stream's request buffer +#define H2_CF_DEM_SFULL 0x00000080 // demux blocked on stream request buffer full +#define H2_CF_DEM_TOOMANY 0x00000100 // demux blocked waiting for some stream connectors to leave +#define H2_CF_DEM_BLOCK_ANY 0x000001E0 // aggregate of the demux flags above except DALLOC/DFULL + // (SHORT_READ is also excluded) + +#define H2_CF_DEM_SHORT_READ 0x00000200 // demux blocked on incomplete frame +#define H2_CF_DEM_IN_PROGRESS 0x00000400 // demux in progress (dsi,dfl,dft are valid) + +/* other flags */ +#define H2_CF_MBUF_HAS_DATA 
0x00000800 // some stream data (data, headers) still in mbuf +#define H2_CF_GOAWAY_SENT 0x00001000 // a GOAWAY frame was successfully sent +#define H2_CF_GOAWAY_FAILED 0x00002000 // a GOAWAY frame failed to be sent +#define H2_CF_WAIT_FOR_HS 0x00004000 // We did check that at least a stream was waiting for handshake +#define H2_CF_IS_BACK 0x00008000 // this is an outgoing connection +#define H2_CF_WINDOW_OPENED 0x00010000 // demux increased window already advertised +#define H2_CF_RCVD_SHUT 0x00020000 // a recv() attempt already failed on a shutdown +#define H2_CF_END_REACHED 0x00040000 // pending data too short with RCVD_SHUT present + +#define H2_CF_RCVD_RFC8441 0x00100000 // settings from RFC8441 has been received indicating support for Extended CONNECT +#define H2_CF_SHTS_UPDATED 0x00200000 // SETTINGS_HEADER_TABLE_SIZE updated +#define H2_CF_DTSU_EMITTED 0x00400000 // HPACK Dynamic Table Size Update opcode emitted + +#define H2_CF_ERR_PENDING 0x00800000 // A write error was detected (block sends but not reads) +#define H2_CF_ERROR 0x01000000 //A read error was detected (handled has an abort) + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *h2c_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(H2_CF_MUX_MALLOC, _(H2_CF_MUX_MFULL, _(H2_CF_DEM_DALLOC, + _(H2_CF_DEM_DFULL, _(H2_CF_WAIT_INLIST, _(H2_CF_DEM_MROOM, + _(H2_CF_DEM_SALLOC, _(H2_CF_DEM_SFULL, _(H2_CF_DEM_TOOMANY, + _(H2_CF_DEM_SHORT_READ, _(H2_CF_DEM_IN_PROGRESS, _(H2_CF_MBUF_HAS_DATA, + _(H2_CF_GOAWAY_SENT, _(H2_CF_GOAWAY_FAILED, _(H2_CF_WAIT_FOR_HS, _(H2_CF_IS_BACK, + _(H2_CF_WINDOW_OPENED, _(H2_CF_RCVD_SHUT, _(H2_CF_END_REACHED, + _(H2_CF_RCVD_RFC8441, _(H2_CF_SHTS_UPDATED, _(H2_CF_DTSU_EMITTED, + _(H2_CF_ERR_PENDING, _(H2_CF_ERROR)))))))))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + + +/**** HTTP/2 stream flags (32 bit), in h2s->flags ****/ + +#define H2_SF_NONE 0x00000000 +#define H2_SF_ES_RCVD 0x00000001 +#define H2_SF_ES_SENT 0x00000002 + +#define H2_SF_RST_RCVD 0x00000004 // received RST_STREAM +#define H2_SF_RST_SENT 0x00000008 // sent RST_STREAM + +/* stream flags indicating the reason the stream is blocked */ +#define H2_SF_BLK_MBUSY 0x00000010 // blocked waiting for mux access (transient) +#define H2_SF_BLK_MROOM 0x00000020 // blocked waiting for room in the mux (must be in send list) +#define H2_SF_BLK_MFCTL 0x00000040 // blocked due to mux fctl (must be in fctl list) +#define H2_SF_BLK_SFCTL 0x00000080 // blocked due to stream fctl (must be in blocked list) +#define H2_SF_BLK_ANY 0x000000F0 // any of the reasons above + +/* stream flags indicating how data is supposed to be sent */ +#define H2_SF_DATA_CLEN 0x00000100 // data sent using content-length +#define H2_SF_BODYLESS_RESP 0x00000200 /* Bodyless response message */ +#define H2_SF_BODY_TUNNEL 0x00000400 // Attempt to establish a Tunnelled stream (the result depends on the status code) + +#define H2_SF_NOTIFIED 0x00000800 // a paused stream was notified to try to send again +#define H2_SF_HEADERS_SENT 0x00001000 // a HEADERS frame was sent for this stream +#define H2_SF_OUTGOING_DATA 0x00002000 // set 
whenever we've seen outgoing data + +#define H2_SF_HEADERS_RCVD 0x00004000 // a HEADERS frame was received for this stream + +#define H2_SF_WANT_SHUTR 0x00008000 // a stream couldn't shutr() (mux full/busy) +#define H2_SF_WANT_SHUTW 0x00010000 // a stream couldn't shutw() (mux full/busy) + +#define H2_SF_EXT_CONNECT_SENT 0x00040000 // rfc 8441 an Extended CONNECT has been sent +#define H2_SF_EXT_CONNECT_RCVD 0x00080000 // rfc 8441 an Extended CONNECT has been received and parsed + +#define H2_SF_TUNNEL_ABRT 0x00100000 // A tunnel attempt was aborted +#define H2_SF_MORE_HTX_DATA 0x00200000 // more data expected from HTX + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *h2s_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(H2_SF_ES_RCVD, _(H2_SF_ES_SENT, _(H2_SF_RST_RCVD, _(H2_SF_RST_SENT, + _(H2_SF_BLK_MBUSY, _(H2_SF_BLK_MROOM, _(H2_SF_BLK_MFCTL, + _(H2_SF_BLK_SFCTL, _(H2_SF_DATA_CLEN, _(H2_SF_BODYLESS_RESP, + _(H2_SF_BODY_TUNNEL, _(H2_SF_NOTIFIED, _(H2_SF_HEADERS_SENT, + _(H2_SF_OUTGOING_DATA, _(H2_SF_HEADERS_RCVD, _(H2_SF_WANT_SHUTR, + _(H2_SF_WANT_SHUTW, _(H2_SF_EXT_CONNECT_SENT, _(H2_SF_EXT_CONNECT_RCVD, + _(H2_SF_TUNNEL_ABRT, _(H2_SF_MORE_HTX_DATA))))))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + + +/* H2 connection state, in h2c->st0 */ +enum h2_cs { + H2_CS_PREFACE, // init done, waiting for connection preface + H2_CS_SETTINGS1, // preface OK, waiting for first settings frame + H2_CS_FRAME_H, // first settings frame ok, waiting for frame header + H2_CS_FRAME_P, // frame header OK, waiting for frame payload + H2_CS_FRAME_A, // frame payload OK, trying to send ACK frame + H2_CS_FRAME_E, // frame payload OK, 
trying to send RST frame + H2_CS_ERROR, // send GOAWAY(errcode) and close the connection ASAP + H2_CS_ERROR2, // GOAWAY(errcode) sent, close the connection ASAP + H2_CS_ENTRIES // must be last +} __attribute__((packed)); + +/* H2 stream state, in h2s->st */ +enum h2_ss { + H2_SS_IDLE = 0, // idle + H2_SS_RLOC, // reserved(local) + H2_SS_RREM, // reserved(remote) + H2_SS_OPEN, // open + H2_SS_HREM, // half-closed(remote) + H2_SS_HLOC, // half-closed(local) + H2_SS_ERROR, // an error needs to be sent using RST_STREAM + H2_SS_CLOSED, // closed + H2_SS_ENTRIES // must be last +} __attribute__((packed)); + + +/* 32 buffers: one for the ring's root, rest for the mbuf itself */ +#define H2C_MBUF_CNT 32 + +/**** tiny state decoding functions for debug helpers ****/ + +/* returns a h2c state as an abbreviated 3-letter string, or "???" if unknown */ +static inline const char *h2c_st_to_str(enum h2_cs st) +{ + switch (st) { + case H2_CS_PREFACE: return "PRF"; + case H2_CS_SETTINGS1: return "STG"; + case H2_CS_FRAME_H: return "FRH"; + case H2_CS_FRAME_P: return "FRP"; + case H2_CS_FRAME_A: return "FRA"; + case H2_CS_FRAME_E: return "FRE"; + case H2_CS_ERROR: return "ERR"; + case H2_CS_ERROR2: return "ER2"; + default: return "???"; + } +} + +/* returns a h2s state as an abbreviated 3-letter string, or "???" 
if unknown */ +static inline const char *h2s_st_to_str(enum h2_ss st) +{ + switch (st) { + case H2_SS_IDLE: return "IDL"; // idle + case H2_SS_RLOC: return "RSL"; // reserved local + case H2_SS_RREM: return "RSR"; // reserved remote + case H2_SS_OPEN: return "OPN"; // open + case H2_SS_HREM: return "HCR"; // half-closed remote + case H2_SS_HLOC: return "HCL"; // half-closed local + case H2_SS_ERROR : return "ERR"; // error + case H2_SS_CLOSED: return "CLO"; // closed + default: return "???"; + } +} + +#endif /* _HAPROXY_MUX_H2_T_H */ diff --git a/include/haproxy/mux_quic-t.h b/include/haproxy/mux_quic-t.h new file mode 100644 index 0000000..abfc20a --- /dev/null +++ b/include/haproxy/mux_quic-t.h @@ -0,0 +1,204 @@ +#ifndef _HAPROXY_MUX_QUIC_T_H +#define _HAPROXY_MUX_QUIC_T_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <import/ebtree-t.h> + +#include <haproxy/buf-t.h> +#include <haproxy/connection-t.h> +#include <haproxy/htx-t.h> +#include <haproxy/list-t.h> +#include <haproxy/ncbuf-t.h> +#include <haproxy/quic_frame-t.h> +#include <haproxy/quic_stream-t.h> +#include <haproxy/stconn-t.h> + +/* Stream types */ +enum qcs_type { + QCS_CLT_BIDI, + QCS_SRV_BIDI, + QCS_CLT_UNI, + QCS_SRV_UNI, + + /* Must be the last one */ + QCS_MAX_TYPES +}; + +#define QC_CF_ERRL 0x00000001 /* fatal error detected locally, connection should be closed soon */ +#define QC_CF_ERRL_DONE 0x00000002 /* local error properly handled, connection can be released */ +#define QC_CF_BLK_MFCTL 0x00000004 /* sending blocked due to connection flow-control */ +#define QC_CF_CONN_FULL 0x00000008 /* no stream buffers available on connection */ +#define QC_CF_APP_SHUT 0x00000010 /* Application layer shutdown done. 
/* QUIC mux connection state. It references the underlying connection, keeps
 * the flow-control accounting for both directions, and owns the tree and
 * lists through which its streams (struct qcs) are tracked.
 */
struct qcc {
	struct connection *conn; /* underlying connection (passed to conn_is_back() by quic_stream_is_local()) */
	uint64_t nb_sc; /* number of attached stream connectors */
	uint64_t nb_hreq; /* number of in-progress http requests */
	uint32_t flags; /* QC_CF_* */

	/* flow-control fields set by us enforced on our side. */
	struct {
		struct list frms; /* prepared frames related to flow-control */

		uint64_t ms_bidi_init; /* max initial sub-ID of bidi stream allowed for the peer */
		uint64_t ms_bidi; /* max sub-ID of bidi stream allowed for the peer */
		uint64_t cl_bidi_r; /* total count of closed remote bidi stream since last MAX_STREAMS emission */

		uint64_t ms_uni; /* max sub-ID of uni stream allowed for the peer */

		uint64_t msd_bidi_l; /* initial max-stream-data on local bidi streams */
		uint64_t msd_bidi_r; /* initial max-stream-data on remote bidi streams */
		uint64_t msd_uni_r; /* initial max-stream-data on remote uni streams */

		uint64_t md; /* current max-data allowed for the peer */
		uint64_t md_init; /* initial max-data */
		uint64_t offsets_recv; /* sum of offsets received */
		uint64_t offsets_consume; /* sum of offsets consumed */
	} lfctl;

	/* flow-control fields set by the peer which we must respect. */
	struct {
		uint64_t md; /* connection flow control limit updated on MAX_DATA frames reception */
		uint64_t msd_bidi_l; /* initial max-stream-data from peer on local bidi streams */
		uint64_t msd_bidi_r; /* initial max-stream-data from peer on remote bidi streams */
		uint64_t msd_uni_l; /* initial max-stream-data from peer on local uni streams */
	} rfctl;

	/* emission accounting at connection level */
	struct {
		uint64_t offsets; /* sum of all offsets prepared */
		uint64_t sent_offsets; /* sum of all offset sent */
	} tx;

	uint64_t largest_bidi_r; /* largest remote bidi stream ID opened. */
	uint64_t largest_uni_r; /* largest remote uni stream ID opened. */
	uint64_t next_bidi_l; /* next stream ID to use for local bidi stream */
	uint64_t next_uni_l; /* next stream ID to use for local uni stream */

	struct eb_root streams_by_id; /* all active streams by their ID */

	struct list send_retry_list; /* list of qcs eligible to send retry */
	struct list send_list; /* list of qcs ready to send (STREAM, STOP_SENDING or RESET_STREAM emission) */

	struct wait_event wait_event; /* To be used if we're waiting for I/Os */

	struct proxy *proxy;

	/* haproxy timeout management */
	struct task *task;
	struct list opening_list; /* list of not already attached streams (http-request timeout) */
	int timeout;      /* NOTE(review): unit and exact meaning are not visible in this header */
	int shut_timeout; /* NOTE(review): unit and exact meaning are not visible in this header */
	int idle_start; /* base time for http-keep-alive timeout */
	struct quic_err err; /* code for locally detected error */

	const struct qcc_app_ops *app_ops; /* application layer operations (see struct qcc_app_ops) */
	void *ctx; /* Application layer context */
};
/* QUIC stream states
 *
 * On initialization a stream is put on idle state. It is opened as soon as
 * data has been successfully sent or received on it.
 *
 * A bidirectional stream has two channels which can be closed separately. The
 * local channel is closed when the STREAM frame with FIN or a RESET_STREAM has
 * been emitted. The remote channel is closed as soon as all data from the peer
 * has been received. The stream goes instantly to the close state once both
 * channels are closed.
 *
 * A unidirectional stream has only one channel of communication. Thus, it does
 * not use half closed states and transitions directly from open to close state.
 */
enum qcs_state {
	QC_SS_IDLE = 0, /* initial state */
	QC_SS_OPEN,     /* opened */
	QC_SS_HLOC,     /* half-closed local */
	QC_SS_HREM,     /* half-closed remote */
	QC_SS_CLO,      /* closed */
} __attribute__((packed));

/* QUIC mux stream. <qcc> points back to the owning mux connection, and the
 * three list elements below allow the stream to be linked into the matching
 * lists of struct qcc.
 */
struct qcs {
	struct qcc *qcc; /* owning mux connection */
	struct sedesc *sd;
	uint32_t flags; /* QC_SF_* */
	enum qcs_state st; /* QC_SS_* state */
	void *ctx; /* app-ops context */

	/* reception side */
	struct {
		uint64_t offset; /* absolute current base offset of ncbuf */
		uint64_t offset_max; /* maximum absolute offset received */
		struct ncbuf ncbuf; /* receive buffer - can handle out-of-order offset frames */
		struct buffer app_buf; /* receive buffer used by stconn layer */
		uint64_t msd; /* current max-stream-data limit to enforce */
		uint64_t msd_init; /* initial max-stream-data */
	} rx;
	/* emission side */
	struct {
		uint64_t offset; /* last offset of data ready to be sent */
		uint64_t sent_offset; /* last offset sent by transport layer */
		struct buffer buf; /* transmit buffer before sending via xprt */
		uint64_t msd; /* fctl bytes limit to respect on emission */
	} tx;

	struct eb64_node by_id; /* presumably the node stored in qcc.streams_by_id - TODO confirm */
	uint64_t id; /* stream ID (see the QCS_ID_* macros in mux_quic.h) */
	struct qc_stream_desc *stream;

	struct list el; /* element of qcc.send_retry_list */
	struct list el_send; /* element of qcc.send_list */
	struct list el_opening; /* element of qcc.opening_list */

	struct wait_event wait_event;
	struct wait_event *subs;

	uint64_t err; /* error code to transmit via RESET_STREAM */

	int start; /* base timestamp for http-request timeout */
};

/* Used as qcc_app_ops.close callback argument. */
enum qcc_app_ops_close_side {
	QCC_APP_OPS_CLOSE_SIDE_RD, /* Read channel closed (RESET_STREAM received). */
	QCC_APP_OPS_CLOSE_SIDE_WR  /* Write channel closed (STOP_SENDING received). */
};

/* QUIC application layer operations.
 * NOTE(review): the individual callback contracts (return values, which ones
 * are optional) are not documented in this header; check the application
 * layer implementations before relying on any of them.
 */
struct qcc_app_ops {
	int (*init)(struct qcc *qcc);
	int (*attach)(struct qcs *qcs, void *conn_ctx);
	ssize_t (*decode_qcs)(struct qcs *qcs, struct buffer *b, int fin);
	size_t (*snd_buf)(struct qcs *qcs, struct buffer *buf, size_t count);
	size_t (*nego_ff)(struct qcs *qcs, size_t count);
	size_t (*done_ff)(struct qcs *qcs);
	int (*close)(struct qcs *qcs, enum qcc_app_ops_close_side side); /* <side> tells which channel was closed */
	void (*detach)(struct qcs *qcs);
	int (*finalize)(void *ctx);
	void (*shutdown)(void *ctx); /* Close a connection. */
	void (*release)(void *ctx);
	void (*inc_err_cnt)(void *ctx, int err_code);
};
the stream ID + * types (client initiated bidirectional, server initiated bidirectional, + * client initiated unidirectional, server initiated unidirectional). + * Note that there is no reference to such stream sub IDs in the RFC. + */
/*
 * Structure used to describe the processes in master worker mode.
 * One instance describes one process; instances are chained through <list>.
 */
struct server;
struct mworker_proc {
	int pid;           /* presumably the process ID of the described process -- TODO confirm */
	int options;       /* options for mworker_proc: combination of PROC_O_* flags */
	char *id;          /* NOTE(review): looks like a textual identifier of the process -- confirm */
	char **command;    /* NOTE(review): presumably an argv-like vector for external programs (PROC_O_TYPE_PROG) -- confirm */
	char *path;
	char *version;
	int ipc_fd[2]; /* 0 is master side, 1 is worker side */
	int reloads;       /* presumably the number of reloads this process went through -- confirm */
	int failedreloads; /* number of failed reloads since the last successful one */
	int timestamp;     /* NOTE(review): unit and reference clock are not visible here */
	struct server *srv; /* the server entry in the master proxy */
	struct list list;  /* list element used to chain the process descriptors */
	int uid;
	int gid;
};
/* Helpers managing struct mworker_proc descriptors.
 *
 * The functions which take no argument are declared with an explicit
 * "(void)" parameter list. Before C23 an empty list "()" only declares a
 * function with unspecified parameters, which prevents the compiler from
 * rejecting calls that wrongly pass arguments.
 */
struct mworker_proc *mworker_proc_new(void);
void mworker_free_child(struct mworker_proc *);
void mworker_cleanup_proc(void);
/* Descriptor of one network namespace.
 *
 * the struct is just empty if namespaces are not supported (USE_NS not
 * defined), so that code may still manipulate pointers to it.
 */
struct netns_entry
{
#ifdef USE_NS
	struct ebpt_node node; /* ebtree node; presumably indexes the entry by namespace name -- TODO confirm */
	size_t name_len;       /* presumably the length of the namespace name -- TODO confirm */
	int fd;                /* NOTE(review): looks like a file descriptor referring to the namespace -- confirm */
#endif
};
/* Variant used when namespace support is not compiled in: <ns> is ignored
 * and the socket is simply created with socket(). Returns whatever socket()
 * returns, i.e. a file descriptor or -1 on error.
 */
static inline int my_socketat(const struct netns_entry *ns, int domain, int type, int protocol)
{
	int fd;

	(void)ns;
	fd = socket(domain, type, protocol);
	return fd;
}
Second, addition and deletion operations are constrained and may be impossible if + * a minimal gap size between data is not respected. A caller must always + * inspect the return values of these functions. To limit these errors and + * improve the buffer performance, <ncbuf> should be reserved for use-cases + * where the number of formed gaps is kept minimal and evenly spread. + */ + +/* **** internal documentation **** + * + * This section is useful to users who need to understand how ncbuf is + * implemented. + * + * Public and internal functions all share a common abstraction of the buffer. + * The buffer content is represented as a list of blocks, alternating between + * DATA and GAP blocks. This simplifies the buffer examination loop and + * insertion/deletion. Note that this list of blocks is not stored in the + * buffer structure. + * + * The buffer is considered to always start with a DATA block. The size of this + * block is stored just before <head> which is the pointer for offset 0. This + * space will always be reserved for this usage. It can be accessed through + * ncb_int_head(buf). If the buffer has no data at head, the reserved space + * will simply contain the value 0, and will be followed by a gap.
/* Non-contiguous circular buffer descriptor. The storage itself is not part
 * of the structure, only referenced by it.
 */
struct ncbuf {
	char *area;    /* start of the allocated storage, as returned by ncb_orig() */
	ncb_sz_t size; /* size of <area>; ncb_is_null() treats 0 as a null buffer */
	ncb_sz_t head; /* offset of the head inside <area>: ncb_head() is area + head */
};
*/ +enum ncb_add_mode { + NCB_ADD_PRESERVE, /* keep the already stored data and only insert in gaps */ + NCB_ADD_OVERWRT, /* overwrite old data with new ones */ + NCB_ADD_COMPARE, /* compare before insert : if new data are different do not proceed */ +}; + +#endif /* _HAPROXY_NCBUF_T_H */ diff --git a/include/haproxy/ncbuf.h b/include/haproxy/ncbuf.h new file mode 100644 index 0000000..8972793 --- /dev/null +++ b/include/haproxy/ncbuf.h @@ -0,0 +1,54 @@ +#ifndef _HAPROXY_NCBUF_H +#define _HAPROXY_NCBUF_H + +#include <haproxy/ncbuf-t.h> + +static inline int ncb_is_null(const struct ncbuf *buf) +{ + return buf->size == 0; +} + +void ncb_init(struct ncbuf *buf, ncb_sz_t head); +struct ncbuf ncb_make(char *area, ncb_sz_t size, ncb_sz_t head); + +/* Returns start of allocated buffer area. */ +static inline char *ncb_orig(const struct ncbuf *buf) +{ + return buf->area; +} + +/* Returns current head pointer into buffer area. */ +static inline char *ncb_head(const struct ncbuf *buf) +{ + return buf->area + buf->head; +} + +/* Returns the first byte after the allocated buffer area. */ +static inline char *ncb_wrap(const struct ncbuf *buf) +{ + return buf->area + buf->size; +} + +/* Returns the usable size of <buf> for data storage. This is the size of the + * allocated buffer without the reserved header space. 
+ */ +static inline ncb_sz_t ncb_size(const struct ncbuf *buf) +{ + if (ncb_is_null(buf)) + return 0; + + return buf->size - NCB_RESERVED_SZ; +} + +ncb_sz_t ncb_total_data(const struct ncbuf *buf); +int ncb_is_empty(const struct ncbuf *buf); +int ncb_is_full(const struct ncbuf *buf); +int ncb_is_fragmented(const struct ncbuf *buf); + +ncb_sz_t ncb_data(const struct ncbuf *buf, ncb_sz_t offset); + +enum ncb_ret ncb_add(struct ncbuf *buf, ncb_sz_t off, + const char *data, ncb_sz_t len, enum ncb_add_mode mode); +enum ncb_ret ncb_advance(struct ncbuf *buf, ncb_sz_t adv); + +#endif /* _HAPROXY_NCBUF_H */ diff --git a/include/haproxy/net_helper.h b/include/haproxy/net_helper.h new file mode 100644 index 0000000..f019d30 --- /dev/null +++ b/include/haproxy/net_helper.h @@ -0,0 +1,387 @@ +/* + * include/haproxy/net_helper.h + * This file contains miscellaneous network helper functions. + * + * Copyright (C) 2017 Olivier Houchard + * Copyright (C) 2017-2020 Willy Tarreau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/* Write a void* in native host order. <p> may be unaligned: the store goes
 * through a packed union like the other write_*() helpers, and it is exactly
 * as wide as a pointer whatever the platform. The pointer is stored as a
 * uintptr_t so that the const qualifier of <ptr> does not need to be dropped.
 */
static inline void write_ptr(void *p, const void *ptr)
{
	union { uintptr_t v; } __attribute__((packed))*u = p;

	u->v = (uintptr_t)ptr;
}
/* Read a possibly wrapping number of bytes <bytes> into destination <dst>. The
 * first segment is composed of <s1> bytes at <p1>. The remaining byte(s), if
 * any, are read from <p2>. <s1> may be zero and may also be larger than
 * <bytes>. The caller is always responsible for providing enough bytes. Note:
 * the function is purposely *not* marked inline to let the compiler decide
 * what to do with it, because it is small, placed on the critical path but
 * rarely called, and uses a lot of arguments if not inlined.
 *
 * Bytes are addressed through uint8_t pointers and the second segment is
 * indexed relative to its own start, so that no arithmetic is performed on
 * void pointers and no pointer located before <p2> is ever formed.
 */
static void readv_bytes(void *dst, const size_t bytes, const void *p1, size_t s1, const void *p2)
{
	const uint8_t *seg1 = p1;
	const uint8_t *seg2 = p2;
	uint8_t *out = dst;
	size_t idx;

	for (idx = 0; idx < bytes; idx++)
		out[idx] = (idx < s1) ? seg1[idx] : seg2[idx - s1];

	/* this memory barrier is critical otherwise gcc may over-optimize this
	 * code, completely removing it as well as any surrounding boundary
	 * check (4.7.1..6.4.0)!
	 */
	__asm__ volatile("" ::: "memory");
}
/* Write a possibly wrapping number of bytes <bytes> from location <src>. The
 * first <s1> bytes go to <p1>, the remaining one(s), if any, go to <p2>. <s1>
 * may be zero and may also be larger than <bytes>. The caller is always
 * responsible for providing enough room.
 *
 * Bytes are addressed through uint8_t pointers and the second segment is
 * indexed relative to its own start, so that no arithmetic is performed on
 * void pointers and no pointer located before <p2> is ever formed.
 */
static void writev_bytes(const void *src, const size_t bytes, void *p1, size_t s1, void *p2)
{
	const uint8_t *in = src;
	uint8_t *seg1 = p1;
	uint8_t *seg2 = p2;
	size_t idx;

	for (idx = 0; idx < bytes; idx++) {
		if (idx < s1)
			seg1[idx] = in[idx];
		else
			seg2[idx - s1] = in[idx];
	}
}
/* Read a possibly wrapping uint32_t in native host order. <s1> bytes are
 * available at <p1>, the following ones, if needed, are taken from <p2>. When
 * the first segment is large enough the value is read from it in one go,
 * otherwise it is rebuilt byte by byte from both segments. The caller is
 * always responsible for providing enough bytes.
 */
static inline uint32_t readv_u32(const void *p1, size_t s1, const void *p2)
{
	uint32_t val;

	if (likely(s1 >= sizeof(val)))
		return read_u32(p1);

	readv_bytes(&val, sizeof(val), p1, s1, p2);
	return val;
}
/* Write a possibly wrapping uint64_t in native host order. <s1> bytes of room
 * are available at <p1>, the following ones, if needed, are at <p2>. When the
 * first segment is too short the value is written byte by byte over both
 * segments, otherwise it is written to <p1> in one go. The caller is always
 * responsible for providing enough room.
 */
static inline void writev_u64(void *p1, size_t s1, void *p2, const uint64_t u64)
{
	if (unlikely(s1 < sizeof(u64))) {
		writev_bytes(&u64, sizeof(u64), p1, s1, p2);
		return;
	}
	write_u64(p1, u64);
}
/* Write a possibly wrapping uint16_t in network order. The first segment is
 * composed of <s1> bytes at <p1>. The remaining byte(s), if any, are written
 * to <p2>. <s1> may be zero and may be larger than the type. The caller is
 * always responsible for providing enough room.
 *
 * Note: <p1> and <p2> are declared const for compatibility with existing
 * callers but the function does write through them, hence the explicit casts
 * below. Byte-typed pointers are used so that no arithmetic is performed on
 * void pointers.
 */
static inline void writev_n16(const void *p1, size_t s1, const void *p2, const uint16_t u16)
{
	uint8_t *hi;
	uint8_t *lo;

	if (unlikely(s1 < 2)) {
		if (s1 == 0) {
			/* nothing fits in the first segment */
			hi = (uint8_t *)p2;
			lo = hi + 1;
		}
		else {
			/* the value is split across both segments */
			hi = (uint8_t *)p1;
			lo = (uint8_t *)p2;
		}
	}
	else {
		/* everything fits in the first segment */
		hi = (uint8_t *)p1;
		lo = hi + 1;
	}
	*hi = u16 >> 8;
	*lo = u16;
}
+#endif /* _HAPROXY_NET_HELPER_H */
+/* The principle is to be able to change the type of a pointer by pointing + * it directly to an object type. The object type indicates the format of the + * structure holding the type, and this is used to retrieve the pointer to the + * beginning of the structure. Doing so saves us from having to maintain both + * a pointer and a type for elements such as connections which can point to + * various types of objects. + */
it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_OBJ_TYPE_H +#define _HAPROXY_OBJ_TYPE_H + +#include <haproxy/api.h> +#include <haproxy/applet-t.h> +#include <haproxy/check-t.h> +#include <haproxy/connection-t.h> +#include <haproxy/listener-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/pool.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +#include <haproxy/stream-t.h> + +static inline enum obj_type obj_type(const enum obj_type *t) +{ + if (!t || *t >= OBJ_TYPE_ENTRIES) + return OBJ_TYPE_NONE; + return *t; +} + +static inline const char *obj_type_name(const enum obj_type *t) +{ + switch (obj_type(t)) { + case OBJ_TYPE_NONE: return "NONE"; + case OBJ_TYPE_LISTENER: return "LISTENER"; + case OBJ_TYPE_PROXY: return "PROXY"; + case OBJ_TYPE_SERVER: return "SERVER"; + case OBJ_TYPE_APPLET: return "APPLET"; + case OBJ_TYPE_APPCTX: return "APPCTX"; + case OBJ_TYPE_CONN: return "CONN"; + case OBJ_TYPE_SRVRQ: return "SRVRQ"; + case OBJ_TYPE_SC: return "SC"; + case OBJ_TYPE_STREAM: return "STREAM"; + case OBJ_TYPE_CHECK: return "CHECK"; + default: return "!INVAL!"; + } +} + +/* Note: for convenience, we provide two versions of each function : + * - __objt_<type> : converts the pointer without any control of its + * value nor type. 
+ * - objt_<type> : same as above except that if the pointer is NULL + * or points to a non-matching type, NULL is returned instead. + */ + +static inline struct listener *__objt_listener(enum obj_type *t) +{ + return container_of(t, struct listener, obj_type); +} + +static inline struct listener *objt_listener(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_LISTENER) + return NULL; + return __objt_listener(t); +} + +static inline struct proxy *__objt_proxy(enum obj_type *t) +{ + return container_of(t, struct proxy, obj_type); +} + +static inline struct proxy *objt_proxy(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_PROXY) + return NULL; + return __objt_proxy(t); +} + +static inline struct server *__objt_server(enum obj_type *t) +{ + return container_of(t, struct server, obj_type); +} + +static inline struct server *objt_server(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_SERVER) + return NULL; + return __objt_server(t); +} + +static inline struct applet *__objt_applet(enum obj_type *t) +{ + return container_of(t, struct applet, obj_type); +} + +static inline struct applet *objt_applet(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_APPLET) + return NULL; + return __objt_applet(t); +} + +static inline struct appctx *__objt_appctx(enum obj_type *t) +{ + return container_of(t, struct appctx, obj_type); +} + +static inline struct appctx *objt_appctx(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_APPCTX) + return NULL; + return __objt_appctx(t); +} + +static inline struct stconn *__objt_sc(enum obj_type *t) +{ + return (container_of(t, struct stconn, obj_type)); +} + +static inline struct stconn *objt_sc(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_SC) + return NULL; + return __objt_sc(t); +} + +static inline struct connection *__objt_conn(enum obj_type *t) +{ + return container_of(t, struct connection, obj_type); +} + +static inline struct connection *objt_conn(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_CONN) + return NULL; + return __objt_conn(t); +} + 
+static inline struct resolv_srvrq *__objt_resolv_srvrq(enum obj_type *t) +{ + return container_of(t, struct resolv_srvrq, obj_type); +} + +static inline struct resolv_srvrq *objt_resolv_srvrq(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_SRVRQ) + return NULL; + return __objt_resolv_srvrq(t); +} + +static inline struct stream *__objt_stream(enum obj_type *t) +{ + return container_of(t, struct stream, obj_type); +} + +static inline struct stream *objt_stream(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_STREAM) + return NULL; + return __objt_stream(t); +} + +static inline struct check *__objt_check(enum obj_type *t) +{ + return container_of(t, struct check, obj_type); +} + +static inline struct check *objt_check(enum obj_type *t) +{ + if (!t || *t != OBJ_TYPE_CHECK) + return NULL; + return __objt_check(t); +} + +/* Returns the pointer to the beginning of the structure embedding <t>, chosen + * from the type stored in <t>. NULL is returned when <t> is NULL, when it + * holds OBJ_TYPE_NONE, or when it holds a value that is not below + * OBJ_TYPE_ENTRIES, because obj_type() reports all of these as OBJ_TYPE_NONE. + * The "default" branch is thus only a safeguard. + */ +static inline void *obj_base_ptr(enum obj_type *t) +{ + switch (obj_type(t)) { + case OBJ_TYPE_NONE: return NULL; + case OBJ_TYPE_LISTENER: return __objt_listener(t); + case OBJ_TYPE_PROXY: return __objt_proxy(t); + case OBJ_TYPE_SERVER: return __objt_server(t); + case OBJ_TYPE_APPLET: return __objt_applet(t); + case OBJ_TYPE_APPCTX: return __objt_appctx(t); + case OBJ_TYPE_CONN: return __objt_conn(t); + case OBJ_TYPE_SRVRQ: return __objt_resolv_srvrq(t); + case OBJ_TYPE_SC: return __objt_sc(t); + case OBJ_TYPE_STREAM: return __objt_stream(t); + case OBJ_TYPE_CHECK: return __objt_check(t); + default: return t; // exact pointer for invalid case + } +} + +#endif /* _HAPROXY_OBJ_TYPE_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/openssl-compat.h b/include/haproxy/openssl-compat.h new file mode 100644 index 0000000..5639468 --- /dev/null +++ b/include/haproxy/openssl-compat.h @@ -0,0 +1,487 @@ +#ifndef _HAPROXY_OPENSSL_COMPAT_H +#define _HAPROXY_OPENSSL_COMPAT_H +#ifdef USE_OPENSSL + +#ifdef USE_OPENSSL_WOLFSSL +#define TLSEXT_MAXLEN_host_name 255 +#include <wolfssl/options.h> +#endif +
+#ifdef USE_OPENSSL_AWSLC +#include <openssl/base.h> +#if !defined(OPENSSL_IS_AWSLC) +#error "USE_OPENSSL_AWSLC is set but OPENSSL_IS_AWSLC is not defined, wrong header files detected" +#endif +#endif + +#include <openssl/bn.h> +#include <openssl/crypto.h> +#include <openssl/ssl.h> +#include <openssl/x509.h> +#include <openssl/x509v3.h> +#include <openssl/err.h> +#include <openssl/rand.h> +#include <openssl/hmac.h> +#include <openssl/rsa.h> +#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) +#include <openssl/ocsp.h> +#endif +#ifndef OPENSSL_NO_DH +#include <openssl/dh.h> +#endif +#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE) +#include <openssl/engine.h> +#endif + +#ifdef SSL_MODE_ASYNC +#include <openssl/async.h> +#endif + +#if (OPENSSL_VERSION_NUMBER >= 0x3000000fL) +#include <openssl/core_names.h> +#include <openssl/decoder.h> +#include <openssl/param_build.h> +#include <openssl/provider.h> +#endif + +#ifdef USE_QUIC_OPENSSL_COMPAT +#include <haproxy/quic_openssl_compat.h> +#endif + +#if defined(LIBRESSL_VERSION_NUMBER) +/* LibreSSL is a fork of OpenSSL 1.0.1g but pretends to be 2.0.0, thus + * systematically breaking when some code is written for a specific version + * of OpenSSL. Let's make it appear like what it really is and deal with + * extra features with ORs and not with AND NOT. 
+ */ +#define HA_OPENSSL_VERSION_NUMBER 0x1000107fL +#else /* this is for a real OpenSSL or a truly compatible derivative */ +#define HA_OPENSSL_VERSION_NUMBER OPENSSL_VERSION_NUMBER +#endif + +#ifndef OPENSSL_VERSION +#define OPENSSL_VERSION SSLEAY_VERSION +#define OpenSSL_version(x) SSLeay_version(x) +#define OpenSSL_version_num SSLeay +#endif + +#if (defined(LIBRESSL_VERSION_NUMBER) && LIBRESSL_VERSION_NUMBER >= 0x2070100fL) || defined(OPENSSL_IS_BORINGSSL) || (!defined(LIBRESSL_VERSION_NUMBER) && (OPENSSL_VERSION_NUMBER >= 0x10100000L)) +#define HAVE_SSL_EXTRACT_RANDOM +#endif + +#if ((OPENSSL_VERSION_NUMBER >= 0x10101000L) && !defined(OPENSSL_IS_BORINGSSL) && !defined(LIBRESSL_VERSION_NUMBER)) +#define HAVE_SSL_RAND_KEEP_RANDOM_DEVICES_OPEN +#endif + +#if ((OPENSSL_VERSION_NUMBER >= 0x10101000L) && !defined(LIBRESSL_VERSION_NUMBER) && !defined(OPENSSL_IS_BORINGSSL)) || defined(USE_OPENSSL_WOLFSSL) +#define HAVE_SSL_CTX_SET_CIPHERSUITES +#define HAVE_ASN1_TIME_TO_TM +#endif + +#if (defined(SSL_CLIENT_HELLO_CB) || defined(OPENSSL_IS_BORINGSSL)) +#define HAVE_SSL_CLIENT_HELLO_CB +#endif + +#if ((OPENSSL_VERSION_NUMBER >= 0x1000200fL) && !defined(OPENSSL_NO_TLSEXT) && !defined(LIBRESSL_VERSION_NUMBER) && !defined(OPENSSL_IS_BORINGSSL)) +#define HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT +#endif + +#if ((OPENSSL_VERSION_NUMBER >= 0x10002000L) && !defined(LIBRESSL_VERSION_NUMBER)) +#define HAVE_SSL_CTX_get0_privatekey +#endif + +#if HA_OPENSSL_VERSION_NUMBER >= 0x1000104fL || defined(USE_OPENSSL_WOLFSSL) || defined(USE_OPENSSL_AWSLC) +/* CRYPTO_memcmp() is present since openssl 1.0.1d */ +#define HAVE_CRYPTO_memcmp +#endif + +#if (defined(SN_ct_cert_scts) && !defined(OPENSSL_NO_TLSEXT)) +#define HAVE_SSL_SCTL +#endif + +#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) || defined(USE_OPENSSL_AWSLC) || (defined(USE_OPENSSL_WOLFSSL) && defined(HAVE_SECRET_CALLBACK)) +#define HAVE_SSL_KEYLOG +#endif + +/* minimum OpenSSL 1.1.1 & libreSSL 3.3.6 */ +#if 
(defined(LIBRESSL_VERSION_NUMBER) && (LIBRESSL_VERSION_NUMBER >= 0x3030600L)) || (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) || defined(USE_OPENSSL_WOLFSSL) +#define HAVE_SSL_get0_verified_chain +#endif + + +#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL) +#define HAVE_OSSL_PARAM +#define MAC_CTX EVP_MAC_CTX +#define HASSL_DH EVP_PKEY +#define HASSL_DH_free EVP_PKEY_free +#define HASSL_DH_up_ref EVP_PKEY_up_ref + +#define HAVE_SSL_PROVIDERS + +#else /* HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL */ +#define MAC_CTX HMAC_CTX +#define HASSL_DH DH +#define HASSL_DH_free DH_free +#define HASSL_DH_up_ref DH_up_ref +#endif + +#if ((HA_OPENSSL_VERSION_NUMBER < 0x1000000fL) && !defined(X509_get_X509_PUBKEY)) +#define X509_get_X509_PUBKEY(x) ((x)->cert_info->key) +#endif + + +#if (HA_OPENSSL_VERSION_NUMBER < 0x1000100fL) +/* + * Functions introduced in OpenSSL 1.0.1 + */ + +/* Copies the <sid_ctx_len> bytes at <sid_ctx> into session <s> as its session + * ID context. Returns 1 on success, or 0 without modifying <s> when the + * context does not fit into s->sid_ctx (the unchecked memcpy() would otherwise + * write past the end of the array). + */ +static inline int SSL_SESSION_set1_id_context(SSL_SESSION *s, const unsigned char *sid_ctx, unsigned int sid_ctx_len) +{ + if (sid_ctx_len > sizeof(s->sid_ctx)) + return 0; + s->sid_ctx_length = sid_ctx_len; + memcpy(s->sid_ctx, sid_ctx, sid_ctx_len); + return 1; +} +#endif + + +#if (HA_OPENSSL_VERSION_NUMBER < 0x1000200fL) && (!defined(LIBRESSL_VERSION_NUMBER) || LIBRESSL_VERSION_NUMBER < 0x2070500fL) +/* introduced in openssl 1.0.2 */ + +static inline STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain) +{ + STACK_OF(X509) *ret; + int i; + + if ((ret = sk_X509_dup(chain)) == NULL) + return NULL; + for (i = 0; i < sk_X509_num(ret); i++) { + X509 *x = sk_X509_value(ret, i); + CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); + } + return ret; +} + +#endif + +#ifdef OPENSSL_IS_BORINGSSL +/* + * Functions missing in BoringSSL + */ + +static inline X509_CRL *X509_OBJECT_get0_X509_CRL(const X509_OBJECT *a) +{ + if (a == NULL || a->type != X509_LU_CRL) { + return NULL; + } + return a->data.crl; +} +#endif + +#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL) && (!defined(LIBRESSL_VERSION_NUMBER) || LIBRESSL_VERSION_NUMBER < 0x2070000fL) +/* + * Functions introduced in
OpenSSL 1.1.0 and in LibreSSL 2.7.0 + */ + +static inline STACK_OF(X509_OBJECT) *X509_STORE_get0_objects(X509_STORE *st) +{ + return st->objs; +} + +static inline int X509_OBJECT_get_type(const X509_OBJECT *a) +{ + return a->type; +} + +static inline X509 *X509_OBJECT_get0_X509(const X509_OBJECT *a) +{ + if (a == NULL || a->type != X509_LU_X509) { + return NULL; + } + return a->data.x509; +} + +static inline X509_CRL *X509_OBJECT_get0_X509_CRL(const X509_OBJECT *a) +{ + if (a == NULL || a->type != X509_LU_CRL) { + return NULL; + } + return a->data.crl; +} + +/* Sets the session ID of <s> to the <sid_len> bytes at <sid>. Returns 1 on + * success, or 0 without modifying <s> when the ID does not fit into + * s->session_id (the unchecked memcpy() would otherwise write past the end of + * the array). + */ +static inline int SSL_SESSION_set1_id(SSL_SESSION *s, const unsigned char *sid, unsigned int sid_len) +{ + if (sid_len > sizeof(s->session_id)) + return 0; + s->session_id_length = sid_len; + memcpy(s->session_id, sid, sid_len); + return 1; +} + +static inline X509_ALGOR *X509_get0_tbs_sigalg(const X509 *x) +{ + return x->cert_info->signature; +} + +#if (!defined OPENSSL_NO_OCSP) +static inline const OCSP_CERTID *OCSP_SINGLERESP_get0_id(const OCSP_SINGLERESP *single) +{ + return single->certId; +} +#endif + +#ifndef OPENSSL_NO_DH +static inline int DH_set0_pqg(DH *dh, BIGNUM *p, BIGNUM *q, BIGNUM *g) +{ + /* Implements only the bare necessities for HAProxy: <q> is ignored + * and the previous <p> and <g> of <dh> are not released. + */ + dh->p = p; + dh->g = g; + return 1; +} +#endif + +static inline const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *x) +{ + return x->data; +} + +static inline void X509_up_ref(X509 *x) +{ + CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); +} + +static inline void EVP_PKEY_up_ref(EVP_PKEY *pkey) +{ + CRYPTO_add(&pkey->references, 1, CRYPTO_LOCK_EVP_PKEY); +} + +static inline void SSL_CTX_up_ref(SSL_CTX *ctx) +{ + CRYPTO_add(&ctx->references, 1, CRYPTO_LOCK_SSL_CTX); +} + +static inline int X509_CRL_get_signature_nid(const X509_CRL *crl) +{ + return OBJ_obj2nid(crl->sig_alg->algorithm); +} + +static inline const ASN1_TIME *X509_CRL_get0_lastUpdate(const X509_CRL *crl) +{ + return X509_CRL_get_lastUpdate(crl); +} + +static inline const ASN1_TIME *X509_CRL_get0_nextUpdate(const X509_CRL *crl) +{ +
return X509_CRL_get_nextUpdate(crl); +} + +static inline const ASN1_INTEGER *X509_REVOKED_get0_serialNumber(const X509_REVOKED *x) +{ + return x->serialNumber; +} + +static inline const ASN1_TIME *X509_REVOKED_get0_revocationDate(const X509_REVOKED *x) +{ + return x->revocationDate; +} + +static inline X509 *X509_STORE_CTX_get0_cert(X509_STORE_CTX *ctx) +{ + return ctx->cert; +} + +static inline int ECDSA_SIG_set0(ECDSA_SIG *sig, BIGNUM *r, BIGNUM *s) +{ + if (r == NULL || s == NULL) + return 0; + BN_clear_free(sig->r); + BN_clear_free(sig->s); + + sig->r = r; + sig->s = s; + return 1; +} + +#endif + +#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) +#if defined(SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB) +#define SSL_CTX_set_tlsext_ticket_key_evp_cb SSL_CTX_set_tlsext_ticket_key_cb +#endif + +/* + * Functions introduced in OpenSSL 3.0.0 + */ +static inline unsigned long ERR_peek_error_func(const char **func) +{ + unsigned long ret = ERR_peek_error(); + if (ret == 0) + return ret; + + if (func) + *func = ERR_func_error_string(ret); + + return ret; +} + +#endif + +#if (HA_OPENSSL_VERSION_NUMBER >= 0x1010000fL) || (defined(LIBRESSL_VERSION_NUMBER) && LIBRESSL_VERSION_NUMBER >= 0x2070200fL) +#define __OPENSSL_110_CONST__ const +#else +#define __OPENSSL_110_CONST__ +#endif + +/* ERR_remove_state() was deprecated in 1.0.0 in favor of + * ERR_remove_thread_state(), which was in turn deprecated in + * 1.1.0 and does nothing anymore. Let's simply silently kill + * it. + */ +#if (HA_OPENSSL_VERSION_NUMBER >= 0x1010000fL) +#undef ERR_remove_state +#define ERR_remove_state(x) +#endif + + +/* RAND_pseudo_bytes() is deprecated in 1.1.0 in favor of RAND_bytes(). Note + * that the return codes differ, but it happens that the only use case (ticket + * key update) was already wrong, considering a non-cryptographic random as a + * failure. 
+ */ +#if (HA_OPENSSL_VERSION_NUMBER >= 0x1010000fL) +#undef RAND_pseudo_bytes +#define RAND_pseudo_bytes(x,y) RAND_bytes(x,y) +#endif + + +/* Signature from RFC 5246, missing in openssl < 1.0.1 */ +#ifndef TLSEXT_signature_anonymous +#define TLSEXT_signature_anonymous 0 +#define TLSEXT_signature_rsa 1 +#define TLSEXT_signature_dsa 2 +#define TLSEXT_signature_ecdsa 3 +#endif + +#if ((HA_OPENSSL_VERSION_NUMBER < 0x1010000fL) && (!defined(LIBRESSL_VERSION_NUMBER) || LIBRESSL_VERSION_NUMBER < 0x2070000fL)) ||\ + defined(OPENSSL_IS_BORINGSSL) +#define X509_getm_notBefore X509_get_notBefore +#define X509_getm_notAfter X509_get_notAfter +#endif + +#if !defined(EVP_CTRL_AEAD_SET_IVLEN) +#define EVP_CTRL_AEAD_SET_IVLEN EVP_CTRL_GCM_SET_IVLEN +#endif + +#if !defined(EVP_CTRL_AEAD_SET_TAG) +#define EVP_CTRL_AEAD_SET_TAG EVP_CTRL_GCM_SET_TAG +#endif + +/* Supported hash function for TLS tickets */ +#ifdef OPENSSL_NO_SHA256 +#define TLS_TICKET_HASH_FUNCT EVP_sha1 +#else +#define TLS_TICKET_HASH_FUNCT EVP_sha256 +#endif /* OPENSSL_NO_SHA256 */ + +#ifndef SSL_OP_CIPHER_SERVER_PREFERENCE /* needs OpenSSL >= 0.9.7 */ +#define SSL_OP_CIPHER_SERVER_PREFERENCE 0 +#endif + +#ifndef SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION /* needs OpenSSL >= 0.9.7 */ +#define SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION 0 +#define SSL_renegotiate_pending(arg) 0 +#endif + +#ifndef SSL_OP_SINGLE_ECDH_USE /* needs OpenSSL >= 0.9.8 */ +#define SSL_OP_SINGLE_ECDH_USE 0 +#endif + +#ifndef SSL_OP_NO_TICKET /* needs OpenSSL >= 0.9.8 */ +#define SSL_OP_NO_TICKET 0 +#endif + +#ifndef SSL_OP_NO_COMPRESSION /* needs OpenSSL >= 0.9.9 */ +#define SSL_OP_NO_COMPRESSION 0 +#endif + +#ifdef OPENSSL_NO_SSL3 /* SSLv3 support removed */ +#undef SSL_OP_NO_SSLv3 +#define SSL_OP_NO_SSLv3 0 +#endif + +#ifndef SSL_OP_NO_TLSv1_1 /* needs OpenSSL >= 1.0.1 */ +#define SSL_OP_NO_TLSv1_1 0 +#endif + +#ifndef SSL_OP_NO_TLSv1_2 /* needs OpenSSL >= 1.0.1 */ +#define SSL_OP_NO_TLSv1_2 0 +#endif + +#ifndef SSL_OP_NO_TLSv1_3 /* 
needs OpenSSL >= 1.1.1 */ +#define SSL_OP_NO_TLSv1_3 0 +#endif + +#ifndef SSL_OP_SINGLE_DH_USE /* needs OpenSSL >= 0.9.6 */ +#define SSL_OP_SINGLE_DH_USE 0 +#endif + +#ifndef SSL_OP_SINGLE_ECDH_USE /* needs OpenSSL >= 1.0.0 */ +#define SSL_OP_SINGLE_ECDH_USE 0 +#endif + +#ifndef SSL_MODE_RELEASE_BUFFERS /* needs OpenSSL >= 1.0.0 */ +#define SSL_MODE_RELEASE_BUFFERS 0 +#endif + +#ifndef SSL_MODE_SMALL_BUFFERS /* needs small_records.patch */ +#define SSL_MODE_SMALL_BUFFERS 0 +#endif + +#ifndef SSL_OP_PRIORITIZE_CHACHA /* needs OpenSSL >= 1.1.1 */ +#define SSL_OP_PRIORITIZE_CHACHA 0 +#endif + +#ifndef SSL_CTRL_GET_EXTRA_CHAIN_CERTS +#define SSL_CTX_get_extra_chain_certs(ctx, chain) do { *(chain) = (ctx)->extra_certs; } while (0) +#endif + +#if HA_OPENSSL_VERSION_NUMBER < 0x10100000L && (!defined(LIBRESSL_VERSION_NUMBER) || LIBRESSL_VERSION_NUMBER < 0x2070000fL) +#define BIO_get_data(b) (b)->ptr +#define BIO_set_data(b, v) do { (b)->ptr = (v); } while (0) +#define BIO_set_init(b, v) do { (b)->init = (v); } while (0) + +#define BIO_meth_free(m) free(m) +#define BIO_meth_new(type, name) calloc(1, sizeof(BIO_METHOD)) +#define BIO_meth_set_gets(m, f) do { (m)->bgets = (f); } while (0) +#define BIO_meth_set_puts(m, f) do { (m)->bputs = (f); } while (0) +#define BIO_meth_set_read(m, f) do { (m)->bread = (f); } while (0) +#define BIO_meth_set_write(m, f) do { (m)->bwrite = (f); } while (0) +#define BIO_meth_set_create(m, f) do { (m)->create = (f); } while (0) +#define BIO_meth_set_ctrl(m, f) do { (m)->ctrl = (f); } while (0) +#define BIO_meth_set_destroy(m, f) do { (m)->destroy = (f); } while (0) +#endif + +#ifndef SSL_CTX_set_ecdh_auto +#define SSL_CTX_set_ecdh_auto(dummy, onoff) ((onoff) != 0) +#endif + +/* The EVP_MD_CTX_create() and EVP_MD_CTX_destroy() functions were renamed to + * EVP_MD_CTX_new() and EVP_MD_CTX_free() in OpenSSL 1.1.0, respectively. 
+ */ +#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL) +#define EVP_MD_CTX_new EVP_MD_CTX_create +#define EVP_MD_CTX_free EVP_MD_CTX_destroy +#endif + +/* OpenSSL 1.0.2 and onwards define SSL_CTX_set1_curves_list which is both a + * function and a macro. OpenSSL 1.0.2 to 1.1.0 define SSL_CTRL_SET_CURVES_LIST + * as a macro, which disappeared from 1.1.1. BoringSSL only has that one and + * not the former macro but it does have the function. Let's keep the test on + * the macro matching the function name. + */ +#if !defined(SSL_CTX_set1_curves_list) && defined(SSL_CTRL_SET_CURVES_LIST) +#define SSL_CTX_set1_curves_list SSL_CTX_set1_curves_list +#endif + +#if !defined(SSL_CTX_set1_sigalgs_list) && defined(SSL_CTRL_SET_SIGALGS_LIST) +#define SSL_CTX_set1_sigalgs_list SSL_CTX_set1_sigalgs_list +#endif + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_OPENSSL_COMPAT_H */ diff --git a/include/haproxy/pattern-t.h b/include/haproxy/pattern-t.h new file mode 100644 index 0000000..6c1ba24 --- /dev/null +++ b/include/haproxy/pattern-t.h @@ -0,0 +1,235 @@ +/* + * include/haproxy/pattern-t.h + * This file provides structures and types for ACLs. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PATTERN_T_H +#define _HAPROXY_PATTERN_T_H + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/regex-t.h> +#include <haproxy/sample_data-t.h> +#include <haproxy/thread-t.h> + + +/* Pattern matching function result. + * + * We're using a 3-state matching system to match samples against patterns in + * ACLs : + * - PASS : at least one pattern already matches + * - MISS : some data is missing to decide if some rules may finally match. + * - FAIL : no pattern may ever match + * + * We assign values 0, 1 and 3 to FAIL, MISS and PASS respectively, so that we + * can make use of standard arithmetic for the truth tables below : + * + * x | !x x&y | F(0) | M(1) | P(3) x|y | F(0) | M(1) | P(3) + * ------+----- -----+------+------+----- -----+------+------+----- + * F(0) | P(3) F(0)| F(0) | F(0) | F(0) F(0)| F(0) | M(1) | P(3) + * M(1) | M(1) M(1)| F(0) | M(1) | M(1) M(1)| M(1) | M(1) | P(3) + * P(3) | F(0) P(3)| F(0) | M(1) | P(3) P(3)| P(3) | P(3) | P(3) + * + * neg(x) = (3 >> x) and(x,y) = (x & y) or(x,y) = (x | y) + * + * For efficiency, the ACL return flags are directly mapped from the pattern + * match flags. A pattern can't return "MISS" since it's always presented an + * existing sample. 
So that leaves us with only two possible values : + * NOMATCH = 0 + * MATCH = 3 + */ +enum pat_match_res { + PAT_NOMATCH = 0, /* sample didn't match any pattern */ + PAT_MATCH = 3, /* sample matched at least one pattern */ +}; + +/* possible flags for patterns matching or parsing */ +enum { + PAT_MF_IGNORE_CASE = 1 << 0, /* ignore case */ + PAT_MF_NO_DNS = 1 << 1, /* don't perform any DNS requests */ +}; + +/* possible flags for patterns storage */ +enum { + PAT_SF_TREE = 1 << 0, /* some patterns are arranged in a tree */ + PAT_SF_REGFREE = 1 << 1, /* run regex_free() on the pointer */ +}; + +/* ACL match methods */ +enum { + PAT_MATCH_FOUND, /* just ensure that fetch found the sample */ + PAT_MATCH_BOOL, /* match fetch's integer value as boolean */ + PAT_MATCH_INT, /* unsigned integer (int) */ + PAT_MATCH_IP, /* IPv4/IPv6 address (IP) */ + PAT_MATCH_BIN, /* hex string (bin) */ + PAT_MATCH_LEN, /* string length (str -> int) */ + PAT_MATCH_STR, /* exact string match (str) */ + PAT_MATCH_BEG, /* beginning of string (str) */ + PAT_MATCH_SUB, /* substring (str) */ + PAT_MATCH_DIR, /* directory-like sub-string (str) */ + PAT_MATCH_DOM, /* domain-like sub-string (str) */ + PAT_MATCH_END, /* end of string (str) */ + PAT_MATCH_REG, /* regex (str -> reg) */ + PAT_MATCH_REGM, /* regex (str -> reg) with match zones */ + /* keep this one last */ + PAT_MATCH_NUM +}; + +#define PAT_REF_MAP 0x1 /* Set if the reference is used by at least one map. */ +#define PAT_REF_ACL 0x2 /* Set if the reference is used by at least one acl. */ +#define PAT_REF_SMP 0x4 /* Flag used if the reference contains a sample. */ + +/* This struct contains a list of reference strings for dynamically + * updatable patterns. + */ +struct pat_ref { + struct list list; /* Used to chain refs. */ + char *reference; /* The reference name. */ + char *display; /* String displayed to identify the pattern origin. */ + struct list head; /* The head of the list of struct pat_ref_elt.
*/ + struct eb_root ebmb_root; /* The tree where pattern reference elements are attached. */ + struct list pat; /* The head of the list of struct pattern_expr. */ + unsigned int flags; /* flags PAT_REF_*. */ + unsigned int curr_gen; /* current generation number (anything below can be removed) */ + unsigned int next_gen; /* next generation number (insertions use this one) */ + int unique_id; /* Each pattern reference have unique id. */ + unsigned long long revision; /* updated for each update */ + unsigned long long entry_cnt; /* the total number of entries */ + THREAD_ALIGN(64); + __decl_thread(HA_RWLOCK_T lock); /* Lock used to protect pat ref elements */ +}; + +/* This is a part of struct pat_ref. Each entry contains one pattern and one + * associated value as original string. All derivative forms (via exprs) are + * accessed from list_head or tree_head. Be careful, it's variable-sized! + */ +struct pat_ref_elt { + struct list list; /* Used to chain elements. */ + struct list back_refs; /* list of users tracking this pat ref */ + void *list_head; /* all &pattern_list->from_ref derived from this reference, ends with NULL */ + void *tree_head; /* all &pattern_tree->from_ref derived from this reference, ends with NULL */ + char *sample; + unsigned int gen_id; /* generation of pat_ref this was made for */ + int line; + struct ebmb_node node; /* Node to attach this element to its <pat_ref> ebtree. */ + const char pattern[0]; // const only to make sure nobody tries to free it. +}; + +/* This contain each tree indexed entry. This struct permit to associate + * "sample" with a tree entry. It is used with maps. + */ +struct pattern_tree { + void *from_ref; // pattern_tree linked from pat_ref_elt, ends with NULL + struct sample_data *data; + struct pat_ref_elt *ref; + struct pattern_expr *expr; + struct ebmb_node node; +}; + +/* This describes one ACL pattern, which might be a single value or a tree of + * values. 
All patterns for a single ACL expression are linked together. Some + * of them might have a type (eg: IP). Right now, the types are shared with + * the samples, though it is possible that in the future this will change to + * accommodate for other types (eg: meth, regex). Unsigned and constant types + * are preferred when there is a doubt. + */ +struct pattern { + int type; /* type of the ACL pattern (SMP_T_*) */ + union { + int i; /* integer value */ + struct { + signed long long min, max; + unsigned int min_set:1; + unsigned int max_set:1; + } range; /* integer range */ + struct { + struct in_addr addr; + struct in_addr mask; + } ipv4; /* IPv4 address */ + struct { + struct in6_addr addr; + unsigned char mask; /* number of bits */ + } ipv6; /* IPv6 address/mask */ + } val; /* direct value */ + union { + void *ptr; /* any data */ + char *str; /* any string */ + struct my_regex *reg; /* a compiled regex */ + } ptr; /* indirect values, allocated or NULL */ + int len; /* data length when required */ + int sflags; /* flags relative to the storage method. */ + struct sample_data *data; /* used to store a pointer to sample value associated + with the match. It is used with maps */ + struct pat_ref_elt *ref; +}; + +/* This struct is just used for chaining patterns */ +struct pattern_list { + void *from_ref; // pattern_list linked from pat_ref_elt, ends with NULL + struct list list; + struct pattern pat; + struct pattern_expr *expr; +}; + +/* Description of a pattern expression. + * It contains pointers to the parse and match functions, and a list or tree of + * patterns to test against. The structure is organized so that the hot parts + * are grouped together in order to optimize caching. + */ +struct pattern_expr { + struct list list; /* Used for chaining pattern_expr in pat_ref. */ + struct pat_ref *ref; /* The pattern reference if exists. */ + struct pattern_head *pat_head; /* Point to the pattern_head that contains manipulation functions.
+ * Note that this link point on compatible head but not on the real + * head. You can use only the function, and you must not use the + * "head". Don't write "(struct pattern_expr *)any->pat_head->expr". + */ + struct list patterns; /* list of acl_patterns */ + struct eb_root pattern_tree; /* may be used for lookup in large datasets */ + struct eb_root pattern_tree_2; /* may be used for different types */ + int mflags; /* flags relative to the parsing or matching method. */ + __decl_thread(HA_RWLOCK_T lock); /* lock used to protect patterns */ +}; + +/* This is a list of expression. A struct pattern_expr can be used by + * more than one "struct pattern_head". this intermediate struct + * permit more than one list. + */ +struct pattern_expr_list { + struct list list; /* Used for chaining pattern_expr in pattern_head. */ + int do_free; + struct pattern_expr *expr; /* The used expr. */ +}; + + +/* This struct contains a list of pattern expr */ +struct sample; +struct pattern_head { + int (*parse)(const char *text, struct pattern *pattern, int flags, char **err); + int (*parse_smp)(const char *text, struct sample_data *data); + int (*index)(struct pattern_expr *, struct pattern *, char **); + void (*prune)(struct pattern_expr *); + struct pattern *(*match)(struct sample *, struct pattern_expr *, int); + int expect_type; /* type of the expected sample (SMP_T_*) */ + + struct list head; /* This is a list of struct pattern_expr_list. */ +}; + +#endif /* _HAPROXY_PATTERN_T_H */ diff --git a/include/haproxy/pattern.h b/include/haproxy/pattern.h new file mode 100644 index 0000000..49e5ad2 --- /dev/null +++ b/include/haproxy/pattern.h @@ -0,0 +1,273 @@ +/* + * include/haproxy/pattern.h + * This file provides structures and types for pattern matching. 
+ * + * Copyright (C) 2000-2013 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PATTERN_H +#define _HAPROXY_PATTERN_H + +#include <string.h> + +#include <haproxy/api.h> +#include <haproxy/pattern-t.h> +#include <haproxy/sample-t.h> + +/* pattern management function arrays */ +extern const char *const pat_match_names[PAT_MATCH_NUM]; +extern int const pat_match_types[PAT_MATCH_NUM]; + +extern int (*const pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **); +extern int (*const pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **); +extern void (*const pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *); +extern struct pattern *(*const pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int); + +/* This is the root of the list of all available pattern_ref. */ +extern struct list pattern_reference; + +int pattern_finalize_config(void); + +/* return the PAT_MATCH_* index for match name "name", or < 0 if not found */ +static inline int pat_find_match_name(const char *name) +{ + int i; + + for (i = 0; i < PAT_MATCH_NUM; i++) + if (strcmp(name, pat_match_names[i]) == 0) + return i; + return -1; +} + +/* This function executes a pattern match on a sample. It applies the patterns of <head> + * to sample <smp>.
The function returns NULL if the sample doesn't match. It returns + * non-null if the sample matches. If <fill> is true and the sample matches, the + * function returns the matched pattern. In many cases, this pattern can be a + * static buffer. + */ +struct pattern *pattern_exec_match(struct pattern_head *head, struct sample *smp, int fill); + +/* + * + * The following functions get "pattern", duplicate it and index it in "expr" + * + */ +int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err); + +/* + * + * The following function deletes all patterns related to reference pattern + * element <elt> in pattern reference <ref>. + * + */ +void pat_delete_gen(struct pat_ref *ref, struct pat_ref_elt *elt); + +/* + * + * The following function cleans all entries of a pattern expression and + * resets the tree and list roots. + * + */ +void pat_prune_gen(struct pattern_expr *expr); + +/* + * + * The following functions are general purpose pattern matching functions. + * + */ + + +/* ignore the current line */ +int pat_parse_nothing(const char *text, struct pattern *pattern, int mflags, char **err); + +/* Parse an integer. It is put both in min and max. */ +int pat_parse_int(const char *text, struct pattern *pattern, int mflags, char **err); + +/* Parse a version. It is put both in min and max.
*/ +int pat_parse_dotted_ver(const char *text, struct pattern *pattern, int mflags, char **err); + +/* Parse a range of integers delimited by either ':' or '-'. If only one + * integer is read, it is set as both min and max. + */ +int pat_parse_range(const char *text, struct pattern *pattern, int mflags, char **err); + +/* Parse a string. It is allocated and duplicated. */ +int pat_parse_str(const char *text, struct pattern *pattern, int mflags, char **err); + +/* Parse a hexa binary definition. It is allocated and duplicated. */ +int pat_parse_bin(const char *text, struct pattern *pattern, int mflags, char **err); + +/* Parse a regex. It is allocated. */ +int pat_parse_reg(const char *text, struct pattern *pattern, int mflags, char **err); + +/* Parse an IP address and an optional mask in the form addr[/mask]. + * The addr may either be an IPv4 or IPv6 address, or a hostname that resolves + * to a valid IPv4 address. The mask can be provided as a number of bits, or + * even as a dotted mask (but the latter only works for IPv4 addresses). + * Returns 1 if OK, otherwise 0. 
+ */ +int pat_parse_ip(const char *text, struct pattern *pattern, int mflags, char **err); + +/* NB: For two strings to be identical, it is required that their lengths match */ +struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int fill); + +/* NB: For two binary buffers to be identical, it is required that their lengths match */ +struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Checks that the length of the sample in <smp> is included between min and max */ +struct pattern *pat_match_len(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Checks that the integer in <smp> is included between min and max */ +struct pattern *pat_match_int(struct sample *smp, struct pattern_expr *expr, int fill); + +/* always return false */ +struct pattern *pat_match_nothing(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Checks that the pattern matches the end of the tested string. */ +struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Checks that the pattern matches the beginning of the tested string. */ +struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Checks that the pattern is included inside the tested string. */ +struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Checks that the pattern is included inside the tested string, but enclosed + * between slashes or at the beginning or end of the string. Slashes at the + * beginning or end of the pattern are ignored. + */ +struct pattern *pat_match_dir(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Checks that the pattern is included inside the tested string, but enclosed + * between dots or at the beginning or end of the string. Dots at the beginning + * or end of the pattern are ignored. 
+ */ +struct pattern *pat_match_dom(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Check that the input IP address (IPv4 or IPv6) in <smp> matches the IP/mask + * in pattern + */ +struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int fill); + +/* Executes a regex. It temporarily changes the data to add a trailing zero, + * and restores the previous character when leaving. + */ +struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill); +struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill); + +/* + * pattern_ref manipulation. + */ +struct pat_ref *pat_ref_lookup(const char *reference); +struct pat_ref *pat_ref_lookupid(int unique_id); +struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned int flags); +struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int flags); +struct pat_ref_elt *pat_ref_find_elt(struct pat_ref *ref, const char *key); +struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, const char *sample, int line); +struct pat_ref_elt *pat_ref_load(struct pat_ref *ref, unsigned int gen, const char *pattern, const char *sample, int line, char **err); +int pat_ref_push(struct pat_ref_elt *elt, struct pattern_expr *expr, int patflags, char **err); +int pat_ref_add(struct pat_ref *ref, const char *pattern, const char *sample, char **err); +int pat_ref_set(struct pat_ref *ref, const char *pattern, const char *sample, char **err, struct pat_ref_elt *elt); +int pat_ref_set_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt, const char *value, char **err); +int pat_ref_delete(struct pat_ref *ref, const char *key); +void pat_ref_delete_by_ptr(struct pat_ref *ref, struct pat_ref_elt *elt); +int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt); +int pat_ref_prune(struct pat_ref *ref); +int pat_ref_commit_elt(struct pat_ref *ref, struct pat_ref_elt *elt, char **err); 
+int pat_ref_purge_range(struct pat_ref *ref, uint from, uint to, int budget); + +/* Create a new generation number for next pattern updates and returns it. This + * must be used to atomically insert new patterns that will atomically replace + * all current ones on commit. Generation numbers start at zero and are only + * incremented and wrap at 2^32. There must not be more than 2^31-1 called + * without a commit. The new reserved number is returned. Locking is not + * necessary. + */ +static inline unsigned int pat_ref_newgen(struct pat_ref *ref) +{ + return HA_ATOMIC_ADD_FETCH(&ref->next_gen, 1); +} + +/* Give up a previously assigned generation number. By doing this the caller + * certifies that no element was inserted using this number, and that this + * number might safely be reused if none was assigned since. This is convenient + * to avoid wasting numbers in case an operation couldn't be started right + * after a call to pat_ref_newgen(), but it is absolutely not necessary. The + * main use case is to politely abandon an update attempt upon error just after + * having received a number (e.g. attempting to retrieve entries from the + * network, and failed to establish a connection). This is done atomically so + * no locking is necessary. + */ +static inline void pat_ref_giveup(struct pat_ref *ref, unsigned int gen) +{ + HA_ATOMIC_CAS(&ref->next_gen, &gen, gen - 1); +} + +/* Commit the whole pattern reference by updating the generation number or + * failing in case someone else managed to do it meanwhile. While this could + * be done using a CAS, it must instead be called with the PATREF_LOCK held in + * order to guarantee the consistency of the generation number for all other + * functions that rely on it. It returns zero on success, non-zero on failure + * (technically speaking it returns the difference between the attempted + * generation and the effective one, so that it can be used for reporting). 
+ */ +static inline int pat_ref_commit(struct pat_ref *ref, unsigned int gen) +{ + if ((int)(gen - ref->curr_gen) > 0) + ref->curr_gen = gen; + return gen - ref->curr_gen; +} + +/* This function purges all elements from <ref> that are older than generation + * <oldest>. It will not purge more than <budget> entries at once, in order to + * remain responsive. If budget is negative, no limit is applied. + * The caller must already hold the PATREF_LOCK on <ref>. The function will + * take the PATEXP_LOCK on all expressions of the pattern as needed. It returns + * non-zero on completion, or zero if it had to stop before the end after + * <budget> was depleted. + */ +static inline int pat_ref_purge_older(struct pat_ref *ref, uint oldest, int budget) +{ + return pat_ref_purge_range(ref, oldest + 1, oldest - 1, budget); +} + + +/* + * pattern_head manipulation. + */ +void pattern_init_head(struct pattern_head *head); +void pattern_prune(struct pattern_head *head); +int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, const char *filename, int patflags, int load_smp, char **err, const char *file, int line); + +/* + * pattern_expr manipulation. 
+ */ +void pattern_init_expr(struct pattern_expr *expr); +struct pattern_expr *pattern_lookup_expr(struct pattern_head *head, struct pat_ref *ref); +struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref *ref, + int patflags, char **err, int *reuse); +struct sample_data **pattern_find_smp(struct pattern_expr *expr, struct pat_ref_elt *elt); + + +#endif diff --git a/include/haproxy/payload.h b/include/haproxy/payload.h new file mode 100644 index 0000000..f91817a --- /dev/null +++ b/include/haproxy/payload.h @@ -0,0 +1,39 @@ +/* + * include/haproxy/payload.h + * Definitions for payload-based sample fetches and ACLs + * + * Copyright (C) 2000-2013 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PAYLOAD_H +#define _HAPROXY_PAYLOAD_H + +#include <haproxy/api.h> +#include <haproxy/sample-t.h> +#include <haproxy/stream-t.h> + +int fetch_rdp_cookie_name(struct stream *s, struct sample *smp, const char *cname, int clen); +int val_payload_lv(struct arg *arg, char **err_msg); + +#endif /* _HAPROXY_PAYLOAD_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/peers-t.h b/include/haproxy/peers-t.h new file mode 100644 index 0000000..124fac3 --- /dev/null +++ b/include/haproxy/peers-t.h @@ -0,0 +1,160 @@ +/* + * include/haproxy/peers-t.h + * This file defines everything related to peers. + * + * Copyright 2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PEERS_T_H +#define _HAPROXY_PEERS_T_H + +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/dict-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/thread-t.h> + + +struct shared_table { + struct stktable *table; /* stick table to sync */ + int local_id; + int remote_id; + int flags; + uint64_t remote_data; + unsigned int remote_data_nbelem[STKTABLE_DATA_TYPES]; + unsigned int last_acked; + unsigned int last_pushed; + unsigned int last_get; + unsigned int teaching_origin; + unsigned int update; + struct shared_table *next; /* next shared table in list */ +}; + +struct peer { + int local; /* proxy state */ + __decl_thread(HA_SPINLOCK_T lock); /* lock used to handle this peer section */ + char *id; + struct { + const char *file; /* file where the section appears */ + int line; /* line where the section appears */ + } conf; /* config information */ + time_t last_change; + struct sockaddr_storage addr; /* peer address */ + struct protocol *proto; /* peer address protocol */ + struct xprt_ops *xprt; /* peer socket operations at transport layer */ + void *sock_init_arg; /* socket operations's opaque init argument if needed */ + unsigned int flags; /* peer session flags */ + unsigned int statuscode; /* current/last session status code */ + unsigned int reconnect; /* next connect timer */ + unsigned int heartbeat; /* next heartbeat timer */ + unsigned int confirm; /* confirm message counter */ + unsigned int last_hdshk; /* Date of the last handshake. 
*/ + uint32_t rx_hbt; /* received heartbeats counter */ + uint32_t tx_hbt; /* transmitted heartbeats counter */ + uint32_t no_hbt; /* no received heartbeat counter */ + uint32_t new_conn; /* new connection after reconnection timeout expiration counter */ + uint32_t proto_err; /* protocol errors counter */ + uint32_t coll; /* connection collisions counter */ + struct appctx *appctx; /* the appctx running it */ + struct shared_table *remote_table; + struct shared_table *last_local_table; /* Last table that emitted update messages during a teach process */ + struct shared_table *stop_local_table; /* last evaluated table, used as restart point for the next teach process */ + struct shared_table *tables; + struct server *srv; + struct dcache *dcache; /* dictionary cache */ + struct peers *peers; /* associated peer section */ + struct peer *next; /* next peer in the list */ +}; + + +struct peers { + char *id; /* peer section name */ + struct task *sync_task; /* main sync task */ + struct sig_handler *sighandler; /* signal handler */ + struct peer *remote; /* remote peers list */ + struct peer *local; /* local peer list */ + struct proxy *peers_fe; /* peer frontend */ + struct { + const char *file; /* file where the section appears */ + int line; /* line where the section appears */ + } conf; /* config information */ + time_t last_change; + struct peers *next; /* next peer section */ + unsigned int flags; /* current peers section resync state */ + unsigned int resync_timeout; /* resync timeout timer */ + int count; /* total of peers */ + int nb_shards; /* Number of peer shards */ + int disabled; /* peers proxy disabled if >0 */ + int applet_count[MAX_THREADS]; /* applet count per thread */ +}; + +/* LRU cache for dictionaries */ +struct dcache_tx { + /* The last recently used key */ + unsigned int lru_key; + /* An array of entries to store pointers to dictionary entries. */ + struct ebpt_node *entries; + /* The previous lookup result. 
*/ + struct ebpt_node *prev_lookup; + /* ebtree to store the previous entries. */ + struct eb_root cached_entries; +}; + +struct dcache_rx { + unsigned int id; + struct dict_entry *de; +}; + +struct dcache_tx_entry { + unsigned int id; + struct ebpt_node entry; +}; + +/* stick-table data type cache */ +struct dcache { + /* Cache used upon transmission */ + struct dcache_tx *tx; + /* Cache used upon receipt */ + struct dcache_rx *rx; + /* Maximum number of entries in this cache */ + size_t max_entries; +}; + +struct peers_keyword { + const char *kw; + int (*parse)( + char **args, + struct peers *curpeer, + const char *file, + int line, + char **err); + int flags; +}; + +struct peers_kw_list { + struct list list; + struct peers_keyword kw[VAR_ARRAY]; +}; + +#endif /* _HAPROXY_PEERS_T_H */ + diff --git a/include/haproxy/peers.h b/include/haproxy/peers.h new file mode 100644 index 0000000..e3c5fd3 --- /dev/null +++ b/include/haproxy/peers.h @@ -0,0 +1,69 @@ +/* + * include/haproxy/peers.h + * This file defines function prototypes for peers management. + * + * Copyright 2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PEERS_H +#define _HAPROXY_PEERS_H + +#include <haproxy/api.h> +#include <haproxy/connection.h> +#include <haproxy/obj_type.h> +#include <haproxy/peers-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/stream-t.h> + + +extern struct peers_kw_list peers_keywords; +extern struct peers *cfg_peers; + +int peers_init_sync(struct peers *peers); +int peers_alloc_dcache(struct peers *peers); +int peers_register_table(struct peers *, struct stktable *table); +void peers_setup_frontend(struct proxy *fe); +void peers_register_keywords(struct peers_kw_list *pkwl); + +#if defined(USE_OPENSSL) +static inline enum obj_type *peer_session_target(struct peer *p, struct stream *s) +{ + if (p->srv->use_ssl) + return &p->srv->obj_type; + else + return &s->be->obj_type; +} + +static inline struct xprt_ops *peer_xprt(struct peer *p) +{ + return p->srv->use_ssl ? xprt_get(XPRT_SSL) : xprt_get(XPRT_RAW); +} +#else +static inline enum obj_type *peer_session_target(struct peer *p, struct stream *s) +{ + return &s->be->obj_type; +} + +static inline struct xprt_ops *peer_xprt(struct peer *p) +{ + return xprt_get(XPRT_RAW); +} +#endif + +#endif /* _HAPROXY_PEERS_H */ + diff --git a/include/haproxy/pipe-t.h b/include/haproxy/pipe-t.h new file mode 100644 index 0000000..1a1fcfd --- /dev/null +++ b/include/haproxy/pipe-t.h @@ -0,0 +1,43 @@ +/* + * include/haproxy/pipe-t.h + * Pipe management - types definitions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PIPE_T_H +#define _HAPROXY_PIPE_T_H + +/* A pipe is described by its read and write FDs, and the data remaining in it. + * The FDs are valid if there are data pending. The user is not allowed to + * change the FDs. + */ +struct pipe { + int data; /* number of bytes present in the pipe */ + int prod; /* FD the producer must write to ; -1 if none */ + int cons; /* FD the consumer must read from ; -1 if none */ + struct pipe *next; +}; + +#endif /* _HAPROXY_PIPE_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/pipe.h b/include/haproxy/pipe.h new file mode 100644 index 0000000..12bd8ea --- /dev/null +++ b/include/haproxy/pipe.h @@ -0,0 +1,54 @@ +/* + * include/haproxy/pipe.h + * Pipe management - exported functions + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PIPE_H +#define _HAPROXY_PIPE_H + +#include <haproxy/api.h> +#include <haproxy/pipe-t.h> + +extern int pipes_used; /* # of pipes in use (2 fds each) */ +extern int pipes_free; /* # of pipes unused (2 fds each) */ + +/* return a pre-allocated empty pipe. Try to allocate one if there isn't any + * left. NULL is returned if a pipe could not be allocated. + */ +struct pipe *get_pipe(); + +/* destroy a pipe, possibly because an error was encountered on it. Its FDs + * will be closed and it will not be reinjected into the live pool. + */ +void kill_pipe(struct pipe *p); + +/* put back a unused pipe into the live pool. If it still has data in it, it is + * closed and not reinjected into the live pool. The caller is not allowed to + * use it once released. + */ +void put_pipe(struct pipe *p); + +#endif /* _HAPROXY_PIPE_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/pool-os.h b/include/haproxy/pool-os.h new file mode 100644 index 0000000..cf29c58 --- /dev/null +++ b/include/haproxy/pool-os.h @@ -0,0 +1,109 @@ +/* + * include/haproxy/pool-os.h + * OS-level interface for memory management + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_POOL_OS_H +#define _HAPROXY_POOL_OS_H + +#include <sys/mman.h> +#include <stdlib.h> +#include <haproxy/api.h> + + +/************* normal allocator *************/ + +/* allocates an area of size <size> and returns it. The semantics are similar + * to those of malloc(). + */ +static forceinline void *pool_alloc_area(size_t size) +{ + return malloc(size); +} + +/* frees an area <area> of size <size> allocated by pool_alloc_area(). The + * semantics are identical to free() except that the size is specified and + * may be ignored. + */ +static forceinline void pool_free_area(void *area, size_t __maybe_unused size) +{ + will_free(area, size); + free(area); +} + +/************* use-after-free allocator *************/ + +/* allocates an area of size <size> and returns it. The semantics are similar + * to those of malloc(). However the allocation is rounded up to 4kB so that a + * full page is allocated. This ensures the object can be freed alone so that + * future dereferences are easily detected. The returned object is always + * 16-bytes aligned to avoid issues with unaligned structure objects. In case + * some padding is added, the area's start address is copied at the end of the + * padding to help detect underflows. + */ +static inline void *pool_alloc_area_uaf(size_t size) +{ + size_t pad = (4096 - size) & 0xFF0; + void *ret; + + ret = mmap(NULL, (size + 4095) & -4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (ret != MAP_FAILED) { + /* let's dereference the page before returning so that the real + * allocation in the system is performed without holding the lock. 
+ */ + *(int *)ret = 0; + if (pad >= sizeof(void *)) + *(void **)(ret + pad - sizeof(void *)) = ret + pad; + ret += pad; + } else { + ret = NULL; + } + return ret; +} + +/* frees an area <area> of size <size> allocated by pool_alloc_area_uaf(). The + * semantics are identical to free() except that the size must absolutely match + * the one passed to pool_alloc_area_uaf(). In case some padding is added, the + * area's start address is compared to the one at the end of the padding, and + * a segfault is triggered if they don't match, indicating an underflow. + */ +static inline void pool_free_area_uaf(void *area, size_t size) +{ + size_t pad = (4096 - size) & 0xFF0; + + /* This object will be released for real in order to detect a use after + * free. We also force a write to the area to ensure we crash on double + * free or free of a const area. + */ + *(uint32_t *)area = 0xDEADADD4; + + if (pad >= sizeof(void *) && *(void **)(area - sizeof(void *)) != area) + ABORT_NOW(); + + munmap(area - pad, (size + 4095) & -4096); +} + +#endif /* _HAPROXY_POOL_OS_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/pool-t.h b/include/haproxy/pool-t.h new file mode 100644 index 0000000..157e2ca --- /dev/null +++ b/include/haproxy/pool-t.h @@ -0,0 +1,149 @@ +/* + * include/haproxy/pool-t.h + * Memory pools configuration and type definitions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_POOL_T_H +#define _HAPROXY_POOL_T_H + +#include <haproxy/api-t.h> +#include <haproxy/list-t.h> + +#define MEM_F_SHARED 0x1 +#define MEM_F_EXACT 0x2 + +/* A special pointer for the pool's free_list that indicates someone is + * currently manipulating it. Serves as a short-lived lock. + */ +#define POOL_BUSY ((void *)1) + +#define POOL_AVG_SAMPLES 1024 + +/* possible flags for __pool_alloc() */ +#define POOL_F_NO_POISON 0x00000001 // do not poison the area +#define POOL_F_MUST_ZERO 0x00000002 // zero the returned area +#define POOL_F_NO_FAIL 0x00000004 // do not randomly fail + +/* pool debugging flags */ +#define POOL_DBG_FAIL_ALLOC 0x00000001 // randomly fail memory allocations +#define POOL_DBG_DONT_MERGE 0x00000002 // do not merge same-size pools +#define POOL_DBG_COLD_FIRST 0x00000004 // pick cold objects first +#define POOL_DBG_INTEGRITY 0x00000008 // perform integrity checks on cache +#define POOL_DBG_NO_GLOBAL 0x00000010 // disable global pools +#define POOL_DBG_NO_CACHE 0x00000020 // disable thread-local pool caches +#define POOL_DBG_CALLER 0x00000040 // trace last caller's location +#define POOL_DBG_TAG 0x00000080 // place a tag at the end of the area +#define POOL_DBG_POISON 0x00000100 // poison memory area on pool_alloc() +#define POOL_DBG_UAF 0x00000200 // enable use-after-free protection + + +/* This is the head of a thread-local cache */ +struct pool_cache_head { + struct list list; /* head of objects in this pool */ + unsigned int count; /* number of objects in this pool */ + unsigned int tid; /* thread id, for debugging only */ + struct pool_head *pool; /* assigned pool, for debugging only */ + ulong fill_pattern; /* pattern used to fill the area on free */ +} THREAD_ALIGNED(64); + +/* This represents 
one item stored in the thread-local cache. <by_pool> links + * the object to the list of objects in the pool, and <by_lru> links the object + * to the local thread's list of hottest objects. This way it's possible to + * allocate a fresh object from the cache, or to release cold objects from any + * pool (no bookkeeping is needed since shared pools do not know how many + * objects they store). + */ +struct pool_cache_item { + struct list by_pool; /* link to objects in this pool */ + struct list by_lru; /* link to objects by LRU order */ +}; + +/* This structure is used to represent an element in the pool's shared + * free_list. An item may carry a series of other items allocated or released + * as a same cluster. The storage then looks like this: + * +------+ +------+ +------+ + * -->| next |-->| next |-->| NULL | + * +------+ +------+ +------+ + * | NULL | | down | | down | + * +------+ +--|---+ +--|---+ + * V V + * +------+ +------+ + * | NULL | | NULL | + * +------+ +------+ + * | down | | NULL | + * +--|---+ +------+ + * V + * +------+ + * | NULL | + * +------+ + * | NULL | + * +------+ + */ +struct pool_item { + struct pool_item *next; + struct pool_item *down; // link to other items of the same cluster +}; + +/* This describes a complete pool, with its status, usage statistics and the + * thread-local caches if any. Even if pools are disabled, these descriptors + * are valid and are used at least to get names and sizes. For small builds + * using neither threads nor pools, this structure might be reduced, and + * alignment could be removed. 
+ */ +struct pool_head { + /* read-mostly part, purely configuration */ + unsigned int limit; /* hard limit on the number of chunks */ + unsigned int minavail; /* how many chunks are expected to be used */ + unsigned int size; /* chunk size */ + unsigned int flags; /* MEM_F_* */ + unsigned int users; /* number of pools sharing this zone */ + unsigned int alloc_sz; /* allocated size (includes hidden fields) */ + struct list list; /* list of all known pools */ + void *base_addr; /* allocation address, for free() */ + char name[12]; /* name of the pool */ + + /* heavily read-write part */ + THREAD_ALIGN(64); + + /* these entries depend on the pointer value, they're used to reduce + * the contention on fast-changing values. The alignment here is + * important since the purpose is to lower the thread contention. + * The free_list and used/allocated are not related, the array is + * just meant to shard elements and there are no per-free_list stats. + */ + struct { + THREAD_ALIGN(64); + struct pool_item *free_list; /* list of free shared objects */ + unsigned int allocated; /* how many chunks have been allocated */ + unsigned int used; /* how many chunks are currently in use */ + unsigned int needed_avg;/* floating indicator between used and allocated */ + unsigned int failed; /* failed allocations (indexed by hash of TID) */ + } buckets[CONFIG_HAP_POOL_BUCKETS]; + + struct pool_cache_head cache[MAX_THREADS] THREAD_ALIGNED(64); /* pool caches */ +} __attribute__((aligned(64))); + +#endif /* _HAPROXY_POOL_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h new file mode 100644 index 0000000..bf7cb8d --- /dev/null +++ b/include/haproxy/pool.h @@ -0,0 +1,368 @@ +/* + * include/haproxy/pool.h + * Memory management definitions.. 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_POOL_H +#define _HAPROXY_POOL_H + +#include <string.h> + +#include <haproxy/api.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/list.h> +#include <haproxy/pool-t.h> +#include <haproxy/thread.h> + +/* This registers a call to create_pool_callback(ptr, name, size) */ +#define REGISTER_POOL(ptr, name, size) \ + INITCALL3(STG_POOL, create_pool_callback, (ptr), (name), (size)) + +/* This macro declares a pool head <ptr> and registers its creation */ +#define DECLARE_POOL(ptr, name, size) \ + struct pool_head *(ptr) __read_mostly = NULL; \ + REGISTER_POOL(&ptr, name, size) + +/* This macro declares a static pool head <ptr> and registers its creation */ +#define DECLARE_STATIC_POOL(ptr, name, size) \ + static struct pool_head *(ptr) __read_mostly; \ + REGISTER_POOL(&ptr, name, size) + +/* By default, free objects are linked by a pointer stored at the beginning of + * the memory area. When DEBUG_MEMORY_POOLS is set, the allocated area is + * inflated by the size of a pointer so that the link is placed at the end + * of the objects. Hence free objects in pools remain intact. 
In addition, + * this location is used to keep a pointer to the pool the object was + * allocated from, and verify it's freed into the appropriate one. + */ +# define POOL_EXTRA_MARK (sizeof(void *)) +# define POOL_DEBUG_SET_MARK(pool, item) \ + do { \ + typeof(pool) __p = (pool); \ + typeof(item) __i = (item); \ + if (likely(!(pool_debugging & POOL_DBG_TAG))) \ + break; \ + *(typeof(pool)*)(((char *)__i) + __p->size) = __p; \ + } while (0) + +# define POOL_DEBUG_RESET_MARK(pool, item) \ + do { \ + typeof(pool) __p = (pool); \ + typeof(item) __i = (item); \ + if (likely(!(pool_debugging & POOL_DBG_TAG))) \ + break; \ + *(typeof(pool)*)(((char *)__i) + __p->size) = __builtin_return_address(0); \ + } while (0) + +/* When POOL_DBG_TAG is set, compares the tag stored right after <item> with + * <pool>; on mismatch, reports it via pool_inspect_item() then aborts. <pool> + * and <item> are each evaluated only once. + */ +# define POOL_DEBUG_CHECK_MARK(pool, item, caller) \ + do { \ + typeof(pool) __p = (pool); \ + typeof(item) __i = (item); \ + if (likely(!(pool_debugging & POOL_DBG_TAG))) \ + break; \ + if (*(typeof(pool)*)(((char *)__i) + __p->size) != __p) { \ + pool_inspect_item("tag mismatch on free()", __p, __i, caller); \ + ABORT_NOW(); \ + } \ + } while (0) + +/* It's possible to trace callers of pool_free() by placing their pointer + * after the end of the area and the optional mark above, which means the + * end of the allocated array. 
+ */ +# define POOL_EXTRA_CALLER (sizeof(void *)) +# define POOL_DEBUG_TRACE_CALLER(pool, item, caller) \ + do { \ + typeof(pool) __p = (pool); \ + typeof(item) __i = (item); \ + typeof(caller) __c = (caller); \ + if (likely(!(pool_debugging & POOL_DBG_CALLER))) \ + break; \ + *(typeof(caller)*)(((char *)__i) + __p->alloc_sz - sizeof(void*)) = __c; \ + } while (0) + +/* poison each newly allocated area with this byte if >= 0 */ +extern int mem_poison_byte; + +/* trim() in progress */ +extern int pool_trim_in_progress; + +/* set of POOL_DBG_* flags */ +extern uint pool_debugging; + +int malloc_trim(size_t pad); +void trim_all_pools(void); + +void *pool_get_from_os_noinc(struct pool_head *pool); +void pool_put_to_os_nodec(struct pool_head *pool, void *ptr); +void *pool_alloc_nocache(struct pool_head *pool, const void *caller); +void pool_free_nocache(struct pool_head *pool, void *ptr); +void dump_pools(void); +int pool_parse_debugging(const char *str, char **err); +int pool_total_failures(void); +unsigned long long pool_total_allocated(void); +unsigned long long pool_total_used(void); +void pool_flush(struct pool_head *pool); +void pool_gc(struct pool_head *pool_ctx); +struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags); +void create_pool_callback(struct pool_head **ptr, char *name, unsigned int size); +void *pool_destroy(struct pool_head *pool); +void pool_destroy_all(void); +void *__pool_alloc(struct pool_head *pool, unsigned int flags); +void __pool_free(struct pool_head *pool, void *ptr); +void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item, const void *caller); + + +/****************** Thread-local cache management ******************/ + +extern THREAD_LOCAL size_t pool_cache_bytes; /* total cache size */ +extern THREAD_LOCAL size_t pool_cache_count; /* #cache objects */ + +void pool_evict_from_local_cache(struct pool_head *pool, int full); +void pool_evict_from_local_caches(void); +void 
pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller); +void pool_fill_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size); +void pool_check_pattern(struct pool_cache_head *pch, struct pool_head *pool, struct pool_cache_item *item, const void *caller); +void pool_refill_local_from_shared(struct pool_head *pool, struct pool_cache_head *pch); +void pool_put_to_shared_cache(struct pool_head *pool, struct pool_item *item); + +/* returns the total number of allocated entries for a pool across all buckets */ +static inline uint pool_allocated(const struct pool_head *pool) +{ + int bucket; + uint ret; + + for (bucket = ret = 0; bucket < CONFIG_HAP_POOL_BUCKETS; bucket++) + ret += HA_ATOMIC_LOAD(&pool->buckets[bucket].allocated); + return ret; +} + +/* returns the total number of used entries for a pool across all buckets */ +static inline uint pool_used(const struct pool_head *pool) +{ + int bucket; + uint ret; + + for (bucket = ret = 0; bucket < CONFIG_HAP_POOL_BUCKETS; bucket++) + ret += HA_ATOMIC_LOAD(&pool->buckets[bucket].used); + return ret; +} + +/* returns the raw total number needed entries across all buckets. It must + * be passed to swrate_avg() to get something usable. + */ +static inline uint pool_needed_avg(const struct pool_head *pool) +{ + int bucket; + uint ret; + + for (bucket = ret = 0; bucket < CONFIG_HAP_POOL_BUCKETS; bucket++) + ret += HA_ATOMIC_LOAD(&pool->buckets[bucket].needed_avg); + return ret; +} + +/* returns the total number of failed allocations for a pool across all buckets */ +static inline uint pool_failed(const struct pool_head *pool) +{ + int bucket; + uint ret; + + for (bucket = ret = 0; bucket < CONFIG_HAP_POOL_BUCKETS; bucket++) + ret += HA_ATOMIC_LOAD(&pool->buckets[bucket].failed); + return ret; +} + +/* Returns the max number of entries that may be brought back to the pool + * before it's considered as full. 
Note that it is only usable for releasing + * objects, hence the function assumes that no more than ->used entries will + * be released in the worst case, and that this value is always lower than or + * equal to ->allocated. It's important to understand that under thread + * contention these values may not always be accurate but the principle is that + * any deviation remains contained. When global pools are disabled, this + * function always returns zero so that the caller knows it must free the + * object via other ways. + */ +static inline uint pool_releasable(const struct pool_head *pool) +{ + uint alloc, used; + uint needed_raw; + + if (unlikely(pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL))) + return 0; + + alloc = pool_allocated(pool); + used = pool_used(pool); + if (used > alloc) + alloc = used; + + needed_raw = pool_needed_avg(pool); + if (alloc < swrate_avg(needed_raw + needed_raw / 4, POOL_AVG_SAMPLES)) + return used; // less than needed is allocated, can release everything + + if ((uint)(alloc - used) < pool->minavail) + return pool->minavail - (alloc - used); // less than minimum available + + /* there are enough objects in this pool */ + return 0; +} + +/* These are generic cache-aware wrappers that allocate/free from/to the local + * cache first, then from the second level if it exists. + */ + +/* Tries to retrieve an object from the local pool cache corresponding to pool + * <pool>. If none is available, tries to allocate from the shared cache if any + * and returns NULL if nothing is available. Must not be used when pools are + * disabled. 
+ */ +static inline void *pool_get_from_cache(struct pool_head *pool, const void *caller) +{ + struct pool_cache_item *item; + struct pool_cache_head *ph; + + BUG_ON(pool_debugging & POOL_DBG_NO_CACHE); + + ph = &pool->cache[tid]; + if (unlikely(LIST_ISEMPTY(&ph->list))) { + if (!(pool_debugging & POOL_DBG_NO_GLOBAL)) + pool_refill_local_from_shared(pool, ph); + if (LIST_ISEMPTY(&ph->list)) + return NULL; + } + + /* allocate hottest objects first */ + item = LIST_NEXT(&ph->list, typeof(item), by_pool); + + if (unlikely(pool_debugging & (POOL_DBG_COLD_FIRST|POOL_DBG_INTEGRITY))) { + /* allocate oldest objects first so as to keep them as long as possible + * in the cache before being reused and maximizing the chance to detect + * an overwrite. + */ + if (pool_debugging & POOL_DBG_COLD_FIRST) + item = LIST_PREV(&ph->list, typeof(item), by_pool); + + if (pool_debugging & POOL_DBG_INTEGRITY) + pool_check_pattern(ph, pool, item, caller); + } + + BUG_ON(&item->by_pool == &ph->list); + LIST_DELETE(&item->by_pool); + LIST_DELETE(&item->by_lru); + + /* keep track of where the element was allocated from */ + POOL_DEBUG_SET_MARK(pool, item); + POOL_DEBUG_TRACE_CALLER(pool, item, caller); + + ph->count--; + pool_cache_bytes -= pool->size; + pool_cache_count--; + + return item; +} + + +/****************** Common high-level code ******************/ + +#if !defined(DEBUG_MEM_STATS) + +/* + * Returns a pointer to an object from pool <pool> allocated using + * flags <flag> from the POOL_F_* set. + */ +#define pool_alloc_flag(pool, flag) __pool_alloc((pool), (flag)) + +/* + * Returns a pointer to type <type> taken from the pool <pool_type> or + * dynamically allocated. Memory poisonning is performed if enabled. + */ +#define pool_alloc(pool) __pool_alloc((pool), 0) + +/* + * Returns a pointer to type <type> taken from the pool <pool_type> or + * dynamically allocated. The area is zeroed. 
+ */ +#define pool_zalloc(pool) __pool_alloc((pool), POOL_F_MUST_ZERO) + +/* + * Puts a memory area back to the corresponding pool. Just like with the libc's + * free(), <ptr> may be NULL. + */ +#define pool_free(pool, ptr) \ + do { \ + typeof(ptr) __ptr = (ptr); \ + if (likely((__ptr) != NULL)) \ + __pool_free(pool, __ptr); \ + } while (0) + + +#else /* DEBUG_MEM_STATS is set below */ + +#define pool_free(pool, ptr) ({ \ + struct pool_head *__pool = (pool); \ + typeof(ptr) __ptr = (ptr); \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_P_FREE, \ + .func = __func__, \ + }, \ + }; \ + _.extra = __pool; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + if (__ptr) { \ + _HA_ATOMIC_INC(&_.calls); \ + _HA_ATOMIC_ADD(&_.size, __pool->size); \ + __pool_free(__pool, __ptr); \ + } \ +}) + +#define pool_alloc_flag(pool, flag) ({ \ + struct pool_head *__pool = (pool); \ + uint __flag = (flag); \ + size_t __x = __pool->size; \ + static struct mem_stats _ __attribute__((used,__section__("mem_stats"),__aligned__(sizeof(void*)))) = { \ + .caller = { \ + .file = __FILE__, .line = __LINE__, \ + .what = MEM_STATS_TYPE_P_ALLOC, \ + .func = __func__, \ + }, \ + }; \ + _.extra = __pool; \ + HA_WEAK(__start_mem_stats); \ + HA_WEAK(__stop_mem_stats); \ + _HA_ATOMIC_INC(&_.calls); \ + _HA_ATOMIC_ADD(&_.size, __x); \ + __pool_alloc(__pool, __flag); \ +}) + +#define pool_alloc(pool) pool_alloc_flag(pool, 0) + +#define pool_zalloc(pool) pool_alloc_flag(pool, POOL_F_MUST_ZERO) + +#endif /* DEBUG_MEM_STATS */ + +#endif /* _HAPROXY_POOL_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/port_range-t.h b/include/haproxy/port_range-t.h new file mode 100644 index 0000000..eea1132 --- /dev/null +++ b/include/haproxy/port_range-t.h @@ -0,0 +1,40 @@ +/* + * 
include/haproxy/port_range-t.h + * This file defines the prt_range type + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PORT_RANGE_T_H +#define _HAPROXY_PORT_RANGE_T_H + +#include <netinet/in.h> +#include <haproxy/api-t.h> + +struct port_range { + int size, get, put_h, put_t; /* range size, and get/put positions */ + uint16_t ports[VAR_ARRAY]; /* array of <size> ports, in host byte order */ +}; + +#endif /* _HAPROXY_PORT_RANGE_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/port_range.h b/include/haproxy/port_range.h new file mode 100644 index 0000000..9e4379a --- /dev/null +++ b/include/haproxy/port_range.h @@ -0,0 +1,105 @@ +/* + * include/haproxy/port_range.h + * This file defines everything needed to manage port ranges + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PORT_RANGE_H +#define _HAPROXY_PORT_RANGE_H + +#include <stdlib.h> +#include <haproxy/api.h> +#include <haproxy/port_range-t.h> + +#define GET_NEXT_OFF(range, off) ((off) == (range)->size - 1 ? 0 : (off) + 1) + +/* return an available port from range <range>, or zero if none is left */ +static inline int port_range_alloc_port(struct port_range *range) +{ + int ret; + int get; + int put; + + get = _HA_ATOMIC_LOAD(&range->get); + do { + /* barrier to make sure get is loaded before put */ + __ha_barrier_atomic_load(); + put = _HA_ATOMIC_LOAD(&range->put_t); + if (unlikely(put == get)) + return 0; + ret = range->ports[get]; + } while (!(_HA_ATOMIC_CAS(&range->get, &get, GET_NEXT_OFF(range, get)))); + return ret; +} + +/* release port <port> into port range <range>. Does nothing if <port> is zero + * nor if <range> is null. The caller is responsible for marking the port + * unused by either setting the port to zero or the range to NULL. 
+ */ +static inline void port_range_release_port(struct port_range *range, int port) +{ + int put; + + if (!port || !range) + return; + + put = range->put_h; + /* put_h is reserved for producers, so that they can each get a + * free slot, put_t is what is used by consumers to know if there's + * elements available or not + */ + /* First reserve or slot, we know the ring buffer can't be full, + * as we will only ever release port we allocated before + */ + while (!(_HA_ATOMIC_CAS(&range->put_h, &put, GET_NEXT_OFF(range, put)))); + _HA_ATOMIC_STORE(&range->ports[put], port); + /* Barrier to make sure the new port is visible before we change put_t */ + __ha_barrier_atomic_store(); + /* Wait until all the threads that got a slot before us are done */ + while ((volatile int)range->put_t != put) + __ha_compiler_barrier(); + /* Let the world know we're done, and any potential consumer they + * can use that port. + */ + _HA_ATOMIC_STORE(&range->put_t, GET_NEXT_OFF(range, put)); +} + +/* return a new initialized port range of N ports. The ports are not + * filled in, it's up to the caller to do it. + */ +static inline struct port_range *port_range_alloc_range(int n) +{ + struct port_range *ret; + ret = calloc(1, sizeof(struct port_range) + + (n + 1) * sizeof(((struct port_range *)0)->ports[0])); + if (!ret) + return NULL; + ret->size = n + 1; + /* Start at the first free element */ + ret->put_h = ret->put_t = n; + return ret; +} + +#endif /* _HAPROXY_PORT_RANGE_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/proto_quic.h b/include/haproxy/proto_quic.h new file mode 100644 index 0000000..a0e2b98 --- /dev/null +++ b/include/haproxy/proto_quic.h @@ -0,0 +1,35 @@ +/* + * AF_INET/AF_INET6 QUIC protocol layer definitions. 
+ * + * Copyright 2020 Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTO_QUIC_H +#define _HAPROXY_PROTO_QUIC_H + +extern struct protocol proto_quic4; +extern struct protocol proto_quic6; + +struct quic_cid_tree { + struct eb_root root; + __decl_thread(HA_RWLOCK_T lock); +}; + +extern struct quic_dghdlr *quic_dghdlrs; +extern struct quic_cid_tree *quic_cid_trees; + +#endif /* _HAPROXY_PROTO_QUIC_H */ diff --git a/include/haproxy/proto_rhttp-t.h b/include/haproxy/proto_rhttp-t.h new file mode 100644 index 0000000..28e2ff9 --- /dev/null +++ b/include/haproxy/proto_rhttp-t.h @@ -0,0 +1,14 @@ +#ifndef _HAPROXY_PROTO_RHTTP_H_T +#define _HAPROXY_PROTO_RHTTP_H_T + +/* State for reverse preconnect listener state machine. + * Used to limit log reporting only on state changes. 
+ */ +enum li_preconn_state { + LI_PRECONN_ST_STOP, /* pre-connect task inactive */ + LI_PRECONN_ST_INIT, /* pre-connect task bootstrapped */ + LI_PRECONN_ST_ERR, /* last pre-connect attempt failed */ + LI_PRECONN_ST_FULL, /* pre-connect maxconn reached */ +}; + +#endif /* _HAPROXY_PROTO_RHTTP_H_T */ diff --git a/include/haproxy/proto_rhttp.h b/include/haproxy/proto_rhttp.h new file mode 100644 index 0000000..421680f --- /dev/null +++ b/include/haproxy/proto_rhttp.h @@ -0,0 +1,21 @@ +#ifndef _HAPROXY_PROTO_RHTTP_H +#define _HAPROXY_PROTO_RHTTP_H + +#include <haproxy/connection-t.h> +#include <haproxy/listener-t.h> +#include <haproxy/receiver-t.h> + +int rhttp_bind_receiver(struct receiver *rx, char **errmsg); + +int rhttp_bind_listener(struct listener *listener, char *errmsg, int errlen); +void rhttp_enable_listener(struct listener *l); +void rhttp_disable_listener(struct listener *l); +struct connection *rhttp_accept_conn(struct listener *l, int *status); +void rhttp_unbind_receiver(struct listener *l); +int rhttp_set_affinity(struct connection *conn, int new_tid); + +int rhttp_accepting_conn(const struct receiver *rx); + +void rhttp_notify_preconn_err(struct listener *l); + +#endif /* _HAPROXY_PROTO_RHTTP_H */ diff --git a/include/haproxy/proto_sockpair.h b/include/haproxy/proto_sockpair.h new file mode 100644 index 0000000..bb0256e --- /dev/null +++ b/include/haproxy/proto_sockpair.h @@ -0,0 +1,32 @@ +/* + * Socket Pair protocol layer (sockpair) + * + * Copyright HAProxy Technologies - William Lallemand <wlallemand@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTO_SOCKPAIR_H +#define _HAPROXY_PROTO_SOCKPAIR_H + +extern struct proto_fam proto_fam_sockpair; +extern struct protocol proto_sockpair; + +int recv_fd_uxst(int sock); +int send_fd_uxst(int fd, int send_fd); +int sockpair_bind_receiver(struct receiver *rx, char **errmsg); + +#endif /* _HAPROXY_PROTO_SOCKPAIR_H */ + diff --git a/include/haproxy/proto_tcp.h b/include/haproxy/proto_tcp.h new file mode 100644 index 0000000..8a3d9fd --- /dev/null +++ b/include/haproxy/proto_tcp.h @@ -0,0 +1,45 @@ +/* + * include/haproxy/proto_tcp.h + * This file contains TCP socket protocol definitions. + * + * Copyright (C) 2000-2013 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTO_TCP_H +#define _HAPROXY_PROTO_TCP_H + +#include <haproxy/api.h> +#include <haproxy/arg-t.h> +#include <haproxy/connection-t.h> +#include <haproxy/listener-t.h> +#include <haproxy/sample-t.h> + +extern struct protocol proto_tcpv4; +extern struct protocol proto_tcpv6; + +int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote); +int tcp_connect_server(struct connection *conn, int flags); +int tcp_is_foreign(int fd, sa_family_t family); + +#endif /* _HAPROXY_PROTO_TCP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/proto_udp.h b/include/haproxy/proto_udp.h new file mode 100644 index 0000000..1c4da77 --- /dev/null +++ b/include/haproxy/proto_udp.h @@ -0,0 +1,41 @@ +/* + * include/haproxy/proto_udp.h + * This file contains UDP socket protocol definitions. + * + * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * Partial merge by Emeric Brun <ebrun@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_PROTO_UDP_H +#define _PROTO_PROTO_UDP_H + +extern struct protocol proto_udp4; +extern struct protocol proto_udp6; + +int udp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote); +int udp_suspend_receiver(struct receiver *rx); +int udp_resume_receiver(struct receiver *rx); + +#endif /* _PROTO_PROTO_UDP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/proto_uxst.h b/include/haproxy/proto_uxst.h new file mode 100644 index 0000000..77caf3d --- /dev/null +++ b/include/haproxy/proto_uxst.h @@ -0,0 +1,34 @@ +/* + * include/haproxy/proto_uxst.h + * This file contains UNIX stream socket protocol definitions. + * + * Copyright (C) 2000-2013 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_PROTO_UXST_H +#define _PROTO_PROTO_UXST_H + +extern struct protocol proto_uxst; + +#endif /* _PROTO_PROTO_UXST_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/protobuf-t.h b/include/haproxy/protobuf-t.h new file mode 100644 index 0000000..b1a14e7 --- /dev/null +++ b/include/haproxy/protobuf-t.h @@ -0,0 +1,87 @@ +/* + * include/haproxy/protobuf-t.h + * This file contains structure declarations for protocol buffers. + * + * Copyright 2012 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTOBUF_T_H +#define _HAPROXY_PROTOBUF_T_H + +#include <haproxy/api-t.h> + +enum protobuf_wire_type { + PBUF_TYPE_VARINT, + PBUF_TYPE_64BIT, + PBUF_TYPE_LENGTH_DELIMITED, + PBUF_TYPE_START_GROUP, /* Deprecated */ + PBUF_TYPE_STOP_GROUP, /* Deprecated */ + PBUF_TYPE_32BIT, +}; + +enum protobuf_type { + /* These enums are used to initialize calloc()'ed struct fields. + * Start them from 1 to avoid collisions with the default 0 value + * of such struct fields. 
+ */ + PBUF_T_BINARY = 1, + + /* Do not reorder the following ones: + * PBUF_T_VARINT_*, PBUF_T_32BIT_* and PBUF_T_64BIT_* + */ + PBUF_T_VARINT_INT32, + PBUF_T_VARINT_UINT32, + PBUF_T_VARINT_INT64, + PBUF_T_VARINT_UINT64, + PBUF_T_VARINT_BOOL, + PBUF_T_VARINT_ENUM, + + /* These two following varints are first encoded with zigzag. */ + PBUF_T_VARINT_SINT32, + PBUF_T_VARINT_SINT64, + + /* Fixed size types from here. */ + PBUF_T_32BIT_FIXED32, + PBUF_T_32BIT_SFIXED32, + PBUF_T_32BIT_FLOAT, + + PBUF_T_64BIT_FIXED64, + PBUF_T_64BIT_SFIXED64, + PBUF_T_64BIT_DOUBLE, +}; + + +struct pbuf_fid { + unsigned int *ids; + size_t sz; +}; + +struct sample; +struct protobuf_parser_def { + int (*skip)(unsigned char **pos, size_t *left, size_t vlen); + int (*smp_store)(struct sample *, int type, + unsigned char *pos, size_t left, size_t vlen); +}; + +#endif /* _HAPROXY_PROTOBUF_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/protobuf.h b/include/haproxy/protobuf.h new file mode 100644 index 0000000..009bd13 --- /dev/null +++ b/include/haproxy/protobuf.h @@ -0,0 +1,577 @@ +/* + * include/haproxy/protobuf.h + * This file contains functions and macros declarations for protocol buffers decoding. + * + * Copyright 2012 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTOBUF_H +#define _HAPROXY_PROTOBUF_H + +#include <haproxy/api-t.h> +#include <haproxy/arg-t.h> +#include <haproxy/protobuf-t.h> +#include <haproxy/sample-t.h> + +#define PBUF_VARINT_DONT_STOP_BIT 7 +#define PBUF_VARINT_DONT_STOP_BITMASK (1 << PBUF_VARINT_DONT_STOP_BIT) +#define PBUF_VARINT_DATA_BITMASK ~PBUF_VARINT_DONT_STOP_BITMASK + +/* .skip and .smp_store prototypes. */ +int protobuf_skip_varint(unsigned char **pos, size_t *len, size_t vlen); +int protobuf_smp_store_varint(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen); +int protobuf_skip_64bit(unsigned char **pos, size_t *len, size_t vlen); +int protobuf_smp_store_64bit(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen); +int protobuf_skip_vlen(unsigned char **pos, size_t *len, size_t vlen); +int protobuf_smp_store_vlen(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen); +int protobuf_skip_32bit(unsigned char **pos, size_t *len, size_t vlen); +int protobuf_smp_store_32bit(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen); + +struct protobuf_parser_def protobuf_parser_defs [] = { + [PBUF_TYPE_VARINT ] = { + .skip = protobuf_skip_varint, + .smp_store = protobuf_smp_store_varint, + }, + [PBUF_TYPE_64BIT ] = { + .skip = protobuf_skip_64bit, + .smp_store = protobuf_smp_store_64bit, + }, + [PBUF_TYPE_LENGTH_DELIMITED] = { + .skip = protobuf_skip_vlen, + .smp_store = protobuf_smp_store_vlen, + }, + [PBUF_TYPE_START_GROUP ] = { + /* XXX Deprecated XXX */ + }, + [PBUF_TYPE_STOP_GROUP ] = { + /* XXX Deprecated XXX */ + }, + [PBUF_TYPE_32BIT ] = { + .skip = protobuf_skip_32bit, + .smp_store = protobuf_smp_store_32bit, + }, +}; + +/* + * Note that the field values 
with protocol buffers 32bit and 64bit fixed size as type + * are sent in little-endian byte order to the network. + */ + +/* Convert a little-endian ordered 32bit integer to the byte order of the host. */ +static inline uint32_t pbuf_le32toh(uint32_t v) +{ + uint8_t *p = (uint8_t *)&v; + return (p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); +} + +/* Convert a little-endian ordered 64bit integer to the byte order of the host. */ +static inline uint64_t pbuf_le64toh(uint64_t v) +{ + return (uint64_t)(pbuf_le32toh(v >> 32)) << 32 | pbuf_le32toh(v); +} + +/* + * Return a protobuf type enum from <s> string if succeeded, -1 if not. + */ +int protobuf_type(const char *s) +{ + /* varint types. */ + if (strcmp(s, "int32") == 0) + return PBUF_T_VARINT_INT32; + else if (strcmp(s, "uint32") == 0) + return PBUF_T_VARINT_UINT32; + else if (strcmp(s, "sint32") == 0) + return PBUF_T_VARINT_SINT32; + else if (strcmp(s, "int64") == 0) + return PBUF_T_VARINT_INT64; + else if (strcmp(s, "uint64") == 0) + return PBUF_T_VARINT_UINT64; + else if (strcmp(s, "sint64") == 0) + return PBUF_T_VARINT_SINT64; + else if (strcmp(s, "bool") == 0) + return PBUF_T_VARINT_BOOL; + else if (strcmp(s, "enum") == 0) + return PBUF_T_VARINT_ENUM; + + /* 32bit fixed size types. */ + else if (strcmp(s, "fixed32") == 0) + return PBUF_T_32BIT_FIXED32; + else if (strcmp(s, "sfixed32") == 0) + return PBUF_T_32BIT_SFIXED32; + else if (strcmp(s, "float") == 0) + return PBUF_T_32BIT_FLOAT; + + /* 64bit fixed size types. */ + else if (strcmp(s, "fixed64") == 0) + return PBUF_T_64BIT_FIXED64; + else if (strcmp(s, "sfixed64") == 0) + return PBUF_T_64BIT_SFIXED64; + else if (strcmp(s, "double") == 0) + return PBUF_T_64BIT_DOUBLE; + else + return -1; +} + +/* + * Decode a protocol buffers varint located in a buffer at <pos> address with + * <len> as length. The decoded value is stored at <val>. + * Returns 1 if succeeded, 0 if not. 
+ */ +static inline int +protobuf_varint(uint64_t *val, unsigned char *pos, size_t len) +{ + unsigned int shift; + + *val = 0; + shift = 0; + + while (len > 0) { + int stop = !(*pos & PBUF_VARINT_DONT_STOP_BITMASK); + + *val |= ((uint64_t)(*pos & PBUF_VARINT_DATA_BITMASK)) << shift; + + ++pos; + --len; + + if (stop) + break; + else if (!len) + return 0; + + shift += 7; + /* The maximum length in bytes of a 64-bit encoded value is 10. */ + if (shift > 63) + return 0; + } + + return 1; +} + +/* + * Decode a protocol buffers varint located in a buffer at <pos> offset address with + * <len> as length address. Update <pos> and <len> consequently. Decrease <*len> + * by the number of decoded bytes. The decoded value is stored at <val>. + * Returns 1 if succeeded, 0 if not. + */ +static inline int +protobuf_decode_varint(uint64_t *val, unsigned char **pos, size_t *len) +{ + unsigned int shift; + + *val = 0; + shift = 0; + + while (*len > 0) { + int stop = !(**pos & PBUF_VARINT_DONT_STOP_BITMASK); + + *val |= ((uint64_t)**pos & PBUF_VARINT_DATA_BITMASK) << shift; + + ++*pos; + --*len; + + if (stop) + break; + else if (!*len) + return 0; + + shift += 7; + /* The maximum length in bytes of a 64-bit encoded value is 10. */ + if (shift > 63) + return 0; + } + + return 1; +} + +/* + * Skip a protocol buffer varint found at <pos> as position address with <len> + * as available length address. Update <*pos> to make it point to the next + * available byte. Decrease <*len> by the number of skipped bytes. + * Returns 1 if succeeded, 0 if not. + */ +int +protobuf_skip_varint(unsigned char **pos, size_t *len, size_t vlen) +{ + unsigned int shift; + + shift = 0; + + while (*len > 0) { + int stop = !(**pos & PBUF_VARINT_DONT_STOP_BITMASK); + + ++*pos; + --*len; + + if (stop) + break; + else if (!*len) + return 0; + + shift += 7; + /* The maximum length in bytes of a 64-bit encoded value is 10. 
*/ + if (shift > 63) + return 0; + } + + return 1; +} + +/* + * If succeeded, return the length of a prococol buffers varint found at <pos> as + * position address, with <len> as address of the available bytes at <*pos>. + * Update <*pos> to make it point to the next available byte. Decrease <*len> + * by the number of bytes used to encode this varint. + * Return -1 if failed. + */ +static inline int +protobuf_varint_getlen(unsigned char *pos, size_t len) +{ + unsigned char *spos; + unsigned int shift; + + shift = 0; + spos = pos; + + while (len > 0) { + int stop = !(*pos & PBUF_VARINT_DONT_STOP_BITMASK); + + ++pos; + --len; + + if (stop) + break; + else if (!len) + return -1; + + shift += 7; + /* The maximum length in bytes of a 64-bit encoded value is 10. */ + if (shift > 63) + return -1; + } + + return pos - spos; +} + +/* + * Store a varint field value in a sample from <pos> buffer + * with <len> available bytes after having decoded it if needed + * depending on <type> the expected protocol buffer type of the field. + * Return 1 if succeeded, 0 if not. + */ +int protobuf_smp_store_varint(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen) +{ + switch (type) { + case PBUF_T_BINARY: + { + int varint_len; + + varint_len = protobuf_varint_getlen(pos, len); + if (varint_len == -1) + return 0; + + smp->data.type = SMP_T_BIN; + smp->data.u.str.area = (char *)pos; + smp->data.u.str.data = varint_len; + smp->flags = SMP_F_VOL_TEST; + break; + } + + case PBUF_T_VARINT_INT32 ... PBUF_T_VARINT_ENUM: + { + uint64_t varint; + + if (!protobuf_varint(&varint, pos, len)) + return 0; + + smp->data.u.sint = varint; + smp->data.type = SMP_T_SINT; + break; + } + + case PBUF_T_VARINT_SINT32 ... PBUF_T_VARINT_SINT64: + { + uint64_t varint; + + if (!protobuf_varint(&varint, pos, len)) + return 0; + + /* zigzag decoding. 
*/ + smp->data.u.sint = (varint >> 1) ^ -(varint & 1); + smp->data.type = SMP_T_SINT; + break; + } + + default: + return 0; + + } + + return 1; +} + +/* + * Move forward <*pos> buffer by 8 bytes. Used to skip a 64bit field. + */ +int protobuf_skip_64bit(unsigned char **pos, size_t *len, size_t vlen) +{ + if (*len < sizeof(uint64_t)) + return 0; + + *pos += sizeof(uint64_t); + *len -= sizeof(uint64_t); + + return 1; +} + +/* + * Store a fixed size 64bit field value in a sample from <pos> buffer + * with <len> available bytes after having decoded it depending on <type> + * the expected protocol buffer type of the field. + * Return 1 if succeeded, 0 if not. + */ +int protobuf_smp_store_64bit(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen) +{ + if (len < sizeof(uint64_t)) + return 0; + + switch (type) { + case PBUF_T_BINARY: + smp->data.type = SMP_T_BIN; + smp->data.u.str.area = (char *)pos; + smp->data.u.str.data = sizeof(uint64_t); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_64BIT_FIXED64: + case PBUF_T_64BIT_SFIXED64: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le64toh(*(uint64_t *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_64BIT_DOUBLE: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le64toh(*(double *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + default: + return 0; + } + + return 1; +} + +/* + * Move forward <*pos> buffer by <vlen> bytes. Use to skip a length-delimited + * field. + */ +int protobuf_skip_vlen(unsigned char **pos, size_t *len, size_t vlen) +{ + if (*len < vlen) + return 0; + + *pos += vlen; + *len -= vlen; + + return 1; +} + +/* + * Store a <vlen>-bytes length-delimited field value in a sample from <pos> + * buffer with <len> available bytes. + * Return 1 if succeeded, 0 if not. 
+ */ +int protobuf_smp_store_vlen(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen) +{ + if (len < vlen) + return 0; + + if (type != PBUF_T_BINARY) + return 0; + + smp->data.type = SMP_T_BIN; + smp->data.u.str.area = (char *)pos; + smp->data.u.str.data = vlen; + smp->flags = SMP_F_VOL_TEST; + + return 1; +} + +/* + * Move forward <*pos> buffer by 4 bytes. Used to skip a 32bit field. + */ +int protobuf_skip_32bit(unsigned char **pos, size_t *len, size_t vlen) +{ + if (*len < sizeof(uint32_t)) + return 0; + + *pos += sizeof(uint32_t); + *len -= sizeof(uint32_t); + + return 1; +} + +/* + * Store a fixed size 32bit field value in a sample from <pos> buffer + * with <len> available bytes after having decoded it depending on <type> + * the expected protocol buffer type of the field. + * Return 1 if succeeded, 0 if not. + */ +int protobuf_smp_store_32bit(struct sample *smp, int type, + unsigned char *pos, size_t len, size_t vlen) +{ + if (len < sizeof(uint32_t)) + return 0; + + switch (type) { + case PBUF_T_BINARY: + smp->data.type = SMP_T_BIN; + smp->data.u.str.area = (char *)pos; + smp->data.u.str.data = sizeof(uint32_t); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_32BIT_FIXED32: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le32toh(*(uint32_t *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_32BIT_SFIXED32: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = (int32_t)pbuf_le32toh(*(uint32_t *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + case PBUF_T_32BIT_FLOAT: + smp->data.type = SMP_T_SINT; + smp->data.u.sint = pbuf_le32toh(*(float *)pos); + smp->flags = SMP_F_VOL_TEST; + break; + + default: + return 0; + } + + return 1; +} + +/* + * Lookup for a protocol buffers field whose parameters are provided by <arg_p> + * first argument in the buffer with <pos> as address and <len> as length address. 
+ * If found, store its value depending on the type of storage to use provided by <arg_p> + * second argument and return 1, 0 if not. + * The field identifiers to match are read from <arg_p>[0] (fid.ids, fid.sz) and + * the expected type from <arg_p>[1]. <*pos> and <*len> are updated as bytes are + * consumed. + */ +static inline int protobuf_field_lookup(const struct arg *arg_p, struct sample *smp, + unsigned char **pos, size_t *len) +{ + unsigned int *fid; + size_t fid_sz; + int type; + uint64_t elen; + int field; + + fid = arg_p[0].data.fid.ids; + fid_sz = arg_p[0].data.fid.sz; + type = arg_p[1].data.sint; + + /* Length of the length-delimited messages if any. */ + elen = 0; + field = 0; + + while (field < fid_sz) { + int found; + uint64_t key, sleft; + struct protobuf_parser_def *pbuf_parser = NULL; + unsigned int wire_type, field_number; + + if ((ssize_t)*len <= 0) + return 0; + + /* Save the number of remaining bytes before the key is decoded. */ + sleft = *len; + + /* Key decoding: the 3 lowest bits are the wire type, the other ones the field number. */ + if (!protobuf_decode_varint(&key, pos, len)) + return 0; + + wire_type = key & 0x7; + field_number = key >> 3; + found = field_number == fid[field]; + + /* Skip the data if the current field does not match. */ + switch (wire_type) { + case PBUF_TYPE_VARINT: + case PBUF_TYPE_32BIT: + case PBUF_TYPE_64BIT: + pbuf_parser = &protobuf_parser_defs[wire_type]; + if (!found && !pbuf_parser->skip(pos, len, 0)) + return 0; + break; + + case PBUF_TYPE_LENGTH_DELIMITED: + /* Decode the length of this length-delimited field. */ + if (!protobuf_decode_varint(&elen, pos, len) || elen > *len) + return 0; + + /* The size of the current field is computed from here to skip + * the bytes used to encode the previous length. + */ + sleft = *len; + pbuf_parser = &protobuf_parser_defs[wire_type]; + if (!found && !pbuf_parser->skip(pos, len, elen)) + return 0; + break; + + default: + return 0; + } + + /* Store the data if found. 
Note that <pbuf_parser> is not NULL + * here: every wire type reaching this point has assigned it in the + * switch above, the other ones have returned 0. + */ + if (found && field == fid_sz - 1) + return pbuf_parser->smp_store(smp, type, *pos, *len, elen); + + if ((ssize_t)(elen) > 0) + elen -= sleft - *len; /* deduct the bytes consumed since <sleft> was last saved */ + + if (found) { + field++; /* look for the next identifier */ + } + else if ((ssize_t)elen <= 0) { + field = 0; /* restart from the first identifier */ + } + } + + return 0; +} + +#endif /* _HAPROXY_PROTOBUF_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/protocol-t.h b/include/haproxy/protocol-t.h new file mode 100644 index 0000000..b85f29c --- /dev/null +++ b/include/haproxy/protocol-t.h @@ -0,0 +1,148 @@ +/* + * include/haproxy/protocol-t.h + * This file defines the structures used by generic network protocols. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTOCOL_T_H +#define _HAPROXY_PROTOCOL_T_H + +#include <sys/types.h> +#include <sys/socket.h> + +#include <import/ebtree-t.h> +#include <haproxy/api-t.h> + +/* some pointer types referenced below */ +struct listener; +struct receiver; +struct connection; + +/* + * Custom network family for str2sa parsing. 
Should be ok to do this since + * sa_family_t is standardized as an unsigned integer + */ +#define AF_CUST_EXISTING_FD (AF_MAX + 1) +#define AF_CUST_SOCKPAIR (AF_MAX + 2) +#define AF_CUST_RHTTP_SRV (AF_MAX + 3) +#define AF_CUST_MAX (AF_MAX + 4) + +/* + * Test in case AF_CUST_MAX overflows the sa_family_t (unsigned int) + */ +#if (AF_CUST_MAX < AF_MAX) +# error "Can't build on the target system, AF_CUST_MAX overflow" +#endif + +/* socket-level protocol types, used for protocol selection */ +enum proto_type { + PROTO_TYPE_STREAM, /* streaming protocol (like TCP) */ + PROTO_TYPE_DGRAM, /* datagram protocol (like UDP) */ + PROTO_NUM_TYPES /* must be the last one */ +}; + +/* max length of a protocol name, including trailing zero */ +#define PROTO_NAME_LEN 16 + +/* flags for ->connect() */ +#define CONNECT_HAS_DATA 0x00000001 /* There's data available to be sent */ +#define CONNECT_DELACK_SMART_CONNECT 0x00000002 /* Use a delayed ACK if the backend has tcp-smart-connect */ +#define CONNECT_DELACK_ALWAYS 0x00000004 /* Use a delayed ACK */ +#define CONNECT_CAN_USE_TFO 0x00000008 /* We can use TFO for this connection */ + +/* Flags for protocol->flags */ +#define PROTO_F_REUSEPORT_SUPPORTED 0x00000001 /* SO_REUSEPORT is supported */ +#define PROTO_F_REUSEPORT_TESTED 0x00000002 /* SO_REUSEPORT support was tested */ + +/* protocol families define standard functions acting on a given address family + * for a socket implementation, such as AF_INET/PF_INET for example. 
+ */ +struct proto_fam { + char name[PROTO_NAME_LEN]; /* family name, zero-terminated */ + int sock_domain; /* socket domain, as passed to socket() */ + sa_family_t sock_family; /* socket family, for sockaddr */ + ushort l3_addrlen; /* layer3 address length, used by hashes */ + socklen_t sock_addrlen; /* socket address length, used by bind() */ + /* 4-bytes hole here */ + int (*addrcmp)(const struct sockaddr_storage *, const struct sockaddr_storage *); /* compare addresses (like memcmp) */ + int (*bind)(struct receiver *rx, char **errmsg); /* bind a receiver */ + int (*get_src)(int fd, struct sockaddr *, socklen_t, int dir); /* syscall used to retrieve connection's src addr */ + int (*get_dst)(int fd, struct sockaddr *, socklen_t, int dir); /* syscall used to retrieve connection's dst addr */ + void (*set_port)(struct sockaddr_storage *, int port); /* set the port on the address; NULL if not implemented */ +}; + +/* This structure contains all information needed to easily handle a protocol. + * Its primary goal is to ease listeners maintenance. Specifically, the + * bind() primitive must be used before any fork(). rx_suspend()/rx_resume() + * return >0 on success, 0 if rx stopped, -1 on failure to proceed. rx_* may + * be null if the protocol doesn't provide direct access to the receiver. 
+ */ +struct protocol { + char name[PROTO_NAME_LEN]; /* protocol name, zero-terminated */ + struct proto_fam *fam; /* protocol family */ + int xprt_type; /* transport layer type (PROTO_TYPE_STREAM/PROTO_TYPE_DGRAM) */ + enum proto_type proto_type; /* protocol type at the socket layer (PROTO_TYPE_*) */ + int sock_type; /* socket type, as passed to socket() */ + int sock_prot; /* socket protocol, as passed to socket() */ + + /* functions acting on the listener */ + void (*add)(struct protocol *p, struct listener *l); /* add a listener for this protocol */ + int (*listen)(struct listener *l, char *errmsg, int errlen); /* start a listener */ + void (*enable)(struct listener *l); /* enable receipt of new connections */ + void (*disable)(struct listener *l); /* disable receipt of new connections */ + void (*unbind)(struct listener *l); /* unbind the listener and possibly its receiver */ + int (*suspend)(struct listener *l); /* try to suspend the listener */ + int (*resume)(struct listener *l); /* try to resume a suspended listener */ + struct connection *(*accept_conn)(struct listener *l, int *status); /* accept a new connection */ + + /* functions acting on connections */ + void (*ctrl_init)(struct connection *); /* completes initialization of the connection */ + void (*ctrl_close)(struct connection *); /* completes release of the connection */ + int (*connect)(struct connection *, int flags); /* connect function if any, see below for flags values */ + int (*drain)(struct connection *); /* drain pending data; 0=failed, >0=success */ + int (*check_events)(struct connection *conn, int event_type); /* subscribe to socket events */ + void (*ignore_events)(struct connection *conn, int event_type); /* unsubscribe from socket events */ + int (*get_src)(struct connection *conn, struct sockaddr *, socklen_t); /* retrieve connection's source address; -1=fail */ + int (*get_dst)(struct connection *conn, struct sockaddr *, socklen_t); /* retrieve connection's dest address; -1=fail 
*/ + int (*set_affinity)(struct connection *conn, int new_tid); + + /* functions acting on the receiver */ + int (*rx_suspend)(struct receiver *rx); /* temporarily suspend this receiver for a soft restart */ + int (*rx_resume)(struct receiver *rx); /* try to resume a temporarily suspended receiver */ + void (*rx_enable)(struct receiver *rx); /* enable receiving on the receiver */ + void (*rx_disable)(struct receiver *rx); /* disable receiving on the receiver */ + void (*rx_unbind)(struct receiver *rx); /* unbind the receiver, most often closing the FD */ + int (*rx_listening)(const struct receiver *rx); /* is the receiver listening ? 0=no, >0=OK, <0=unrecoverable */ + + /* default I/O handler */ + void (*default_iocb)(int fd); /* generic I/O handler (typically accept callback) */ + + uint flags; /* flags describing protocol support (PROTO_F_*) */ + uint nb_receivers; /* number of receivers (under proto_lock) */ + struct list receivers; /* list of receivers using this protocol (under proto_lock) */ + struct list list; /* list of registered protocols (under proto_lock) */ +}; + +#endif /* _HAPROXY_PROTOCOL_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/protocol.h b/include/haproxy/protocol.h new file mode 100644 index 0000000..828093d --- /dev/null +++ b/include/haproxy/protocol.h @@ -0,0 +1,111 @@ +/* + * include/haproxy/protocol.h + * This file declares generic protocol management primitives. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROTOCOL_H +#define _HAPROXY_PROTOCOL_H + +#include <sys/socket.h> +#include <haproxy/protocol-t.h> +#include <haproxy/thread.h> + +/* [AF][sock_dgram][ctrl_dgram] */ +extern struct protocol *__protocol_by_family[AF_CUST_MAX][PROTO_NUM_TYPES][2]; +__decl_thread(extern HA_SPINLOCK_T proto_lock); + +/* Registers the protocol <proto> */ +void protocol_register(struct protocol *proto); + +/* Unregisters the protocol <proto>. Note that all listeners must have + * previously been unbound. + */ +void protocol_unregister(struct protocol *proto); + +/* clears flag <flag> on all protocols. */ +void protocol_clrf_all(uint flag); + +/* sets flag <flag> on all protocols. */ +void protocol_setf_all(uint flag); + +/* Checks if protocol <proto> supports PROTO_F flag <flag>. Returns zero if not, + * non-zero if supported. It may return a cached value from a previous test, + * and may run live tests then update the proto's flags to cache a result. It's + * better to call it only if needed so that it doesn't result in modules being + * loaded in case of a live test. + */ +int protocol_supports_flag(struct protocol *proto, uint flag); + +/* binds all listeners of all registered protocols. Returns a composition + * of ERR_NONE, ERR_RETRYABLE, ERR_FATAL, ERR_ABORT. + */ +int protocol_bind_all(int verbose); + +/* unbinds all listeners of all registered protocols. They are also closed. + * This must be performed before calling exit() in order to get a chance to + * remove file-system based sockets and pipes. + * Returns a composition of ERR_NONE, ERR_RETRYABLE, ERR_FATAL. + */ +int protocol_unbind_all(void); + +/* stops all listeners of all registered protocols. 
This will normally catch + * every single listener, all protocols included. This is to be used during + * soft_stop() only. It does not return any error. + */ +void protocol_stop_now(void); + +/* pauses all listeners of all registered protocols. This is typically + * used on SIG_TTOU to release all listening sockets for the time needed to + * try to bind a new process. The listeners enter LI_PAUSED. It returns + * ERR_NONE, with ERR_FATAL on failure. + */ +int protocol_pause_all(void); + +/* resumes all listeners of all registered protocols. This is typically used on + * SIG_TTIN to re-enable listening sockets after a new process failed to bind. + * The listeners switch to LI_READY/LI_FULL. It returns ERR_NONE, with ERR_FATAL + * on failure. + */ +int protocol_resume_all(void); + +/* enables all listeners of all registered protocols. This is intended to be + * used after a fork() to enable reading on all file descriptors. Returns a + * composition of ERR_NONE, ERR_RETRYABLE, ERR_FATAL. + */ +int protocol_enable_all(void); + +/* returns the protocol associated to family <family> with proto_type among the + * supported protocol types, and ctrl_type of either SOCK_STREAM or SOCK_DGRAM + * depending on the requested values, or NULL if not found. + */ +static inline struct protocol *protocol_lookup(int family, enum proto_type proto_type, int ctrl_dgram) +{ + if (family >= 0 && family < AF_CUST_MAX) + return __protocol_by_family[family][proto_type][!!ctrl_dgram]; + return NULL; +} + +#endif /* _HAPROXY_PROTOCOL_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/proxy-t.h b/include/haproxy/proxy-t.h new file mode 100644 index 0000000..2f7bf7b --- /dev/null +++ b/include/haproxy/proxy-t.h @@ -0,0 +1,547 @@ +/* + * include/haproxy/proxy-t.h + * This file defines everything related to proxies. 
+ * + * Copyright (C) 2000-2011 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROXY_T_H +#define _HAPROXY_PROXY_T_H + +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/arg-t.h> +#include <haproxy/backend-t.h> +#include <haproxy/compression-t.h> +#include <haproxy/counters-t.h> +#include <haproxy/freq_ctr-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/queue-t.h> +#include <haproxy/server-t.h> +#include <haproxy/stats-t.h> +#include <haproxy/tcpcheck-t.h> +#include <haproxy/thread-t.h> +#include <haproxy/tools-t.h> +#include <haproxy/uri_auth-t.h> +#include <haproxy/http_ext-t.h> + +/* values for proxy->mode */ +enum pr_mode { + PR_MODE_TCP = 0, + PR_MODE_HTTP, + PR_MODE_CLI, + PR_MODE_SYSLOG, + PR_MODE_PEERS, + PR_MODES +} __attribute__((packed)); + +enum PR_SRV_STATE_FILE { + PR_SRV_STATE_FILE_UNSPEC = 0, + PR_SRV_STATE_FILE_NONE, + PR_SRV_STATE_FILE_GLOBAL, + PR_SRV_STATE_FILE_LOCAL, +}; + + +/* flag values for proxy->cap. 
This is a bitmask of capabilities supported by the proxy */ +#define PR_CAP_NONE 0x0000 +#define PR_CAP_FE 0x0001 +#define PR_CAP_BE 0x0002 +#define PR_CAP_LISTEN (PR_CAP_FE|PR_CAP_BE) +#define PR_CAP_DEF 0x0004 /* defaults section */ +#define PR_CAP_INT 0x0008 /* internal proxy (used by lua engine) */ +#define PR_CAP_LB 0x0010 /* load-balancing capabilities, i.e. listen/frontend/backend proxies */ +#define PR_CAP_HTTPCLIENT 0x0020 /* proxy used for httpclient */ + +/* bits for proxy->options */ +#define PR_O_REDISP 0x00000001 /* allow reconnection to dispatch in case of errors */ +#define PR_O_TRANSP 0x00000002 /* transparent mode : use original DEST as dispatch */ + +/* HTTP server-side reuse */ +#define PR_O_REUSE_NEVR 0x00000000 /* never reuse a shared connection */ +#define PR_O_REUSE_SAFE 0x00000004 /* only reuse a shared connection when it's safe to do so */ +#define PR_O_REUSE_AGGR 0x00000008 /* aggressively reuse a shared connection */ +#define PR_O_REUSE_ALWS 0x0000000C /* always reuse a shared connection */ +#define PR_O_REUSE_MASK 0x0000000C /* mask to retrieve shared connection preferences */ + +#define PR_O_IDLE_CLOSE_RESP 0x00000010 /* avoid closing idle connections during a soft stop */ +#define PR_O_PREF_LAST 0x00000020 /* prefer last server */ +#define PR_O_DISPATCH 0x00000040 /* use dispatch mode */ +#define PR_O_FORCED_ID 0x00000080 /* proxy's ID was forced in the configuration */ +/* unused: 0x00000100 */ +#define PR_O_IGNORE_PRB 0x00000200 /* ignore empty requests (aborts and timeouts) */ +#define PR_O_NULLNOLOG 0x00000400 /* a connect without request will not be logged */ +#define PR_O_WREQ_BODY 0x00000800 /* always wait for the HTTP request body */ +#define PR_O_HTTP_UPG 0x00001000 /* Contain a "switch-mode http" tcp-request rule */ +/* unused: 0x00002000 */ +#define PR_O_PERSIST 0x00004000 /* server persistence stays effective even when server is down */ +#define PR_O_LOGASAP 0x00008000 /* log as soon as possible, without waiting for the 
stream to complete */ +#define PR_O_ERR_LOGFMT 0x00010000 /* use log-format for connection error message */ +#define PR_O_CHK_CACHE 0x00020000 /* require examination of cacheability of the 'set-cookie' field */ +#define PR_O_TCP_CLI_KA 0x00040000 /* enable TCP keep-alive on client-side streams */ +#define PR_O_TCP_SRV_KA 0x00080000 /* enable TCP keep-alive on server-side streams */ +#define PR_O_USE_ALL_BK 0x00100000 /* load-balance between backup servers */ +/* unused: 0x00200000 */ +#define PR_O_TCP_NOLING 0x00400000 /* disable lingering on client and server connections */ +#define PR_O_ABRT_CLOSE 0x00800000 /* immediately abort request when client closes */ + +#define PR_O_HTTP_KAL 0x00000000 /* HTTP keep-alive mode (http-keep-alive) */ +#define PR_O_HTTP_CLO 0x01000000 /* HTTP close mode (httpclose) */ +#define PR_O_HTTP_SCL 0x02000000 /* HTTP server close mode (http-server-close) */ +#define PR_O_HTTP_MODE 0x03000000 /* MASK to retrieve the HTTP mode */ +/* unused: 0x04000000 */ + +#define PR_O_TCPCHK_SSL 0x08000000 /* at least one TCPCHECK connect rule requires SSL */ +#define PR_O_CONTSTATS 0x10000000 /* continuous counters */ +/* unused: 0x20000000 */ +#define PR_O_DISABLE404 0x40000000 /* Disable a server on a 404 response to a health-check */ +/* unused: 0x80000000 */ + +/* bits for proxy->options2 */ +#define PR_O2_SPLIC_REQ 0x00000001 /* transfer requests using linux kernel's splice() */ +#define PR_O2_SPLIC_RTR 0x00000002 /* transfer responses using linux kernel's splice() */ +#define PR_O2_SPLIC_AUT 0x00000004 /* automatically use linux kernel's splice() */ +#define PR_O2_SPLIC_ANY (PR_O2_SPLIC_REQ|PR_O2_SPLIC_RTR|PR_O2_SPLIC_AUT) +#define PR_O2_REQBUG_OK 0x00000008 /* let buggy requests pass through */ +#define PR_O2_RSPBUG_OK 0x00000010 /* let buggy responses pass through */ +#define PR_O2_NOLOGNORM 0x00000020 /* don't log normal traffic, only errors and retries */ +#define PR_O2_LOGERRORS 0x00000040 /* log errors and retries at level LOG_ERR */ 
+#define PR_O2_SMARTACC 0x00000080 /* don't immediately ACK request after accept */ +#define PR_O2_SMARTCON 0x00000100 /* don't immediately send empty ACK after connect */ +#define PR_O2_RDPC_PRST 0x00000200 /* Activate rdp cookie analyser */ +#define PR_O2_CLFLOG 0x00000400 /* log into clf format */ +#define PR_O2_LOGHCHKS 0x00000800 /* log health checks */ +#define PR_O2_INDEPSTR 0x00001000 /* independent streams, don't update rex on write */ +#define PR_O2_SOCKSTAT 0x00002000 /* collect & provide separate statistics for sockets */ + +#define PR_O2_H1_ADJ_BUGCLI 0x00008000 /* adjust the case of h1 headers of the response for bogus clients */ +#define PR_O2_H1_ADJ_BUGSRV 0x00004000 /* adjust the case of h1 headers of the request for bogus servers */ +#define PR_O2_NO_H2_UPGRADE 0x00010000 /* disable the implicit H2 upgrades from H1 client connections */ + +#define PR_O2_NODELAY 0x00020000 /* fully interactive mode, never delay outgoing data */ +#define PR_O2_USE_PXHDR 0x00040000 /* use Proxy-Connection for proxy requests */ +#define PR_O2_CHK_SNDST 0x00080000 /* send the state of each server along with HTTP health checks */ + +#define PR_O2_SRC_ADDR 0x00100000 /* get the source ip and port for logs */ + +#define PR_O2_FAKE_KA 0x00200000 /* pretend we do keep-alive with server even though we close */ + +#define PR_O2_RSTRICT_REQ_HDR_NAMES_BLK 0x00400000 /* reject request with header names containing chars outside of [0-9a-zA-Z-] charset */ +#define PR_O2_RSTRICT_REQ_HDR_NAMES_DEL 0x00800000 /* remove request header names containing chars outside of [0-9a-zA-Z-] charset */ +#define PR_O2_RSTRICT_REQ_HDR_NAMES_NOOP 0x01000000 /* preserve request header names containing chars outside of [0-9a-zA-Z-] charset */ +#define PR_O2_RSTRICT_REQ_HDR_NAMES_MASK 0x01c00000 /* mask for restrict-http-header-names option */ +/* unused : 0x02000000..0x08000000 */ + +/* server health checks */ +#define PR_O2_CHK_NONE 0x00000000 /* no L7 health checks configured (TCP by default) */ 
+#define PR_O2_TCPCHK_CHK 0x90000000 /* use TCPCHK check for server health */ +#define PR_O2_EXT_CHK 0xA0000000 /* use external command for server health */ +/* unused: 0xB0000000 to 0xF0000000, reserved for health checks */ +#define PR_O2_CHK_ANY 0xF0000000 /* Mask to cover any check */ +/* end of proxy->options2 */ + +/* Cookie settings for pr->ck_opts */ +#define PR_CK_RW 0x00000001 /* rewrite all direct cookies with the right serverid */ +#define PR_CK_IND 0x00000002 /* keep only indirect cookies */ +#define PR_CK_INS 0x00000004 /* insert cookies when not accessing a server directly */ +#define PR_CK_PFX 0x00000008 /* rewrite all cookies by prefixing the right serverid */ +#define PR_CK_ANY (PR_CK_RW | PR_CK_IND | PR_CK_INS | PR_CK_PFX) +#define PR_CK_NOC 0x00000010 /* add a 'Cache-control' header with the cookie */ +#define PR_CK_POST 0x00000020 /* don't insert cookies for requests other than a POST */ +#define PR_CK_PSV 0x00000040 /* cookie ... preserve */ +#define PR_CK_HTTPONLY 0x00000080 /* emit the "HttpOnly" attribute */ +#define PR_CK_SECURE 0x00000100 /* emit the "Secure" attribute */ +#define PR_CK_DYNAMIC 0x00000200 /* create dynamic cookies for each server */ + +/* bits for sticking rules */ +#define STK_IS_MATCH 0x00000001 /* match on request fetch */ +#define STK_IS_STORE 0x00000002 /* store on request fetch */ +#define STK_ON_RSP 0x00000004 /* store on response fetch */ + +/* diff bits for proxy_find_best_match */ +#define PR_FBM_MISMATCH_ID 0x01 +#define PR_FBM_MISMATCH_NAME 0x02 +#define PR_FBM_MISMATCH_PROXYTYPE 0x04 + +/* Bits for the different retry causes */ +#define PR_RE_CONN_FAILED 0x00000001 /* Retry if we failed to connect */ +#define PR_RE_DISCONNECTED 0x00000002 /* Retry if we got disconnected with no answer */ +#define PR_RE_TIMEOUT 0x00000004 /* Retry if we got a server timeout before we got any data */ +#define PR_RE_401 0x00000008 /* Retry if we got a 401 */ +#define PR_RE_403 0x00000010 /* Retry if we got a 403 */ +#define 
PR_RE_404 0x00000020 /* Retry if we got a 404 */ +#define PR_RE_408 0x00000040 /* Retry if we got a 408 */ +#define PR_RE_425 0x00000080 /* Retry if we got a 425 */ +#define PR_RE_500 0x00000100 /* Retry if we got a 500 */ +#define PR_RE_501 0x00000200 /* Retry if we got a 501 */ +#define PR_RE_502 0x00000400 /* Retry if we got a 502 */ +#define PR_RE_503 0x00000800 /* Retry if we got a 503 */ +#define PR_RE_504 0x00001000 /* Retry if we got a 504 */ +#define PR_RE_STATUS_MASK (PR_RE_401 | PR_RE_403 | PR_RE_404 | \ + PR_RE_408 | PR_RE_425 | PR_RE_500 | \ + PR_RE_501 | PR_RE_502 | PR_RE_503 | \ + PR_RE_504) +/* 0x00002000, 0x00004000 and 0x00008000 unused, + * reserved for eventual future status codes + */ +#define PR_RE_EARLY_ERROR 0x00010000 /* Retry if we failed at sending early data */ +#define PR_RE_JUNK_REQUEST 0x00020000 /* We received an incomplete or garbage response */ + +/* Proxy flags */ +#define PR_FL_DISABLED 0x01 /* The proxy was disabled in the configuration (not at runtime) */ +#define PR_FL_STOPPED 0x02 /* The proxy was stopped */ +#define PR_FL_READY 0x04 /* The proxy is ready to be used (initialized and configured) */ +#define PR_FL_EXPLICIT_REF 0x08 /* The default proxy is explicitly referenced by another proxy */ +#define PR_FL_IMPLICIT_REF 0x10 /* The default proxy is implicitly referenced by another proxy */ +#define PR_FL_PAUSED 0x20 /* The proxy was paused at run time (reversible) */ + +struct stream; + +struct http_snapshot { + unsigned int sid; /* ID of the faulty stream */ + unsigned int state; /* message state before the error (when saved) */ + unsigned int b_flags; /* buffer flags */ + unsigned int s_flags; /* stream flags */ + + unsigned int t_flags; /* transaction flags */ + unsigned int m_flags; /* message flags */ + unsigned long long m_clen; /* chunk len for this message */ + unsigned long long m_blen; /* body len for this message */ +}; + +struct h1_snapshot { + unsigned int state; /* H1 message state when 
the error occurred */ + unsigned int c_flags; /* H1 connection flags */ + unsigned int s_flags; /* H1 stream flags */ + unsigned int m_flags; /* H1 message flags */ + unsigned long long m_clen; /* chunk len for this message */ + unsigned long long m_blen; /* body len for this message */ +}; + +union error_snapshot_ctx { + struct http_snapshot http; + struct h1_snapshot h1; +}; + +struct error_snapshot { + /**** common part ****/ + struct timeval when; /* date of this event, (tv_sec == 0) means "never" */ + /* @16 */ + void (*show)(struct buffer *, const struct error_snapshot *); /* dump function */ + unsigned long long buf_ofs; /* relative position of the buffer's input inside its container */ + /* @32 */ + unsigned int buf_out; /* pending output bytes _before_ the buffer's input (0..buf->data-1) */ + unsigned int buf_len; /* original length of the last invalid request/response (0..buf->data-1-buf_out) */ + unsigned int buf_err; /* buffer-relative position where the error was detected (0..len-1) */ + unsigned int buf_wrap; /* buffer-relative position where the buffer is expected to wrap (1..buf_size) */ + /* @48 */ + struct proxy *oe; /* other end = frontend or backend involved */ + struct server *srv; /* server associated with the error (or NULL) */ + /* @64 */ + unsigned int ev_id; /* event number (counter incremented for each capture) */ + /* @68: 4 bytes hole here */ + struct sockaddr_storage src; /* client's address */ + + /**** protocol-specific part ****/ + union error_snapshot_ctx ctx; + char buf[VAR_ARRAY]; /* copy of the beginning of the message for bufsize bytes */ +}; + +struct proxy { + enum obj_type obj_type; /* object type == OBJ_TYPE_PROXY */ + char flags; /* bit field PR_FL_* */ + enum pr_mode mode; /* mode = PR_MODE_TCP, PR_MODE_HTTP, ... */ + char cap; /* supported capabilities (PR_CAP_*) */ + unsigned int maxconn; /* max # of active streams on the frontend */ + + int options; /* PR_O_REDISP, PR_O_TRANSP, ... 
*/ + int options2; /* PR_O2_* */ + unsigned int ck_opts; /* PR_CK_* (cookie options) */ + unsigned int fe_req_ana, be_req_ana; /* bitmap of common request protocol analysers for the frontend and backend */ + unsigned int fe_rsp_ana, be_rsp_ana; /* bitmap of common response protocol analysers for the frontend and backend */ + unsigned int http_needed; /* non-null if HTTP analyser may be used */ + union { + struct proxy *be; /* default backend, or NULL if none set */ + char *name; /* default backend name during config parse */ + } defbe; + struct proxy *defpx; /* default proxy used to init this one (may be NULL) */ + struct list acl; /* ACL declared on this proxy */ + struct list http_req_rules; /* HTTP request rules: allow/deny/... */ + struct list http_res_rules; /* HTTP response rules: allow/deny/... */ + struct list http_after_res_rules; /* HTTP final response rules: set-header/del-header/... */ + struct list redirect_rules; /* content redirecting rules (chained) */ + struct list switching_rules; /* content switching rules (chained) */ + struct list persist_rules; /* 'force-persist' and 'ignore-persist' rules (chained) */ + struct list sticking_rules; /* content sticking rules (chained) */ + struct list storersp_rules; /* content store response rules (chained) */ + struct list server_rules; /* server switching rules (chained) */ + struct { /* TCP request processing */ + unsigned int inspect_delay; /* inspection delay */ + struct list inspect_rules; /* inspection rules */ + struct list l4_rules; /* layer4 rules */ + struct list l5_rules; /* layer5 rules */ + } tcp_req; + struct { /* TCP request processing */ + unsigned int inspect_delay; /* inspection delay */ + struct list inspect_rules; /* inspection rules */ + } tcp_rep; + struct server *srv, defsrv; /* known servers; default server configuration */ + struct lbprm lbprm; /* load-balancing parameters */ + int srv_act, srv_bck; /* # of servers eligible for LB (UP|!checked) AND (enabled+weight!=0) */ + int served; 
/* # of active sessions currently being served */ + int cookie_len; /* strlen(cookie_name), computed only once */ + char *cookie_domain; /* domain used to insert the cookie */ + char *cookie_name; /* name of the cookie to look for */ + char *cookie_attrs; /* list of attributes to add to the cookie */ + char *dyncookie_key; /* Secret key used to generate dynamic persistent cookies */ + unsigned int cookie_maxidle; /* max idle time for this cookie */ + unsigned int cookie_maxlife; /* max life time for this cookie */ + char *rdp_cookie_name; /* name of the RDP cookie to look for */ + char *capture_name; /* beginning of the name of the cookie to capture */ + int rdp_cookie_len; /* strlen(rdp_cookie_name), computed only once */ + int capture_namelen; /* length of the cookie name to match */ + struct uri_auth *uri_auth; /* if non-NULL, the (list of) per-URI authentications */ + int capture_len; /* length of the string to be captured */ + int max_out_conns; /* Max number of idling connections we keep for a session */ + int max_ka_queue; /* 1+maximum requests in queue accepted for reusing a K-A conn (0=none) */ + int clitcpka_cnt; /* The maximum number of keepalive probes TCP should send before dropping the connection. (client side) */ + int clitcpka_idle; /* The time (in seconds) the connection needs to remain idle before TCP starts sending keepalive probes. (client side) */ + int clitcpka_intvl; /* The time (in seconds) between individual keepalive probes. (client side) */ + int srvtcpka_cnt; /* The maximum number of keepalive probes TCP should send before dropping the connection. (server side) */ + int srvtcpka_idle; /* The time (in seconds) the connection needs to remain idle before TCP starts sending keepalive probes. (server side) */ + int srvtcpka_intvl; /* The time (in seconds) between individual keepalive probes. 
(server side) */ + struct ist monitor_uri; /* a special URI to which we respond with HTTP/200 OK */ + struct list mon_fail_cond; /* list of conditions to fail monitoring requests (chained) */ + struct { /* WARNING! check proxy_reset_timeouts() in proxy.h !!! */ + int client; /* client I/O timeout (in ticks) */ + int tarpit; /* tarpit timeout, defaults to connect if unspecified */ + int queue; /* queue timeout, defaults to connect if unspecified */ + int connect; /* connect timeout (in ticks) */ + int server; /* server I/O timeout (in ticks) */ + int client_hs; /* maximum time for client handshake completion */ + int httpreq; /* maximum time for complete HTTP request */ + int httpka; /* maximum time for a new HTTP request when using keep-alive */ + int check; /* maximum time for complete check */ + int tunnel; /* I/O timeout to use in tunnel mode (in ticks) */ + int clientfin; /* timeout to apply to client half-closed connections */ + int serverfin; /* timeout to apply to server half-closed connections */ + } timeout; + __decl_thread(HA_RWLOCK_T lock); /* may be taken under the server's lock */ + + char *id, *desc; /* proxy id (name) and description */ + struct queue queue; /* queued requests (pendconns) */ + int totpend; /* total number of pending connections on this instance (for stats) */ + unsigned int feconn, beconn; /* # of active frontend and backends streams */ + struct freq_ctr fe_req_per_sec; /* HTTP requests per second on the frontend */ + struct freq_ctr fe_conn_per_sec; /* received connections per second on the frontend */ + struct freq_ctr fe_sess_per_sec; /* accepted sessions per second on the frontend (after tcp rules) */ + struct freq_ctr be_sess_per_sec; /* sessions per second on the backend */ + unsigned int fe_sps_lim; /* limit on new sessions per second on the frontend */ + unsigned int fullconn; /* #conns on backend above which servers are used at full load */ + unsigned int tot_fe_maxconn; /* #maxconn of frontends linked to that backend, it is 
used to compute fullconn */ + struct ist server_id_hdr_name; /* the header to use to send the server id (name) */ + int conn_retries; /* maximum number of connect retries */ + unsigned int retry_type; /* Type of retry allowed */ + int redispatch_after; /* number of retries before redispatch */ + unsigned down_trans; /* up-down transitions */ + unsigned down_time; /* total time the proxy was down */ + time_t last_change; /* last time, when the state was changed */ + int (*accept)(struct stream *s); /* application layer's accept() */ + struct conn_src conn_src; /* connection source settings */ + enum obj_type *default_target; /* default target to use for accepted streams or NULL */ + struct proxy *next; + struct proxy *next_stkt_ref; /* Link to the list of proxies which refer to the same stick-table. */ + + struct list loggers; /* one per 'log' directive */ + struct list logformat; /* log_format linked list */ + struct list logformat_sd; /* log_format linked list for the RFC5424 structured-data part */ + struct list logformat_error; /* log_format linked list used in case of connection error on the frontend */ + struct buffer log_tag; /* override default syslog tag */ + struct ist header_unique_id; /* unique-id header */ + struct list format_unique_id; /* unique-id format */ + int to_log; /* things to be logged (LW_*) */ + int nb_req_cap, nb_rsp_cap; /* # of headers to be captured */ + struct cap_hdr *req_cap; /* chained list of request headers to be captured */ + struct cap_hdr *rsp_cap; /* chained list of response headers to be captured */ + struct pool_head *req_cap_pool, /* pools of pre-allocated char ** used to build the streams */ + *rsp_cap_pool; + struct be_counters be_counters; /* backend statistics counters */ + struct fe_counters fe_counters; /* frontend statistics counters */ + + struct mt_list listener_queue; /* list of the temporarily limited listeners because of lack of a proxy resource */ + struct stktable *table; /* table for storing sticking streams 
*/ + + struct task *task; /* the associated task, mandatory to manage rate limiting, stopping and resource shortage, NULL if disabled */ + struct tcpcheck_rules tcpcheck_rules; /* tcp-check send / expect rules */ + char *check_command; /* Command to use for external agent checks */ + char *check_path; /* PATH environment to use for external agent checks */ + struct http_reply *replies[HTTP_ERR_SIZE]; /* HTTP replies for known errors */ + unsigned int log_count; /* number of logs produced by the frontend */ + int uuid; /* universally unique proxy ID, used for SNMP */ + unsigned int backlog; /* force the frontend's listen backlog */ + unsigned int li_all; /* total number of listeners attached to this proxy */ + unsigned int li_paused; /* total number of listeners paused (LI_PAUSED) */ + unsigned int li_bound; /* total number of listeners ready (LI_LISTEN) */ + unsigned int li_ready; /* total number of listeners ready (>=LI_READY) */ + unsigned int li_suspended; /* total number of listeners suspended (could be paused or unbound) */ + + /* warning: these structs are huge, keep them at the bottom */ + struct sockaddr_storage dispatch_addr; /* the default address to connect to */ + struct error_snapshot *invalid_req, *invalid_rep; /* captures of last errors */ + + /* used only during configuration parsing */ + int no_options; /* PR_O_REDISP, PR_O_TRANSP, ... 
*/ + int no_options2; /* PR_O2_* */ + + struct { + char *file; /* file where the section appears */ + struct eb32_node id; /* place in the tree of used IDs */ + int line; /* line where the section appears */ + struct eb_root used_listener_id;/* list of listener IDs in use */ + struct eb_root used_server_id; /* list of server IDs in use */ + struct eb_root used_server_name; /* list of server names in use */ + struct list bind; /* list of bind settings */ + struct list listeners; /* list of listeners belonging to this frontend */ + struct list errors; /* list of all custom error files */ + struct arg_list args; /* sample arg list that need to be resolved */ + unsigned int refcount; /* refcount on this proxy (only used for default proxy for now) */ + struct ebpt_node by_name; /* proxies are stored sorted by name here */ + char *logformat_string; /* log format string */ + char *lfs_file; /* file name where the logformat string appears (strdup) */ + int lfs_line; /* file name where the logformat string appears */ + int uif_line; /* file name where the unique-id-format string appears */ + char *uif_file; /* file name where the unique-id-format string appears (strdup) */ + char *uniqueid_format_string; /* unique-id format string */ + char *logformat_sd_string; /* log format string for the RFC5424 structured-data part */ + char *lfsd_file; /* file name where the structured-data logformat string for RFC5424 appears (strdup) */ + int lfsd_line; /* file name where the structured-data logformat string for RFC5424 appears */ + char *error_logformat_string; + char *elfs_file; + int elfs_line; + } conf; /* config information */ + struct http_ext *http_ext; /* http ext options */ + struct eb_root used_server_addr; /* list of server addresses in use */ + void *parent; /* parent of the proxy when applicable */ + struct comp *comp; /* http compression */ + + struct { + union { + struct mailers *m; /* Mailer to send email alerts via */ + char *name; + } mailers; + char *from; /* 
Address to send email alerts from */ + char *to; /* Address(es) to send email alerts to */ + char *myhostname; /* Identity to use in HELO command sent to mailer */ + int level; /* Maximum syslog level of messages to send + * email alerts for */ + int set; /* True if email_alert settings are present */ + struct email_alertq *queues; /* per-mailer alerts queues */ + } email_alert; + + int load_server_state_from_file; /* location of the file containing server state. + * flag PR_SRV_STATE_FILE_* */ + char *server_state_file_name; /* used when load_server_state_from_file is set to + * PR_SRV_STATE_FILE_LOCAL. Give a specific file name for + * this backend. If not specified or void, then the backend + * name is used + */ + struct list filter_configs; /* list of the filters that are declared on this proxy */ + + EXTRA_COUNTERS(extra_counters_fe); + EXTRA_COUNTERS(extra_counters_be); +}; + +struct switching_rule { + struct list list; /* list linked to from the proxy */ + struct acl_cond *cond; /* acl condition to meet */ + int dynamic; /* this is a dynamic rule using the logformat expression */ + union { + struct proxy *backend; /* target backend */ + char *name; /* target backend name during config parsing */ + struct list expr; /* logformat expression to use for dynamic rules */ + } be; + char *file; + int line; +}; + +struct server_rule { + struct list list; /* list linked to from the proxy */ + struct acl_cond *cond; /* acl condition to meet */ + int dynamic; + union { + struct server *ptr; /* target server */ + char *name; /* target server name during config parsing */ + } srv; + struct list expr; /* logformat expression to use for dynamic rules */ + char *file; + int line; +}; + +struct persist_rule { + struct list list; /* list linked to from the proxy */ + struct acl_cond *cond; /* acl condition to meet */ + int type; +}; + +struct sticking_rule { + struct list list; /* list linked to from the proxy */ + struct acl_cond *cond; /* acl condition to meet */ + struct 
sample_expr *expr; /* fetch expr to fetch key */ + int flags; /* STK_* */ + union { + struct stktable *t; /* target table */ + char *name; /* target table name during config parsing */ + } table; +}; + + +struct redirect_rule { + struct list list; /* list linked to from the proxy */ + struct acl_cond *cond; /* acl condition to meet */ + int type; + int rdr_len; + char *rdr_str; + struct list rdr_fmt; + int code; + unsigned int flags; + int cookie_len; + char *cookie_str; +}; + +/* some of the most common options which are also the easiest to handle */ +struct cfg_opt { + const char *name; + unsigned int val; + unsigned int cap; + unsigned int checks; + unsigned int mode; +}; + +#endif /* _HAPROXY_PROXY_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/proxy.h b/include/haproxy/proxy.h new file mode 100644 index 0000000..efdfa21 --- /dev/null +++ b/include/haproxy/proxy.h @@ -0,0 +1,264 @@ +/* + * include/haproxy/proxy.h + * This file defines function prototypes for proxy management. + * + * Copyright (C) 2000-2011 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_PROXY_H +#define _HAPROXY_PROXY_H + +#include <haproxy/api.h> +#include <haproxy/applet-t.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/list.h> +#include <haproxy/listener-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +#include <haproxy/ticks.h> +#include <haproxy/thread.h> + +extern struct proxy *proxies_list; +extern struct eb_root used_proxy_id; /* list of proxy IDs in use */ +extern unsigned int error_snapshot_id; /* global ID assigned to each error then incremented */ +extern struct eb_root proxy_by_name; /* tree of proxies sorted by name */ + +extern const struct cfg_opt cfg_opts[]; +extern const struct cfg_opt cfg_opts2[]; + +struct task *manage_proxy(struct task *t, void *context, unsigned int state); +void proxy_cond_pause(struct proxy *p); +void proxy_cond_resume(struct proxy *p); +void proxy_cond_disable(struct proxy *p); +void soft_stop(void); +int pause_proxy(struct proxy *p); +int resume_proxy(struct proxy *p); +void stop_proxy(struct proxy *p); +int stream_set_backend(struct stream *s, struct proxy *be); + +void free_proxy(struct proxy *p); +const char *proxy_cap_str(int cap); +const char *proxy_mode_str(int mode); +const char *proxy_find_best_option(const char *word, const char **extra); +void proxy_store_name(struct proxy *px); +struct proxy *proxy_find_by_id(int id, int cap, int table); +struct proxy *proxy_find_by_name(const char *name, int cap, int table); +struct proxy *proxy_find_best_match(int cap, const char *name, int id, int *diff); +struct server *findserver(const struct proxy *px, const char *name); +struct server *findserver_unique_id(const struct proxy *px, int puid, uint32_t rid); +struct server *findserver_unique_name(const struct proxy *px, const char *name, 
uint32_t rid); +int proxy_cfg_ensure_no_http(struct proxy *curproxy); +int proxy_cfg_ensure_no_log(struct proxy *curproxy); +void init_new_proxy(struct proxy *p); +void proxy_preset_defaults(struct proxy *defproxy); +void proxy_free_defaults(struct proxy *defproxy); +void proxy_destroy_defaults(struct proxy *px); +void proxy_destroy_all_unref_defaults(void); +void proxy_ref_defaults(struct proxy *px, struct proxy *defpx); +void proxy_unref_defaults(struct proxy *px); +struct proxy *alloc_new_proxy(const char *name, unsigned int cap, + char **errmsg); +struct proxy *parse_new_proxy(const char *name, unsigned int cap, + const char *file, int linenum, + const struct proxy *defproxy); +void proxy_capture_error(struct proxy *proxy, int is_back, + struct proxy *other_end, enum obj_type *target, + const struct session *sess, + const struct buffer *buf, long buf_ofs, + unsigned int buf_out, unsigned int err_pos, + const union error_snapshot_ctx *ctx, + void (*show)(struct buffer *, const struct error_snapshot *)); +void proxy_adjust_all_maxconn(void); +struct proxy *cli_find_frontend(struct appctx *appctx, const char *arg); +struct proxy *cli_find_frontend(struct appctx *appctx, const char *arg); +int resolve_stick_rule(struct proxy *curproxy, struct sticking_rule *mrule); +void free_stick_rules(struct list *rules); +void free_server_rules(struct list *srules); + +/* + * This function returns a string containing the type of the proxy in a format + * suitable for error messages, from its capabilities. + */ +static inline const char *proxy_type_str(struct proxy *proxy) +{ + if (proxy->mode == PR_MODE_PEERS) + return "peers section"; + return proxy_cap_str(proxy->cap); +} + +/* Find the frontend having name <name>. The name may also start with a '#' to + * reference a numeric id. NULL is returned if not found. + */ +static inline struct proxy *proxy_fe_by_name(const char *name) +{ + return proxy_find_by_name(name, PR_CAP_FE, 0); +} + +/* Find the backend having name <name>. 
The name may also start with a '#' to + * reference a numeric id. NULL is returned if not found. + */ +static inline struct proxy *proxy_be_by_name(const char *name) +{ + return proxy_find_by_name(name, PR_CAP_BE, 0); +} + +/* this function initializes all timeouts for proxy p */ +static inline void proxy_reset_timeouts(struct proxy *proxy) +{ + proxy->timeout.client = TICK_ETERNITY; + proxy->timeout.tarpit = TICK_ETERNITY; + proxy->timeout.queue = TICK_ETERNITY; + proxy->timeout.connect = TICK_ETERNITY; + proxy->timeout.server = TICK_ETERNITY; + proxy->timeout.httpreq = TICK_ETERNITY; + proxy->timeout.check = TICK_ETERNITY; + proxy->timeout.tunnel = TICK_ETERNITY; +} + +/* increase the number of cumulated connections received on the designated frontend */ +static inline void proxy_inc_fe_conn_ctr(struct listener *l, struct proxy *fe) +{ + _HA_ATOMIC_INC(&fe->fe_counters.cum_conn); + if (l && l->counters) + _HA_ATOMIC_INC(&l->counters->cum_conn); + HA_ATOMIC_UPDATE_MAX(&fe->fe_counters.cps_max, + update_freq_ctr(&fe->fe_conn_per_sec, 1)); +} + +/* increase the number of cumulated connections accepted by the designated frontend */ +static inline void proxy_inc_fe_sess_ctr(struct listener *l, struct proxy *fe) +{ + + _HA_ATOMIC_INC(&fe->fe_counters.cum_sess); + if (l && l->counters) + _HA_ATOMIC_INC(&l->counters->cum_sess); + HA_ATOMIC_UPDATE_MAX(&fe->fe_counters.sps_max, + update_freq_ctr(&fe->fe_sess_per_sec, 1)); +} + +/* increase the number of cumulated HTTP sessions on the designated frontend. + * <http_ver> must be the HTTP version for such requests. 
+ */ +static inline void proxy_inc_fe_cum_sess_ver_ctr(struct listener *l, struct proxy *fe, + unsigned int http_ver) +{ + if (http_ver == 0 || + http_ver > sizeof(fe->fe_counters.cum_sess_ver) / sizeof(*fe->fe_counters.cum_sess_ver)) + return; + + _HA_ATOMIC_INC(&fe->fe_counters.cum_sess_ver[http_ver - 1]); + if (l && l->counters) + _HA_ATOMIC_INC(&l->counters->cum_sess_ver[http_ver - 1]); +} + +/* increase the number of cumulated connections on the designated backend */ +static inline void proxy_inc_be_ctr(struct proxy *be) +{ + _HA_ATOMIC_INC(&be->be_counters.cum_conn); + HA_ATOMIC_UPDATE_MAX(&be->be_counters.sps_max, + update_freq_ctr(&be->be_sess_per_sec, 1)); +} + +/* increase the number of cumulated requests on the designated frontend. + * <http_ver> must be the HTTP version for HTTP request. 0 may be provided + * for others requests. + */ +static inline void proxy_inc_fe_req_ctr(struct listener *l, struct proxy *fe, + unsigned int http_ver) +{ + if (http_ver >= sizeof(fe->fe_counters.p.http.cum_req) / sizeof(*fe->fe_counters.p.http.cum_req)) + return; + + _HA_ATOMIC_INC(&fe->fe_counters.p.http.cum_req[http_ver]); + if (l && l->counters) + _HA_ATOMIC_INC(&l->counters->p.http.cum_req[http_ver]); + HA_ATOMIC_UPDATE_MAX(&fe->fe_counters.p.http.rps_max, + update_freq_ctr(&fe->fe_req_per_sec, 1)); +} + +/* Returns non-zero if the proxy is configured to retry a request if we got that status, 0 otherwise */ +static inline int l7_status_match(struct proxy *p, int status) +{ + /* Just return 0 if no retry was configured for any status */ + if (!(p->retry_type & PR_RE_STATUS_MASK)) + return 0; + + switch (status) { + case 401: + return (p->retry_type & PR_RE_401); + case 403: + return (p->retry_type & PR_RE_403); + case 404: + return (p->retry_type & PR_RE_404); + case 408: + return (p->retry_type & PR_RE_408); + case 425: + return (p->retry_type & PR_RE_425); + case 500: + return (p->retry_type & PR_RE_500); + case 501: + return (p->retry_type & PR_RE_501); + case 
502: + return (p->retry_type & PR_RE_502); + case 503: + return (p->retry_type & PR_RE_503); + case 504: + return (p->retry_type & PR_RE_504); + default: + break; + } + return 0; +} + +/* Return 1 if <p> proxy is in <list> list of proxies which are also stick-tables, + * 0 if not. + */ +static inline int in_proxies_list(struct proxy *list, struct proxy *proxy) +{ + struct proxy *p; + + for (p = list; p; p = p->next_stkt_ref) + if (proxy == p) + return 1; + + return 0; +} + +/* Add <bytes> to the global total bytes sent and adjust the send rate. Set + * <splice> if this was sent usigin splicing. + */ +static inline void increment_send_rate(uint64_t bytes, int splice) +{ + /* We count the total bytes sent, and the send rate for 32-byte blocks. + * The reason for the latter is that freq_ctr are limited to 4GB and + * that it's not enough per second. + */ + + if (splice) + _HA_ATOMIC_ADD(&th_ctx->spliced_out_bytes, bytes); + _HA_ATOMIC_ADD(&th_ctx->out_bytes, bytes); + update_freq_ctr(&th_ctx->out_32bps, (bytes + 16) / 32); +} + +#endif /* _HAPROXY_PROXY_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/qmux_http.h b/include/haproxy/qmux_http.h new file mode 100644 index 0000000..a7dbe7c --- /dev/null +++ b/include/haproxy/qmux_http.h @@ -0,0 +1,17 @@ +#ifndef _HAPROXY_MUX_QUIC_HTTP_H +#define _HAPROXY_MUX_QUIC_HTTP_H + +#ifdef USE_QUIC + +#include <haproxy/buf.h> +#include <haproxy/mux_quic.h> + +size_t qcs_http_rcv_buf(struct qcs *qcs, struct buffer *buf, size_t count, + char *fin); +size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count, + char *fin); +size_t qcs_http_reset_buf(struct qcs *qcs, struct buffer *buf, size_t count); + +#endif /* USE_QUIC */ + +#endif /* _HAPROXY_MUX_QUIC_HTTP_H */ diff --git a/include/haproxy/qmux_trace.h b/include/haproxy/qmux_trace.h new file mode 100644 index 0000000..49759a3 --- /dev/null +++ b/include/haproxy/qmux_trace.h @@ -0,0 +1,73 @@ 
+#ifndef _HAPROXY_QMUX_TRACE_H +#define _HAPROXY_QMUX_TRACE_H + +#ifdef USE_QUIC + +#include <haproxy/api-t.h> +#include <haproxy/trace.h> + +extern struct trace_source trace_qmux; +#define TRACE_SOURCE &trace_qmux + +static const struct trace_event qmux_trace_events[] = { +#define QMUX_EV_QCC_NEW (1ULL << 0) + { .mask = QMUX_EV_QCC_NEW , .name = "qcc_new", .desc = "new QUIC connection" }, +#define QMUX_EV_QCC_RECV (1ULL << 1) + { .mask = QMUX_EV_QCC_RECV, .name = "qcc_recv", .desc = "Rx on QUIC connection" }, +#define QMUX_EV_QCC_SEND (1ULL << 2) + { .mask = QMUX_EV_QCC_SEND, .name = "qcc_send", .desc = "Tx on QUIC connection" }, +#define QMUX_EV_QCC_WAKE (1ULL << 3) + { .mask = QMUX_EV_QCC_WAKE, .name = "qcc_wake", .desc = "QUIC connection woken up" }, +#define QMUX_EV_QCC_END (1ULL << 4) + { .mask = QMUX_EV_QCC_END, .name = "qcc_end", .desc = "QUIC connection terminated" }, +#define QMUX_EV_QCC_NQCS (1ULL << 5) + { .mask = QMUX_EV_QCC_NQCS, .name = "qcc_no_qcs", .desc = "QUIC stream not found" }, +#define QMUX_EV_QCS_NEW (1ULL << 6) + { .mask = QMUX_EV_QCS_NEW, .name = "qcs_new", .desc = "new QUIC stream" }, +#define QMUX_EV_QCS_RECV (1ULL << 7) + { .mask = QMUX_EV_QCS_RECV, .name = "qcs_recv", .desc = "Rx on QUIC stream" }, +#define QMUX_EV_QCS_SEND (1ULL << 8) + { .mask = QMUX_EV_QCS_SEND, .name = "qcs_send", .desc = "Tx on QUIC stream" }, +#define QMUX_EV_QCS_END (1ULL << 9) + { .mask = QMUX_EV_QCS_END, .name = "qcs_end", .desc = "QUIC stream terminated" }, +#define QMUX_EV_STRM_RECV (1ULL << 10) + { .mask = QMUX_EV_STRM_RECV, .name = "strm_recv", .desc = "receiving data for stream" }, +#define QMUX_EV_STRM_SEND (1ULL << 11) + { .mask = QMUX_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" }, +#define QMUX_EV_STRM_WAKE (1ULL << 12) + { .mask = QMUX_EV_STRM_WAKE, .name = "strm_wake", .desc = "stream woken up" }, +#define QMUX_EV_STRM_SHUT (1ULL << 13) + { .mask = QMUX_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" }, +#define 
QMUX_EV_STRM_END (1ULL << 14) + { .mask = QMUX_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" }, +#define QMUX_EV_SEND_FRM (1ULL << 15) + { .mask = QMUX_EV_SEND_FRM, .name = "send_frm", .desc = "sending QUIC frame" }, +/* special event dedicated to qcs_xfer_data */ +#define QMUX_EV_QCS_XFER_DATA (1ULL << 16) + { .mask = QMUX_EV_QCS_XFER_DATA, .name = "qcs_xfer_data", .desc = "qcs_xfer_data" }, +/* special event dedicated to qcs_build_stream_frm */ +#define QMUX_EV_QCS_BUILD_STRM (1ULL << 17) + { .mask = QMUX_EV_QCS_BUILD_STRM, .name = "qcs_build_stream_frm", .desc = "qcs_build_stream_frm" }, +#define QMUX_EV_PROTO_ERR (1ULL << 18) + { .mask = QMUX_EV_PROTO_ERR, .name = "proto_err", .desc = "protocol error" }, +#define QMUX_EV_QCC_ERR (1ULL << 19) + { .mask = QMUX_EV_QCC_ERR, .name = "qcc_err", .desc = "connection on error" }, + { } +}; + +/* custom arg for QMUX_EV_QCS_XFER_DATA */ +struct qcs_xfer_data_trace_arg { + size_t prep; + int xfer; +}; + +/* custom arg for QMUX_EV_QCS_BUILD_STRM */ +struct qcs_build_stream_trace_arg { + size_t len; + char fin; + uint64_t offset; +}; + +#endif /* USE_QUIC */ + +#endif /* _HAPROXY_QMUX_TRACE_H */ diff --git a/include/haproxy/qpack-dec.h b/include/haproxy/qpack-dec.h new file mode 100644 index 0000000..993f450 --- /dev/null +++ b/include/haproxy/qpack-dec.h @@ -0,0 +1,51 @@ +/* + * QPACK decompressor + * + * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QPACK_DEC_H +#define _HAPROXY_QPACK_DEC_H + +struct buffer; +struct http_hdr; + +/* Internal QPACK processing errors. + *Nothing to see with the RFC. + */ +enum { + QPACK_ERR_NONE = 0, /* no error */ + QPACK_ERR_RIC, /* cannot decode Required Insert Count prefix field */ + QPACK_ERR_DB, /* cannot decode Delta Base prefix field */ + QPACK_ERR_TRUNCATED, /* truncated stream */ + QPACK_ERR_HUFFMAN, /* huffman decoding error */ + QPACK_ERR_TOO_LARGE, /* decoded request/response is too large */ +}; + +struct qpack_dec { + /* Insert count */ + uint64_t ic; + /* Known received count */ + uint64_t krc; +}; + +int qpack_decode_fs(const unsigned char *buf, uint64_t len, struct buffer *tmp, + struct http_hdr *list, int list_size); +int qpack_decode_enc(struct buffer *buf, int fin, void *ctx); +int qpack_decode_dec(struct buffer *buf, int fin, void *ctx); + +#endif /* _HAPROXY_QPACK_DEC_H */ diff --git a/include/haproxy/qpack-enc.h b/include/haproxy/qpack-enc.h new file mode 100644 index 0000000..0126937 --- /dev/null +++ b/include/haproxy/qpack-enc.h @@ -0,0 +1,12 @@ +#ifndef QPACK_ENC_H_ +#define QPACK_ENC_H_ + +#include <haproxy/istbuf.h> + +struct buffer; + +int qpack_encode_field_section_line(struct buffer *out); +int qpack_encode_int_status(struct buffer *out, unsigned int status); +int qpack_encode_header(struct buffer *out, const struct ist n, const struct ist v); + +#endif /* QPACK_ENC_H_ */ diff --git a/include/haproxy/qpack-t.h b/include/haproxy/qpack-t.h new file mode 100644 index 0000000..0e1736a --- /dev/null +++ b/include/haproxy/qpack-t.h @@ -0,0 +1,47 @@ +/* + * include/haproxy/qpack-t.h + * This file contains types for QPACK + * + * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + 
* + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QPACK_T_H +#define _HAPROXY_QPACK_T_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +/* Encoder */ +/* Instruction bitmask */ +#define QPACK_ENC_INST_BITMASK 0xf0 +/* Instructions */ +#define QPACK_ENC_INST_DUP 0x00 // Duplicate +#define QPACK_ENC_INST_SDTC_BIT 0x20 // Set Dynamic Table Capacity +#define QPACK_ENC_INST_IWLN_BIT 0x40 // Insert With Literal Name +#define QPACK_ENC_INST_IWNR_BIT 0x80 // Insert With Name Reference + +/* Decoder */ +/* Instructions bitmask */ +#define QPACK_DEC_INST_BITMASK 0xf0 +/* Instructions */ +#define QPACK_DEC_INST_ICINC 0x00 // Insert Count Increment +#define QPACK_DEC_INST_SCCL 0x40 // Stream Cancellation +#define QPACK_DEC_INST_SACK 0x80 // Section Acknowledgment + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QPACK_T_H */ diff --git a/include/haproxy/qpack-tbl-t.h b/include/haproxy/qpack-tbl-t.h new file mode 100644 index 0000000..c27c623 --- /dev/null +++ b/include/haproxy/qpack-tbl-t.h @@ -0,0 +1,65 @@ +/* + * QPACK header table management (draft-ietf-quic-qpack-20) - type definitions + * + * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software 
and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _HAPROXY_QPACK_TBL_T_H +#define _HAPROXY_QPACK_TBL_T_H + +/* + * Gcc before 3.0 needs [0] to declare a variable-size array + */ +#ifndef VAR_ARRAY +#if defined(__GNUC__) && (__GNUC__ < 3) +#define VAR_ARRAY 0 +#else +#define VAR_ARRAY +#endif +#endif + +/* One dynamic table entry descriptor */ +struct qpack_dte { + uint32_t addr; /* storage address, relative to the dte address */ + uint16_t nlen; /* header name length */ + uint16_t vlen; /* header value length */ +}; + +/* Note: the table's head plus a struct qpack_dte must be smaller than or equal to 32 + * bytes so that a single large header can always fit. Here that's 16 bytes for + * the header, plus 8 bytes per slot. + * Note that when <used> == 0, front, head, and wrap are undefined. 
+ */ +struct qpack_dht { + uint32_t size; /* allocated table size in bytes */ + uint32_t total; /* sum of nlen + vlen in bytes */ + uint16_t front; /* slot number of the first node after the idx table */ + uint16_t wrap; /* number of allocated slots, wraps here */ + uint16_t head; /* last inserted slot number */ + uint16_t used; /* number of slots in use */ + struct qpack_dte dte[VAR_ARRAY]; /* dynamic table entries */ +}; + +/* static header table as in draft-ietf-quic-qpack-20 Appendix A. [0] unused. */ +#define QPACK_SHT_SIZE 99 + +#endif /* _HAPROXY_QPACK_TBL_T_H */ diff --git a/include/haproxy/qpack-tbl.h b/include/haproxy/qpack-tbl.h new file mode 100644 index 0000000..05f3ab4 --- /dev/null +++ b/include/haproxy/qpack-tbl.h @@ -0,0 +1,170 @@ +/* + * QPACK header table management - prototypes + * + * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _HAPROXY_QPACK_TBL_H +#define _HAPROXY_QPACK_TBL_H + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/qpack-tbl-t.h> +#include <haproxy/http-hdr-t.h> + +/* when built outside of haproxy, QPACK_STANDALONE must be defined, and + * pool_head_qpack_tbl->size must be set to the DHT size. + */ +#ifndef QPACK_STANDALONE +#include <haproxy/pool.h> +#define qpack_alloc(pool) pool_alloc(pool) +#define qpack_free(pool, ptr) pool_free(pool, ptr) +#else +#include <stdlib.h> +#include <haproxy/pool-t.h> +#define qpack_alloc(pool) malloc((pool)->size) +#define qpack_free(pool, ptr) free(ptr) +#endif + +extern const struct http_hdr qpack_sht[QPACK_SHT_SIZE]; +extern struct pool_head *pool_head_qpack_tbl; + +int __qpack_dht_make_room(struct qpack_dht *dht, unsigned int needed); +int qpack_dht_insert(struct qpack_dht *dht, struct ist name, struct ist value); + +#ifdef DEBUG_QPACK +void qpack_dht_dump(FILE *out, const struct qpack_dht *dht); +void qpack_dht_check_consistency(const struct qpack_dht *dht); +#endif + +/* return a pointer to the entry designated by index <idx> (starting at 0) or + * NULL if this index is not there. Indexes above 65535 are rejected, not truncated. + */ +static inline const struct qpack_dte *qpack_get_dte(const struct qpack_dht *dht, uint32_t idx) +{ + if (idx >= dht->used) + return NULL; + + return &dht->dte[idx]; +} + +/* returns non-zero if <idx> is valid for table <dht> */ +static inline int qpack_valid_idx(const struct qpack_dht *dht, uint32_t idx) +{ + return idx < dht->used; +} + +/* return the header name of entry <dte> as an ist pointing into the storage of <dht>. */ +static inline struct ist qpack_get_name(const struct qpack_dht *dht, const struct qpack_dte *dte) +{ + struct ist ret = { + .ptr = (void *)dht + dte->addr, + .len = dte->nlen, + }; + return ret; +} + +/* return a pointer to the header value for entry <dte>. 
*/ +static inline struct ist qpack_get_value(const struct qpack_dht *dht, const struct qpack_dte *dte) +{ + struct ist ret = { + .ptr = (void *)dht + dte->addr + dte->nlen, + .len = dte->vlen, + }; + return ret; +} + +/* takes an idx, returns the associated name */ +static inline struct ist qpack_idx_to_name(const struct qpack_dht *dht, uint32_t idx) +{ + const struct qpack_dte *dte; + + dte = qpack_get_dte(dht, idx); + if (!dte) + return ist("### ERR ###"); // error + + return qpack_get_name(dht, dte); +} + +/* takes an idx, returns the associated value */ +static inline struct ist qpack_idx_to_value(const struct qpack_dht *dht, uint32_t idx) +{ + const struct qpack_dte *dte; + + dte = qpack_get_dte(dht, idx); + if (!dte) + return ist("### ERR ###"); // error + + return qpack_get_value(dht, dte); +} + +/* returns the slot number of the oldest entry (tail). Must not be used on an + * empty table. + */ +static inline unsigned int qpack_dht_get_tail(const struct qpack_dht *dht) +{ + return ((dht->head + 1U < dht->used) ? dht->wrap : 0) + dht->head + 1U - dht->used; +} + +/* Purges table dht until a header field of <needed> bytes fits according to + * the protocol (adding 32 bytes overhead). Returns non-zero on success, zero + * on failure (ie: table empty but still not sufficient). 
+ */ +static inline int qpack_dht_make_room(struct qpack_dht *dht, unsigned int needed) +{ + if (dht->used * 32 + dht->total + needed + 32 <= dht->size) /* already fits */ + return 1; + else if (!dht->used) /* table empty: nothing left to purge */ + return 0; + + return __qpack_dht_make_room(dht, needed); +} + +/* initialize the already allocated table <dht> as an empty table of <size> bytes */ +static inline void qpack_dht_init(struct qpack_dht *dht, uint32_t size) +{ + dht->size = size; + dht->total = 0; + dht->used = 0; +} + +/* allocate a table from the pool and return it initialized; NULL if no pool or on allocation failure */ +static inline struct qpack_dht *qpack_dht_alloc(void) +{ + struct qpack_dht *dht; + + if (unlikely(!pool_head_qpack_tbl)) + return NULL; + + dht = qpack_alloc(pool_head_qpack_tbl); + if (dht) + qpack_dht_init(dht, pool_head_qpack_tbl->size); + return dht; +} + +/* free a dynamic headers table */ +static inline void qpack_dht_free(struct qpack_dht *dht) +{ + qpack_free(pool_head_qpack_tbl, dht); +} + +#endif /* _HAPROXY_QPACK_TBL_H */ diff --git a/include/haproxy/queue-t.h b/include/haproxy/queue-t.h new file mode 100644 index 0000000..8f6a1ec --- /dev/null +++ b/include/haproxy/queue-t.h @@ -0,0 +1,59 @@ +/* + * include/haproxy/queue-t.h + * This file defines variables and structures needed for queues. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUEUE_T_H +#define _HAPROXY_QUEUE_T_H + +#include <import/ebtree-t.h> +#include <haproxy/api-t.h> + +struct proxy; +struct server; +struct stream; +struct queue; + +struct pendconn { + int strm_flags; /* stream flags */ + unsigned int queue_idx; /* value of proxy/server queue_idx at time of enqueue */ + struct stream *strm; + struct queue *queue; /* the queue the entry is queued into */ + struct server *target; /* the server that was assigned, = srv except if srv==NULL */ + struct eb32_node node; + __decl_thread(HA_SPINLOCK_T del_lock); /* use before removal, always under queue's lock */ +}; + +struct queue { + struct eb_root head; /* queued pendconnds */ + struct proxy *px; /* the proxy we're waiting for, never NULL in queue */ + struct server *sv; /* the server we are waiting for, may be NULL if don't care */ + __decl_thread(HA_SPINLOCK_T lock); /* for manipulations in the tree */ + unsigned int idx; /* current queuing index */ + unsigned int length; /* number of entries */ +}; + +#endif /* _HAPROXY_QUEUE_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/queue.h b/include/haproxy/queue.h new file mode 100644 index 0000000..e77370c --- /dev/null +++ b/include/haproxy/queue.h @@ -0,0 +1,134 @@ +/* + * include/haproxy/queue.h + * This file defines everything related to queues. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUEUE_H +#define _HAPROXY_QUEUE_H + +#include <haproxy/api.h> +#include <haproxy/backend.h> +#include <haproxy/pool.h> +#include <haproxy/proxy-t.h> +#include <haproxy/queue-t.h> +#include <haproxy/server-t.h> +#include <haproxy/stream-t.h> + +extern struct pool_head *pool_head_pendconn; + +struct pendconn *pendconn_add(struct stream *strm); +int pendconn_dequeue(struct stream *strm); +void process_srv_queue(struct server *s); +unsigned int srv_dynamic_maxconn(const struct server *s); +int pendconn_redistribute(struct server *s); +int pendconn_grab_from_px(struct server *s); +void pendconn_unlink(struct pendconn *p); + +/* Removes the pendconn from the server/proxy queue. It supports being called + * with NULL for pendconn and with a pendconn not in the list. It is the + * function to be used by default when unsure. Do not call it with server + * or proxy locks held however. Warning: this is called from stream_free() + * which may run concurrently with pendconn_process_next_strm() which can be + * dequeuing the entry. The function must not return until the pendconn is + * guaranteed not to be known, which means that we must check its presence + * in the tree under the queue's lock so that pendconn_process_next_strm() + * finishes before we return in case it would have grabbed this pendconn. See + * github bugs #880 and #908, and the commit log for this fix for more details. 
+ */ +static inline void pendconn_cond_unlink(struct pendconn *p) +{ + if (p) + pendconn_unlink(p); +} + +/* Releases the pendconn associated to stream <s> if it has any, and decreases + * the pending count if needed. The connection might have been queued to a + * specific server as well as to the proxy. The stream also gets marked + * unqueued. + * + * This function must be called by the stream itself, so in the context of + * process_stream, without any lock held among the pendconn, the server's queue + * nor the proxy's queue. + */ +static inline void pendconn_free(struct stream *s) +{ + struct pendconn *p = s->pend_pos; + + if (p) { + pendconn_cond_unlink(p); + s->pend_pos = NULL; + pool_free(pool_head_pendconn, p); + } +} + +/* Returns 0 if all slots are full on a server, or 1 if there are slots available. */ +static inline int server_has_room(const struct server *s) { + return !s->maxconn || s->cur_sess < srv_dynamic_maxconn(s); +} + +/* returns 0 if nothing has to be done for server <s> regarding queued connections, + * and non-zero otherwise. If the server is down, we only check its own queue. Suited + * for and if/else usage. + */ +static inline int may_dequeue_tasks(const struct server *s, const struct proxy *p) { + return (s && (s->queue.length || (p->queue.length && srv_currently_usable(s))) && + (!s->maxconn || s->cur_sess < srv_dynamic_maxconn(s))); +} + +static inline int queue_limit_class(int class) +{ + if (class < -0x7ff) + return -0x7ff; + if (class > 0x7ff) + return 0x7ff; + return class; +} + +static inline int queue_limit_offset(int offset) +{ + if (offset < -0x7ffff) + return -0x7ffff; + if (offset > 0x7ffff) + return 0x7ffff; + return offset; +} + +/* initialize the queue <queue> for proxy <px> and server <sv>. A server's + * always has both a valid proxy and a valid server. A proxy's queue only + * has a valid proxy and NULL for the server queue. This is how they're + * distinguished during operations. 
+ */ +static inline void queue_init(struct queue *queue, struct proxy *px, struct server *sv) +{ + queue->head = EB_ROOT; + queue->length = 0; + queue->idx = 0; + queue->px = px; + queue->sv = sv; /* NULL for a proxy's queue */ + HA_SPIN_INIT(&queue->lock); +} + +#endif /* _HAPROXY_QUEUE_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/quic_ack-t.h b/include/haproxy/quic_ack-t.h new file mode 100644 index 0000000..95b77f1 --- /dev/null +++ b/include/haproxy/quic_ack-t.h @@ -0,0 +1,43 @@ +/* + * include/haproxy/quic_ack-t.h + * Definitions for QUIC acknowledgements internal types, constants and flags. + * + * Copyright (C) 2023 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#ifndef _HAPROXY_QUIC_ACK_T_H +#define _HAPROXY_QUIC_ACK_T_H + +/* The maximum number of ack ranges to be built in ACK frames */ +#define QUIC_MAX_ACK_RANGES 32 + +/* Structure to maintain a set of ACK ranges to be used to build ACK frames. */ +struct quic_arngs { + /* ebtree of ACK ranges organized by their first value. */ + struct eb_root root; + /* The number of ACK ranges in this tree */ + size_t sz; + /* The number of bytes required to encode this list of ACK ranges. */ + size_t enc_sz; +}; + +/* Structure to hold a range of ACKs sent in ACK frames. */ +struct quic_arng { + int64_t first; + int64_t last; +}; + +/* Structure to hold a range of ACKs to be stored as a node in a tree of + * ACK ranges. 
+ */ +struct quic_arng_node { + struct eb64_node first; + uint64_t last; +}; + +#endif /* _HAPROXY_QUIC_ACK_T_H */ diff --git a/include/haproxy/quic_ack.h b/include/haproxy/quic_ack.h new file mode 100644 index 0000000..540e2c0 --- /dev/null +++ b/include/haproxy/quic_ack.h @@ -0,0 +1,23 @@ +/* + * include/haproxy/quic_ack.h + * This file provides prototypes for QUIC acknowledgements handling. + * + * Copyright (C) 2023 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _HAPROXY_QUIC_ACK_H +#define _HAPROXY_QUIC_ACK_H + +void quic_free_arngs(struct quic_conn *qc, struct quic_arngs *arngs); +int quic_update_ack_ranges_list(struct quic_conn *qc, + struct quic_arngs *arngs, + struct quic_arng *ar); +void qc_treat_ack_of_ack(struct quic_conn *qc, struct quic_arngs *arngs, + int64_t largest_acked_pn); + +#endif /* _HAPROXY_QUIC_ACK_H */ diff --git a/include/haproxy/quic_cc-t.h b/include/haproxy/quic_cc-t.h new file mode 100644 index 0000000..888efca --- /dev/null +++ b/include/haproxy/quic_cc-t.h @@ -0,0 +1,123 @@ +/* + * include/haproxy/quic_cc-t.h + * This file contains definitions for QUIC congestion control. + * + * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_CC_H +#define _HAPROXY_QUIC_CC_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <inttypes.h> +#include <stddef.h> /* size_t */ + +#include <haproxy/buf-t.h> +#include <haproxy/quic_loss-t.h> + +#define QUIC_CC_INFINITE_SSTHESH ((uint32_t)-1) + +extern struct quic_cc_algo quic_cc_algo_nr; +extern struct quic_cc_algo quic_cc_algo_cubic; +extern struct quic_cc_algo *default_quic_cc_algo; + +/* Fake algorithm with its fixed window */ +extern struct quic_cc_algo quic_cc_algo_nocc; + +extern unsigned long long last_ts; + +enum quic_cc_algo_state_type { + /* Slow start. */ + QUIC_CC_ST_SS, + /* Congestion avoidance. */ + QUIC_CC_ST_CA, + /* Recovery period. */ + QUIC_CC_ST_RP, +}; + +enum quic_cc_event_type { + /* ACK receipt. */ + QUIC_CC_EVT_ACK, + /* Packet loss. */ + QUIC_CC_EVT_LOSS, + /* ECN-CE. */ + QUIC_CC_EVT_ECN_CE, +}; + +struct quic_cc_event { + enum quic_cc_event_type type; + union { + struct ack { + uint64_t acked; + unsigned int time_sent; + } ack; + struct loss { + unsigned int time_sent; + } loss; + }; +}; + +enum quic_cc_algo_type { + QUIC_CC_ALGO_TP_NEWRENO, + QUIC_CC_ALGO_TP_CUBIC, + QUIC_CC_ALGO_TP_NOCC, +}; + +struct quic_cc { + /* <conn> is there only for debugging purpose. */ + struct quic_conn *qc; + struct quic_cc_algo *algo; + uint32_t priv[16]; +}; + +struct quic_cc_path { + /* Control congestion. */ + struct quic_cc cc; + /* Packet loss detection information. */ + struct quic_loss loss; + + /* MTU. */ + size_t mtu; + /* Congestion window. */ + uint64_t cwnd; + /* The current maximum congestion window value reached. */ + uint64_t mcwnd; + /* The maximum congestion window value which can be reached. 
*/ + uint64_t max_cwnd; + /* Minimum congestion window. */ + uint64_t min_cwnd; + /* Prepared data to be sent (in bytes). */ + uint64_t prep_in_flight; + /* Outstanding data (in bytes). */ + uint64_t in_flight; + /* Number of in flight ack-eliciting packets. */ + uint64_t ifae_pkts; +}; + +struct quic_cc_algo { + enum quic_cc_algo_type type; + int (*init)(struct quic_cc *cc); + void (*event)(struct quic_cc *cc, struct quic_cc_event *ev); + void (*slow_start)(struct quic_cc *cc); + void (*state_trace)(struct buffer *buf, const struct quic_cc *cc); +}; + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_CC_H */ diff --git a/include/haproxy/quic_cc.h b/include/haproxy/quic_cc.h new file mode 100644 index 0000000..721feca --- /dev/null +++ b/include/haproxy/quic_cc.h @@ -0,0 +1,112 @@ +/* + * include/proto/quic_cc.h + * This file contains prototypes for QUIC congestion control. + * + * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_QUIC_CC_H +#define _PROTO_QUIC_CC_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/quic_cc-t.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_loss.h> + +void quic_cc_init(struct quic_cc *cc, struct quic_cc_algo *algo, struct quic_conn *qc); +void quic_cc_event(struct quic_cc *cc, struct quic_cc_event *ev); +void quic_cc_state_trace(struct buffer *buf, const struct quic_cc *cc); + +static inline const char *quic_cc_state_str(enum quic_cc_algo_state_type state) +{ + switch (state) { + case QUIC_CC_ST_SS: + return "ss"; + case QUIC_CC_ST_CA: + return "ca"; + case QUIC_CC_ST_RP: + return "rp"; + default: + return "unknown"; + } +} + +/* Append to <buf> a human readable description of <ev> congestion control event. */ +static inline void quic_cc_event_trace(struct buffer *buf, const struct quic_cc_event *ev) +{ + chunk_appendf(buf, " event="); + switch (ev->type) { + case QUIC_CC_EVT_ACK: + chunk_appendf(buf, "ack acked=%llu time_sent:%dms", + (unsigned long long)ev->ack.acked, TICKS_TO_MS(tick_remain(ev->ack.time_sent, now_ms))); + break; + case QUIC_CC_EVT_LOSS: + chunk_appendf(buf, "loss time_sent=%dms", TICKS_TO_MS(tick_remain(ev->loss.time_sent, now_ms))); + break; + case QUIC_CC_EVT_ECN_CE: + chunk_appendf(buf, "ecn_ce"); + break; + } +} + +static inline void *quic_cc_priv(const struct quic_cc *cc) +{ + return (void *)cc->priv; +} + +/* Initialize <path> QUIC network path depending on <ipv4> boolean + * which is true for an IPv4 path and false for an IPv6 path. 
+ */ +static inline void quic_cc_path_init(struct quic_cc_path *path, int ipv4, unsigned long max_cwnd, + struct quic_cc_algo *algo, struct quic_conn *qc) +{ + unsigned int max_dgram_sz; + + max_dgram_sz = ipv4 ? QUIC_INITIAL_IPV4_MTU : QUIC_INITIAL_IPV6_MTU; + quic_loss_init(&path->loss); + path->mtu = max_dgram_sz; + path->cwnd = QUIC_MIN(10 * max_dgram_sz, QUIC_MAX(max_dgram_sz << 1, 14720U)); + path->mcwnd = path->cwnd; + path->max_cwnd = max_cwnd; + path->min_cwnd = max_dgram_sz << 1; + path->prep_in_flight = 0; + path->in_flight = 0; + path->ifae_pkts = 0; + quic_cc_init(&path->cc, algo, qc); +} + +/* Return the remaining <room> available on <path> QUIC path for prepared data + * (before being sent). Almost the same that for the QUIC path room, except that + * here this is the data which have been prepared which are taken into an account. + */ +static inline size_t quic_cc_path_prep_data(struct quic_cc_path *path) +{ + if (path->prep_in_flight > path->cwnd) + return 0; + + return path->cwnd - path->prep_in_flight; +} + + +#endif /* USE_QUIC */ +#endif /* _PROTO_QUIC_CC_H */ diff --git a/include/haproxy/quic_cid-t.h b/include/haproxy/quic_cid-t.h new file mode 100644 index 0000000..ccce844 --- /dev/null +++ b/include/haproxy/quic_cid-t.h @@ -0,0 +1,38 @@ +#ifndef _HAPROXY_QUIC_CID_T_H +#define _HAPROXY_QUIC_CID_T_H + +#include <import/ebtree-t.h> +#include <haproxy/quic_tp-t.h> + +/* QUIC connection ID maximum length for version 1. */ +#define QUIC_CID_MAXLEN 20 /* bytes */ + +/* QUIC connection id data. + * + * This struct is used by ebmb_node structs as last member of flexible arrays. + * So do not change the order of the member of quic_cid struct. + * <data> member must be the first one. + */ +struct quic_cid { + unsigned char data[QUIC_CID_MAXLEN]; + unsigned char len; /* size of QUIC CID */ +}; + +/* QUIC connection id attached to a QUIC connection. + * + * This structure is used to match received packets DCIDs with the + * corresponding QUIC connection. 
+ */ +struct quic_connection_id { + struct eb64_node seq_num; + uint64_t retire_prior_to; + unsigned char stateless_reset_token[QUIC_STATELESS_RESET_TOKEN_LEN]; + + struct ebmb_node node; /* node for receiver tree, cid.data as key */ + struct quic_cid cid; /* CID data */ + + struct quic_conn *qc; /* QUIC connection using this CID */ + uint tid; /* Attached Thread ID for the connection. */ +}; + +#endif /* _HAPROXY_QUIC_CID_T_H */ diff --git a/include/haproxy/quic_cid.h b/include/haproxy/quic_cid.h new file mode 100644 index 0000000..482a020 --- /dev/null +++ b/include/haproxy/quic_cid.h @@ -0,0 +1,110 @@ +#ifndef _HAPROXY_QUIC_CID_H +#define _HAPROXY_QUIC_CID_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <import/ebmbtree.h> + +#include <haproxy/buf-t.h> +#include <haproxy/chunk.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_rx-t.h> +#include <haproxy/proto_quic.h> + +struct quic_connection_id *new_quic_cid(struct eb_root *root, + struct quic_conn *qc, + const struct quic_cid *orig, + const struct sockaddr_storage *addr); +int quic_get_cid_tid(const unsigned char *cid, size_t cid_len, + const struct sockaddr_storage *cli_addr, + unsigned char *pos, size_t len); +struct quic_cid quic_derive_cid(const struct quic_cid *orig, + const struct sockaddr_storage *addr); +struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt, + struct sockaddr_storage *saddr, + int *new_tid); +int qc_build_new_connection_id_frm(struct quic_conn *qc, + struct quic_connection_id *conn_id); + +/* Copy <src> QUIC CID to <dst>. + * This is the responsibility of the caller to check there is enough room in + * <dst> to copy <src>. + * Always succeeds. + */ +static inline void quic_cid_cpy(struct quic_cid *dst, const struct quic_cid *src) +{ + memcpy(dst->data, src->data, src->len); + dst->len = src->len; +} + +/* Dump the QUIC connection ID value if present (non null length). Used only for + * debugging purposes. 
+ * Always succeeds. + */ +static inline void quic_cid_dump(struct buffer *buf, + const struct quic_cid *cid) +{ + int i; + + chunk_appendf(buf, "(%d", cid->len); + if (cid->len) + chunk_appendf(buf, ","); + for (i = 0; i < cid->len; i++) + chunk_appendf(buf, "%02x", cid->data[i]); + chunk_appendf(buf, ")"); +} + +/* Return tree index where <cid> is stored. */ +static inline uchar _quic_cid_tree_idx(const unsigned char *cid) +{ + return cid[0]; +} + +/* Return tree index where <cid> is stored. */ +static inline uchar quic_cid_tree_idx(const struct quic_cid *cid) +{ + return _quic_cid_tree_idx(cid->data); +} + +/* Insert <conn_id> into global CID tree as a thread-safe operation. */ +static inline void quic_cid_insert(struct quic_connection_id *conn_id) +{ + const uchar idx = quic_cid_tree_idx(&conn_id->cid); + struct quic_cid_tree *tree = &quic_cid_trees[idx]; + + HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock); + ebmb_insert(&tree->root, &conn_id->node, conn_id->cid.len); + HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock); +} + +/* Remove <conn_id> from global CID tree as a thread-safe operation. */ +static inline void quic_cid_delete(struct quic_connection_id *conn_id) +{ + const uchar idx = quic_cid_tree_idx(&conn_id->cid); + struct quic_cid_tree __maybe_unused *tree = &quic_cid_trees[idx]; + + HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock); + ebmb_delete(&conn_id->node); + HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock); +} + +/* Copy <src> new connection ID information to <dst> NEW_CONNECTION_ID frame. + * Always succeeds. 
+ */ +static inline void quic_connection_id_to_frm_cpy(struct quic_frame *dst, + struct quic_connection_id *src) +{ + struct qf_new_connection_id *ncid_frm = &dst->new_connection_id; + + ncid_frm->seq_num = src->seq_num.key; + ncid_frm->retire_prior_to = src->retire_prior_to; + ncid_frm->cid.len = src->cid.len; + ncid_frm->cid.data = src->cid.data; /* stores a pointer into <src>, no byte copy */ + ncid_frm->stateless_reset_token = src->stateless_reset_token; /* same: pointer into <src> */ +} + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_CID_H */ diff --git a/include/haproxy/quic_cli-t.h b/include/haproxy/quic_cli-t.h new file mode 100644 index 0000000..6f95899 --- /dev/null +++ b/include/haproxy/quic_cli-t.h @@ -0,0 +1,18 @@ +/* + * include/haproxy/quic_cli-t.h + * Definitions for QUIC CLI internal types, constants and flags. + * + * Copyright (C) 2023 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#ifndef _HAPROXY_QUIC_CLI_T_H +#define _HAPROXY_QUIC_CLI_T_H + +extern unsigned int qc_epoch; + +#endif /* _HAPROXY_QUIC_CLI_T_H */ diff --git a/include/haproxy/quic_conn-t.h b/include/haproxy/quic_conn-t.h new file mode 100644 index 0000000..8aec6f0 --- /dev/null +++ b/include/haproxy/quic_conn-t.h @@ -0,0 +1,446 @@ +/* + * include/haproxy/quic_conn-t.h + * + * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_CONN_T_H +#define _HAPROXY_QUIC_CONN_T_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <sys/socket.h> + +#include <haproxy/cbuf-t.h> +#include <haproxy/list.h> + +#include <haproxy/openssl-compat.h> +#include <haproxy/mux_quic-t.h> +#include <haproxy/quic_cid-t.h> +#include <haproxy/quic_cc-t.h> +#include <haproxy/quic_loss-t.h> +#include <haproxy/quic_openssl_compat-t.h> +#include <haproxy/quic_stats-t.h> +#include <haproxy/quic_tls-t.h> +#include <haproxy/quic_tp-t.h> +#include <haproxy/task.h> + +#include <import/ebtree-t.h> + +typedef unsigned long long ull; + +#define QUIC_PROTOCOL_VERSION_DRAFT_29 0xff00001d /* draft-29 */ +#define QUIC_PROTOCOL_VERSION_1 0x00000001 /* V1 */ +#define QUIC_PROTOCOL_VERSION_2 0x6b3343cf /* V2 */ + +#define QUIC_INITIAL_IPV4_MTU 1252 /* (bytes) */ +#define QUIC_INITIAL_IPV6_MTU 1232 + +/* The minimum length of Initial packets. */ +#define QUIC_INITIAL_PACKET_MINLEN 1200 + +/* Lengths of the QUIC CIDs generated by the haproxy implementation. Current + * value is used to match 64 bits hash produced when deriving ODCID. + */ +#define QUIC_HAP_CID_LEN 8 + +/* Common definitions for short and long QUIC packet headers. 
*/ +/* QUIC original destination connection ID minimal length */ +#define QUIC_ODCID_MINLEN 8 /* bytes */ +/* + * All QUIC packets with long headers are made of at least (in bytes): + * flags(1), version(4), DCID length(1), DCID(0..20), SCID length(1), SCID(0..20) + */ +#define QUIC_LONG_PACKET_MINLEN 7 +/* DCID offset from beginning of a long packet */ +#define QUIC_LONG_PACKET_DCID_OFF (1 + sizeof(uint32_t)) +/* + * All QUIC packets with short headers are made of at least (in bytes): + * flags(1), DCID(0..20) + */ +#define QUIC_SHORT_PACKET_MINLEN 1 +/* DCID offset from beginning of a short packet */ +#define QUIC_SHORT_PACKET_DCID_OFF 1 + +/* Byte 0 of QUIC packets. */ +#define QUIC_PACKET_LONG_HEADER_BIT 0x80 /* Long header format if set, short if not. */ +#define QUIC_PACKET_FIXED_BIT 0x40 /* Must always be set for all the headers. */ + +/* Tokens formats */ +/* Format for Retry tokens sent by a QUIC server */ +#define QUIC_TOKEN_FMT_RETRY 0x9c +/* Format for token sent for new connections after a Retry token was sent */ +#define QUIC_TOKEN_FMT_NEW 0xb7 +/* Retry token duration */ +#define QUIC_RETRY_DURATION_SEC 10 +/* Default Retry threshold */ +#define QUIC_DFLT_RETRY_THRESHOLD 100 /* in connection openings */ +/* Default ratio value applied to a dynamic Packet reorder threshold. */ +#define QUIC_DFLT_REORDER_RATIO 50 /* in percent */ +/* Default limit of loss detection on a single frame. If exceeded, connection is closed. */ +#define QUIC_DFLT_MAX_FRAME_LOSS 10 + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+ + * |1|1|T|T|X|X|X|X| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Version (32) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | DCID Len (8) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Destination Connection ID (0..160) ...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SCID Len (8) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Source Connection ID (0..160) ... + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * Long Header Packet Format + */ + +/* Two bits (T) for QUIC packet types. */ +#define QUIC_PACKET_TYPE_BITMASK 0x03 +#define QUIC_PACKET_TYPE_SHIFT 4 + +enum quic_pkt_type { + QUIC_PACKET_TYPE_INITIAL, + QUIC_PACKET_TYPE_0RTT, + QUIC_PACKET_TYPE_HANDSHAKE, + QUIC_PACKET_TYPE_RETRY, + /* + * The following one is not defined by the RFC but we define it for our + * own convenience. + */ + QUIC_PACKET_TYPE_SHORT, + + /* unknown type */ + QUIC_PACKET_TYPE_UNKNOWN +}; + +/* Packet number field length. */ +#define QUIC_PACKET_PNL_BITMASK 0x03 +#define QUIC_PACKET_PN_MAXLEN 4 + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+ + * |0|1|S|R|R|K|P|P| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Destination Connection ID (0..160) ... + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Packet Number (8/16/24/32) ... + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Protected Payload (*) ... + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * Short Header Packet Format + */ + +/* Bit (S) of short header. */ +#define QUIC_PACKET_SPIN_BIT 0x20 + +/* Reserved Bits (R): The next two bits of byte 0 are reserved. + * These bits are protected using header protection + * (see Section 5.4 of [QUIC-TLS]). The value included + * prior to protection MUST be set to 0. An endpoint MUST treat + * receipt of a packet that has a non-zero value for these bits, + * after removing both packet and header protection, as a connection + * error of type PROTOCOL_VIOLATION. 
Discarding such a packet after + * only removing header protection can expose the endpoint to attacks + * (see Section 9.3 of [QUIC-TLS]). + */ +#define QUIC_PACKET_RESERVED_BITS 0x18 /* (protected) */ + +#define QUIC_PACKET_KEY_PHASE_BIT 0x04 /* (protected) */ + +/* The maximum number of QUIC packets stored by the fd I/O handler by QUIC + * connection. Must be a power of two. + */ +#define QUIC_CONN_MAX_PACKET 64 + +#define QUIC_STATELESS_RESET_PACKET_HEADER_LEN 5 +#define QUIC_STATELESS_RESET_PACKET_MINLEN (22 + QUIC_HAP_CID_LEN) + +/* Similar to kernel min()/max() definitions. */ +#define QUIC_MIN(a, b) ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + (void) (&_a == &_b); \ + _a < _b ? _a : _b; }) + +#define QUIC_MAX(a, b) ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + (void) (&_a == &_b); \ + _a > _b ? _a : _b; }) + +/* Size of the QUIC RX buffer for the connections */ +#define QUIC_CONN_RX_BUFSZ (1UL << 16) + +struct quic_version { + uint32_t num; + const unsigned char *initial_salt; + size_t initial_salt_len; + const unsigned char *key_label; + size_t key_label_len; + const unsigned char *iv_label; + size_t iv_label_len; + const unsigned char *hp_label; + size_t hp_label_len; + const unsigned char *ku_label; + size_t ku_label_len; + /* Retry tag */ + const unsigned char *retry_tag_key; + const unsigned char *retry_tag_nonce; +}; + +extern const struct quic_version quic_versions[]; +extern const size_t quic_versions_nb; +extern const struct quic_version *preferred_version; + +/* unused: 0x01 */ +/* Flag the packet number space as requiring an ACK frame to be sent. 
*/ +#define QUIC_FL_PKTNS_ACK_REQUIRED (1UL << 1) +/* Flag the packet number space as needing probing */ +#define QUIC_FL_PKTNS_PROBE_NEEDED (1UL << 2) +/* Flag the packet number space as having received a packet with a new largest + * packet number, to be acknowledged + */ +#define QUIC_FL_PKTNS_NEW_LARGEST_PN (1UL << 3) + +/* The maximum number of dgrams which may be sent upon PTO expirations. */ +#define QUIC_MAX_NB_PTO_DGRAMS 2 + +/* The QUIC packet numbers are 62-bits integers */ +#define QUIC_MAX_PACKET_NUM ((1ULL << 62) - 1) + +/* The maximum number of bytes of CRYPTO data in flight during handshakes. */ +#define QUIC_CRYPTO_IN_FLIGHT_MAX 4096 + +/* Status of the connection/mux layer. This defines how to handle app data. + * + * During a standard quic_conn lifetime it transitions like this : + * QC_MUX_NULL -> QC_MUX_READY -> QC_MUX_RELEASED + */ +enum qc_mux_state { + QC_MUX_NULL, /* not allocated, data should be buffered */ + QC_MUX_READY, /* allocated, ready to handle data */ + QC_MUX_RELEASED, /* released, data can be dropped */ +}; + +/* Counters at QUIC connection level */ +struct quic_conn_cntrs { + long long dropped_pkt; /* total number of dropped packets */ + long long dropped_pkt_bufoverrun;/* total number of dropped packets because of buffer overrun */ + long long dropped_parsing; /* total number of dropped packets upon parsing errors */ + long long socket_full; /* total number of EAGAIN errors on sendto() calls */ + long long sendto_err; /* total number of errors on sendto() calls, EAGAIN excepted */ + long long sendto_err_unknown; /* total number of errors on sendto() calls which are currently not supported */ + long long sent_pkt; /* total number of sent packets */ + long long lost_pkt; /* total number of lost packets */ + long long conn_migration_done; /* total number of connection migration handled */ + /* Streams related counters */ + long long data_blocked; /* total number of times DATA_BLOCKED frame was received */ + long long
stream_data_blocked; /* total number of times STREAM_DATA_BLOCKED frame was received */ + long long streams_blocked_bidi; /* total number of times STREAMS_BLOCKED_BIDI frame was received */ + long long streams_blocked_uni; /* total number of times STREAMS_BLOCKED_UNI frame was received */ +}; + +/* Flags at connection level */ +#define QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED (1U << 0) +#define QUIC_FL_CONN_SPIN_BIT (1U << 1) /* Spin bit set by remote peer */ +#define QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS (1U << 2) /* HANDSHAKE_DONE must be sent */ +#define QUIC_FL_CONN_LISTENER (1U << 3) +#define QUIC_FL_CONN_ACCEPT_REGISTERED (1U << 4) +#define QUIC_FL_CONN_TX_MUX_CONTEXT (1U << 5) /* sending in progress from the MUX layer */ +#define QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ (1U << 6) +#define QUIC_FL_CONN_RETRANS_NEEDED (1U << 7) +#define QUIC_FL_CONN_RETRANS_OLD_DATA (1U << 8) /* retransmission in progress for probing with already sent data */ +#define QUIC_FL_CONN_TLS_ALERT (1U << 9) +#define QUIC_FL_CONN_AFFINITY_CHANGED (1U << 10) /* qc_finalize_affinity_rebind() must be called to finalize affinity rebind */ +/* gap here */ +#define QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED (1U << 11) /* The half-open connection counter was decremented */ +#define QUIC_FL_CONN_HANDSHAKE_SPEED_UP (1U << 12) /* Handshake speeding up was done */ +#define QUIC_FL_CONN_ACK_TIMER_FIRED (1U << 13) /* idle timer triggered for acknowledgements */ +#define QUIC_FL_CONN_IO_TO_REQUEUE (1U << 14) /* IO handler must be requeued on new thread after connection migration */ +#define QUIC_FL_CONN_IPKTNS_DCD (1U << 15) /* Initial packet number space discarded */ +#define QUIC_FL_CONN_HPKTNS_DCD (1U << 16) /* Handshake packet number space discarded */ +#define QUIC_FL_CONN_PEER_VALIDATED_ADDR (1U << 17) /* Peer address is considered as validated for this connection. 
*/ +#define QUIC_FL_CONN_TO_KILL (1U << 24) /* Unusable connection, to be killed */ +#define QUIC_FL_CONN_TX_TP_RECEIVED (1U << 25) /* Peer transport parameters have been received (used for the transmitting part) */ +#define QUIC_FL_CONN_FINALIZED (1U << 26) /* QUIC connection finalized (functional, ready to send/receive) */ +/* gap here */ +#define QUIC_FL_CONN_EXP_TIMER (1U << 28) /* timer has expired, quic-conn can be freed */ +#define QUIC_FL_CONN_CLOSING (1U << 29) /* closing state, entered on CONNECTION_CLOSE emission */ +#define QUIC_FL_CONN_DRAINING (1U << 30) /* draining state, entered on CONNECTION_CLOSE reception */ +#define QUIC_FL_CONN_IMMEDIATE_CLOSE (1U << 31) /* A CONNECTION_CLOSE must be sent */ + +#define QUIC_CONN_COMMON \ + struct { \ + /* Connection owned socket FD. */ \ + int fd; \ + unsigned int flags; \ + struct quic_err err; \ + /* When in closing state, number of packet before sending CC */ \ + unsigned int nb_pkt_for_cc; \ + /* When in closing state, number of packet since receiving CC */ \ + unsigned int nb_pkt_since_cc; \ + struct wait_event wait_event; \ + struct wait_event *subs; \ + struct sockaddr_storage local_addr; \ + struct sockaddr_storage peer_addr; \ + struct { \ + /* Number of bytes for prepared packets */ \ + uint64_t prep; \ + /* Number of sent bytes. */ \ + uint64_t tx; \ + /* Number of received bytes. */ \ + uint64_t rx; \ + } bytes; \ + /* First DCID used by client on its Initial packet. 
*/ \ + struct quic_cid odcid; \ + /* DCID of our endpoint - not updated when a new DCID is used */ \ + struct quic_cid dcid; \ + /* first SCID of our endpoint - not updated when a new SCID is used */ \ + struct quic_cid scid; \ + /* tree of quic_connection_id - used to match a received packet DCID \ + * with a connection \ + */ \ + struct eb_root *cids; \ + struct listener *li; /* only valid for frontend connections */ \ + /* Idle timer task */ \ + struct task *idle_timer_task; \ + unsigned int idle_expire; \ + /* QUIC connection level counters */ \ + struct quic_conn_cntrs cntrs; \ + struct connection *conn; \ + } + +struct quic_conn { + QUIC_CONN_COMMON; + /* Used only to reach the tasklet for the I/O handler from this + * quic_conn object. + */ + struct ssl_sock_ctx *xprt_ctx; + const struct quic_version *original_version; + const struct quic_version *negotiated_version; + /* Negotiated version Initial TLS context */ + struct quic_tls_ctx *nictx; + /* QUIC transport parameters TLS extension */ + int tps_tls_ext; + int state; + enum qc_mux_state mux_state; /* status of the connection/mux layer */ +#ifdef USE_QUIC_OPENSSL_COMPAT + unsigned char enc_params[QUIC_TP_MAX_ENCLEN]; /* encoded QUIC transport parameters */ + size_t enc_params_len; +#endif + + uint64_t next_cid_seq_num; + /* Initial hash computed from first ID (derived from ODCID). 
+ * it could be reused to derive extra CIDs from the same hash + */ + uint64_t hash64; + + /* Initial encryption level */ + struct quic_enc_level *iel; + /* 0-RTT encryption level */ + struct quic_enc_level *eel; + /* Handshake encryption level */ + struct quic_enc_level *hel; + /* 1-RTT encryption level */ + struct quic_enc_level *ael; + /* List of allocated QUIC TLS encryption level */ + struct list qel_list; + + struct quic_pktns *ipktns; + struct quic_pktns *hpktns; + struct quic_pktns *apktns; + /* List of packet number spaces attached to this connection */ + struct list pktns_list; + +#ifdef USE_QUIC_OPENSSL_COMPAT + struct quic_openssl_compat openssl_compat; +#endif + + struct { + /* Transport parameters sent by the peer */ + struct quic_transport_params params; + /* Send buffer used to write datagrams. */ + struct buffer buf; + /* Send buffer used to send a "connection close" datagram . */ + struct buffer cc_buf; + char *cc_buf_area; + /* Length of the "connection close" datagram. 
*/ + size_t cc_dgram_len; + } tx; + struct { + /* Transport parameters the peer will receive */ + struct quic_transport_params params; + /* RX buffer */ + struct buffer buf; + struct list pkt_list; + struct { + /* Number of open or closed streams */ + uint64_t nb_streams; + } strms[QCS_MAX_TYPES]; + } rx; + struct { + struct quic_tls_kp prv_rx; + struct quic_tls_kp nxt_rx; + struct quic_tls_kp nxt_tx; + } ku; + unsigned int max_ack_delay; + unsigned int max_idle_timeout; + struct quic_cc_path paths[1]; + struct quic_cc_path *path; + + struct mt_list accept_list; /* chaining element used for accept, only valid for frontend connections */ + + struct eb_root streams_by_id; /* qc_stream_desc tree */ + int stream_buf_count; /* total count of allocated stream buffers for this connection */ + + /* MUX */ + struct qcc *qcc; + struct task *timer_task; + unsigned int timer; + unsigned int ack_expire; + /* Handshake expiration date */ + unsigned int hs_expire; + + const struct qcc_app_ops *app_ops; + /* Proxy counters */ + struct quic_counters *prx_counters; + + struct list el_th_ctx; /* list elem in ha_thread_ctx */ + struct list back_refs; /* list head of CLI context currently dumping this connection. */ + unsigned int qc_epoch; /* delimiter for newer instances started after "show quic". */ +}; + +/* QUIC connection in "connection close" state. */ +struct quic_conn_closed { + QUIC_CONN_COMMON; + char *cc_buf_area; + /* Length of the "connection close" datagram. 
*/ + size_t cc_dgram_len; +}; + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_CONN_T_H */ diff --git a/include/haproxy/quic_conn.h b/include/haproxy/quic_conn.h new file mode 100644 index 0000000..92caed4 --- /dev/null +++ b/include/haproxy/quic_conn.h @@ -0,0 +1,201 @@ +/* + * include/haproxy/quic_conn.h + * + * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_CONN_H +#define _HAPROXY_QUIC_CONN_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <inttypes.h> + +#include <import/eb64tree.h> +#include <import/ebmbtree.h> + +#include <haproxy/chunk.h> +#include <haproxy/dynbuf.h> +#include <haproxy/ncbuf.h> +#include <haproxy/net_helper.h> +#include <haproxy/openssl-compat.h> +#include <haproxy/ticks.h> + +#include <haproxy/listener.h> +#include <haproxy/proto_quic.h> +#include <haproxy/quic_cc.h> +#include <haproxy/quic_cid.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_enc.h> +#include <haproxy/quic_frame.h> +#include <haproxy/quic_loss.h> +#include <haproxy/quic_rx.h> +#include <haproxy/mux_quic.h> + +#include <openssl/rand.h> + +extern struct pool_head *pool_head_quic_connection_id; + +int qc_conn_finalize(struct quic_conn *qc, int server); +int 
ssl_quic_initial_ctx(struct bind_conf *bind_conf); +struct quic_cstream *quic_cstream_new(struct quic_conn *qc); +void quic_cstream_free(struct quic_cstream *cs); +void quic_free_arngs(struct quic_conn *qc, struct quic_arngs *arngs); +struct quic_cstream *quic_cstream_new(struct quic_conn *qc); +struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int state); + +struct quic_connection_id *new_quic_cid(struct eb_root *root, + struct quic_conn *qc, + const struct quic_cid *orig, + const struct sockaddr_storage *addr); +void quic_conn_closed_err_count_inc(struct quic_conn *qc, struct quic_frame *frm); +int qc_h3_request_reject(struct quic_conn *qc, uint64_t id); +struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4, + struct quic_cid *dcid, struct quic_cid *scid, + const struct quic_cid *token_odcid, + struct quic_connection_id *conn_id, + struct sockaddr_storage *local_addr, + struct sockaddr_storage *peer_addr, + int server, int token, void *owner); +int quic_build_post_handshake_frames(struct quic_conn *qc); +const struct quic_version *qc_supported_version(uint32_t version); +int quic_peer_validated_addr(struct quic_conn *qc); +void qc_set_timer(struct quic_conn *qc); +void qc_detach_th_ctx_list(struct quic_conn *qc, int closing); +void qc_idle_timer_do_rearm(struct quic_conn *qc, int arm_ack); +void qc_idle_timer_rearm(struct quic_conn *qc, int read, int arm_ack); +void qc_check_close_on_released_mux(struct quic_conn *qc); +int quic_stateless_reset_token_cpy(unsigned char *pos, size_t len, + const unsigned char *salt, size_t saltlen); + +static inline int qc_is_listener(struct quic_conn *qc) +{ + return qc->flags & QUIC_FL_CONN_LISTENER; +} + +/* Free the CIDs attached to <conn> QUIC connection. 
*/ +static inline void free_quic_conn_cids(struct quic_conn *conn) +{ + struct eb64_node *node; + + /* nothing to release if the CID tree was never allocated */ + if (!conn->cids) + return; + + node = eb64_first(conn->cids); + while (node) { + struct quic_connection_id *conn_id; + + conn_id = eb64_entry(node, struct quic_connection_id, seq_num); + + /* remove the CID from the receiver tree */ + quic_cid_delete(conn_id); + + /* remove the CID from the quic_conn tree: the next node is fetched + * first because <conn_id> is released just after. + */ + node = eb64_next(node); + eb64_delete(&conn_id->seq_num); + pool_free(pool_head_quic_connection_id, conn_id); + } +} + +/* Re-attach all the connection IDs of <conn> QUIC connection to <cc_conn>: + * only the owner pointer of each connection ID is updated, the tree itself + * is left untouched. Does nothing if <conn> has no CID tree. + */ +static inline void quic_conn_mv_cids_to_cc_conn(struct quic_conn_closed *cc_conn, + struct quic_conn *conn) +{ + struct eb64_node *node; + + /* same guard as free_quic_conn_cids(): the tree may not be allocated */ + if (!conn->cids) + return; + + node = eb64_first(conn->cids); + while (node) { + struct quic_connection_id *conn_id; + + conn_id = eb64_entry(node, struct quic_connection_id, seq_num); + conn_id->qc = (struct quic_conn *)cc_conn; + node = eb64_next(node); + } + +} + +/* Allocate the underlying required memory for <ncbuf> non-contiguous buffer. + * Returns <ncbuf>, left untouched if it was already allocated. + */ +static inline struct ncbuf *quic_get_ncbuf(struct ncbuf *ncbuf) +{ + struct buffer buf = BUF_NULL; + + if (!ncb_is_null(ncbuf)) + return ncbuf; + + b_alloc(&buf); + BUG_ON(b_is_null(&buf)); + + *ncbuf = ncb_make(buf.area, buf.size, 0); + ncb_init(ncbuf, 0); + + return ncbuf; +} + +/* Release the underlying memory used by <ncbuf> non-contiguous buffer */ +static inline void quic_free_ncbuf(struct ncbuf *ncbuf) +{ + struct buffer buf; + + if (ncb_is_null(ncbuf)) + return; + + buf = b_make(ncbuf->area, ncbuf->size, 0, 0); + b_free(&buf); + offer_buffers(NULL, 1); + + *ncbuf = NCBUF_NULL; +} + +void chunk_frm_appendf(struct buffer *buf, const struct quic_frame *frm); +void quic_set_connection_close(struct quic_conn *qc, const struct quic_err err); +void quic_set_tls_alert(struct quic_conn *qc, int alert); +int quic_set_app_ops(struct quic_conn *qc, const unsigned char *alpn, size_t alpn_len); +int qc_check_dcid(struct
quic_conn *qc, unsigned char *dcid, size_t dcid_len); +struct quic_cid quic_derive_cid(const struct quic_cid *orig, + const struct sockaddr_storage *addr); +int quic_get_cid_tid(const unsigned char *cid, size_t cid_len, + const struct sockaddr_storage *cli_addr, + unsigned char *buf, size_t buf_len); +int qc_send_mux(struct quic_conn *qc, struct list *frms); + +void qc_notify_err(struct quic_conn *qc); +int qc_notify_send(struct quic_conn *qc); + +void qc_check_close_on_released_mux(struct quic_conn *qc); + +void quic_conn_release(struct quic_conn *qc); + +void qc_kill_conn(struct quic_conn *qc); + +int qc_parse_hd_form(struct quic_rx_packet *pkt, + unsigned char **buf, const unsigned char *end); + +int qc_set_tid_affinity(struct quic_conn *qc, uint new_tid, struct listener *new_li); +void qc_finalize_affinity_rebind(struct quic_conn *qc); +int qc_handle_conn_migration(struct quic_conn *qc, + const struct sockaddr_storage *peer_addr, + const struct sockaddr_storage *local_addr); + +/* Function pointer that can be used to compute a hash from first generated CID (derived from ODCID) */ +extern uint64_t (*quic_hash64_from_cid)(const unsigned char *cid, int size, const unsigned char *secret, size_t secretlen); +/* Function pointer that can be used to derive a new CID from the previously computed hash */ +extern void (*quic_newcid_from_hash64)(unsigned char *cid, int size, uint64_t hash, const unsigned char *secret, size_t secretlen); + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_CONN_H */ diff --git a/include/haproxy/quic_enc.h b/include/haproxy/quic_enc.h new file mode 100644 index 0000000..4b85605 --- /dev/null +++ b/include/haproxy/quic_enc.h @@ -0,0 +1,275 @@ +/* + * include/haproxy/quic_enc.h + * This file contains QUIC varint encoding function prototypes + * + * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_ENC_H +#define _HAPROXY_QUIC_ENC_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <inttypes.h> + +#include <haproxy/buf.h> +#include <haproxy/chunk.h> + +/* The maximum value of a variable-length QUIC integer encoded with 1 byte */ +#define QUIC_VARINT_1_BYTE_MAX ((1UL << 6) - 1) +/* The maximum value of a variable-length QUIC integer encoded with 2 bytes */ +#define QUIC_VARINT_2_BYTE_MAX ((1UL << 14) - 1) +/* The maximum value of a variable-length QUIC integer encoded with 4 bytes */ +#define QUIC_VARINT_4_BYTE_MAX ((1UL << 30) - 1) +/* The maximum value of a variable-length QUIC integer encoded with 8 bytes */ +#define QUIC_VARINT_8_BYTE_MAX ((1ULL << 62) - 1) + +/* The maximum size in bytes of a variable-length QUIC integer */ +#define QUIC_VARINT_MAX_SIZE 8 + +/* The two most significant bits of byte #0 of an encoded integer give the + * base-2 logarithm of the length in bytes of this variable length integer. + */ +#define QUIC_VARINT_BYTE_0_BITMASK 0x3f +#define QUIC_VARINT_BYTE_0_SHIFT 6 + +/* Returns enough log2 of first powers of two to encode QUIC variable length + * integers. + * Returns -1 if <val> is out of the range of lengths supported by QUIC.
+ */ +static inline int quic_log2(unsigned int val) +{ + switch (val) { + case 8: + return 3; + case 4: + return 2; + case 2: + return 1; + case 1: + return 0; + default: + return -1; + } +} + +/* Returns the size in bytes required to encode a 64bits integer if + * not out of range (< (1 << 62)), or 0 if out of range. + */ +static inline size_t quic_int_getsize(uint64_t val) +{ + switch (val) { + case 0 ... QUIC_VARINT_1_BYTE_MAX: + return 1; + case QUIC_VARINT_1_BYTE_MAX + 1 ... QUIC_VARINT_2_BYTE_MAX: + return 2; + case QUIC_VARINT_2_BYTE_MAX + 1 ... QUIC_VARINT_4_BYTE_MAX: + return 4; + case QUIC_VARINT_4_BYTE_MAX + 1 ... QUIC_VARINT_8_BYTE_MAX: + return 8; + default: + return 0; + } +} + +/* Returns the maximum value of a QUIC variable-length integer with <sz> as size */ +static inline uint64_t quic_max_int(size_t sz) +{ + switch (sz) { + case 1: + return QUIC_VARINT_1_BYTE_MAX; + case 2: + return QUIC_VARINT_2_BYTE_MAX; + case 4: + return QUIC_VARINT_4_BYTE_MAX; + case 8: + return QUIC_VARINT_8_BYTE_MAX; + } + + return -1; +} + +/* Decode a QUIC variable-length integer from <buf> buffer into <val>. + * Note that the result is a 64-bits integer but with the less significant + * 62 bits as relevant information. The most significant 2 remaining bits encode + * the length of the integer. + * Returns 1 if succeeded there was enough data in <buf>), 0 if not. + */ +static inline int quic_dec_int(uint64_t *val, + const unsigned char **buf, + const unsigned char *end) +{ + size_t len; + + if (*buf >= end) + return 0; + + len = 1 << (**buf >> QUIC_VARINT_BYTE_0_SHIFT); + if (*buf + len > end) + return 0; + + *val = *(*buf)++ & QUIC_VARINT_BYTE_0_BITMASK; + while (--len) + *val = (*val << 8) | *(*buf)++; + + return 1; +} + +/* Decode a QUIC variable-length integer from <b> buffer into <val> supporting wrapping. + * Note that the result is a 64-bits integer but with the less significant + * 62 bits as relevant information. 
The most significant 2 bits encode + * the length of the integer. + * Note that this function updates <b> buffer when a variable-length integer + * has successfully been parsed. + * Returns 1 if succeeded (there was enough data in <buf>), 0 if not. + * If <retlen> is not null, increment <*retlen> by the number of bytes consumed to decode + * the varint. + */ +static inline size_t b_quic_dec_int(uint64_t *val, struct buffer *b, size_t *retlen) +{ + const unsigned char *pos = (const unsigned char *)b_head(b); + const unsigned char *end = (const unsigned char *)b_wrap(b); + size_t size = b_size(b); + size_t data = b_data(b); + size_t save_len, len; + + if (!data) + return 0; + + save_len = len = 1 << (*pos >> QUIC_VARINT_BYTE_0_SHIFT); + if (data < len) + return 0; + + *val = *pos & QUIC_VARINT_BYTE_0_BITMASK; + /* wrap to the beginning of the storage area when its end is reached */ + if (++pos == end) + pos -= size; + while (--len) { + *val = (*val << 8) | *pos; + if (++pos == end) + pos -= size; + } + if (retlen) + *retlen += save_len; + b_del(b, save_len); + + return 1; +} + +/* Encode a QUIC variable-length integer from <val> into <buf> buffer with <end> as first + * byte address after the end of this buffer. On success <*buf> is advanced + * past the encoded bytes. + * Returns 1 if succeeded (there was enough room in buf), 0 if not. + */ +static inline int quic_enc_int(unsigned char **buf, const unsigned char *end, uint64_t val) +{ + size_t len; + unsigned int shift; + unsigned char size_bits, *head; + + len = quic_int_getsize(val); + /* <end - *buf> is signed: reject a position already past <end> before + * comparing it with the unsigned <len>, a negative difference would + * otherwise be converted to a huge room. + */ + if (!len || *buf > end || (size_t)(end - *buf) < len) + return 0; + + shift = (len - 1) * 8; + /* set the bits of byte#0 which gives the length of the encoded integer */ + size_bits = quic_log2(len) << QUIC_VARINT_BYTE_0_SHIFT; + head = *buf; + while (len--) { + *(*buf)++ = val >> shift; + shift -= 8; + } + *head |= size_bits; + + return 1; +} + +/* Encode a QUIC variable-length integer <val> into <b> buffer. <width> can be + * set to specify the desired output width. By default use 0 for the minimal + * integer size. Other valid values are 1, 2, 4 or 8.
+ * + * Returns 1 on success else 0. + */ +static inline int b_quic_enc_int(struct buffer *b, uint64_t val, int width) +{ + char *pos; + int save_width, len; + + /* width can only be 0, 1, 2, 4 or 8 */ + BUG_ON(width && (width > 8 || atleast2(width))); + + len = quic_int_getsize(val); + if (!len) + return 0; + + /* An explicit width must be big enough to hold the integer. */ + if (width && width < len) + return 0; + + if (!width) + width = len; + save_width = width; + + /* <width> bytes are written below (zero-padded when wider than the + * minimal encoding), so the room must cover <width> and not only <len>. + */ + if (b_room(b) < (size_t)width) + return 0; + + pos = b_tail(b); + while (width--) { + /* Encode the shifted integer or 0 if width bigger than integer length. */ + *pos++ = width >= len ? 0 : val >> (width * 8); + + if (pos == b_wrap(b)) + pos = b_orig(b); + } + + /* set the bits of byte#0 which gives the length of the encoded integer */ + *b_tail(b) |= quic_log2(save_width) << QUIC_VARINT_BYTE_0_SHIFT; + b_add(b, save_width); + + return 1; +} + +/* Return the difference between the encoded length of <val+1> and the encoded + * length of <val>. + */ +static inline size_t quic_incint_size_diff(uint64_t val) +{ + switch (val) { + case QUIC_VARINT_1_BYTE_MAX: + return 1; + case QUIC_VARINT_2_BYTE_MAX: + return 2; + case QUIC_VARINT_4_BYTE_MAX: + return 4; + default: + return 0; + } +} + +/* Return the difference between the encoded length of <val> and the encoded + * length of <val-1>. + */ +static inline size_t quic_decint_size_diff(uint64_t val) +{ + switch (val) { + case QUIC_VARINT_1_BYTE_MAX + 1: + return 1; + case QUIC_VARINT_2_BYTE_MAX + 1: + return 2; + case QUIC_VARINT_4_BYTE_MAX + 1: + return 4; + default: + return 0; + } +} + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_ENC_H */ diff --git a/include/haproxy/quic_frame-t.h b/include/haproxy/quic_frame-t.h new file mode 100644 index 0000000..5e91f93 --- /dev/null +++ b/include/haproxy/quic_frame-t.h @@ -0,0 +1,309 @@ +/* + * include/haproxy/quic_frame-t.h + * This file contains QUIC frame definitions.
+ * + * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _TYPES_QUIC_FRAME_H +#define _TYPES_QUIC_FRAME_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <inttypes.h> +#include <stdlib.h> + +#include <import/ebtree-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/list.h> +#include <haproxy/quic_stream-t.h> + +extern struct pool_head *pool_head_quic_frame; +extern struct pool_head *pool_head_qf_crypto; + +/* forward declarations from xprt-quic */ +struct quic_arngs; +struct quic_enc_level; +struct quic_tx_packet; + +/* QUIC frame types. 
*/ +enum quic_frame_type { + QUIC_FT_PADDING = 0x00, + QUIC_FT_PING = 0x01, + QUIC_FT_ACK = 0x02, + QUIC_FT_ACK_ECN = 0x03, + QUIC_FT_RESET_STREAM = 0x04, + QUIC_FT_STOP_SENDING = 0x05, + QUIC_FT_CRYPTO = 0x06, + QUIC_FT_NEW_TOKEN = 0x07, + + QUIC_FT_STREAM_8 = 0x08, + QUIC_FT_STREAM_9 = 0x09, + QUIC_FT_STREAM_A = 0x0a, + QUIC_FT_STREAM_B = 0x0b, + QUIC_FT_STREAM_C = 0x0c, + QUIC_FT_STREAM_D = 0x0d, + QUIC_FT_STREAM_E = 0x0e, + QUIC_FT_STREAM_F = 0x0f, + + QUIC_FT_MAX_DATA = 0x10, + QUIC_FT_MAX_STREAM_DATA = 0x11, + QUIC_FT_MAX_STREAMS_BIDI = 0x12, + QUIC_FT_MAX_STREAMS_UNI = 0x13, + QUIC_FT_DATA_BLOCKED = 0x14, + QUIC_FT_STREAM_DATA_BLOCKED = 0x15, + QUIC_FT_STREAMS_BLOCKED_BIDI = 0x16, + QUIC_FT_STREAMS_BLOCKED_UNI = 0x17, + QUIC_FT_NEW_CONNECTION_ID = 0x18, + QUIC_FT_RETIRE_CONNECTION_ID = 0x19, + QUIC_FT_PATH_CHALLENGE = 0x1a, + QUIC_FT_PATH_RESPONSE = 0x1b, + QUIC_FT_CONNECTION_CLOSE = 0x1c, + QUIC_FT_CONNECTION_CLOSE_APP = 0x1d, + QUIC_FT_HANDSHAKE_DONE = 0x1e, + /* Do not insert enums after the following one. 
*/ + QUIC_FT_MAX +}; + +#define QUIC_FT_PKT_TYPE_I_BITMASK (1 << QUIC_PACKET_TYPE_INITIAL) +#define QUIC_FT_PKT_TYPE_0_BITMASK (1 << QUIC_PACKET_TYPE_0RTT) +#define QUIC_FT_PKT_TYPE_H_BITMASK (1 << QUIC_PACKET_TYPE_HANDSHAKE) +#define QUIC_FT_PKT_TYPE_1_BITMASK (1 << QUIC_PACKET_TYPE_SHORT) + +#define QUIC_FT_PKT_TYPE_IH01_BITMASK \ + (QUIC_FT_PKT_TYPE_I_BITMASK | QUIC_FT_PKT_TYPE_H_BITMASK | \ + QUIC_FT_PKT_TYPE_0_BITMASK | QUIC_FT_PKT_TYPE_1_BITMASK) + +#define QUIC_FT_PKT_TYPE_IH_1_BITMASK \ + (QUIC_FT_PKT_TYPE_I_BITMASK | QUIC_FT_PKT_TYPE_H_BITMASK | \ + QUIC_FT_PKT_TYPE_1_BITMASK) + +#define QUIC_FT_PKT_TYPE___01_BITMASK \ + (QUIC_FT_PKT_TYPE_0_BITMASK | QUIC_FT_PKT_TYPE_1_BITMASK) + +#define QUIC_FT_PKT_TYPE____1_BITMASK QUIC_FT_PKT_TYPE_1_BITMASK + + +/* Flag a TX frame as acknowledged */ +#define QUIC_FL_TX_FRAME_ACKED 0x01 + +#define QUIC_STREAM_FRAME_TYPE_FIN_BIT 0x01 +#define QUIC_STREAM_FRAME_TYPE_LEN_BIT 0x02 +#define QUIC_STREAM_FRAME_TYPE_OFF_BIT 0x04 + +/* Servers have the stream initiator bit set. */ +#define QUIC_STREAM_FRAME_ID_INITIATOR_BIT 0x01 +/* Unidirectional streams have the direction bit set. */ +#define QUIC_STREAM_FRAME_ID_DIR_BIT 0x02 + +#define QUIC_PATH_CHALLENGE_LEN 8 +/* Maximum phrase length in CONNECTION_CLOSE frame */ +#define QUIC_CC_REASON_PHRASE_MAXLEN 64 + +struct qf_padding { + size_t len; +}; + +struct qf_ack { + uint64_t largest_ack; + uint64_t ack_delay; + uint64_t ack_range_num; + uint64_t first_ack_range; +}; + +/* Structure used when emitting ACK frames. 
*/ +struct qf_tx_ack { + uint64_t ack_delay; + struct quic_arngs *arngs; +}; + +struct qf_reset_stream { + uint64_t id; + uint64_t app_error_code; + uint64_t final_size; +}; + +struct qf_stop_sending { + uint64_t id; + uint64_t app_error_code; +}; + +struct qf_crypto { + struct list list; + uint64_t offset; + uint64_t len; + const struct quic_enc_level *qel; + const unsigned char *data; +}; + +struct qf_new_token { + uint64_t len; + const unsigned char *data; +}; + +struct qf_stream { + uint64_t id; + struct qc_stream_desc *stream; + + /* used only on TX when constructing frames. + * Data cleared when processing ACK related to this STREAM frame. + * + * A same buffer may be shared between several STREAM frames. The + * <data> field of each quic_stream serves to differentiate the payload + * of each of these. + */ + struct buffer *buf; + + struct eb64_node offset; + uint64_t len; + + /* for TX pointer into <buf> field. + * for RX pointer into the packet buffer. + */ + const unsigned char *data; + + char dup; /* set for duplicated frame : this forces to check for the underlying qc_stream_buf instance before emitting it. 
*/ +}; + +struct qf_max_data { + uint64_t max_data; +}; + +struct qf_max_stream_data { + uint64_t id; + uint64_t max_stream_data; +}; + +struct qf_max_streams { + uint64_t max_streams; +}; + +struct qf_data_blocked { + uint64_t limit; +}; + +struct qf_stream_data_blocked { + uint64_t id; + uint64_t limit; +}; + +struct qf_streams_blocked { + uint64_t limit; +}; + +struct qf_new_connection_id { + uint64_t seq_num; + uint64_t retire_prior_to; + struct { + unsigned char len; + const unsigned char *data; + } cid; + const unsigned char *stateless_reset_token; +}; + +struct qf_retire_connection_id { + uint64_t seq_num; +}; + +struct qf_path_challenge { + unsigned char data[QUIC_PATH_CHALLENGE_LEN]; +}; + +struct qf_path_challenge_response { + unsigned char data[QUIC_PATH_CHALLENGE_LEN]; +}; + +struct qf_connection_close { + uint64_t error_code; + uint64_t frame_type; + uint64_t reason_phrase_len; + unsigned char reason_phrase[QUIC_CC_REASON_PHRASE_MAXLEN]; +}; + +struct qf_connection_close_app { + uint64_t error_code; + uint64_t reason_phrase_len; + unsigned char reason_phrase[QUIC_CC_REASON_PHRASE_MAXLEN]; +}; + +struct quic_frame { + struct list list; /* List elem from parent elem (typically a Tx packet instance, a PKTNS or a MUX element). */ + struct quic_tx_packet *pkt; /* Last Tx packet used to send the frame. */ + unsigned char type; /* QUIC frame type. 
*/ + union { + struct qf_padding padding; + struct qf_ack ack; + struct qf_tx_ack tx_ack; + struct qf_crypto crypto; + struct qf_reset_stream reset_stream; + struct qf_stop_sending stop_sending; + struct qf_new_token new_token; + struct qf_stream stream; + struct qf_max_data max_data; + struct qf_max_stream_data max_stream_data; + struct qf_max_streams max_streams_bidi; + struct qf_max_streams max_streams_uni; + struct qf_data_blocked data_blocked; + struct qf_stream_data_blocked stream_data_blocked; + struct qf_streams_blocked streams_blocked_bidi; + struct qf_streams_blocked streams_blocked_uni; + struct qf_new_connection_id new_connection_id; + struct qf_retire_connection_id retire_connection_id; + struct qf_path_challenge path_challenge; + struct qf_path_challenge_response path_challenge_response; + struct qf_connection_close connection_close; + struct qf_connection_close_app connection_close_app; + }; + struct quic_frame *origin; /* Parent frame. Set if frame is a duplicate (used for retransmission). */ + struct list reflist; /* List head containing duplicated children frames. */ + struct list ref; /* List elem from parent frame reflist. Set if frame is a duplicate (used for retransmission). */ + unsigned int flags; /* QUIC_FL_TX_FRAME_* */ + unsigned int loss_count; /* Counter for each occurrence of this frame marked as lost. */ +}; + + +/* QUIC error codes */ +struct quic_err { + uint64_t code; /* error code */ + int app; /* set for Application error code */ +}; + +/* Transport level error codes. 
*/ +#define QC_ERR_NO_ERROR 0x00 +#define QC_ERR_INTERNAL_ERROR 0x01 +#define QC_ERR_CONNECTION_REFUSED 0x02 +#define QC_ERR_FLOW_CONTROL_ERROR 0x03 +#define QC_ERR_STREAM_LIMIT_ERROR 0x04 +#define QC_ERR_STREAM_STATE_ERROR 0x05 +#define QC_ERR_FINAL_SIZE_ERROR 0x06 +#define QC_ERR_FRAME_ENCODING_ERROR 0x07 +#define QC_ERR_TRANSPORT_PARAMETER_ERROR 0x08 +#define QC_ERR_CONNECTION_ID_LIMIT_ERROR 0x09 +#define QC_ERR_PROTOCOL_VIOLATION 0x0a +#define QC_ERR_INVALID_TOKEN 0x0b +#define QC_ERR_APPLICATION_ERROR 0x0c +#define QC_ERR_CRYPTO_BUFFER_EXCEEDED 0x0d +#define QC_ERR_KEY_UPDATE_ERROR 0x0e +#define QC_ERR_AEAD_LIMIT_REACHED 0x0f +#define QC_ERR_NO_VIABLE_PATH 0x10 +/* 256 TLS reserved errors 0x100-0x1ff. */ +#define QC_ERR_CRYPTO_ERROR 0x100 + +#endif /* USE_QUIC */ +#endif /* _TYPES_QUIC_FRAME_H */ diff --git a/include/haproxy/quic_frame.h b/include/haproxy/quic_frame.h new file mode 100644 index 0000000..90d6b21 --- /dev/null +++ b/include/haproxy/quic_frame.h @@ -0,0 +1,281 @@ +/* + * include/haproxy/quic_frame.h + * This file contains prototypes for QUIC frames. + * + * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_FRAME_H +#define _HAPROXY_QUIC_FRAME_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <import/eb64tree.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_enc.h> +#include <haproxy/quic_frame-t.h> +#include <haproxy/quic_rx-t.h> + +const char *quic_frame_type_string(enum quic_frame_type ft); + +int qc_build_frm(unsigned char **pos, const unsigned char *end, + struct quic_frame *frm, struct quic_tx_packet *pkt, + struct quic_conn *conn); + +int qc_parse_frm(struct quic_frame *frm, struct quic_rx_packet *pkt, + const unsigned char **pos, const unsigned char *end, + struct quic_conn *conn); + +void qc_release_frm(struct quic_conn *qc, struct quic_frame *frm); + +/* Return the length of <frm> frame if succeeded, -1 if not (unknown frames + * or which must not be transmitted again after having been lost (PING, PADDING). 
+ */ +static inline size_t qc_frm_len(struct quic_frame *frm) +{ + size_t len = 0; + + switch (frm->type) { + case QUIC_FT_ACK: { + struct qf_tx_ack *tx_ack = &frm->tx_ack; + struct eb64_node *ar, *prev_ar; + struct quic_arng_node *ar_node, *prev_ar_node; + + ar = eb64_last(&tx_ack->arngs->root); + ar_node = eb64_entry(ar, struct quic_arng_node, first); + len += 1 + quic_int_getsize(ar_node->last); + len += quic_int_getsize(tx_ack->ack_delay); + len += quic_int_getsize(tx_ack->arngs->sz - 1); + len += quic_int_getsize(ar_node->last - ar_node->first.key); + + while ((prev_ar = eb64_prev(ar))) { + prev_ar_node = eb64_entry(prev_ar, struct quic_arng_node, first); + len += quic_int_getsize(ar_node->first.key - prev_ar_node->last - 2); + len += quic_int_getsize(prev_ar_node->last - prev_ar_node->first.key); + ar = prev_ar; + ar_node = eb64_entry(ar, struct quic_arng_node, first); + } + break; + } + case QUIC_FT_RESET_STREAM: { + struct qf_reset_stream *f = &frm->reset_stream; + len += 1 + quic_int_getsize(f->id) + + quic_int_getsize(f->app_error_code) + quic_int_getsize(f->final_size); + break; + } + case QUIC_FT_STOP_SENDING: { + struct qf_stop_sending *f = &frm->stop_sending; + len += 1 + quic_int_getsize(f->id) + quic_int_getsize(f->app_error_code); + break; + } + case QUIC_FT_CRYPTO: { + struct qf_crypto *f = &frm->crypto; + len += 1 + quic_int_getsize(f->offset) + quic_int_getsize(f->len) + f->len; + break; + } + case QUIC_FT_NEW_TOKEN: { + struct qf_new_token *f = &frm->new_token; + len += 1 + quic_int_getsize(f->len) + f->len; + break; + } + case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F: { + struct qf_stream *f = &frm->stream; + len += 1 + quic_int_getsize(f->id) + + ((frm->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT) ? quic_int_getsize(f->offset.key) : 0) + + ((frm->type & QUIC_STREAM_FRAME_TYPE_LEN_BIT) ? 
quic_int_getsize(f->len) : 0) + f->len; + break; + } + case QUIC_FT_MAX_DATA: { + struct qf_max_data *f = &frm->max_data; + len += 1 + quic_int_getsize(f->max_data); + break; + } + case QUIC_FT_MAX_STREAM_DATA: { + struct qf_max_stream_data *f = &frm->max_stream_data; + len += 1 + quic_int_getsize(f->id) + quic_int_getsize(f->max_stream_data); + break; + } + case QUIC_FT_MAX_STREAMS_BIDI: { + struct qf_max_streams *f = &frm->max_streams_bidi; + len += 1 + quic_int_getsize(f->max_streams); + break; + } + case QUIC_FT_MAX_STREAMS_UNI: { + struct qf_max_streams *f = &frm->max_streams_uni; + len += 1 + quic_int_getsize(f->max_streams); + break; + } + case QUIC_FT_DATA_BLOCKED: { + struct qf_data_blocked *f = &frm->data_blocked; + len += 1 + quic_int_getsize(f->limit); + break; + } + case QUIC_FT_STREAM_DATA_BLOCKED: { + struct qf_stream_data_blocked *f = &frm->stream_data_blocked; + len += 1 + quic_int_getsize(f->id) + quic_int_getsize(f->limit); + break; + } + case QUIC_FT_STREAMS_BLOCKED_BIDI: { + struct qf_streams_blocked *f = &frm->streams_blocked_bidi; + len += 1 + quic_int_getsize(f->limit); + break; + } + case QUIC_FT_STREAMS_BLOCKED_UNI: { + struct qf_streams_blocked *f = &frm->streams_blocked_uni; + len += 1 + quic_int_getsize(f->limit); + break; + } + case QUIC_FT_NEW_CONNECTION_ID: { + struct qf_new_connection_id *f = &frm->new_connection_id; + len += 1 + quic_int_getsize(f->seq_num) + quic_int_getsize(f->retire_prior_to) + + quic_int_getsize(f->cid.len) + f->cid.len + QUIC_STATELESS_RESET_TOKEN_LEN; + break; + } + case QUIC_FT_RETIRE_CONNECTION_ID: { + struct qf_retire_connection_id *f = &frm->retire_connection_id; + len += 1 + quic_int_getsize(f->seq_num); + break; + } + case QUIC_FT_PATH_CHALLENGE: { + struct qf_path_challenge *f = &frm->path_challenge; + len += 1 + sizeof f->data; + break; + } + case QUIC_FT_PATH_RESPONSE: { + struct qf_path_challenge_response *f = &frm->path_challenge_response; + len += 1 + sizeof f->data; + break; + } + case 
QUIC_FT_CONNECTION_CLOSE: { + struct qf_connection_close *f = &frm->connection_close; + len += 1 + quic_int_getsize(f->error_code) + quic_int_getsize(f->frame_type) + + quic_int_getsize(f->reason_phrase_len) + f->reason_phrase_len; + break; + } + case QUIC_FT_CONNECTION_CLOSE_APP: { + struct qf_connection_close_app *f = &frm->connection_close_app; /* app variant has no <frame_type> member: use its own layout */ + len += 1 + quic_int_getsize(f->error_code) + + quic_int_getsize(f->reason_phrase_len) + f->reason_phrase_len; + break; + } + case QUIC_FT_HANDSHAKE_DONE: { + len += 1; + break; + } + default: + return -1; /* seen as (size_t)-1 by callers, the return type being unsigned */ + } + + return len; +} + +static inline struct quic_err quic_err_transport(uint64_t code) +{ + return (struct quic_err){ .code = code, .app = 0 }; +} + +static inline struct quic_err quic_err_tls(uint64_t tls_alert) +{ + const uint64_t code = QC_ERR_CRYPTO_ERROR|tls_alert; + return (struct quic_err){ .code = code, .app = 0 }; +} + +static inline struct quic_err quic_err_app(uint64_t code) +{ + return (struct quic_err){ .code = code, .app = 1 }; +} + +/* Allocate a quic_frame with type <type>. Frame must be freed with + * qc_frm_free(). + * + * Returns the allocated frame or NULL on failure. + */ +static inline struct quic_frame *qc_frm_alloc(int type) +{ + struct quic_frame *frm = NULL; + + frm = pool_alloc(pool_head_quic_frame); + if (!frm) + return NULL; + + frm->type = type; + + LIST_INIT(&frm->list); + LIST_INIT(&frm->reflist); + LIST_INIT(&frm->ref); + frm->pkt = NULL; + frm->origin = NULL; + frm->flags = 0; + frm->loss_count = 0; + + return frm; +} + +/* Allocate a quic_frame by duplicating <origin> frame. This will create a new + * frame of the same type with the same content. Internal fields such as packet + * owner and flags are however reset for the newly allocated frame except + * for the loss counter. Frame must be freed with qc_frm_free(). + * + * Returns the allocated frame or NULL on failure. 
+ */ +static inline struct quic_frame *qc_frm_dup(struct quic_frame *origin) +{ + struct quic_frame *frm = NULL; + + frm = pool_alloc(pool_head_quic_frame); + if (!frm) + return NULL; + + *frm = *origin; + + /* Reinit all internal members except loss_count. */ + LIST_INIT(&frm->list); + LIST_INIT(&frm->reflist); + frm->pkt = NULL; + frm->flags = 0; + + /* Attach <frm> to <origin>. */ + LIST_APPEND(&origin->reflist, &frm->ref); + frm->origin = origin; + + return frm; +} + +void qc_frm_free(struct quic_conn *qc, struct quic_frame **frm); +void qc_frm_unref(struct quic_frame *frm, struct quic_conn *qc); + +/* Move forward <strm> STREAM frame by <data> bytes. */ +static inline void qc_stream_frm_mv_fwd(struct quic_frame *frm, uint64_t data) +{ + struct qf_stream *strm_frm = &frm->stream; + struct buffer cf_buf; + + /* Set offset bit if not already there. */ + strm_frm->offset.key += data; + frm->type |= QUIC_STREAM_FRAME_TYPE_OFF_BIT; + + strm_frm->len -= data; + cf_buf = b_make(b_orig(strm_frm->buf), + b_size(strm_frm->buf), + (char *)strm_frm->data - b_orig(strm_frm->buf), 0); + strm_frm->data = (unsigned char *)b_peek(&cf_buf, data); +} + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_FRAME_H */ diff --git a/include/haproxy/quic_loss-t.h b/include/haproxy/quic_loss-t.h new file mode 100644 index 0000000..0f07ddc --- /dev/null +++ b/include/haproxy/quic_loss-t.h @@ -0,0 +1,62 @@ +/* + * include/types/quic_loss.h + * This file contains definitions for QUIC loss detection. + * + * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _TYPES_QUIC_LOSS_H +#define _TYPES_QUIC_LOSS_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <inttypes.h> + +/* Maximum reordering in packets. */ +#define QUIC_LOSS_PACKET_THRESHOLD 3 +#define QUIC_TIMER_GRANULARITY 1U /* 1ms */ +#define QUIC_LOSS_INITIAL_RTT 333U /* 333ms */ + +/* QUIC loss time threshold expressed an RTT multiplier + * (QUIC_LOSS_TIME_THRESHOLD_MULTIPLICAND / QUIC_LOSS_TIME_THRESHOLD_DIVISOR) + */ +#define QUIC_LOSS_TIME_THRESHOLD_MULTIPLICAND 9 +#define QUIC_LOSS_TIME_THRESHOLD_DIVISOR 8 + +/* Note that all the unit of variables for QUIC LOSS detections + * is the tick. + */ + +struct quic_loss { + /* The most recent RTT measurement (ms) */ + unsigned int latest_rtt; + /* Smoothed RTT (ms) */ + unsigned int srtt; + /* RTT variation (ms) */ + unsigned int rtt_var; + /* Minimum RTT (ms) */ + unsigned int rtt_min; + /* Number of NACKed sent PTO. */ + unsigned int pto_count; + unsigned long nb_lost_pkt; + unsigned long nb_reordered_pkt; +}; + +#endif /* USE_QUIC */ +#endif /* _TYPES_QUIC_LOSS_H */ diff --git a/include/haproxy/quic_loss.h b/include/haproxy/quic_loss.h new file mode 100644 index 0000000..fc713ca --- /dev/null +++ b/include/haproxy/quic_loss.h @@ -0,0 +1,92 @@ +/* + * include/proto/quic_loss.h + * This file provides interface definition for QUIC loss detection. + * + * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_QUIC_LOSS_H +#define _PROTO_QUIC_LOSS_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <haproxy/quic_loss-t.h> + +#include <haproxy/api.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_tls-t.h> + +static inline void quic_loss_init(struct quic_loss *ql) +{ + ql->latest_rtt = 0; + ql->srtt = QUIC_LOSS_INITIAL_RTT; + ql->rtt_var = QUIC_LOSS_INITIAL_RTT / 2; + ql->rtt_min = 0; + ql->pto_count = 0; + ql->nb_lost_pkt = 0; + ql->nb_reordered_pkt = 0; +} + +/* Return 1 if a persistent congestion is observed for a list of + * lost packets sent during <period> period depending on <ql> loss information + * and <max_ack_delay> the maximum ACK delay of the connection experiencing a + * packet loss (<now_us> is not used). Return 0 if not, or if <period> is null. 
+ */ +static inline int quic_loss_persistent_congestion(struct quic_loss *ql, + unsigned int period, + unsigned int now_us, + unsigned int max_ack_delay) +{ + unsigned int congestion_period; + + if (!period) + return 0; + + congestion_period = ql->srtt + + QUIC_MAX(4 * ql->rtt_var, QUIC_TIMER_GRANULARITY) + max_ack_delay; + congestion_period *= QUIC_LOSS_PACKET_THRESHOLD; + + return period >= congestion_period; +} + +/* Return the PTO for <qc> connection (its max_ack_delay is counted only from QUIC_HS_ST_COMPLETE state) */ +static inline unsigned int quic_pto(struct quic_conn *qc) +{ + struct quic_loss *ql = &qc->path->loss; + + return ql->srtt + QUIC_MAX(4 * ql->rtt_var, QUIC_TIMER_GRANULARITY) + + (HA_ATOMIC_LOAD(&qc->state) >= QUIC_HS_ST_COMPLETE ? qc->max_ack_delay : 0); +} + +void quic_loss_srtt_update(struct quic_loss *ql, + unsigned int rtt, unsigned int ack_delay, + struct quic_conn *qc); + +struct quic_pktns *quic_loss_pktns(struct quic_conn *qc); + +struct quic_pktns *quic_pto_pktns(struct quic_conn *qc, + int handshake_completed, + unsigned int *pto); + +void qc_packet_loss_lookup(struct quic_pktns *pktns, struct quic_conn *qc, + struct list *lost_pkts); +int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns, + struct list *pkts, uint64_t now_us); +#endif /* USE_QUIC */ +#endif /* _PROTO_QUIC_LOSS_H */ diff --git a/include/haproxy/quic_openssl_compat-t.h b/include/haproxy/quic_openssl_compat-t.h new file mode 100644 index 0000000..2f2b92b --- /dev/null +++ b/include/haproxy/quic_openssl_compat-t.h @@ -0,0 +1,64 @@ +#ifndef _HAPROXY_QUIC_OPENSSL_COMPAT_T_H_ +#define _HAPROXY_QUIC_OPENSSL_COMPAT_T_H_ + +#ifdef USE_QUIC_OPENSSL_COMPAT +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#define QUIC_OPENSSL_COMPAT_TLS_SECRET_LEN 48 +#define QUIC_OPENSSL_COMPAT_TLS_IV_LEN 12 + +/* Highly inspired from nginx QUIC TLS compatibility code */ + +enum ssl_encryption_level_t { + ssl_encryption_initial = 0, + ssl_encryption_early_data, + 
ssl_encryption_handshake, + ssl_encryption_application +}; + +typedef struct ssl_quic_method_st { + int (*set_encryption_secrets)(SSL *ssl, enum ssl_encryption_level_t level, + const uint8_t *rsecret, const uint8_t *wsecret, + size_t secret_len); + int (*add_handshake_data)(SSL *ssl, enum ssl_encryption_level_t level, + const uint8_t *data, size_t len); + int (*flush_flight)(SSL *ssl); + int (*send_alert)(SSL *ssl, enum ssl_encryption_level_t level, + uint8_t alert); +} SSL_QUIC_METHOD; + +struct quic_tls_md { + unsigned char data[QUIC_OPENSSL_COMPAT_TLS_SECRET_LEN]; + size_t len; +}; + +struct quic_tls_iv { + unsigned char data[QUIC_OPENSSL_COMPAT_TLS_IV_LEN]; + size_t len; +}; + +struct quic_tls_secret { + struct quic_tls_md secret; + struct quic_tls_md key; + struct quic_tls_iv iv; +}; + +struct quic_tls_compat_keys { + struct quic_tls_secret secret; + const EVP_CIPHER *cipher; +}; + +struct quic_openssl_compat { + BIO *rbio; + BIO *wbio; + const SSL_QUIC_METHOD *method; + enum ssl_encryption_level_t write_level; + enum ssl_encryption_level_t read_level; + uint64_t read_record; + struct quic_tls_compat_keys keys; +}; + +#endif /* USE_QUIC_OPENSSL_COMPAT */ +#endif /* _HAPROXY_QUIC_OPENSSL_COMPAT_T_H_ */ diff --git a/include/haproxy/quic_openssl_compat.h b/include/haproxy/quic_openssl_compat.h new file mode 100644 index 0000000..837a28d --- /dev/null +++ b/include/haproxy/quic_openssl_compat.h @@ -0,0 +1,33 @@ +#ifndef _HAPROXY_QUIC_OPENSSL_COMPAT_H_ +#define _HAPROXY_QUIC_OPENSSL_COMPAT_H_ + +#ifdef USE_QUIC_OPENSSL_COMPAT + +/* Highly inspired from nginx QUIC TLS compatibility code */ +#include <haproxy/listener-t.h> +#include <haproxy/quic_openssl_compat-t.h> + +#define QUIC_OPENSSL_COMPAT_SSL_TP_EXT 0x39 + +/* Used by keylog */ +#define QUIC_OPENSSL_COMPAT_CLIENT_HANDSHAKE "CLIENT_HANDSHAKE_TRAFFIC_SECRET" +#define QUIC_OPENSSL_COMPAT_SERVER_HANDSHAKE "SERVER_HANDSHAKE_TRAFFIC_SECRET" +#define QUIC_OPENSSL_COMPAT_CLIENT_APPLICATION "CLIENT_TRAFFIC_SECRET_0" 
+#define QUIC_OPENSSL_COMPAT_SERVER_APPLICATION "SERVER_TRAFFIC_SECRET_0" + +void quic_tls_compat_msg_callback(struct connection *conn, + int write_p, int version, int content_type, + const void *buf, size_t len, SSL *ssl); +int quic_tls_compat_init(struct bind_conf *bind_conf, SSL_CTX *ctx); +void quic_tls_compat_keylog_callback(const SSL *ssl, const char *line); + +int SSL_set_quic_method(SSL *ssl, const SSL_QUIC_METHOD *quic_method); +enum ssl_encryption_level_t SSL_quic_read_level(const SSL *ssl); +enum ssl_encryption_level_t SSL_quic_write_level(const SSL *ssl); +int SSL_set_quic_transport_params(SSL *ssl, const uint8_t *params, size_t params_len); +int SSL_provide_quic_data(SSL *ssl, enum ssl_encryption_level_t level, + const uint8_t *data, size_t len); +int SSL_process_quic_post_handshake(SSL *ssl); + +#endif /* USE_QUIC_OPENSSL_COMPAT */ +#endif /* _HAPROXY_QUIC_OPENSSL_COMPAT_H_ */ diff --git a/include/haproxy/quic_retransmit.h b/include/haproxy/quic_retransmit.h new file mode 100644 index 0000000..403a53c --- /dev/null +++ b/include/haproxy/quic_retransmit.h @@ -0,0 +1,20 @@ +#ifndef _HAPROXY_QUIC_RETRANSMIT_H +#define _HAPROXY_QUIC_RETRANSMIT_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <haproxy/list-t.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_tls-t.h> + +void qc_prep_fast_retrans(struct quic_conn *qc, + struct quic_pktns *pktns, + struct list *frms1, struct list *frms2); +void qc_prep_hdshk_fast_retrans(struct quic_conn *qc, + struct list *ifrms, struct list *hfrms); + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_RETRANSMIT_H */ diff --git a/include/haproxy/quic_retry.h b/include/haproxy/quic_retry.h new file mode 100644 index 0000000..d31be02 --- /dev/null +++ b/include/haproxy/quic_retry.h @@ -0,0 +1,33 @@ +#ifndef _HAPROXY_QUIC_RETRY_H +#define _HAPROXY_QUIC_RETRY_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <inttypes.h> 
+#include <sys/socket.h> + +#include <haproxy/quic_cid-t.h> +#include <haproxy/quic_rx-t.h> +#include <haproxy/quic_sock-t.h> + +struct listener; + +int quic_generate_retry_token(unsigned char *token, size_t len, + const uint32_t version, + const struct quic_cid *odcid, + const struct quic_cid *dcid, + struct sockaddr_storage *addr); +int parse_retry_token(struct quic_conn *qc, + const unsigned char *token, const unsigned char *end, + struct quic_cid *odcid); +int quic_retry_token_check(struct quic_rx_packet *pkt, + struct quic_dgram *dgram, + struct listener *l, + struct quic_conn *qc, + struct quic_cid *odcid); + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_RETRY_H */ diff --git a/include/haproxy/quic_rx-t.h b/include/haproxy/quic_rx-t.h new file mode 100644 index 0000000..9ef8e7a --- /dev/null +++ b/include/haproxy/quic_rx-t.h @@ -0,0 +1,54 @@ +#ifndef _HAPROXY_RX_T_H +#define _HAPROXY_RX_T_H + +extern struct pool_head *pool_head_quic_conn_rxbuf; +extern struct pool_head *pool_head_quic_dgram; +extern struct pool_head *pool_head_quic_rx_packet; + +/* Maximum number of ack-eliciting received packets since the last + * ACK frame was sent + */ +#define QUIC_MAX_RX_AEPKTS_SINCE_LAST_ACK 2 +#define QUIC_ACK_DELAY (QUIC_TP_DFLT_MAX_ACK_DELAY - 5) +/* Flag a received packet as being an ack-eliciting packet. */ +#define QUIC_FL_RX_PACKET_ACK_ELICITING (1UL << 0) +/* Packet is the first one in the containing datagram. */ +#define QUIC_FL_RX_PACKET_DGRAM_FIRST (1UL << 1) +/* Spin bit set */ +#define QUIC_FL_RX_PACKET_SPIN_BIT (1UL << 2) + +struct quic_rx_packet { + struct list list; + struct list qc_rx_pkt_list; + + /* QUIC version used in packet. */ + const struct quic_version *version; + + unsigned char type; + /* Initial destination connection ID. */ + struct quic_cid dcid; + struct quic_cid scid; + /* Packet number offset : only valid for Initial/Handshake/0-RTT/1-RTT. 
*/ + size_t pn_offset; + /* Packet number */ + int64_t pn; + /* Packet number length */ + uint32_t pnl; + uint64_t token_len; + unsigned char *token; + /* Packet length */ + uint64_t len; + /* Packet length before decryption */ + uint64_t raw_len; + /* Additional authenticated data length */ + size_t aad_len; + unsigned char *data; + struct eb64_node pn_node; + volatile unsigned int refcnt; + /* Source address of this packet. */ + struct sockaddr_storage saddr; + unsigned int flags; + unsigned int time_received; +}; + +#endif /* _HAPROXY_RX_T_H */ diff --git a/include/haproxy/quic_rx.h b/include/haproxy/quic_rx.h new file mode 100644 index 0000000..494bc4a --- /dev/null +++ b/include/haproxy/quic_rx.h @@ -0,0 +1,58 @@ +/* + * QUIC protocol definitions (RX side). + * + * Copyright (C) 2023 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_RX_H +#define _HAPROXY_QUIC_RX_H + +#include <haproxy/listener-t.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_rx-t.h> + +int quic_dgram_parse(struct quic_dgram *dgram, struct quic_conn *from_qc, + struct listener *li); +int qc_treat_rx_pkts(struct quic_conn *qc); +int qc_parse_hd_form(struct quic_rx_packet *pkt, + unsigned char **pos, const unsigned char *end); +int qc_treat_rx_crypto_frms(struct quic_conn *qc, struct quic_enc_level *el, + struct ssl_sock_ctx *ctx); +int qc_handle_frms_of_lost_pkt(struct quic_conn *qc, + struct quic_tx_packet *pkt, + struct list *pktns_frm_list); + +/* Increment the reference counter of <pkt> */ +static inline void quic_rx_packet_refinc(struct quic_rx_packet *pkt) +{ + pkt->refcnt++; +} + +/* Decrement the reference counter of <pkt> while remaining positive */ +static inline void quic_rx_packet_refdec(struct quic_rx_packet *pkt) +{ + if (pkt->refcnt) + pkt->refcnt--; +} + +/* Return 1 if <pkt> header form is long, 0 if not. */ +static inline int qc_pkt_long(const struct quic_rx_packet *pkt) +{ + return pkt->type != QUIC_PACKET_TYPE_SHORT; +} + +#endif /* _HAPROXY_QUIC_RX_H */ diff --git a/include/haproxy/quic_sock-t.h b/include/haproxy/quic_sock-t.h new file mode 100644 index 0000000..67a5749 --- /dev/null +++ b/include/haproxy/quic_sock-t.h @@ -0,0 +1,50 @@ +#ifndef _HAPROXY_QUIC_SOCK_T_H +#define _HAPROXY_QUIC_SOCK_T_H +#ifdef USE_QUIC + +#include <haproxy/buf-t.h> + +/* QUIC socket allocation strategy. */ +enum quic_sock_mode { + QUIC_SOCK_MODE_CONN, /* Use a dedicated socket per connection. */ + QUIC_SOCK_MODE_LSTNR, /* Multiplex connections over listener socket. */ +}; + +/* QUIC connection accept queue. One per thread. 
*/ +struct quic_accept_queue { + struct mt_list listeners; /* QUIC listeners with at least one connection ready to be accepted on this queue */ + struct tasklet *tasklet; /* tasklet responsible for calling listener_accept */ +}; + +/* Buffer used to receive QUIC datagrams on random thread and redispatch them + * to the connection thread. + */ +struct quic_receiver_buf { + struct buffer buf; /* storage for datagrams received. */ + struct list dgram_list; /* datagrams received with this rxbuf. */ + struct mt_list rxbuf_el; /* list element into receiver.rxbuf_list. */ +}; + +/* QUIC datagram */ +struct quic_dgram { + void *owner; + unsigned char *buf; + size_t len; + unsigned char *dcid; + size_t dcid_len; + struct sockaddr_storage saddr; + struct sockaddr_storage daddr; + struct quic_conn *qc; + + struct list recv_list; /* elem to quic_receiver_buf <dgram_list>. */ + struct mt_list handler_list; /* elem to quic_dghdlr <dgrams>. */ +}; + +/* QUIC datagram handler */ +struct quic_dghdlr { + struct mt_list dgrams; + struct tasklet *task; +}; + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_SOCK_T_H */ diff --git a/include/haproxy/quic_sock.h b/include/haproxy/quic_sock.h new file mode 100644 index 0000000..531cf62 --- /dev/null +++ b/include/haproxy/quic_sock.h @@ -0,0 +1,107 @@ +/* + * include/haproxy/quic_sock.h + * This file contains declarations for QUIC sockets. + * + * Copyright 2020 Frederic Lecaille <flecaille@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_SOCK_H +#define _HAPROXY_QUIC_SOCK_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <sys/socket.h> +#include <sys/types.h> + +#include <haproxy/api.h> +#include <haproxy/connection-t.h> +#include <haproxy/listener-t.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_sock-t.h> + +int quic_session_accept(struct connection *cli_conn); +int quic_sock_get_src(struct connection *conn, struct sockaddr *addr, socklen_t len); +int quic_sock_get_dst(struct connection *conn, struct sockaddr *addr, socklen_t len); +int quic_sock_accepting_conn(const struct receiver *rx); +struct connection *quic_sock_accept_conn(struct listener *l, int *status); + +struct task *quic_lstnr_dghdlr(struct task *t, void *ctx, unsigned int state); +void quic_lstnr_sock_fd_iocb(int fd); +int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t count, + int flags); +int qc_rcv_buf(struct quic_conn *qc); +void quic_conn_sock_fd_iocb(int fd); + +void qc_alloc_fd(struct quic_conn *qc, const struct sockaddr_storage *src, + const struct sockaddr_storage *dst); +void qc_release_fd(struct quic_conn *qc, int reinit); +void qc_want_recv(struct quic_conn *qc); + +void quic_accept_push_qc(struct quic_conn *qc); + +int quic_listener_max_handshake(const struct listener *l); +int quic_listener_max_accept(const struct listener *l); + +/* Set default value for <qc> socket as uninitialized. */ +static inline void qc_init_fd(struct quic_conn *qc) +{ + qc->fd = -1; +} + +/* Returns true if <qc> socket is initialized else false. */ +static inline char qc_test_fd(struct quic_conn *qc) +{ + /* quic-conn socket should not be accessed once it has been released. 
*/ + BUG_ON(qc->fd == DEAD_FD_MAGIC); + return qc->fd >= 0; +} + +/* Try to increment <l> handshake current counter. If listener limit is + * reached, incrementation is rejected and 0 is returned. + */ +static inline int quic_increment_curr_handshake(struct listener *l) +{ + unsigned int count, next; + const int max = quic_listener_max_handshake(l); + + do { + count = l->rx.quic_curr_handshake; + if (count >= max) { + /* maxconn reached */ + next = 0; + goto end; + } + + /* try to increment quic_curr_handshake */ + next = count + 1; + } while (!_HA_ATOMIC_CAS(&l->rx.quic_curr_handshake, &count, next) && __ha_cpu_relax()); + + end: + return next; +} + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_SOCK_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/quic_ssl-t.h b/include/haproxy/quic_ssl-t.h new file mode 100644 index 0000000..3c057c6 --- /dev/null +++ b/include/haproxy/quic_ssl-t.h @@ -0,0 +1,21 @@ +/* + * include/haproxy/quic_ssl-t.h + * Definitions for QUIC over TLS/SSL api types, constants and flags. + * + * Copyright (C) 2023 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_QUIC_SSL_T_H +#define _HAPROXY_QUIC_SSL_T_H + +#include <haproxy/pool-t.h> + +extern struct pool_head *pool_head_quic_ssl_sock_ctx; + +#endif /* _HAPROXY_QUIC_SSL_T_H */ diff --git a/include/haproxy/quic_ssl.h b/include/haproxy/quic_ssl.h new file mode 100644 index 0000000..8f7df47 --- /dev/null +++ b/include/haproxy/quic_ssl.h @@ -0,0 +1,55 @@ +/* + * include/haproxy/quic_ssl.h + * This file contains QUIC over TLS/SSL api definitions. 
+ * + * Copyright (C) 2023 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef _HAPROXY_QUIC_SSL_H +#define _HAPROXY_QUIC_SSL_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <haproxy/listener-t.h> +#include <haproxy/ncbuf-t.h> +#include <haproxy/openssl-compat.h> +#include <haproxy/pool.h> +#include <haproxy/quic_ssl-t.h> +#include <haproxy/ssl_sock-t.h> + +int ssl_quic_initial_ctx(struct bind_conf *bind_conf); +int qc_alloc_ssl_sock_ctx(struct quic_conn *qc); +int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, + enum ssl_encryption_level_t level, + struct ssl_sock_ctx *ctx, + const unsigned char *data, size_t len); +int qc_ssl_provide_all_quic_data(struct quic_conn *qc, struct ssl_sock_ctx *ctx); + +static inline void qc_free_ssl_sock_ctx(struct ssl_sock_ctx **ctx) +{ + if (!*ctx) + return; + + SSL_free((*ctx)->ssl); + pool_free(pool_head_quic_ssl_sock_ctx, *ctx); + *ctx = NULL; +} + +#endif /* USE_QUIC */ +#endif /* _HAPROXY_QUIC_SSL_H */ diff --git a/include/haproxy/quic_stats-t.h b/include/haproxy/quic_stats-t.h new file mode 100644 index 0000000..1ee6265 --- /dev/null +++ b/include/haproxy/quic_stats-t.h @@ -0,0 +1,105 @@ +#ifndef _HAPROXY_QUIC_STATS_T_H +#define _HAPROXY_QUIC_STATS_T_H + +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" 
/* QUIC statistics counters.
 *
 * The members are declared in the same order as the QUIC_ST_* values of the
 * enum above.
 * NOTE(review): both lists presumably have to be kept in sync - confirm
 * against the stats module before reordering anything.
 */
struct quic_counters {
	long long rxbuf_full;             /* receive operation cancelled due to full buffer */
	long long dropped_pkt;            /* total number of dropped packets */
	long long dropped_pkt_bufoverrun; /* total number of dropped packets because of buffer overrun */
	long long dropped_parsing;        /* total number of dropped packets upon parsing errors */
	long long socket_full;            /* total number of EAGAIN errors on sendto() calls */
	long long sendto_err;             /* total number of errors on sendto() calls, EAGAIN excepted */
	long long sendto_err_unknown;     /* total number of errors on sendto() calls which are currently not supported */
	long long sent_pkt;               /* total number of sent packets */
	long long lost_pkt;               /* total number of lost packets */
	long long too_short_initial_dgram; /* total number of too short datagrams with Initial packets */
	long long retry_sent;             /* total number of Retry sent */
	long long retry_validated;        /* total number of validated Retry tokens */
	long long retry_error;            /* total number of Retry token errors */
	long long half_open_conn;         /* current number of connections waiting for address validation */
	long long hdshk_fail;             /* total number of handshake failures */
	long long stateless_reset_sent;   /* total number of stateless reset packets sent */
	/* Special events of interest */
	long long conn_migration_done;    /* total number of connection migrations handled */
	/* Transport errors */
	long long quic_transp_err_no_error; /* total number of NO_ERROR connection errors */
	long long quic_transp_err_internal_error; /* total number of INTERNAL_ERROR connection errors */
	long long quic_transp_err_connection_refused; /* total number of CONNECTION_REFUSED connection errors */
	long long quic_transp_err_flow_control_error; /* total number of FLOW_CONTROL_ERROR connection errors */
	long long quic_transp_err_stream_limit_error; /* total number of STREAM_LIMIT_ERROR connection errors */
	long long quic_transp_err_stream_state_error; /* total number of STREAM_STATE_ERROR connection errors */
	long long quic_transp_err_final_size_error; /* total number of FINAL_SIZE_ERROR connection errors */
	long long quic_transp_err_frame_encoding_error; /* total number of FRAME_ENCODING_ERROR connection errors */
	long long quic_transp_err_transport_parameter_error; /* total number of TRANSPORT_PARAMETER_ERROR connection errors */
	long long quic_transp_err_connection_id_limit; /* total number of CONNECTION_ID_LIMIT_ERROR connection errors */
	long long quic_transp_err_protocol_violation; /* total number of PROTOCOL_VIOLATION connection errors */
	long long quic_transp_err_invalid_token; /* total number of INVALID_TOKEN connection errors */
	long long quic_transp_err_application_error; /* total number of APPLICATION_ERROR connection errors */
	long long quic_transp_err_crypto_buffer_exceeded; /* total number of CRYPTO_BUFFER_EXCEEDED connection errors */
	long long quic_transp_err_key_update_error; /* total number of KEY_UPDATE_ERROR connection errors */
	long long quic_transp_err_aead_limit_reached; /* total number of AEAD_LIMIT_REACHED connection errors */
	long long quic_transp_err_no_viable_path; /* total number of NO_VIABLE_PATH connection errors */
	long long quic_transp_err_crypto_error; /* total number of CRYPTO_ERROR connection errors */
	long long quic_transp_err_unknown_error; /* total number of UNKNOWN_ERROR connection errors */
	/* Streams related counters */
	long long data_blocked;           /* total number of times DATA_BLOCKED frame was received */
	long long stream_data_blocked;    /* total number of times STREAM_DATA_BLOCKED frame was received */
	long long streams_blocked_bidi;   /* total number of times STREAMS_BLOCKED_BIDI frame was received */
	long long streams_blocked_uni;    /* total number of times STREAMS_BLOCKED_UNI frame was received */
};
/* A QUIC STREAM buffer used for Tx.
 *
 * Currently, no offset is associated with a buffer. The qc_stream_desc must
 * store the buffers in order and keep the offset of the oldest one. The
 * buffers can only be freed in strict order.
 */
struct qc_stream_buf {
	struct buffer buf; /* STREAM payload */
	struct list list;  /* element for qc_stream_desc list */
};

/* QUIC STREAM descriptor.
 *
 * This structure is the low-level counterpart of the QUIC STREAM at the MUX
 * layer. It is stored in the quic-conn and provides facilities for Tx
 * buffering.
 *
 * Once the MUX has finished transferring data on a STREAM, it must release its
 * QUIC STREAM descriptor. The descriptor will be kept by the quic_conn until
 * all acknowledgements have been received.
 */
struct qc_stream_desc {
	struct eb64_node by_id;     /* node for quic_conn tree */
	struct quic_conn *qc;       /* QUIC connection this descriptor is stored in */

	struct list buf_list;       /* buffers waiting for ACK, oldest offset first */
	struct qc_stream_buf *buf;  /* current buffer used by the MUX */
	uint64_t buf_offset;        /* base offset of current buffer */

	uint64_t ack_offset;        /* last acknowledged offset */
	struct eb_root acked_frms;  /* ACK frames tree for non-contiguous ACK ranges */

	int release;                /* set to 1 when the MUX has finished using this stream */

	void *ctx;                  /* MUX specific context */
};
+ * + * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _TYPES_QUIC_TLS_H +#define _TYPES_QUIC_TLS_H +#ifdef USE_QUIC +#ifndef USE_OPENSSL +#error "Must define USE_OPENSSL" +#endif + +#include <openssl/evp.h> + +#include <import/ebtree.h> + +#include <haproxy/ncbuf-t.h> +#include <haproxy/quic_ack-t.h> +#include <haproxy/openssl-compat.h> + +/* It seems TLS 1.3 ciphersuites macros differ between openssl and boringssl */ + +#if defined(OPENSSL_IS_BORINGSSL) || defined(OPENSSL_IS_AWSLC) +#if !defined(TLS1_3_CK_AES_128_GCM_SHA256) +#define TLS1_3_CK_AES_128_GCM_SHA256 TLS1_CK_AES_128_GCM_SHA256 +#endif +#if !defined(TLS1_3_CK_AES_256_GCM_SHA384) +#define TLS1_3_CK_AES_256_GCM_SHA384 TLS1_CK_AES_256_GCM_SHA384 +#endif +#if !defined(TLS1_3_CK_CHACHA20_POLY1305_SHA256) +#define TLS1_3_CK_CHACHA20_POLY1305_SHA256 TLS1_CK_CHACHA20_POLY1305_SHA256 +#endif +#if !defined(TLS1_3_CK_AES_128_CCM_SHA256) +/* Note that TLS1_CK_AES_128_CCM_SHA256 is not defined in boringssl */ +#define TLS1_3_CK_AES_128_CCM_SHA256 0x03001304 +#endif +#endif + +/* AEAD iv and secrete key lengths */ +#define QUIC_TLS_IV_LEN 12 /* bytes */ +#define QUIC_TLS_KEY_LEN 32 /* bytes */ +#define QUIC_TLS_SECRET_LEN 48 /* bytes */ +/* The ciphersuites for AEAD QUIC-TLS have 16-bytes authentication tags */ +#define QUIC_TLS_TAG_LEN 16 /* bytes */ + +/* The TLS extensions for QUIC transport parameters */ +#define TLS_EXTENSION_QUIC_TRANSPORT_PARAMETERS 0x0039 +#define TLS_EXTENSION_QUIC_TRANSPORT_PARAMETERS_DRAFT 0xffa5 + +extern struct pool_head *pool_head_quic_pktns; +extern struct pool_head *pool_head_quic_enc_level; +extern struct pool_head *pool_head_quic_tls_ctx; +extern struct pool_head 
*pool_head_quic_tls_secret; +extern struct pool_head *pool_head_quic_tls_iv; +extern struct pool_head *pool_head_quic_tls_key; + +#define QUIC_HKDF_KEY_LABEL_V1 "quic key" +#define QUIC_HKDF_IV_LABEL_V1 "quic iv" +#define QUIC_HKDF_HP_LABEL_V1 "quic hp" +#define QUIC_HKDF_KU_LABEL_V1 "quic ku" + +#define QUIC_HKDF_KEY_LABEL_V2 "quicv2 key" +#define QUIC_HKDF_IV_LABEL_V2 "quicv2 iv" +#define QUIC_HKDF_HP_LABEL_V2 "quicv2 hp" +#define QUIC_HKDF_KU_LABEL_V2 "quicv2 ku" + +#define QUIC_TLS_RETRY_KEY_DRAFT \ + "\xcc\xce\x18\x7e\xd0\x9a\x09\xd0\x57\x28\x15\x5a\x6c\xb9\x6b\xe1" +#define QUIC_TLS_RETRY_NONCE_DRAFT \ + "\xe5\x49\x30\xf9\x7f\x21\x36\xf0\x53\x0a\x8c\x1c" +#define QUIC_TLS_RETRY_KEY_V1 \ + "\xbe\x0c\x69\x0b\x9f\x66\x57\x5a\x1d\x76\x6b\x54\xe3\x68\xc8\x4e" +#define QUIC_TLS_RETRY_NONCE_V1 \ + "\x46\x15\x99\xd3\x5d\x63\x2b\xf2\x23\x98\x25\xbb" +#define QUIC_TLS_RETRY_KEY_V2 \ + "\x8f\xb4\xb0\x1b\x56\xac\x48\xe2\x60\xfb\xcb\xce\xad\x7c\xcc\x92" +#define QUIC_TLS_RETRY_NONCE_V2 \ + "\xd8\x69\x69\xbc\x2d\x7c\x6d\x99\x90\xef\xb0\x4a" + +/* QUIC handshake states for both clients and servers. 
/* QUIC packet number space */
struct quic_pktns {
	/* List element. NOTE(review): presumably links the packet number
	 * spaces of a same connection together - confirm against quic_conn.
	 */
	struct list list;
	/* TX part */
	struct {
		/* List of frames to send. */
		struct list frms;
		/* Next packet number to use for transmissions. */
		int64_t next_pn;
		/* The packets which have been sent. */
		struct eb_root pkts;
		/* The time the most recent ack-eliciting packet was sent. */
		unsigned int time_of_last_eliciting;
		/* The time this packet number space has experienced packet loss. */
		unsigned int loss_time;
		/* Boolean to denote if we must send probe packet. */
		unsigned int pto_probe;
		/* In flight bytes for this packet number space. */
		size_t in_flight;
		/* The acknowledgement delay of the packet with the largest packet number */
		uint64_t ack_delay;
	} tx;
	/* RX part */
	struct {
		/* Largest packet number */
		int64_t largest_pn;
		/* Largest acked sent packet. */
		int64_t largest_acked_pn;
		/* NOTE(review): presumably the ranges of received packet numbers
		 * still to be acknowledged - confirm against quic_ack-t.h.
		 */
		struct quic_arngs arngs;
		/* NOTE(review): presumably the number of ack-eliciting packets
		 * received since the last ACK was sent - TODO confirm.
		 */
		unsigned int nb_aepkts_since_last_ack;
		/* The time the packet with the largest packet number was received */
		uint64_t largest_time_received;
	} rx;
	/* Flags for this packet number space; their values are not defined here. */
	unsigned int flags;
};
/* QUIC encryption level: gathers the TLS context, the RX/TX CRYPTO material
 * and the packet number space used at one encryption level.
 */
struct quic_enc_level {
	/* List element. NOTE(review): presumably links the encryption levels
	 * of a same connection together - confirm against quic_conn.
	 */
	struct list list;
	/* Attach point to enqueue this encryption level during retransmissions */
	struct list retrans;
	/* pointer to list used only during retransmissions */
	struct list *retrans_frms;
	/* Encryption level, as defined by the TLS stack. */
	enum ssl_encryption_level_t level;
	/* TLS encryption context (AEAD only) */
	struct quic_tls_ctx tls_ctx;

	/* RX part */
	struct {
		/* The packets received by the listener I/O handler
		 * with header protection removed.
		 */
		struct eb_root pkts;
		/* List of QUIC packets with protected header. */
		struct list pqpkts;
		/* List of crypto frames received in order. */
		struct list crypto_frms;
	} rx;

	/* TX part */
	struct {
		struct {
			/* Array of CRYPTO data buffers */
			struct quic_crypto_buf **bufs;
			/* The number of elements in use in the <bufs> array. */
			size_t nb_buf;
			/* The total size of the CRYPTO data stored in the CRYPTO buffers. */
			size_t sz;
			/* The offset of the CRYPTO data stream. */
			uint64_t offset;
		} crypto;
	} tx;

	/* Crypto data stream */
	struct quic_cstream *cstream;
	/* Packet number space */
	struct quic_pktns *pktns;
};
buffer *buf, + const unsigned char *secret, size_t secret_len); + +int quic_derive_initial_secret(const EVP_MD *md, + const unsigned char *initial_salt, size_t initial_salt_sz, + unsigned char *initial_secret, size_t initial_secret_sz, + const unsigned char *secret, size_t secret_sz); + +int quic_tls_derive_initial_secrets(const EVP_MD *md, + unsigned char *rx, size_t rx_sz, + unsigned char *tx, size_t tx_sz, + const unsigned char *secret, size_t secret_sz, + int server); + +int quic_tls_encrypt(unsigned char *buf, size_t len, + const unsigned char *aad, size_t aad_len, + EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead, + const unsigned char *iv); + +int quic_tls_decrypt2(unsigned char *out, + unsigned char *in, size_t ilen, + unsigned char *aad, size_t aad_len, + EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead, + const unsigned char *key, const unsigned char *iv); + +int quic_tls_decrypt(unsigned char *buf, size_t len, + unsigned char *aad, size_t aad_len, + EVP_CIPHER_CTX *tls_ctx, const EVP_CIPHER *aead, + const unsigned char *key, const unsigned char *iv); + +int quic_tls_generate_retry_integrity_tag(unsigned char *odcid, unsigned char odcid_len, + unsigned char *buf, size_t len, + const struct quic_version *qv); + +int quic_tls_derive_keys(const EVP_CIPHER *aead, const EVP_CIPHER *hp, + const EVP_MD *md, const struct quic_version *qv, + unsigned char *key, size_t keylen, + unsigned char *iv, size_t ivlen, + unsigned char *hp_key, size_t hp_keylen, + const unsigned char *secret, size_t secretlen); + +int quic_tls_derive_retry_token_secret(const EVP_MD *md, + unsigned char *key, size_t keylen, + unsigned char *iv, size_t ivlen, + const unsigned char *salt, size_t saltlen, + const unsigned char *secret, size_t secretlen); + +int quic_hkdf_expand(const EVP_MD *md, + unsigned char *buf, size_t buflen, + const unsigned char *key, size_t keylen, + const unsigned char *label, size_t labellen); + +int quic_hkdf_expand_label(const EVP_MD *md, + unsigned char *buf, size_t 
buflen, + const unsigned char *key, size_t keylen, + const unsigned char *label, size_t labellen); + +int quic_hkdf_extract_and_expand(const EVP_MD *md, + unsigned char *buf, size_t buflen, + const unsigned char *key, size_t keylen, + const unsigned char *salt, size_t saltlen, + const unsigned char *label, size_t labellen); + +int quic_tls_rx_ctx_init(EVP_CIPHER_CTX **rx_ctx, + const EVP_CIPHER *aead, unsigned char *key); +int quic_tls_tx_ctx_init(EVP_CIPHER_CTX **tx_ctx, + const EVP_CIPHER *aead, unsigned char *key); + +int quic_tls_sec_update(const EVP_MD *md, const struct quic_version *qv, + unsigned char *new_sec, size_t new_seclen, + const unsigned char *sec, size_t seclen); + +void quic_aead_iv_build(unsigned char *iv, size_t ivlen, + unsigned char *aead_iv, size_t aead_ivlen, uint64_t pn); + +/* HP protection (AES) */ +int quic_tls_dec_aes_ctx_init(EVP_CIPHER_CTX **aes_ctx, + const EVP_CIPHER *aes, unsigned char *key); +int quic_tls_enc_aes_ctx_init(EVP_CIPHER_CTX **aes_ctx, + const EVP_CIPHER *aes, unsigned char *key); +int quic_tls_aes_decrypt(unsigned char *out, + const unsigned char *in, size_t inlen, + EVP_CIPHER_CTX *ctx); +int quic_tls_aes_encrypt(unsigned char *out, + const unsigned char *in, size_t inlen, + EVP_CIPHER_CTX *ctx); + +int quic_tls_key_update(struct quic_conn *qc); +void quic_tls_rotate_keys(struct quic_conn *qc); + +static inline const EVP_CIPHER *tls_aead(const SSL_CIPHER *cipher) +{ + switch (SSL_CIPHER_get_id(cipher)) { + case TLS1_3_CK_AES_128_GCM_SHA256: + return EVP_aes_128_gcm(); + case TLS1_3_CK_AES_256_GCM_SHA384: + return EVP_aes_256_gcm(); +#if !defined(OPENSSL_IS_AWSLC) + case TLS1_3_CK_CHACHA20_POLY1305_SHA256: + return EVP_chacha20_poly1305(); +#endif +#if !defined(USE_OPENSSL_WOLFSSL) && !defined(OPENSSL_IS_AWSLC) + case TLS1_3_CK_AES_128_CCM_SHA256: + return EVP_aes_128_ccm(); +#endif + default: + return NULL; + } +} + +static inline const EVP_MD *tls_md(const SSL_CIPHER *cipher) +{ + switch (SSL_CIPHER_get_id(cipher)) 
{ + case TLS1_3_CK_AES_128_GCM_SHA256: + case TLS1_3_CK_AES_128_CCM_SHA256: + case TLS1_3_CK_CHACHA20_POLY1305_SHA256: + return EVP_sha256(); + case TLS1_3_CK_AES_256_GCM_SHA384: + return EVP_sha384(); + default: + return NULL; + } +} + +static inline const EVP_CIPHER *tls_hp(const SSL_CIPHER *cipher) +{ + switch (SSL_CIPHER_get_id(cipher)) { +#if !defined(OPENSSL_IS_AWSLC) + case TLS1_3_CK_CHACHA20_POLY1305_SHA256: + return EVP_chacha20(); +#endif + case TLS1_3_CK_AES_128_CCM_SHA256: + case TLS1_3_CK_AES_128_GCM_SHA256: + return EVP_aes_128_ctr(); + case TLS1_3_CK_AES_256_GCM_SHA384: + return EVP_aes_256_ctr(); + default: + return NULL; + } + +} + +/* These following functions map TLS implementation encryption level to ours */ +static inline struct quic_pktns **ssl_to_quic_pktns(struct quic_conn *qc, + enum ssl_encryption_level_t level) +{ + switch (level) { + case ssl_encryption_initial: + return &qc->ipktns; + case ssl_encryption_early_data: + return &qc->apktns; + case ssl_encryption_handshake: + return &qc->hpktns; + case ssl_encryption_application: + return &qc->apktns; + default: + return NULL; + } +} + +/* These following functions map TLS implementation encryption level to ours */ +static inline struct quic_pktns **qel_to_quic_pktns(struct quic_conn *qc, + enum quic_tls_enc_level level) +{ + switch (level) { + case QUIC_TLS_ENC_LEVEL_INITIAL: + return &qc->ipktns; + case QUIC_TLS_ENC_LEVEL_EARLY_DATA: + return &qc->apktns; + case QUIC_TLS_ENC_LEVEL_HANDSHAKE: + return &qc->hpktns; + case QUIC_TLS_ENC_LEVEL_APP: + return &qc->apktns; + default: + return NULL; + } +} + +/* Map <level> TLS stack encryption level to our internal QUIC TLS encryption level + * if succeeded, or -1 if failed. 
+ */ +static inline enum quic_tls_enc_level ssl_to_quic_enc_level(enum ssl_encryption_level_t level) +{ + switch (level) { + case ssl_encryption_initial: + return QUIC_TLS_ENC_LEVEL_INITIAL; + case ssl_encryption_early_data: + return QUIC_TLS_ENC_LEVEL_EARLY_DATA; + case ssl_encryption_handshake: + return QUIC_TLS_ENC_LEVEL_HANDSHAKE; + case ssl_encryption_application: + return QUIC_TLS_ENC_LEVEL_APP; + default: + return -1; + } +} + +/* Return the address of the QUIC TLS encryption level associated to <level> TLS + * stack encryption level and attached to <qc> QUIC connection if succeeded, or + * NULL if failed. + */ +static inline struct quic_enc_level **ssl_to_qel_addr(struct quic_conn *qc, + enum ssl_encryption_level_t level) +{ + switch (level) { + case ssl_encryption_initial: + return &qc->iel; + case ssl_encryption_early_data: + return &qc->eel; + case ssl_encryption_handshake: + return &qc->hel; + case ssl_encryption_application: + return &qc->ael; + default: + return NULL; + } +} + +/* Return the address of the QUIC TLS encryption level associated to <level> internal + * encryption level and attached to <qc> QUIC connection if succeeded, or + * NULL if failed. + */ +static inline struct quic_enc_level **qel_to_qel_addr(struct quic_conn *qc, + enum quic_tls_enc_level level) +{ + switch (level) { + case QUIC_TLS_ENC_LEVEL_INITIAL: + return &qc->iel; + case QUIC_TLS_ENC_LEVEL_EARLY_DATA: + return &qc->eel; + case QUIC_TLS_ENC_LEVEL_HANDSHAKE: + return &qc->hel; + case QUIC_TLS_ENC_LEVEL_APP: + return &qc->ael; + default: + return NULL; + } +} + +/* Return the QUIC TLS encryption level associated to <level> internal encryption + * level attached to <qc> QUIC connection if succeeded, or NULL if failed. 
+ */
+static inline struct quic_enc_level *qc_quic_enc_level(const struct quic_conn *qc,
+                                                       enum quic_tls_enc_level level)
+{
+	if (level == QUIC_TLS_ENC_LEVEL_INITIAL)
+		return qc->iel;
+	if (level == QUIC_TLS_ENC_LEVEL_EARLY_DATA)
+		return qc->eel;
+	if (level == QUIC_TLS_ENC_LEVEL_HANDSHAKE)
+		return qc->hel;
+	if (level == QUIC_TLS_ENC_LEVEL_APP)
+		return qc->ael;
+
+	/* Unknown internal encryption level. */
+	return NULL;
+}
+
+/* Convert <level> internal QUIC TLS encryption level into the matching TLS
+ * implementation encryption level. Return -1 if <level> has no counterpart.
+ */
+static inline enum ssl_encryption_level_t quic_to_ssl_enc_level(enum quic_tls_enc_level level)
+{
+	if (level == QUIC_TLS_ENC_LEVEL_INITIAL)
+		return ssl_encryption_initial;
+	else if (level == QUIC_TLS_ENC_LEVEL_EARLY_DATA)
+		return ssl_encryption_early_data;
+	else if (level == QUIC_TLS_ENC_LEVEL_HANDSHAKE)
+		return ssl_encryption_handshake;
+	else if (level == QUIC_TLS_ENC_LEVEL_APP)
+		return ssl_encryption_application;
+
+	return -1;
+}
+
+/* Return a human readable string from <state> QUIC handshake state or NULL
+ * for unknown state values (for debug purpose).
+ */
+static inline char *quic_hdshk_state_str(const enum quic_handshake_state state)
+{
+	switch (state) { /* NOTE(review): no default label, presumably so that the compiler can report unhandled enum values -- confirm */
+	case QUIC_HS_ST_CLIENT_INITIAL:
+		return "CI";
+	case QUIC_HS_ST_CLIENT_HANDSHAKE:
+		return "CH";
+	case QUIC_HS_ST_CLIENT_HANDSHAKE_FAILED:
+		return "CF";
+	case QUIC_HS_ST_SERVER_INITIAL:
+		return "SI";
+	case QUIC_HS_ST_SERVER_HANDSHAKE:
+		return "SH";
+	case QUIC_HS_ST_SERVER_HANDSHAKE_FAILED:
+		return "SF";
+	case QUIC_HS_ST_COMPLETE:
+		return "HCP";
+	case QUIC_HS_ST_CONFIRMED:
+		return "HCF";
+	}
+
+	return NULL; /* <state> matched none of the cases above */
+}
+
+/* Return a human readable string from <err> SSL error (returned from
+ * SSL_get_error()), or UNKNOWN for any value which is not handled here.
+ * The WANT_ASYNC, WANT_ASYNC_JOB and WANT_CLIENT_HELLO_CB codes are only
+ * handled when none of LIBRESSL_VERSION_NUMBER, USE_OPENSSL_WOLFSSL and
+ * OPENSSL_IS_AWSLC is defined.
+ */
+static inline const char *ssl_error_str(int err)
+{
+	switch (err) {
+	case SSL_ERROR_NONE:
+		return "NONE";
+	case SSL_ERROR_SSL:
+		return "SSL";
+	case SSL_ERROR_WANT_READ:
+		return "WANT_READ";
+	case SSL_ERROR_WANT_WRITE:
+		return "WANT_WRITE";
+	case SSL_ERROR_WANT_X509_LOOKUP:
+		return "X509_LOOKUP";
+	case SSL_ERROR_SYSCALL:
+		return "SYSCALL";
+	case SSL_ERROR_ZERO_RETURN:
+		return "ZERO_RETURN";
+	case SSL_ERROR_WANT_CONNECT:
+		return "WANT_CONNECT";
+	case SSL_ERROR_WANT_ACCEPT:
+		return "WANT_ACCEPT";
+#if !defined(LIBRESSL_VERSION_NUMBER) && !defined(USE_OPENSSL_WOLFSSL) && !defined(OPENSSL_IS_AWSLC)
+	case SSL_ERROR_WANT_ASYNC:
+		return "WANT_ASYNC";
+	case SSL_ERROR_WANT_ASYNC_JOB:
+		return "WANT_ASYNC_JOB";
+	case SSL_ERROR_WANT_CLIENT_HELLO_CB:
+		return "WANT_CLIENT_HELLO_CB";
+#endif
+	default:
+		return "UNKNOWN";
+	}
+}
+
+
+/* Return a character identifying the encryption level from <level> QUIC TLS
+ * encryption level (for debug purpose).
+ * Initial -> 'I', Early Data -> 'E', Handshake -> 'H', Application -> 'A' and
+ * '-' if undefined.
+ */ +static inline char quic_enc_level_char(enum quic_tls_enc_level level) +{ + switch (level) { + case QUIC_TLS_ENC_LEVEL_INITIAL: + return 'I'; + case QUIC_TLS_ENC_LEVEL_EARLY_DATA: + return 'E'; + case QUIC_TLS_ENC_LEVEL_HANDSHAKE: + return 'H'; + case QUIC_TLS_ENC_LEVEL_APP: + return 'A'; + default: + return '-'; + } +} + +/* Return a character identifying <qel> encryption level from <qc> QUIC connection + * (for debug purpose). + * Initial -> 'I', Early Data -> 'E', Handshake -> 'H', Application -> 'A' and + * '-' if undefined. + */ +static inline char quic_enc_level_char_from_qel(const struct quic_enc_level *qel, + const struct quic_conn *qc) +{ + if (qel == qc->iel) + return 'I'; + else if (qel == qc->eel) + return 'E'; + else if (qel == qc->hel) + return 'H'; + else if (qel == qc->ael) + return 'A'; + return '-'; +} + +/* Return a character identifying the encryption level of a packet depending on + * its <type> type, and its <long_header> header length (for debug purpose). + * Initial -> 'I', ORTT -> '0', Handshake -> 'H', Application -> 'A' and + * '-' if undefined. + */ +static inline char quic_packet_type_enc_level_char(int packet_type) +{ + switch (packet_type) { + case QUIC_PACKET_TYPE_INITIAL: + return 'I'; + case QUIC_PACKET_TYPE_0RTT: + return '0'; + case QUIC_PACKET_TYPE_HANDSHAKE: + return 'H'; + case QUIC_PACKET_TYPE_SHORT: + return 'A'; + default: + return '-'; + } +} + +/* Initialize a QUIC packet number space. + * Never fails. 
+ */
+static inline int quic_pktns_init(struct quic_conn *qc, struct quic_pktns **p)
+{
+	struct quic_pktns *pktns;
+
+	pktns = pool_alloc(pool_head_quic_pktns);
+	if (!pktns)
+		return 0; /* pool allocation failed, <*p> is left untouched */
+
+	LIST_INIT(&pktns->tx.frms);
+	pktns->tx.next_pn = -1;
+	pktns->tx.pkts = EB_ROOT_UNIQUE;
+	pktns->tx.time_of_last_eliciting = 0;
+	pktns->tx.loss_time = TICK_ETERNITY;
+	pktns->tx.pto_probe = 0;
+	pktns->tx.in_flight = 0;
+	pktns->tx.ack_delay = 0;
+
+	pktns->rx.largest_pn = -1;
+	pktns->rx.largest_acked_pn = -1;
+	pktns->rx.arngs.root = EB_ROOT_UNIQUE;
+	pktns->rx.arngs.sz = 0;
+	pktns->rx.arngs.enc_sz = 0;
+	pktns->rx.nb_aepkts_since_last_ack = 0;
+	pktns->rx.largest_time_received = 0;
+
+	pktns->flags = 0;
+	if (p == &qc->hpktns && qc->apktns) /* Handshake space created while the Application one already exists */
+		LIST_INSERT(&qc->ipktns->list, &pktns->list); /* linked relative to the Initial space entry (dereferences qc->ipktns); exact position depends on LIST_INSERT -- TODO confirm */
+	else
+		LIST_APPEND(&qc->pktns_list, &pktns->list);
+	*p = pktns;
+
+	return 1;
+}
+/* Release all the TX packets stored in the <pktns> packet number space of <qc>
+ * connection. For each packet of the ->tx.pkts tree: the path ->ifae_pkts
+ * counter is decremented if the packet is flagged ack-eliciting, each frame
+ * still attached to it is unreferenced, detached and freed, then the packet
+ * is removed from the tree and one reference to it is dropped.
+ */
+static inline void quic_pktns_tx_pkts_release(struct quic_pktns *pktns, struct quic_conn *qc)
+{
+	struct eb64_node *node;
+
+	TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+	node = eb64_first(&pktns->tx.pkts);
+	while (node) {
+		struct quic_tx_packet *pkt;
+		struct quic_frame *frm, *frmbak;
+
+		pkt = eb64_entry(node, struct quic_tx_packet, pn_node);
+		node = eb64_next(node); /* step to the next node before <pkt> is deleted from the tree below */
+		if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
+			qc->path->ifae_pkts--;
+		list_for_each_entry_safe(frm, frmbak, &pkt->frms, list) {
+			TRACE_DEVEL("freeing frame from packet",
+			            QUIC_EV_CONN_PRSAFRM, qc, frm, &pkt->pn_node.key);
+			qc_frm_unref(frm, qc);
+			LIST_DEL_INIT(&frm->list);
+			quic_tx_packet_refdec(frm->pkt); /* reference taken through the frame ->pkt pointer */
+			qc_frm_free(qc, &frm);
+		}
+		eb64_delete(&pkt->pn_node);
+		quic_tx_packet_refdec(pkt);
+	}
+
+	TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+}
+
+/* Discard <pktns> packet number space attached to <qc> QUIC connection.
+ * Its loss information are reset. Deduce the outstanding bytes for this
+ * packet number space from the outstanding bytes for the path of this
+ * connection.
+ * Note that all the non acknowledged TX packets and their frames are freed.
+ * Always succeeds.
+ */
+static inline void quic_pktns_discard(struct quic_pktns *pktns,
+                                      struct quic_conn *qc)
+{
+	TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+	if (pktns == qc->ipktns) /* record the discard in the connection flags (Initial and Handshake spaces only) */
+		qc->flags |= QUIC_FL_CONN_IPKTNS_DCD;
+	else if (pktns == qc->hpktns)
+		qc->flags |= QUIC_FL_CONN_HPKTNS_DCD;
+	qc->path->in_flight -= pktns->tx.in_flight; /* remove the bytes in flight of this space from the path counters */
+	qc->path->prep_in_flight -= pktns->tx.in_flight;
+	qc->path->loss.pto_count = 0;
+
+	pktns->tx.time_of_last_eliciting = 0;
+	pktns->tx.loss_time = TICK_ETERNITY;
+	pktns->tx.pto_probe = 0;
+	pktns->tx.in_flight = 0;
+	quic_pktns_tx_pkts_release(pktns, qc);
+
+	TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+}
+
+
+/* Release all the frames attached to the ->tx.frms list of <pktns> packet
+ * number space. Does nothing but tracing if <pktns> is NULL.
+ */
+static inline void qc_release_pktns_frms(struct quic_conn *qc,
+                                         struct quic_pktns *pktns)
+{
+	struct quic_frame *frm, *frmbak;
+
+	TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+	if (!pktns)
+		goto leave;
+
+	list_for_each_entry_safe(frm, frmbak, &pktns->tx.frms, list)
+		qc_frm_free(qc, &frm);
+
+ leave:
+	TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+}
+
+/* Return 1 if <pktns> matches with the Application packet number space of
+ * <qc> connection which is common to the 0-RTT and 1-RTT encryption levels, 0
+ * if not (handshake packets).
+ */
+static inline int quic_application_pktns(struct quic_pktns *pktns, struct quic_conn *qc)
+{
+	return pktns == qc->apktns;
+}
+
+/* Returns the current largest acknowledged packet number if exists, -1 if not.
+ * This is the ->last value of the last node of the <pktns> RX ack ranges tree;
+ * -1 is returned when this tree is empty.
+ */
+static inline int64_t quic_pktns_get_largest_acked_pn(struct quic_pktns *pktns)
+{
+	struct eb64_node *ar = eb64_last(&pktns->rx.arngs.root);
+
+	if (!ar)
+		return -1;
+
+	return eb64_entry(ar, struct quic_arng_node, first)->last;
+}
+
+/* Return a character to identify the packet number space <pktns> of <qc> QUIC
+ * connection.
'I' for Initial packet number space, 'H' for Handshake packet + * space, and 'A' for Application data number space, or '-' if not found. + */ +static inline char quic_pktns_char(const struct quic_conn *qc, + const struct quic_pktns *pktns) +{ + if (pktns == qc->apktns) + return 'A'; + else if (pktns == qc->hpktns) + return 'H'; + else if (pktns == qc->ipktns) + return 'I'; + + return '-'; +} + +/* Return the TLS encryption level to be used for <packet_type> + * QUIC packet type. + * Returns -1 if there is no TLS encryption level for <packet_type> + * packet type. + */ +static inline enum quic_tls_enc_level quic_packet_type_enc_level(enum quic_pkt_type packet_type) +{ + switch (packet_type) { + case QUIC_PACKET_TYPE_INITIAL: + return QUIC_TLS_ENC_LEVEL_INITIAL; + case QUIC_PACKET_TYPE_0RTT: + return QUIC_TLS_ENC_LEVEL_EARLY_DATA; + case QUIC_PACKET_TYPE_HANDSHAKE: + return QUIC_TLS_ENC_LEVEL_HANDSHAKE; + case QUIC_PACKET_TYPE_RETRY: + return QUIC_TLS_ENC_LEVEL_NONE; + case QUIC_PACKET_TYPE_SHORT: + return QUIC_TLS_ENC_LEVEL_APP; + default: + return QUIC_TLS_ENC_LEVEL_NONE; + } +} + +static inline enum quic_tls_pktns quic_tls_pktns(enum quic_tls_enc_level level) +{ + switch (level) { + case QUIC_TLS_ENC_LEVEL_INITIAL: + return QUIC_TLS_PKTNS_INITIAL; + case QUIC_TLS_ENC_LEVEL_EARLY_DATA: + case QUIC_TLS_ENC_LEVEL_APP: + return QUIC_TLS_PKTNS_01RTT; + case QUIC_TLS_ENC_LEVEL_HANDSHAKE: + return QUIC_TLS_PKTNS_HANDSHAKE; + default: + return -1; + } +} + +/* Return 1 if <pktns> packet number space attached to <qc> connection has been discarded, + * 0 if not. 
+ */
+static inline int quic_tls_pktns_is_dcd(struct quic_conn *qc, struct quic_pktns *pktns)
+{
+	if (pktns == qc->apktns) /* no discard flag is checked for the Application space */
+		return 0;
+
+	if ((pktns == qc->ipktns && (qc->flags & QUIC_FL_CONN_IPKTNS_DCD)) ||
+	    (pktns == qc->hpktns && (qc->flags & QUIC_FL_CONN_HPKTNS_DCD)))
+		return 1;
+
+	return 0;
+}
+
+/* Return 1 if the packet number space attached to <qc> connection and
+ * associated to <type> packet type has been discarded, 0 if not. Only the
+ * Initial and Handshake packet types may report a discarded space.
+ */
+static inline int quic_tls_pkt_type_pktns_dcd(struct quic_conn *qc, unsigned char type)
+{
+	if ((type == QUIC_PACKET_TYPE_INITIAL && (qc->flags & QUIC_FL_CONN_IPKTNS_DCD)) ||
+	    (type == QUIC_PACKET_TYPE_HANDSHAKE && (qc->flags & QUIC_FL_CONN_HPKTNS_DCD)))
+		return 1;
+
+	return 0;
+}
+
+/* Select the correct TLS cipher context to be used to decipher an RX packet
+ * with <type> as type and <version> as version and attached to <qc>
+ * connection from <qel> encryption level.
+ * The context of <qel> is returned for every packet type but Initial. For an
+ * Initial packet, qc->nictx is returned when <version> is the negotiated
+ * version of <qc>, the context of <qel> otherwise.
+ */
+static inline struct quic_tls_ctx *qc_select_tls_ctx(struct quic_conn *qc,
+                                                     struct quic_enc_level *qel,
+                                                     unsigned char type,
+                                                     const struct quic_version *version)
+{
+	return type != QUIC_PACKET_TYPE_INITIAL ? &qel->tls_ctx :
+		version == qc->negotiated_version ? qc->nictx : &qel->tls_ctx;
+}
+
+/* Reset all members of <ctx> to default values, ->hp_key[] excepted.
+ * Nothing is released here: the pointers are only overwritten with NULL.
+ */
+static inline void quic_tls_ctx_reset(struct quic_tls_ctx *ctx)
+{
+	ctx->rx.ctx = NULL;
+	ctx->rx.aead = NULL;
+	ctx->rx.md = NULL;
+	ctx->rx.hp_ctx = NULL;
+	ctx->rx.hp = NULL;
+	ctx->rx.secret = NULL;
+	ctx->rx.secretlen = 0;
+	ctx->rx.iv = NULL;
+	ctx->rx.ivlen = 0;
+	ctx->rx.key = NULL;
+	ctx->rx.keylen = 0;
+	ctx->rx.pn = 0;
+
+	ctx->tx.ctx = NULL;
+	ctx->tx.aead = NULL;
+	ctx->tx.md = NULL;
+	ctx->tx.hp_ctx = NULL;
+	ctx->tx.hp = NULL;
+	ctx->tx.secret = NULL;
+	ctx->tx.secretlen = 0;
+	ctx->tx.iv = NULL;
+	ctx->tx.ivlen = 0;
+	ctx->tx.key = NULL;
+	ctx->tx.keylen = 0;
+	/* Not used on the TX path. */
+	ctx->tx.pn = 0;
+
+	ctx->flags = 0;
+}
+
+/* Erase and free the secrets for a QUIC encryption level with <ctx> as
+ * context: the IV and key buffers are zeroed then released to their pools,
+ * the cipher contexts are freed, and all the members are finally reset by
+ * quic_tls_ctx_reset(). A NULL <ctx> is ignored.
+ * NOTE(review): the ->secret buffers are not released here although their
+ * pointers are reset to NULL -- confirm they are released elsewhere.
+ * Always succeeds.
+ */
+static inline void quic_tls_ctx_secs_free(struct quic_tls_ctx *ctx)
+{
+	if (!ctx)
+		return;
+
+	if (ctx->rx.iv) { /* wipe the material before handing the buffers back to the pools */
+		memset(ctx->rx.iv, 0, ctx->rx.ivlen);
+		ctx->rx.ivlen = 0;
+	}
+	if (ctx->rx.key) {
+		memset(ctx->rx.key, 0, ctx->rx.keylen);
+		ctx->rx.keylen = 0;
+	}
+	if (ctx->tx.iv) {
+		memset(ctx->tx.iv, 0, ctx->tx.ivlen);
+		ctx->tx.ivlen = 0;
+	}
+	if (ctx->tx.key) {
+		memset(ctx->tx.key, 0, ctx->tx.keylen);
+		ctx->tx.keylen = 0;
+	}
+
+	/* RX HP protection */
+	EVP_CIPHER_CTX_free(ctx->rx.hp_ctx);
+	/* RX AEAD decryption */
+	EVP_CIPHER_CTX_free(ctx->rx.ctx);
+	pool_free(pool_head_quic_tls_iv, ctx->rx.iv);
+	pool_free(pool_head_quic_tls_key, ctx->rx.key);
+
+	/* TX HP protection */
+	EVP_CIPHER_CTX_free(ctx->tx.hp_ctx);
+	/* TX AEAD encryption */
+	EVP_CIPHER_CTX_free(ctx->tx.ctx);
+	pool_free(pool_head_quic_tls_iv, ctx->tx.iv);
+	pool_free(pool_head_quic_tls_key, ctx->tx.key);
+
+	quic_tls_ctx_reset(ctx); /* clears every pointer released above */
+}
+
+/* Allocate the secrete keys for a QUIC encryption level with <ctx> as context.
+ * Returns 1 if succeeded, 0 if not.
+ */ +static inline int quic_tls_ctx_keys_alloc(struct quic_tls_ctx *ctx) +{ + if (ctx->rx.key) + goto write; + + if (!(ctx->rx.iv = pool_alloc(pool_head_quic_tls_iv)) || + !(ctx->rx.key = pool_alloc(pool_head_quic_tls_key))) + goto err; + + write: + if (ctx->tx.key) + goto out; + + if (!(ctx->tx.iv = pool_alloc(pool_head_quic_tls_iv)) || + !(ctx->tx.key = pool_alloc(pool_head_quic_tls_key))) + goto err; + + ctx->rx.ivlen = ctx->tx.ivlen = QUIC_TLS_IV_LEN; + ctx->rx.keylen = ctx->tx.keylen = QUIC_TLS_KEY_LEN; +out: + return 1; + + err: + quic_tls_ctx_secs_free(ctx); + return 0; +} + +/* Release the memory allocated for <secs> secrets */ +static inline void quic_tls_secrets_keys_free(struct quic_tls_secrets *secs) +{ + if (secs->iv) { + memset(secs->iv, 0, secs->ivlen); + secs->ivlen = 0; + } + + if (secs->key) { + memset(secs->key, 0, secs->keylen); + secs->keylen = 0; + } + + /* HP protection */ + EVP_CIPHER_CTX_free(secs->hp_ctx); + /* AEAD decryption */ + EVP_CIPHER_CTX_free(secs->ctx); + pool_free(pool_head_quic_tls_iv, secs->iv); + pool_free(pool_head_quic_tls_key, secs->key); + + secs->iv = secs->key = NULL; +} + +/* Allocate the memory for the <secs> secrets. + * Return 1 if succeeded, 0 if not. + */ +static inline int quic_tls_secrets_keys_alloc(struct quic_tls_secrets *secs) +{ + if (!(secs->iv = pool_alloc(pool_head_quic_tls_iv)) || + !(secs->key = pool_alloc(pool_head_quic_tls_key))) + goto err; + + secs->ivlen = QUIC_TLS_IV_LEN; + secs->keylen = QUIC_TLS_KEY_LEN; + + return 1; + + err: + quic_tls_secrets_keys_free(secs); + return 0; +} + +/* Release the memory allocated for the negotiated Initial QUIC TLS context + * attached to <qc> connection. + */ +static inline void quic_nictx_free(struct quic_conn *qc) +{ + quic_tls_ctx_secs_free(qc->nictx); + pool_free(pool_head_quic_tls_ctx, qc->nictx); + qc->nictx = NULL; +} + +/* Initialize a TLS cryptographic context for the Initial encryption level. 
*/ +static inline int quic_initial_tls_ctx_init(struct quic_tls_ctx *ctx) +{ + ctx->rx.aead = ctx->tx.aead = EVP_aes_128_gcm(); + ctx->rx.md = ctx->tx.md = EVP_sha256(); + ctx->rx.hp = ctx->tx.hp = EVP_aes_128_ctr(); + + ctx->rx.iv = NULL; + ctx->rx.ivlen = 0; + ctx->rx.key = NULL; + ctx->rx.keylen = 0; + ctx->rx.secret = NULL; + ctx->rx.secretlen = 0; + + ctx->tx.iv = NULL; + ctx->tx.ivlen = 0; + ctx->tx.key = NULL; + ctx->tx.keylen = 0; + ctx->tx.secret = NULL; + ctx->tx.secretlen = 0; + + return quic_tls_ctx_keys_alloc(ctx); +} + +static inline int quic_tls_level_pkt_type(enum quic_tls_enc_level level) +{ + switch (level) { + case QUIC_TLS_ENC_LEVEL_INITIAL: + return QUIC_PACKET_TYPE_INITIAL; + case QUIC_TLS_ENC_LEVEL_EARLY_DATA: + return QUIC_PACKET_TYPE_0RTT; + case QUIC_TLS_ENC_LEVEL_HANDSHAKE: + return QUIC_PACKET_TYPE_HANDSHAKE; + case QUIC_TLS_ENC_LEVEL_APP: + return QUIC_PACKET_TYPE_SHORT; + default: + return -1; + } +} + +/* Return the packet type associated to <qel> encryption for <qc> QUIC connection, + * or -1 if not found. + */ +static inline enum quic_pkt_type quic_enc_level_pkt_type(struct quic_conn *qc, + struct quic_enc_level *qel) +{ + if (qel == qc->iel) + return QUIC_PACKET_TYPE_INITIAL; + else if (qel == qc->hel) + return QUIC_PACKET_TYPE_HANDSHAKE; + else if (qel == qc->eel) + return QUIC_PACKET_TYPE_0RTT; + else if (qel == qc->ael) + return QUIC_PACKET_TYPE_SHORT; + else + return -1; +} + +/* Derive the initial secrets with <ctx> as QUIC TLS context which is the + * cryptographic context for the first encryption level (Initial) from + * <cid> connection ID with <cidlen> as length (in bytes) for a server or not + * depending on <server> boolean value. + * Return 1 if succeeded or 0 if not. 
+ */
+static inline int qc_new_isecs(struct quic_conn *qc,
+                               struct quic_tls_ctx *ctx, const struct quic_version *ver,
+                               const unsigned char *cid, size_t cidlen, int server)
+{
+	unsigned char initial_secret[32];
+	/* Initial secret to be derived for incoming packets */
+	unsigned char rx_init_sec[32];
+	/* Initial secret to be derived for outgoing packets */
+	unsigned char tx_init_sec[32];
+	struct quic_tls_secrets *rx_ctx, *tx_ctx;
+
+	TRACE_ENTER(QUIC_EV_CONN_ISEC);
+	if (!quic_initial_tls_ctx_init(ctx)) /* sets the algorithms of <ctx> and allocates its key and IV buffers */
+		goto err;
+	/* Derive the common initial secret from the salt of <ver> and <cid>. */
+	if (!quic_derive_initial_secret(ctx->rx.md,
+	                                ver->initial_salt, ver->initial_salt_len,
+	                                initial_secret, sizeof initial_secret,
+	                                cid, cidlen))
+		goto err;
+	/* Derive rx_init_sec and tx_init_sec from it; <server> is forwarded to the helper. */
+	if (!quic_tls_derive_initial_secrets(ctx->rx.md,
+	                                     rx_init_sec, sizeof rx_init_sec,
+	                                     tx_init_sec, sizeof tx_init_sec,
+	                                     initial_secret, sizeof initial_secret, server))
+		goto err;
+	/* RX side: key, IV and ->hp_key first, then the two cipher contexts. */
+	rx_ctx = &ctx->rx;
+	tx_ctx = &ctx->tx;
+	if (!quic_tls_derive_keys(ctx->rx.aead, ctx->rx.hp, ctx->rx.md, ver,
+	                          rx_ctx->key, rx_ctx->keylen,
+	                          rx_ctx->iv, rx_ctx->ivlen,
+	                          rx_ctx->hp_key, sizeof rx_ctx->hp_key,
+	                          rx_init_sec, sizeof rx_init_sec))
+		goto err;
+
+	if (!quic_tls_rx_ctx_init(&rx_ctx->ctx, rx_ctx->aead, rx_ctx->key))
+		goto err;
+
+	if (!quic_tls_enc_aes_ctx_init(&rx_ctx->hp_ctx, rx_ctx->hp, rx_ctx->hp_key))
+		goto err;
+	/* TX side: same sequence, from tx_init_sec. */
+	if (!quic_tls_derive_keys(ctx->tx.aead, ctx->tx.hp, ctx->tx.md, ver,
+	                          tx_ctx->key, tx_ctx->keylen,
+	                          tx_ctx->iv, tx_ctx->ivlen,
+	                          tx_ctx->hp_key, sizeof tx_ctx->hp_key,
+	                          tx_init_sec, sizeof tx_init_sec))
+		goto err;
+
+	if (!quic_tls_tx_ctx_init(&tx_ctx->ctx, tx_ctx->aead, tx_ctx->key))
+		goto err;
+
+	if (!quic_tls_enc_aes_ctx_init(&tx_ctx->hp_ctx, tx_ctx->hp, tx_ctx->hp_key))
+		goto err;
+
+	TRACE_LEAVE(QUIC_EV_CONN_ISEC, qc, rx_init_sec, tx_init_sec);
+
+	return 1;
+
+ err:
+	TRACE_DEVEL("leaving in error", QUIC_EV_CONN_ISEC); /* NOTE(review): nothing set up above is released here and the stack copies of the secrets are not wiped; presumably the caller cleans <ctx> up -- confirm */
+	return 0;
+}
+
+/* Reset all members of <tls_kp> to default values.
*/ +static inline void quic_tls_ku_reset(struct quic_tls_kp *tls_kp) +{ + tls_kp->ctx = NULL; + tls_kp->secret = NULL; + tls_kp->iv = NULL; + tls_kp->key = NULL; +} + +/* Release the memory allocated for all the key update key phase + * structures for <qc> QUIC connection. + * Always succeeds. + */ +static inline void quic_tls_ku_free(struct quic_conn *qc) +{ + EVP_CIPHER_CTX_free(qc->ku.prv_rx.ctx); + pool_free(pool_head_quic_tls_secret, qc->ku.prv_rx.secret); + pool_free(pool_head_quic_tls_iv, qc->ku.prv_rx.iv); + pool_free(pool_head_quic_tls_key, qc->ku.prv_rx.key); + quic_tls_ku_reset(&qc->ku.prv_rx); + EVP_CIPHER_CTX_free(qc->ku.nxt_rx.ctx); + pool_free(pool_head_quic_tls_secret, qc->ku.nxt_rx.secret); + pool_free(pool_head_quic_tls_iv, qc->ku.nxt_rx.iv); + pool_free(pool_head_quic_tls_key, qc->ku.nxt_rx.key); + quic_tls_ku_reset(&qc->ku.nxt_rx); + EVP_CIPHER_CTX_free(qc->ku.nxt_tx.ctx); + pool_free(pool_head_quic_tls_secret, qc->ku.nxt_tx.secret); + pool_free(pool_head_quic_tls_iv, qc->ku.nxt_tx.iv); + pool_free(pool_head_quic_tls_key, qc->ku.nxt_tx.key); + quic_tls_ku_reset(&qc->ku.nxt_tx); +} + +/* Initialize <kp> key update secrets, allocating the required memory. + * Return 1 if all the secrets could be allocated, 0 if not. + * This is the responsibility of the caller to release the memory + * allocated by this function in case of failure. + */ +static inline int quic_tls_kp_init(struct quic_tls_kp *kp) +{ + kp->count = 0; + kp->pn = 0; + kp->flags = 0; + kp->secret = pool_alloc(pool_head_quic_tls_secret); + kp->secretlen = QUIC_TLS_SECRET_LEN; + kp->iv = pool_alloc(pool_head_quic_tls_iv); + kp->ivlen = QUIC_TLS_IV_LEN; + kp->key = pool_alloc(pool_head_quic_tls_key); + kp->keylen = QUIC_TLS_KEY_LEN; + + return kp->secret && kp->iv && kp->key; +} + +/* Initialize all the key update key phase structures for <qc> + * QUIC connection, allocating the required memory. + * + * Returns 1 if succeeded, 0 if not. 
The caller is responsible to use + * quic_tls_ku_free() on error to cleanup partially allocated content. + */ +static inline int quic_tls_ku_init(struct quic_conn *qc) +{ + struct quic_tls_kp *prv_rx = &qc->ku.prv_rx; + struct quic_tls_kp *nxt_rx = &qc->ku.nxt_rx; + struct quic_tls_kp *nxt_tx = &qc->ku.nxt_tx; + + if (!quic_tls_kp_init(prv_rx) || + !quic_tls_kp_init(nxt_rx) || + !quic_tls_kp_init(nxt_tx)) + goto err; + + return 1; + + err: + return 0; +} + +/* Return 1 if <qel> has RX secrets, 0 if not. */ +static inline int quic_tls_has_rx_sec(const struct quic_enc_level *qel) +{ + return qel && !!qel->tls_ctx.rx.key; +} + +/* Return 1 if <qel> has TX secrets, 0 if not. */ +static inline int quic_tls_has_tx_sec(const struct quic_enc_level *qel) +{ + return qel && !!qel->tls_ctx.tx.key; +} + +/* Return 1 if there is RX packets for <qel> QUIC encryption level, 0 if not */ +static inline int qc_el_rx_pkts(struct quic_enc_level *qel) +{ + int ret; + + ret = !eb_is_empty(&qel->rx.pkts); + + return ret; +} + +/* Delete all RX packets for <qel> QUIC encryption level */ +static inline void qc_el_rx_pkts_del(struct quic_enc_level *qel) +{ + struct eb64_node *node; + + node = eb64_first(&qel->rx.pkts); + while (node) { + struct quic_rx_packet *pkt = + eb64_entry(node, struct quic_rx_packet, pn_node); + + node = eb64_next(node); + eb64_delete(&pkt->pn_node); + quic_rx_packet_refdec(pkt); + } +} + +static inline void qc_list_qel_rx_pkts(struct quic_enc_level *qel) +{ + struct eb64_node *node; + + node = eb64_first(&qel->rx.pkts); + while (node) { + struct quic_rx_packet *pkt; + + pkt = eb64_entry(node, struct quic_rx_packet, pn_node); + fprintf(stderr, "pkt@%p type=%d pn=%llu\n", + pkt, pkt->type, (ull)pkt->pn_node.key); + node = eb64_next(node); + } +} + +/* Returns a boolean if <qc> needs to emit frames for <qel> encryption level. 
*/
+static inline int qc_need_sending(struct quic_conn *qc, struct quic_enc_level *qel)
+{
+	return (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) || /* true as soon as one of these four conditions holds */
+	       (qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) ||
+	       qel->pktns->tx.pto_probe ||
+	       !LIST_ISEMPTY(&qel->pktns->tx.frms);
+}
+
+/* Return 1 if <qc> connection may probe the Initial packet number space, 0 if not.
+ * This is not the case if the remote peer address is not validated and if
+ * it cannot send at least QUIC_INITIAL_PACKET_MINLEN bytes.
+ */
+static inline int qc_may_probe_ipktns(struct quic_conn *qc)
+{
+	return quic_peer_validated_addr(qc) ||
+	       quic_may_send_bytes(qc) >= QUIC_INITIAL_PACKET_MINLEN;
+}
+
+
+
+#endif /* USE_QUIC */
+#endif /* _PROTO_QUIC_TLS_H */
+
diff --git a/include/haproxy/quic_tp-t.h b/include/haproxy/quic_tp-t.h
new file mode 100644
index 0000000..4897441
--- /dev/null
+++ b/include/haproxy/quic_tp-t.h
@@ -0,0 +1,118 @@
+#ifndef _HAPROXY_QUIC_TP_T_H
+#define _HAPROXY_QUIC_TP_T_H
+#ifdef USE_QUIC
+#ifndef USE_OPENSSL
+#error "Must define USE_OPENSSL"
+#endif
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#define QUIC_STATELESS_RESET_TOKEN_LEN 16
+
+/* Default QUIC connection transport parameters */
+extern struct quic_transport_params quic_dflt_transport_params;
+
+struct tp_cid {
+	uint8_t len; /* number of bytes used in data[] */
+	uint8_t data[20];
+};
+
+struct tp_preferred_address {
+	uint16_t ipv4_port;
+	uint16_t ipv6_port;
+	struct in_addr ipv4_addr;
+	struct in6_addr ipv6_addr;
+	struct tp_cid cid;
+	uint8_t stateless_reset_token[QUIC_STATELESS_RESET_TOKEN_LEN];
+};
+
+struct tp_version_information {
+	uint32_t chosen; /* version number; dumped only when non null */
+	const struct quic_version *negotiated_version; /* may be NULL */
+};
+
+/* Default values for the absent transport parameters */
+#define QUIC_TP_DFLT_MAX_UDP_PAYLOAD_SIZE 65527 /* bytes */
+#define QUIC_TP_DFLT_ACK_DELAY_COMPONENT 3 /* presumably the default ack_delay_exponent: an exponent, not a duration -- TODO confirm */
+#define QUIC_TP_DFLT_MAX_ACK_DELAY 25 /* milliseconds */
+#define QUIC_TP_DFLT_ACTIVE_CONNECTION_ID_LIMIT 2 /* number of connection IDs */
+/* These ones are our implementation default values when not set
+ * by configuration
+ */
+#define QUIC_TP_DFLT_FRONT_MAX_IDLE_TIMEOUT 30000 /* milliseconds */
+#define QUIC_TP_DFLT_FRONT_MAX_STREAMS_BIDI 100
+#define QUIC_TP_DFLT_BACK_MAX_IDLE_TIMEOUT 30000 /* milliseconds */
+
+/* Types of QUIC transport parameters */
+#define QUIC_TP_ORIGINAL_DESTINATION_CONNECTION_ID 0x00
+#define QUIC_TP_MAX_IDLE_TIMEOUT 0x01
+#define QUIC_TP_STATELESS_RESET_TOKEN 0x02
+#define QUIC_TP_MAX_UDP_PAYLOAD_SIZE 0x03
+#define QUIC_TP_INITIAL_MAX_DATA 0x04
+#define QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL 0x05
+#define QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE 0x06
+#define QUIC_TP_INITIAL_MAX_STREAM_DATA_UNI 0x07
+#define QUIC_TP_INITIAL_MAX_STREAMS_BIDI 0x08
+#define QUIC_TP_INITIAL_MAX_STREAMS_UNI 0x09
+#define QUIC_TP_ACK_DELAY_EXPONENT 0x0a
+#define QUIC_TP_MAX_ACK_DELAY 0x0b
+#define QUIC_TP_DISABLE_ACTIVE_MIGRATION 0x0c
+#define QUIC_TP_PREFERRED_ADDRESS 0x0d
+#define QUIC_TP_ACTIVE_CONNECTION_ID_LIMIT 0x0e
+#define QUIC_TP_INITIAL_SOURCE_CONNECTION_ID 0x0f
+#define QUIC_TP_RETRY_SOURCE_CONNECTION_ID 0x10
+#define QUIC_TP_VERSION_INFORMATION 0x11
+
+/*
+ * These defines are not transport parameter types, but the maximum accepted
+ * values for the matching transport parameters.
+ */
+#define QUIC_TP_ACK_DELAY_EXPONENT_LIMIT 20
+#define QUIC_TP_MAX_ACK_DELAY_LIMIT (1UL << 14) /* milliseconds */
+
+/* The maximum length of encoded transport parameters for any QUIC peer. */
+#define QUIC_TP_MAX_ENCLEN 128
+/*
+ * QUIC transport parameters.
+ * Note that forbidden parameters sent by clients MUST generate TRANSPORT_PARAMETER_ERROR errors.
+ */
+struct quic_transport_params {
+	uint64_t max_idle_timeout;                                   /* milliseconds */
+	uint64_t max_udp_payload_size;                               /* Default: 65527 bytes (max of UDP payload for IPv6) */
+	uint64_t initial_max_data;
+	uint64_t initial_max_stream_data_bidi_local;
+	uint64_t initial_max_stream_data_bidi_remote;
+	uint64_t initial_max_stream_data_uni;
+	uint64_t initial_max_streams_bidi;
+	uint64_t initial_max_streams_uni;
+	uint64_t ack_delay_exponent;                                 /* Default: 3, max: 20 */
+	uint64_t max_ack_delay;                                      /* Default: 25ms (QUIC_TP_DFLT_MAX_ACK_DELAY), max: 2^14ms */
+	uint64_t active_connection_id_limit;
+
+	/* Booleans */
+	uint8_t disable_active_migration;
+	uint8_t with_stateless_reset_token;
+	uint8_t with_preferred_address;
+	uint8_t original_destination_connection_id_present;
+	uint8_t initial_source_connection_id_present;
+
+	uint8_t stateless_reset_token[QUIC_STATELESS_RESET_TOKEN_LEN]; /* Forbidden for clients */
+	/*
+	 * MUST be sent by servers.
+	 * When received by clients, must be set to 1 if present.
+	 */
+	struct tp_cid original_destination_connection_id;            /* Forbidden for clients */
+	/*
+	 * MUST be sent by servers after Retry.
+	 */
+	struct tp_cid retry_source_connection_id;                    /* Forbidden for clients */
+	/* MUST be present both for servers and clients. */
+	struct tp_cid initial_source_connection_id;
+	struct tp_preferred_address preferred_address;               /* Forbidden for clients */
+	struct tp_version_information version_information;
+};
+
+#endif /* USE_QUIC */
+#endif /* _HAPROXY_QUIC_TP_T_H */
diff --git a/include/haproxy/quic_tp.h b/include/haproxy/quic_tp.h
new file mode 100644
index 0000000..d3bdd18
--- /dev/null
+++ b/include/haproxy/quic_tp.h
@@ -0,0 +1,124 @@
+#ifndef _HAPROXY_QUIC_TP_H
+#define _HAPROXY_QUIC_TP_H
+#ifdef USE_QUIC
+#ifndef USE_OPENSSL
+#error "Must define USE_OPENSSL"
+#endif
+
+#include <haproxy/chunk.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_tp-t.h>
+
+void quic_transport_params_init(struct quic_transport_params *p, int server);
+int quic_transport_params_encode(unsigned char *buf,
+                                 const unsigned char *end,
+                                 struct quic_transport_params *p,
+                                 const struct quic_version *chosen_version,
+                                 int server);
+
+int quic_transport_params_store(struct quic_conn *conn, int server,
+                                const unsigned char *buf,
+                                const unsigned char *end);
+
+int qc_lstnr_params_init(struct quic_conn *qc,
+                         const struct quic_transport_params *listener_params,
+                         const unsigned char *stateless_reset_token,
+                         const unsigned char *dcid, size_t dcidlen,
+                         const unsigned char *scid, size_t scidlen,
+                         const struct quic_cid *token_odcid);
+
+/* Dump <cid> transport parameter connection ID value if present (non null length).
+ * Used only for debugging purposes.
+ */
+static inline void quic_tp_cid_dump(struct buffer *buf,
+                                    const struct tp_cid *cid)
+{
+	int i;
+
+	for (i = 0; i < cid->len; i++) /* two hexadecimal digits per byte, nothing for a null length */
+		chunk_appendf(buf, "%02x", cid->data[i]);
+}
+/* Append to <b> the chosen version of <tp> version information and, if set,
+ * its negotiated version. Nothing is appended if <tp> has a null chosen
+ * version. <local> is not used by this function.
+ */
+static inline void quic_tp_version_info_dump(struct buffer *b,
+                                             const struct tp_version_information *tp, int local)
+{
+	if (!tp->chosen)
+		return;
+
+	chunk_appendf(b, " versions:chosen=0x%08x", tp->chosen);
+	if (tp->negotiated_version)
+		chunk_appendf(b, ",negotiated=0x%08x", tp->negotiated_version->num);
+}
+/* Append to <b> a textual dump of <p> transport parameters: the connection IDs
+ * first (odcid and rscid only if their length is not null), then the numeric
+ * parameters, the boolean ones between parentheses, the preferred address if
+ * any and finally the version information.
+ * <qc> is only used to detect whether <p> is its ->rx.params member.
+ */
+static inline void quic_transport_params_dump(struct buffer *b,
+                                              const struct quic_conn *qc,
+                                              const struct quic_transport_params *p)
+{
+	int local = p == &qc->rx.params;
+
+	if (p->original_destination_connection_id.len) {
+		chunk_appendf(b, " odcid=");
+		quic_tp_cid_dump(b, &p->original_destination_connection_id);
+	}
+	chunk_appendf(b, " iscid=");
+	quic_tp_cid_dump(b, &p->initial_source_connection_id);
+	if (p->retry_source_connection_id.len) {
+		chunk_appendf(b, " rscid=");
+		quic_tp_cid_dump(b, &p->retry_source_connection_id);
+	}
+	chunk_appendf(b, "\n");
+
+	chunk_appendf(b, " midle_timeout=%llums", (ull)p->max_idle_timeout);
+	chunk_appendf(b, " mudp_payload_sz=%llu", (ull)p->max_udp_payload_size);
+	chunk_appendf(b, " ack_delay_exp=%llu", (ull)p->ack_delay_exponent);
+	chunk_appendf(b, " mack_delay=%llums", (ull)p->max_ack_delay);
+	chunk_appendf(b, " act_cid_limit=%llu\n", (ull)p->active_connection_id_limit);
+
+	chunk_appendf(b, " md=%llu", (ull)p->initial_max_data);
+	chunk_appendf(b, " msd_bidi_l=%llu",
+	              (ull)p->initial_max_stream_data_bidi_local);
+	chunk_appendf(b, " msd_bidi_r=%llu",
+	              (ull)p->initial_max_stream_data_bidi_remote);
+	chunk_appendf(b, " msd_uni=%llu",
+	              (ull)p->initial_max_stream_data_uni);
+	chunk_appendf(b, " ms_bidi=%llu", (ull)p->initial_max_streams_bidi);
+	chunk_appendf(b, " ms_uni=%llu\n", (ull)p->initial_max_streams_uni);
+
+	if (p->disable_active_migration || p->with_stateless_reset_token) {
+		int prev = 0; /* set once a first item has been emitted, to insert the comma separator */
+
+		chunk_appendf(b, " (");
+		if (p->disable_active_migration) {
+			if (prev)
+				chunk_appendf(b, ",");
+			prev = 1;
+			chunk_appendf(b, "no_act_migr");
+		}
+		if (p->with_stateless_reset_token) {
+			if (prev)
+				chunk_appendf(b, ",");
+			prev = 1;
+			chunk_appendf(b, "stless_rst_tok");
+		}
+		chunk_appendf(b, ")");
+	}
+
+	if (p->with_preferred_address) {
+		char bufaddr[INET6_ADDRSTRLEN]; /* reused for the IPv4 then the IPv6 address */
+		chunk_appendf(b, " pref_addr=");
+		inet_ntop(AF_INET, &p->preferred_address.ipv4_addr, /* NOTE(review): the inet_ntop() return value is not checked */
+		          bufaddr, sizeof(bufaddr));
+		chunk_appendf(b, "%s:%hu ", bufaddr, p->preferred_address.ipv4_port); /* port printed as stored, without byte order conversion here */
+
+		inet_ntop(AF_INET6, &p->preferred_address.ipv6_addr,
+		          bufaddr, sizeof(bufaddr));
+		chunk_appendf(b, "[%s]:%hu ", bufaddr, p->preferred_address.ipv6_port);
+		quic_tp_cid_dump(b, &p->preferred_address.cid);
+		chunk_appendf(b, "\n");
+	}
+
+	quic_tp_version_info_dump(b, &p->version_information, local);
+}
+
+#endif /* USE_QUIC */
+#endif /* _HAPROXY_QUIC_TP_H */
diff --git a/include/haproxy/quic_trace-t.h b/include/haproxy/quic_trace-t.h
new file mode 100644
index 0000000..7ebc8a7
--- /dev/null
+++ b/include/haproxy/quic_trace-t.h
@@ -0,0 +1,103 @@
+/*
+ * include/haproxy/quic_trace-t.h
+ * Definitions for QUIC traces internal types, constants and flags.
+ *
+ * Copyright (C) 2023
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ * + */ + +#ifndef _HAPROXY_QUIC_TRACE_T_H +#define _HAPROXY_QUIC_TRACE_T_H + +#include <haproxy/quic_tls-t.h> +#include <haproxy/trace-t.h> + +extern struct trace_source trace_quic; + +/* Used only for QUIC TLS key phase traces */ +struct quic_kp_trace { + const unsigned char *rx_sec; + size_t rx_seclen; + const struct quic_tls_kp *rx; + const unsigned char *tx_sec; + size_t tx_seclen; + const struct quic_tls_kp *tx; +}; + +/* Only for debug purpose */ +struct enc_debug_info { + unsigned char *payload; + size_t payload_len; + unsigned char *aad; + size_t aad_len; + uint64_t pn; +}; + +/* Structure to store enough information about the RX CRYPTO frames. */ +struct quic_rx_crypto_frm { + struct eb64_node offset_node; + uint64_t len; + const unsigned char *data; + struct quic_rx_packet *pkt; +}; + +#define QUIC_EV_CONN_NEW (1ULL << 0) +#define QUIC_EV_CONN_INIT (1ULL << 1) +#define QUIC_EV_CONN_ISEC (1ULL << 2) +#define QUIC_EV_CONN_RSEC (1ULL << 3) +#define QUIC_EV_CONN_WSEC (1ULL << 4) +#define QUIC_EV_CONN_RWSEC (1ULL << 5) +#define QUIC_EV_CONN_LPKT (1ULL << 6) +#define QUIC_EV_CONN_SPKT (1ULL << 7) +#define QUIC_EV_CONN_ENCPKT (1ULL << 8) +#define QUIC_EV_CONN_TXPKT (1ULL << 9) +#define QUIC_EV_CONN_PAPKT (1ULL << 10) +#define QUIC_EV_CONN_PAPKTS (1ULL << 11) +#define QUIC_EV_CONN_IO_CB (1ULL << 12) +#define QUIC_EV_CONN_RMHP (1ULL << 13) +#define QUIC_EV_CONN_PRSHPKT (1ULL << 14) +#define QUIC_EV_CONN_PRSAPKT (1ULL << 15) +#define QUIC_EV_CONN_PRSFRM (1ULL << 16) +#define QUIC_EV_CONN_PRSAFRM (1ULL << 17) +#define QUIC_EV_CONN_BFRM (1ULL << 18) +#define QUIC_EV_CONN_PHPKTS (1ULL << 19) +#define QUIC_EV_CONN_TRMHP (1ULL << 20) +#define QUIC_EV_CONN_ELRMHP (1ULL << 21) +#define QUIC_EV_CONN_RXPKT (1ULL << 22) +#define QUIC_EV_CONN_SSLDATA (1ULL << 23) +#define QUIC_EV_CONN_RXCDATA (1ULL << 24) +#define QUIC_EV_CONN_ADDDATA (1ULL << 25) +#define QUIC_EV_CONN_FFLIGHT (1ULL << 26) +#define QUIC_EV_CONN_SSLALERT (1ULL << 27) +#define QUIC_EV_CONN_PSTRM (1ULL << 28) 
+#define QUIC_EV_CONN_RTTUPDT (1ULL << 29) +#define QUIC_EV_CONN_CC (1ULL << 30) +#define QUIC_EV_CONN_SPPKTS (1ULL << 31) +#define QUIC_EV_CONN_PKTLOSS (1ULL << 32) +#define QUIC_EV_CONN_STIMER (1ULL << 33) +#define QUIC_EV_CONN_PTIMER (1ULL << 34) +#define QUIC_EV_CONN_SPTO (1ULL << 35) +#define QUIC_EV_CONN_BCFRMS (1ULL << 36) +#define QUIC_EV_CONN_XPRTSEND (1ULL << 37) +#define QUIC_EV_CONN_XPRTRECV (1ULL << 38) +#define QUIC_EV_CONN_FREED (1ULL << 39) +#define QUIC_EV_CONN_CLOSE (1ULL << 40) +#define QUIC_EV_CONN_ACKSTRM (1ULL << 41) +#define QUIC_EV_CONN_FRMLIST (1ULL << 42) +#define QUIC_EV_STATELESS_RST (1ULL << 43) +#define QUIC_EV_TRANSP_PARAMS (1ULL << 44) +#define QUIC_EV_CONN_IDLE_TIMER (1ULL << 45) +#define QUIC_EV_CONN_SUB (1ULL << 46) +#define QUIC_EV_CONN_ELEVELSEL (1ULL << 47) +#define QUIC_EV_CONN_RCV (1ULL << 48) +#define QUIC_EV_CONN_KILL (1ULL << 49) +#define QUIC_EV_CONN_KP (1ULL << 50) +#define QUIC_EV_CONN_SSL_COMPAT (1ULL << 51) +#define QUIC_EV_CONN_SET_AFFINITY (1ULL << 52) + +#endif /* _HAPROXY_QUIC_TRACE_T_H */ diff --git a/include/haproxy/quic_trace.h b/include/haproxy/quic_trace.h new file mode 100644 index 0000000..19fe864 --- /dev/null +++ b/include/haproxy/quic_trace.h @@ -0,0 +1,40 @@ +/* + * include/haproxy/quic_trace.h + * This file contains QUIC traces definitions. + * + * Copyright (C) 2023 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef _HAPROXY_QUIC_TRACE_H +#define _HAPROXY_QUIC_TRACE_H + +#include <haproxy/quic_trace-t.h> + +#define TRACE_SOURCE &trace_quic + +/* Initializes a enc_debug_info struct (only for debug purpose) */ +static inline void enc_debug_info_init(struct enc_debug_info *edi, + unsigned char *payload, size_t payload_len, + unsigned char *aad, size_t aad_len, uint64_t pn) +{ + edi->payload = payload; + edi->payload_len = payload_len; + edi->aad = aad; + edi->aad_len = aad_len; + edi->pn = pn; +} + +#endif /* _HAPROXY_QUIC_TRACE_H */ diff --git a/include/haproxy/quic_tx-t.h b/include/haproxy/quic_tx-t.h new file mode 100644 index 0000000..4653f04 --- /dev/null +++ b/include/haproxy/quic_tx-t.h @@ -0,0 +1,56 @@ +#ifndef _HAPROXY_TX_T_H +#define _HAPROXY_TX_T_H + +#define QUIC_MIN_CC_PKTSIZE 128 +#define QUIC_DGRAM_HEADLEN (sizeof(uint16_t) + sizeof(void *)) +#define QUIC_MAX_CC_BUFSIZE (2 * (QUIC_MIN_CC_PKTSIZE + QUIC_DGRAM_HEADLEN)) + +extern struct pool_head *pool_head_quic_tx_packet; +extern struct pool_head *pool_head_quic_cc_buf; + +/* Flag a sent packet as being an ack-eliciting packet. */ +#define QUIC_FL_TX_PACKET_ACK_ELICITING (1UL << 0) +/* Flag a sent packet as containing a PADDING frame. */ +#define QUIC_FL_TX_PACKET_PADDING (1UL << 1) +/* Flag a sent packet as being in flight. 
*/ +#define QUIC_FL_TX_PACKET_IN_FLIGHT (QUIC_FL_TX_PACKET_ACK_ELICITING | QUIC_FL_TX_PACKET_PADDING) +/* Flag a sent packet as containing a CONNECTION_CLOSE frame */ +#define QUIC_FL_TX_PACKET_CC (1UL << 2) +/* Flag a sent packet as containing an ACK frame */ +#define QUIC_FL_TX_PACKET_ACK (1UL << 3) +/* Flag a sent packet as being coalesced to another one in the same datagram */ +#define QUIC_FL_TX_PACKET_COALESCED (1UL << 4) +/* Flag a sent packet as being probing with old data */ +#define QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA (1UL << 5) + +/* Structure to store enough information about TX QUIC packets. */ +struct quic_tx_packet { + /* List entry point. */ + struct list list; + /* Packet length */ + size_t len; + /* This is not the packet length but the length of outstanding data + * for in flight TX packet. + */ + size_t in_flight_len; + struct eb64_node pn_node; + /* The list of frames of this packet. */ + struct list frms; + /* The time this packet was sent (ms). */ + unsigned int time_sent; + /* Packet number space. */ + struct quic_pktns *pktns; + /* Flags. */ + unsigned int flags; + /* Reference counter */ + int refcnt; + /* Next packet in the same datagram */ + struct quic_tx_packet *next; + /* Previous packet in the same datagram */ + struct quic_tx_packet *prev; + /* Largest acknowledged packet number if this packet contains an ACK frame */ + int64_t largest_acked_pn; + unsigned char type; +}; + +#endif /* _HAPROXY_TX_T_H */ diff --git a/include/haproxy/quic_tx.h b/include/haproxy/quic_tx.h new file mode 100644 index 0000000..0659c14 --- /dev/null +++ b/include/haproxy/quic_tx.h @@ -0,0 +1,92 @@ +/* + * QUIC protocol definitions (TX side). + * + * Copyright (C) 2023 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_QUIC_TX_H +#define _HAPROXY_QUIC_TX_H + +#include <haproxy/buf-t.h> +#include <haproxy/list-t.h> +#include <haproxy/quic_conn-t.h> +#include <haproxy/quic_tls-t.h> +#include <haproxy/quic_rx-t.h> +#include <haproxy/quic_tx-t.h> + +struct buffer *qc_txb_alloc(struct quic_conn *qc); +void qc_txb_release(struct quic_conn *qc); +int qc_purge_txbuf(struct quic_conn *qc, struct buffer *buf); +struct buffer *qc_get_txb(struct quic_conn *qc); + +int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels); +int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx); +int qc_send_app_pkts(struct quic_conn *qc, struct list *frms); +int qc_dgrams_retransmit(struct quic_conn *qc); +void qc_prep_hdshk_fast_retrans(struct quic_conn *qc, + struct list *ifrms, struct list *hfrms); +int send_retry(int fd, struct sockaddr_storage *addr, + struct quic_rx_packet *pkt, const struct quic_version *qv); +int send_stateless_reset(struct listener *l, struct sockaddr_storage *dstaddr, + struct quic_rx_packet *rxpkt); +int send_version_negotiation(int fd, struct sockaddr_storage *addr, + struct quic_rx_packet *pkt); + +/* The TX packets sent in the same datagram are linked to each others in + * the order they are built. This function detach a packet from its successor + * and predecessor in the same datagram. 
+ */ +static inline void quic_tx_packet_dgram_detach(struct quic_tx_packet *pkt) +{ + if (pkt->prev) + pkt->prev->next = pkt->next; + if (pkt->next) + pkt->next->prev = pkt->prev; +} + + +/* Increment the reference counter of <pkt> */ +static inline void quic_tx_packet_refinc(struct quic_tx_packet *pkt) +{ + pkt->refcnt++; +} + +/* Decrement the reference counter of <pkt>. When it reaches zero, <pkt> is + * detached from its neighbours in the same datagram and released to + * pool_head_quic_tx_packet; its frame list must be empty at that point + * (enforced by BUG_ON()). + */ +static inline void quic_tx_packet_refdec(struct quic_tx_packet *pkt) +{ + if (--pkt->refcnt == 0) { + BUG_ON(!LIST_ISEMPTY(&pkt->frms)); + /* If there are other packets in the same datagram <pkt> is attached to, + * detach the previous one and the next one from <pkt>. + */ + quic_tx_packet_dgram_detach(pkt); + pool_free(pool_head_quic_tx_packet, pkt); + } +} + +/* Return the number of bytes which may be sent from <qc> connection when + * it has not already been validated. Note that it is the responsibility + * of the caller to check that this is the case with quic_peer_validated_addr(). + * This latter BUG_ON() if 3 * qc->bytes.rx < qc->bytes.prep. + */ +static inline size_t quic_may_send_bytes(struct quic_conn *qc) +{ + return 3 * qc->bytes.rx - qc->bytes.prep; +} + + +#endif /* _HAPROXY_QUIC_TX_H */ diff --git a/include/haproxy/receiver-t.h b/include/haproxy/receiver-t.h new file mode 100644 index 0000000..0ae441e --- /dev/null +++ b/include/haproxy/receiver-t.h @@ -0,0 +1,106 @@ +/* + * include/haproxy/receiver-t.h + * This file defines the structures needed to manage receivers. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_RECEIVER_T_H +#define _HAPROXY_RECEIVER_T_H + +#include <sys/types.h> +#include <sys/socket.h> + +#include <haproxy/api-t.h> +#include <haproxy/namespace-t.h> +#include <haproxy/proto_rhttp-t.h> +#include <haproxy/quic_sock-t.h> +#include <haproxy/thread.h> + +/* Bit values for receiver->flags */ +#define RX_F_BOUND 0x00000001 /* receiver already bound */ +#define RX_F_INHERITED 0x00000002 /* inherited FD from the parent process (fd@) or duped from another local receiver */ +#define RX_F_MWORKER 0x00000004 /* keep the FD open in the master but close it in the children */ +#define RX_F_MUST_DUP 0x00000008 /* this receiver's fd must be dup() from a reference; ignore socket-level ops here */ +#define RX_F_NON_SUSPENDABLE 0x00000010 /* this socket cannot be suspended hence must always be unbound */ + +/* Bit values for rx_settings->options */ +#define RX_O_FOREIGN 0x00000001 /* receives on foreign addresses */ +#define RX_O_V4V6 0x00000002 /* binds to both IPv4 and IPv6 addresses if !V6ONLY */ +#define RX_O_V6ONLY 0x00000004 /* binds to IPv6 addresses only */ + +/* All the settings that are used to configure a receiver */ +struct rx_settings { + struct { /* UNIX socket permissions */ + uid_t uid; /* -1 to leave unchanged */ + gid_t gid; /* -1 to leave unchanged */ + mode_t mode; /* 0 to leave unchanged */ + } ux; + char *interface; /* interface name or NULL */ + const struct netns_entry *netns; /* network namespace of the listener*/ + unsigned int options; /* receiver options (RX_O_*) */ + int shards; /* number of shards, 0=not set yet, -1="by-thread" */ +}; + +/* info about a shard that is shared between multiple groups. 
Receivers that + * are alone in their shard do not have a shard_info. + */ +struct shard_info { + uint nbgroups; /* number of groups in this shard (=#rx); Zero = unused. */ + uint nbthreads; /* number of threads in this shard (>=nbgroups) */ + ulong tgroup_mask; /* bitmask of thread groups having a member here */ + struct receiver *ref; /* first one, reference for FDs to duplicate */ + struct receiver *members[MAX_TGROUPS]; /* all members of the shard (one per thread group) */ +}; + +/* This describes a receiver with all its characteristics (address, options, etc) */ +struct receiver { + int fd; /* handle we receive from (fd only for now) */ + unsigned int flags; /* receiver options (RX_F_*) */ + struct protocol *proto; /* protocol this receiver belongs to */ + void *owner; /* receiver's owner (usually a listener) */ + void (*iocb)(int fd); /* generic I/O handler (typically accept callback) */ + unsigned long bind_thread; /* bitmask of threads allowed on this receiver */ + uint bind_tgroup; /* thread group ID: 0=global IDs, non-zero=local IDs */ + struct rx_settings *settings; /* points to the settings used by this receiver */ + struct shard_info *shard_info; /* points to info about the owning shard, NULL if single rx */ + struct list proto_list; /* list in the protocol header */ +#ifdef USE_QUIC + struct mt_list rxbuf_list; /* list of buffers to receive and dispatch QUIC datagrams. */ + enum quic_sock_mode quic_mode; /* QUIC socket allocation strategy */ + unsigned int quic_curr_handshake; /* count of active QUIC handshakes */ + unsigned int quic_curr_accept; /* count of QUIC conns waiting for accept */ +#endif + struct { + struct task *task; /* Task used to open connection for reverse. */ + struct server *srv; /* Underlying server used to initiate reverse pre-connect. */ + struct connection *pend_conn; /* Pending connection waiting to complete reversal before being accepted. */ + enum li_preconn_state state; /* State for transition logging. 
*/ + } rhttp; + + /* warning: this struct is huge, keep it at the bottom */ + struct sockaddr_storage addr; /* the address the socket is bound to */ +}; + +#endif /* _HAPROXY_RECEIVER_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/regex-t.h b/include/haproxy/regex-t.h new file mode 100644 index 0000000..33d88a2 --- /dev/null +++ b/include/haproxy/regex-t.h @@ -0,0 +1,78 @@ +/* + * include/haproxy/regex-t.h + * Types and macros definitions for regular expressions + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_REGEX_T_H +#define _HAPROXY_REGEX_T_H + +#include <stdlib.h> +#include <string.h> + +#include <haproxy/api.h> + +#ifdef USE_PCRE +#include <pcre.h> +#include <pcreposix.h> + +/* For pre-8.20 PCRE compatibility */ +#ifndef PCRE_STUDY_JIT_COMPILE +#define PCRE_STUDY_JIT_COMPILE 0 +#endif + +#elif defined(USE_PCRE2) +#include <pcre2.h> +#include <pcre2posix.h> + +#else /* no PCRE, nor PCRE2 */ +#include <regex.h> +#endif + +struct my_regex { +#ifdef USE_PCRE + pcre *reg; + pcre_extra *extra; +#ifdef USE_PCRE_JIT +#ifndef PCRE_CONFIG_JIT +#error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT." 
+#endif +#endif +#elif defined(USE_PCRE2) + int(*mfn)(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *); + pcre2_code *reg; +#else /* no PCRE */ + regex_t regex; +#endif +}; + +struct hdr_exp { + struct hdr_exp *next; + struct my_regex *preg; /* expression to look for */ + const char *replace; /* expression to set instead */ + void *cond; /* a possible condition or NULL */ +}; + +#endif /* _HAPROXY_REGEX_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/regex.h b/include/haproxy/regex.h new file mode 100644 index 0000000..2cd9573 --- /dev/null +++ b/include/haproxy/regex.h @@ -0,0 +1,144 @@ +/* + * include/haproxy/regex.h + * Compatibility layer for various regular expression engines + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_REGEX_H +#define _HAPROXY_REGEX_H + +#include <stdlib.h> +#include <string.h> + +#include <haproxy/api.h> +#include <haproxy/regex-t.h> + +extern THREAD_LOCAL regmatch_t pmatch[MAX_MATCH]; + +/* "str" is the string that contain the regex to compile. + * "regex" is preallocated memory. After the execution of this function, this + * struct contain the compiled regex. 
+ * "cs" is the case sensitive flag. If cs is true, case sensitive is enabled. + * "cap" is capture flag. If cap if true the regex can capture into + * parenthesis strings. + * "err" is the standard error message pointer. + * + * The function return 1 is success case, else return 0 and err is filled. + */ +struct my_regex *regex_comp(const char *str, int cs, int cap, char **err); +int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches); +const char *check_replace_string(const char *str); +int regex_exec_match(const struct my_regex *preg, const char *subject, + size_t nmatch, regmatch_t pmatch[], int flags); +int regex_exec_match2(const struct my_regex *preg, char *subject, int length, + size_t nmatch, regmatch_t pmatch[], int flags); + + +/* If the function doesn't match, it returns false, else it returns true. + */ +static inline int regex_exec(const struct my_regex *preg, char *subject) +{ +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) + if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0) + return 0; + return 1; +#elif defined(USE_PCRE2) + pcre2_match_data *pm; + int ret; + + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), + 0, 0, pm, NULL); + pcre2_match_data_free(pm); + if (ret < 0) + return 0; + return 1; +#else + int match; + match = regexec(&preg->regex, subject, 0, NULL, 0); + if (match == REG_NOMATCH) + return 0; + return 1; +#endif +} + +/* Note that <subject> MUST be at least <length+1> characters long and must + * be writable because the function will temporarily force a zero past the + * last character. + * + * If the function doesn't match, it returns false, else it returns true. 
+ */ +static inline int regex_exec2(const struct my_regex *preg, char *subject, int length) +{ +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) + if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0) + return 0; + return 1; +#elif defined(USE_PCRE2) + pcre2_match_data *pm; + int ret; + + pm = pcre2_match_data_create_from_pattern(preg->reg, NULL); + ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, + 0, 0, pm, NULL); + pcre2_match_data_free(pm); + if (ret < 0) + return 0; + return 1; +#else + int match; + char old_char = subject[length]; + subject[length] = 0; + match = regexec(&preg->regex, subject, 0, NULL, 0); + subject[length] = old_char; + if (match == REG_NOMATCH) + return 0; + return 1; +#endif +} + +static inline void regex_free(struct my_regex *preg) +{ + if (!preg) + return; +#if defined(USE_PCRE) || defined(USE_PCRE_JIT) + pcre_free(preg->reg); +/* PCRE < 8.20 requires pcre_free() while >= 8.20 requires pcre_study_free(), + * which is easily detected using PCRE_CONFIG_JIT. + */ +#ifdef PCRE_CONFIG_JIT + pcre_free_study(preg->extra); +#else /* PCRE_CONFIG_JIT */ + pcre_free(preg->extra); +#endif /* PCRE_CONFIG_JIT */ +#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT) + pcre2_code_free(preg->reg); +#else + regfree(&preg->regex); +#endif + free(preg); +} + +#endif /* _HAPROXY_REGEX_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/resolvers-t.h b/include/haproxy/resolvers-t.h new file mode 100644 index 0000000..b727463 --- /dev/null +++ b/include/haproxy/resolvers-t.h @@ -0,0 +1,297 @@ +/* + * include/haproxy/dns-t.h + * This file provides structures and types for DNS. + * + * Copyright (C) 2014 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_RESOLVERS_T_H +#define _HAPROXY_RESOLVERS_T_H + +#include <import/ebtree-t.h> + +#include <haproxy/connection-t.h> +#include <haproxy/dns-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/stats-t.h> +#include <haproxy/task-t.h> +#include <haproxy/thread.h> + +extern struct pool_head *resolv_requester_pool; + +/*DNS maximum values */ +/* + * Maximum issued from RFC: + * RFC 1035: https://www.ietf.org/rfc/rfc1035.txt chapter 2.3.4 + * RFC 2671: http://tools.ietf.org/html/rfc2671 + */ +#define DNS_MAX_LABEL_SIZE 63 +#define DNS_MAX_NAME_SIZE 255 +#define DNS_MAX_UDP_MESSAGE 65535 + +/* DNS minimum record size: 1 char + 1 NULL + type + class */ +#define DNS_MIN_RECORD_SIZE (1 + 1 + 2 + 2) + +/* DNS smallest fqdn 'a.gl' size */ +# define DNS_SMALLEST_FQDN_SIZE 4 + +/* maximum number of query records in a DNS response + * For now, we allow only one */ +#define DNS_MAX_QUERY_RECORDS 1 + +/* maximum number of answer record in a DNS response */ +#define DNS_MAX_ANSWER_RECORDS ((DNS_MAX_UDP_MESSAGE - DNS_HEADER_SIZE) / DNS_MIN_RECORD_SIZE) + +/* size of dns_buffer used to store responses from the buffer + * dns_buffer is used to store data collected from records found in a response. 
+ * Before using it, caller will always check that there is at least DNS_MAX_NAME_SIZE bytes + * available */ +#define DNS_ANALYZE_BUFFER_SIZE DNS_MAX_UDP_MESSAGE + DNS_MAX_NAME_SIZE + +/* DNS error messages */ +#define DNS_TOO_LONG_FQDN "hostname too long" +#define DNS_LABEL_TOO_LONG "one label too long" +#define DNS_INVALID_CHARACTER "found an invalid character" + +/* dns query class */ +#define DNS_RCLASS_IN 1 /* internet class */ + +/* dns record types (non exhaustive list) */ +#define DNS_RTYPE_A 1 /* IPv4 address */ +#define DNS_RTYPE_CNAME 5 /* canonical name */ +#define DNS_RTYPE_AAAA 28 /* IPv6 address */ +#define DNS_RTYPE_SRV 33 /* SRV record */ +#define DNS_RTYPE_OPT 41 /* OPT */ +#define DNS_RTYPE_ANY 255 /* all records */ + +/* dns rcode values */ +#define DNS_RCODE_NO_ERROR 0 /* no error */ +#define DNS_RCODE_NX_DOMAIN 3 /* non existent domain */ +#define DNS_RCODE_REFUSED 5 /* query refused */ + +/* dns flags masks */ +#define DNS_FLAG_TRUNCATED 0x0200 /* mask for truncated flag */ +#define DNS_FLAG_REPLYCODE 0x000F /* mask for reply code */ + +/* max number of network preference entries are available from the + * configuration file. 
+ */ +#define SRV_MAX_PREF_NET 5 + +/* NOTE: big endian structure */ +struct resolv_query_item { + char name[DNS_MAX_NAME_SIZE+1]; /* query name */ + unsigned short type; /* question type */ + unsigned short class; /* query class */ +}; + +/* NOTE: big endian structure */ +struct resolv_answer_item { + /*For SRV type, name also includes service and protocol value */ + char name[DNS_MAX_NAME_SIZE+1]; /* answer name */ + int16_t type; /* question type */ + int16_t class; /* query class */ + int32_t ttl; /* response TTL */ + int16_t priority; /* SRV type priority */ + uint16_t weight; /* SRV type weight */ + uint16_t port; /* SRV type port */ + uint16_t data_len; /* number of bytes in the <data> field below */ + struct eb32_node link; /* linking node */ + union { + struct sockaddr_in in4; /* IPv4 address for RTYPE_A */ + struct sockaddr_in6 in6; /* IPv6 address for RTYPE_AAAA */ + char target[DNS_MAX_NAME_SIZE+1]; /* Response data: SRV or CNAME type target */ + } data; + unsigned int last_seen; /* When was the answer was last seen */ + struct resolv_answer_item *ar_item; /* pointer to a RRset from the additional section, if exists */ + struct list attached_servers; /* attached server head */ +}; + +struct resolv_response { + struct dns_header header; + struct eb_root answer_tree; + /* authority ignored for now */ +}; + +/* Resolvers section and parameters. It is linked to the name servers + * servers points to it. + * current resolution are stored in a FIFO list. 
+ */ +struct resolvers { + __decl_thread(HA_SPINLOCK_T lock); + unsigned int accepted_payload_size; /* maximum payload size we accept for responses */ + int nb_nameservers; /* total number of active nameservers in a resolvers section */ + int resolve_retries; /* number of retries before giving up */ + struct { /* time to: */ + int resolve; /* wait between 2 queries for the same resolution */ + int retry; /* wait for a response before retrying */ + } timeout; + struct { /* time to hold current data when */ + int valid; /* a response is valid */ + int nx; /* a response doesn't exist */ + int timeout; /* no answer was delivered */ + int refused; /* dns server refused to answer */ + int other; /* other dns response errors */ + int obsolete; /* an answer hasn't been seen */ + } hold; + struct task *t; /* timeout management */ + struct { + struct list wait; /* resolutions managed to this resolvers section */ + struct list curr; /* current running resolutions */ + } resolutions; + struct eb_root query_ids; /* tree to quickly lookup/retrieve query ids currently in use + * used by each nameserver, but stored in resolvers since there must + * be a unique relation between an eb_root and an eb_node (resolution) */ + struct list list; /* resolvers list */ + struct list nameservers; /* dns server list */ + struct proxy *px; /* px to handle connections to DNS servers */ + char *id; /* resolvers unique identifier */ + struct { + const char *file; /* file where the section appears */ + int line; /* line where the section appears */ + int implicit; /* config was auto-generated and must be silent */ + } conf; /* config information */ +}; + +struct resolv_options { + int family_prio; /* which IP family should the resolver use when both are returned */ + struct { + int family; + union { + struct in_addr in4; + struct in6_addr in6; + } addr; + union { + struct in_addr in4; + struct in6_addr in6; + } mask; + } pref_net[SRV_MAX_PREF_NET]; + int pref_net_nb; /* The number of registered 
preferred networks. */ + int accept_duplicate_ip; /* flag to indicate whether the associated object can use an IP address + already set to an other object of the same group */ + int ignore_weight; /* flag to indicate whether to ignore the weight within the record */ +}; + +/* Resolution structure associated to single server and used to manage name + * resolution for this server. + * The only link between the resolution and a nameserver is through the + * query_id. + */ +struct resolv_resolution { + struct resolvers *resolvers; /* pointer to the resolvers structure owning the resolution */ + struct list requesters; /* list of requesters using this resolution */ + int uuid; /* unique id (used for debugging purpose) */ + char *hostname_dn; /* server hostname in domain name label format */ + int hostname_dn_len; /* server domain name label len */ + unsigned int last_resolution; /* time of the last resolution */ + unsigned int last_query; /* time of the last query sent */ + unsigned int last_valid; /* time of the last valid response */ + int query_id; /* DNS query ID dedicated for this resolution */ + struct eb32_node qid; /* ebtree query id */ + int prefered_query_type; /* preferred query type */ + int query_type; /* current query type */ + int status; /* status of the resolution being processed RSLV_STATUS_* */ + int step; /* RSLV_STEP_* */ + int try; /* current resolution try */ + int nb_queries; /* count number of queries sent */ + int nb_responses; /* count number of responses received */ + + struct resolv_response response; /* structure hosting the DNS response */ + struct resolv_query_item response_query_records[DNS_MAX_QUERY_RECORDS]; /* <response> query records */ + + struct list list; /* resolution list */ +}; + +/* Structure used to describe the owner of a DNS resolution. 
*/ +struct resolv_requester { + enum obj_type *owner; /* pointer to the owner (server or dns_srvrq) */ + struct resolv_resolution *resolution; /* pointer to the owned DNS resolution */ + + int (*requester_cb)(struct resolv_requester *, struct dns_counters *); /* requester callback for valid response */ + int (*requester_error_cb)(struct resolv_requester *, int); /* requester callback, for error management */ + + struct list list; /* requester list */ +}; + +/* Last resolution status code */ +enum { + RSLV_STATUS_NONE = 0, /* no resolution occurred yet */ + RSLV_STATUS_VALID, /* no error */ + RSLV_STATUS_INVALID, /* invalid responses */ + RSLV_STATUS_ERROR, /* error */ + RSLV_STATUS_NX, /* NXDOMAIN */ + RSLV_STATUS_REFUSED, /* server refused our query */ + RSLV_STATUS_TIMEOUT, /* no response from DNS servers */ + RSLV_STATUS_OTHER, /* other errors */ +}; + +/* Current resolution step */ +enum { + RSLV_STEP_NONE = 0, /* nothing happening currently */ + RSLV_STEP_RUNNING, /* resolution is running */ +}; + +/* Return codes after analyzing a DNS response */ +enum { + RSLV_RESP_VALID = 0, /* valid response */ + RSLV_RESP_INVALID, /* invalid response (various type of errors can trigger it) */ + RSLV_RESP_ERROR, /* DNS error code */ + RSLV_RESP_NX_DOMAIN, /* resolution unsuccessful */ + RSLV_RESP_REFUSED, /* DNS server refused to answer */ + RSLV_RESP_ANCOUNT_ZERO, /* no answers in the response */ + RSLV_RESP_WRONG_NAME, /* response does not match query name */ + RSLV_RESP_CNAME_ERROR, /* error when resolving a CNAME in an atomic response */ + RSLV_RESP_TIMEOUT, /* DNS server has not answered in time */ + RSLV_RESP_TRUNCATED, /* DNS response is truncated */ + RSLV_RESP_NO_EXPECTED_RECORD, /* No expected records were found in the response */ + RSLV_RESP_QUERY_COUNT_ERROR, /* we did not get the expected number of queries in the response */ + RSLV_RESP_INTERNAL, /* internal resolver error */ +}; + +/* Return codes after searching an IP in a DNS response buffer, using a family 
+ * preference + */ +enum { + RSLV_UPD_NO = 1, /* provided IP was found and preference is matched + * OR provided IP found and preference is not matched, but no IP + * matching preference was found. + */ + RSLV_UPD_SRVIP_NOT_FOUND, /* provided IP not found + * OR provided IP found and preference is not match and an IP + * matching preference was found. + */ + RSLV_UPD_CNAME, /* CNAME without any IP provided in the response */ + RSLV_UPD_NAME_ERROR, /* name in the response did not match the query */ + RSLV_UPD_NO_IP_FOUND, /* no IP could be found in the response */ + RSLV_UPD_OBSOLETE_IP, /* The server IP was obsolete, and no other IP was found */ +}; + +struct proxy; +struct resolv_srvrq { + enum obj_type obj_type; /* object type == OBJ_TYPE_SRVRQ */ + struct resolvers *resolvers; /* pointer to the resolvers structure used for this server template */ + struct proxy *proxy; /* associated proxy */ + char *name; + char *hostname_dn; /* server hostname in Domain Name format */ + int hostname_dn_len; /* string length of the server hostname in Domain Name format */ + struct resolv_requester *requester; /* used to link to its DNS resolution */ + struct list attached_servers; /* List of the servers free to use */ + struct eb_root named_servers; /* tree of servers indexed by hostnames found in server state file */ + struct list list; /* Next SRV RQ for the same proxy */ +}; + +#endif /* _HAPROXY_RESOLVERS_T_H */ diff --git a/include/haproxy/resolvers.h b/include/haproxy/resolvers.h new file mode 100644 index 0000000..5d4c744 --- /dev/null +++ b/include/haproxy/resolvers.h @@ -0,0 +1,66 @@ +/* + * include/haproxy/dns.h + * This file provides functions related to DNS protocol + * + * Copyright (C) 2014 Baptiste Assmann <bedis9@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_RESOLVERS_H +#define _HAPROXY_RESOLVERS_H + +#include <haproxy/resolvers-t.h> + +struct proxy; +struct server; +struct stconn; +struct act_rule; +struct list; + +extern struct list sec_resolvers; +extern unsigned int resolv_failed_resolutions; + +struct resolvers *find_resolvers_by_id(const char *id); +struct resolv_srvrq *find_srvrq_by_name(const char *name, struct proxy *px); +struct resolv_srvrq *new_resolv_srvrq(struct server *srv, char *fqdn); +struct resolv_answer_item *find_srvrq_answer_record(const struct resolv_requester *requester); + +int resolv_str_to_dn_label(const char *str, int str_len, char *dn, int dn_len); +int resolv_dn_label_to_str(const char *dn, int dn_len, char *str, int str_len); + +int resolv_hostname_validation(const char *string, char **err); +int resolv_get_ip_from_response(struct resolv_response *r_res, + struct resolv_options *resolv_opts, void *currentip, + short currentip_sin_family, + void **newip, short *newip_sin_family, + struct server *owner); + +int resolv_link_resolution(void *requester, int requester_type, int requester_locked); +void resolv_unlink_resolution(struct resolv_requester *requester); +void resolv_detach_from_resolution_answer_items(struct resolv_resolution *res, struct resolv_requester *req); +void resolv_trigger_resolution(struct resolv_requester *requester); +enum act_parse_ret resolv_parse_do_resolve(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err); +int 
check_action_do_resolve(struct act_rule *rule, struct proxy *px, char **err); + +int stats_dump_resolvers(struct stconn *sc, + struct field *stats, size_t stats_count, + struct list *stat_modules); +void resolv_stats_clear_counters(int clrall, struct list *stat_modules); +int resolv_allocate_counters(struct list *stat_modules); +int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk); +int resolvers_create_default(); + +#endif // _HAPROXY_RESOLVER_H diff --git a/include/haproxy/ring-t.h b/include/haproxy/ring-t.h new file mode 100644 index 0000000..b89c886 --- /dev/null +++ b/include/haproxy/ring-t.h @@ -0,0 +1,113 @@ +/* + * include/haproxy/ring-t.h + * This file provides definitions for ring buffers used for disposable data. + * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_RING_T_H +#define _HAPROXY_RING_T_H + +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/thread.h> + +/* The code below handles circular buffers with single-producer and multiple + * readers (up to 255). The buffer storage area must remain always allocated. + * It's made of series of payload blocks followed by a readers count (RC). + * There is always a readers count at the beginning of the buffer as well. 
Each + * payload block is composed of a varint-encoded size (VI) followed by the + * actual payload (PL). + * + * The readers count is encoded on a single byte. It indicates how many readers + * are still waiting at this position. The writer writes after the buffer's + * tail, which initially starts just past the first readers count. Then it + * knows by reading this count that it must wake up the readers to indicate + * data availability. When a reader reads the payload block, it increments the + * next readers count and decrements the current one. The area between the + * initial readers count and the next one is protected from overwriting for as + * long as the initial count is non-null. As such these readers counts are + * effective barriers against data recycling. + * + * Only the writer is allowed to update the buffer's tail/head. This ensures + * that events can remain as long as possible so that late readers can get the + * maximum history available. It also helps dealing with multi-thread accesses + * using a simple RW lock during the buffer head's manipulation. The writer + * will have to delete some old records starting at the head until the new + * message can fit or a non-null readers count is encountered. If a message + * cannot fit due to insufficient room, the message is lost and the drop + * counter must be incremented. + * + * Like any buffer, this buffer naturally wraps at the end and continues at the + * beginning. The creation process consists in immediately adding a null + * readers count byte into the buffer. The write process consists in always + * writing a payload block followed by a new readers count. The delete process + * consists in removing a null readers count and payload block. As such, there + * is always at least one readers count byte in the buffer available at the + * head for new readers to attach to, and one before the tail, both of which + * may be the same when the buffer doesn't contain any event.
It is thus safe + * for any reader to simply keep the absolute offset of the last visited + * position and to restart from there. The write will update the buffer's + * absolute offset when deleting entries. All this also has the benefit of + * allowing a buffer to be hot-resized without losing its contents. + * + * Thus we have this : + * - init of empty buffer: + * head-, ,-tail + * [ RC | xxxxxxxxxxxxxxxxxxxxxxxxxx ] + * + * - reader attached: + * head-, ,-tail + * [ RC | xxxxxxxxxxxxxxxxxxxxxxxxxx ] + * ^- +1 + * + * - append of one event: + * appended + * head-, <----------> ,-tail + * [ RC | VI | PL | RC | xxxxxxxxxxx ] + * + * - reader advancing: + * head-, ,-tail + * [ RC | VI | PL | RC | xxxxxxxxxxx ] + * ^- -1 ^- +1 + * + * - writer removing older message: + * head-, ,-tail + * [ xxxxxxxxxxxx | RC | xxxxxxxxxxx ] + * <----------> + * removed + */ + +/* ring watch flags to be used when watching the ring */ +#define RING_WF_WAIT_MODE 0x00000001 /* wait for new contents */ +#define RING_WF_SEEK_NEW 0x00000002 /* seek to new contents */ + +struct ring { + struct buffer buf; // storage area + struct list waiters; // list of waiters, for now, CLI "show event" + __decl_thread(HA_RWLOCK_T lock); + int readers_count; +}; + +#endif /* _HAPROXY_RING_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/ring.h b/include/haproxy/ring.h new file mode 100644 index 0000000..71217d5 --- /dev/null +++ b/include/haproxy/ring.h @@ -0,0 +1,53 @@ +/* + * include/haproxy/ring.h + * Exported functions for ring buffers used for disposable data. + * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_RING_H +#define _HAPROXY_RING_H + +#include <stdlib.h> +#include <import/ist.h> +#include <haproxy/ring-t.h> + +struct appctx; + +struct ring *ring_new(size_t size); +struct ring *ring_make_from_area(void *area, size_t size); +struct ring *ring_cast_from_area(void *area); +void ring_init(struct ring *ring, void* area, size_t size); +struct ring *ring_resize(struct ring *ring, size_t size); +void ring_free(struct ring *ring); +ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg); +int ring_attach(struct ring *ring); +void ring_detach_appctx(struct ring *ring, struct appctx *appctx, size_t ofs); +int ring_attach_cli(struct ring *ring, struct appctx *appctx, uint flags); +int cli_io_handler_show_ring(struct appctx *appctx); +void cli_io_release_show_ring(struct appctx *appctx); + +size_t ring_max_payload(const struct ring *ring); + +#endif /* _HAPROXY_RING_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/sample-t.h b/include/haproxy/sample-t.h new file mode 100644 index 0000000..27cf4ba --- /dev/null +++ b/include/haproxy/sample-t.h @@ -0,0 +1,315 @@ +/* + * include/haproxy/sample-t.h + * Macros, variables and structures for sample management. 
+ * + * Copyright (C) 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * Copyright (C) 2012-2013 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SAMPLE_T_H +#define _HAPROXY_SAMPLE_T_H + +#include <haproxy/api-t.h> +#include <haproxy/sample_data-t.h> + +/* input and output sample types + * + * Some of them are pseudo types which means that they can be used for + * in_type and out_type in sample (fetches/conv) definitions (they serve as + * compatibility and conversion hints) but they cannot be emitted at runtime. + */ +enum { + SMP_T_ANY = 0, /* pseudo type: any type */ + SMP_T_SAME, /* special: output type hint for converters that don't alter input type (out == in) */ + SMP_T_BOOL, /* boolean */ + SMP_T_SINT, /* signed 64bits integer type */ + SMP_T_ADDR, /* pseudo type: could be ipv4 or ipv6 */ + SMP_T_IPV4, /* ipv4 type */ + SMP_T_IPV6, /* ipv6 type */ + SMP_T_STR, /* char string type */ + SMP_T_BIN, /* buffer type */ + SMP_T_METH, /* contain method */ + SMP_TYPES /* number of types, must always be last */ +}; + +/* Sample sources are used to establish a relation between fetch keywords and + * the location where they're about to be used. They're reserved for internal + * use and are not meant to be known outside the sample management code. 
+ */ +enum { + SMP_SRC_CONST, /* constant elements known at configuration time */ + SMP_SRC_INTRN, /* internal context-less information */ + SMP_SRC_LISTN, /* listener which accepted the connection */ + SMP_SRC_FTEND, /* frontend which accepted the connection */ + SMP_SRC_L4CLI, /* L4 information about the client */ + SMP_SRC_L5CLI, /* fetch uses client information from embryonic session */ + SMP_SRC_TRACK, /* fetch involves track counters */ + SMP_SRC_L6REQ, /* fetch uses raw information from the request buffer */ + SMP_SRC_HRQHV, /* fetch uses volatile information about HTTP request headers (eg: value) */ + SMP_SRC_HRQHP, /* fetch uses persistent information about HTTP request headers (eg: meth) */ + SMP_SRC_HRQBO, /* fetch uses information about HTTP request body */ + SMP_SRC_BKEND, /* fetch uses information about the backend */ + SMP_SRC_SERVR, /* fetch uses information about the selected server */ + SMP_SRC_L4SRV, /* fetch uses information about the server L4 connection */ + SMP_SRC_L5SRV, /* fetch uses information about the server L5 connection */ + SMP_SRC_L6RES, /* fetch uses raw information from the response buffer */ + SMP_SRC_HRSHV, /* fetch uses volatile information about HTTP response headers (eg: value) */ + SMP_SRC_HRSHP, /* fetch uses persistent information about HTTP response headers (eg: status) */ + SMP_SRC_HRSBO, /* fetch uses information about HTTP response body */ + SMP_SRC_RQFIN, /* final information about request buffer (eg: tot bytes) */ + SMP_SRC_RSFIN, /* final information about response buffer (eg: tot bytes) */ + SMP_SRC_TXFIN, /* final information about the transaction (eg: #comp rate) */ + SMP_SRC_SSFIN, /* final information about the stream (eg: #requests, final flags) */ + SMP_SRC_ENTRIES /* nothing after this */ +}; + +/* Sample checkpoints are a list of places where samples may be used. This is + * an internal enum used only to build SMP_VAL_*.
+ */ +enum { + SMP_CKP_FE_CON_ACC, /* FE connection accept rules ("tcp request connection") */ + SMP_CKP_FE_SES_ACC, /* FE stream accept rules (to come soon) */ + SMP_CKP_FE_REQ_CNT, /* FE request content rules ("tcp request content") */ + SMP_CKP_FE_HRQ_HDR, /* FE HTTP request headers (rules, headers, monitor, stats, redirect) */ + SMP_CKP_FE_HRQ_BDY, /* FE HTTP request body */ + SMP_CKP_FE_SET_BCK, /* FE backend switching rules ("use_backend") */ + SMP_CKP_BE_REQ_CNT, /* BE request content rules ("tcp request content") */ + SMP_CKP_BE_HRQ_HDR, /* BE HTTP request headers (rules, headers, monitor, stats, redirect) */ + SMP_CKP_BE_HRQ_BDY, /* BE HTTP request body */ + SMP_CKP_BE_SET_SRV, /* BE server switching rules ("use_server", "balance", "force-persist", "stick", ...) */ + SMP_CKP_BE_SRV_CON, /* BE server connect (eg: "source") */ + SMP_CKP_BE_RES_CNT, /* BE response content rules ("tcp response content") */ + SMP_CKP_BE_HRS_HDR, /* BE HTTP response headers (rules, headers) */ + SMP_CKP_BE_HRS_BDY, /* BE HTTP response body (stick-store rules are there) */ + SMP_CKP_BE_STO_RUL, /* BE stick-store rules */ + SMP_CKP_FE_RES_CNT, /* FE response content rules ("tcp response content") */ + SMP_CKP_FE_HRS_HDR, /* FE HTTP response headers (rules, headers) */ + SMP_CKP_FE_HRS_BDY, /* FE HTTP response body */ + SMP_CKP_FE_LOG_END, /* FE log at the end of the txn/stream */ + SMP_CKP_BE_CHK_RUL, /* BE tcp-check rules */ + SMP_CKP_CFG_PARSER, /* config parser (i.e. before boot) */ + SMP_CKP_CLI_PARSER, /* command line parser */ + SMP_CKP_ENTRIES /* nothing after this */ +}; + +/* SMP_USE_* are flags used to declare fetch keywords. Fetch methods are + * associated with bitfields composed of these values, generally only one, to + * indicate where the contents may be sampled. Some fetches are ambiguous as + * they apply to either the request or the response depending on the context, + * so they will have 2 of these bits (eg: hdr(), payload(), ...). 
These are + * stored in smp->use. + */ +enum { + SMP_USE_CONST = 1 << SMP_SRC_CONST, /* constant values known at config time */ + SMP_USE_INTRN = 1 << SMP_SRC_INTRN, /* internal context-less information */ + SMP_USE_LISTN = 1 << SMP_SRC_LISTN, /* listener which accepted the connection */ + SMP_USE_FTEND = 1 << SMP_SRC_FTEND, /* frontend which accepted the connection */ + SMP_USE_L4CLI = 1 << SMP_SRC_L4CLI, /* L4 information about the client */ + SMP_USE_L5CLI = 1 << SMP_SRC_L5CLI, /* fetch uses client information from embryonic session */ + SMP_USE_TRACK = 1 << SMP_SRC_TRACK, /* fetch involves track counters */ + SMP_USE_L6REQ = 1 << SMP_SRC_L6REQ, /* fetch uses raw information from the request buffer */ + SMP_USE_HRQHV = 1 << SMP_SRC_HRQHV, /* fetch uses volatile information about HTTP request headers (eg: value) */ + SMP_USE_HRQHP = 1 << SMP_SRC_HRQHP, /* fetch uses persistent information about HTTP request headers (eg: meth) */ + SMP_USE_HRQBO = 1 << SMP_SRC_HRQBO, /* fetch uses information about HTTP request body */ + SMP_USE_BKEND = 1 << SMP_SRC_BKEND, /* fetch uses information about the backend */ + SMP_USE_SERVR = 1 << SMP_SRC_SERVR, /* fetch uses information about the selected server */ + SMP_USE_L4SRV = 1 << SMP_SRC_L4SRV, /* fetch uses information about the server L4 connection */ + SMP_USE_L5SRV = 1 << SMP_SRC_L5SRV, /* fetch uses information about the server L5 connection */ + SMP_USE_L6RES = 1 << SMP_SRC_L6RES, /* fetch uses raw information from the response buffer */ + SMP_USE_HRSHV = 1 << SMP_SRC_HRSHV, /* fetch uses volatile information about HTTP response headers (eg: value) */ + SMP_USE_HRSHP = 1 << SMP_SRC_HRSHP, /* fetch uses persistent information about HTTP response headers (eg: status) */ + SMP_USE_HRSBO = 1 << SMP_SRC_HRSBO, /* fetch uses information about HTTP response body */ + SMP_USE_RQFIN = 1 << SMP_SRC_RQFIN, /* final information about request buffer (eg: tot bytes) */ + SMP_USE_RSFIN = 1 << SMP_SRC_RSFIN, /* final information about 
response buffer (eg: tot bytes) */ + SMP_USE_TXFIN = 1 << SMP_SRC_TXFIN, /* final information about the transaction (eg: #comp rate) */ + SMP_USE_SSFIN = 1 << SMP_SRC_SSFIN, /* final information about the stream (eg: #requests, final flags) */ + + /* This composite one is useful to detect if an http_txn needs to be allocated */ + SMP_USE_HTTP_ANY = SMP_USE_HRQHV | SMP_USE_HRQHP | SMP_USE_HRQBO | + SMP_USE_HRSHV | SMP_USE_HRSHP | SMP_USE_HRSBO, +}; + +/* Sample validity is computed from the fetch sources above when keywords + * are registered. Each fetch method may be used at different locations. The + * configuration parser will check whether the fetches are compatible with the + * location where they're used. These are stored in smp->val. + */ +enum { + SMP_VAL___________ = 0, /* Just used as a visual marker */ + SMP_VAL_FE_CON_ACC = 1 << SMP_CKP_FE_CON_ACC, /* FE connection accept rules ("tcp request connection") */ + SMP_VAL_FE_SES_ACC = 1 << SMP_CKP_FE_SES_ACC, /* FE stream accept rules (to come soon) */ + SMP_VAL_FE_REQ_CNT = 1 << SMP_CKP_FE_REQ_CNT, /* FE request content rules ("tcp request content") */ + SMP_VAL_FE_HRQ_HDR = 1 << SMP_CKP_FE_HRQ_HDR, /* FE HTTP request headers (rules, headers, monitor, stats, redirect) */ + SMP_VAL_FE_HRQ_BDY = 1 << SMP_CKP_FE_HRQ_BDY, /* FE HTTP request body */ + SMP_VAL_FE_SET_BCK = 1 << SMP_CKP_FE_SET_BCK, /* FE backend switching rules ("use_backend") */ + SMP_VAL_BE_REQ_CNT = 1 << SMP_CKP_BE_REQ_CNT, /* BE request content rules ("tcp request content") */ + SMP_VAL_BE_HRQ_HDR = 1 << SMP_CKP_BE_HRQ_HDR, /* BE HTTP request headers (rules, headers, monitor, stats, redirect) */ + SMP_VAL_BE_HRQ_BDY = 1 << SMP_CKP_BE_HRQ_BDY, /* BE HTTP request body */ + SMP_VAL_BE_SET_SRV = 1 << SMP_CKP_BE_SET_SRV, /* BE server switching rules ("use_server", "balance", "force-persist", "stick", ...) 
*/ + SMP_VAL_BE_SRV_CON = 1 << SMP_CKP_BE_SRV_CON, /* BE server connect (eg: "source") */ + SMP_VAL_BE_RES_CNT = 1 << SMP_CKP_BE_RES_CNT, /* BE response content rules ("tcp response content") */ + SMP_VAL_BE_HRS_HDR = 1 << SMP_CKP_BE_HRS_HDR, /* BE HTTP response headers (rules, headers) */ + SMP_VAL_BE_HRS_BDY = 1 << SMP_CKP_BE_HRS_BDY, /* BE HTTP response body (stick-store rules are there) */ + SMP_VAL_BE_STO_RUL = 1 << SMP_CKP_BE_STO_RUL, /* BE stick-store rules */ + SMP_VAL_FE_RES_CNT = 1 << SMP_CKP_FE_RES_CNT, /* FE response content rules ("tcp response content") */ + SMP_VAL_FE_HRS_HDR = 1 << SMP_CKP_FE_HRS_HDR, /* FE HTTP response headers (rules, headers) */ + SMP_VAL_FE_HRS_BDY = 1 << SMP_CKP_FE_HRS_BDY, /* FE HTTP response body */ + SMP_VAL_FE_LOG_END = 1 << SMP_CKP_FE_LOG_END, /* FE log at the end of the txn/stream */ + SMP_VAL_BE_CHK_RUL = 1 << SMP_CKP_BE_CHK_RUL, /* BE tcp-check rule */ + SMP_VAL_CFG_PARSER = 1 << SMP_CKP_CFG_PARSER, /* within config parser */ + SMP_VAL_CLI_PARSER = 1 << SMP_CKP_CLI_PARSER, /* within command line parser */ + + /* a few combinations to decide what direction to try to fetch (useful for logs) */ + SMP_VAL_REQUEST = SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT | + SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK | + SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY | + SMP_VAL_BE_SET_SRV | SMP_VAL_BE_CHK_RUL, + + SMP_VAL_RESPONSE = SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT | SMP_VAL_BE_HRS_HDR | + SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL | SMP_VAL_FE_RES_CNT | + SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY | SMP_VAL_FE_LOG_END | + SMP_VAL_BE_CHK_RUL, +}; + +/* Sample fetch options are passed to sample fetch functions to add precision + * about what is desired : + * - fetch direction (req/resp) + * - intermediary / final fetch + */ +enum { + SMP_OPT_DIR_REQ = 0, /* direction = request */ + SMP_OPT_DIR_RES = 1, /* direction = response */ + SMP_OPT_DIR = (SMP_OPT_DIR_REQ|SMP_OPT_DIR_RES), /* mask 
to get direction */ + SMP_OPT_FINAL = 2, /* final fetch, contents won't change anymore */ + SMP_OPT_ITERATE = 4, /* fetches may be iterated if supported (for ACLs) */ +}; + +/* Flags used to describe fetched samples. MAY_CHANGE indicates that the result + * of the fetch might still evolve, for instance because of more data expected, + * even if the fetch has failed. VOL_* indicates how long a result may be cached. + */ +enum { + SMP_F_NOT_LAST = 1 << 0, /* other occurrences might exist for this sample */ + SMP_F_MAY_CHANGE = 1 << 1, /* sample is unstable and might change (eg: request length) */ + SMP_F_VOL_TEST = 1 << 2, /* result must not survive longer than the test (eg: time) */ + SMP_F_VOL_1ST = 1 << 3, /* result sensitive to changes in first line (eg: URI) */ + SMP_F_VOL_HDR = 1 << 4, /* result sensitive to changes in headers */ + SMP_F_VOL_TXN = 1 << 5, /* result sensitive to new transaction (eg: HTTP version) */ + SMP_F_VOL_SESS = 1 << 6, /* result sensitive to new session (eg: src IP) */ + SMP_F_VOLATILE = (1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<6), /* any volatility condition */ + SMP_F_CONST = 1 << 7, /* This sample uses constant memory. May duplicate it before changes */ +}; + +/* needed below */ +struct session; +struct stream; +struct arg; + +/* a sample context might be used by any sample fetch function in order to + * store information needed across multiple calls (eg: restart point for a + * next occurrence). By definition it may store up to 8 pointers, or any + * scalar (double, int, long long). + */ +union smp_ctx { + void *p; /* any pointer */ + int i; /* any integer */ + long long ll; /* any long long or smaller */ + double d; /* any float or double */ + void *a[8]; /* any array of up to 8 pointers */ +}; + +/* a sample is a typed data extracted from a stream. It has a type, contents, + * validity constraints, a context for use in iterative calls.
+ */ +struct sample { + unsigned int flags; /* SMP_F_* */ + struct sample_data data; + union smp_ctx ctx; + + /* Some sample analyzer (sample-fetch or converters) needs to + * known the attached proxy, session and stream. The sample-fetches + * and the converters function pointers cannot be called without + * these 3 pointers filled. + */ + struct proxy *px; + struct session *sess; + struct stream *strm; /* WARNING! MAY BE NULL! (eg: tcp-request connection) */ + unsigned int opt; /* fetch options (SMP_OPT_*) */ +}; + +/* Descriptor for a sample conversion */ +struct sample_conv { + const char *kw; /* configuration keyword */ + int (*process)(const struct arg *arg_p, + struct sample *smp, + void *private); /* process function */ + uint64_t arg_mask; /* arguments (ARG*()) */ + int (*val_args)(struct arg *arg_p, + struct sample_conv *smp_conv, + const char *file, int line, + char **err_msg); /* argument validation function */ + unsigned int in_type; /* expected input sample type */ + unsigned int out_type; /* output sample type */ + void *private; /* private values. only used by maps and Lua */ +}; + +/* sample conversion expression */ +struct sample_conv_expr { + struct list list; /* member of a sample_expr */ + struct sample_conv *conv; /* sample conversion used */ + struct arg *arg_p; /* optional arguments */ +}; + +/* Descriptor for a sample fetch method */ +struct sample_fetch { + const char *kw; /* configuration keyword */ + int (*process)(const struct arg *arg_p, + struct sample *smp, + const char *kw, /* fetch processing function */ + void *private); /* private value. */ + uint64_t arg_mask; /* arguments (ARG*()) */ + int (*val_args)(struct arg *arg_p, + char **err_msg); /* argument validation function */ + unsigned long out_type; /* output sample type */ + unsigned int use; /* fetch source (SMP_USE_*) */ + unsigned int val; /* fetch validity (SMP_VAL_*) */ + void *private; /* private values. 
only used by Lua */ +}; + +/* sample expression */ +struct sample_expr { + struct list list; /* member of list of sample, currently not used */ + struct sample_fetch *fetch; /* sample fetch method */ + struct arg *arg_p; /* optional pointer to arguments to fetch function */ + struct list conv_exprs; /* list of conversion expression to apply */ +}; + +/* sample fetch keywords list */ +struct sample_fetch_kw_list { + struct list list; /* head of sample fetch keyword list */ + struct sample_fetch kw[VAR_ARRAY]; /* array of sample fetch descriptors */ +}; + +/* sample conversion keywords list */ +struct sample_conv_kw_list { + struct list list; /* head of sample conversion keyword list */ + struct sample_conv kw[VAR_ARRAY]; /* array of sample conversion descriptors */ +}; + +typedef int (*sample_cast_fct)(struct sample *smp); + +#endif /* _HAPROXY_SAMPLE_T_H */ diff --git a/include/haproxy/sample.h b/include/haproxy/sample.h new file mode 100644 index 0000000..7e05e78 --- /dev/null +++ b/include/haproxy/sample.h @@ -0,0 +1,186 @@ +/* + * include/haproxy/sample.h + * Functions for samples management. + * + * Copyright (C) 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * Copyright (C) 2012 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SAMPLE_H +#define _HAPROXY_SAMPLE_H + +#include <haproxy/api.h> +#include <haproxy/arg-t.h> +#include <haproxy/sample-t.h> +#include <haproxy/stick_table-t.h> + +extern sample_cast_fct sample_casts[SMP_TYPES][SMP_TYPES]; +extern const unsigned int fetch_cap[SMP_SRC_ENTRIES]; +extern const char *smp_to_type[SMP_TYPES]; + +struct sample_expr *sample_parse_expr(char **str, int *idx, const char *file, int line, char **err, struct arg_list *al, char **endptr); +int sample_parse_expr_cnv(char **str, int *idx, char **endptr, char **err_msg, struct arg_list *al, const char *file, int line, + struct sample_expr *expr, const char *start); +struct sample_conv *find_sample_conv(const char *kw, int len); +struct sample *sample_process(struct proxy *px, struct session *sess, + struct stream *strm, unsigned int opt, + struct sample_expr *expr, struct sample *p); +int sample_process_cnv(struct sample_expr *expr, struct sample *p); +struct sample *sample_fetch_as_type(struct proxy *px, struct session *sess, + struct stream *strm, unsigned int opt, + struct sample_expr *expr, int smp_type); +int sample_conv_var2smp(const struct var_desc *var, struct sample *smp, int type); +int sample_conv_var2smp_sint(const struct arg *arg, struct sample *smp); +int sample_conv_var2smp_str(const struct arg *arg, struct sample *smp); +void release_sample_expr(struct sample_expr *expr); +void sample_register_fetches(struct sample_fetch_kw_list *psl); +void sample_register_convs(struct sample_conv_kw_list *psl); +const char *sample_src_names(unsigned int use); +const char *sample_ckp_names(unsigned int use); +struct sample_fetch *find_sample_fetch(const char *kw, int len); +void smp_dump_fetch_kw(void); +void smp_dump_conv_kw(void); +struct sample_fetch 
*sample_fetch_getnext(struct sample_fetch *current, int *idx); +struct sample_conv *sample_conv_getnext(struct sample_conv *current, int *idx); +int smp_resolve_args(struct proxy *p, char **err); +int smp_check_date_unit(struct arg *args, char **err); +int smp_expr_output_type(struct sample_expr *expr); +int c_none(struct sample *smp); +int c_pseudo(struct sample *smp); +int smp_dup(struct sample *smp); + +/* + * This function just apply a cast on sample. It returns 0 if the cast is not + * available or if the cast fails, otherwise returns 1. It does not modify the + * input sample on failure. + */ +static inline +int sample_convert(struct sample *sample, int req_type) +{ + if (!sample_casts[sample->data.type][req_type]) + return 0; + if (sample_casts[sample->data.type][req_type] == c_none) + return 1; + return sample_casts[sample->data.type][req_type](sample); +} + +static inline +struct sample *smp_set_owner(struct sample *smp, struct proxy *px, + struct session *sess, struct stream *strm, int opt) +{ + smp->px = px; + smp->sess = sess; + smp->strm = strm; + smp->opt = opt; + return smp; +} + + +/* Returns 1 if a sample may be safely used. It performs a few checks on the + * string length versus size, same for the binary version, and ensures that + * strings are properly terminated by a zero. If this last point is not granted + * but the string is not const, then the \0 is appended. Otherwise it returns 0, + * meaning the caller may need to call smp_dup() before going further. 
+ */ +static inline +int smp_is_safe(struct sample *smp) +{ + switch (smp->data.type) { + case SMP_T_METH: + if (smp->data.u.meth.meth != HTTP_METH_OTHER) + return 1; + __fallthrough; + + case SMP_T_STR: + if (!smp->data.u.str.size || smp->data.u.str.data >= smp->data.u.str.size) + return 0; + + if (smp->data.u.str.area[smp->data.u.str.data] == 0) + return 1; + + if (smp->flags & SMP_F_CONST) + return 0; + + smp->data.u.str.area[smp->data.u.str.data] = 0; + return 1; + + case SMP_T_BIN: + return !smp->data.u.str.size || smp->data.u.str.data <= smp->data.u.str.size; + + default: + return 1; + } +} + +/* checks that a sample may freely be used, or duplicates it to normalize it. + * Returns 1 on success, 0 if the sample must not be used. The function also + * checks for NULL to simplify the calling code. + */ +static inline +int smp_make_safe(struct sample *smp) +{ + return smp && (smp_is_safe(smp) || smp_dup(smp)); +} + +/* Returns 1 if a sample may be safely modified in place. It performs a few + * checks on the string length versus size, same for the binary version, and + * ensures that strings are properly terminated by a zero, and of course that + * the size is allocated and that the SMP_F_CONST flag is not set. If only the + * trailing zero is missing, it is appended. Otherwise it returns 0, meaning + * the caller may need to call smp_dup() before going further.
+ */ +static inline +int smp_is_rw(struct sample *smp) +{ + if (smp->flags & SMP_F_CONST) + return 0; + + switch (smp->data.type) { + case SMP_T_METH: + if (smp->data.u.meth.meth != HTTP_METH_OTHER) + return 1; + __fallthrough; + + case SMP_T_STR: + if (!smp->data.u.str.size || + smp->data.u.str.data >= smp->data.u.str.size) + return 0; + + if (smp->data.u.str.area[smp->data.u.str.data] != 0) + smp->data.u.str.area[smp->data.u.str.data] = 0; + return 1; + + case SMP_T_BIN: + return smp->data.u.str.size && + smp->data.u.str.data <= smp->data.u.str.size; + + default: + return 1; + } +} + +/* checks that a sample may freely be modified, or duplicates it to normalize + * it and make it R/W. Returns 1 on success, 0 if the sample must not be used. + * The function also checks for NULL to simplify the calling code. + */ +static inline +int smp_make_rw(struct sample *smp) +{ + return smp && (smp_is_rw(smp) || smp_dup(smp)); +} + +#endif /* _HAPROXY_SAMPLE_H */ diff --git a/include/haproxy/sample_data-t.h b/include/haproxy/sample_data-t.h new file mode 100644 index 0000000..2546028 --- /dev/null +++ b/include/haproxy/sample_data-t.h @@ -0,0 +1,51 @@ +/* + * include/haproxy/sample_data-t.h + * Definitions of sample data + * + * Copyright (C) 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * Copyright (C) 2020 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SAMPLE_DATA_T_H +#define _HAPROXY_SAMPLE_DATA_T_H + +#include <sys/socket.h> +#include <netinet/in.h> +#include <haproxy/buf-t.h> +#include <haproxy/http-t.h> + +/* Note: the strings below make use of chunks. Chunks may carry an allocated + * size in addition to the length. The size counts from the beginning (str) + * to the end. If the size is unknown, it MUST be zero, in which case the + * sample will automatically be duplicated when a change larger than <len> has + * to be performed. Thus it is safe to always set size to zero. + */ +union sample_value { + long long int sint; /* used for signed 64bits integers */ + struct in_addr ipv4; /* used for ipv4 addresses */ + struct in6_addr ipv6; /* used for ipv6 addresses */ + struct buffer str; /* used for char strings or buffers */ + struct http_meth meth; /* used for http method */ +}; + +/* Used to store sample constant */ +struct sample_data { + int type; /* SMP_T_* */ + union sample_value u; /* sample data */ +}; + +#endif /* _HAPROXY_SAMPLE_DATA_T_H */ diff --git a/include/haproxy/sc_strm.h b/include/haproxy/sc_strm.h new file mode 100644 index 0000000..41f07e9 --- /dev/null +++ b/include/haproxy/sc_strm.h @@ -0,0 +1,447 @@ +/* + * include/haproxy/sc_strm.h + * This file contains stream-specific stream-connector functions prototypes + * + * Copyright 2022 Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SC_STRM_H +#define _HAPROXY_SC_STRM_H + +#include <haproxy/api.h> +#include <haproxy/buf-t.h> +#include <haproxy/channel-t.h> +#include <haproxy/stream-t.h> +#include <haproxy/task-t.h> +#include <haproxy/connection.h> +#include <haproxy/channel.h> +#include <haproxy/session.h> +#include <haproxy/stconn.h> +#include <haproxy/stream.h> + +void sc_update_rx(struct stconn *sc); +void sc_update_tx(struct stconn *sc); + +struct task *sc_conn_io_cb(struct task *t, void *ctx, unsigned int state); +int sc_conn_sync_recv(struct stconn *sc); +void sc_conn_sync_send(struct stconn *sc); + + +/* returns the channel which receives data from this stream connector (input channel) */ +static inline struct channel *sc_ic(const struct stconn *sc) +{ + struct stream *strm = __sc_strm(sc); + + return ((sc->flags & SC_FL_ISBACK) ? &(strm->res) : &(strm->req)); +} + +/* returns the channel which feeds data to this stream connector (output channel) */ +static inline struct channel *sc_oc(const struct stconn *sc) +{ + struct stream *strm = __sc_strm(sc); + + return ((sc->flags & SC_FL_ISBACK) ? 
&(strm->req) : &(strm->res)); +} + +/* returns the buffer which receives data from this stream connector (input channel's buffer) */ +static inline struct buffer *sc_ib(const struct stconn *sc) +{ + return &sc_ic(sc)->buf; +} + +/* returns the buffer which feeds data to this stream connector (output channel's buffer) */ +static inline struct buffer *sc_ob(const struct stconn *sc) +{ + return &sc_oc(sc)->buf; +} +/* returns the stream's task associated to this stream connector */ +static inline struct task *sc_strm_task(const struct stconn *sc) +{ + struct stream *strm = __sc_strm(sc); + + return strm->task; +} + +/* returns the stream connector on the other side. Used during forwarding. */ +static inline struct stconn *sc_opposite(const struct stconn *sc) +{ + struct stream *strm = __sc_strm(sc); + + return ((sc->flags & SC_FL_ISBACK) ? strm->scf : strm->scb); +} + + +/* sets the current and previous state of a stream connector to <state>. This is + * mainly used to create one in the established state on incoming connections. + */ +static inline void sc_set_state(struct stconn *sc, int state) +{ + sc->state = __sc_strm(sc)->prev_conn_state = state; +} + +/* returns a bit for a stream connector state, to match against SC_SB_* */ +static inline enum sc_state_bit sc_state_bit(enum sc_state state) +{ + BUG_ON(state > SC_ST_CLO); + return 1U << state; +} + +/* returns true if <state> matches one of the SC_SB_* bits in <mask> */ +static inline int sc_state_in(enum sc_state state, enum sc_state_bit mask) +{ + BUG_ON(mask & ~SC_SB_ALL); + return !!(sc_state_bit(state) & mask); +} + +/* Returns true if a connection is attached to the stream connector <sc> and if this + * connection is ready. 
+ */ +static inline int sc_conn_ready(const struct stconn *sc) +{ + const struct connection *conn = sc_conn(sc); + + return conn && conn_ctrl_ready(conn) && conn_xprt_ready(conn); +} + + +/* The stream connector is only responsible for the connection during the early + * states, before plugging a mux. Thus it should only care about CO_FL_ERROR + * before SC_ST_EST, and after that it must absolutely ignore it since the mux + * may hold pending data. This function returns true if such an error was + * reported. Both the SC and the CONN must be valid. + */ +static inline int sc_is_conn_error(const struct stconn *sc) +{ + const struct connection *conn; + + if (sc->state >= SC_ST_EST) + return 0; + + conn = __sc_conn(sc); + BUG_ON(!conn); + return !!(conn->flags & CO_FL_ERROR); +} + +/* Try to allocate a buffer for the stream connector's input channel. It relies on + * channel_alloc_buffer() for this so it abides by its rules. It returns 0 on + * failure, non-zero otherwise. If no buffer is available, the requester, + * represented by the <wait> pointer, will be added in the list of objects + * waiting for an available buffer, and SC_FL_NEED_BUFF will be set on the + * stream connector and SE_FL_HAVE_NO_DATA cleared. The requester will be responsible + * for calling this function to try again once woken up. + */ +static inline int sc_alloc_ibuf(struct stconn *sc, struct buffer_wait *wait) +{ + int ret; + + ret = channel_alloc_buffer(sc_ic(sc), wait); + if (!ret) + sc_need_buff(sc); + return ret; +} + + +/* Returns the source address of the stream connector and, if not set, fallbacks on + * the session for frontend SC and the server connection for the backend SC. It + * returns a const address on success or NULL on failure. 
+ */ +static inline const struct sockaddr_storage *sc_src(const struct stconn *sc) +{ + if (sc->src) + return sc->src; + if (!(sc->flags & SC_FL_ISBACK)) + return sess_src(strm_sess(__sc_strm(sc))); + else { + struct connection *conn = sc_conn(sc); + + if (conn) + return conn_src(conn); + } + return NULL; +} + + +/* Returns the destination address of the stream connector and, if not set, fallbacks + * on the session for frontend SC and the server connection for the backend + * SC. It returns a const address on success or NULL on failure. + */ +static inline const struct sockaddr_storage *sc_dst(const struct stconn *sc) +{ + if (sc->dst) + return sc->dst; + if (!(sc->flags & SC_FL_ISBACK)) + return sess_dst(strm_sess(__sc_strm(sc))); + else { + struct connection *conn = sc_conn(sc); + + if (conn) + return conn_dst(conn); + } + return NULL; +} + +/* Retrieves the source address of the stream connector. Returns non-zero on success + * or zero on failure. The operation is only performed once and the address is + * stored in the stream connector for future use. On the first call, the stream connector + * source address is copied from the session one for frontend SC and the server + * connection for the backend SC. + */ +static inline int sc_get_src(struct stconn *sc) +{ + const struct sockaddr_storage *src = NULL; + + if (sc->src) + return 1; + + if (!(sc->flags & SC_FL_ISBACK)) + src = sess_src(strm_sess(__sc_strm(sc))); + else { + struct connection *conn = sc_conn(sc); + + if (conn) + src = conn_src(conn); + } + if (!src) + return 0; + + if (!sockaddr_alloc(&sc->src, src, sizeof(*src))) + return 0; + + return 1; +} + +/* Retrieves the destination address of the stream connector. Returns non-zero on + * success or zero on failure. The operation is only performed once and the + * address is stored in the stream connector for future use. 
On the first call, the + * stream connector destination address is copied from the session one for frontend + * SC and the server connection for the backend SC. + */ +static inline int sc_get_dst(struct stconn *sc) +{ + const struct sockaddr_storage *dst = NULL; + + if (sc->dst) + return 1; + + if (!(sc->flags & SC_FL_ISBACK)) + dst = sess_dst(strm_sess(__sc_strm(sc))); + else { + struct connection *conn = sc_conn(sc); + + if (conn) + dst = conn_dst(conn); + } + if (!dst) + return 0; + + if (!sockaddr_alloc(&sc->dst, dst, sizeof(*dst))) + return 0; + + return 1; +} + + +/* Marks on the stream connector that next shutdown must kill the whole connection */ +static inline void sc_must_kill_conn(struct stconn *sc) +{ + sc_ep_set(sc, SE_FL_KILL_CONN); +} + + +/* Returns non-zero if the stream connector is allowed to receive from the + * endpoint, which means that no flag indicating a blocked channel, lack of + * buffer or room is set, and that the endpoint is not waiting for the + * application to complete a connection setup on the other side, and that + * the stream's channel is not shut for reads. This is only used by stream + * applications. + */ +__attribute__((warn_unused_result)) +static inline int sc_is_recv_allowed(const struct stconn *sc) +{ + if (sc->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) + return 0; + + if (sc_ep_test(sc, SE_FL_APPLET_NEED_CONN)) + return 0; + + if (sc_ep_test(sc, SE_FL_HAVE_NO_DATA)) + return 0; + + if (sc_ep_test(sc, SE_FL_MAY_FASTFWD_PROD) && (sc_opposite(sc)->sedesc->iobuf.flags & IOBUF_FL_FF_BLOCKED)) + return 0; + + return !(sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM)); +} + +/* This is to be used after making some room available in a channel. It will + * return without doing anything if the stream connector's RX path is blocked. + * It will automatically mark the stream connector as busy processing the end + * point in order to avoid useless repeated wakeups. 
+ * It will then call ->chk_rcv() to enable receipt of new data. + */ +static inline void sc_chk_rcv(struct stconn *sc) +{ + if (sc_ep_test(sc, SE_FL_APPLET_NEED_CONN) && + sc_state_in(sc_opposite(sc)->state, SC_SB_RDY|SC_SB_EST|SC_SB_DIS|SC_SB_CLO)) { + sc_ep_clr(sc, SE_FL_APPLET_NEED_CONN); + sc_ep_report_read_activity(sc); + } + + if (!sc_is_recv_allowed(sc)) + return; + + if (!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST)) + return; + + sc_ep_set(sc, SE_FL_HAVE_NO_DATA); + if (likely(sc->app_ops->chk_rcv)) + sc->app_ops->chk_rcv(sc); +} + +/* Calls chk_snd on the endpoint using the data layer */ +static inline void sc_chk_snd(struct stconn *sc) +{ + if (likely(sc->app_ops->chk_snd)) + sc->app_ops->chk_snd(sc); +} + +/* Combines both sc_update_rx() and sc_update_tx() at once */ +static inline void sc_update(struct stconn *sc) +{ + sc_update_rx(sc); + sc_update_tx(sc); +} + +/* for debugging, reports the stream connector state name */ +static inline const char *sc_state_str(int state) +{ + switch (state) { + case SC_ST_INI: return "INI"; + case SC_ST_REQ: return "REQ"; + case SC_ST_QUE: return "QUE"; + case SC_ST_TAR: return "TAR"; + case SC_ST_ASS: return "ASS"; + case SC_ST_CON: return "CON"; + case SC_ST_CER: return "CER"; + case SC_ST_RDY: return "RDY"; + case SC_ST_EST: return "EST"; + case SC_ST_DIS: return "DIS"; + case SC_ST_CLO: return "CLO"; + default: return "???"; + } +} + +/* indicates if the connector may send data to the endpoint, that is, the + * endpoint is both willing to receive data and ready to do so. This is only + * used with applets so there's always a stream attached to this connector. 
+ */ +__attribute__((warn_unused_result)) +static inline int sc_is_send_allowed(const struct stconn *sc) +{ + if (sc->flags & SC_FL_SHUT_DONE) + return 0; + + return !sc_ep_test(sc, SE_FL_WAIT_DATA | SE_FL_WONT_CONSUME); +} + +static inline int sc_rcv_may_expire(const struct stconn *sc) +{ + if ((sc->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) || (sc_ic(sc)->flags & CF_READ_TIMEOUT)) + return 0; + if (sc->flags & (SC_FL_EOI|SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM)) + return 0; + if (sc_ep_test(sc, SE_FL_APPLET_NEED_CONN) || sc_ep_test(sc_opposite(sc), SE_FL_EXP_NO_DATA)) + return 0; + return 1; +} + +static inline int sc_snd_may_expire(const struct stconn *sc) +{ + if ((sc->flags & SC_FL_SHUT_DONE) || (sc_oc(sc)->flags & CF_WRITE_TIMEOUT)) + return 0; + if (sc_ep_test(sc, SE_FL_WONT_CONSUME)) + return 0; + return 1; +} + +static forceinline int sc_ep_rcv_ex(const struct stconn *sc) +{ + return ((tick_isset(sc->sedesc->lra) && sc_rcv_may_expire(sc)) + ? tick_add_ifset(sc->sedesc->lra, sc->ioto) + : TICK_ETERNITY); +} + +static forceinline int sc_ep_snd_ex(const struct stconn *sc) +{ + return ((tick_isset(sc->sedesc->fsb) && sc_snd_may_expire(sc)) + ? 
tick_add_ifset(sc->sedesc->fsb, sc->ioto) + : TICK_ETERNITY); +} + +static inline void sc_check_timeouts(const struct stconn *sc) +{ + if (unlikely(tick_is_expired(sc_ep_rcv_ex(sc), now_ms))) + sc_ic(sc)->flags |= CF_READ_TIMEOUT; + if (unlikely(tick_is_expired(sc_ep_snd_ex(sc), now_ms))) + sc_oc(sc)->flags |= CF_WRITE_TIMEOUT; +} + +static inline void sc_set_hcto(struct stconn *sc) +{ + struct stream *strm = __sc_strm(sc); + + if (IS_HTX_STRM(strm)) + return; + + if (sc->flags & SC_FL_ISBACK) { + if ((strm->flags & SF_BE_ASSIGNED) && tick_isset(strm->be->timeout.serverfin)) + sc->ioto = strm->be->timeout.serverfin; + } + else { + if (tick_isset(strm_fe(strm)->timeout.clientfin)) + sc->ioto = strm_fe(strm)->timeout.clientfin; + } + +} + +/* Schedule an abort for the SC */ +static inline void sc_schedule_abort(struct stconn *sc) +{ + sc->flags |= SC_FL_ABRT_WANTED; +} + +/* Abort the SC and notify the endpoint using the data layer */ +static inline void sc_abort(struct stconn *sc) +{ + if (likely(sc->app_ops->abort)) + sc->app_ops->abort(sc); +} + +/* Schedule a shutdown for the SC */ +static inline void sc_schedule_shutdown(struct stconn *sc) +{ + sc->flags |= SC_FL_SHUT_WANTED; +} + +/* Shutdown the SC and notify the endpoint using the data layer */ +static inline void sc_shutdown(struct stconn *sc) +{ + if (likely(sc->app_ops->shutdown)) + sc->app_ops->shutdown(sc); +} + +#endif /* _HAPROXY_SC_STRM_H */ diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h new file mode 100644 index 0000000..666d2cc --- /dev/null +++ b/include/haproxy/server-t.h @@ -0,0 +1,681 @@ +/* + * include/haproxy/server-t.h + * This file defines everything related to servers. + * + * Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SERVER_T_H +#define _HAPROXY_SERVER_T_H + +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/check-t.h> +#include <haproxy/connection-t.h> +#include <haproxy/counters-t.h> +#include <haproxy/freq_ctr-t.h> +#include <haproxy/listener-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/queue-t.h> +#include <haproxy/quic_tp-t.h> +#include <haproxy/resolvers-t.h> +#include <haproxy/stats-t.h> +#include <haproxy/task-t.h> +#include <haproxy/thread-t.h> +#include <haproxy/event_hdl-t.h> +#include <haproxy/tools-t.h> + + +/* server states. Only SRV_ST_STOPPED indicates a down server. */ +enum srv_state { + SRV_ST_STOPPED = 0, /* the server is down. Please keep set to zero. */ + SRV_ST_STARTING, /* the server is warming up (up but throttled) */ + SRV_ST_RUNNING, /* the server is fully up */ + SRV_ST_STOPPING, /* the server is up but soft-stopping (eg: 404) */ +} __attribute__((packed)); + +/* Administrative status : a server runs in one of these 3 stats : + * - READY : normal mode + * - DRAIN : takes no new visitor, equivalent to weight == 0 + * - MAINT : maintenance mode, no more traffic nor health checks. + * + * Each server may be in maintenance by itself or may inherit this status from + * another server it tracks. It can also be in drain mode by itself or inherit + * it from another server. Let's store these origins here as flags. 
These flags + * are combined this way : + * + * FMAINT IMAINT FDRAIN IDRAIN Resulting state + * 0 0 0 0 READY + * 0 0 0 1 DRAIN + * 0 0 1 x DRAIN + * 0 1 x x MAINT + * 1 x x x MAINT + * + * This can be simplified this way : + * + * state_str = (state & MAINT) ? "MAINT" : (state & DRAIN) ? "DRAIN" : "READY" + */ +enum srv_admin { + SRV_ADMF_FMAINT = 0x01, /* the server was explicitly forced into maintenance */ + SRV_ADMF_IMAINT = 0x02, /* the server has inherited the maintenance status from a tracked server */ + SRV_ADMF_MAINT = 0x23, /* mask to check if any of the FMAINT, IMAINT or RMAINT maintenance flags is present (CMAINT and HMAINT are not covered) */ + SRV_ADMF_CMAINT = 0x04, /* the server is in maintenance because of the configuration */ + SRV_ADMF_FDRAIN = 0x08, /* the server was explicitly forced into drain state */ + SRV_ADMF_IDRAIN = 0x10, /* the server has inherited the drain status from a tracked server */ + SRV_ADMF_DRAIN = 0x18, /* mask to check if any drain flag is present */ + SRV_ADMF_RMAINT = 0x20, /* the server is down because of an IP address resolution failure */ + SRV_ADMF_HMAINT = 0x40, /* the server FQDN has been set from socket stats */ +} __attribute__((packed)); + +/* options for servers' "init-addr" parameter + * this parameter may be used to drive HAProxy's behavior when parsing a server + * address at start up time. + * These values are stored as a list into an integer ordered from first to last + * starting with the lowest to highest bits. SRV_IADDR_END (0) is used to + * indicate the end of the list. 3 bits are enough to store each value.
+ */ +enum srv_initaddr { + SRV_IADDR_END = 0, /* end of the list */ + SRV_IADDR_NONE = 1, /* the server won't have any address at start up */ + SRV_IADDR_LIBC = 2, /* address set using the libc DNS resolver */ + SRV_IADDR_LAST = 3, /* we set the IP address found in state-file for this server */ + SRV_IADDR_IP = 4, /* we set an arbitrary IP address to the server */ +} __attribute__((packed)); + +/* server-state-file version */ +#define SRV_STATE_FILE_VERSION 1 +#define SRV_STATE_FILE_VERSION_MIN 1 +#define SRV_STATE_FILE_VERSION_MAX 1 +#define SRV_STATE_FILE_FIELD_NAMES \ + "be_id " \ + "be_name " \ + "srv_id " \ + "srv_name " \ + "srv_addr " \ + "srv_op_state " \ + "srv_admin_state " \ + "srv_uweight " \ + "srv_iweight " \ + "srv_time_since_last_change " \ + "srv_check_status " \ + "srv_check_result " \ + "srv_check_health " \ + "srv_check_state " \ + "srv_agent_state " \ + "bk_f_forced_id " \ + "srv_f_forced_id " \ + "srv_fqdn " \ + "srv_port " \ + "srvrecord " \ + "srv_use_ssl " \ + "srv_check_port " \ + "srv_check_addr " \ + "srv_agent_addr " \ + "srv_agent_port" + +#define SRV_STATE_FILE_MAX_FIELDS 25 +#define SRV_STATE_FILE_MIN_FIELDS_VERSION_1 20 +#define SRV_STATE_FILE_MAX_FIELDS_VERSION_1 25 +#define SRV_STATE_LINE_MAXLEN 2000 + +/* server flags -- 32 bits */ +#define SRV_F_BACKUP 0x0001 /* this server is a backup server */ +#define SRV_F_MAPPORTS 0x0002 /* this server uses mapped ports */ +#define SRV_F_NON_STICK 0x0004 /* never add connections allocated to this server to a stick table */ +#define SRV_F_USE_NS_FROM_PP 0x0008 /* use namespace associated with connection if present */ +#define SRV_F_FORCED_ID 0x0010 /* server's ID was forced in the configuration */ +#define SRV_F_RHTTP 0x0020 /* reverse HTTP server which requires idle connection for transfers */ +#define SRV_F_AGENTPORT 0x0040 /* this server has a agent port configured */ +#define SRV_F_AGENTADDR 0x0080 /* this server has a agent addr configured */ +#define SRV_F_COOKIESET 0x0100 /* this 
server has a cookie configured, so don't generate dynamic cookies */ +#define SRV_F_FASTOPEN 0x0200 /* Use TCP Fast Open to connect to server */ +#define SRV_F_SOCKS4_PROXY 0x0400 /* this server uses SOCKS4 proxy */ +#define SRV_F_NO_RESOLUTION 0x0800 /* disable runtime DNS resolution on this server */ +#define SRV_F_DYNAMIC 0x1000 /* dynamic server instantiated at runtime */ +#define SRV_F_NON_PURGEABLE 0x2000 /* this server cannot be removed at runtime */ +#define SRV_F_DEFSRV_USE_SSL 0x4000 /* default-server uses SSL */ +#define SRV_F_DELETED 0x8000 /* srv is deleted but not yet purged */ + +/* configured server options for send-proxy (server->pp_opts) */ +#define SRV_PP_V1 0x0001 /* proxy protocol version 1 */ +#define SRV_PP_V2 0x0002 /* proxy protocol version 2 */ +#define SRV_PP_V2_SSL 0x0004 /* proxy protocol version 2 with SSL */ +#define SRV_PP_V2_SSL_CN 0x0008 /* proxy protocol version 2 with CN */ +#define SRV_PP_V2_SSL_KEY_ALG 0x0010 /* proxy protocol version 2 with cert key algorithm */ +#define SRV_PP_V2_SSL_SIG_ALG 0x0020 /* proxy protocol version 2 with cert signature algorithm */ +#define SRV_PP_V2_SSL_CIPHER 0x0040 /* proxy protocol version 2 with cipher used */ +#define SRV_PP_V2_AUTHORITY 0x0080 /* proxy protocol version 2 with authority */ +#define SRV_PP_V2_CRC32C 0x0100 /* proxy protocol version 2 with crc32c */ +#define SRV_PP_V2_UNIQUE_ID 0x0200 /* proxy protocol version 2 with unique ID */ + +/* function which act on servers need to return various errors */ +#define SRV_STATUS_OK 0 /* everything is OK. */ +#define SRV_STATUS_INTERNAL 1 /* other unrecoverable errors. 
*/ +#define SRV_STATUS_NOSRV 2 /* no server is available */ +#define SRV_STATUS_FULL 3 /* the/all server(s) are saturated */ +#define SRV_STATUS_QUEUED 4 /* the/all server(s) are saturated but the connection was queued */ + +/* various constants */ +#define SRV_UWGHT_RANGE 256 +#define SRV_UWGHT_MAX (SRV_UWGHT_RANGE) +#define SRV_EWGHT_RANGE (SRV_UWGHT_RANGE * BE_WEIGHT_SCALE) +#define SRV_EWGHT_MAX (SRV_UWGHT_MAX * BE_WEIGHT_SCALE) + +/* server ssl options */ +#define SRV_SSL_O_NONE 0x0000 +#define SRV_SSL_O_NO_TLS_TICKETS 0x0100 /* disable session resumption tickets */ +#define SRV_SSL_O_NO_REUSE 0x200 /* disable session reuse */ +#define SRV_SSL_O_EARLY_DATA 0x400 /* Allow using early data */ + +/* log servers ring's protocols options */ +enum srv_log_proto { + SRV_LOG_PROTO_LEGACY, // messages on TCP separated by LF + SRV_LOG_PROTO_OCTET_COUNTING, // TCP frames: MSGLEN SP MSG +}; + +/* srv administrative change causes */ +enum srv_adm_st_chg_cause { + SRV_ADM_STCHGC_NONE = 0, + SRV_ADM_STCHGC_DNS_NOENT, /* entry removed from srv record */ + SRV_ADM_STCHGC_DNS_NOIP, /* no server ip in the srv record */ + SRV_ADM_STCHGC_DNS_NX, /* resolution spent too much time in NX state */ + SRV_ADM_STCHGC_DNS_TIMEOUT, /* resolution timeout */ + SRV_ADM_STCHGC_DNS_REFUSED, /* query refused by dns server */ + SRV_ADM_STCHGC_DNS_UNSPEC, /* unspecified dns error */ + SRV_ADM_STCHGC_STATS_DISABLE, /* legacy disable from the stats */ + SRV_ADM_STCHGC_STATS_STOP /* legacy stop from the stats */ +}; + +/* srv operational change causes */ +enum srv_op_st_chg_cause { + SRV_OP_STCHGC_NONE = 0, + SRV_OP_STCHGC_HEALTH, /* changed from a health check */ + SRV_OP_STCHGC_AGENT, /* changed from an agent check */ + SRV_OP_STCHGC_CLI, /* changed from the cli */ + SRV_OP_STCHGC_LUA, /* changed from lua */ + SRV_OP_STCHGC_STATS_WEB, /* changed from the web interface */ + SRV_OP_STCHGC_STATEFILE /* changed from state file */ +}; + +struct pid_list { + struct list list; + pid_t pid; + struct task 
*t; + int status; + int exited; +}; + +/* A tree occurrence is a descriptor of a place in a tree, with a pointer back + * to the server itself. + */ +struct server; +struct tree_occ { + struct server *server; + struct eb32_node node; +}; + +/* Each server will have one occurrence of this structure per thread */ +struct srv_per_thread { + struct mt_list streams; /* streams using this server (used by "shutdown server sessions") */ + struct eb_root idle_conns; /* Shareable idle connections */ + struct eb_root safe_conns; /* Safe idle connections */ + struct eb_root avail_conns; /* Connections in use, but with still new streams available */ + + /* Secondary idle conn storage used in parallel to idle/safe trees. + * Used to sort them by last usage and purge them in reverse order. + */ + struct list idle_conn_list; +}; + +/* Each server will have one occurrence of this structure per thread group */ +struct srv_per_tgroup { + unsigned int next_takeover; /* thread ID to try to steal connections from next time */ +}; + +/* Configure the protocol selection for websocket */ +enum __attribute__((__packed__)) srv_ws_mode { + SRV_WS_AUTO = 0, + SRV_WS_H1, + SRV_WS_H2, +}; + +/* Server-side TLV list, contains the types of the TLVs that should be sent out. + * Additionally, it can contain a format string, if specified in the config. 
+ */ +struct srv_pp_tlv_list { + struct list list; + struct list fmt; + char *fmt_string; + unsigned char type; +}; + +struct proxy; +struct server { + /* mostly config or admin stuff, doesn't change often */ + enum obj_type obj_type; /* object type == OBJ_TYPE_SERVER */ + enum srv_state next_state, cur_state; /* server state among SRV_ST_* */ + enum srv_admin next_admin, cur_admin; /* server maintenance status : SRV_ADMF_* */ + signed char use_ssl; /* ssl enabled (1: on, 0: disabled, -1 forced off) */ + unsigned int flags; /* server flags (SRV_F_*) */ + unsigned int pp_opts; /* proxy protocol options (SRV_PP_*) */ + struct list global_list; /* attach point in the global servers_list */ + struct server *next; + struct mt_list prev_deleted; /* deleted servers with 'next' ptr pointing to us */ + int cklen; /* the len of the cookie, to speed up checks */ + int rdr_len; /* the length of the redirection prefix */ + char *cookie; /* the id set in the cookie */ + char *rdr_pfx; /* the redirection prefix */ + + struct proxy *proxy; /* the proxy this server belongs to */ + const struct mux_proto_list *mux_proto; /* the mux to use for all outgoing connections (specified by the "proto" keyword) */ + struct net_addr_type addr_type; /* server address type (socket and transport hints) */ + struct log_target *log_target; /* when 'mode log' is enabled, target facility used to transport log messages */ + unsigned maxconn, minconn; /* max # of active sessions (0 = unlimited), min# for dynamic limit. 
*/ + struct srv_per_thread *per_thr; /* array of per-thread stuff such as connections lists */ + struct srv_per_tgroup *per_tgrp; /* array of per-tgroup stuff such as idle conns */ + unsigned int *curr_idle_thr; /* Current number of orphan idling connections per thread */ + + unsigned int pool_purge_delay; /* Delay before starting to purge the idle conns pool */ + unsigned int low_idle_conns; /* min idle connection count to start picking from other threads */ + unsigned int max_idle_conns; /* Max number of connections allowed in the orphan connections list */ + int max_reuse; /* Max number of requests on a same connection */ + struct task *warmup; /* the task dedicated to the warmup when slowstart is set */ + + struct server *track; /* the server we're currently tracking, if any */ + struct server *trackers; /* the list of servers tracking us, if any */ + struct server *tracknext; /* next server tracking <track> in <track>'s trackers list */ + char *trackit; /* temporary variable to make assignment deferrable */ + int consecutive_errors_limit; /* number of consecutive errors that triggers an event */ + short observe, onerror; /* observing mode: one of HANA_OBS_*; what to do on error: one of HANA_ONERR_* */ + short onmarkeddown; /* what to do when marked down: one of HANA_ONMARKEDDOWN_* */ + short onmarkedup; /* what to do when marked up: one of HANA_ONMARKEDUP_* */ + int slowstart; /* slowstart time in seconds (ms in the conf) */ + + char *id; /* just for identification */ + uint32_t rid; /* revision: if id has been reused for a new server, rid won't match */ + unsigned iweight,uweight, cur_eweight; /* initial weight, user-specified weight, and effective weight */ + unsigned wscore; /* weight score, used during srv map computation */ + unsigned next_eweight; /* next pending eweight to commit */ + unsigned rweight; /* remainder of weight in the current LB tree */ + unsigned cumulative_weight; /* weight of servers prior to this one in the same group, for chash balancing
*/ + int maxqueue; /* maximum number of pending connections allowed */ + int shard; /* shard (in peers protocol context only) */ + int log_bufsize; /* implicit ring bufsize (for log server only - in log backend) */ + + enum srv_ws_mode ws; /* configure the protocol selection for websocket */ + /* 3 bytes hole here */ + + uint refcount; /* refcount used to remove a server at runtime */ + + /* The elements below may be changed on every single request by any + * thread, and generally at the same time. + */ + THREAD_PAD(63); + struct eb32_node idle_node; /* When to next do cleanup in the idle connections */ + unsigned int curr_idle_conns; /* Current number of orphan idling connections, both the idle and the safe lists */ + unsigned int curr_idle_nb; /* Current number of connections in the idle list */ + unsigned int curr_safe_nb; /* Current number of connections in the safe list */ + unsigned int curr_used_conns; /* Current number of used connections */ + unsigned int max_used_conns; /* Max number of used connections (the counter is reset at each connection purges */ + unsigned int est_need_conns; /* Estimate on the number of needed connections (max of curr and previous max_used) */ + + struct queue queue; /* pending connections */ + + /* Element below are usd by LB algorithms and must be doable in + * parallel to other threads reusing connections above. 
+ */ + THREAD_PAD(63); + __decl_thread(HA_SPINLOCK_T lock); /* may enclose the proxy's lock, must not be taken under */ + unsigned npos, lpos; /* next and last positions in the LB tree, protected by LB lock */ + union { + struct eb32_node lb_node; /* node used for tree-based load balancing */ + struct list lb_list; /* elem used for list-based load balancing */ + }; + struct server *next_full; /* next server in the temporary full list */ + + /* usually atomically updated by any thread during parsing or on end of request */ + THREAD_PAD(63); + int cur_sess; /* number of currently active sessions (including syn_sent) */ + int served; /* # of active sessions currently being served (ie not pending) */ + int consecutive_errors; /* current number of consecutive errors */ + struct freq_ctr sess_per_sec; /* sessions per second on this server */ + struct be_counters counters; /* statistics counters */ + + /* Below are some relatively stable settings, only changed under the lock */ + THREAD_PAD(63); + + struct eb_root *lb_tree; /* we want to know in what tree the server is */ + struct tree_occ *lb_nodes; /* lb_nodes_tot * struct tree_occ */ + unsigned lb_nodes_tot; /* number of allocated lb_nodes (C-HASH) */ + unsigned lb_nodes_now; /* number of lb_nodes placed in the tree (C-HASH) */ + + const struct netns_entry *netns; /* contains network namespace name or NULL. 
Network namespace comes from configuration */ + struct xprt_ops *xprt; /* transport-layer operations */ + unsigned int svc_port; /* the port to connect to (for relevant families) */ + unsigned down_time; /* total time the server was down */ + time_t last_change; /* last time, when the state was changed */ + + int puid; /* proxy-unique server ID, used for SNMP, and "first" LB algo */ + int tcp_ut; /* for TCP, user timeout */ + + int do_check; /* temporary variable used during parsing to denote if health checks must be enabled */ + int do_agent; /* temporary variable used during parsing to denote if an auxiliary agent check must be enabled */ + struct check check; /* health-check specific configuration */ + struct check agent; /* agent specific configuration */ + + struct resolv_requester *resolv_requester; /* used to link a server to its DNS resolution */ + char *resolvers_id; /* resolvers section used by this server */ + struct resolvers *resolvers; /* pointer to the resolvers structure used by this server */ + char *lastaddr; /* the address string provided by the server-state file */ + struct resolv_options resolv_opts; + int hostname_dn_len; /* string length of the server hostname in Domain Name format */ + char *hostname_dn; /* server hostname in Domain Name format */ + char *hostname; /* server hostname */ + struct sockaddr_storage init_addr; /* plain IP address specified on the init-addr line */ + unsigned int init_addr_methods; /* initial address setting, 3-bit per method, ends at 0, enough to store 10 entries */ + enum srv_log_proto log_proto; /* used proto to emit messages on server lines from log or ring section */ + + char *sni_expr; /* Temporary variable to store a sample expression for SNI */ + struct { + void *ctx; + struct { + /* ptr/size may be shared R/O with other threads under read lock + * "sess_lock", however only the owning thread may change them + * (under write lock). 
+ */ + unsigned char *ptr; + int size; + int allocated_size; + char *sni; /* SNI used for the session */ + __decl_thread(HA_RWLOCK_T sess_lock); + } * reused_sess; + uint last_ssl_sess_tid; /* last tid+1 having updated reused_sess (0=none, >0=tid+1) */ + + struct ckch_inst *inst; /* Instance of the ckch_store in which the certificate was loaded (might be null if server has no certificate) */ + __decl_thread(HA_RWLOCK_T lock); /* lock the cache and SSL_CTX during commit operations */ + + char *ciphers; /* cipher suite to use if non-null */ + char *ciphersuites; /* TLS 1.3 cipher suite to use if non-null */ + char *curves; /* TLS curves list */ + int options; /* ssl options */ + int verify; /* verify method (set of SSL_VERIFY_* flags) */ + struct tls_version_filter methods; /* ssl methods */ + char *verify_host; /* hostname of certificate must match this host */ + char *ca_file; /* CAfile to use on verify */ + char *crl_file; /* CRLfile to use on verify */ + char *client_crt; /* client certificate to send */ + char *sigalgs; /* Signature algorithms */ + char *client_sigalgs; /* Client Signature algorithms */ + struct sample_expr *sni; /* sample expression for SNI */ + char *npn_str; /* NPN protocol string */ + int npn_len; /* NPN protocol string length */ + char *alpn_str; /* ALPN protocol string */ + int alpn_len; /* ALPN protocol string length */ + } ssl_ctx; + struct resolv_srvrq *srvrq; /* Pointer representing the DNS SRV requeest, if any */ + struct list srv_rec_item; /* to attach server to a srv record item */ + struct list ip_rec_item; /* to attach server to a A or AAAA record item */ + struct ebpt_node host_dn; /* hostdn store for srvrq and state file matching*/ + struct list pp_tlvs; /* to send out PROXY protocol v2 TLVs */ + struct task *srvrq_check; /* Task testing SRV record expiration date for this server */ + struct { + const char *file; /* file where the section appears */ + struct eb32_node id; /* place in the tree of used IDs */ + struct ebpt_node 
name; /* place in the tree of used names */ + int line; /* line where the section appears */ + } conf; /* config information */ + struct ebpt_node addr_node; /* Node for string representation of address for the server (including port number) */ + /* Template information used only for server objects which + * serve as template filled at parsing time and used during + * server allocations from server templates. + */ + struct { + char *prefix; + int nb_low; + int nb_high; + } tmpl_info; + + event_hdl_sub_list e_subs; /* event_hdl: server's subscribers list (atomically updated) */ + + /* warning, these structs are huge, keep them at the bottom */ + struct conn_src conn_src; /* connection source settings */ + struct sockaddr_storage addr; /* the address to connect to, doesn't include the port */ + struct sockaddr_storage socks4_addr; /* the address of the SOCKS4 Proxy, including the port */ + + EXTRA_COUNTERS(extra_counters); +}; + +/* data provided to EVENT_HDL_SUB_SERVER handlers through event_hdl facility */ +struct event_hdl_cb_data_server { + /* provided by: + * EVENT_HDL_SUB_SERVER_ADD + * EVENT_HDL_SUB_SERVER_DEL + * EVENT_HDL_SUB_SERVER_UP + * EVENT_HDL_SUB_SERVER_DOWN + * EVENT_HDL_SUB_SERVER_STATE + * EVENT_HDL_SUB_SERVER_ADMIN + * EVENT_HDL_SUB_SERVER_CHECK + * EVENT_HDL_SUB_SERVER_INETADDR + */ + struct { + /* safe data can be safely used from both + * sync and async handlers + * data consistency is guaranteed + */ + char name[64]; /* server name/id */ + char proxy_name[64]; /* id of proxy the server belongs to */ + int proxy_uuid; /* uuid of the proxy the server belongs to */ + int puid; /* proxy-unique server ID */ + uint32_t rid; /* server id revision */ + unsigned int flags; /* server flags */ + } safe; + struct { + /* unsafe data may only be used from sync handlers: + * in async mode, data consistency cannot be guaranteed + * and unsafe data may already be stale, thus using + * it is highly discouraged because it + * could lead to undefined behavior 
(UAF, null dereference...) + */ + struct server *ptr; /* server live ptr */ + /* lock hints */ + uint8_t thread_isolate; /* 1 = thread_isolate is on, no locking required */ + uint8_t srv_lock; /* 1 = srv lock is held */ + } unsafe; +}; + +/* check result snapshot provided through some event_hdl server events */ +struct event_hdl_cb_data_server_checkres { + uint8_t agent; /* 1 = agent check, 0 = health check */ + enum chk_result result; /* failed, passed, condpass (CHK_RES_*) */ + long duration; /* total check duration in ms */ + struct { + short status; /* check status as in check->status */ + short code; /* provided with some check statuses */ + } reason; + struct { + int cur; /* dynamic (= check->health) */ + int rise, fall; /* config dependent */ + } health; /* check's health, see check-t.h */ +}; + +/* data provided to EVENT_HDL_SUB_SERVER_STATE handlers through + * event_hdl facility + * + * Note that this may be cast to a regular event_hdl_cb_data_server (its + * first member) if you don't care about state related optional info + */ +struct event_hdl_cb_data_server_state { + /* provided by: + * EVENT_HDL_SUB_SERVER_STATE + */ + struct event_hdl_cb_data_server server; /* must be at the beginning */ + struct { + uint8_t type; /* 0 = operational (see op_st_chg), 1 = administrative (see adm_st_chg) */ + enum srv_state old_state, new_state; /* updated by both operational and admin changes */ + uint32_t requeued; /* requeued connections due to server state change */ + union { + /* state change cause: + * + * look for op_st_chg for operational state change, + * and adm_st_chg for administrative state change + */ + struct { + enum srv_op_st_chg_cause cause; /* operational state change cause */ + union { + /* check result is provided with + * cause == SRV_OP_STCHGC_HEALTH or cause == SRV_OP_STCHGC_AGENT + */ + struct event_hdl_cb_data_server_checkres check; + }; + } op_st_chg; + struct { + enum srv_adm_st_chg_cause cause; /* administrative state change cause */ + } adm_st_chg; + }; + } safe; + /* no unsafe data */ +}; + +/* data provided to EVENT_HDL_SUB_SERVER_ADMIN handlers through + * 
event_hdl facility + * + * Note that this may be cast to a regular event_hdl_cb_data_server (its + * first member) if you don't care about admin related optional info + */ +struct event_hdl_cb_data_server_admin { + /* provided by: + * EVENT_HDL_SUB_SERVER_ADMIN + */ + struct event_hdl_cb_data_server server; /* must be at the beginning */ + struct { + enum srv_admin old_admin, new_admin; /* presumably the admin value before / after the change -- confirm */ + uint32_t requeued; /* requeued connections due to server admin change */ + /* admin change cause */ + enum srv_adm_st_chg_cause cause; + } safe; + /* no unsafe data */ +}; + +/* data provided to EVENT_HDL_SUB_SERVER_CHECK handlers through + * event_hdl facility + * + * Note that this may be cast to a regular event_hdl_cb_data_server (its + * first member) if you don't care about check related optional info + */ +struct event_hdl_cb_data_server_check { + /* provided by: + * EVENT_HDL_SUB_SERVER_CHECK + */ + struct event_hdl_cb_data_server server; /* must be at the beginning */ + struct { + struct event_hdl_cb_data_server_checkres res; /* check result snapshot */ + } safe; + struct { + struct check *ptr; /* check ptr */ + } unsafe; +}; + +/* struct to store server address and port information in INET + * context + */ +struct server_inetaddr { + int family; /* AF_UNSPEC, AF_INET or AF_INET6 */ + union { + struct in_addr v4; + struct in6_addr v6; + } addr; /* may hold v4 or v6 addr */ + struct { + unsigned int svc; /* port value -- presumably the service port, cf. svc_port in struct server; confirm */ + uint8_t map; /* is a mapped port? 
(boolean) */ + } port; +}; + +/* data provided to EVENT_HDL_SUB_SERVER_INETADDR handlers through + * event_hdl facility + * + * Note that this may be cast to a regular event_hdl_cb_data_server (its + * first member) if you don't care about inetaddr related optional info + */ +struct event_hdl_cb_data_server_inetaddr { + /* provided by: + * EVENT_HDL_SUB_SERVER_INETADDR + */ + struct event_hdl_cb_data_server server; /* must be at the beginning */ + struct { + struct server_inetaddr prev; /* presumably the address/port before the change -- confirm */ + struct server_inetaddr next; /* presumably the address/port after the change -- confirm */ + uint8_t purge_conn; /* set to 1 if the network change will force a connection cleanup */ + } safe; + /* no unsafe data */ +}; + +/* Storage structure to load server-state lines from a flat file into + * an ebtree, for faster processing + */ +struct server_state_line { + char *line; + char *params[SRV_STATE_FILE_MAX_FIELDS]; + struct eb64_node node; +}; + + +/* Descriptor for a "server" keyword. The ->parse() function returns 0 in case of + * success, or a combination of ERR_* flags if an error is encountered. The + * function pointer can be NULL if not implemented. The function also has + * access to the current "server" config line. The ->skip value tells the parser + * how many words have to be skipped after the keyword. If the function needs to + * parse more keywords, it needs to update cur_arg. + */ +struct srv_kw { + const char *kw; + int (*parse)(char **args, int *cur_arg, struct proxy *px, struct server *srv, char **err); /* may be NULL if not implemented */ + int skip; /* nb min of args to skip, for use when kw is not handled */ + int default_ok; /* non-zero if kw is supported in default-server section */ + int dynamic_ok; /* non-zero if kw is supported in add server cli command */ +}; + +/* + * A keyword list. It is a NULL-terminated array of keywords. It embeds a + * struct list in order to be linked to other lists, allowing it to easily + * be declared where it is needed, and linked without duplicating data nor + * allocating memory. It is also possible to indicate a scope for the keywords. 
+ */ +struct srv_kw_list { + const char *scope; /* optional scope indicated for the keywords */ + struct list list; /* used to link this keyword list to other lists */ + struct srv_kw kw[VAR_ARRAY]; /* the keywords, as a NULL-terminated array */ +}; + +#define SRV_PARSE_DEFAULT_SERVER 0x01 /* 'default-server' keyword */ +#define SRV_PARSE_TEMPLATE 0x02 /* 'server-template' keyword */ +#define SRV_PARSE_IN_PEER_SECTION 0x04 /* keyword in a peer section */ +#define SRV_PARSE_PARSE_ADDR 0x08 /* required to parse the server address in the second argument */ +#define SRV_PARSE_DYNAMIC 0x10 /* dynamic server created at runtime with cli */ +#define SRV_PARSE_INITIAL_RESOLVE 0x20 /* immediately resolve the fqdn to an ip address */ + +#endif /* _HAPROXY_SERVER_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/server.h b/include/haproxy/server.h new file mode 100644 index 0000000..2ba6e45 --- /dev/null +++ b/include/haproxy/server.h @@ -0,0 +1,328 @@ +/* + * include/haproxy/server.h + * This file defines everything related to servers. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SERVER_H +#define _HAPROXY_SERVER_H + +#include <unistd.h> + +#include <haproxy/api.h> +#include <haproxy/applet-t.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/proxy-t.h> +#include <haproxy/resolvers-t.h> +#include <haproxy/server-t.h> +#include <haproxy/task.h> +#include <haproxy/thread-t.h> +#include <haproxy/time.h> +#include <haproxy/tools.h> + + +__decl_thread(extern HA_SPINLOCK_T idle_conn_srv_lock); +extern struct idle_conns idle_conns[MAX_THREADS]; +extern struct task *idle_conn_task; +extern struct list servers_list; +extern struct dict server_key_dict; + +int srv_downtime(const struct server *s); +int srv_lastsession(const struct server *s); +int srv_getinter(const struct check *check); +void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl); +int parse_server(const char *file, int linenum, char **args, struct proxy *curproxy, const struct proxy *defproxy, int parse_flags); +int srv_update_addr(struct server *s, void *ip, int ip_sin_family, const char *updater); +int server_parse_sni_expr(struct server *newsrv, struct proxy *px, char **err); +const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, char *updater); +const char *srv_update_check_addr_port(struct server *s, const char *addr, const char *port); +const char *srv_update_agent_addr_port(struct server *s, const char *addr, const char *port); +struct server *server_find_by_id(struct proxy *bk, int id); +struct server *server_find_by_name(struct proxy *bk, const char *name); +struct server *server_find_best_match(struct proxy *bk, char *name, int id, int *diff); +void apply_server_state(void); +void srv_compute_all_admin_states(struct proxy *px); +int srv_set_addr_via_libc(struct server 
*srv, int *err_code); +int srv_init_addr(void); +struct server *cli_find_server(struct appctx *appctx, char *arg); +struct server *new_server(struct proxy *proxy); +void srv_take(struct server *srv); +struct server *srv_drop(struct server *srv); +void srv_free_params(struct server *srv); +int srv_init_per_thr(struct server *srv); +void srv_set_ssl(struct server *s, int use_ssl); +const char *srv_adm_st_chg_cause(enum srv_adm_st_chg_cause cause); +const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause); +void srv_event_hdl_publish_check(struct server *srv, struct check *check); + +/* functions related to server name resolution */ +int srv_prepare_for_resolution(struct server *srv, const char *hostname); +int srvrq_update_srv_status(struct server *s, int has_no_ip); +int snr_update_srv_status(struct server *s, int has_no_ip); +int srv_set_fqdn(struct server *srv, const char *fqdn, int resolv_locked); +const char *srv_update_fqdn(struct server *server, const char *fqdn, const char *updater, int dns_locked); +int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters); +int srvrq_resolution_error_cb(struct resolv_requester *requester, int error_code); +int snr_resolution_error_cb(struct resolv_requester *requester, int error_code); +struct server *snr_check_ip_callback(struct server *srv, void *ip, unsigned char *ip_family); +struct task *srv_cleanup_idle_conns(struct task *task, void *ctx, unsigned int state); +void srv_release_conn(struct server *srv, struct connection *conn); +struct connection *srv_lookup_conn(struct eb_root *tree, uint64_t hash); +struct connection *srv_lookup_conn_next(struct connection *conn); + +void _srv_add_idle(struct server *srv, struct connection *conn, int is_safe); +int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_safe); +void srv_add_to_avail_list(struct server *srv, struct connection *conn); +struct task *srv_cleanup_toremove_conns(struct task *task, void *context, 
unsigned int state); + +int srv_apply_track(struct server *srv, struct proxy *curproxy); + +/* + * Registers the server keyword list <kwl> as a list of valid keywords for next + * parsing sessions. + */ +void srv_register_keywords(struct srv_kw_list *kwl); + +/* Return a pointer to the server keyword <kw>, or NULL if not found. */ +struct srv_kw *srv_find_kw(const char *kw); + +/* Dumps all registered "server" keywords to the <out> string pointer. */ +void srv_dump_kws(char **out); + +/* Recomputes the server's eweight based on its state, uweight, the current time, + * and the proxy's algorithm. To be used after updating sv->uweight. The warmup + * state is automatically disabled if the time is elapsed. + */ +void server_recalc_eweight(struct server *sv, int must_update); + +/* + * Parses weight_str and configures sv accordingly. + * Returns NULL on success, error message string otherwise. + */ +const char *server_parse_weight_change_request(struct server *sv, + const char *weight_str); + +/* + * Parses addr_str and configures sv accordingly. <updater> names + * the source of the change in the associated log message. + * Returns NULL on success, error message string otherwise. + */ +const char *server_parse_addr_change_request(struct server *sv, + const char *addr_str, const char *updater); + +/* + * Parses maxconn_str and configures sv accordingly. + * Returns NULL on success, error message string otherwise. + */ +const char *server_parse_maxconn_change_request(struct server *sv, + const char *maxconn_str); + +/* Shutdown all connections of a server. The caller must pass a termination + * code in <why>, which must be one of SF_ERR_* indicating the reason for the + * shutdown. + */ +void srv_shutdown_streams(struct server *srv, int why); + +/* Shutdown all connections of all backup servers of a proxy. The caller must + * pass a termination code in <why>, which must be one of SF_ERR_* indicating + * the reason for the shutdown. 
+ */ +void srv_shutdown_backup_streams(struct proxy *px, int why); + +void srv_append_status(struct buffer *msg, struct server *s, struct check *, + int xferred, int forced); + +void srv_set_stopped(struct server *s, enum srv_op_st_chg_cause cause); +void srv_set_running(struct server *s, enum srv_op_st_chg_cause cause); +void srv_set_stopping(struct server *s, enum srv_op_st_chg_cause cause); + +/* Enables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to + * enforce either maint mode or drain mode. It is not allowed to set more than + * one flag at once. The equivalent "inherited" flag is propagated to all + * tracking servers. Maintenance mode disables health checks (but not agent + * checks). When either the flag is already set or no flag is passed, nothing + * is done. If <cause> is non-null, it will be displayed at the end of the log + * lines to justify the state change. + */ +void srv_set_admin_flag(struct server *s, enum srv_admin mode, enum srv_adm_st_chg_cause cause); + +/* Disables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to + * stop enforcing either maint mode or drain mode. It is not allowed to set more + * than one flag at once. The equivalent "inherited" flag is propagated to all + * tracking servers. Leaving maintenance mode re-enables health checks. When + * either the flag is already cleared or no flag is passed, nothing is done. + */ +void srv_clr_admin_flag(struct server *s, enum srv_admin mode); + +/* Calculates the dynamic persistent cookie for a server, if a secret key has + * been provided. 
+ */ +void srv_set_dyncookie(struct server *s); + +int srv_check_reuse_ws(struct server *srv); +const struct mux_ops *srv_get_ws_proto(struct server *srv); + +/* increase the number of cumulated sessions (counters.cum_sess) on the designated server, and feed the updated sess_per_sec value to the counters.sps_max maximum */ +static inline void srv_inc_sess_ctr(struct server *s) +{ + _HA_ATOMIC_INC(&s->counters.cum_sess); + HA_ATOMIC_UPDATE_MAX(&s->counters.sps_max, + update_freq_ctr(&s->sess_per_sec, 1)); +} + +/* set the time of last session on the designated server (now_ns converted by ns_to_sec()) */ +static inline void srv_set_sess_last(struct server *s) +{ + s->counters.last_sess = ns_to_sec(now_ns); +} + +/* returns the current server throttle rate between 0 and 100% */ +static inline unsigned int server_throttle_rate(struct server *sv) +{ + struct proxy *px = sv->proxy; + + /* when uweight is 0, we're in soft-stop so that cannot be a slowstart, + * thus the throttle is 100%. + */ + if (!sv->uweight) + return 100; + + return (100U * px->lbprm.wmult * sv->cur_eweight + px->lbprm.wdiv - 1) / (px->lbprm.wdiv * sv->uweight); +} + +/* + * Return true if the server has a zero user-weight or has SRV_ADMF_DRAIN set + * in cur_admin, meaning it's in draining mode (ie: not taking new non-persistent connections). + */ +static inline int server_is_draining(const struct server *s) +{ + return !s->uweight || (s->cur_admin & SRV_ADMF_DRAIN); +} + +/* Puts server <s> into maintenance mode (sets SRV_ADMF_FMAINT, then clears + * SRV_ADMF_FDRAIN), and propagates that status down to all tracking servers. + */ +static inline void srv_adm_set_maint(struct server *s) +{ + srv_set_admin_flag(s, SRV_ADMF_FMAINT, SRV_ADM_STCHGC_NONE); + srv_clr_admin_flag(s, SRV_ADMF_FDRAIN); +} + +/* Puts server <s> into drain mode (sets SRV_ADMF_FDRAIN, then clears + * SRV_ADMF_FMAINT), and propagates that status down to all tracking servers. + */ +static inline void srv_adm_set_drain(struct server *s) +{ + srv_set_admin_flag(s, SRV_ADMF_FDRAIN, SRV_ADM_STCHGC_NONE); + srv_clr_admin_flag(s, SRV_ADMF_FMAINT); +} + +/* Puts server <s> into ready mode, and propagate that status down to all + * tracking servers. 
+ */ +static inline void srv_adm_set_ready(struct server *s) +{ + srv_clr_admin_flag(s, SRV_ADMF_FDRAIN); + srv_clr_admin_flag(s, SRV_ADMF_FMAINT); +} + +/* appends an initaddr method to the existing list, using the first free 3-bit slot of the 32-bit word. Returns 1 on success, 0 on failure (no free slot left). */ +static inline int srv_append_initaddr(unsigned int *list, enum srv_initaddr addr) +{ + int shift = 0; + + while (shift + 3 < 32 && (*list >> shift)) + shift += 3; + + if (shift + 3 > 32) + return 0; + + *list |= addr << shift; + return 1; +} + +/* returns the next initaddr method and removes it from <list> by shifting + * it right (implying that it MUST NOT be the server's own list). Returns SRV_IADDR_END + * at the end. + */ +static inline enum srv_initaddr srv_get_next_initaddr(unsigned int *list) +{ + enum srv_initaddr ret; + + ret = *list & 7; + *list >>= 3; + return ret; +} + +static inline void srv_use_conn(struct server *srv, struct connection *conn) /* counts one more used connection on <srv> and raises max_used_conns / est_need_conns when exceeded; <conn> is not used here */ +{ + unsigned int curr, prev; + + curr = _HA_ATOMIC_ADD_FETCH(&srv->curr_used_conns, 1); + + + /* It's ok not to do that atomically, we don't need an + * exact max. + */ + prev = HA_ATOMIC_LOAD(&srv->max_used_conns); + if (prev < curr) + HA_ATOMIC_STORE(&srv->max_used_conns, curr); + + prev = HA_ATOMIC_LOAD(&srv->est_need_conns); + if (prev < curr) + HA_ATOMIC_STORE(&srv->est_need_conns, curr); +} + +/* checks if minconn and maxconn are consistent with each other + * and automatically adjusts them if it is not the case. + * This logic was historically implemented in check_config_validity() + * at boot time, but with the introduction of dynamic servers + * this may be used at multiple places in the code now + */ +static inline void srv_minmax_conn_apply(struct server *srv) +{ + if (srv->minconn > srv->maxconn) { + /* Only 'minconn' was specified, or it was strictly higher than + * 'maxconn'. Let's raise maxconn to minconn (minconn is left as is), as + * this will avoid further useless expensive computations. 
+ */ + srv->maxconn = srv->minconn; + } else if (srv->maxconn && !srv->minconn) { + /* minconn was not specified, so we set it to maxconn */ + srv->minconn = srv->maxconn; + } +} + +/* Returns true if the server is used in transparent mode: either it has no address and is not a reverse (SRV_F_RHTTP) server, or SRV_F_MAPPORTS is set. */ +static inline int srv_is_transparent(const struct server *srv) +{ + /* A reverse server does not have any address but it is not used as a + * transparent one. + */ + return (!is_addr(&srv->addr) && !(srv->flags & SRV_F_RHTTP)) || + (srv->flags & SRV_F_MAPPORTS); +} + +#endif /* _HAPROXY_SERVER_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/session-t.h b/include/haproxy/session-t.h new file mode 100644 index 0000000..dff167e --- /dev/null +++ b/include/haproxy/session-t.h @@ -0,0 +1,78 @@ +/* + * include/haproxy/session-t.h + * This file defines everything related to sessions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SESSION_T_H +#define _HAPROXY_SESSION_T_H + + +#include <sys/time.h> +#include <unistd.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <haproxy/api-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/task-t.h> +#include <haproxy/vars-t.h> + + +/* session flags */ +enum { + SESS_FL_NONE = 0x00000000, /* nothing */ + SESS_FL_PREFER_LAST = 0x00000001, /* NTLM auth, we should reuse last conn */ +}; + +/* max number of idle server connections kept attached to a session */ +#define MAX_SRV_LIST 5 + +struct session { + struct proxy *fe; /* the proxy this session depends on for the client side */ + struct listener *listener; /* the listener by which the request arrived */ + enum obj_type *origin; /* the connection / applet which initiated this session */ + struct timeval accept_date; /* date of the session's accept() in user date */ + ullong accept_ts; /* date of the session's accept() in internal date (monotonic) */ + struct stkctr *stkctr; /* stick counters for tcp-connection: array walked over global.tune.nb_stk_ctr entries, NULL while not allocated */ + struct vars vars; /* list of variables for the session scope. 
*/ + struct task *task; /* handshake timeout processing */ + long t_handshake; /* handshake duration, -1 = not completed */ + long t_idle; /* idle duration, -1 if never occurs */ + int idle_conns; /* Number of connections we're currently responsible for that we are not using */ + unsigned int flags; /* session flags, SESS_FL_* */ + struct list srv_list; /* List of servers and the connections the session is currently responsible for */ + struct sockaddr_storage *src; /* source address (pool), when known, otherwise NULL */ + struct sockaddr_storage *dst; /* destination address (pool), when known, otherwise NULL */ +}; + +struct sess_srv_list { /* one element of a session's srv_list */ + void *target; /* compared with conn->target to find the element matching a connection */ + struct list conn_list; /* Head of the connections list */ + struct list srv_list; /* Next element of the server list */ +}; + +#endif /* _HAPROXY_SESSION_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/session.h b/include/haproxy/session.h new file mode 100644 index 0000000..38335e4 --- /dev/null +++ b/include/haproxy/session.h @@ -0,0 +1,335 @@ +/* + * include/haproxy/session.h + * This file contains functions used to manage sessions. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SESSION_H +#define _HAPROXY_SESSION_H + +#include <haproxy/api.h> +#include <haproxy/connection.h> +#include <haproxy/global-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/pool.h> +#include <haproxy/server.h> +#include <haproxy/session-t.h> +#include <haproxy/stick_table.h> + +extern struct pool_head *pool_head_session; +extern struct pool_head *pool_head_sess_srv_list; + +struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type *origin); +void session_free(struct session *sess); +int session_accept_fd(struct connection *cli_conn); +int conn_complete_session(struct connection *conn); +struct task *session_expire_embryonic(struct task *t, void *context, unsigned int state); + +/* Remove the refcount from the session to the tracked counters, and clear the + * pointer to ensure this is only performed once. The caller is responsible for + * ensuring that the pointer is valid first. 
+ */ +static inline void session_store_counters(struct session *sess) +{ + void *ptr; + int i; + struct stksess *ts; + + if (unlikely(!sess->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) { + struct stkctr *stkctr = &sess->stkctr[i]; + + ts = stkctr_entry(stkctr); + if (!ts) + continue; + + ptr = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_CONN_CUR); + if (ptr) { + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + if (stktable_data_cast(ptr, std_t_uint) > 0) + stktable_data_cast(ptr, std_t_uint)--; + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + stktable_touch_local(stkctr->table, ts, 0); + } + + stkctr_set_entry(stkctr, NULL); + stksess_kill_if_expired(stkctr->table, ts, 1); + } +} + +/* Increase the number of cumulated HTTP requests in the tracked counters */ +static inline void session_inc_http_req_ctr(struct session *sess) +{ + int i; + + if (unlikely(!sess->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) + stkctr_inc_http_req_ctr(&sess->stkctr[i]); +} + +/* Increase the number of cumulated failed HTTP requests in the tracked + * counters. Only 4xx requests should be counted here so that we can + * distinguish between errors caused by client behaviour and other ones. + * Note that even 404 are interesting because they're generally caused by + * vulnerability scans. + */ +static inline void session_inc_http_err_ctr(struct session *sess) +{ + int i; + + if (unlikely(!sess->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) + stkctr_inc_http_err_ctr(&sess->stkctr[i]); +} + +/* Increase the number of cumulated failed HTTP responses in the tracked + * counters. Only some 5xx responses should be counted here so that we can + * distinguish between server failures and errors triggered by the client + * (i.e. 501 and 505 may be triggered and must be ignored). 
+ */ +static inline void session_inc_http_fail_ctr(struct session *sess) +{ + int i; + + if (unlikely(!sess->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) + stkctr_inc_http_fail_ctr(&sess->stkctr[i]); +} + + +/* Remove the connection from the session list, and destroy the srv_list if it's now empty */ +static inline void session_unown_conn(struct session *sess, struct connection *conn) +{ + struct sess_srv_list *srv_list = NULL; + + BUG_ON(objt_listener(conn->target)); + + /* WT: this currently is a workaround for an inconsistency between + * the link status of the connection in the session list and the + * connection's owner. This should be removed as soon as all this + * is addressed. Right now it's possible to enter here with a non-null + * conn->owner that points to a dead session, but in this case the + * element is not linked. + */ + if (!LIST_INLIST(&conn->session_list)) + return; + + if (conn->flags & CO_FL_SESS_IDLE) + sess->idle_conns--; + LIST_DEL_INIT(&conn->session_list); + conn->owner = NULL; + list_for_each_entry(srv_list, &sess->srv_list, srv_list) { + if (srv_list->target == conn->target) { + if (LIST_ISEMPTY(&srv_list->conn_list)) { + LIST_DELETE(&srv_list->srv_list); + pool_free(pool_head_sess_srv_list, srv_list); + } + break; + } + } +} + +/* Add the connection <conn> to the server list of the session <sess>. This + * function is called only if the connection is private. Nothing is performed if + * the connection is already in the session sever list or if the session does + * not own the connection. 
+ */
+static inline int session_add_conn(struct session *sess, struct connection *conn, void *target)
+{
+	struct sess_srv_list *cur;
+	struct sess_srv_list *bucket = NULL;
+
+	BUG_ON(objt_listener(conn->target));
+
+	/* Already attached to a session list */
+	if (!LIST_ISEMPTY(&conn->session_list))
+		return 1;
+
+	/* Owned by another session */
+	if (conn->owner && conn->owner != sess)
+		return 1;
+
+	/* look for the entry already associated with <target>, if any */
+	list_for_each_entry(cur, &sess->srv_list, srv_list) {
+		if (cur->target == target) {
+			bucket = cur;
+			break;
+		}
+	}
+
+	if (!bucket) {
+		/* The session has no connection for the server, create a new entry */
+		bucket = pool_alloc(pool_head_sess_srv_list);
+		if (!bucket)
+			return 0;
+		bucket->target = target;
+		LIST_INIT(&bucket->conn_list);
+		LIST_APPEND(&sess->srv_list, &bucket->srv_list);
+	}
+
+	LIST_APPEND(&bucket->conn_list, &conn->session_list);
+	return 1;
+}
+
+/* Returns 0 if the session can keep the idle conn, -1 if it was destroyed. The
+ * connection must be private. A connection owned by another session is left
+ * untouched and 0 is returned as well.
+ */
+static inline int session_check_idle_conn(struct session *sess, struct connection *conn)
+{
+	/* Another session owns this connection */
+	if (conn->owner != sess)
+		return 0;
+
+	if (sess->idle_conns < sess->fe->max_out_conns) {
+		/* still below the frontend's limit: keep it as idle */
+		conn->flags |= CO_FL_SESS_IDLE;
+		sess->idle_conns++;
+		return 0;
+	}
+
+	/* limit reached: detach the connection and destroy it via its mux */
+	session_unown_conn(sess, conn);
+	conn->owner = NULL;
+	conn->flags &= ~CO_FL_SESS_IDLE;
+	conn->mux->destroy(conn->ctx);
+	return -1;
+}
+
+/* Look for an available connection matching the target <target> in the server
+ * list of the session <sess>. It returns a connection if found. Otherwise it
+ * returns NULL.
+ */ +static inline struct connection *session_get_conn(struct session *sess, void *target, int64_t hash) +{ + struct connection *srv_conn = NULL; + struct sess_srv_list *srv_list; + + list_for_each_entry(srv_list, &sess->srv_list, srv_list) { + if (srv_list->target == target) { + list_for_each_entry(srv_conn, &srv_list->conn_list, session_list) { + if ((srv_conn->hash_node && srv_conn->hash_node->node.key == hash) && + srv_conn->mux && + (srv_conn->mux->avail_streams(srv_conn) > 0) && + !(srv_conn->flags & CO_FL_WAIT_XPRT)) { + if (srv_conn->flags & CO_FL_SESS_IDLE) { + srv_conn->flags &= ~CO_FL_SESS_IDLE; + sess->idle_conns--; + } + goto end; + } + } + srv_conn = NULL; /* No available connection found */ + goto end; + } + } + + end: + return srv_conn; +} + +/* Returns the source address of the session and fallbacks on the client + * connection if not set. It returns a const address on success or NULL on + * failure. + */ +static inline const struct sockaddr_storage *sess_src(struct session *sess) +{ + struct connection *cli_conn = objt_conn(sess->origin); + + if (sess->src) + return sess->src; + if (cli_conn && conn_get_src(cli_conn)) + return conn_src(cli_conn); + return NULL; +} + +/* Returns the destination address of the session and fallbacks on the client + * connection if not set. It returns a const address on success or NULL on + * failure. + */ +static inline const struct sockaddr_storage *sess_dst(struct session *sess) +{ + struct connection *cli_conn = objt_conn(sess->origin); + + if (sess->dst) + return sess->dst; + if (cli_conn && conn_get_dst(cli_conn)) + return conn_dst(cli_conn); + return NULL; +} + + +/* Retrieves the source address of the session <sess>. Returns non-zero on + * success or zero on failure. The operation is only performed once and the + * address is stored in the session for future use. On the first call, the + * session source address is copied from the client connection one. 
+ */ +static inline int sess_get_src(struct session *sess) +{ + struct connection *cli_conn = objt_conn(sess->origin); + const struct sockaddr_storage *src = NULL; + + if (sess->src) + return 1; + + if (cli_conn && conn_get_src(cli_conn)) + src = conn_src(cli_conn); + if (!src) + return 0; + + if (!sockaddr_alloc(&sess->src, src, sizeof(*src))) + return 0; + + return 1; +} + + +/* Retrieves the destination address of the session <sess>. Returns non-zero on + * success or zero on failure. The operation is only performed once and the + * address is stored in the session for future use. On the first call, the + * session destination address is copied from the client connection one. + */ +static inline int sess_get_dst(struct session *sess) +{ + struct connection *cli_conn = objt_conn(sess->origin); + const struct sockaddr_storage *dst = NULL; + + if (sess->dst) + return 1; + + if (cli_conn && conn_get_dst(cli_conn)) + dst = conn_dst(cli_conn); + if (!dst) + return 0; + + if (!sockaddr_alloc(&sess->dst, dst, sizeof(*dst))) + return 0; + + return 1; +} + +#endif /* _HAPROXY_SESSION_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/shctx-t.h b/include/haproxy/shctx-t.h new file mode 100644 index 0000000..493024a --- /dev/null +++ b/include/haproxy/shctx-t.h @@ -0,0 +1,63 @@ +/* + * include/haproxy/shctx-t.h - shared context management functions for SSL + * + * Copyright (C) 2011-2012 EXCELIANCE + * + * Author: Emeric Brun - emeric@exceliance.fr + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#ifndef __HAPROXY_SHCTX_T_H
+#define __HAPROXY_SHCTX_T_H
+
+#include <haproxy/api-t.h>
+#include <haproxy/thread-t.h>
+
+/* The three settings below may be overridden at build time */
+#ifndef SHSESS_BLOCK_MIN_SIZE
+#define SHSESS_BLOCK_MIN_SIZE 128
+#endif
+
+#ifndef SHSESS_MAX_DATA_LEN
+#define SHSESS_MAX_DATA_LEN 4096
+#endif
+
+#ifndef SHCTX_APPNAME
+#define SHCTX_APPNAME "haproxy"
+#endif
+
+#define SHCTX_E_ALLOC_CACHE -1
+#define SHCTX_E_INIT_LOCK -2
+
+#define SHCTX_F_REMOVING 0x1 /* Removing flag, does not accept new */
+
+/* generic shctx struct: one block, linked to the others through <list> and
+ * immediately followed by its variable-size data area.
+ */
+struct shared_block {
+	struct list list;
+	unsigned int len; /* data length for the row */
+	unsigned int block_count; /* number of blocks */
+	unsigned int refcount;
+	struct shared_block *last_reserved;
+	struct shared_block *last_append;
+	unsigned char data[VAR_ARRAY]; /* variable-size trailing storage */
+};
+
+/* A shared context: a lock-protected list of blocks with its callbacks,
+ * followed by a variable-size data area.
+ */
+struct shared_context {
+	__decl_thread(HA_RWLOCK_T lock);
+	struct list avail; /* list for active and free blocks */
+	unsigned int nbav; /* number of available blocks */
+	unsigned int max_obj_size; /* maximum object size (in bytes). */
+	void (*free_block)(struct shared_block *first, void *data);
+	void (*reserve_finish)(struct shared_context *shctx);
+	void *cb_data;
+	short int block_size;
+	ALWAYS_ALIGN(64); /* The following member needs to be aligned to 64 in the
+	                     cache's case because the cache struct contains an explicitly
+	                     aligned member (struct cache_tree). */
+	unsigned char data[VAR_ARRAY];
+};
+
+#endif /* __HAPROXY_SHCTX_T_H */
diff --git a/include/haproxy/shctx.h b/include/haproxy/shctx.h
new file mode 100644
index 0000000..a57cf15
--- /dev/null
+++ b/include/haproxy/shctx.h
@@ -0,0 +1,80 @@
+/*
+ * include/haproxy/shctx.h - shared context management functions for SSL
+ *
+ * Copyright (C) 2011-2012 EXCELIANCE
+ *
+ * Author: Emeric Brun - emeric@exceliance.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __HAPROXY_SHCTX_H
+#define __HAPROXY_SHCTX_H
+
+#include <haproxy/api.h>
+#include <haproxy/list.h>
+#include <haproxy/shctx-t.h>
+#include <haproxy/thread.h>
+
+int shctx_init(struct shared_context **orig_shctx,
+               int maxblocks, int blocksize, unsigned int maxobjsz,
+               int extra);
+struct shared_block *shctx_row_reserve_hot(struct shared_context *shctx,
+                                           struct shared_block *last, int data_len);
+void shctx_row_detach(struct shared_context *shctx, struct shared_block *first);
+void shctx_row_reattach(struct shared_context *shctx, struct shared_block *first);
+int shctx_row_data_append(struct shared_context *shctx,
+                          struct shared_block *first,
+                          unsigned char *data, int len);
+int shctx_row_data_get(struct shared_context *shctx, struct shared_block *first,
+                       unsigned char *dst, int offset, int len);
+
+
+/* Lock functions: thin wrappers around the HA_RWLOCK_* macros, all applied to
+ * <shctx>->lock with the SHCTX_LOCK label.
+ */
+
+/* takes <shctx>'s lock for reading (HA_RWLOCK_RDLOCK) */
+static inline void shctx_rdlock(struct shared_context *shctx)
+{
+	HA_RWLOCK_RDLOCK(SHCTX_LOCK, &shctx->lock);
+}
+/* releases <shctx>'s lock taken for reading (HA_RWLOCK_RDUNLOCK) */
+static inline void shctx_rdunlock(struct shared_context *shctx)
+{
+	HA_RWLOCK_RDUNLOCK(SHCTX_LOCK, &shctx->lock);
+}
+/* takes <shctx>'s lock for writing (HA_RWLOCK_WRLOCK) */
+static inline void shctx_wrlock(struct shared_context *shctx)
+{
+	HA_RWLOCK_WRLOCK(SHCTX_LOCK, &shctx->lock);
+}
+/* releases <shctx>'s lock taken for writing (HA_RWLOCK_WRUNLOCK) */
+static inline void shctx_wrunlock(struct shared_context *shctx)
+{
+	HA_RWLOCK_WRUNLOCK(SHCTX_LOCK, &shctx->lock);
+}
+
+/* List Macros */
+
+/*
+ * Moves block <s> next to <first>, which is not necessarily the head of a
+ * list: <s> is first unlinked from the list it currently belongs to, then
+ * linked relative to <first> using LIST_APPEND(). The number of available
+ * blocks of <shctx> is decremented.
+ */
+static inline void shctx_block_append_hot(struct shared_context *shctx,
+                                          struct shared_block *first,
+                                          struct shared_block *s)
+{
+	shctx->nbav--;
+	LIST_DELETE(&s->list);
+	LIST_APPEND(&first->list, &s->list);
+}
+
+/* Detaches block <s>: the number of available blocks of <shctx> is
+ * decremented, <s> is unlinked from its list and its list element is
+ * reinitialized. Returns <s>.
+ */
+static inline struct shared_block *shctx_block_detach(struct shared_context *shctx,
+                                                      struct shared_block *s)
+{
+	shctx->nbav--;
+	LIST_DELETE(&s->list);
+	LIST_INIT(&s->list);
+	return s;
+}
+
+#endif /* __HAPROXY_SHCTX_H */
+
diff --git a/include/haproxy/show_flags-t.h b/include/haproxy/show_flags-t.h
new file mode 100644
index 0000000..824d771
--- /dev/null
+++ b/include/haproxy/show_flags-t.h
@@ -0,0 +1,99 @@
+/*
+ * include/haproxy/show_flags-t.h
+ * These are helper macros used to decode flags for debugging
+ *
+ * Copyright (C) 2022 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_SHOW_FLAGS_H
+#define _HAPROXY_SHOW_FLAGS_H
+
+/* Only define the macro below if the caller requests it using HA_EXPOSE_FLAGS.
+ * It will be used by many low-level includes and we don't want to
+ * include the huge stdio here by default.
The macro is used to make a string + * of a set of flags (and handles one flag at a time). It will append into + * <_buf>:<_len> the state of flag <_val> in <_flg>, appending string <_del> as + * delimiters till the last flag is dumped, then updating <_buf> and <_len> + * accordingly. <_nam> is used as the name for value <_val>. <_flg> loses all + * dumped flags. If <_flg> is zero and <_val> is 0, a "0" is reported, this can + * be used as a prologue to the dump. If <_val> contains more than one bit set, + * <_flg>'s hexadecimal output is reported instead of a name. + * + * It is possible to use it to enumerate all flags from right to left so that + * they are easier to check in the code. It will start by executing the optional + * code block in the extra flags (if any) before proceeding with the dump using + * the arguments. It is suggested to locally rename it to a single-char macro + * locally for readability, e.g: + * + * #define _(n, ...) __APPEND_FLAG(buf, len, del, flg, n, #n, __VA_ARGS__) + * _(0); + * _(X_FLAG1, _(X_FLAG2, _(X_FLAG3))); + * _(~0); + * #undef _ + * + * __APPEND_ENUM() works a bit differently in that it takes an additional mask + * to isolate bits to compare to the enum's value, and will remove the mask's + * bits at once in case of match. + */ +#ifdef HA_EXPOSE_FLAGS + +#define __APPEND_FLAG(_buf, _len, _del, _flg, _val, _nam, ...) \ + do { \ + size_t _ret = 0; \ + unsigned int _flg0 = (_flg); \ + do { __VA_ARGS__; } while (0); \ + (_flg) &= ~(unsigned int)(_val); \ + if (!((unsigned int)_val) && !(_flg)) \ + _ret = snprintf(_buf, _len, "0%s", \ + (_flg) ? (_del) : ""); \ + else if ((_flg0) & (_val)) { \ + if ((_val) & ((_val) - 1)) \ + _ret = snprintf(_buf, _len, "%#x%s", \ + (_flg0), (_flg) ? (_del) : ""); \ + else \ + _ret = snprintf(_buf, _len, _nam "%s", \ + (_flg) ? 
(_del) : ""); \ + } \ + if (_ret < _len) { \ + _len -= _ret; \ + _buf += _ret; \ + } \ + } while (0) + +#define __APPEND_ENUM(_buf, _len, _del, _flg, _msk, _val, _nam, ...) \ + do { \ + size_t _ret = 0; \ + do { __VA_ARGS__; } while (0); \ + if (((_flg) & (_msk)) == (_val)) { \ + (_flg) &= ~(_msk); \ + _ret = snprintf(_buf, _len, _nam "%s", \ + (_flg) ? (_del) : ""); \ + } \ + if (_ret < _len) { \ + _len -= _ret; \ + _buf += _ret; \ + } \ + } while (0) + +#else /* EOF not defined => no stdio, do nothing */ + +#define __APPEND_FLAG(_buf, _len, _del, _flg, _val, _nam, ...) do { } while (0) +#define __APPEND_ENUM(_buf, _len, _del, _flg, _msk, _val, _nam, ...) do { } while (0) + +#endif /* EOF */ + +#endif /* _HAPROXY_SHOW_FLAGS_H */ diff --git a/include/haproxy/signal-t.h b/include/haproxy/signal-t.h new file mode 100644 index 0000000..85d4b33 --- /dev/null +++ b/include/haproxy/signal-t.h @@ -0,0 +1,66 @@ +/* + * include/haproxy/signal-t.h + * Asynchronous signal delivery functions descriptors. + * + * Copyright 2000-2010 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_SIGNAL_T_H +#define _HAPROXY_SIGNAL_T_H + +#include <signal.h> +#include <haproxy/api-t.h> + +/* flags for -> flags */ +#define SIG_F_ONE_SHOOT 0x0001 /* unregister handler before calling it */ +#define SIG_F_TYPE_FCT 0x0002 /* handler is a function + arg */ +#define SIG_F_TYPE_TASK 0x0004 /* handler is a task + reason */ + +/* Define WDTSIG if available */ +#if defined(USE_RT) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) + + +/* We'll deliver SIGALRM when we've run out of CPU as it's not intercepted by + * gdb by default. 
+ */
+#define WDTSIG SIGALRM
+
+#endif /* USE_RT && _POSIX_TIMERS > 0 && _POSIX_THREAD_CPUTIME */
+
+#ifdef USE_THREAD_DUMP
+/* The signal to trigger a debug dump on a thread is SIGURG. It has the benefit
+ * of not stopping gdb by default, so that issuing "show threads" in a process
+ * being debugged has no adverse effect.
+ */
+#define DEBUGSIG SIGURG
+
+#endif /* USE_THREAD_DUMP */
+
+/* One registered handler for a signal. Those are highly dynamic and stored in
+ * pools. <flags> (SIG_F_*) tells how <handler> and <arg> must be interpreted.
+ */
+struct sig_handler {
+	struct list list;
+	void *handler; /* function to call or task to wake up */
+	int arg; /* arg to pass to function, or signals */
+	int flags; /* SIG_F_* */
+};
+
+/* one per signal */
+struct signal_descriptor {
+	int count; /* number of times raised */
+	struct list handlers; /* sig_handler */
+};
+
+#endif /* _HAPROXY_SIGNAL_T_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/haproxy/signal.h b/include/haproxy/signal.h
new file mode 100644
index 0000000..25a4ef1
--- /dev/null
+++ b/include/haproxy/signal.h
@@ -0,0 +1,52 @@
+/*
+ * include/haproxy/signal.h
+ * Asynchronous signal delivery functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ * + */ + +#ifndef _HAPROXY_SIGNAL_H +#define _HAPROXY_SIGNAL_H + +#include <signal.h> + +#include <haproxy/api.h> +#include <haproxy/signal-t.h> +#include <haproxy/task-t.h> +#include <haproxy/thread.h> + +extern int signal_queue_len; +extern struct signal_descriptor signal_state[]; + +__decl_thread(extern HA_SPINLOCK_T signals_lock); + +void signal_handler(int sig); +void __signal_process_queue(void); +void deinit_signals(void); +struct sig_handler *signal_register_fct(int sig, void (*fct)(struct sig_handler *), int arg); +struct sig_handler *signal_register_task(int sig, struct task *task, int reason); +void signal_unregister_handler(struct sig_handler *handler); +void signal_unregister_target(int sig, void *target); +void signal_unregister(int sig); +void haproxy_unblock_signals(void); + +static inline void signal_process_queue() +{ + if (unlikely(signal_queue_len > 0)) + __signal_process_queue(); +} + +#endif /* _HAPROXY_SIGNAL_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/sink-t.h b/include/haproxy/sink-t.h new file mode 100644 index 0000000..79a0dda --- /dev/null +++ b/include/haproxy/sink-t.h @@ -0,0 +1,76 @@ +/* + * include/haproxy/sink-t.h + * This file provides definitions for event sinks + * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_SINK_T_H
+#define _HAPROXY_SINK_T_H
+
+#include <import/ist.h>
+#include <haproxy/api-t.h>
+#include <haproxy/log-t.h>
+
+/* A sink may be of 2 distinct types :
+ *   - file descriptor (such as stdout)
+ *   - ring buffer, readable from CLI
+ * SINK_TYPE_NEW only designates a sink which was not initialized yet.
+ */
+enum sink_type {
+	SINK_TYPE_NEW,      // not yet initialized
+	SINK_TYPE_FD,       // events sent to a file descriptor
+	SINK_TYPE_BUFFER,   // events sent to a ring buffer
+};
+
+/* one forwarding target of a sink; targets are chained through <next> */
+struct sink_forward_target {
+	struct server *srv;    // used server
+	struct appctx *appctx; // appctx of current session
+	size_t ofs;            // ring buffer reader offset
+	struct sink *sink;     // the associated sink
+	struct sink_forward_target *next;
+	__decl_thread(HA_SPINLOCK_T lock); // lock to protect current struct
+};
+
+/* describes the configuration and current state of an event sink */
+struct sink {
+	struct list sink_list;     // position in the sink list
+	char *name;                // sink name
+	char *desc;                // sink description
+	char *store;               // backing-store file when buffer
+	enum log_fmt fmt;          // format expected by the sink
+	enum sink_type type;       // type of storage
+	uint32_t maxlen;           // max message length (truncated above)
+	struct proxy* forward_px;  // internal proxy used to forward (only set when exclusive to sink)
+	struct sink_forward_target *sft; // sink forward targets
+	struct task *forward_task; // task to handle forward targets conns
+	struct sig_handler *forward_sighandler; /* signal handler */
+	struct {
+		struct ring *ring;    // used by ring buffer and STRM sender
+		unsigned int dropped; // dropped events since last one.
+		int fd;               // fd num for FD type sink
+		__decl_thread(HA_RWLOCK_T lock); // shared/excl for dropped
+	} ctx;
+};
+
+#endif /* _HAPROXY_SINK_T_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/haproxy/sink.h b/include/haproxy/sink.h
new file mode 100644
index 0000000..3b428a1
--- /dev/null
+++ b/include/haproxy/sink.h
@@ -0,0 +1,97 @@
+/*
+ * include/haproxy/sink.h
+ * This file provides declarations for event sinks management
+ *
+ * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_SINK_H
+#define _HAPROXY_SINK_H
+
+#include <sys/types.h>
+#include <haproxy/sink-t.h>
+#include <haproxy/thread.h>
+
+extern struct list sink_list;
+
+extern struct proxy *sink_proxies_list;
+
+struct sink *sink_find(const char *name);
+struct sink *sink_new_fd(const char *name, const char *desc, enum log_fmt, int fd);
+ssize_t __sink_write(struct sink *sink, struct log_header hdr, size_t maxlen,
+                     const struct ist msg[], size_t nmsg);
+int sink_announce_dropped(struct sink *sink, struct log_header hdr);
+
+
+/* tries to send <nmsg> message parts from message array <msg> to sink <sink>.
+ * Formatting according to the sink's preference is done here, unless sink->fmt
+ * is unspecified, in which case the caller formatting will be used instead.
+ *
+ * It will stop writing at <maxlen> instead of sink->maxlen if <maxlen> is
+ * positive and inferior to sink->maxlen.
+ *
+ * Lost messages are accounted for in the sink's counter. If there
+ * were lost messages, an attempt is first made to indicate it.
+ * The function returns the number of bytes effectively sent or announced,
+ * or <= 0 in other cases. Each call returning <= 0 increments the sink's
+ * dropped events counter by one.
+ */
+static inline ssize_t sink_write(struct sink *sink, struct log_header hdr,
+                                 size_t maxlen, const struct ist msg[], size_t nmsg)
+{
+	ssize_t sent;
+
+	if (unlikely(sink->ctx.dropped > 0)) {
+		/* We need to take an exclusive lock so that other producers
+		 * don't do the same thing at the same time and above all we
+		 * want to be sure others have finished sending their messages
+		 * so that the dropped event arrives exactly at the right
+		 * position.
+		 */
+		HA_RWLOCK_WRLOCK(RING_LOCK, &sink->ctx.lock);
+		sent = sink_announce_dropped(sink, hdr);
+		HA_RWLOCK_WRUNLOCK(RING_LOCK, &sink->ctx.lock);
+
+		if (!sent) {
+			/* we failed, we don't try to send our log as if it
+			 * would pass by chance, we'd get disordered events.
+			 */
+			goto fail;
+		}
+	}
+
+	/* regular producers only take the shared side of the lock */
+	HA_RWLOCK_RDLOCK(RING_LOCK, &sink->ctx.lock);
+	sent = __sink_write(sink, hdr, maxlen, msg, nmsg);
+	HA_RWLOCK_RDUNLOCK(RING_LOCK, &sink->ctx.lock);
+
+ fail:
+	/* reached both after a failed announce (sent == 0) and after the write */
+	if (unlikely(sent <= 0))
+		HA_ATOMIC_INC(&sink->ctx.dropped);
+
+	return sent;
+}
+
+struct sink *sink_new_from_srv(struct server *srv, const char *from);
+int sink_resolve_logger_buffer(struct logger *logger, char **msg);
+
+#endif /* _HAPROXY_SINK_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/haproxy/sock-t.h b/include/haproxy/sock-t.h
new file mode 100644
index 0000000..b843d44
--- /dev/null
+++ b/include/haproxy/sock-t.h
@@ -0,0 +1,37 @@
+/*
+ * include/haproxy/sock-t.h
+ * This file contains type definitions for native (BSD-compatible) sockets.
+ *
+ * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SOCK_T_H +#define _HAPROXY_SOCK_T_H + +#include <sys/socket.h> +#include <sys/types.h> + +#include <haproxy/api-t.h> + +#endif /* _HAPROXY_SOCK_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/sock.h b/include/haproxy/sock.h new file mode 100644 index 0000000..60e81ec --- /dev/null +++ b/include/haproxy/sock.h @@ -0,0 +1,62 @@ +/* + * include/haproxy/sock.h + * This file contains declarations for native (BSD-compatible) sockets. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SOCK_H +#define _HAPROXY_SOCK_H + +#include <sys/socket.h> +#include <sys/types.h> + +#include <haproxy/api.h> +#include <haproxy/connection-t.h> +#include <haproxy/listener-t.h> +#include <haproxy/sock-t.h> + +int sock_create_server_socket(struct connection *conn); +void sock_enable(struct receiver *rx); +void sock_disable(struct receiver *rx); +void sock_unbind(struct receiver *rx); +int sock_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir); +int sock_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir); +int sock_get_old_sockets(const char *unixsocket); +int sock_find_compatible_fd(const struct receiver *rx); +void sock_drop_unused_old_sockets(); +int sock_accepting_conn(const struct receiver *rx); +struct connection *sock_accept_conn(struct listener *l, int *status); +void sock_accept_iocb(int fd); +void sock_conn_ctrl_init(struct connection *conn); +void sock_conn_ctrl_close(struct connection *conn); +void sock_conn_iocb(int fd); +int sock_conn_check(struct connection *conn); +int sock_drain(struct connection *conn); +int sock_check_events(struct connection *conn, int event_type); +void sock_ignore_events(struct connection *conn, int event_type); +int _sock_supports_reuseport(const struct proto_fam *fam, int type, int protocol); + + +#endif /* _HAPROXY_SOCK_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/sock_inet.h b/include/haproxy/sock_inet.h new file mode 100644 index 0000000..6f07e63 --- /dev/null +++ b/include/haproxy/sock_inet.h @@ -0,0 +1,49 @@ +/* + * include/haproxy/sock_inet.h + * This file contains declarations for AF_INET & AF_INET6 sockets. 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SOCK_INET_H +#define _HAPROXY_SOCK_INET_H + +#include <sys/socket.h> +#include <sys/types.h> + +#include <haproxy/api.h> + +extern int sock_inet6_v6only_default; +extern int sock_inet_tcp_maxseg_default; +extern int sock_inet6_tcp_maxseg_default; + +extern struct proto_fam proto_fam_inet4; +extern struct proto_fam proto_fam_inet6; + +/* external types */ +struct receiver; + +int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b); +int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b); +void sock_inet_set_port(struct sockaddr_storage *addr, int port); +int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir); +int sock_inet_is_foreign(int fd, sa_family_t family); +int sock_inet4_make_foreign(int fd); +int sock_inet6_make_foreign(int fd); +int sock_inet_bind_receiver(struct receiver *rx, char **errmsg); + +#endif /* _HAPROXY_SOCK_INET_H */ diff --git a/include/haproxy/sock_unix.h b/include/haproxy/sock_unix.h new file mode 100644 index 0000000..9934341 --- /dev/null +++ b/include/haproxy/sock_unix.h @@ -0,0 +1,36 @@ +/* + * include/haproxy/sock_unix.h + * This file contains declarations for AF_UNIX 
sockets. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SOCK_UNIX_H +#define _HAPROXY_SOCK_UNIX_H + +#include <sys/socket.h> +#include <sys/types.h> + +#include <haproxy/api.h> +#include <haproxy/receiver-t.h> + +extern struct proto_fam proto_fam_unix; + +int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b); +int sock_unix_bind_receiver(struct receiver *rx, char **errmsg); + +#endif /* _HAPROXY_SOCK_UNIX_H */ diff --git a/include/haproxy/spoe-t.h b/include/haproxy/spoe-t.h new file mode 100644 index 0000000..2732443 --- /dev/null +++ b/include/haproxy/spoe-t.h @@ -0,0 +1,413 @@ +/* + * include/haproxy/spoe-t.h + * Macros, variables and structures for the SPOE filter. + * + * Copyright (C) 2017 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SPOE_T_H +#define _HAPROXY_SPOE_T_H + +#include <sys/time.h> + +#include <haproxy/buf-t.h> +#include <haproxy/dynbuf-t.h> +#include <haproxy/filters-t.h> +#include <haproxy/freq_ctr-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/sample-t.h> +#include <haproxy/stream-t.h> +#include <haproxy/task-t.h> +#include <haproxy/thread-t.h> + +/* Type of list of messages */ +#define SPOE_MSGS_BY_EVENT 0x01 +#define SPOE_MSGS_BY_GROUP 0x02 + +/* Flags set on the SPOE agent */ +#define SPOE_FL_CONT_ON_ERR 0x00000001 /* Do not stop events processing when an error occurred */ +#define SPOE_FL_PIPELINING 0x00000002 /* Set when SPOE agent supports pipelining (set by default) */ +#define SPOE_FL_ASYNC 0x00000004 /* Set when SPOE agent supports async (set by default) */ +#define SPOE_FL_SND_FRAGMENTATION 0x00000008 /* Set when SPOE agent supports sending fragmented payload */ +#define SPOE_FL_RCV_FRAGMENTATION 0x00000010 /* Set when SPOE agent supports receiving fragmented payload */ +#define SPOE_FL_FORCE_SET_VAR 0x00000020 /* Set when SPOE agent will set all variables from agent (and not only known variables) */ + +/* Flags set on the SPOE context */ +#define SPOE_CTX_FL_CLI_CONNECTED 0x00000001 /* Set after that on-client-session event was processed */ +#define SPOE_CTX_FL_SRV_CONNECTED 0x00000002 /* Set after that on-server-session event was processed */ +#define SPOE_CTX_FL_REQ_PROCESS 0x00000004 /* Set when SPOE is processing the request */ +#define SPOE_CTX_FL_RSP_PROCESS 0x00000008 /* Set when SPOE is processing the response */ +#define SPOE_CTX_FL_FRAGMENTED 0x00000010 /* Set when a fragmented frame is processing */ + +#define SPOE_CTX_FL_PROCESS 
(SPOE_CTX_FL_REQ_PROCESS|SPOE_CTX_FL_RSP_PROCESS) + +/* Flags set on the SPOE applet */ +#define SPOE_APPCTX_FL_PIPELINING 0x00000001 /* Set if pipelining is supported */ +#define SPOE_APPCTX_FL_ASYNC 0x00000002 /* Set if asynchronous frames is supported */ +#define SPOE_APPCTX_FL_FRAGMENTATION 0x00000004 /* Set if fragmentation is supported */ + +#define SPOE_APPCTX_ERR_NONE 0x00000000 /* no error yet, leave it to zero */ +#define SPOE_APPCTX_ERR_TOUT 0x00000001 /* SPOE applet timeout */ + +/* Flags set on the SPOE frame */ +#define SPOE_FRM_FL_FIN 0x00000001 +#define SPOE_FRM_FL_ABRT 0x00000002 + +/* Masks to get data type or flags value */ +#define SPOE_DATA_T_MASK 0x0F +#define SPOE_DATA_FL_MASK 0xF0 + +/* Flags to set Boolean values */ +#define SPOE_DATA_FL_FALSE 0x00 +#define SPOE_DATA_FL_TRUE 0x10 + +/* All possible states for a SPOE context */ +enum spoe_ctx_state { + SPOE_CTX_ST_NONE = 0, + SPOE_CTX_ST_READY, + SPOE_CTX_ST_ENCODING_MSGS, + SPOE_CTX_ST_SENDING_MSGS, + SPOE_CTX_ST_WAITING_ACK, + SPOE_CTX_ST_DONE, + SPOE_CTX_ST_ERROR, +}; + +/* All possible states for a SPOE applet */ +enum spoe_appctx_state { + SPOE_APPCTX_ST_CONNECT = 0, + SPOE_APPCTX_ST_CONNECTING, + SPOE_APPCTX_ST_IDLE, + SPOE_APPCTX_ST_PROCESSING, + SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY, + SPOE_APPCTX_ST_WAITING_SYNC_ACK, + SPOE_APPCTX_ST_DISCONNECT, + SPOE_APPCTX_ST_DISCONNECTING, + SPOE_APPCTX_ST_EXIT, + SPOE_APPCTX_ST_END, +}; + +/* All supported SPOE actions */ +enum spoe_action_type { + SPOE_ACT_T_SET_VAR = 1, + SPOE_ACT_T_UNSET_VAR, + SPOE_ACT_TYPES, +}; + +/* All supported SPOE events */ +enum spoe_event { + SPOE_EV_NONE = 0, + + /* Request events */ + SPOE_EV_ON_CLIENT_SESS = 1, + SPOE_EV_ON_TCP_REQ_FE, + SPOE_EV_ON_TCP_REQ_BE, + SPOE_EV_ON_HTTP_REQ_FE, + SPOE_EV_ON_HTTP_REQ_BE, + + /* Response events */ + SPOE_EV_ON_SERVER_SESS, + SPOE_EV_ON_TCP_RSP, + SPOE_EV_ON_HTTP_RSP, + + SPOE_EV_EVENTS +}; + +/* Errors triggered by streams */ +enum spoe_context_error { + SPOE_CTX_ERR_NONE = 
0, + SPOE_CTX_ERR_TOUT, + SPOE_CTX_ERR_RES, + SPOE_CTX_ERR_TOO_BIG, + SPOE_CTX_ERR_FRAG_FRAME_ABRT, + SPOE_CTX_ERR_INTERRUPT, + SPOE_CTX_ERR_UNKNOWN = 255, + SPOE_CTX_ERRS, +}; + +/* Errors triggered by SPOE applet */ +enum spoe_frame_error { + SPOE_FRM_ERR_NONE = 0, + SPOE_FRM_ERR_IO, + SPOE_FRM_ERR_TOUT, + SPOE_FRM_ERR_TOO_BIG, + SPOE_FRM_ERR_INVALID, + SPOE_FRM_ERR_NO_VSN, + SPOE_FRM_ERR_NO_FRAME_SIZE, + SPOE_FRM_ERR_NO_CAP, + SPOE_FRM_ERR_BAD_VSN, + SPOE_FRM_ERR_BAD_FRAME_SIZE, + SPOE_FRM_ERR_FRAG_NOT_SUPPORTED, + SPOE_FRM_ERR_INTERLACED_FRAMES, + SPOE_FRM_ERR_FRAMEID_NOTFOUND, + SPOE_FRM_ERR_RES, + SPOE_FRM_ERR_UNKNOWN = 99, + SPOE_FRM_ERRS, +}; + +/* Scopes used for variables set by agents. It is a way to be agnostic to vars + * scope. */ +enum spoe_vars_scope { + SPOE_SCOPE_PROC = 0, /* <=> SCOPE_PROC */ + SPOE_SCOPE_SESS, /* <=> SCOPE_SESS */ + SPOE_SCOPE_TXN, /* <=> SCOPE_TXN */ + SPOE_SCOPE_REQ, /* <=> SCOPE_REQ */ + SPOE_SCOPE_RES, /* <=> SCOPE_RES */ +}; + +/* Frame Types sent by HAProxy and by agents */ +enum spoe_frame_type { + SPOE_FRM_T_UNSET = 0, + + /* Frames sent by HAProxy */ + SPOE_FRM_T_HAPROXY_HELLO = 1, + SPOE_FRM_T_HAPROXY_DISCON, + SPOE_FRM_T_HAPROXY_NOTIFY, + + /* Frames sent by the agents */ + SPOE_FRM_T_AGENT_HELLO = 101, + SPOE_FRM_T_AGENT_DISCON, + SPOE_FRM_T_AGENT_ACK +}; + +/* All supported data types */ +enum spoe_data_type { + SPOE_DATA_T_NULL = 0, + SPOE_DATA_T_BOOL, + SPOE_DATA_T_INT32, + SPOE_DATA_T_UINT32, + SPOE_DATA_T_INT64, + SPOE_DATA_T_UINT64, + SPOE_DATA_T_IPV4, + SPOE_DATA_T_IPV6, + SPOE_DATA_T_STR, + SPOE_DATA_T_BIN, + SPOE_DATA_TYPES +}; + + +/* Describe an argument that will be linked to a message. It is a sample fetch, + * with an optional name.
*/ +struct spoe_arg { + char *name; /* Name of the argument, may be NULL */ + unsigned int name_len; /* The name length, 0 if NULL */ + struct sample_expr *expr; /* Sample expression */ + struct list list; /* Used to chain SPOE args */ +}; + +/* Used during the config parsing only because, when a SPOE agent section is + * parsed, messages/groups can be undefined. */ +struct spoe_placeholder { + char *id; /* SPOE placeholder id */ + struct list list; /* Use to chain SPOE placeholders */ +}; + +/* Used during the config parsing, when SPOE agent section is parsed, to + * register some variable names. */ +struct spoe_var_placeholder { + char *name; /* The variable name */ + struct list list; /* Use to chain SPOE var placeholders */ +}; + +/* Describe a message that will be sent in a NOTIFY frame. A message has a name, + * an argument list (see above) and it is linked to a specific event. */ +struct spoe_message { + char *id; /* SPOE message id */ + unsigned int id_len; /* The message id length */ + struct spoe_agent *agent; /* SPOE agent owning this SPOE message */ + struct spoe_group *group; /* SPOE group owning this SPOE message (can be NULL) */ + struct { + char *file; /* file where the SPOE message appears */ + int line; /* line where the SPOE message appears */ + } conf; /* config information */ + unsigned int nargs; /* # of arguments */ + struct list args; /* Arguments added when the SPOE messages is sent */ + struct list list; /* Used to chain SPOE messages */ + struct list by_evt; /* By event list */ + struct list by_grp; /* By group list */ + + struct list acls; /* ACL declared on this message */ + struct acl_cond *cond; /* acl condition to meet */ + enum spoe_event event; /* SPOE_EV_* */ +}; + +/* Describe a group of messages that will be sent in a NOTIFY frame. A group has + * a name and a list of messages. It can be used by HAProxy, outside events + * processing, mainly in (tcp|http) rules. 
*/ +struct spoe_group { + char *id; /* SPOE group id */ + struct spoe_agent *agent; /* SPOE agent owning this SPOE group */ + struct { + char *file; /* file where the SPOE group appears */ + int line; /* line where the SPOE group appears */ + } conf; /* config information */ + + struct list phs; /* List of placeholders used during conf parsing */ + struct list messages; /* List of SPOE messages that will be sent by this + * group */ + + struct list list; /* Used to chain SPOE groups */ +}; + +/* Describe a SPOE agent. */ +struct spoe_agent { + char *id; /* SPOE agent id (name) */ + struct { + char *file; /* file where the SPOE agent appears */ + int line; /* line where the SPOE agent appears */ + } conf; /* config information */ + union { + struct proxy *be; /* Backend used by this agent */ + char *name; /* Backend name used during conf parsing */ + } b; + struct { + unsigned int hello; /* Max time to receive AGENT-HELLO frame (in SPOE applet) */ + unsigned int idle; /* Max Idle timeout (in SPOE applet) */ + unsigned int processing; /* Max time to process an event (in the main stream) */ + } timeout; + + /* Config info */ + struct spoe_config *spoe_conf; /* SPOE filter config */ + char *var_pfx; /* Prefix used for vars set by the agent */ + char *var_on_error; /* Variable to set when an error occurred, in the TXN scope */ + char *var_t_process; /* Variable to set to report the processing time of the last event/group, in the TXN scope */ + char *var_t_total; /* Variable to set to report the cumulative processing time, in the TXN scope */ + unsigned int flags; /* SPOE_FL_* */ + unsigned int cps_max; /* Maximum # of connections per second */ + unsigned int eps_max; /* Maximum # of errors per second */ + unsigned int max_frame_size; /* Maximum frame size for this agent, before any negotiation */ + unsigned int max_fpa; /* Maximum # of frames handled per applet at once */ + + struct list events[SPOE_EV_EVENTS]; /* List of SPOE messages that will be sent + * for each 
supported events */ + + struct list groups; /* List of available SPOE groups */ + + struct list messages; /* list of all messages attached to this SPOE agent */ + + /* running info */ + struct { + char *engine_id; /* engine-id string */ + unsigned int frame_size; /* current maximum frame size, only used to encode messages */ + unsigned int processing; + struct freq_ctr processing_per_sec; + + struct freq_ctr conn_per_sec; /* connections per second */ + struct freq_ctr err_per_sec; /* connection errors per second */ + + unsigned int idles; /* # of idle applets */ + struct eb_root idle_applets; /* idle SPOE applets available to process data */ + struct list applets; /* all SPOE applets for this agent */ + struct list sending_queue; /* Queue of streams waiting to send data */ + struct list waiting_queue; /* Queue of streams waiting for a ack, in async mode */ + __decl_thread(HA_SPINLOCK_T lock); + } *rt; + + struct { + unsigned int applets; /* # of SPOE applets */ + unsigned int idles; /* # of idle applets */ + unsigned int nb_sending; /* # of streams waiting to send data */ + unsigned int nb_waiting; /* # of streams waiting for a ack */ + unsigned long long nb_processed; /* # of frames processed by the SPOE */ + unsigned long long nb_errors; /* # of errors during the processing */ + } counters; +}; + +/* SPOE filter configuration */ +struct spoe_config { + char *id; /* The SPOE engine name. If undefined in HAProxy config, + * it will be set with the SPOE agent name */ + struct proxy *proxy; /* Proxy owning the filter */ + struct spoe_agent *agent; /* Agent used by this filter */ + struct proxy agent_fe; /* Agent frontend */ +}; + +/* SPOE context attached to a stream. 
It is the main structure that handles the + * processing offload */ +struct spoe_context { + struct filter *filter; /* The SPOE filter */ + struct stream *strm; /* The stream that should be offloaded */ + + struct list *events; /* List of messages that will be sent during the stream processing */ + struct list *groups; /* List of available SPOE groups */ + + struct buffer buffer; /* Buffer used to store encoded messages */ + struct buffer_wait buffer_wait; /* position in the list of resources waiting for a buffer */ + struct list list; + + enum spoe_ctx_state state; /* SPOE_CTX_ST_* */ + unsigned int flags; /* SPOE_CTX_FL_* */ + unsigned int status_code; /* SPOE_CTX_ERR_* */ + + unsigned int stream_id; /* stream_id and frame_id are used */ + unsigned int frame_id; /* to map NOTIFY and ACK frames */ + unsigned int process_exp; /* expiration date to process an event */ + + struct spoe_appctx *spoe_appctx; /* SPOE appctx sending the current frame */ + struct { + struct spoe_message *curmsg; /* SPOE message from which to resume encoding */ + struct spoe_arg *curarg; /* SPOE arg in <curmsg> from which to resume encoding */ + unsigned int curoff; /* offset in <curarg> from which to resume encoding */ + unsigned int curlen; /* length of <curarg> that needs to be encoded, for SMP_F_MAY_CHANGE data */ + unsigned int flags; /* SPOE_FRM_FL_* */ + } frag_ctx; /* Info about fragmented frames, valid only if SPOE_CTX_FL_FRAGMENTED is set */ + + struct { + ullong start_ts; /* start date of the current event/group */ + ullong request_ts; /* date the frame processing starts (reset for each frag) */ + ullong queue_ts; /* date the frame is queued (reset for each frag) */ + ullong wait_ts; /* date the stream starts waiting for a response */ + ullong response_ts; /* date the response processing starts */ + long t_request; /* delay to encode and push the frame in queue (cumulative for frags) */ + long t_queue; /* delay before the frame gets out the sending queue (cumulative for frags) */ + long
t_waiting; /* delay before the response is received */ + long t_response; /* delay to process the response (from the stream pov) */ + long t_process; /* processing time of the last event/group */ + unsigned long t_total; /* cumulative processing time */ + } stats; /* Stats for this stream */ +}; + +/* SPOE context inside a appctx */ +struct spoe_appctx { + struct appctx *owner; /* the owner */ + struct task *task; /* task to handle applet timeouts */ + struct spoe_agent *agent; /* agent on which the applet is attached */ + + unsigned int version; /* the negotiated version */ + unsigned int max_frame_size; /* the negotiated max-frame-size value */ + unsigned int flags; /* SPOE_APPCTX_FL_* */ + + unsigned int status_code; /* SPOE_FRM_ERR_* */ +#if defined(DEBUG_SPOE) || defined(DEBUG_FULL) + char *reason; /* Error message, used for debugging only */ + int rlen; /* reason length */ +#endif + + struct buffer buffer; /* Buffer used to store a encoded messages */ + struct buffer_wait buffer_wait; /* position in the list of resources waiting for a buffer */ + struct list waiting_queue; /* list of streams waiting for a ACK frame, in sync and pipelining mode */ + struct list list; /* next spoe appctx for the same agent */ + struct eb32_node node; /* node used for applets tree */ + unsigned int cur_fpa; + + struct { + struct spoe_context *ctx; /* SPOE context owning the fragmented frame */ + unsigned int cursid; /* stream-id of the fragmented frame. used if the processing is aborted */ + unsigned int curfid; /* frame-id of the fragmented frame. used if the processing is aborted */ + } frag_ctx; /* Info about fragmented frames, unused for unfragmented frames */ +}; + +#endif /* _HAPROXY_SPOE_T_H */ diff --git a/include/haproxy/spoe.h b/include/haproxy/spoe.h new file mode 100644 index 0000000..7cd0987 --- /dev/null +++ b/include/haproxy/spoe.h @@ -0,0 +1,351 @@ +/* + * include/haproxy/spoe.h + * Encoding/Decoding functions for the SPOE filters (and other helpers). 
+ * + * Copyright (C) 2017 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SPOE_H +#define _HAPROXY_SPOE_H + +#include <haproxy/api.h> +#include <haproxy/intops.h> +#include <haproxy/sample-t.h> +#include <haproxy/spoe-t.h> + + +/* Encode a buffer. Its length <len> is encoded as a varint, followed by a copy + * of <str>. It must have enough space in <*buf> to encode the buffer, else an + * error is triggered. + * On success, it returns <len> and <*buf> is moved after the encoded value. If + * an error occurred, it returns -1. */ +static inline int +spoe_encode_buffer(const char *str, size_t len, char **buf, char *end) +{ + char *p = *buf; + int ret; + + if (p >= end) + return -1; + + if (!len) { + *p++ = 0; + *buf = p; + return 0; + } + + ret = encode_varint(len, &p, end); + if (ret == -1 || p + len > end) + return -1; + + memcpy(p, str, len); + *buf = p + len; + return len; +} + +/* Encode a buffer, possibly partially. It does the same thing than + * 'spoe_encode_buffer', but if there is not enough space, it does not fail. + * On success, it returns the number of copied bytes and <*buf> is moved after + * the encoded value. If an error occurred, it returns -1. 
*/ +static inline int +spoe_encode_frag_buffer(const char *str, size_t len, char **buf, char *end) +{ + char *p = *buf; + int ret; + + if (p >= end) + return -1; + + if (!len) { + *p++ = 0; + *buf = p; + return 0; + } + + ret = encode_varint(len, &p, end); + if (ret == -1 || p >= end) + return -1; + + ret = (p+len < end) ? len : (end - p); + memcpy(p, str, ret); + *buf = p + ret; + return ret; +} + +/* Decode a buffer. The buffer length is decoded and saved in <*len>. <*str> + * points on the first byte of the buffer. + * On success, it returns the buffer length and <*buf> is moved after the + * encoded buffer. Otherwise, it returns -1. */ +static inline int +spoe_decode_buffer(char **buf, char *end, char **str, uint64_t *len) +{ + char *p = *buf; + uint64_t sz; + int ret; + + *str = NULL; + *len = 0; + + ret = decode_varint(&p, end, &sz); + if (ret == -1 || p + sz > end) + return -1; + + *str = p; + *len = sz; + *buf = p + sz; + return sz; +} + +/* Encode a typed data using value in <smp>. On success, it returns the number + * of copied bytes and <*buf> is moved after the encoded value. If an error + * occurred, it returns -1. + * + * If the value is too big to be encoded, depending on its type, then encoding + * failed or the value is partially encoded. Only strings and binaries can be + * partially encoded. */ +static inline int +spoe_encode_data(struct sample *smp, char **buf, char *end) +{ + char *p = *buf; + int ret; + + if (p >= end) + return -1; + + if (smp == NULL) { + *p++ = SPOE_DATA_T_NULL; + goto end; + } + + switch (smp->data.type) { + case SMP_T_BOOL: + *p = SPOE_DATA_T_BOOL; + *p++ |= ((!smp->data.u.sint) ? 
SPOE_DATA_FL_FALSE : SPOE_DATA_FL_TRUE); + break; + + case SMP_T_SINT: + *p++ = SPOE_DATA_T_INT64; + if (encode_varint(smp->data.u.sint, &p, end) == -1) + return -1; + break; + + case SMP_T_IPV4: + if (p + 5 > end) + return -1; + *p++ = SPOE_DATA_T_IPV4; + memcpy(p, &smp->data.u.ipv4, 4); + p += 4; + break; + + case SMP_T_IPV6: + if (p + 17 > end) + return -1; + *p++ = SPOE_DATA_T_IPV6; + memcpy(p, &smp->data.u.ipv6, 16); + p += 16; + break; + + case SMP_T_STR: + case SMP_T_BIN: { + /* If defined, get length and offset of the sample by reading the sample + * context. ctx.a[0] is the pointer to the length and ctx.a[1] is the + * pointer to the offset. If the offset is greater than 0, it means the + * sample is partially encoded. In this case, we only need to encode the + * remaining. When all the sample is encoded, the offset is reset to 0. + * So the caller know it can try to encode the next sample. */ + struct buffer *chk = &smp->data.u.str; + unsigned int *len = smp->ctx.a[0]; + unsigned int *off = smp->ctx.a[1]; + + if (!*off) { + /* First evaluation of the sample : encode the + * type (string or binary), the buffer length + * (as a varint) and at least 1 byte of the + * buffer. */ + struct buffer *chk = &smp->data.u.str; + + *p++ = (smp->data.type == SMP_T_STR) + ? 
SPOE_DATA_T_STR + : SPOE_DATA_T_BIN; + ret = spoe_encode_frag_buffer(chk->area, + chk->data, &p, + end); + if (ret == -1) + return -1; + *len = chk->data; + } + else { + /* The sample has been fragmented, encode remaining data */ + ret = MIN(*len - *off, end - p); + memcpy(p, chk->area + *off, ret); + p += ret; + } + /* Now update <*off> */ + if (ret + *off != *len) + *off += ret; + else + *off = 0; + break; + } + + case SMP_T_METH: { + char *m; + size_t len; + + *p++ = SPOE_DATA_T_STR; + switch (smp->data.u.meth.meth) { + case HTTP_METH_OPTIONS: m = "OPTIONS"; len = 7; break; + case HTTP_METH_GET : m = "GET"; len = 3; break; + case HTTP_METH_HEAD : m = "HEAD"; len = 4; break; + case HTTP_METH_POST : m = "POST"; len = 4; break; + case HTTP_METH_PUT : m = "PUT"; len = 3; break; + case HTTP_METH_DELETE : m = "DELETE"; len = 6; break; + case HTTP_METH_TRACE : m = "TRACE"; len = 5; break; + case HTTP_METH_CONNECT: m = "CONNECT"; len = 7; break; + + default : + m = smp->data.u.meth.str.area; + len = smp->data.u.meth.str.data; + } + if (spoe_encode_buffer(m, len, &p, end) == -1) + return -1; + break; + } + + default: + *p++ = SPOE_DATA_T_NULL; + break; + } + + end: + ret = (p - *buf); + *buf = p; + return ret; +} + +/* Skip a typed data. If an error occurred, -1 is returned, otherwise the number + * of skipped bytes is returned and the <*buf> is moved after skipped data. 
+ * + * A typed data is composed of a type (1 byte) and corresponding data: + * - boolean: no additional data (0 bytes) + * - integers: a variable-length integer (see decode_varint) + * - ipv4: 4 bytes + * - ipv6: 16 bytes + * - binary and string: a buffer prefixed by its size, a variable-length + * integer (see spoe_decode_buffer) */ +static inline int +spoe_skip_data(char **buf, char *end) +{ + char *str, *p = *buf; + int type, ret; + uint64_t v, sz; + + if (p >= end) + return -1; + + type = *p++; + switch (type & SPOE_DATA_T_MASK) { + case SPOE_DATA_T_BOOL: + break; + case SPOE_DATA_T_INT32: + case SPOE_DATA_T_INT64: + case SPOE_DATA_T_UINT32: + case SPOE_DATA_T_UINT64: + if (decode_varint(&p, end, &v) == -1) + return -1; + break; + case SPOE_DATA_T_IPV4: + if (p+4 > end) + return -1; + p += 4; + break; + case SPOE_DATA_T_IPV6: + if (p+16 > end) + return -1; + p += 16; + break; + case SPOE_DATA_T_STR: + case SPOE_DATA_T_BIN: + /* All the buffer must be skipped */ + if (spoe_decode_buffer(&p, end, &str, &sz) == -1) + return -1; + break; + } /* any other type (e.g. NULL): only the type byte is consumed */ + + ret = (p - *buf); + *buf = p; + return ret; +} + +/* Decode a typed data and fill <smp>. If an error occurred, -1 is returned, + * otherwise the number of read bytes is returned and <*buf> is moved after the + * decoded data. See spoe_skip_data for details.
*/ +static inline int +spoe_decode_data(char **buf, char *end, struct sample *smp) +{ + char *str, *p = *buf; + int type, r = 0; + uint64_t sz; + + if (p >= end) + return -1; + + type = *p++; /* low nibble: data type, high nibble: flags (see SPOE_DATA_T_MASK / SPOE_DATA_FL_MASK) */ + switch (type & SPOE_DATA_T_MASK) { + case SPOE_DATA_T_BOOL: + smp->data.u.sint = ((type & SPOE_DATA_FL_MASK) == SPOE_DATA_FL_TRUE); + smp->data.type = SMP_T_BOOL; + break; + case SPOE_DATA_T_INT32: + case SPOE_DATA_T_INT64: + case SPOE_DATA_T_UINT32: + case SPOE_DATA_T_UINT64: + if (decode_varint(&p, end, (uint64_t *)&smp->data.u.sint) == -1) + return -1; + smp->data.type = SMP_T_SINT; + break; + case SPOE_DATA_T_IPV4: + if (p+4 > end) + return -1; + smp->data.type = SMP_T_IPV4; + memcpy(&smp->data.u.ipv4, p, 4); + p += 4; + break; + case SPOE_DATA_T_IPV6: + if (p+16 > end) + return -1; + memcpy(&smp->data.u.ipv6, p, 16); + smp->data.type = SMP_T_IPV6; + p += 16; + break; + case SPOE_DATA_T_STR: + case SPOE_DATA_T_BIN: + /* All the buffer must be decoded */ + if (spoe_decode_buffer(&p, end, &str, &sz) == -1) + return -1; + smp->data.u.str.area = str; + smp->data.u.str.data = sz; + smp->data.type = ((type & SPOE_DATA_T_MASK) == SPOE_DATA_T_STR) ? SMP_T_STR : SMP_T_BIN; /* mask out the flag bits, as the switch does */ + break; + } /* any other type (e.g. NULL): <smp> is left untouched, only the type byte is consumed */ + + r = (p - *buf); + *buf = p; + return r; +} + +#endif /* _HAPROXY_SPOE_H */ diff --git a/include/haproxy/ssl_ckch-t.h b/include/haproxy/ssl_ckch-t.h new file mode 100644 index 0000000..0002b84 --- /dev/null +++ b/include/haproxy/ssl_ckch-t.h @@ -0,0 +1,161 @@ +/* + * include/haproxy/ssl_ckch-t.h + * ckch structures + * + * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +/* The ckch (cert key and chain) structures are a group of structures used to + * cache and manipulate the certificate files loaded from the configuration + * file and the CLI. Every certificate change made in a SSL_CTX should be done + * in these structures before being applied to a SSL_CTX. + * + * The complete architecture is described in doc/internals/ssl_cert.dia + */ + + +#ifndef _HAPROXY_SSL_CKCH_T_H +#define _HAPROXY_SSL_CKCH_T_H +#ifdef USE_OPENSSL + +#include <import/ebtree-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/openssl-compat.h> + +/* This is used to preload the certificate, private key + * and Cert Chain of a file passed in via the crt + * argument + * + * This way, we do not have to read the file multiple times + * + * This structure is the base one, in the case of a multi-cert bundle, we + * allocate 1 structure per type. + */ +struct ckch_data { + X509 *cert; + EVP_PKEY *key; + STACK_OF(X509) *chain; + HASSL_DH *dh; + struct buffer *sctl; + struct buffer *ocsp_response; + X509 *ocsp_issuer; + OCSP_CERTID *ocsp_cid; + int ocsp_update_mode; +}; + +/* + * this is used to store 1 to SSL_SOCK_NUM_KEYTYPES cert_key_and_chain and + * metadata. + * + * "ckch" for cert, key and chain. + * + * XXX: Once we remove the multi-cert bundle support, we could merge this structure + * with the cert_key_and_chain one.
+ */ +struct ckch_store { + struct ckch_data *data; + struct list ckch_inst; /* list of ckch_inst which uses this ckch_node */ + struct list crtlist_entry; /* list of entries which use this store */ + struct ebmb_node node; + char path[VAR_ARRAY]; +}; + +/* forward declarations for ckch_inst */ +struct ssl_bind_conf; +struct crtlist_entry; + + +/* Used to keep a list of all the instances using a specific cafile_entry. + * It enables to link instances regardless of how they are using the CA file + * (either via the ca-file, ca-verify-file or crl-file option). */ +struct ckch_inst_link { + struct ckch_inst *ckch_inst; + struct list list; +}; + +/* Used to keep in a ckch instance a list of all the ckch_inst_link which + * reference it. This way, when deleting a ckch_inst, we can ensure that no + * dangling reference on it will remain. */ +struct ckch_inst_link_ref { + struct ckch_inst_link *link; + struct list list; +}; + +/* + * This structure describe a ckch instance. An instance is generated for each + * bind_conf. The instance contains a linked list of the sni ctx which uses + * the ckch in this bind_conf. 
+ */ +struct ckch_inst { + struct bind_conf *bind_conf; /* pointer to the bind_conf that uses this ckch_inst */ + struct ssl_bind_conf *ssl_conf; /* pointer to the ssl_conf which is used by every sni_ctx of this inst */ + struct ckch_store *ckch_store; /* pointer to the store used to generate this inst */ + struct crtlist_entry *crtlist_entry; /* pointer to the crtlist_entry used, or NULL */ + struct server *server; /* pointer to the server if is_server_instance is set, NULL otherwise */ + SSL_CTX *ctx; /* pointer to the SSL context used by this instance */ + unsigned int is_default:1; /* This instance is used as the default ctx for this bind_conf */ + unsigned int is_server_instance:1; /* This instance is used by a backend server */ + /* space for more flag there */ + struct list sni_ctx; /* list of sni_ctx using this ckch_inst */ + struct list by_ckchs; /* chained in ckch_store's list of ckch_inst */ + struct list by_crtlist_entry; /* chained in crtlist_entry list of inst */ + struct list cafile_link_refs; /* list of ckch_inst_link pointing to this instance */ +}; + + +/* Option through which a cafile_entry was created, either + * ca-file/ca-verify-file or crl-file. 
*/ +enum cafile_type { + CAFILE_CERT, + CAFILE_CRL +}; + +/* + * deduplicate cafile (and crlfile) + */ +struct cafile_entry { + X509_STORE *ca_store; + STACK_OF(X509_NAME) *ca_list; + struct list ckch_inst_link; /* list of ckch_inst which use this CA file entry */ + enum cafile_type type; + struct ebmb_node node; + char path[0]; +}; + +enum { + CERT_TYPE_PEM = 0, + CERT_TYPE_KEY, +#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) || defined OPENSSL_IS_BORINGSSL) + CERT_TYPE_OCSP, +#endif + CERT_TYPE_ISSUER, +#ifdef HAVE_SSL_SCTL + CERT_TYPE_SCTL, +#endif + CERT_TYPE_MAX, +}; + +struct cert_exts { + const char *ext; + int type; + int (*load)(const char *path, char *payload, struct ckch_data *data, char **err); + /* add a parsing callback */ +}; + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_CKCH_T_H */ diff --git a/include/haproxy/ssl_ckch.h b/include/haproxy/ssl_ckch.h new file mode 100644 index 0000000..64ac3df --- /dev/null +++ b/include/haproxy/ssl_ckch.h @@ -0,0 +1,75 @@ +/* + * include/haproxy/ssl_ckch.h + * ckch function prototypes + * + * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_CKCH_H +#define _HAPROXY_SSL_CKCH_H +#ifdef USE_OPENSSL + +#include <haproxy/ssl_ckch-t.h> + +/* cert_key_and_chain functions */ + +int ssl_sock_load_files_into_ckch(const char *path, struct ckch_data *data, char **err); +int ssl_sock_load_pem_into_ckch(const char *path, char *buf, struct ckch_data *datackch , char **err); +void ssl_sock_free_cert_key_and_chain_contents(struct ckch_data *data); + +int ssl_sock_load_key_into_ckch(const char *path, char *buf, struct ckch_data *data , char **err); +int ssl_sock_load_ocsp_response_from_file(const char *ocsp_path, char *buf, struct ckch_data *data, char **err); +int ssl_sock_load_sctl_from_file(const char *sctl_path, char *buf, struct ckch_data *data, char **err); +int ssl_sock_load_issuer_file_into_ckch(const char *path, char *buf, struct ckch_data *data, char **err); + +/* ckch_store functions */ +struct ckch_store *ckchs_load_cert_file(char *path, char **err); +struct ckch_store *ckchs_lookup(char *path); +struct ckch_store *ckchs_dup(const struct ckch_store *src); +struct ckch_store *ckch_store_new(const char *filename); +void ckch_store_free(struct ckch_store *store); +void ckch_store_replace(struct ckch_store *old_ckchs, struct ckch_store *new_ckchs); + +/* ckch_inst functions */ +void ckch_inst_free(struct ckch_inst *inst); +struct ckch_inst *ckch_inst_new(); +int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf, + struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, struct ckch_inst **ckchi, char **err); +int ckch_inst_new_load_srv_store(const char *path, struct ckch_store *ckchs, + struct ckch_inst **ckchi, char **err); +int ckch_inst_rebuild(struct ckch_store *ckch_store, struct ckch_inst *ckchi, + 
struct ckch_inst **new_inst, char **err); + +void ckch_deinit(); +void ckch_inst_add_cafile_link(struct ckch_inst *ckch_inst, struct bind_conf *bind_conf, + struct ssl_bind_conf *ssl_conf, const struct server *srv); + +/* ssl_store functions */ +struct cafile_entry *ssl_store_get_cafile_entry(char *path, int oldest_entry); +X509_STORE* ssl_store_get0_locations_file(char *path); +int ssl_store_add_uncommitted_cafile_entry(struct cafile_entry *entry); +struct cafile_entry *ssl_store_create_cafile_entry(char *path, X509_STORE *store, enum cafile_type type); +struct cafile_entry *ssl_store_dup_cafile_entry(struct cafile_entry *src); +void ssl_store_delete_cafile_entry(struct cafile_entry *ca_e); +int ssl_store_load_ca_from_buf(struct cafile_entry *ca_e, char *cert_buf, int append); +int ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_type type); +int __ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_type type, int shuterror); + +extern struct cert_exts cert_exts[]; + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_CKCH_H */ diff --git a/include/haproxy/ssl_crtlist-t.h b/include/haproxy/ssl_crtlist-t.h new file mode 100644 index 0000000..dc7a376 --- /dev/null +++ b/include/haproxy/ssl_crtlist-t.h @@ -0,0 +1,63 @@ +/* + * include/haproxy/ssl_crtlist-t.h + * crt-list structures + * + * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_CRTLIST_T_H +#define _HAPROXY_SSL_CRTLIST_T_H +#ifdef USE_OPENSSL + +#include <import/ebtree-t.h> + + +/* forward declarations for structures below */ +struct bind_conf; +struct ssl_bind_conf; +struct proxy; + +/* list of bind conf used by struct crtlist */ +struct bind_conf_list { + struct bind_conf *bind_conf; + struct bind_conf_list *next; +}; + +/* This structure is basically a crt-list or a directory */ +struct crtlist { + struct bind_conf_list *bind_conf; /* list of bind_conf which use this crtlist */ + unsigned int linecount; /* number of lines */ + struct eb_root entries; + struct list ord_entries; /* list to keep the line order of the crt-list file */ + struct ebmb_node node; /* key is the filename or directory */ +}; + +/* a file in a directory or a line in a crt-list */ +struct crtlist_entry { + struct ssl_bind_conf *ssl_conf; /* SSL conf in crt-list */ + unsigned int linenum; + unsigned int fcount; /* filters count */ + char **filters; + struct crtlist *crtlist; /* ptr to the parent crtlist */ + struct list ckch_inst; /* list of instances of this entry, there is 1 ckch_inst per instance of the crt-list */ + struct list by_crtlist; /* ordered entries */ + struct list by_ckch_store; /* linked in ckch_store list of crtlist_entries */ + struct ebpt_node node; /* key is a ptr to a ckch_store */ +}; + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_CRTLIST_T_H */ diff --git a/include/haproxy/ssl_crtlist.h b/include/haproxy/ssl_crtlist.h new file mode 100644 index 0000000..961cfc3 --- /dev/null +++ b/include/haproxy/ssl_crtlist.h @@ -0,0 +1,48 @@ +/* + * include/haproxy/ssl_crtlist.h + * crt-list function prototypes + * + * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com> + 
* + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_CRTLIST_H +#define _HAPROXY_SSL_CRTLIST_H +#ifdef USE_OPENSSL + +#include <haproxy/ssl_crtlist-t.h> + + +/* crt-list entry functions */ +void ssl_sock_free_ssl_conf(struct ssl_bind_conf *conf); +char **crtlist_dup_filters(char **args, int fcount); +void crtlist_free_filters(char **args); +void crtlist_entry_free(struct crtlist_entry *entry); +struct crtlist_entry *crtlist_entry_new(); + +/* crt-list functions */ +void crtlist_free(struct crtlist *crtlist); +struct crtlist *crtlist_new(const char *filename, int unique); + +/* file loading */ +int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, const char *file, int linenum, int from_cli, char **err); +int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *curproxy, struct crtlist **crtlist, char **err); +int crtlist_load_cert_dir(char *path, struct bind_conf *bind_conf, struct crtlist **crtlist, char **err); + +void crtlist_deinit(); + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_CRTLIST_H */ diff --git a/include/haproxy/ssl_ocsp-t.h b/include/haproxy/ssl_ocsp-t.h new file mode 100644 index 0000000..fc2750b --- /dev/null +++ b/include/haproxy/ssl_ocsp-t.h @@ -0,0 +1,94 @@ +/* + * include/haproxy/ssl_ocsp-t.h + * SSL structures related to 
OCSP + * + * Copyright (C) 2022 Remi Tricot-Le Breton - rlebreton@haproxy.com + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_OCSP_T_H +#define _HAPROXY_SSL_OCSP_T_H +#ifdef USE_OPENSSL + +#include <import/ebtree-t.h> + +#include <haproxy/buf-t.h> +#include <haproxy/openssl-compat.h> +#include <haproxy/ssl_sock-t.h> + +#ifndef OPENSSL_NO_OCSP +extern int ocsp_ex_index; +#endif + +/* OCSP update timing constants. The products are parenthesized so that they + * expand as a single value inside larger expressions (division, modulo...). + */ +#define SSL_OCSP_UPDATE_DELAY_MAX (60*60) /* 1H */ +#define SSL_OCSP_UPDATE_DELAY_MIN (5*60) /* 5 minutes */ +#define SSL_OCSP_UPDATE_MARGIN 60 /* 1 minute */ +#define SSL_OCSP_HTTP_ERR_REPLAY 60 /* 1 minute */ + +#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) +/* + * struct alignment works here such that the key.key is the same as key_data + * Do not change the placement of key_data + */ +struct certificate_ocsp { + struct ebmb_node key; + unsigned char key_data[OCSP_MAX_CERTID_ASN1_LENGTH]; + unsigned int key_length; + int refcount_store; /* Number of ckch_store that reference this certificate_ocsp */ + int refcount_instance; /* Number of ckch_inst that reference this certificate_ocsp */ + struct buffer response; + long expire; + X509 *issuer; + STACK_OF(X509) *chain; + struct eb64_node next_update; /* Key of items inserted in ocsp_update_tree (sorted by absolute date) */ + struct buffer 
*uri; /* First OCSP URI contained in the corresponding certificate */ + + /* OCSP update stats */ + u64 last_update; /* Time of last successful update */ + unsigned int last_update_status;/* Status of the last OCSP update */ + unsigned int num_success; /* Number of successful updates */ + unsigned int num_failure; /* Number of failed updates */ + unsigned int fail_count:30; /* Number of successive failures */ + unsigned int update_once:1; /* Set if an entry should not be reinserted into the tree after update */ + unsigned int updating:1; /* Set if an entry is already being updated */ + char path[VAR_ARRAY]; +}; + +struct ocsp_cbk_arg { + int is_single; + int single_kt; + union { + struct certificate_ocsp *s_ocsp; + /* + * m_ocsp will have multiple entries dependent on key type + * Entry 0 - DSA + * Entry 1 - ECDSA + * Entry 2 - RSA + */ + struct certificate_ocsp *m_ocsp[SSL_SOCK_NUM_KEYTYPES]; + }; +}; + +extern struct eb_root cert_ocsp_tree; +extern struct eb_root ocsp_update_tree; +extern struct task *ocsp_update_task; + +__decl_thread(extern HA_SPINLOCK_T ocsp_tree_lock); + +#endif /* (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) */ + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_OCSP_T_H */ diff --git a/include/haproxy/ssl_ocsp.h b/include/haproxy/ssl_ocsp.h new file mode 100644 index 0000000..8a4197c --- /dev/null +++ b/include/haproxy/ssl_ocsp.h @@ -0,0 +1,70 @@ +/* + * include/haproxy/ssl_ocsp.h + * This file contains definition for ssl OCSP operations + * + * Copyright (C) 2022 Remi Tricot-Le Breton - rlebreton@haproxy.com + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_OCSP_H +#define _HAPROXY_SSL_OCSP_H +#ifdef USE_OPENSSL + +#include <haproxy/openssl-compat.h> +#include <haproxy/ssl_ckch-t.h> +#include <haproxy/ssl_crtlist-t.h> +#include <haproxy/ssl_ocsp-t.h> + +#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) + +int ssl_ocsp_build_response_key(OCSP_CERTID *ocsp_cid, unsigned char certid[OCSP_MAX_CERTID_ASN1_LENGTH], unsigned int *key_length); + +int ssl_sock_get_ocsp_arg_kt_index(int evp_keytype); +int ssl_sock_ocsp_stapling_cbk(SSL *ssl, void *arg); + +void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp); +void ssl_sock_free_ocsp_instance(struct certificate_ocsp *ocsp); + +int ssl_sock_load_ocsp_response(struct buffer *ocsp_response, + struct certificate_ocsp *ocsp, + OCSP_CERTID *cid, char **err); +int ssl_sock_update_ocsp_response(struct buffer *ocsp_response, char **err); +void ssl_sock_ocsp_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp); + +int ssl_ocsp_get_uri_from_cert(X509 *cert, struct buffer *out, char **err); +int ssl_ocsp_create_request_details(const OCSP_CERTID *certid, struct buffer *req_url, + struct buffer *req_body, char **err); +int ssl_ocsp_check_response(STACK_OF(X509) *chain, X509 *issuer, + struct buffer *respbuf, char **err); + +int ssl_create_ocsp_update_task(char **err); +void ssl_destroy_ocsp_update_task(void); + +int ssl_ocsp_update_insert(struct certificate_ocsp *ocsp); + +int ocsp_update_check_cfg_consistency(struct 
ckch_store *store, struct crtlist_entry *entry, char *crt_path, char **err); + +#endif /* (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) */ + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_OCSP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/ssl_sock-t.h b/include/haproxy/ssl_sock-t.h new file mode 100644 index 0000000..fdf41a7 --- /dev/null +++ b/include/haproxy/ssl_sock-t.h @@ -0,0 +1,323 @@ +/* + * include/haproxy/ssl_sock-t.h + * SSL settings for listeners and servers + * + * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_SOCK_T_H +#define _HAPROXY_SSL_SOCK_T_H +#ifdef USE_OPENSSL + +#include <import/ebtree-t.h> + +#include <haproxy/buf-t.h> +#include <haproxy/connection-t.h> /* struct wait_event */ +#include <haproxy/listener-t.h> +#include <haproxy/openssl-compat.h> +#include <haproxy/ssl_ckch-t.h> +#include <haproxy/ssl_crtlist-t.h> +#include <haproxy/thread-t.h> + +/* ***** READ THIS before adding code here! ***** + * + * Due to API incompatibilities between multiple OpenSSL versions and their + * derivatives, it's often tempting to add macros to (re-)define certain + * symbols. 
Please do not do this here, and do it in common/openssl-compat.h + * exclusively so that the whole code consistently uses the same macros. + * + * Whenever possible if a macro is missing in certain versions, it's better + * to conditionally define it in openssl-compat.h than using lots of ifdefs. + */ + +/* Warning, these are bits, not integers! */ +#define SSL_SOCK_ST_FL_VERIFY_DONE 0x00000001 +#define SSL_SOCK_ST_FL_16K_WBFSIZE 0x00000002 +#define SSL_SOCK_SEND_UNLIMITED 0x00000004 +#define SSL_SOCK_RECV_HEARTBEAT 0x00000008 +#define SSL_SOCK_SEND_MORE 0x00000010 /* set MSG_MORE at lower levels */ + +/* bits 0xFFFFFF00 are reserved to store verify errors. + * The CA en CRT error codes will be stored on 7 bits each + * (since the max verify error code does not exceed 127) + * and the CA error depth will be stored on 4 bits. + */ + +/* Verify errors macros */ +#define SSL_SOCK_CA_ERROR_TO_ST(e) (((e > 127) ? 127 : e) << (8)) +#define SSL_SOCK_CAEDEPTH_TO_ST(d) (((d > 15) ? 15 : d) << (7+8)) +#define SSL_SOCK_CRTERROR_TO_ST(e) (((e > 127) ? 
127 : e) << (4+7+8)) + +#define SSL_SOCK_ST_TO_CA_ERROR(s) ((s >> (8)) & 127) +#define SSL_SOCK_ST_TO_CAEDEPTH(s) ((s >> (7+8)) & 15) +#define SSL_SOCK_ST_TO_CRTERROR(s) ((s >> (4+7+8)) & 127) + +/* ssl_methods flags for ssl options */ +#define MC_SSL_O_ALL 0x0000 +#define MC_SSL_O_NO_SSLV3 0x0001 /* disable SSLv3 */ +#define MC_SSL_O_NO_TLSV10 0x0002 /* disable TLSv10 */ +#define MC_SSL_O_NO_TLSV11 0x0004 /* disable TLSv11 */ +#define MC_SSL_O_NO_TLSV12 0x0008 /* disable TLSv12 */ +#define MC_SSL_O_NO_TLSV13 0x0010 /* disable TLSv13 */ + +/* file to guess during file loading */ +#define SSL_GF_NONE 0x00000000 /* Don't guess any file, only open the files specified in the configuration files */ +#define SSL_GF_BUNDLE 0x00000001 /* try to open the bundles */ +#define SSL_GF_SCTL 0x00000002 /* try to open the .sctl file */ +#define SSL_GF_OCSP 0x00000004 /* try to open the .ocsp file */ +#define SSL_GF_OCSP_ISSUER 0x00000008 /* try to open the .issuer file if an OCSP file was loaded */ +#define SSL_GF_KEY 0x00000010 /* try to open the .key file to load a private key */ + +#define SSL_GF_ALL (SSL_GF_BUNDLE|SSL_GF_SCTL|SSL_GF_OCSP|SSL_GF_OCSP_ISSUER|SSL_GF_KEY) + +/* ssl_methods versions */ +enum { + CONF_TLSV_NONE = 0, + CONF_TLSV_MIN = 1, + CONF_SSLV3 = 1, + CONF_TLSV10 = 2, + CONF_TLSV11 = 3, + CONF_TLSV12 = 4, + CONF_TLSV13 = 5, + CONF_TLSV_MAX = 5, +}; + +/* server and bind verify method, it uses a global value as default */ +enum { + SSL_SOCK_VERIFY_DEFAULT = 0, + SSL_SOCK_VERIFY_REQUIRED = 1, + SSL_SOCK_VERIFY_OPTIONAL = 2, + SSL_SOCK_VERIFY_NONE = 3, +}; + +/* bind ocsp update mode */ +enum { + SSL_SOCK_OCSP_UPDATE_DFLT = 0, + SSL_SOCK_OCSP_UPDATE_OFF = 1, + SSL_SOCK_OCSP_UPDATE_ON = 2, +}; + +/* states of the CLI IO handler for 'set ssl cert' */ +enum { + SETCERT_ST_INIT = 0, + SETCERT_ST_GEN, + SETCERT_ST_INSERT, + SETCERT_ST_FIN, +}; + +#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL) +typedef enum { SET_CLIENT, SET_SERVER } set_context_func; +#else /* openssl 
>= 1.1.0 */ +typedef enum { SET_MIN, SET_MAX } set_context_func; +#endif + +struct methodVersions { + int option; + uint16_t flag; + void (*ctx_set_version)(SSL_CTX *, set_context_func); + void (*ssl_set_version)(SSL *, set_context_func); + const char *name; +}; + +struct pkey_info { + uint8_t sig; /* TLSEXT_signature_[rsa,ecdsa,...] */ + uint16_t bits; /* key size in bits */ +}; + +struct sni_ctx { + SSL_CTX *ctx; /* context associated to the certificate */ + int order; /* load order for the certificate */ + unsigned int neg:1; /* reject if match */ + unsigned int wild:1; /* wildcard sni */ + struct pkey_info kinfo; /* pkey info */ + struct ssl_bind_conf *conf; /* ptr to a crtlist's ssl_conf, must not be free from here */ + struct list by_ckch_inst; /* chained in ckch_inst's list of sni_ctx */ + struct ckch_inst *ckch_inst; /* instance used to create this sni_ctx */ + struct ebmb_node name; /* node holding the servername value */ +}; + +struct tls_sess_key_128 { + unsigned char name[16]; + unsigned char aes_key[16]; + unsigned char hmac_key[16]; +} __attribute__((packed)); + +struct tls_sess_key_256 { + unsigned char name[16]; + unsigned char aes_key[32]; + unsigned char hmac_key[32]; +} __attribute__((packed)); + +union tls_sess_key{ + unsigned char name[16]; + struct tls_sess_key_128 key_128; + struct tls_sess_key_256 key_256; +} __attribute__((packed)); + +struct tls_keys_ref { + struct list list; /* Used to chain refs. */ + char *filename; + int unique_id; /* Each pattern reference have unique id. */ + int refcount; /* number of users of this tls_keys_ref. 
*/ + union tls_sess_key *tlskeys; + int tls_ticket_enc_index; + int key_size_bits; + __decl_thread(HA_RWLOCK_T lock); /* lock used to protect the ref */ +}; + +/* shared ssl session */ +struct sh_ssl_sess_hdr { + struct ebmb_node key; + unsigned char key_data[SSL_MAX_SSL_SESSION_ID_LENGTH]; +}; + +/* issuer chain store with hash of Subject Key Identifier + certificate/issuer matching is verify with X509_check_issued +*/ +struct issuer_chain { + struct eb64_node node; + STACK_OF(X509) *chain; + char *path; +}; + +struct connection; + +typedef void (*ssl_sock_msg_callback_func)(struct connection *conn, + int write_p, int version, int content_type, + const void *buf, size_t len, SSL *ssl); + +/* This structure contains a function pointer <func> that is called + * when observing received or sent SSL/TLS protocol messages, such as + * handshake messages or other events that can occur during processing. + */ +struct ssl_sock_msg_callback { + ssl_sock_msg_callback_func func; + struct list list; /* list of registered callbacks */ +}; + +/* This memory pool is used for capturing clienthello parameters. 
*/ +struct ssl_capture { + ullong xxh64; + ushort protocol_version; + ushort ciphersuite_len; + ushort extensions_len; + ushort ec_len; + uint ciphersuite_offset; + uint extensions_offset; + uint ec_offset; + uint ec_formats_offset; + uchar ec_formats_len; + char data[VAR_ARRAY]; +}; + +#ifdef HAVE_SSL_KEYLOG +#define SSL_KEYLOG_MAX_SECRET_SIZE 129 + +struct ssl_keylog { + /* + * https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS/Key_Log_Format + */ + char *client_random; + + /* TLS 1.3 */ + char *client_early_traffic_secret; + char *client_handshake_traffic_secret; + char *server_handshake_traffic_secret; + char *client_traffic_secret_0; + char *server_traffic_secret_0; + char *exporter_secret; + char *early_exporter_secret; +}; +#endif + +struct ssl_sock_ctx { + struct connection *conn; + SSL *ssl; + BIO *bio; + const struct xprt_ops *xprt; + void *xprt_ctx; + struct wait_event wait_event; + struct wait_event *subs; + int xprt_st; /* transport layer state, initialized to zero */ + unsigned long error_code; /* last error code of the error stack */ + struct buffer early_buf; /* buffer to store the early data received */ + int sent_early_data; /* Amount of early data we sent so far */ + +#ifdef USE_QUIC + struct quic_conn *qc; +#endif +}; + +struct global_ssl { + char *crt_base; /* base directory path for certificates */ + char *ca_base; /* base directory path for CAs and CRLs */ + char *issuers_chain_path; /* from "issuers-chain-path" */ + int skip_self_issued_ca; + + int async; /* whether we use ssl async mode */ + + char *listen_default_ciphers; + char *connect_default_ciphers; +#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES + char *listen_default_ciphersuites; + char *connect_default_ciphersuites; +#endif +#if defined(SSL_CTX_set1_curves_list) + char *listen_default_curves; + char *connect_default_curves; +#endif +#if defined(SSL_CTX_set1_sigalgs_list) + char *listen_default_sigalgs; + char *connect_default_sigalgs; +#endif +#if 
defined(SSL_CTX_set1_sigalgs_list) + char *listen_default_client_sigalgs; + char *connect_default_client_sigalgs; +#endif + int listen_default_ssloptions; + int connect_default_ssloptions; + struct tls_version_filter listen_default_sslmethods; + struct tls_version_filter connect_default_sslmethods; + + int private_cache; /* Force to use a private session cache even if nbproc > 1 */ + unsigned int life_time; /* SSL session lifetime in seconds */ + unsigned int max_record; /* SSL max record size */ + unsigned int hard_max_record; /* SSL max record size hard limit */ + unsigned int default_dh_param; /* SSL maximum DH parameter size */ + int ctx_cache; /* max number of entries in the ssl_ctx cache. */ + int capture_buffer_size; /* Size of the capture buffer. */ + int keylog; /* activate keylog */ + int extra_files; /* which files not defined in the configuration file are we looking for */ + int extra_files_noext; /* whether we remove the extension when looking up a extra file */ + +#ifndef OPENSSL_NO_OCSP + struct { + unsigned int delay_max; + unsigned int delay_min; + } ocsp_update; +#endif +}; + +/* The order here matters for picking a default context, + * keep the most common keytype at the bottom of the list + */ +extern const char *SSL_SOCK_KEYTYPE_NAMES[]; + +#define SSL_SOCK_NUM_KEYTYPES 3 + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_SOCK_T_H */ diff --git a/include/haproxy/ssl_sock.h b/include/haproxy/ssl_sock.h new file mode 100644 index 0000000..02d5b02 --- /dev/null +++ b/include/haproxy/ssl_sock.h @@ -0,0 +1,191 @@ +/* + * include/haproxy/ssl_sock.h + * This file contains definition for ssl stream socket operations + * + * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_SOCK_H +#define _HAPROXY_SSL_SOCK_H +#ifdef USE_OPENSSL + + +#include <haproxy/connection.h> +#include <haproxy/openssl-compat.h> +#include <haproxy/pool-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/ssl_sock-t.h> +#include <haproxy/thread.h> + +extern struct list tlskeys_reference; +extern struct eb_root ckchs_tree; +extern struct eb_root crtlists_tree; +extern struct eb_root cafile_tree; +extern int sctl_ex_index; +extern struct global_ssl global_ssl; +extern struct ssl_crtlist_kw ssl_crtlist_kws[]; +extern struct methodVersions methodVersions[]; +__decl_thread(extern HA_SPINLOCK_T ckch_lock); +extern struct pool_head *pool_head_ssl_capture; +extern int ssl_app_data_index; +#ifdef USE_QUIC +extern int ssl_qc_app_data_index; +#endif /* USE_QUIC */ +extern unsigned int openssl_engines_initialized; +extern int nb_engines; +extern struct xprt_ops ssl_sock; +extern int ssl_capture_ptr_index; +extern int ssl_keylog_index; +extern int ssl_client_sni_index; +extern struct pool_head *pool_head_ssl_keylog; +extern struct pool_head *pool_head_ssl_keylog_str; +extern struct list openssl_providers; + +int ssl_sock_prep_ctx_and_inst(struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf, + SSL_CTX *ctx, struct ckch_inst *ckch_inst, char **err); +int ssl_sock_prep_srv_ctx_and_inst(const struct server *srv, SSL_CTX *ctx, + struct ckch_inst *ckch_inst); +int ssl_sock_prepare_all_ctx(struct bind_conf *bind_conf); +int ssl_sock_prepare_bind_conf(struct bind_conf 
*bind_conf); +void ssl_sock_destroy_bind_conf(struct bind_conf *bind_conf); +int ssl_sock_prepare_srv_ctx(struct server *srv); +void ssl_sock_free_srv_ctx(struct server *srv); +void ssl_sock_free_all_ctx(struct bind_conf *bind_conf); +int ssl_sock_get_alpn(const struct connection *conn, void *xprt_ctx, + const char **str, int *len); +int ssl_sock_load_ca(struct bind_conf *bind_conf); +void ssl_sock_free_ca(struct bind_conf *bind_conf); +int ssl_bio_and_sess_init(struct connection *conn, SSL_CTX *ssl_ctx, + SSL **ssl, BIO **bio, BIO_METHOD *bio_meth, void *ctx); +const char *ssl_sock_get_sni(struct connection *conn); +const char *ssl_sock_get_cert_sig(struct connection *conn); +const char *ssl_sock_get_cipher_name(struct connection *conn); +const char *ssl_sock_get_proto_version(struct connection *conn); +int ssl_sock_parse_alpn(char *arg, char **alpn_str, int *alpn_len, char **err); +void ssl_sock_set_alpn(struct connection *conn, const unsigned char *, int); +void ssl_sock_set_servername(struct connection *conn, const char *hostname); + +int ssl_sock_get_cert_used_sess(struct connection *conn); +int ssl_sock_get_cert_used_conn(struct connection *conn); +int ssl_sock_get_remote_common_name(struct connection *conn, + struct buffer *out); +int ssl_sock_get_pkey_algo(struct connection *conn, struct buffer *out); +unsigned int ssl_sock_get_verify_result(struct connection *conn); +#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0) +int ssl_sock_update_tlskey_ref(struct tls_keys_ref *ref, + struct buffer *tlskey); +int ssl_sock_update_tlskey(char *filename, struct buffer *tlskey, char **err); +struct tls_keys_ref *tlskeys_ref_lookup(const char *filename); +struct tls_keys_ref *tlskeys_ref_lookupid(int unique_id); +#endif +#ifndef OPENSSL_NO_DH +HASSL_DH *ssl_sock_get_dh_from_bio(BIO *bio); +int ssl_sock_load_global_dh_param_from_file(const char *filename); +void ssl_free_dh(void); +#endif +void ssl_free_engines(void); +#ifdef HAVE_SSL_PROVIDERS +void 
ssl_unload_providers(void); +#endif + +#ifdef HAVE_SSL_CLIENT_HELLO_CB +int ssl_sock_switchctx_err_cbk(SSL *ssl, int *al, void *priv); +# ifdef OPENSSL_IS_BORINGSSL +int ssl_sock_switchctx_cbk(const struct ssl_early_callback_ctx *ctx); +# else /* ! OPENSSL_IS_BORINGSSL */ +int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg); +# endif +#else /* ! HAVE_SSL_CLIENT_HELLO_CB */ +int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv); +#endif + +int increment_sslconn(); +SSL_CTX *ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl); +SSL_CTX *ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf); +int ssl_sock_set_generated_cert(SSL_CTX *ctx, unsigned int key, struct bind_conf *bind_conf); +unsigned int ssl_sock_generated_cert_key(const void *data, size_t len); +void ssl_sock_load_cert_sni(struct ckch_inst *ckch_inst, struct bind_conf *bind_conf); +#ifdef SSL_MODE_ASYNC +void ssl_async_fd_handler(int fd); +void ssl_async_fd_free(int fd); +#endif +struct issuer_chain* ssl_get0_issuer_chain(X509 *cert); +int ssl_load_global_issuer_from_BIO(BIO *in, char *fp, char **err); +int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err); +int ssl_sock_load_srv_cert(char *path, struct server *server, int create_if_none, char **err); +void ssl_free_global_issuers(void); +int ssl_initialize_random(void); +int ssl_sock_load_cert_list_file(char *file, int dir, struct bind_conf *bind_conf, struct proxy *curproxy, char **err); +int ssl_init_single_engine(const char *engine_id, const char *def_algorithms); +#ifdef HAVE_SSL_PROVIDERS +int ssl_init_provider(const char *provider_name); +#endif +#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL) +int ssl_get_ocspresponse_detail(unsigned char *ocsp_certid, struct buffer *out); +int ssl_ocsp_response_print(struct buffer *ocsp_response, struct buffer *out); +#endif + +/* ssl shctx macro */ + +#define 
sh_ssl_sess_tree_delete(s) ebmb_delete(&(s)->key); + +#define sh_ssl_sess_tree_insert(s) (struct sh_ssl_sess_hdr *)ebmb_insert(sh_ssl_sess_tree, \ + &(s)->key, SSL_MAX_SSL_SESSION_ID_LENGTH); + +#define sh_ssl_sess_tree_lookup(k) (struct sh_ssl_sess_hdr *)ebmb_lookup(sh_ssl_sess_tree, \ + (k), SSL_MAX_SSL_SESSION_ID_LENGTH); + +/* Registers the function <func> in order to be called on SSL/TLS protocol + * message processing. + */ +int ssl_sock_register_msg_callback(ssl_sock_msg_callback_func func); + +SSL *ssl_sock_get_ssl_object(struct connection *conn); + +/* Returns 1 if bit <bit_index> is set in <bitfield>, otherwise 0. <bitfield> + * is an array of IGNERR_BF_SIZE elements of which 64 bits each are used. + * Negative indexes and indexes beyond the array report 0. The masked word is + * compared to zero directly instead of first being stored into an int, since + * that conversion drops bits 32..63 where int is 32 bits wide and made such + * bits always read as unset. + */ +static inline int cert_ignerr_bitfield_get(const unsigned long long *bitfield, int bit_index) +{ + int byte_index; + + if (bit_index < 0) + return 0; + + byte_index = bit_index >> 6; + if (byte_index >= IGNERR_BF_SIZE) + return 0; + + return (bitfield[byte_index] & (1ULL << (bit_index & 0x3F))) != 0; +} + +/* Sets bit <bit_index> in <bitfield>. Negative indexes and indexes beyond + * IGNERR_BF_SIZE elements are silently ignored. + */ +static inline void cert_ignerr_bitfield_set(unsigned long long *bitfield, int bit_index) +{ + int byte_index = bit_index >> 6; + + if (bit_index >= 0 && byte_index < IGNERR_BF_SIZE) + bitfield[byte_index] |= (1ULL << (bit_index & 0x3F)); +} + +/* Sets all bits of the IGNERR_BF_SIZE elements of <bitfield>. */ +static inline void cert_ignerr_bitfield_set_all(unsigned long long *bitfield) +{ + memset(bitfield, -1, IGNERR_BF_SIZE*sizeof(*bitfield)); +} + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_SOCK_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/ssl_utils.h b/include/haproxy/ssl_utils.h new file mode 100644 index 0000000..3391efd --- /dev/null +++ b/include/haproxy/ssl_utils.h @@ -0,0 +1,51 @@ +/* + * include/haproxy/ssl_utils.h + * + * Utility functions for SSL: + * Mostly generic functions that retrieve information from certificates + * + * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by 
the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_UTILS_H +#define _HAPROXY_SSL_UTILS_H + +#ifdef USE_OPENSSL + +#include <haproxy/buf-t.h> +#include <haproxy/openssl-compat.h> + +int cert_get_pkey_algo(X509 *crt, struct buffer *out); +int ssl_sock_get_serial(X509 *crt, struct buffer *out); +int ssl_sock_crt2der(X509 *crt, struct buffer *out); +int ssl_sock_get_time(ASN1_TIME *tm, struct buffer *out); +int ssl_sock_get_dn_entry(X509_NAME *a, const struct buffer *entry, int pos, + struct buffer *out); +int ssl_sock_get_dn_formatted(X509_NAME *a, const struct buffer *format, struct buffer *out); +int ssl_sock_get_dn_oneline(X509_NAME *a, struct buffer *out); +X509* ssl_sock_get_peer_certificate(SSL *ssl); +X509* ssl_sock_get_verified_chain_root(SSL *ssl); +unsigned int openssl_version_parser(const char *version); +void exclude_tls_grease(char *input, int len, struct buffer *output); +int x509_v_err_str_to_int(const char *str); +const char *x509_v_err_int_to_str(int code); +long asn1_generalizedtime_to_epoch(ASN1_GENERALIZEDTIME *d); + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_UTILS_H */ + diff --git a/include/haproxy/stats-t.h b/include/haproxy/stats-t.h new file mode 100644 index 0000000..34a4cc2 --- /dev/null +++ b/include/haproxy/stats-t.h @@ -0,0 +1,617 @@ +/* + * include/haproxy/stats-t.h + * This file provides structures and types for stats. 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STATS_T_H +#define _HAPROXY_STATS_T_H + +#include <haproxy/api-t.h> + +/* Flags for applet.ctx.stats.flags */ +#define STAT_FMT_HTML 0x00000001 /* dump the stats in HTML format */ +#define STAT_FMT_TYPED 0x00000002 /* use the typed output format */ +#define STAT_FMT_JSON 0x00000004 /* dump the stats in JSON format */ +#define STAT_HIDE_DOWN 0x00000008 /* hide 'down' servers in the stats page */ +#define STAT_NO_REFRESH 0x00000010 /* do not automatically refresh the stats page */ +#define STAT_ADMIN 0x00000020 /* indicate a stats admin level */ +#define STAT_CHUNKED 0x00000040 /* use chunked encoding (HTTP/1.1) */ +#define STAT_JSON_SCHM 0x00000080 /* dump the json schema */ + +#define STAT_HIDEVER 0x00000100 /* conf: do not report the version and reldate */ +#define STAT_SHNODE 0x00000200 /* conf: show node name */ +#define STAT_SHDESC 0x00000400 /* conf: show description */ +#define STAT_SHLGNDS 0x00000800 /* conf: show legends */ +#define STAT_SHOW_FDESC 0x00001000 /* show the field descriptions when possible */ +#define STAT_SHMODULES 0x00002000 /* conf: show modules */ +#define STAT_HIDE_MAINT 0x00004000 /* hide maint/disabled servers */ +#define STAT_CONVDONE 0x00008000 /* conf: rules 
conversion done */ +#define STAT_USE_FLOAT 0x00010000 /* use floats where possible in the outputs */ + +#define STAT_BOUND 0x00800000 /* bound statistics to selected proxies/types/services */ +#define STAT_STARTED 0x01000000 /* some output has occurred */ + +#define STAT_FMT_MASK 0x00000007 + +#define STATS_TYPE_FE 0 +#define STATS_TYPE_BE 1 +#define STATS_TYPE_SV 2 +#define STATS_TYPE_SO 3 + +#define STATS_DOMAIN (0) /* used for bitshifting, type of statistics: proxy or dns */ +#define STATS_PX_CAP (8) /* used for bitshifting, differentiate obj1 type for proxy statistics */ + +/* HTTP stats : applet.st0 */ +enum { + STAT_HTTP_INIT = 0, /* Initial state */ + STAT_HTTP_HEAD, /* send headers before dump */ + STAT_HTTP_DUMP, /* dumping stats */ + STAT_HTTP_POST, /* waiting post data */ + STAT_HTTP_LAST, /* sending last chunk of response */ + STAT_HTTP_DONE, /* dump is finished */ + STAT_HTTP_END, /* finished */ +}; + +/* status codes available for the stats admin page */ +enum { + STAT_STATUS_INIT = 0, + STAT_STATUS_DENY, /* action denied */ + STAT_STATUS_DONE, /* the action is successful */ + STAT_STATUS_ERRP, /* an error occurred due to invalid values in parameters */ + STAT_STATUS_EXCD, /* an error occurred because the buffer couldn't store all data */ + STAT_STATUS_NONE, /* nothing happened (no action chosen or servers state didn't change) */ + STAT_STATUS_PART, /* the action is partially successful */ + STAT_STATUS_UNKN, /* an unknown error occurred, shouldn't happen */ + STAT_STATUS_IVAL, /* invalid requests (chunked or invalid post) */ + STAT_STATUS_SIZE +}; + +/* HTML form to limit output scope */ +#define STAT_SCOPE_TXT_MAXLEN 20 /* max len for scope substring */ +#define STAT_SCOPE_INPUT_NAME "scope" /* pattern form scope name <input> in html form */ +#define STAT_SCOPE_PATTERN "?" 
STAT_SCOPE_INPUT_NAME "=" + +/* Actions available for the stats admin forms */ +enum { + ST_ADM_ACTION_NONE = 0, + + /* enable/disable health checks */ + ST_ADM_ACTION_DHLTH, + ST_ADM_ACTION_EHLTH, + + /* force health check status */ + ST_ADM_ACTION_HRUNN, + ST_ADM_ACTION_HNOLB, + ST_ADM_ACTION_HDOWN, + + /* enable/disable agent checks */ + ST_ADM_ACTION_DAGENT, + ST_ADM_ACTION_EAGENT, + + /* force agent check status */ + ST_ADM_ACTION_ARUNN, + ST_ADM_ACTION_ADOWN, + + /* set admin state */ + ST_ADM_ACTION_READY, + ST_ADM_ACTION_DRAIN, + ST_ADM_ACTION_MAINT, + ST_ADM_ACTION_SHUTDOWN, + /* these are the ancient actions, still available for compatibility */ + ST_ADM_ACTION_DISABLE, + ST_ADM_ACTION_ENABLE, + ST_ADM_ACTION_STOP, + ST_ADM_ACTION_START, +}; + + +/* data transmission states for the stats responses */ +enum stat_state { + STAT_STATE_INIT = 0, + STAT_STATE_HEAD, + STAT_STATE_INFO, + STAT_STATE_LIST, + STAT_STATE_END, + STAT_STATE_FIN, +}; + +/* kept in 2.6 only for compatibility with legacy code. Will be removed in 2.7, + * please do not use these values anymore and define your own! + */ +enum obsolete_stat_state { + STAT_ST_INIT ENUM_ATTRIBUTE((deprecated)) = 0, + STAT_ST_HEAD ENUM_ATTRIBUTE((deprecated)), + STAT_ST_INFO ENUM_ATTRIBUTE((deprecated)), + STAT_ST_LIST ENUM_ATTRIBUTE((deprecated)), + STAT_ST_END ENUM_ATTRIBUTE((deprecated)), + STAT_ST_FIN ENUM_ATTRIBUTE((deprecated)), +}; + +/* data transmission states for the stats responses inside a proxy */ +enum { + STAT_PX_ST_INIT = 0, + STAT_PX_ST_TH, + STAT_PX_ST_FE, + STAT_PX_ST_LI, + STAT_PX_ST_SV, + STAT_PX_ST_BE, + STAT_PX_ST_END, + STAT_PX_ST_FIN, +}; + +/* This level of detail is needed to let the stats consumer know how to + * aggregate them (eg: between processes or cluster nodes). Only a few + * combinations are actually in use, though the mechanism tends to make + * this easy to extend to future uses. 
+ * + * Each reported stats element is typed based on 4 dimensions : + * - the field format : it indicates the validity range of the reported value, + * its limits and how to parse it. 6 types are currently supported : + * empty, signed 32-bit integer, unsigned 32-bit integer, signed 64-bit + * integer, unsigned 64-bit integer, string + * + * - the field origin : how was the value retrieved and what it depends on. + * 5 origins are currently defined : product (eg: haproxy version or + * release date), configuration (eg: a configured limit), key (identifier + * used to group values at a certain level), metric (a measure of something), + * status (something discrete which by definition cannot be averaged nor + * aggregated, such as "listening" versus "full"). + * + * - the field nature : what does the data represent, implying how to aggregate + * it. At least 9 different natures are expected : counter (an increasing + * positive counter that may wrap when its type is overflown such as a byte + * counter), gauge (a measure at any instant that may vary, such as a + * concurrent connection count), a limit (eg: maximum acceptable concurrent + * connections), a minimum (eg: minimum free memory over a period), a + * maximum (eg: highest queue length over a period), an event rate (eg: + * incoming connections per second), a duration that is often aggregated by + * taking the max (eg: service uptime), an age that generally reports the + * last time an event appeared and which generally is aggregated by taking + * the most recent event hence the smallest one, the time which reports a + * discrete instant and cannot obviously be averaged either, a name which + * will generally be the name of an entity (such as a server name or cookie + * name), an output which is mostly used for various unsafe strings that are + * retrieved (eg: last check output, product name, description, etc), and an + * average which indicates that the value is relative and meant to be averaged + * between 
all nodes (eg: response time, throttling, etc). + * + * - the field scope : if the value is shared with other elements, which ones + * are expected to report the same value. The first scope with the least + * share is the process (most common one) where all data are only relevant + * to the process being consulted. The next one is the service, which is + * valid for all processes launched together (eg: shared SSL cache usage + * among processes). The next one is the system (such as the OS version) + * which will report the same information for all instances running on + * the same node. The next one is the cluster, which indicates that the + * information is shared with other nodes that are part of the same cluster. + * Stick-tables may carry such cluster-wide information. Larger scopes may + * be added in the future such as datacenter, country, continent, planet, + * galaxy, universe, etc. + * + * All this information will be encoded in the field as a bit field so that + * it is easy to pass composite values by simply ORing elements above, and + * to ease the definition of a few field types for the most common field + * combinations. + * + * The enums try to be arranged so that most likely characteristics are + * assigned the value zero, making it easier to add new fields. + * + * Field format has precedence over the other parts of the type. Please avoid + * declaring extra formats unless absolutely needed. The first one, FF_EMPTY, + * must absolutely have value zero so that it is what is returned after a + * memset(0). Furthermore, the producer is responsible for ensuring that when + * this format is set, all other bits of the type as well as the values in the + * union only contain zeroes. This makes it easier for the consumer to use the + * values as the expected type. 
+ */ + +enum field_format { + FF_EMPTY = 0x00000000, + FF_S32 = 0x00000001, + FF_U32 = 0x00000002, + FF_S64 = 0x00000003, + FF_U64 = 0x00000004, + FF_STR = 0x00000005, + FF_FLT = 0x00000006, + FF_MASK = 0x000000FF, +}; + +enum field_origin { + FO_METRIC = 0x00000000, + FO_STATUS = 0x00000100, + FO_KEY = 0x00000200, + FO_CONFIG = 0x00000300, + FO_PRODUCT = 0x00000400, + FO_MASK = 0x0000FF00, +}; + +enum field_nature { + FN_GAUGE = 0x00000000, + FN_LIMIT = 0x00010000, + FN_MIN = 0x00020000, + FN_MAX = 0x00030000, + FN_RATE = 0x00040000, + FN_COUNTER = 0x00050000, + FN_DURATION = 0x00060000, + FN_AGE = 0x00070000, + FN_TIME = 0x00080000, + FN_NAME = 0x00090000, + FN_OUTPUT = 0x000A0000, + FN_AVG = 0x000B0000, + FN_MASK = 0x00FF0000, +}; + +enum field_scope { + FS_PROCESS = 0x00000000, + FS_SERVICE = 0x01000000, + FS_SYSTEM = 0x02000000, + FS_CLUSTER = 0x03000000, + FS_MASK = 0xFF000000, +}; + +/* Show info fields for CLI output. For any field added here, please add the + * text representation in the info_fields array. Please only append at the end, + * before the INF_TOTAL_FIELDS entry, and never insert anything in the middle + * nor at the beginning. 
+ */ +enum info_field { + INF_NAME, + INF_VERSION, + INF_RELEASE_DATE, + INF_NBTHREAD, + INF_NBPROC, + INF_PROCESS_NUM, + INF_PID, + INF_UPTIME, + INF_UPTIME_SEC, + INF_MEMMAX_MB, + INF_POOL_ALLOC_MB, + INF_POOL_USED_MB, + INF_POOL_FAILED, + INF_ULIMIT_N, + INF_MAXSOCK, + INF_MAXCONN, + INF_HARD_MAXCONN, + INF_CURR_CONN, + INF_CUM_CONN, + INF_CUM_REQ, + INF_MAX_SSL_CONNS, + INF_CURR_SSL_CONNS, + INF_CUM_SSL_CONNS, + INF_MAXPIPES, + INF_PIPES_USED, + INF_PIPES_FREE, + INF_CONN_RATE, + INF_CONN_RATE_LIMIT, + INF_MAX_CONN_RATE, + INF_SESS_RATE, + INF_SESS_RATE_LIMIT, + INF_MAX_SESS_RATE, + INF_SSL_RATE, + INF_SSL_RATE_LIMIT, + INF_MAX_SSL_RATE, + INF_SSL_FRONTEND_KEY_RATE, + INF_SSL_FRONTEND_MAX_KEY_RATE, + INF_SSL_FRONTEND_SESSION_REUSE_PCT, + INF_SSL_BACKEND_KEY_RATE, + INF_SSL_BACKEND_MAX_KEY_RATE, + INF_SSL_CACHE_LOOKUPS, + INF_SSL_CACHE_MISSES, + INF_COMPRESS_BPS_IN, + INF_COMPRESS_BPS_OUT, + INF_COMPRESS_BPS_RATE_LIM, + INF_ZLIB_MEM_USAGE, + INF_MAX_ZLIB_MEM_USAGE, + INF_TASKS, + INF_RUN_QUEUE, + INF_IDLE_PCT, + INF_NODE, + INF_DESCRIPTION, + INF_STOPPING, + INF_JOBS, + INF_UNSTOPPABLE_JOBS, + INF_LISTENERS, + INF_ACTIVE_PEERS, + INF_CONNECTED_PEERS, + INF_DROPPED_LOGS, + INF_BUSY_POLLING, + INF_FAILED_RESOLUTIONS, + INF_TOTAL_BYTES_OUT, + INF_TOTAL_SPLICED_BYTES_OUT, + INF_BYTES_OUT_RATE, + INF_DEBUG_COMMANDS_ISSUED, + INF_CUM_LOG_MSGS, + INF_BUILD_INFO, + INF_MEMMAX_BYTES, + INF_POOL_ALLOC_BYTES, + INF_POOL_USED_BYTES, + INF_START_TIME_SEC, + INF_TAINTED, + INF_WARNINGS, + INF_MAXCONN_REACHED, + INF_BOOTTIME_MS, + INF_NICED_TASKS, + + /* must always be the last one */ + INF_TOTAL_FIELDS +}; + + +/* Stats fields for CSV output. For any field added here, please add the text + * representation in the stat_fields array. 
Please only append at the end, + * before the ST_F_TOTAL_FIELDS entry, and never insert anything in the middle + * nor at the beginning.When adding an entry here, one must always add a + * corresponding one in stat_fields[] otherwise Lua's get_stats() will break, + * and "show stats" will show a null. + */ +enum stat_field { + ST_F_PXNAME, + ST_F_SVNAME, + ST_F_QCUR, + ST_F_QMAX, + ST_F_SCUR, + ST_F_SMAX, + ST_F_SLIM, + ST_F_STOT, + ST_F_BIN , + ST_F_BOUT, + ST_F_DREQ, + ST_F_DRESP, + ST_F_EREQ, + ST_F_ECON, + ST_F_ERESP, + ST_F_WRETR, + ST_F_WREDIS, + ST_F_STATUS, + ST_F_WEIGHT, + ST_F_ACT, + ST_F_BCK, + ST_F_CHKFAIL, + ST_F_CHKDOWN, + ST_F_LASTCHG, + ST_F_DOWNTIME, + ST_F_QLIMIT, + ST_F_PID, + ST_F_IID, + ST_F_SID, + ST_F_THROTTLE, + ST_F_LBTOT, + ST_F_TRACKED, + ST_F_TYPE, + ST_F_RATE, + ST_F_RATE_LIM, + ST_F_RATE_MAX, + ST_F_CHECK_STATUS, + ST_F_CHECK_CODE, + ST_F_CHECK_DURATION, + ST_F_HRSP_1XX, + ST_F_HRSP_2XX, + ST_F_HRSP_3XX, + ST_F_HRSP_4XX, + ST_F_HRSP_5XX, + ST_F_HRSP_OTHER, + ST_F_HANAFAIL, + ST_F_REQ_RATE, + ST_F_REQ_RATE_MAX, + ST_F_REQ_TOT, + ST_F_CLI_ABRT, + ST_F_SRV_ABRT, + ST_F_COMP_IN, + ST_F_COMP_OUT, + ST_F_COMP_BYP, + ST_F_COMP_RSP, + ST_F_LASTSESS, + ST_F_LAST_CHK, + ST_F_LAST_AGT, + ST_F_QTIME, + ST_F_CTIME, + ST_F_RTIME, + ST_F_TTIME, + ST_F_AGENT_STATUS, + ST_F_AGENT_CODE, + ST_F_AGENT_DURATION, + ST_F_CHECK_DESC, + ST_F_AGENT_DESC, + ST_F_CHECK_RISE, + ST_F_CHECK_FALL, + ST_F_CHECK_HEALTH, + ST_F_AGENT_RISE, + ST_F_AGENT_FALL, + ST_F_AGENT_HEALTH, + ST_F_ADDR, + ST_F_COOKIE, + ST_F_MODE, + ST_F_ALGO, + ST_F_CONN_RATE, + ST_F_CONN_RATE_MAX, + ST_F_CONN_TOT, + ST_F_INTERCEPTED, + ST_F_DCON, + ST_F_DSES, + ST_F_WREW, + ST_F_CONNECT, + ST_F_REUSE, + ST_F_CACHE_LOOKUPS, + ST_F_CACHE_HITS, + ST_F_SRV_ICUR, + ST_F_SRV_ILIM, + ST_F_QT_MAX, + ST_F_CT_MAX, + ST_F_RT_MAX, + ST_F_TT_MAX, + ST_F_EINT, + ST_F_IDLE_CONN_CUR, + ST_F_SAFE_CONN_CUR, + ST_F_USED_CONN_CUR, + ST_F_NEED_CONN_EST, + ST_F_UWEIGHT, + ST_F_AGG_SRV_STATUS, + 
ST_F_AGG_SRV_CHECK_STATUS, + ST_F_AGG_CHECK_STATUS, + ST_F_SRID, + ST_F_SESS_OTHER, + ST_F_H1SESS, + ST_F_H2SESS, + ST_F_H3SESS, + ST_F_REQ_OTHER, + ST_F_H1REQ, + ST_F_H2REQ, + ST_F_H3REQ, + ST_F_PROTO, + + /* must always be the last one */ + ST_F_TOTAL_FIELDS +}; + +/* Please consider updating stats_dump_fields_*(), + * stats_dump_.*_info_fields() and stats_*_schema() + * when modifying struct field or related enums. + */ +struct field { + uint32_t type; + union { + int32_t s32; /* FF_S32 */ + uint32_t u32; /* FF_U32 */ + int64_t s64; /* FF_S64 */ + uint64_t u64; /* FF_U64 */ + double flt; /* FF_FLT */ + const char *str; /* FF_STR */ + } u; +}; + +enum counters_type { + COUNTERS_FE = 0, + COUNTERS_BE, + COUNTERS_SV, + COUNTERS_LI, + COUNTERS_RSLV, + + COUNTERS_OFF_END +}; + +/* Entity used to generate statistics on an HAProxy component */ +struct stats_module { + struct list list; + const char *name; + + /* functor used to generate the stats module using counters provided through data parameter */ + void (*fill_stats)(void *data, struct field *); + + struct name_desc *stats; /* name/description of stats provided by the module */ + void *counters; /* initial values of allocated counters */ + size_t counters_off[COUNTERS_OFF_END]; /* list of offsets of allocated counters in various objects */ + size_t stats_count; /* count of stats provided */ + size_t counters_size; /* sizeof counters */ + + uint32_t domain_flags; /* stats application domain for this module */ + char clearable; /* reset on a clear counters */ +}; + +struct extra_counters { + char *data; /* heap containing counters allocated in a linear fashion */ + size_t size; /* size of allocated data */ + enum counters_type type; /* type of object containing the counters */ +}; + +/* stats_domain is used in a flag as a 1 byte field */ +enum stats_domain { + STATS_DOMAIN_PROXY = 0, + STATS_DOMAIN_RESOLVERS, + STATS_DOMAIN_COUNT, + + STATS_DOMAIN_MASK = 0xff +}; + +/* used in a flag as a 1 byte field */ +enum 
stats_domain_px_cap { + STATS_PX_CAP_FE = 0x01, + STATS_PX_CAP_BE = 0x02, + STATS_PX_CAP_SRV = 0x04, + STATS_PX_CAP_LI = 0x08, + + STATS_PX_CAP_MASK = 0xff +}; + +/* the context of a "show stat" command in progress on the CLI or the stats applet */ +struct show_stat_ctx { + struct proxy *http_px; /* parent proxy of the current applet (only relevant for HTTP applet) */ + void *obj1; /* context pointer used in stats dump */ + void *obj2; /* context pointer used in stats dump */ + uint32_t domain; /* set the stats to used, for now only proxy stats are supported */ + int scope_str; /* limit scope to a frontend/backend substring */ + int scope_len; /* length of the string above in the buffer */ + int field; /* current field iterator when stat line is dumped through returning function */ + int px_st; /* STAT_PX_ST* */ + unsigned int flags; /* STAT_* from stats-t.h */ + int iid, type, sid; /* proxy id, type and service id if bounding of stats is enabled */ + int st_code; /* the status code returned by an action */ + enum stat_state state; /* phase of output production */ +}; + +extern THREAD_LOCAL void *trash_counters; + +#define EXTRA_COUNTERS(name) \ + struct extra_counters *name + +#define EXTRA_COUNTERS_GET(counters, mod) \ + (likely(counters) ? 
\ + ((void *)((counters)->data + (mod)->counters_off[(counters)->type])) : \ + (trash_counters)) + +#define EXTRA_COUNTERS_REGISTER(counters, ctype, alloc_failed_label) /* allocate a zeroed descriptor of type <ctype> into *<counters> */ \ + do { \ + typeof(*(counters)) _ctr; \ + _ctr = calloc(1, sizeof(*_ctr)); \ + if (!_ctr) \ + goto alloc_failed_label; \ + _ctr->type = (ctype); \ + *(counters) = _ctr; \ + } while (0) + +#define EXTRA_COUNTERS_ADD(mod, counters, new_counters, csize) /* record <mod>'s offset then reserve <csize> bytes; <new_counters> is unused */ \ + do { \ + typeof(counters) _ctr = (counters); \ + (mod)->counters_off[_ctr->type] = _ctr->size; \ + _ctr->size += (csize); \ + } while (0) + +#define EXTRA_COUNTERS_ALLOC(counters, alloc_failed_label) /* malloc() the accumulated size; contents are not zeroed */ \ + do { \ + typeof(counters) _ctr = (counters); \ + _ctr->data = malloc((_ctr)->size); \ + if (!_ctr->data) \ + goto alloc_failed_label; \ + } while (0) + +#define EXTRA_COUNTERS_INIT(counters, mod, init_counters, init_counters_size) /* copy <init_counters> into <mod>'s slot */ \ + do { \ + typeof(counters) _ctr = (counters); \ + memcpy(_ctr->data + (mod)->counters_off[_ctr->type], \ + (init_counters), (init_counters_size)); \ + } while (0) + +#define EXTRA_COUNTERS_FREE(counters) /* release the data area then the descriptor; NULL is accepted */ \ + do { \ + if (counters) { \ + free((counters)->data); \ + free(counters); \ + } \ + } while (0) + +#endif /* _HAPROXY_STATS_T_H */ diff --git a/include/haproxy/stats.h b/include/haproxy/stats.h new file mode 100644 index 0000000..f9e6d97 --- /dev/null +++ b/include/haproxy/stats.h @@ -0,0 +1,145 @@ +/* + * include/haproxy/stats.h + * This file contains definitions of some primitives dedicated to + * statistics output. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STATS_H +#define _HAPROXY_STATS_H + +#include <haproxy/api.h> +#include <haproxy/listener-t.h> +#include <haproxy/stats-t.h> +#include <haproxy/tools-t.h> + +struct channel; +struct buffer; +struct proxy; +struct appctx; +struct htx; + +/* These two arrays contain all field names and descriptions according to + * the number of entries in "enum stat_field" and "enum info_field" + */ +extern const struct name_desc stat_fields[]; +extern const struct name_desc info_fields[]; +extern const char *stat_status_codes[]; +extern struct applet http_stats_applet; +extern THREAD_LOCAL struct field info[]; +extern THREAD_LOCAL struct field *stat_l[]; + +struct htx; +int stats_putchk(struct appctx *appctx, struct htx *htx); + +int stats_dump_one_line(const struct field *stats, size_t stats_count, struct appctx *appctx); + +int stats_fill_info(struct field *info, int len, uint flags); +int stats_fill_fe_stats(struct proxy *px, struct field *stats, int len, + enum stat_field *selected_field); +int stats_fill_li_stats(struct proxy *px, struct listener *l, int flags, + struct field *stats, int len, enum stat_field *selected_field); +int stats_fill_sv_stats(struct proxy *px, struct server *sv, int flags, + struct field *stats, int len, enum stat_field *selected_field); +int stats_fill_be_stats(struct proxy *px, int flags, struct field *stats, int len, + enum stat_field *selected_field); + +int stats_emit_raw_data_field(struct buffer *out, const struct field *f); +int stats_emit_typed_data_field(struct buffer *out, const struct field *f); +int stats_emit_field_tags(struct buffer *out, const struct field *f, + char delim); + + +static inline enum field_format 
field_format(const struct field *f, int e) +{ + return f[e].type & FF_MASK; +} + +static inline enum field_origin field_origin(const struct field *f, int e) +{ + return f[e].type & FO_MASK; +} + +static inline enum field_scope field_scope(const struct field *f, int e) +{ + return f[e].type & FS_MASK; +} + +static inline enum field_nature field_nature(const struct field *f, int e) +{ + return f[e].type & FN_MASK; +} + +static inline const char *field_str(const struct field *f, int e) +{ + return (field_format(f, e) == FF_STR && f[e].u.str) ? f[e].u.str : ""; +} + +static inline struct field mkf_s32(uint32_t type, int32_t value) +{ + struct field f = { .type = FF_S32 | type, .u.s32 = value }; + return f; +} + +static inline struct field mkf_u32(uint32_t type, uint32_t value) +{ + struct field f = { .type = FF_U32 | type, .u.u32 = value }; + return f; +} + +static inline struct field mkf_s64(uint32_t type, int64_t value) +{ + struct field f = { .type = FF_S64 | type, .u.s64 = value }; + return f; +} + +static inline struct field mkf_u64(uint32_t type, uint64_t value) +{ + struct field f = { .type = FF_U64 | type, .u.u64 = value }; + return f; +} + +static inline struct field mkf_str(uint32_t type, const char *value) +{ + struct field f = { .type = FF_STR | type, .u.str = value }; + return f; +} + +static inline struct field mkf_flt(uint32_t type, double value) +{ + struct field f = { .type = FF_FLT | type, .u.flt = value }; + return f; +} + +#define MK_STATS_PROXY_DOMAIN(px_cap) \ + ((px_cap) << STATS_PX_CAP | STATS_DOMAIN_PROXY) + +int stats_allocate_proxy_counters_internal(struct extra_counters **counters, + int type, int px_cap); +int stats_allocate_proxy_counters(struct proxy *px); + +void stats_register_module(struct stats_module *m); + +#endif /* _HAPROXY_STATS_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/stconn-t.h b/include/haproxy/stconn-t.h new file mode 100644 index 0000000..63bcb79 
--- /dev/null +++ b/include/haproxy/stconn-t.h @@ -0,0 +1,325 @@ +/* + * include/haproxy/stconn-t.h + * This file describes the stream connector struct and associated constants. + * + * Copyright 2021 Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STCONN_T_H +#define _HAPROXY_STCONN_T_H + +#include <haproxy/obj_type-t.h> +#include <haproxy/connection-t.h> +#include <haproxy/pipe-t.h> +#include <haproxy/show_flags-t.h> +#include <haproxy/xref-t.h> + +enum iobuf_flags { + IOBUF_FL_NONE = 0x00000000, /* For initialization purposes */ + IOBUF_FL_NO_FF = 0x00000001, /* Fast-forwarding is not supported */ + IOBUF_FL_NO_SPLICING = 0x00000002, /* Splicing is not supported or unusable for this stream */ + IOBUF_FL_FF_BLOCKED = 0x00000004, /* Fast-forwarding is blocked (buffer allocation/full) */ + + IOBUF_FL_INTERIM_FF = 0x00000008, /* Producer side warns that it will immediately retry a fast-forward. + * .done_fastfwd() on consumer side must take care of this flag + */ + IOBUF_FL_EOI = 0x00000010, /* An EOI was encountered on producer side */ +}; + +struct iobuf { + struct pipe *pipe; /* non-NULL only when data present */ + struct buffer *buf; + size_t offset; + size_t data; + unsigned int flags; +}; + +/* Stream Endpoint Flags. 
+ * Please also update the se_show_flags() function below in case of changes. + */ +enum se_flags { + SE_FL_NONE = 0x00000000, /* For initialization purposes */ + + /* Endpoint types */ + SE_FL_T_MUX = 0x00000001, /* The endpoint is a mux (the target may be NULL before the mux init) */ + SE_FL_T_APPLET = 0x00000002, /* The endpoint is an applet */ + + /* unused: 0x00000004 .. 0x00000008 */ + + /* Endpoint states: none == attached to a mux with a stream connector */ + SE_FL_DETACHED = 0x00000010, /* The endpoint is detached (no mux/no applet) */ + SE_FL_ORPHAN = 0x00000020, /* The endpoint is orphan (no stream connector) */ + + /* unused: 0x00000040 .. 0x00000080 */ + + SE_FL_SHRD = 0x00000100, /* read shut, draining extra data */ + SE_FL_SHRR = 0x00000200, /* read shut, resetting extra data */ + SE_FL_SHR = SE_FL_SHRD | SE_FL_SHRR, /* read shut status */ + + SE_FL_SHWN = 0x00000400, /* write shut, verbose mode */ + SE_FL_SHWS = 0x00000800, /* write shut, silent mode */ + SE_FL_SHW = SE_FL_SHWN | SE_FL_SHWS, /* write shut status */ + + /* following flags are supposed to be set by the endpoint and read by + * the app layer : + */ + + /* Permanent flags */ + SE_FL_NOT_FIRST = 0x00001000, /* This stream connector is not the first one for the endpoint */ + SE_FL_WEBSOCKET = 0x00002000, /* The endpoint uses the websocket proto */ + SE_FL_EOI = 0x00004000, /* end-of-input reached */ + SE_FL_EOS = 0x00008000, /* End of stream delivered to data layer */ + SE_FL_ERROR = 0x00010000, /* a fatal error was reported */ + /* Transient flags */ + SE_FL_ERR_PENDING= 0x00020000, /* An error is pending, but there's still data to be read */ + SE_FL_RCV_MORE = 0x00040000, /* Endpoint may have more bytes to transfer */ + SE_FL_WANT_ROOM = 0x00080000, /* More bytes to transfer, but not enough room */ + SE_FL_EXP_NO_DATA= 0x00100000, /* No data expected by the endpoint */ + SE_FL_MAY_FASTFWD_PROD = 0x00200000, /* The endpoint may produce data via zero-copy forwarding */ + 
SE_FL_MAY_FASTFWD_CONS = 0x00400000, /* The endpoint may consume data via zero-copy forwarding */ + SE_FL_ENDP_MASK = 0x004ff000, /* Mask for flags set by the endpoint. NOTE(review): bits 0x00100000 and 0x00200000 are not covered by this mask - confirm this is intended */ + + /* following flags are supposed to be set by the app layer and read by + * the endpoint : + */ + /* unused 0x00800000,*/ + /* unused 0x01000000,*/ + /* unused 0x02000000,*/ + SE_FL_WAIT_FOR_HS = 0x04000000, /* This stream is waiting for handshake */ + SE_FL_KILL_CONN = 0x08000000, /* must kill the connection when the SC closes */ + SE_FL_WAIT_DATA = 0x10000000, /* stream endpoint cannot work without more data from the stream's output */ + SE_FL_WONT_CONSUME = 0x20000000, /* stream endpoint will not consume more data */ + SE_FL_HAVE_NO_DATA = 0x40000000, /* the endpoint has no more data to deliver to the stream */ + SE_FL_APPLET_NEED_CONN = 0x80000000, /* applet is waiting for the other side to (fail to) connect */ +}; + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *se_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(SE_FL_T_MUX, _(SE_FL_T_APPLET, _(SE_FL_DETACHED, _(SE_FL_ORPHAN, + _(SE_FL_SHRD, _(SE_FL_SHRR, _(SE_FL_SHWN, _(SE_FL_SHWS, + _(SE_FL_NOT_FIRST, _(SE_FL_WEBSOCKET, _(SE_FL_EOI, _(SE_FL_EOS, + _(SE_FL_ERROR, _(SE_FL_ERR_PENDING, _(SE_FL_RCV_MORE, + _(SE_FL_WANT_ROOM, _(SE_FL_EXP_NO_DATA, _(SE_FL_MAY_FASTFWD_PROD, _(SE_FL_MAY_FASTFWD_CONS, + _(SE_FL_WAIT_FOR_HS, _(SE_FL_KILL_CONN, _(SE_FL_WAIT_DATA, + _(SE_FL_WONT_CONSUME, _(SE_FL_HAVE_NO_DATA, _(SE_FL_APPLET_NEED_CONN))))))))))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* stconn flags. + * Please also update the sc_show_flags() function below in case of changes. 
+ * + * When SC_FL_ABRT_WANTED/SC_FL_EOS is set, it is strictly forbidden for the + * producer to alter the buffer contents. In this case, the consumer is free to + * perform a shutdown when it has consumed the last contents, otherwise the + * session processor will do it anyway. SC_FL_ABRT* are set at the upper layer + * level (the stream) while SC_FL_EOS is set at the SE layer. + * + * The SC_FL_SHUT_WANTED flag should be set by the session processor when + * SC_FL_ABRT_DONE/SC_FL_EOS and CF_AUTO_CLOSE are both set. And it may also be + * set by the producer when it detects SC_FL_EOS while directly forwarding data to the + * consumer. + * + * The SHUT/ABRT flags work like this : + * + * ABRT_WANTED ABRT_DONE meaning + * 0 0 normal case, connection still open and data is being read + * 1 0 closing : the producer cannot feed data anymore but can close + * 0/1 1 closed: the producer has closed its input channel. + * + * SHUT_WANTED SHUT_DONE meaning + * 0 0 normal case, connection still open and data is being written + * 1 0 closing: the consumer can send last data and may then close + * 0/1 1 closed: the consumer has closed its output channel. + * + * + * The ABRT_WANTED flag is mostly used to force the producer to abort when an error is + * detected on the consumer side. + * + */ +enum sc_flags { + SC_FL_NONE = 0x00000000, /* Just for initialization purposes */ + SC_FL_ISBACK = 0x00000001, /* Set for SC on back-side */ + + SC_FL_EOI = 0x00000002, /* End of input was reached. no more data will be received from the endpoint */ + SC_FL_ERROR = 0x00000004, /* A fatal error was reported */ + + SC_FL_NOLINGER = 0x00000008, /* may close without lingering. One-shot. 
*/ + SC_FL_NOHALF = 0x00000010, /* no half close, close both sides at once */ + SC_FL_DONT_WAKE = 0x00000020, /* resync in progress, don't wake up */ + SC_FL_INDEP_STR = 0x00000040, /* independent streams = don't update rex on write */ + + SC_FL_WONT_READ = 0x00000080, /* SC doesn't want to read data */ + SC_FL_NEED_BUFF = 0x00000100, /* SC waits for an rx buffer allocation to complete */ + SC_FL_NEED_ROOM = 0x00000200, /* SC needs more room in the rx buffer to store incoming data */ + + SC_FL_RCV_ONCE = 0x00000400, /* Don't loop to receive data. cleared after a successful receive */ + SC_FL_SND_ASAP = 0x00000800, /* Don't wait for sending. cleared when all data were sent */ + SC_FL_SND_NEVERWAIT = 0x00001000, /* Never wait for sending (permanent) */ + SC_FL_SND_EXP_MORE = 0x00002000, /* More data expected to be sent very soon. cleared when all data were sent */ + + SC_FL_ABRT_WANTED = 0x00004000, /* An abort was requested and must be performed ASAP (up side to down side) */ + SC_FL_SHUT_WANTED = 0x00008000, /* A shutdown was requested and must be performed ASAP (up side to down side) */ + SC_FL_ABRT_DONE = 0x00010000, /* An abort was performed for the SC */ + SC_FL_SHUT_DONE = 0x00020000, /* A shutdown was performed for the SC */ + + SC_FL_EOS = 0x00040000, /* End of stream was reached (from down side to up side) */ +}; + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *sc_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(SC_FL_ISBACK, _(SC_FL_EOI, _(SC_FL_ERROR, _(SC_FL_NOLINGER, _(SC_FL_NOHALF, + _(SC_FL_DONT_WAKE, _(SC_FL_INDEP_STR, _(SC_FL_WONT_READ, + _(SC_FL_NEED_BUFF, _(SC_FL_NEED_ROOM, + _(SC_FL_RCV_ONCE, _(SC_FL_SND_ASAP, _(SC_FL_SND_NEVERWAIT, _(SC_FL_SND_EXP_MORE, + _(SC_FL_ABRT_WANTED, _(SC_FL_SHUT_WANTED, _(SC_FL_ABRT_DONE, _(SC_FL_SHUT_DONE, + _(SC_FL_EOS))))))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* A conn stream must have its own errors independently of the buffer's, so that + * applications can rely on what the buffer reports while the conn stream is + * performing some retries (eg: connection error). Some states are transient and + * do not last beyond process_session(). + */ +enum sc_state { + SC_ST_INI = 0, /* SC not sollicitated yet */ + SC_ST_REQ, /* [transient] connection initiation desired and not started yet */ + SC_ST_QUE, /* SC waiting in queue */ + SC_ST_TAR, /* SC in turn-around state after failed connect attempt */ + SC_ST_ASS, /* server just assigned to this SC */ + SC_ST_CON, /* initiated connection request (resource exists) */ + SC_ST_CER, /* [transient] previous connection attempt failed (resource released) */ + SC_ST_RDY, /* [transient] ready proven after I/O success during SC_ST_CON */ + SC_ST_EST, /* connection established (resource exists) */ + SC_ST_DIS, /* [transient] disconnected from other side, but cleanup not done yet */ + SC_ST_CLO, /* SC closed, might not existing anymore. Buffers shut. 
*/ +} __attribute__((packed)); + +/* state bits for use with lists of states */ +enum sc_state_bit { + SC_SB_NONE = 0, + SC_SB_INI = 1U << SC_ST_INI, + SC_SB_REQ = 1U << SC_ST_REQ, + SC_SB_QUE = 1U << SC_ST_QUE, + SC_SB_TAR = 1U << SC_ST_TAR, + SC_SB_ASS = 1U << SC_ST_ASS, + SC_SB_CON = 1U << SC_ST_CON, + SC_SB_CER = 1U << SC_ST_CER, + SC_SB_RDY = 1U << SC_ST_RDY, + SC_SB_EST = 1U << SC_ST_EST, + SC_SB_DIS = 1U << SC_ST_DIS, + SC_SB_CLO = 1U << SC_ST_CLO, + SC_SB_ALL = SC_SB_INI|SC_SB_REQ|SC_SB_QUE|SC_SB_TAR|SC_SB_ASS|SC_SB_CON|SC_SB_CER|SC_SB_RDY|SC_SB_EST|SC_SB_DIS|SC_SB_CLO, +}; + +struct stconn; + +/* A Stream Endpoint Descriptor (sedesc) is the link between the stream + * connector (ex. stconn) and the Stream Endpoint (mux or appctx). + * It always exists for either of them, and binds them together. It also + * contains some shared information relative to the endpoint. It is created by + * the first one which needs it and is shared by the other one, i.e. on the + * client side, it's created by the mux or applet and shared with the connector. + * An sedesc without stconn is called an ORPHANED descriptor. An sedesc with + * no mux/applet is called a DETACHED descriptor. Upon detach, the connector + * transfers the whole responsibility of the endpoint descriptor to the + * endpoint itself (mux/applet) and eventually creates a new sedesc (for + * instance on connection retries). + * + * <lra> should be updated when a read activity at the endpoint level is + * detected. It can be a successful receive or when an EOS/EOI is reported. + * A read activity is also reported when receives are unblocked. + + * <fsb> should be updated when the first send of a series is blocked and reset + * when a successful send is reported. + * + * + * NOTE: <lra> and <fsb> must only be used via the SC api to compute read/write + * expiration date. + * + */ +struct sedesc { + void *se; /* the stream endpoint, i.e.
the mux stream or the appctx */ + struct connection *conn; /* the connection for connection-based streams */ + struct stconn *sc; /* the stream connector we're attached to, or NULL */ + struct iobuf iobuf; /* contains data forwarded by the other side and that must be sent by the stream endpoint */ + unsigned int flags; /* SE_FL_* */ + unsigned int lra; /* the last read activity */ + unsigned int fsb; /* the first send blocked */ + /* 4 bytes hole here */ + struct xref xref; /* cross reference with the opposite SC */ +}; + +/* sc_app_ops describes the application layer's operations and notification + * callbacks when I/O activity is reported and to use to perform shutr/shutw. + * There are very few combinations in practice (strm/chk <-> none/mux/applet). + */ +struct sc_app_ops { + void (*chk_rcv)(struct stconn *); /* chk_rcv function, may not be null */ + void (*chk_snd)(struct stconn *); /* chk_snd function, may not be null */ + void (*abort)(struct stconn *); /* abort function, may not be null */ + void (*shutdown)(struct stconn *); /* shutdown function, may not be null */ + int (*wake)(struct stconn *); /* data-layer callback to report activity */ + char name[8]; /* data layer name, zero-terminated */ +}; + +/* + * This structure describes the elements of a connection relevant to a stream + */ +struct stconn { + enum obj_type obj_type; /* differentiates connection from applet context */ + enum sc_state state; /* SC_ST* */ + /* 2 bytes hole here */ + + unsigned int flags; /* SC_FL_* */ + unsigned int ioto; /* I/O activity timeout */ + ssize_t room_needed; /* free space in the input buffer required to receive more data. + * -1 : the SC is waiting for room but not on a specific amount of data + * >= 0 : min free space required to progress. 
0 means SC must be unblocked ASAP + */ + struct wait_event wait_event; /* We're in a wait list */ + struct sedesc *sedesc; /* points to the stream endpoint descriptor */ + enum obj_type *app; /* points to the applicative point (stream or check) */ + const struct sc_app_ops *app_ops; /* general operations used at the app layer */ + struct sockaddr_storage *src; /* source address (pool), when known, otherwise NULL */ + struct sockaddr_storage *dst; /* destination address (pool), when known, otherwise NULL */ +}; + + +#endif /* _HAPROXY_STCONN_T_H */ diff --git a/include/haproxy/stconn.h b/include/haproxy/stconn.h new file mode 100644 index 0000000..7869fa3 --- /dev/null +++ b/include/haproxy/stconn.h @@ -0,0 +1,557 @@ +/* + * include/haproxy/stconn.h + * This file contains stream connector function prototypes + * + * Copyright 2021 Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STCONN_H +#define _HAPROXY_STCONN_H + +#include <haproxy/api.h> +#include <haproxy/connection.h> +#include <haproxy/htx-t.h> +#include <haproxy/obj_type.h> +#include <haproxy/stconn-t.h> + +struct buffer; +struct session; +struct appctx; +struct stream; +struct check; + +#define IS_HTX_SC(sc) (sc_conn(sc) && IS_HTX_CONN(__sc_conn(sc))) + +struct sedesc *sedesc_new(); +void sedesc_free(struct sedesc *sedesc); + +struct stconn *sc_new_from_endp(struct sedesc *sedesc, struct session *sess, struct buffer *input); +struct stconn *sc_new_from_strm(struct stream *strm, unsigned int flags); +struct stconn *sc_new_from_check(struct check *check, unsigned int flags); +void sc_free(struct stconn *sc); + +int sc_attach_mux(struct stconn *sc, void *target, void *ctx); +int sc_attach_strm(struct stconn *sc, struct stream *strm); + +void sc_destroy(struct stconn *sc); +int sc_reset_endp(struct stconn *sc); + +struct appctx *sc_applet_create(struct stconn *sc, struct applet *app); + +void sc_conn_prepare_endp_upgrade(struct stconn *sc); +void sc_conn_abort_endp_upgrade(struct stconn *sc); +void sc_conn_commit_endp_upgrade(struct stconn *sc); + +/* The se_fl_*() set of functions manipulate the stream endpoint flags from + * the stream endpoint itself. The sc_ep_*() set of functions manipulate the + * stream endpoint flags from the stream connector (ex. stconn). + * _zero() clears all flags, _clr() clears a set of flags (&=~), _set() sets + * a set of flags (|=), _test() tests the presence of a set of flags, _get() + * retrieves the exact flags, _setall() replaces the flags with the new value. + * All functions are purposely marked "forceinline" to avoid slowing down + * debugging code too much.
None of these functions is atomic-safe. + */ + +/* stream endpoint version */ +static forceinline void se_fl_zero(struct sedesc *se) +{ + se->flags = 0; +} + +static forceinline void se_fl_setall(struct sedesc *se, uint all) +{ + se->flags = all; +} + +/* sets flags <on> on se->flags and handles ERR_PENDING to ERROR promotion if + * needed (upon EOI/EOS). + */ +static forceinline void se_fl_set(struct sedesc *se, uint on) +{ + if (((on & (SE_FL_EOS|SE_FL_EOI)) && se->flags & SE_FL_ERR_PENDING) || + ((on & SE_FL_ERR_PENDING) && se->flags & (SE_FL_EOI|SE_FL_EOS))) + on |= SE_FL_ERROR; + se->flags |= on; +} + +static forceinline void se_fl_clr(struct sedesc *se, uint off) +{ + se->flags &= ~off; +} + +static forceinline uint se_fl_test(const struct sedesc *se, uint test) +{ + return !!(se->flags & test); +} + +static forceinline uint se_fl_get(const struct sedesc *se) +{ + return se->flags; +} + +/* sets SE_FL_ERROR or SE_FL_ERR_PENDING on the endpoint */ +static inline void se_fl_set_error(struct sedesc *se) +{ + if (se_fl_test(se, (SE_FL_EOS|SE_FL_EOI))) + se_fl_set(se, SE_FL_ERROR); + else + se_fl_set(se, SE_FL_ERR_PENDING); +} + +static inline void se_expect_no_data(struct sedesc *se) +{ + se_fl_set(se, SE_FL_EXP_NO_DATA); +} + +static inline void se_expect_data(struct sedesc *se) +{ + se_fl_clr(se, SE_FL_EXP_NO_DATA); +} + +static inline unsigned int se_have_ff_data(struct sedesc *se) +{ + return (se->iobuf.data | (long)se->iobuf.pipe); +} + +static inline size_t se_ff_data(struct sedesc *se) +{ + return (se->iobuf.data + (se->iobuf.pipe ? 
se->iobuf.pipe->data : 0)); +} + +/* stream connector version */ +static forceinline void sc_ep_zero(struct stconn *sc) +{ + se_fl_zero(sc->sedesc); +} + +static forceinline void sc_ep_setall(struct stconn *sc, uint all) +{ + se_fl_setall(sc->sedesc, all); +} + +static forceinline void sc_ep_set(struct stconn *sc, uint on) +{ + se_fl_set(sc->sedesc, on); +} + +static forceinline void sc_ep_clr(struct stconn *sc, uint off) +{ + se_fl_clr(sc->sedesc, off); +} + +static forceinline uint sc_ep_test(const struct stconn *sc, uint test) +{ + return se_fl_test(sc->sedesc, test); +} + +static forceinline uint sc_ep_get(const struct stconn *sc) +{ + return se_fl_get(sc->sedesc); +} + +/* Return the last read activity timestamp. May be TICK_ETERNITY */ +static forceinline unsigned int sc_ep_lra(const struct stconn *sc) +{ + return sc->sedesc->lra; +} + +/* Return the first send blocked timestamp. May be TICK_ETERNITY */ +static forceinline unsigned int sc_ep_fsb(const struct stconn *sc) +{ + return sc->sedesc->fsb; +} + +/* Report a read activity. This function sets <lra> to now_ms */ +static forceinline void sc_ep_report_read_activity(struct stconn *sc) +{ + sc->sedesc->lra = now_ms; +} + +/* Report a send blocked. This function sets <fsb> to now_ms if it was not + * already set or if something was sent (to renew <fsb>). + * + * if something was sent (<did_send> != 0), a read activity is also reported for + * non-independent stream. + */ +static forceinline void sc_ep_report_blocked_send(struct stconn *sc, int did_send) +{ + if (did_send || !tick_isset(sc->sedesc->fsb)) { + sc->sedesc->fsb = now_ms; + if (did_send && !(sc->flags & SC_FL_INDEP_STR)) + sc_ep_report_read_activity(sc); + } +} + +/* Report a send activity by setting <fsb> to TICK_ETERNITY. + * For non-independent stream, a read activity is reported. 
+ */ +static forceinline void sc_ep_report_send_activity(struct stconn *sc) +{ + sc->sedesc->fsb = TICK_ETERNITY; + if (!(sc->flags & SC_FL_INDEP_STR)) + sc_ep_report_read_activity(sc); +} + +static forceinline unsigned int sc_ep_have_ff_data(struct stconn *sc) +{ + return se_have_ff_data(sc->sedesc); +} + +static forceinline size_t sc_ep_ff_data(struct stconn *sc) +{ + return se_ff_data(sc->sedesc); +} + +/* Returns the stream endpoint from a connector, without any control */ +static inline void *__sc_endp(const struct stconn *sc) +{ + return sc->sedesc->se; +} + +/* Returns the connection from a sc if the endpoint is a mux stream. Otherwise + * NULL is returned. __sc_conn() returns the connection without any control + * while sc_conn() checks the endpoint type. + */ +static inline struct connection *__sc_conn(const struct stconn *sc) +{ + return sc->sedesc->conn; +} +static inline struct connection *sc_conn(const struct stconn *sc) +{ + if (sc_ep_test(sc, SE_FL_T_MUX)) + return __sc_conn(sc); + return NULL; +} + +/* Returns the mux ops of the connection from an stconn if the endpoint is a + * mux stream. Otherwise NULL is returned. + */ +static inline const struct mux_ops *sc_mux_ops(const struct stconn *sc) +{ + const struct connection *conn = sc_conn(sc); + + return (conn ? conn->mux : NULL); +} + +/* Returns a pointer to the mux stream from a connector if the endpoint is + * a mux. Otherwise NULL is returned. __sc_mux_strm() returns the mux without + * any control while sc_mux_strm() checks the endpoint type. + */ +static inline void *__sc_mux_strm(const struct stconn *sc) +{ + return __sc_endp(sc); +} +static inline struct appctx *sc_mux_strm(const struct stconn *sc) +{ + if (sc_ep_test(sc, SE_FL_T_MUX)) + return __sc_mux_strm(sc); + return NULL; +} + +/* Returns the appctx from a sc if the endpoint is an appctx. Otherwise + * NULL is returned. __sc_appctx() returns the appctx without any control + * while sc_appctx() checks the endpoint type.
+ */ +static inline struct appctx *__sc_appctx(const struct stconn *sc) +{ + return __sc_endp(sc); +} +static inline struct appctx *sc_appctx(const struct stconn *sc) +{ + if (sc_ep_test(sc, SE_FL_T_APPLET)) + return __sc_appctx(sc); + return NULL; +} + +/* Returns the stream from a sc if the application is a stream. Otherwise + * NULL is returned. __sc_strm() returns the stream without any control + * while sc_strm() check the application type. + */ +static inline struct stream *__sc_strm(const struct stconn *sc) +{ + return __objt_stream(sc->app); +} + +static inline struct stream *sc_strm(const struct stconn *sc) +{ + if (obj_type(sc->app) == OBJ_TYPE_STREAM) + return __sc_strm(sc); + return NULL; +} + +/* Returns the healthcheck from a sc if the application is a + * healthcheck. Otherwise NULL is returned. __sc_check() returns the healthcheck + * without any control while sc_check() check the application type. + */ +static inline struct check *__sc_check(const struct stconn *sc) +{ + return __objt_check(sc->app); +} +static inline struct check *sc_check(const struct stconn *sc) +{ + if (obj_type(sc->app) == OBJ_TYPE_CHECK) + return __objt_check(sc->app); + return NULL; +} + +/* Returns the name of the application layer's name for the stconn, + * or "NONE" when none is attached. + */ +static inline const char *sc_get_data_name(const struct stconn *sc) +{ + if (!sc->app_ops) + return "NONE"; + return sc->app_ops->name; +} + +/* shut read */ +static inline void sc_conn_shutr(struct stconn *sc, enum co_shr_mode mode) +{ + const struct mux_ops *mux; + + BUG_ON(!sc_conn(sc)); + + if (sc_ep_test(sc, SE_FL_SHR)) + return; + + /* clean data-layer shutdown */ + mux = sc_mux_ops(sc); + if (mux && mux->shutr) + mux->shutr(sc, mode); + sc_ep_set(sc, (mode == CO_SHR_DRAIN) ? 
SE_FL_SHRD : SE_FL_SHRR); +} + +/* shut write */ +static inline void sc_conn_shutw(struct stconn *sc, enum co_shw_mode mode) +{ + const struct mux_ops *mux; + + BUG_ON(!sc_conn(sc)); + + if (sc_ep_test(sc, SE_FL_SHW)) + return; + + /* clean data-layer shutdown */ + mux = sc_mux_ops(sc); + if (mux && mux->shutw) + mux->shutw(sc, mode); + sc_ep_set(sc, (mode == CO_SHW_NORMAL) ? SE_FL_SHWN : SE_FL_SHWS); +} + +/* completely close a stream connector (but do not detach it) */ +static inline void sc_conn_shut(struct stconn *sc) +{ + sc_conn_shutw(sc, CO_SHW_SILENT); + sc_conn_shutr(sc, CO_SHR_RESET); +} + +/* completely close a stream connector after draining possibly pending data (but do not detach it) */ +static inline void sc_conn_drain_and_shut(struct stconn *sc) +{ + sc_conn_shutw(sc, CO_SHW_SILENT); + sc_conn_shutr(sc, CO_SHR_DRAIN); +} + +/* Returns non-zero if the stream connector's Rx path is blocked because of + * lack of room in the input buffer. This usually happens after applets failed + * to deliver data into the channel's buffer and reported it via sc_need_room(). + */ +__attribute__((warn_unused_result)) +static inline int sc_waiting_room(const struct stconn *sc) +{ + return !!(sc->flags & SC_FL_NEED_ROOM); +} + +/* The stream endpoint announces it has more data to deliver to the stream's + * input buffer. + */ +static inline void se_have_more_data(struct sedesc *se) +{ + se_fl_clr(se, SE_FL_HAVE_NO_DATA); +} + +/* The stream endpoint announces it doesn't have more data for the stream's + * input buffer. + */ +static inline void se_have_no_more_data(struct sedesc *se) +{ + se_fl_set(se, SE_FL_HAVE_NO_DATA); +} + +/* The application layer informs a stream connector that it's willing to + * receive data from the endpoint. A read activity is reported. 
+ */ +static inline void sc_will_read(struct stconn *sc) +{ + if (sc->flags & SC_FL_WONT_READ) { + sc->flags &= ~SC_FL_WONT_READ; + sc_ep_report_read_activity(sc); + } +} + +/* The application layer informs a stream connector that it will not receive + * data from the endpoint (e.g. need to flush, bw limitations etc). Usually + * it corresponds to the channel's CF_DONT_READ flag. + */ +static inline void sc_wont_read(struct stconn *sc) +{ + sc->flags |= SC_FL_WONT_READ; +} + +/* A frontend (applet) stream endpoint tells the connector it needs the other + * side to connect or fail before continuing to work. This is used for example + * to allow an applet not to deliver data to a request channel before a + * connection is confirmed. + */ +static inline void se_need_remote_conn(struct sedesc *se) +{ + se_fl_set(se, SE_FL_APPLET_NEED_CONN); +} + +/* The application layer tells the stream connector that it just got the input + * buffer it was waiting for. A read activity is reported. + */ +static inline void sc_have_buff(struct stconn *sc) +{ + if (sc->flags & SC_FL_NEED_BUFF) { + sc->flags &= ~SC_FL_NEED_BUFF; + sc_ep_report_read_activity(sc); + } +} + +/* The stream connector failed to get an input buffer and is waiting for it. + * It indicates a willingness to deliver data to the buffer that will have to + * be retried. As such, callers will often automatically clear SE_FL_HAVE_NO_DATA + * to be called again as soon as SC_FL_NEED_BUFF is cleared. + */ +static inline void sc_need_buff(struct stconn *sc) +{ + sc->flags |= SC_FL_NEED_BUFF; +} + +/* Tell a stream connector some room was made in the input buffer and any + * failed attempt to inject data into it may be tried again. This is usually + * called after a successful transfer of buffer contents to the other side. + * A read activity is reported.
+ */ +static inline void sc_have_room(struct stconn *sc) +{ + if (sc->flags & SC_FL_NEED_ROOM) { + sc->flags &= ~SC_FL_NEED_ROOM; + sc->room_needed = 0; + sc_ep_report_read_activity(sc); + } +} + +/* The stream connector announces it failed to put data into the input buffer + * by lack of room. It indicates a willingness to deliver data to the + * buffer that will have to be retried. Usually the caller will also clear + * SE_FL_HAVE_NO_DATA to be called again as soon as SC_FL_NEED_ROOM is cleared. + * + * The caller is responsible for specifying the amount of free space required to + * progress. It must take care to not exceed the buffer size. + */ +static inline void sc_need_room(struct stconn *sc, ssize_t room_needed) +{ + sc->flags |= SC_FL_NEED_ROOM; + BUG_ON_HOT(room_needed > (ssize_t)global.tune.bufsize); + sc->room_needed = room_needed; +} + +/* The stream endpoint indicates that it's ready to consume data from the + * stream's output buffer. Report a send activity if the SE is unblocked. + */ +static inline void se_will_consume(struct sedesc *se) +{ + if (se_fl_test(se, SE_FL_WONT_CONSUME)) { + se_fl_clr(se, SE_FL_WONT_CONSUME); + sc_ep_report_send_activity(se->sc); + } +} + +/* The stream endpoint indicates that it's not willing to consume data from the + * stream's output buffer. + */ +static inline void se_wont_consume(struct sedesc *se) +{ + se_fl_set(se, SE_FL_WONT_CONSUME); +} + +/* The stream endpoint indicates that it's willing to consume data from the + * stream's output buffer, but that there's not enough, so it doesn't want to + * be woken up until more are presented.
+ */ +static inline void se_need_more_data(struct sedesc *se) +{ + se_will_consume(se); + se_fl_set(se, SE_FL_WAIT_DATA); +} + + +static inline size_t se_nego_ff(struct sedesc *se, struct buffer *input, size_t count, unsigned int may_splice) +{ + size_t ret = 0; + + if (se_fl_test(se, SE_FL_T_MUX)) { + const struct mux_ops *mux = se->conn->mux; + + se->iobuf.flags &= ~IOBUF_FL_FF_BLOCKED; + if (mux->nego_fastfwd && mux->done_fastfwd) { + /* Disable zero-copy forwarding if EOS or an error was reported. */ + if (se_fl_test(se, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING)) { + se->iobuf.flags |= IOBUF_FL_NO_FF; + goto end; + } + + ret = mux->nego_fastfwd(se->sc, input, count, may_splice); + if (se->iobuf.flags & IOBUF_FL_FF_BLOCKED) { + sc_ep_report_blocked_send(se->sc, 0); + + if (!(se->sc->wait_event.events & SUB_RETRY_SEND)) { + /* The SC must be subs for send to be notify when some + * space is made + */ + mux->subscribe(se->sc, SUB_RETRY_SEND, &se->sc->wait_event); + } + } + goto end; + } + } + se->iobuf.flags |= IOBUF_FL_NO_FF; + + end: + return ret; +} + +static inline void se_done_ff(struct sedesc *se) +{ + if (se_fl_test(se, SE_FL_T_MUX)) { + const struct mux_ops *mux = se->conn->mux; + size_t sent, to_send = se_ff_data(se); + + BUG_ON(!mux->done_fastfwd); + sent = mux->done_fastfwd(se->sc); + if (to_send) { + if (sent == to_send) + sc_ep_report_send_activity(se->sc); + else + sc_ep_report_blocked_send(se->sc, sent != 0); + } + } +} + +#endif /* _HAPROXY_STCONN_H */ diff --git a/include/haproxy/stick_table-t.h b/include/haproxy/stick_table-t.h new file mode 100644 index 0000000..749cb9a --- /dev/null +++ b/include/haproxy/stick_table-t.h @@ -0,0 +1,250 @@ +/* + * include/haproxy/stick_table-t.h + * Macros, variables and structures for stick tables management. 
+ * + * Copyright (C) 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * Copyright (C) 2010 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STICK_TABLE_T_H +#define _HAPROXY_STICK_TABLE_T_H + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/freq_ctr-t.h> +#include <haproxy/thread-t.h> + +#define STKTABLE_MAX_DT_ARRAY_SIZE 100 + +/* The types of extra data we can store in a stick table */ +enum { + STKTABLE_DT_SERVER_ID, /* the server ID to use with this stream if > 0 */ + STKTABLE_DT_GPT0, /* General Purpose Flag 0. 
*/ + STKTABLE_DT_GPC0, /* General Purpose Counter 0 (unsigned 32-bit integer) */ + STKTABLE_DT_GPC0_RATE, /* General Purpose Counter 0's event rate */ + STKTABLE_DT_CONN_CNT, /* cumulated number of connections */ + STKTABLE_DT_CONN_RATE, /* incoming connection rate */ + STKTABLE_DT_CONN_CUR, /* concurrent number of connections */ + STKTABLE_DT_SESS_CNT, /* cumulated number of sessions (accepted connections) */ + STKTABLE_DT_SESS_RATE, /* accepted sessions rate */ + STKTABLE_DT_HTTP_REQ_CNT, /* cumulated number of incoming HTTP requests */ + STKTABLE_DT_HTTP_REQ_RATE,/* incoming HTTP request rate */ + STKTABLE_DT_HTTP_ERR_CNT, /* cumulated number of HTTP requests errors (4xx) */ + STKTABLE_DT_HTTP_ERR_RATE,/* HTTP request error rate */ + STKTABLE_DT_BYTES_IN_CNT, /* cumulated bytes count from client to servers */ + STKTABLE_DT_BYTES_IN_RATE,/* bytes rate from client to servers */ + STKTABLE_DT_BYTES_OUT_CNT,/* cumulated bytes count from servers to client */ + STKTABLE_DT_BYTES_OUT_RATE,/* bytes rate from servers to client */ + STKTABLE_DT_GPC1, /* General Purpose Counter 1 (unsigned 32-bit integer) */ + STKTABLE_DT_GPC1_RATE, /* General Purpose Counter 1's event rate */ + STKTABLE_DT_SERVER_KEY, /* The server key */ + STKTABLE_DT_HTTP_FAIL_CNT, /* cumulated number of HTTP server failures */ + STKTABLE_DT_HTTP_FAIL_RATE,/* HTTP server failures rate */ + STKTABLE_DT_GPT, /* array of gpt */ + STKTABLE_DT_GPC, /* array of gpc */ + STKTABLE_DT_GPC_RATE, /* array of gpc_rate */ + + + STKTABLE_STATIC_DATA_TYPES,/* number of types above */ + /* up to STKTABLE_EXTRA_DATA_TYPES types may be registered here, always + * followed by the number of data types, must always be last. 
+ */ + STKTABLE_DATA_TYPES = STKTABLE_STATIC_DATA_TYPES + STKTABLE_EXTRA_DATA_TYPES +}; + +/* The equivalent standard types of the stored data */ +enum { + STD_T_SINT = 0, /* data is of type signed int */ + STD_T_UINT, /* data is of type unsigned int */ + STD_T_ULL, /* data is of type unsigned long long */ + STD_T_FRQP, /* data is of type freq_ctr */ + STD_T_DICT, /* data is of type key of dictionary entry */ +}; + +/* The types of optional arguments to stored data */ +enum { + ARG_T_NONE = 0, /* data type takes no argument (default) */ + ARG_T_INT, /* signed integer */ + ARG_T_DELAY, /* a delay which supports time units */ +}; + +/* The types of keys that servers can be identified by */ +enum { + STKTABLE_SRV_NAME = 0, + STKTABLE_SRV_ADDR, +}; + +/* stick table key type flags */ +#define STK_F_CUSTOM_KEYSIZE 0x00000001 /* this table's key size is configurable */ + +/* WARNING: if new fields are added, they must be initialized in stream_accept() + * and freed in stream_free() ! + * + * The purpose of these two macros: + * - STKCTR_TRACK_BACKEND indicates that a tracking pointer was set from the backend + * and thus that when a keep-alive request goes to another backend, the track + * must cease. + * + * - STKCTR_TRACK_CONTENT indicates that the tracking pointer was set in a + * content-aware rule (tcp-request content or http-request) and that the + * tracking has to be performed in the stream and not in the session, and + * will cease for a new keep-alive request over the same connection. + * + * These values are mixed with the stksess pointer in stkctr->entry. + */ +#define STKCTR_TRACK_BACKEND 1 +#define STKCTR_TRACK_CONTENT 2 + +/* stick_table extra data.
This is mainly used for casting or size computation */ +union stktable_data { + /* standard types for easy casting */ + int std_t_sint; + unsigned int std_t_uint; + unsigned long long std_t_ull; + struct freq_ctr std_t_frqp; + struct dict_entry *std_t_dict; +}; + +/* known data types */ +struct stktable_data_type { + const char *name; /* name of the data type */ + int std_type; /* standard type we can use for this data, STD_T_* */ + int arg_type; /* type of optional argument, ARG_T_* */ + uint is_array:1; /* this is an array of gpc/gpt */ + uint is_local:1; /* this is local only and never learned */ + uint as_is:1; /* cannot be processed / used with arithmetic operations */ +}; + +/* stick table keyword type */ +struct stktable_type { + const char *kw; /* keyword string */ + int flags; /* type flags */ + size_t default_size; /* default key size */ +}; + +/* Sticky session. + * Any additional data related to the stuck session is installed *before* + * stksess (with negative offsets). This allows us to run variable-sized + * keys and variable-sized data without making use of intermediate pointers. + */ +struct stksess { + unsigned int expire; /* session expiration date */ + unsigned int ref_cnt; /* reference count, can only purge when zero */ + __decl_thread(HA_RWLOCK_T lock); /* lock related to the table entry */ + int shard; /* shard */ + struct eb32_node exp; /* ebtree node used to hold the session in expiration tree */ + struct eb32_node upd; /* ebtree node used to hold the update sequence tree */ + struct ebmb_node key; /* ebtree node used to hold the session in table */ + /* WARNING! do not put anything after <keys>, it's used by the key */ +}; + + +/* stick table */ +struct stktable { + char *id; /* local table id name. */ + size_t idlen; /* local table id name length. */ + char *nid; /* table id name sent over the network with peers protocol. */ + struct stktable *next; /* The stick-table may be linked when belonging to + * the same configuration section. 
+ */ + struct ebpt_node name; /* Stick-table are lookup by name here. */ + struct pool_head *pool; /* pool used to allocate sticky sessions */ + struct task *exp_task; /* expiration task */ + struct task *sync_task; /* sync task */ + + uint64_t hash_seed; /* hash seed used by shards */ + union { + struct peers *p; /* sync peers */ + char *name; + } peers; + + unsigned long type; /* type of table (determines key format) */ + size_t key_size; /* size of a key, maximum size in case of string */ + unsigned int server_key_type; /* What type of key is used to identify servers */ + unsigned int size; /* maximum number of sticky sessions in table */ + int nopurge; /* if non-zero, don't purge sticky sessions when full */ + int expire; /* time to live for sticky sessions (milliseconds) */ + int data_size; /* the size of the data that is prepended *before* stksess */ + int data_ofs[STKTABLE_DATA_TYPES]; /* negative offsets of present data types, or 0 if absent */ + unsigned int data_nbelem[STKTABLE_DATA_TYPES]; /* to store nb_elem in case of array types */ + union { + int i; + unsigned int u; + void *p; + } data_arg[STKTABLE_DATA_TYPES]; /* optional argument of each data type */ + struct proxy *proxy; /* The proxy this stick-table is attached to, if any.*/ + union { + char *name; /* preparsing hint */ + struct stktable *t; /* postparsing */ + void *ptr; /* generic ptr to check if set or not */ + } write_to; /* updates received on the source table will also update write_to */ + + THREAD_ALIGN(64); + + struct eb_root keys; /* head of sticky session tree */ + struct eb_root exps; /* head of sticky session expiration tree */ + unsigned int refcnt; /* number of local peer over all peers sections + attached to this table */ + unsigned int current; /* number of sticky sessions currently in table */ + __decl_thread(HA_RWLOCK_T lock); /* lock related to the table */ + + THREAD_ALIGN(64); + + struct eb_root updates; /* head of sticky updates sequence tree, uses updt_lock */ + unsigned 
int update; /* uses updt_lock */ + unsigned int localupdate; /* uses updt_lock */ + unsigned int commitupdate;/* used to identify the latest local updates pending for sync, uses updt_lock */ + + THREAD_ALIGN(64); + /* this lock is heavily used and must be on its own cache line */ + __decl_thread(HA_RWLOCK_T updt_lock); /* lock protecting the updates part */ + + /* rarely used config stuff below (should not interfere with updt_lock) */ + struct proxy *proxies_list; /* The list of proxies which reference this stick-table. */ + struct { + const char *file; /* The file where the stick-table is declared. */ + int line; /* The line in this <file> the stick-table is declared. */ + } conf; +}; + +extern struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES]; + +/* stick table key */ +struct stktable_key { + void *key; /* pointer on key buffer */ + size_t key_len; /* data len to read in buff in case of null terminated string */ +}; + +/* stick counter. The <entry> member is a composite address (caddr) made of a + * pointer to an stksess struct, and two flags among STKCTR_TRACK_* above. + */ +struct stkctr { + unsigned long entry; /* entry containing counters currently being tracked by this stream */ + struct stktable *table; /* table the counters above belong to (undefined if counters are null) */ +}; + +/* parameters to configure tracked counters */ +struct track_ctr_prm { + struct sample_expr *expr; /* expression used as the key */ + union { + struct stktable *t; /* a pointer to the table */ + char *n; /* or its name during parsing. */ + } table; +}; + +#endif /* _HAPROXY_STICK_TABLE_T_H */ diff --git a/include/haproxy/stick_table.h b/include/haproxy/stick_table.h new file mode 100644 index 0000000..3200437 --- /dev/null +++ b/include/haproxy/stick_table.h @@ -0,0 +1,404 @@ +/* + * include/haproxy/stick_table.h + * Functions for stick tables management. 
+ * + * Copyright (C) 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr> + * Copyright (C) 2010 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STICK_TABLE_H +#define _HAPROXY_STICK_TABLE_H + +#include <haproxy/api.h> +#include <haproxy/dict-t.h> +#include <haproxy/errors.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/sample-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/ticks.h> + +extern struct stktable *stktables_list; +extern struct pool_head *pool_head_stk_ctr; +extern struct stktable_type stktable_types[]; + +#define stktable_data_size(type) (sizeof(((union stktable_data*)0)->type)) +#define stktable_data_cast(ptr, type) ((union stktable_data*)(ptr))->type + +void stktable_store_name(struct stktable *t); +struct stktable *stktable_find_by_name(const char *name); +struct stksess *stksess_new(struct stktable *t, struct stktable_key *key); +void stksess_setkey(struct stktable *t, struct stksess *ts, struct stktable_key *key); +void stksess_free(struct stktable *t, struct stksess *ts); +int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcount); +int stktable_get_key_shard(struct stktable *t, const void *key, size_t len); + +int stktable_init(struct stktable *t, char **err_msg); +void stktable_deinit(struct stktable *t); +int 
stktable_parse_type(char **args, int *idx, unsigned long *type, size_t *key_size, const char *file, int linenum); +int parse_stick_table(const char *file, int linenum, char **args, + struct stktable *t, char *id, char *nid, struct peers *peers); +struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *key); +struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts); +void stktable_requeue_exp(struct stktable *t, const struct stksess *ts); +void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int decrefcount, int expire, int decrefcnt); +void stktable_touch_remote(struct stktable *t, struct stksess *ts, int decrefcnt); +void stktable_touch_local(struct stktable *t, struct stksess *ts, int decrefccount); +struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts); +struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key); +struct stksess *stktable_update_key(struct stktable *table, struct stktable_key *key); +struct stktable_key *smp_to_stkey(struct sample *smp, struct stktable *t); +struct stktable_key *stktable_fetch_key(struct stktable *t, struct proxy *px, struct session *sess, + struct stream *strm, unsigned int opt, + struct sample_expr *expr, struct sample *smp); +struct stkctr *smp_fetch_sc_stkctr(struct session *sess, struct stream *strm, const struct arg *args, const char *kw, struct stkctr *stkctr); +struct stkctr *smp_create_src_stkctr(struct session *sess, struct stream *strm, const struct arg *args, const char *kw, struct stkctr *stkctr); +int stktable_compatible_sample(struct sample_expr *expr, unsigned long table_type); +int stktable_register_data_store(int idx, const char *name, int std_type, int arg_type); +int stktable_get_data_type(char *name); +int stktable_trash_oldest(struct stktable *t, int to_batch); +int __stksess_kill(struct stktable *t, struct stksess *ts); + +/************************* Composite address manipulation ********************* + 
* Composite addresses are simply unsigned long data in which the higher bits + * represent a pointer, and the two lower bits are flags. There are several + * places where we just want to associate one or two flags to a pointer (eg, + * to type it), and these functions permit this. The pointer is necessarily a + * 32-bit aligned pointer, as its two lower bits will be cleared and replaced + * with the flags. + *****************************************************************************/ + +/* Masks the two lower bits of a composite address and converts it to a + * pointer. This is used to mix some bits with some aligned pointers to + * structs and to retrieve the original (32-bit aligned) pointer. + */ +static inline void *caddr_to_ptr(unsigned long caddr) +{ + return (void *)(caddr & ~3UL); +} + +/* Only retrieves the two lower bits of a composite address. This is used to mix + * some bits with some aligned pointers to structs and to retrieve the original + * data (2 bits). + */ +static inline unsigned int caddr_to_data(unsigned long caddr) +{ + return (caddr & 3UL); +} + +/* Combines the aligned pointer whose 2 lower bits will be masked with the bits + * from <data> to form a composite address. This is used to mix some bits with + * some aligned pointers to structs and to retrieve the original (32-bit aligned) + * pointer. 
+ */ +static inline unsigned long caddr_from_ptr(void *ptr, unsigned int data) +{ + return (((unsigned long)ptr) & ~3UL) + (data & 3); +} + +/* sets the bits of <data> (only its 2 lower bits are considered) in the + * <caddr> composite address and returns the resulting composite address. + */ +static inline unsigned long caddr_set_flags(unsigned long caddr, unsigned int data) +{ + return caddr | (data & 3); +} + +/* clears the bits of <data> (only its 2 lower bits are considered) in the + * <caddr> composite address and returns the resulting composite address. + */ +static inline unsigned long caddr_clr_flags(unsigned long caddr, unsigned int data) +{ + return caddr & ~(unsigned long)(data & 3); +} + + +/* return allocation size in bytes for standard data type <type>, or 0 if + * <type> is none of the STD_T_* values handled below. + */ +static inline int stktable_type_size(int type) +{ + switch(type) { + case STD_T_SINT: + case STD_T_UINT: + return sizeof(int); + case STD_T_ULL: + return sizeof(unsigned long long); + case STD_T_FRQP: + return sizeof(struct freq_ctr); + case STD_T_DICT: + return sizeof(struct dict_entry *); + } + return 0; +} + +int stktable_alloc_data_type(struct stktable *t, int type, const char *sa, const char *sa2); + +/* return pointer for data type <type> in sticky session <ts> of table <t>, all + * of which must exist (otherwise use stktable_data_ptr() if unsure). The + * offset t->data_ofs[type] is applied to <ts> without any check. + */ +static inline void *__stktable_data_ptr(struct stktable *t, struct stksess *ts, int type) +{ + return (void *)ts + t->data_ofs[type]; +} + +/* return pointer for data type <type> in sticky session <ts> of table <t>, or + * NULL if either <ts> is NULL or the type is not stored. 
+ */ +static inline void *stktable_data_ptr(struct stktable *t, struct stksess *ts, int type) +{ + /* <type> is a signed int used as an array index: reject negative + * values as well as values beyond the last known data type. + */ + if (type < 0 || type >= STKTABLE_DATA_TYPES) + return NULL; + + if (!t->data_ofs[type]) /* type not stored */ + return NULL; + + if (!ts) + return NULL; + + return __stktable_data_ptr(t, ts, type); +} + +/* return pointer on the element of index <idx> from the array data type <type> + * in sticky session <ts> of table <t>, or NULL if either <ts> is NULL + * or this element is not stored because this type is not stored, <type> is + * out of range, or the requested index is not lower than the number of + * elements of the array. + * Note: this function is also usable on non array types, they are + * considered as array of size 1, so a call with <idx> at 0 + * has the same behavior as 'stktable_data_ptr'. + */ +static inline void *stktable_data_ptr_idx(struct stktable *t, struct stksess *ts, int type, unsigned int idx) +{ + /* same signed index check as in stktable_data_ptr() */ + if (type < 0 || type >= STKTABLE_DATA_TYPES) + return NULL; + + if (!t->data_ofs[type]) /* type not stored */ + return NULL; + + if (!ts) + return NULL; + + if (t->data_nbelem[type] <= idx) + return NULL; + + return __stktable_data_ptr(t, ts, type) + idx*stktable_type_size(stktable_data_types[type].std_type); +} + +/* kill an entry if the table has an expiration and the entry is expired. + * Returns the result of __stksess_kill() in this case, otherwise 0. The + * ref_cnt check is presumably performed by __stksess_kill() (not visible + * here). + */ +static inline int __stksess_kill_if_expired(struct stktable *t, struct stksess *ts) +{ + if (t->expire != TICK_ETERNITY && tick_is_expired(ts->expire, now_ms)) + return __stksess_kill(t, ts); + + return 0; +} + +/* If <decrefcnt> is non-zero, drops one reference on <ts> and returns + * immediately when other references remain. Otherwise, if the table has an + * expiration and the entry is expired, tries to kill it under the table's + * write lock. + */ +static inline void stksess_kill_if_expired(struct stktable *t, struct stksess *ts, int decrefcnt) +{ + + if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0) + return; + + if (t->expire != TICK_ETERNITY && tick_is_expired(ts->expire, now_ms)) { + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + __stksess_kill_if_expired(t, ts); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + } +} + +/* sets the stick counter's entry pointer */ +static inline void stkctr_set_entry(struct stkctr *stkctr, struct stksess *entry) +{ + 
stkctr->entry = caddr_from_ptr(entry, 0); +} + +/* returns the entry pointer from a stick counter */ +static inline struct stksess *stkctr_entry(struct stkctr *stkctr) +{ + return caddr_to_ptr(stkctr->entry); +} + +/* returns the two flags from a stick counter */ +static inline unsigned int stkctr_flags(struct stkctr *stkctr) +{ + return caddr_to_data(stkctr->entry); +} + +/* sets up to two flags at a time on a stick counter's composite address */ +static inline void stkctr_set_flags(struct stkctr *stkctr, unsigned int flags) +{ + stkctr->entry = caddr_set_flags(stkctr->entry, flags); +} + +/* clears up to two flags at a time on a stick counter's composite address */ +static inline void stkctr_clr_flags(struct stkctr *stkctr, unsigned int flags) +{ + stkctr->entry = caddr_clr_flags(stkctr->entry, flags); +} + +/* Increase the number of cumulated HTTP requests in the tracked counter + * <stkctr>. It returns 0 if the entry pointer does not exist and nothing is + * performed. Otherwise it returns 1, even if neither the counter nor the rate + * is stored in the table. + */ +static inline int stkctr_inc_http_req_ctr(struct stkctr *stkctr) +{ + struct stksess *ts; + void *ptr1, *ptr2; + + ts = stkctr_entry(stkctr); + if (!ts) + return 0; + + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_HTTP_REQ_CNT); + if (ptr1) + stktable_data_cast(ptr1, std_t_uint)++; + + ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_HTTP_REQ_RATE); + if (ptr2) + update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_HTTP_REQ_RATE].u, 1); + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + if (ptr1 || ptr2) + stktable_touch_local(stkctr->table, ts, 0); + return 1; +} + +/* Increase the number of cumulated failed HTTP requests in the tracked counter + * <stkctr>. It returns 0 if the entry pointer does not exist and nothing is + * performed. Otherwise it returns 1. 
+ */ +static inline int stkctr_inc_http_err_ctr(struct stkctr *stkctr) +{ + struct stksess *ts; + void *ptr1, *ptr2; + + ts = stkctr_entry(stkctr); + if (!ts) + return 0; + + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_HTTP_ERR_CNT); + if (ptr1) + stktable_data_cast(ptr1, std_t_uint)++; + + ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_HTTP_ERR_RATE); + if (ptr2) + update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u, 1); + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + if (ptr1 || ptr2) + stktable_touch_local(stkctr->table, ts, 0); + return 1; +} + +/* Increase the number of cumulated failed HTTP responses in the tracked counter + * <stkctr>. It returns 0 if the entry pointer does not exist and nothing is + * performed. Otherwise it returns 1. + */ +static inline int stkctr_inc_http_fail_ctr(struct stkctr *stkctr) +{ + struct stksess *ts; + void *ptr1, *ptr2; + + ts = stkctr_entry(stkctr); + if (!ts) + return 0; + + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_HTTP_FAIL_CNT); + if (ptr1) + stktable_data_cast(ptr1, std_t_uint)++; + + ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_HTTP_FAIL_RATE); + if (ptr2) + update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u, 1); + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + if (ptr1 || ptr2) + stktable_touch_local(stkctr->table, ts, 0); + return 1; +} + +/* Increase the number of bytes received in the tracked counter <stkctr>. It + * returns 0 if the entry pointer does not exist and nothing is + * performed. Otherwise it returns 1. 
+ */ +static inline int stkctr_inc_bytes_in_ctr(struct stkctr *stkctr, unsigned long long bytes) +{ + struct stksess *ts; + void *ptr1, *ptr2; + + ts = stkctr_entry(stkctr); + if (!ts) + return 0; + + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_BYTES_IN_CNT); + if (ptr1) + stktable_data_cast(ptr1, std_t_ull) += bytes; + + ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_BYTES_IN_RATE); + if (ptr2) + update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_BYTES_IN_RATE].u, bytes); + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + + /* If data was modified, we need to touch to re-schedule sync */ + if (ptr1 || ptr2) + stktable_touch_local(stkctr->table, ts, 0); + return 1; +} + +/* Increase the number of bytes sent in the tracked counter <stkctr>. It + * returns 0 if the entry pointer does not exist and nothing is + * performed. Otherwise it returns 1. + */ +static inline int stkctr_inc_bytes_out_ctr(struct stkctr *stkctr, unsigned long long bytes) +{ + struct stksess *ts; + void *ptr1, *ptr2; + + ts = stkctr_entry(stkctr); + if (!ts) + return 0; + + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_BYTES_OUT_CNT); + if (ptr1) + stktable_data_cast(ptr1, std_t_ull) += bytes; + + ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_BYTES_OUT_RATE); + if (ptr2) + update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_BYTES_OUT_RATE].u, bytes); + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + + /* If data was modified, we need to touch to re-schedule sync */ + if (ptr1 || ptr2) + stktable_touch_local(stkctr->table, ts, 0); + return 1; +} + +#endif /* _HAPROXY_STICK_TABLE_H */ diff --git a/include/haproxy/stream-t.h b/include/haproxy/stream-t.h new file mode 100644 index 0000000..7e79b96 --- /dev/null +++ b/include/haproxy/stream-t.h @@ -0,0 +1,301 @@ +/* + * 
include/haproxy/stream-t.h + * This file defines everything related to streams. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STREAM_T_H +#define _HAPROXY_STREAM_T_H + +#include <sys/time.h> + +#include <haproxy/api-t.h> +#include <haproxy/channel-t.h> +#include <haproxy/stconn-t.h> +#include <haproxy/dynbuf-t.h> +#include <haproxy/filters-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/show_flags-t.h> +#include <haproxy/stick_table-t.h> +#include <haproxy/vars-t.h> + + +/* Various Stream Flags, bits values 0x001 to 0x800 (shift 0). + * Please also update the strm_show_flags() function below in case of changes. + */ +#define SF_DIRECT 0x00000001 /* connection made on the server matching the client cookie */ +#define SF_ASSIGNED 0x00000002 /* no need to assign a server to this stream */ +/* unused: 0x00000004 */ +#define SF_BE_ASSIGNED 0x00000008 /* a backend was assigned. Conns are accounted. 
*/ + +#define SF_FORCE_PRST 0x00000010 /* force persistence here, even if server is down */ +#define SF_MONITOR 0x00000020 /* this stream comes from a monitoring system */ +#define SF_CURR_SESS 0x00000040 /* a connection is currently being counted on the server */ +#define SF_CONN_EXP 0x00000080 /* timeout has expired */ +#define SF_REDISP 0x00000100 /* set if this stream was redispatched from one server to another */ +#define SF_IGNORE 0x00000200 /* The stream led to a mux upgrade, and should be ignored */ +#define SF_REDIRECTABLE 0x00000400 /* set if this stream is redirectable (GET or HEAD) */ +#define SF_HTX 0x00000800 /* set if this stream is an htx stream */ + +/* stream termination conditions, bits values 0x1000 to 0xc000 (0-12 shift 12) */ +#define SF_ERR_NONE 0x00000000 /* normal end of request */ +#define SF_ERR_LOCAL 0x00001000 /* the proxy locally processed this request => not an error */ +#define SF_ERR_CLITO 0x00002000 /* client time-out */ +#define SF_ERR_CLICL 0x00003000 /* client closed (read/write error) */ +#define SF_ERR_SRVTO 0x00004000 /* server time-out, connect time-out */ +#define SF_ERR_SRVCL 0x00005000 /* server closed (connect/read/write error) */ +#define SF_ERR_PRXCOND 0x00006000 /* the proxy decided to close (deny...) */ +#define SF_ERR_RESOURCE 0x00007000 /* the proxy encountered a lack of local resources (fd, mem, ...) */ +#define SF_ERR_INTERNAL 0x00008000 /* the proxy encountered an internal error */ +#define SF_ERR_DOWN 0x00009000 /* the proxy killed a stream because the backend became unavailable */ +#define SF_ERR_KILLED 0x0000a000 /* the proxy killed a stream because it was asked to do so */ +#define SF_ERR_UP 0x0000b000 /* the proxy killed a stream because a preferred backend became available */ +#define SF_ERR_CHK_PORT 0x0000c000 /* no port could be found for a health check. 
TODO: check SF_ERR_SHIFT */ +#define SF_ERR_MASK 0x0000f000 /* mask to get only stream error flags */ +#define SF_ERR_SHIFT 12 /* bit shift */ + +/* stream state at termination, bits values 0x10000 to 0x70000 (0-7 shift 16) */ +#define SF_FINST_R 0x00010000 /* stream ended during client request */ +#define SF_FINST_C 0x00020000 /* stream ended during server connect */ +#define SF_FINST_H 0x00030000 /* stream ended during server headers */ +#define SF_FINST_D 0x00040000 /* stream ended during data phase */ +#define SF_FINST_L 0x00050000 /* stream ended while pushing last data to client */ +#define SF_FINST_Q 0x00060000 /* stream ended while waiting in queue for a server slot */ +#define SF_FINST_T 0x00070000 /* stream ended tarpitted */ +#define SF_FINST_MASK 0x00070000 /* mask to get only final stream state flags */ +#define SF_FINST_SHIFT 16 /* bit shift */ + +#define SF_IGNORE_PRST 0x00080000 /* ignore persistence */ + +#define SF_SRV_REUSED 0x00100000 /* the server-side connection was reused */ +#define SF_SRV_REUSED_ANTICIPATED 0x00200000 /* the connection was reused but the mux is not ready yet */ +#define SF_WEBSOCKET 0x00400000 /* websocket stream */ // TODO: must be removed +#define SF_SRC_ADDR 0x00800000 /* get the source ip/port with getsockname */ + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG and __APPEND_ENUM macros. The new end of the buffer is + * returned. + */ +static forceinline char *strm_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) +#define _e(m, e, ...) 
__APPEND_ENUM(buf, len, delim, flg, m, e, #e, __VA_ARGS__) + /* prologue */ + _(0); + /* flags & enums */ + _(SF_IGNORE_PRST, _(SF_SRV_REUSED, _(SF_SRV_REUSED_ANTICIPATED, + _(SF_WEBSOCKET, _(SF_SRC_ADDR))))); + + _e(SF_FINST_MASK, SF_FINST_R, _e(SF_FINST_MASK, SF_FINST_C, + _e(SF_FINST_MASK, SF_FINST_H, _e(SF_FINST_MASK, SF_FINST_D, + _e(SF_FINST_MASK, SF_FINST_L, _e(SF_FINST_MASK, SF_FINST_Q, + _e(SF_FINST_MASK, SF_FINST_T))))))); + + _e(SF_ERR_MASK, SF_ERR_LOCAL, _e(SF_ERR_MASK, SF_ERR_CLITO, + _e(SF_ERR_MASK, SF_ERR_CLICL, _e(SF_ERR_MASK, SF_ERR_SRVTO, + _e(SF_ERR_MASK, SF_ERR_SRVCL, _e(SF_ERR_MASK, SF_ERR_PRXCOND, + _e(SF_ERR_MASK, SF_ERR_RESOURCE, _e(SF_ERR_MASK, SF_ERR_INTERNAL, + _e(SF_ERR_MASK, SF_ERR_DOWN, _e(SF_ERR_MASK, SF_ERR_KILLED, + _e(SF_ERR_MASK, SF_ERR_UP, _e(SF_ERR_MASK, SF_ERR_CHK_PORT)))))))))))); + + _(SF_DIRECT, _(SF_ASSIGNED, _(SF_BE_ASSIGNED, _(SF_FORCE_PRST, + _(SF_MONITOR, _(SF_CURR_SESS, _(SF_CONN_EXP, _(SF_REDISP, + _(SF_IGNORE, _(SF_REDIRECTABLE, _(SF_HTX))))))))))); + + /* epilogue */ + _(~0U); + return buf; +#undef _e +#undef _ +} + + +/* flags for the proxy of the master CLI */ +/* 0x0001.. to 0x8000 are reserved for ACCESS_* flags from cli-t.h */ + +#define PCLI_F_PROMPT 0x10000 +#define PCLI_F_PAYLOAD 0x20000 +#define PCLI_F_RELOAD 0x40000 /* this is the "reload" stream, quits after displaying reload status */ +#define PCLI_F_TIMED 0x80000 /* the prompt shows the process' uptime */ + + +/* error types reported on the streams for more accurate reporting. + * Please also update the strm_et_show_flags() function below in case of changes. 
+ */ +enum { + STRM_ET_NONE = 0x0000, /* no error yet, leave it to zero */ + STRM_ET_QUEUE_TO = 0x0001, /* queue timeout */ + STRM_ET_QUEUE_ERR = 0x0002, /* queue error (eg: full) */ + STRM_ET_QUEUE_ABRT = 0x0004, /* aborted in queue by external cause */ + STRM_ET_CONN_TO = 0x0008, /* connection timeout */ + STRM_ET_CONN_ERR = 0x0010, /* connection error (eg: no server available) */ + STRM_ET_CONN_ABRT = 0x0020, /* connection aborted by external cause (eg: abort) */ + STRM_ET_CONN_RES = 0x0040, /* connection aborted due to lack of resources */ + STRM_ET_CONN_OTHER = 0x0080, /* connection aborted for other reason (eg: 500) */ + STRM_ET_DATA_TO = 0x0100, /* timeout during data phase */ + STRM_ET_DATA_ERR = 0x0200, /* error during data phase */ + STRM_ET_DATA_ABRT = 0x0400, /* data phase aborted by external cause */ +}; + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *strm_et_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(STRM_ET_QUEUE_TO, _(STRM_ET_QUEUE_ERR, _(STRM_ET_QUEUE_ABRT, + _(STRM_ET_CONN_TO, _(STRM_ET_CONN_ERR, _(STRM_ET_CONN_ABRT, + _(STRM_ET_CONN_RES, _(STRM_ET_CONN_OTHER, _(STRM_ET_DATA_TO, + _(STRM_ET_DATA_ERR, _(STRM_ET_DATA_ABRT))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +struct hlua; +struct proxy; +struct pendconn; +struct session; +struct server; +struct task; +struct sockaddr_storage; + +/* some external definitions */ +struct strm_logs { + int logwait; /* log fields waiting to be collected : LW_* */ + int level; /* log level to force + 1 if > 0, -1 = no log */ + struct timeval accept_date; /* date of the stream's accept() in user date */ + ullong accept_ts; /* date of the session's accept() in internal date (monotonic) */ + long t_handshake; /* handshake duration, -1 if never occurs */ + long t_idle; /* idle duration, -1 if never occurs */ + ullong request_ts; /* date when the request arrives in internal date */ + long t_queue; /* delay before the stream gets out of the connect queue, -1 if never occurs */ + long t_connect; /* delay before the connect() to the server succeeds, -1 if never occurs */ + long t_data; /* delay before the first data byte from the server ... 
*/ + unsigned long t_close; /* total stream duration */ + unsigned long srv_queue_pos; /* number of streams de-queued while waiting for a connection slot on this server */ + unsigned long prx_queue_pos; /* number of streams de-queued while waiting for a connection slot on this instance */ + long long bytes_in; /* number of bytes transferred from the client to the server */ + long long bytes_out; /* number of bytes transferred from the server to the client */ +}; + +struct stream { + enum obj_type obj_type; /* object type == OBJ_TYPE_STREAM */ + enum sc_state prev_conn_state; /* CS_ST*, copy of previous state of the server stream connector */ + + int16_t priority_class; /* priority class of the stream for the pending queue */ + int32_t priority_offset; /* priority offset of the stream for the pending queue */ + + int flags; /* some flags describing the stream (SF_*) */ + unsigned int uniq_id; /* unique ID used for the traces */ + enum obj_type *target; /* target to use for this stream */ + + struct session *sess; /* the session this stream is attached to */ + + struct channel req; /* request channel */ + struct channel res; /* response channel */ + + struct proxy *be; /* the proxy this stream depends on for the server side */ + + struct server *srv_conn; /* stream already has a slot on a server and is not in queue */ + struct pendconn *pend_pos; /* if not NULL, points to the pending position in the pending queue */ + + struct http_txn *txn; /* current HTTP transaction being processed. Should become a list. */ + + struct task *task; /* the task associated with this stream */ + unsigned int pending_events; /* the pending events not yet processed by the stream. + * This is a bit field of TASK_WOKEN_* */ + int conn_retries; /* number of connect retries performed */ + unsigned int conn_exp; /* wake up time for connect, queue, turn-around, ... 
*/ + unsigned int conn_err_type; /* first error detected, one of STRM_ET_* */ + struct list list; /* position in the thread's streams list */ + struct mt_list by_srv; /* position in server stream list */ + struct list back_refs; /* list of users tracking this stream */ + struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */ + + uint64_t lat_time; /* total latency time experienced */ + uint64_t cpu_time; /* total CPU time consumed */ + struct freq_ctr call_rate; /* stream task call rate without making progress */ + + short store_count; + /* 2 unused bytes here */ + + struct { + struct stksess *ts; + struct stktable *table; + } store[8]; /* tracked stickiness values to store */ + + struct stkctr *stkctr; /* content-aware stick counters */ + + struct strm_flt strm_flt; /* current state of filters active on this stream */ + + char **req_cap; /* array of captures from the request (may be NULL) */ + char **res_cap; /* array of captures from the response (may be NULL) */ + struct vars vars_txn; /* list of variables for the txn scope. */ + struct vars vars_reqres; /* list of variables for the request and resp scope. */ + + struct stconn *scf; /* frontend stream connector */ + struct stconn *scb; /* backend stream connector */ + + struct strm_logs logs; /* logs for this stream */ + + void (*do_log)(struct stream *s); /* the function to call in order to log (or NULL) */ + void (*srv_error)(struct stream *s, /* the function to call upon unrecoverable server errors (or NULL) */ + struct stconn *sc); + + int pcli_next_pid; /* next target PID to use for the CLI proxy */ + int pcli_flags; /* flags for CLI proxy */ + char pcli_payload_pat[8]; /* payload pattern for the CLI proxy */ + + struct ist unique_id; /* custom unique ID */ + + /* These two pointers are used to resume the execution of the rule lists. */ + struct list *current_rule_list; /* this is used to store the current executed rule list. 
*/ + void *current_rule; /* this is used to store the current rule to be resumed. */ + int rules_exp; /* expiration date for current rules execution */ + int tunnel_timeout; + const char *last_rule_file; /* last evaluated final rule's file (def: NULL) */ + int last_rule_line; /* last evaluated final rule's line (def: 0) */ + + unsigned int stream_epoch; /* copy of stream_epoch when the stream was created */ + struct hlua *hlua; /* lua runtime context */ + + /* Context */ + struct { + struct resolv_requester *requester; /* owner of the resolution */ + struct act_rule *parent; /* rule which requested this resolution */ + char *hostname_dn; /* hostname being resolved, in domain name format */ + int hostname_dn_len; /* size of hostname_dn */ + /* 4 unused bytes here, recoverable via packing if needed */ + } resolv_ctx; /* context information for DNS resolution */ +}; + +#endif /* _HAPROXY_STREAM_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/stream.h b/include/haproxy/stream.h new file mode 100644 index 0000000..a884007 --- /dev/null +++ b/include/haproxy/stream.h @@ -0,0 +1,404 @@ +/* + * include/haproxy/stream.h + * This file defines everything related to streams. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_STREAM_H +#define _HAPROXY_STREAM_H + +#include <haproxy/action-t.h> +#include <haproxy/api.h> +#include <haproxy/fd.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/obj_type.h> +#include <haproxy/pool-t.h> +#include <haproxy/queue.h> +#include <haproxy/session.h> +#include <haproxy/stconn.h> +#include <haproxy/stick_table.h> +#include <haproxy/stream-t.h> +#include <haproxy/task-t.h> +#include <haproxy/trace-t.h> + +extern struct trace_source trace_strm; + +/* Details about these events are defined in <src/stream.c> */ +#define STRM_EV_STRM_NEW (1ULL << 0) +#define STRM_EV_STRM_FREE (1ULL << 1) +#define STRM_EV_STRM_ERR (1ULL << 2) +#define STRM_EV_STRM_ANA (1ULL << 3) +#define STRM_EV_STRM_PROC (1ULL << 4) +#define STRM_EV_CS_ST (1ULL << 5) +#define STRM_EV_HTTP_ANA (1ULL << 6) +#define STRM_EV_HTTP_ERR (1ULL << 7) +#define STRM_EV_TCP_ANA (1ULL << 8) +#define STRM_EV_TCP_ERR (1ULL << 9) +#define STRM_EV_FLT_ANA (1ULL << 10) +#define STRM_EV_FLT_ERR (1ULL << 11) + +#define IS_HTX_STRM(strm) ((strm)->flags & SF_HTX) + +extern struct pool_head *pool_head_stream; +extern struct pool_head *pool_head_uniqueid; + +extern struct data_cb sess_conn_cb; + +struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer *input); +void stream_free(struct stream *s); +int stream_upgrade_from_sc(struct stconn *sc, struct buffer *input); +int stream_set_http_mode(struct stream *s, const struct mux_proto_list *mux_proto); + +/* kill a stream and set the termination flags to <why> (one of SF_ERR_*) */ +void stream_shutdown(struct stream *stream, int why); +void stream_dump_and_crash(enum obj_type *obj, int rate); +void strm_dump_to_buffer(struct buffer *buf, const struct stream *strm, const char *pfx, uint32_t 
anon_key); + +struct ist stream_generate_unique_id(struct stream *strm, struct list *format); + +void stream_process_counters(struct stream *s); +void sess_change_server(struct stream *strm, struct server *newsrv); +struct task *process_stream(struct task *t, void *context, unsigned int state); +void default_srv_error(struct stream *s, struct stconn *sc); + +/* Update the stream's backend and server time stats */ +void stream_update_time_stats(struct stream *s); +void stream_release_buffers(struct stream *s); +int stream_buf_available(void *arg); + +/* returns the session this stream belongs to */ +static inline struct session *strm_sess(const struct stream *strm) +{ + return strm->sess; +} + +/* returns the frontend this stream was initiated from */ +static inline struct proxy *strm_fe(const struct stream *strm) +{ + return strm->sess->fe; +} + +/* returns the listener this stream was initiated from */ +static inline struct listener *strm_li(const struct stream *strm) +{ + return strm->sess->listener; +} + +/* returns a pointer to the origin of the session which created this stream */ +static inline enum obj_type *strm_orig(const struct stream *strm) +{ + return strm->sess->origin; +} + +/* Remove the refcount from the stream to the tracked counters, and clear the + * pointer to ensure this is only performed once. The caller is responsible for + * ensuring that the pointer is valid first. We must be extremely careful not + * to touch the entries we inherited from the session. 
+ */ +static inline void stream_store_counters(struct stream *s) +{ + void *ptr; + int i; + struct stksess *ts; + + if (unlikely(!s->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) { + ts = stkctr_entry(&s->stkctr[i]); + if (!ts) + continue; + + if (stkctr_entry(&s->sess->stkctr[i])) + continue; + + ptr = stktable_data_ptr(s->stkctr[i].table, ts, STKTABLE_DT_CONN_CUR); + if (ptr) { + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + if (stktable_data_cast(ptr, std_t_uint) > 0) + stktable_data_cast(ptr, std_t_uint)--; + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + stktable_touch_local(s->stkctr[i].table, ts, 0); + } + stkctr_set_entry(&s->stkctr[i], NULL); + stksess_kill_if_expired(s->stkctr[i].table, ts, 1); + } +} + +/* Remove the refcount from the stream counters tracked at the content level if + * any, and clear the pointer to ensure this is only performed once. The caller + * is responsible for ensuring that the pointer is valid first. We must be + * extremely careful not to touch the entries we inherited from the session. 
+ */ +static inline void stream_stop_content_counters(struct stream *s) +{ + struct stksess *ts; + void *ptr; + int i; + + if (unlikely(!s->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) { + ts = stkctr_entry(&s->stkctr[i]); + if (!ts) + continue; + + if (stkctr_entry(&s->sess->stkctr[i])) + continue; + + if (!(stkctr_flags(&s->stkctr[i]) & STKCTR_TRACK_CONTENT)) + continue; + + ptr = stktable_data_ptr(s->stkctr[i].table, ts, STKTABLE_DT_CONN_CUR); + if (ptr) { + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + if (stktable_data_cast(ptr, std_t_uint) > 0) + stktable_data_cast(ptr, std_t_uint)--; + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + stktable_touch_local(s->stkctr[i].table, ts, 0); + } + stkctr_set_entry(&s->stkctr[i], NULL); + stksess_kill_if_expired(s->stkctr[i].table, ts, 1); + } +} + +/* Increase total and concurrent connection count for stick entry <ts> of table + * <t>. The caller is responsible for ensuring that <t> and <ts> are valid + * pointers, and for calling this only once per connection. 
+ */ +static inline void stream_start_counters(struct stktable *t, struct stksess *ts) +{ + void *ptr; + + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CUR); + if (ptr) + stktable_data_cast(ptr, std_t_uint)++; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CNT); + if (ptr) + stktable_data_cast(ptr, std_t_uint)++; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_RATE); + if (ptr) + update_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), + t->data_arg[STKTABLE_DT_CONN_RATE].u, 1); + if (tick_isset(t->expire)) + ts->expire = tick_add(now_ms, MS_TO_TICKS(t->expire)); + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + stktable_touch_local(t, ts, 0); +} + +/* Enable tracking of stream counters as <stkctr> on stksess <ts>. The caller is + * responsible for ensuring that <t> and <ts> are valid pointers. Some controls + * are performed to ensure the state can still change. + */ +static inline void stream_track_stkctr(struct stkctr *ctr, struct stktable *t, struct stksess *ts) +{ + /* Why this test ???? */ + if (stkctr_entry(ctr)) + return; + + ctr->table = t; + stkctr_set_entry(ctr, ts); + stream_start_counters(t, ts); +} + +/* Increase the number of cumulated HTTP requests in the tracked counters */ +static inline void stream_inc_http_req_ctr(struct stream *s) +{ + int i; + + if (unlikely(!s->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) { + if (!stkctr_inc_http_req_ctr(&s->stkctr[i])) + stkctr_inc_http_req_ctr(&s->sess->stkctr[i]); + } +} + +/* Increase the number of cumulated HTTP requests in the backend's tracked + * counters. We don't look up the session since it cannot happen in the backend. 
+ */ +static inline void stream_inc_be_http_req_ctr(struct stream *s) +{ + int i; + + if (unlikely(!s->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) { + if (!stkctr_entry(&s->stkctr[i]) || !(stkctr_flags(&s->stkctr[i]) & STKCTR_TRACK_BACKEND)) + continue; + + stkctr_inc_http_req_ctr(&s->stkctr[i]); + } +} + +/* Increase the number of cumulated failed HTTP requests in the tracked + * counters. Only 4xx requests should be counted here so that we can + * distinguish between errors caused by client behaviour and other ones. + * Note that even 404 are interesting because they're generally caused by + * vulnerability scans. + */ +static inline void stream_inc_http_err_ctr(struct stream *s) +{ + int i; + + if (unlikely(!s->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) { + if (!stkctr_inc_http_err_ctr(&s->stkctr[i])) + stkctr_inc_http_err_ctr(&s->sess->stkctr[i]); + } +} + +/* Increase the number of cumulated failed HTTP responses in the tracked + * counters. Only some 5xx responses should be counted here so that we can + * distinguish between server failures and errors triggered by the client + * (i.e. 501 and 505 may be triggered and must be ignored). + */ +static inline void stream_inc_http_fail_ctr(struct stream *s) +{ + int i; + + if (unlikely(!s->stkctr)) // pool not allocated yet + return; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) { + if (!stkctr_inc_http_fail_ctr(&s->stkctr[i])) + stkctr_inc_http_fail_ctr(&s->sess->stkctr[i]); + } +} + +static inline void stream_add_srv_conn(struct stream *strm, struct server *srv) +{ + /* note: this inserts in reverse order but we do not care, it's only + * used for massive kills (i.e. almost never). MT_LIST_INSERT() is a bit + * faster than MT_LIST_APPEND under contention due to a faster recovery + * from a conflict with an adjacent MT_LIST_DELETE, and using it improves + * the performance by about 3% on 32-cores. 
+ */ + MT_LIST_INSERT(&srv->per_thr[tid].streams, &strm->by_srv); + HA_ATOMIC_STORE(&strm->srv_conn, srv); +} + +static inline void stream_del_srv_conn(struct stream *strm) +{ + struct server *srv = strm->srv_conn; + + if (!srv) + return; + + MT_LIST_DELETE(&strm->by_srv); + HA_ATOMIC_STORE(&strm->srv_conn, NULL); +} + +static inline void stream_init_srv_conn(struct stream *strm) +{ + strm->srv_conn = NULL; + MT_LIST_INIT(&strm->by_srv); +} + +static inline void stream_choose_redispatch(struct stream *s) +{ + /* If the "redispatch" option is set on the backend, we are allowed to + * retry on another server. By default this redispatch occurs on the + * last retry, but if configured we allow redispatches to occur on + * configurable intervals, e.g. on every retry. In order to achieve this, + * we must mark the stream unassigned, and eventually clear the DIRECT + * bit to ignore any persistence cookie. We won't count a retry nor a + * redispatch yet, because this will depend on what server is selected. + * If the connection is not persistent, the balancing algorithm is not + * determinist (round robin) and there is more than one active server, + * we accept to perform an immediate redispatch without waiting since + * we don't care about this particular server. 
+ */ + if (objt_server(s->target) && + (s->be->options & PR_O_REDISP) && !(s->flags & SF_FORCE_PRST) && + ((__objt_server(s->target)->cur_state < SRV_ST_RUNNING) || + (((s->be->redispatch_after > 0) && + (s->conn_retries % s->be->redispatch_after == 0)) || + ((s->be->redispatch_after < 0) && + (s->conn_retries % (s->be->conn_retries + 1 + s->be->redispatch_after) == 0))) || + (!(s->flags & SF_DIRECT) && s->be->srv_act > 1 && + ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI)))) { + sess_change_server(s, NULL); + if (may_dequeue_tasks(objt_server(s->target), s->be)) + process_srv_queue(objt_server(s->target)); + + sockaddr_free(&s->scb->dst); + s->flags &= ~(SF_DIRECT | SF_ASSIGNED); + s->scb->state = SC_ST_REQ; + } else { + if (objt_server(s->target)) + _HA_ATOMIC_INC(&__objt_server(s->target)->counters.retries); + _HA_ATOMIC_INC(&s->be->be_counters.retries); + s->scb->state = SC_ST_ASS; + } + +} + +/* + * This function only has to be called once after a wakeup event in case of + * suspected timeout. It controls the stream connection timeout and sets + * si->flags accordingly. It does NOT close anything, as this timeout may + * be used for any purpose. It returns 1 if the timeout fired, otherwise + * zero. 
+ */ +static inline int stream_check_conn_timeout(struct stream *s) +{ + if (tick_is_expired(s->conn_exp, now_ms)) { + s->flags |= SF_CONN_EXP; + return 1; + } + return 0; +} + +int stream_set_timeout(struct stream *s, enum act_timeout_name name, int timeout); +void stream_retnclose(struct stream *s, const struct buffer *msg); +void sess_set_term_flags(struct stream *s); +void stream_abort(struct stream *s); + +void service_keywords_register(struct action_kw_list *kw_list); +struct action_kw *service_find(const char *kw); +void list_services(FILE *out); + +#endif /* _HAPROXY_STREAM_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h new file mode 100644 index 0000000..ea52de9 --- /dev/null +++ b/include/haproxy/task-t.h @@ -0,0 +1,182 @@ +/* + * include/haproxy/task-t.h + * Macros, variables and structures for task management. + * + * Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TASK_T_H +#define _HAPROXY_TASK_T_H + +#include <sys/time.h> + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/show_flags-t.h> +#include <haproxy/thread-t.h> + +/* values for task->state (32 bits). 
+ * Please also update the task_show_state() function below in case of changes. + */ +#define TASK_SLEEPING 0x00000000 /* task sleeping */ +#define TASK_RUNNING 0x00000001 /* the task is currently running */ +/* unused 0x00000002 */ +#define TASK_QUEUED 0x00000004 /* The task has been (re-)added to the run queue */ +/* unused 0x00000008 */ +#define TASK_SELF_WAKING 0x00000010 /* task/tasklet found waking itself */ +#define TASK_KILLED 0x00000020 /* task/tasklet killed, may now be freed */ +#define TASK_IN_LIST 0x00000040 /* tasklet is in a tasklet list */ +#define TASK_HEAVY 0x00000080 /* this task/tasklet is extremely heavy */ + +#define TASK_WOKEN_INIT 0x00000100 /* woken up for initialisation purposes */ +#define TASK_WOKEN_TIMER 0x00000200 /* woken up because of expired timer */ +#define TASK_WOKEN_IO 0x00000400 /* woken up because of completed I/O */ +#define TASK_WOKEN_SIGNAL 0x00000800 /* woken up by a system signal */ +#define TASK_WOKEN_MSG 0x00001000 /* woken up by another task's message */ +#define TASK_WOKEN_RES 0x00002000 /* woken up because of available resource */ +#define TASK_WOKEN_OTHER 0x00004000 /* woken up for an unspecified reason */ + +/* use this to check a task state or to clean it up before queueing */ +#define TASK_WOKEN_ANY (TASK_WOKEN_OTHER|TASK_WOKEN_INIT|TASK_WOKEN_TIMER| \ + TASK_WOKEN_IO|TASK_WOKEN_SIGNAL|TASK_WOKEN_MSG| \ + TASK_WOKEN_RES) + +#define TASK_F_TASKLET 0x00008000 /* nature of this task: 0=task 1=tasklet */ +#define TASK_F_USR1 0x00010000 /* preserved user flag 1, application-specific, def:0 */ +/* unused: 0x20000..0x80000000 */ + +/* These flags are persistent across scheduler calls */ +#define TASK_PERSISTENT (TASK_SELF_WAKING | TASK_KILLED | \ + TASK_HEAVY | TASK_F_TASKLET | TASK_F_USR1) + +/* This function is used to report state in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. 
+ */ +static forceinline char *task_show_state(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(TASK_RUNNING, _(TASK_QUEUED, _(TASK_SELF_WAKING, + _(TASK_KILLED, _(TASK_IN_LIST, _(TASK_HEAVY, _(TASK_WOKEN_INIT, + _(TASK_WOKEN_TIMER, _(TASK_WOKEN_IO, _(TASK_WOKEN_SIGNAL, + _(TASK_WOKEN_MSG, _(TASK_WOKEN_RES, _(TASK_WOKEN_OTHER, + _(TASK_F_TASKLET, _(TASK_F_USR1))))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/* these wakeup types are used to indicate how a task/tasklet was woken up, for + * debugging purposes. + */ +enum { + WAKEUP_TYPE_UNSET = 0, + WAKEUP_TYPE_TASK_WAKEUP, + WAKEUP_TYPE_TASK_INSTANT_WAKEUP, + WAKEUP_TYPE_TASKLET_WAKEUP, + WAKEUP_TYPE_TASKLET_WAKEUP_AFTER, + WAKEUP_TYPE_TASK_SCHEDULE, + WAKEUP_TYPE_TASK_QUEUE, + WAKEUP_TYPE_APPCTX_WAKEUP, +}; + +struct notification { + struct list purge_me; /* Part of the list of signals to be purged in the + case of the LUA execution stack crash. */ + struct list wake_me; /* Part of list of signals to be targeted if an + event occurs. */ + struct task *task; /* The task to be wake if an event occurs. */ + __decl_thread(HA_SPINLOCK_T lock); +}; + +#ifdef DEBUG_TASK +/* prev_caller keeps a copy of the previous value of the <caller> field. */ +#define TASK_DEBUG_STORAGE \ + struct { \ + const struct ha_caller *prev_caller; \ + } debug +#else +#define TASK_DEBUG_STORAGE +#endif + +/* This part is common between struct task and struct tasklet so that tasks + * can be used as-is as tasklets. + * + * Note that the process() function must ALWAYS return the task/tasklet's + * pointer if the task/tasklet remains valid, and return NULL if it has been + * deleted. The scheduler relies on this to know if it should update its state + * on return. + */ +#define TASK_COMMON \ + struct { \ + unsigned int state; /* task state : bitfield of TASK_ */ \ + int tid; /* tid of task/tasklet. 
<0 = local for tasklet, unbound for task */ \ + struct task *(*process)(struct task *t, void *ctx, unsigned int state); /* the function which processes the task */ \ + void *context; /* the task's context */ \ + const struct ha_caller *caller; /* call place of last wakeup(); 0 on init, -1 on free */ \ + uint32_t wake_date; /* date of the last task wakeup */ \ + unsigned int calls; /* number of times process was called */ \ + TASK_DEBUG_STORAGE; \ + } + +/* The base for all tasks */ +struct task { + TASK_COMMON; /* must be at the beginning! */ + struct eb32_node rq; /* ebtree node used to hold the task in the run queue */ + /* WARNING: the struct task is often aliased as a struct tasklet when + * it is NOT in the run queue. The tasklet has its struct list here + * where rq starts and this works because both are exclusive. Never + * ever reorder these fields without taking this into account! + */ + struct eb32_node wq; /* ebtree node used to hold the task in the wait queue */ + int expire; /* next expiration date for this task, in ticks */ + short nice; /* task prio from -1024 to +1024 */ + /* 16-bit hole here */ +}; + +/* lightweight tasks, without priority, mainly used for I/Os */ +struct tasklet { + TASK_COMMON; /* must be at the beginning! */ + struct list list; + /* WARNING: the struct task is often aliased as a struct tasklet when + * it is not in the run queue. The task has its struct rq here where + * list starts and this works because both are exclusive. Never ever + * reorder these fields without taking this into account! + */ +}; + +/* + * The task callback (->process) is responsible for updating ->expire. It must + * return a pointer to the task itself, except if the task has been deleted, in + * which case it returns NULL so that the scheduler knows it must not check the + * expire timer. The scheduler will requeue the task at the proper location. 
+ */ + + +#endif /* _HAPROXY_TASK_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/task.h b/include/haproxy/task.h new file mode 100644 index 0000000..1c9c45f --- /dev/null +++ b/include/haproxy/task.h @@ -0,0 +1,857 @@ +/* + * include/haproxy/task.h + * Functions for task management. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TASK_H +#define _HAPROXY_TASK_H + + +#include <sys/time.h> + +#include <import/eb32tree.h> + +#include <haproxy/activity.h> +#include <haproxy/api.h> +#include <haproxy/clock.h> +#include <haproxy/fd.h> +#include <haproxy/global.h> +#include <haproxy/intops.h> +#include <haproxy/list.h> +#include <haproxy/pool.h> +#include <haproxy/task-t.h> +#include <haproxy/thread.h> +#include <haproxy/ticks.h> + + +/* Principle of the wait queue. + * + * We want to be able to tell whether an expiration date is before of after the + * current time <now>. We KNOW that expiration dates are never too far apart, + * because they are measured in ticks (milliseconds). We also know that almost + * all dates will be in the future, and that a very small part of them will be + * in the past, they are the ones which have expired since last time we checked + * them. 
Using ticks, we know if a date is in the future or in the past, but we + * cannot use that to store sorted information because that reference changes + * all the time. + * + * We'll use the fact that the time wraps to sort timers. Timers above <now> + * are in the future, timers below <now> are in the past. Here, "above" and + * "below" are to be considered modulo 2^31. + * + * Timers are stored sorted in an ebtree. We use the new ability for ebtrees to + * lookup values starting from X to only expire tasks between <now> - 2^31 and + * <now>. If the end of the tree is reached while walking over it, we simply + * loop back to the beginning. That way, we have no problem keeping sorted + * wrapping timers in a tree, between (now - 24 days) and (now + 24 days). The + * keys in the tree always reflect their real position, none can be infinite. + * This reduces the number of checks to be performed. + * + * Another nice optimisation is to allow a timer to stay at an old place in the + * queue as long as it's not further than the real expiration date. That way, + * we use the tree as a place holder for a minorant of the real expiration + * date. Since we have a very low chance of hitting a timeout anyway, we can + * bounce the nodes to their right place when we scan the tree if we encounter + * a misplaced node once in a while. This even allows us not to remove the + * infinite timers from the wait queue. + * + * So, to summarize, we have : + * - node->key always defines current position in the wait queue + * - timer is the real expiration date (possibly infinite) + * - node->key is always before or equal to timer + * + * The run queue works similarly to the wait queue except that the current date + * is replaced by an insertion counter which can also wrap without any problem. 
+ */ + +/* The farthest we can look back in a timer tree */ +#define TIMER_LOOK_BACK (1U << 31) + +/* tasklets are recognized with nice==-32768 */ +#define TASK_IS_TASKLET(t) ((t)->state & TASK_F_TASKLET) + +/* a few exported variables */ +extern struct pool_head *pool_head_task; +extern struct pool_head *pool_head_tasklet; +extern struct pool_head *pool_head_notification; + +__decl_thread(extern HA_RWLOCK_T wq_lock THREAD_ALIGNED(64)); + +void __tasklet_wakeup_on(struct tasklet *tl, int thr); +struct list *__tasklet_wakeup_after(struct list *head, struct tasklet *tl); +void task_kill(struct task *t); +void tasklet_kill(struct tasklet *t); +void __task_wakeup(struct task *t); +void __task_queue(struct task *task, struct eb_root *wq); + +unsigned int run_tasks_from_lists(unsigned int budgets[]); + +/* + * This does 3 things : + * - wake up all expired tasks + * - call all runnable tasks + * - return the date of next event in <next> or eternity. + */ + +void process_runnable_tasks(void); + +/* + * Extract all expired timers from the timer queue, and wakes up all + * associated tasks. + */ +void wake_expired_tasks(void); + +/* Checks the next timer for the current thread by looking into its own timer + * list and the global one. It may return TICK_ETERNITY if no timer is present. + * Note that the next timer might very well be slightly in the past. + */ +int next_timer_expiry(void); + +/* + * Delete every tasks before running the master polling loop + */ +void mworker_cleantasks(void); + +/* returns the number of running tasks+tasklets on the whole process. Note + * that this *is* racy since a task may move from the global to a local + * queue for example and be counted twice. This is only for statistics + * reporting. + */ +static inline int total_run_queues() +{ + int thr, ret = 0; + + for (thr = 0; thr < global.nbthread; thr++) + ret += _HA_ATOMIC_LOAD(&ha_thread_ctx[thr].rq_total); + return ret; +} + +/* returns the number of allocated tasks across all threads. 
Note that this + * *is* racy since some threads might be updating their counts while we're + * looking, but this is only for statistics reporting. + */ +static inline int total_allocated_tasks() +{ + int thr, ret; + + for (thr = ret = 0; thr < global.nbthread; thr++) + ret += _HA_ATOMIC_LOAD(&ha_thread_ctx[thr].nb_tasks); + return ret; +} + +/* returns the number of running niced tasks+tasklets on the whole process. + * Note that this *is* racy since a task may move from the global to a local + * queue for example and be counted twice. This is only for statistics + * reporting. + */ +static inline int total_niced_running_tasks() +{ + int tgrp, ret = 0; + + for (tgrp = 0; tgrp < global.nbtgroups; tgrp++) + ret += _HA_ATOMIC_LOAD(&ha_tgroup_ctx[tgrp].niced_tasks); + return ret; +} + +/* return 0 if task is in run queue, otherwise non-zero */ +static inline int task_in_rq(struct task *t) +{ + /* Check if leaf_p is NULL, in case he's not in the runqueue, and if + * it's not 0x1, which would mean it's in the tasklet list. + */ + return t->rq.node.leaf_p != NULL; +} + +/* return 0 if task is in wait queue, otherwise non-zero */ +static inline int task_in_wq(struct task *t) +{ + return t->wq.node.leaf_p != NULL; +} + +/* returns true if the current thread has some work to do */ +static inline int thread_has_tasks(void) +{ + return ((int)!eb_is_empty(&th_ctx->rqueue) | + (int)!eb_is_empty(&th_ctx->rqueue_shared) | + (int)!!th_ctx->tl_class_mask | + (int)!MT_LIST_ISEMPTY(&th_ctx->shared_tasklet_list)); +} + +/* puts the task <t> in run queue with reason flags <f>, and returns <t> */ +/* This will put the task in the local runqueue if the task is only runnable + * by the current thread, in the global runqueue otherwies. With DEBUG_TASK, + * the <file>:<line> from the call place are stored into the task for tracing + * purposes. 
+ */ +#define task_wakeup(t, f) \ + _task_wakeup(t, f, MK_CALLER(WAKEUP_TYPE_TASK_WAKEUP, 0, 0)) + +static inline void _task_wakeup(struct task *t, unsigned int f, const struct ha_caller *caller) +{ + unsigned int state; + + state = _HA_ATOMIC_OR_FETCH(&t->state, f); + while (!(state & (TASK_RUNNING | TASK_QUEUED))) { + if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED)) { + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&t->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&t->debug.prev_caller, caller); +#endif + } + __task_wakeup(t); + break; + } + } +} + +/* Atomically drop the TASK_RUNNING bit while ensuring that any wakeup that + * happened since the flag was set will result in the task being queued (if + * it wasn't already). This is used to safely drop the flag from within the + * scheduler. The flag <f> is combined with existing flags before the test so + * that it's possible to unconditionally wakeup the task and drop the RUNNING + * flag if needed. + */ +static inline void task_drop_running(struct task *t, unsigned int f) +{ + unsigned int state, new_state; + + state = _HA_ATOMIC_LOAD(&t->state); + + while (1) { + new_state = state | f; + if (new_state & TASK_WOKEN_ANY) + new_state |= TASK_QUEUED; + + if (_HA_ATOMIC_CAS(&t->state, &state, new_state & ~TASK_RUNNING)) + break; + __ha_cpu_relax(); + } + + if ((new_state & ~state) & TASK_QUEUED) + __task_wakeup(t); +} + +/* + * Unlink the task from the wait queue, and possibly update the last_timer + * pointer. A pointer to the task itself is returned. The task *must* already + * be in the wait queue before calling this function. If unsure, use the safer + * task_unlink_wq() function. + */ +static inline struct task *__task_unlink_wq(struct task *t) +{ + eb32_delete(&t->wq); + return t; +} + +/* remove a task from its wait queue. It may either be the local wait queue if + * the task is bound to a single thread or the global queue. 
If the task uses a + * shared wait queue, the global wait queue lock is used. + */ +static inline struct task *task_unlink_wq(struct task *t) +{ + unsigned long locked; + + if (likely(task_in_wq(t))) { + locked = t->tid < 0; + BUG_ON(t->tid >= 0 && t->tid != tid && !(global.mode & MODE_STOPPING)); + if (locked) + HA_RWLOCK_WRLOCK(TASK_WQ_LOCK, &wq_lock); + __task_unlink_wq(t); + if (locked) + HA_RWLOCK_WRUNLOCK(TASK_WQ_LOCK, &wq_lock); + } + return t; +} + +/* Place <task> into the wait queue, where it may already be. If the expiration + * timer is infinite, do nothing and rely on wake_expired_task to clean up. + * If the task uses a shared wait queue, it's queued into the global wait queue, + * protected by the global wq_lock, otherwise by it necessarily belongs to the + * current thread'sand is queued without locking. + */ +#define task_queue(t) \ + _task_queue(t, MK_CALLER(WAKEUP_TYPE_TASK_QUEUE, 0, 0)) + +static inline void _task_queue(struct task *task, const struct ha_caller *caller) +{ + /* If we already have a place in the wait queue no later than the + * timeout we're trying to set, we'll stay there, because it is very + * unlikely that we will reach the timeout anyway. If the timeout + * has been disabled, it's useless to leave the queue as well. We'll + * rely on wake_expired_tasks() to catch the node and move it to the + * proper place should it ever happen. Finally we only add the task + * to the queue if it was not there or if it was further than what + * we want. 
+ */ + if (!tick_isset(task->expire)) + return; + +#ifdef USE_THREAD + if (task->tid < 0) { + HA_RWLOCK_WRLOCK(TASK_WQ_LOCK, &wq_lock); + if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key)) { + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&task->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&task->debug.prev_caller, caller); +#endif + } + __task_queue(task, &tg_ctx->timers); + } + HA_RWLOCK_WRUNLOCK(TASK_WQ_LOCK, &wq_lock); + } else +#endif + { + BUG_ON(task->tid != tid); + if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key)) { + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&task->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&task->debug.prev_caller, caller); +#endif + } + __task_queue(task, &th_ctx->timers); + } + } +} + +/* Change the thread affinity of a task to <thr>, which may either be a valid + * thread number from 0 to nbthread-1, or a negative value to allow the task + * to run on any thread. + * + * This may only be done from within the running task itself or during its + * initialization. It will unqueue and requeue the task from the wait queue + * if it was in it. This is safe against a concurrent task_queue() call because + * task_queue() itself will unlink again if needed after taking into account + * the new thread_mask. + */ +static inline void task_set_thread(struct task *t, int thr) +{ +#ifndef USE_THREAD + /* no shared queue without threads */ + thr = 0; +#endif + if (unlikely(task_in_wq(t))) { + task_unlink_wq(t); + t->tid = thr; + task_queue(t); + } + else { + t->tid = thr; + } +} + +/* schedules tasklet <tl> to run onto thread <thr> or the current thread if + * <thr> is negative. Note that it is illegal to wakeup a foreign tasklet if + * its tid is negative and it is illegal to self-assign a tasklet that was + * at least once scheduled on a specific thread. 
With DEBUG_TASK, the + * <file>:<line> from the call place are stored into the tasklet for tracing + * purposes. + */ +#define tasklet_wakeup_on(tl, thr) \ + _tasklet_wakeup_on(tl, thr, MK_CALLER(WAKEUP_TYPE_TASKLET_WAKEUP, 0, 0)) + +static inline void _tasklet_wakeup_on(struct tasklet *tl, int thr, const struct ha_caller *caller) +{ + unsigned int state = tl->state; + + do { + /* do nothing if someone else already added it */ + if (state & TASK_IN_LIST) + return; + } while (!_HA_ATOMIC_CAS(&tl->state, &state, state | TASK_IN_LIST)); + + /* at this point we're the first ones to add this task to the list */ + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&tl->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&tl->debug.prev_caller, caller); +#endif + } + + if (_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING) + tl->wake_date = now_mono_time(); + __tasklet_wakeup_on(tl, thr); +} + +/* schedules tasklet <tl> to run onto the thread designated by tl->tid, which + * is either its owner thread if >= 0 or the current thread if < 0. When + * DEBUG_TASK is set, the <file>:<line> from the call place are stored into the + * task for tracing purposes. + */ +#define tasklet_wakeup(tl) \ + _tasklet_wakeup_on(tl, (tl)->tid, MK_CALLER(WAKEUP_TYPE_TASKLET_WAKEUP, 0, 0)) + +/* instantly wakes up task <t> on its owner thread even if it's not the current + * one, bypassing the run queue. The purpose is to be able to avoid contention + * in the global run queue for massively remote tasks (e.g. queue) when there's + * no value in passing the task again through the priority ordering since it has + * already been subject to it once (e.g. before entering process_stream). The + * task goes directly into the shared mt_list as a tasklet and will run as + * TL_URGENT. Great care is taken to be certain it's not queued nor running + * already. 
+ */ +#define task_instant_wakeup(t, f) \ + _task_instant_wakeup(t, f, MK_CALLER(WAKEUP_TYPE_TASK_INSTANT_WAKEUP, 0, 0)) + +static inline void _task_instant_wakeup(struct task *t, unsigned int f, const struct ha_caller *caller) +{ + int thr = t->tid; + unsigned int state; + + if (thr < 0) + thr = tid; + + /* first, let's update the task's state with the wakeup condition */ + state = _HA_ATOMIC_OR_FETCH(&t->state, f); + + /* next we need to make sure the task was not/will not be added to the + * run queue because the tasklet list's mt_list uses the same storage + * as the task's run_queue. + */ + do { + /* do nothing if someone else already added it */ + if (state & (TASK_QUEUED|TASK_RUNNING)) + return; + } while (!_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED)); + + BUG_ON_HOT(task_in_rq(t)); + + /* at this point we're the first ones to add this task to the list */ + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&t->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&t->debug.prev_caller, caller); +#endif + } + + if (_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING) + t->wake_date = now_mono_time(); + __tasklet_wakeup_on((struct tasklet *)t, thr); +} + +/* schedules tasklet <tl> to run immediately after the current one is done + * <tl> will be queued after entry <head>, or at the head of the task list. Return + * the new head to be used to queue future tasks. This is used to insert multiple entries + * at the head of the tasklet list, typically to transfer processing from a tasklet + * to another one or a set of other ones. If <head> is NULL, the tasklet list of <thr> + * thread will be used. + * With DEBUG_TASK, the <file>:<line> from the call place are stored into the tasklet + * for tracing purposes. 
+ */ +#define tasklet_wakeup_after(head, tl) \ + _tasklet_wakeup_after(head, tl, MK_CALLER(WAKEUP_TYPE_TASKLET_WAKEUP_AFTER, 0, 0)) + +static inline struct list *_tasklet_wakeup_after(struct list *head, struct tasklet *tl, + const struct ha_caller *caller) +{ + unsigned int state = tl->state; + + do { + /* do nothing if someone else already added it */ + if (state & TASK_IN_LIST) + return head; + } while (!_HA_ATOMIC_CAS(&tl->state, &state, state | TASK_IN_LIST)); + + /* at this point we're the first one to add this task to the list */ + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&tl->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&tl->debug.prev_caller, caller); +#endif + } + + if (_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING) + tl->wake_date = now_mono_time(); + return __tasklet_wakeup_after(head, tl); +} + +/* This macro shows the current function name and the last known caller of the + * task (or tasklet) wakeup. + */ +#ifdef DEBUG_TASK +#define DEBUG_TASK_PRINT_CALLER(t) do { \ + const struct ha_caller *__caller = (t)->caller; \ + printf("%s woken up from %s(%s:%d)\n", __FUNCTION__, \ + __caller ? __caller->func : "(null)", \ + __caller ? __caller->file : "(null)", \ + __caller ? __caller->line : 0); \ +} while (0) +#else +#define DEBUG_TASK_PRINT_CALLER(t) do { } while (0) +#endif + + +/* Try to remove a tasklet from the list. This call is inherently racy and may + * only be performed on the thread that was supposed to dequeue this tasklet. + * This way it is safe to call MT_LIST_DELETE without first removing the + * TASK_IN_LIST bit, which must absolutely be removed afterwards in case + * another thread would want to wake this tasklet up in parallel. + */ +static inline void tasklet_remove_from_tasklet_list(struct tasklet *t) +{ + if (MT_LIST_DELETE(list_to_mt_list(&t->list))) { + _HA_ATOMIC_AND(&t->state, ~TASK_IN_LIST); + _HA_ATOMIC_DEC(&ha_thread_ctx[t->tid >= 0 ? t->tid : tid].rq_total); + } +} + +/* + * Initialize a new task.
The bare minimum is performed (queue pointers and + * state). The task is returned. This function should not be used outside of + * task_new(). If the thread ID is < 0, the task may run on any thread. + */ +static inline struct task *task_init(struct task *t, int tid) +{ + t->wq.node.leaf_p = NULL; + t->rq.node.leaf_p = NULL; + t->state = TASK_SLEEPING; +#ifndef USE_THREAD + /* no shared wq without threads */ + tid = 0; +#endif + t->tid = tid; + t->nice = 0; + t->calls = 0; + t->wake_date = 0; + t->expire = TICK_ETERNITY; + t->caller = NULL; + return t; +} + +/* Initialize a new tasklet. It's identified as a tasklet by its flags + * TASK_F_TASKLET. It is expected to run on the calling thread by default, + * it's up to the caller to change ->tid if it wants to own it. + */ +static inline void tasklet_init(struct tasklet *t) +{ + t->calls = 0; + t->state = TASK_F_TASKLET; + t->process = NULL; + t->tid = -1; + t->wake_date = 0; + t->caller = NULL; + LIST_INIT(&t->list); +} + +/* Allocate and initialize a new tasklet, local to the thread by default. The + * caller may assign its tid if it wants to own the tasklet. + */ +static inline struct tasklet *tasklet_new(void) +{ + struct tasklet *t = pool_alloc(pool_head_tasklet); + + if (t) { + tasklet_init(t); + } + return t; +} + +/* + * Allocate and initialize a new task, to run on global thread <thr>, or any + * thread if negative. The task count is incremented. The new task is returned, + * or NULL in case of lack of memory. It's up to the caller to pass a valid + * thread number (in tid space, 0 to nbthread-1, or <0 for any). Tasks created + * this way must be freed using task_destroy(). + */ +static inline struct task *task_new_on(int thr) +{ + struct task *t = pool_alloc(pool_head_task); + if (t) { + th_ctx->nb_tasks++; + task_init(t, thr); + } + return t; +} + +/* Allocate and initialize a new task, to run on the calling thread. The new + * task is returned, or NULL in case of lack of memory. 
The task count is + * incremented. + */ +static inline struct task *task_new_here(void) +{ + return task_new_on(tid); +} + +/* Allocate and initialize a new task, to run on any thread. The new task is + * returned, or NULL in case of lack of memory. The task count is incremented. + */ +static inline struct task *task_new_anywhere(void) +{ + return task_new_on(-1); +} + +/* + * Free a task. Its context must have been freed since it will be lost. The + * task count is decremented. If it is the current task, this one is reset. + */ +static inline void __task_free(struct task *t) +{ + if (t == th_ctx->current) { + th_ctx->current = NULL; + __ha_barrier_store(); + } + BUG_ON(task_in_wq(t) || task_in_rq(t)); + + BUG_ON((ulong)t->caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&t->debug.prev_caller, HA_ATOMIC_LOAD(&t->caller)); +#endif + HA_ATOMIC_STORE(&t->caller, (void*)1); // make sure to crash if used after free + + pool_free(pool_head_task, t); + th_ctx->nb_tasks--; + if (unlikely(stopping)) + pool_flush(pool_head_task); +} + +/* Destroys a task : it's unlinked from the wait queues and is freed if it's + * the current task or not queued otherwise it's marked to be freed by the + * scheduler. It does nothing if <t> is NULL. + */ +static inline void task_destroy(struct task *t) +{ + if (!t) + return; + + task_unlink_wq(t); + /* We don't have to explicitly remove from the run queue. + * If we are in the runqueue, the test below will set t->process + * to NULL, and the task will be free'd when it'll be its turn + * to run. + */ + + /* There's no need to protect t->state with a lock, as the task + * has to run on the current thread.
+ */ + if (t == th_ctx->current || !(t->state & (TASK_QUEUED | TASK_RUNNING))) + __task_free(t); + else + t->process = NULL; +} + +/* Should only be called by the thread responsible for the tasklet */ +static inline void tasklet_free(struct tasklet *tl) +{ + if (!tl) + return; + + if (MT_LIST_DELETE(list_to_mt_list(&tl->list))) + _HA_ATOMIC_DEC(&ha_thread_ctx[tl->tid >= 0 ? tl->tid : tid].rq_total); + + BUG_ON((ulong)tl->caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&tl->debug.prev_caller, HA_ATOMIC_LOAD(&tl->caller)); +#endif + HA_ATOMIC_STORE(&tl->caller, (void*)1); // make sure to crash if used after free + pool_free(pool_head_tasklet, tl); + if (unlikely(stopping)) + pool_flush(pool_head_tasklet); +} + +static inline void tasklet_set_tid(struct tasklet *tl, int tid) +{ + tl->tid = tid; +} + +/* Ensure <task> will be woken up at most at <when>. If the task is already in + * the run queue (but not running), nothing is done. It may be used that way + * with a delay : task_schedule(task, tick_add(now_ms, delay)); + * It MUST NOT be used with a timer in the past, and even less with + * TICK_ETERNITY (which would block all timers). Note that passing it directly + * now_ms without using tick_add() will definitely make this happen once every + * 49.7 days. + */ +#define task_schedule(t, w) \ + _task_schedule(t, w, MK_CALLER(WAKEUP_TYPE_TASK_SCHEDULE, 0, 0)) + +static inline void _task_schedule(struct task *task, int when, const struct ha_caller *caller) +{ + /* TODO: mthread, check if there is no risk with this test */ + if (task_in_rq(task)) + return; + +#ifdef USE_THREAD + if (task->tid < 0) { + /* FIXME: is it really needed to lock the WQ during the check ?
*/ + HA_RWLOCK_WRLOCK(TASK_WQ_LOCK, &wq_lock); + if (task_in_wq(task)) + when = tick_first(when, task->expire); + + task->expire = when; + if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key)) { + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&task->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&task->debug.prev_caller, caller); +#endif + } + __task_queue(task, &tg_ctx->timers); + } + HA_RWLOCK_WRUNLOCK(TASK_WQ_LOCK, &wq_lock); + } else +#endif + { + BUG_ON(task->tid != tid); + if (task_in_wq(task)) + when = tick_first(when, task->expire); + + task->expire = when; + if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key)) { + if (likely(caller)) { + caller = HA_ATOMIC_XCHG(&task->caller, caller); + BUG_ON((ulong)caller & 1); +#ifdef DEBUG_TASK + HA_ATOMIC_STORE(&task->debug.prev_caller, caller); +#endif + } + __task_queue(task, &th_ctx->timers); + } + } +} + +/* returns the string corresponding to a task type as found in the task caller + * locations. + */ +static inline const char *task_wakeup_type_str(uint t) +{ + switch (t) { + case WAKEUP_TYPE_TASK_WAKEUP : return "task_wakeup"; + case WAKEUP_TYPE_TASK_INSTANT_WAKEUP : return "task_instant_wakeup"; + case WAKEUP_TYPE_TASKLET_WAKEUP : return "tasklet_wakeup"; + case WAKEUP_TYPE_TASKLET_WAKEUP_AFTER : return "tasklet_wakeup_after"; + case WAKEUP_TYPE_TASK_QUEUE : return "task_queue"; + case WAKEUP_TYPE_TASK_SCHEDULE : return "task_schedule"; + case WAKEUP_TYPE_APPCTX_WAKEUP : return "appctx_wakeup"; + default : return "?"; + } +} + +/* This function register a new signal. "lua" is the current lua + * execution context. It contains a pointer to the associated task. + * "link" is a list head attached to an other task that must be wake + * the lua task if an event occurs. This is useful with external + * events like TCP I/O or sleep functions. This function allocate + * memory for the signal. 
+ */ +static inline struct notification *notification_new(struct list *purge, struct list *event, struct task *wakeup) +{ + struct notification *com = pool_alloc(pool_head_notification); + if (!com) + return NULL; + LIST_APPEND(purge, &com->purge_me); + LIST_APPEND(event, &com->wake_me); + HA_SPIN_INIT(&com->lock); + com->task = wakeup; + return com; +} + +/* This function purge all the pending signals when the LUA execution + * is finished. This prevent than a coprocess try to wake a deleted + * task. This function remove the memory associated to the signal. + * The purge list is not locked because it is owned by only one + * process. before browsing this list, the caller must ensure to be + * the only one browser. + */ +static inline void notification_purge(struct list *purge) +{ + struct notification *com, *back; + + /* Delete all pending communication signals. */ + list_for_each_entry_safe(com, back, purge, purge_me) { + HA_SPIN_LOCK(NOTIF_LOCK, &com->lock); + LIST_DELETE(&com->purge_me); + if (!com->task) { + HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock); + pool_free(pool_head_notification, com); + continue; + } + com->task = NULL; + HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock); + } +} + +/* In some cases, the disconnected notifications must be cleared. + * This function just release memory blocks. The purge list is not + * locked because it is owned by only one process. Before browsing + * this list, the caller must ensure to be the only one browser. + * The "com" is not locked because when com->task is NULL, the + * notification is no longer used. + */ +static inline void notification_gc(struct list *purge) +{ + struct notification *com, *back; + + /* Delete all pending communication signals. */ + list_for_each_entry_safe (com, back, purge, purge_me) { + if (com->task) + continue; + LIST_DELETE(&com->purge_me); + pool_free(pool_head_notification, com); + } +} + +/* This function sends signals. 
It wakes all the tasks attached + * to a list head, and remove the signal, and free the used + * memory. The wake list is not locked because it is owned by + * only one process. before browsing this list, the caller must + * ensure to be the only one browser. + */ +static inline void notification_wake(struct list *wake) +{ + struct notification *com, *back; + + /* Wake task and delete all pending communication signals. */ + list_for_each_entry_safe(com, back, wake, wake_me) { + HA_SPIN_LOCK(NOTIF_LOCK, &com->lock); + LIST_DELETE(&com->wake_me); + if (!com->task) { + HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock); + pool_free(pool_head_notification, com); + continue; + } + task_wakeup(com->task, TASK_WOKEN_MSG); + com->task = NULL; + HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock); + } +} + +/* This function returns true if the <wake> list is not empty (some notifications are pending) + */ +static inline int notification_registered(struct list *wake) +{ + return !LIST_ISEMPTY(wake); +} + +#endif /* _HAPROXY_TASK_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/tcp_rules.h b/include/haproxy/tcp_rules.h new file mode 100644 index 0000000..2ed515e --- /dev/null +++ b/include/haproxy/tcp_rules.h @@ -0,0 +1,52 @@ +/* + * include/haproxy/tcp_rules.h + * This file contains "tcp" rules definitions + * + * Copyright (C) 2000-2016 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TCP_RULES_H +#define _HAPROXY_TCP_RULES_H + +#include <haproxy/action-t.h> +#include <haproxy/api.h> +#include <haproxy/session-t.h> +#include <haproxy/stream-t.h> + +int tcp_inspect_request(struct stream *s, struct channel *req, int an_bit); +int tcp_inspect_response(struct stream *s, struct channel *rep, int an_bit); +int tcp_exec_l4_rules(struct session *sess); +int tcp_exec_l5_rules(struct session *sess); + +void tcp_req_conn_keywords_register(struct action_kw_list *kw_list); +void tcp_req_sess_keywords_register(struct action_kw_list *kw_list); +void tcp_req_cont_keywords_register(struct action_kw_list *kw_list); +void tcp_res_cont_keywords_register(struct action_kw_list *kw_list); + +struct action_kw *tcp_req_conn_action(const char *kw); +struct action_kw *tcp_req_sess_action(const char *kw); +struct action_kw *tcp_req_cont_action(const char *kw); +struct action_kw *tcp_res_cont_action(const char *kw); + +#endif /* _HAPROXY_TCP_RULES_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/tcpcheck-t.h b/include/haproxy/tcpcheck-t.h new file mode 100644 index 0000000..8878995 --- /dev/null +++ b/include/haproxy/tcpcheck-t.h @@ -0,0 +1,242 @@ +/* + * include/haproxy/tcpcheck-t.h + * TCP check definitions, enums, macros and bitfields. 
+ * + * Copyright 2000-2009,2020 Willy Tarreau <w@1wt.eu> + * Copyright 2007-2010 Krzysztof Piotr Oledzki <ole@ans.pl> + * Copyright 2013 Baptiste Assmann <bedis9@gmail.com> + * Copyright 2020 Gaetan Rivet <grive@u256.net> + * Copyright 2020 Christopher Faulet <cfaulet@haproxy.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_TCPCHECK_T_H +#define _HAPROXY_TCPCHECK_T_H + +#include <import/ebtree-t.h> +#include <import/ist.h> +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/check-t.h> +#include <haproxy/connection-t.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/vars-t.h> + +/* options for tcp-check connect */ +#define TCPCHK_OPT_NONE 0x0000 /* no options specified, default */ +#define TCPCHK_OPT_SEND_PROXY 0x0001 /* send proxy-protocol string */ +#define TCPCHK_OPT_SSL 0x0002 /* SSL connection */ +#define TCPCHK_OPT_LINGER 0x0004 /* Do not RST connection, let it linger */ +#define TCPCHK_OPT_DEFAULT_CONNECT 0x0008 /* Do a connect using server params */ +#define TCPCHK_OPT_IMPLICIT 0x0010 /* Implicit connect */ +#define TCPCHK_OPT_SOCKS4 0x0020 /* check the connection via socks4 proxy */ +#define TCPCHK_OPT_HAS_DATA 0x0040 /* data should be sent after connection */ + +enum tcpcheck_send_type { + TCPCHK_SEND_UNDEF = 0, /* Send is not parsed. */ + TCPCHK_SEND_STRING, /* Send an ASCII string. */ + TCPCHK_SEND_BINARY, /* Send a binary sequence. */ + TCPCHK_SEND_STRING_LF, /* Send an ASCII log-format string. */ + TCPCHK_SEND_BINARY_LF, /* Send a binary log-format sequence. 
*/ + TCPCHK_SEND_HTTP, /* Send an HTTP request */ +}; + +/* flags for tcp-check send */ +#define TCPCHK_SND_HTTP_FL_URI_FMT 0x0001 /* Use a log-format string for the uri */ +#define TCPCHK_SND_HTTP_FL_BODY_FMT 0x0002 /* Use a log-format string for the body */ +#define TCPCHK_SND_HTTP_FROM_OPT 0x0004 /* Send rule coming from "option httpck" directive */ + +enum tcpcheck_eval_ret { + TCPCHK_EVAL_WAIT = 0, + TCPCHK_EVAL_STOP, + TCPCHK_EVAL_CONTINUE, +}; + +enum tcpcheck_expect_type { + TCPCHK_EXPECT_UNDEF = 0, /* Match is not used. */ + TCPCHK_EXPECT_STRING, /* Matches a string. */ + TCPCHK_EXPECT_STRING_REGEX, /* Matches a regular pattern. */ + TCPCHK_EXPECT_STRING_LF, /* Matches a log-format string. */ + TCPCHK_EXPECT_BINARY, /* Matches a binary sequence on a hex-encoded text. */ + TCPCHK_EXPECT_BINARY_REGEX, /* Matches a regular pattern on a hex-encoded text. */ + TCPCHK_EXPECT_BINARY_LF, /* Matches a log-format binary sequence on a hex-encoded text. */ + TCPCHK_EXPECT_CUSTOM, /* Execute a custom function. 
*/ + TCPCHK_EXPECT_HTTP_STATUS, /* Matches a list of codes on the HTTP status */ + TCPCHK_EXPECT_HTTP_STATUS_REGEX, /* Matches a regular pattern on the HTTP status */ + TCPCHK_EXPECT_HTTP_HEADER, /* Matches on HTTP headers */ + TCPCHK_EXPECT_HTTP_BODY, /* Matches a string on the HTTP payload */ + TCPCHK_EXPECT_HTTP_BODY_REGEX, /* Matches a regular pattern on a HTTP payload */ + TCPCHK_EXPECT_HTTP_BODY_LF, /* Matches a log-format string on the HTTP payload */ +}; + +/* tcp-check expect flags */ +#define TCPCHK_EXPT_FL_INV 0x0001 /* Matching is inverted */ +#define TCPCHK_EXPT_FL_HTTP_HNAME_STR 0x0002 /* Exact match on the HTTP header name */ +#define TCPCHK_EXPT_FL_HTTP_HNAME_BEG 0x0004 /* Prefix match on the HTTP header name */ +#define TCPCHK_EXPT_FL_HTTP_HNAME_END 0x0008 /* Suffix match on the HTTP header name */ +#define TCPCHK_EXPT_FL_HTTP_HNAME_SUB 0x0010 /* Substring match on the HTTP header name */ +#define TCPCHK_EXPT_FL_HTTP_HNAME_REG 0x0020 /* Regex match on the HTTP header name */ +#define TCPCHK_EXPT_FL_HTTP_HNAME_FMT 0x0040 /* The HTTP header name is a log-format string */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_NONE 0x0080 /* No match on the HTTP header value */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_STR 0x0100 /* Exact match on the HTTP header value */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_BEG 0x0200 /* Prefix match on the HTTP header value */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_END 0x0400 /* Suffix match on the HTTP header value */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_SUB 0x0800 /* Substring match on the HTTP header value */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_REG 0x1000 /* Regex match on the HTTP header value */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_FMT 0x2000 /* The HTTP header value is a log-format string */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_FULL 0x4000 /* Match the full header value ( no stop on commas ) */ + +#define TCPCHK_EXPT_FL_HTTP_HNAME_TYPE 0x003E /* Mask to get matching method on header name */ +#define TCPCHK_EXPT_FL_HTTP_HVAL_TYPE 0x1F00 /* Mask to get matching
method on header value */ + +/* possible actions for tcpcheck_rule->action */ +enum tcpcheck_rule_type { + TCPCHK_ACT_SEND = 0, /* send action, regular string format */ + TCPCHK_ACT_EXPECT, /* expect action, either regular or binary string */ + TCPCHK_ACT_CONNECT, /* connect action, to probe a new port */ + TCPCHK_ACT_COMMENT, /* no action, simply a comment used for logs */ + TCPCHK_ACT_ACTION_KW, /* custom registered action_kw rule. */ +}; + +#define TCPCHK_RULES_NONE 0x00000000 +#define TCPCHK_RULES_UNUSED_TCP_RS 0x00000001 /* An unused tcp-check ruleset exists */ +#define TCPCHK_RULES_UNUSED_HTTP_RS 0x00000002 /* An unused http-check ruleset exists */ +#define TCPCHK_RULES_UNUSED_RS 0x00000003 /* Mask for unused ruleset */ + +#define TCPCHK_RULES_PGSQL_CHK 0x00000010 +#define TCPCHK_RULES_REDIS_CHK 0x00000020 +#define TCPCHK_RULES_SMTP_CHK 0x00000030 +#define TCPCHK_RULES_HTTP_CHK 0x00000040 +#define TCPCHK_RULES_MYSQL_CHK 0x00000050 +#define TCPCHK_RULES_LDAP_CHK 0x00000060 +#define TCPCHK_RULES_SSL3_CHK 0x00000070 +#define TCPCHK_RULES_AGENT_CHK 0x00000080 +#define TCPCHK_RULES_SPOP_CHK 0x00000090 +/* Unused 0x000000A0..0x00000FF0 (reserved for future proto) */ +#define TCPCHK_RULES_TCP_CHK 0x00000FF0 +#define TCPCHK_RULES_PROTO_CHK 0x00000FF0 /* Mask to cover protocol check */ + +struct check; +struct tcpcheck_connect { + char *sni; /* server name to use for SSL connections */ + char *alpn; /* ALPN to use for the SSL connection */ + int alpn_len; /* ALPN string length */ + const struct mux_proto_list *mux_proto; /* the mux to use for all outgoing connections (specified by the "proto" keyword) */ + uint16_t options; /* options when setting up a new connection */ + uint16_t port; /* port to connect to */ + struct sample_expr *port_expr; /* sample expr to determine the port, may be NULL */ + struct sockaddr_storage addr; /* the address to the connect */ +}; + +struct tcpcheck_http_hdr { + struct ist name; /* the header name */ + struct list value; /* the 
log-format string value */ + struct list list; /* header chained list */ +}; + +struct tcpcheck_codes { + unsigned int (*codes)[2]; /* an array of ranges of codes: [0]=min [1]=max */ + size_t num; /* number of entries in the array */ +}; + +struct tcpcheck_send { + enum tcpcheck_send_type type; + union { + struct ist data; /* an ASCII string or a binary sequence */ + struct list fmt; /* an ASCII or hexa log-format string */ + struct { + unsigned int flags; /* TCPCHK_SND_HTTP_FL_* */ + struct http_meth meth; /* the HTTP request method */ + union { + struct ist uri; /* the HTTP request uri is a string */ + struct list uri_fmt; /* or a log-format string */ + }; + struct ist vsn; /* the HTTP request version string */ + struct list hdrs; /* the HTTP request header list */ + union { + struct ist body; /* the HTTP request payload is a string */ + struct list body_fmt; /* or a log-format string */ + }; + } http; /* Info about the HTTP request to send */ + }; +}; + +struct tcpcheck_expect { + enum tcpcheck_expect_type type; /* Type of pattern used for matching. */ + unsigned int flags; /* TCPCHK_EXPT_FL_* */ + union { + struct ist data; /* Matching a literal string / binary anywhere in the response. */ + struct my_regex *regex; /* Matching a regex pattern. */ + struct tcpcheck_codes codes; /* Matching a list of codes */ + struct list fmt; /* Matching a log-format string / binary */ + struct { + union { + struct ist name; + struct list name_fmt; + struct my_regex *name_re; + }; + union { + struct ist value; + struct list value_fmt; + struct my_regex *value_re; + }; + } hdr; /* Matching a header pattern */ + + + /* custom function to eval expect rule */ + enum tcpcheck_eval_ret (*custom)(struct check *, struct tcpcheck_rule *, int); + }; + struct tcpcheck_rule *head; /* first expect of a chain. */ + int min_recv; /* Minimum amount of data before an expect can be applied.
(default: -1, ignored) */ + enum healthcheck_status ok_status; /* The healthcheck status to use on success (default: L7OKD) */ + enum healthcheck_status err_status; /* The healthcheck status to use on error (default: L7RSP) */ + enum healthcheck_status tout_status; /* The healthcheck status to use on timeout (default: L7TOUT) */ + struct list onerror_fmt; /* log-format string to use as comment on error */ + struct list onsuccess_fmt; /* log-format string to use as comment on success (if last rule) */ + struct sample_expr *status_expr; /* sample expr to determine the check status code */ +}; + +struct tcpcheck_action_kw { + struct act_rule *rule; +}; + +struct tcpcheck_rule { + struct list list; /* list linked to from the proxy */ + enum tcpcheck_rule_type action; /* type of the rule. */ + int index; /* Index within the list. Starts at 0. */ + char *comment; /* comment to be used in the logs and on the stats socket */ + union { + struct tcpcheck_connect connect; /* Connect rule. */ + struct tcpcheck_send send; /* Send rule. */ + struct tcpcheck_expect expect; /* Expected pattern. */ + struct tcpcheck_action_kw action_kw; /* Custom action. 
*/ + }; +}; + +/* A list of tcp-check vars, to be registered before executing a ruleset */ +struct tcpcheck_var { + struct ist name; /* the variable name with the scope */ + struct sample_data data; /* the data associated to the variable */ + struct list list; /* element to chain tcp-check vars */ +}; + +/* a list of tcp-check rules */ +struct tcpcheck_rules { + unsigned int flags; /* flags applied to the rules */ + struct list *list; /* the list of tcpcheck_rules */ + struct list preset_vars; /* The list of variables to preset before executing the ruleset */ +}; + +/* A list of tcp-check rules with a name */ +struct tcpcheck_ruleset { + struct list rules; /* the list of tcpcheck_rule */ + struct ebpt_node node; /* node in the shared tree */ +}; + + +#endif /* _HAPROXY_TCPCHECK_T_H */ diff --git a/include/haproxy/tcpcheck.h b/include/haproxy/tcpcheck.h new file mode 100644 index 0000000..3abd1ef --- /dev/null +++ b/include/haproxy/tcpcheck.h @@ -0,0 +1,125 @@ +/* + * include/haproxy/tcpcheck.h + * Functions prototypes for the TCP checks. + * + * Copyright 2000-2009,2020 Willy Tarreau <w@1wt.eu> + * Copyright 2007-2010 Krzysztof Piotr Oledzki <ole@ans.pl> + * Copyright 2013 Baptiste Assmann <bedis9@gmail.com> + * Copyright 2020 Gaetan Rivet <grive@u256.net> + * Copyright 2020 Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TCPCHECK_H +#define _HAPROXY_TCPCHECK_H + +#include <haproxy/action.h> +#include <haproxy/check-t.h> +#include <haproxy/pool-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/tcpcheck-t.h> + +extern struct action_kw_list tcp_check_keywords; +extern struct pool_head *pool_head_tcpcheck_rule; + +int tcpcheck_get_step_id(const struct check *check, const struct tcpcheck_rule *rule); +struct tcpcheck_rule *get_first_tcpcheck_rule(const struct tcpcheck_rules *rules); + +struct tcpcheck_ruleset *create_tcpcheck_ruleset(const char *name); +struct tcpcheck_ruleset *find_tcpcheck_ruleset(const char *name); +void free_tcpcheck_ruleset(struct tcpcheck_ruleset *rs); + +void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool); +void deinit_proxy_tcpcheck(struct proxy *px); + +struct tcpcheck_var *create_tcpcheck_var(const struct ist name); +void free_tcpcheck_var(struct tcpcheck_var *var); +int dup_tcpcheck_vars(struct list *dst, const struct list *src); +void free_tcpcheck_vars(struct list *vars); + +int add_tcpcheck_expect_str(struct tcpcheck_rules *rules, const char *str); +int add_tcpcheck_send_strs(struct tcpcheck_rules *rules, const char * const *strs); +int tcpcheck_add_http_rule(struct tcpcheck_rule *chk, struct tcpcheck_rules *rules, char **errmsg); + +void free_tcpcheck_http_hdr(struct tcpcheck_http_hdr *hdr); + +enum tcpcheck_eval_ret tcpcheck_mysql_expect_iniths(struct check *check, struct tcpcheck_rule *rule, int last_read); +enum tcpcheck_eval_ret tcpcheck_mysql_expect_ok(struct check *check, struct tcpcheck_rule *rule, int last_read); +enum tcpcheck_eval_ret tcpcheck_ldap_expect_bindrsp(struct check *check, struct tcpcheck_rule *rule, int last_read); +enum tcpcheck_eval_ret 
tcpcheck_spop_expect_agenthello(struct check *check, struct tcpcheck_rule *rule, int last_read); +enum tcpcheck_eval_ret tcpcheck_agent_expect_reply(struct check *check, struct tcpcheck_rule *rule, int last_read); +enum tcpcheck_eval_ret tcpcheck_eval_connect(struct check *check, struct tcpcheck_rule *rule); +enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_rule *rule); +enum tcpcheck_eval_ret tcpcheck_eval_recv(struct check *check, struct tcpcheck_rule *rule); +enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcpcheck_rule *rule, int last_read); +enum tcpcheck_eval_ret tcpcheck_eval_expect(struct check *check, struct tcpcheck_rule *rule, int last_read); +enum tcpcheck_eval_ret tcpcheck_eval_action_kw(struct check *check, struct tcpcheck_rule *rule); +int tcpcheck_main(struct check *check); +struct tcpcheck_rule *parse_tcpcheck_action(char **args, int cur_arg, struct proxy *px, + struct list *rules, struct action_kw *kw, + const char *file, int line, char **errmsg); +struct tcpcheck_rule *parse_tcpcheck_connect(char **args, int cur_arg, struct proxy *px, struct list *rules, + const char *file, int line, char **errmsg); +struct tcpcheck_rule *parse_tcpcheck_send(char **args, int cur_arg, struct proxy *px, struct list *rules, + const char *file, int line, char **errmsg); +struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct proxy *px, struct list *rules, + const char *file, int line, char **errmsg); +struct tcpcheck_rule *parse_tcpcheck_comment(char **args, int cur_arg, struct proxy *px, struct list *rules, + const char *file, int line, char **errmsg); +struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct proxy *px, + struct list *rules, unsigned int proto, + const char *file, int line, char **errmsg); + +int proxy_parse_tcp_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int 
proxy_parse_redis_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int proxy_parse_ssl_hello_chk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int proxy_parse_smtpchk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int proxy_parse_pgsql_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int proxy_parse_mysql_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int proxy_parse_ldap_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int proxy_parse_spop_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); +int proxy_parse_httpchk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, + const char *file, int line); + +void tcp_check_keywords_register(struct action_kw_list *kw_list); + +/* Return the struct action_kw associated to a keyword */ +static inline struct action_kw *action_kw_tcp_check_lookup(const char *kw) +{ + return action_lookup(&tcp_check_keywords.list, kw); +} + +static inline void action_kw_tcp_check_build_list(struct buffer *chk) +{ + action_build_list(&tcp_check_keywords.list, chk); +} + +#endif /* _HAPROXY_TCPCHECK_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/thread-t.h b/include/haproxy/thread-t.h new file mode 100644 index 0000000..f3552c2 --- /dev/null +++ b/include/haproxy/thread-t.h @@ -0,0 +1,165 @@ +/* + * include/haproxy/thread-t.h + * Definitions and types for thread support. 
+ * + * Copyright (C) 2017 Christopher Faulet - cfaulet@haproxy.com + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_THREAD_T_H +#define _HAPROXY_THREAD_T_H + +#include <haproxy/defaults.h> + +/* Note: this file mainly contains 3 sections: + * - one used solely when USE_THREAD is *not* set + * - one used solely when USE_THREAD is set + * - a common one. 
+ */ + +#ifndef USE_THREAD + +/********************** THREADS DISABLED ************************/ + +/* These macros make some struct fields or local variables optional: without threads they expand to nothing */ +#define __decl_spinlock(lock) +#define __decl_aligned_spinlock(lock) +#define __decl_rwlock(lock) +#define __decl_aligned_rwlock(lock) + +#elif !defined(DEBUG_THREAD) && !defined(DEBUG_FULL) + +/************** THREADS ENABLED WITHOUT DEBUGGING **************/ + +/* declare a self-initializing spinlock (statically set to 0) */ +#define __decl_spinlock(lock) \ + HA_SPINLOCK_T (lock) = 0; + +/* declare a self-initializing spinlock (statically set to 0), aligned on a cache line */ +#define __decl_aligned_spinlock(lock) \ + HA_SPINLOCK_T (lock) __attribute__((aligned(64))) = 0; + +/* declare a self-initializing rwlock (statically set to 0) */ +#define __decl_rwlock(lock) \ + HA_RWLOCK_T (lock) = 0; + +/* declare a self-initializing rwlock (statically set to 0), aligned on a cache line */ +#define __decl_aligned_rwlock(lock) \ + HA_RWLOCK_T (lock) __attribute__((aligned(64))) = 0; + +#else /* USE_THREAD with DEBUG_THREAD or DEBUG_FULL */ + +/**************** THREADS ENABLED WITH DEBUGGING ***************/ + +/* declare a spinlock initialized by ha_spin_init() via an STG_LOCK initcall */ +#define __decl_spinlock(lock) \ + HA_SPINLOCK_T (lock); \ + INITCALL1(STG_LOCK, ha_spin_init, &(lock)) + +/* declare a spinlock initialized by ha_spin_init() via an STG_LOCK initcall, aligned on a cache line */ +#define __decl_aligned_spinlock(lock) \ + HA_SPINLOCK_T (lock) __attribute__((aligned(64))); \ + INITCALL1(STG_LOCK, ha_spin_init, &(lock)) + +/* declare a rwlock initialized by ha_rwlock_init() via an STG_LOCK initcall */ +#define __decl_rwlock(lock) \ + HA_RWLOCK_T (lock); \ + INITCALL1(STG_LOCK, ha_rwlock_init, &(lock)) + +/* declare a rwlock initialized by ha_rwlock_init() via an STG_LOCK initcall, aligned on a cache line */ +#define __decl_aligned_rwlock(lock) \ + HA_RWLOCK_T (lock) __attribute__((aligned(64))); \ + INITCALL1(STG_LOCK, ha_rwlock_init, &(lock)) + +#endif /* USE_THREAD */ + + +/*** Common parts below ***/ + +/* storage types used by spinlocks and RW locks */ +#define __HA_SPINLOCK_T unsigned long +#define __HA_RWLOCK_T unsigned long + + +/* When 
thread debugging is enabled, we remap HA_SPINLOCK_T and HA_RWLOCK_T to + * complex structures which embed debugging info. + */ +#if !defined(DEBUG_THREAD) && !defined(DEBUG_FULL) + +#define HA_SPINLOCK_T __HA_SPINLOCK_T +#define HA_RWLOCK_T __HA_RWLOCK_T + +#else /* DEBUG_THREAD || DEBUG_FULL */ + +#define HA_SPINLOCK_T struct ha_spinlock +#define HA_RWLOCK_T struct ha_rwlock + +/* Debugging information that is only used when thread debugging is enabled */ + +/* NOTE(review): field names suggest wait times in nanoseconds and lock/unlock counters per access mode (write/read/seek) -- confirm against the users of this struct */ +struct lock_stat { + uint64_t nsec_wait_for_write; + uint64_t nsec_wait_for_read; + uint64_t nsec_wait_for_seek; + uint64_t num_write_locked; + uint64_t num_write_unlocked; + uint64_t num_read_locked; + uint64_t num_read_unlocked; + uint64_t num_seek_locked; + uint64_t num_seek_unlocked; +}; + +struct ha_spinlock_state { + unsigned long owner; /* a bit is set to 1 << tid for the lock owner */ + unsigned long waiters; /* a bit is set to 1 << tid for waiting threads */ +}; + +struct ha_rwlock_state { + unsigned long cur_writer; /* a bit is set to 1 << tid for the current writer (lock owner) */ + unsigned long wait_writers; /* a bit is set to 1 << tid for waiting writers */ + unsigned long cur_readers; /* a bit is set to 1 << tid for current readers */ + unsigned long wait_readers; /* a bit is set to 1 << tid for waiting readers */ + unsigned long cur_seeker; /* a bit is set to 1 << tid for the lock seekers */ + unsigned long wait_seekers; /* a bit is set to 1 << tid for waiting seekers */ +}; + +/* spinlock with debugging info: the raw lock word followed by one state entry per thread group */ +struct ha_spinlock { + __HA_SPINLOCK_T lock; + struct { + struct ha_spinlock_state st[MAX_TGROUPS]; + struct { + const char *function; + const char *file; + int line; + } last_location; /* location of the last owner */ + } info; +}; + +/* rwlock with debugging info: the raw lock word followed by one state entry per thread group */ +struct ha_rwlock { + __HA_RWLOCK_T lock; + struct { + struct ha_rwlock_state st[MAX_TGROUPS]; + struct { + const char *function; + const char *file; + int line; + } last_location; /* location of the last write owner */ + } info; +}; + +#endif /* DEBUG_THREAD || DEBUG_FULL */ + +#endif /* _HAPROXY_THREAD_T_H */ diff --git 
a/include/haproxy/thread.h b/include/haproxy/thread.h new file mode 100644 index 0000000..8c7520b --- /dev/null +++ b/include/haproxy/thread.h @@ -0,0 +1,489 @@ +/* + * include/haproxy/thread.h + * definitions, macros and inline functions used by threads. + * + * Copyright (C) 2017 Christopher Faulet - cfaulet@haproxy.com + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_THREAD_H +#define _HAPROXY_THREAD_H + +#include <haproxy/api.h> +#include <haproxy/thread-t.h> +#include <haproxy/tinfo.h> + + +/* Note: this file mainly contains 5 sections: + * - a small common part, which also corresponds to the common API + * - one used solely when USE_THREAD is *not* set + * - one used solely when USE_THREAD is set + * - one used solely when USE_THREAD is set WITHOUT debugging + * - one used solely when USE_THREAD is set WITH debugging + * + */ + + +/* Generic exports */ +int parse_nbthread(const char *arg, char **err); +void ha_tkill(unsigned int thr, int sig); +void ha_tkillall(int sig); +void ha_thread_relax(void); +int thread_detect_binding_discrepancies(void); +int thread_detect_more_than_cpus(void); +int thread_map_to_groups(); +int thread_resolve_group_mask(struct thread_set *ts, int defgrp, char **err); +int parse_thread_set(const char *arg, struct 
thread_set *ts, char **err); +extern int thread_cpus_enabled_at_boot; + + +#ifndef USE_THREAD + +/********************** THREADS DISABLED ************************/ + +/* Only way found to replace variables with constants that are optimized away + * at build time. + */ +enum { all_tgroups_mask = 1UL }; +enum { tid_bit = 1UL }; +enum { tid = 0 }; +enum { tgid = 1 }; + +#define HA_SPIN_INIT(l) do { /* do nothing */ } while(0) +#define HA_SPIN_DESTROY(l) do { /* do nothing */ } while(0) +#define HA_SPIN_LOCK(lbl, l) do { /* do nothing */ } while(0) +#define HA_SPIN_TRYLOCK(lbl, l) ({ 0; }) +#define HA_SPIN_UNLOCK(lbl, l) do { /* do nothing */ } while(0) + +#define HA_RWLOCK_INIT(l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_DESTROY(l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_WRLOCK(lbl, l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_TRYWRLOCK(lbl, l) ({ 0; }) +#define HA_RWLOCK_WRUNLOCK(lbl, l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_RDLOCK(lbl, l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_TRYRDLOCK(lbl, l) ({ 0; }) +#define HA_RWLOCK_RDUNLOCK(lbl, l) do { /* do nothing */ } while(0) + +#define HA_RWLOCK_SKLOCK(lbl,l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_SKTOWR(lbl,l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_WRTOSK(lbl,l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_SKTORD(lbl,l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_WRTORD(lbl,l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_SKUNLOCK(lbl,l) do { /* do nothing */ } while(0) +#define HA_RWLOCK_TRYSKLOCK(lbl,l) ({ 0; }) +#define HA_RWLOCK_TRYRDTOSK(lbl,l) ({ 0; }) + +#define ha_sigmask(how, set, oldset) sigprocmask(how, set, oldset) + +/* Sets the current thread to a valid one described by <thr>, or to any thread + * and any group if NULL (e.g. for use during boot where they're not totally + * initialized). 
+ */ +static inline void ha_set_thread(const struct thread_info *thr) +{ + if (thr) { + ti = thr; + tg = ti->tg; + th_ctx = &ha_thread_ctx[ti->tid]; + } else { + /* NULL: fall back to the first entry of each array */ + ti = &ha_thread_info[0]; + tg = &ha_tgroup_info[0]; + th_ctx = &ha_thread_ctx[0]; + } +} + +/* Without threads, the idle/harmless/isolation helpers below are empty stubs; + * the queries is_thread_harmless() and thread_isolated() always return 1. + */ +static inline void thread_idle_now() +{ +} + +static inline void thread_idle_end() +{ +} + +static inline void thread_harmless_now() +{ +} + +static inline int is_thread_harmless() +{ + return 1; +} + +static inline void thread_harmless_end() +{ +} + +static inline void thread_harmless_end_sig() +{ +} + +static inline void thread_isolate() +{ +} + +static inline void thread_isolate_full() +{ +} + +static inline void thread_release() +{ +} + +static inline unsigned long thread_isolated() +{ + return 1; +} + +static inline void setup_extra_threads(void *(*handler)(void *)) +{ +} + +static inline void wait_for_threads_completion() +{ +} + +static inline void set_thread_cpu_affinity() +{ +} + +/* no pthread ID without threads: always returns 0 */ +static inline unsigned long long ha_get_pthread_id(unsigned int thr) +{ + return 0; +} + +#else /* !USE_THREAD */ + +/********************** THREADS ENABLED ************************/ + +#define PLOCK_LORW_INLINE_WAIT +#include <import/plock.h> + +void thread_harmless_till_end(void); +void thread_isolate(void); +void thread_isolate_full(void); +void thread_release(void); +void ha_spin_init(HA_SPINLOCK_T *l); +void ha_rwlock_init(HA_RWLOCK_T *l); +void setup_extra_threads(void *(*handler)(void *)); +void wait_for_threads_completion(); +void set_thread_cpu_affinity(); +unsigned long long ha_get_pthread_id(unsigned int thr); + +extern volatile unsigned long all_tgroups_mask; +extern volatile unsigned int rdv_requests; +extern volatile unsigned int isolated_thread; +extern THREAD_LOCAL unsigned int tid; /* The thread id */ +extern THREAD_LOCAL unsigned int tgid; /* The thread group id (starts at 1) */ + +#define ha_sigmask(how, set, oldset) pthread_sigmask(how, set, oldset) + +/* Sets the current thread to a valid one described by 
<thr>, or to any thread + * and any group if NULL (e.g. for use during boot where they're not totally + * initialized). + */ +static inline void ha_set_thread(const struct thread_info *thr) +{ + if (thr) { + BUG_ON(!thr->ltid_bit); + BUG_ON(!thr->tg); + BUG_ON(!thr->tgid); + + ti = thr; + tg = thr->tg; + tid = thr->tid; + tgid = thr->tgid; + th_ctx = &ha_thread_ctx[tid]; + tg_ctx = &ha_tgroup_ctx[tgid-1]; /* group IDs start at 1, the array at 0 */ + } else { + /* NULL: use thread 0 and group 1, i.e. the first entry of each array */ + tgid = 1; + tid = 0; + ti = &ha_thread_info[0]; + tg = &ha_tgroup_info[0]; + th_ctx = &ha_thread_ctx[0]; + tg_ctx = &ha_tgroup_ctx[0]; + } +} + +/* Marks the thread as idle, which means that not only it's not doing anything + * dangerous, but in addition it has not started anything sensitive either. + * This essentially means that the thread currently is in the poller, thus + * outside of any execution block. Needs to be terminated using + * thread_idle_end(). This is needed to release a concurrent call to + * thread_isolate_full(). + */ +static inline void thread_idle_now() +{ + HA_ATOMIC_OR(&tg_ctx->threads_idle, ti->ltid_bit); +} + +/* Ends the idle period started by thread_idle_now(), i.e. the thread is + * about to restart engaging in sensitive operations. This must not be done on + * a thread marked harmless, as it could cause a deadlock between another + * thread waiting for idle again and thread_harmless_end() in this thread. + * + * The right sequence is thus: + * thread_idle_now(); + * thread_harmless_now(); + * poll(); + * thread_harmless_end(); + * thread_idle_end(); + */ +static inline void thread_idle_end() +{ + HA_ATOMIC_AND(&tg_ctx->threads_idle, ~ti->ltid_bit); +} + + +/* Marks the thread as harmless. Note: this must be true, i.e. the thread must + * not be touching any unprotected shared resource during this period. Usually + * this is called before poll(), but it may also be placed around very slow + * calls (eg: some crypto operations). Needs to be terminated using + * thread_harmless_end(). 
+ */ +static inline void thread_harmless_now() +{ + HA_ATOMIC_OR(&tg_ctx->threads_harmless, ti->ltid_bit); +} + +/* Returns non-zero if the current thread is already harmless */ +static inline int is_thread_harmless() +{ + return !!(HA_ATOMIC_LOAD(&tg_ctx->threads_harmless) & ti->ltid_bit); +} + +/* Ends the harmless period started by thread_harmless_now(). Usually this is + * placed after the poll() call. If it is discovered that a job was running and + * is relying on the thread still being harmless, the thread waits for the + * other one to finish. + */ +static inline void thread_harmless_end() +{ + while (1) { + HA_ATOMIC_AND(&tg_ctx->threads_harmless, ~ti->ltid_bit); + if (likely(_HA_ATOMIC_LOAD(&rdv_requests) == 0)) + break; + thread_harmless_till_end(); + } +} + +/* Ends the harmless period started by thread_harmless_now(), but without + * waiting for isolated requests. This is meant to be used from signal handlers + * which might be called recursively while a thread already requested an + * isolation that must be ignored. It must not be used past a checkpoint where + * another thread could return and see the current thread as harmless before + * this call (or this could validate an isolation request by accident). + */ +static inline void thread_harmless_end_sig() +{ + HA_ATOMIC_AND(&tg_ctx->threads_harmless, ~ti->ltid_bit); +} + +/* an isolated thread has its ID in isolated_thread */ +static inline unsigned long thread_isolated() +{ + return _HA_ATOMIC_LOAD(&isolated_thread) == tid; +} + +/* Returns 1 if the cpu set is currently restricted for the process else 0. + * Currently only implemented for the Linux platform. 
+ */ +int thread_cpu_mask_forced(void); + +#if !defined(DEBUG_THREAD) && !defined(DEBUG_FULL) + +/* Thread debugging is DISABLED, these are the regular locking functions */ + +#define HA_SPIN_INIT(l) ({ (*l) = 0; }) +#define HA_SPIN_DESTROY(l) ({ (*l) = 0; }) +#define HA_SPIN_LOCK(lbl, l) pl_take_s(l) +#define HA_SPIN_TRYLOCK(lbl, l) (!pl_try_s(l)) +#define HA_SPIN_UNLOCK(lbl, l) pl_drop_s(l) + +#define HA_RWLOCK_INIT(l) ({ (*l) = 0; }) +#define HA_RWLOCK_DESTROY(l) ({ (*l) = 0; }) +#define HA_RWLOCK_WRLOCK(lbl,l) pl_take_w(l) +#define HA_RWLOCK_TRYWRLOCK(lbl,l) (!pl_try_w(l)) +#define HA_RWLOCK_WRUNLOCK(lbl,l) pl_drop_w(l) +#define HA_RWLOCK_RDLOCK(lbl,l) pl_take_r(l) +#define HA_RWLOCK_TRYRDLOCK(lbl,l) (!pl_try_r(l)) +#define HA_RWLOCK_RDUNLOCK(lbl,l) pl_drop_r(l) + +/* rwlock upgrades via seek locks */ +#define HA_RWLOCK_SKLOCK(lbl,l) pl_take_s(l) /* N --> S */ +#define HA_RWLOCK_SKTOWR(lbl,l) pl_stow(l) /* S --> W */ +#define HA_RWLOCK_WRTOSK(lbl,l) pl_wtos(l) /* W --> S */ +#define HA_RWLOCK_SKTORD(lbl,l) pl_stor(l) /* S --> R */ +#define HA_RWLOCK_WRTORD(lbl,l) pl_wtor(l) /* W --> R */ +#define HA_RWLOCK_SKUNLOCK(lbl,l) pl_drop_s(l) /* S --> N */ +#define HA_RWLOCK_TRYSKLOCK(lbl,l) (!pl_try_s(l)) /* N -?> S */ +#define HA_RWLOCK_TRYRDTOSK(lbl,l) (!pl_try_rtos(l)) /* R -?> S */ + +#else /* !defined(DEBUG_THREAD) && !defined(DEBUG_FULL) */ + +/* Thread debugging is ENABLED, these are the instrumented functions */ + +#define __SPIN_INIT(l) ({ (*l) = 0; }) +#define __SPIN_DESTROY(l) ({ (*l) = 0; }) +#define __SPIN_LOCK(l) pl_take_s(l) +#define __SPIN_TRYLOCK(l) (!pl_try_s(l)) +#define __SPIN_UNLOCK(l) pl_drop_s(l) + +#define __RWLOCK_INIT(l) ({ (*l) = 0; }) +#define __RWLOCK_DESTROY(l) ({ (*l) = 0; }) +#define __RWLOCK_WRLOCK(l) pl_take_w(l) +#define __RWLOCK_TRYWRLOCK(l) (!pl_try_w(l)) +#define __RWLOCK_WRUNLOCK(l) pl_drop_w(l) +#define __RWLOCK_RDLOCK(l) pl_take_r(l) +#define __RWLOCK_TRYRDLOCK(l) (!pl_try_r(l)) +#define __RWLOCK_RDUNLOCK(l) pl_drop_r(l) + +/* 
rwlock upgrades via seek locks */ +#define __RWLOCK_SKLOCK(l) pl_take_s(l) /* N --> S */ +#define __RWLOCK_SKTOWR(l) pl_stow(l) /* S --> W */ +#define __RWLOCK_WRTOSK(l) pl_wtos(l) /* W --> S */ +#define __RWLOCK_SKTORD(l) pl_stor(l) /* S --> R */ +#define __RWLOCK_WRTORD(l) pl_wtor(l) /* W --> R */ +#define __RWLOCK_SKUNLOCK(l) pl_drop_s(l) /* S --> N */ +#define __RWLOCK_TRYSKLOCK(l) (!pl_try_s(l)) /* N -?> S */ +#define __RWLOCK_TRYRDTOSK(l) (!pl_try_rtos(l)) /* R -?> S */ + +#define HA_SPIN_INIT(l) __spin_init(l) +#define HA_SPIN_DESTROY(l) __spin_destroy(l) + +#define HA_SPIN_LOCK(lbl, l) __spin_lock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_SPIN_TRYLOCK(lbl, l) __spin_trylock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_SPIN_UNLOCK(lbl, l) __spin_unlock(lbl, l, __func__, __FILE__, __LINE__) + +#define HA_RWLOCK_INIT(l) __ha_rwlock_init((l)) +#define HA_RWLOCK_DESTROY(l) __ha_rwlock_destroy((l)) +#define HA_RWLOCK_WRLOCK(lbl,l) __ha_rwlock_wrlock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_TRYWRLOCK(lbl,l) __ha_rwlock_trywrlock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_WRUNLOCK(lbl,l) __ha_rwlock_wrunlock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_RDLOCK(lbl,l) __ha_rwlock_rdlock(lbl, l) +#define HA_RWLOCK_TRYRDLOCK(lbl,l) __ha_rwlock_tryrdlock(lbl, l) +#define HA_RWLOCK_RDUNLOCK(lbl,l) __ha_rwlock_rdunlock(lbl, l) + +#define HA_RWLOCK_SKLOCK(lbl,l) __ha_rwlock_sklock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_SKTOWR(lbl,l) __ha_rwlock_sktowr(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_WRTOSK(lbl,l) __ha_rwlock_wrtosk(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_SKTORD(lbl,l) __ha_rwlock_sktord(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_WRTORD(lbl,l) __ha_rwlock_wrtord(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_SKUNLOCK(lbl,l) __ha_rwlock_skunlock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_TRYSKLOCK(lbl,l) 
__ha_rwlock_trysklock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_TRYRDTOSK(lbl,l) __ha_rwlock_tryrdtosk(lbl, l, __func__, __FILE__, __LINE__) + +/* WARNING!!! if you update this enum, please also keep lock_label() up to date. + * NOTE(review): no lock_label() follows in this header; presumably it lives in + * the thread implementation file -- confirm. + */ +enum lock_label { + TASK_RQ_LOCK, + TASK_WQ_LOCK, + LISTENER_LOCK, + PROXY_LOCK, + SERVER_LOCK, + LBPRM_LOCK, + SIGNALS_LOCK, + STK_TABLE_LOCK, + STK_SESS_LOCK, + APPLETS_LOCK, + PEER_LOCK, + SHCTX_LOCK, + SSL_LOCK, + SSL_GEN_CERTS_LOCK, + PATREF_LOCK, + PATEXP_LOCK, + VARS_LOCK, + COMP_POOL_LOCK, + LUA_LOCK, + NOTIF_LOCK, + SPOE_APPLET_LOCK, + DNS_LOCK, + PID_LIST_LOCK, + EMAIL_ALERTS_LOCK, + PIPES_LOCK, + TLSKEYS_REF_LOCK, + AUTH_LOCK, + RING_LOCK, + DICT_LOCK, + PROTO_LOCK, + QUEUE_LOCK, + CKCH_LOCK, + SNI_LOCK, + SSL_SERVER_LOCK, + SFT_LOCK, /* sink forward target */ + IDLE_CONNS_LOCK, + OCSP_LOCK, + QC_CID_LOCK, + CACHE_LOCK, + OTHER_LOCK, + /* WT: make sure never to use these ones outside of development, + * we need them for lock profiling! + */ + DEBUG1_LOCK, + DEBUG2_LOCK, + DEBUG3_LOCK, + DEBUG4_LOCK, + DEBUG5_LOCK, + LOCK_LABELS /* number of labels above, keep it last */ +}; + + +/* Following functions are used to collect some stats about locks. We wrap + * pthread functions to know how much time we wait in a lock. 
*/ + +void show_lock_stats(); +void __ha_rwlock_init(struct ha_rwlock *l); +void __ha_rwlock_destroy(struct ha_rwlock *l); +void __ha_rwlock_wrlock(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +int __ha_rwlock_trywrlock(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +void __ha_rwlock_wrunlock(enum lock_label lbl,struct ha_rwlock *l, + const char *func, const char *file, int line); +void __ha_rwlock_rdlock(enum lock_label lbl,struct ha_rwlock *l); +int __ha_rwlock_tryrdlock(enum lock_label lbl,struct ha_rwlock *l); +void __ha_rwlock_rdunlock(enum lock_label lbl,struct ha_rwlock *l); +void __ha_rwlock_wrtord(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +void __ha_rwlock_wrtosk(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +void __ha_rwlock_sklock(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +void __ha_rwlock_sktowr(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +void __ha_rwlock_sktord(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +void __ha_rwlock_skunlock(enum lock_label lbl,struct ha_rwlock *l, + const char *func, const char *file, int line); +int __ha_rwlock_trysklock(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +int __ha_rwlock_tryrdtosk(enum lock_label lbl, struct ha_rwlock *l, + const char *func, const char *file, int line); +void __spin_init(struct ha_spinlock *l); +void __spin_destroy(struct ha_spinlock *l); +void __spin_lock(enum lock_label lbl, struct ha_spinlock *l, + const char *func, const char *file, int line); +int __spin_trylock(enum lock_label lbl, struct ha_spinlock *l, + const char *func, const char *file, int line); +void __spin_unlock(enum lock_label lbl, struct ha_spinlock *l, + const char 
*func, const char *file, int line); + +#endif /* DEBUG_THREAD */ + +#endif /* USE_THREAD */ + +#endif /* _HAPROXY_THREAD_H */ diff --git a/include/haproxy/ticks.h b/include/haproxy/ticks.h new file mode 100644 index 0000000..8b8fcc6 --- /dev/null +++ b/include/haproxy/ticks.h @@ -0,0 +1,157 @@ +/* + * include/haproxy/ticks.h + * Functions and macros for manipulation of expiration timers + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Using a mix of milliseconds and timeval for internal timers is expensive and + * overkill, because we don't need such a precision to compute timeouts. + * So we're converting them to "ticks". + * + * A tick is a representation of a date relative to another one, and is + * measured in milliseconds. The natural usage is to represent an absolute date + * relative to the current date. Since it is not practical to update all values + * each time the current date changes, instead we use the absolute date rounded + * down to fit in a tick. We then have to compare a tick to the current date to + * know whether it is in the future or in the past. If a tick is below the + * current date, it is in the past. If it is above, it is in the future. 
The + * values will wrap so we can't compare that easily, instead we check the sign + * of the difference between a tick and the current date. + * + * Proceeding like this allows us to manipulate dates that are stored in + * scalars with enough precision and range. For this reason, we store ticks in + * 32-bit integers. This is enough to handle dates that are between 24.85 days + * in the past and as much in the future. + * + * We must both support absolute dates (well in fact, dates relative to now+/- + * 24 days), and intervals (for timeouts). Both types need an "eternity" magic + * value. For optimal code generation, we'll use zero as the magic value + * indicating that an expiration timer or a timeout is not set. We have to + * check that we don't return this value when adding timeouts to <now>. If a + * computation returns 0, we must increase it to 1 (which will push the timeout + * 1 ms further). For this reason, timeouts must not be added by hand but via + * the dedicated tick_add() function. + */ + +#ifndef _HAPROXY_TICKS_H +#define _HAPROXY_TICKS_H + +#include <haproxy/api.h> + +#define TICK_ETERNITY 0 + +/* right now, ticks are milliseconds. Both negative ms and negative ticks + * indicate eternity. + */ +#define MS_TO_TICKS(ms) (ms) +#define TICKS_TO_MS(tk) (tk) + +/* currently updated and stored in time.c */ +extern THREAD_LOCAL unsigned int now_ms; /* internal date in milliseconds (may wrap) */ +extern volatile unsigned int global_now_ms; + +/* return 1 if tick is set, otherwise 0 */ +static inline int tick_isset(int expire) +{ + return expire != 0; +} + +/* Add <timeout> to <now>, and return the resulting expiration date. + * <timeout> will not be checked for null values. + */ +static inline int tick_add(int now, int timeout) +{ + now += timeout; + if (unlikely(!now)) + now++; /* unfortunate value */ + return now; +} + +/* add <timeout> to <now> if it is set, otherwise set it to eternity. + * Return the resulting expiration date. 
+ */ +static inline int tick_add_ifset(int now, int timeout) +{ + if (!timeout) + return TICK_ETERNITY; + return tick_add(now, timeout); +} + +/* return 1 if timer <t1> is before <t2>, none of which can be infinite. */ +static inline int tick_is_lt(int t1, int t2) +{ + return (t1 - t2) < 0; +} + +/* return 1 if timer <t1> is before or equal to <t2>, none of which can be infinite. */ +static inline int tick_is_le(int t1, int t2) +{ + return (t1 - t2) <= 0; +} + +/* return 1 if timer <timer> is expired at date <now>, otherwise zero */ +static inline int tick_is_expired(int timer, int now) +{ + if (unlikely(!tick_isset(timer))) + return 0; + if (unlikely((timer - now) <= 0)) + return 1; + return 0; +} + +/* return the first one of the two timers, both of which may be infinite */ +static inline int tick_first(int t1, int t2) +{ + if (!tick_isset(t1)) + return t2; + if (!tick_isset(t2)) + return t1; + if ((t1 - t2) <= 0) + return t1; + else + return t2; +} + +/* return the first one of the two timers, where only the first one may be infinite */ +static inline int tick_first_2nz(int t1, int t2) +{ + if (!tick_isset(t1)) + return t2; + if ((t1 - t2) <= 0) + return t1; + else + return t2; +} + +/* return the number of ticks remaining from <now> to <exp>, or zero if expired */ +static inline int tick_remain(int now, int exp) +{ + if (tick_is_expired(exp, now)) + return 0; + return exp - now; +} + +#endif /* _HAPROXY_TICKS_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/time.h b/include/haproxy/time.h new file mode 100644 index 0000000..3ebc683 --- /dev/null +++ b/include/haproxy/time.h @@ -0,0 +1,520 @@ +/* + * include/haproxy/time.h + * timeval-based time calculation functions and macros. 
+ * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TIME_H +#define _HAPROXY_TIME_H + +#include <sys/time.h> +#include <haproxy/api.h> + +#define TIME_ETERNITY (TV_ETERNITY_MS) + + + +/**** exported functions *************************************************/ +/* + * adds <ms> ms to <from>, set the result to <tv> and returns a pointer <tv> + */ +struct timeval *tv_ms_add(struct timeval *tv, const struct timeval *from, int ms); + +/* + * compares <tv1> and <tv2> modulo 1ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2 + * Must not be used when either argument is eternity. Use tv_ms_cmp2() for that. + */ +int tv_ms_cmp(const struct timeval *tv1, const struct timeval *tv2); + +/* + * compares <tv1> and <tv2> modulo 1 ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2, + * assuming that TV_ETERNITY is greater than everything. + */ +int tv_ms_cmp2(const struct timeval *tv1, const struct timeval *tv2); + + +/**** general purpose functions and macros *******************************/ + + +/* + * sets a struct timeval to its highest value so that it can never happen + * note that only tv_usec is necessary to detect it since a tv_usec > 999999 + * is normally not possible. 
+ */ +static inline struct timeval *tv_eternity(struct timeval *tv) +{ + tv->tv_sec = (typeof(tv->tv_sec))TV_ETERNITY; + tv->tv_usec = (typeof(tv->tv_usec))TV_ETERNITY; + return tv; +} + +/* + * sets both fields of a struct timeval to 0 and returns <tv> + * + */ +static inline struct timeval *tv_zero(struct timeval *tv) { + tv->tv_sec = tv->tv_usec = 0; + return tv; +} + +/* + * returns non-zero if tv is [eternity], otherwise 0. Only tv_usec is checked. + */ +#define tv_iseternity(tv) ((tv)->tv_usec == (typeof((tv)->tv_usec))TV_ETERNITY) + +/* + * returns 0 if tv is [eternity], otherwise non-zero. Only tv_usec is checked. + */ +#define tv_isset(tv) ((tv)->tv_usec != (typeof((tv)->tv_usec))TV_ETERNITY) + +/* + * returns non-zero if tv is [0], otherwise 0. + */ +#define tv_iszero(tv) (((tv)->tv_sec | (tv)->tv_usec) == 0) + +/* + * Converts a struct timeval to a wrapping number of milliseconds. + * Sub-millisecond microseconds are truncated. + */ +static inline uint __tv_to_ms(const struct timeval *tv) +{ + unsigned int ret; + + ret = (uint)tv->tv_sec * 1000; + ret += (uint)tv->tv_usec / 1000; + return ret; +} + +/* + * Converts a number of milliseconds <ms> to a struct timeval stored into <tv>, + * and returns <tv>. + */ +static inline struct timeval * __tv_from_ms(struct timeval *tv, unsigned long ms) +{ + tv->tv_sec = ms / 1000; + tv->tv_usec = (ms % 1000) * 1000; + return tv; +} + +/* + * Converts a struct timeval to a relative timestamp in nanoseconds (only + * wraps every 585 years, i.e. never for our purpose). 
+ */ +static forceinline ullong tv_to_ns(const struct timeval *tv) +{ + ullong ret; + + ret = (ullong)tv->tv_sec * 1000000000ULL; + ret += (ullong)tv->tv_usec * 1000ULL; + return ret; +} + +/* turns nanoseconds to seconds, just to avoid typos */ +static forceinline uint ns_to_sec(ullong ns) +{ + return ns / 1000000000ULL; +} + +/* turns nanoseconds to milliseconds, just to avoid typos */ +static forceinline uint ns_to_ms(ullong ns) +{ + return ns / 1000000ULL; +} + +/* turns seconds to nanoseconds, just to avoid typos */ +static forceinline ullong sec_to_ns(uint sec) +{ + return sec * 1000000000ULL; +} + +/* turns milliseconds to nanoseconds, just to avoid typos */ +static forceinline ullong ms_to_ns(uint ms) +{ + return ms * 1000000ULL; +} + +/* turns microseconds to nanoseconds, just to avoid typos */ +static forceinline ullong us_to_ns(uint us) +{ + return us * 1000ULL; +} + +/* creates a struct timeval from a relative timestamp in nanosecond */ +#define NS_TO_TV(t) ((const struct timeval){ .tv_sec = (t) / 1000000000ULL, .tv_usec = ((t) % 1000000000ULL) / 1000U }) + +/* Return a number of 1024Hz ticks between 0 and 1023 for input number of + * usecs between 0 and 999999. This function has been optimized to remove + * any divide and multiply, as it is completely optimized away by the compiler + * on CPUs which don't have a fast multiply. Its avg error rate is 305 ppm, + * which is almost twice as low as a direct usec to ms conversion. This version + * also has the benefit of returning 1024 for 1000000. + */ +static inline unsigned int __usec_to_1024th(unsigned int usec) +{ + return (usec * 1073 + 742516) >> 20; +} + + +/**** comparison functions and macros ***********************************/ + + +/* tv_cmp: compares <tv1> and <tv2> : returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2. 
*/ +static inline int __tv_cmp(const struct timeval *tv1, const struct timeval *tv2) +{ + if ((unsigned)tv1->tv_sec < (unsigned)tv2->tv_sec) + return -1; + else if ((unsigned)tv1->tv_sec > (unsigned)tv2->tv_sec) + return 1; + else if ((unsigned)tv1->tv_usec < (unsigned)tv2->tv_usec) + return -1; + else if ((unsigned)tv1->tv_usec > (unsigned)tv2->tv_usec) + return 1; + else + return 0; +} + +/* tv_iseq: compares <tv1> and <tv2> : returns 1 if tv1 == tv2, otherwise 0 */ +#define tv_iseq __tv_iseq +static inline int __tv_iseq(const struct timeval *tv1, const struct timeval *tv2) +{ + return ((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec) && + ((unsigned)tv1->tv_usec == (unsigned)tv2->tv_usec); +} + +/* tv_isgt: compares <tv1> and <tv2> : returns 1 if tv1 > tv2, otherwise 0 */ +#define tv_isgt _tv_isgt +int _tv_isgt(const struct timeval *tv1, const struct timeval *tv2); +static inline int __tv_isgt(const struct timeval *tv1, const struct timeval *tv2) +{ + return + ((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec) ? + ((unsigned)tv1->tv_usec > (unsigned)tv2->tv_usec) : + ((unsigned)tv1->tv_sec > (unsigned)tv2->tv_sec); +} + +/* tv_isge: compares <tv1> and <tv2> : returns 1 if tv1 >= tv2, otherwise 0 */ +#define tv_isge __tv_isge +static inline int __tv_isge(const struct timeval *tv1, const struct timeval *tv2) +{ + return + ((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec) ? + ((unsigned)tv1->tv_usec >= (unsigned)tv2->tv_usec) : + ((unsigned)tv1->tv_sec > (unsigned)tv2->tv_sec); +} + +/* tv_islt: compares <tv1> and <tv2> : returns 1 if tv1 < tv2, otherwise 0 */ +#define tv_islt __tv_islt +static inline int __tv_islt(const struct timeval *tv1, const struct timeval *tv2) +{ + return + ((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec) ? 
+ ((unsigned)tv1->tv_usec < (unsigned)tv2->tv_usec) : + ((unsigned)tv1->tv_sec < (unsigned)tv2->tv_sec); +} + +/* tv_isle: compares <tv1> and <tv2> : returns 1 if tv1 <= tv2, otherwise 0 */ +#define tv_isle _tv_isle +int _tv_isle(const struct timeval *tv1, const struct timeval *tv2); +static inline int __tv_isle(const struct timeval *tv1, const struct timeval *tv2) +{ + return + ((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec) ? + ((unsigned)tv1->tv_usec <= (unsigned)tv2->tv_usec) : + ((unsigned)tv1->tv_sec < (unsigned)tv2->tv_sec); +} + +/* + * compares <tv1> and <tv2> modulo 1ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2 + * Must not be used when either argument is eternity. Use tv_ms_cmp2() for that. + */ +#define tv_ms_cmp _tv_ms_cmp +int _tv_ms_cmp(const struct timeval *tv1, const struct timeval *tv2); +static inline int __tv_ms_cmp(const struct timeval *tv1, const struct timeval *tv2) +{ + if ((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec) { + if ((unsigned)tv2->tv_usec >= (unsigned)tv1->tv_usec + 1000) + return -1; + else if ((unsigned)tv1->tv_usec >= (unsigned)tv2->tv_usec + 1000) + return 1; + else + return 0; + } + else if (((unsigned)tv2->tv_sec > (unsigned)tv1->tv_sec + 1) || + (((unsigned)tv2->tv_sec == (unsigned)tv1->tv_sec + 1) && + ((unsigned)tv2->tv_usec + 1000000 >= (unsigned)tv1->tv_usec + 1000))) + return -1; + else if (((unsigned)tv1->tv_sec > (unsigned)tv2->tv_sec + 1) || + (((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec + 1) && + ((unsigned)tv1->tv_usec + 1000000 >= (unsigned)tv2->tv_usec + 1000))) + return 1; + else + return 0; +} + +/* + * compares <tv1> and <tv2> modulo 1 ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2, + * assuming that TV_ETERNITY is greater than everything. 
+ */ +#define tv_ms_cmp2 _tv_ms_cmp2 +int _tv_ms_cmp2(const struct timeval *tv1, const struct timeval *tv2); +static inline int __tv_ms_cmp2(const struct timeval *tv1, const struct timeval *tv2) +{ + if (tv_iseternity(tv1)) + if (tv_iseternity(tv2)) + return 0; /* same */ + else + return 1; /* tv1 later than tv2 */ + else if (tv_iseternity(tv2)) + return -1; /* tv2 later than tv1 */ + return tv_ms_cmp(tv1, tv2); +} + +/* + * compares <tv1> and <tv2> modulo 1 ms: returns 1 if tv1 <= tv2, 0 if tv1 > tv2, + * assuming that TV_ETERNITY is greater than everything. Returns 0 if tv1 is + * TV_ETERNITY, and always assumes that tv2 != TV_ETERNITY. Designed to replace + * occurrences of (tv_ms_cmp2(tv,now) <= 0). + */ +#define tv_ms_le2 _tv_ms_le2 +int _tv_ms_le2(const struct timeval *tv1, const struct timeval *tv2); +static inline int __tv_ms_le2(const struct timeval *tv1, const struct timeval *tv2) +{ + if (likely((unsigned)tv1->tv_sec > (unsigned)tv2->tv_sec + 1)) + return 0; + + if (likely((unsigned)tv1->tv_sec < (unsigned)tv2->tv_sec)) + return 1; + + if (likely((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec)) { + if ((unsigned)tv2->tv_usec >= (unsigned)tv1->tv_usec + 1000) + return 1; + else + return 0; + } + + if (unlikely(((unsigned)tv1->tv_sec == (unsigned)tv2->tv_sec + 1) && + ((unsigned)tv1->tv_usec + 1000000 >= (unsigned)tv2->tv_usec + 1000))) + return 0; + else + return 1; +} + + +/**** operators **********************************************************/ + + +/* + * Returns the time in ms elapsed between tv1 and tv2, assuming that tv1<=tv2. + * Must not be used when either argument is eternity. 
+ */ +#define tv_ms_elapsed __tv_ms_elapsed +unsigned long _tv_ms_elapsed(const struct timeval *tv1, const struct timeval *tv2); +static inline unsigned long __tv_ms_elapsed(const struct timeval *tv1, const struct timeval *tv2) +{ + unsigned long ret; + + ret = ((signed long)(tv2->tv_sec - tv1->tv_sec)) * 1000; + ret += ((signed long)(tv2->tv_usec - tv1->tv_usec)) / 1000; + return ret; +} + +/* + * returns the remaining time between tv1=now and event=tv2 + * if tv2 is passed, 0 is returned. + * Must not be used when either argument is eternity. + */ + +#define tv_ms_remain __tv_ms_remain +unsigned long _tv_ms_remain(const struct timeval *tv1, const struct timeval *tv2); +static inline unsigned long __tv_ms_remain(const struct timeval *tv1, const struct timeval *tv2) +{ + if (tv_ms_cmp(tv1, tv2) >= 0) + return 0; /* event elapsed */ + + return __tv_ms_elapsed(tv1, tv2); +} + +/* + * returns the remaining time between tv1=now and event=tv2 + * if tv2 is passed, 0 is returned. + * Returns TIME_ETERNITY if tv2 is eternity. + */ +#define tv_ms_remain2 _tv_ms_remain2 +unsigned long _tv_ms_remain2(const struct timeval *tv1, const struct timeval *tv2); +static inline unsigned long __tv_ms_remain2(const struct timeval *tv1, const struct timeval *tv2) +{ + if (tv_iseternity(tv2)) + return TIME_ETERNITY; + + return tv_ms_remain(tv1, tv2); +} + +/* + * adds <inc> to <from>, set the result to <tv> and returns a pointer <tv> + */ +#define tv_add _tv_add +struct timeval *_tv_add(struct timeval *tv, const struct timeval *from, const struct timeval *inc); +static inline struct timeval *__tv_add(struct timeval *tv, const struct timeval *from, const struct timeval *inc) +{ + tv->tv_usec = from->tv_usec + inc->tv_usec; + tv->tv_sec = from->tv_sec + inc->tv_sec; + if (tv->tv_usec >= 1000000) { + tv->tv_usec -= 1000000; + tv->tv_sec++; + } + return tv; +} + + +/* + * If <inc> is set, then add it to <from> and set the result to <tv>, then + * return 1, otherwise return 0. 
It is meant to be used in if conditions. + */ +#define tv_add_ifset _tv_add_ifset +int _tv_add_ifset(struct timeval *tv, const struct timeval *from, const struct timeval *inc); +static inline int __tv_add_ifset(struct timeval *tv, const struct timeval *from, const struct timeval *inc) +{ + if (tv_iseternity(inc)) + return 0; + tv->tv_usec = from->tv_usec + inc->tv_usec; + tv->tv_sec = from->tv_sec + inc->tv_sec; + if (tv->tv_usec >= 1000000) { + tv->tv_usec -= 1000000; + tv->tv_sec++; + } + return 1; +} + +/* + * adds <inc> to <tv> and returns a pointer <tv> + */ +static inline struct timeval *__tv_add2(struct timeval *tv, const struct timeval *inc) +{ + tv->tv_usec += inc->tv_usec; + tv->tv_sec += inc->tv_sec; + if (tv->tv_usec >= 1000000) { + tv->tv_usec -= 1000000; + tv->tv_sec++; + } + return tv; +} + + +/* + * Computes the remaining time between tv1=now and event=tv2. if tv2 is passed, + * 0 is returned. The result is stored into tv. + */ +#define tv_remain _tv_remain +struct timeval *_tv_remain(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv); +static inline struct timeval *__tv_remain(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv) +{ + tv->tv_usec = tv2->tv_usec - tv1->tv_usec; + tv->tv_sec = tv2->tv_sec - tv1->tv_sec; + if ((signed)tv->tv_sec > 0) { + if ((signed)tv->tv_usec < 0) { + tv->tv_usec += 1000000; + tv->tv_sec--; + } + } else if (tv->tv_sec == 0) { + if ((signed)tv->tv_usec < 0) + tv->tv_usec = 0; + } else { + tv->tv_sec = 0; + tv->tv_usec = 0; + } + return tv; +} + + +/* + * Computes the remaining time between tv1=now and event=tv2. if tv2 is passed, + * 0 is returned. The result is stored into tv. Returns ETERNITY if tv2 is + * eternity. 
+ */ +#define tv_remain2 _tv_remain2 +struct timeval *_tv_remain2(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv); +static inline struct timeval *__tv_remain2(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv) +{ + if (tv_iseternity(tv2)) + return tv_eternity(tv); + return __tv_remain(tv1, tv2, tv); +} + + +/* + * adds <ms> ms to <from>, set the result to <tv> and returns a pointer <tv>. + * Only a tv_usec value of 1000000 or more is carried into tv_sec; a negative + * <ms> is not normalized. + */ +#define tv_ms_add _tv_ms_add +struct timeval *_tv_ms_add(struct timeval *tv, const struct timeval *from, int ms); +static inline struct timeval *__tv_ms_add(struct timeval *tv, const struct timeval *from, int ms) +{ + tv->tv_usec = from->tv_usec + (ms % 1000) * 1000; + tv->tv_sec = from->tv_sec + (ms / 1000); + while (tv->tv_usec >= 1000000) { + tv->tv_usec -= 1000000; + tv->tv_sec++; + } + return tv; +} + + +/* + * compares <tv1> and <tv2> : returns 1 if <tv1> is strictly before <tv2>, + * otherwise 0 (including when both are equal). + * This should be very fast because it is used in schedulers. + * It has been optimized to return 1 (so call it in a loop which continues + * as long as tv1<tv2) + */ + +#define tv_isbefore(tv1, tv2) \ + (unlikely((unsigned)(tv1)->tv_sec < (unsigned)(tv2)->tv_sec) ? 1 : \ + (unlikely((unsigned)(tv1)->tv_sec > (unsigned)(tv2)->tv_sec) ? 0 : \ + unlikely((unsigned)(tv1)->tv_usec < (unsigned)(tv2)->tv_usec))) + +/* + * stores the first event between <tv1> and <tv2> into <tvmin>. + * A zero tv is not handled specially: it simply compares as the earliest + * date. <tvmin> is returned. If <tvmin> is known + * to be the same as <tv1> or <tv2>, it is recommended to use + * tv_bound instead. + */ +#define tv_min(tvmin, tv1, tv2) ({ \ + if (tv_isbefore(tv1, tv2)) { \ + *tvmin = *tv1; \ + } \ + else { \ + *tvmin = *tv2; \ + } \ + tvmin; \ +}) + +/* + * bounds <tv1> to <tv2>: if <tv2> is before <tv1>, it is copied into <tv1>. + * A zero tv is not handled specially. <tv1> is returned. This gives the + * same result as tv_min(tv1, tv1, tv2). 
+ */ +#define tv_bound(tv1, tv2) ({ \ + if (tv_isbefore(tv2, tv1)) \ + *tv1 = *tv2; \ + tv1; \ +}) + +#endif /* _HAPROXY_TIME_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/timeshift.h b/include/haproxy/timeshift.h new file mode 100644 index 0000000..62e5855 --- /dev/null +++ b/include/haproxy/timeshift.h @@ -0,0 +1,10 @@ +#include <sys/time.h> +#include <sys/epoll.h> + +#define gettimeofday(tv, tz) timeshift_gettimeofday(tv, tz) +#define clock_gettime(clk_id, tp) timeshift_clock_gettime(clk_id, tp) +#define epoll_wait(epfd, events, maxevents, timeout) timeshift_epoll_wait(epfd, events, maxevents, timeout) + +int timeshift_gettimeofday(struct timeval *tv, void *tz); +int timeshift_clock_gettime(clockid_t clk_id, struct timespec *tp); +int timeshift_epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout); diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h new file mode 100644 index 0000000..357c4c0 --- /dev/null +++ b/include/haproxy/tinfo-t.h @@ -0,0 +1,180 @@ +/* + * include/haproxy/tinfo-t.h + * Definitions of the thread_info structure. + * + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TINFO_T_H +#define _HAPROXY_TINFO_T_H + +#include <import/ebtree-t.h> + +#include <haproxy/api-t.h> +#include <haproxy/freq_ctr-t.h> +#include <haproxy/thread-t.h> + +/* forward declarations for types used below */ +struct buffer; + +/* Threads sets are known either by a set of absolute thread numbers, or by a + * set of relative thread numbers within a group, for each group. The default + * is the absolute mode and corresponds to the case where no group is known + * (nbgrp == 0). The mode may only be changed when the set is empty (use + * thread_set_is_empty() for this). + */ +struct thread_set { + union { + ulong abs[(MAX_THREADS + LONGBITS - 1) / LONGBITS]; + ulong rel[MAX_TGROUPS]; + }; + ulong grps; /* bit field of all non-empty groups, 0 for abs */ +}; + +/* tasklet classes */ +enum { + TL_URGENT = 0, /* urgent tasklets (I/O callbacks) */ + TL_NORMAL = 1, /* normal tasks */ + TL_BULK = 2, /* bulk task/tasklets, streaming I/Os */ + TL_HEAVY = 3, /* heavy computational tasklets (e.g. TLS handshakes) */ + TL_CLASSES /* must be last */ +}; + +/* thread_ctx flags, for ha_thread_ctx[].flags. These flags describe the + * thread's state and are visible to other threads, so they must be used + * with atomic ops. + */ +#define TH_FL_STUCK 0x00000001 +#define TH_FL_TASK_PROFILING 0x00000002 +#define TH_FL_NOTIFIED 0x00000004 /* task was notified about the need to wake up */ +#define TH_FL_SLEEPING 0x00000008 /* thread won't check its task list before next wakeup */ +#define TH_FL_STARTED 0x00000010 /* set once the thread starts */ +#define TH_FL_IN_LOOP 0x00000020 /* set only inside the polling loop */ + + +/* Thread group information. 
This defines a base and a count of global thread + * IDs which belong to it, and which can be looked up into thread_info/ctx. It + * is set up during parsing and is stable during operation. Thread groups start + * at 1 so tgroup[0] describes thread group 1. + */ +struct tgroup_info { + ulong threads_enabled; /* mask of threads enabled in this group */ + uint base; /* first thread in this group */ + uint count; /* number of threads in this group */ + ulong tgid_bit; /* bit corresponding to the tgroup ID */ + + /* pad to cache line (64B) */ + char __pad[0]; /* unused except to check remaining room */ + char __end[0] __attribute__((aligned(64))); +}; + +/* This structure describes the group-specific context (e.g. active threads + * etc). Each instance is aligned and padded to a cache line (64B) by its + * trailing <__end> member, to limit false sharing between groups. + */ +struct tgroup_ctx { + ulong threads_harmless; /* mask of threads that are not modifying anything */ + ulong threads_idle; /* mask of threads idling in the poller */ + ulong stopping_threads; /* mask of threads currently stopping */ + + struct eb_root timers; /* wait queue (sorted timers tree, global, accessed under wq_lock) */ + + uint niced_tasks; /* number of niced tasks in this group's run queues */ + + /* pad to cache line (64B) */ + char __pad[0]; /* unused except to check remaining room */ + char __end[0] __attribute__((aligned(64))); +}; + +/* This structure describes all the per-thread info we need. When threads are + * disabled, it contains the same info for the single running thread. This is + * stable across all of a thread's life, and is being pointed to by the + * thread-local "ti" pointer. 
+ */ +struct thread_info { + const struct tgroup_info *tg; /* config of the thread-group this thread belongs to */ + struct tgroup_ctx *tg_ctx; /* context of the thread-group this thread belongs to */ + uint tid, ltid; /* process-wide and group-wide thread ID (start at 0) */ + ulong ltid_bit; /* bit masks for the tid/ltid */ + uint tgid; /* ID of the thread group this thread belongs to (starts at 1; 0=unset) */ + /* 32-bit hole here */ + + ullong pth_id; /* the pthread_t cast to a ullong */ + void *stack_top; /* the top of the stack when entering the thread */ + + /* pad to cache line (64B) */ + char __pad[0]; /* unused except to check remaining room */ + char __end[0] __attribute__((aligned(64))); +}; + +/* This structure describes all the per-thread context we need. This is + * essentially the scheduler-specific stuff and a few important per-thread + * lists that need to be thread-local. We take care of splitting this into + * separate cache lines. + */ +struct thread_ctx { + // first and second cache lines on 64 bits: thread-local operations only. 
+ struct eb_root timers; /* tree constituting the per-thread wait queue */ + struct eb_root rqueue; /* tree constituting the per-thread run queue */ + struct task *current; /* current task (not tasklet) */ + int current_queue; /* points to current tasklet list being run, -1 if none */ + unsigned int nb_tasks; /* number of tasks allocated on this thread */ + uint8_t tl_class_mask; /* bit mask of non-empty tasklets classes */ + + // 7 bytes hole here + struct list pool_lru_head; /* oldest objects in thread-local pool caches */ + struct list buffer_wq; /* buffer waiters */ + struct list streams; /* list of streams attached to this thread */ + struct list quic_conns; /* list of active quic-conns attached to this thread */ + struct list quic_conns_clo; /* list of closing quic-conns attached to this thread */ + struct list queued_checks; /* checks waiting for a connection slot */ + unsigned int nb_rhttp_conns; /* count of current conns used for active reverse HTTP */ + + ALWAYS_ALIGN(2*sizeof(void*)); + struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */ + + // third cache line here on 64 bits: accessed mostly using atomic ops + ALWAYS_ALIGN(64); + struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */ + unsigned int rqueue_ticks; /* Insertion counter for the run queue */ + unsigned int rq_total; /* total size of the run queue, prio_tree + tasklets */ + int tasks_in_list; /* Number of tasks in the per-thread tasklets list */ + uint idle_pct; /* idle to total ratio over last sample (percent) */ + uint flags; /* thread flags, TH_FL_*, atomic! 
*/ + uint active_checks; /* number of active health checks on this thread, incl migrated */ + + uint32_t sched_wake_date; /* current task/tasklet's wake date or 0 */ + uint32_t sched_call_date; /* current task/tasklet's call date (valid if sched_wake_date > 0) */ + struct sched_activity *sched_profile_entry; /* profile entry in use by the current task/tasklet, only if sched_wake_date>0 */ + + uint64_t prev_cpu_time; /* previous per thread CPU time */ + uint64_t prev_mono_time; /* previous system wide monotonic time */ + + struct eb_root rqueue_shared; /* run queue fed by other threads */ + __decl_thread(HA_SPINLOCK_T rqsh_lock); /* lock protecting the shared runqueue */ + + struct freq_ctr out_32bps; /* #of 32-byte blocks emitted per second */ + uint running_checks; /* number of health checks currently running on this thread */ + + unsigned long long out_bytes; /* total #of bytes emitted */ + unsigned long long spliced_out_bytes; /* total #of bytes emitted though a kernel pipe */ + struct buffer *thread_dump_buffer; /* NULL out of dump, valid during a dump, 0x01 once done */ + + ALWAYS_ALIGN(128); +}; + + +#endif /* _HAPROXY_TINFO_T_H */ diff --git a/include/haproxy/tinfo.h b/include/haproxy/tinfo.h new file mode 100644 index 0000000..ddb26aa --- /dev/null +++ b/include/haproxy/tinfo.h @@ -0,0 +1,120 @@ +/* + * include/haproxy/tinfo.h + * Export of ha_thread_info[] and ti pointer. + * + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TINFO_H +#define _HAPROXY_TINFO_H + +#include <haproxy/api.h> +#include <haproxy/tinfo-t.h> +#include <haproxy/intops.h> + +/* the structs are in thread.c */ +extern struct tgroup_info ha_tgroup_info[MAX_TGROUPS]; +extern THREAD_LOCAL const struct tgroup_info *tg; + +extern struct thread_info ha_thread_info[MAX_THREADS]; +extern THREAD_LOCAL const struct thread_info *ti; /* thread_info for the current thread */ + +extern struct tgroup_ctx ha_tgroup_ctx[MAX_TGROUPS]; +extern THREAD_LOCAL struct tgroup_ctx *tg_ctx; /* ha_tgroup_ctx for the current thread */ + +extern struct thread_ctx ha_thread_ctx[MAX_THREADS]; +extern THREAD_LOCAL struct thread_ctx *th_ctx; /* ha_thread_ctx for the current thread */ + +/* returns the number of threads set in set <ts>. */ +static inline int thread_set_count(const struct thread_set *ts) +{ + int i, n; + + /* iterating over tgroups guarantees to visit all possible threads, the + * opposite is not true. + */ + for (i = n = 0; i < MAX_TGROUPS; i++) + n += my_popcountl(ts->rel[i]); + return n; +} + +/* returns zero if the thread set <ts> has at least one thread set, + * otherwise non-zero. + */ +static inline int thread_set_is_empty(const struct thread_set *ts) +{ + int i; + + /* iterating over tgroups guarantees to visit all possible threads, the + * opposite is not true. + */ + for (i = 0; i < MAX_TGROUPS; i++) + if (ts->rel[i]) + return 0; + return 1; +} + +/* returns the number starting at 1 of the <n>th thread-group set in thread set + * <ts>, or zero if the set is empty or if thread numbers are only absolute. + * <n> starts at zero and corresponds to the number of non-empty groups to be + * skipped (i.e. 0 returns the first one). 
+ */ +static inline int thread_set_nth_group(const struct thread_set *ts, int n) +{ + int i; + + if (ts->grps) { + for (i = 0; i < MAX_TGROUPS; i++) + if (ts->rel[i] && !n--) + return i + 1; + } + return 0; +} + +/* returns the thread mask of the <n>th assigned thread-group in the thread + * set <ts> for relative sets, the first word of the absolute mask (abs[0]) in + * case of absolute sets, or zero if the set is empty. abs[0] is also what is + * returned when <grps> is set but fewer than <n>+1 groups are non-empty. This + * is only used temporarily to ease the + * transition. <n> starts at zero and corresponds to the number of non-empty + * groups to be skipped (i.e. 0 returns the first one). + */ +static inline ulong thread_set_nth_tmask(const struct thread_set *ts, int n) +{ + int i; + + if (ts->grps) { + for (i = 0; i < MAX_TGROUPS; i++) + if (ts->rel[i] && !n--) + return ts->rel[i]; + } + return ts->abs[0]; +} + +/* Pins the thread set to the specified thread mask on group 1 (use ~0UL for + * all threads). This is for compatibility with some rare legacy code. If a + * "thread" directive on a bind line is parsed, this one will be overwritten. + */ +static inline void thread_set_pin_grp1(struct thread_set *ts, ulong mask) +{ + int i; + + ts->grps = 1; + ts->rel[0] = mask; + for (i = 1; i < MAX_TGROUPS; i++) + ts->rel[i] = 0; +} + +#endif /* _HAPROXY_TINFO_H */ diff --git a/include/haproxy/tools-t.h b/include/haproxy/tools-t.h new file mode 100644 index 0000000..32d8193 --- /dev/null +++ b/include/haproxy/tools-t.h @@ -0,0 +1,166 @@ +/* + * include/haproxy/tools-t.h + * This file contains some general purpose macros and structures. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TOOLS_T_H +#define _HAPROXY_TOOLS_T_H + +/* size used for max length of decimal representation of long long int. */ +#define NB_LLMAX_STR (sizeof("-9223372036854775807")-1) + +/* number of itoa_str entries */ +#define NB_ITOA_STR 16 + +/* maximum quoted string length (truncated above) */ +#define QSTR_SIZE 200 +#define NB_QSTR 10 + +/* returns 1 only if zero or one bit is set in X, which means that X is either + * zero or a power of 2, and 0 otherwise */ +#define POWEROF2(x) (((x) & ((x)-1)) == 0) + +/* return an integer of type <ret> with only the highest bit set. <ret> may be + * either a variable or a type. + */ +#define MID_RANGE(ret) ((typeof(ret))1 << (8*sizeof(ret) - 1)) + +/* return the largest possible integer of type <ret>, with all bits set */ +#define MAX_RANGE(ret) (~(typeof(ret))0) + +/* DEFNULL() returns either the argument as-is, or NULL if absent. This is for + * use in macro arguments. + */ +#define DEFNULL(...) _FIRST_ARG(NULL, ##__VA_ARGS__, NULL) +#define _FIRST_ARG(a, b, ...) 
b + +/* options flags for parse_line() */ +#define PARSE_OPT_SHARP 0x00000001 // '#' ends the line +#define PARSE_OPT_BKSLASH 0x00000002 // '\' escapes chars +#define PARSE_OPT_SQUOTE 0x00000004 // "'" encloses a string +#define PARSE_OPT_DQUOTE 0x00000008 // '"' encloses a string +#define PARSE_OPT_ENV 0x00000010 // '$' is followed by environment variables +#define PARSE_OPT_INPLACE 0x00000020 // parse and tokenize in-place (src == dst) +#define PARSE_OPT_WORD_EXPAND 0x00000040 // '[*]' suffix to expand an environment variable as several individual arguments + +/* return error flags from parse_line() */ +#define PARSE_ERR_TOOLARGE 0x00000001 // result is too large for initial outlen +#define PARSE_ERR_TOOMANY 0x00000002 // more words than initial nbargs +#define PARSE_ERR_QUOTE 0x00000004 // unmatched quote (offending one at errptr) +#define PARSE_ERR_BRACE 0x00000008 // unmatched brace (offending one at errptr) +#define PARSE_ERR_HEX 0x00000010 // unparsable hex sequence (at errptr) +#define PARSE_ERR_VARNAME 0x00000020 // invalid variable name (at errptr) +#define PARSE_ERR_OVERLAP 0x00000040 // output overlaps with input, need to allocate +#define PARSE_ERR_WRONG_EXPAND 0x00000080 // unparsable word expansion sequence + +/* special return values for the time parser (parse_time_err()) */ +#define PARSE_TIME_UNDER ((char *)1) +#define PARSE_TIME_OVER ((char *)2) + +/* unit flags to pass to parse_time_err() */ +#define TIME_UNIT_US 0x0000 +#define TIME_UNIT_MS 0x0001 +#define TIME_UNIT_S 0x0002 +#define TIME_UNIT_MIN 0x0003 +#define TIME_UNIT_HOUR 0x0004 +#define TIME_UNIT_DAY 0x0005 +#define TIME_UNIT_MASK 0x0007 + +#define SEC 1 +#define MINUTE (60 * SEC) +#define HOUR (60 * MINUTE) +#define DAY (24 * HOUR) + +/* Address parsing options for use with str2sa_range() */ +#define PA_O_RESOLVE 0x00000001 /* do resolve the FQDN to an IP address */ +#define PA_O_PORT_OK 0x00000002 /* ports are supported */ +#define PA_O_PORT_MAND 0x00000004 /* ports are mandatory */ 
+#define PA_O_PORT_RANGE 0x00000008 /* port ranges are supported */ +#define PA_O_PORT_OFS 0x00000010 /* port offsets are supported */ +#define PA_O_SOCKET_FD 0x00000020 /* inherited socket FDs are supported */ +#define PA_O_RAW_FD 0x00000040 /* inherited raw FDs are supported (pipes, ttys, ...) */ +#define PA_O_DGRAM 0x00000080 /* the address can be used for a datagram socket (in or out) */ +#define PA_O_STREAM 0x00000100 /* the address can be used for streams (in or out) */ +#define PA_O_XPRT 0x00000200 /* transport protocols may be specified */ +#define PA_O_CONNECT 0x00000400 /* the protocol must have a ->connect method */ +#define PA_O_DEFAULT_DGRAM 0x00000800 /* by default, this address will be used for a datagram socket */ + +/* UTF-8 decoder status */ +#define UTF8_CODE_OK 0x00 +#define UTF8_CODE_OVERLONG 0x10 +#define UTF8_CODE_INVRANGE 0x20 +#define UTF8_CODE_BADSEQ 0x40 + +/* HAP_STRING() makes a string from a literal while HAP_XSTRING() first + * evaluates the argument and is suited to pass macros. + * + * They allow macros like PCRE_MAJOR to be defined without quotes, which + * is convenient for applications that want to test its value. + */ +#define HAP_STRING(...) #__VA_ARGS__ +#define HAP_XSTRING(...) HAP_STRING(__VA_ARGS__) + +/* operators to compare values. They're ordered that way so that the lowest bit + * serves as a negation for the test and contains all tests that are not equal. 
+ */ +enum { + STD_OP_LE = 0, STD_OP_GT = 1, + STD_OP_EQ = 2, STD_OP_NE = 3, + STD_OP_GE = 4, STD_OP_LT = 5, +}; + +enum http_scheme { + SCH_HTTP, + SCH_HTTPS, +}; + +/* output format used by url2sa() */ +struct split_url { + enum http_scheme scheme; + const char *host; + int host_len; +}; + +/* generic structure associating a name and a description, for use in arrays */ +struct name_desc { + const char *name; + const char *desc; +}; + +struct net_addr { + int family; /* AF_INET or AF_INET6 if defined, AF_UNSET if undefined */ + union { + struct { + struct in_addr ip; + struct in_addr mask; + } v4; + struct { + struct in6_addr ip; + struct in6_addr mask; + } v6; + } addr; +}; + +/* holds socket and xprt types for a given address */ +struct net_addr_type { + int proto_type; // socket layer + int xprt_type; // transport layer +}; + +#endif /* _HAPROXY_TOOLS_T_H */ diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h new file mode 100644 index 0000000..3726f63 --- /dev/null +++ b/include/haproxy/tools.h @@ -0,0 +1,1179 @@ +/* + * include/haproxy/tools.h + * This file contains some general purpose functions and macros. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TOOLS_H +#define _HAPROXY_TOOLS_H + +#ifdef USE_BACKTRACE +#define _GNU_SOURCE +#include <execinfo.h> +#endif + +#include <string.h> +#include <stdio.h> +#include <time.h> +#include <stdarg.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <import/eb32sctree.h> +#include <import/eb32tree.h> +#include <haproxy/api.h> +#include <haproxy/chunk.h> +#include <haproxy/intops.h> +#include <haproxy/namespace-t.h> +#include <haproxy/protocol-t.h> +#include <haproxy/tools-t.h> + +/****** string-specific macros and functions ******/ +/* if a > max, then bound <a> to <max>. The macro returns the new <a> */ +#define UBOUND(a, max) ({ typeof(a) b = (max); if ((a) > b) (a) = b; (a); }) + +/* if a < min, then bound <a> to <min>. The macro returns the new <a> */ +#define LBOUND(a, min) ({ typeof(a) b = (min); if ((a) < b) (a) = b; (a); }) + +#define SWAP(a, b) do { typeof(a) t; t = a; a = b; b = t; } while(0) + +/* use if you want to return a simple hash. Key 0 doesn't hash. */ +#define HA_ANON_STR(key, str) hash_anon(key, str, "", "") + +/* use if you want to return a hash like : ID('hash'). Key 0 doesn't hash. */ +#define HA_ANON_ID(key, str) hash_anon(key, str, "ID(", ")") + +/* use if you want to return a hash like : PATH('hash'). Key 0 doesn't hash. */ +#define HA_ANON_PATH(key, str) hash_anon(key, str, "PATH(", ")") + +/* use only in a function that contains an appctx (key comes from appctx). */ +#define HA_ANON_CLI(str) hash_anon(appctx->cli_anon_key, str, "", "") + + +/* + * copies at most <size-1> chars from <src> to <dst>. Last char is always + * set to 0, unless <size> is 0. 
The number of chars copied is returned + * (excluding the terminating zero). + * This code has been optimized for size and speed : on x86, it's 45 bytes + * long, uses only registers, and consumes only 4 cycles per char. + */ +extern int strlcpy2(char *dst, const char *src, int size); + +/* + * This function simply returns a locally allocated string containing + * the ascii representation for number 'n' in decimal. + */ +extern THREAD_LOCAL int itoa_idx; /* index of next itoa_str to use */ +extern THREAD_LOCAL char itoa_str[][171]; +extern int build_is_static; +extern char *ultoa_r(unsigned long n, char *buffer, int size); +extern char *lltoa_r(long long int n, char *buffer, int size); +extern char *sltoa_r(long n, char *buffer, int size); +extern const char *ulltoh_r(unsigned long long n, char *buffer, int size); +size_t flt_trim(char *buffer, size_t num_start, size_t len); +char *ftoa_r(double n, char *buffer, int size); +static inline const char *ultoa(unsigned long n) +{ + return ultoa_r(n, itoa_str[0], sizeof(itoa_str[0])); +} + +/* + * unsigned long long ASCII representation + * + * return a pointer to the last char '\0' or NULL if there is not enough + * space in dst + */ +char *ulltoa(unsigned long long n, char *dst, size_t size); + + +/* + * unsigned long ASCII representation + * + * return a pointer to the last char '\0' or NULL if there is not enough + * space in dst + */ +char *ultoa_o(unsigned long n, char *dst, size_t size); + +/* + * signed long ASCII representation + * + * return a pointer to the last char '\0' or NULL if there is not enough + * space in dst + */ +char *ltoa_o(long int n, char *dst, size_t size); + +/* + * signed long long ASCII representation + * + * return a pointer to the last char '\0' or NULL if there is not enough + * space in dst + */ +char *lltoa(long long n, char *dst, size_t size); + +/* + * write an ASCII representation of an unsigned int into dst, + * return a pointer to the last character + * Pad the ASCII representation with '0', using size. 
+ */ +char *utoa_pad(unsigned int n, char *dst, size_t size); + +/* + * This function simply returns a locally allocated string containing the ascii + * representation for number 'n' in decimal, unless n is 0 in which case it + * returns the alternate string (or an empty string if the alternate string is + * NULL). Its use is intended for limits displayed in reports, where it's + * desirable not to display anything if there is no limit. Warning! it shares + * the same vector as ultoa_r(). + */ +extern const char *limit_r(unsigned long n, char *buffer, int size, const char *alt); + +/* returns a locally allocated string containing the ASCII representation of + * the number 'n' in decimal. Up to NB_ITOA_STR calls may be used in the same + * function call (eg: printf), shared with the other similar functions making + * use of itoa_str[]. + */ +static inline const char *U2A(unsigned long n) +{ + const char *ret = ultoa_r(n, itoa_str[itoa_idx], sizeof(itoa_str[0])); + if (++itoa_idx >= NB_ITOA_STR) + itoa_idx = 0; + return ret; +} + +/* returns a locally allocated string containing the HTML representation of + * the number 'n' in decimal. Up to NB_ITOA_STR calls may be used in the same + * function call (eg: printf), shared with the other similar functions making + * use of itoa_str[]. + */ +static inline const char *U2H(unsigned long long n) +{ + const char *ret = ulltoh_r(n, itoa_str[itoa_idx], sizeof(itoa_str[0])); + if (++itoa_idx >= NB_ITOA_STR) + itoa_idx = 0; + return ret; +} + +/* returns a locally allocated string containing the ASCII representation of + * the number 'n' in decimal. Up to NB_ITOA_STR calls may be used in the same + * function call (eg: printf), shared with the other similar functions making + * use of itoa_str[]. 
+ */ +static inline const char *F2A(double n) +{ + const char *ret = ftoa_r(n, itoa_str[itoa_idx], sizeof(itoa_str[0])); + if (++itoa_idx >= NB_ITOA_STR) + itoa_idx = 0; + return ret; +} + +/* returns a locally allocated string containing the HTML representation of + * the number 'n' in decimal. Up to NB_ITOA_STR calls may be used in the same + * function call (eg: printf), shared with the other similar functions making + * use of itoa_str[]. NOTE(review): the body formats through ftoa_r() exactly like F2A(); no HTML-specific formatting is visible here. + */ +static inline const char *F2H(double n) +{ + const char *ret = ftoa_r(n, itoa_str[itoa_idx], sizeof(itoa_str[0])); + if (++itoa_idx >= NB_ITOA_STR) + itoa_idx = 0; + return ret; +} + +/* returns the result of limit_r() for the number 'n' and the alternate + * string <alt>, using the next itoa_str[] slot as buffer. Up to NB_ITOA_STR calls may be used in the same + * function call (eg: printf), shared with the other similar functions making + * use of itoa_str[]. + */ +static inline const char *LIM2A(unsigned long n, const char *alt) +{ + const char *ret = limit_r(n, itoa_str[itoa_idx], sizeof(itoa_str[0]), alt); + if (++itoa_idx >= NB_ITOA_STR) + itoa_idx = 0; + return ret; +} + +/* returns a locally allocated string containing the quoted encoding of the + * input string. The output may be truncated to QSTR_SIZE chars, but it is + * guaranteed that the string will always be properly terminated. Quotes are + * encoded by doubling them as is commonly done in CSV files. QSTR_SIZE must + * always be at least 4 chars. + */ +const char *qstr(const char *str); + +/* returns <str> or its quote-encoded equivalent if it contains at least one + * quote or a comma. This is aimed at building CSV-compatible strings. + */ +static inline const char *cstr(const char *str) +{ + const char *p = str; + + while (*p) { + if (*p == ',' || *p == '"') + return qstr(str); + p++; + } + return str; +} + +/* + * Returns non-zero if character <s> is a hex digit (0-9, a-f, A-F), else zero. + */ +extern int ishex(char s); + +/* + * Checks <name> for invalid characters. 
Valid chars are [A-Za-z0-9_:.-]. If an + * invalid character is found, a pointer to it is returned. If everything is + * fine, NULL is returned. + */ +extern const char *invalid_char(const char *name); + +/* + * Checks <name> for invalid characters. Valid chars are [A-Za-z0-9_.-]. + * If an invalid character is found, a pointer to it is returned. + * If everything is fine, NULL is returned. + */ +extern const char *invalid_domainchar(const char *name); + +/* + * Checks <name> for invalid characters. Valid chars are [A-Za-z_.-]. + * If an invalid character is found, a pointer to it is returned. + * If everything is fine, NULL is returned. + */ +extern const char *invalid_prefix_char(const char *name); + +/* returns true if <c> is an identifier character, that is, a digit, a letter, + * or '-', '+', '_', ':' or '.'. This is usable for proxy names, server names, + * ACL names, sample fetch names, and converter names. + */ +static inline int is_idchar(char c) +{ + return isalnum((unsigned char)c) || + c == '.' || c == '_' || c == '-' || c == '+' || c == ':'; +} + +/* + * converts <str> to a locally allocated struct sockaddr_storage *, and a + * port range consisting in two integers. The low and high end are always set + * even if the port is unspecified, in which case (0,0) is returned. The low + * port is set in the sockaddr. Thus, it is enough to check the size of the + * returned range to know if an array must be allocated or not. The format is + * "addr[:[port[-port]]]", where "addr" can be a dotted IPv4 address, an IPv6 + * address, a host name, or empty or "*" to indicate INADDR_ANY. If an IPv6 + * address wants to ignore port, it must be terminated by a trailing colon (':'). + * The IPv6 '::' address is IN6ADDR_ANY, so in order to bind to a given port on + * IPv6, use ":::port". NULL is returned if the host part cannot be resolved. + * If <pfx> is non-null, it is used as a string prefix before any path-based + * address (typically the path to a unix socket). 
+ */ +struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int *high, int *fd, + struct protocol **proto, struct net_addr_type *sa_type, + char **err, const char *pfx, char **fqdn, unsigned int opts); + + +/* converts <addr> and <port> into a string representation of the address and port. This is sort + * of an inverse of str2sa_range, with some restrictions. The supported families are AF_INET, + * AF_INET6, AF_UNIX, and AF_CUST_SOCKPAIR. If the family is unsupported NULL is returned. + * If map_ports is true, then the sign of the port is included in the output, to indicate it is + * relative to the incoming port. AF_INET and AF_INET6 will be in the form "<addr>:<port>". + * AF_UNIX will either be just the path (if using a pathname) or "abns@<path>" if it is abstract. + * AF_CUST_SOCKPAIR will be of the form "sockpair@<fd>". + * + * The returned char* is allocated, and it is the responsibility of the caller to free it. + */ +char *sa2str(const struct sockaddr_storage *addr, int port, int map_ports); + +/* converts <str> to a struct in_addr containing a network mask. It can be + * passed in dotted form (255.255.255.0) or in CIDR form (24). It returns 1 + * if the conversion succeeds otherwise zero. + */ +int str2mask(const char *str, struct in_addr *mask); + +/* converts <str> to a struct in6_addr containing a network mask. It can be + * passed in quadruplet form (ffff:ffff::) or in CIDR form (64). It returns 1 + * if the conversion succeeds otherwise zero. + */ +int str2mask6(const char *str, struct in6_addr *mask); + +/* convert <cidr> to struct in_addr <mask>. It returns 1 if the conversion + * succeeds. NOTE(review): the failure value used to be documented here as "non-zero", which is indistinguishable from success; presumably zero as for str2mask() - confirm against the implementation. + */ +int cidr2dotted(int cidr, struct in_addr *mask); + +/* + * converts <str> to two struct in_addr* which must be pre-allocated. + * The format is "addr[/mask]", where "addr" cannot be empty, and mask + * is optional and either in the dotted or CIDR notation. + * Note: "addr" can also be a hostname. 
Returns 1 if OK, 0 if error. + */ +int str2net(const char *str, int resolve, struct in_addr *addr, struct in_addr *mask); + +/* str2ip and str2ip2: + * + * converts <str> to a struct sockaddr_storage* provided by the caller. The + * caller must have zeroed <sa> first, and may have set sa->ss_family to force + * parse a specific address format. If the ss_family is 0 or AF_UNSPEC, then + * the function tries to guess the address family from the syntax. If the + * family is forced and the format doesn't match, an error is returned. The + * string is assumed to contain only an address, no port. The address can be a + * dotted IPv4 address, an IPv6 address, a host name, or empty or "*" to + * indicate INADDR_ANY. NULL is returned if the host part cannot be resolved. + * The return address will only have the address family and the address set, + * all other fields remain zero. The string is not supposed to be modified. + * The IPv6 '::' address is IN6ADDR_ANY. + * + * str2ip2: + * + * If <resolve> is set, this function try to resolve DNS, otherwise, it returns + * NULL result. + */ +struct sockaddr_storage *str2ip2(const char *str, struct sockaddr_storage *sa, int resolve); +static inline struct sockaddr_storage *str2ip(const char *str, struct sockaddr_storage *sa) +{ + return str2ip2(str, sa, 1); +} + +/* + * converts <str> to two struct in6_addr* which must be pre-allocated. + * The format is "addr[/mask]", where "addr" cannot be empty, and mask + * is an optional number of bits (128 being the default). + * Returns 1 if OK, 0 if error. + */ +int str62net(const char *str, struct in6_addr *addr, unsigned char *mask); + +/* + * Parse IP address found in url. + */ +int url2ipv4(const char *addr, struct in_addr *dst); + +/* + * Resolve destination server from URL. Convert <str> to a sockaddr_storage*. + */ +int url2sa(const char *url, int ulen, struct sockaddr_storage *addr, struct split_url *out); + +/* Tries to convert a sockaddr_storage address to text form. 
Upon success, the + * address family is returned so that it's easy for the caller to adapt to the + * output format. Zero is returned if the address family is not supported. -1 + * is returned upon error, with errno set. AF_INET, AF_INET6 and AF_UNIX are + * supported. + */ +int addr_to_str(const struct sockaddr_storage *addr, char *str, int size); + +/* Tries to convert a sockaddr_storage port to text form. Upon success, the + * address family is returned so that it's easy for the caller to adapt to the + * output format. Zero is returned if the address family is not supported. -1 + * is returned upon error, with errno set. AF_INET, AF_INET6 and AF_UNIX are + * supported. + */ +int port_to_str(const struct sockaddr_storage *addr, char *str, int size); + +/* check if the given address is local to the system or not. It will return + * -1 when it's not possible to know, 0 when the address is not local, 1 when + * it is. We don't want to iterate over all interfaces for this (and it is not + * portable). So instead we try to bind in UDP to this address on a free non + * privileged port and to connect to the same address, port 0 (connect doesn't + * care). If it succeeds, we own the address. Note that non-inet addresses are + * considered local since they're most likely AF_UNIX. + */ +int addr_is_local(const struct netns_entry *ns, + const struct sockaddr_storage *orig); + +/* will try to encode the string <string> replacing all characters tagged in + * <map> with the hexadecimal representation of their ASCII-code (2 digits) + * prefixed by <escape>, and will store the result between <start> (included) + * and <stop> (excluded), and will always terminate the string with a '\0' + * before <stop>. The position of the '\0' is returned if the conversion + * completes. If bytes are missing between <start> and <stop>, then the + * conversion will be incomplete and truncated. If <stop> <= <start>, the '\0' + * cannot even be stored so we return <start> without writing the 0. 
+ * The input string must also be zero-terminated. + */ +extern const char hextab[]; +extern long query_encode_map[]; +char *encode_string(char *start, char *stop, + const char escape, const long *map, + const char *string); + +/* + * Same behavior, except that it encodes chunk <chunk> instead of a string. + */ +char *encode_chunk(char *start, char *stop, + const char escape, const long *map, + const struct buffer *chunk); + +/* + * Tries to prefix characters tagged in the <map> with the <escape> + * character. The input <string> is processed until string_stop + * is reached or NULL-byte is encountered. The result will + * be stored between <start> (included) and <stop> (excluded). This + * function will always try to terminate the resulting string with a '\0' + * before <stop>, and will return its position if the conversion + * completes. + */ +char *escape_string(char *start, char *stop, + const char escape, const long *map, + const char *string, const char *string_stop); + +/* Check a string for using it in a CSV output format. If the string contains + * one of the following four char <">, <,>, CR or LF, the string is + * encapsulated between <"> and the <"> are escaped by a <""> sequence. + * <str> is the input string to be escaped. The function assumes that + * the input string is null-terminated. + * + * If <quote> is 0, the result is returned escaped but without double quote. + * It is useful if the escaped string is used between double quotes in the + * format. + * + * printf("..., \"%s\", ...\r\n", csv_enc(str, 0, 0, &trash)); + * + * If <quote> is 1, the converter puts the quotes only if any character is + * escaped. If <quote> is 2, the converter always puts the quotes. + * + * If <oneline> is not 0, CRs are skipped and LFs are replaced by spaces. + * This re-format multi-lines strings to only one line. 
The purpose is to + * allow a line by line parsing but also to keep the output compliant with + * the CLI witch uses LF to defines the end of the response. + * + * If <oneline> is 2, In addition to previous action, the trailing spaces are + * removed. + * + * <output> is a struct chunk used for storing the output string. + * + * The function returns the converted string on its output. If an error + * occurs, the function returns an empty string. This type of output is useful + * for using the function directly as printf() argument. + * + * If the output buffer is too short to contain the input string, the result + * is truncated. + * + * This function appends the encoding to the existing output chunk. Please + * use csv_enc() instead if you want to replace the output chunk. + */ +const char *csv_enc_append(const char *str, int quote, int online, + struct buffer *output); + +/* same as above but the output chunk is reset first */ +static inline const char *csv_enc(const char *str, int quote, int oneline, + struct buffer *output) +{ + chunk_reset(output); + return csv_enc_append(str, quote, oneline, output); +} + +/* Decode an URL-encoded string in-place. The resulting string might + * be shorter. If some forbidden characters are found, the conversion is + * aborted, the string is truncated before the issue and non-zero is returned, + * otherwise the operation returns non-zero indicating success. + * If the 'in_form' argument is non-nul the string is assumed to be part of + * an "application/x-www-form-urlencoded" encoded string, and the '+' will be + * turned to a space. If it's zero, this will only be done after a question + * mark ('?'). + */ +int url_decode(char *string, int in_form); + +unsigned int inetaddr_host(const char *text); +unsigned int inetaddr_host_lim(const char *text, const char *stop); +unsigned int inetaddr_host_lim_ret(char *text, char *stop, char **ret); + +/* Function that hashes or not a string according to the anonymizing key (scramble). 
*/ +const char *hash_anon(uint32_t scramble, const char *string2hash, const char *prefix, const char *suffix); + +/* Function that hashes or not an ip according to the ipstring entered */ +const char * hash_ipanon(uint32_t scramble, char *ipstring, int hasport); + +/* Terminates <s> at the first CR or LF by overwriting it with '\0' and returns + * a pointer to the character following it. If the end of the string is reached + * first, a pointer to the terminating '\0' is returned and nothing is modified. + */ +static inline char *cut_crlf(char *s) { + + while (*s != '\r' && *s != '\n') { + char *p = s++; + + if (!*p) + return p; + } + + *s++ = '\0'; + + return s; +} + +/* Returns a pointer to the first character of <s> which differs from <c>. + * <s> is returned unchanged when <c> is zero. + */ +static inline char *ltrim(char *s, char c) { + + if (c) + while (*s == c) + s++; + + return s; +} + +/* Overwrites all trailing occurrences of <c> in <s> with '\0' and returns <s>. */ +static inline char *rtrim(char *s, char c) { + + char *p = s + strlen(s); + + while (p-- > s) + if (*p == c) + *p = '\0'; + else + break; + + return s; +} + +/* Strips trailing occurrences of <c> from <s> using rtrim(), then returns the + * result of ltrim(), i.e. a pointer past the leading occurrences of <c>. + */ +static inline char *alltrim(char *s, char c) { + + rtrim(s, c); + + return ltrim(s, c); +} + +/* This function converts the time_t value <now> into a broken out struct tm + * which must be allocated by the caller. It is highly recommended to use this + * function instead of localtime() because that one requires a time_t* which + * is not always compatible with tv_sec depending on OS/hardware combinations. + */ +static inline void get_localtime(const time_t now, struct tm *tm) +{ + localtime_r(&now, tm); +} + +/* This function converts the time_t value <now> into a broken out struct tm + * which must be allocated by the caller. It is highly recommended to use this + * function instead of gmtime() because that one requires a time_t* which + * is not always compatible with tv_sec depending on OS/hardware combinations. + */ +static inline void get_gmtime(const time_t now, struct tm *tm) +{ + gmtime_r(&now, tm); +} + +/* Counts a number of elapsed days since 01/01/0000 based solely on elapsed + * years and assuming the regular rule for leap years applies. It's fake but + * serves as a temporary origin. It's worth remembering that it's the first + * year of each period that is leap and not the last one, so for instance year + * 1 sees 366 days since year 0 was leap. 
For this reason we have to apply + * modular arithmetic which is why we offset the year by 399 before + * subtracting the excess at the end. No overflow here before ~11.7 million + * years. + */ +static inline unsigned int days_since_zero(unsigned int y) +{ + return y * 365 + (y + 399) / 4 - (y + 399) / 100 + (y + 399) / 400 + - 399 / 4 + 399 / 100; +} + +/* Returns the number of seconds since 01/01/1970 0:0:0 GMT for GMT date <tm>. + * It is meant as a portable replacement for timegm() for use with valid inputs. + * Returns undefined results for invalid dates (eg: months out of range 0..11). + */ +extern time_t my_timegm(const struct tm *tm); + +/* This function parses a time value optionally followed by a unit suffix among + * "d", "h", "m", "s", "ms" or "us". It converts the value into the unit + * expected by the caller. The computation does its best to avoid overflows. + * The value is returned in <ret> if everything is fine, and a NULL is returned + * by the function. In case of error, a pointer to the error is returned and + * <ret> is left untouched. + */ +extern const char *parse_time_err(const char *text, unsigned *ret, unsigned unit_flags); +extern const char *parse_size_err(const char *text, unsigned *ret); + +/* + * Parse binary string written in hexadecimal (source) and store the decoded + * result into binstr and set binstrlen to the length of binstr. Memory for + * binstr is allocated by the function. In case of error, returns 0 with an + * error message in err. 
+ */ +int parse_binary(const char *source, char **binstr, int *binstrlen, char **err); + +/* copies at most <n> characters from <src> and always terminates with '\0' */ +char *my_strndup(const char *src, int n); + +/* + * search needle in haystack + * returns the pointer if found, returns NULL otherwise + */ +const void *my_memmem(const void *, size_t, const void *, size_t); + +/* get length of the initial segment consisting entirely of bytes within a given + * mask + */ +size_t my_memspn(const void *, size_t, const void *, size_t); + +/* get length of the initial segment consisting entirely of bytes not within a + * given mask + */ +size_t my_memcspn(const void *, size_t, const void *, size_t); + +/* This function returns the first unused key greater than or equal to <key> in + * ID tree <root>. Zero is returned if no place is found. + */ +unsigned int get_next_id(struct eb_root *root, unsigned int key); + +/* dump the full tree to <file> in DOT format for debugging purposes. Will + * optionally highlight node <subj> if found, depending on operation <op> : + * 0 : nothing + * >0 : insertion, node/leaf are surrounded in red + * <0 : removal, node/leaf are dashed with no background + * Will optionally add "desc" as a label on the graph if set and non-null. + */ +void eb32sc_to_file(FILE *file, struct eb_root *root, const struct eb32sc_node *subj, + int op, const char *desc); + +/* same but for ebmb */ +void ebmb_to_file(FILE *file, struct eb_root *root, const struct ebmb_node *subj, + int op, const char *desc); + +/* This function compares a sample word possibly followed by blanks to another + * clean word. The compare is case-insensitive. 1 is returned if both are equal, + * otherwise zero. This intends to be used when checking HTTP headers for some + * values. + */ +int word_match(const char *sample, int slen, const char *word, int wlen); + +/* Convert a fixed-length string to an IP address. 
Returns 0 in case of error, + * or the number of chars read in case of success. + */ +int buf2ip(const char *buf, size_t len, struct in_addr *dst); +int buf2ip6(const char *buf, size_t len, struct in6_addr *dst); + +/* To be used to quote config arg positions. Returns the string at <ptr> + * surrounded by simple quotes if <ptr> is valid and non-empty, or "end of line" + * if ptr is NULL or empty. The string is locally allocated. + */ +const char *quote_arg(const char *ptr); + +/* returns an operator among STD_OP_* for string <str> or < 0 if unknown */ +int get_std_op(const char *str); + +/* sets the address family to AF_UNSPEC so that is_addr() does not match */ +static inline void clear_addr(struct sockaddr_storage *addr) +{ + addr->ss_family = AF_UNSPEC; +} + +/* returns non-zero if addr has a valid and non-null IPv4 or IPv6 address, + * otherwise zero. + */ +static inline int is_inet_addr(const struct sockaddr_storage *addr) +{ + int i; + + switch (addr->ss_family) { + case AF_INET: + return *(int *)&((struct sockaddr_in *)addr)->sin_addr; + case AF_INET6: + for (i = 0; i < sizeof(struct in6_addr) / sizeof(int); i++) + if (((int *)&((struct sockaddr_in6 *)addr)->sin6_addr)[i] != 0) + return ((int *)&((struct sockaddr_in6 *)addr)->sin6_addr)[i]; + } + return 0; +} + +/* returns non-zero if addr has a valid and non-null IPv4 or IPv6 address, + * or is a unix address, otherwise returns zero. 
+ */ +static inline int is_addr(const struct sockaddr_storage *addr) +{ + if (addr->ss_family == AF_UNIX || addr->ss_family == AF_CUST_SOCKPAIR) + return 1; + else + return is_inet_addr(addr); +} + +/* returns port in network byte order */ +static inline int get_net_port(const struct sockaddr_storage *addr) +{ + switch (addr->ss_family) { + case AF_INET: + return ((struct sockaddr_in *)addr)->sin_port; + case AF_INET6: + return ((struct sockaddr_in6 *)addr)->sin6_port; + } + return 0; +} + +/* returns port in host byte order */ +static inline int get_host_port(const struct sockaddr_storage *addr) +{ + switch (addr->ss_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)addr)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)addr)->sin6_port); + } + return 0; +} + +/* returns address len for <addr>'s family, 0 for unknown families */ +static inline int get_addr_len(const struct sockaddr_storage *addr) +{ + switch (addr->ss_family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_UNIX: + return sizeof(struct sockaddr_un); + } + return 0; +} + +/* set port from <port> given in network byte order (stored as-is); other families are left untouched. Always returns 0. */ +static inline int set_net_port(struct sockaddr_storage *addr, int port) +{ + switch (addr->ss_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = port; + break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = port; + break; + } + return 0; +} + +/* set port from <port> given in host byte order (converted with htons()); other families are left untouched. Always returns 0. */ +static inline int set_host_port(struct sockaddr_storage *addr, int port) +{ + switch (addr->ss_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + break; + } + return 0; +} + +/* Convert mask from bit length form to in_addr form. + * This function never fails. 
+ * This function never fails. + */ +void len2mask6(int len, struct in6_addr *addr); + +/* Return true if IPv4 address is part of the network */ +extern int in_net_ipv4(const void *addr, const struct in_addr *mask, const struct in_addr *net); + +/* Return true if IPv6 address is part of the network */ +extern int in_net_ipv6(const void *addr, const struct in6_addr *mask, const struct in6_addr *net); + +/* Map IPv4 address on IPv6 address, as specified in RFC 3513. */ +extern void v4tov6(struct in6_addr *sin6_addr, struct in_addr *sin_addr); + +/* Map IPv6 address on IPv4 address, as specified in RFC 3513. + * Return true if conversion is possible and false otherwise. + */ +extern int v6tov4(struct in_addr *sin_addr, struct in6_addr *sin6_addr); + +/* compare two struct sockaddr_storage, including port if <check_port> is true, + * and return: + * 0 (true) if the addr is the same in both + * 1 (false) if the addr is not the same in both + * -1 (unable) if one of the addr is not AF_INET* + */ +int ipcmp(const struct sockaddr_storage *ss1, const struct sockaddr_storage *ss2, int check_port); + +/* compare a struct sockaddr_storage to a struct net_addr and return : + * 0 (true) if <addr> is matching <net> + * 1 (false) if <addr> is not matching <net> + * -1 (unable) if <addr> or <net> is not AF_INET* + */ +int ipcmp2net(const struct sockaddr_storage *addr, const struct net_addr *net); + +/* copy ip from <source> into <dest> + * the caller must clear <dest> before calling. + * Returns a pointer to the destination + */ +struct sockaddr_storage *ipcpy(const struct sockaddr_storage *source, struct sockaddr_storage *dest); + +char *human_time(int t, short hz_div); + +extern const char *monthname[]; + +/* date2str_log: write a date in the format : + * sprintf(str, "%02d/%s/%04d:%02d:%02d:%02d.%03d", + * tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900, + * tm.tm_hour, tm.tm_min, tm.tm_sec, (int)date.tv_usec/1000); + * + * without using sprintf. 
return a pointer to the last char written (\0) or + * NULL if there isn't enough space. + */ +char *date2str_log(char *dest, const struct tm *tm, const struct timeval *date, size_t size); + +/* Return the GMT offset for a specific local time. + * Both t and tm must represent the same time. + * The string returned has the same format as returned by strftime(... "%z", tm). + * Offsets are kept in an internal cache for better performances. + */ +const char *get_gmt_offset(time_t t, struct tm *tm); + +/* gmt2str_log: write a date in the format : + * "%02d/%s/%04d:%02d:%02d:%02d +0000" without using snprintf + * return a pointer to the last char written (\0) or + * NULL if there isn't enough space. + */ +char *gmt2str_log(char *dst, struct tm *tm, size_t size); + +/* localdate2str_log: write a date in the format : + * "%02d/%s/%04d:%02d:%02d:%02d +0000(local timezone)" without using snprintf + * Both t and tm must represent the same time. + * return a pointer to the last char written (\0) or + * NULL if there isn't enough space. + */ +char *localdate2str_log(char *dst, time_t t, struct tm *tm, size_t size); + +/* The date parsing functions below parse a date string and fill the + * corresponding broken-down time in <tm>. On success they return 1, + * otherwise they return 0. NOTE(review): this used to read "These 3 functions" while four parsers are declared; confirm that it also covers parse_http_date(). print_time_short() is not described by this comment. + */ +int parse_http_date(const char *date, int len, struct tm *tm); +int parse_imf_date(const char *date, int len, struct tm *tm); +int parse_rfc850_date(const char *date, int len, struct tm *tm); +int parse_asctime_date(const char *date, int len, struct tm *tm); +int print_time_short(struct buffer *out, const char *pfx, uint64_t ns, const char *sfx); + +/* Dynamically allocates a string of the proper length to hold the formatted + * output. NULL is returned on error. The caller is responsible for freeing the + * memory area using free(). The resulting string is returned in <out> if the + * pointer is not NULL. 
A previous version of <out> might be used to build the + * new string, and it will be freed before returning if it is not NULL, which + * makes it possible to build complex strings from iterative calls without + * having to care about freeing intermediate values, as in the example below : + * + * memprintf(&err, "invalid argument: '%s'", arg); + * ... + * memprintf(&err, "parser said : <%s>\n", *err); + * ... + * free(*err); + * + * This means that <err> must be initialized to NULL before first invocation. + * The return value also holds the allocated string, which eases error checking + * and immediate consumption. If the output pointer is not used, NULL must be + * passed instead and it will be ignored. The returned message will then also + * be NULL so that the caller does not have to bother with freeing anything. + * + * It is also convenient to use it without any free except the last one : + * err = NULL; + * if (!fct1(err)) report(*err); + * if (!fct2(err)) report(*err); + * if (!fct3(err)) report(*err); + * free(*err); + * + * memprintf relies on memvprintf. This last version can be called from any + * function with variadic arguments. + */ +char *memvprintf(char **out, const char *format, va_list args) + __attribute__ ((format(printf, 2, 0))); + +char *memprintf(char **out, const char *format, ...) + __attribute__ ((format(printf, 2, 3))); + +/* Used to add <level> spaces before each line of <out>, unless there is only one line. + * The input argument is automatically freed and reassigned. The result will have to be + * freed by the caller. + * Example of use : + * parse(cmd, &err); (callee: memprintf(&err, ...)) + * fprintf(stderr, "Parser said: %s\n", indent_msg(&err, 2)); + * free(err); + */ +char *indent_msg(char **out, int level); +int append_prefixed_str(struct buffer *out, const char *in, const char *pfx, char eol, int first); + +/* removes environment variable <name> from the environment as found in + * environ. 
This is only provided as an alternative for systems without + * unsetenv() (old Solaris and AIX versions). THIS IS NOT THREAD SAFE. + * The principle is to scan environ for each occurrence of variable name + * <name> and to replace the matching pointers with the last pointer of + * the array (since variables are not ordered). + * It always returns 0 (success). + */ +int my_unsetenv(const char *name); + +/* Convert occurrences of environment variables in the input string to their + * corresponding value. A variable is identified as a series of alphanumeric + * characters or underscores following a '$' sign. The <in> string must be + * free()able. NULL returns NULL. The resulting string might be reallocated if + * some expansion is made. + */ +char *env_expand(char *in); +uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, const char **errptr); +ssize_t read_line_to_trash(const char *path_fmt, ...); +size_t sanitize_for_printing(char *line, size_t pos, size_t width); +void update_word_fingerprint(uint8_t *fp, const char *word); +void make_word_fingerprint(uint8_t *fp, const char *word); +int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2); + +/* debugging macro to emit messages using write() on fd #-1 so that strace sees + * them. + */ +#define fddebug(msg...) do { char *_m = NULL; memprintf(&_m, ##msg); if (_m) write(-1, _m, strlen(_m)); free(_m); } while (0) + +/* displays a <len> long memory block at <buf>, assuming first byte of <buf> + * has address <baseaddr>. String <pfx> may be placed as a prefix in front of + * each line. It may be NULL if unused. The output is emitted to file <out>. 
+ */ +void debug_hexdump(FILE *out, const char *pfx, const char *buf, unsigned int baseaddr, int len); + +/* this is used to emit call traces when building with TRACE=1 */ +__attribute__((format(printf, 1, 2))) +void calltrace(char *fmt, ...); + +/* same as strstr() but case-insensitive */ +const char *strnistr(const char *str1, int len_str1, const char *str2, int len_str2); + +int strordered(const char *s1, const char *s2, const char *s3); + +/* after increasing a pointer value, it can exceed the first buffer + * size. This function transforms the value of <ptr> according to + * the expected position. <chunks> is an array of the one or two + * available chunks. The first value is the start of the first chunk, + * the second value is the end+1 of the first chunk. The third value + * is NULL or the start of the second chunk and the fourth value is + * the end+1 of the second chunk. The function returns 1 if it does a + * wrap, else returns 0. + */ +static inline int fix_pointer_if_wrap(const char **chunks, const char **ptr) +{ + if (*ptr < chunks[1]) + return 0; + if (!chunks[2]) + return 0; + *ptr = chunks[2] + ( *ptr - chunks[1] ); + return 1; +} + +unsigned char utf8_next(const char *s, int len, unsigned int *c); + +static inline unsigned char utf8_return_code(unsigned int code) +{ + return code & 0xf0; +} + +static inline unsigned char utf8_return_length(unsigned char code) +{ + return code & 0x0f; +} + +/* returns a 64-bit timestamp with the finest resolution available. The + * unit is intentionally not specified. It's mostly used to compare dates. 
+ */ +#if defined(__i386__) || defined(__x86_64__) +static inline unsigned long long rdtsc() +{ + unsigned int a, d; + asm volatile("rdtsc" : "=a" (a), "=d" (d)); + return a + ((unsigned long long)d << 32); +} +#else +static inline unsigned long long rdtsc() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec * 1000000 + tv.tv_usec; +} +#endif + +/* append a copy of string <str> (in a wordlist) at the end of the list <li> + * On failure : return 0 and <err> filled with an error message. + * The caller is responsible for freeing the <err> and <str> copy + * memory area using free() + */ +struct list; +int list_append_word(struct list *li, const char *str, char **err); + +int dump_text(struct buffer *out, const char *buf, int bsize); +int dump_binary(struct buffer *out, const char *buf, int bsize); +int dump_text_line(struct buffer *out, const char *buf, int bsize, int len, + int *line, int ptr); +void dump_addr_and_bytes(struct buffer *buf, const char *pfx, const void *addr, int n); +void dump_hex(struct buffer *out, const char *pfx, const void *buf, int len, int unsafe); +int may_access(const void *ptr); +const void *resolve_sym_name(struct buffer *buf, const char *pfx, const void *addr); +const char *get_exec_path(void); +void *get_sym_curr_addr(const char *name); +void *get_sym_next_addr(const char *name); +int dump_libs(struct buffer *output, int with_addr); + +/* Note that this may result in opening libgcc() on first call, so it may need + * to have been called once before chrooting. 
+ */ +static forceinline int my_backtrace(void **buffer, int max) +{ +#if !defined(USE_BACKTRACE) + return 0; +#elif defined(HA_HAVE_WORKING_BACKTRACE) + return backtrace(buffer, max); +#else + const struct frame { + const struct frame *next; + void *ra; + } *frame; + int count; + + frame = __builtin_frame_address(0); + for (count = 0; count < max && may_access(frame) && may_access(frame->ra);) { + buffer[count++] = frame->ra; + frame = frame->next; + } + return count; +#endif +} + +/* same as realloc() except that ptr is also freed upon failure */ +static inline void *my_realloc2(void *ptr, size_t size) +{ + void *ret; + + ret = realloc(ptr, size); + if (!ret && size) + free(ptr); + return ret; +} + +int parse_dotted_uints(const char *s, unsigned int **nums, size_t *sz); + +/* PRNG */ +void ha_generate_uuid(struct buffer *output); +void ha_random_seed(const unsigned char *seed, size_t len); +void ha_random_jump96(uint32_t dist); +uint64_t ha_random64(void); + +static inline uint32_t ha_random32() +{ + return ha_random64() >> 32; +} + +static inline int32_t ha_random() +{ + return ha_random32() >> 1; +} + +extern THREAD_LOCAL unsigned int statistical_prng_state; + +/* Xorshift RNGs from http://www.jstatsoft.org/v08/i14/paper. + * This has a (2^32)-1 period, only zero is never returned. + */ +static inline unsigned int statistical_prng() +{ + unsigned int x = statistical_prng_state; + + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return statistical_prng_state = x; +} + +/* returns a random number between 0 and <range> - 1 that is evenly distributed + * over the range. + */ +static inline uint statistical_prng_range(uint range) +{ + return mul32hi(statistical_prng(), range ? 
range - 1 : 0); +} + +/* returns a hash on <bits> bits of pointer <p> that is suitable for being used + * to compute statistic buckets, in that it's fast and reasonably distributed + * thanks to mixing the bits via a multiplication by a prime number and using + * the middle bits on 64-bit platforms or remixing the topmost with lowest ones + * on 32-bit. The distribution is smooth enough for the hash to provide on + * average 1/e non-colliding entries per input, and use on average 1-1/e + * entries total. Thus for example hashing 1024 random valid pointers will + * result on average in ~647 distinct keys, 377 of which are unique. It was + * carefully selected to deliver optimal distribution for low bit counts so + * that hashing on 2,3,4 or 5 bits delivers good results. + */ +static forceinline uint ptr_hash(const void *p, const int bits) +{ + unsigned long long x = (unsigned long)p; + + if (!bits) + return 0; + + x *= 0xacd1be85U; + if (sizeof(long) == 4) + x ^= x >> 32; + else + x >>= 31 - (bits + 1) / 2; + return x & (~0U >> (-bits & 31)); +} + +/* Same as above but works on two pointers. It will return the same values + * if the second pointer is NULL. + */ +static forceinline uint ptr2_hash(const void *p1, const void *p2, const int bits) +{ + unsigned long long x = (unsigned long)p1; + unsigned long long y = (unsigned long)p2; + + if (!bits) + return 0; + + x *= 0xacd1be85U; + y *= 0x9d28e4e9U; + x ^= y; + if (sizeof(long) == 4) + x ^= x >> 32; + else + x >>= 33 - bits / 2; + return x & (~0U >> (-bits & 31)); +} + + +/* Update array <fp> with the character transition <prev> to <curr>. If <prev> + * is zero, it's assumed that <curr> is the first character. If <curr> is zero + * its assumed to mark the end. Both may be zero. <fp> is a 1024-entries array + * indexed as 32*from+to. Positions for 'from' and 'to' are: + * 1..26=letter, 27=digit, 28=other/begin/end. + * Row "from=0" is used to mark the character's presence. Others unused. 
+ */ +static inline void update_char_fingerprint(uint8_t *fp, char prev, char curr) +{ + int from, to; + + switch (prev) { + case 0: from = 28; break; // begin + case 'a'...'z': from = prev - 'a' + 1; break; + case 'A'...'Z': from = tolower(prev) - 'a' + 1; break; + case '0'...'9': from = 27; break; + default: from = 28; break; + } + + switch (curr) { + case 0: to = 28; break; // end + case 'a'...'z': to = curr - 'a' + 1; break; + case 'A'...'Z': to = tolower(curr) - 'a' + 1; break; + case '0'...'9': to = 27; break; + default: to = 28; break; + } + if (curr) + fp[to] = 1; + fp[32 * from + to]++; +} + + +/* compare the current OpenSSL version to a string */ +int openssl_compare_current_version(const char *version); +/* compare the current OpenSSL name to a string */ +int openssl_compare_current_name(const char *name); + +#endif /* _HAPROXY_TOOLS_H */ diff --git a/include/haproxy/trace-t.h b/include/haproxy/trace-t.h new file mode 100644 index 0000000..322fccd --- /dev/null +++ b/include/haproxy/trace-t.h @@ -0,0 +1,179 @@ +/* + * include/haproxy/trace-t.h + * This file provides definitions for runtime tracing + * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TRACE_T_H +#define _HAPROXY_TRACE_T_H + +#include <import/ist.h> +#include <haproxy/api-t.h> +#include <haproxy/sink-t.h> + +/* the macros below define an optional type for each of the 4 args passed to + * the trace() call. When such a type is set, the caller commits to exclusively + * using a valid pointer when this argument is not null. This allows the trace() + * function to automatically start or stop the lock-on mechanism when it detects + * a type that it can dereference such as a connection or a stream. Each value + * is represented as an exclusive bit and each arg is represented by a distinct + * byte. The reason for using a single bit per value is to speed up tests using + * bitmasks. Users must not declare args with multiple bits set for the same arg. + * By default arguments are private, corresponding to value 0. 
+ */ + +/* for use only in macro definitions above */ +#define TRC_ARG_PRIV (0) +#define TRC_ARG_CONN (1 << 0) +#define TRC_ARG_SESS (1 << 1) +#define TRC_ARG_STRM (1 << 2) +#define TRC_ARG_CHK (1 << 3) +#define TRC_ARG_QCON (1 << 4) +#define TRC_ARG_APPCTX (1 << 5) + +#define TRC_ARG1_PRIV (TRC_ARG_PRIV << 0) +#define TRC_ARG1_CONN (TRC_ARG_CONN << 0) +#define TRC_ARG1_SESS (TRC_ARG_SESS << 0) +#define TRC_ARG1_STRM (TRC_ARG_STRM << 0) +#define TRC_ARG1_CHK (TRC_ARG_CHK << 0) +#define TRC_ARG1_QCON (TRC_ARG_QCON << 0) +#define TRC_ARG1_APPCTX (TRC_ARG_APPCTX << 0) + +#define TRC_ARG2_PRIV (TRC_ARG_PRIV << 8) +#define TRC_ARG2_CONN (TRC_ARG_CONN << 8) +#define TRC_ARG2_SESS (TRC_ARG_SESS << 8) +#define TRC_ARG2_STRM (TRC_ARG_STRM << 8) +#define TRC_ARG2_CHK (TRC_ARG_CHK << 8) +#define TRC_ARG2_QCON (TRC_ARG_QCON << 8) +#define TRC_ARG2_APPCTX (TRC_ARG_APPCTX << 8) + +#define TRC_ARG3_PRIV (TRC_ARG_PRIV << 16) +#define TRC_ARG3_CONN (TRC_ARG_CONN << 16) +#define TRC_ARG3_SESS (TRC_ARG_SESS << 16) +#define TRC_ARG3_STRM (TRC_ARG_STRM << 16) +#define TRC_ARG3_CHK (TRC_ARG_CHK << 16) +#define TRC_ARG3_QCON (TRC_ARG_QCON << 16) +#define TRC_ARG3_APPCTX (TRC_ARG_APPCTX << 16) + +#define TRC_ARG4_PRIV (TRC_ARG_PRIV << 24) +#define TRC_ARG4_CONN (TRC_ARG_CONN << 24) +#define TRC_ARG4_SESS (TRC_ARG_SESS << 24) +#define TRC_ARG4_STRM (TRC_ARG_STRM << 24) +#define TRC_ARG4_CHK (TRC_ARG_CHK << 24) +#define TRC_ARG4_QCON (TRC_ARG_QCON << 24) +#define TRC_ARG4_APPCTX (TRC_ARG_APPCTX << 24) + +/* usable to detect the presence of any arg of the desired type */ +#define TRC_ARGS_CONN (TRC_ARG_CONN * 0x01010101U) +#define TRC_ARGS_SESS (TRC_ARG_SESS * 0x01010101U) +#define TRC_ARGS_STRM (TRC_ARG_STRM * 0x01010101U) +#define TRC_ARGS_CHK (TRC_ARG_CHK * 0x01010101U) +#define TRC_ARGS_QCON (TRC_ARG_QCON * 0x01010101U) +#define TRC_ARGS_APPCTX (TRC_ARG_APPCTX * 0x01010101U) + + +enum trace_state { + TRACE_STATE_STOPPED = 0, // completely disabled + TRACE_STATE_WAITING, // waiting for 
the start condition to happen + TRACE_STATE_RUNNING, // waiting for the stop or pause conditions +}; + +/* trace levels, from least detailed to most detailed. Traces emitted at a + * lower level are always reported at higher levels. + */ +enum trace_level { + TRACE_LEVEL_ERROR = 0, // only errors + TRACE_LEVEL_USER, // also info useful to the end user + TRACE_LEVEL_PROTO, // also report protocol-level updates + TRACE_LEVEL_STATE, // also report state changes + TRACE_LEVEL_DATA, // also report data exchanges + TRACE_LEVEL_DEVELOPER, // functions entry/exit and any other developer info +}; + +enum trace_lockon { + TRACE_LOCKON_NOTHING = 0, // don't lock on anything + TRACE_LOCKON_THREAD, // lock on the thread that started the trace + TRACE_LOCKON_LISTENER, // lock on the listener that started the trace + TRACE_LOCKON_FRONTEND, // lock on the frontend that started the trace + TRACE_LOCKON_BACKEND, // lock on the backend that started the trace + TRACE_LOCKON_SERVER, // lock on the server that started the trace + TRACE_LOCKON_CONNECTION, // lock on the connection that started the trace + TRACE_LOCKON_SESSION, // lock on the session that started the trace + TRACE_LOCKON_STREAM, // lock on the stream that started the trace + TRACE_LOCKON_CHECK, // lock on the check that started the trace + TRACE_LOCKON_QCON, // lock on the QUIC connection that started the trace + TRACE_LOCKON_APPCTX, // lock on the appctx that started the trace + TRACE_LOCKON_ARG1, // lock on arg1, totally source-dependent + TRACE_LOCKON_ARG2, // lock on arg2, totally source-dependent + TRACE_LOCKON_ARG3, // lock on arg3, totally source-dependent + TRACE_LOCKON_ARG4, // lock on arg4, totally source-dependent +}; + +/* Each trace event maps a name to a mask in an uint64_t. Multiple bits are + * permitted to have composite events. This is supposed to be stored into an + * array terminated by mask 0 (name and desc are then ignored). 
Names "now", + * "any" and "none" are reserved by the CLI parser for start/pause/stop + * operations. + */ +struct trace_event { + uint64_t mask; + const char *name; + const char *desc; +}; + +/* Regarding the verbosity, if <decoding> is not NULL, it must point to a NULL- + * terminated array of name:description, which will define verbosity levels + * implemented by the decoding callback. The verbosity value will default to + * 1. When verbosity levels are defined, levels 1 and above are described by + * these levels. At level zero, the callback is never called. + */ +struct trace_source { + /* source definition */ + const struct ist name; + const char *desc; + const struct trace_event *known_events; + struct list source_link; // element in list of known trace sources + void (*default_cb)(enum trace_level level, uint64_t mask, + const struct trace_source *src, + const struct ist where, const struct ist func, + const void *a1, const void *a2, const void *a3, const void *a4); + uint32_t arg_def; // argument definitions (sum of TRC_ARG{1..4}_*) + const struct name_desc *lockon_args; // must be 4 entries if not NULL + const struct name_desc *decoding; // null-terminated if not NULL + /* trace configuration, adjusted by "trace <module>" on CLI */ + enum trace_lockon lockon; + uint64_t start_events; // what will start the trace. default: 0=nothing + uint64_t pause_events; // what will pause the trace. default: 0=nothing + uint64_t stop_events; // what will stop the trace. default: 0=nothing + uint64_t report_events; // mask of which events need to be reported. 
+ enum trace_level level; // report traces up to this level of info + unsigned int verbosity; // decoder's level of detail among <decoding> (0=no cb) + struct sink *sink; // where to send the trace + /* trace state part below */ + enum trace_state state; + const void *lockon_ptr; // what to lockon when lockon is set +}; + +#endif /* _HAPROXY_TRACE_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/trace.h b/include/haproxy/trace.h new file mode 100644 index 0000000..703ac8d --- /dev/null +++ b/include/haproxy/trace.h @@ -0,0 +1,216 @@ +/* + * include/haproxy/trace.h + * This file provides functions for runtime tracing + * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_TRACE_H +#define _HAPROXY_TRACE_H + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/list.h> +#include <haproxy/sink-t.h> +#include <haproxy/tools.h> +#include <haproxy/trace-t.h> + +/* Make a string from the location of the trace producer as "file:line" */ +#define TRC_LOC _TRC_LOC(__FILE__, __LINE__) +#define _TRC_LOC(f,l) __TRC_LOC(f, ":", l) +#define __TRC_LOC(f,c,l) f c #l + +/* truncate a macro arg list to exactly 5 args and replace missing ones with NULL. 
+ * The first one (a0) is always ignored. + */ +#define TRC_5ARGS(a0,a1,a2,a3,a4,a5,...) DEFNULL(a1),DEFNULL(a2),DEFNULL(a3),DEFNULL(a4),DEFNULL(a5) + +/* reports whether trace is active for the source and the arguments. It uses + * the same criteria as trace() (locking, filtering etc) so it's safe to use + * from application code to decide whether or not to engage in heavier data + * preparation processing. + */ +#define _trace_enabled(level, mask, src, args...) \ + (unlikely((src)->state != TRACE_STATE_STOPPED && \ + __trace_enabled(level, mask, src, ##args, NULL) > 0)) + +/* sends a trace for the given source. Arguments are passed in the exact same + * order as in the __trace() function, which is only called if (src)->state is + * not TRACE_STATE_STOPPED. This is the only case where arguments are evaluated. + */ +#define _trace(level, mask, src, args...) \ + do { \ + if (unlikely((src)->state != TRACE_STATE_STOPPED)) \ + __trace(level, mask, src, ##args); \ + } while (0) + +/* For convenience, TRACE() alone uses the file's default TRACE_LEVEL, most + * likely TRACE_LEVEL_DEVELOPER, though the other explicit variants specify + * the desired level and will work when TRACE_LEVEL is not set. The 5 optional + * arguments are the 4 source-specific arguments that are passed to the cb() + * callback dedicated to decoding, and which may be used for special tracking. + * These 4 arguments as well as the cb() function pointer may all be NULL, or + * simply omitted (in which case they will be replaced by a NULL). This + * ordering allows many TRACE() calls to be placed using copy-paste and just + * change the message at the beginning. Only TRACE_DEVEL(), TRACE_ENTER() and + * TRACE_LEAVE() will report the calling function's name. TRACE_PRINTF() does + * require all the optional a1..a4 to be passed (possibly zero) so that they're + * always followed by the format string, then the values to be formatted. 
+ * + * TRACE_* will call the _trace() macro which will test if the trace is enabled + * before calling the __trace() function. _trace() shouldn't be a function (nor + * inline) itself because we don't want the caller to compute its arguments if + * traces are not enabled. + * + * TRACE_ENABLED() reports whether or not trace is enabled for the current + * source, level, mask and arguments. + */ +#define TRACE_ENABLED(level, mask, args...) (_trace_enabled((level), (mask), TRACE_SOURCE, ist(TRC_LOC), __FUNCTION__, ##args)) + +#define TRACE(msg, mask, args...) \ + _trace(TRACE_LEVEL, (mask), TRACE_SOURCE, ist(TRC_LOC), NULL, TRC_5ARGS(0,##args,0,0,0,0,0), ist(msg)) + +#define TRACE_ERROR(msg, mask, args...) \ + _trace(TRACE_LEVEL_ERROR, (mask), TRACE_SOURCE, ist(TRC_LOC), NULL, TRC_5ARGS(0,##args,0,0,0,0,0), ist(msg)) + +#define TRACE_USER(msg, mask, args...) \ + _trace(TRACE_LEVEL_USER, (mask), TRACE_SOURCE, ist(TRC_LOC), NULL, TRC_5ARGS(0,##args,0,0,0,0,0), ist(msg)) + +#define TRACE_DATA(msg, mask, args...) \ + _trace(TRACE_LEVEL_DATA, (mask), TRACE_SOURCE, ist(TRC_LOC), NULL, TRC_5ARGS(0,##args,0,0,0,0,0), ist(msg)) + +#define TRACE_PROTO(msg, mask, args...) \ + _trace(TRACE_LEVEL_PROTO, (mask), TRACE_SOURCE, ist(TRC_LOC), NULL, TRC_5ARGS(0,##args,0,0,0,0,0), ist(msg)) + +#define TRACE_STATE(msg, mask, args...) \ + _trace(TRACE_LEVEL_STATE, (mask), TRACE_SOURCE, ist(TRC_LOC), NULL, TRC_5ARGS(0,##args,0,0,0,0,0), ist(msg)) + +#define TRACE_DEVEL(msg, mask, args...) \ + _trace(TRACE_LEVEL_DEVELOPER, (mask), TRACE_SOURCE, ist(TRC_LOC), __FUNCTION__, TRC_5ARGS(0,##args,0,0,0,0,0), ist(msg)) + +#define TRACE_ENTER(mask, args...) \ + _trace(TRACE_LEVEL_DEVELOPER, (mask), TRACE_SOURCE, ist(TRC_LOC), __FUNCTION__, TRC_5ARGS(0,##args,0,0,0,0,0), ist("entering")) + +#define TRACE_LEAVE(mask, args...) \ + _trace(TRACE_LEVEL_DEVELOPER, (mask), TRACE_SOURCE, ist(TRC_LOC), __FUNCTION__, TRC_5ARGS(0,##args,0,0,0,0,0), ist("leaving")) + +#define TRACE_POINT(mask, args...) 
\ + _trace(TRACE_LEVEL_DEVELOPER, (mask), TRACE_SOURCE, ist(TRC_LOC), __FUNCTION__, TRC_5ARGS(0,##args,0,0,0,0,0), ist("in")) + +/* This produces a printf-like trace at level <level> for event mask <mask> and + * trace arguments <a1..a4>. All args mandatory, but may be zero. No output + * callback will be used since we expect the caller to pass a fully formatted + * message that must not be degraded. The output will be truncated to + * TRACE_MAX_MSG-1 bytes (1023 by default). Caller must include <stdio.h> for + * snprintf(). One call will lead to one independent message, which means that + * multiple messages may be interleaved between threads, hence the caller is + * encouraged to prepend a context at the beginning of the format string when + * dumping lists or arrays. The _LOC variation takes the caller's location and + * function name as an ist and a (const char *) respectively, it is meant for + * being called from wrapper function which will work on behalf of a caller. + */ +#define TRACE_PRINTF(level, mask, a1, a2, a3, a4, fmt, args...) \ + TRACE_PRINTF_LOC(level, mask, ist(TRC_LOC), __FUNCTION__, a1, a2, a3, a4, fmt, ##args) + +#define TRACE_PRINTF_LOC(level, mask, trc_loc, func, a1, a2, a3, a4, fmt, args...) \ + do { \ + if (TRACE_ENABLED((level), (mask), a1, a2, a3, a4)) { \ + char _msg[TRACE_MAX_MSG]; \ + size_t _msg_len; \ + _msg_len = snprintf(_msg, sizeof(_msg), (fmt), ##args); \ + if (_msg_len >= sizeof(_msg)) \ + _msg_len = sizeof(_msg) - 1; \ + _trace((level), (mask), TRACE_SOURCE, \ + trc_loc, func, a1, a2, a3, a4, \ + &trace_no_cb, ist2(_msg, _msg_len)); \ + } \ + } while (0) + +#if defined(DEBUG_DEV) || defined(DEBUG_FULL) +# define DBG_TRACE(msg, mask, args...) TRACE(msg, mask, ##args) +# define DBG_TRACE_ERROR(msg, mask, args...) TRACE_ERROR(msg, mask, ##args) +# define DBG_TRACE_USER(msg, mask, args...) TRACE_USER(msg, mask, ##args) +# define DBG_TRACE_DATA(msg, mask, args...) 
TRACE_DATA(msg, mask, ##args) +# define DBG_TRACE_PROTO(msg, mask, args...) TRACE_PROTO(msg, mask, ##args) +# define DBG_TRACE_STATE(msg, mask, args...) TRACE_STATE(msg, mask, ##args) +# define DBG_TRACE_DEVEL(msg, mask, args...) TRACE_DEVEL(msg, mask, ##args) +# define DBG_TRACE_ENTER(mask, args...) TRACE_ENTER(mask, ##args) +# define DBG_TRACE_LEAVE(mask, args...) TRACE_LEAVE(mask, ##args) +# define DBG_TRACE_POINT(mask, args...) TRACE_POINT(mask, ##args) +# define DBG_TRACE_PRINTF(level, args...) TRACE_PRINTF(level, ##args) +# define DBG_TRACE_PRINTF_LOC(level, args...) TRACE_PRINTF_LOC(level, ##args) +#else +# define DBG_TRACE(msg, mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_ERROR(msg, mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_USER(msg, mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_DATA(msg, mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_PROTO(msg, mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_STATE(msg, mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_DEVEL(msg, mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_ENTER(mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_LEAVE(mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_POINT(mask, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_PRINTF(level, args...) do { /* do nothing */ } while(0) +# define DBG_TRACE_PRINTF_LOC(level, args...) 
do { /* do nothing */ } while(0) +#endif + +extern struct list trace_sources; +extern THREAD_LOCAL struct buffer trace_buf; + +int __trace_enabled(enum trace_level level, uint64_t mask, struct trace_source *src, + const struct ist where, const char *func, + const void *a1, const void *a2, const void *a3, const void *a4, + const void **plockptr); + +void __trace(enum trace_level level, uint64_t mask, struct trace_source *src, + const struct ist where, const char *func, + const void *a1, const void *a2, const void *a3, const void *a4, + void (*cb)(enum trace_level level, uint64_t mask, const struct trace_source *src, + const struct ist where, const struct ist func, + const void *a1, const void *a2, const void *a3, const void *a4), + const struct ist msg); + +void trace_no_cb(enum trace_level level, uint64_t mask, const struct trace_source *src, + const struct ist where, const struct ist func, + const void *a1, const void *a2, const void *a3, const void *a4); + +void trace_register_source(struct trace_source *source); + +int trace_parse_cmd(char *arg, char **errmsg); + +/* return a single char to describe a trace state */ +static inline char trace_state_char(enum trace_state st) +{ + return (st == TRACE_STATE_RUNNING) ? 'R' : + (st == TRACE_STATE_WAITING) ? 'w' : + '.'; +} + +/* return a single char to describe an event state */ +static inline char trace_event_char(uint64_t conf, uint64_t ev) +{ + return (conf & ev) ? '+' : '-'; +} + +#endif /* _HAPROXY_TRACE_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/uri_auth-t.h b/include/haproxy/uri_auth-t.h new file mode 100644 index 0000000..009adfd --- /dev/null +++ b/include/haproxy/uri_auth-t.h @@ -0,0 +1,56 @@ +/* + * include/haproxy/uri_auth-t.h + * Definitions for URI-based user authentication using the HTTP basic method. 
+ * + * Copyright 2006-2020 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_URI_AUTH_T_H +#define _HAPROXY_URI_AUTH_T_H + +#include <haproxy/acl-t.h> +#include <haproxy/auth-t.h> + +/* This is a list of proxies we are allowed to see. Later, it should go in the + * user list, but before this we need to support de/re-authentication. + */ +struct stat_scope { + struct stat_scope *next; /* next entry, NULL if none */ + int px_len; /* proxy name length */ + char *px_id; /* proxy id */ +}; + +/* later we may link them to support multiple URI matching */ +struct uri_auth { + int uri_len; /* the prefix length */ + char *uri_prefix; /* the prefix we want to match */ + char *auth_realm; /* the realm reported to the client */ + char *node, *desc; /* node name & description reported in this stats */ + int refresh; /* refresh interval for the browser (in seconds) */ + unsigned int flags; /* STAT_* flags from stats.h and for applet.ctx.stats.flags */ + struct stat_scope *scope; /* linked list of authorized proxies */ + struct userlist *userlist; /* private userlist to emulate legacy "stats auth user:password" */ + struct list http_req_rules; /* stats http-request rules : allow/deny/auth */ + struct list admin_rules; /* 'stats admin' rules (chained) */ + struct uri_auth *next; /* Used at deinit() to build a list of unique elements */ +}; + +struct stats_admin_rule { + struct list list; /* list linked to from the proxy */ + struct acl_cond *cond; /* acl condition to meet */ +}; + +#endif /* _HAPROXY_URI_AUTH_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/uri_auth.h b/include/haproxy/uri_auth.h new file mode 100644 index 0000000..27dca02 --- 
/dev/null +++ b/include/haproxy/uri_auth.h @@ -0,0 +1,44 @@ +/* + * include/haproxy/uri_auth.h + * Functions for URI-based user authentication using the HTTP basic method. + * + * Copyright 2006-2020 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_URI_AUTH_H +#define _HAPROXY_URI_AUTH_H + +#include <haproxy/api.h> +#include <haproxy/uri_auth-t.h> + +/* Various functions used to set the fields during the configuration parsing. + * Please note that all those functions can initialize the root entry in order not to + * force the user to respect a certain order in the configuration file. + * + * Default values are used during initialization. Check STATS_DEFAULT_* for + * more information. + */ +struct uri_auth *stats_check_init_uri_auth(struct uri_auth **root); +struct uri_auth *stats_set_uri(struct uri_auth **root, char *uri); +struct uri_auth *stats_set_realm(struct uri_auth **root, char *realm); +struct uri_auth *stats_set_refresh(struct uri_auth **root, int interval); +struct uri_auth *stats_set_flag(struct uri_auth **root, int flag); +struct uri_auth *stats_add_auth(struct uri_auth **root, char *user); +struct uri_auth *stats_add_scope(struct uri_auth **root, char *scope); +struct uri_auth *stats_set_node(struct uri_auth **root, char *name); +struct uri_auth *stats_set_desc(struct uri_auth **root, char *desc); + +#endif /* _HAPROXY_URI_AUTH_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/uri_normalizer-t.h b/include/haproxy/uri_normalizer-t.h new file mode 100644 index 0000000..bcbcaef --- /dev/null +++ b/include/haproxy/uri_normalizer-t.h @@ -0,0 +1,31 @@ +/* + * include/haproxy/uri_normalizer-t.h + * HTTP request URI normalization. 
+ * + * Copyright 2021 Tim Duesterhus <tim@bastelstu.be> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_URI_NORMALIZER_T_H +#define _HAPROXY_URI_NORMALIZER_T_H + +enum uri_normalizer_err { + URI_NORMALIZER_ERR_NONE = 0, + URI_NORMALIZER_ERR_ALLOC, + URI_NORMALIZER_ERR_INVALID_INPUT, + URI_NORMALIZER_ERR_INTERNAL_ERROR = 0xdead, +}; + +#endif /* _HAPROXY_URI_NORMALIZER_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h new file mode 100644 index 0000000..b384007 --- /dev/null +++ b/include/haproxy/uri_normalizer.h @@ -0,0 +1,44 @@ +/* + * include/haproxy/uri_normalizer.h + * HTTP request URI normalization. + * + * Copyright 2021 Tim Duesterhus <tim@bastelstu.be> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_URI_NORMALIZER_H +#define _HAPROXY_URI_NORMALIZER_H + +#include <import/ist.h> + +#include <haproxy/uri_normalizer-t.h> + +/* Cuts the input at the first '#'. 
*/ +static inline enum uri_normalizer_err uri_normalizer_fragment_strip(const struct ist input, struct ist *dst) +{ + *dst = iststop(input, '#'); + + return URI_NORMALIZER_ERR_NONE; +} + +enum uri_normalizer_err uri_normalizer_fragment_encode(const struct ist input, struct ist *dst); +enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst); +enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst); +enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst); +enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst); +enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst); +enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst); + +#endif /* _HAPROXY_URI_NORMALIZER_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/vars-t.h b/include/haproxy/vars-t.h new file mode 100644 index 0000000..e239b1c --- /dev/null +++ b/include/haproxy/vars-t.h @@ -0,0 +1,71 @@ +/* + * include/haproxy/vars-t.h + * Macros and structures definitions for variables. + * + * Copyright (C) 2015 Thierry FOURNIER <tfournier@arpalert.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_VARS_T_H +#define _HAPROXY_VARS_T_H + +#include <haproxy/sample_data-t.h> +#include <haproxy/thread-t.h> + +/* flags used when setting/clearing variables */ +#define VF_CREATEONLY 0x00000001 // do nothing if the variable already exists +#define VF_PERMANENT 0x00000002 // variables known to the config parser + +#define VF_COND_IFEXISTS 0x00000004 // only set variable if it already exists +#define VF_COND_IFNOTEXISTS 0x00000008 // only set variable if it did not exist yet +#define VF_COND_IFEMPTY 0x00000010 // only set variable if sample is empty +#define VF_COND_IFNOTEMPTY 0x00000020 // only set variable if sample is not empty +#define VF_COND_IFSET 0x00000040 // only set variable if its type is not SMP_TYPE_ANY +#define VF_COND_IFNOTSET 0x00000080 // only set variable if its type is ANY +#define VF_COND_IFGT 0x00000100 // only set variable if its value is greater than the sample's +#define VF_COND_IFLT 0x00000200 // only set variable if its value is less than the sample's + +enum vars_scope { + SCOPE_SESS = 0, + SCOPE_TXN, + SCOPE_REQ, + SCOPE_RES, + SCOPE_PROC, + SCOPE_CHECK, +}; + +struct vars { + struct list head; + enum vars_scope scope; + unsigned int size; + __decl_thread(HA_RWLOCK_T rwlock); +}; + +/* This struct describes a variable as found in an arg_data */ +struct var_desc { + uint64_t name_hash; + enum vars_scope scope; +}; + +struct var { + struct list l; /* Used for chaining vars. */ + uint64_t name_hash; /* XXH3() of the variable's name */ + uint flags; // VF_* + /* 32-bit hole here */ + struct sample_data data; /* data storage. 
*/ +}; + +#endif diff --git a/include/haproxy/vars.h b/include/haproxy/vars.h new file mode 100644 index 0000000..ebd1f15 --- /dev/null +++ b/include/haproxy/vars.h @@ -0,0 +1,72 @@ +/* + * include/haproxy/vars.h + * Prototypes for variables. + * + * Copyright (C) 2015 Thierry FOURNIER <tfournier@arpalert.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_VARS_H +#define _HAPROXY_VARS_H + +#include <haproxy/api-t.h> +#include <haproxy/session-t.h> +#include <haproxy/stream-t.h> +#include <haproxy/vars-t.h> + +extern struct vars proc_vars; + +void vars_init_head(struct vars *vars, enum vars_scope scope); +void var_accounting_diff(struct vars *vars, struct session *sess, struct stream *strm, int size); +unsigned int var_clear(struct var *var, int force); +void vars_prune(struct vars *vars, struct session *sess, struct stream *strm); +void vars_prune_per_sess(struct vars *vars); +int vars_get_by_name(const char *name, size_t len, struct sample *smp, const struct buffer *def); +int vars_set_by_name_ifexist(const char *name, size_t len, struct sample *smp); +int vars_set_by_name(const char *name, size_t len, struct sample *smp); +int vars_unset_by_name_ifexist(const char *name, size_t len, struct sample *smp); +int vars_get_by_desc(const struct var_desc *var_desc, struct sample *smp, const struct 
buffer *def); +int vars_check_arg(struct arg *arg, char **err); + +/* locks the <vars> for writes if it's in a shared scope */ +static inline void vars_wrlock(struct vars *vars) +{ + if (vars->scope == SCOPE_PROC) + HA_RWLOCK_WRLOCK(VARS_LOCK, &vars->rwlock); +} + +/* unlocks the <vars> for writes if it's in a shared scope */ +static inline void vars_wrunlock(struct vars *vars) +{ + if (vars->scope == SCOPE_PROC) + HA_RWLOCK_WRUNLOCK(VARS_LOCK, &vars->rwlock); +} + +/* locks the <vars> for reads if it's in a shared scope */ +static inline void vars_rdlock(struct vars *vars) +{ + if (vars->scope == SCOPE_PROC) + HA_RWLOCK_RDLOCK(VARS_LOCK, &vars->rwlock); +} + +/* unlocks the <vars> for reads if it's in a shared scope */ +static inline void vars_rdunlock(struct vars *vars) +{ + if (vars->scope == SCOPE_PROC) + HA_RWLOCK_RDUNLOCK(VARS_LOCK, &vars->rwlock); +} + +#endif diff --git a/include/haproxy/version.h b/include/haproxy/version.h new file mode 100644 index 0000000..651a8de --- /dev/null +++ b/include/haproxy/version.h @@ -0,0 +1,86 @@ +/* + * include/haproxy/version.h + * This file serves as a template for future include files. + * + * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_VERSION_H +#define _HAPROXY_VERSION_H + +#include <haproxy/api.h> + +#ifdef CONFIG_PRODUCT_NAME +#define PRODUCT_NAME CONFIG_PRODUCT_NAME +#else +#define PRODUCT_NAME "HAProxy" +#endif + +#ifdef CONFIG_PRODUCT_BRANCH +#define PRODUCT_BRANCH CONFIG_PRODUCT_BRANCH +#else +#define PRODUCT_BRANCH "2.9" +#endif + +#ifdef CONFIG_PRODUCT_STATUS +#define PRODUCT_STATUS CONFIG_PRODUCT_STATUS +#else +#define PRODUCT_STATUS "Status: stable branch - will stop receiving fixes around Q1 2025." +#endif + +#ifdef CONFIG_PRODUCT_URL_BUGS +#define PRODUCT_URL_BUGS CONFIG_PRODUCT_URL_BUGS +#else +#define PRODUCT_URL_BUGS "http://www.haproxy.org/bugs/bugs-%s.html" +#endif + +#ifdef CONFIG_PRODUCT_URL +#define PRODUCT_URL CONFIG_PRODUCT_URL +#else +#define PRODUCT_URL "http://www.haproxy.org/" +#endif + +#ifdef CONFIG_PRODUCT_URL_UPD +#define PRODUCT_URL_UPD CONFIG_PRODUCT_URL_UPD +#else +#define PRODUCT_URL_UPD "http://www.haproxy.org/#down" +#endif + +#ifdef CONFIG_PRODUCT_URL_DOC +#define PRODUCT_URL_DOC CONFIG_PRODUCT_URL_DOC +#else +#define PRODUCT_URL_DOC "http://www.haproxy.org/#docs" +#endif + +#ifdef CONFIG_HAPROXY_VERSION +#define HAPROXY_VERSION CONFIG_HAPROXY_VERSION +#else +#error "Must define CONFIG_HAPROXY_VERSION" +#endif + +#ifdef CONFIG_HAPROXY_DATE +#define HAPROXY_DATE CONFIG_HAPROXY_DATE +#else +#error "Must define CONFIG_HAPROXY_DATE" +#endif + +extern char haproxy_version[]; +extern char haproxy_date[]; +extern char stats_version_string[]; + +#endif /* _HAPROXY_VERSION_H */ + diff --git a/include/haproxy/xref-t.h b/include/haproxy/xref-t.h new file mode 100644 index 0000000..a2aed54 --- /dev/null +++ b/include/haproxy/xref-t.h @@ -0,0 +1,45 @@ +/* + * include/haproxy/xref-t.h + * Atomic cross-references between two 
elements - types + * + * Copyright (C) 2017 Thierry Fournier <thierry.fournier@ozon.io> + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __HAPROXY_XREF_T_H__ +#define __HAPROXY_XREF_T_H__ + +/* xref is used to create relation between two elements. + * Once an element is released, it breaks the relation. If the + * relation is already broken, it frees the xref struct. + * The pointer between two elements is sort of a refcount with + * max value 1. The relation is only between two elements. + * The pointer and the type of elements a and b are conventional. 
+ */ + +#define XREF_BUSY ((struct xref *)1) + +struct xref { + struct xref *peer; +}; + +#endif /* __HAPROXY_XREF_T_H__ */ diff --git a/include/haproxy/xref.h b/include/haproxy/xref.h new file mode 100644 index 0000000..42eed58 --- /dev/null +++ b/include/haproxy/xref.h @@ -0,0 +1,105 @@ +/* + * include/haproxy/xref.h + * Atomic cross-references between two elements - functions + * + * Copyright (C) 2017 Thierry Fournier <thierry.fournier@ozon.io> + * Copyright (C) 2020 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __HAPROXY_XREF_H__ +#define __HAPROXY_XREF_H__ + +#include <haproxy/xref-t.h> + +/* xref is used to create relation between two elements. + * Once an element is released, it breaks the relation. If the + * relation is already broken, it frees the xref struct. + * The pointer between two elements is sort of a refcount with + * max value 1. The relation is only between two elements. 
+ * The pointer and the type of elements a and b are conventional. + */ + +static inline void xref_create(struct xref *xref_a, struct xref *xref_b) +{ + xref_a->peer = xref_b; + xref_b->peer = xref_a; +} + +static inline struct xref *xref_get_peer_and_lock(struct xref *xref) +{ + struct xref *local; + struct xref *remote; + + while (1) { + + /* Get the local pointer to the peer. */ + local = _HA_ATOMIC_XCHG(&xref->peer, XREF_BUSY); + __ha_barrier_atomic_store(); + + /* If the local pointer is NULL, the peer no longer exists. */ + if (local == NULL) { + xref->peer = NULL; + return NULL; + } + + /* If the local pointer is BUSY, the peer is trying to acquire the + * lock. We retry the process. + */ + if (local == XREF_BUSY) + continue; + + /* We are locked, the peer can't disappear, try to acquire + * the peer's lock. Note that remote can't be NULL. + */ + remote = _HA_ATOMIC_XCHG(&local->peer, XREF_BUSY); + + /* The remote lock is BUSY, we retry the process. */ + if (remote == XREF_BUSY) { + xref->peer = local; + __ha_barrier_store(); + continue; + } + + /* We have the lock, we return the value of the xref. */ + return local; + } +} + +static inline void xref_unlock(struct xref *xref, struct xref *peer) +{ + /* Release the peer. */ + peer->peer = xref; + + __ha_barrier_store(); + + /* Release myself. 
*/ + xref->peer = peer; +} + +static inline void xref_disconnect(struct xref *xref, struct xref *peer) +{ + peer->peer = NULL; + __ha_barrier_store(); + xref->peer = NULL; +} + +#endif /* __HAPROXY_XREF_H__ */ diff --git a/include/haproxy/xxhash.h b/include/haproxy/xxhash.h new file mode 100644 index 0000000..cd333e6 --- /dev/null +++ b/include/haproxy/xxhash.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2020 Dragan Dosen <ddosen@haproxy.com> + * Copyright (C) 2021 Tim Duesterhus <tim@bastelstu.be> + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _HAPROXY_XXHASH_H +#define _HAPROXY_XXHASH_H + +/* Make all xxhash functions inline, with implementations being directly + * included within xxhash.h. + */ +#ifndef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#else +#error "XXH_INLINE_ALL is already defined." +#endif + +#include <import/xxhash.h> + +/* Make the new complex name for the xxhash function easier to remember + * and use. + */ +#ifndef XXH3 +#define XXH3(data, len, seed) XXH3_64bits_withSeed(data, len, seed) +#endif + +#endif |