Diffstat (limited to 'src')
-rw-r--r--  src/acl.c  1378
-rw-r--r--  src/action.c  348
-rw-r--r--  src/activity.c  996
-rw-r--r--  src/applet.c  269
-rw-r--r--  src/arg.c  479
-rw-r--r--  src/auth.c  316
-rw-r--r--  src/backend.c  3379
-rw-r--r--  src/base64.c  303
-rw-r--r--  src/cache.c  2764
-rw-r--r--  src/calltrace.c  286
-rw-r--r--  src/cbuf.c  59
-rw-r--r--  src/cfgcond.c  521
-rw-r--r--  src/cfgdiag.c  97
-rw-r--r--  src/cfgparse-global.c  1304
-rw-r--r--  src/cfgparse-listen.c  3207
-rw-r--r--  src/cfgparse-quic.c  143
-rw-r--r--  src/cfgparse-ssl.c  2027
-rw-r--r--  src/cfgparse-tcp.c  296
-rw-r--r--  src/cfgparse-unix.c  135
-rw-r--r--  src/cfgparse.c  4570
-rw-r--r--  src/channel.c  591
-rw-r--r--  src/check.c  2363
-rw-r--r--  src/chunk.c  317
-rw-r--r--  src/cli.c  3210
-rw-r--r--  src/clock.c  405
-rw-r--r--  src/compression.c  740
-rw-r--r--  src/connection.c  2454
-rw-r--r--  src/cpuset.c  120
-rw-r--r--  src/debug.c  1467
-rw-r--r--  src/dgram.c  30
-rw-r--r--  src/dict.c  127
-rw-r--r--  src/dns.c  1350
-rw-r--r--  src/dynbuf.c  129
-rw-r--r--  src/eb32sctree.c  472
-rw-r--r--  src/eb32tree.c  218
-rw-r--r--  src/eb64tree.c  218
-rw-r--r--  src/ebimtree.c  44
-rw-r--r--  src/ebistree.c  42
-rw-r--r--  src/ebmbtree.c  77
-rw-r--r--  src/ebpttree.c  208
-rw-r--r--  src/ebsttree.c  42
-rw-r--r--  src/ebtree.c  50
-rw-r--r--  src/errors.c  380
-rw-r--r--  src/ev_epoll.c  407
-rw-r--r--  src/ev_evports.c  447
-rw-r--r--  src/ev_kqueue.c  386
-rw-r--r--  src/ev_poll.c  343
-rw-r--r--  src/ev_select.c  332
-rw-r--r--  src/extcheck.c  678
-rw-r--r--  src/fcgi-app.c  1152
-rw-r--r--  src/fcgi.c  294
-rw-r--r--  src/fd.c  1214
-rw-r--r--  src/filters.c  1133
-rw-r--r--  src/fix.c  264
-rw-r--r--  src/flt_http_comp.c  872
-rw-r--r--  src/flt_spoe.c  4785
-rw-r--r--  src/flt_trace.c  675
-rw-r--r--  src/freq_ctr.c  104
-rw-r--r--  src/frontend.c  312
-rw-r--r--  src/h1.c  1278
-rw-r--r--  src/h1_htx.c  1072
-rw-r--r--  src/h2.c  814
-rw-r--r--  src/h3.c  1545
-rw-r--r--  src/h3_stats.c  276
-rw-r--r--  src/haproxy.c  3674
-rw-r--r--  src/hash.c  189
-rw-r--r--  src/hlua.c  12681
-rw-r--r--  src/hlua_fcn.c  1780
-rw-r--r--  src/hpack-dec.c  475
-rw-r--r--  src/hpack-enc.c  210
-rw-r--r--  src/hpack-huff.c  1532
-rw-r--r--  src/hpack-tbl.c  372
-rw-r--r--  src/hq_interop.c  172
-rw-r--r--  src/http.c  1309
-rw-r--r--  src/http_acl.c  185
-rw-r--r--  src/http_act.c  2498
-rw-r--r--  src/http_ana.c  5277
-rw-r--r--  src/http_client.c  1429
-rw-r--r--  src/http_conv.c  453
-rw-r--r--  src/http_fetch.c  2248
-rw-r--r--  src/http_htx.c  2915
-rw-r--r--  src/http_rules.c  498
-rw-r--r--  src/htx.c  1087
-rw-r--r--  src/init.c  249
-rw-r--r--  src/jwt.c  461
-rw-r--r--  src/lb_chash.c  517
-rw-r--r--  src/lb_fas.c  348
-rw-r--r--  src/lb_fwlc.c  375
-rw-r--r--  src/lb_fwrr.c  623
-rw-r--r--  src/lb_map.c  281
-rw-r--r--  src/listener.c  1956
-rw-r--r--  src/log.c  3968
-rw-r--r--  src/lru.c  305
-rw-r--r--  src/mailers.c  321
-rw-r--r--  src/map.c  1229
-rw-r--r--  src/mjson.c  1048
-rw-r--r--  src/mqtt.c  1281
-rw-r--r--  src/mux_fcgi.c  4318
-rw-r--r--  src/mux_h1.c  4316
-rw-r--r--  src/mux_h2.c  7149
-rw-r--r--  src/mux_pt.c  709
-rw-r--r--  src/mux_quic.c  2321
-rw-r--r--  src/mworker-prog.c  356
-rw-r--r--  src/mworker.c  743
-rw-r--r--  src/namespace.c  131
-rw-r--r--  src/ncbuf.c  984
-rw-r--r--  src/pattern.c  2702
-rw-r--r--  src/payload.c  1448
-rw-r--r--  src/peers.c  4076
-rw-r--r--  src/pipe.c  136
-rw-r--r--  src/pool.c  1108
-rw-r--r--  src/proto_quic.c  719
-rw-r--r--  src/proto_sockpair.c  564
-rw-r--r--  src/proto_tcp.c  824
-rw-r--r--  src/proto_udp.c  234
-rw-r--r--  src/proto_uxdg.c  152
-rw-r--r--  src/proto_uxst.c  361
-rw-r--r--  src/protocol.c  237
-rw-r--r--  src/proxy.c  3373
-rw-r--r--  src/qmux_http.c  131
-rw-r--r--  src/qmux_trace.c  108
-rw-r--r--  src/qpack-dec.c  563
-rw-r--r--  src/qpack-enc.c  185
-rw-r--r--  src/qpack-tbl.c  415
-rw-r--r--  src/queue.c  761
-rw-r--r--  src/quic_cc.c  49
-rw-r--r--  src/quic_cc_cubic.c  283
-rw-r--r--  src/quic_cc_newreno.c  173
-rw-r--r--  src/quic_conn.c  7565
-rw-r--r--  src/quic_frame.c  1168
-rw-r--r--  src/quic_loss.c  205
-rw-r--r--  src/quic_sock.c  595
-rw-r--r--  src/quic_stats.c  204
-rw-r--r--  src/quic_stream.c  267
-rw-r--r--  src/quic_tls.c  672
-rw-r--r--  src/quic_tp.c  711
-rw-r--r--  src/raw_sock.c  489
-rw-r--r--  src/regex.c  459
-rw-r--r--  src/resolvers.c  3801
-rw-r--r--  src/ring.c  444
-rw-r--r--  src/sample.c  4452
-rw-r--r--  src/server.c  6074
-rw-r--r--  src/server_state.c  931
-rw-r--r--  src/session.c  460
-rw-r--r--  src/sha1.c  308
-rw-r--r--  src/shctx.c  355
-rw-r--r--  src/signal.c  284
-rw-r--r--  src/sink.c  1434
-rw-r--r--  src/slz.c  1341
-rw-r--r--  src/sock.c  1000
-rw-r--r--  src/sock_inet.c  488
-rw-r--r--  src/sock_unix.c  350
-rw-r--r--  src/ssl_ckch.c  3938
-rw-r--r--  src/ssl_crtlist.c  1510
-rw-r--r--  src/ssl_sample.c  2225
-rw-r--r--  src/ssl_sock.c  8319
-rw-r--r--  src/ssl_utils.c  419
-rw-r--r--  src/stats.c  5361
-rw-r--r--  src/stconn.c  2012
-rw-r--r--  src/stick_table.c  5190
-rw-r--r--  src/stream.c  3976
-rw-r--r--  src/task.c  1044
-rw-r--r--  src/tcp_act.c  572
-rw-r--r--  src/tcp_rules.c  1428
-rw-r--r--  src/tcp_sample.c  530
-rw-r--r--  src/tcpcheck.c  5209
-rw-r--r--  src/thread.c  1371
-rw-r--r--  src/time.c  147
-rw-r--r--  src/tools.c  5853
-rw-r--r--  src/trace.c  761
-rw-r--r--  src/uri_auth.c  318
-rw-r--r--  src/uri_normalizer.c  467
-rw-r--r--  src/vars.c  1453
-rw-r--r--  src/version.c  28
-rw-r--r--  src/wdt.c  189
-rw-r--r--  src/xprt_handshake.c  299
-rw-r--r--  src/xprt_quic.c  167
177 files changed, 234732 insertions, 0 deletions
diff --git a/src/acl.c b/src/acl.c
new file mode 100644
index 0000000..15ca766
--- /dev/null
+++ b/src/acl.c
@@ -0,0 +1,1378 @@
+/*
+ * ACL management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <import/ebsttree.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pattern.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/sample.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/tools.h>
+
+/* List head of all known ACL keywords */
+static struct acl_kw_list acl_keywords = {
+ .list = LIST_HEAD_INIT(acl_keywords.list)
+};
+
+/* input values are 0 or 3, output is the same */
+static inline enum acl_test_res pat2acl(struct pattern *pat)
+{
+ if (pat)
+ return ACL_TEST_PASS;
+ else
+ return ACL_TEST_FAIL;
+}
+
+/*
+ * Registers the ACL keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void acl_register_keywords(struct acl_kw_list *kwl)
+{
+ LIST_APPEND(&acl_keywords.list, &kwl->list);
+}
+
+/*
+ * Unregisters the ACL keyword list <kwl> from the list of valid keywords.
+ */
+void acl_unregister_keywords(struct acl_kw_list *kwl)
+{
+ LIST_DELETE(&kwl->list);
+ LIST_INIT(&kwl->list);
+}
+
+/* Return a pointer to the ACL <name> within the list starting at <head>, or
+ * NULL if not found.
+ */
+struct acl *find_acl_by_name(const char *name, struct list *head)
+{
+ struct acl *acl;
+ list_for_each_entry(acl, head, list) {
+ if (strcmp(acl->name, name) == 0)
+ return acl;
+ }
+ return NULL;
+}
+
+/* Return a pointer to the ACL keyword <kw>, or NULL if not found. Note that if
+ * <kw> contains an opening parenthesis or a comma, only the left part of it is
+ * checked.
+ */
+struct acl_keyword *find_acl_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct acl_kw_list *kwl;
+
+ kwend = kw;
+ while (is_idchar(*kwend))
+ kwend++;
+
+ list_for_each_entry(kwl, &acl_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0)
+ return &kwl->kw[index];
+ }
+ }
+ return NULL;
+}
+
+static struct acl_expr *prune_acl_expr(struct acl_expr *expr)
+{
+ struct arg *arg;
+
+ pattern_prune(&expr->pat);
+
+ for (arg = expr->smp->arg_p; arg; arg++) {
+ if (arg->type == ARGT_STOP)
+ break;
+ if (arg->type == ARGT_STR || arg->unresolved) {
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ }
+ }
+
+ release_sample_expr(expr->smp);
+
+ return expr;
+}
+
+/* Parse an ACL expression starting at <args>[0], and return it. If <err> is
+ * not NULL, it will be filled with a pointer to an error message in case of
+ * error. This pointer must be freeable or NULL. <al> is an arg_list serving
+ * as a list head to report missing dependencies. It may be NULL if such
+ * dependencies are not allowed.
+ *
+ * Right now, the only accepted syntax is :
+ * <subject> [<value>...]
+ */
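+/* As an illustrative sketch (configuration-level syntax, independent of this
+ * file), such an expression may look like:
+ *
+ *   src 10.0.0.0/8
+ *   hdr(host) -i example.com
+ *
+ * where the first word is the <subject> (an ACL or sample fetch keyword) and
+ * the remaining words are the <value>s (patterns) to match against.
+ */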
+struct acl_expr *parse_acl_expr(const char **args, char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_expr;
+ struct acl_expr *expr;
+ struct acl_keyword *aclkw;
+ int refflags, patflags;
+ const char *arg;
+ struct sample_expr *smp = NULL;
+ int idx = 0;
+ char *ckw = NULL;
+ const char *begw;
+ const char *endw;
+ const char *endt;
+ int cur_type;
+ int nbargs;
+ int operator = STD_OP_EQ;
+ int op;
+ int contain_colon, have_dot;
+ const char *dot;
+ signed long long value, minor;
+ /* The following buffer contains two numbers, a ':' separator and the final \0. */
+ char buffer[NB_LLMAX_STR + 1 + NB_LLMAX_STR + 1];
+ int is_loaded;
+ int unique_id;
+ char *error;
+ struct pat_ref *ref;
+ struct pattern_expr *pattern_expr;
+ int load_as_map = 0;
+ int acl_conv_found = 0;
+
+ /* First, we look for an ACL keyword. And if we don't find one, then
+ * we look for a sample fetch expression starting with a sample fetch
+ * keyword.
+ */
+
+ if (al) {
+ al->ctx = ARGC_ACL; // to report errors while resolving args late
+ al->kw = *args;
+ al->conv = NULL;
+ }
+
+ aclkw = find_acl_kw(args[0]);
+ if (aclkw) {
+ /* OK we have a real ACL keyword */
+
+ /* build new sample expression for this ACL */
+ smp = calloc(1, sizeof(*smp));
+ if (!smp) {
+ memprintf(err, "out of memory when parsing ACL expression");
+ goto out_return;
+ }
+ LIST_INIT(&(smp->conv_exprs));
+ smp->fetch = aclkw->smp;
+ smp->arg_p = empty_arg_list;
+
+ /* look for the beginning of the subject arguments */
+ for (arg = args[0]; is_idchar(*arg); arg++)
+ ;
+
+ /* At this point, we have :
+ * - args[0] : beginning of the keyword
+ * - arg : end of the keyword, first character not part of keyword
+ */
+ nbargs = make_arg_list(arg, -1, smp->fetch->arg_mask, &smp->arg_p,
+ err, &endt, NULL, al);
+ if (nbargs < 0) {
+ /* note that make_arg_list will have set <err> here */
+ memprintf(err, "ACL keyword '%s' : %s", aclkw->kw, *err);
+ goto out_free_smp;
+ }
+
+ if (!smp->arg_p) {
+ smp->arg_p = empty_arg_list;
+ }
+ else if (smp->fetch->val_args && !smp->fetch->val_args(smp->arg_p, err)) {
+ /* invalid keyword argument, error must have been
+ * set by val_args().
+ */
+ memprintf(err, "in argument to '%s', %s", aclkw->kw, *err);
+ goto out_free_smp;
+ }
+ arg = endt;
+
+ /* look for the beginning of the converters list. Those directly attached
+ * to the ACL keyword are found just after <arg> which points to the comma.
+ * If we find any converter, then we don't use the ACL keyword's match
+ * anymore but the one related to the converter's output type.
+ */
+ cur_type = smp->fetch->out_type;
+ while (*arg) {
+ struct sample_conv *conv;
+ struct sample_conv_expr *conv_expr;
+ int err_arg;
+ int argcnt;
+
+ if (*arg && *arg != ',') {
+ if (ckw)
+ memprintf(err, "ACL keyword '%s' : missing comma after converter '%s'.",
+ aclkw->kw, ckw);
+ else
+ memprintf(err, "ACL keyword '%s' : missing comma after fetch keyword.",
+ aclkw->kw);
+ goto out_free_smp;
+ }
+
+ /* FIXME: how long should we support such idiocies ? Maybe we
+ * should already warn ?
+ */
+ while (*arg == ',') /* then trailing commas */
+ arg++;
+
+ begw = arg; /* start of converter keyword */
+
+ if (!*begw)
+ /* none ? end of converters */
+ break;
+
+ for (endw = begw; is_idchar(*endw); endw++)
+ ;
+
+ free(ckw);
+ ckw = my_strndup(begw, endw - begw);
+
+ conv = find_sample_conv(begw, endw - begw);
+ if (!conv) {
+ /* Unknown converter method */
+ memprintf(err, "ACL keyword '%s' : unknown converter '%s'.",
+ aclkw->kw, ckw);
+ goto out_free_smp;
+ }
+
+ arg = endw;
+
+ if (conv->in_type >= SMP_TYPES || conv->out_type >= SMP_TYPES) {
+ memprintf(err, "ACL keyword '%s' : returns type of converter '%s' is unknown.",
+ aclkw->kw, ckw);
+ goto out_free_smp;
+ }
+
+ /* If impossible type conversion */
+ if (!sample_casts[cur_type][conv->in_type]) {
+ memprintf(err, "ACL keyword '%s' : converter '%s' cannot be applied.",
+ aclkw->kw, ckw);
+ goto out_free_smp;
+ }
+
+ cur_type = conv->out_type;
+ conv_expr = calloc(1, sizeof(*conv_expr));
+ if (!conv_expr)
+ goto out_free_smp;
+
+ LIST_APPEND(&(smp->conv_exprs), &(conv_expr->list));
+ conv_expr->conv = conv;
+ acl_conv_found = 1;
+
+ if (al) {
+ al->kw = smp->fetch->kw;
+ al->conv = conv_expr->conv->kw;
+ }
+ argcnt = make_arg_list(endw, -1, conv->arg_mask, &conv_expr->arg_p, err, &arg, &err_arg, al);
+ if (argcnt < 0) {
+ memprintf(err, "ACL keyword '%s' : invalid arg %d in converter '%s' : %s.",
+ aclkw->kw, err_arg+1, ckw, *err);
+ goto out_free_smp;
+ }
+
+ if (argcnt && !conv->arg_mask) {
+ memprintf(err, "converter '%s' does not support any args", ckw);
+ goto out_free_smp;
+ }
+
+ if (!conv_expr->arg_p)
+ conv_expr->arg_p = empty_arg_list;
+
+ if (conv->val_args && !conv->val_args(conv_expr->arg_p, conv, file, line, err)) {
+ memprintf(err, "ACL keyword '%s' : invalid args in converter '%s' : %s.",
+ aclkw->kw, ckw, *err);
+ goto out_free_smp;
+ }
+ }
+ ha_free(&ckw);
+ }
+ else {
+ /* This is not an ACL keyword, so we hope this is a sample fetch
+ * keyword that we're going to transparently use as an ACL. If
+ * so, we retrieve a completely parsed expression with args and
+ * convs already done.
+ */
+ smp = sample_parse_expr((char **)args, &idx, file, line, err, al, NULL);
+ if (!smp) {
+ memprintf(err, "%s in ACL expression '%s'", *err, *args);
+ goto out_return;
+ }
+ cur_type = smp_expr_output_type(smp);
+ }
+
+ expr = calloc(1, sizeof(*expr));
+ if (!expr) {
+ memprintf(err, "out of memory when parsing ACL expression");
+ goto out_free_smp;
+ }
+
+ pattern_init_head(&expr->pat);
+
+ expr->pat.expect_type = cur_type;
+ expr->smp = smp;
+ expr->kw = smp->fetch->kw;
+ smp = NULL; /* don't free it anymore */
+
+ if (aclkw && !acl_conv_found) {
+ expr->kw = aclkw->kw;
+ expr->pat.parse = aclkw->parse ? aclkw->parse : pat_parse_fcts[aclkw->match_type];
+ expr->pat.index = aclkw->index ? aclkw->index : pat_index_fcts[aclkw->match_type];
+ expr->pat.match = aclkw->match ? aclkw->match : pat_match_fcts[aclkw->match_type];
+ expr->pat.prune = aclkw->prune ? aclkw->prune : pat_prune_fcts[aclkw->match_type];
+ }
+
+ if (!expr->pat.parse) {
+ /* Parse/index/match functions depend on the expression type,
+ * so we have to map them now. Some types can be automatically
+ * converted.
+ */
+ switch (cur_type) {
+ case SMP_T_BOOL:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_BOOL];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_BOOL];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_BOOL];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_BOOL];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_BOOL];
+ break;
+ case SMP_T_SINT:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_INT];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_INT];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_INT];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_INT];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_INT];
+ break;
+ case SMP_T_ADDR:
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_IP];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_IP];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_IP];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_IP];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_IP];
+ break;
+ case SMP_T_STR:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_STR];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_STR];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_STR];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_STR];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_STR];
+ break;
+ }
+ }
+
+ /* Additional check to protect against common mistakes */
+ if (expr->pat.parse && cur_type != SMP_T_BOOL && !*args[1]) {
+ ha_warning("parsing acl keyword '%s' :\n"
+ " no pattern to match against were provided, so this ACL will never match.\n"
+ " If this is what you intended, please add '--' to get rid of this warning.\n"
+ " If you intended to match only for existence, please use '-m found'.\n"
+ " If you wanted to force an int to match as a bool, please use '-m bool'.\n"
+ "\n",
+ args[0]);
+ }
+
+ args++;
+
+ /* check for options before patterns. Supported options are :
+ * -i : ignore case for all patterns by default
+ * -n : forbid DNS resolutions
+ * -f : read patterns from those files
+ * -m : force matching method (must be used before -f)
+ * -M : load the file as map file
+ * -u : force the unique id of the acl
+ * -- : everything after this is not an option
+ */
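+ /* Illustrative examples of these options in a configuration (the file
+ * name below is hypothetical):
+ *
+ *   acl blocked src -f /etc/haproxy/blocklist.lst
+ *   acl is_host hdr(host) -i example.com
+ *
+ * The first loads its patterns from a file, the second matches the Host
+ * header case-insensitively.
+ */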
+ refflags = PAT_REF_ACL;
+ patflags = 0;
+ is_loaded = 0;
+ unique_id = -1;
+ while (**args == '-') {
+ if (strcmp(*args, "-i") == 0)
+ patflags |= PAT_MF_IGNORE_CASE;
+ else if (strcmp(*args, "-n") == 0)
+ patflags |= PAT_MF_NO_DNS;
+ else if (strcmp(*args, "-u") == 0) {
+ unique_id = strtol(args[1], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "the argument of -u must be an integer");
+ goto out_free_expr;
+ }
+
+ /* Check if this id is really unique. */
+ if (pat_ref_lookupid(unique_id)) {
+ memprintf(err, "the id is already used");
+ goto out_free_expr;
+ }
+
+ args++;
+ }
+ else if (strcmp(*args, "-f") == 0) {
+ if (!expr->pat.parse) {
+ memprintf(err, "matching method must be specified first (using '-m') when using a sample fetch of this type ('%s')", expr->kw);
+ goto out_free_expr;
+ }
+
+ if (!pattern_read_from_file(&expr->pat, refflags, args[1], patflags, load_as_map, err, file, line))
+ goto out_free_expr;
+ is_loaded = 1;
+ args++;
+ }
+ else if (strcmp(*args, "-m") == 0) {
+ int idx;
+
+ if (is_loaded) {
+ memprintf(err, "'-m' must only be specified before patterns and files in parsing ACL expression");
+ goto out_free_expr;
+ }
+
+ idx = pat_find_match_name(args[1]);
+ if (idx < 0) {
+ memprintf(err, "unknown matching method '%s' when parsing ACL expression", args[1]);
+ goto out_free_expr;
+ }
+
+ /* Note: -m found is always valid, bool/int are compatible, str/bin/reg/len are compatible */
+ if (idx != PAT_MATCH_FOUND && !sample_casts[cur_type][pat_match_types[idx]]) {
+ memprintf(err, "matching method '%s' cannot be used with fetch keyword '%s'", args[1], expr->kw);
+ goto out_free_expr;
+ }
+ expr->pat.parse = pat_parse_fcts[idx];
+ expr->pat.index = pat_index_fcts[idx];
+ expr->pat.match = pat_match_fcts[idx];
+ expr->pat.prune = pat_prune_fcts[idx];
+ expr->pat.expect_type = pat_match_types[idx];
+ args++;
+ }
+ else if (strcmp(*args, "-M") == 0) {
+ refflags |= PAT_REF_MAP;
+ load_as_map = 1;
+ }
+ else if (strcmp(*args, "--") == 0) {
+ args++;
+ break;
+ }
+ else {
+ memprintf(err, "'%s' is not a valid ACL option. Please use '--' before any pattern beginning with a '-'", args[0]);
+ goto out_free_expr;
+ }
+ args++;
+ }
+
+ if (!expr->pat.parse) {
+ memprintf(err, "matching method must be specified first (using '-m') when using a sample fetch of this type ('%s')", expr->kw);
+ goto out_free_expr;
+ }
+
+ /* Create displayed reference */
+ snprintf(trash.area, trash.size, "acl '%s' file '%s' line %d",
+ expr->kw, file, line);
+ trash.area[trash.size - 1] = '\0';
+
+ /* Create new pattern reference. */
+ ref = pat_ref_newid(unique_id, trash.area, PAT_REF_ACL);
+ if (!ref) {
+ memprintf(err, "memory error");
+ goto out_free_expr;
+ }
+
+ /* Create new pattern expression associated to this reference. */
+ pattern_expr = pattern_new_expr(&expr->pat, ref, patflags, err, NULL);
+ if (!pattern_expr)
+ goto out_free_expr;
+
+ /* now parse all patterns */
+ while (**args) {
+ arg = *args;
+
+ /* Compatibility layer. Each parser takes only one string per pattern,
+ * but the pat_parse_int() and pat_parse_dotted_ver() parsers may
+ * optionally be preceded by an operator. This operator is the match
+ * method: eq, le, lt, ge or gt. Both functions support an equivalent
+ * compatibility syntax based on ranges:
+ *
+ * pat_parse_int():
+ *
+ * "eq x" -> "x" or "x:x"
+ * "le x" -> ":x"
+ * "lt x" -> ":y" (with y = x - 1)
+ * "ge x" -> "x:"
+ * "gt x" -> "y:" (with y = x + 1)
+ *
+ * pat_parse_dotted_ver():
+ *
+ * "eq x.y" -> "x.y" or "x.y:x.y"
+ * "le x.y" -> ":x.y"
+ * "lt x.y" -> ":w.z" (with w.z = x.y - 1)
+ * "ge x.y" -> "x.y:"
+ * "gt x.y" -> "w.z:" (with w.z = x.y + 1)
+ *
+ * If y is not present, it is assumed to be "0".
+ *
+ * The eq, le, lt, ge and gt operators are specific to the ACL syntax. The
+ * following block of code detects the operator and rewrites each value
+ * as a parsable range string.
+ */
+ if (expr->pat.parse == pat_parse_int ||
+ expr->pat.parse == pat_parse_dotted_ver) {
+ /* Check for an operator. If the argument is an operator, memorise it and
+ * continue to the next argument.
+ */
+ op = get_std_op(arg);
+ if (op != -1) {
+ operator = op;
+ args++;
+ continue;
+ }
+
+ /* Check if the pattern contains a ':' or '-' character. */
+ contain_colon = (strchr(arg, ':') || strchr(arg, '-'));
+
+ /* If the pattern contains a ':' or '-' character, give it to the parser as is.
+ * Do the same if it contains no ':' and the operator is STD_OP_EQ.
+ * Otherwise, try to convert the value according to the operator.
+ */
+ if (!contain_colon && operator != STD_OP_EQ) {
+ /* Search '.' separator. */
+ dot = strchr(arg, '.');
+ if (!dot) {
+ have_dot = 0;
+ minor = 0;
+ dot = arg + strlen(arg);
+ }
+ else
+ have_dot = 1;
+
+ /* convert the integer minor part for the pat_parse_dotted_ver() function. */
+ if (expr->pat.parse == pat_parse_dotted_ver && have_dot) {
+ if (strl2llrc(dot+1, strlen(dot+1), &minor) != 0) {
+ memprintf(err, "'%s' is neither a number nor a supported operator", arg);
+ goto out_free_expr;
+ }
+ if (minor >= 65536) {
+ memprintf(err, "'%s' contains too large a minor value", arg);
+ goto out_free_expr;
+ }
+ }
+
+ /* convert the integer value for the pat_parse_int() function, and the
+ * integer major part for the pat_parse_dotted_ver() function.
+ */
+ if (strl2llrc(arg, dot - arg, &value) != 0) {
+ memprintf(err, "'%s' is neither a number nor a supported operator", arg);
+ goto out_free_expr;
+ }
+ if (expr->pat.parse == pat_parse_dotted_ver) {
+ if (value >= 65536) {
+ memprintf(err, "'%s' contains too large a major value", arg);
+ goto out_free_expr;
+ }
+ value = (value << 16) | (minor & 0xffff);
+ }
+
+ switch (operator) {
+
+ case STD_OP_EQ: /* this case is not possible. */
+ memprintf(err, "internal error");
+ goto out_free_expr;
+
+ case STD_OP_GT:
+ value++; /* gt = ge + 1 */
+ /* fall through */
+
+ case STD_OP_GE:
+ if (expr->pat.parse == pat_parse_int)
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, "%lld:", value);
+ else
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, "%lld.%lld:",
+ value >> 16, value & 0xffff);
+ arg = buffer;
+ break;
+
+ case STD_OP_LT:
+ value--; /* lt = le - 1 */
+ /* fall through */
+
+ case STD_OP_LE:
+ if (expr->pat.parse == pat_parse_int)
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, ":%lld", value);
+ else
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, ":%lld.%lld",
+ value >> 16, value & 0xffff);
+ arg = buffer;
+ break;
+ }
+ }
+ }
+
+ /* Add the sample to the reference, and try to compile it for each pattern
+ * using this value.
+ */
+ if (!pat_ref_add(ref, arg, NULL, err))
+ goto out_free_expr;
+ args++;
+ }
+
+ return expr;
+
+ out_free_expr:
+ prune_acl_expr(expr);
+ free(expr);
+ out_free_smp:
+ free(ckw);
+ free(smp);
+ out_return:
+ return NULL;
+}
+
+/* Purge everything in the acl <acl>, then return <acl>. */
+struct acl *prune_acl(struct acl *acl) {
+
+ struct acl_expr *expr, *exprb;
+
+ free(acl->name);
+
+ list_for_each_entry_safe(expr, exprb, &acl->expr, list) {
+ LIST_DELETE(&expr->list);
+ prune_acl_expr(expr);
+ free(expr);
+ }
+
+ return acl;
+}
+
+/* Parse an ACL with the name starting at <args>[0], and with a list of already
+ * known ACLs in <known_acl>. If the ACL was not in the list, it will be added.
+ * A pointer to that ACL is returned. If the ACL has an empty name, then it's
+ * an anonymous one and it won't be merged with any other one. If <err> is not
+ * NULL, it will be filled with an appropriate error. This pointer must be
+ * freeable or NULL. <al> is the arg_list serving as a head for unresolved
+ * dependencies. It may be NULL if such dependencies are not allowed.
+ *
+ * args syntax: <aclname> <acl_expr>
+ */
+struct acl *parse_acl(const char **args, struct list *known_acl, char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_acl_expr, out_free_name;
+ struct acl *cur_acl;
+ struct acl_expr *acl_expr;
+ char *name;
+ const char *pos;
+
+ if (**args && (pos = invalid_char(*args))) {
+ memprintf(err, "invalid character in ACL name : '%c'", *pos);
+ goto out_return;
+ }
+
+ acl_expr = parse_acl_expr(args + 1, err, al, file, line);
+ if (!acl_expr) {
+ /* parse_acl_expr will have filled <err> here */
+ goto out_return;
+ }
+
+ /* Check for args beginning with an opening parenthesis just after the
+ * subject, as this is almost certainly a typo. Right now we can only
+ * emit a warning, so let's do so.
+ */
+ if (!strchr(args[1], '(') && *args[2] == '(')
+ ha_warning("parsing acl '%s' :\n"
+ " matching '%s' for pattern '%s' is likely a mistake and probably\n"
+ " not what you want. Maybe you need to remove the extraneous space before '('.\n"
+ " If you are really sure this is not an error, please insert '--' between the\n"
+ " match and the pattern to make this warning message disappear.\n",
+ args[0], args[1], args[2]);
+
+ if (*args[0])
+ cur_acl = find_acl_by_name(args[0], known_acl);
+ else
+ cur_acl = NULL;
+
+ if (!cur_acl) {
+ name = strdup(args[0]);
+ if (!name) {
+ memprintf(err, "out of memory when parsing ACL");
+ goto out_free_acl_expr;
+ }
+ cur_acl = calloc(1, sizeof(*cur_acl));
+ if (cur_acl == NULL) {
+ memprintf(err, "out of memory when parsing ACL");
+ goto out_free_name;
+ }
+
+ LIST_INIT(&cur_acl->expr);
+ LIST_APPEND(known_acl, &cur_acl->list);
+ cur_acl->name = name;
+ }
+
+ /* We want to know what features the ACL needs (typically HTTP parsing),
+ * and where it may be used. If an ACL relies on multiple matches, it is
+ * OK if at least one of them may match in the context where it is used.
+ */
+ cur_acl->use |= acl_expr->smp->fetch->use;
+ cur_acl->val |= acl_expr->smp->fetch->val;
+ LIST_APPEND(&cur_acl->expr, &acl_expr->list);
+ return cur_acl;
+
+ out_free_name:
+ free(name);
+ out_free_acl_expr:
+ prune_acl_expr(acl_expr);
+ free(acl_expr);
+ out_return:
+ return NULL;
+}
+
+/* Some useful ACLs provided by default. Only those used are allocated. */
+
+const struct {
+ const char *name;
+ const char *expr[4]; /* put enough for longest expression */
+} default_acl_list[] = {
+ { .name = "TRUE", .expr = {"always_true",""}},
+ { .name = "FALSE", .expr = {"always_false",""}},
+ { .name = "LOCALHOST", .expr = {"src","127.0.0.1/8","::1",""}},
+ { .name = "HTTP", .expr = {"req.proto_http",""}},
+ { .name = "HTTP_1.0", .expr = {"req.ver","1.0",""}},
+ { .name = "HTTP_1.1", .expr = {"req.ver","1.1",""}},
+ { .name = "HTTP_2.0", .expr = {"req.ver","2.0",""}},
+ { .name = "METH_CONNECT", .expr = {"method","CONNECT",""}},
+ { .name = "METH_DELETE", .expr = {"method","DELETE",""}},
+ { .name = "METH_GET", .expr = {"method","GET","HEAD",""}},
+ { .name = "METH_HEAD", .expr = {"method","HEAD",""}},
+ { .name = "METH_OPTIONS", .expr = {"method","OPTIONS",""}},
+ { .name = "METH_POST", .expr = {"method","POST",""}},
+ { .name = "METH_PUT", .expr = {"method","PUT",""}},
+ { .name = "METH_TRACE", .expr = {"method","TRACE",""}},
+ { .name = "HTTP_URL_ABS", .expr = {"url_reg","^[^/:]*://",""}},
+ { .name = "HTTP_URL_SLASH", .expr = {"url_beg","/",""}},
+ { .name = "HTTP_URL_STAR", .expr = {"url","*",""}},
+ { .name = "HTTP_CONTENT", .expr = {"req.hdr_val(content-length)","gt","0",""}},
+ { .name = "RDP_COOKIE", .expr = {"req.rdp_cookie_cnt","gt","0",""}},
+ { .name = "REQ_CONTENT", .expr = {"req.len","gt","0",""}},
+ { .name = "WAIT_END", .expr = {"wait_end",""}},
+ { .name = NULL, .expr = {""}}
+};
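+/* These default ACLs may be referenced from a configuration without being
+ * declared first, e.g. (illustrative):
+ *
+ *   http-request deny if METH_TRACE
+ *   use_backend websrv if HTTP_1.1
+ */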
+
+/* Find a default ACL from the default_acl list, compile it and return it.
+ * If the ACL is not found, NULL is returned. In theory, it cannot fail,
+ * except when default ACLs are broken, in which case it will return NULL.
+ * If <known_acl> is not NULL, the ACL will be queued at its tail. If <err> is
+ * not NULL, it will be filled with an error message if an error occurs. This
+ * pointer must be freeable or NULL. <al> is an arg_list serving as a list head
+ * to report missing dependencies. It may be NULL if such dependencies are not
+ * allowed.
+ */
+static struct acl *find_acl_default(const char *acl_name, struct list *known_acl,
+ char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_acl_expr, out_free_name;
+ struct acl *cur_acl;
+ struct acl_expr *acl_expr;
+ char *name;
+ int index;
+
+ for (index = 0; default_acl_list[index].name != NULL; index++) {
+ if (strcmp(acl_name, default_acl_list[index].name) == 0)
+ break;
+ }
+
+ if (default_acl_list[index].name == NULL) {
+ memprintf(err, "no such ACL : '%s'", acl_name);
+ return NULL;
+ }
+
+ acl_expr = parse_acl_expr((const char **)default_acl_list[index].expr, err, al, file, line);
+ if (!acl_expr) {
+ /* parse_acl_expr must have filled err here */
+ goto out_return;
+ }
+
+ name = strdup(acl_name);
+ if (!name) {
+ memprintf(err, "out of memory when building default ACL '%s'", acl_name);
+ goto out_free_acl_expr;
+ }
+
+ cur_acl = calloc(1, sizeof(*cur_acl));
+ if (cur_acl == NULL) {
+ memprintf(err, "out of memory when building default ACL '%s'", acl_name);
+ goto out_free_name;
+ }
+
+ cur_acl->name = name;
+ cur_acl->use |= acl_expr->smp->fetch->use;
+ cur_acl->val |= acl_expr->smp->fetch->val;
+ LIST_INIT(&cur_acl->expr);
+ LIST_APPEND(&cur_acl->expr, &acl_expr->list);
+ if (known_acl)
+ LIST_APPEND(known_acl, &cur_acl->list);
+
+ return cur_acl;
+
+ out_free_name:
+ free(name);
+ out_free_acl_expr:
+ prune_acl_expr(acl_expr);
+ free(acl_expr);
+ out_return:
+ return NULL;
+}
+
+/* Purge everything in the acl_cond <cond>, then return <cond>. */
+struct acl_cond *prune_acl_cond(struct acl_cond *cond)
+{
+ struct acl_term_suite *suite, *tmp_suite;
+ struct acl_term *term, *tmp_term;
+
+ /* iterate through all term suites and free all terms and all suites */
+ list_for_each_entry_safe(suite, tmp_suite, &cond->suites, list) {
+ list_for_each_entry_safe(term, tmp_term, &suite->terms, list)
+ free(term);
+ free(suite);
+ }
+ return cond;
+}
+
+/* Parse an ACL condition starting at <args>[0], relying on a list of already
+ * known ACLs passed in <known_acl>. The new condition is returned (or NULL in
+ * case of error). Supports multiple conditions separated by "or". If
+ * <err> is not NULL, it will be filled with a pointer to an error message in
+ * case of error, that the caller is responsible for freeing. The initial
+ * location must either be freeable or NULL. The list <al> serves as a list head
+ * for unresolved dependencies. It may be NULL if such dependencies are not
+ * allowed.
+ */
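+/* As a sketch of the accepted grammar (the ACL names below are illustrative),
+ * a condition is an OR of AND'ed terms, each term being a possibly negated
+ * ACL name or an anonymous ACL enclosed in braces:
+ *
+ *   if is_static !is_auth or { path_beg /img } { src 10.0.0.0/8 }
+ */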
+struct acl_cond *parse_acl_cond(const char **args, struct list *known_acl,
+ enum acl_cond_pol pol, char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_suite, out_free_term;
+ int arg, neg;
+ const char *word;
+ struct acl *cur_acl;
+ struct acl_term *cur_term;
+ struct acl_term_suite *cur_suite;
+ struct acl_cond *cond;
+ unsigned int suite_val;
+
+ cond = calloc(1, sizeof(*cond));
+ if (cond == NULL) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_return;
+ }
+
+ LIST_INIT(&cond->list);
+ LIST_INIT(&cond->suites);
+ cond->pol = pol;
+ cond->val = 0;
+
+ cur_suite = NULL;
+ suite_val = ~0U;
+ neg = 0;
+ for (arg = 0; *args[arg]; arg++) {
+ word = args[arg];
+
+ /* remove as many exclamation marks as we can */
+ while (*word == '!') {
+ neg = !neg;
+ word++;
+ }
+
+ /* an empty word is allowed because we cannot force the user to
+ * always think about not leaving exclamation marks alone.
+ */
+ if (!*word)
+ continue;
+
+ if (strcasecmp(word, "or") == 0 || strcmp(word, "||") == 0) {
+ /* new term suite */
+ cond->val |= suite_val;
+ suite_val = ~0U;
+ cur_suite = NULL;
+ neg = 0;
+ continue;
+ }
+
+ if (strcmp(word, "{") == 0) {
+ /* we may have a complete ACL expression between two braces,
+ * find the last one.
+ */
+ int arg_end = arg + 1;
+ const char **args_new;
+
+ while (*args[arg_end] && strcmp(args[arg_end], "}") != 0)
+ arg_end++;
+
+ if (!*args[arg_end]) {
+ memprintf(err, "missing closing '}' in condition");
+ goto out_free_suite;
+ }
+
+ args_new = calloc(1, (arg_end - arg + 1) * sizeof(*args_new));
+ if (!args_new) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_free_suite;
+ }
+
+ args_new[0] = "";
+ memcpy(args_new + 1, args + arg + 1, (arg_end - arg) * sizeof(*args_new));
+ args_new[arg_end - arg] = "";
+ cur_acl = parse_acl(args_new, known_acl, err, al, file, line);
+ free(args_new);
+
+ if (!cur_acl) {
+ /* note that parse_acl() must have filled <err> here */
+ goto out_free_suite;
+ }
+ arg = arg_end;
+ }
+ else {
+ /* search for <word> in the known ACL names. If we do not find
+ * it, let's look for it in the default ACLs, and if found, add
+ * it to the list of ACLs of this proxy. This makes it possible
+ * to override them.
+ */
+ cur_acl = find_acl_by_name(word, known_acl);
+ if (cur_acl == NULL) {
+ cur_acl = find_acl_default(word, known_acl, err, al, file, line);
+ if (cur_acl == NULL) {
+ /* note that find_acl_default() must have filled <err> here */
+ goto out_free_suite;
+ }
+ }
+ }
+
+ cur_term = calloc(1, sizeof(*cur_term));
+ if (cur_term == NULL) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_free_suite;
+ }
+
+ cur_term->acl = cur_acl;
+ cur_term->neg = neg;
+
+ /* Here it is a bit complex. The acl_term_suite is a conjunction
+ * of many terms. It may only be used if all of its terms are
+ * usable at the same time. So the suite's validity domain is an
+ * AND between all ACL keywords' ones. But, the global condition
+ * is valid if at least one term suite is OK. So it's an OR between
+ * all of their validity domains. We could emit a warning as soon
+ * as suite_val is null because it means that the last ACL is not
+ * compatible with the previous ones. Let's remain simple for now.
+ */
+ cond->use |= cur_acl->use;
+ suite_val &= cur_acl->val;
+
+ if (!cur_suite) {
+ cur_suite = calloc(1, sizeof(*cur_suite));
+ if (cur_suite == NULL) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_free_term;
+ }
+ LIST_INIT(&cur_suite->terms);
+ LIST_APPEND(&cond->suites, &cur_suite->list);
+ }
+ LIST_APPEND(&cur_suite->terms, &cur_term->list);
+ neg = 0;
+ }
+
+ cond->val |= suite_val;
+ return cond;
+
+ out_free_term:
+ free(cur_term);
+ out_free_suite:
+ prune_acl_cond(cond);
+ free(cond);
+ out_return:
+ return NULL;
+}
+
+/* Builds an ACL condition starting at the if/unless keyword. The complete
+ * condition is returned. NULL is returned in case of error or if the first
+ * word is neither "if" nor "unless". It automatically sets the file name and
+ * the line number in the condition for better error reporting, and sets the
+ * HTTP initialization requirements in the proxy. If <err> is not NULL, it will
+ * be filled with a pointer to an error message in case of error, that the
+ * caller is responsible for freeing. The initial location must either be
+ * freeable or NULL.
+ */
+struct acl_cond *build_acl_cond(const char *file, int line, struct list *known_acl,
+ struct proxy *px, const char **args, char **err)
+{
+ enum acl_cond_pol pol = ACL_COND_NONE;
+ struct acl_cond *cond = NULL;
+
+ if (err)
+ *err = NULL;
+
+ if (strcmp(*args, "if") == 0) {
+ pol = ACL_COND_IF;
+ args++;
+ }
+ else if (strcmp(*args, "unless") == 0) {
+ pol = ACL_COND_UNLESS;
+ args++;
+ }
+ else {
+ memprintf(err, "conditions must start with either 'if' or 'unless'");
+ return NULL;
+ }
+
+ cond = parse_acl_cond(args, known_acl, pol, err, &px->conf.args, file, line);
+ if (!cond) {
+ /* note that parse_acl_cond must have filled <err> here */
+ return NULL;
+ }
+
+ cond->file = file;
+ cond->line = line;
+ px->http_needed |= !!(cond->use & SMP_USE_HTTP_ANY);
+ return cond;
+}
+
+/* Execute condition <cond> and return either ACL_TEST_FAIL, ACL_TEST_MISS or
+ * ACL_TEST_PASS depending on the test results. ACL_TEST_MISS may only be
+ * returned if <opt> does not contain SMP_OPT_FINAL, indicating that incomplete
+ * data is being examined. The function automatically sets SMP_OPT_ITERATE. This
+ * function only computes the condition, it does not apply the polarity required
+ * by IF/UNLESS, it's up to the caller to do this using something like this :
+ *
+ * res = acl_pass(res);
+ * if (res == ACL_TEST_MISS)
+ * return 0;
+ * if (cond->pol == ACL_COND_UNLESS)
+ * res = !res;
+ */
+enum acl_test_res acl_exec_cond(struct acl_cond *cond, struct proxy *px, struct session *sess, struct stream *strm, unsigned int opt)
+{
+ __label__ fetch_next;
+ struct acl_term_suite *suite;
+ struct acl_term *term;
+ struct acl_expr *expr;
+ struct acl *acl;
+ struct sample smp;
+ enum acl_test_res acl_res, suite_res, cond_res;
+
+ /* ACLs are iterated over all values, so let's always set the flag to
+ * indicate this to the fetch functions.
+ */
+ opt |= SMP_OPT_ITERATE;
+
+ /* We're doing a logical OR between conditions so we initialize to FAIL.
+ * The MISS status is propagated down from the suites.
+ */
+ cond_res = ACL_TEST_FAIL;
+ list_for_each_entry(suite, &cond->suites, list) {
+ /* Evaluate condition suite <suite>. We stop at the first term
+ * which returns ACL_TEST_FAIL. The MISS status is still propagated
+ * in case of uncertainty in the result.
+ */
+
+ /* we're doing a logical AND between terms, so we must set the
+ * initial value to PASS.
+ */
+ suite_res = ACL_TEST_PASS;
+ list_for_each_entry(term, &suite->terms, list) {
+ acl = term->acl;
+
+ /* FIXME: use cache !
+ * check acl->cache_idx for this.
+ */
+
+ /* ACL result not cached. Let's scan all the expressions
+ * and use the first one to match.
+ */
+ acl_res = ACL_TEST_FAIL;
+ list_for_each_entry(expr, &acl->expr, list) {
+ /* we need to reset context and flags */
+ memset(&smp, 0, sizeof(smp));
+ fetch_next:
+ if (!sample_process(px, sess, strm, opt, expr->smp, &smp)) {
+ /* maybe we could not fetch because of missing data */
+ if (smp.flags & SMP_F_MAY_CHANGE && !(opt & SMP_OPT_FINAL))
+ acl_res |= ACL_TEST_MISS;
+ continue;
+ }
+
+ acl_res |= pat2acl(pattern_exec_match(&expr->pat, &smp, 0));
+ /*
+ * OK now acl_res holds the result of this expression
+ * as one of ACL_TEST_FAIL, ACL_TEST_MISS or ACL_TEST_PASS.
+ *
+ * Then if (!MISS) we can cache the result, and put
+ * (smp.flags & SMP_F_VOLATILE) in the cache flags.
+ *
+ * FIXME: implement cache.
+ *
+ */
+
+ /* we're ORing these terms, so a single PASS is enough */
+ if (acl_res == ACL_TEST_PASS)
+ break;
+
+ if (smp.flags & SMP_F_NOT_LAST)
+ goto fetch_next;
+
+ /* sometimes we know the fetched data is subject to change
+ * later and give another chance for a new match (eg: request
+ * size, time, ...)
+ */
+ if (smp.flags & SMP_F_MAY_CHANGE && !(opt & SMP_OPT_FINAL))
+ acl_res |= ACL_TEST_MISS;
+ }
+ /*
+ * Here we have the result of an ACL (cached or not).
+ * ACLs are combined, negated or not, to form conditions.
+ */
+
+ if (term->neg)
+ acl_res = acl_neg(acl_res);
+
+ suite_res &= acl_res;
+
+ /* we're ANDing these terms, so a single FAIL or MISS is enough */
+ if (suite_res != ACL_TEST_PASS)
+ break;
+ }
+ cond_res |= suite_res;
+
+ /* we're ORing these terms, so a single PASS is enough */
+ if (cond_res == ACL_TEST_PASS)
+ break;
+ }
+ return cond_res;
+}
+
+/* Returns a pointer to the first ACL conflicting with usage at place <where>
+ * which is one of the SMP_VAL_* bits indicating a check place, or NULL if
+ * no conflict is found. Only full conflicts are detected (ACL is not usable).
+ * Use the next function to check for useless keywords.
+ */
+const struct acl *acl_cond_conflicts(const struct acl_cond *cond, unsigned int where)
+{
+ struct acl_term_suite *suite;
+ struct acl_term *term;
+ struct acl *acl;
+
+ list_for_each_entry(suite, &cond->suites, list) {
+ list_for_each_entry(term, &suite->terms, list) {
+ acl = term->acl;
+ if (!(acl->val & where))
+ return acl;
+ }
+ }
+ return NULL;
+}
+
+/* Looks for the first ACL and its first keyword to conflict with usage at
+ * place <where>, which is one of the SMP_VAL_* bits indicating a check place.
+ * Returns true if a conflict is found, with <acl> and <kw> set (if non
+ * null), or false if no conflict is found. The first useless keyword is
+ * returned.
+ */
+int acl_cond_kw_conflicts(const struct acl_cond *cond, unsigned int where, struct acl const **acl, char const **kw)
+{
+ struct acl_term_suite *suite;
+ struct acl_term *term;
+ struct acl_expr *expr;
+
+ list_for_each_entry(suite, &cond->suites, list) {
+ list_for_each_entry(term, &suite->terms, list) {
+ list_for_each_entry(expr, &term->acl->expr, list) {
+ if (!(expr->smp->fetch->val & where)) {
+ if (acl)
+ *acl = term->acl;
+ if (kw)
+ *kw = expr->kw;
+ return 1;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * Find targets for userlists and groups in ACLs. The function returns the
+ * number of errors encountered, or 0 if everything is fine. It must only be
+ * called once sample fetch arguments have been resolved (after smp_resolve_args()).
+ */
+int acl_find_targets(struct proxy *p)
+{
+
+ struct acl *acl;
+ struct acl_expr *expr;
+ struct pattern_list *pattern;
+ int cfgerr = 0;
+ struct pattern_expr_list *pexp;
+
+ list_for_each_entry(acl, &p->acl, list) {
+ list_for_each_entry(expr, &acl->expr, list) {
+ if (strcmp(expr->kw, "http_auth_group") == 0) {
+ /* Note: the ARGT_USR argument may only have been resolved earlier
+ * by smp_resolve_args().
+ */
+ if (expr->smp->arg_p->unresolved) {
+ ha_alert("Internal bug in proxy %s: %sacl %s %s() makes use of unresolved userlist '%s'. Please report this.\n",
+ p->id, *acl->name ? "" : "anonymous ", acl->name, expr->kw,
+ expr->smp->arg_p->data.str.area);
+ cfgerr++;
+ continue;
+ }
+
+ if (LIST_ISEMPTY(&expr->pat.head)) {
+ ha_alert("proxy %s: acl %s %s(): no groups specified.\n",
+ p->id, acl->name, expr->kw);
+ cfgerr++;
+ continue;
+ }
+
+ /* For each pattern, check if the group exists. */
+ list_for_each_entry(pexp, &expr->pat.head, list) {
+ if (LIST_ISEMPTY(&pexp->expr->patterns)) {
+ ha_alert("proxy %s: acl %s %s(): no groups specified.\n",
+ p->id, acl->name, expr->kw);
+ cfgerr++;
+ continue;
+ }
+
+ list_for_each_entry(pattern, &pexp->expr->patterns, list) {
+ /* this keyword only has one argument */
+ if (!check_group(expr->smp->arg_p->data.usr, pattern->pat.ptr.str)) {
+ ha_alert("proxy %s: acl %s %s(): invalid group '%s'.\n",
+ p->id, acl->name, expr->kw, pattern->pat.ptr.str);
+ cfgerr++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return cfgerr;
+}
+
+/* initializes ACLs by resolving the sample fetch names they rely upon.
+ * Returns 0 on success, otherwise an error.
+ */
+int init_acl()
+{
+ int err = 0;
+ int index;
+ const char *name;
+ struct acl_kw_list *kwl;
+ struct sample_fetch *smp;
+
+ list_for_each_entry(kwl, &acl_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ name = kwl->kw[index].fetch_kw;
+ if (!name)
+ name = kwl->kw[index].kw;
+
+ smp = find_sample_fetch(name, strlen(name));
+ if (!smp) {
+ ha_alert("Critical internal error: ACL keyword '%s' relies on sample fetch '%s' which was not registered!\n",
+ kwl->kw[index].kw, name);
+ err++;
+ continue;
+ }
+ kwl->kw[index].smp = smp;
+ }
+ }
+ return err;
+}
+
+/* dump known ACL keywords on stdout */
+void acl_dump_kwd(void)
+{
+ struct acl_kw_list *kwl;
+ const struct acl_keyword *kwp, *kw;
+ const char *name;
+ int index;
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &acl_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL))
+ kw = &kwl->kw[index];
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ name = kw->fetch_kw;
+ if (!name)
+ name = kw->kw;
+
+ printf("%s = %s -m %s\n", kw->kw, name, pat_match_names[kw->match_type]);
+ }
+}
+
+void free_acl_cond(struct acl_cond *cond)
+{
+ struct acl_term_suite *suite, *suiteb;
+ struct acl_term *term, *termb;
+
+ if (!cond)
+ return;
+
+ list_for_each_entry_safe(suite, suiteb, &cond->suites, list) {
+ list_for_each_entry_safe(term, termb, &suite->terms, list) {
+ LIST_DELETE(&term->list);
+ free(term);
+ }
+ LIST_DELETE(&suite->list);
+ free(suite);
+ }
+
+ free(cond);
+}
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/action.c b/src/action.c
new file mode 100644
index 0000000..9d3bfe4
--- /dev/null
+++ b/src/action.c
@@ -0,0 +1,348 @@
+/*
+ * Action management functions.
+ *
+ * Copyright 2017 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+
+/* Check an action ruleset's validity. It returns the number of errors
+ * encountered, and err_code is updated if a warning is emitted.
+ */
+int check_action_rules(struct list *rules, struct proxy *px, int *err_code)
+{
+ struct act_rule *rule;
+ char *errmsg = NULL;
+ int err = 0;
+
+ list_for_each_entry(rule, rules, list) {
+ if (rule->check_ptr && !rule->check_ptr(rule, px, &errmsg)) {
+ ha_alert("Proxy '%s': %s.\n", px->id, errmsg);
+ err++;
+ }
+ *err_code |= warnif_tcp_http_cond(px, rule->cond);
+ ha_free(&errmsg);
+ }
+
+ return err;
+}
+
+/* Find and check the target table used by an action track-sc*. This
+ * function should be called during the configuration validity check.
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <err> is
+ * filled.
+ */
+int check_trk_action(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct stktable *target;
+
+ if (rule->arg.trk_ctr.table.n)
+ target = stktable_find_by_name(rule->arg.trk_ctr.table.n);
+ else
+ target = px->table;
+
+ if (!target) {
+ memprintf(err, "unable to find table '%s' referenced by track-sc%d",
+ rule->arg.trk_ctr.table.n ? rule->arg.trk_ctr.table.n : px->id,
+ rule->action);
+ return 0;
+ }
+
+ if (!stktable_compatible_sample(rule->arg.trk_ctr.expr, target->type)) {
+ memprintf(err, "stick-table '%s' uses a type incompatible with the 'track-sc%d' rule",
+ rule->arg.trk_ctr.table.n ? rule->arg.trk_ctr.table.n : px->id,
+ rule->action);
+ return 0;
+ }
+ else {
+ if (!in_proxies_list(target->proxies_list, px)) {
+ px->next_stkt_ref = target->proxies_list;
+ target->proxies_list = px;
+ }
+ free(rule->arg.trk_ctr.table.n);
+ rule->arg.trk_ctr.table.t = target;
+ /* Note: if we decide to enhance the track-sc syntax, we may be
+ * able to pass a list of counters to track and allocate them
+ * right here using stktable_alloc_data_type().
+ */
+ }
+
+ if (rule->from == ACT_F_TCP_REQ_CNT && (px->cap & PR_CAP_FE)) {
+ if (!px->tcp_req.inspect_delay && !(rule->arg.trk_ctr.expr->fetch->val & SMP_VAL_FE_SES_ACC)) {
+ ha_warning("%s '%s' : a 'tcp-request content track-sc*' rule explicitly depending on request"
+ " contents without any 'tcp-request inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-request inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+
+ /* The following warning is emitted because HTTP multiplexers are able to catch errors
+ * or timeouts at the session level, before instantiating any stream.
+ * Thus the tcp-request content ruleset will not be evaluated in such cases. This means
+ * the http_req and http_err counters will not be incremented as expected, even if the
+ * tracked counter does not use the request content. To track invalid requests, this
+ * should be performed at the session level using a 'tcp-request session' rule.
+ */
+ if (px->mode == PR_MODE_HTTP &&
+ !(rule->arg.trk_ctr.expr->fetch->use & (SMP_USE_L6REQ|SMP_USE_HRQHV|SMP_USE_HRQHP|SMP_USE_HRQBO)) &&
+ (!rule->cond || !(rule->cond->use & (SMP_USE_L6REQ|SMP_USE_HRQHV|SMP_USE_HRQHP|SMP_USE_HRQBO)))) {
+ ha_warning("%s '%s' : a 'tcp-request content track-sc*' rule not depending on request"
+ " contents for an HTTP frontend should be executed at the session level, using a"
+ " 'tcp-request session' rule (mandatory to track invalid HTTP requests).\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+
+ return 1;
+}
+
+/* check a capture rule. This function should be called during the configuration
+ * validity check.
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <err> is
+ * filled.
+ */
+int check_capture(struct act_rule *rule, struct proxy *px, char **err)
+{
+ if (rule->from == ACT_F_TCP_REQ_CNT && (px->cap & PR_CAP_FE) && !px->tcp_req.inspect_delay &&
+ !(rule->arg.cap.expr->fetch->val & SMP_VAL_FE_SES_ACC)) {
+ ha_warning("%s '%s' : a 'tcp-request capture' rule explicitly depending on request"
+ " contents without any 'tcp-request inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-request inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+
+ return 1;
+}
+
+int act_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters)
+{
+ struct stream *stream;
+
+ if (requester->resolution == NULL)
+ return 0;
+
+ stream = objt_stream(requester->owner);
+ if (stream == NULL)
+ return 0;
+
+ task_wakeup(stream->task, TASK_WOKEN_MSG);
+
+ return 0;
+}
+
+/*
+ * DNS resolution error management callback.
+ * Returns:
+ * 0 if we can trash the answer items.
+ * 1 when the error is safely ignored and the answer items must be kept.
+ */
+int act_resolution_error_cb(struct resolv_requester *requester, int error_code)
+{
+ struct stream *stream;
+
+ if (requester->resolution == NULL)
+ return 0;
+
+ stream = objt_stream(requester->owner);
+ if (stream == NULL)
+ return 0;
+
+ task_wakeup(stream->task, TASK_WOKEN_MSG);
+
+ return 0;
+}
+
+/* Parse a set-timeout rule statement. It first checks if the timeout name is
+ * valid and returns it in <name>. Then the timeout is parsed as a plain value
+ * and returned in <out_timeout>. If there is a parsing error, the value is
+ * reparsed as an expression and returned in <expr>.
+ *
+ * Returns -1 if the name is invalid or neither a time nor an expression can be
+ * parsed, or if the timeout value is 0.
+ */
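+/* Illustrative configuration usage (HTTP rule form):
+ *
+ *   http-request set-timeout server 10s
+ *   http-request set-timeout tunnel 1h if { path_beg /ws }
+ *
+ * When the value does not parse as a plain time, it is re-parsed here as a
+ * sample expression evaluated at run time.
+ */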
+int cfg_parse_rule_set_timeout(const char **args, int idx, int *out_timeout,
+ enum act_timeout_name *name,
+ struct sample_expr **expr, char **err,
+ const char *file, int line, struct arg_list *al)
+{
+ const char *res;
+ const char *timeout_name = args[idx++];
+
+ if (strcmp(timeout_name, "server") == 0) {
+ *name = ACT_TIMEOUT_SERVER;
+ }
+ else if (strcmp(timeout_name, "tunnel") == 0) {
+ *name = ACT_TIMEOUT_TUNNEL;
+ }
+ else {
+ memprintf(err,
+ "'set-timeout' rule supports 'server'/'tunnel' (got '%s')",
+ timeout_name);
+ return -1;
+ }
+
+ res = parse_time_err(args[idx], (unsigned int *)out_timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to rule 'set-timeout %s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[idx], timeout_name);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to rule 'set-timeout %s' (minimum value is 1 ms)",
+ args[idx], timeout_name);
+ return -1;
+ }
+ /* res not NULL, parsing error */
+ else if (res) {
+ *expr = sample_parse_expr((char **)args, &idx, file, line, err, al, NULL);
+ if (!*expr) {
+ memprintf(err, "unexpected character '%c' in rule 'set-timeout %s'", *res, timeout_name);
+ return -1;
+ }
+ }
+ /* res NULL, parsing ok but value is 0 */
+ else if (!(*out_timeout)) {
+ memprintf(err, "null value is not valid for a 'set-timeout %s' rule",
+ timeout_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* tries to find in list <keywords> a similar looking action to the one in
+ * <word>, and returns it, otherwise NULL. <word> may be NULL or empty. An
+ * optional array of extra words to compare may be passed in <extra>, but it
+ * must then be terminated by a NULL entry. If unused it may be NULL.
+ */
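+/* For example (hypothetical input), if "track-sc0" is registered in
+ * <keywords>, a misspelled word such as "trak-sc0" would typically be close
+ * enough in fingerprint distance for this function to suggest "track-sc0".
+ */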
+const char *action_suggest(const char *word, const struct list *keywords, const char **extra)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const struct action_kw_list *kwl;
+ const struct action_kw *best_kw = NULL;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ int index;
+
+ if (!word || !*word)
+ return NULL;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, keywords, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_kw = &kwl->kw[index];
+ best_ptr = best_kw->kw;
+ }
+ }
+ }
+
+ while (extra && *extra) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_kw = NULL;
+ best_ptr = *extra;
+ }
+ extra++;
+ }
+
+ /* eliminate too different ones, with more tolerance for prefixes
+ * when they're known to exist (not from extra list).
+ */
+ if (best_ptr &&
+ (best_dist > (2 + (best_kw && (best_kw->flags & KWF_MATCH_PREFIX))) * strlen(word) ||
+ best_dist > (2 + (best_kw && (best_kw->flags & KWF_MATCH_PREFIX))) * strlen(best_ptr)))
+ best_ptr = NULL;
+
+ return best_ptr;
+}
+
+/* allocates a rule for ruleset <from> (ACT_F_*), from file name <file> and
+ * line <linenum>. <file> and <linenum> may be zero if unknown. Returns the
+ * rule, otherwise NULL in case of memory allocation error.
+ */
+struct act_rule *new_act_rule(enum act_from from, const char *file, int linenum)
+{
+ struct act_rule *rule;
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ return NULL;
+ rule->from = from;
+ rule->conf.file = file ? strdup(file) : NULL;
+ rule->conf.line = linenum;
+ LIST_INIT(&rule->list);
+ return rule;
+}
+
+/* frees rule <rule> and its elements as well as the condition */
+void free_act_rule(struct act_rule *rule)
+{
+ LIST_DELETE(&rule->list);
+ free_acl_cond(rule->cond);
+ if (rule->release_ptr)
+ rule->release_ptr(rule);
+ free(rule->conf.file);
+ free(rule);
+}
+
+void free_act_rules(struct list *rules)
+{
+ struct act_rule *rule, *ruleb;
+
+ list_for_each_entry_safe(rule, ruleb, rules, list) {
+ free_act_rule(rule);
+ }
+}
+
+/* dumps all known actions registered in action rules <rules> after prefix
+ * <pfx> to stdout. The actions are alphabetically sorted. Those with the
+ * KWF_MATCH_PREFIX flag have their name suffixed with '*'.
+ */
+void dump_act_rules(const struct list *rules, const char *pfx)
+{
+ const struct action_kw *akwp, *akwn;
+ struct action_kw_list *akwl;
+ int index;
+
+ for (akwn = akwp = NULL;; akwp = akwn) {
+ list_for_each_entry(akwl, rules, list) {
+ for (index = 0; akwl->kw[index].kw != NULL; index++)
+ if (strordered(akwp ? akwp->kw : NULL,
+ akwl->kw[index].kw,
+ akwn != akwp ? akwn->kw : NULL))
+ akwn = &akwl->kw[index];
+ }
+ if (akwn == akwp)
+ break;
+ printf("%s%s%s\n", pfx ? pfx : "", akwn->kw,
+ (akwn->flags & KWF_MATCH_PREFIX) ? "*" : "");
+ }
+}
diff --git a/src/activity.c b/src/activity.c
new file mode 100644
index 0000000..65d7032
--- /dev/null
+++ b/src/activity.c
@@ -0,0 +1,996 @@
+/*
+ * activity measurement functions.
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/activity-t.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+/* CLI context for the "show profiling" command */
+struct show_prof_ctx {
+ int dump_step; /* 0,1,2,4,5,6; see cli_io_handler_show_profiling() */
+ int linenum; /* next line to be dumped (starts at 0) */
+ int maxcnt; /* max line count per step (0=not set) */
+ int by_addr; /* 0=sort by usage, 1=sort by address */
+};
+
+#if defined(DEBUG_MEM_STATS)
+/* these ones are macros in bug.h when DEBUG_MEM_STATS is set, and will
+ * prevent the new ones from being redefined.
+ */
+#undef calloc
+#undef malloc
+#undef realloc
+#endif
+
+/* bit field of profiling options. Beware, may be modified at runtime! */
+unsigned int profiling __read_mostly = HA_PROF_TASKS_AOFF;
+unsigned long task_profiling_mask __read_mostly = 0;
+
+/* One struct per thread containing all collected measurements */
+struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };
+
+/* One struct per function pointer hash entry (256 values, 0=collision) */
+struct sched_activity sched_activity[256] __attribute__((aligned(64))) = { };
+
+
+#ifdef USE_MEMORY_PROFILING
+/* determine the number of buckets to store stats */
+#define MEMPROF_HASH_BITS 10
+#define MEMPROF_HASH_BUCKETS (1U << MEMPROF_HASH_BITS)
+
+enum memprof_method {
+ MEMPROF_METH_UNKNOWN = 0,
+ MEMPROF_METH_MALLOC,
+ MEMPROF_METH_CALLOC,
+ MEMPROF_METH_REALLOC,
+ MEMPROF_METH_FREE,
+ MEMPROF_METH_METHODS /* count, must be last */
+};
+
+static const char *const memprof_methods[MEMPROF_METH_METHODS] = {
+ "unknown", "malloc", "calloc", "realloc", "free",
+};
+
+/* stats:
+ * - malloc increases alloc
+ * - free increases free (if non null)
+ * - realloc increases either depending on the size change.
+ * when the real size is known (malloc_usable_size()), it's used in free_tot
+ * and alloc_tot, otherwise the requested size is reported in alloc_tot and
+ * zero in free_tot.
+ */
+struct memprof_stats {
+ const void *caller;
+ enum memprof_method method;
+ /* 4-7 bytes hole here */
+ unsigned long long alloc_calls;
+ unsigned long long free_calls;
+ unsigned long long alloc_tot;
+ unsigned long long free_tot;
+};
+
+/* last one is for hash collisions ("others") and has no caller address */
+struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1] = { };
+
+/* used to detect recursive calls */
+static THREAD_LOCAL int in_memprof = 0;
+
+/* perform a pointer hash by scrambling its bits and retrieving the most
+ * mixed ones (topmost ones in 32-bit, middle ones in 64-bit).
+ */
+static unsigned int memprof_hash_ptr(const void *p)
+{
+ unsigned long long x = (unsigned long)p;
+
+ x = 0xcbda9653U * x;
+ if (sizeof(long) == 4)
+ x >>= 32;
+ else
+ x >>= 33 - MEMPROF_HASH_BITS / 2;
+ return x & (MEMPROF_HASH_BUCKETS - 1);
+}
+
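+/* Worked example (illustrative): with MEMPROF_HASH_BITS=10 on a 64-bit
+ * platform, the shift amount is 33 - 10/2 = 28, so after the final mask,
+ * bits 28..37 of the 64-bit product are kept, i.e. the well-mixed middle
+ * bits of the multiplication.
+ */
+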
+/* These ones are used by glibc and will be called early. They are in charge of
+ * initializing the handlers with the original functions.
+ */
+static void *memprof_malloc_initial_handler(size_t size);
+static void *memprof_calloc_initial_handler(size_t nmemb, size_t size);
+static void *memprof_realloc_initial_handler(void *ptr, size_t size);
+static void memprof_free_initial_handler(void *ptr);
+
+/* Fallback handlers for the main alloc/free functions. They are preset to
+ * the initializer in order to save a test in the functions' critical path.
+ */
+static void *(*memprof_malloc_handler)(size_t size) = memprof_malloc_initial_handler;
+static void *(*memprof_calloc_handler)(size_t nmemb, size_t size) = memprof_calloc_initial_handler;
+static void *(*memprof_realloc_handler)(void *ptr, size_t size) = memprof_realloc_initial_handler;
+static void (*memprof_free_handler)(void *ptr) = memprof_free_initial_handler;
+
+/* Used to force the process to die if it's not possible to retrieve the
+ * allocation functions. We cannot even use stdio in this case.
+ */
+static __attribute__((noreturn)) void memprof_die(const char *msg)
+{
+ DISGUISE(write(2, msg, strlen(msg)));
+ exit(1);
+}
+
+/* Resolve original allocation functions and initialize all handlers.
+ * This must be called very early at boot, before the very first malloc()
+ * call, and is not thread-safe! It's not even possible to use stdio there.
+ * Worse, we have to account for the risk of reentrance from dlsym() when
+ * it tries to prepare its error messages. Here it's handled by in_memprof,
+ * which makes the allocators return NULL. dlsym() handles this gracefully. An
+ * alternative approach consists in calling aligned_alloc() from these places,
+ * but that would mean not being able to intercept it later if considered
+ * useful to do so.
+ */
+static void memprof_init()
+{
+ in_memprof++;
+ memprof_malloc_handler = get_sym_next_addr("malloc");
+ if (!memprof_malloc_handler)
+ memprof_die("FATAL: malloc() function not found.\n");
+
+ memprof_calloc_handler = get_sym_next_addr("calloc");
+ if (!memprof_calloc_handler)
+ memprof_die("FATAL: calloc() function not found.\n");
+
+ memprof_realloc_handler = get_sym_next_addr("realloc");
+ if (!memprof_realloc_handler)
+ memprof_die("FATAL: realloc() function not found.\n");
+
+ memprof_free_handler = get_sym_next_addr("free");
+ if (!memprof_free_handler)
+ memprof_die("FATAL: free() function not found.\n");
+ in_memprof--;
+}
+
+/* the initial handlers will initialize all regular handlers and will call the
+ * one they correspond to. A single one of these functions will typically be
+ * called, though it's unknown which one (as any might be called before main).
+ */
+static void *memprof_malloc_initial_handler(size_t size)
+{
+ if (in_memprof) {
+ /* it's likely that dlsym() needs malloc(), let's fail */
+ return NULL;
+ }
+
+ memprof_init();
+ return memprof_malloc_handler(size);
+}
+
+static void *memprof_calloc_initial_handler(size_t nmemb, size_t size)
+{
+ if (in_memprof) {
+ /* it's likely that dlsym() needs calloc(), let's fail */
+ return NULL;
+ }
+ memprof_init();
+ return memprof_calloc_handler(nmemb, size);
+}
+
+static void *memprof_realloc_initial_handler(void *ptr, size_t size)
+{
+ if (in_memprof) {
+ /* it's likely that dlsym() needs realloc(), let's fail */
+ return NULL;
+ }
+
+ memprof_init();
+ return memprof_realloc_handler(ptr, size);
+}
+
+static void memprof_free_initial_handler(void *ptr)
+{
+ memprof_init();
+ memprof_free_handler(ptr);
+}
+
+/* Assigns a memprof_stats bin to return address <ra>. May perform a few
+ * attempts before finding the right one, but always succeeds (in the worst
+ * case, returns a default bin). The caller address is atomically set except
+ * for the default one which is never set.
+ */
+static struct memprof_stats *memprof_get_bin(const void *ra, enum memprof_method meth)
+{
+ int retries = 16; // up to 16 consecutive entries may be tested.
+ const void *old;
+ unsigned int bin;
+
+ bin = memprof_hash_ptr(ra);
+ for (; memprof_stats[bin].caller != ra; bin = (bin + 1) & (MEMPROF_HASH_BUCKETS - 1)) {
+ if (!--retries) {
+ bin = MEMPROF_HASH_BUCKETS;
+ break;
+ }
+
+ old = NULL;
+ if (!memprof_stats[bin].caller &&
+ HA_ATOMIC_CAS(&memprof_stats[bin].caller, &old, ra)) {
+ memprof_stats[bin].method = meth;
+ break;
+ }
+ }
+ return &memprof_stats[bin];
+}
+
+/* This is the new global malloc() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1].
+ */
+void *malloc(size_t size)
+{
+ struct memprof_stats *bin;
+ void *ret;
+
+ if (likely(!(profiling & HA_PROF_MEMORY)))
+ return memprof_malloc_handler(size);
+
+ ret = memprof_malloc_handler(size);
+ size = malloc_usable_size(ret) + sizeof(void *);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_MALLOC);
+ _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ _HA_ATOMIC_ADD(&bin->alloc_tot, size);
+ return ret;
+}
+
+/* This is the new global calloc() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1].
+ */
+void *calloc(size_t nmemb, size_t size)
+{
+ struct memprof_stats *bin;
+ void *ret;
+
+ if (likely(!(profiling & HA_PROF_MEMORY)))
+ return memprof_calloc_handler(nmemb, size);
+
+ ret = memprof_calloc_handler(nmemb, size);
+ size = malloc_usable_size(ret) + sizeof(void *);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_CALLOC);
+ _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ _HA_ATOMIC_ADD(&bin->alloc_tot, size);
+ return ret;
+}
+
+/* This is the new global realloc() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1].
+ * Depending on the old vs new size, it's considered as an allocation or a free
+ * (or neither if the size remains the same).
+ */
+void *realloc(void *ptr, size_t size)
+{
+ struct memprof_stats *bin;
+ size_t size_before;
+ void *ret;
+
+ if (likely(!(profiling & HA_PROF_MEMORY)))
+ return memprof_realloc_handler(ptr, size);
+
+ size_before = malloc_usable_size(ptr);
+ ret = memprof_realloc_handler(ptr, size);
+ size = malloc_usable_size(ret);
+
+ /* only count the extra link for new allocations */
+ if (!ptr)
+ size += sizeof(void *);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_REALLOC);
+ if (size > size_before) {
+ _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ _HA_ATOMIC_ADD(&bin->alloc_tot, size - size_before);
+ } else if (size < size_before) {
+ _HA_ATOMIC_ADD(&bin->free_calls, 1);
+ _HA_ATOMIC_ADD(&bin->free_tot, size_before - size);
+ }
+ return ret;
+}
+
+/* This is the new global free() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1]. Since
+ * free() is often called on NULL pointers to collect garbage at the end of
+ * many functions or during config parsing, as a special case free(NULL)
+ * doesn't update any stats.
+ */
+void free(void *ptr)
+{
+ struct memprof_stats *bin;
+ size_t size_before;
+
+ if (likely(!(profiling & HA_PROF_MEMORY) || !ptr)) {
+ memprof_free_handler(ptr);
+ return;
+ }
+
+ size_before = malloc_usable_size(ptr) + sizeof(void *);
+ memprof_free_handler(ptr);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_FREE);
+ _HA_ATOMIC_ADD(&bin->free_calls, 1);
+ _HA_ATOMIC_ADD(&bin->free_tot, size_before);
+}
+
+#endif // USE_MEMORY_PROFILING
+
+/* Updates the current thread's statistics about stolen CPU time. The unit for
+ * <stolen> is half-milliseconds.
+ */
+void report_stolen_time(uint64_t stolen)
+{
+ activity[tid].cpust_total += stolen;
+ update_freq_ctr(&activity[tid].cpust_1s, stolen);
+ update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen);
+}
+
+/* Update avg_loop value for the current thread and possibly decide to enable
+ * task-level profiling on the current thread based on its average run time.
+ * The <run_time> argument is the number of microseconds elapsed since the
+ * last time poll() returned.
+ */
+void activity_count_runtime(uint32_t run_time)
+{
+ uint32_t up, down;
+
+ /* 1 millisecond per loop on average over last 1024 iterations is
+ * enough to turn on profiling.
+ */
+ up = 1000;
+ down = up * 99 / 100;
+
+ run_time = swrate_add(&activity[tid].avg_loop_us, TIME_STATS_SAMPLES, run_time);
+
+ /* In automatic mode, reaching the "up" threshold on average switches
+ * profiling on, and going back below the "down" threshold switches it
+ * off. The forced modes don't check the load.
+ */
+ if (!(task_profiling_mask & tid_bit)) {
+ if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON ||
+ ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AON &&
+ swrate_avg(run_time, TIME_STATS_SAMPLES) >= up)))
+ _HA_ATOMIC_OR(&task_profiling_mask, tid_bit);
+ } else {
+ if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF ||
+ ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AOFF &&
+ swrate_avg(run_time, TIME_STATS_SAMPLES) <= down)))
+ _HA_ATOMIC_AND(&task_profiling_mask, ~tid_bit);
+ }
+}
+
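+/* Worked example (illustrative): with up=1000us and down=990us, a thread whose
+ * loop time averaged over TIME_STATS_SAMPLES iterations reaches 1ms enables
+ * task profiling in "auto" mode, and must fall back to 0.99ms or below before
+ * it is disabled again. The 1% gap provides hysteresis so that a load sitting
+ * exactly on the threshold doesn't make profiling flap on and off.
+ */
+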
+#ifdef USE_MEMORY_PROFILING
+/* config parser for global "profiling.memory", accepts "on" or "off" */
+static int cfg_parse_prof_memory(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ profiling |= HA_PROF_MEMORY;
+ else if (strcmp(args[1], "off") == 0)
+ profiling &= ~HA_PROF_MEMORY;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+#endif // USE_MEMORY_PROFILING
+
+/* config parser for global "profiling.tasks", accepts "on" or "off" */
+static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON;
+ else if (strcmp(args[1], "auto") == 0)
+ profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
+ else if (strcmp(args[1], "off") == 0)
+ profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF;
+ else {
+ memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
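+/* Configuration sketch (illustrative): both keywords belong to the "global"
+ * section; "auto" keeps task profiling off until the load rises:
+ *
+ *   global
+ *       profiling.tasks  auto
+ *       profiling.memory on    # requires a USE_MEMORY_PROFILING build
+ */
+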
+/* parse a "set profiling" command. It always returns 1. */
+static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (strcmp(args[2], "memory") == 0) {
+#ifdef USE_MEMORY_PROFILING
+ if (strcmp(args[3], "on") == 0) {
+ unsigned int old = profiling;
+ int i;
+
+ while (!_HA_ATOMIC_CAS(&profiling, &old, old | HA_PROF_MEMORY))
+ ;
+
+ /* also flush current profiling stats */
+ for (i = 0; i < sizeof(memprof_stats) / sizeof(memprof_stats[0]); i++) {
+ HA_ATOMIC_STORE(&memprof_stats[i].alloc_calls, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].free_calls, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].alloc_tot, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].free_tot, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].caller, NULL);
+ }
+ }
+ else if (strcmp(args[3], "off") == 0) {
+ unsigned int old = profiling;
+
+ while (!_HA_ATOMIC_CAS(&profiling, &old, old & ~HA_PROF_MEMORY))
+ ;
+ }
+ else
+ return cli_err(appctx, "Expects either 'on' or 'off'.\n");
+ return 1;
+#else
+ return cli_err(appctx, "Memory profiling not compiled in.\n");
+#endif
+ }
+
+ if (strcmp(args[2], "tasks") != 0)
+ return cli_err(appctx, "Expects either 'tasks' or 'memory'.\n");
+
+ if (strcmp(args[3], "on") == 0) {
+ unsigned int old = profiling;
+ int i;
+
+ while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
+ ;
+ /* also flush current profiling stats */
+ for (i = 0; i < 256; i++) {
+ HA_ATOMIC_STORE(&sched_activity[i].calls, 0);
+ HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
+ HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
+ HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
+ }
+ }
+ else if (strcmp(args[3], "auto") == 0) {
+ unsigned int old = profiling;
+ unsigned int new;
+
+ do {
+ if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON)
+ new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON;
+ else
+ new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
+ } while (!_HA_ATOMIC_CAS(&profiling, &old, new));
+ }
+ else if (strcmp(args[3], "off") == 0) {
+ unsigned int old = profiling;
+ while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
+ ;
+ }
+ else
+ return cli_err(appctx, "Expects 'on', 'auto', or 'off'.\n");
+
+ return 1;
+}
+
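+/* CLI sketch (illustrative): toggling the profilers at runtime over the stats
+ * socket (the socket path is hypothetical). As seen above, setting "on" also
+ * flushes the accumulated counters:
+ *
+ *   $ echo "set profiling tasks on"  | socat /var/run/haproxy.sock -
+ *   $ echo "set profiling memory on" | socat /var/run/haproxy.sock -
+ */
+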
+static int cmp_sched_activity_calls(const void *a, const void *b)
+{
+ const struct sched_activity *l = (const struct sched_activity *)a;
+ const struct sched_activity *r = (const struct sched_activity *)b;
+
+ if (l->calls > r->calls)
+ return -1;
+ else if (l->calls < r->calls)
+ return 1;
+ else
+ return 0;
+}
+
+static int cmp_sched_activity_addr(const void *a, const void *b)
+{
+ const struct sched_activity *l = (const struct sched_activity *)a;
+ const struct sched_activity *r = (const struct sched_activity *)b;
+
+ if (l->func > r->func)
+ return -1;
+ else if (l->func < r->func)
+ return 1;
+ else
+ return 0;
+}
+
+#ifdef USE_MEMORY_PROFILING
+/* used by qsort below */
+static int cmp_memprof_stats(const void *a, const void *b)
+{
+ const struct memprof_stats *l = (const struct memprof_stats *)a;
+ const struct memprof_stats *r = (const struct memprof_stats *)b;
+
+ if (l->alloc_tot + l->free_tot > r->alloc_tot + r->free_tot)
+ return -1;
+ else if (l->alloc_tot + l->free_tot < r->alloc_tot + r->free_tot)
+ return 1;
+ else
+ return 0;
+}
+
+static int cmp_memprof_addr(const void *a, const void *b)
+{
+ const struct memprof_stats *l = (const struct memprof_stats *)a;
+ const struct memprof_stats *r = (const struct memprof_stats *)b;
+
+ if (l->caller > r->caller)
+ return -1;
+ else if (l->caller < r->caller)
+ return 1;
+ else
+ return 0;
+}
+#endif // USE_MEMORY_PROFILING
+
+/* Computes the index of function pointer <func> for use with sched_activity[]
+ * or any other similar array passed in <array>, and returns a pointer to the
+ * entry after having atomically assigned it to this function pointer. Note
+ * that in case of collision, the first entry is returned instead ("other").
+ */
+struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func)
+{
+ uint64_t hash = XXH64_avalanche(XXH64_mergeRound((size_t)func, (size_t)func));
+ struct sched_activity *ret;
+ const void *old = NULL;
+
+ hash ^= (hash >> 32);
+ hash ^= (hash >> 16);
+ hash ^= (hash >> 8);
+ hash &= 0xff;
+ ret = &array[hash];
+
+ if (likely(ret->func == func))
+ return ret;
+
+ if (HA_ATOMIC_CAS(&ret->func, &old, func))
+ return ret;
+
+ return array;
+}
+
+/* This function dumps all profiling settings. It returns 0 if the output
+ * buffer is full and it needs to be called again, otherwise non-zero.
+ * It dumps some parts depending on the following states from show_prof_ctx:
+ * dump_step:
+ * 0, 4: dump status, then jump to 1 if 0
+ * 1, 5: dump tasks, then jump to 2 if 1
+ * 2, 6: dump memory, then stop
+ * linenum:
+ * restart line for each step (starts at zero)
+ * maxcnt:
+ * may contain a configured max line count for each step (0=not set)
+ * by_addr:
+ * 0: sort by usage
+ * 1: sort by address
+ */
+static int cli_io_handler_show_profiling(struct appctx *appctx)
+{
+ struct show_prof_ctx *ctx = appctx->svcctx;
+ struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
+#ifdef USE_MEMORY_PROFILING
+ struct memprof_stats tmp_memstats[MEMPROF_HASH_BUCKETS + 1];
+ unsigned long long tot_alloc_calls, tot_free_calls;
+ unsigned long long tot_alloc_bytes, tot_free_bytes;
+#endif
+ struct stconn *sc = appctx_sc(appctx);
+ struct buffer *name_buffer = get_trash_chunk();
+ const char *str;
+ int max_lines;
+ int i, max;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ chunk_reset(&trash);
+
+ switch (profiling & HA_PROF_TASKS_MASK) {
+ case HA_PROF_TASKS_AOFF: str="auto-off"; break;
+ case HA_PROF_TASKS_AON: str="auto-on"; break;
+ case HA_PROF_TASKS_ON: str="on"; break;
+ default: str="off"; break;
+ }
+
+ if ((ctx->dump_step & 3) != 0)
+ goto skip_status;
+
+ chunk_printf(&trash,
+ "Per-task CPU profiling : %-8s # set profiling tasks {on|auto|off}\n"
+ "Memory usage profiling : %-8s # set profiling memory {on|off}\n",
+ str, (profiling & HA_PROF_MEMORY) ? "on" : "off");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+
+ ctx->linenum = 0; // reset first line to dump
+ if ((ctx->dump_step & 4) == 0)
+ ctx->dump_step++; // next step
+
+ skip_status:
+ if ((ctx->dump_step & 3) != 1)
+ goto skip_tasks;
+
+ memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
+ if (ctx->by_addr)
+ qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity_addr);
+ else
+ qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity_calls);
+
+ if (!ctx->linenum)
+ chunk_appendf(&trash, "Tasks activity:\n"
+ " function calls cpu_tot cpu_avg lat_tot lat_avg\n");
+
+ max_lines = ctx->maxcnt;
+ if (!max_lines)
+ max_lines = 256;
+
+ for (i = ctx->linenum; i < max_lines && tmp_activity[i].calls; i++) {
+ ctx->linenum = i;
+ chunk_reset(name_buffer);
+
+ if (!tmp_activity[i].func)
+ chunk_printf(name_buffer, "other");
+ else
+ resolve_sym_name(name_buffer, "", tmp_activity[i].func);
+
+ /* reserve 35 chars for name+' '+#calls, knowing that longer names
+ * are often used for less often called functions.
+ */
+ max = 35 - name_buffer->data;
+ if (max < 1)
+ max = 1;
+ chunk_appendf(&trash, " %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);
+
+ print_time_short(&trash, " ", tmp_activity[i].cpu_time, "");
+ print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
+ print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
+ print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+
+ ctx->linenum = 0; // reset first line to dump
+ if ((ctx->dump_step & 4) == 0)
+ ctx->dump_step++; // next step
+
+ skip_tasks:
+
+#ifdef USE_MEMORY_PROFILING
+ if ((ctx->dump_step & 3) != 2)
+ goto skip_mem;
+
+ memcpy(tmp_memstats, memprof_stats, sizeof(tmp_memstats));
+ if (ctx->by_addr)
+ qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr);
+ else
+ qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats);
+
+ if (!ctx->linenum)
+ chunk_appendf(&trash,
+ "Alloc/Free statistics by call place:\n"
+ " Calls | Tot Bytes | Caller and method\n"
+ "<- alloc -> <- free ->|<-- alloc ---> <-- free ---->|\n");
+
+ max_lines = ctx->maxcnt;
+ if (!max_lines)
+ max_lines = MEMPROF_HASH_BUCKETS + 1;
+
+ for (i = ctx->linenum; i < max_lines; i++) {
+ struct memprof_stats *entry = &tmp_memstats[i];
+
+ ctx->linenum = i;
+ if (!entry->alloc_calls && !entry->free_calls)
+ continue;
+ chunk_appendf(&trash, "%11llu %11llu %14llu %14llu| %16p ",
+ entry->alloc_calls, entry->free_calls,
+ entry->alloc_tot, entry->free_tot,
+ entry->caller);
+
+ if (entry->caller)
+ resolve_sym_name(&trash, NULL, entry->caller);
+ else
+ chunk_appendf(&trash, "[other]");
+
+ chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method],
+ (long long)(entry->alloc_tot - entry->free_tot) / (long long)(entry->alloc_calls + entry->free_calls));
+
+ if (entry->alloc_tot && entry->free_tot) {
+ /* that's a realloc, show the total diff to help spot leaks */
+ chunk_appendf(&trash," [delta=%lld]", (long long)(entry->alloc_tot - entry->free_tot));
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ }
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ tot_alloc_calls = tot_free_calls = tot_alloc_bytes = tot_free_bytes = 0;
+ for (i = 0; i < max_lines; i++) {
+ tot_alloc_calls += tmp_memstats[i].alloc_calls;
+ tot_free_calls += tmp_memstats[i].free_calls;
+ tot_alloc_bytes += tmp_memstats[i].alloc_tot;
+ tot_free_bytes += tmp_memstats[i].free_tot;
+ }
+
+ chunk_appendf(&trash,
+ "-----------------------|-----------------------------|\n"
+ "%11llu %11llu %14llu %14llu| <- Total; Delta_calls=%lld; Delta_bytes=%lld\n",
+ tot_alloc_calls, tot_free_calls,
+ tot_alloc_bytes, tot_free_bytes,
+ tot_alloc_calls - tot_free_calls,
+ tot_alloc_bytes - tot_free_bytes);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ ctx->linenum = 0; // reset first line to dump
+ if ((ctx->dump_step & 4) == 0)
+ ctx->dump_step++; // next step
+
+ skip_mem:
+#endif // USE_MEMORY_PROFILING
+
+ return 1;
+}
+
+/* parse a "show profiling" command. It returns 1 on failure, 0 if it starts to dump.
+ * - ctx->dump_step is set to the first state (0=all, 4=status, 5=tasks, 6=memory)
+ * - ctx->by_addr is set to 1 if the output must be sorted by addr instead of usage
+ * - ctx->maxcnt is set to the max line count per step (0=not set)
+ */
+static int cli_parse_show_profiling(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_prof_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ for (arg = 2; *args[arg]; arg++) {
+ if (strcmp(args[arg], "all") == 0) {
+ ctx->dump_step = 0; // will cycle through 0,1,2; default
+ }
+ else if (strcmp(args[arg], "status") == 0) {
+ ctx->dump_step = 4; // will visit status only
+ }
+ else if (strcmp(args[arg], "tasks") == 0) {
+ ctx->dump_step = 5; // will visit tasks only
+ }
+ else if (strcmp(args[arg], "memory") == 0) {
+ ctx->dump_step = 6; // will visit memory only
+ }
+ else if (strcmp(args[arg], "byaddr") == 0) {
+ ctx->by_addr = 1; // sort output by address instead of usage
+ }
+ else if (isdigit((unsigned char)*args[arg])) {
+ ctx->maxcnt = atoi(args[arg]); // number of entries to dump
+ }
+ else
+ return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr' or a max number of output lines.\n");
+ }
+ return 0;
+}
+
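+/* CLI sketch (illustrative): dumping only the tasks section, limited to 20
+ * lines and sorted by address (the socket path is hypothetical):
+ *
+ *   $ echo "show profiling tasks 20 byaddr" | socat /var/run/haproxy.sock -
+ */
+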
+/* This function scans all threads' run queues and collects statistics about
+ * running tasks. It returns 0 if the output buffer is full and it needs to be
+ * called again, otherwise non-zero.
+ */
+static int cli_io_handler_show_tasks(struct appctx *appctx)
+{
+ struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
+ struct stconn *sc = appctx_sc(appctx);
+ struct buffer *name_buffer = get_trash_chunk();
+ struct sched_activity *entry;
+ const struct tasklet *tl;
+ const struct task *t;
+ uint64_t now_ns, lat;
+ struct eb32sc_node *rqnode;
+ uint64_t tot_calls;
+ int thr, queue;
+ int i, max;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ /* It's not possible to scan queues in small chunks and yield in the
+ * middle of the dump and come back again. So what we're doing instead
+ * is to freeze all threads and inspect their queues at once as fast as
+ * possible, using a sched_activity array to collect metrics with
+ * limited collision, then we'll report statistics only. The tasks'
+ * #calls will reflect the number of occurrences, and the lat_time will
+ * reflect the latency when set. We prefer to take the time before
+ * calling thread_isolate() so that the wait time doesn't impact the
+ * measurement accuracy. However this requires taking care of negative
+ * times since tasks might be queued after we retrieve the time.
+ */
+
+ now_ns = now_mono_time();
+ memset(tmp_activity, 0, sizeof(tmp_activity));
+
+ thread_isolate();
+
+ /* 1. global run queue */
+
+#ifdef USE_THREAD
+ rqnode = eb32sc_first(&rqueue, ~0UL);
+ while (rqnode) {
+ t = eb32sc_entry(rqnode, struct task, rq);
+ entry = sched_activity_entry(tmp_activity, t->process);
+ if (t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ rqnode = eb32sc_next(rqnode, ~0UL);
+ }
+#endif
+ /* 2. all threads' local run queues */
+ for (thr = 0; thr < global.nbthread; thr++) {
+ /* task run queue */
+ rqnode = eb32sc_first(&ha_thread_ctx[thr].rqueue, ~0UL);
+ while (rqnode) {
+ t = eb32sc_entry(rqnode, struct task, rq);
+ entry = sched_activity_entry(tmp_activity, t->process);
+ if (t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ rqnode = eb32sc_next(rqnode, ~0UL);
+ }
+
+ /* shared tasklet list */
+ list_for_each_entry(tl, mt_list_to_list(&ha_thread_ctx[thr].shared_tasklet_list), list) {
+ t = (const struct task *)tl;
+ entry = sched_activity_entry(tmp_activity, t->process);
+ if (!TASK_IS_TASKLET(t) && t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ }
+
+ /* classful tasklets */
+ for (queue = 0; queue < TL_CLASSES; queue++) {
+ list_for_each_entry(tl, &ha_thread_ctx[thr].tasklets[queue], list) {
+ t = (const struct task *)tl;
+ entry = sched_activity_entry(tmp_activity, t->process);
+ if (!TASK_IS_TASKLET(t) && t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ }
+ }
+ }
+
+ /* hopefully we're done */
+ thread_release();
+
+ chunk_reset(&trash);
+
+ tot_calls = 0;
+ for (i = 0; i < 256; i++)
+ tot_calls += tmp_activity[i].calls;
+
+ qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity_calls);
+
+ chunk_appendf(&trash, "Running tasks: %d (%d threads)\n"
+ " function places %% lat_tot lat_avg\n",
+ (int)tot_calls, global.nbthread);
+
+ for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
+ chunk_reset(name_buffer);
+
+ if (!tmp_activity[i].func)
+ chunk_printf(name_buffer, "other");
+ else
+ resolve_sym_name(name_buffer, "", tmp_activity[i].func);
+
+ /* reserve 35 chars for name+' '+#calls, knowing that longer names
+ * are often used for less often called functions.
+ */
+ max = 35 - name_buffer->data;
+ if (max < 1)
+ max = 1;
+ chunk_appendf(&trash, " %s%*llu %3d.%1d",
+ name_buffer->area, max, (unsigned long long)tmp_activity[i].calls,
+ (int)(100ULL * tmp_activity[i].calls / tot_calls),
+ (int)((1000ULL * tmp_activity[i].calls / tot_calls)%10));
+ print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
+ print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+ return 1;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+#ifdef USE_MEMORY_PROFILING
+ { CFG_GLOBAL, "profiling.memory", cfg_parse_prof_memory },
+#endif
+ { CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "set", "profiling", NULL }, "set profiling <what> {auto|on|off} : enable/disable resource profiling (tasks,memory)", cli_parse_set_profiling, NULL },
+ { { "show", "profiling", NULL }, "show profiling [<what>|<#lines>|byaddr]*: show profiling state (all,status,tasks,memory)", cli_parse_show_profiling, cli_io_handler_show_profiling, NULL },
+ { { "show", "tasks", NULL }, "show tasks : show running tasks", NULL, cli_io_handler_show_tasks, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/applet.c b/src/applet.c
new file mode 100644
index 0000000..c777bc2
--- /dev/null
+++ b/src/applet.c
@@ -0,0 +1,269 @@
+/*
+ * Functions managing applets
+ *
+ * Copyright 2000-2015 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/channel.h>
+#include <haproxy/list.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+
+unsigned int nb_applets = 0;
+
+DECLARE_POOL(pool_head_appctx, "appctx", sizeof(struct appctx));
+
+/* Tries to allocate a new appctx and initialize all of its fields. The appctx
+ * is returned on success, NULL on failure. The appctx must be released using
+ * appctx_free(). <applet> is assigned as the applet, but it can be NULL. The
+ * applet's task is always created on the current thread.
+ */
+struct appctx *appctx_new(struct applet *applet, struct sedesc *sedesc, unsigned long thread_mask)
+{
+ struct appctx *appctx;
+
+ /* Backend appctx cannot be started on another thread than the local one */
+ BUG_ON(thread_mask != tid_bit && sedesc);
+
+ appctx = pool_zalloc(pool_head_appctx);
+ if (unlikely(!appctx))
+ goto fail_appctx;
+
+ LIST_INIT(&appctx->wait_entry);
+ appctx->obj_type = OBJ_TYPE_APPCTX;
+ appctx->applet = applet;
+ appctx->sess = NULL;
+
+ appctx->t = task_new(thread_mask);
+ if (unlikely(!appctx->t))
+ goto fail_task;
+
+ if (!sedesc) {
+ sedesc = sedesc_new();
+ if (unlikely(!sedesc))
+ goto fail_endp;
+ sedesc->se = appctx;
+ se_fl_set(sedesc, SE_FL_T_APPLET | SE_FL_ORPHAN);
+ }
+
+ appctx->sedesc = sedesc;
+ appctx->t->process = task_run_applet;
+ appctx->t->context = appctx;
+
+ LIST_INIT(&appctx->buffer_wait.list);
+ appctx->buffer_wait.target = appctx;
+ appctx->buffer_wait.wakeup_cb = appctx_buf_available;
+
+ _HA_ATOMIC_INC(&nb_applets);
+ return appctx;
+
+ fail_endp:
+ task_destroy(appctx->t);
+ fail_task:
+ pool_free(pool_head_appctx, appctx);
+ fail_appctx:
+ return NULL;
+}
+
+/* Finalize the frontend appctx startup. It must not be called for a backend
+ * appctx. This function is responsible for creating the appctx's session and the
+ * frontend stream connector. By transitivity, the stream is also created.
+ *
+ * It returns 0 on success and -1 on error. In this case, it is the caller's
+ * responsibility to release the appctx. However, the session is released if it
+ * was created. On success, if an error is encountered in the caller function,
+ * the stream must be released instead of the appctx. To be sure,
+ * appctx_free_on_early_error() must be called in this case.
+ */
+int appctx_finalize_startup(struct appctx *appctx, struct proxy *px, struct buffer *input)
+{
+ struct session *sess;
+
+ /* async startup is only possible for frontend appctx, thus for orphan
+ * appctx, because no backend appctx can be orphan.
+ */
+ BUG_ON(!se_fl_test(appctx->sedesc, SE_FL_ORPHAN));
+
+ sess = session_new(px, NULL, &appctx->obj_type);
+ if (!sess)
+ return -1;
+ if (!sc_new_from_endp(appctx->sedesc, sess, input)) {
+ session_free(sess);
+ return -1;
+ }
+ appctx->sess = sess;
+ return 0;
+}
+
+/* Release function to call when an error occurs during the init stage of a
+ * frontend appctx. For a backend appctx, it just calls appctx_free()
+ */
+void appctx_free_on_early_error(struct appctx *appctx)
+{
+ /* If a frontend appctx is attached to a stream connector, release the stream
+ * instead of the appctx.
+ */
+ if (!se_fl_test(appctx->sedesc, SE_FL_ORPHAN) && !(appctx_sc(appctx)->flags & SC_FL_ISBACK)) {
+ stream_free(appctx_strm(appctx));
+ return;
+ }
+ appctx_free(appctx);
+}
+
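+/* Creation sketch (illustrative): a frontend service creating an orphan
+ * appctx on the current thread then finalizing its startup. <my_applet> and
+ * <frontend_px> are hypothetical, and BUF_NULL is assumed to be a valid
+ * empty input buffer here:
+ *
+ *   struct appctx *appctx = appctx_new(&my_applet, NULL, tid_bit);
+ *
+ *   if (!appctx)
+ *       goto fail;
+ *   if (appctx_finalize_startup(appctx, frontend_px, &BUF_NULL) == -1) {
+ *       appctx_free_on_early_error(appctx);
+ *       goto fail;
+ *   }
+ */
+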
+/* reserves a command context of at least <size> bytes in the <appctx>, for
+ * use by a CLI command or any regular applet. The pointer to this context is
+ * stored in ctx.svcctx and is returned. The caller doesn't need to release
+ * it as it's allocated from reserved space. If the size is larger than
+ * APPLET_MAX_SVCCTX a crash will occur (hence that will never happen outside
+ * of development).
+ *
+ * Note that this function does *not* initialize the area, so that it can easily
+ * be used upon each entry in a function. It's left to the initialization code
+ * to do it if needed. The CLI will always zero the whole area before calling
+ * a keyword's ->parse() function.
+ */
+void *applet_reserve_svcctx(struct appctx *appctx, size_t size)
+{
+ BUG_ON(size > APPLET_MAX_SVCCTX);
+ appctx->svcctx = &appctx->svc.storage;
+ return appctx->svcctx;
+}
+
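+/* Usage sketch (illustrative): a CLI keyword parser typically reserves its
+ * command context this way, as cli_parse_show_profiling() does earlier in
+ * this patch with its show_prof_ctx. <my_ctx> is hypothetical:
+ *
+ *   struct my_ctx { int state; };
+ *   struct my_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ *
+ *   ctx->state = 0; // the CLI zeroes the area before ->parse(), others must init it
+ */
+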
+/* This is used to reset an svcctx and the svc.storage without releasing the
+ * appctx. In fact this is only used by the CLI applet between commands.
+ */
+void applet_reset_svcctx(struct appctx *appctx)
+{
+ memset(&appctx->svc.storage, 0, APPLET_MAX_SVCCTX);
+ appctx->svcctx = NULL;
+}
+
+/* call the applet's release() function if any, and marks the sedesc as shut.
+ * Needs to be called upon close().
+ */
+void appctx_shut(struct appctx *appctx)
+{
+ if (se_fl_test(appctx->sedesc, SE_FL_SHR | SE_FL_SHW))
+ return;
+
+ if (appctx->applet->release)
+ appctx->applet->release(appctx);
+
+ se_fl_set(appctx->sedesc, SE_FL_SHRR | SE_FL_SHWN);
+}
+
+/* Callback used to wake up an applet when a buffer is available. The applet
+ * <appctx> is woken up if an input buffer was requested for the associated
+ * stream connector. In this case the buffer is immediately allocated and the
+ * function returns 1. Otherwise it returns 0. Note that this automatically
+ * covers multiple wake-up attempts by ensuring that the same buffer will not
+ * be accounted for multiple times.
+ */
+int appctx_buf_available(void *arg)
+{
+ struct appctx *appctx = arg;
+ struct stconn *sc = appctx_sc(appctx);
+
+ /* allocation requested ? */
+ if (!(sc->flags & SC_FL_NEED_BUFF))
+ return 0;
+
+ sc_have_buff(sc);
+
+ /* was already allocated another way ? if so, don't take this one */
+ if (c_size(sc_ic(sc)) || sc_ic(sc)->pipe)
+ return 0;
+
+ /* allocation possible now ? */
+ if (!b_alloc(&sc_ic(sc)->buf)) {
+ sc_need_buff(sc);
+ return 0;
+ }
+
+ task_wakeup(appctx->t, TASK_WOKEN_RES);
+ return 1;
+}
+
+/* Default applet handler */
+struct task *task_run_applet(struct task *t, void *context, unsigned int state)
+{
+ struct appctx *app = context;
+ struct stconn *sc;
+ unsigned int rate;
+ size_t count;
+
+ if (app->state & APPLET_WANT_DIE) {
+ __appctx_free(app);
+ return NULL;
+ }
+
+ if (se_fl_test(app->sedesc, SE_FL_ORPHAN)) {
+ /* Finalize init of orphan appctx. .init callback function must
+ * be defined and it must finalize appctx startup.
+ */
+ BUG_ON(!app->applet->init);
+
+ if (appctx_init(app) == -1) {
+ appctx_free_on_early_error(app);
+ return NULL;
+ }
+ BUG_ON(!app->sess || !appctx_sc(app) || !appctx_strm(app));
+ }
+
+ sc = appctx_sc(app);
+
+ /* We always pretend the applet can't get and doesn't want to
+ * put, it's up to it to change this if needed. This ensures
+ * that one applet which ignores any event will not spin.
+ */
+ applet_need_more_data(app);
+ applet_have_no_more_data(app);
+
+ /* Now we'll try to allocate the input buffer. We wake up the applet in
+ * all cases. So this is the applet's responsibility to check if this
+ * buffer was allocated or not. This leaves a chance for applets to do
+ * some other processing if needed. The applet doesn't have anything to
+ * do if it needs the buffer, it will be called again upon readiness.
+ */
+ if (!sc_alloc_ibuf(sc, &app->buffer_wait))
+ applet_have_more_data(app);
+
+ count = co_data(sc_oc(sc));
+ app->applet->fct(app);
+
+ /* now check if the applet has released some room and forgot to
+ * notify the other side about it.
+ */
+ if (count != co_data(sc_oc(sc))) {
+ sc_oc(sc)->flags |= CF_WRITE_PARTIAL | CF_WROTE_DATA;
+ sc_have_room(sc_opposite(sc));
+ }
+
+ /* measure the call rate and check for anomalies when too high */
+ if (((b_size(sc_ib(sc)) && sc->flags & SC_FL_NEED_BUFF) || // asks for a buffer which is present
+ (b_size(sc_ib(sc)) && !b_data(sc_ib(sc)) && sc->flags & SC_FL_NEED_ROOM) || // asks for room in an empty buffer
+ (b_data(sc_ob(sc)) && sc_is_send_allowed(sc)) || // asks for data already present
+ (!b_data(sc_ib(sc)) && b_data(sc_ob(sc)) && // didn't return anything ...
+ (sc_oc(sc)->flags & (CF_WRITE_PARTIAL|CF_SHUTW_NOW)) == CF_SHUTW_NOW))) { // ... and left data pending after a shut
+ rate = update_freq_ctr(&app->call_rate, 1);
+ if (rate >= 100000 && app->call_rate.prev_ctr) // looped like this more than 100k times over last second
+ stream_dump_and_crash(&app->obj_type, read_freq_ctr(&app->call_rate));
+ }
+
+ sc->app_ops->wake(sc);
+ channel_release_buffer(sc_ic(sc), &app->buffer_wait);
+ return t;
+}
diff --git a/src/arg.c b/src/arg.c
new file mode 100644
index 0000000..039602a
--- /dev/null
+++ b/src/arg.c
@@ -0,0 +1,479 @@
+/*
+ * Functions used to parse typed argument lists
+ *
+ * Copyright 2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <haproxy/arg.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/regex.h>
+#include <haproxy/tools.h>
+
+const char *arg_type_names[ARGT_NBTYPES] = {
+ [ARGT_STOP] = "end of arguments",
+ [ARGT_SINT] = "integer",
+ [ARGT_STR] = "string",
+ [ARGT_IPV4] = "IPv4 address",
+ [ARGT_MSK4] = "IPv4 mask",
+ [ARGT_IPV6] = "IPv6 address",
+ [ARGT_MSK6] = "IPv6 mask",
+ [ARGT_TIME] = "delay",
+ [ARGT_SIZE] = "size",
+ [ARGT_FE] = "frontend",
+ [ARGT_BE] = "backend",
+ [ARGT_TAB] = "table",
+ [ARGT_SRV] = "server",
+ [ARGT_USR] = "user list",
+ [ARGT_MAP] = "map",
+ [ARGT_REG] = "regex",
+ [ARGT_VAR] = "variable",
+ [ARGT_PBUF_FNUM] = "Protocol buffers field number",
+ /* Unassigned types must never happen. Better crash during parsing if they do. */
+};
+
+/* This dummy arg list may be used by default when no arg is found, it helps
+ * parsers by removing pointer checks.
+ */
+struct arg empty_arg_list[ARGM_NBARGS] = { };
+
+/* This function clones a struct arg_list template into a new one which is
+ * returned.
+ */
+struct arg_list *arg_list_clone(const struct arg_list *orig)
+{
+ struct arg_list *new;
+
+ if ((new = calloc(1, sizeof(*new))) != NULL) {
+ /* ->list will be set by the caller when inserting the element.
+ * ->arg and ->arg_pos will be set by the caller.
+ */
+ new->ctx = orig->ctx;
+ new->kw = orig->kw;
+ new->conv = orig->conv;
+ new->file = orig->file;
+ new->line = orig->line;
+ }
+ return new;
+}
+
+/* This function clones a struct <arg_list> template into a new one which is
+ * set to point to arg <arg> at pos <pos>, and which is returned if the caller
+ * wants to apply further changes.
+ */
+struct arg_list *arg_list_add(struct arg_list *orig, struct arg *arg, int pos)
+{
+ struct arg_list *new;
+
+ new = arg_list_clone(orig);
+ if (new) {
+ new->arg = arg;
+ new->arg_pos = pos;
+ LIST_APPEND(&orig->list, &new->list);
+ }
+ return new;
+}
+
+/* This function builds an argument list from a config line, and stops at the
+ * first non-matching character, which is pointed to in <end_ptr>. A valid arg
+ * list starts with an opening parenthesis '(', contains a number of comma-
+ * delimited words, and ends with the closing parenthesis ')'. An empty list
+ * (with or without the parenthesis) will lead to a valid empty argument if the
+ * keyword has a mandatory one. The function returns the number of arguments
+ * emitted, or <0 in case of any error. Everything needed is automatically
+ * allocated. A pointer to an error message might be returned in err_msg if not
+ * NULL, in which case it would be allocated and the caller will have to check
+ * it and free it. The output arg list is returned in argp which must be valid.
+ * The returned array is always terminated by an arg of type ARGT_STOP (0),
+ * unless the mask indicates that no argument is supported. Unresolved arguments
+ * are appended to arg list <al>, which also serves as a template to create new
+ * entries. <al> may be NULL if unresolved arguments are not allowed. The mask
+ * is composed of a number of mandatory arguments in its lower ARGM_BITS bits,
+ * and a concatenation of each argument type in each subsequent ARGT_BITS-bit
+ * sblock. If <err_msg> is not NULL, it must point to a freeable or NULL
+ * pointer. The caller is expected to restart the parsing from the new pointer
+ * set in <end_ptr>, which is the first character considered as not being part
+ * of the arg list. The input string ends after <len> characters (when len is
+ * positive) or at the first NUL character, whichever comes first. Placing -1
+ * in <len> will make it virtually unbounded (~2GB long strings).
+ */
+int make_arg_list(const char *in, int len, uint64_t mask, struct arg **argp,
+ char **err_msg, const char **end_ptr, int *err_arg,
+ struct arg_list *al)
+{
+ int nbarg;
+ int pos;
+ struct arg *arg;
+ const char *beg;
+ const char *ptr_err = NULL;
+ int min_arg;
+ int empty;
+ struct arg_list *new_al = al;
+
+ *argp = NULL;
+
+ empty = 0;
+ if (!len || *in != '(') {
+ /* it's already not for us, stop here */
+ empty = 1;
+ len = 0;
+ } else {
+ /* skip opening parenthesis */
+ len--;
+ in++;
+ }
+
+ min_arg = mask & ARGM_MASK;
+ mask >>= ARGM_BITS;
+
+ pos = 0;
+ /* find between 0 and NBARGS the max number of args supported by the mask */
+ for (nbarg = 0; nbarg < ARGM_NBARGS && ((mask >> (nbarg * ARGT_BITS)) & ARGT_MASK); nbarg++);
+
+ if (!nbarg)
+ goto end_parse;
+
+ /* Note: an empty input string contains an empty argument if this argument
+ * is marked mandatory. Otherwise we can ignore it.
+ */
+ if (empty && !min_arg)
+ goto end_parse;
+
+ arg = *argp = calloc(nbarg + 1, sizeof(**argp));
+
+ if (!arg)
+ goto alloc_err;
+
+ /* Note: empty arguments after a comma always exist. */
+ while (pos < nbarg) {
+ unsigned int uint;
+ int squote = 0, dquote = 0;
+ char *out;
+
+ chunk_reset(&trash);
+ out = trash.area;
+
+ while (len && *in && trash.data < trash.size - 1) {
+ if (*in == '"' && !squote) { /* double quote outside single quotes */
+ if (dquote)
+ dquote = 0;
+ else
+ dquote = 1;
+ in++; len--;
+ continue;
+ }
+ else if (*in == '\'' && !dquote) { /* single quote outside double quotes */
+ if (squote)
+ squote = 0;
+ else
+ squote = 1;
+ in++; len--;
+ continue;
+ }
+ else if (*in == '\\' && !squote && len != 1) {
+ /* '\', ' ', '"' and ''' can be escaped by '\' */
+ if (len == 1 || in[1] == 0)
+ goto unquote_err;
+
+ if (in[1] == '\\' || in[1] == ' ' || in[1] == '"' || in[1] == '\'') {
+ in++; len--;
+ *out++ = *in;
+ }
+ else if (in[1] == 'r') {
+ in++; len--;
+ *out++ = '\r';
+ }
+ else if (in[1] == 'n') {
+ in++; len--;
+ *out++ = '\n';
+ }
+ else if (in[1] == 't') {
+ in++; len--;
+ *out++ = '\t';
+ }
+ else {
+ /* just a lone '\' */
+ *out++ = *in;
+ }
+ in++; len--;
+ }
+ else {
+ if (!squote && !dquote && (*in == ',' || *in == ')')) {
+ /* end of argument */
+ break;
+ }
+ /* verbatim copy */
+ *out++ = *in++;
+ len--;
+ }
+ trash.data = out - trash.area;
+ }
+
+ if (len && *in && *in != ',' && *in != ')')
+ goto buffer_err;
+
+ trash.area[trash.data] = 0;
+
+ arg->type = (mask >> (pos * ARGT_BITS)) & ARGT_MASK;
+
+ switch (arg->type) {
+ case ARGT_SINT:
+ if (!trash.data) // empty number
+ goto empty_err;
+ beg = trash.area;
+ arg->data.sint = read_int64(&beg, trash.area + trash.data);
+ if (beg < trash.area + trash.data)
+ goto parse_err;
+ arg->type = ARGT_SINT;
+ break;
+
+ case ARGT_FE:
+ case ARGT_BE:
+ case ARGT_TAB:
+ case ARGT_SRV:
+ case ARGT_USR:
+ case ARGT_REG:
+ /* These argument types need to be stored as strings during
+ * parsing then resolved later.
+ */
+ if (!al)
+ goto resolve_err;
+ arg->unresolved = 1;
+ new_al = arg_list_add(al, arg, pos);
+
+ /* fall through */
+ case ARGT_STR:
+ /* all types that must be resolved are stored as strings
+ * during the parsing. The caller must at one point resolve
+ * them and free the string.
+ */
+ arg->data.str.area = my_strndup(trash.area, trash.data);
+ arg->data.str.data = trash.data;
+ arg->data.str.size = trash.data + 1;
+ break;
+
+ case ARGT_IPV4:
+ if (!trash.data) // empty address
+ goto empty_err;
+
+ if (inet_pton(AF_INET, trash.area, &arg->data.ipv4) <= 0)
+ goto parse_err;
+ break;
+
+ case ARGT_MSK4:
+ if (!trash.data) // empty mask
+ goto empty_err;
+
+ if (!str2mask(trash.area, &arg->data.ipv4))
+ goto parse_err;
+
+ arg->type = ARGT_IPV4;
+ break;
+
+ case ARGT_IPV6:
+ if (!trash.data) // empty address
+ goto empty_err;
+
+ if (inet_pton(AF_INET6, trash.area, &arg->data.ipv6) <= 0)
+ goto parse_err;
+ break;
+
+ case ARGT_MSK6:
+ if (!trash.data) // empty mask
+ goto empty_err;
+
+ if (!str2mask6(trash.area, &arg->data.ipv6))
+ goto parse_err;
+
+ arg->type = ARGT_IPV6;
+ break;
+
+ case ARGT_TIME:
+ if (!trash.data) // empty time
+ goto empty_err;
+
+ ptr_err = parse_time_err(trash.area, &uint, TIME_UNIT_MS);
+ if (ptr_err) {
+ if (ptr_err == PARSE_TIME_OVER || ptr_err == PARSE_TIME_UNDER)
+ ptr_err = trash.area;
+ goto parse_err;
+ }
+ arg->data.sint = uint;
+ arg->type = ARGT_SINT;
+ break;
+
+ case ARGT_SIZE:
+ if (!trash.data) // empty size
+ goto empty_err;
+
+ ptr_err = parse_size_err(trash.area, &uint);
+ if (ptr_err)
+ goto parse_err;
+
+ arg->data.sint = uint;
+ arg->type = ARGT_SINT;
+ break;
+
+ case ARGT_PBUF_FNUM:
+ if (!trash.data)
+ goto empty_err;
+
+ if (!parse_dotted_uints(trash.area, &arg->data.fid.ids, &arg->data.fid.sz))
+ goto parse_err;
+
+ break;
+
+ /* FIXME: other types need to be implemented here */
+ default:
+ goto not_impl;
+ }
+
+ pos++;
+ arg++;
+
+ /* don't go back to parsing if we reached end */
+ if (!len || !*in || *in == ')' || pos >= nbarg)
+ break;
+
+ /* skip comma */
+ in++; len--;
+ }
+
+ end_parse:
+ if (pos < min_arg) {
+ /* not enough arguments */
+ memprintf(err_msg,
+ "missing arguments (got %d/%d), type '%s' expected",
+ pos, min_arg, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK]);
+ goto err;
+ }
+
+ if (empty) {
+ /* nothing to do */
+ } else if (*in == ')') {
+ /* skip the expected closing parenthesis */
+ in++;
+ } else {
+ /* the caller is responsible for freeing this message */
+ char *word = (len > 0) ? my_strndup(in, len) : (char *)in;
+
+ if (*word)
+ memprintf(err_msg, "expected ')' before '%s'", word);
+ else
+ memprintf(err_msg, "expected ')'");
+
+ if (len > 0)
+ free(word);
+ /* when the right parenthesis is missing, the empty part preceding
+ * it already created an empty arg, adding one to the position, so
+ * let's fix the reported position to avoid confusion.
+ */
+ if (pos > 1)
+ pos--;
+ goto err;
+ }
+
+ /* note that pos might be < nbarg and this is not an error, it's up to the
+ * caller to decide what to do with optional args.
+ */
+ if (err_arg)
+ *err_arg = pos;
+ if (end_ptr)
+ *end_ptr = in;
+ return pos;
+
+ err:
+ if (new_al == al) {
+ /* only free the arg area if we have not queued unresolved args
+ * still pointing to it.
+ */
+ free_args(*argp);
+ free(*argp);
+ }
+ *argp = NULL;
+ if (err_arg)
+ *err_arg = pos;
+ if (end_ptr)
+ *end_ptr = in;
+ return -1;
+
+ empty_err:
+ /* If we've only got an empty set of parentheses with nothing
+ * in between, there is no arg at all.
+ */
+ if (!pos) {
+ ha_free(argp);
+ }
+
+ if (pos >= min_arg)
+ goto end_parse;
+
+ memprintf(err_msg, "expected type '%s' at position %d, but got nothing",
+ arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+
+ parse_err:
+ /* come here with the word attempted to parse in trash */
+ memprintf(err_msg, "failed to parse '%s' as type '%s' at position %d",
+ trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+
+ not_impl:
+ memprintf(err_msg, "parsing for type '%s' was not implemented, please report this bug",
+ arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK]);
+ goto err;
+
+ buffer_err:
+ memprintf(err_msg, "too small buffer size to store decoded argument %d, increase bufsize ?",
+ pos + 1);
+ goto err;
+
+ unquote_err:
+ /* come here with the parsed part in <trash.area>:<trash.data> and the
+ * unparsable part in <in>.
+ */
+ trash.area[trash.data] = 0;
+ memprintf(err_msg, "failed to parse '%s' after '%s' as type '%s' at position %d",
+ in, trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+
+alloc_err:
+ memprintf(err_msg, "out of memory");
+ goto err;
+
+ resolve_err:
+ memprintf(err_msg, "unresolved argument of type '%s' at position %d not allowed",
+ arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+}
+
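+/* Usage sketch (illustrative): parsing "(10s)" for a keyword taking one
+ * mandatory time argument. ARG1() is assumed to build the mask (one mandatory
+ * argument of the given type), as sample/converter declarations do:
+ *
+ *   struct arg *argp;
+ *   char *errmsg = NULL;
+ *   const char *endptr;
+ *   int err_arg;
+ *
+ *   if (make_arg_list("(10s)", -1, ARG1(1,TIME), &argp, &errmsg,
+ *                     &endptr, &err_arg, NULL) < 0) {
+ *       // report and free <errmsg>
+ *   }
+ */
+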
+/* Free all args of an args array, taking care of unresolved arguments as well.
+ * It stops at the ARGT_STOP, which must be present. The array itself is not
+ * freed, it's up to the caller to do it. However it is returned, allowing to
+ * call free(free_args(argptr)). It is valid to call it with NULL args, in
+ * which case nothing will be done.
+ */
+struct arg *free_args(struct arg *args)
+{
+ struct arg *arg;
+
+ for (arg = args; arg && arg->type != ARGT_STOP; arg++) {
+ if (arg->type == ARGT_STR || arg->unresolved)
+ chunk_destroy(&arg->data.str);
+ else if (arg->type == ARGT_REG)
+ regex_free(arg->data.reg);
+ else if (arg->type == ARGT_PBUF_FNUM)
+ ha_free(&arg->data.fid.ids);
+ }
+ return args;
+}
diff --git a/src/auth.c b/src/auth.c
new file mode 100644
index 0000000..8c26374
--- /dev/null
+++ b/src/auth.c
@@ -0,0 +1,316 @@
+/*
+ * User authentication & authorization
+ *
+ * Copyright 2010 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifdef USE_LIBCRYPT
+/* This is to have crypt() defined on Linux */
+#define _GNU_SOURCE
+
+#ifdef USE_CRYPT_H
+/* some platforms such as Solaris need this */
+#include <crypt.h>
+#endif
+#endif /* USE_LIBCRYPT */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/auth-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/sample-t.h>
+#include <haproxy/thread.h>
+
+struct userlist *userlist = NULL; /* list of all existing userlists */
+
+#ifdef USE_LIBCRYPT
+#define CRYPT_STATE_MSG "yes"
+#ifdef HA_HAVE_CRYPT_R
+/* context for crypt_r() */
+static THREAD_LOCAL struct crypt_data crypt_data = { .initialized = 0 };
+#else
+/* lock for crypt() */
+__decl_thread(static HA_SPINLOCK_T auth_lock);
+#endif
+#else /* USE_LIBCRYPT */
+#define CRYPT_STATE_MSG "no"
+#endif
+
+/* looks up a userlist by name. The function returns a pointer to
+ * the userlist struct, or NULL if <name> is NULL/empty or not found.
+ */
+
+struct userlist *
+auth_find_userlist(char *name)
+{
+ struct userlist *l;
+
+ if (!name || !*name)
+ return NULL;
+
+ for (l = userlist; l; l = l->next)
+ if (strcmp(l->name, name) == 0)
+ return l;
+
+ return NULL;
+}
+
+int check_group(struct userlist *ul, char *name)
+{
+ struct auth_groups *ag;
+
+ for (ag = ul->groups; ag; ag = ag->next)
+ if (strcmp(name, ag->name) == 0)
+ return 1;
+ return 0;
+}
+
+void
+userlist_free(struct userlist *ul)
+{
+ struct userlist *tul;
+ struct auth_users *au, *tau;
+ struct auth_groups_list *agl, *tagl;
+ struct auth_groups *ag, *tag;
+
+ while (ul) {
+ /* Free users. */
+ au = ul->users;
+ while (au) {
+ /* Free groups that own current user. */
+ agl = au->u.groups;
+ while (agl) {
+ tagl = agl;
+ agl = agl->next;
+ free(tagl);
+ }
+
+ tau = au;
+ au = au->next;
+ free(tau->user);
+ free(tau->pass);
+ free(tau);
+ }
+
+ /* Free grouplist. */
+ ag = ul->groups;
+ while (ag) {
+ tag = ag;
+ ag = ag->next;
+ free(tag->name);
+ free(tag);
+ }
+
+ tul = ul;
+ ul = ul->next;
+ free(tul->name);
+ free(tul);
+ }
+}
+
+int userlist_postinit()
+{
+ struct userlist *curuserlist = NULL;
+
+ /* Resolve usernames and groupnames. */
+ for (curuserlist = userlist; curuserlist; curuserlist = curuserlist->next) {
+ struct auth_groups *ag;
+ struct auth_users *curuser;
+ struct auth_groups_list *grl;
+
+ for (curuser = curuserlist->users; curuser; curuser = curuser->next) {
+ char *group = NULL;
+ struct auth_groups_list *groups = NULL;
+
+ if (!curuser->u.groups_names)
+ continue;
+
+ while ((group = strtok(group?NULL:curuser->u.groups_names, ","))) {
+ for (ag = curuserlist->groups; ag; ag = ag->next) {
+ if (strcmp(ag->name, group) == 0)
+ break;
+ }
+
+ if (!ag) {
+ ha_alert("userlist '%s': no such group '%s' specified in user '%s'\n",
+ curuserlist->name, group, curuser->user);
+ free(groups);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ /* Add this group to the user's group list. */
+ grl = calloc(1, sizeof(*grl));
+ if (!grl) {
+ ha_alert("userlist '%s': no more memory when trying to allocate the user groups.\n",
+ curuserlist->name);
+ free(groups);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ grl->group = ag;
+ grl->next = groups;
+ groups = grl;
+ }
+
+ free(curuser->u.groups);
+ curuser->u.groups = groups;
+ }
+
+ for (ag = curuserlist->groups; ag; ag = ag->next) {
+ char *user = NULL;
+
+ if (!ag->groupusers)
+ continue;
+
+ while ((user = strtok(user?NULL:ag->groupusers, ","))) {
+ for (curuser = curuserlist->users; curuser; curuser = curuser->next) {
+ if (strcmp(curuser->user, user) == 0)
+ break;
+ }
+
+ if (!curuser) {
+ ha_alert("userlist '%s': no such user '%s' specified in group '%s'\n",
+ curuserlist->name, user, ag->name);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ /* Add this group to the user's group list. */
+ grl = calloc(1, sizeof(*grl));
+ if (!grl) {
+ ha_alert("userlist '%s': no more memory when trying to allocate the user groups.\n",
+ curuserlist->name);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ grl->group = ag;
+ grl->next = curuser->u.groups;
+ curuser->u.groups = grl;
+ }
+
+ ha_free(&ag->groupusers);
+ }
+
+#ifdef DEBUG_AUTH
+ for (ag = curuserlist->groups; ag; ag = ag->next) {
+ struct auth_groups_list *agl;
+
+ fprintf(stderr, "group %s, id %p, users:", ag->name, ag);
+ for (curuser = curuserlist->users; curuser; curuser = curuser->next) {
+ for (agl = curuser->u.groups; agl; agl = agl->next) {
+ if (agl->group == ag)
+ fprintf(stderr, " %s", curuser->user);
+ }
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+ }
+
+ return ERR_NONE;
+}
+
+/*
+ * Authenticate and authorize user; return 1 if OK, 0 in case of error.
+ */
+int
+check_user(struct userlist *ul, const char *user, const char *pass)
+{
+ struct auth_users *u;
+#ifdef DEBUG_AUTH
+ struct auth_groups_list *agl;
+#endif
+ const char *ep;
+
+#ifdef DEBUG_AUTH
+ fprintf(stderr, "req: userlist=%s, user=%s, pass=%s\n",
+ ul->name, user, pass);
+#endif
+
+ for (u = ul->users; u; u = u->next)
+ if (strcmp(user, u->user) == 0)
+ break;
+
+ if (!u)
+ return 0;
+
+#ifdef DEBUG_AUTH
+ fprintf(stderr, "cfg: user=%s, pass=%s, flags=%X, groups=",
+ u->user, u->pass, u->flags);
+ for (agl = u->u.groups; agl; agl = agl->next)
+ fprintf(stderr, " %s", agl->group->name);
+#endif
+
+ if (!(u->flags & AU_O_INSECURE)) {
+#ifdef USE_LIBCRYPT
+#ifdef HA_HAVE_CRYPT_R
+ ep = crypt_r(pass, u->pass, &crypt_data);
+#else
+ HA_SPIN_LOCK(AUTH_LOCK, &auth_lock);
+ ep = crypt(pass, u->pass);
+ HA_SPIN_UNLOCK(AUTH_LOCK, &auth_lock);
+#endif
+#else
+ return 0;
+#endif
+ } else
+ ep = pass;
+
+#ifdef DEBUG_AUTH
+ fprintf(stderr, ", crypt=%s\n", ep);
+#endif
+
+ if (ep && strcmp(ep, u->pass) == 0)
+ return 1;
+ else
+ return 0;
+}
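+
+/* Illustrative call site (a sketch; grant_access()/deny_access() are
+ * hypothetical): <ul> would come from auth_find_userlist() and the
+ * credentials from a decoded Authorization header. With USE_LIBCRYPT,
+ * <u->pass> is expected to hold a crypt(3) hash (e.g. "$5$..." for
+ * SHA-256) and the clear-text password is hashed with the stored salt
+ * before comparison:
+ *
+ *     if (check_user(ul, "alice", "secret"))
+ *         grant_access();
+ *     else
+ *         deny_access();
+ */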
+
+struct pattern *
+pat_match_auth(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct userlist *ul = smp->ctx.a[0];
+ struct pattern_list *lst;
+ struct auth_users *u;
+ struct auth_groups_list *agl;
+ struct pattern *pattern;
+
+ /* Check if the userlist is present in the context data. */
+ if (!ul)
+ return NULL;
+
+ /* Browse the userlist, searching for the user. */
+ for (u = ul->users; u; u = u->next) {
+ if (strcmp(smp->data.u.str.area, u->user) == 0)
+ break;
+ }
+ if (!u)
+ return NULL;
+
+ /* Browse each pattern. */
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ /* Browse each group, searching for a group name that matches the pattern. */
+ for (agl = u->u.groups; agl; agl = agl->next) {
+ if (strcmp(agl->group->name, pattern->ptr.str) == 0)
+ return pattern;
+ }
+ }
+ return NULL;
+}
+
+REGISTER_BUILD_OPTS("Encrypted password support via crypt(3): "CRYPT_STATE_MSG);
diff --git a/src/backend.c b/src/backend.c
new file mode 100644
index 0000000..664d264
--- /dev/null
+++ b/src/backend.c
@@ -0,0 +1,3379 @@
+/*
+ * Backend variables and functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/acl.h>
+#include <haproxy/activity.h>
+#include <haproxy/arg.h>
+#include <haproxy/backend.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/hash.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/lb_chash.h>
+#include <haproxy/lb_fas.h>
+#include <haproxy/lb_fwlc.h>
+#include <haproxy/lb_fwrr.h>
+#include <haproxy/lb_map.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/payload.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_strm
+
+int be_lastsession(const struct proxy *be)
+{
+ if (be->be_counters.last_sess)
+ return now.tv_sec - be->be_counters.last_sess;
+
+ return -1;
+}
+
+/* helper function to invoke the correct hash method */
+static unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len)
+{
+ unsigned int hash;
+
+ switch (px->lbprm.algo & BE_LB_HASH_FUNC) {
+ case BE_LB_HFCN_DJB2:
+ hash = hash_djb2(key, len);
+ break;
+ case BE_LB_HFCN_WT6:
+ hash = hash_wt6(key, len);
+ break;
+ case BE_LB_HFCN_CRC32:
+ hash = hash_crc32(key, len);
+ break;
+ case BE_LB_HFCN_SDBM:
+ /* this is the default hash function */
+ default:
+ hash = hash_sdbm(key, len);
+ break;
+ }
+
+ return hash;
+}
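+
+/* For illustration, this is roughly how the "hash-type" configuration
+ * keywords map to the functions selected above (a sketch, not the parser
+ * itself):
+ *
+ *     hash-type ... sdbm   -> hash_sdbm()   (default)
+ *     hash-type ... djb2   -> hash_djb2()
+ *     hash-type ... wt6    -> hash_wt6()
+ *     hash-type ... crc32  -> hash_crc32()
+ */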
+
+/*
+ * This function recounts the number of usable active and backup servers for
+ * proxy <p>. These numbers are stored into p->srv_act and p->srv_bck.
+ * This function also recomputes the total active and backup weights. However,
+ * it does not update tot_weight nor tot_used. Use update_backend_weight() for
+ * this.
+ * This function is designed to be called before a server's weight and state
+ * are committed, so it uses the 'next' weight and state values.
+ *
+ * threads: it is the caller's responsibility to lock data. For now, this
+ * function is called from lb modules, so it should be ok. But if you need to
+ * call it from another place, be careful (and update this comment).
+ */
+void recount_servers(struct proxy *px)
+{
+ struct server *srv;
+
+ px->srv_act = px->srv_bck = 0;
+ px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
+ px->lbprm.fbck = NULL;
+ for (srv = px->srv; srv != NULL; srv = srv->next) {
+ if (!srv_willbe_usable(srv))
+ continue;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ if (!px->srv_bck &&
+ !(px->options & PR_O_USE_ALL_BK))
+ px->lbprm.fbck = srv;
+ px->srv_bck++;
+ srv->cumulative_weight = px->lbprm.tot_wbck;
+ px->lbprm.tot_wbck += srv->next_eweight;
+ } else {
+ px->srv_act++;
+ srv->cumulative_weight = px->lbprm.tot_wact;
+ px->lbprm.tot_wact += srv->next_eweight;
+ }
+ }
+}
+
+/* This function simply updates the backend's tot_weight and tot_used values
+ * after servers weights have been updated. It is designed to be used after
+ * recount_servers() or equivalent.
+ *
+ * threads: it is the caller's responsibility to lock data. For now, this
+ * function is called from lb modules, so it should be ok. But if you need to
+ * call it from another place, be careful (and update this comment).
+ */
+void update_backend_weight(struct proxy *px)
+{
+ if (px->srv_act) {
+ px->lbprm.tot_weight = px->lbprm.tot_wact;
+ px->lbprm.tot_used = px->srv_act;
+ }
+ else if (px->lbprm.fbck) {
+ /* use only the first backup server */
+ px->lbprm.tot_weight = px->lbprm.fbck->next_eweight;
+ px->lbprm.tot_used = 1;
+ }
+ else {
+ px->lbprm.tot_weight = px->lbprm.tot_wbck;
+ px->lbprm.tot_used = px->srv_bck;
+ }
+}
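+
+/* Worked example (illustrative): with two usable active servers of
+ * next_eweight 10 and 20, recount_servers() leaves srv_act=2 and
+ * tot_wact=30; update_backend_weight() then sets tot_weight=30 and
+ * tot_used=2. If no active server is usable but a first backup <fbck>
+ * exists, only that backup's weight is used and tot_used becomes 1.
+ */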
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the source hash method. Depending on the number of active/backup servers,
+ * it will either look for active servers, or for backup servers.
+ * If any server is found, it will be returned. If no valid server is found,
+ * NULL is returned.
+ */
+static struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid)
+{
+ unsigned int h, l;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ l = h = 0;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ while ((l + sizeof (int)) <= len) {
+ h ^= ntohl(*(unsigned int *)(&addr[l]));
+ l += sizeof (int);
+ }
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ h = full_hash(h);
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, h, avoid);
+ else
+ return map_get_server_hash(px, h);
+}
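+
+/* Illustrative example: for an IPv4 client 192.168.1.10, the single
+ * 32-bit word of the address yields h = 0xc0a8010a; for IPv6 the loop
+ * XORs the four 32-bit words together. With "hash-type consistent" the
+ * result is looked up in the chash tree, otherwise it indexes the static
+ * map (roughly hash modulo total weight).
+ */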
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the URI hash method. In order to optimize cache hits, the hash computation
+ * ends at the question mark. Depending on the number of active/backup servers,
+ * it will either look for active servers, or for backup servers.
+ * If any server is found, it will be returned. If no valid server is found,
+ * NULL is returned. The lbprm.arg_opt{1,2,3} values correspond respectively to
+ * the "whole" optional argument (boolean, bit0), the "len" argument (numeric)
+ * and the "depth" argument (numeric).
+ *
+ * This code was contributed by Guillaume Dallaire, who also selected this hash
+ * algorithm out of tens of others because it gave him the best results.
+ *
+ */
+static struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ int c;
+ int slashes = 0;
+ const char *start, *end;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ if (px->lbprm.arg_opt2) // "len"
+ uri_len = MIN(uri_len, px->lbprm.arg_opt2);
+
+ start = end = uri;
+ while (uri_len--) {
+ c = *end;
+ if (c == '/') {
+ slashes++;
+ if (slashes == px->lbprm.arg_opt3) /* depth+1 */
+ break;
+ }
+ else if (c == '?' && !(px->lbprm.arg_opt1 & 1)) // "whole"
+ break;
+ end++;
+ }
+
+ hash = gen_hash(px, start, (end - start));
+
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ hash = full_hash(hash);
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
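+
+/* Illustrative example: with "balance uri depth 2", a request for
+ * /img/2023/photo.jpg?x=1 hashes "/img/2023": the walk stops at the
+ * third '/' (depth+1 slashes), and since "whole" is not set it would
+ * also have stopped at '?'. With "balance uri len 8" instead, only the
+ * first 8 chars, "/img/202", would be hashed.
+ */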
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the URL parameter hash method. It looks for a specific parameter in the
+ * URL and hashes it to compute the server ID. This is useful to optimize
+ * performance by avoiding bounces between servers in contexts where sessions
+ * are shared but cookies are not usable. If the parameter is not found, or
+ * if no valid server can be selected, NULL is returned.
+ */
+static struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ const char *start, *end;
+ const char *p;
+ const char *params;
+ int plen;
+
+ /* when tot_weight is 0 then so is srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ if ((p = memchr(uri, '?', uri_len)) == NULL)
+ return NULL;
+
+ p++;
+
+ uri_len -= (p - uri);
+ plen = px->lbprm.arg_len;
+ params = p;
+
+ while (uri_len > plen) {
+ /* Look for the parameter name followed by an equal symbol */
+ if (params[plen] == '=') {
+ if (memcmp(params, px->lbprm.arg_str, plen) == 0) {
+ /* OK, we have the parameter here at <params>, and
+ * the value after the equal sign, at <p>
+ * skip the equal symbol
+ */
+ p += plen + 1;
+ start = end = p;
+ uri_len -= plen + 1;
+
+ while (uri_len && *end != '&') {
+ uri_len--;
+ end++;
+ }
+ hash = gen_hash(px, start, (end - start));
+
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ hash = full_hash(hash);
+
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+ }
+ }
+ /* skip to next parameter */
+ p = memchr(params, '&', uri_len);
+ if (!p)
+ return NULL;
+ p++;
+ uri_len -= (p - params);
+ params = p;
+ }
+ return NULL;
+}
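+
+/* Illustrative example: with "balance url_param session_id", a request
+ * for /cart?user=42&session_id=abc123 hashes the value "abc123": the
+ * scan locates "session_id=" after the '?', then collects characters up
+ * to the next '&' or the end of the query string.
+ */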
+
+/*
+ * this does the same as the previous server_ph, but check the body contents
+ */
+static struct server *get_server_ph_post(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct channel *req = &s->req;
+ struct proxy *px = s->be;
+ struct htx *htx = htxbuf(&req->buf);
+ struct htx_blk *blk;
+ unsigned int plen = px->lbprm.arg_len;
+ unsigned long len;
+ const char *params, *p, *start, *end;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ p = params = NULL;
+ len = 0;
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist v;
+
+ if (type != HTX_BLK_DATA)
+ continue;
+ v = htx_get_blk_value(htx, blk);
+ p = params = v.ptr;
+ len = v.len;
+ break;
+ }
+
+ while (len > plen) {
+ /* Look for the parameter name followed by an equal symbol */
+ if (params[plen] == '=') {
+ if (memcmp(params, px->lbprm.arg_str, plen) == 0) {
+ /* OK, we have the parameter here at <params>, and
+ * the value after the equal sign, at <p>
+ * skip the equal symbol
+ */
+ p += plen + 1;
+ start = end = p;
+ len -= plen + 1;
+
+ while (len && *end != '&') {
+ if (unlikely(!HTTP_IS_TOKEN(*p))) {
+ /* if in a POST, body must be URI encoded or it's not a URI.
+ * Do not interpret any possible binary data as a parameter.
+ */
+ if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
+ break;
+ return NULL; /* oh, no; this is not uri-encoded.
+ * This body does not contain parameters.
+ */
+ }
+ len--;
+ end++;
+ /* should we break if vlen exceeds limit? */
+ }
+ hash = gen_hash(px, start, (end - start));
+
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ hash = full_hash(hash);
+
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+ }
+ }
+ /* skip to next parameter */
+ p = memchr(params, '&', len);
+ if (!p)
+ return NULL;
+ p++;
+ len -= (p - params);
+ params = p;
+ }
+ return NULL;
+}
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the Header parameter hash method. It looks for a specific header field and
+ * hashes its value to compute the server ID. This is useful to optimize
+ * performance by avoiding bounces between servers in contexts where sessions
+ * are shared but cookies are not usable. If the header is not found, or if no
+ * valid server can be selected, NULL is returned. When lbprm.arg_opt1 is set,
+ * the hash will only
+ * apply to the middle part of a domain name ("use_domain_only" option).
+ */
+static struct server *get_server_hh(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct proxy *px = s->be;
+ unsigned int plen = px->lbprm.arg_len;
+ unsigned long len;
+ const char *p;
+ const char *start, *end;
+ struct htx *htx = htxbuf(&s->req.buf);
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ /* tot_weight appears to mean srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ http_find_header(htx, ist2(px->lbprm.arg_str, plen), &ctx, 0);
+
+ /* if the header is not found or empty, let's fall back to round robin */
+ if (!ctx.blk || !ctx.value.len)
+ return NULL;
+
+ /* Found the param_name in the headers.
+ * We will compute the hash based on this value, ctx.value.
+ */
+ len = ctx.value.len;
+ p = ctx.value.ptr;
+
+ if (!px->lbprm.arg_opt1) {
+ hash = gen_hash(px, p, len);
+ } else {
+ int dohash = 0;
+ p += len;
+ /* Special computation: use only the main domain name, not the
+ * tld/host. Going back from the end of the string, start hashing
+ * at the first dot and stop at the next one.
+ * This is designed to work with the 'Host' header, and requires
+ * a special option to activate this.
+ */
+ end = p;
+ while (len) {
+ if (dohash) {
+ /* Rewind the pointer until the previous char
+ * is a dot, which allows setting the start
+ * position of the domain. */
+ if (*(p - 1) == '.')
+ break;
+ }
+ else if (*p == '.') {
+ /* The pointer is rewound to the dot before the
+ * tld, we memorize the end of the domain and
+ * can enter the domain processing. */
+ end = p;
+ dohash = 1;
+ }
+ p--;
+ len--;
+ }
+ start = p;
+ hash = gen_hash(px, start, (end - start));
+ }
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ hash = full_hash(hash);
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
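+
+/* Illustrative example: "balance hdr(Host) use_domain_only" on a request
+ * carrying "Host: www.example.com" hashes only "example": the scan walks
+ * backwards from the end of the value, records the dot before the TLD as
+ * <end>, then keeps rewinding until the preceding char is a dot, which
+ * becomes <start>.
+ */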
+
+/* RDP Cookie HASH. */
+static struct server *get_server_rch(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct proxy *px = s->be;
+ unsigned long len;
+ int ret;
+ struct sample smp;
+ int rewind;
+
+ /* tot_weight appears to mean srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ memset(&smp, 0, sizeof(smp));
+
+ rewind = co_data(&s->req);
+ c_rew(&s->req, rewind);
+
+ ret = fetch_rdp_cookie_name(s, &smp, px->lbprm.arg_str, px->lbprm.arg_len);
+ len = smp.data.u.str.data;
+
+ c_adv(&s->req, rewind);
+
+ if (ret == 0 || (smp.flags & SMP_F_MAY_CHANGE) || len == 0)
+ return NULL;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ /* Found the RDP cookie.
+ * We will compute the hash based on its value.
+ */
+ hash = gen_hash(px, smp.data.u.str.area, len);
+
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ hash = full_hash(hash);
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
+
+/* sample expression HASH. Returns NULL if the sample is not found or if there
+ * are no servers, relying on the caller to fall back to round robin instead.
+ */
+static struct server *get_server_expr(struct stream *s, const struct server *avoid)
+{
+ struct proxy *px = s->be;
+ struct sample *smp;
+ unsigned int hash = 0;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ /* note: no need to hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ smp = sample_fetch_as_type(px, s->sess, s, SMP_OPT_DIR_REQ | SMP_OPT_FINAL, px->lbprm.expr, SMP_T_BIN);
+ if (!smp)
+ return NULL;
+
+ /* We have the desired data. Let's hash it according to the configured
+ * options and algorithm.
+ */
+ hash = gen_hash(px, smp->data.u.str.area, smp->data.u.str.data);
+
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ hash = full_hash(hash);
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
+
+/* Random-based server selection ("balance random"); performs <arg_opt1> draws. */
+static struct server *get_server_rnd(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct proxy *px = s->be;
+ struct server *prev, *curr;
+ int draws = px->lbprm.arg_opt1; // number of draws
+
+ /* tot_weight appears to mean srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ curr = NULL;
+ do {
+ prev = curr;
+ hash = statistical_prng();
+ curr = chash_get_server_hash(px, hash, avoid);
+ if (!curr)
+ break;
+
+ /* compare the new server to the previous best choice and pick
+ * the one with the least currently served requests.
+ */
+ if (prev && prev != curr &&
+ curr->served * prev->cur_eweight > prev->served * curr->cur_eweight)
+ curr = prev;
+ } while (--draws > 0);
+
+ /* if the selected server is full, pretend we have none so that we reach
+ * the backend's queue instead.
+ */
+ if (curr &&
+ (curr->queue.length || (curr->maxconn && curr->served >= srv_dynamic_maxconn(curr))))
+ curr = NULL;
+
+ return curr;
+}
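+
+/* Illustrative note: with "balance random(2)" the loop above implements
+ * the classic "power of two choices" strategy: two independent draws are
+ * made on the consistent-hash ring and the candidate with the lowest
+ * served/weight ratio wins, which balances load much better than a
+ * single random pick at negligible extra cost.
+ */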
+
+/*
+ * This function applies the load-balancing algorithm to the stream, as
+ * defined by the backend it is assigned to. The stream is then marked as
+ * 'assigned'.
+ *
+ * This function MAY NOT be called with SF_ASSIGNED already set. If the stream
+ * had a server previously assigned, it is rebalanced, trying to avoid the same
+ * server, which should still be present in target_srv(&s->target) before the call.
+ * The function tries to keep the original connection slot if it reconnects to
+ * the same server, otherwise it releases it and tries to offer it.
+ *
+ * It is illegal to call this function with a stream in a queue.
+ *
+ * It may return :
+ * SRV_STATUS_OK if everything is OK. ->srv and ->target are assigned.
+ * SRV_STATUS_NOSRV if no server is available. Stream is not ASSIGNED
+ * SRV_STATUS_FULL if all servers are saturated. Stream is not ASSIGNED
+ * SRV_STATUS_INTERNAL for other unrecoverable errors.
+ *
+ * Upon successful return, the stream flag SF_ASSIGNED is set to indicate that
+ * it does not need to be called anymore. This means that target_srv(&s->target)
+ * can be trusted in balance and direct modes.
+ *
+ */
+
+int assign_server(struct stream *s)
+{
+ struct connection *conn = NULL;
+ struct server *conn_slot;
+ struct server *srv = NULL, *prev_srv;
+ int err;
+
+ DPRINTF(stderr,"assign_server : s=%p\n",s);
+
+ err = SRV_STATUS_INTERNAL;
+ if (unlikely(s->pend_pos || s->flags & SF_ASSIGNED))
+ goto out_err;
+
+ prev_srv = objt_server(s->target);
+ conn_slot = s->srv_conn;
+
+ /* We have to release any connection slot before applying any LB algo,
+ * otherwise we may erroneously end up with no available slot.
+ */
+ if (conn_slot)
+ sess_change_server(s, NULL);
+
+ /* We will now try to find the right server and store it into <objt_server(s->target)>.
+ * Note that <objt_server(s->target)> may be NULL in case of dispatch or proxy mode,
+ * as well as if no server is available (check error code).
+ */
+
+ srv = NULL;
+ s->target = NULL;
+
+ if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI &&
+ ((s->sess->flags & SESS_FL_PREFER_LAST) ||
+ (s->be->options & PR_O_PREF_LAST))) {
+ struct sess_srv_list *srv_list;
+ list_for_each_entry(srv_list, &s->sess->srv_list, srv_list) {
+ struct server *tmpsrv = objt_server(srv_list->target);
+
+ if (tmpsrv && tmpsrv->proxy == s->be &&
+ ((s->sess->flags & SESS_FL_PREFER_LAST) ||
+ (!s->be->max_ka_queue ||
+ server_has_room(tmpsrv) || (
+ tmpsrv->queue.length + 1 < s->be->max_ka_queue))) &&
+ srv_currently_usable(tmpsrv)) {
+ list_for_each_entry(conn, &srv_list->conn_list, session_list) {
+ if (!(conn->flags & CO_FL_WAIT_XPRT)) {
+ srv = tmpsrv;
+ s->target = &srv->obj_type;
+ if (conn->flags & CO_FL_SESS_IDLE) {
+ conn->flags &= ~CO_FL_SESS_IDLE;
+ s->sess->idle_conns--;
+ }
+ goto out_ok;
+ }
+ }
+ }
+ }
+ }
+
+ if (s->be->lbprm.algo & BE_LB_KIND) {
+ /* we must check if we have at least one server available */
+ if (!s->be->lbprm.tot_weight) {
+ err = SRV_STATUS_NOSRV;
+ goto out;
+ }
+
+ /* if there's some queue on the backend, with certain algos we
+ * know it's because all servers are full.
+ */
+ if (s->be->queue.length && s->be->queue.length != s->be->beconn &&
+ (((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_FAS)|| // first
+ ((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_RR) || // roundrobin
+ ((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_SRR))) { // static-rr
+ err = SRV_STATUS_FULL;
+ goto out;
+ }
+
+ /* First check whether we need to fetch some data or simply call
+ * the LB lookup function. Only the hashing functions will need
+ * some input data in fact, and will support multiple algorithms.
+ */
+ switch (s->be->lbprm.algo & BE_LB_LKUP) {
+ case BE_LB_LKUP_RRTREE:
+ srv = fwrr_get_next_server(s->be, prev_srv);
+ break;
+
+ case BE_LB_LKUP_FSTREE:
+ srv = fas_get_next_server(s->be, prev_srv);
+ break;
+
+ case BE_LB_LKUP_LCTREE:
+ srv = fwlc_get_next_server(s->be, prev_srv);
+ break;
+
+ case BE_LB_LKUP_CHTREE:
+ case BE_LB_LKUP_MAP:
+ if ((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_RR) {
+ /* static-rr (map) or random (chash) */
+ if ((s->be->lbprm.algo & BE_LB_PARM) == BE_LB_RR_RANDOM)
+ srv = get_server_rnd(s, prev_srv);
+ else
+ srv = map_get_server_rr(s->be, prev_srv);
+ break;
+ }
+ else if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI) {
+ /* unknown balancing algorithm */
+ err = SRV_STATUS_INTERNAL;
+ goto out;
+ }
+
+ switch (s->be->lbprm.algo & BE_LB_PARM) {
+ const struct sockaddr_storage *src;
+
+ case BE_LB_HASH_SRC:
+ src = sc_src(s->scf);
+ if (src && src->ss_family == AF_INET) {
+ srv = get_server_sh(s->be,
+ (void *)&((struct sockaddr_in *)src)->sin_addr,
+ 4, prev_srv);
+ }
+ else if (src && src->ss_family == AF_INET6) {
+ srv = get_server_sh(s->be,
+ (void *)&((struct sockaddr_in6 *)src)->sin6_addr,
+ 16, prev_srv);
+ }
+ break;
+
+ case BE_LB_HASH_URI:
+ /* URI hashing */
+ if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY) {
+ struct ist uri;
+
+ uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
+ if (s->be->lbprm.arg_opt1 & 2) {
+ struct http_uri_parser parser =
+ http_uri_parser_init(uri);
+
+ uri = http_parse_path(&parser);
+ if (!isttest(uri))
+ uri = ist("");
+ }
+ srv = get_server_uh(s->be, uri.ptr, uri.len, prev_srv);
+ }
+ break;
+
+ case BE_LB_HASH_PRM:
+ /* URL Parameter hashing */
+ if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY) {
+ struct ist uri;
+
+ uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
+ srv = get_server_ph(s->be, uri.ptr, uri.len, prev_srv);
+
+ if (!srv && s->txn->meth == HTTP_METH_POST)
+ srv = get_server_ph_post(s, prev_srv);
+ }
+ break;
+
+ case BE_LB_HASH_HDR:
+ /* Header Parameter hashing */
+ if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY)
+ srv = get_server_hh(s, prev_srv);
+ break;
+
+ case BE_LB_HASH_RDP:
+ /* RDP Cookie hashing */
+ srv = get_server_rch(s, prev_srv);
+ break;
+
+ case BE_LB_HASH_SMP:
+ /* sample expression hashing */
+ srv = get_server_expr(s, prev_srv);
+ break;
+
+ default:
+ /* unknown balancing algorithm */
+ err = SRV_STATUS_INTERNAL;
+ goto out;
+ }
+
+ /* If the hashing parameter was not found, let's fall
+ * back to round robin on the map.
+ */
+ if (!srv) {
+ if ((s->be->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ srv = chash_get_next_server(s->be, prev_srv);
+ else
+ srv = map_get_server_rr(s->be, prev_srv);
+ }
+
+ /* end of map-based LB */
+ break;
+
+ default:
+ /* unknown balancing algorithm */
+ err = SRV_STATUS_INTERNAL;
+ goto out;
+ }
+
+ if (!srv) {
+ err = SRV_STATUS_FULL;
+ goto out;
+ }
+ else if (srv != prev_srv) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cum_lbconn);
+ _HA_ATOMIC_INC(&srv->counters.cum_lbconn);
+ }
+ s->target = &srv->obj_type;
+ }
+ else if (s->be->options & (PR_O_DISPATCH | PR_O_TRANSP)) {
+ s->target = &s->be->obj_type;
+ }
+ else {
+ err = SRV_STATUS_NOSRV;
+ goto out;
+ }
+
+out_ok:
+ s->flags |= SF_ASSIGNED;
+ err = SRV_STATUS_OK;
+ out:
+
+ /* Either we take back our connection slot, or we offer it to someone
+ * else if we don't need it anymore.
+ */
+ if (conn_slot) {
+ if (conn_slot == srv) {
+ sess_change_server(s, srv);
+ } else {
+ if (may_dequeue_tasks(conn_slot, s->be))
+ process_srv_queue(conn_slot);
+ }
+ }
+
+ out_err:
+ return err;
+}
+
+/* Allocate an address for the destination endpoint
+ * The address is taken from the currently assigned server, or from the
+ * dispatch or transparent address.
+ *
+ * Returns SRV_STATUS_OK on success. Does nothing if the address was
+ * already set.
+ * On error, no address is allocated and SRV_STATUS_INTERNAL is returned.
+ */
+static int alloc_dst_address(struct sockaddr_storage **ss,
+ struct server *srv, struct stream *s)
+{
+ const struct sockaddr_storage *dst;
+
+ if (*ss)
+ return SRV_STATUS_OK;
+
+ if ((s->flags & SF_DIRECT) || (s->be->lbprm.algo & BE_LB_KIND)) {
+ /* A server is necessarily known for this stream */
+ if (!(s->flags & SF_ASSIGNED))
+ return SRV_STATUS_INTERNAL;
+
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ **ss = srv->addr;
+ set_host_port(*ss, srv->svc_port);
+ if (!is_addr(*ss)) {
+ /* if the server has no address, we use the same address
+ * the client asked for, which is handy for remapping ports
+ * locally on multiple addresses at once. Nothing is done
+ * for AF_UNIX addresses.
+ */
+ dst = sc_dst(s->scf);
+ if (dst && dst->ss_family == AF_INET) {
+ ((struct sockaddr_in *)*ss)->sin_family = AF_INET;
+ ((struct sockaddr_in *)*ss)->sin_addr =
+ ((struct sockaddr_in *)dst)->sin_addr;
+ } else if (dst && dst->ss_family == AF_INET6) {
+ ((struct sockaddr_in6 *)*ss)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *)*ss)->sin6_addr =
+ ((struct sockaddr_in6 *)dst)->sin6_addr;
+ }
+ }
+
+ /* if this server remaps proxied ports, we'll use
+ * the port the client connected to with an offset. */
+ if ((srv->flags & SRV_F_MAPPORTS)) {
+ int base_port;
+
+ dst = sc_dst(s->scf);
+ if (dst) {
+ /* First, retrieve the port from the incoming connection */
+ base_port = get_host_port(dst);
+
+ /* Second, assign the outgoing connection's port */
+ base_port += get_host_port(*ss);
+ set_host_port(*ss, base_port);
+ }
+ }
+ }
+ else if (s->be->options & PR_O_DISPATCH) {
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ /* connect to the defined dispatch addr */
+ **ss = s->be->dispatch_addr;
+ }
+ else if ((s->be->options & PR_O_TRANSP)) {
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ /* in transparent mode, use the original dest addr if no dispatch specified */
+ dst = sc_dst(s->scf);
+ if (dst && (dst->ss_family == AF_INET || dst->ss_family == AF_INET6))
+ **ss = *dst;
+ }
+ else {
+ /* no server and no LB algorithm ! */
+ return SRV_STATUS_INTERNAL;
+ }
+
+ return SRV_STATUS_OK;
+}
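+
+/* Worked example (illustrative) of the SRV_F_MAPPORTS case above: with a
+ * server declared as "server s1 192.168.0.1:+1000", a client connecting
+ * to port 80 gets base_port = 80 (client side) + 1000 (configured
+ * offset) = 1080 as the outgoing destination port.
+ */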
+
+/* This function assigns a server to stream <s> if required, and can add the
+ * connection to either the assigned server's queue or to the proxy's queue.
+ * If ->srv_conn is set, the stream is first released from the server.
+ * It may also be called with SF_DIRECT and/or SF_ASSIGNED though. It will
+ * be called before any connection and after any retry or redispatch occurs.
+ *
+ * It is not allowed to call this function with a stream in a queue.
+ *
+ * Returns :
+ *
+ * SRV_STATUS_OK if everything is OK.
+ * SRV_STATUS_NOSRV if no server is available. objt_server(s->target) = NULL.
+ * SRV_STATUS_QUEUED if the connection has been queued.
+ * SRV_STATUS_FULL if the server(s) is/are saturated and the
+ * connection could not be queued at the server's,
+ * which may be NULL if we queue on the backend.
+ * SRV_STATUS_INTERNAL for other unrecoverable errors.
+ *
+ */
+int assign_server_and_queue(struct stream *s)
+{
+ struct pendconn *p;
+ struct server *srv;
+ int err;
+
+ if (s->pend_pos)
+ return SRV_STATUS_INTERNAL;
+
+ err = SRV_STATUS_OK;
+ if (!(s->flags & SF_ASSIGNED)) {
+ struct server *prev_srv = objt_server(s->target);
+
+ err = assign_server(s);
+ if (prev_srv) {
+ /* This stream was previously assigned to a server. We have to
+ * update the stream's and the server's stats :
+ * - if the server changed :
+ * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
+ * - set SF_REDISP if it was successfully redispatched
+ * - increment srv->redispatches and be->redispatches
+ * - if the server remained the same : update retries.
+ */
+
+ if (prev_srv != objt_server(s->target)) {
+ if (s->txn && (s->txn->flags & TX_CK_MASK) == TX_CK_VALID) {
+ s->txn->flags &= ~TX_CK_MASK;
+ s->txn->flags |= TX_CK_DOWN;
+ }
+ s->flags |= SF_REDISP;
+ _HA_ATOMIC_INC(&prev_srv->counters.redispatches);
+ _HA_ATOMIC_INC(&s->be->be_counters.redispatches);
+ } else {
+ _HA_ATOMIC_INC(&prev_srv->counters.retries);
+ _HA_ATOMIC_INC(&s->be->be_counters.retries);
+ }
+ }
+ }
+
+ switch (err) {
+ case SRV_STATUS_OK:
+ /* we have SF_ASSIGNED set */
+ srv = objt_server(s->target);
+ if (!srv)
+ return SRV_STATUS_OK; /* dispatch or proxy mode */
+
+ /* If we already have a connection slot, no need to check any queue */
+ if (s->srv_conn == srv)
+ return SRV_STATUS_OK;
+
+ /* OK, this stream already has an assigned server, but no
+ * connection slot yet. Either it is a redispatch, or it was
+ * assigned from persistence information (direct mode).
+ */
+ if ((s->flags & SF_REDIRECTABLE) && srv->rdr_len) {
+ /* server scheduled for redirection, and already assigned. We
+ * don't want to go further nor check the queue.
+ */
+ sess_change_server(s, srv); /* not really needed in fact */
+ return SRV_STATUS_OK;
+ }
+
+ /* We might have to queue this stream if the assigned server is full.
+ * We know we have to queue it into the server's queue, so if a maxqueue
+ * is set on the server, we must also check that the server's queue is
+ * not full, in which case we have to return FULL.
+ */
+ if (srv->maxconn &&
+ (srv->queue.length || srv->served >= srv_dynamic_maxconn(srv))) {
+
+ if (srv->maxqueue > 0 && srv->queue.length >= srv->maxqueue)
+ return SRV_STATUS_FULL;
+
+ p = pendconn_add(s);
+ if (p)
+ return SRV_STATUS_QUEUED;
+ else
+ return SRV_STATUS_INTERNAL;
+ }
+
+ /* OK, we can use this server. Let's reserve our place */
+ sess_change_server(s, srv);
+ return SRV_STATUS_OK;
+
+ case SRV_STATUS_FULL:
+ /* queue this stream into the proxy's queue */
+ p = pendconn_add(s);
+ if (p)
+ return SRV_STATUS_QUEUED;
+ else
+ return SRV_STATUS_INTERNAL;
+
+ case SRV_STATUS_NOSRV:
+ return err;
+
+ case SRV_STATUS_INTERNAL:
+ return err;
+
+ default:
+ return SRV_STATUS_INTERNAL;
+ }
+}
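+
+/* Typical caller-side handling (illustrative sketch only):
+ *
+ *     switch (assign_server_and_queue(s)) {
+ *     case SRV_STATUS_OK:     break;      // proceed with the connection
+ *     case SRV_STATUS_QUEUED: return;     // wait for a slot to free up
+ *     case SRV_STATUS_FULL:               // 503 or redispatch
+ *     case SRV_STATUS_NOSRV:
+ *     default:                ...;        // abort the stream
+ *     }
+ */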
+
+/* Allocate an address for source binding on the specified server or backend.
+ * The allocation is only performed if the connection is intended to be used
+ * with transparent mode.
+ *
+ * Returns SRV_STATUS_OK if no transparent mode or the address was successfully
+ * allocated. Otherwise returns SRV_STATUS_INTERNAL. Does nothing if the
+ * address was already allocated.
+ */
+static int alloc_bind_address(struct sockaddr_storage **ss,
+ struct server *srv, struct stream *s)
+{
+#if defined(CONFIG_HAP_TRANSPARENT)
+ const struct sockaddr_storage *addr;
+ struct conn_src *src = NULL;
+ struct sockaddr_in *sin;
+ char *vptr;
+ size_t vlen;
+#endif
+
+ if (*ss)
+ return SRV_STATUS_OK;
+
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (srv && srv->conn_src.opts & CO_SRC_BIND)
+ src = &srv->conn_src;
+ else if (s->be->conn_src.opts & CO_SRC_BIND)
+ src = &s->be->conn_src;
+
+ /* no transparent mode, no need to allocate an address, returns OK */
+ if (!src)
+ return SRV_STATUS_OK;
+
+ switch (src->opts & CO_SRC_TPROXY_MASK) {
+ case CO_SRC_TPROXY_ADDR:
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ **ss = src->tproxy_addr;
+ break;
+
+ case CO_SRC_TPROXY_CLI:
+ case CO_SRC_TPROXY_CIP:
+ /* FIXME: what can we do if the client connects over IPv6 or a unix socket? */
+ addr = sc_src(s->scf);
+ if (!addr)
+ return SRV_STATUS_INTERNAL;
+
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ **ss = *addr;
+ break;
+
+ case CO_SRC_TPROXY_DYN:
+ if (!src->bind_hdr_occ || !IS_HTX_STRM(s))
+ return SRV_STATUS_INTERNAL;
+
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ /* bind to the IP in a header */
+ sin = (struct sockaddr_in *)*ss;
+ sin->sin_family = AF_INET;
+ sin->sin_port = 0;
+ sin->sin_addr.s_addr = 0;
+ if (!http_get_htx_hdr(htxbuf(&s->req.buf),
+ ist2(src->bind_hdr_name, src->bind_hdr_len),
+ src->bind_hdr_occ, NULL, &vptr, &vlen)) {
+ sockaddr_free(ss);
+ return SRV_STATUS_INTERNAL;
+ }
+
+ sin->sin_addr.s_addr = htonl(inetaddr_host_lim(vptr, vptr + vlen));
+ break;
+
+ default:
+ ;
+ }
+#endif
+
+ return SRV_STATUS_OK;
+}
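+
+/* Illustrative mapping from configuration keywords to the
+ * CO_SRC_TPROXY_* cases above (a sketch; parsing happens elsewhere):
+ *
+ *     source 0.0.0.0 usesrc 10.0.0.1       -> CO_SRC_TPROXY_ADDR
+ *     source 0.0.0.0 usesrc client         -> CO_SRC_TPROXY_CLI
+ *     source 0.0.0.0 usesrc clientip       -> CO_SRC_TPROXY_CIP
+ *     source 0.0.0.0 usesrc hdr_ip(x-src)  -> CO_SRC_TPROXY_DYN
+ */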
+
+/* Attempt to get a backend connection from the specified mt_list array
+ * (safe or idle connections). The <is_safe> argument indicates which type of
+ * connection the caller wants.
+ */
+static struct connection *conn_backend_get(struct stream *s, struct server *srv, int is_safe, int64_t hash)
+{
+ struct connection *conn = NULL;
+ int i; // thread number
+ int found = 0;
+ int stop;
+
+ /* We need to lock even if this is our own list, because another
+ * thread may be trying to migrate that connection, and we don't want
+ * to end up with two threads using the same connection.
+ */
+ i = tid;
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn = srv_lookup_conn(is_safe ? &srv->per_thr[tid].safe_conns : &srv->per_thr[tid].idle_conns, hash);
+ if (conn)
+ conn_delete_from_tree(&conn->hash_node->node);
+
+ /* If we failed to pick a connection from the idle list, let's try again with
+ * the safe list.
+ */
+ if (!conn && !is_safe && srv->curr_safe_nb > 0) {
+ conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash);
+ if (conn) {
+ conn_delete_from_tree(&conn->hash_node->node);
+ is_safe = 1;
+ }
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* If we found a connection in our own list, and we don't have to
+ * steal one from another thread, then we're done.
+ */
+ if (conn)
+ goto done;
+
+ /* pool sharing globally disabled ? */
+ if (!(global.tune.options & GTUNE_IDLE_POOL_SHARED))
+ goto done;
+
+ /* Are we allowed to pick from another thread? We'll still try
+ * it if we're running low on FDs, as we don't want to create
+ * extra conns in this case; otherwise we can give up if we have
+ * too few idle conns.
+ */
+ if (srv->curr_idle_conns < srv->low_idle_conns &&
+ ha_used_fds < global.tune.pool_low_count)
+ goto done;
+
+ /* Lookup all other threads for an idle connection, starting from last
+ * unvisited thread.
+ */
+ stop = srv->next_takeover;
+ if (stop >= global.nbthread)
+ stop = 0;
+
+ i = stop;
+ do {
+ if (!srv->curr_idle_thr[i] || i == tid)
+ continue;
+
+ if (HA_SPIN_TRYLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock) != 0)
+ continue;
+ conn = srv_lookup_conn(is_safe ? &srv->per_thr[i].safe_conns : &srv->per_thr[i].idle_conns, hash);
+ while (conn) {
+ if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
+ conn_delete_from_tree(&conn->hash_node->node);
+ _HA_ATOMIC_INC(&activity[tid].fd_takeover);
+ found = 1;
+ break;
+ }
+
+ conn = srv_lookup_conn_next(conn);
+ }
+
+ if (!found && !is_safe && srv->curr_safe_nb > 0) {
+ conn = srv_lookup_conn(&srv->per_thr[i].safe_conns, hash);
+ while (conn) {
+ if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
+ conn_delete_from_tree(&conn->hash_node->node);
+ _HA_ATOMIC_INC(&activity[tid].fd_takeover);
+ found = 1;
+ is_safe = 1;
+ break;
+ }
+
+ conn = srv_lookup_conn_next(conn);
+ }
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ } while (!found && (i = (i + 1 == global.nbthread) ? 0 : i + 1) != stop);
+
+ if (!found)
+ conn = NULL;
+ done:
+ if (conn) {
+ _HA_ATOMIC_STORE(&srv->next_takeover, (i + 1 == global.nbthread) ? 0 : i + 1);
+
+ srv_use_conn(srv, conn);
+
+ _HA_ATOMIC_DEC(&srv->curr_idle_conns);
+ _HA_ATOMIC_DEC(conn->flags & CO_FL_SAFE_LIST ? &srv->curr_safe_nb : &srv->curr_idle_nb);
+ _HA_ATOMIC_DEC(&srv->curr_idle_thr[i]);
+ conn->flags &= ~CO_FL_LIST_MASK;
+ __ha_barrier_atomic_store();
+
+ if ((s->be->options & PR_O_REUSE_MASK) == PR_O_REUSE_SAFE &&
+ conn->mux->flags & MX_FL_HOL_RISK) {
+ /* attach the connection to the session private list
+ */
+ conn->owner = s->sess;
+ session_add_conn(s->sess, conn, conn->target);
+ }
+ else {
+ eb64_insert(&srv->per_thr[tid].avail_conns,
+ &conn->hash_node->node);
+ }
+ }
+ return conn;
+}
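+
+/* Illustrative note on the takeover above: idle connections live in
+ * per-thread trees, so stealing one from thread <i> requires the mux's
+ * takeover() callback to migrate its polling and tasks to the calling
+ * thread. A mux without ->takeover is simply skipped, and the search
+ * resumes from srv->next_takeover so that threads are visited fairly
+ * across successive calls.
+ */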
+
+static int do_connect_server(struct stream *s, struct connection *conn)
+{
+ int ret = SF_ERR_NONE;
+ int conn_flags = 0;
+
+ if (unlikely(!conn || !conn->ctrl || !conn->ctrl->connect))
+ return SF_ERR_INTERNAL;
+
+ if (!channel_is_empty(&s->res))
+ conn_flags |= CONNECT_HAS_DATA;
+ if (s->conn_retries == s->be->conn_retries)
+ conn_flags |= CONNECT_CAN_USE_TFO;
+ if (!conn_ctrl_ready(conn) || !conn_xprt_ready(conn)) {
+ ret = conn->ctrl->connect(conn, conn_flags);
+ if (ret != SF_ERR_NONE)
+ return ret;
+
+ /* we're in the process of establishing a connection */
+ s->scb->state = SC_ST_CON;
+ }
+ else {
+ /* try to reuse the existing connection, it will be
+ * confirmed once we can send on it.
+ */
+ /* Is the connection really ready ? */
+ if (conn->mux->ctl(conn, MUX_STATUS, NULL) & MUX_STATUS_READY)
+ s->scb->state = SC_ST_RDY;
+ else
+ s->scb->state = SC_ST_CON;
+ }
+
+ /* needs src ip/port for logging */
+ if (s->flags & SF_SRC_ADDR)
+ conn_get_src(conn);
+
+ return ret;
+}
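+
+/* Illustrative note: CONNECT_CAN_USE_TFO is only set while s->conn_retries
+ * still equals the backend's configured conn_retries, i.e. on the very
+ * first attempt, so a retry after a failed TCP Fast Open falls back to a
+ * regular connect(). CONNECT_HAS_DATA signals that data is already pending
+ * and may be sent along with the SYN when TFO is in use.
+ */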
+
+/*
+ * This function initiates a connection to the server assigned to this stream
+ * (s->target, (s->scb)->addr.to). It will assign a server if none
+ * is assigned yet.
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ * The server-facing stream connector is expected to hold a pre-allocated connection.
+ */
+static int connect_server(struct stream *s)
+{
+ struct connection *cli_conn = objt_conn(strm_orig(s));
+ struct connection *srv_conn = NULL;
+ struct server *srv;
+ const int reuse_mode = s->be->options & PR_O_REUSE_MASK;
+ int reuse = 0;
+ int init_mux = 0;
+ int err;
+#ifdef USE_OPENSSL
+ struct sample *sni_smp = NULL;
+#endif
+ struct sockaddr_storage *bind_addr = NULL;
+ int proxy_line_ret;
+ int64_t hash = 0;
+ struct conn_hash_params hash_params;
+
+ /* in a standard configuration, srv will be valid; it can be NULL
+ * in dispatch mode or for a transparent backend */
+ srv = objt_server(s->target);
+
+ err = alloc_dst_address(&s->scb->dst, srv, s);
+ if (err != SRV_STATUS_OK)
+ return SF_ERR_INTERNAL;
+
+ err = alloc_bind_address(&bind_addr, srv, s);
+ if (err != SRV_STATUS_OK)
+ return SF_ERR_INTERNAL;
+
+#ifdef USE_OPENSSL
+ if (srv && srv->ssl_ctx.sni) {
+ sni_smp = sample_fetch_as_type(s->be, s->sess, s,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ srv->ssl_ctx.sni, SMP_T_STR);
+ }
+#endif
+
+ /* do not reuse if mode is not http */
+ if (!IS_HTX_STRM(s)) {
+ DBG_TRACE_STATE("skip idle connections reuse: no htx", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto skip_reuse;
+ }
+
+ /* disable reuse if websocket stream and the protocol to use is not the
+ * same as the main protocol of the server.
+ */
+ if (unlikely(s->flags & SF_WEBSOCKET) && srv) {
+ if (!srv_check_reuse_ws(srv)) {
+ DBG_TRACE_STATE("skip idle connections reuse: websocket stream", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto skip_reuse;
+ }
+ }
+
+ /* first, set unique connection parameters and then calculate hash */
+ memset(&hash_params, 0, sizeof(hash_params));
+
+ /* 1. target */
+ hash_params.target = s->target;
+
+#ifdef USE_OPENSSL
+ /* 2. sni
+ * only test if the sample is not null, as smp_make_safe() (called before
+ * ssl_sock_set_servername()) can only fail if this is not the case
+ */
+ if (sni_smp) {
+ hash_params.sni_prehash =
+ conn_hash_prehash(sni_smp->data.u.str.area,
+ sni_smp->data.u.str.data);
+ }
+#endif /* USE_OPENSSL */
+
+ /* 3. destination address */
+ if (srv && (!is_addr(&srv->addr) || srv->flags & SRV_F_MAPPORTS))
+ hash_params.dst_addr = s->scb->dst;
+
+ /* 4. source address */
+ hash_params.src_addr = bind_addr;
+
+ /* 5. proxy protocol */
+ if (srv && srv->pp_opts) {
+ proxy_line_ret = make_proxy_line(trash.area, trash.size, srv, cli_conn, s);
+ if (proxy_line_ret) {
+ hash_params.proxy_prehash =
+ conn_hash_prehash(trash.area, proxy_line_ret);
+ }
+ }
+
+ hash = conn_calculate_hash(&hash_params);
+
+ /* first, search for a matching connection in the session's idle conns */
+ srv_conn = session_get_conn(s->sess, s->target, hash);
+ if (srv_conn) {
+ DBG_TRACE_STATE("reuse connection from session", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ reuse = 1;
+ }
+
+ if (srv && !reuse && reuse_mode != PR_O_REUSE_NEVR) {
+ /* Below we pick connections from the safe, idle or
+ * available (which are safe too) lists based
+ * on the strategy, the fact that this is a first or second
+ * (retryable) request, with the indicated priority (1 or 2) :
+ *
+ * SAFE AGGR ALWS
+ *
+ * +-----+-----+ +-----+-----+ +-----+-----+
+ * req| 1st | 2nd | req| 1st | 2nd | req| 1st | 2nd |
+ * ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
+ * safe| - | 2 | safe| 1 | 2 | safe| 1 | 2 |
+ * ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
+ * idle| - | 1 | idle| - | 1 | idle| 2 | 1 |
+ * ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
+ *
+ * Idle conns are necessarily looked up on the same thread so
+ * that there are no concurrency issues.
+ */
+ if (!eb_is_empty(&srv->per_thr[tid].avail_conns)) {
+ srv_conn = srv_lookup_conn(&srv->per_thr[tid].avail_conns, hash);
+ if (srv_conn) {
+ DBG_TRACE_STATE("reuse connection from avail", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ reuse = 1;
+ }
+ }
+
+ /* if no available connections found, search for an idle/safe */
+ if (!srv_conn && srv->max_idle_conns && srv->curr_idle_conns > 0) {
+ const int not_first_req = s->txn && s->txn->flags & TX_NOT_FIRST;
+ const int idle = srv->curr_idle_nb > 0;
+ const int safe = srv->curr_safe_nb > 0;
+ const int retry_safe = (s->be->retry_type & (PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT)) ==
+ (PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT);
+
+ /* second column of the tables above,
+ * search for an idle then safe conn */
+ if (not_first_req || retry_safe) {
+ if (idle || safe)
+ srv_conn = conn_backend_get(s, srv, 0, hash);
+ }
+ /* first column of the tables above */
+ else if (reuse_mode >= PR_O_REUSE_AGGR) {
+ /* search for a safe conn */
+ if (safe)
+ srv_conn = conn_backend_get(s, srv, 1, hash);
+
+ /* search for an idle conn if no safe conn found
+ * on always reuse mode */
+ if (!srv_conn &&
+ reuse_mode == PR_O_REUSE_ALWS && idle) {
+ /* TODO conn_backend_get should not check the
+ * safe list in this case */
+ srv_conn = conn_backend_get(s, srv, 0, hash);
+ }
+ }
+
+ if (srv_conn) {
+ DBG_TRACE_STATE("reuse connection from idle/safe", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ reuse = 1;
+ }
+ }
+ }
+
+ /* here reuse might have been set above, indicating srv_conn finally
+ * is OK.
+ */
+
+ if (ha_used_fds > global.tune.pool_high_count && srv) {
+ struct connection *tokill_conn = NULL;
+ struct conn_hash_node *conn_node = NULL;
+ struct ebmb_node *node = NULL;
+
+ /* We can't reuse a connection, and we have more FDs than deemed
+ * acceptable; attempt to kill an idling connection.
+ */
+ /* First, try from our own idle list */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ node = ebmb_first(&srv->per_thr[tid].idle_conns);
+ if (node) {
+ conn_node = ebmb_entry(node, struct conn_hash_node, node);
+ tokill_conn = conn_node->conn;
+ ebmb_delete(node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* Release the idle lock before calling mux->destroy.
+ * It will in turn call srv_release_conn through
+ * conn_free which also uses it.
+ */
+ tokill_conn->mux->destroy(tokill_conn->ctx);
+ }
+ else {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ /* If not, iterate over the other threads' idle pools, and try to grab one */
+ if (!tokill_conn) {
+ int i;
+
+ for (i = tid; (i = ((i + 1 == global.nbthread) ? 0 : i + 1)) != tid;) {
+ // just silence stupid gcc which reports an absurd
+ // out-of-bounds warning for <i> which is always
+ // exactly zero without threads, but it seems to
+ // see it possibly larger.
+ ALREADY_CHECKED(i);
+
+ if (HA_SPIN_TRYLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock) != 0)
+ continue;
+
+ node = ebmb_first(&srv->per_thr[i].idle_conns);
+ if (node) {
+ conn_node = ebmb_entry(node, struct conn_hash_node, node);
+ tokill_conn = conn_node->conn;
+ ebmb_delete(node);
+ }
+
+ if (!tokill_conn) {
+ node = ebmb_first(&srv->per_thr[i].safe_conns);
+ if (node) {
+ conn_node = ebmb_entry(node, struct conn_hash_node, node);
+ tokill_conn = conn_node->conn;
+ ebmb_delete(node);
+ }
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+
+ if (tokill_conn) {
+ /* We got one, put it into the concerned thread's to-kill list, and wake its kill task */
+
+ MT_LIST_APPEND(&idle_conns[i].toremove_conns,
+ (struct mt_list *)&tokill_conn->toremove_list);
+ task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
+ break;
+ }
+ }
+ }
+
+ }
+
+ if (reuse) {
+ if (srv_conn->mux) {
+ int avail = srv_conn->mux->avail_streams(srv_conn);
+
+ if (avail <= 1) {
+ /* No more streams available, remove it from the list */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(&srv_conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ if (avail >= 1) {
+ if (srv_conn->mux->attach(srv_conn, s->scb->sedesc, s->sess) == -1) {
+ srv_conn = NULL;
+ if (sc_reset_endp(s->scb) < 0)
+ return SF_ERR_INTERNAL;
+ sc_ep_clr(s->scb, ~SE_FL_DETACHED);
+ }
+ }
+ else
+ srv_conn = NULL;
+ }
+ /* otherwise srv_conn is left intact */
+ }
+ else
+ srv_conn = NULL;
+
+skip_reuse:
+ /* no reuse or failed to reuse the connection above, pick a new one */
+ if (!srv_conn) {
+ srv_conn = conn_new(s->target);
+ if (srv_conn) {
+ DBG_TRACE_STATE("alloc new be connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ srv_conn->owner = s->sess;
+
+ /* connection will be attached to the session if
+ * http-reuse mode is "never" or it is not targeted to a
+ * server */
+ if (reuse_mode == PR_O_REUSE_NEVR || !srv)
+ conn_set_private(srv_conn);
+
+ /* assign bind_addr to srv_conn */
+ srv_conn->src = bind_addr;
+ bind_addr = NULL;
+
+ if (!sockaddr_alloc(&srv_conn->dst, 0, 0)) {
+ conn_free(srv_conn);
+ return SF_ERR_RESOURCE;
+ }
+
+ srv_conn->hash_node->node.key = hash;
+ }
+ }
+
+ /* if bind_addr is non-NULL, free it */
+ sockaddr_free(&bind_addr);
+
+ /* srv_conn is still NULL only on allocation failure */
+ if (!srv_conn)
+ return SF_ERR_RESOURCE;
+
+ /* copy the target address into the connection */
+ *srv_conn->dst = *s->scb->dst;
+
+ /* Copy network namespace from client connection */
+ srv_conn->proxy_netns = cli_conn ? cli_conn->proxy_netns : NULL;
+
+ if (!srv_conn->xprt) {
+ /* set the correct protocol on the output stream connector */
+ if (srv) {
+ if (conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt)) {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ } else if (obj_type(s->target) == OBJ_TYPE_PROXY) {
+ int ret;
+
+ /* proxies exclusively run on raw_sock right now */
+ ret = conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), xprt_get(XPRT_RAW));
+ if (ret < 0 || !(srv_conn->ctrl)) {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ }
+ else {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL; /* how did we get there ? */
+ }
+
+ if (sc_attach_mux(s->scb, NULL, srv_conn) < 0) {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL; /* how did we get there ? */
+ }
+ srv_conn->ctx = s->scb;
+
+#if defined(USE_OPENSSL) && defined(TLSEXT_TYPE_application_layer_protocol_negotiation)
+ if (!srv ||
+ (srv->use_ssl != 1 || (!(srv->ssl_ctx.alpn_str) && !(srv->ssl_ctx.npn_str)) ||
+ srv->mux_proto || !IS_HTX_STRM(s)))
+#endif
+ init_mux = 1;
+
+ /* process the case where the server requires the PROXY protocol to be sent */
+ srv_conn->send_proxy_ofs = 0;
+
+ if (srv && srv->pp_opts) {
+ srv_conn->flags |= CO_FL_SEND_PROXY;
+ srv_conn->send_proxy_ofs = 1; /* must compute size */
+ }
+
+ if (srv && (srv->flags & SRV_F_SOCKS4_PROXY)) {
+ srv_conn->send_proxy_ofs = 1;
+ srv_conn->flags |= CO_FL_SOCKS4;
+ }
+
+#if defined(USE_OPENSSL) && defined(TLSEXT_TYPE_application_layer_protocol_negotiation)
+ /* if websocket stream, try to update connection ALPN. */
+ if (unlikely(s->flags & SF_WEBSOCKET) &&
+ srv && srv->use_ssl && srv->ssl_ctx.alpn_str) {
+ char *alpn = "";
+ int force = 0;
+
+ switch (srv->ws) {
+ case SRV_WS_AUTO:
+ alpn = "\x08http/1.1";
+ force = 0;
+ break;
+ case SRV_WS_H1:
+ alpn = "\x08http/1.1";
+ force = 1;
+ break;
+ case SRV_WS_H2:
+ alpn = "\x02h2";
+ force = 1;
+ break;
+ }
+
+ if (!conn_update_alpn(srv_conn, ist(alpn), force))
+ DBG_TRACE_STATE("update alpn for websocket", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ }
+#endif
+ }
+ else {
+ s->flags |= SF_SRV_REUSED;
+
+ /* Currently there seem to be no known cases of xprt ready
+ * without the mux installed here.
+ */
+ BUG_ON(!srv_conn->mux);
+
+ if (!(srv_conn->mux->ctl(srv_conn, MUX_STATUS, NULL) & MUX_STATUS_READY))
+ s->flags |= SF_SRV_REUSED_ANTICIPATED;
+ }
+
+ /* flag for logging source ip/port */
+ if (strm_fe(s)->options2 & PR_O2_SRC_ADDR)
+ s->flags |= SF_SRC_ADDR;
+
+ /* disable lingering */
+ if (s->be->options & PR_O_TCP_NOLING)
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ if (s->flags & SF_SRV_REUSED) {
+ _HA_ATOMIC_INC(&s->be->be_counters.reuse);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.reuse);
+ } else {
+ _HA_ATOMIC_INC(&s->be->be_counters.connect);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.connect);
+ }
+
+ err = do_connect_server(s, srv_conn);
+ if (err != SF_ERR_NONE)
+ return err;
+
+#ifdef USE_OPENSSL
+ if (!(s->flags & SF_SRV_REUSED)) {
+ if (smp_make_safe(sni_smp))
+ ssl_sock_set_servername(srv_conn, sni_smp->data.u.str.area);
+ }
+#endif /* USE_OPENSSL */
+
+ /* The CO_FL_SEND_PROXY flag may have been set by the connect method,
+ * if so, add our handshake pseudo-XPRT now.
+ */
+ if ((srv_conn->flags & CO_FL_HANDSHAKE)) {
+ if (xprt_add_hs(srv_conn) < 0) {
+ conn_full_close(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ }
+ conn_xprt_start(srv_conn);
+
+ /* We have to defer the mux initialization until after si_connect()
+ * has been called, as we need the xprt to have been properly
+ * initialized, or any attempt to recv during the mux init may
+ * fail, and flag the connection as CO_FL_ERROR.
+ */
+ if (init_mux) {
+ const struct mux_ops *alt_mux =
+ likely(!(s->flags & SF_WEBSOCKET)) ? NULL : srv_get_ws_proto(srv);
+ if (conn_install_mux_be(srv_conn, s->scb, s->sess, alt_mux) < 0) {
+ conn_full_close(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ if (IS_HTX_STRM(s)) {
+ /* If we're doing http-reuse always, and the connection
+ * is not private with available streams (an http2
+ * connection), add it to the available list, so that
+ * others can use it right away. If the connection is
+ * private or we're doing http-reuse safe and the mux
+ * protocol supports multiplexing, add it in the
+ * session server list.
+ */
+ if (srv && reuse_mode == PR_O_REUSE_ALWS &&
+ !(srv_conn->flags & CO_FL_PRIVATE) &&
+ srv_conn->mux->avail_streams(srv_conn) > 0) {
+ eb64_insert(&srv->per_thr[tid].avail_conns, &srv_conn->hash_node->node);
+ }
+ else if (srv_conn->flags & CO_FL_PRIVATE ||
+ (reuse_mode == PR_O_REUSE_SAFE &&
+ srv_conn->mux->flags & MX_FL_HOL_RISK)) {
+ /* If it fails now, the same will be done in the mux->detach() callback */
+ session_add_conn(s->sess, srv_conn, srv_conn->target);
+ }
+ }
+ }
+
+#if defined(USE_OPENSSL) && (defined(OPENSSL_IS_BORINGSSL) || (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L))
+
+ if (!reuse && cli_conn && srv && srv_conn->mux &&
+ (srv->ssl_ctx.options & SRV_SSL_O_EARLY_DATA) &&
+	    /* Only attempt to use early data if either the client sent
+	     * early data, so that we know it can handle a 425, or if
+	     * we are allowed to retry requests on early data failure, and
+	     * it's our first try.
+	     */
+ ((cli_conn->flags & CO_FL_EARLY_DATA) ||
+ ((s->be->retry_type & PR_RE_EARLY_ERROR) && !s->conn_retries)) &&
+ !channel_is_empty(sc_oc(s->scb)) &&
+ srv_conn->flags & CO_FL_SSL_WAIT_HS)
+ srv_conn->flags &= ~(CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN);
+#endif
+
+ /* set connect timeout */
+ s->conn_exp = tick_add_ifset(now_ms, s->be->timeout.connect);
+
+ if (srv) {
+ int count;
+
+ s->flags |= SF_CURR_SESS;
+ count = _HA_ATOMIC_ADD_FETCH(&srv->cur_sess, 1);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.cur_sess_max, count);
+ if (s->be->lbprm.server_take_conn)
+ s->be->lbprm.server_take_conn(srv);
+ }
+
+	/* Now handle synchronously connected sockets. We know the stream
+	 * connector is at least in state SC_ST_CON. These typically are UNIX
+	 * sockets, socket pairs, and occasionally TCP connections on the
+	 * loopback of a heavily loaded system.
+	 */
+ if (srv_conn->flags & CO_FL_ERROR)
+ sc_ep_set(s->scb, SE_FL_ERROR);
+
+ /* If we had early data, and the handshake ended, then
+ * we can remove the flag, and attempt to wake the task up,
+ * in the event there's an analyser waiting for the end of
+ * the handshake.
+ */
+ if (!(srv_conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS)))
+ sc_ep_clr(s->scb, SE_FL_WAIT_FOR_HS);
+
+ if (!sc_state_in(s->scb->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO) &&
+ (srv_conn->flags & CO_FL_WAIT_XPRT) == 0) {
+ s->conn_exp = TICK_ETERNITY;
+ sc_oc(s->scb)->flags |= CF_WRITE_NULL;
+ if (s->scb->state == SC_ST_CON)
+ s->scb->state = SC_ST_RDY;
+ }
+
+ /* Report EOI on the channel if it was reached from the mux point of
+ * view.
+ *
+ * Note: This test is only required because si_cs_process is also the SI
+ * wake callback. Otherwise si_cs_recv()/si_cs_send() already take
+ * care of it.
+ */
+ if (sc_ep_test(s->scb, SE_FL_EOI) && !(sc_ic(s->scb)->flags & CF_EOI))
+ sc_ic(s->scb)->flags |= (CF_EOI|CF_READ_PARTIAL);
+
+ /* catch all sync connect while the mux is not already installed */
+ if (!srv_conn->mux && !(srv_conn->flags & CO_FL_WAIT_XPRT)) {
+ if (conn_create_mux(srv_conn) < 0) {
+ conn_full_close(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
+
+
+/* This function performs the "redispatch" part of a connection attempt. It
+ * will assign a server if required, queue the connection if required, and
+ * handle errors that might arise at this level. It can change the server
+ * state. It will return 1 if it encounters an error, switches the server
+ * state, or has to queue a connection. Otherwise, it will return 0 indicating
+ * that the connection is ready to use.
+ */
+int srv_redispatch_connect(struct stream *s)
+{
+ struct server *srv;
+ int conn_err;
+
+ /* We know that we don't have any connection pending, so we will
+ * try to get a new one, and wait in this state if it's queued
+ */
+ redispatch:
+ conn_err = assign_server_and_queue(s);
+ srv = objt_server(s->target);
+
+ switch (conn_err) {
+ case SRV_STATUS_OK:
+ break;
+
+ case SRV_STATUS_FULL:
+ /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
+ * and we can redispatch to another server, or it is not and we return
+ * 503. This only makes sense in DIRECT mode however, because normal LB
+ * algorithms would never select such a server, and hash algorithms
+ * would bring us on the same server again. Note that s->target is set
+ * in this case.
+ */
+ if (((s->flags & (SF_DIRECT|SF_FORCE_PRST)) == SF_DIRECT) &&
+ (s->be->options & PR_O_REDISP)) {
+ s->flags &= ~(SF_DIRECT | SF_ASSIGNED);
+ sockaddr_free(&s->scb->dst);
+ goto redispatch;
+ }
+
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_QUEUE_ERR;
+ }
+
+ _HA_ATOMIC_INC(&srv->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ return 1;
+
+ case SRV_STATUS_NOSRV:
+ /* note: it is guaranteed that srv == NULL here */
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ }
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ return 1;
+
+ case SRV_STATUS_QUEUED:
+ s->conn_exp = tick_add_ifset(now_ms, s->be->timeout.queue);
+ s->scb->state = SC_ST_QUE;
+ /* do nothing else and do not wake any other stream up */
+ return 1;
+
+ case SRV_STATUS_INTERNAL:
+ default:
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ }
+
+		if (srv) {
+			srv_inc_sess_ctr(srv);
+			srv_set_sess_last(srv);
+			_HA_ATOMIC_INC(&srv->counters.failed_conns);
+		}
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+
+ /* release other streams waiting for this server */
+ if (may_dequeue_tasks(srv, s->be))
+ process_srv_queue(srv);
+ return 1;
+ }
+ /* if we get here, it's because we got SRV_STATUS_OK, which also
+ * means that the connection has not been queued.
+ */
+ return 0;
+}
+
+/* Check if the connection request is in such a state that it can be aborted. */
+static int back_may_abort_req(struct channel *req, struct stream *s)
+{
+ return ((req->flags & (CF_READ_ERROR)) ||
+ ((req->flags & (CF_SHUTW_NOW|CF_SHUTW)) && /* empty and client aborted */
+ (channel_is_empty(req) || (s->be->options & PR_O_ABRT_CLOSE))));
+}
+
+/* Update back stream connector status for input states SC_ST_ASS, SC_ST_QUE,
+ * SC_ST_TAR. Other input states are simply ignored.
+ * Possible output states are SC_ST_CLO, SC_ST_TAR, SC_ST_ASS, SC_ST_REQ, SC_ST_CON
+ * and SC_ST_EST. Flags must have previously been updated for timeouts and other
+ * conditions.
+ */
+void back_try_conn_req(struct stream *s)
+{
+ struct server *srv = objt_server(s->target);
+ struct stconn *sc = s->scb;
+ struct channel *req = &s->req;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ if (sc->state == SC_ST_ASS) {
+ /* Server assigned to connection request, we have to try to connect now */
+ int conn_err;
+
+ /* Before we try to initiate the connection, see if the
+ * request may be aborted instead.
+ */
+ if (back_may_abort_req(req, s)) {
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ DBG_TRACE_STATE("connection aborted", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto abort_connection;
+ }
+
+ conn_err = connect_server(s);
+ srv = objt_server(s->target);
+
+ if (conn_err == SF_ERR_NONE) {
+ /* state = SC_ST_CON or SC_ST_EST now */
+			if (srv) {
+				srv_inc_sess_ctr(srv);
+				srv_set_sess_last(srv);
+			}
+ DBG_TRACE_STATE("connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* We have received a synchronous error. We might have to
+ * abort, retry immediately or redispatch.
+ */
+ if (conn_err == SF_ERR_INTERNAL) {
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ }
+
+			if (srv) {
+				srv_inc_sess_ctr(srv);
+				srv_set_sess_last(srv);
+				_HA_ATOMIC_INC(&srv->counters.failed_conns);
+			}
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+
+ /* release other streams waiting for this server */
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(srv, s->be))
+ process_srv_queue(srv);
+
+ /* Failed and not retryable. */
+ sc_shutr(sc);
+ sc_shutw(sc);
+ req->flags |= CF_WRITE_ERROR;
+
+ s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
+
+ /* we may need to know the position in the queue for logging */
+ pendconn_cond_unlink(s->pend_pos);
+
+ /* no stream was ever accounted for this server */
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("internal error during connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ /* We are facing a retryable error, but we don't want to run a
+ * turn-around now, as the problem is likely a source port
+ * allocation problem, so we want to retry now.
+ */
+ sc->state = SC_ST_CER;
+ sc_ep_clr(sc, SE_FL_ERROR);
+ back_handle_st_cer(s);
+
+ DBG_TRACE_STATE("connection error, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ /* now sc->state is one of SC_ST_CLO, SC_ST_TAR, SC_ST_ASS, SC_ST_REQ */
+ }
+ else if (sc->state == SC_ST_QUE) {
+ /* connection request was queued, check for any update */
+ if (!pendconn_dequeue(s)) {
+ /* The connection is not in the queue anymore. Either
+ * we have a server connection slot available and we
+ * go directly to the assigned state, or we need to
+ * load-balance first and go to the INI state.
+ */
+ s->conn_exp = TICK_ETERNITY;
+ if (unlikely(!(s->flags & SF_ASSIGNED)))
+ sc->state = SC_ST_REQ;
+ else {
+ s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
+ sc->state = SC_ST_ASS;
+ }
+ DBG_TRACE_STATE("dequeue connection request", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* Connection request still in queue... */
+ if (s->flags & SF_CONN_EXP) {
+ /* ... and timeout expired */
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+ s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
+
+ /* we may need to know the position in the queue for logging */
+ pendconn_cond_unlink(s->pend_pos);
+
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ sc_shutr(sc);
+ sc_shutw(sc);
+ req->flags |= CF_WRITE_TIMEOUT;
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_QUEUE_TO;
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("connection request still queued", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* Connection remains in queue, check if we have to abort it */
+ if (back_may_abort_req(req, s)) {
+ s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
+
+ /* we may need to know the position in the queue for logging */
+ pendconn_cond_unlink(s->pend_pos);
+
+ s->conn_err_type |= STRM_ET_QUEUE_ABRT;
+ DBG_TRACE_STATE("abort queued connection request", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto abort_connection;
+ }
+
+ /* Nothing changed */
+ }
+ else if (sc->state == SC_ST_TAR) {
+ /* Connection request might be aborted */
+ if (back_may_abort_req(req, s)) {
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ DBG_TRACE_STATE("connection aborted", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto abort_connection;
+ }
+
+ if (!(s->flags & SF_CONN_EXP))
+ return; /* still in turn-around */
+
+ s->flags &= ~SF_CONN_EXP;
+ s->conn_exp = TICK_ETERNITY;
+
+ /* we keep trying on the same server as long as the stream is
+ * marked "assigned".
+ * FIXME: Should we force a redispatch attempt when the server is down ?
+ */
+ if (s->flags & SF_ASSIGNED)
+ sc->state = SC_ST_ASS;
+ else
+ sc->state = SC_ST_REQ;
+
+ DBG_TRACE_STATE("retry connection now", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ }
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ return;
+
+abort_connection:
+ /* give up */
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+ sc_shutr(sc);
+ sc_shutw(sc);
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ return;
+}
+
+/* This function initiates a server connection request on a stream connector
+ * already in SC_ST_REQ state. Upon success, the state goes to SC_ST_ASS for
+ * a real connection to a server, indicating that a server has been assigned,
+ * or SC_ST_RDY for a successful connection to an applet. It may also return
+ * SC_ST_QUE, or SC_ST_CLO upon error.
+ */
+void back_handle_st_req(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+
+ if (sc->state != SC_ST_REQ)
+ return;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ if (unlikely(obj_type(s->target) == OBJ_TYPE_APPLET)) {
+ struct appctx *appctx;
+
+ /* The target is an applet but the SC is in SC_ST_REQ. Thus it
+ * means no appctx are attached to the SC. Otherwise, it will be
+ * in SC_ST_RDY state. So, try to create the appctx now.
+ */
+ BUG_ON(sc_appctx(sc));
+ appctx = sc_applet_create(sc, objt_applet(s->target));
+ if (!appctx) {
+ /* No more memory, let's immediately abort. Force the
+ * error code to ignore the ERR_LOCAL which is not a
+ * real error.
+ */
+ s->flags &= ~(SF_ERR_MASK | SF_FINST_MASK);
+
+ sc_shutr(sc);
+ sc_shutw(sc);
+ s->req.flags |= CF_WRITE_ERROR;
+ s->conn_err_type = STRM_ET_CONN_RES;
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("failed to register applet", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ DBG_TRACE_STATE("applet registered", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* Try to assign a server */
+ if (srv_redispatch_connect(s) != 0) {
+ /* We did not get a server. Either we queued the
+ * connection request, or we encountered an error.
+ */
+ if (sc->state == SC_ST_QUE) {
+ DBG_TRACE_STATE("connection request queued", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* we did not get any server, let's check the cause */
+ sc_shutr(sc);
+ sc_shutw(sc);
+ s->req.flags |= CF_WRITE_ERROR;
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("connection request failed", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ /* The server is assigned */
+ s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
+ sc->state = SC_ST_ASS;
+ be_set_sess_last(s->be);
+ DBG_TRACE_STATE("connection request assigned to a server", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* This function is called with (sc->state == SC_ST_CON) meaning that a
+ * connection was attempted and that the file descriptor is already allocated.
+ * We must check for timeout, error and abort. Possible output states are
+ * SC_ST_CER (error), SC_ST_DIS (abort), and SC_ST_CON (no change). This only
+ * works with connection-based streams. We know that there were no I/O event
+ * when reaching this function. Timeouts and errors are *not* cleared.
+ */
+void back_handle_st_con(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+ struct channel *req = &s->req;
+ struct channel *rep = &s->res;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ /* the client might want to abort */
+ if ((rep->flags & CF_SHUTW) ||
+ ((req->flags & CF_SHUTW_NOW) &&
+ (channel_is_empty(req) || (s->be->options & PR_O_ABRT_CLOSE)))) {
+ sc->flags |= SC_FL_NOLINGER;
+ sc_shutw(sc);
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ /* Note: state = SC_ST_DIS now */
+ DBG_TRACE_STATE("client abort during connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ done:
+ /* retryable error ? */
+ if ((s->flags & SF_CONN_EXP) || sc_ep_test(sc, SE_FL_ERROR)) {
+ if (!s->conn_err_type) {
+ if (sc_ep_test(sc, SE_FL_ERROR))
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ else
+ s->conn_err_type = STRM_ET_CONN_TO;
+ }
+
+ sc->state = SC_ST_CER;
+ DBG_TRACE_STATE("connection failed, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* This function is called with (sc->state == SC_ST_CER) meaning that a
+ * previous connection attempt has failed and that the file descriptor
+ * has already been released. Possible causes include asynchronous error
+ * notification and time out. Possible output states are SC_ST_CLO when
+ * retries are exhausted, SC_ST_TAR when a delay is wanted before a new
+ * connection attempt, SC_ST_ASS when it's wise to retry on the same server,
+ * and SC_ST_REQ when an immediate redispatch is wanted. The buffers are
+ * marked as in error state. Timeouts and errors are cleared before retrying.
+ */
+void back_handle_st_cer(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+ int must_tar = sc_ep_test(sc, SE_FL_ERROR);
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+
+ /* we probably have to release last stream from the server */
+ if (objt_server(s->target)) {
+ struct connection *conn = sc_conn(sc);
+
+ health_adjust(__objt_server(s->target), HANA_STATUS_L4_ERR);
+
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+
+ if (sc_ep_test(sc, SE_FL_ERROR) &&
+ conn && conn->err_code == CO_ER_SSL_MISMATCH_SNI) {
+ /* We tried to connect to a server which is configured
+ * with "verify required" and which doesn't have the
+ * "verifyhost" directive. The server presented a wrong
+ * certificate (a certificate for an unexpected name),
+ * which implies that we have used SNI in the handshake,
+ * and that the server doesn't have the associated cert
+ * and presented a default one.
+ *
+ * This is a serious enough issue not to retry. It's
+ * especially important because this wrong name might
+ * either be the result of a configuration error, and
+ * retrying will only hammer the server, or is caused
+ * by the use of a wrong SNI value, most likely
+ * provided by the client and we don't want to let the
+ * client provoke retries.
+ */
+ s->conn_retries = s->be->conn_retries;
+ DBG_TRACE_DEVEL("Bad SSL cert, disable connection retries", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+ }
+
+ /* ensure that we have enough retries left */
+ if (s->conn_retries >= s->be->conn_retries || !(s->be->retry_type & PR_RE_CONN_FAILED)) {
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ }
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&objt_server(s->target)->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(objt_server(s->target), s->be))
+ process_srv_queue(objt_server(s->target));
+
+ /* shutw is enough to stop a connecting socket */
+ sc_shutw(sc);
+ s->req.flags |= CF_WRITE_ERROR;
+ s->res.flags |= CF_READ_ERROR;
+
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+
+ DBG_TRACE_STATE("connection failed", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+	/* At this stage, we will trigger a connection retry (with or without
+	 * redispatch). Thus we must reset the SI endpoint on the server side
+	 * and close the attached connection. It is especially important to do
+	 * it now if the retry is not immediately performed, to be sure to
+	 * release resources as soon as possible and to not catch errors from
+	 * the lower layers in an unexpected state (i.e. < ST_CONN).
+ *
+ * Note: the stream connector will be switched to ST_REQ, ST_ASS or
+ * ST_TAR and SE_FL_ERROR and SF_CONN_EXP flags will be unset.
+ */
+ if (sc_reset_endp(sc) < 0) {
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&objt_server(s->target)->counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(objt_server(s->target), s->be))
+ process_srv_queue(objt_server(s->target));
+
+ /* shutw is enough to stop a connecting socket */
+ sc_shutw(sc);
+ s->req.flags |= CF_WRITE_ERROR;
+ s->res.flags |= CF_READ_ERROR;
+
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+
+ DBG_TRACE_STATE("error resetting endpoint", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ s->conn_retries++;
+ stream_choose_redispatch(s);
+
+ if (must_tar) {
+ /* The error was an asynchronous connection error, and we will
+ * likely have to retry connecting to the same server, most
+ * likely leading to the same result. To avoid this, we wait
+ * MIN(one second, connect timeout) before retrying. We don't
+ * do it when the failure happened on a reused connection
+ * though.
+ */
+
+ int delay = 1000;
+ const int reused = (s->flags & SF_SRV_REUSED) &&
+ !(s->flags & SF_SRV_REUSED_ANTICIPATED);
+
+ if (s->be->timeout.connect && s->be->timeout.connect < delay)
+ delay = s->be->timeout.connect;
+
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_ERR;
+
+ /* only wait when we're retrying on the same server */
+ if ((sc->state == SC_ST_ASS ||
+ (s->be->srv_act <= 1)) && !reused) {
+ sc->state = SC_ST_TAR;
+ s->conn_exp = tick_add(now_ms, MS_TO_TICKS(delay));
+ }
+ DBG_TRACE_STATE("retry a new connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ }
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* This function is called with (sc->state == SC_ST_RDY) meaning that a
+ * connection was attempted, that the file descriptor is already allocated,
+ * and that it has succeeded. We must still check for errors and aborts.
+ * Possible output states are SC_ST_EST (established), SC_ST_CER (error),
+ * and SC_ST_DIS (abort). This only works with connection-based streams.
+ * Timeouts and errors are *not* cleared.
+ */
+void back_handle_st_rdy(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+ struct channel *req = &s->req;
+ struct channel *rep = &s->res;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ if (unlikely(obj_type(s->target) == OBJ_TYPE_APPLET)) {
+		/* Here the appctx must exist because the SC was set to
+ * SC_ST_RDY state when the appctx was created.
+ */
+ BUG_ON(!sc_appctx(s->scb));
+
+ if (tv_iszero(&s->logs.tv_request))
+ s->logs.tv_request = now;
+ s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
+ be_set_sess_last(s->be);
+ }
+
+ /* We know the connection at least succeeded, though it could have
+ * since met an error for any other reason. At least it didn't time out
+ * even though the timeout might have been reported right after success.
+ * We need to take care of various situations here :
+ * - everything might be OK. We have to switch to established.
+ * - an I/O error might have been reported after a successful transfer,
+ * which is not retryable and needs to be logged correctly, and needs
+ * established as well
+ * - SC_ST_CON implies !CF_WROTE_DATA but not conversely as we could
+ * have validated a connection with incoming data (e.g. TCP with a
+ * banner protocol), or just a successful connect() probe.
+ * - the client might have requested a connection abort, this needs to
+ * be checked before we decide to retry anything.
+ */
+
+ /* it's still possible to handle client aborts or connection retries
+ * before any data were sent.
+ */
+ if (!(req->flags & CF_WROTE_DATA)) {
+ /* client abort ? */
+ if ((rep->flags & CF_SHUTW) ||
+ ((req->flags & CF_SHUTW_NOW) &&
+ (channel_is_empty(req) || (s->be->options & PR_O_ABRT_CLOSE)))) {
+ /* give up */
+ sc->flags |= SC_FL_NOLINGER;
+ sc_shutw(sc);
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("client abort during connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ /* retryable error ? */
+ if (sc_ep_test(sc, SE_FL_ERROR)) {
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ sc->state = SC_ST_CER;
+ DBG_TRACE_STATE("connection failed, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+ }
+
+ /* data were sent and/or we had no error, back_establish() will
+ * now take over.
+ */
+ DBG_TRACE_STATE("connection established", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ s->conn_err_type = STRM_ET_NONE;
+ sc->state = SC_ST_EST;
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* sends a log message when a backend goes down, and also sets last
+ * change date.
+ */
+void set_backend_down(struct proxy *be)
+{
+ be->last_change = now.tv_sec;
+ _HA_ATOMIC_INC(&be->down_trans);
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
+ send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
+ }
+}
+
+/* Apply RDP cookie persistence to the current stream. For this, the function
+ * tries to extract an RDP cookie from the request buffer, and look for the
+ * matching server in the list. If the server is found, it is assigned to the
+ * stream. This always returns 1, and the analyser removes itself from the
+ * list. Nothing is performed if a server was already assigned.
+ */
+int tcp_persist_rdp_cookie(struct stream *s, struct channel *req, int an_bit)
+{
+ struct proxy *px = s->be;
+ int ret;
+ struct sample smp;
+ struct server *srv = px->srv;
+ uint16_t port;
+ uint32_t addr;
+ char *p;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+
+ if (s->flags & SF_ASSIGNED)
+ goto no_cookie;
+
+ memset(&smp, 0, sizeof(smp));
+
+ ret = fetch_rdp_cookie_name(s, &smp, s->be->rdp_cookie_name, s->be->rdp_cookie_len);
+ if (ret == 0 || (smp.flags & SMP_F_MAY_CHANGE) || smp.data.u.str.data == 0)
+ goto no_cookie;
+
+	/* An RDP cookie was found in the request. The cookie format is
+	 * <ip> "." <port> where "ip" is the integer corresponding to the
+	 * server's IP address in network order, and "port" is the integer
+	 * corresponding to the server's port in network order.
+	 */
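+	/* Example (hypothetical values, little-endian host): a server at
+	 * 1.2.3.4:3389 would match a token such as "67305985.15629.0000",
+	 * since 67305985 is the byte sequence 01.02.03.04 read as a 32-bit
+	 * little-endian integer and 15629 is 0x3d0d, i.e. port 3389 (0x0d3d)
+	 * with its two bytes swapped.
+	 */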
+ addr = strtoul(smp.data.u.str.area, &p, 10);
+ if (*p != '.')
+ goto no_cookie;
+ p++;
+
+ port = ntohs(strtoul(p, &p, 10));
+ if (*p != '.')
+ goto no_cookie;
+
+ s->target = NULL;
+ while (srv) {
+ if (srv->addr.ss_family == AF_INET &&
+ port == srv->svc_port &&
+ addr == ((struct sockaddr_in *)&srv->addr)->sin_addr.s_addr) {
+ if ((srv->cur_state != SRV_ST_STOPPED) || (px->options & PR_O_PERSIST)) {
+ /* we found the server and it is usable */
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ break;
+ }
+ }
+ srv = srv->next;
+ }
+
+no_cookie:
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 1;
+}
+
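+/* Returns the total downtime of proxy <px> in seconds. When the backend is
+ * currently down (no usable server weight), the time elapsed since the last
+ * state change is added to the accumulated down time.
+ */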
+int be_downtime(struct proxy *px) {
+ if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
+ return px->down_time;
+
+ return now.tv_sec - px->last_change + px->down_time;
+}
+
+/*
+ * This function returns a string containing the name of the balancing
+ * algorithm <algo> in a format suitable for stats.
+ */
+const char *backend_lb_algo_str(int algo) {
+ if (algo == BE_LB_ALGO_RR)
+ return "roundrobin";
+ else if (algo == BE_LB_ALGO_SRR)
+ return "static-rr";
+ else if (algo == BE_LB_ALGO_FAS)
+ return "first";
+ else if (algo == BE_LB_ALGO_LC)
+ return "leastconn";
+ else if (algo == BE_LB_ALGO_SH)
+ return "source";
+ else if (algo == BE_LB_ALGO_UH)
+ return "uri";
+ else if (algo == BE_LB_ALGO_PH)
+ return "url_param";
+ else if (algo == BE_LB_ALGO_HH)
+ return "hdr";
+ else if (algo == BE_LB_ALGO_RCH)
+ return "rdp-cookie";
+ else if (algo == BE_LB_ALGO_SMP)
+ return "hash";
+ else if (algo == BE_LB_ALGO_NONE)
+ return "none";
+ else
+ return "unknown";
+}
+
+/* This function parses a "balance" statement in a backend section describing
+ * <curproxy>. It returns -1 on error, otherwise zero. On error, it writes a
+ * message into the <err> buffer, which is automatically allocated and which
+ * the caller must therefore pass as NULL. The trailing '\n' is not written.
+ * The function must be called with <args> pointing to the first word after
+ * "balance".
+ */
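+/* Illustrative examples of statements accepted by this parser:
+ *   balance roundrobin
+ *   balance random(3)
+ *   balance uri len 20 depth 3
+ *   balance hdr(Host) use_domain_only
+ *   balance rdp-cookie(mstshash)
+ */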
+int backend_parse_balance(const char **args, char **err, struct proxy *curproxy)
+{
+ if (!*(args[0])) {
+ /* if no option is set, use round-robin by default */
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ return 0;
+ }
+
+ if (strcmp(args[0], "roundrobin") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ }
+ else if (strcmp(args[0], "static-rr") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_SRR;
+ }
+ else if (strcmp(args[0], "first") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_FAS;
+ }
+ else if (strcmp(args[0], "leastconn") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_LC;
+ }
+ else if (!strncmp(args[0], "random", 6)) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RND;
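+		/* "balance random" defaults to 2 draws per pick, i.e. the
+		 * "power of two choices" strategy; random(N) overrides this
+		 * below.
+		 */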
+ curproxy->lbprm.arg_opt1 = 2;
+
+ if (*(args[0] + 6) == '(' && *(args[0] + 7) != ')') { /* number of draws */
+ const char *beg;
+ char *end;
+
+ beg = args[0] + 7;
+ curproxy->lbprm.arg_opt1 = strtol(beg, &end, 0);
+
+ if (*end != ')') {
+ if (!*end)
+ memprintf(err, "random : missing closing parenthesis.");
+ else
+ memprintf(err, "random : unexpected character '%c' after argument.", *end);
+ return -1;
+ }
+
+ if (curproxy->lbprm.arg_opt1 < 1) {
+ memprintf(err, "random : number of draws must be at least 1.");
+ return -1;
+ }
+ }
+ }
+ else if (strcmp(args[0], "source") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_SH;
+ }
+ else if (strcmp(args[0], "uri") == 0) {
+ int arg = 1;
+
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_UH;
+ curproxy->lbprm.arg_opt1 = 0; // "whole", "path-only"
+ curproxy->lbprm.arg_opt2 = 0; // "len"
+ curproxy->lbprm.arg_opt3 = 0; // "depth"
+
+ while (*args[arg]) {
+ if (strcmp(args[arg], "len") == 0) {
+ if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
+ memprintf(err, "%s : '%s' expects a positive integer (got '%s').", args[0], args[arg], args[arg+1]);
+ return -1;
+ }
+ curproxy->lbprm.arg_opt2 = atoi(args[arg+1]);
+ arg += 2;
+ }
+ else if (strcmp(args[arg], "depth") == 0) {
+ if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
+ memprintf(err, "%s : '%s' expects a positive integer (got '%s').", args[0], args[arg], args[arg+1]);
+ return -1;
+ }
+ /* hint: we store the position of the ending '/' (depth+1) so
+ * that we avoid a comparison while computing the hash.
+ */
+ curproxy->lbprm.arg_opt3 = atoi(args[arg+1]) + 1;
+ arg += 2;
+ }
+ else if (strcmp(args[arg], "whole") == 0) {
+ curproxy->lbprm.arg_opt1 |= 1;
+ arg += 1;
+ }
+ else if (strcmp(args[arg], "path-only") == 0) {
+ curproxy->lbprm.arg_opt1 |= 2;
+ arg += 1;
+ }
+ else {
+ memprintf(err, "%s only accepts parameters 'len', 'depth', 'path-only', and 'whole' (got '%s').", args[0], args[arg]);
+ return -1;
+ }
+ }
+ }
+ else if (strcmp(args[0], "url_param") == 0) {
+ if (!*args[1]) {
+			memprintf(err, "%s requires a URL parameter name.", args[0]);
+ return -1;
+ }
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_PH;
+
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = strdup(args[1]);
+ curproxy->lbprm.arg_len = strlen(args[1]);
+ if (*args[2]) {
+ if (strcmp(args[2], "check_post") != 0) {
+ memprintf(err, "%s only accepts 'check_post' modifier (got '%s').", args[0], args[2]);
+ return -1;
+ }
+ }
+ }
+ else if (strcmp(args[0], "hash") == 0) {
+ if (!*args[1]) {
+ memprintf(err, "%s requires a sample expression.", args[0]);
+ return -1;
+ }
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_SMP;
+
+ ha_free(&curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = strdup(args[1]);
+ curproxy->lbprm.arg_len = strlen(args[1]);
+
+ if (*args[2]) {
+ memprintf(err, "%s takes no other argument (got '%s').", args[0], args[2]);
+ return -1;
+ }
+ }
+ else if (!strncmp(args[0], "hdr(", 4)) {
+ const char *beg, *end;
+
+ beg = args[0] + 4;
+ end = strchr(beg, ')');
+
+ if (!end || end == beg) {
+ memprintf(err, "hdr requires an http header field name.");
+ return -1;
+ }
+
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_HH;
+
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_len = end - beg;
+ curproxy->lbprm.arg_str = my_strndup(beg, end - beg);
+ curproxy->lbprm.arg_opt1 = 0;
+
+ if (*args[1]) {
+ if (strcmp(args[1], "use_domain_only") != 0) {
+ memprintf(err, "%s only accepts 'use_domain_only' modifier (got '%s').", args[0], args[1]);
+ return -1;
+ }
+ curproxy->lbprm.arg_opt1 = 1;
+ }
+ }
+ else if (!strncmp(args[0], "rdp-cookie", 10)) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RCH;
+
+		if (*(args[0] + 10) == '(') { /* cookie name */
+ const char *beg, *end;
+
+ beg = args[0] + 11;
+ end = strchr(beg, ')');
+
+ if (!end || end == beg) {
+ memprintf(err, "rdp-cookie : missing cookie name.");
+ return -1;
+ }
+
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = my_strndup(beg, end - beg);
+ curproxy->lbprm.arg_len = end - beg;
+ }
+		else if (*(args[0] + 10) == '\0') { /* default cookie name 'mstshash' */
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = strdup("mstshash");
+ curproxy->lbprm.arg_len = strlen(curproxy->lbprm.arg_str);
+ }
+ else { /* syntax */
+ memprintf(err, "rdp-cookie : missing cookie name.");
+ return -1;
+ }
+ }
+ else {
+		memprintf(err, "only supports 'roundrobin', 'static-rr', 'first', 'leastconn', 'random', 'source', 'uri', 'url_param', 'hash', 'hdr(name)' and 'rdp-cookie(name)' options.");
+ return -1;
+ }
+ return 0;
+}
+
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* set temp integer to the number of enabled servers on the proxy.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_nbsrv(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+
+ smp->data.u.sint = be_usable_srv(px);
+
+ return 1;
+}
+
+/* report in smp->flags a success or failure depending on the designated
+ * server's state. There is no match function involved since there's no pattern.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_is_up(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = args->data.srv;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_BOOL;
+ if (!(srv->cur_admin & SRV_ADMF_MAINT) &&
+ (!(srv->check.state & CHK_ST_CONFIGURED) || (srv->cur_state != SRV_ST_STOPPED)))
+ smp->data.u.sint = 1;
+ else
+ smp->data.u.sint = 0;
+ return 1;
+}
+
+/* set temp integer to the number of connection slots still available on the
+ * backend, i.e. the sum of free connection slots and free queue slots over
+ * all non-stopped servers, or -1 if any of them has no connection or queue
+ * limit.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_connslots(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *iterator;
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ for (iterator = px->srv; iterator; iterator = iterator->next) {
+ if (iterator->cur_state == SRV_ST_STOPPED)
+ continue;
+
+ if (iterator->maxconn == 0 || iterator->maxqueue == 0) {
+			/* unlimited connection count or queue: the total
+			 * cannot be computed, report -1.
+			 */
+			smp->data.u.sint = -1;
+ return 1;
+ }
+
+ smp->data.u.sint += (iterator->maxconn - iterator->cur_sess)
+ + (iterator->maxqueue - iterator->queue.length);
+ }
+
+ return 1;
+}
+
+/* set temp integer to the id of the backend */
+static int
+smp_fetch_be_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->uuid;
+ return 1;
+}
+
+/* set string to the name of the backend */
+static int
+smp_fetch_be_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->data.u.str.area = (char *)px->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* set temp integer to the id of the server */
+static int
+smp_fetch_srv_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = NULL;
+
+ if (smp->strm)
+ srv = objt_server(smp->strm->target);
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ srv = __objt_check(smp->sess->origin)->server;
+ if (!srv)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = srv->puid;
+
+ return 1;
+}
+
+/* set string to the name of the server */
+static int
+smp_fetch_srv_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = NULL;
+
+ if (smp->strm)
+ srv = objt_server(smp->strm->target);
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ srv = __objt_check(smp->sess->origin)->server;
+ if (!srv)
+ return 0;
+
+ smp->data.u.str.area = srv->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* set temp integer to the number of connections per second reaching the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_be_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&px->be_sess_per_sec);
+ return 1;
+}
+
+/* set temp integer to the number of concurrent connections on the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_be_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->beconn;
+ return 1;
+}
+
+/* set temp integer to the number of available connections across available
+ * servers on the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_be_conn_free(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *iterator;
+ struct proxy *px = args->data.prx;
+ unsigned int maxconn;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ for (iterator = px->srv; iterator; iterator = iterator->next) {
+ if (iterator->cur_state == SRV_ST_STOPPED)
+ continue;
+
+ px = iterator->proxy;
+ if (!srv_currently_usable(iterator) ||
+ ((iterator->flags & SRV_F_BACKUP) &&
+ (px->srv_act || (iterator != px->lbprm.fbck && !(px->options & PR_O_USE_ALL_BK)))))
+ continue;
+
+ if (iterator->maxconn == 0) {
+ /* one active server is unlimited, return -1 */
+ smp->data.u.sint = -1;
+ return 1;
+ }
+
+ maxconn = srv_dynamic_maxconn(iterator);
+ if (maxconn > iterator->cur_sess)
+ smp->data.u.sint += maxconn - iterator->cur_sess;
+ }
+
+ return 1;
+}
+
+/* set temp integer to the total number of queued connections on the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_queue_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->totpend;
+ return 1;
+}
+
+/* set temp integer to the total number of queued connections on the backend divided
+ * by the number of running servers and rounded up. If there is no running
+ * server, we return twice the total, just as if we had half a running server.
+ * This is more or less correct anyway, since we expect the last server to come
+ * back soon.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_avg_queue_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+ int nbsrv;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+
+ nbsrv = be_usable_srv(px);
+
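+	/* ceiling division: e.g. 10 pending connections spread over 4 usable
+	 * servers yields (10 + 4 - 1) / 4 = 3.
+	 */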
+ if (nbsrv > 0)
+ smp->data.u.sint = (px->totpend + nbsrv - 1) / nbsrv;
+ else
+ smp->data.u.sint = px->totpend * 2;
+
+ return 1;
+}
+
+/* set temp integer to the number of concurrent connections on the server in the backend.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->cur_sess;
+ return 1;
+}
+
+/* set temp integer to the number of available connections on the server in the backend.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_conn_free(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int maxconn;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+
+ if (args->data.srv->maxconn == 0) {
+ /* one active server is unlimited, return -1 */
+ smp->data.u.sint = -1;
+ return 1;
+ }
+
+ maxconn = srv_dynamic_maxconn(args->data.srv);
+ if (maxconn > args->data.srv->cur_sess)
+ smp->data.u.sint = maxconn - args->data.srv->cur_sess;
+ else
+ smp->data.u.sint = 0;
+
+ return 1;
+}
+
+/* set temp integer to the number of connections pending in the server's queue.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_queue(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->queue.length;
+ return 1;
+}
+
+/* set temp integer to the number of sessions per second reaching the server.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&args->data.srv->sess_per_sec);
+ return 1;
+}
+
+/* set temp integer to the server weight.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_weight(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = args->data.srv;
+ struct proxy *px = srv->proxy;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
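+	/* Scale the internal effective weight back to the user-visible range,
+	 * rounding up. E.g. with hypothetical factors wmult=1 and wdiv=16,
+	 * cur_eweight=161 yields (161 + 15) / 16 = 11.
+	 */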
+ smp->data.u.sint = (srv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv;
+ return 1;
+}
+
+/* set temp integer to the server initial weight.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_iweight(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->iweight;
+ return 1;
+}
+
+/* set temp integer to the server user-specified weight.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_uweight(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->uweight;
+ return 1;
+}
+
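+/* set temp integer to the backend's "timeout server" value in milliseconds.
+ * Works for streams as well as for health checks.
+ */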
+static int
+smp_fetch_be_server_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = TICKS_TO_MS(px->timeout.server);
+ return 1;
+}
+
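+/* set temp integer to the backend's "timeout tunnel" value in milliseconds.
+ * Works for streams as well as for health checks.
+ */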
+static int
+smp_fetch_be_tunnel_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = TICKS_TO_MS(px->timeout.tunnel);
+ return 1;
+}
+
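+/* Takes a backend name on input and converts it to the number of usable
+ * servers in that backend. Fails if no such backend exists.
+ */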
+static int sample_conv_nbsrv(const struct arg *args, struct sample *smp, void *private)
+{
+	struct proxy *px;
+
+ if (!smp_make_safe(smp))
+ return 0;
+
+ px = proxy_find_by_name(smp->data.u.str.area, PR_CAP_BE, 0);
+ if (!px)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = be_usable_srv(px);
+
+ return 1;
+}
+
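+/* Takes a string of the form "<backend>/<server>", or just "<server>" to use
+ * the current proxy, and converts it to the number of connections queued on
+ * that server. Fails if the backend or server cannot be found.
+ */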
+static int
+sample_conv_srv_queue(const struct arg *args, struct sample *smp, void *private)
+{
+ struct proxy *px;
+ struct server *srv;
+ char *bksep;
+
+ if (!smp_make_safe(smp))
+ return 0;
+
+ bksep = strchr(smp->data.u.str.area, '/');
+
+ if (bksep) {
+ *bksep = '\0';
+ px = proxy_find_by_name(smp->data.u.str.area, PR_CAP_BE, 0);
+ if (!px)
+ return 0;
+ smp->data.u.str.area = bksep + 1;
+ } else {
+ if (!(smp->px->cap & PR_CAP_BE))
+ return 0;
+ px = smp->px;
+ }
+
+ srv = server_find_by_name(px, smp->data.u.str.area);
+ if (!srv)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = srv->queue.length;
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "avg_queue", smp_fetch_avg_queue_size, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_conn", smp_fetch_be_conn, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_conn_free", smp_fetch_be_conn_free, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_id", smp_fetch_be_id, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "be_name", smp_fetch_be_name, 0, NULL, SMP_T_STR, SMP_USE_BKEND, },
+ { "be_server_timeout", smp_fetch_be_server_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "be_sess_rate", smp_fetch_be_sess_rate, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_tunnel_timeout", smp_fetch_be_tunnel_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "connslots", smp_fetch_connslots, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "nbsrv", smp_fetch_nbsrv, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "queue", smp_fetch_queue_size, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "srv_conn", smp_fetch_srv_conn, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "srv_conn_free", smp_fetch_srv_conn_free, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_id",          smp_fetch_srv_id,         0,           NULL, SMP_T_SINT, SMP_USE_SERVR, },
+	{ "srv_is_up",       smp_fetch_srv_is_up,      ARG1(1,SRV), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+	{ "srv_iweight",     smp_fetch_srv_iweight,    ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_name",        smp_fetch_srv_name,       0,           NULL, SMP_T_STR,  SMP_USE_SERVR, },
+	{ "srv_queue",       smp_fetch_srv_queue,      ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_sess_rate",   smp_fetch_srv_sess_rate,  ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_uweight",     smp_fetch_srv_uweight,    ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_weight",      smp_fetch_srv_weight,     ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "nbsrv", sample_conv_nbsrv, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { "srv_queue", sample_conv_srv_queue, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/base64.c b/src/base64.c
new file mode 100644
index 0000000..0601bf6
--- /dev/null
+++ b/src/base64.c
@@ -0,0 +1,303 @@
+/*
+ * ASCII <-> Base64 conversion as described in RFC1421.
+ *
+ * Copyright 2006-2010 Willy Tarreau <w@1wt.eu>
+ * Copyright 2009-2010 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+
+#define B64BASE '#' /* arbitrary chosen base value */
+#define B64CMIN '+'
+#define UB64CMIN '-'
+#define B64CMAX 'z'
+#define B64PADV 64 /* Base64 chosen special pad value */
+
+const char base64tab[65]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+const char base64rev[]="b###cXYZ[\\]^_`a###d###$%&'()*+,-./0123456789:;<=######>?@ABCDEFGHIJKLMNOPQRSTUVW";
+const char ubase64tab[65]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+const char ubase64rev[]="b##XYZ[\\]^_`a###c###$%&'()*+,-./0123456789:;<=####c#>?@ABCDEFGHIJKLMNOPQRSTUVW";
+
+/* Encodes <ilen> bytes from <in> to <out> for at most <olen> chars (including
+ * the trailing zero). Returns the number of bytes written. No check is made
+ * for <in> or <out> to be NULL. Returns negative value if <olen> is too short
+ * to accept <ilen>. 4 output bytes are produced for 1 to 3 input bytes.
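+ * For example, a2base64("foo", 3, out, 5) writes "Zm9v" and returns 4.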
+ */
+int a2base64(char *in, int ilen, char *out, int olen)
+{
+ int convlen;
+
+ convlen = ((ilen + 2) / 3) * 4;
+
+ if (convlen >= olen)
+ return -1;
+
+ /* we don't need to check olen anymore */
+ while (ilen >= 3) {
+ out[0] = base64tab[(((unsigned char)in[0]) >> 2)];
+ out[1] = base64tab[(((unsigned char)in[0] & 0x03) << 4) | (((unsigned char)in[1]) >> 4)];
+ out[2] = base64tab[(((unsigned char)in[1] & 0x0F) << 2) | (((unsigned char)in[2]) >> 6)];
+ out[3] = base64tab[(((unsigned char)in[2] & 0x3F))];
+ out += 4;
+ in += 3; ilen -= 3;
+ }
+
+ if (!ilen) {
+ out[0] = '\0';
+ } else {
+ out[0] = base64tab[((unsigned char)in[0]) >> 2];
+ if (ilen == 1) {
+ out[1] = base64tab[((unsigned char)in[0] & 0x03) << 4];
+ out[2] = '=';
+ } else {
+ out[1] = base64tab[(((unsigned char)in[0] & 0x03) << 4) |
+ (((unsigned char)in[1]) >> 4)];
+ out[2] = base64tab[((unsigned char)in[1] & 0x0F) << 2];
+ }
+ out[3] = '=';
+ out[4] = '\0';
+ }
+
+ return convlen;
+}
+
+/* url variant of a2base64 */
+int a2base64url(const char *in, size_t ilen, char *out, size_t olen)
+{
+ int convlen;
+
+ convlen = ((ilen + 2) / 3) * 4;
+
+ if (convlen >= olen)
+ return -1;
+
+ /* we don't need to check olen anymore */
+ while (ilen >= 3) {
+ out[0] = ubase64tab[(((unsigned char)in[0]) >> 2)];
+ out[1] = ubase64tab[(((unsigned char)in[0] & 0x03) << 4) | (((unsigned char)in[1]) >> 4)];
+ out[2] = ubase64tab[(((unsigned char)in[1] & 0x0F) << 2) | (((unsigned char)in[2]) >> 6)];
+ out[3] = ubase64tab[(((unsigned char)in[2] & 0x3F))];
+ out += 4;
+ in += 3;
+ ilen -= 3;
+ }
+
+ if (!ilen) {
+ out[0] = '\0';
+ return convlen;
+ }
+
+ out[0] = ubase64tab[((unsigned char)in[0]) >> 2];
+ if (ilen == 1) {
+ out[1] = ubase64tab[((unsigned char)in[0] & 0x03) << 4];
+ out[2] = '\0';
+ convlen -= 2;
+ } else {
+ out[1] = ubase64tab[(((unsigned char)in[0] & 0x03) << 4) |
+ (((unsigned char)in[1]) >> 4)];
+ out[2] = ubase64tab[((unsigned char)in[1] & 0x0F) << 2];
+ out[3] = '\0';
+ convlen -= 1;
+ }
+
+ return convlen;
+}
+
+/* Decodes <ilen> bytes from <in> to <out> for at most <olen> chars.
+ * Returns the number of bytes converted. No check is made for
+ * <in> or <out> to be NULL. Returns -1 if <in> is invalid or <ilen> is not
+ * a multiple of 4, -2 if <olen> is too short.
+ * 1 to 3 output bytes are produced for 4 input bytes.
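+ * For example, base64dec("Zm9vYg==", 8, out, 4) writes "foob" and returns 4.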
+ */
+int base64dec(const char *in, size_t ilen, char *out, size_t olen) {
+
+ unsigned char t[4];
+ signed char b;
+ int convlen = 0, i = 0, pad = 0;
+
+ if (ilen % 4)
+ return -1;
+
+ if (olen < ((ilen / 4 * 3)
+ - (in[ilen-1] == '=' ? 1 : 0)
+ - (in[ilen-2] == '=' ? 1 : 0)))
+ return -2;
+
+ while (ilen) {
+
+ /* if (*p < B64CMIN || *p > B64CMAX) */
+ b = (signed char)*in - B64CMIN;
+ if ((unsigned char)b > (B64CMAX-B64CMIN))
+ return -1;
+
+ b = base64rev[b] - B64BASE - 1;
+
+ /* b == -1: invalid character */
+ if (b < 0)
+ return -1;
+
+ /* padding has to be continuous */
+ if (pad && b != B64PADV)
+ return -1;
+
+ /* valid padding: "XX==" or "XXX=", but never "X===" or "====" */
+ if (pad && i < 2)
+ return -1;
+
+ if (b == B64PADV)
+ pad++;
+
+ t[i++] = b;
+
+ if (i == 4) {
+			/*
+			 * WARNING: we may write a little more data than we
+			 * should, but the checks at the beginning of the
+			 * function guarantee that we can safely do so.
+			 */
+
+ /* xx000000 xx001111 xx111122 xx222222 */
+ if (convlen < olen)
+ out[convlen] = ((t[0] << 2) + (t[1] >> 4));
+ if (convlen+1 < olen)
+ out[convlen+1] = ((t[1] << 4) + (t[2] >> 2));
+ if (convlen+2 < olen)
+ out[convlen+2] = ((t[2] << 6) + (t[3] >> 0));
+
+ convlen += 3-pad;
+
+ pad = i = 0;
+ }
+
+ in++;
+ ilen--;
+ }
+
+ return convlen;
+}
+
+/* url variant of base64dec */
+/* The reverse tab used to decode base64 is generated via /dev/base64/base64rev-gen.c */
+int base64urldec(const char *in, size_t ilen, char *out, size_t olen)
+{
+ unsigned char t[4];
+ signed char b;
+ int convlen = 0, i = 0, pad = 0, padlen = 0;
+
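+	/* base64url input is expected unpadded, so a valid length is 0, 2 or
+	 * 3 modulo 4; a remainder of 1 can never result from encoding.
+	 */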
+ switch (ilen % 4) {
+ case 0:
+ break;
+ case 2:
+ padlen = pad = 2;
+ break;
+ case 3:
+ padlen = pad = 1;
+ break;
+ default:
+ return -1;
+ }
+
+ if (olen < (((ilen + pad) / 4 * 3) - pad))
+ return -2;
+
+ while (ilen + pad) {
+ if (ilen) {
+ /* if (*p < UB64CMIN || *p > B64CMAX) */
+ b = (signed char) * in - UB64CMIN;
+ if ((unsigned char)b > (B64CMAX - UB64CMIN))
+ return -1;
+
+ b = ubase64rev[b] - B64BASE - 1;
+ /* b == -1: invalid character */
+ if (b < 0)
+ return -1;
+
+ in++;
+ ilen--;
+
+ } else {
+ b = B64PADV;
+ pad--;
+ }
+
+ t[i++] = b;
+
+ if (i == 4) {
+			/*
+			 * WARNING: we may write a little more data than we
+			 * should, but the checks at the beginning of the
+			 * function guarantee that we can safely do so.
+			 */
+
+ /* xx000000 xx001111 xx111122 xx222222 */
+ if (convlen < olen)
+ out[convlen] = ((t[0] << 2) + (t[1] >> 4));
+ if (convlen+1 < olen)
+ out[convlen+1] = ((t[1] << 4) + (t[2] >> 2));
+ if (convlen+2 < olen)
+ out[convlen+2] = ((t[2] << 6) + (t[3] >> 0));
+
+ convlen += 3;
+ i = 0;
+ }
+ }
+ convlen -= padlen;
+
+ return convlen;
+}
+
+/* Converts the lower 30 bits of an integer to a 5-char base64 string. The
+ * caller is responsible for ensuring that the output buffer can accept 6 bytes
+ * (5 + the trailing zero). The pointer to the string is returned. The
+ * conversion is performed with MSB first and in a format that can be
+ * decoded with b64tos30(). This format is not padded and thus is not
+ * compatible with usual base64 routines.
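+ * For example, s30tob64(0, out) produces "AAAAA", and b64tos30("AAAAA")
+ * returns 0.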
+ */
+const char *s30tob64(int in, char *out)
+{
+ int i;
+ for (i = 0; i < 5; i++) {
+ out[i] = base64tab[(in >> 24) & 0x3F];
+ in <<= 6;
+ }
+ out[5] = '\0';
+ return out;
+}
+
+/* Converts a 5-char base64 string encoded by s30tob64() into a 30-bit integer.
+ * The caller is responsible for ensuring that the input contains at least 5
+ * chars. If any unexpected character is encountered, a negative value is
+ * returned. Otherwise the decoded value is returned.
+ */
+int b64tos30(const char *in)
+{
+ int i, out;
+ signed char b;
+
+ out = 0;
+ for (i = 0; i < 5; i++) {
+ b = (signed char)in[i] - B64CMIN;
+ if ((unsigned char)b > (B64CMAX - B64CMIN))
+ return -1; /* input character out of range */
+
+ b = base64rev[b] - B64BASE - 1;
+ if (b < 0) /* invalid character */
+ return -1;
+
+ if (b == B64PADV) /* padding not allowed */
+ return -1;
+
+ out = (out << 6) + b;
+ }
+ return out;
+}
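+
+/* Round-trip sketch for the two helpers above (illustrative value only):
+ *
+ *   char buf[6];
+ *   s30tob64(0x12345678 & 0x3FFFFFFF, buf); // encode the lower 30 bits
+ *   int v = b64tos30(buf);                  // v == 0x12345678 & 0x3FFFFFFF
+ */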
diff --git a/src/cache.c b/src/cache.c
new file mode 100644
index 0000000..1f98ea5
--- /dev/null
+++ b/src/cache.c
@@ -0,0 +1,2764 @@
+/*
+ * Cache management
+ *
+ * Copyright 2017 HAProxy Technologies
+ * William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <import/eb32tree.h>
+#include <import/sha1.h>
+
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/hash.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/htx.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/shctx.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+
+#define CACHE_FLT_F_IMPLICIT_DECL 0x00000001 /* The cache filter was implicitly declared (i.e. without
+                                              * the filter keyword) */
+#define CACHE_FLT_INIT 0x00000002 /* Whether the cache name was freed. */
+
+const char *cache_store_flt_id = "cache store filter";
+
+extern struct applet http_cache_applet;
+
+struct flt_ops cache_ops;
+
+struct cache {
+ struct list list; /* cache linked list */
+ struct eb_root entries; /* head of cache entries based on keys */
+ unsigned int maxage; /* max-age */
+ unsigned int maxblocks;
+ unsigned int maxobjsz; /* max-object-size (in bytes) */
+ unsigned int max_secondary_entries; /* maximum number of secondary entries with the same primary hash */
+ uint8_t vary_processing_enabled; /* boolean : manage Vary header (disabled by default) */
+ char id[33]; /* cache name */
+};
+
+/* the appctx context of a cache applet, stored in appctx->svcctx */
+struct cache_appctx {
+ struct cache_entry *entry; /* Entry to be sent from cache. */
+ unsigned int sent; /* The number of bytes already sent for this cache entry. */
+ unsigned int offset; /* start offset of remaining data relative to beginning of the next block */
+ unsigned int rem_data; /* Remaining bytes for the last data block (HTX only, 0 means process next block) */
+ unsigned int send_notmodified:1; /* In case of conditional request, we might want to send a "304 Not Modified" response instead of the stored data. */
+ unsigned int unused:31;
+ struct shared_block *next; /* The next block of data to be sent for this cache entry. */
+};
+
+/* cache config for filters */
+struct cache_flt_conf {
+ union {
+ struct cache *cache; /* cache used by the filter */
+ char *name; /* cache name used during conf parsing */
+ } c;
+ unsigned int flags; /* CACHE_FLT_F_* */
+};
+
+/* CLI context used during "show cache" */
+struct show_cache_ctx {
+ struct cache *cache;
+ uint next_key;
+};
+
+
+/*
+ * Vary-related structures and functions
+ */
+enum vary_header_bit {
+ VARY_ACCEPT_ENCODING = (1 << 0),
+ VARY_REFERER = (1 << 1),
+ VARY_LAST /* should always be last */
+};
+
+/*
+ * Encoding list extracted from
+ * https://www.iana.org/assignments/http-parameters/http-parameters.xhtml
+ * and RFC7231#5.3.4.
+ */
+enum vary_encoding {
+ VARY_ENCODING_GZIP = (1 << 0),
+ VARY_ENCODING_DEFLATE = (1 << 1),
+ VARY_ENCODING_BR = (1 << 2),
+ VARY_ENCODING_COMPRESS = (1 << 3),
+ VARY_ENCODING_AES128GCM = (1 << 4),
+ VARY_ENCODING_EXI = (1 << 5),
+ VARY_ENCODING_PACK200_GZIP = (1 << 6),
+ VARY_ENCODING_ZSTD = (1 << 7),
+ VARY_ENCODING_IDENTITY = (1 << 8),
+ VARY_ENCODING_STAR = (1 << 9),
+ VARY_ENCODING_OTHER = (1 << 10)
+};
+
+struct vary_hashing_information {
+ struct ist hdr_name; /* Header name */
+ enum vary_header_bit value; /* Bit representing the header in a vary signature */
+ unsigned int hash_length; /* Size of the sub hash for this header's value */
+	int (*norm_fn)(struct htx *htx, struct ist hdr_name, char *buf, unsigned int *buf_len); /* Normalization function */
+ int(*cmp_fn)(const void *ref, const void *new, unsigned int len); /* Comparison function, should return 0 if the hashes are alike */
+};
+
+static int http_request_prebuild_full_secondary_key(struct stream *s);
+static int http_request_build_secondary_key(struct stream *s, int vary_signature);
+static int http_request_reduce_secondary_key(unsigned int vary_signature,
+ char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN]);
+
+static int parse_encoding_value(struct ist value, unsigned int *encoding_value,
+ unsigned int *has_null_weight);
+
+static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len);
+static int default_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len);
+
+static int accept_encoding_bitmap_cmp(const void *ref, const void *new, unsigned int len);
+
+/* Warning : do not forget to update HTTP_CACHE_SEC_KEY_LEN when new items are
+ * added to this array. */
+const struct vary_hashing_information vary_information[] = {
+ { IST("accept-encoding"), VARY_ACCEPT_ENCODING, sizeof(uint32_t), &accept_encoding_normalizer, &accept_encoding_bitmap_cmp },
+ { IST("referer"), VARY_REFERER, sizeof(int), &default_normalizer, NULL },
+};
+
+
+/*
+ * cache ctx for filters
+ */
+struct cache_st {
+ struct shared_block *first_block;
+};
+
+#define DEFAULT_MAX_SECONDARY_ENTRY 10
+
+struct cache_entry {
+	unsigned int complete;          /* An entry is not valid until <complete> is non-zero. */
+ unsigned int latest_validation; /* latest validation date */
+ unsigned int expire; /* expiration date (wall clock time) */
+ unsigned int age; /* Origin server "Age" header value */
+
+ struct eb32_node eb; /* ebtree node used to hold the cache object */
+ char hash[20];
+
+ char secondary_key[HTTP_CACHE_SEC_KEY_LEN]; /* Optional secondary key. */
+ unsigned int secondary_key_signature; /* Bitfield of the HTTP headers that should be used
+ * to build secondary keys for this cache entry. */
+ unsigned int secondary_entries_count; /* Should only be filled in the last entry of a list of dup entries */
+ unsigned int last_clear_ts; /* Timestamp of the last call to clear_expired_duplicates. */
+
+ unsigned int etag_length; /* Length of the ETag value (if one was found in the response). */
+ unsigned int etag_offset; /* Offset of the ETag value in the data buffer. */
+
+ time_t last_modified; /* Origin server "Last-Modified" header value converted in
+ * seconds since epoch. If no "Last-Modified"
+ * header is found, use "Date" header value,
+ * otherwise use reception time. This field will
+ * be used in case of an "If-Modified-Since"-based
+ * conditional request. */
+
+ unsigned char data[0];
+};
+
+#define CACHE_BLOCKSIZE 1024
+#define CACHE_ENTRY_MAX_AGE 2147483648U
+
+static struct list caches = LIST_HEAD_INIT(caches);
+static struct list caches_config = LIST_HEAD_INIT(caches_config); /* cache config to init */
+static struct cache *tmp_cache_config = NULL;
+
+DECLARE_STATIC_POOL(pool_head_cache_st, "cache_st", sizeof(struct cache_st));
+
+static struct eb32_node *insert_entry(struct cache *cache, struct cache_entry *new_entry);
+static void delete_entry(struct cache_entry *del_entry);
+
+struct cache_entry *entry_exist(struct cache *cache, char *hash)
+{
+ struct eb32_node *node;
+ struct cache_entry *entry;
+
+ node = eb32_lookup(&cache->entries, read_u32(hash));
+ if (!node)
+ return NULL;
+
+ entry = eb32_entry(node, struct cache_entry, eb);
+
+ /* if that's not the right node */
+ if (memcmp(entry->hash, hash, sizeof(entry->hash)))
+ return NULL;
+
+ if (entry->expire > date.tv_sec) {
+ return entry;
+ } else {
+ delete_entry(entry);
+ entry->eb.key = 0;
+ }
+ return NULL;
+
+}
+
+
+/*
+ * Compare a newly built secondary key to the one found in a cache_entry.
+ * Every sub-part of the key is compared to the reference through the dedicated
+ * comparison function of the sub-part (that might do more than a simple
+ * memcmp).
+ * Returns 0 if the keys are alike.
+ */
+static int secondary_key_cmp(const char *ref_key, const char *new_key)
+{
+ int retval = 0;
+ size_t idx = 0;
+ unsigned int offset = 0;
+ const struct vary_hashing_information *info;
+
+ for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && !retval; ++idx) {
+ info = &vary_information[idx];
+
+ if (info->cmp_fn)
+ retval = info->cmp_fn(&ref_key[offset], &new_key[offset], info->hash_length);
+ else
+ retval = memcmp(&ref_key[offset], &new_key[offset], info->hash_length);
+
+ offset += info->hash_length;
+ }
+
+ return retval;
+}
+
+/*
+ * There can be multiple entries with the same primary key in the ebtree so in
+ * order to get the proper one out of the list, we use a secondary_key.
+ * This function simply iterates over all the entries with the same primary_key
+ * until it finds the right one.
+ * Returns the cache_entry in case of success, NULL otherwise.
+ */
+struct cache_entry *secondary_entry_exist(struct cache *cache, struct cache_entry *entry,
+ const char *secondary_key)
+{
+ struct eb32_node *node = &entry->eb;
+
+ if (!entry->secondary_key_signature)
+ return NULL;
+
+ while (entry && secondary_key_cmp(entry->secondary_key, secondary_key) != 0) {
+ node = eb32_next_dup(node);
+
+ /* Make the best use of this iteration and clear expired entries
+ * when we find them. Calling delete_entry would be too costly
+ * so we simply call eb32_delete. The secondary_entry count will
+ * be updated when we try to insert a new entry to this list. */
+ if (entry->expire <= date.tv_sec) {
+ eb32_delete(&entry->eb);
+ entry->eb.key = 0;
+ }
+
+ entry = node ? eb32_entry(node, struct cache_entry, eb) : NULL;
+ }
+
+ /* Expired entry */
+ if (entry && entry->expire <= date.tv_sec) {
+ eb32_delete(&entry->eb);
+ entry->eb.key = 0;
+ entry = NULL;
+ }
+
+ return entry;
+}
+
+
+/*
+ * Remove all expired entries from a list of duplicates.
+ * Return the number of alive entries in the list and sets dup_tail to the
+ * current last item of the list.
+ */
+static unsigned int clear_expired_duplicates(struct eb32_node **dup_tail)
+{
+ unsigned int entry_count = 0;
+ struct cache_entry *entry = NULL;
+ struct eb32_node *prev = *dup_tail;
+ struct eb32_node *tail = NULL;
+
+ while (prev) {
+ entry = container_of(prev, struct cache_entry, eb);
+ prev = eb32_prev_dup(prev);
+ if (entry->expire <= date.tv_sec) {
+ eb32_delete(&entry->eb);
+ entry->eb.key = 0;
+ }
+ else {
+ if (!tail)
+ tail = &entry->eb;
+ ++entry_count;
+ }
+ }
+
+ *dup_tail = tail;
+
+ return entry_count;
+}
+
+
+/*
+ * This function inserts a cache_entry in the cache's ebtree. In case of
+ * duplicate entries (vary), it then checks that the number of entries has not
+ * reached the maximum number of secondary entries. If this entry should not
+ * have been created, it is removed.
+ * In the regular case (unique entries), this function does no more than a
+ * simple insert. In case of secondary entries, it costs at most an insertion
+ * plus max_sec_entries checks and one entry deletion.
+ * Returns the newly inserted node in case of success, NULL otherwise.
+ */
+static struct eb32_node *insert_entry(struct cache *cache, struct cache_entry *new_entry)
+{
+ struct eb32_node *prev = NULL;
+ struct cache_entry *entry = NULL;
+ unsigned int entry_count = 0;
+ unsigned int last_clear_ts = date.tv_sec;
+
+ struct eb32_node *node = eb32_insert(&cache->entries, &new_entry->eb);
+
+	/* We should not have multiple entries with the same primary key unless
+	 * the entry has a non-null vary signature. */
+ if (!new_entry->secondary_key_signature)
+ return node;
+
+ prev = eb32_prev_dup(node);
+ if (prev != NULL) {
+ /* The last entry of a duplicate list should contain the current
+ * number of entries in the list. */
+ entry = container_of(prev, struct cache_entry, eb);
+ entry_count = entry->secondary_entries_count;
+ last_clear_ts = entry->last_clear_ts;
+
+ if (entry_count >= cache->max_secondary_entries) {
+ /* Some entries of the duplicate list might be expired so
+ * we will iterate over all the items in order to free some
+ * space. In order to avoid going over the same list too
+ * often, we first check the timestamp of the last check
+ * performed. */
+ if (last_clear_ts == date.tv_sec) {
+ /* Too many entries for this primary key, clear the
+ * one that was inserted. */
+ eb32_delete(node);
+ node->key = 0;
+ return NULL;
+ }
+
+ entry_count = clear_expired_duplicates(&prev);
+ if (entry_count >= cache->max_secondary_entries) {
+ /* Still too many entries for this primary key, delete
+ * the newly inserted one. */
+ entry = container_of(prev, struct cache_entry, eb);
+ entry->last_clear_ts = date.tv_sec;
+ eb32_delete(node);
+ node->key = 0;
+ return NULL;
+ }
+ }
+ }
+
+ new_entry->secondary_entries_count = entry_count + 1;
+ new_entry->last_clear_ts = last_clear_ts;
+
+ return node;
+}
+
+
+/*
+ * This function removes an entry from the ebtree. If the entry was a duplicate
+ * (in case of Vary), it updates the secondary entry counter in another
+ * duplicate entry (the last entry of the dup list).
+ */
+static void delete_entry(struct cache_entry *del_entry)
+{
+ struct eb32_node *prev = NULL, *next = NULL;
+ struct cache_entry *entry = NULL;
+ struct eb32_node *last = NULL;
+
+ if (del_entry->secondary_key_signature) {
+ next = &del_entry->eb;
+
+ /* Look for last entry of the duplicates list. */
+ while ((next = eb32_next_dup(next))) {
+ last = next;
+ }
+
+ if (last) {
+ entry = container_of(last, struct cache_entry, eb);
+ --entry->secondary_entries_count;
+ }
+ else {
+ /* The current entry is the last one, look for the
+ * previous one to update its counter. */
+ prev = eb32_prev_dup(&del_entry->eb);
+ if (prev) {
+ entry = container_of(prev, struct cache_entry, eb);
+ entry->secondary_entries_count = del_entry->secondary_entries_count - 1;
+ }
+ }
+ }
+ eb32_delete(&del_entry->eb);
+ del_entry->eb.key = 0;
+}
+
+
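+/* The cache structure is laid out at the start of the shared_context's <data>
+ * area, and a cache_entry at the start of a shared_block's <data> area, so
+ * the two helpers below recover the enclosing structure with container_of
+ * style arithmetic on the offset of the <data> member.
+ */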
+static inline struct shared_context *shctx_ptr(struct cache *cache)
+{
+ return (struct shared_context *)((unsigned char *)cache - ((struct shared_context *)NULL)->data);
+}
+
+static inline struct shared_block *block_ptr(struct cache_entry *entry)
+{
+ return (struct shared_block *)((unsigned char *)entry - ((struct shared_block *)NULL)->data);
+}
+
+
+
+static int
+cache_store_init(struct proxy *px, struct flt_conf *fconf)
+{
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+static void
+cache_store_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct cache_flt_conf *cconf = fconf->conf;
+
+ if (!(cconf->flags & CACHE_FLT_INIT))
+ free(cconf->c.name);
+ free(cconf);
+}
+
+static int
+cache_store_check(struct proxy *px, struct flt_conf *fconf)
+{
+ struct cache_flt_conf *cconf = fconf->conf;
+ struct flt_conf *f;
+ struct cache *cache;
+ int comp = 0;
+
+ /* Find the cache corresponding to the name in the filter config. The
+ * cache will not be referenced now in the filter config because it is
+ * not fully allocated. This step will be performed during the cache
+ * post_check.
+ */
+ list_for_each_entry(cache, &caches_config, list) {
+ if (strcmp(cache->id, cconf->c.name) == 0)
+ goto found;
+ }
+
+ ha_alert("config: %s '%s': unable to find the cache '%s' referenced by the filter 'cache'.\n",
+ proxy_type_str(px), px->id, (char *)cconf->c.name);
+ return 1;
+
+ found:
+ /* Here <cache> points on the cache the filter must use and <cconf>
+ * points on the cache filter configuration. */
+
+	/* Check all filters for proxy <px> to know if the compression is
+	 * enabled and if it is placed after the cache. When the compression is
+	 * before the cache, an error is returned. Also check if the cache
+	 * filter must be explicitly declared or not. */
+ list_for_each_entry(f, &px->filter_configs, list) {
+ if (f == fconf) {
+ /* The compression filter must be evaluated after the cache. */
+ if (comp) {
+ ha_alert("config: %s '%s': unable to enable the compression filter before "
+ "the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
+ return 1;
+ }
+ }
+ else if (f->id == http_comp_flt_id)
+ comp = 1;
+ else if (f->id == fcgi_flt_id)
+ continue;
+ else if ((f->id != fconf->id) && (cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
+			/* Implicit declaration is only allowed with the
+			 * compression and fcgi filters. For any other filter,
+			 * an explicit declaration is required. */
+ ha_alert("config: %s '%s': require an explicit filter declaration "
+ "to use the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
+ return 1;
+ }
+
+ }
+ return 0;
+}
+
+static int
+cache_store_strm_init(struct stream *s, struct filter *filter)
+{
+ struct cache_st *st;
+
+ st = pool_alloc(pool_head_cache_st);
+ if (st == NULL)
+ return -1;
+
+ st->first_block = NULL;
+ filter->ctx = st;
+
+ /* Register post-analyzer on AN_RES_WAIT_HTTP */
+ filter->post_analyzers |= AN_RES_WAIT_HTTP;
+ return 1;
+}
+
+static void
+cache_store_strm_deinit(struct stream *s, struct filter *filter)
+{
+ struct cache_st *st = filter->ctx;
+ struct cache_flt_conf *cconf = FLT_CONF(filter);
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+
+ /* Everything should be released in the http_end filter, but we need to do it
+ * there too, in case of errors */
+ if (st && st->first_block) {
+ shctx_lock(shctx);
+ shctx_row_dec_hot(shctx, st->first_block);
+ shctx_unlock(shctx);
+ }
+ if (st) {
+ pool_free(pool_head_cache_st, st);
+ filter->ctx = NULL;
+ }
+}
+
+static int
+cache_store_post_analyze(struct stream *s, struct filter *filter, struct channel *chn,
+ unsigned an_bit)
+{
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct cache_st *st = filter->ctx;
+
+ if (an_bit != AN_RES_WAIT_HTTP)
+ goto end;
+
+ /* Here we need to check if any compression filter precedes the cache
+ * filter. This is only possible when the compression is configured in
+ * the frontend while the cache filter is configured on the
+ * backend. This case cannot be detected during HAProxy startup. So in
+ * such cases, the cache is disabled.
+ */
+ if (st && (msg->flags & HTTP_MSGF_COMPRESSING)) {
+ pool_free(pool_head_cache_st, st);
+ filter->ctx = NULL;
+ }
+
+ end:
+ return 1;
+}
+
+static int
+cache_store_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct cache_st *st = filter->ctx;
+
+ if (!(msg->chn->flags & CF_ISRESP) || !st)
+ return 1;
+
+ if (st->first_block)
+ register_data_filter(s, msg->chn, filter);
+ return 1;
+}
+
+static inline void disable_cache_entry(struct cache_st *st,
+ struct filter *filter, struct shared_context *shctx)
+{
+ struct cache_entry *object;
+
+ object = (struct cache_entry *)st->first_block->data;
+ filter->ctx = NULL; /* disable cache */
+ shctx_lock(shctx);
+ shctx_row_dec_hot(shctx, st->first_block);
+ eb32_delete(&object->eb);
+ object->eb.key = 0;
+ shctx_unlock(shctx);
+ pool_free(pool_head_cache_st, st);
+}
+
+static int
+cache_store_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct cache_flt_conf *cconf = FLT_CONF(filter);
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ struct cache_st *st = filter->ctx;
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_blk *blk;
+ struct shared_block *fb;
+ struct htx_ret htxret;
+ unsigned int orig_len, to_forward;
+ int ret;
+
+ if (!len)
+ return len;
+
+ if (!st->first_block) {
+ unregister_data_filter(s, msg->chn, filter);
+ return len;
+ }
+
+ chunk_reset(&trash);
+ orig_len = len;
+ to_forward = 0;
+
+ htxret = htx_find_offset(htx, offset);
+ blk = htxret.blk;
+ offset = htxret.ret;
+ for (; blk && len; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t info, sz = htx_get_blksz(blk);
+ struct ist v;
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ v = isttrim(v, len);
+
+ info = (type << 28) + v.len;
+ chunk_memcat(&trash, (char *)&info, sizeof(info));
+ chunk_istcat(&trash, v);
+ to_forward += v.len;
+ len -= v.len;
+ break;
+
+ default:
+ /* Here offset must always be 0 because only
+ * DATA blocks can be partially transferred. */
+ if (offset)
+ goto no_cache;
+ if (sz > len)
+ goto end;
+
+ chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
+ chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);
+ to_forward += sz;
+ len -= sz;
+ break;
+ }
+
+ offset = 0;
+ }
+
+ end:
+ shctx_lock(shctx);
+ fb = shctx_row_reserve_hot(shctx, st->first_block, trash.data);
+ if (!fb) {
+ shctx_unlock(shctx);
+ goto no_cache;
+ }
+ shctx_unlock(shctx);
+
+ ret = shctx_row_data_append(shctx, st->first_block, st->first_block->last_append,
+ (unsigned char *)b_head(&trash), b_data(&trash));
+ if (ret < 0)
+ goto no_cache;
+
+ return to_forward;
+
+ no_cache:
+ disable_cache_entry(st, filter, shctx);
+ unregister_data_filter(s, msg->chn, filter);
+ return orig_len;
+}
+
+static int
+cache_store_http_end(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct cache_st *st = filter->ctx;
+ struct cache_flt_conf *cconf = FLT_CONF(filter);
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+ struct cache_entry *object;
+
+ if (!(msg->chn->flags & CF_ISRESP))
+ return 1;
+
+ if (st && st->first_block) {
+
+ object = (struct cache_entry *)st->first_block->data;
+
+ shctx_lock(shctx);
+ /* The whole payload was cached, the entry can now be used. */
+ object->complete = 1;
+ /* remove from the hotlist */
+ shctx_row_dec_hot(shctx, st->first_block);
+ shctx_unlock(shctx);
+
+ }
+ if (st) {
+ pool_free(pool_head_cache_st, st);
+ filter->ctx = NULL;
+ }
+
+ return 1;
+}
+
+/*
+ * This is intended to be used when checking HTTP headers for some
+ * word=value directive. Returns a pointer to the first character of the
+ * value if the word was found; returns NULL if the word was not found or
+ * if no value was assigned to it.
+ */
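+/*
+ * For example (illustrative values), directive_value("max-age=3600", 12,
+ * "max-age", 7) returns a pointer to "3600", while a word that is absent
+ * or not followed by '=' yields NULL.
+ */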
+char *directive_value(const char *sample, int slen, const char *word, int wlen)
+{
+ int st = 0;
+
+ if (slen < wlen)
+ return 0;
+
+ while (wlen) {
+ char c = *sample ^ *word;
+ if (c && c != ('A' ^ 'a'))
+ return NULL;
+ sample++;
+ word++;
+ slen--;
+ wlen--;
+ }
+
+ while (slen) {
+ if (st == 0) {
+ if (*sample != '=')
+ return NULL;
+ sample++;
+ slen--;
+ st = 1;
+ continue;
+ } else {
+ return (char *)sample;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Return the maxage in seconds of an HTTP response.
+ * The returned value will always take the cache's configuration into account
+ * (cache->maxage) but the actual max age of the response will be set in the
+ * true_maxage parameter. It will be used to determine if a response is already
+ * stale or not.
+ * Compute the maxage using either:
+ * - the assigned max-age of the cache
+ * - the s-maxage directive
+ * - the max-age directive
+ * - the (Expires - Date) headers
+ * - the default-max-age of the cache
+ *
+ */
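+/*
+ * Worked example (hypothetical values): with the cache configured with
+ * "max-age 60" and a response carrying "Cache-Control: max-age=3600", this
+ * function returns MIN(3600, 60) == 60 while *true_maxage is set to 3600,
+ * letting the caller detect responses that are already stale.
+ */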
+int http_calc_maxage(struct stream *s, struct cache *cache, int *true_maxage)
+{
+ struct htx *htx = htxbuf(&s->res.buf);
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ long smaxage = -1;
+ long maxage = -1;
+ int expires = -1;
+ struct tm tm = {};
+ time_t expires_val = 0;
+ char *endptr = NULL;
+ int offset = 0;
+
+ /* The Cache-Control max-age and s-maxage directives should be followed by
+ * a positive numerical value (see RFC 7234#5.2.1.1). According to the
+ * specs, a sender "should not" generate a quoted-string value but we will
+ * still accept this format since it isn't strictly forbidden. */
+ while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
+ char *value;
+
+ value = directive_value(ctx.value.ptr, ctx.value.len, "s-maxage", 8);
+ if (value) {
+ struct buffer *chk = get_trash_chunk();
+
+ chunk_memcat(chk, value, ctx.value.len - 8 + 1);
+ chunk_memcat(chk, "", 1);
+ offset = (*chk->area == '"') ? 1 : 0;
+ smaxage = strtol(chk->area + offset, &endptr, 10);
+ if (unlikely(smaxage < 0 || endptr == chk->area + offset))
+ return -1;
+ }
+
+ value = directive_value(ctx.value.ptr, ctx.value.len, "max-age", 7);
+ if (value) {
+ struct buffer *chk = get_trash_chunk();
+
+ chunk_memcat(chk, value, ctx.value.len - 7 + 1);
+ chunk_memcat(chk, "", 1);
+ offset = (*chk->area == '"') ? 1 : 0;
+ maxage = strtol(chk->area + offset, &endptr, 10);
+ if (unlikely(maxage < 0 || endptr == chk->area + offset))
+ return -1;
+ }
+ }
+
+ /* Look for Expires header if no s-maxage or max-age Cache-Control data
+ * was found. */
+ if (maxage == -1 && smaxage == -1) {
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("expires"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ expires_val = my_timegm(&tm);
+				/* A response with an Expires date earlier
+				 * than the current date must be considered
+				 * already stale. */
+ expires = (expires_val >= date.tv_sec) ?
+ (expires_val - date.tv_sec) : 0;
+ }
+ else {
+ /* Following RFC 7234#5.3, an invalid date
+ * format must be treated as a date in the past
+ * so the cache entry must be seen as already
+ * expired. */
+ expires = 0;
+ }
+ }
+ }
+
+
+ if (smaxage > 0) {
+ if (true_maxage)
+ *true_maxage = smaxage;
+ return MIN(smaxage, cache->maxage);
+ }
+
+ if (maxage > 0) {
+ if (true_maxage)
+ *true_maxage = maxage;
+ return MIN(maxage, cache->maxage);
+ }
+
+ if (expires >= 0) {
+ if (true_maxage)
+ *true_maxage = expires;
+ return MIN(expires, cache->maxage);
+ }
+
+ return cache->maxage;
+
+}
+
+
+static void cache_free_blocks(struct shared_block *first, struct shared_block *block)
+{
+ struct cache_entry *object = (struct cache_entry *)block->data;
+
+ if (first == block && object->eb.key)
+ delete_entry(object);
+ object->eb.key = 0;
+}
+
+
+/* As per RFC 7234#4.3.2, in case of an "If-Modified-Since" conditional
+ * request, the date value should be compared to a date determined from a
+ * previous response (for the same entity). This date is either the
+ * "Last-Modified" value or the "Date" value, the response's reception time
+ * being used as a last resort (by decreasing order of priority). */
+static time_t get_last_modified_time(struct htx *htx)
+{
+ time_t last_modified = 0;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct tm tm = {};
+
+ if (http_find_header(htx, ist("last-modified"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ last_modified = my_timegm(&tm);
+ }
+ }
+
+ if (!last_modified) {
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("date"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ last_modified = my_timegm(&tm);
+ }
+ }
+ }
+
+ /* Fallback on the current time if no "Last-Modified" or "Date" header
+ * was found. */
+ if (!last_modified)
+ last_modified = date.tv_sec;
+
+ return last_modified;
+}
+
+/*
+ * Checks the vary header's value. The headers on which vary should be applied
+ * must be explicitly supported in the vary_information array (see cache.c). If
+ * any other header is mentioned, we won't store the response.
+ * Returns 1 if Vary-based storage can work, 0 otherwise.
+ */
+static int http_check_vary_header(struct htx *htx, unsigned int *vary_signature)
+{
+ unsigned int vary_idx;
+ unsigned int vary_info_count;
+ const struct vary_hashing_information *vary_info;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ int retval = 1;
+
+ *vary_signature = 0;
+
+ vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
+ while (retval && http_find_header(htx, ist("Vary"), &ctx, 0)) {
+ for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
+ vary_info = &vary_information[vary_idx];
+ if (isteqi(ctx.value, vary_info->hdr_name)) {
+ *vary_signature |= vary_info->value;
+ break;
+ }
+ }
+ retval = (vary_idx < vary_info_count);
+ }
+
+ return retval;
+}
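+
+/* For instance (illustrative header values), a response carrying
+ * "Vary: accept-encoding, referer" yields a signature of
+ * VARY_ACCEPT_ENCODING|VARY_REFERER and a return value of 1, while
+ * "Vary: user-agent" is not supported and makes the function return 0.
+ */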
+
+
+/*
+ * Look for the accept-encoding part of the secondary_key and replace the
+ * encoding bitmap part of the hash with the actual encoding of the response,
+ * extracted from the content-encoding header value.
+ * Responses that have an unknown encoding will not be cached if they also
+ * "vary" on the accept-encoding value.
+ * Returns 0 if we found a known encoding in the response, -1 otherwise.
+ */
+static int set_secondary_key_encoding(struct htx *htx, char *secondary_key)
+{
+ unsigned int resp_encoding_bitmap = 0;
+ const struct vary_hashing_information *info = vary_information;
+ unsigned int offset = 0;
+ unsigned int count = 0;
+ unsigned int hash_info_count = sizeof(vary_information)/sizeof(*vary_information);
+ unsigned int encoding_value;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ /* Look for the accept-encoding part of the secondary_key. */
+ while (count < hash_info_count && info->value != VARY_ACCEPT_ENCODING) {
+ offset += info->hash_length;
+ ++info;
+ ++count;
+ }
+
+ if (count == hash_info_count)
+ return -1;
+
+ while (http_find_header(htx, ist("content-encoding"), &ctx, 0)) {
+ if (parse_encoding_value(ctx.value, &encoding_value, NULL))
+ return -1; /* Do not store responses with an unknown encoding */
+ resp_encoding_bitmap |= encoding_value;
+ }
+
+ if (!resp_encoding_bitmap)
+ resp_encoding_bitmap |= VARY_ENCODING_IDENTITY;
+
+	/* Rewrite the bitmap part of the hash with the new bitmap that only
+	 * corresponds to the response's encoding. */
+ write_u32(secondary_key + offset, resp_encoding_bitmap);
+
+ return 0;
+}
+
+
+/*
+ * This function will store the headers of the response in a buffer and then
+ * register a filter to store the data
+ */
+enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ int effective_maxage = 0;
+ int true_maxage = 0;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct filter *filter;
+ struct shared_block *first = NULL;
+ struct cache_flt_conf *cconf = rule->arg.act.p[0];
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+ struct cache_st *cache_ctx = NULL;
+ struct cache_entry *object, *old;
+ unsigned int key = read_u32(txn->cache_hash);
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ size_t hdrs_len = 0;
+ int32_t pos;
+ unsigned int vary_signature = 0;
+
+ /* Don't cache if the response came from a cache */
+ if ((obj_type(s->target) == OBJ_TYPE_APPLET) &&
+ s->target == &http_cache_applet.obj_type) {
+ goto out;
+ }
+
+ /* cache only HTTP/1.1 */
+ if (!(txn->req.flags & HTTP_MSGF_VER_11))
+ goto out;
+
+ /* cache only GET method */
+ if (txn->meth != HTTP_METH_GET) {
+ /* In case of successful unsafe method on a stored resource, the
+ * cached entry must be invalidated (see RFC7234#4.4).
+ * A "non-error response" is one with a 2xx (Successful) or 3xx
+ * (Redirection) status code. */
+ if (txn->status >= 200 && txn->status < 400) {
+ switch (txn->meth) {
+ case HTTP_METH_OPTIONS:
+ case HTTP_METH_GET:
+ case HTTP_METH_HEAD:
+ case HTTP_METH_TRACE:
+ break;
+
+ default: /* Any unsafe method */
+ /* Discard any corresponding entry in case of successful
+ * unsafe request (such as PUT, POST or DELETE). */
+ shctx_lock(shctx);
+
+ old = entry_exist(cconf->c.cache, txn->cache_hash);
+ if (old) {
+ eb32_delete(&old->eb);
+ old->eb.key = 0;
+ }
+ shctx_unlock(shctx);
+ }
+ }
+ goto out;
+ }
+
+ /* cache key was not computed */
+ if (!key)
+ goto out;
+
+ /* cache only 200 status code */
+ if (txn->status != 200)
+ goto out;
+
+ /* Find the corresponding filter instance for the current stream */
+ list_for_each_entry(filter, &s->strm_flt.filters, list) {
+ if (FLT_ID(filter) == cache_store_flt_id && FLT_CONF(filter) == cconf) {
+ /* No filter ctx, don't cache anything */
+ if (!filter->ctx)
+ goto out;
+ cache_ctx = filter->ctx;
+ break;
+ }
+ }
+
+ /* from there, cache_ctx is always defined */
+ htx = htxbuf(&s->res.buf);
+
+ /* Do not cache too big objects. */
+ if ((msg->flags & HTTP_MSGF_CNT_LEN) && shctx->max_obj_size > 0 &&
+ htx->data + htx->extra > shctx->max_obj_size)
+ goto out;
+
+ /* Only a subset of headers are supported in our Vary implementation. If
+ * any other header is present in the Vary header value, we won't be
+ * able to use the cache. Likewise, if Vary header support is disabled,
+ * avoid caching responses that contain such a header. */
+ ctx.blk = NULL;
+ if (cache->vary_processing_enabled) {
+ if (!http_check_vary_header(htx, &vary_signature))
+ goto out;
+ if (vary_signature) {
+ /* If something went wrong during the secondary key
+ * building, do not store the response. */
+ if (!(txn->flags & TX_CACHE_HAS_SEC_KEY))
+ goto out;
+ http_request_reduce_secondary_key(vary_signature, txn->cache_secondary_hash);
+ }
+ }
+ else if (http_find_header(htx, ist("Vary"), &ctx, 0)) {
+ goto out;
+ }
+
+ http_check_response_for_cacheability(s, &s->res);
+
+ if (!(txn->flags & TX_CACHEABLE) || !(txn->flags & TX_CACHE_COOK))
+ goto out;
+
+ shctx_lock(shctx);
+ old = entry_exist(cache, txn->cache_hash);
+ if (old) {
+ if (vary_signature)
+ old = secondary_entry_exist(cconf->c.cache, old,
+ txn->cache_secondary_hash);
+ if (old) {
+ if (!old->complete) {
+ /* An entry with the same primary key is already being
+ * created, we should not try to store the current
+ * response because it will waste space in the cache. */
+ shctx_unlock(shctx);
+ goto out;
+ }
+ delete_entry(old);
+ old->eb.key = 0;
+ }
+ }
+ first = shctx_row_reserve_hot(shctx, NULL, sizeof(struct cache_entry));
+ if (!first) {
+ shctx_unlock(shctx);
+ goto out;
+ }
+ /* the received memory is not initialized, we need at least to mark
+ * the object as not indexed yet.
+ */
+ object = (struct cache_entry *)first->data;
+ memset(object, 0, sizeof(*object));
+ object->eb.key = key;
+ object->secondary_key_signature = vary_signature;
+ /* We need to temporarily set a valid expiring time until the actual one
+ * is set by the end of this function (in case of concurrent accesses to
+ * the same resource). This way the second access will find an existing
+ * but not yet usable entry in the tree and will avoid storing its data. */
+ object->expire = date.tv_sec + 2;
+
+ memcpy(object->hash, txn->cache_hash, sizeof(object->hash));
+ if (vary_signature)
+ memcpy(object->secondary_key, txn->cache_secondary_hash, HTTP_CACHE_SEC_KEY_LEN);
+
+ /* Insert the entry in the tree even if the payload is not cached yet. */
+ if (insert_entry(cache, object) != &object->eb) {
+ object->eb.key = 0;
+ shctx_unlock(shctx);
+ goto out;
+ }
+ shctx_unlock(shctx);
+
+ /* reserve space for the cache_entry structure */
+ first->len = sizeof(struct cache_entry);
+ first->last_append = NULL;
+
+ /* Determine the entry's maximum age (taking into account the cache's
+ * configuration) as well as the response's explicit max age (extracted
+ * from cache-control directives or the expires header). */
+ effective_maxage = http_calc_maxage(s, cconf->c.cache, &true_maxage);
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Age"), &ctx, 0)) {
+ long long hdr_age;
+ if (!strl2llrc(ctx.value.ptr, ctx.value.len, &hdr_age) && hdr_age > 0) {
+ if (unlikely(hdr_age > CACHE_ENTRY_MAX_AGE))
+ hdr_age = CACHE_ENTRY_MAX_AGE;
+ /* A response with an Age value greater than its
+ * announced max age is stale and should not be stored. */
+ object->age = hdr_age;
+ if (unlikely(object->age > true_maxage))
+ goto out;
+ }
+ else
+ goto out;
+ http_remove_header(htx, &ctx);
+ }
+
+ /* Build a last-modified time that will be stored in the cache_entry and
+ * compared to a future If-Modified-Since client header. */
+ object->last_modified = get_last_modified_time(htx);
+
+ chunk_reset(&trash);
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+
+ hdrs_len += sizeof(*blk) + sz;
+ chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
+ chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);
+
+ /* Look for optional ETag header.
+ * We need to store the offset of the ETag value in order for
+ * future conditional requests to be able to perform ETag
+ * comparisons. */
+ if (type == HTX_BLK_HDR) {
+ struct ist header_name = htx_get_blk_name(htx, blk);
+ if (isteq(header_name, ist("etag"))) {
+ object->etag_length = sz - istlen(header_name);
+ object->etag_offset = sizeof(struct cache_entry) + b_data(&trash) - sz + istlen(header_name);
+ }
+ }
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ /* Do not cache objects if the headers are too big. */
+ if (hdrs_len > htx->size - global.tune.maxrewrite)
+ goto out;
+
+ /* If the response has a secondary_key, fill its key part related to
+ * encodings with the actual encoding of the response. This way any
+ * subsequent request having the same primary key will have its accepted
+	 * encodings tested against the cached response's one.
+ * We will not cache a response that has an unknown encoding (not
+ * explicitly supported in parse_encoding_value function). */
+ if (cache->vary_processing_enabled && vary_signature)
+ if (set_secondary_key_encoding(htx, object->secondary_key))
+ goto out;
+
+ shctx_lock(shctx);
+ if (!shctx_row_reserve_hot(shctx, first, trash.data)) {
+ shctx_unlock(shctx);
+ goto out;
+ }
+ shctx_unlock(shctx);
+
+	/* Cache the headers in an http action because it allows choosing what
+	 * to cache, for example you might want to cache a response before
+	 * modifying some HTTP headers, or on the contrary after modifying
+	 * those headers.
+	 */
+ /* does not need to be locked because it's in the "hot" list,
+ * copy the headers */
+ if (shctx_row_data_append(shctx, first, NULL, (unsigned char *)trash.area, trash.data) < 0)
+ goto out;
+
+	/* register the buffer in the filter ctx for filling it with data */
+ if (cache_ctx) {
+ cache_ctx->first_block = first;
+ /* store latest value and expiration time */
+ object->latest_validation = date.tv_sec;
+ object->expire = date.tv_sec + effective_maxage;
+ return ACT_RET_CONT;
+ }
+
+out:
+ /* if does not cache */
+ if (first) {
+ shctx_lock(shctx);
+ first->len = 0;
+ if (object->eb.key)
+ delete_entry(object);
+ object->eb.key = 0;
+ shctx_row_dec_hot(shctx, first);
+ shctx_unlock(shctx);
+ }
+
+ return ACT_RET_CONT;
+}
+
+#define HTX_CACHE_INIT 0 /* Initial state. */
+#define HTX_CACHE_HEADER 1 /* Cache entry headers forwarding */
+#define HTX_CACHE_DATA 2 /* Cache entry data forwarding */
+#define HTX_CACHE_EOM 3 /* Cache entry completely forwarded. Finish the HTX message */
+#define HTX_CACHE_END 4 /* Cache entry treatment terminated */
+
+static void http_cache_applet_release(struct appctx *appctx)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct cache_entry *cache_ptr = ctx->entry;
+ struct cache *cache = cconf->c.cache;
+ struct shared_block *first = block_ptr(cache_ptr);
+
+ shctx_lock(shctx_ptr(cache));
+ shctx_row_dec_hot(shctx_ptr(cache), first);
+ shctx_unlock(shctx_ptr(cache));
+}
+
+
+static unsigned int htx_cache_dump_blk(struct appctx *appctx, struct htx *htx, enum htx_blk_type type,
+ uint32_t info, struct shared_block *shblk, unsigned int offset)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ struct htx_blk *blk;
+ char *ptr;
+ unsigned int max, total;
+ uint32_t blksz;
+
+ max = htx_get_max_blksz(htx,
+ channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx));
+ if (!max)
+ return 0;
+ blksz = ((type == HTX_BLK_HDR || type == HTX_BLK_TLR)
+ ? (info & 0xff) + ((info >> 8) & 0xfffff)
+ : info & 0xfffffff);
+ if (blksz > max)
+ return 0;
+
+ blk = htx_add_blk(htx, type, blksz);
+ if (!blk)
+ return 0;
+
+ blk->info = info;
+ total = 4;
+ ptr = htx_get_blk_ptr(htx, blk);
+ while (blksz) {
+ max = MIN(blksz, shctx->block_size - offset);
+ memcpy(ptr, (const char *)shblk->data + offset, max);
+ offset += max;
+ blksz -= max;
+ total += max;
+ ptr += max;
+ if (blksz || offset == shctx->block_size) {
+ shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
+ offset = 0;
+ }
+ }
+ ctx->offset = offset;
+ ctx->next = shblk;
+ ctx->sent += total;
+ return total;
+}
+
+static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *htx,
+ uint32_t info, struct shared_block *shblk, unsigned int offset)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ unsigned int max, total, rem_data;
+ uint32_t blksz;
+
+ max = htx_get_max_blksz(htx,
+ channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx));
+ if (!max)
+ return 0;
+
+ rem_data = 0;
+ if (ctx->rem_data) {
+ blksz = ctx->rem_data;
+ total = 0;
+ }
+ else {
+ blksz = (info & 0xfffffff);
+ total = 4;
+ }
+ if (blksz > max) {
+ rem_data = blksz - max;
+ blksz = max;
+ }
+
+ while (blksz) {
+ size_t sz;
+
+ max = MIN(blksz, shctx->block_size - offset);
+ sz = htx_add_data(htx, ist2(shblk->data + offset, max));
+ offset += sz;
+ blksz -= sz;
+ total += sz;
+ if (sz < max)
+ break;
+ if (blksz || offset == shctx->block_size) {
+ shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
+ offset = 0;
+ }
+ }
+
+ ctx->offset = offset;
+ ctx->next = shblk;
+ ctx->sent += total;
+ ctx->rem_data = rem_data + blksz;
+ return total;
+}
+
+static size_t htx_cache_dump_msg(struct appctx *appctx, struct htx *htx, unsigned int len,
+ enum htx_blk_type mark)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ struct shared_block *shblk;
+ unsigned int offset, sz;
+ unsigned int ret, total = 0;
+
+ while (len) {
+ enum htx_blk_type type;
+ uint32_t info;
+
+ shblk = ctx->next;
+ offset = ctx->offset;
+ if (ctx->rem_data) {
+ type = HTX_BLK_DATA;
+ info = 0;
+ goto add_data_blk;
+ }
+
+ /* Get info of the next HTX block. May be split on 2 shblk */
+ sz = MIN(4, shctx->block_size - offset);
+ memcpy((char *)&info, (const char *)shblk->data + offset, sz);
+ offset += sz;
+ if (sz < 4) {
+ shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
+ memcpy(((char *)&info)+sz, (const char *)shblk->data, 4 - sz);
+ offset = (4 - sz);
+ }
+
+ /* Get payload of the next HTX block and insert it. */
+ type = (info >> 28);
+ if (type != HTX_BLK_DATA)
+ ret = htx_cache_dump_blk(appctx, htx, type, info, shblk, offset);
+ else {
+ add_data_blk:
+ ret = htx_cache_dump_data_blk(appctx, htx, info, shblk, offset);
+ }
+
+ if (!ret)
+ break;
+ total += ret;
+ len -= ret;
+
+ if (ctx->rem_data || type == mark)
+ break;
+ }
+
+ return total;
+}
+
+static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_entry *cache_ptr = ctx->entry;
+ unsigned int age;
+ char *end;
+
+ chunk_reset(&trash);
+ age = MAX(0, (int)(date.tv_sec - cache_ptr->latest_validation)) + cache_ptr->age;
+ if (unlikely(age > CACHE_ENTRY_MAX_AGE))
+ age = CACHE_ENTRY_MAX_AGE;
+ end = ultoa_o(age, b_head(&trash), b_size(&trash));
+ b_set_data(&trash, end - b_head(&trash));
+ if (!http_add_header(htx, ist("Age"), ist2(b_head(&trash), b_data(&trash))))
+ return 0;
+ return 1;
+}
+
+static void http_cache_io_handler(struct appctx *appctx)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_entry *cache_ptr = ctx->entry;
+ struct shared_block *first = block_ptr(cache_ptr);
+ struct stconn *sc = appctx_sc(appctx);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct htx *req_htx, *res_htx;
+ struct buffer *errmsg;
+ unsigned int len;
+ size_t ret, total = 0;
+
+ res_htx = htx_from_buf(&res->buf);
+ total = res_htx->data;
+
+ if (unlikely(sc->state == SC_ST_DIS || sc->state == SC_ST_CLO))
+ goto out;
+
+ /* Check if the input buffer is available. */
+ if (!b_size(&res->buf)) {
+ sc_need_room(sc);
+ goto out;
+ }
+
+ if (res->flags & (CF_SHUTW|CF_SHUTR|CF_SHUTW_NOW))
+ appctx->st0 = HTX_CACHE_END;
+
+ if (appctx->st0 == HTX_CACHE_INIT) {
+ ctx->next = block_ptr(cache_ptr);
+ ctx->offset = sizeof(*cache_ptr);
+ ctx->sent = 0;
+ ctx->rem_data = 0;
+ appctx->st0 = HTX_CACHE_HEADER;
+ }
+
+ if (appctx->st0 == HTX_CACHE_HEADER) {
+		/* Headers must be dumped at once, otherwise it is an error. */
+ len = first->len - sizeof(*cache_ptr) - ctx->sent;
+ ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_EOH);
+ if (!ret || (htx_get_tail_type(res_htx) != HTX_BLK_EOH) ||
+ !htx_cache_add_age_hdr(appctx, res_htx))
+ goto error;
+
+ /* In case of a conditional request, we might want to send a
+ * "304 Not Modified" response instead of the stored data. */
+ if (ctx->send_notmodified) {
+ if (!http_replace_res_status(res_htx, ist("304"), ist("Not Modified"))) {
+ /* If replacing the status code fails we need to send the full response. */
+ ctx->send_notmodified = 0;
+ }
+ }
+
+ /* Skip response body for HEAD requests or in case of "304 Not
+ * Modified" response. */
+ if (__sc_strm(sc)->txn->meth == HTTP_METH_HEAD || ctx->send_notmodified)
+ appctx->st0 = HTX_CACHE_EOM;
+ else
+ appctx->st0 = HTX_CACHE_DATA;
+ }
+
+ if (appctx->st0 == HTX_CACHE_DATA) {
+ len = first->len - sizeof(*cache_ptr) - ctx->sent;
+ if (len) {
+ ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_UNUSED);
+ if (ret < len) {
+ sc_need_room(sc);
+ goto out;
+ }
+ }
+ appctx->st0 = HTX_CACHE_EOM;
+ }
+
+ if (appctx->st0 == HTX_CACHE_EOM) {
+ /* no more data are expected. */
+ res_htx->flags |= HTX_FL_EOM;
+ res->flags |= CF_EOI;
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ appctx->st0 = HTX_CACHE_END;
+ }
+
+ end:
+ if (!(res->flags & CF_SHUTR) && appctx->st0 == HTX_CACHE_END) {
+ res->flags |= CF_READ_NULL;
+ sc_shutr(sc);
+ }
+
+ out:
+ total = res_htx->data - total;
+ if (total)
+ channel_add_input(res, total);
+ htx_to_buf(res_htx, &res->buf);
+
+ /* eat the whole request */
+ if (co_data(req)) {
+ req_htx = htx_from_buf(&req->buf);
+ co_htx_skip(req, req_htx, co_data(req));
+ htx_to_buf(req_htx, &req->buf);
+ }
+ return;
+
+ error:
+	/* Send an HTTP 500 error */
+ b_reset(&res->buf);
+ errmsg = &http_err_chunks[HTTP_ERR_500];
+ res->buf.data = b_data(errmsg);
+ memcpy(res->buf.area, b_head(errmsg), b_data(errmsg));
+ res_htx = htx_from_buf(&res->buf);
+
+ total = 0;
+ appctx->st0 = HTX_CACHE_END;
+ goto end;
+}
+
+
+static int parse_cache_rule(struct proxy *proxy, const char *name, struct act_rule *rule, char **err)
+{
+ struct flt_conf *fconf;
+ struct cache_flt_conf *cconf = NULL;
+
+ if (!*name || strcmp(name, "if") == 0 || strcmp(name, "unless") == 0) {
+ memprintf(err, "expects a cache name");
+ goto err;
+ }
+
+	/* Check if a cache filter was already registered with this cache
+	 * name; if that's the case, it must be used. */
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->id == cache_store_flt_id) {
+ cconf = fconf->conf;
+ if (cconf && strcmp((char *)cconf->c.name, name) == 0) {
+ rule->arg.act.p[0] = cconf;
+ return 1;
+ }
+ }
+ }
+
+ /* Create the filter cache config */
+ cconf = calloc(1, sizeof(*cconf));
+ if (!cconf) {
+ memprintf(err, "out of memory\n");
+ goto err;
+ }
+ cconf->flags = CACHE_FLT_F_IMPLICIT_DECL;
+ cconf->c.name = strdup(name);
+ if (!cconf->c.name) {
+ memprintf(err, "out of memory\n");
+ goto err;
+ }
+
+ /* register a filter to fill the cache buffer */
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf) {
+ memprintf(err, "out of memory\n");
+ goto err;
+ }
+ fconf->id = cache_store_flt_id;
+ fconf->conf = cconf;
+ fconf->ops = &cache_ops;
+ LIST_APPEND(&proxy->filter_configs, &fconf->list);
+
+ rule->arg.act.p[0] = cconf;
+ return 1;
+
+ err:
+ free(cconf);
+ return 0;
+}
+
+enum act_parse_ret parse_cache_store(const char **args, int *orig_arg, struct proxy *proxy,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_store_cache;
+
+ if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
+ return ACT_RET_PRS_ERR;
+
+ (*orig_arg)++;
+ return ACT_RET_PRS_OK;
+}
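+
+/* The "cache-store" action above and the "cache-use" action parsed later in
+ * this file are typically combined in a proxy section as follows
+ * (illustrative configuration):
+ *
+ *   http-request  cache-use   mycache
+ *   http-response cache-store mycache
+ */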
+
+/* This produces a sha1 hash of the request URI: the absolute URI when the
+ * request already carries an authority, otherwise "https://" followed by the
+ * first occurrence of the Host header and the relative URI. */
+int sha1_hosturi(struct stream *s)
+{
+ struct http_txn *txn = s->txn;
+ struct htx *htx = htxbuf(&s->req.buf);
+ struct htx_sl *sl;
+ struct http_hdr_ctx ctx;
+ struct ist uri;
+ blk_SHA_CTX sha1_ctx;
+ struct buffer *trash;
+
+ trash = get_trash_chunk();
+ ctx.blk = NULL;
+
+ sl = http_get_stline(htx);
+ uri = htx_sl_req_uri(sl); // whole uri
+ if (!uri.len)
+ return 0;
+
+ /* In HTTP/1, most URIs are seen in origin form ('/path/to/resource'),
+ * unless haproxy is deployed in front of an outbound cache. In HTTP/2,
+ * URIs are almost always sent in absolute form with their scheme. In
+ * this case, the scheme is almost always "https". In order to support
+ * sharing of cache objects between H1 and H2, we'll hash the absolute
+ * URI whenever known, or prepend "https://" + the Host header for
+ * relative URIs. The difference will only appear on absolute HTTP/1
+	 * requests sent to an origin server, which is practically never met in
+	 * the real world, so we don't care about the ability to share the same
+	 * key here. URIs are normalized from the absolute URI to an origin form
+	 * as well.
+ */
+ if (!(sl->flags & HTX_SL_F_HAS_AUTHORITY)) {
+ chunk_istcat(trash, ist("https://"));
+ if (!http_find_header(htx, ist("Host"), &ctx, 0))
+ return 0;
+ chunk_istcat(trash, ctx.value);
+ }
+
+ chunk_istcat(trash, uri);
+
+ /* hash everything */
+ blk_SHA1_Init(&sha1_ctx);
+ blk_SHA1_Update(&sha1_ctx, trash->area, trash->data);
+ blk_SHA1_Final((unsigned char *)txn->cache_hash, &sha1_ctx);
+
+ return 1;
+}
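+
+/* As an illustration, a request "GET /img/logo.png HTTP/1.1" carrying
+ * "Host: www.example.com" is hashed as if it were the absolute URI
+ * "https://www.example.com/img/logo.png" (hypothetical values).
+ */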
+
+/* Looks for "If-None-Match" headers in the request and compares their value
+ * with the one that might have been stored in the cache_entry. If any of them
+ * matches, a "304 Not Modified" response should be sent instead of the cached
+ * data.
+ * Although unlikely in a GET/HEAD request, the "If-None-Match: *" syntax is
+ * valid and should receive a "304 Not Modified" response (RFC 7234#4.3.2).
+ *
+ * If no "If-None-Match" header was found, look for an "If-Modified-Since"
+ * header and compare its value (date) to the one stored in the cache_entry.
+ * If the request's date is later than the cached one, we also send a
+ * "304 Not Modified" response (see RFCs 7232#3.3 and 7234#4.3.2).
+ *
+ * Returns 1 if "304 Not Modified" should be sent, 0 otherwise.
+ */
+static int should_send_notmodified_response(struct cache *cache, struct htx *htx,
+ struct cache_entry *entry)
+{
+ int retval = 0;
+
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct ist cache_entry_etag = IST_NULL;
+ struct buffer *etag_buffer = NULL;
+ int if_none_match_found = 0;
+
+ struct tm tm = {};
+ time_t if_modified_since = 0;
+
+ /* If we find a "If-None-Match" header in the request, rebuild the
+ * cache_entry's ETag in order to perform comparisons.
+ * There could be multiple "if-none-match" header lines. */
+ while (http_find_header(htx, ist("if-none-match"), &ctx, 0)) {
+ if_none_match_found = 1;
+
+ /* A '*' matches everything. */
+ if (isteq(ctx.value, ist("*")) != 0) {
+ retval = 1;
+ break;
+ }
+
+ /* No need to rebuild an etag if none was stored in the cache. */
+ if (entry->etag_length == 0)
+ break;
+
+ /* Rebuild the stored ETag. */
+ if (etag_buffer == NULL) {
+ etag_buffer = get_trash_chunk();
+
+ if (shctx_row_data_get(shctx_ptr(cache), block_ptr(entry),
+ (unsigned char*)b_orig(etag_buffer),
+ entry->etag_offset, entry->etag_length) == 0) {
+ cache_entry_etag = ist2(b_orig(etag_buffer), entry->etag_length);
+ } else {
+ /* We could not rebuild the ETag in one go, we
+ * won't send a "304 Not Modified" response. */
+ break;
+ }
+ }
+
+ if (http_compare_etags(cache_entry_etag, ctx.value) == 1) {
+ retval = 1;
+ break;
+ }
+ }
+
+ /* If the request did not contain an "If-None-Match" header, we look for
+ * an "If-Modified-Since" header (see RFC 7232#3.3). */
+ if (retval == 0 && if_none_match_found == 0) {
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("if-modified-since"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ if_modified_since = my_timegm(&tm);
+
+ /* We send a "304 Not Modified" response if the
+ * entry's last modified date is earlier than
+ * the one found in the "If-Modified-Since"
+ * header. */
+ retval = (entry->last_modified <= if_modified_since);
+ }
+ }
+ }
+
+ return retval;
+}
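+
+/* For instance (illustrative values), a request carrying
+ * 'If-None-Match: "abc", "def"' matches a cached entry whose stored ETag is
+ * "def", so a "304 Not Modified" response will be sent instead of the cached
+ * body.
+ */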
+
+enum act_return http_action_req_cache_use(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+
+ struct http_txn *txn = s->txn;
+ struct cache_entry *res, *sec_entry = NULL;
+ struct cache_flt_conf *cconf = rule->arg.act.p[0];
+ struct cache *cache = cconf->c.cache;
+ struct shared_block *entry_block;
+
+
+ /* Ignore cache for HTTP/1.0 requests and for requests other than GET
+ * and HEAD */
+ if (!(txn->req.flags & HTTP_MSGF_VER_11) ||
+ (txn->meth != HTTP_METH_GET && txn->meth != HTTP_METH_HEAD))
+ txn->flags |= TX_CACHE_IGNORE;
+
+ http_check_request_for_cacheability(s, &s->req);
+
+ /* The request's hash has to be calculated for all requests, even POSTs
+ * or PUTs for instance because RFC7234 specifies that a successful
+ * "unsafe" method on a stored resource must invalidate it
+ * (see RFC7234#4.4). */
+ if (!sha1_hosturi(s))
+ return ACT_RET_CONT;
+
+ if (s->txn->flags & TX_CACHE_IGNORE)
+ return ACT_RET_CONT;
+
+ if (px == strm_fe(s))
+ _HA_ATOMIC_INC(&px->fe_counters.p.http.cache_lookups);
+ else
+ _HA_ATOMIC_INC(&px->be_counters.p.http.cache_lookups);
+
+ shctx_lock(shctx_ptr(cache));
+ res = entry_exist(cache, s->txn->cache_hash);
+ /* We must not use an entry that is not complete but the check will be
+ * performed after we look for a potential secondary entry (in case of
+ * Vary). */
+ if (res) {
+ struct appctx *appctx;
+ entry_block = block_ptr(res);
+ shctx_row_inc_hot(shctx_ptr(cache), entry_block);
+ shctx_unlock(shctx_ptr(cache));
+
+ /* In case of Vary, we could have multiple entries with the same
+ * primary hash. We need to calculate the secondary hash in order
+ * to find the actual entry we want (if it exists). */
+ if (res->secondary_key_signature) {
+ if (!http_request_build_secondary_key(s, res->secondary_key_signature)) {
+ shctx_lock(shctx_ptr(cache));
+ sec_entry = secondary_entry_exist(cache, res,
+ s->txn->cache_secondary_hash);
+ if (sec_entry && sec_entry != res) {
+ /* The wrong row was added to the hot list. */
+ shctx_row_dec_hot(shctx_ptr(cache), entry_block);
+ entry_block = block_ptr(sec_entry);
+ shctx_row_inc_hot(shctx_ptr(cache), entry_block);
+ }
+ res = sec_entry;
+ shctx_unlock(shctx_ptr(cache));
+ }
+ else
+ res = NULL;
+ }
+
+ /* We either looked for a valid secondary entry and could not
+ * find one, or the entry we want to use is not complete. We
+ * can't use the cache's entry and must forward the request to
+ * the server. */
+ if (!res || !res->complete) {
+ shctx_lock(shctx_ptr(cache));
+ shctx_row_dec_hot(shctx_ptr(cache), entry_block);
+ shctx_unlock(shctx_ptr(cache));
+ return ACT_RET_CONT;
+ }
+
+ s->target = &http_cache_applet.obj_type;
+ if ((appctx = sc_applet_create(s->scb, objt_applet(s->target)))) {
+ struct cache_appctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ appctx->st0 = HTX_CACHE_INIT;
+ appctx->rule = rule;
+ ctx->entry = res;
+ ctx->next = NULL;
+ ctx->sent = 0;
+ ctx->send_notmodified =
+ should_send_notmodified_response(cache, htxbuf(&s->req.buf), res);
+
+ if (px == strm_fe(s))
+ _HA_ATOMIC_INC(&px->fe_counters.p.http.cache_hits);
+ else
+ _HA_ATOMIC_INC(&px->be_counters.p.http.cache_hits);
+ return ACT_RET_CONT;
+ } else {
+ s->target = NULL;
+ shctx_lock(shctx_ptr(cache));
+ shctx_row_dec_hot(shctx_ptr(cache), entry_block);
+ shctx_unlock(shctx_ptr(cache));
+ return ACT_RET_CONT;
+ }
+ }
+ shctx_unlock(shctx_ptr(cache));
+
+ /* Shared context does not need to be locked while we calculate the
+ * secondary hash. */
+ if (!res && cache->vary_processing_enabled) {
+ /* Build a complete secondary hash until the server response
+ * tells us which fields should be kept (if any). */
+ http_request_prebuild_full_secondary_key(s);
+ }
+ return ACT_RET_CONT;
+}
+
+
+enum act_parse_ret parse_cache_use(const char **args, int *orig_arg, struct proxy *proxy,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_req_cache_use;
+
+ if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
+ return ACT_RET_PRS_ERR;
+
+ (*orig_arg)++;
+ return ACT_RET_PRS_OK;
+}
+
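+/* A "cache" section as parsed below looks like this (illustrative values):
+ *
+ *   cache mycache
+ *       total-max-size 64        # shared memory size, in megabytes
+ *       max-object-size 100000   # in bytes
+ *       max-age 240              # in seconds
+ *       process-vary on
+ */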
+int cfg_parse_cache(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = 0;
+
+ if (strcmp(args[0], "cache") == 0) { /* new cache section */
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects a <name> argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (tmp_cache_config == NULL) {
+ struct cache *cache_config;
+
+ tmp_cache_config = calloc(1, sizeof(*tmp_cache_config));
+ if (!tmp_cache_config) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ strlcpy2(tmp_cache_config->id, args[1], 33);
+ if (strlen(args[1]) > 32) {
+				ha_warning("parsing [%s:%d]: cache name is limited to 32 characters, truncated to '%s'.\n",
+ file, linenum, tmp_cache_config->id);
+ err_code |= ERR_WARN;
+ }
+
+ list_for_each_entry(cache_config, &caches_config, list) {
+ if (strcmp(tmp_cache_config->id, cache_config->id) == 0) {
+ ha_alert("parsing [%s:%d]: Duplicate cache name '%s'.\n",
+ file, linenum, tmp_cache_config->id);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+
+ tmp_cache_config->maxage = 60;
+ tmp_cache_config->maxblocks = 0;
+ tmp_cache_config->maxobjsz = 0;
+ tmp_cache_config->max_secondary_entries = DEFAULT_MAX_SECONDARY_ENTRY;
+ }
+ } else if (strcmp(args[0], "total-max-size") == 0) {
+ unsigned long int maxsize;
+ char *err;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ maxsize = strtoul(args[1], &err, 10);
+ if (err == args[1] || *err != '\0') {
+ ha_warning("parsing [%s:%d]: total-max-size wrong value '%s'\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (maxsize > (UINT_MAX >> 20)) {
+ ha_warning("parsing [%s:%d]: \"total-max-size\" (%s) must not be greater than %u\n",
+ file, linenum, args[1], UINT_MAX >> 20);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+		/* size parsed in megabytes, converted to blocks */
+ maxsize *= 1024 * 1024 / CACHE_BLOCKSIZE;
+ tmp_cache_config->maxblocks = maxsize;
+ } else if (strcmp(args[0], "max-age") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects an age parameter in seconds.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ tmp_cache_config->maxage = atoi(args[1]);
+ } else if (strcmp(args[0], "max-object-size") == 0) {
+ unsigned int maxobjsz;
+ char *err;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects a maximum file size parameter in bytes.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ maxobjsz = strtoul(args[1], &err, 10);
+ if (err == args[1] || *err != '\0') {
+ ha_warning("parsing [%s:%d]: max-object-size wrong value '%s'\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+ tmp_cache_config->maxobjsz = maxobjsz;
+ } else if (strcmp(args[0], "process-vary") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects \"on\" or \"off\" (enable or disable vary processing).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+ if (strcmp(args[1], "on") == 0)
+ tmp_cache_config->vary_processing_enabled = 1;
+ else if (strcmp(args[1], "off") == 0)
+ tmp_cache_config->vary_processing_enabled = 0;
+ else {
+ ha_warning("parsing [%s:%d]: '%s' expects \"on\" or \"off\" (enable or disable vary processing).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+ } else if (strcmp(args[0], "max-secondary-entries") == 0) {
+ unsigned int max_sec_entries;
+ char *err;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects a strictly positive number.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ max_sec_entries = strtoul(args[1], &err, 10);
+ if (err == args[1] || *err != '\0' || max_sec_entries == 0) {
+ ha_warning("parsing [%s:%d]: max-secondary-entries wrong value '%s'\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+ tmp_cache_config->max_secondary_entries = max_sec_entries;
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in 'cache' section\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+out:
+ return err_code;
+}
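+
+/* Illustrative "cache" section exercising the keywords parsed above (a
+ * sketch; every value is an example):
+ *
+ *   cache mycache
+ *       total-max-size 64          # megabytes, converted to blocks
+ *       max-age 120                # seconds
+ *       max-object-size 1048576    # bytes
+ *       process-vary on
+ *       max-secondary-entries 10
+ */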
+
+/* once the cache section is parsed */
+
+int cfg_post_parse_section_cache()
+{
+ int err_code = 0;
+
+ if (tmp_cache_config) {
+
+ if (tmp_cache_config->maxblocks <= 0) {
+ ha_alert("Size not specified for cache '%s'\n", tmp_cache_config->id);
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+
+ if (!tmp_cache_config->maxobjsz) {
+			/* Default max. object size is 1/256th of the cache size. */
+ tmp_cache_config->maxobjsz =
+ (tmp_cache_config->maxblocks * CACHE_BLOCKSIZE) >> 8;
+ }
+ else if (tmp_cache_config->maxobjsz > tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2) {
+			ha_alert("\"max-object-size\" is limited to half of \"total-max-size\" => %u\n", tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2);
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+
+ /* add to the list of cache to init and reinit tmp_cache_config
+ * for next cache section, if any.
+ */
+ LIST_APPEND(&caches_config, &tmp_cache_config->list);
+ tmp_cache_config = NULL;
+ return err_code;
+ }
+out:
+ ha_free(&tmp_cache_config);
+ return err_code;
+
+}
+
+int post_check_cache()
+{
+ struct proxy *px;
+ struct cache *back, *cache_config, *cache;
+ struct shared_context *shctx;
+ int ret_shctx;
+ int err_code = ERR_NONE;
+
+ list_for_each_entry_safe(cache_config, back, &caches_config, list) {
+
+ ret_shctx = shctx_init(&shctx, cache_config->maxblocks, CACHE_BLOCKSIZE,
+ cache_config->maxobjsz, sizeof(struct cache), 1);
+
+ if (ret_shctx <= 0) {
+ if (ret_shctx == SHCTX_E_INIT_LOCK)
+ ha_alert("Unable to initialize the lock for the cache.\n");
+ else
+ ha_alert("Unable to allocate cache.\n");
+
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+ shctx->free_block = cache_free_blocks;
+		/* the cache structure is stored in the shctx and added to the
+		 * caches list, so we can remove the entry from the
+		 * caches_config list */
+ memcpy(shctx->data, cache_config, sizeof(struct cache));
+ cache = (struct cache *)shctx->data;
+ cache->entries = EB_ROOT;
+ LIST_APPEND(&caches, &cache->list);
+ LIST_DELETE(&cache_config->list);
+ free(cache_config);
+
+ /* Find all references for this cache in the existing filters
+ * (over all proxies) and reference it in matching filters.
+ */
+ for (px = proxies_list; px; px = px->next) {
+ struct flt_conf *fconf;
+ struct cache_flt_conf *cconf;
+
+ list_for_each_entry(fconf, &px->filter_configs, list) {
+ if (fconf->id != cache_store_flt_id)
+ continue;
+
+ cconf = fconf->conf;
+ if (strcmp(cache->id, cconf->c.name) == 0) {
+ free(cconf->c.name);
+ cconf->flags |= CACHE_FLT_INIT;
+ cconf->c.cache = cache;
+ break;
+ }
+ }
+ }
+ }
+
+out:
+ return err_code;
+
+}
+
+struct flt_ops cache_ops = {
+ .init = cache_store_init,
+ .check = cache_store_check,
+ .deinit = cache_store_deinit,
+
+ /* Handle stream init/deinit */
+ .attach = cache_store_strm_init,
+ .detach = cache_store_strm_deinit,
+
+ /* Handle channels activity */
+ .channel_post_analyze = cache_store_post_analyze,
+
+ /* Filter HTTP requests and responses */
+ .http_headers = cache_store_http_headers,
+ .http_payload = cache_store_http_payload,
+ .http_end = cache_store_http_end,
+};
+
+
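+/* Note on the macro below: the switch in parse_encoding_value() has already
+ * consumed and matched the first character of the encoding name, so the
+ * comparison starts at encoding_name+1 and the length excludes both that
+ * first character and the trailing NUL (hence sizeof(encoding_name) - 2).
+ */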
+#define CHECK_ENCODING(str, encoding_name, encoding_value) \
+ ({ \
+ int retval = 0; \
+ if (istmatch(str, (struct ist){ .ptr = encoding_name+1, .len = sizeof(encoding_name) - 2 })) { \
+ retval = encoding_value; \
+ encoding = istadv(encoding, sizeof(encoding_name) - 2); \
+ } \
+ (retval); \
+ })
+
+/*
+ * Parse the encoding <encoding> and try to match the encoding part against an
+ * encoding list of explicitly supported encodings (which all have a specific
+ * bit in an encoding bitmap). If a weight is included in the value, find out if
+ * it is null or not. The bit value will be set in the <encoding_value>
+ * parameter and <has_null_weight> will be set to 1 if the weight is strictly
+ * 0, and to 0 otherwise.
+ * The encodings list is extracted from
+ * https://www.iana.org/assignments/http-parameters/http-parameters.xhtml.
+ * Returns 0 in case of success and -1 in case of error.
+ */
+static int parse_encoding_value(struct ist encoding, unsigned int *encoding_value,
+ unsigned int *has_null_weight)
+{
+ int retval = 0;
+
+ if (!encoding_value)
+ return -1;
+
+ if (!istlen(encoding))
+ return -1; /* Invalid encoding */
+
+ *encoding_value = 0;
+ if (has_null_weight)
+ *has_null_weight = 0;
+
+ switch (*encoding.ptr) {
+ case 'a':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "aes128gcm", VARY_ENCODING_AES128GCM);
+ break;
+ case 'b':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "br", VARY_ENCODING_BR);
+ break;
+ case 'c':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "compress", VARY_ENCODING_COMPRESS);
+ break;
+ case 'd':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "deflate", VARY_ENCODING_DEFLATE);
+ break;
+ case 'e':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "exi", VARY_ENCODING_EXI);
+ break;
+ case 'g':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "gzip", VARY_ENCODING_GZIP);
+ break;
+ case 'i':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "identity", VARY_ENCODING_IDENTITY);
+ break;
+ case 'p':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "pack200-gzip", VARY_ENCODING_PACK200_GZIP);
+ break;
+ case 'x':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "x-gzip", VARY_ENCODING_GZIP);
+ if (!*encoding_value)
+ *encoding_value = CHECK_ENCODING(encoding, "x-compress", VARY_ENCODING_COMPRESS);
+ break;
+ case 'z':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "zstd", VARY_ENCODING_ZSTD);
+ break;
+ case '*':
+ encoding = istnext(encoding);
+ *encoding_value = VARY_ENCODING_STAR;
+ break;
+ default:
+ retval = -1; /* Unmanaged encoding */
+ break;
+ }
+
+ /* Process the optional weight part of the encoding. */
+ if (*encoding_value) {
+ encoding = http_trim_leading_spht(encoding);
+ if (istlen(encoding)) {
+ if (*encoding.ptr != ';')
+ return -1;
+
+ if (has_null_weight) {
+ encoding = istnext(encoding);
+
+ encoding = http_trim_leading_spht(encoding);
+
+ *has_null_weight = isteq(encoding, ist("q=0"));
+ }
+ }
+ }
+
+ return retval;
+}
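+
+/* Worked example for parse_encoding_value() above (illustrative): given the
+ * value "gzip;q=0", the 'g' is consumed by the switch, "zip" is matched by
+ * CHECK_ENCODING() which sets VARY_ENCODING_GZIP, and the ";q=0" tail sets
+ * <has_null_weight> to 1. Given "br", VARY_ENCODING_BR is set and
+ * <has_null_weight> stays 0.
+ */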
+
+#define ACCEPT_ENCODING_MAX_ENTRIES 16
+/*
+ * Build a bitmap of the accept-encoding header.
+ *
+ * The bitmap is built by matching every sub-part of the accept-encoding value
+ * with a subset of explicitly supported encodings, which all have their own bit
+ * in the bitmap. This bitmap will be used to determine if a response can be
+ * served to a client (that is if it has an encoding that is accepted by the
+ * client). Any unknown encodings will be indicated by the VARY_ENCODING_OTHER
+ * bit.
+ *
+ * Returns 0 in case of success and -1 in case of error.
+ */
+static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len)
+{
+ size_t count = 0;
+ uint32_t encoding_bitmap = 0;
+ unsigned int encoding_bmp_bl = -1;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ unsigned int encoding_value;
+ unsigned int rejected_encoding;
+
+ /* A user agent always accepts an unencoded value unless it explicitly
+ * refuses it through an "identity;q=0" accept-encoding value. */
+ encoding_bitmap |= VARY_ENCODING_IDENTITY;
+
+	/* Iterate over the first ACCEPT_ENCODING_MAX_ENTRIES accept-encoding
+	 * values, which might span across multiple accept-encoding headers. */
+ while (http_find_header(htx, hdr_name, &ctx, 0) && count < ACCEPT_ENCODING_MAX_ENTRIES) {
+ count++;
+
+ /* As per RFC7231#5.3.4, "An Accept-Encoding header field with a
+ * combined field-value that is empty implies that the user agent
+ * does not want any content-coding in response."
+ *
+ * We must (and did) count the existence of this empty header to not
+ * hit the `count == 0` case below, but must ignore the value to not
+ * include VARY_ENCODING_OTHER into the final bitmap.
+ */
+ if (istlen(ctx.value) == 0)
+ continue;
+
+ /* Turn accept-encoding value to lower case */
+ ist2bin_lc(istptr(ctx.value), ctx.value);
+
+ /* Try to identify a known encoding and to manage null weights. */
+ if (!parse_encoding_value(ctx.value, &encoding_value, &rejected_encoding)) {
+ if (rejected_encoding)
+ encoding_bmp_bl &= ~encoding_value;
+ else
+ encoding_bitmap |= encoding_value;
+ }
+ else {
+ /* Unknown encoding */
+ encoding_bitmap |= VARY_ENCODING_OTHER;
+ }
+ }
+
+	/* If a "*" was found in the accepted encodings (without a null weight),
+	 * all the encodings are accepted except the ones explicitly rejected. */
+ if (encoding_bitmap & VARY_ENCODING_STAR) {
+ encoding_bitmap = ~0;
+ }
+
+ /* Clear explicitly rejected encodings from the bitmap */
+ encoding_bitmap &= encoding_bmp_bl;
+
+ /* As per RFC7231#5.3.4, "If no Accept-Encoding field is in the request,
+ * any content-coding is considered acceptable by the user agent". */
+ if (count == 0)
+ encoding_bitmap = ~0;
+
+ /* A request with more than ACCEPT_ENCODING_MAX_ENTRIES accepted
+ * encodings might be illegitimate so we will not use it. */
+ if (count == ACCEPT_ENCODING_MAX_ENTRIES)
+ return -1;
+
+ write_u32(buf, encoding_bitmap);
+ *buf_len = sizeof(encoding_bitmap);
+
+ /* This function fills the hash buffer correctly even if no header was
+ * found, hence the 0 return value (success). */
+ return 0;
+}
+#undef ACCEPT_ENCODING_MAX_ENTRIES
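+
+/* Worked example for accept_encoding_normalizer() above (illustrative): with
+ * the single header "Accept-Encoding: gzip;q=0, br", the loop sees two
+ * values. "gzip;q=0" clears the gzip bit from the blocklist mask and "br"
+ * sets VARY_ENCODING_BR, so the stored bitmap ends up being
+ * VARY_ENCODING_IDENTITY | VARY_ENCODING_BR.
+ */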
+
+/*
+ * Normalizer used by default for the Referer header. It simply calculates a
+ * CRC32 of the whole value.
+ * Only the first occurrence of the header will be taken into account in the
+ * hash.
+ * Returns 0 in case of success, 1 if the hash buffer should be filled with 0s
+ * and -1 in case of error.
+ */
+static int default_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len)
+{
+ int retval = 1;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ if (http_find_header(htx, hdr_name, &ctx, 1)) {
+ retval = 0;
+ write_u32(buf, hash_crc32(istptr(ctx.value), istlen(ctx.value)));
+ *buf_len = sizeof(int);
+ }
+
+ return retval;
+}
+
+/*
+ * Accept-Encoding bitmap comparison function.
+ * Returns 0 if the bitmaps are compatible.
+ */
+static int accept_encoding_bitmap_cmp(const void *ref, const void *new, unsigned int len)
+{
+ uint32_t ref_bitmap = read_u32(ref);
+ uint32_t new_bitmap = read_u32(new);
+
+ if (!(ref_bitmap & VARY_ENCODING_OTHER)) {
+		/* All the bits set in the reference bitmap correspond to the
+		 * stored response's encodings and should all be set in the new
+ * encoding bitmap in order for the client to be able to manage
+ * the response.
+ *
+ * If this is the case the cached response has encodings that
+ * are accepted by the client. It can be served directly by
+ * the cache (as far as the accept-encoding part is concerned).
+ */
+
+ return (ref_bitmap & new_bitmap) != ref_bitmap;
+ }
+ else {
+ return 1;
+ }
+}
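+
+/* Illustrative check using the comparison above: if the stored response was
+ * cached with ref = VARY_ENCODING_GZIP and a new request advertises
+ * new = VARY_ENCODING_IDENTITY | VARY_ENCODING_GZIP, then
+ * (ref & new) == ref and the function returns 0 (compatible). A response
+ * stored with VARY_ENCODING_OTHER can never be served from the cache.
+ */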
+
+
+/*
+ * Pre-calculate the hashes of all the supported headers (in our Vary
+ * implementation) of a given request. We have to calculate all the hashes
+ * in advance because the actual Vary signature won't be known until the first
+ * response.
+ * Only the first occurrence of every header will be taken into account in the
+ * hash.
+ * If the header is not present, the hash portion of the given header will be
+ * filled with zeros.
+ * Returns 0 in case of success.
+ */
+static int http_request_prebuild_full_secondary_key(struct stream *s)
+{
+ /* The fake signature (second parameter) will ensure that every part of the
+ * secondary key is calculated. */
+ return http_request_build_secondary_key(s, ~0);
+}
+
+
+/*
+ * Calculate the secondary key for a request for which we already have a known
+ * vary signature. The key is made by aggregating hashes calculated for every
+ * header mentioned in the vary signature.
+ * Only the first occurrence of every header will be taken into account in the
+ * hash.
+ * If the header is not present, the hash portion of the given header will be
+ * filled with zeros.
+ * Returns 0 in case of success.
+ */
+static int http_request_build_secondary_key(struct stream *s, int vary_signature)
+{
+ struct http_txn *txn = s->txn;
+ struct htx *htx = htxbuf(&s->req.buf);
+
+ unsigned int idx;
+ const struct vary_hashing_information *info = NULL;
+ unsigned int hash_length = 0;
+ int retval = 0;
+ int offset = 0;
+
+ for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && retval >= 0; ++idx) {
+ info = &vary_information[idx];
+
+ /* The normalizing functions will be in charge of getting the
+ * header values from the htx. This way they can manage multiple
+ * occurrences of their processed header. */
+ if ((vary_signature & info->value) && info->norm_fn != NULL &&
+ !(retval = info->norm_fn(htx, info->hdr_name, &txn->cache_secondary_hash[offset], &hash_length))) {
+ offset += hash_length;
+ }
+ else {
+ /* Fill hash with 0s. */
+ hash_length = info->hash_length;
+ memset(&txn->cache_secondary_hash[offset], 0, hash_length);
+ offset += hash_length;
+ }
+ }
+
+ if (retval >= 0)
+ txn->flags |= TX_CACHE_HAS_SEC_KEY;
+
+ return (retval < 0);
+}
+
+/*
+ * Build the actual secondary key of a given request out of the prebuilt key and
+ * the actual vary signature (extracted from the response).
+ * Returns 0 in case of success.
+ */
+static int http_request_reduce_secondary_key(unsigned int vary_signature,
+ char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN])
+{
+ int offset = 0;
+ int global_offset = 0;
+ int vary_info_count = 0;
+ int keep = 0;
+ unsigned int vary_idx;
+ const struct vary_hashing_information *vary_info;
+
+ vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
+ for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
+ vary_info = &vary_information[vary_idx];
+ keep = (vary_signature & vary_info->value) ? 0xff : 0;
+
+ for (offset = 0; offset < vary_info->hash_length; ++offset,++global_offset) {
+ prebuilt_key[global_offset] &= keep;
+ }
+ }
+
+ return 0;
+}
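+
+/* Illustrative effect of the reduction above: if the response's Vary
+ * signature only covers accept-encoding, the bytes of the prebuilt key
+ * belonging to the other supported headers (referer, ...) are masked to
+ * zero, so that two requests differing only in those headers map to the
+ * same secondary key.
+ */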
+
+
+
+static int
+parse_cache_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct flt_conf *f, *back;
+ struct cache_flt_conf *cconf = NULL;
+ char *name = NULL;
+ int pos = *cur_arg;
+
+	/* Get the cache filter name. <pos> points to the "cache" keyword */
+ if (!*args[pos + 1]) {
+ memprintf(err, "%s : expects a <name> argument", args[pos]);
+ goto error;
+ }
+ name = strdup(args[pos + 1]);
+ if (!name) {
+ memprintf(err, "%s '%s' : out of memory", args[pos], args[pos + 1]);
+ goto error;
+ }
+ pos += 2;
+
+ /* Check if an implicit filter with the same name already exists. If so,
+ * we remove the implicit filter to use the explicit one. */
+ list_for_each_entry_safe(f, back, &px->filter_configs, list) {
+ if (f->id != cache_store_flt_id)
+ continue;
+
+ cconf = f->conf;
+ if (strcmp(name, cconf->c.name) != 0) {
+ cconf = NULL;
+ continue;
+ }
+
+ if (!(cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
+ cconf = NULL;
+ memprintf(err, "%s: multiple explicit declarations of the cache filter '%s'",
+ px->id, name);
+ goto error;
+ }
+
+ /* Remove the implicit filter. <cconf> is kept for the explicit one */
+ LIST_DELETE(&f->list);
+ free(f);
+ free(name);
+ break;
+ }
+
+ /* No implicit cache filter found, create configuration for the explicit one */
+ if (!cconf) {
+ cconf = calloc(1, sizeof(*cconf));
+ if (!cconf) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+ cconf->c.name = name;
+ }
+
+ cconf->flags = 0;
+ fconf->id = cache_store_flt_id;
+ fconf->conf = cconf;
+ fconf->ops = &cache_ops;
+
+ *cur_arg = pos;
+ return 0;
+
+ error:
+ free(name);
+ free(cconf);
+ return -1;
+}
+
+/* Reserves a struct show_cache_ctx for the local variables */
+static int cli_parse_show_cache(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_cache_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ctx->cache = LIST_ELEM((caches).n, typeof(struct cache *), list);
+ return 0;
+}
+
+/* Uses a struct show_cache_ctx for the local variables */
+static int cli_io_handler_show_cache(struct appctx *appctx)
+{
+ struct show_cache_ctx *ctx = appctx->svcctx;
+ struct cache* cache = ctx->cache;
+
+ list_for_each_entry_from(cache, &caches, list) {
+ struct eb32_node *node = NULL;
+ unsigned int next_key;
+ struct cache_entry *entry;
+ unsigned int i;
+
+ next_key = ctx->next_key;
+ if (!next_key) {
+ chunk_printf(&trash, "%p: %s (shctx:%p, available blocks:%d)\n", cache, cache->id, shctx_ptr(cache), shctx_ptr(cache)->nbav);
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ }
+
+ ctx->cache = cache;
+
+ while (1) {
+
+ shctx_lock(shctx_ptr(cache));
+ node = eb32_lookup_ge(&cache->entries, next_key);
+ if (!node) {
+ shctx_unlock(shctx_ptr(cache));
+ ctx->next_key = 0;
+ break;
+ }
+
+ entry = container_of(node, struct cache_entry, eb);
+ next_key = node->key + 1;
+
+ if (entry->expire > date.tv_sec) {
+ chunk_printf(&trash, "%p hash:%u vary:0x", entry, read_u32(entry->hash));
+ for (i = 0; i < HTTP_CACHE_SEC_KEY_LEN; ++i)
+ chunk_appendf(&trash, "%02x", (unsigned char)entry->secondary_key[i]);
+ chunk_appendf(&trash, " size:%u (%u blocks), refcount:%u, expire:%d\n",
+ block_ptr(entry)->len, block_ptr(entry)->block_count,
+ block_ptr(entry)->refcount, entry->expire - (int)date.tv_sec);
+ } else {
+ /* time to remove that one */
+ delete_entry(entry);
+ entry->eb.key = 0;
+ }
+
+ ctx->next_key = next_key;
+
+ shctx_unlock(shctx_ptr(cache));
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ }
+
+ }
+
+ return 1;
+
+}
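+
+/* Example "show cache" output produced by the handler above (illustrative,
+ * abbreviated pointer and counter values):
+ *
+ *   0x7f...: mycache (shctx:0x7f..., available blocks:16384)
+ *   0x7f... hash:3144094333 vary:0x0000...00 size:895 (1 blocks), refcount:0, expire:59
+ */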
+
+
+/*
+ * boolean, returns true if response was built out of a cache entry.
+ */
+static int
+smp_fetch_res_cache_hit(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (smp->strm ? (smp->strm->target == &http_cache_applet.obj_type) : 0);
+
+ return 1;
+}
+
+/*
+ * string, returns cache name (if response came from a cache).
+ */
+static int
+smp_fetch_res_cache_name(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct appctx *appctx = NULL;
+
+ struct cache_flt_conf *cconf = NULL;
+ struct cache *cache = NULL;
+
+ if (!smp->strm || smp->strm->target != &http_cache_applet.obj_type)
+ return 0;
+
+ /* Get appctx from the stream connector. */
+ appctx = sc_appctx(smp->strm->scb);
+ if (appctx && appctx->rule) {
+ cconf = appctx->rule->arg.act.p[0];
+ if (cconf) {
+ cache = cconf->c.cache;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = cache->id;
+ smp->data.u.str.data = strlen(cache->id);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Declare the filter parser for "cache" keyword */
+static struct flt_kw_list filter_kws = { "CACHE", { }, {
+ { "cache", parse_cache_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
+
+static struct cli_kw_list cli_kws = {{},{
+ { { "show", "cache", NULL }, "show cache : show cache status", cli_parse_show_cache, cli_io_handler_show_cache, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+static struct action_kw_list http_res_actions = {
+ .kw = {
+ { "cache-store", parse_cache_store },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
+
+static struct action_kw_list http_req_actions = {
+ .kw = {
+ { "cache-use", parse_cache_use },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+struct applet http_cache_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<CACHE>", /* used for logging */
+ .fct = http_cache_io_handler,
+ .release = http_cache_applet_release,
+};
+
+/* config parsers for this section */
+REGISTER_CONFIG_SECTION("cache", cfg_parse_cache, cfg_post_parse_section_cache);
+REGISTER_POST_CHECK(post_check_cache);
+
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "res.cache_hit", smp_fetch_res_cache_hit, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
+ { "res.cache_name", smp_fetch_res_cache_name, 0, NULL, SMP_T_STR, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
diff --git a/src/calltrace.c b/src/calltrace.c
new file mode 100644
index 0000000..f72e121
--- /dev/null
+++ b/src/calltrace.c
@@ -0,0 +1,286 @@
+/*
+ * Function call tracing for gcc >= 2.95
+ * WARNING! THIS CODE IS NOT THREAD-SAFE!
+ *
+ * Copyright 2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * gcc is able to call a specific function when entering and leaving any
+ * function when compiled with -finstrument-functions. This code must not
+ * be built with this argument. The performance impact is huge, so this
+ * feature should only be used when debugging.
+ *
+ * The entry and exits of all functions will be dumped into a file designated
+ * by the HAPROXY_TRACE environment variable, or by default "trace.out". If the
+ * trace file name is empty or "/dev/null", then traces are disabled. If
+ * opening the trace file fails, then stderr is used. If HAPROXY_TRACE_FAST is
+ * used, then the time is taken from the global <now> variable. Last, if
+ * HAPROXY_TRACE_TSC is used, then the machine's TSC is used instead of the
+ * real time (almost twice as fast).
+ *
+ * The output format is:
+ *
+ * <sec.usec> <level> <caller_ptr> <dir> <callee_ptr>
+ * or :
+ * <tsc> <level> <caller_ptr> <dir> <callee_ptr>
+ *
+ * where <dir> is '>' when entering a function and '<' when leaving.
+ *
+ * It is also possible to emit comments using the calltrace() function which uses
+ * the printf() format. Such comments are then inserted by replacing the caller
+ * pointer with a sharp ('#') like this :
+ *
+ * <sec.usec> <level> # <comment>
+ * or :
+ * <tsc> <level> # <comment>
+ *
+ * The article below is a nice explanation of how this works :
+ * http://balau82.wordpress.com/2010/10/06/trace-and-profile-function-calls-with-gcc/
+ */
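+
+/* Illustrative build-and-run sketch (flags and paths are examples, not an
+ * official recipe): compile every file except this one with
+ * -finstrument-functions, link this file in, then run with e.g.
+ * HAPROXY_TRACE=/tmp/trace.out (optionally HAPROXY_TRACE_FAST=1 or
+ * HAPROXY_TRACE_TSC=1) to select the output file and clock source.
+ */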
+
+#include <sys/time.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/tools.h>
+
+static FILE *log;
+static int level;
+static int disabled;
+static int fast_time;
+static int use_tsc;
+static struct timeval trace_now;
+static struct timeval *now_ptr;
+static char line[128]; /* more than enough for a message (9+1+6+1+3+1+18+1+1+18+1+1) */
+
+static int open_trace()
+{
+ const char *output = getenv("HAPROXY_TRACE");
+
+ if (!output)
+ output = "trace.out";
+
+ if (!*output || strcmp(output, "/dev/null") == 0) {
+ disabled = 1;
+ return 0;
+ }
+
+ log = fopen(output, "w");
+ if (!log)
+ log = stderr;
+
+ now_ptr = &now;
+ if (getenv("HAPROXY_TRACE_FAST") != NULL) {
+ fast_time = 1;
+ now_ptr = &trace_now;
+ }
+ if (getenv("HAPROXY_TRACE_TSC") != NULL) {
+ fast_time = 1;
+ use_tsc = 1;
+ }
+ return 1;
+}
+
+/* This function first divides the number by 100M then iteratively multiplies it
+ * by 100 (using adds and shifts). The trick is that dividing by 100M is equivalent
+ * to multiplying by 1/100M, which approximates to 1441151881/2^57. All local
+ * variables fit in registers on x86. This version outputs two digits per round.
+ * <min_pairs> indicates the minimum number of pairs of digits that have to be
+ * emitted, which might be left-padded with zeroes.
+ * It returns the pointer to the ending '\0'.
+ */
+static char *ultoad2(unsigned int x, char *out, int min_pairs)
+{
+ unsigned int q;
+ char *p = out;
+ int pos = 4;
+ unsigned long long y;
+
+ static const unsigned short bcd[100] = {
+ 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
+ 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
+ 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
+ 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
+ 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
+ 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
+ 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
+ 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
+ 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
+ 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 };
+
+ y = x * 1441151881ULL; /* y>>57 will be the integer part of x/100M */
+ while (1) {
+ q = y >> 57;
+ /* Q is composed of the first digit in the lower byte and the second
+ * digit in the higher byte.
+ */
+ if (p != out || q > 9 || pos < min_pairs) {
+#if defined(__i386__) || defined(__x86_64__)
+ /* unaligned accesses are fast on x86 */
+ *(unsigned short *)p = bcd[q];
+ p += 2;
+#else
+ *(p++) = bcd[q];
+ *(p++) = bcd[q] >> 8;
+#endif
+ }
+ else if (q || !pos) {
+ /* only at most one digit */
+ *(p++) = bcd[q] >> 8;
+ }
+ if (--pos < 0)
+ break;
+
+ y &= 0x1FFFFFFFFFFFFFFULL; // remainder
+
+ if (sizeof(long) >= sizeof(long long)) {
+ /* shifting is preferred on 64-bit archs, while mult is faster on 32-bit.
+ * We multiply by 100 by doing *5, *5 and *4, all of which are trivial.
+ */
+ y += (y << 2);
+ y += (y << 2);
+ y <<= 2;
+ }
+ else
+ y *= 100;
+ }
+
+ *p = '\0';
+ return p;
+}
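+
+/* Worked example of the routine above (illustrative): with x = 123456789 and
+ * min_pairs = 0, successive rounds emit "1", "23", "45", "67", "89", yielding
+ * "123456789". With x = 42 and min_pairs = 3 (as used for tv_usec), the
+ * left-padding kicks in and the output is "000042".
+ */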
+
+/* Send <h> as hex into <out>. Returns the pointer to the ending '\0'. */
+static char *emit_hex(unsigned long h, char *out)
+{
+ static unsigned char hextab[16] = "0123456789abcdef";
+ int shift = sizeof(h) * 8 - 4;
+ unsigned int idx;
+
+ do {
+ idx = (h >> shift);
+ if (idx || !shift)
+ *out++ = hextab[idx & 15];
+ shift -= 4;
+ } while (shift >= 0);
+ *out = '\0';
+ return out;
+}
+
+static void make_line(void *from, void *to, int level, char dir, long ret)
+{
+ char *p = line;
+
+ if (unlikely(!log) && !open_trace())
+ return;
+
+ if (unlikely(!fast_time))
+ gettimeofday(now_ptr, NULL);
+
+#ifdef USE_SLOW_FPRINTF
+ if (!use_tsc)
+ fprintf(log, "%u.%06u %d %p %c %p\n",
+ (unsigned int)now_ptr->tv_sec,
+ (unsigned int)now_ptr->tv_usec,
+ level, from, dir, to);
+ else
+ fprintf(log, "%llx %d %p %c %p\n",
+ rdtsc(), level, from, dir, to);
+ return;
+#endif
+
+ if (unlikely(!use_tsc)) {
+		/* "%u.%06u", tv_sec, tv_usec */
+ p = ultoad2(now_ptr->tv_sec, p, 0);
+ *p++ = '.';
+ p = ultoad2(now_ptr->tv_usec, p, 3);
+ } else {
+ /* "%08x%08x", high, low */
+ unsigned long long t = rdtsc();
+ if (sizeof(long) < sizeof(long long))
+ p = emit_hex((unsigned long)(t >> 32U), p);
+ p = emit_hex((unsigned long)(t), p);
+ }
+
+ /* " %u", level */
+ *p++ = ' ';
+ p = ultoad2(level, p, 0);
+
+ /* " %p", from */
+ *p++ = ' '; *p++ = '0'; *p++ = 'x';
+ p = emit_hex((unsigned long)from, p);
+
+ /* " %c", dir */
+ *p++ = ' '; *p++ = dir;
+
+ /* " %p", to */
+ *p++ = ' '; *p++ = '0'; *p++ = 'x';
+ p = emit_hex((unsigned long)to, p);
+
+ if (dir == '<') {
+ /* " %x", ret */
+ *p++ = ' '; *p++ = '0'; *p++ = 'x';
+ p = emit_hex(ret, p);
+ }
+
+ *p++ = '\n';
+
+ fwrite(line, p - line, 1, log);
+}
+
+/* These are the functions GCC calls */
+void __cyg_profile_func_enter(void *to, void *from)
+{
+ if (!disabled)
+ return make_line(from, to, ++level, '>', 0);
+}
+
+void __cyg_profile_func_exit(void *to, void *from)
+{
+ long ret = 0;
+
+#if defined(__x86_64__)
+ /* on x86_64, the return value (eax) is temporarily stored in ebx
+ * during the call to __cyg_profile_func_exit() so we can snoop it.
+ */
+ asm volatile("mov %%rbx, %0" : "=r"(ret));
+#endif
+ if (!disabled)
+ return make_line(from, to, level--, '<', ret);
+}
+
+/* This one adds comments to the trace above. The output format is:
+ * <timestamp> <level> # <string>
+ */
+__attribute__((format(printf, 1, 2)))
+void calltrace(char *fmt, ...)
+{
+ va_list ap;
+
+ if (unlikely(!log) && !open_trace())
+ return;
+
+ if (unlikely(!fast_time))
+ gettimeofday(now_ptr, NULL);
+
+ if (!use_tsc)
+ fprintf(log, "%u.%06u %d # ",
+ (unsigned int)now_ptr->tv_sec,
+ (unsigned int)now_ptr->tv_usec,
+ level + 1);
+ else
+ fprintf(log, "%llx %d # ",
+ rdtsc(), level + 1);
+
+ va_start(ap, fmt);
+ vfprintf(log, fmt, ap);
+ va_end(ap);
+ fputc('\n', log);
+ fflush(log);
+}
diff --git a/src/cbuf.c b/src/cbuf.c
new file mode 100644
index 0000000..b36bbeb
--- /dev/null
+++ b/src/cbuf.c
@@ -0,0 +1,59 @@
+/*
+ * Circular buffer management
+ *
+ * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaill@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+#include <haproxy/cbuf-t.h>
+
+DECLARE_POOL(pool_head_cbuf, "cbuf", sizeof(struct cbuf));
+
+/* Allocate and return a new circular buffer using <buf> as its <sz>-byte
+ * internal storage, or NULL on allocation failure.
+ */
+struct cbuf *cbuf_new(unsigned char *buf, size_t sz)
+{
+ struct cbuf *cbuf;
+
+ cbuf = pool_alloc(pool_head_cbuf);
+ if (cbuf) {
+ cbuf->sz = sz;
+ cbuf->buf = buf;
+ cbuf->wr = 0;
+ cbuf->rd = 0;
+ }
+
+ return cbuf;
+}
+
+/* Free circular buffer <cbuf> (the internal storage remains owned by the caller) */
+void cbuf_free(struct cbuf *cbuf)
+{
+ if (!cbuf)
+ return;
+
+ pool_free(pool_head_cbuf, cbuf);
+}
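+
+/* Minimal usage sketch (assumes the caller owns the storage; "ring" and
+ * "storage" are hypothetical names):
+ *
+ *   unsigned char storage[4096];
+ *   struct cbuf *ring = cbuf_new(storage, sizeof(storage));
+ *   if (ring) {
+ *       // ... produce at ring->wr, consume at ring->rd ...
+ *       cbuf_free(ring);
+ *   }
+ */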
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/cfgcond.c b/src/cfgcond.c
new file mode 100644
index 0000000..5fb7069
--- /dev/null
+++ b/src/cfgcond.c
@@ -0,0 +1,521 @@
+/*
+ * Configuration condition preprocessor
+ *
+ * Copyright 2000-2021 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgcond.h>
+#include <haproxy/global.h>
+#include <haproxy/tools.h>
+
+/* supported condition predicates */
+const struct cond_pred_kw cond_predicates[] = {
+ { "defined", CFG_PRED_DEFINED, ARG1(1, STR) },
+ { "feature", CFG_PRED_FEATURE, ARG1(1, STR) },
+ { "streq", CFG_PRED_STREQ, ARG2(2, STR, STR) },
+ { "strneq", CFG_PRED_STRNEQ, ARG2(2, STR, STR) },
+ { "version_atleast", CFG_PRED_VERSION_ATLEAST, ARG1(1, STR) },
+ { "version_before", CFG_PRED_VERSION_BEFORE, ARG1(1, STR) },
+ { "openssl_version_atleast", CFG_PRED_OSSL_VERSION_ATLEAST, ARG1(1, STR) },
+ { "openssl_version_before", CFG_PRED_OSSL_VERSION_BEFORE, ARG1(1, STR) },
+ { "ssllib_name_startswith", CFG_PRED_SSLLIB_NAME_STARTSWITH, ARG1(1, STR) },
+ { NULL, CFG_PRED_NONE, 0 }
+};
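+
+/* Illustrative conditions using these predicates (a sketch of the .if/.elif
+ * syntax they serve; names and versions are example values):
+ *
+ *   .if defined(ENABLE_ADMIN)
+ *   .elif version_atleast(2.5) && !feature(OPENSSL)
+ *   .elif streq(FOO,BAR) || openssl_version_atleast(1.1.1)
+ */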
+
+/* looks up a cond predicate matching the keyword in <str>, possibly followed
+ * by a parenthesis. Returns a pointer to it or NULL if not found.
+ */
+const struct cond_pred_kw *cfg_lookup_cond_pred(const char *str)
+{
+ const struct cond_pred_kw *ret;
+ int len = strcspn(str, " (");
+
+ for (ret = &cond_predicates[0]; ret->word; ret++) {
+ if (len != strlen(ret->word))
+ continue;
+ if (strncmp(str, ret->word, len) != 0)
+ continue;
+ return ret;
+ }
+ return NULL;
+}
+
+/* Frees <term> and its args. NULL is supported and does nothing. */
+void cfg_free_cond_term(struct cfg_cond_term *term)
+{
+ if (!term)
+ return;
+
+ if (term->type == CCTT_PAREN) {
+ cfg_free_cond_expr(term->expr);
+ term->expr = NULL;
+ }
+
+ free_args(term->args);
+ free(term->args);
+ free(term);
+}
+
+/* Parse an indirect input text as a possible config condition term.
+ * Returns <0 on parsing error, 0 if the parser is desynchronized, or >0 on
+ * success. <term> is allocated and filled with the parsed info, and <text>
+ * is updated on success to point to the first unparsed character, or is left
+ * untouched on failure. On success, the caller must free <term> using
+ * cfg_free_cond_term(). An error will be set in <err> on error, and only
+ * in this case. In this case the first bad character will be reported in
+ * <errptr>. <maxdepth> corresponds to the maximum recursion depth permitted,
+ * it is decremented on each recursive call and the parsing will fail once
+ * it reaches <= 0.
+ */
+int cfg_parse_cond_term(const char **text, struct cfg_cond_term **term, char **err, const char **errptr, int maxdepth)
+{
+ struct cfg_cond_term *t;
+ const char *in = *text;
+ const char *end_ptr;
+ int err_arg;
+ int nbargs;
+ char *end;
+ long val;
+
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ if (!*in) /* empty term does not parse */
+ return 0;
+
+ *term = NULL;
+ if (maxdepth <= 0)
+ goto fail0;
+
+ t = *term = calloc(1, sizeof(**term));
+ if (!t) {
+ memprintf(err, "memory allocation error while parsing conditional expression '%s'", *text);
+ goto fail1;
+ }
+
+ t->type = CCTT_NONE;
+ t->args = NULL;
+ t->neg = 0;
+
+ /* !<term> negates the term. White spaces permitted */
+ while (*in == '!') {
+ t->neg = !t->neg;
+ do { in++; } while (*in == ' ' || *in == '\t');
+ }
+
+ val = strtol(in, &end, 0);
+ if (end != in) {
+ t->type = val ? CCTT_TRUE : CCTT_FALSE;
+ *text = end;
+ return 1;
+ }
+
+ /* Try to parse '(' EXPR ')' */
+ if (*in == '(') {
+ int ret;
+
+ t->type = CCTT_PAREN;
+ t->args = NULL;
+
+ do { in++; } while (*in == ' ' || *in == '\t');
+ ret = cfg_parse_cond_expr(&in, &t->expr, err, errptr, maxdepth - 1);
+ if (ret == -1)
+ goto fail2;
+ if (ret == 0)
+ goto fail0;
+
+ /* find the closing ')' */
+ while (*in == ' ' || *in == '\t')
+ in++;
+ if (*in != ')') {
+ memprintf(err, "expected ')' after conditional expression '%s'", *text);
+ goto fail1;
+ }
+ do { in++; } while (*in == ' ' || *in == '\t');
+ *text = in;
+ return 1;
+ }
+
+	/* below we'll likely call make_arg_list() so on failure we must only
+	 * return via the <fail2> label which frees the arg list.
+	 */
+ t->pred = cfg_lookup_cond_pred(in);
+ if (t->pred) {
+ t->type = CCTT_PRED;
+ nbargs = make_arg_list(in + strlen(t->pred->word), -1,
+ t->pred->arg_mask, &t->args, err,
+ &end_ptr, &err_arg, NULL);
+ if (nbargs < 0) {
+ memprintf(err, "%s in argument %d of predicate '%s' used in conditional expression", *err, err_arg, t->pred->word);
+ if (errptr)
+ *errptr = end_ptr;
+ goto fail2;
+ }
+ *text = end_ptr;
+ return 1;
+ }
+
+ fail0:
+ memprintf(err, "unparsable conditional expression '%s'", *text);
+ fail1:
+ if (errptr)
+ *errptr = *text;
+ fail2:
+ cfg_free_cond_term(*term);
+ *term = NULL;
+ return -1;
+}
+
+/* evaluate a condition term on a .if/.elif line. The condition was already
+ * parsed in <term>. Returns -1 on error (in which case err is filled with a
+ * message, and only in this case), 0 if the condition is false, 1 if it's
+ * true.
+ */
+int cfg_eval_cond_term(const struct cfg_cond_term *term, char **err)
+{
+ int ret = -1;
+
+ if (term->type == CCTT_FALSE)
+ ret = 0;
+ else if (term->type == CCTT_TRUE)
+ ret = 1;
+ else if (term->type == CCTT_PRED) {
+ /* here we know we have a valid predicate with valid arguments
+ * placed in term->args (which the caller will free).
+ */
+ switch (term->pred->prd) {
+ case CFG_PRED_DEFINED: // checks if arg exists as an environment variable
+ ret = getenv(term->args[0].data.str.area) != NULL;
+ break;
+
+ case CFG_PRED_FEATURE: { // checks if the arg matches an enabled feature
+ const char *p;
+
+ ret = 0; // assume feature not found
+ for (p = build_features; (p = strstr(p, term->args[0].data.str.area)); p++) {
+ if (p > build_features &&
+ (p[term->args[0].data.str.data] == ' ' ||
+ p[term->args[0].data.str.data] == 0)) {
+ if (*(p-1) == '+') { // e.g. "+OPENSSL"
+ ret = 1;
+ break;
+ }
+ else if (*(p-1) == '-') { // e.g. "-OPENSSL"
+ ret = 0;
+ break;
+ }
+ /* it was a sub-word, let's restart from next place */
+ }
+ }
+ break;
+ }
+ case CFG_PRED_STREQ: // checks if the two arg are equal
+ ret = strcmp(term->args[0].data.str.area, term->args[1].data.str.area) == 0;
+ break;
+
+ case CFG_PRED_STRNEQ: // checks if the two arg are different
+ ret = strcmp(term->args[0].data.str.area, term->args[1].data.str.area) != 0;
+ break;
+
+ case CFG_PRED_VERSION_ATLEAST: // checks if the current version is at least this one
+ ret = compare_current_version(term->args[0].data.str.area) <= 0;
+ break;
+
+ case CFG_PRED_VERSION_BEFORE: // checks if the current version is older than this one
+ ret = compare_current_version(term->args[0].data.str.area) > 0;
+ break;
+
+ case CFG_PRED_OSSL_VERSION_ATLEAST: { // checks if the current openssl version is at least this one
+ int opensslret = openssl_compare_current_version(term->args[0].data.str.area);
+
+ if (opensslret < -1) /* can't parse the string or no openssl available */
+ ret = -1;
+ else
+ ret = opensslret <= 0;
+ break;
+ }
+ case CFG_PRED_OSSL_VERSION_BEFORE: { // checks if the current openssl version is older than this one
+ int opensslret = openssl_compare_current_version(term->args[0].data.str.area);
+
+ if (opensslret < -1) /* can't parse the string or no openssl available */
+ ret = -1;
+ else
+ ret = opensslret > 0;
+ break;
+ }
+ case CFG_PRED_SSLLIB_NAME_STARTSWITH: { // checks if the current SSL library's name starts with a specified string (can be used to distinguish OpenSSL from LibreSSL or BoringSSL)
+ ret = openssl_compare_current_name(term->args[0].data.str.area) == 0;
+ break;
+ }
+ default:
+ memprintf(err, "internal error: unhandled conditional expression predicate '%s'", term->pred->word);
+ break;
+ }
+ }
+ else if (term->type == CCTT_PAREN) {
+ ret = cfg_eval_cond_expr(term->expr, err);
+ }
+ else {
+ memprintf(err, "internal error: unhandled condition term type %d", (int)term->type);
+ }
+
+ if (ret >= 0 && term->neg)
+ ret = !ret;
+ return ret;
+}
+
+
+/* Frees <expr> and its terms and args. NULL is supported and does nothing. */
+void cfg_free_cond_and(struct cfg_cond_and *expr)
+{
+ struct cfg_cond_and *prev;
+
+ while (expr) {
+ cfg_free_cond_term(expr->left);
+ prev = expr;
+ expr = expr->right;
+ free(prev);
+ }
+}
+
+/* Frees <expr> and its terms and args. NULL is supported and does nothing. */
+void cfg_free_cond_expr(struct cfg_cond_expr *expr)
+{
+ struct cfg_cond_expr *prev;
+
+ while (expr) {
+ cfg_free_cond_and(expr->left);
+ prev = expr;
+ expr = expr->right;
+ free(prev);
+ }
+}
+
+/* Parse an indirect input text as a possible config condition sub-expr.
+ * Returns <0 on parsing error, 0 if the parser is desynchronized, or >0 on
+ * success. <expr> is filled with the parsed info, and <text> is updated on
+ * success to point to the first unparsed character, or is left untouched
+ * on failure. On success, the caller will have to free all lower-level
+ * allocated structs using cfg_free_cond_expr(). An error will be set in
+ * <err> on error, and only in this case. In this case the first bad
+ * character will be reported in <errptr>. <maxdepth> corresponds to the
+ * maximum recursion depth permitted, it is decremented on each recursive
+ * call and the parsing will fail once it reaches <= 0.
+ */
+int cfg_parse_cond_and(const char **text, struct cfg_cond_and **expr, char **err, const char **errptr, int maxdepth)
+{
+ struct cfg_cond_and *e;
+ const char *in = *text;
+ int ret = -1;
+
+ if (!*in) /* empty expr does not parse */
+ return 0;
+
+ *expr = NULL;
+ if (maxdepth <= 0) {
+ memprintf(err, "unparsable conditional sub-expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ goto done;
+ }
+
+ e = *expr = calloc(1, sizeof(**expr));
+ if (!e) {
+ memprintf(err, "memory allocation error while parsing conditional expression '%s'", *text);
+ goto done;
+ }
+
+ ret = cfg_parse_cond_term(&in, &e->left, err, errptr, maxdepth - 1);
+ if (ret == -1) // parse error, error already reported
+ goto done;
+
+ if (ret == 0) {
+ /* ret == 0, no other way to parse this */
+ memprintf(err, "unparsable conditional sub-expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ ret = -1;
+ goto done;
+ }
+
+ /* ret=1, we have a term in the left hand set */
+
+ /* find an optional '&&' */
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ *text = in;
+ if (in[0] != '&' || in[1] != '&')
+ goto done;
+
+ /* we have a '&&', let's parse the right handset's subexp */
+ in += 2;
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ ret = cfg_parse_cond_and(&in, &e->right, err, errptr, maxdepth - 1);
+ if (ret > 0)
+ *text = in;
+ done:
+ if (ret < 0) {
+ cfg_free_cond_and(*expr);
+ *expr = NULL;
+ }
+ return ret;
+}
+
+/* Parse an indirect input text as a possible config condition expression.
+ * Returns <0 on parsing error, 0 if the parser is desynchronized, or >0 on
+ * success. <expr> is filled with the parsed info, and <text> is updated on
+ * success to point to the first unparsed character, or is left untouched
+ * on failure. On success, the caller will have to free all lower-level
+ * allocated structs using cfg_free_cond_expr(). An error will be set in
+ * <err> on error, and only in this case. In this case the first bad
+ * character will be reported in <errptr>. <maxdepth> corresponds to the
+ * maximum recursion depth permitted, it is decremented on each recursive call
+ * and the parsing will fail once it reaches <= 0.
+ */
+int cfg_parse_cond_expr(const char **text, struct cfg_cond_expr **expr, char **err, const char **errptr, int maxdepth)
+{
+ struct cfg_cond_expr *e;
+ const char *in = *text;
+ int ret = -1;
+
+ if (!*in) /* empty expr does not parse */
+ return 0;
+
+ *expr = NULL;
+ if (maxdepth <= 0) {
+ memprintf(err, "unparsable conditional expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ goto done;
+ }
+
+ e = *expr = calloc(1, sizeof(**expr));
+ if (!e) {
+ memprintf(err, "memory allocation error while parsing conditional expression '%s'", *text);
+ goto done;
+ }
+
+ ret = cfg_parse_cond_and(&in, &e->left, err, errptr, maxdepth - 1);
+ if (ret == -1) // parse error, error already reported
+ goto done;
+
+ if (ret == 0) {
+ /* ret == 0, no other way to parse this */
+ memprintf(err, "unparsable conditional expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ ret = -1;
+ goto done;
+ }
+
+ /* ret=1, we have a sub-expr in the left hand set */
+
+ /* find an optional '||' */
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ *text = in;
+ if (in[0] != '|' || in[1] != '|')
+ goto done;
+
+ /* we have a '||', let's parse the right handset's subexp */
+ in += 2;
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ ret = cfg_parse_cond_expr(&in, &e->right, err, errptr, maxdepth - 1);
+ if (ret > 0)
+ *text = in;
+ done:
+ if (ret < 0) {
+ cfg_free_cond_expr(*expr);
+ *expr = NULL;
+ }
+ return ret;
+}
+
+/* evaluate a sub-expression on a .if/.elif line. The expression is valid and
+ * was already parsed in <expr>. Returns -1 on error (in which case err is
+ * filled with a message, and only in this case), 0 if the condition is false,
+ * 1 if it's true.
+ */
+int cfg_eval_cond_and(struct cfg_cond_and *expr, char **err)
+{
+ int ret;
+
+ /* AND: loop on terms and sub-exp's terms as long as they're TRUE
+ * (stop on FALSE and ERROR).
+ */
+ while ((ret = cfg_eval_cond_term(expr->left, err)) > 0 && expr->right)
+ expr = expr->right;
+ return ret;
+}
+
+/* evaluate an expression on a .if/.elif line. The expression is valid and was
+ * already parsed in <expr>. Returns -1 on error (in which case err is filled
+ * with a message, and only in this case), 0 if the condition is false, 1 if
+ * it's true.
+ */
+int cfg_eval_cond_expr(struct cfg_cond_expr *expr, char **err)
+{
+ int ret;
+
+ /* OR: loop on sub-exps as long as they're FALSE (stop on TRUE and ERROR) */
+ while ((ret = cfg_eval_cond_and(expr->left, err)) == 0 && expr->right)
+ expr = expr->right;
+ return ret;
+}
+
+/* evaluate a condition on a .if/.elif line. The condition is already tokenized
+ * in <args>. Returns -1 on error (in which case err is filled with a message,
+ * and only in this case), 0 if the condition is false, 1 if it's true. If
+ * <errptr> is not NULL, it's set to the first invalid character on error.
+ */
+int cfg_eval_condition(char **args, char **err, const char **errptr)
+{
+ struct cfg_cond_expr *expr = NULL;
+ const char *text = args[0];
+ int ret = -1;
+
+ if (!*text) /* note: empty = false */
+ return 0;
+
+ ret = cfg_parse_cond_expr(&text, &expr, err, errptr, MAX_CFG_RECURSION);
+ if (ret != 0) {
+ if (ret == -1) // parse error, error already reported
+ goto done;
+ while (*text == ' ' || *text == '\t')
+ text++;
+
+ if (*text) {
+ ret = -1;
+ memprintf(err, "unexpected character '%c' at the end of conditional expression '%s'",
+ *text, args[0]);
+ goto fail;
+ }
+
+ ret = cfg_eval_cond_expr(expr, err);
+ goto done;
+ }
+
+ /* ret == 0, no other way to parse this */
+ ret = -1;
+ memprintf(err, "unparsable conditional expression '%s'", args[0]);
+ fail:
+ if (errptr)
+ *errptr = text;
+ done:
+ cfg_free_cond_expr(expr);
+ return ret;
+}
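+
+/* Illustrative call of the evaluator above (a sketch, error handling
+ * trimmed):
+ *
+ *   char *err = NULL;
+ *   const char *errptr = NULL;
+ *   char *args[] = { "defined(ENABLE_FOO) || !feature(OPENSSL)", NULL };
+ *   int r = cfg_eval_condition(args, &err, &errptr); // -1 err, 0 false, 1 true
+ */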
diff --git a/src/cfgdiag.c b/src/cfgdiag.c
new file mode 100644
index 0000000..f8e4a9e
--- /dev/null
+++ b/src/cfgdiag.c
@@ -0,0 +1,97 @@
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include <import/ebistree.h>
+
+#include <haproxy/cfgdiag.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/server.h>
+
+/* Use this function to emit a diagnostic warning.
+ * This can be used as a shortcut to set the value pointed to by <ret> to 1
+ * at the same time.
+ */
+static inline void diag_warning(int *ret, char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ *ret = 1;
+ _ha_vdiag_warning(fmt, argp);
+ va_end(argp);
+}
+
+/* Use this for dynamic allocation in diagnostics.
+ * In case of allocation failure, this will immediately terminate haproxy.
+ */
+static inline void *diag_alloc(size_t size)
+{
+ void *out = NULL;
+
+ if (!(out = malloc(size))) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+
+ return out;
+}
+
+/* Checks that two servers from the same backend do not share the same cookie
+ * value. Backup servers are not taken into account as it can be quite common to
+ * share cookie values in this case.
+ */
+static void check_server_cookies(int *ret)
+{
+ struct cookie_entry {
+ struct ebpt_node node;
+ };
+
+ struct proxy *px;
+ struct server *srv;
+
+ struct eb_root cookies_tree = EB_ROOT_UNIQUE;
+ struct ebpt_node *cookie_node;
+ struct cookie_entry *cookie_entry;
+ struct ebpt_node *node;
+
+ for (px = proxies_list; px; px = px->next) {
+ for (srv = px->srv; srv; srv = srv->next) {
+ /* do not take into account backup servers */
+ if (!srv->cookie || (srv->flags & SRV_F_BACKUP))
+ continue;
+
+ cookie_node = ebis_lookup(&cookies_tree, srv->cookie);
+ if (cookie_node) {
+ diag_warning(ret, "parsing [%s:%d] : 'server %s' : same cookie value is set for a previous non-backup server in the same backend, it may break connection persistence\n",
+ srv->conf.file, srv->conf.line, srv->id);
+ continue;
+ }
+
+ cookie_entry = diag_alloc(sizeof(*cookie_entry));
+ cookie_entry->node.key = srv->cookie;
+ ebis_insert(&cookies_tree, &cookie_entry->node);
+ }
+
+ /* clear the tree and free its entries */
+ while ((node = ebpt_first(&cookies_tree))) {
+ cookie_entry = ebpt_entry(node, struct cookie_entry, node);
+ eb_delete(&node->node);
+ free(cookie_entry);
+ }
+ }
+}
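+
+/* Illustrative configuration triggering the check above (example names and
+ * addresses):
+ *
+ *   backend app
+ *       server s1 192.0.2.1:80 cookie app1
+ *       server s2 192.0.2.2:80 cookie app1          # duplicate -> diag warning
+ *       server s3 192.0.2.3:80 cookie app1 backup   # ignored (backup)
+ */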
+
+/* Placeholder to execute various diagnostic checks after the configuration file
+ * has been fully parsed. It will output a warning for each diagnostic found.
+ *
+ * Returns 0 if no diagnostic message has been found, else 1.
+ */
+int cfg_run_diagnostics()
+{
+ int ret = 0;
+
+ check_server_cookies(&ret);
+
+ return ret;
+}
diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c
new file mode 100644
index 0000000..85b44df
--- /dev/null
+++ b/src/cfgparse-global.c
@@ -0,0 +1,1304 @@
+#define _GNU_SOURCE /* for cpu_set_t from haproxy/cpuset.h */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
+#ifdef USE_CPU_AFFINITY
+#include <haproxy/cpuset.h>
+#endif
+#include <haproxy/compression.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/peers.h>
+#include <haproxy/tools.h>
+
+/* some keywords that are still being parsed using strcmp() and are not
+ * registered anywhere. They are used as suggestions for mistyped words.
+ */
+static const char *common_kw_list[] = {
+ "global", "daemon", "master-worker", "noepoll", "nokqueue",
+ "noevports", "nopoll", "busy-polling", "set-dumpable",
+ "insecure-fork-wanted", "insecure-setuid-wanted", "nosplice",
+ "nogetaddrinfo", "noreuseport", "quiet", "zero-warning",
+ "tune.runqueue-depth", "tune.maxpollevents", "tune.maxaccept",
+ "tune.recv_enough", "tune.buffers.limit",
+ "tune.buffers.reserve", "tune.bufsize", "tune.maxrewrite",
+ "tune.idletimer", "tune.rcvbuf.client", "tune.rcvbuf.server",
+ "tune.sndbuf.client", "tune.sndbuf.server", "tune.pipesize",
+ "tune.http.cookielen", "tune.http.logurilen", "tune.http.maxhdr",
+ "tune.comp.maxlevel", "tune.pattern.cache-size", "uid", "gid",
+ "external-check", "user", "group", "nbproc", "maxconn",
+ "ssl-server-verify", "maxconnrate", "maxsessrate", "maxsslrate",
+ "maxcomprate", "maxpipes", "maxzlibmem", "maxcompcpuusage", "ulimit-n",
+ "chroot", "description", "node", "pidfile", "unix-bind", "log",
+ "log-send-hostname", "server-state-base", "server-state-file",
+ "log-tag", "spread-checks", "max-spread-checks", "cpu-map", "setenv",
+ "presetenv", "unsetenv", "resetenv", "strict-limits", "localpeer",
+ "numa-cpu-mapping", "defaults", "listen", "frontend", "backend",
+ "peers", "resolvers", "cluster-secret",
+ NULL /* must be last */
+};
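+
+/* Illustrative global section exercising keywords handled below (a sketch,
+ * all values are examples):
+ *
+ *   global
+ *       daemon
+ *       master-worker
+ *       tune.runqueue-depth 200
+ *       tune.maxpollevents 200
+ */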
+
+/*
+ * parse a line in a <global> section. Returns the error code, 0 if OK, or
+ * any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the first two can stop processing, the two others are just
+ * indicators.
+ */
+int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = 0;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "global") == 0) { /* new section */
+ /* no option, nothing special to do */
+ alertif_too_many_args(0, file, linenum, args, &err_code);
+ goto out;
+ }
+ else if (strcmp(args[0], "expose-experimental-directives") == 0) {
+ experimental_directives_allowed = 1;
+ }
+ else if (strcmp(args[0], "daemon") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.mode |= MODE_DAEMON;
+ }
+ else if (strcmp(args[0], "master-worker") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*args[1]) {
+ if (strcmp(args[1], "no-exit-on-failure") == 0) {
+ global.tune.options |= GTUNE_NOEXIT_ONFAILURE;
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'no-exit-on-failure' option.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ global.mode |= MODE_MWORKER;
+ }
+ else if (strcmp(args[0], "noepoll") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_EPOLL;
+ }
+ else if (strcmp(args[0], "nokqueue") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_KQUEUE;
+ }
+ else if (strcmp(args[0], "noevports") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_EVPORTS;
+ }
+ else if (strcmp(args[0], "nopoll") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_POLL;
+ }
+ else if (strcmp(args[0], "busy-polling") == 0) { /* "no busy-polling" or "busy-polling" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_BUSY_POLLING;
+ else
+ global.tune.options |= GTUNE_BUSY_POLLING;
+ }
+ else if (strcmp(args[0], "set-dumpable") == 0) { /* "no set-dumpable" or "set-dumpable" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_SET_DUMPABLE;
+ else
+ global.tune.options |= GTUNE_SET_DUMPABLE;
+ }
+ else if (strcmp(args[0], "h2-workaround-bogus-websocket-clients") == 0) { /* "no h2-workaround-bogus-websocket-clients" or "h2-workaround-bogus-websocket-clients" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_DISABLE_H2_WEBSOCKET;
+ else
+ global.tune.options |= GTUNE_DISABLE_H2_WEBSOCKET;
+ }
+ else if (strcmp(args[0], "insecure-fork-wanted") == 0) { /* "no insecure-fork-wanted" or "insecure-fork-wanted" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_INSECURE_FORK;
+ else
+ global.tune.options |= GTUNE_INSECURE_FORK;
+ }
+ else if (strcmp(args[0], "insecure-setuid-wanted") == 0) { /* "no insecure-setuid-wanted" or "insecure-setuid-wanted" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_INSECURE_SETUID;
+ else
+ global.tune.options |= GTUNE_INSECURE_SETUID;
+ }
+ else if (strcmp(args[0], "nosplice") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_SPLICE;
+ }
+ else if (strcmp(args[0], "nogetaddrinfo") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_GAI;
+ }
+ else if (strcmp(args[0], "noreuseport") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_REUSEPORT;
+ }
+ else if (strcmp(args[0], "quiet") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.mode |= MODE_QUIET;
+ }
+ else if (strcmp(args[0], "zero-warning") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.mode |= MODE_ZERO_WARNING;
+ }
+ else if (strcmp(args[0], "tune.runqueue-depth") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.runqueue_depth != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.runqueue_depth = atol(args[1]);
+
+ }
+ else if (strcmp(args[0], "tune.maxpollevents") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.maxpollevents != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.maxpollevents = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.maxaccept") == 0) {
+ long max;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.maxaccept != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ max = atol(args[1]);
+ if (/*max < -1 || */max > INT_MAX) {
+ ha_alert("parsing [%s:%d] : '%s' expects -1 or an integer from 0 to INT_MAX.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.maxaccept = max;
+ }
+ else if (strcmp(args[0], "tune.chksize") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more (tune.bufsize is used instead).\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "tune.recv_enough") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.recv_enough = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.buffers.limit") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.buf_limit = atol(args[1]);
+ if (global.tune.buf_limit) {
+ if (global.tune.buf_limit < 3)
+ global.tune.buf_limit = 3;
+ if (global.tune.buf_limit <= global.tune.reserved_bufs)
+ global.tune.buf_limit = global.tune.reserved_bufs + 1;
+ }
+ }
+ else if (strcmp(args[0], "tune.buffers.reserve") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.reserved_bufs = atol(args[1]);
+ if (global.tune.reserved_bufs < 2)
+ global.tune.reserved_bufs = 2;
+ if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs)
+ global.tune.buf_limit = global.tune.reserved_bufs + 1;
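+ /* e.g. (illustrative): "tune.buffers.reserve 8" after "tune.buffers.limit 8"
+ * silently raises the limit to 9 so that limit > reserve always holds.
+ */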
+ }
+ else if (strcmp(args[0], "tune.bufsize") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.bufsize = atol(args[1]);
+ /* round it up to support a two-pointer alignment at the end */
+ global.tune.bufsize = (global.tune.bufsize + 2 * sizeof(void *) - 1) & -(2 * sizeof(void *));
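+ /* e.g. (illustrative, with 64-bit pointers): 2*sizeof(void *) = 16, so a
+ * configured value of 16385 becomes (16385 + 15) & ~15 = 16400.
+ */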
+ if (global.tune.bufsize <= 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.maxrewrite") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.maxrewrite = atol(args[1]);
+ if (global.tune.maxrewrite < 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.idletimer") == 0) {
+ unsigned int idle;
+ const char *res;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a timer value between 0 and 65535 ms.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ res = parse_time_err(args[1], &idle, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 65535 ms.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (idle > 65535) {
+ ha_alert("parsing [%s:%d] : '%s' expects a timer value between 0 and 65535 ms.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.idle_timer = idle;
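+ /* e.g. (illustrative): "tune.idletimer 2s" is converted by parse_time_err()
+ * above and stored as 2000 ms.
+ */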
+ }
+ else if (strcmp(args[0], "tune.rcvbuf.client") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.client_rcvbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.client_rcvbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.rcvbuf.server") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.server_rcvbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.server_rcvbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.sndbuf.client") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.client_sndbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.client_sndbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.sndbuf.server") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.server_sndbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.server_sndbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.pipesize") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.pipesize = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.http.cookielen") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.cookie_len = atol(args[1]) + 1;
+ }
+ else if (strcmp(args[0], "tune.http.logurilen") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.requri_len = atol(args[1]) + 1;
+ }
+ else if (strcmp(args[0], "tune.http.maxhdr") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.max_http_hdr = atoi(args[1]);
+ if (global.tune.max_http_hdr < 1 || global.tune.max_http_hdr > 32767) {
+ ha_alert("parsing [%s:%d] : '%s' expects a numeric value between 1 and 32767\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.comp.maxlevel") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*args[1]) {
+ global.tune.comp_maxlevel = atoi(args[1]);
+ if (global.tune.comp_maxlevel < 1 || global.tune.comp_maxlevel > 9) {
+ ha_alert("parsing [%s:%d] : '%s' expects a numeric value between 1 and 9\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' expects a numeric value between 1 and 9\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.pattern.cache-size") == 0) {
+ if (*args[1]) {
+ global.tune.pattern_cache = atoi(args[1]);
+ if (global.tune.pattern_cache < 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "cluster-secret") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*args[1] == 0) {
+ ha_alert("parsing [%s:%d] : expects an ASCII string argument.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (global.cluster_secret != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ ha_free(&global.cluster_secret);
+ global.cluster_secret = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "uid") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.uid != 0) {
+ ha_alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strl2irc(args[1], strlen(args[1]), &global.uid) != 0) {
+ ha_warning("parsing [%s:%d] : uid: string '%s' is not a number.\n | You might want to use the 'user' parameter to use a system user name.\n", file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ }
+ else if (strcmp(args[0], "gid") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.gid != 0) {
+ ha_alert("parsing [%s:%d] : group/gid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strl2irc(args[1], strlen(args[1]), &global.gid) != 0) {
+ ha_warning("parsing [%s:%d] : gid: string '%s' is not a number.\n | You might want to use the 'group' parameter to use a system group name.\n", file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "external-check") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.external_check = 1;
+ }
+ /* user/group name handling */
+ else if (strcmp(args[0], "user") == 0) {
+ struct passwd *ha_user;
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.uid != 0) {
+ ha_alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ errno = 0;
+ ha_user = getpwnam(args[1]);
+ if (ha_user != NULL) {
+ global.uid = (int)ha_user->pw_uid;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : cannot find user id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "group") == 0) {
+ struct group *ha_group;
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.gid != 0) {
+ ha_alert("parsing [%s:%d] : gid/group was already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ errno = 0;
+ ha_group = getgrnam(args[1]);
+ if (ha_group != NULL) {
+ global.gid = (int)ha_group->gr_gid;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : cannot find group id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ /* end of user/group name handling */
+ else if (strcmp(args[0], "nbproc") == 0) {
+ ha_alert("parsing [%s:%d] : nbproc is not supported any more since HAProxy 2.5. Threads will automatically be used on multi-processor machines if available.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "maxconn") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.maxconn != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.maxconn = atol(args[1]);
+#ifdef SYSTEM_MAXCONN
+ if (global.maxconn > SYSTEM_MAXCONN && cfg_maxconn <= SYSTEM_MAXCONN) {
+ ha_alert("parsing [%s:%d] : maxconn value %d too high for this system.\nLimiting to %d. Please use '-n' to force the value.\n", file, linenum, global.maxconn, SYSTEM_MAXCONN);
+ global.maxconn = SYSTEM_MAXCONN;
+ err_code |= ERR_ALERT;
+ }
+#endif /* SYSTEM_MAXCONN */
+ }
+ else if (strcmp(args[0], "ssl-server-verify") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcmp(args[1],"none") == 0)
+ global.ssl_server_verify = SSL_SERVER_VERIFY_NONE;
+ else if (strcmp(args[1],"required") == 0)
+ global.ssl_server_verify = SSL_SERVER_VERIFY_REQUIRED;
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'none' or 'required' as argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "maxconnrate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.cps_lim != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.cps_lim = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxsessrate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.sps_lim != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.sps_lim = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxsslrate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.ssl_lim != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.ssl_lim = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxcomprate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument in kb/s.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.comp_rate_lim = atoi(args[1]) * 1024;
+ }
+ else if (strcmp(args[0], "maxpipes") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.maxpipes != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.maxpipes = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxzlibmem") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.maxzlibmem = atol(args[1]) * 1024L * 1024L;
+ }
+ else if (strcmp(args[0], "maxcompcpuusage") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument between 0 and 100.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ compress_min_idle = 100 - atoi(args[1]);
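+ /* e.g. (illustrative): "maxcompcpuusage 40" yields compress_min_idle = 60,
+ * i.e. compression effort is reduced once measured idle time drops below 60%.
+ */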
+ if (compress_min_idle > 100) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument between 0 and 100.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "fd-hard-limit") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.fd_hard_limit != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.fd_hard_limit = atol(args[1]);
+ }
+ else if (strcmp(args[0], "ulimit-n") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.rlimit_nofile != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.rlimit_nofile = atol(args[1]);
+ }
+ else if (strcmp(args[0], "chroot") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.chroot != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a directory as an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.chroot = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "description") == 0) {
+ int i, len=0;
+ char *d;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects a string argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (i = 1; *args[i]; i++)
+ len += strlen(args[i]) + 1;
+
+ if (global.desc)
+ free(global.desc);
+
+ global.desc = d = calloc(1, len);
+ if (!d) {
+ ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ d += snprintf(d, global.desc + len - d, "%s", args[1]);
+ for (i = 2; *args[i]; i++)
+ d += snprintf(d, global.desc + len - d, " %s", args[i]);
+ }
+ else if (strcmp(args[0], "node") == 0) {
+ int i;
+ char c;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ for (i=0; args[1][i]; i++) {
+ c = args[1][i];
+ if (!isupper((unsigned char)c) && !islower((unsigned char)c) &&
+ !isdigit((unsigned char)c) && c != '_' && c != '-' && c != '.')
+ break;
+ }
+
+ if (!i || args[1][i]) {
+ ha_alert("parsing [%s:%d]: '%s' requires valid node name - non-empty string"
+ " with digits(0-9), letters(A-Z, a-z), dot(.), hyphen(-) or underscode(_).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (global.node)
+ free(global.node);
+
+ global.node = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "pidfile") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.pidfile != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a file name as an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.pidfile = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "unix-bind") == 0) {
+ int cur_arg = 1;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "prefix") == 0) {
+ if (global.unix_bind.prefix != NULL) {
+ ha_alert("parsing [%s:%d] : unix-bind '%s' already specified. Continuing.\n", file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT;
+ cur_arg += 2;
+ continue;
+ }
+
+ if (*(args[cur_arg+1]) == 0) {
+ ha_alert("parsing [%s:%d] : unix_bind '%s' expects a path as an argument.\n", file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.unix_bind.prefix = strdup(args[cur_arg+1]);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "mode") == 0) {
+
+ global.unix_bind.ux.mode = strtol(args[cur_arg + 1], NULL, 8);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "uid") == 0) {
+
+ global.unix_bind.ux.uid = atol(args[cur_arg + 1 ]);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "gid") == 0) {
+
+ global.unix_bind.ux.gid = atol(args[cur_arg + 1 ]);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "user") == 0) {
+ struct passwd *user;
+
+ user = getpwnam(args[cur_arg + 1]);
+ if (!user) {
+ ha_alert("parsing [%s:%d] : '%s' : '%s' unknown user.\n",
+ file, linenum, args[0], args[cur_arg + 1 ]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ global.unix_bind.ux.uid = user->pw_uid;
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "group") == 0) {
+ struct group *group;
+
+ group = getgrnam(args[cur_arg + 1]);
+ if (!group) {
+ ha_alert("parsing [%s:%d] : '%s' : '%s' unknown group.\n",
+ file, linenum, args[0], args[cur_arg + 1 ]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ global.unix_bind.ux.gid = group->gr_gid;
+ cur_arg += 2;
+ continue;
+ }
+
+ ha_alert("parsing [%s:%d] : '%s' only supports the 'prefix', 'mode', 'uid', 'gid', 'user' and 'group' options.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) { /* "no log" or "log ..." */
+ if (!parse_logsrv(args, &global.logsrvs, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log-send-hostname") == 0) { /* set the hostname in syslog header */
+ char *name;
+
+ if (global.log_send_hostname != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ if (*(args[1]))
+ name = args[1];
+ else
+ name = hostname;
+
+ free(global.log_send_hostname);
+ global.log_send_hostname = strdup(name);
+ }
+ else if (strcmp(args[0], "server-state-base") == 0) { /* path base where HAProxy can find server state files */
+ if (global.server_state_base != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects one argument: a directory path.\n", file, linenum, args[0]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ global.server_state_base = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "server-state-file") == 0) { /* path to the file where HAProxy can load the server states */
+ if (global.server_state_file != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expect one argument: a file path.\n", file, linenum, args[0]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ global.server_state_file = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "log-tag") == 0) { /* tag to report to syslog */
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a tag for use in syslog.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chunk_destroy(&global.log_tag);
+ chunk_initlen(&global.log_tag, strdup(args[1]), strlen(args[1]), strlen(args[1]));
+ if (b_orig(&global.log_tag) == NULL) {
+ chunk_destroy(&global.log_tag);
+ ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "spread-checks") == 0) { /* random time between checks (0-50) */
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.spread_checks != 0) {
+ ha_alert("parsing [%s:%d]: spread-checks already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects an integer argument (0..50).\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.spread_checks = atol(args[1]);
+ if (global.spread_checks < 0 || global.spread_checks > 50) {
+ ha_alert("parsing [%s:%d]: 'spread-checks' needs a positive value in range 0..50.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "max-spread-checks") == 0) { /* maximum time between first and last check */
+ const char *err;
+ unsigned int val;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects an integer argument (0..50).\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = parse_time_err(args[1], &val, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err) {
+ ha_alert("parsing [%s:%d]: unsupported character '%c' in '%s' (wants an integer delay).\n", file, linenum, *err, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.max_spread_checks = val;
+ }
+ else if (strcmp(args[0], "cpu-map") == 0) {
+ /* map a process list to a CPU set */
+#ifdef USE_CPU_AFFINITY
+ char *slash;
+ unsigned long proc = 0, thread = 0;
+ int j, n, autoinc;
+ struct hap_cpuset cpus, cpus_copy;
+
+ if (!*args[1] || !*args[2]) {
+ ha_alert("parsing [%s:%d] : %s expects a process number "
+ " ('all', 'odd', 'even', a number from 1 to %d or a range), "
+ " followed by a list of CPU ranges with numbers from 0 to %d.\n",
+ file, linenum, args[0], LONGBITS, LONGBITS - 1);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((slash = strchr(args[1], '/')) != NULL)
+ *slash = 0;
+
+ /* note: we silently ignore processes over MAX_PROCS and
+ * threads over MAX_THREADS so as not to make configurations a
+ * pain to maintain.
+ */
+ if (parse_process_number(args[1], &proc, LONGBITS, &autoinc, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (slash) {
+ if (parse_process_number(slash+1, &thread, LONGBITS, NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ *slash = '/';
+ }
+
+ if (parse_cpu_set((const char **)args+2, &cpus, 0, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (autoinc &&
+ my_popcountl(proc) != ha_cpuset_count(&cpus) &&
+ my_popcountl(thread) != ha_cpuset_count(&cpus)) {
+ ha_alert("parsing [%s:%d] : %s : PROC/THREAD range and CPU sets "
+ "must have the same size to be automatically bound\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
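+ /* Illustrative example, assuming the documented "auto:" prefix is what
+ * sets <autoinc>: "cpu-map auto:1/1-4 0-3" binds thread 1 to CPU 0,
+ * thread 2 to CPU 1, and so on, one CPU per thread.
+ */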
+
+ /* we now have to deal with 3 real cases :
+ * cpu-map P-Q => mapping for whole processes, numbers P to Q
+ * cpu-map P-Q/1 => mapping of first thread of processes P to Q
+ * cpu-map 1/T-U => mapping of threads T to U of process 1
+ * (note: P=Q=1 since 2.5).
+ * Otherwise other combinations are silently ignored since nbthread
+ * and nbproc cannot both be >1 :
+ * cpu-map P-Q/T => mapping for thread T for processes P to Q.
+ * Only one of T,Q may be > 1, others ignored.
+ * cpu-map P/T-U => mapping for threads T to U of process P. Only
+ * one of P,U may be > 1, others ignored.
+ */
+ if (!thread || thread == 0x1) {
+ /* mapping for whole process. E.g. cpu-map 1 0-3 or cpu-map 1/1 0-3 */
+ ha_cpuset_assign(&cpus_copy, &cpus);
+
+ if (!autoinc)
+ ha_cpuset_assign(&cpu_map.proc, &cpus);
+ else {
+ ha_cpuset_zero(&cpu_map.proc);
+ n = ha_cpuset_ffs(&cpus_copy) - 1;
+ ha_cpuset_clr(&cpus_copy, n);
+ ha_cpuset_set(&cpu_map.proc, n);
+ }
+ } else {
+ /* first process, iterate on threads. E.g. cpu-map 1/1-4 0-3 */
+ ha_cpuset_assign(&cpus_copy, &cpus);
+ for (j = n = 0; j < MAX_THREADS; j++) {
+ /* No mapping for this thread */
+ if (!(thread & (1UL << j)))
+ continue;
+
+ if (!autoinc)
+ ha_cpuset_assign(&cpu_map.thread[j], &cpus);
+ else {
+ ha_cpuset_zero(&cpu_map.thread[j]);
+ n = ha_cpuset_ffs(&cpus_copy) - 1;
+ ha_cpuset_clr(&cpus_copy, n);
+ ha_cpuset_set(&cpu_map.thread[j], n);
+ }
+ }
+
+ HA_DIAG_WARNING_COND(proc != 0x1 && thread != 0x1,
+ "parsing [%s:%d] : cpu-map statement is considered invalid and thus ignored as it addresses multiple processes and threads at the same time. At least one of them should be 1 and only 1.", file, linenum);
+ }
+#else
+ ha_alert("parsing [%s:%d] : '%s' is not enabled, please check build options for USE_CPU_AFFINITY.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif /* ! USE_CPU_AFFINITY */
+ }
+ else if (strcmp(args[0], "setenv") == 0 || strcmp(args[0], "presetenv") == 0) {
+ if (alertif_too_many_args(3, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects a name and a value.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* "setenv" overwrites, "presetenv" only sets if not yet set */
+ if (setenv(args[1], args[2], (args[0][0] == 's')) != 0) {
+ ha_alert("parsing [%s:%d]: '%s' failed on variable '%s' : %s.\n", file, linenum, args[0], args[1], strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "unsetenv") == 0) {
+ int arg;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects at least one variable name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (arg = 1; *args[arg]; arg++) {
+ if (unsetenv(args[arg]) != 0) {
+ ha_alert("parsing [%s:%d]: '%s' failed on variable '%s' : %s.\n", file, linenum, args[0], args[arg], strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ }
+ else if (strcmp(args[0], "resetenv") == 0) {
+ extern char **environ;
+ char **env = environ;
+
+ /* args contain variable names to keep, one per argument */
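+ /* e.g. (illustrative): "resetenv PATH TERM" removes every environment
+ * variable except PATH and TERM.
+ */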
+ while (*env) {
+ int arg;
+
+ /* look for the current variable among all those we want to keep */
+ for (arg = 1; *args[arg]; arg++) {
+ if (strncmp(*env, args[arg], strlen(args[arg])) == 0 &&
+ (*env)[strlen(args[arg])] == '=')
+ break;
+ }
+
+ /* not in the keep list: delete this variable. unsetenv() compacts
+ * environ, so <env> must not be advanced in this case.
+ */
+ if (!*args[arg]) {
+ char *delim = strchr(*env, '=');
+
+ if (!delim || delim - *env >= trash.size) {
+ ha_alert("parsing [%s:%d]: '%s' failed to unset invalid variable '%s'.\n", file, linenum, args[0], *env);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ memcpy(trash.area, *env, delim - *env);
+ trash.area[delim - *env] = 0;
+
+ if (unsetenv(trash.area) != 0) {
+ ha_alert("parsing [%s:%d]: '%s' failed to unset variable '%s' : %s.\n", file, linenum, args[0], *env, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else
+ env++;
+ }
+ }
+ else if (strcmp(args[0], "strict-limits") == 0) { /* "no strict-limits" or "strict-limits" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_STRICT_LIMITS;
+ }
+ else if (strcmp(args[0], "localpeer") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a name as an argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (global.localpeer_cmdline != 0) {
+ ha_warning("parsing [%s:%d] : '%s' ignored since it is already set by using the '-L' "
+ "command line argument.\n", file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ if (cfg_peers) {
+ ha_warning("parsing [%s:%d] : '%s' ignored since it is used after 'peers' section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ free(localpeer);
+ if ((localpeer = strdup(args[1])) == NULL) {
+ ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ setenv("HAPROXY_LOCALPEER", localpeer, 1);
+ }
+ else if (strcmp(args[0], "numa-cpu-mapping") == 0) {
+ global.numa_cpu_mapping = (kwm == KWM_NO) ? 0 : 1;
+ }
+ else {
+ struct cfg_kw_list *kwl;
+ const char *best;
+ int index;
+ int rc;
+
+ list_for_each_entry(kwl, &cfg_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].section != CFG_GLOBAL)
+ continue;
+ if (strcmp(kwl->kw[index].kw, args[0]) == 0) {
+ if (check_kw_experimental(&kwl->kw[index], file, linenum, &errmsg)) {
+ ha_alert("%s\n", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ rc = kwl->kw[index].parse(args, CFG_GLOBAL, NULL, NULL, file, linenum, &errmsg);
+ if (rc < 0) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (rc > 0) {
+ ha_warning("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+ goto out;
+ }
+ }
+ }
+
+ best = cfg_find_best_match(args[0], &cfg_keywords.list, CFG_GLOBAL, common_kw_list);
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n", file, linenum, args[0], cursection, best);
+ else
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "global");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ out:
+ free(errmsg);
+ return err_code;
+}
+
diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c
new file mode 100644
index 0000000..de9c105
--- /dev/null
+++ b/src/cfgparse-listen.c
@@ -0,0 +1,3207 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/buf.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/compression-t.h>
+#include <haproxy/connection.h>
+#include <haproxy/extcheck.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/peers.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth.h>
+
+/* some keywords that are still being parsed using strcmp() and are not
+ * registered anywhere. They are used as suggestions for mistyped words.
+ */
+static const char *common_kw_list[] = {
+ "listen", "frontend", "backend", "defaults", "server",
+ "default-server", "server-template", "bind", "monitor-net",
+ "monitor-uri", "mode", "id", "description", "disabled", "enabled",
+ "bind-process", "acl", "dynamic-cookie-key", "cookie", "email-alert",
+ "persist", "appsession", "load-server-state-from-file",
+ "server-state-file-name", "max-session-srv-conns", "capture",
+ "retries", "http-request", "http-response", "http-after-response",
+ "http-send-name-header", "block", "redirect", "use_backend",
+ "use-server", "force-persist", "ignore-persist", "force-persist",
+ "stick-table", "stick", "stats", "option", "default_backend",
+ "http-reuse", "monitor", "transparent", "maxconn", "backlog",
+ "fullconn", "dispatch", "balance", "hash-type",
+ "hash-balance-factor", "unique-id-format", "unique-id-header",
+ "log-format", "log-format-sd", "log-tag", "log", "source", "usesrc",
+ "error-log-format",
+ NULL /* must be last */
+};
+
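+/* proxy "option" keyword names still parsed using strcmp(); presumably used,
+ * like common_kw_list above, as suggestions for mistyped words.
+ */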
+static const char *common_options[] = {
+ "httpclose", "http-server-close", "http-keep-alive",
+ "redispatch", "httplog", "tcplog", "tcpka", "httpchk",
+ "ssl-hello-chk", "smtpchk", "pgsql-check", "redis-check",
+ "mysql-check", "ldap-check", "spop-check", "tcp-check",
+ "external-check", "forwardfor", "original-to",
+ NULL /* must be last */
+};
+
+/* Report a warning if a rule is placed after a 'tcp-request session' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_tcp_sess(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->tcp_req.l5_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'tcp-request session' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'tcp-request content' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_tcp_cont(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->tcp_req.inspect_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'tcp-request content' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'monitor fail' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_monitor(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->mon_fail_cond)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'monitor fail' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after an 'http-request' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_http_req(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->http_req_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after an 'http-request' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a redirect rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_redirect(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->redirect_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'redirect' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'use_backend' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_use_backend(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->switching_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'use_backend' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'use-server' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_use_server(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->server_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'use-server' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* report a warning if a redirect rule is dangerously placed */
+int warnif_misplaced_redirect(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_use_backend(proxy, file, line, arg) ||
+ warnif_rule_after_use_server(proxy, file, line, arg);
+}
+
+/* report a warning if an http-request rule is dangerously placed */
+int warnif_misplaced_http_req(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_redirect(proxy, file, line, arg) ||
+ warnif_misplaced_redirect(proxy, file, line, arg);
+}
+
+/* report a warning if a 'monitor fail' rule is dangerously placed */
+int warnif_misplaced_monitor(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_http_req(proxy, file, line, arg) ||
+ warnif_misplaced_http_req(proxy, file, line, arg);
+}
+
+/* report a warning if a "tcp request content" rule is dangerously placed */
+int warnif_misplaced_tcp_cont(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_monitor(proxy, file, line, arg) ||
+ warnif_misplaced_monitor(proxy, file, line, arg);
+}
+
+/* report a warning if a "tcp request session" rule is dangerously placed */
+int warnif_misplaced_tcp_sess(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_tcp_cont(proxy, file, line, arg) ||
+ warnif_misplaced_tcp_cont(proxy, file, line, arg);
+}
+
+/* report a warning if a "tcp request connection" rule is dangerously placed */
+int warnif_misplaced_tcp_conn(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_tcp_sess(proxy, file, line, arg) ||
+ warnif_misplaced_tcp_sess(proxy, file, line, arg);
+}
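+/* Summary note (for illustration): these helpers chain so that each rule type
+ * is checked against every rule category evaluated after it. For instance,
+ * warnif_misplaced_tcp_conn() ends up warning about already-present
+ * 'tcp-request session', 'tcp-request content', 'monitor fail', 'http-request',
+ * 'redirect', 'use_backend' and 'use-server' rules.
+ */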
+
+int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
+{
+ static struct proxy *curproxy = NULL;
+ static struct proxy *curr_defproxy = NULL;
+ static struct proxy *last_defproxy = NULL;
+ const char *err;
+ int rc;
+ int err_code = 0;
+ struct acl_cond *cond = NULL;
+ char *errmsg = NULL;
+ struct bind_conf *bind_conf;
+
+ if (!last_defproxy) {
+ /* we need a default proxy and none was created yet */
+ last_defproxy = alloc_new_proxy("", PR_CAP_DEF|PR_CAP_LISTEN, &errmsg);
+
+ curr_defproxy = last_defproxy;
+ if (!last_defproxy) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ proxy_preset_defaults(last_defproxy);
+ }
+
+ if (strcmp(args[0], "listen") == 0)
+ rc = PR_CAP_LISTEN | PR_CAP_LB;
+ else if (strcmp(args[0], "frontend") == 0)
+ rc = PR_CAP_FE | PR_CAP_LB;
+ else if (strcmp(args[0], "backend") == 0)
+ rc = PR_CAP_BE | PR_CAP_LB;
+ else if (strcmp(args[0], "defaults") == 0) {
+ /* "defaults" must first delete the last no-name defaults if any */
+ curr_defproxy = NULL;
+ rc = PR_CAP_DEF | PR_CAP_LISTEN;
+ }
+ else
+ rc = PR_CAP_NONE;
+
+ if ((rc & PR_CAP_LISTEN) && !(rc & PR_CAP_DEF)) { /* new proxy */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an <id> argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ curproxy = (rc & PR_CAP_FE) ? proxy_fe_by_name(args[1]) : proxy_be_by_name(args[1]);
+ if (curproxy) {
+ ha_alert("Parsing [%s:%d]: %s '%s' has the same name as %s '%s' declared at %s:%d.\n",
+ file, linenum, proxy_cap_str(rc), args[1], proxy_type_str(curproxy),
+ curproxy->id, curproxy->conf.file, curproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ curproxy = log_forward_by_name(args[1]);
+ if (curproxy) {
+ ha_alert("Parsing [%s:%d]: %s '%s' has the same name as log forward section '%s' declared at %s:%d.\n",
+ file, linenum, proxy_cap_str(rc), args[1],
+ curproxy->id, curproxy->conf.file, curproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[2] && (!*args[3] || strcmp(args[2], "from") != 0)) ||
+ alertif_too_many_args(3, file, linenum, args, &err_code)) {
+ if (rc & PR_CAP_FE)
+ ha_alert("parsing [%s:%d] : please use the 'bind' keyword for listening addresses.\n", file, linenum);
+ goto out;
+ }
+ }
+
+ if (rc & PR_CAP_LISTEN) { /* new proxy or defaults section */
+ const char *name = args[1];
+ int arg = 2;
+
+ if (rc & PR_CAP_DEF && strcmp(args[1], "from") == 0 && *args[2] && !*args[3]) {
+ // also support "defaults from blah" (no name then)
+ arg = 1;
+ name = "";
+ }
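+ /* e.g. (illustrative): both "frontend f from mydefs" and the unnamed
+ * form "defaults from mydefs" are accepted; the latter creates a new
+ * defaults section with an empty name.
+ */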
+
+ /* only regular proxies inherit from the previous defaults section */
+ if (!(rc & PR_CAP_DEF))
+ curr_defproxy = last_defproxy;
+
+ if (strcmp(args[arg], "from") == 0) {
+ struct ebpt_node *next_by_name;
+
+ curr_defproxy = proxy_find_by_name(args[arg+1], PR_CAP_DEF, 0);
+
+ if (!curr_defproxy) {
+ ha_alert("parsing [%s:%d] : defaults section '%s' not found for %s '%s'.\n", file, linenum, args[arg+1], proxy_cap_str(rc), name);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if ((next_by_name = ebpt_next_dup(&curr_defproxy->conf.by_name))) {
+ struct proxy *px2 = container_of(next_by_name, struct proxy, conf.by_name);
+
+ ha_alert("parsing [%s:%d] : ambiguous defaults section name '%s' referenced by %s '%s' exists at least at %s:%d and %s:%d.\n",
+ file, linenum, args[arg+1], proxy_cap_str(rc), name,
+ curr_defproxy->conf.file, curr_defproxy->conf.line, px2->conf.file, px2->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ err = invalid_char(args[arg+1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in defaults section name '%s' when designated by its name (section found at %s:%d).\n",
+ file, linenum, *err, args[arg+1], curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ curr_defproxy->flags |= PR_FL_EXPLICIT_REF;
+ }
+ else if (curr_defproxy)
+ curr_defproxy->flags |= PR_FL_IMPLICIT_REF;
+
+ if (curr_defproxy && (curr_defproxy->flags & (PR_FL_EXPLICIT_REF|PR_FL_IMPLICIT_REF)) == (PR_FL_EXPLICIT_REF|PR_FL_IMPLICIT_REF)) {
+ ha_alert("parsing [%s:%d] : defaults section '%s' (declared at %s:%d) is explicitly referenced by another proxy and implicitly used here."
+ " To avoid any ambiguity don't mix both usage. Add a last defaults section not explicitly used or always use explicit references.\n",
+ file, linenum, curr_defproxy->id, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_WARN;
+ }
+
+ curproxy = parse_new_proxy(name, rc, file, linenum, curr_defproxy);
+ if (!curproxy) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curr_defproxy && (!LIST_ISEMPTY(&curr_defproxy->http_req_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->http_res_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->http_after_res_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.l4_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.l5_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.inspect_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_rep.inspect_rules))) {
+ /* If the current default proxy defines TCP/HTTP rules, the
+ * current proxy will keep a reference on it. But some sanity
+ * checks are performed first:
+ *
+ * - It cannot be used to init a defaults section
+ * - It cannot be used to init a listen section
+ * - It cannot be used to init backend and frontend sections at
+ * same time. It can be used to init several sections of the
+ * same type only.
+ * - It cannot define L4/L5 TCP rules if it is used to init
+ * backend sections.
+ * - It cannot define 'tcp-response content' rules if it
+ * is used to init frontend sections.
+ *
+ * If no error is found, refcount of the default proxy is incremented.
+ */
+
+ /* Note: Add tcpcheck_rules too if unresolved args become allowed in defaults sections */
+ if (rc & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: a defaults section cannot inherit from a defaults section defining TCP/HTTP rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else if ((rc & PR_CAP_LISTEN) == PR_CAP_LISTEN) {
+ ha_alert("parsing [%s:%d]: a listen section cannot inherit from a defaults section defining TCP/HTTP rules.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else {
+ char defcap = (curr_defproxy->cap & PR_CAP_LISTEN);
+
+ if ((defcap == PR_CAP_BE || defcap == PR_CAP_FE) && (rc & PR_CAP_LISTEN) != defcap) {
+ ha_alert("parsing [%s:%d]: frontends and backends cannot inherit from the same defaults section"
+ " if it defines TCP/HTTP rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else if (!(rc & PR_CAP_FE) && (!LIST_ISEMPTY(&curr_defproxy->tcp_req.l4_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.l5_rules))) {
+ ha_alert("parsing [%s:%d]: a backend section cannot inherit from a defaults section defining"
+ " 'tcp-request connection' or 'tcp-request session' rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else if (!(rc & PR_CAP_BE) && !LIST_ISEMPTY(&curr_defproxy->tcp_rep.inspect_rules)) {
+ ha_alert("parsing [%s:%d]: a frontend section cannot inherit from a defaults section defining"
+ " 'tcp-response content' rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else {
+ curr_defproxy->cap = (curr_defproxy->cap & ~PR_CAP_LISTEN) | (rc & PR_CAP_LISTEN);
+ proxy_ref_defaults(curproxy, curr_defproxy);
+ }
+ }
+ }
+
+ if (curr_defproxy && (curr_defproxy->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) &&
+ (curproxy->cap & PR_CAP_LISTEN) == PR_CAP_BE) {
+ /* If the current default proxy defines tcpcheck rules, the
+ * current proxy will keep a reference on it. but only if the
+ * current proxy has the backend capability.
+ */
+ proxy_ref_defaults(curproxy, curr_defproxy);
+ }
+
+ if ((rc & PR_CAP_BE) && curr_defproxy && (curr_defproxy->nb_req_cap || curr_defproxy->nb_rsp_cap)) {
+ ha_alert("parsing [%s:%d]: backend or defaults sections cannot inherit from a defaults section defining"
+ " capptures (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+
+ if (rc & PR_CAP_DEF) {
+ /* last and current proxies must be updated to this one */
+ curr_defproxy = last_defproxy = curproxy;
+ } else {
+ /* regular proxies are in a list */
+ curproxy->next = proxies_list;
+ proxies_list = curproxy;
+ }
+ goto out;
+ }
+ else if (curproxy == NULL) {
+ ha_alert("parsing [%s:%d] : 'listen' or 'defaults' expected.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* update the current file and line being parsed */
+ curproxy->conf.args.file = curproxy->conf.file;
+ curproxy->conf.args.line = linenum;
+
+ /* Now let's parse the proxy-specific keywords */
+ if ((strcmp(args[0], "server") == 0)) {
+ err_code |= parse_server(file, linenum, args,
+ curproxy, curr_defproxy,
+ SRV_PARSE_PARSE_ADDR);
+
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[0], "default-server") == 0) {
+ err_code |= parse_server(file, linenum, args,
+ curproxy, curr_defproxy,
+ SRV_PARSE_DEFAULT_SERVER);
+
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[0], "server-template") == 0) {
+ err_code |= parse_server(file, linenum, args,
+ curproxy, curr_defproxy,
+ SRV_PARSE_TEMPLATE|SRV_PARSE_PARSE_ADDR);
+
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[0], "bind") == 0) { /* new listen addresses */
+ struct listener *l;
+ int cur_arg;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects {<path>|[addr1]:port1[-end1]}{,[addr]:port[-end]}... as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ bind_conf = bind_conf_alloc(curproxy, file, linenum, args[1], xprt_get(XPRT_RAW));
+ if (!bind_conf)
+ goto alloc_error;
+
+ /* use default settings for unix sockets */
+ bind_conf->settings.ux.uid = global.unix_bind.ux.uid;
+ bind_conf->settings.ux.gid = global.unix_bind.ux.gid;
+ bind_conf->settings.ux.mode = global.unix_bind.ux.mode;
+
+ /* NOTE: the following line might create several listeners if there
+ * are comma-separated IPs or port ranges. So all further processing
+ * will have to be applied to all listeners created after last_listen.
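+	 * For example (illustrative), "bind :8080,:8443" or "bind :8100-8103"
+	 * would each yield one listener per address and per port in the range.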
+ */
+ if (!str2listener(args[1], curproxy, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s' : %s\n", file, linenum, args[0], errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s' : error encountered while parsing listening address '%s'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+			/* each listener added by this "bind" line counts against the global maxsock limit */
+ global.maxsock++;
+ }
+
+ cur_arg = 2;
+ err_code |= bind_parse_args_list(bind_conf, args, cur_arg, cursection, file, linenum);
+ goto out;
+ }
+ else if (strcmp(args[0], "monitor-net") == 0) { /* set the range of IPs to ignore */
+ ha_alert("parsing [%s:%d] : 'monitor-net' doesn't exist anymore. Please use 'http-request return status 200 if { src %s }' instead.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "monitor-uri") == 0) { /* set the URI to intercept */
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!*args[1]) {
+			ha_alert("parsing [%s:%d] : '%s' expects a URI.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ istfree(&curproxy->monitor_uri);
+ curproxy->monitor_uri = istdup(ist(args[1]));
+ if (!isttest(curproxy->monitor_uri))
+ goto alloc_error;
+
+ goto out;
+ }
+ else if (strcmp(args[0], "mode") == 0) { /* sets the proxy mode */
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (strcmp(args[1], "http") == 0) curproxy->mode = PR_MODE_HTTP;
+ else if (strcmp(args[1], "tcp") == 0) curproxy->mode = PR_MODE_TCP;
+ else if (strcmp(args[1], "health") == 0) {
+ ha_alert("parsing [%s:%d] : 'mode health' doesn't exist anymore. Please use 'http-request return status 200' instead.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown proxy mode '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "id") == 0) {
+ struct eb32_node *node;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s' not allowed in 'defaults' section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->uuid = atol(args[1]);
+ curproxy->conf.id.key = curproxy->uuid;
+ curproxy->options |= PR_O_FORCED_ID;
+
+ if (curproxy->uuid <= 0) {
+ ha_alert("parsing [%s:%d]: custom id has to be > 0.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ node = eb32_lookup(&used_proxy_id, curproxy->uuid);
+ if (node) {
+ struct proxy *target = container_of(node, struct proxy, conf.id);
+ ha_alert("parsing [%s:%d]: %s %s reuses same custom id as %s %s (declared at %s:%d).\n",
+ file, linenum, proxy_type_str(curproxy), curproxy->id,
+ proxy_type_str(target), target->id, target->conf.file, target->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ eb32_insert(&used_proxy_id, &curproxy->conf.id);
+ }
+ else if (strcmp(args[0], "description") == 0) {
+ int i, len=0;
+ char *d;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s' not allowed in 'defaults' section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects a string argument.\n",
+ file, linenum, args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+ }
+
+ for (i = 1; *args[i]; i++)
+ len += strlen(args[i]) + 1;
+
+ d = calloc(1, len);
+ if (!d)
+ goto alloc_error;
+ curproxy->desc = d;
+
+ d += snprintf(d, curproxy->desc + len - d, "%s", args[1]);
+ for (i = 2; *args[i]; i++)
+ d += snprintf(d, curproxy->desc + len - d, " %s", args[i]);
+
+ }
+ else if (strcmp(args[0], "disabled") == 0) { /* disables this proxy */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ curproxy->flags |= PR_FL_DISABLED;
+ }
+ else if (strcmp(args[0], "enabled") == 0) { /* enables this proxy (used to revert a disabled default) */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ curproxy->flags &= ~PR_FL_DISABLED;
+ }
+ else if (strcmp(args[0], "bind-process") == 0) { /* enable this proxy only on some processes */
+ int cur_arg = 1;
+ unsigned long set = 0;
+
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "all") == 0) {
+ set = 0;
+ break;
+ }
+ if (parse_process_number(args[cur_arg], &set, 1, NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg++;
+ }
+ ha_warning("parsing [%s:%d]: '%s' has no effect, is deprecated, and will be removed in version 2.7.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+ else if (strcmp(args[0], "acl") == 0) { /* add an ACL */
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in acl name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcasecmp(args[1], "or") == 0) {
+ ha_alert("parsing [%s:%d] : acl name '%s' will never match. 'or' is used to express a "
+ "logical disjunction within a condition.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (parse_acl((const char **)args + 1, &curproxy->acl, &errmsg, &curproxy->conf.args, file, linenum) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing ACL '%s' : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "dynamic-cookie-key") == 0) { /* Dynamic cookies secret key */
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects <secret_key> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->dyncookie_key);
+ curproxy->dyncookie_key = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "cookie") == 0) { /* cookie name */
+ int cur_arg;
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects <cookie_name> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->ck_opts = 0;
+ curproxy->cookie_maxidle = curproxy->cookie_maxlife = 0;
+ ha_free(&curproxy->cookie_domain);
+ free(curproxy->cookie_name);
+ curproxy->cookie_name = strdup(args[1]);
+ if (!curproxy->cookie_name)
+ goto alloc_error;
+ curproxy->cookie_len = strlen(curproxy->cookie_name);
+
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "rewrite") == 0) {
+ curproxy->ck_opts |= PR_CK_RW;
+ }
+ else if (strcmp(args[cur_arg], "indirect") == 0) {
+ curproxy->ck_opts |= PR_CK_IND;
+ }
+ else if (strcmp(args[cur_arg], "insert") == 0) {
+ curproxy->ck_opts |= PR_CK_INS;
+ }
+ else if (strcmp(args[cur_arg], "nocache") == 0) {
+ curproxy->ck_opts |= PR_CK_NOC;
+ }
+ else if (strcmp(args[cur_arg], "postonly") == 0) {
+ curproxy->ck_opts |= PR_CK_POST;
+ }
+ else if (strcmp(args[cur_arg], "preserve") == 0) {
+ curproxy->ck_opts |= PR_CK_PSV;
+ }
+ else if (strcmp(args[cur_arg], "prefix") == 0) {
+ curproxy->ck_opts |= PR_CK_PFX;
+ }
+ else if (strcmp(args[cur_arg], "httponly") == 0) {
+ curproxy->ck_opts |= PR_CK_HTTPONLY;
+ }
+ else if (strcmp(args[cur_arg], "secure") == 0) {
+ curproxy->ck_opts |= PR_CK_SECURE;
+ }
+ else if (strcmp(args[cur_arg], "domain") == 0) {
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <domain> as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!strchr(args[cur_arg + 1], '.')) {
+ /* rfc6265, 5.2.3 The Domain Attribute */
+ ha_warning("parsing [%s:%d]: domain '%s' contains no embedded dot,"
+ " this configuration may not work properly (see RFC6265#5.2.3).\n",
+ file, linenum, args[cur_arg + 1]);
+ err_code |= ERR_WARN;
+ }
+
+ err = invalid_domainchar(args[cur_arg + 1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in domain name '%s'.\n",
+ file, linenum, *err, args[cur_arg + 1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!curproxy->cookie_domain) {
+ curproxy->cookie_domain = strdup(args[cur_arg + 1]);
+ } else {
+ /* one domain was already specified, add another one by
+ * building the string which will be returned along with
+ * the cookie.
+ */
+ memprintf(&curproxy->cookie_domain, "%s; domain=%s", curproxy->cookie_domain, args[cur_arg+1]);
+ }
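+
+				/* e.g. (illustrative) "domain one.example domain two.example"
+				 * builds the string "one.example; domain=two.example", so each
+				 * extra domain gets its own "domain=" attribute in the cookie.
+				 */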
+
+ if (!curproxy->cookie_domain)
+ goto alloc_error;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "maxidle") == 0) {
+ unsigned int maxidle;
+ const char *res;
+
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <idletime> in seconds as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ res = parse_time_err(args[cur_arg + 1], &maxidle, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 s (~68 years).\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 s.\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->cookie_maxidle = maxidle;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "maxlife") == 0) {
+ unsigned int maxlife;
+ const char *res;
+
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <lifetime> in seconds as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ res = parse_time_err(args[cur_arg + 1], &maxlife, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 s (~68 years).\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 s.\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->cookie_maxlife = maxlife;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "dynamic") == 0) { /* Dynamic persistent cookies secret key */
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[cur_arg], NULL))
+ err_code |= ERR_WARN;
+ curproxy->ck_opts |= PR_CK_DYNAMIC;
+ }
+ else if (strcmp(args[cur_arg], "attr") == 0) {
+ char *val;
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <value> as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ val = args[cur_arg + 1];
+ while (*val) {
+ if (iscntrl((unsigned char)*val) || *val == ';') {
+ ha_alert("parsing [%s:%d]: character '%%x%02X' is not permitted in attribute value.\n",
+ file, linenum, *val);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ val++;
+ }
+ /* don't add ';' for the first attribute */
+ if (!curproxy->cookie_attrs)
+ curproxy->cookie_attrs = strdup(args[cur_arg + 1]);
+ else
+ memprintf(&curproxy->cookie_attrs, "%s; %s", curproxy->cookie_attrs, args[cur_arg + 1]);
+
+ if (!curproxy->cookie_attrs)
+ goto alloc_error;
+ cur_arg++;
+ }
+
+ else {
+				ha_alert("parsing [%s:%d] : '%s' supports 'rewrite', 'insert', 'prefix', 'indirect', 'nocache', 'postonly', 'preserve', 'httponly', 'secure', 'domain', 'maxidle', 'maxlife', 'dynamic' and 'attr' options.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg++;
+ }
+ if (!POWEROF2(curproxy->ck_opts & (PR_CK_RW|PR_CK_IND))) {
+ ha_alert("parsing [%s:%d] : cookie 'rewrite' and 'indirect' modes are incompatible.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!POWEROF2(curproxy->ck_opts & (PR_CK_RW|PR_CK_INS|PR_CK_PFX))) {
+ ha_alert("parsing [%s:%d] : cookie 'rewrite', 'insert' and 'prefix' modes are incompatible.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((curproxy->ck_opts & (PR_CK_PSV | PR_CK_INS | PR_CK_IND)) == PR_CK_PSV) {
+ ha_alert("parsing [%s:%d] : cookie 'preserve' requires at least 'insert' or 'indirect'.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
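+
+		/* Illustrative example (not from a real configuration) combining
+		 * several of the options parsed above:
+		 *     cookie SRVID insert indirect nocache maxidle 30m maxlife 8h
+		 */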
+ }/* end else if (!strcmp(args[0], "cookie")) */
+ else if (strcmp(args[0], "email-alert") == 0) {
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[1], "from") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.from);
+ curproxy->email_alert.from = strdup(args[2]);
+ if (!curproxy->email_alert.from)
+ goto alloc_error;
+ }
+ else if (strcmp(args[1], "mailers") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.mailers.name);
+ curproxy->email_alert.mailers.name = strdup(args[2]);
+ if (!curproxy->email_alert.mailers.name)
+ goto alloc_error;
+ }
+ else if (strcmp(args[1], "myhostname") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.myhostname);
+ curproxy->email_alert.myhostname = strdup(args[2]);
+ if (!curproxy->email_alert.myhostname)
+ goto alloc_error;
+ }
+ else if (strcmp(args[1], "level") == 0) {
+ curproxy->email_alert.level = get_log_level(args[2]);
+ if (curproxy->email_alert.level < 0) {
+ ha_alert("parsing [%s:%d] : unknown log level '%s' after '%s'\n",
+					 file, linenum, args[2], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "to") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.to);
+ curproxy->email_alert.to = strdup(args[2]);
+ if (!curproxy->email_alert.to)
+ goto alloc_error;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : email-alert: unknown argument '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /* Indicate that the email_alert is at least partially configured */
+ curproxy->email_alert.set = 1;
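+
+		/* Illustrative example of the sub-keywords handled above (the mailers
+		 * section name and addresses are hypothetical placeholders):
+		 *     email-alert mailers mymailers
+		 *     email-alert from haproxy@example.com
+		 *     email-alert to admin@example.com
+		 *     email-alert level warning
+		 */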
+ }/* end else if (!strcmp(args[0], "email-alert")) */
+ else if (strcmp(args[0], "persist") == 0) { /* persist */
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : missing persist method.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!strncmp(args[1], "rdp-cookie", 10)) {
+ curproxy->options2 |= PR_O2_RDPC_PRST;
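+
+			/* accepts "persist rdp-cookie" (the cookie name then defaults
+			 * to "msts") or "persist rdp-cookie(MYCOOKIE)", "MYCOOKIE"
+			 * being an illustrative name.
+			 */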
+
+ if (*(args[1] + 10) == '(') { /* cookie name */
+ const char *beg, *end;
+
+ beg = args[1] + 11;
+ end = strchr(beg, ')');
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!end || end == beg) {
+					ha_alert("parsing [%s:%d] : 'persist rdp-cookie(name)' requires an rdp cookie name.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ free(curproxy->rdp_cookie_name);
+ curproxy->rdp_cookie_name = my_strndup(beg, end - beg);
+ if (!curproxy->rdp_cookie_name)
+ goto alloc_error;
+ curproxy->rdp_cookie_len = end-beg;
+ }
+ else if (*(args[1] + 10) == '\0') { /* default cookie name 'msts' */
+ free(curproxy->rdp_cookie_name);
+ curproxy->rdp_cookie_name = strdup("msts");
+ if (!curproxy->rdp_cookie_name)
+ goto alloc_error;
+ curproxy->rdp_cookie_len = strlen(curproxy->rdp_cookie_name);
+ }
+ else { /* syntax */
+				ha_alert("parsing [%s:%d] : 'persist rdp-cookie(name)' requires an rdp cookie name.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown persist method.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "appsession") == 0) { /* cookie name */
+ ha_alert("parsing [%s:%d] : '%s' is not supported anymore since HAProxy 1.6.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "load-server-state-from-file") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+ if (strcmp(args[1], "global") == 0) { /* use the file pointed to by global server-state-file directive */
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_GLOBAL;
+ }
+ else if (strcmp(args[1], "local") == 0) { /* use the server-state-file-name variable to locate the server-state file */
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_LOCAL;
+ }
+ else if (strcmp(args[1], "none") == 0) { /* don't use server-state-file directive for this backend */
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_NONE;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'global', 'local' or 'none'. Got '%s'\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "server-state-file-name") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ ha_free(&curproxy->server_state_file_name);
+
+ if (*(args[1]) == 0 || strcmp(args[1], "use-backend-name") == 0)
+ curproxy->server_state_file_name = strdup(curproxy->id);
+ else
+ curproxy->server_state_file_name = strdup(args[1]);
+
+ if (!curproxy->server_state_file_name)
+ goto alloc_error;
+ }
+ else if (strcmp(args[0], "max-session-srv-conns") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+ if (*(args[1]) == 0) {
+			ha_alert("parsing [%s:%d] : '%s' expects a number. Got no argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->max_out_conns = atoi(args[1]);
+ }
+ else if (strcmp(args[0], "capture") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "cookie") == 0) { /* name of a cookie to capture */
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args_idx(4, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[4]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects 'cookie' <cookie_name> 'len' <len>.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->capture_name);
+ curproxy->capture_name = strdup(args[2]);
+ if (!curproxy->capture_name)
+ goto alloc_error;
+ curproxy->capture_namelen = strlen(curproxy->capture_name);
+ curproxy->capture_len = atol(args[4]);
+ curproxy->to_log |= LW_COOKIE;
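+			/* e.g. (illustrative) "capture cookie JSESSIONID len 32" */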
+ }
+ else if (strcmp(args[1], "request") == 0 && strcmp(args[2], "header") == 0) {
+ struct cap_hdr *hdr;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args_idx(4, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[3]) == 0 || strcmp(args[4], "len") != 0 || *(args[5]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects 'header' <header_name> 'len' <len>.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr)
+ goto req_caphdr_alloc_error;
+ hdr->next = curproxy->req_cap;
+ hdr->name = strdup(args[3]);
+ if (!hdr->name)
+ goto req_caphdr_alloc_error;
+ hdr->namelen = strlen(args[3]);
+ hdr->len = atol(args[5]);
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ if (!hdr->pool) {
+ req_caphdr_alloc_error:
+ if (hdr)
+ ha_free(&hdr->name);
+ ha_free(&hdr);
+ goto alloc_error;
+ }
+ hdr->index = curproxy->nb_req_cap++;
+ curproxy->req_cap = hdr;
+ curproxy->to_log |= LW_REQHDR;
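+			/* e.g. (illustrative) "capture request header Host len 64" */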
+ }
+ else if (strcmp(args[1], "response") == 0 && strcmp(args[2], "header") == 0) {
+ struct cap_hdr *hdr;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args_idx(4, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[3]) == 0 || strcmp(args[4], "len") != 0 || *(args[5]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects 'header' <header_name> 'len' <len>.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr)
+ goto res_caphdr_alloc_error;
+ hdr->next = curproxy->rsp_cap;
+ hdr->name = strdup(args[3]);
+ if (!hdr->name)
+ goto res_caphdr_alloc_error;
+ hdr->namelen = strlen(args[3]);
+ hdr->len = atol(args[5]);
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ if (!hdr->pool) {
+ res_caphdr_alloc_error:
+ if (hdr)
+ ha_free(&hdr->name);
+ ha_free(&hdr);
+ goto alloc_error;
+ }
+ hdr->index = curproxy->nb_rsp_cap++;
+ curproxy->rsp_cap = hdr;
+ curproxy->to_log |= LW_RSPHDR;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'cookie' or 'request header' or 'response header'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "retries") == 0) { /* connection retries */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument (dispatch counts for one).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->conn_retries = atol(args[1]);
+ }
+ else if (strcmp(args[0], "http-request") == 0) { /* request access control: allow/deny/auth */
+ struct act_rule *rule;
+ int where = 0;
+
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_req_rules) &&
+ !LIST_PREV(&curproxy->http_req_rules, struct act_rule *, list)->cond &&
+ (LIST_PREV(&curproxy->http_req_rules, struct act_rule *, list)->flags & ACT_FLAG_FINAL)) {
+ ha_warning("parsing [%s:%d]: previous '%s' action is final and has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_req_cond((const char **)args + 1, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err_code |= warnif_misplaced_http_req(curproxy, file, linenum, args[0]);
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+
+ LIST_APPEND(&curproxy->http_req_rules, &rule->list);
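+
+		/* e.g. (illustrative) "http-request deny if { path_beg /admin }"
+		 * is handled by parse_http_req_cond() and appended above.
+		 */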
+ }
+ else if (strcmp(args[0], "http-response") == 0) { /* response access control */
+ struct act_rule *rule;
+ int where = 0;
+
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_res_rules) &&
+ !LIST_PREV(&curproxy->http_res_rules, struct act_rule *, list)->cond &&
+ (LIST_PREV(&curproxy->http_res_rules, struct act_rule *, list)->flags & ACT_FLAG_FINAL)) {
+ ha_warning("parsing [%s:%d]: previous '%s' action is final and has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_res_cond((const char **)args + 1, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRS_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRS_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+
+ LIST_APPEND(&curproxy->http_res_rules, &rule->list);
+ }
+ else if (strcmp(args[0], "http-after-response") == 0) {
+ struct act_rule *rule;
+ int where = 0;
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_after_res_rules) &&
+ !LIST_PREV(&curproxy->http_after_res_rules, struct act_rule *, list)->cond &&
+ (LIST_PREV(&curproxy->http_after_res_rules, struct act_rule *, list)->flags & ACT_FLAG_FINAL)) {
+ ha_warning("parsing [%s:%d]: previous '%s' action is final and has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_after_res_cond((const char **)args + 1, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRS_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRS_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+
+ LIST_APPEND(&curproxy->http_after_res_rules, &rule->list);
+ }
+ else if (strcmp(args[0], "http-send-name-header") == 0) { /* send server name in request header */
+ /* set the header name and length into the proxy structure */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' requires a header string.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* set the desired header name, in lower case */
+ istfree(&curproxy->server_id_hdr_name);
+ curproxy->server_id_hdr_name = istdup(ist(args[1]));
+ if (!isttest(curproxy->server_id_hdr_name))
+ goto alloc_error;
+ ist2bin_lc(istptr(curproxy->server_id_hdr_name), curproxy->server_id_hdr_name);
+ }
+ else if (strcmp(args[0], "block") == 0) {
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. Use 'http-request deny' which uses the exact same syntax.\n", file, linenum, args[0]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "redirect") == 0) {
+ struct redirect_rule *rule;
+ int where = 0;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((rule = http_parse_redirect_rule(file, linenum, curproxy, (const char **)args + 1, &errmsg, 0, 0)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing redirect rule : %s.\n",
+ file, linenum, proxy_type_str(curproxy), curproxy->id, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ LIST_APPEND(&curproxy->redirect_rules, &rule->list);
+ err_code |= warnif_misplaced_redirect(curproxy, file, linenum, args[0]);
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+ }
+ else if (strcmp(args[0], "use_backend") == 0) {
+ struct switching_rule *rule;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a backend name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[2], "if") == 0 || strcmp(args[2], "unless") == 0) {
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing switching rule : %s.\n",
+ file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_FE_SET_BCK, file, linenum);
+ }
+ else if (*args[2]) {
+ ha_alert("parsing [%s:%d] : unexpected keyword '%s' after switching rule, only 'if' and 'unless' are allowed.\n",
+ file, linenum, args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ goto use_backend_alloc_error;
+ rule->cond = cond;
+ rule->be.name = strdup(args[1]);
+ if (!rule->be.name)
+ goto use_backend_alloc_error;
+ rule->line = linenum;
+ rule->file = strdup(file);
+ if (!rule->file) {
+ use_backend_alloc_error:
+ if (cond)
+ prune_acl_cond(cond);
+ ha_free(&cond);
+ if (rule)
+ ha_free(&(rule->be.name));
+ ha_free(&rule);
+ goto alloc_error;
+ }
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->switching_rules, &rule->list);
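+
+		/* e.g. (illustrative) "use_backend app_servers if { hdr(host) -i app.example.com }" */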
+ }
+ else if (strcmp(args[0], "use-server") == 0) {
+ struct server_rule *rule;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a server name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[2], "if") != 0 && strcmp(args[2], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing switching rule : %s.\n",
+ file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_SET_SRV, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ goto use_server_alloc_error;
+ rule->cond = cond;
+ rule->srv.name = strdup(args[1]);
+ if (!rule->srv.name)
+ goto use_server_alloc_error;
+ rule->line = linenum;
+ rule->file = strdup(file);
+ if (!rule->file) {
+ use_server_alloc_error:
+ if (cond)
+ prune_acl_cond(cond);
+ ha_free(&cond);
+ if (rule)
+ ha_free(&(rule->srv.name));
+ ha_free(&rule);
+ goto alloc_error;
+ }
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->server_rules, &rule->list);
+ curproxy->be_req_ana |= AN_REQ_SRV_RULES;
+ }
+ else if ((strcmp(args[0], "force-persist") == 0) ||
+ (strcmp(args[0], "ignore-persist") == 0)) {
+ struct persist_rule *rule;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "if") != 0 && strcmp(args[1], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 1, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing a '%s' rule : %s.\n",
+ file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* note: BE_REQ_CNT is the first one after FE_SET_BCK, which is
+ * where force-persist is applied.
+ */
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_REQ_CNT, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ if (cond)
+ prune_acl_cond(cond);
+ ha_free(&cond);
+ goto alloc_error;
+ }
+ rule->cond = cond;
+ if (strcmp(args[0], "force-persist") == 0) {
+ rule->type = PERSIST_TYPE_FORCE;
+ } else {
+ rule->type = PERSIST_TYPE_IGNORE;
+ }
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->persist_rules, &rule->list);
+ }
+ else if (strcmp(args[0], "stick-table") == 0) {
+ struct stktable *other;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : 'stick-table' is not supported in 'defaults' section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ other = stktable_find_by_name(curproxy->id);
+ if (other) {
+ ha_alert("parsing [%s:%d] : stick-table name '%s' conflicts with table declared in %s '%s' at %s:%d.\n",
+ file, linenum, curproxy->id,
+ other->proxy ? proxy_cap_str(other->proxy->cap) : "peers",
+ other->proxy ? other->id : other->peers.p->id,
+ other->conf.file, other->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->table = calloc(1, sizeof *curproxy->table);
+ if (!curproxy->table) {
+ ha_alert("parsing [%s:%d]: '%s %s' : memory allocation failed\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= parse_stick_table(file, linenum, args, curproxy->table,
+ curproxy->id, curproxy->id, NULL);
+ if (err_code & ERR_FATAL) {
+ ha_free(&curproxy->table);
+ goto out;
+ }
+
+ /* Store the proxy in the stick-table. */
+ curproxy->table->proxy = curproxy;
+
+ stktable_store_name(curproxy->table);
+ curproxy->table->next = stktables_list;
+ stktables_list = curproxy->table;
+
+ /* Add this proxy to the list of proxies which refer to its stick-table. */
+ if (curproxy->table->proxies_list != curproxy) {
+ curproxy->next_stkt_ref = curproxy->table->proxies_list;
+ curproxy->table->proxies_list = curproxy;
+ }
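+
+		/* e.g. (illustrative) "stick-table type ip size 200k expire 30m",
+		 * the details being handled by parse_stick_table() above.
+		 */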
+ }
+ else if (strcmp(args[0], "stick") == 0) {
+ struct sticking_rule *rule;
+ struct sample_expr *expr;
+ int myidx = 0;
+ const char *name = NULL;
+ int flags;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ myidx++;
+ if ((strcmp(args[myidx], "store") == 0) ||
+ (strcmp(args[myidx], "store-request") == 0)) {
+ myidx++;
+ flags = STK_IS_STORE;
+ }
+ else if (strcmp(args[myidx], "store-response") == 0) {
+ myidx++;
+ flags = STK_IS_STORE | STK_ON_RSP;
+ }
+ else if (strcmp(args[myidx], "match") == 0) {
+ myidx++;
+ flags = STK_IS_MATCH;
+ }
+ else if (strcmp(args[myidx], "on") == 0) {
+ myidx++;
+ flags = STK_IS_MATCH | STK_IS_STORE;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'on', 'match', or 'store'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (*(args[myidx]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a fetch method.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->conf.args.ctx = ARGC_STK;
+ expr = sample_parse_expr(args, &myidx, file, linenum, &errmsg, &curproxy->conf.args, NULL);
+ if (!expr) {
+ ha_alert("parsing [%s:%d] : '%s': %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (flags & STK_ON_RSP) {
+ if (!(expr->fetch->val & SMP_VAL_BE_STO_RUL)) {
+ ha_alert("parsing [%s:%d] : '%s': fetch method '%s' extracts information from '%s', none of which is available for 'store-response'.\n",
+ file, linenum, args[0], expr->fetch->kw, sample_src_names(expr->fetch->use));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ } else {
+ if (!(expr->fetch->val & SMP_VAL_BE_SET_SRV)) {
+ ha_alert("parsing [%s:%d] : '%s': fetch method '%s' extracts information from '%s', none of which is available during request.\n",
+ file, linenum, args[0], expr->fetch->kw, sample_src_names(expr->fetch->use));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ }
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ curproxy->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ if (strcmp(args[myidx], "table") == 0) {
+ myidx++;
+ name = args[myidx++];
+ }
+
+ if (strcmp(args[myidx], "if") == 0 || strcmp(args[myidx], "unless") == 0) {
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + myidx, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : '%s': error detected while parsing sticking condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ }
+ else if (*(args[myidx])) {
+ ha_alert("parsing [%s:%d] : '%s': unknown keyword '%s'.\n",
+ file, linenum, args[0], args[myidx]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ if (flags & STK_ON_RSP)
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_STO_RUL, file, linenum);
+ else
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_SET_SRV, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ if (cond)
+ prune_acl_cond(cond);
+ ha_free(&cond);
+ goto alloc_error;
+ }
+ rule->cond = cond;
+ rule->expr = expr;
+ rule->flags = flags;
+ rule->table.name = name ? strdup(name) : NULL;
+ LIST_INIT(&rule->list);
+ if (flags & STK_ON_RSP)
+ LIST_APPEND(&curproxy->storersp_rules, &rule->list);
+ else
+ LIST_APPEND(&curproxy->sticking_rules, &rule->list);
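+
+		/* e.g. (illustrative) "stick on src" or
+		 * "stick store-response res.cook(SRVID)", "SRVID" being a
+		 * hypothetical cookie name.
+		 */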
+ }
+ else if (strcmp(args[0], "stats") == 0) {
+ if (!(curproxy->cap & PR_CAP_DEF) && curproxy->uri_auth == curr_defproxy->uri_auth)
+ curproxy->uri_auth = NULL; /* we must detach from the default config */
+
+ if (!*args[1]) {
+ goto stats_error_parsing;
+ } else if (strcmp(args[1], "admin") == 0) {
+ struct stats_admin_rule *rule;
+ int where = 0;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!stats_check_init_uri_auth(&curproxy->uri_auth))
+ goto alloc_error;
+
+ if (strcmp(args[2], "if") != 0 && strcmp(args[2], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing a '%s %s' rule : %s.\n",
+ file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(cond, where, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ if (cond)
+ prune_acl_cond(cond);
+ ha_free(&cond);
+ goto alloc_error;
+ }
+ rule->cond = cond;
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->uri_auth->admin_rules, &rule->list);
+ } else if (strcmp(args[1], "uri") == 0) {
+ if (*(args[2]) == 0) {
+				ha_alert("parsing [%s:%d] : 'uri' needs a URI prefix.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_set_uri(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "realm") == 0) {
+ if (*(args[2]) == 0) {
+				ha_alert("parsing [%s:%d] : 'realm' needs a realm name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_set_realm(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "refresh") == 0) {
+ unsigned interval;
+
+ err = parse_time_err(args[2], &interval, TIME_UNIT_S);
+ if (err == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to stats refresh interval, maximum value is 2147483647 s (~68 years).\n",
+ file, linenum, args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to stats refresh interval, minimum non-null value is 1 s.\n",
+ file, linenum, args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to stats refresh interval.\n",
+ file, linenum, *err);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_set_refresh(&curproxy->uri_auth, interval))
+ goto alloc_error;
+ } else if (strcmp(args[1], "http-request") == 0) { /* request access control: allow/deny/auth */
+ struct act_rule *rule;
+ int where = 0;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!stats_check_init_uri_auth(&curproxy->uri_auth))
+ goto alloc_error;
+
+ if (!LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules) &&
+ !LIST_PREV(&curproxy->uri_auth->http_req_rules, struct act_rule *, list)->cond) {
+ ha_warning("parsing [%s:%d]: previous '%s' action has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_req_cond((const char **)args + 2, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+ LIST_APPEND(&curproxy->uri_auth->http_req_rules, &rule->list);
+
+ } else if (strcmp(args[1], "auth") == 0) {
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : 'auth' needs a user:password account.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_add_auth(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "scope") == 0) {
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : 'scope' needs a proxy name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_add_scope(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "enable") == 0) {
+ if (!stats_check_init_uri_auth(&curproxy->uri_auth))
+ goto alloc_error;
+ } else if (strcmp(args[1], "hide-version") == 0) {
+ if (!stats_set_flag(&curproxy->uri_auth, STAT_HIDEVER))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-legends") == 0) {
+ if (!stats_set_flag(&curproxy->uri_auth, STAT_SHLGNDS))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-modules") == 0) {
+ if (!stats_set_flag(&curproxy->uri_auth, STAT_SHMODULES))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-node") == 0) {
+
+ if (*args[2]) {
+ int i;
+ char c;
+
+ for (i=0; args[2][i]; i++) {
+ c = args[2][i];
+ if (!isupper((unsigned char)c) && !islower((unsigned char)c) &&
+ !isdigit((unsigned char)c) && c != '_' && c != '-' && c != '.')
+ break;
+ }
+
+ if (!i || args[2][i]) {
+					ha_alert("parsing [%s:%d]: '%s %s' invalid node name - should be a string"
+						 " with digits(0-9), letters(A-Z, a-z), dot(.), hyphen(-) or underscore(_).\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if (!stats_set_node(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-desc") == 0) {
+ char *desc = NULL;
+
+ if (*args[2]) {
+ int i, len=0;
+ char *d;
+
+ for (i = 2; *args[i]; i++)
+ len += strlen(args[i]) + 1;
+
+				desc = d = calloc(1, len);
+				if (!d)
+					goto alloc_error;
+
+ d += snprintf(d, desc + len - d, "%s", args[2]);
+ for (i = 3; *args[i]; i++)
+ d += snprintf(d, desc + len - d, " %s", args[i]);
+ }
+
+ if (!*args[2] && !global.desc)
+ ha_warning("parsing [%s:%d]: '%s' requires a parameter or 'desc' to be set in the global section.\n",
+ file, linenum, args[1]);
+ else {
+ if (!stats_set_desc(&curproxy->uri_auth, desc)) {
+ free(desc);
+ goto alloc_error;
+ }
+ free(desc);
+ }
+ } else {
+stats_error_parsing:
+			ha_alert("parsing [%s:%d]: %s '%s', expects 'admin', 'uri', 'realm', 'refresh', 'http-request', 'auth', 'scope', 'enable', 'hide-version', 'show-node', 'show-desc', 'show-legends' or 'show-modules'.\n",
+				 file, linenum, *args[1]?"unknown stats parameter":"missing keyword in", args[*args[1]?1:0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "option") == 0) {
+ int optnum;
+
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (optnum = 0; cfg_opts[optnum].name; optnum++) {
+ if (strcmp(args[1], cfg_opts[optnum].name) == 0) {
+ if (cfg_opts[optnum].cap == PR_CAP_NONE) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported due to build options.\n",
+ file, linenum, cfg_opts[optnum].name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (warnifnotcap(curproxy, cfg_opts[optnum].cap, file, linenum, args[1], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ curproxy->no_options &= ~cfg_opts[optnum].val;
+ curproxy->options &= ~cfg_opts[optnum].val;
+
+ switch (kwm) {
+ case KWM_STD:
+ curproxy->options |= cfg_opts[optnum].val;
+ break;
+ case KWM_NO:
+ curproxy->no_options |= cfg_opts[optnum].val;
+ break;
+ case KWM_DEF: /* already cleared */
+ break;
+ }
+
+ goto out;
+ }
+ }
+
+ for (optnum = 0; cfg_opts2[optnum].name; optnum++) {
+ if (strcmp(args[1], cfg_opts2[optnum].name) == 0) {
+ if (cfg_opts2[optnum].cap == PR_CAP_NONE) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported due to build options.\n",
+ file, linenum, cfg_opts2[optnum].name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (warnifnotcap(curproxy, cfg_opts2[optnum].cap, file, linenum, args[1], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ curproxy->no_options2 &= ~cfg_opts2[optnum].val;
+ curproxy->options2 &= ~cfg_opts2[optnum].val;
+
+ switch (kwm) {
+ case KWM_STD:
+ curproxy->options2 |= cfg_opts2[optnum].val;
+ break;
+ case KWM_NO:
+ curproxy->no_options2 |= cfg_opts2[optnum].val;
+ break;
+ case KWM_DEF: /* already cleared */
+ break;
+ }
+ goto out;
+ }
+ }
+
+ /* HTTP options override each other. They can be cancelled using
+ * "no option xxx" which only switches to default mode if the mode
+ * was this one (useful for cancelling options set in defaults
+ * sections).
+ */
+ if (strcmp(args[1], "forceclose") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more since HAProxy 2.0, please just remove it, or use 'option httpclose' if absolutely needed.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[1], "httpclose") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD) {
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ curproxy->options |= PR_O_HTTP_CLO;
+ goto out;
+ }
+ else if (kwm == KWM_NO) {
+ if ((curproxy->options & PR_O_HTTP_MODE) == PR_O_HTTP_CLO)
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "http-server-close") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD) {
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ curproxy->options |= PR_O_HTTP_SCL;
+ goto out;
+ }
+ else if (kwm == KWM_NO) {
+ if ((curproxy->options & PR_O_HTTP_MODE) == PR_O_HTTP_SCL)
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "http-keep-alive") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD) {
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ curproxy->options |= PR_O_HTTP_KAL;
+ goto out;
+ }
+ else if (kwm == KWM_NO) {
+ if ((curproxy->options & PR_O_HTTP_MODE) == PR_O_HTTP_KAL)
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "http-tunnel") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more since HAProxy 2.1, please just remove it, it shouldn't be needed.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+		/* Redispatch can take an integer argument that controls when the
+		 * redispatch occurs. All values are relative to the retries option.
+		 * This can be cancelled using "no option xxx".
+		 */
+ if (strcmp(args[1], "redispatch") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[1], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ curproxy->no_options &= ~PR_O_REDISP;
+ curproxy->options &= ~PR_O_REDISP;
+
+ switch (kwm) {
+ case KWM_STD:
+ curproxy->options |= PR_O_REDISP;
+ curproxy->redispatch_after = -1;
+				if (*args[2]) {
+ curproxy->redispatch_after = atol(args[2]);
+ }
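+				/* e.g. "option redispatch 2" (illustrative value) redispatches
+				 * every second retry; without an argument, the default -1
+				 * redispatches only on the last retry.
+				 */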
+ break;
+ case KWM_NO:
+ curproxy->no_options |= PR_O_REDISP;
+ curproxy->redispatch_after = 0;
+ break;
+ case KWM_DEF: /* already cleared */
+ break;
+ }
+ goto out;
+ }
+
+ if (strcmp(args[1], "http_proxy") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more since HAProxy 2.5. This option stopped working in HAProxy 1.9 and usually had nasty side effects. It can be more reliably implemented with combinations of 'http-request set-dst' and 'http-request set-uri', and even 'http-request do-resolve' if DNS resolution is desired.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (kwm != KWM_STD) {
+ ha_alert("parsing [%s:%d]: negation/default is not supported for option '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[1], "httplog") == 0) {
+ char *logformat;
+ /* generate a complete HTTP log */
+ logformat = default_http_log_format;
+ if (*(args[2]) != '\0') {
+ if (strcmp(args[2], "clf") == 0) {
+ curproxy->options2 |= PR_O2_CLFLOG;
+ logformat = clf_http_log_format;
+ } else {
+ ha_alert("parsing [%s:%d] : keyword '%s' only supports option 'clf'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(1, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+ char *clflogformat = "";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ if (logformat == clf_http_log_format)
+ clflogformat = " clf";
+ ha_warning("parsing [%s:%d]: 'option httplog%s' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, clflogformat, oldlogformat);
+ }
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = logformat;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'option httplog' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[1], "tcplog") == 0) {
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ ha_warning("parsing [%s:%d]: 'option tcplog' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, oldlogformat);
+ }
+ /* generate a detailed TCP log */
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = default_tcp_log_format;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'option tcplog' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[1], "httpslog") == 0) {
+ char *logformat;
+ /* generate a complete HTTP log */
+ logformat = default_https_log_format;
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ ha_warning("parsing [%s:%d]: 'option httplog' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, oldlogformat);
+ }
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = logformat;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'option httpslog' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[1], "tcpka") == 0) {
+ /* enable TCP keep-alives on client and server streams */
+ if (warnifnotcap(curproxy, PR_CAP_BE | PR_CAP_FE, file, linenum, args[1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (curproxy->cap & PR_CAP_FE)
+ curproxy->options |= PR_O_TCP_CLI_KA;
+ if (curproxy->cap & PR_CAP_BE)
+ curproxy->options |= PR_O_TCP_SRV_KA;
+ }
+ else if (strcmp(args[1], "httpchk") == 0) {
+ err_code |= proxy_parse_httpchk_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "ssl-hello-chk") == 0) {
+ err_code |= proxy_parse_ssl_hello_chk_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "smtpchk") == 0) {
+ err_code |= proxy_parse_smtpchk_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "pgsql-check") == 0) {
+ err_code |= proxy_parse_pgsql_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "redis-check") == 0) {
+ err_code |= proxy_parse_redis_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "mysql-check") == 0) {
+ err_code |= proxy_parse_mysql_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "ldap-check") == 0) {
+ err_code |= proxy_parse_ldap_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "spop-check") == 0) {
+ err_code |= proxy_parse_spop_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "tcp-check") == 0) {
+ err_code |= proxy_parse_tcp_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "external-check") == 0) {
+ err_code |= proxy_parse_external_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "forwardfor") == 0) {
+ int cur_arg;
+
+ /* insert the x-forwarded-for header, except for addresses matching
+ * the optional 'except' network. Set default options (i.e. bitfield,
+ * header name, etc).
+ */
+
+ curproxy->options |= PR_O_FWDFOR | PR_O_FF_ALWAYS;
+
+ istfree(&curproxy->fwdfor_hdr_name);
+ curproxy->fwdfor_hdr_name = istdup(ist(DEF_XFORWARDFOR_HDR));
+ if (!isttest(curproxy->fwdfor_hdr_name))
+ goto alloc_error;
+ curproxy->except_xff_net.family = AF_UNSPEC;
+
+ /* loop to go through arguments - start at 2, since 0+1 = "option" "forwardfor" */
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "except") == 0) {
+ unsigned char mask;
+ int i;
+
+ /* suboption except - needs additional argument for it */
+ if (*(args[cur_arg+1]) &&
+ str2net(args[cur_arg+1], 1, &curproxy->except_xff_net.addr.v4.ip, &curproxy->except_xff_net.addr.v4.mask)) {
+ curproxy->except_xff_net.family = AF_INET;
+ curproxy->except_xff_net.addr.v4.ip.s_addr &= curproxy->except_xff_net.addr.v4.mask.s_addr;
+ }
+ else if (*(args[cur_arg+1]) &&
+ str62net(args[cur_arg+1], &curproxy->except_xff_net.addr.v6.ip, &mask)) {
+ curproxy->except_xff_net.family = AF_INET6;
+ len2mask6(mask, &curproxy->except_xff_net.addr.v6.mask);
+ for (i = 0; i < 16; i++)
+ curproxy->except_xff_net.addr.v6.ip.s6_addr[i] &= curproxy->except_xff_net.addr.v6.mask.s6_addr[i];
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <address>[/mask] as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg += 2;
+ } else if (strcmp(args[cur_arg], "header") == 0) {
+ /* suboption header - needs additional argument for it */
+ if (*(args[cur_arg+1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <header_name> as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ istfree(&curproxy->fwdfor_hdr_name);
+ curproxy->fwdfor_hdr_name = istdup(ist(args[cur_arg+1]));
+ if (!isttest(curproxy->fwdfor_hdr_name))
+ goto alloc_error;
+ cur_arg += 2;
+ } else if (strcmp(args[cur_arg], "if-none") == 0) {
+ curproxy->options &= ~PR_O_FF_ALWAYS;
+ cur_arg += 1;
+ } else {
+ /* unknown suboption - catchall */
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'except', 'header' and 'if-none'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } /* end while loop */
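+ /* Illustrative configuration exercising this parser (network and
+ * header name are made-up examples):
+ *   option forwardfor except 127.0.0.0/8 header X-Client-IP if-none
+ * This keeps PR_O_FWDFOR, clears PR_O_FF_ALWAYS because of 'if-none',
+ * stores the custom header name and masks the excluded network.
+ */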
+ }
+ else if (strcmp(args[1], "originalto") == 0) {
+ int cur_arg;
+
+ /* insert the x-original-to header, except for addresses matching
+ * the optional 'except' network. Set default options (i.e. bitfield,
+ * header name, etc).
+ */
+
+ curproxy->options |= PR_O_ORGTO;
+
+ istfree(&curproxy->orgto_hdr_name);
+ curproxy->orgto_hdr_name = istdup(ist(DEF_XORIGINALTO_HDR));
+ if (!isttest(curproxy->orgto_hdr_name))
+ goto alloc_error;
+ curproxy->except_xot_net.family = AF_UNSPEC;
+
+ /* loop to go through arguments - start at 2, since 0+1 = "option" "originalto" */
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "except") == 0) {
+ unsigned char mask;
+ int i;
+
+ /* suboption except - needs additional argument for it */
+ if (*(args[cur_arg+1]) &&
+ str2net(args[cur_arg+1], 1, &curproxy->except_xot_net.addr.v4.ip, &curproxy->except_xot_net.addr.v4.mask)) {
+ curproxy->except_xot_net.family = AF_INET;
+ curproxy->except_xot_net.addr.v4.ip.s_addr &= curproxy->except_xot_net.addr.v4.mask.s_addr;
+ }
+ else if (*(args[cur_arg+1]) &&
+ str62net(args[cur_arg+1], &curproxy->except_xot_net.addr.v6.ip, &mask)) {
+ curproxy->except_xot_net.family = AF_INET6;
+ len2mask6(mask, &curproxy->except_xot_net.addr.v6.mask);
+ for (i = 0; i < 16; i++)
+ curproxy->except_xot_net.addr.v6.ip.s6_addr[i] &= curproxy->except_xot_net.addr.v6.mask.s6_addr[i];
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <address>[/mask] as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg += 2;
+ } else if (strcmp(args[cur_arg], "header") == 0) {
+ /* suboption header - needs additional argument for it */
+ if (*(args[cur_arg+1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <header_name> as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ istfree(&curproxy->orgto_hdr_name);
+ curproxy->orgto_hdr_name = istdup(ist(args[cur_arg+1]));
+ if (!isttest(curproxy->orgto_hdr_name))
+ goto alloc_error;
+ cur_arg += 2;
+ } else {
+ /* unknown suboption - catchall */
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'except' and 'header'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } /* end while loop */
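+ /* Illustrative configuration exercising this parser (network and
+ * header name are made-up examples):
+ *   option originalto except 10.0.0.0/8 header X-Original-To
+ */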
+ }
+ else if (strcmp(args[1], "http-restrict-req-hdr-names") == 0) {
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing parameter. option '%s' expects 'preserve', 'reject' or 'delete' option.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->options2 &= ~PR_O2_RSTRICT_REQ_HDR_NAMES_MASK;
+ if (strcmp(args[2], "preserve") == 0)
+ curproxy->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_NOOP;
+ else if (strcmp(args[2], "reject") == 0)
+ curproxy->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_BLK;
+ else if (strcmp(args[2], "delete") == 0)
+ curproxy->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_DEL;
+ else {
+ ha_alert("parsing [%s:%d] : invalid parameter '%s'. option '%s' expects 'preserve', 'reject' or 'delete' option.\n",
+ file, linenum, args[2], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
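+ /* Illustrative usage, one of the three modes parsed above:
+ *   option http-restrict-req-hdr-names reject
+ */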
+ }
+ else {
+ const char *best = proxy_find_best_option(args[1], common_options);
+
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown option '%s'; did you mean '%s' maybe ?\n", file, linenum, args[1], best);
+ else
+ ha_alert("parsing [%s:%d] : unknown option '%s'.\n", file, linenum, args[1]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ goto out;
+ }
+ else if (strcmp(args[0], "default_backend") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a backend name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->defbe.name);
+ curproxy->defbe.name = strdup(args[1]);
+ if (!curproxy->defbe.name)
+ goto alloc_error;
+
+ if (alertif_too_many_args_idx(1, 0, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "redispatch") == 0 || strcmp(args[0], "redisp") == 0) {
+ ha_alert("parsing [%s:%d] : keyword '%s' directive is not supported anymore since HAProxy 2.1. Use 'option redispatch'.\n", file, linenum, args[0]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "http-reuse") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "never") == 0) {
+ /* never reuse server connections; each one remains private to its session */
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_NEVR;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[1], "safe") == 0) {
+ /* reuse idle connections, but never for a session's first request */
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_SAFE;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[1], "aggressive") == 0) {
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_AGGR;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[1], "always") == 0) {
+ /* always reuse idle connections, even for a session's first request */
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_ALWS;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'never', 'safe', 'aggressive', 'always'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
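+ /* Illustrative usage, one of the four modes parsed above:
+ *   http-reuse safe
+ */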
+ }
+ else if (strcmp(args[0], "monitor") == 0) {
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "fail") == 0) {
+ /* add a condition to fail monitor requests */
+ if (strcmp(args[2], "if") != 0 && strcmp(args[2], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= warnif_misplaced_monitor(curproxy, file, linenum, "monitor fail");
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing a '%s %s' condition : %s.\n",
+ file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ LIST_APPEND(&curproxy->mon_fail_cond, &cond->list);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'fail'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+#ifdef USE_TPROXY
+ else if (strcmp(args[0], "transparent") == 0) {
+ /* enable transparent proxy connections */
+ curproxy->options |= PR_O_TRANSP;
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ }
+#endif
+ else if (strcmp(args[0], "maxconn") == 0) { /* maxconn */
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], " Maybe you want 'fullconn' instead ?"))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->maxconn = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "backlog") == 0) { /* backlog */
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->backlog = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "fullconn") == 0) { /* fullconn */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], " Maybe you want 'maxconn' instead ?"))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->fullconn = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "grace") == 0) { /* grace time (ms) */
+ ha_alert("parsing [%s:%d]: the '%s' keyword is not supported any more since HAProxy version 2.5.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "dispatch") == 0) { /* dispatch address */
+ struct sockaddr_storage *sk;
+ int port1, port2;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ sk = str2sa_range(args[1], NULL, &port1, &port2, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s' : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ curproxy->dispatch_addr = *sk;
+ curproxy->options |= PR_O_DISPATCH;
+ }
+ else if (strcmp(args[0], "balance") == 0) { /* set balancing with optional algorithm */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (backend_parse_balance((const char **)args + 1, &errmsg, curproxy) < 0) {
+ ha_alert("parsing [%s:%d] : %s %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "hash-type") == 0) { /* set hashing method */
+ /**
+ * The syntax for hash-type config element is
+ * hash-type {map-based|consistent} [[<algo>] avalanche]
+ *
+ * The default hash function is sdbm for map-based and sdbm+avalanche for consistent.
+ */
+ curproxy->lbprm.algo &= ~(BE_LB_HASH_TYPE | BE_LB_HASH_FUNC | BE_LB_HASH_MOD);
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "consistent") == 0) { /* use consistent hashing */
+ curproxy->lbprm.algo |= BE_LB_HASH_CONS;
+ }
+ else if (strcmp(args[1], "map-based") == 0) { /* use map-based hashing */
+ curproxy->lbprm.algo |= BE_LB_HASH_MAP;
+ }
+ else if (strcmp(args[1], "avalanche") == 0) {
+ ha_alert("parsing [%s:%d] : experimental feature '%s %s' is not supported anymore, please use '%s map-based sdbm avalanche' instead.\n", file, linenum, args[0], args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'consistent' and 'map-based'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* set the hash function to use */
+ if (!*args[2]) {
+ /* the default algo is sdbm */
+ curproxy->lbprm.algo |= BE_LB_HFCN_SDBM;
+
+ /* if consistent with no argument, then avalanche modifier is also applied */
+ if ((curproxy->lbprm.algo & BE_LB_HASH_TYPE) == BE_LB_HASH_CONS)
+ curproxy->lbprm.algo |= BE_LB_HMOD_AVAL;
+ } else {
+ /* set the hash function */
+ if (strcmp(args[2], "sdbm") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_SDBM;
+ }
+ else if (strcmp(args[2], "djb2") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_DJB2;
+ }
+ else if (strcmp(args[2], "wt6") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_WT6;
+ }
+ else if (strcmp(args[2], "crc32") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_CRC32;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'sdbm', 'djb2', 'crc32', or 'wt6' hash functions.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* set the hash modifier */
+ if (strcmp(args[3], "avalanche") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HMOD_AVAL;
+ }
+ else if (*args[3]) {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'avalanche' as a modifier for hash functions.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
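+ /* Illustrative usages accepted by the grammar above:
+ *   hash-type consistent                 (implies sdbm + avalanche)
+ *   hash-type map-based sdbm avalanche
+ *   hash-type consistent djb2
+ */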
+ }
+ else if (strcmp(args[0], "hash-balance-factor") == 0) {
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->lbprm.hash_balance_factor = atol(args[1]);
+ if (curproxy->lbprm.hash_balance_factor != 0 && curproxy->lbprm.hash_balance_factor <= 100) {
+ ha_alert("parsing [%s:%d] : '%s' must be 0 or greater than 100.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
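+ /* Illustrative usage: 'hash-balance-factor 150' caps a server's load
+ * at roughly 150% of the average; per the check above the value must
+ * be 0 (disabled) or greater than 100.
+ */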
+ }
+ else if (strcmp(args[0], "unique-id-format") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->conf.uniqueid_format_string);
+ curproxy->conf.uniqueid_format_string = strdup(args[1]);
+ if (!curproxy->conf.uniqueid_format_string)
+ goto alloc_error;
+
+ free(curproxy->conf.uif_file);
+ curproxy->conf.uif_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.uif_line = curproxy->conf.args.line;
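+ /* Illustrative usage (the format string must remain a single
+ * argument, hence the escaped space):
+ *   unique-id-format %{+X}o\ %ci:%cp_%fi:%fp_%Ts_%rt:%pid
+ */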
+ }
+
+ else if (strcmp(args[0], "unique-id-header") == 0) {
+ char *copy;
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ copy = strdup(args[1]);
+ if (copy == NULL) {
+ ha_alert("parsing [%s:%d] : failed to allocate memory for unique-id-header\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ istfree(&curproxy->header_unique_id);
+ curproxy->header_unique_id = ist(copy);
+ }
+
+ else if (strcmp(args[0], "log-format") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ ha_warning("parsing [%s:%d]: 'log-format' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, oldlogformat);
+ }
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = strdup(args[1]);
+ if (!curproxy->conf.logformat_string)
+ goto alloc_error;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ /* get a chance to improve log-format error reporting by
+ * reporting the correct line-number when possible.
+ */
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'log-format' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
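+ /* Illustrative usage with a few common log variables (quote the
+ * string so it stays a single argument):
+ *   log-format "%ci:%cp [%tr] %ft %b/%s %ST %B"
+ */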
+ }
+ else if (strcmp(args[0], "log-format-sd") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ free(curproxy->conf.logformat_sd_string);
+ curproxy->conf.logformat_sd_string = strdup(args[1]);
+ if (!curproxy->conf.logformat_sd_string)
+ goto alloc_error;
+
+ free(curproxy->conf.lfsd_file);
+ curproxy->conf.lfsd_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfsd_line = curproxy->conf.args.line;
+
+ /* get a chance to improve log-format-sd error reporting by
+ * reporting the correct line-number when possible.
+ */
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'log-format-sd' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[0], "error-log-format") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (curproxy->conf.error_logformat_string && curproxy->cap & PR_CAP_DEF) {
+ ha_warning("parsing [%s:%d]: 'error-log-format' overrides previous 'error-log-format' in 'defaults' section.\n",
+ file, linenum);
+ }
+ free(curproxy->conf.error_logformat_string);
+ curproxy->conf.error_logformat_string = strdup(args[1]);
+ if (!curproxy->conf.error_logformat_string)
+ goto alloc_error;
+
+ free(curproxy->conf.elfs_file);
+ curproxy->conf.elfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.elfs_line = curproxy->conf.args.line;
+
+ /* get a chance to improve log-format error reporting by
+ * reporting the correct line-number when possible.
+ */
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'error-log-format' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[0], "log-tag") == 0) { /* tag to report to syslog */
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a tag for use in syslog.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chunk_destroy(&curproxy->log_tag);
+ chunk_initlen(&curproxy->log_tag, strdup(args[1]), strlen(args[1]), strlen(args[1]));
+ if (b_orig(&curproxy->log_tag) == NULL) {
+ chunk_destroy(&curproxy->log_tag);
+ ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) { /* "no log" or "log ..." */
+ if (!parse_logsrv(args, &curproxy->logsrvs, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "source") == 0) { /* address to which we bind when connecting */
+ int cur_arg;
+ int port1, port2;
+ struct sockaddr_storage *sk;
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <addr>[:<port>], and optionally '%s' <addr>, and '%s' <name>.\n",
+ file, linenum, "source", "usesrc", "interface");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* we must first clear any optional default setting */
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ ha_free(&curproxy->conn_src.iface_name);
+ curproxy->conn_src.iface_len = 0;
+
+ sk = str2sa_range(args[1], NULL, &port1, &port2, NULL, NULL,
+ &errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n",
+ file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->conn_src.source_addr = *sk;
+ curproxy->conn_src.opts |= CO_SRC_BIND;
+
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "usesrc") == 0) { /* address to use outside */
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <addr>[:<port>], 'client', or 'clientip' as argument.\n",
+ file, linenum, "usesrc");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[cur_arg + 1], "client") == 0) {
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_CLI;
+ } else if (strcmp(args[cur_arg + 1], "clientip") == 0) {
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_CIP;
+ } else if (!strncmp(args[cur_arg + 1], "hdr_ip(", 7)) {
+ char *name, *end;
+
+ name = args[cur_arg+1] + 7;
+ while (isspace((unsigned char)*name))
+ name++;
+
+ end = name;
+ while (*end && !isspace((unsigned char)*end) && *end != ',' && *end != ')')
+ end++;
+
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_DYN;
+ free(curproxy->conn_src.bind_hdr_name);
+ curproxy->conn_src.bind_hdr_name = calloc(1, end - name + 1);
+ if (!curproxy->conn_src.bind_hdr_name)
+ goto alloc_error;
+ curproxy->conn_src.bind_hdr_len = end - name;
+ memcpy(curproxy->conn_src.bind_hdr_name, name, end - name);
+ curproxy->conn_src.bind_hdr_name[end-name] = '\0';
+ curproxy->conn_src.bind_hdr_occ = -1;
+
+ /* now look for an occurrence number */
+ while (isspace((unsigned char)*end))
+ end++;
+ if (*end == ',') {
+ end++;
+ name = end;
+ if (*end == '-')
+ end++;
+ while (isdigit((unsigned char)*end))
+ end++;
+ curproxy->conn_src.bind_hdr_occ = strl2ic(name, end-name);
+ }
+
+ if (curproxy->conn_src.bind_hdr_occ < -MAX_HDR_HISTORY) {
+ ha_alert("parsing [%s:%d] : usesrc hdr_ip(name,num) does not support negative"
+ " occurrences values smaller than %d.\n",
+ file, linenum, MAX_HDR_HISTORY);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } else {
+ struct sockaddr_storage *sk;
+
+ sk = str2sa_range(args[cur_arg + 1], NULL, &port1, &port2, NULL, NULL,
+ &errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n",
+ file, linenum, args[cur_arg], args[cur_arg+1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->conn_src.tproxy_addr = *sk;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_ADDR;
+ }
+ global.last_checks |= LSTCHK_NETADM;
+#else /* no TPROXY support */
+ ha_alert("parsing [%s:%d] : '%s' not allowed here because support for TPROXY was not compiled in.\n",
+ file, linenum, "usesrc");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "interface") == 0) { /* specifically bind to this interface */
+#ifdef SO_BINDTODEVICE
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d] : '%s' : missing interface name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->conn_src.iface_name);
+ curproxy->conn_src.iface_name = strdup(args[cur_arg + 1]);
+ if (!curproxy->conn_src.iface_name)
+ goto alloc_error;
+ curproxy->conn_src.iface_len = strlen(curproxy->conn_src.iface_name);
+ global.last_checks |= LSTCHK_NETADM;
+#else
+ ha_alert("parsing [%s:%d] : '%s' : '%s' option not implemented.\n",
+ file, linenum, args[0], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif
+ cur_arg += 2;
+ continue;
+ }
+ ha_alert("parsing [%s:%d] : '%s' only supports optional keywords '%s' and '%s'.\n",
+ file, linenum, args[0], "interface", "usesrc");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
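+ /* Illustrative usages (addresses and interface name are made-up
+ * examples; 'usesrc' additionally requires TPROXY support and
+ * network admin privileges, hence LSTCHK_NETADM above):
+ *   source 192.168.1.200
+ *   source 0.0.0.0 usesrc clientip
+ *   source 0.0.0.0 usesrc hdr_ip(x-forwarded-for,-1) interface eth0
+ */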
+ }
+ else if (strcmp(args[0], "usesrc") == 0) { /* address to use outside: needs "source" first */
+ ha_alert("parsing [%s:%d] : '%s' only allowed after a '%s' statement.\n",
+ file, linenum, "usesrc", "source");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "cliexp") == 0 || strcmp(args[0], "reqrep") == 0) { /* replace request header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request replace-path', 'http-request replace-uri' or 'http-request replace-header' instead.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqdel") == 0) { /* delete request header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request del-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqdeny") == 0) { /* deny a request if a header matches this regex */
+ ha_alert("parsing [%s:%d] : The '%s' not supported anymore since HAProxy 2.1. "
+ "Use 'http-request deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqpass") == 0) { /* pass this header without allowing or denying the request */
+ ha_alert("parsing [%s:%d] : The '%s' not supported anymore since HAProxy 2.1.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqallow") == 0) { /* allow a request if a header matches this regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request allow' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqtarpit") == 0) { /* tarpit a request if a header matches this regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request tarpit' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqirep") == 0) { /* replace request header from a regex, ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request replace-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqidel") == 0) { /* delete request header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request del-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqideny") == 0) { /* deny a request if a header matches this regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqipass") == 0) { /* pass this header without allowing or denying the request */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqiallow") == 0) { /* allow a request if a header matches this regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request allow' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqitarpit") == 0) { /* tarpit a request if a header matches this regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request tarpit' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqadd") == 0) { /* add request header */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request add-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "srvexp") == 0 || strcmp(args[0], "rsprep") == 0) { /* replace response header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response replace-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspdel") == 0) { /* delete response header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response del-header' .\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspdeny") == 0) { /* block response header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspirep") == 0) { /* replace response header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response replace-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspidel") == 0) { /* delete response header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response del-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspideny") == 0) { /* block response header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspadd") == 0) { /* add response header */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response add-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else {
+ struct cfg_kw_list *kwl;
+ const char *best;
+ int index;
+
+ list_for_each_entry(kwl, &cfg_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].section != CFG_LISTEN)
+ continue;
+ if (strcmp(kwl->kw[index].kw, args[0]) == 0) {
+ if (check_kw_experimental(&kwl->kw[index], file, linenum, &errmsg)) {
+ ha_alert("%s\n", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* prepare error message just in case */
+ rc = kwl->kw[index].parse(args, CFG_LISTEN, curproxy, curr_defproxy, file, linenum, &errmsg);
+ if (rc < 0) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (rc > 0) {
+ ha_warning("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+ goto out;
+ }
+ }
+ }
+
+ best = cfg_find_best_match(args[0], &cfg_keywords.list, CFG_LISTEN, common_kw_list);
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n", file, linenum, args[0], cursection, best);
+ else
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ free(errmsg);
+ return err_code;
+
+ alloc_error:
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+}
diff --git a/src/cfgparse-quic.c b/src/cfgparse-quic.c
new file mode 100644
index 0000000..6df2a35
--- /dev/null
+++ b/src/cfgparse-quic.c
@@ -0,0 +1,143 @@
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/listener.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/quic_cc-t.h>
+#include <haproxy/tools.h>
+
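+/* parse "quic-force-retry" bind keyword */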
+static int bind_parse_quic_force_retry(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->options |= BC_O_QUIC_FORCE_RETRY;
+ return 0;
+}
+
+/* parse "quic-cc-algo" bind keyword */
+static int bind_parse_quic_cc_algo(char **args, int cur_arg, struct proxy *px,
+ struct bind_conf *conf, char **err)
+{
+ struct quic_cc_algo *cc_algo;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing control congestion algorith", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!strcmp(args[cur_arg + 1], "newreno"))
+ cc_algo = &quic_cc_algo_nr;
+ else if (!strcmp(args[cur_arg + 1], "cubic"))
+ cc_algo = &quic_cc_algo_cubic;
+ else {
+ memprintf(err, "'%s' : unknown control congestion algorithm", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->quic_cc_algo = cc_algo;
+ return 0;
+}
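+
+/* Illustrative bind line using the two keywords registered below
+ * (address, certificate path and ALPN token are made-up examples):
+ *   bind quic4@:8443 ssl crt /path/to/cert.pem alpn h3 quic-force-retry quic-cc-algo cubic
+ */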
+
+static struct bind_kw_list bind_kws = { "QUIC", { }, {
+ { "quic-force-retry", bind_parse_quic_force_retry, 0 },
+ { "quic-cc-algo", bind_parse_quic_cc_algo, 1 },
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+/* Must be used to parse tune.quic.* settings which expect a time
+ * as value.
+ * Returns -1 on alert, or 0 on success.
+ */
+static int cfg_parse_quic_time(char **args, int section_type,
+ struct proxy *curpx,
+ const struct proxy *defpx,
+ const char *file, int line, char **err)
+{
+ unsigned int time;
+ const char *res, *name, *value;
+ int prefix_len = strlen("tune.quic.");
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ name = args[0];
+ value = args[1];
+ res = parse_time_err(value, &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' "
+ "(maximum value is 2147483647 ms or ~24.8 days)", value, name);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' "
+ "(minimum non-null value is 1 ms)", value, name);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in '%s'", *res, name);
+ return -1;
+ }
+
+ if (strcmp(name + prefix_len, "frontend.max-idle-timeout") == 0)
+ global.tune.quic_frontend_max_idle_timeout = time;
+ else if (strcmp(name + prefix_len, "backend.max-idle-timeout") == 0)
+ global.tune.quic_backend_max_idle_timeout = time;
+ else {
+ memprintf(err, "'%s' keyword not unhandled (please report this bug).", args[0]);
+ return -1;
+ }
+
+ return 0;
+}
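+
+/* Illustrative global-section usage parsed by the function above
+ * (the value is an example; parse_time_err defaults to milliseconds):
+ *   tune.quic.frontend.max-idle-timeout 30s
+ */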
+
+/* Parse any tune.quic.* setting with strictly positive integer values.
+ * Returns -1 on alert, or 0 on success.
+ */
+static int cfg_parse_quic_tune_setting(char **args, int section_type,
+ struct proxy *curpx,
+ const struct proxy *defpx,
+ const char *file, int line, char **err)
+{
+ unsigned int arg = 0;
+ int prefix_len = strlen("tune.quic.");
+ const char *suffix;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) != 0)
+ arg = atoi(args[1]);
+
+ if (arg < 1) {
+ memprintf(err, "'%s' expects a positive integer.", args[0]);
+ return -1;
+ }
+
+ suffix = args[0] + prefix_len;
+ if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0)
+ global.tune.quic_streams_buf = arg;
+ else if (strcmp(suffix, "frontend.max-streams-bidi") == 0)
+ global.tune.quic_frontend_max_streams_bidi = arg;
+ else if (strcmp(suffix, "retry-threshold") == 0)
+ global.tune.quic_retry_threshold = arg;
+ else {
+ memprintf(err, "'%s' keyword not unhandled (please report this bug).", args[0]);
+ return -1;
+ }
+
+ return 0;
+}
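+
+/* Illustrative global-section usage parsed by the function above
+ * (values are examples):
+ *   tune.quic.retry-threshold 100
+ *   tune.quic.frontend.max-streams-bidi 256
+ */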
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time },
+ { CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting },
+ { CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting },
+ { CFG_GLOBAL, "tune.quic.frontend.max-idle-timeout", cfg_parse_quic_time },
+ { CFG_GLOBAL, "tune.quic.retry-threshold", cfg_parse_quic_tune_setting },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/cfgparse-ssl.c b/src/cfgparse-ssl.c
new file mode 100644
index 0000000..63b41bf
--- /dev/null
+++ b/src/cfgparse-ssl.c
@@ -0,0 +1,2027 @@
+/*
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Configuration parsing for SSL.
+ * This file is split in 3 parts:
+ * - global section parsing
+ * - bind keyword parsing
+ * - server keyword parsing
+ *
+ * Please insert the new keywords at the right place
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/listener.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/tools.h>
+#include <haproxy/ssl_ckch.h>
+
+
+/****************** Global Section Parsing ********************************************/
+
+static int ssl_load_global_issuers_from_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *path;
+ struct dirent **de_list;
+ int i, n;
+ struct stat buf;
+ char *end;
+ char fp[MAXPATHLEN+1];
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ path = args[1];
+ if (*path == 0 || stat(path, &buf)) {
+ memprintf(err, "%sglobal statement '%s' expects a directory path as an argument.\n",
+ err && *err ? *err : "", args[0]);
+ return -1;
+ }
+ if (S_ISDIR(buf.st_mode) == 0) {
+ memprintf(err, "%sglobal statement '%s': %s is not a directory.\n",
+ err && *err ? *err : "", args[0], path);
+ return -1;
+ }
+
+ /* strip trailing slashes, including first one */
+ for (end = path + strlen(path) - 1; end >= path && *end == '/'; end--)
+ *end = 0;
+ /* path already parsed? */
+ if (global_ssl.issuers_chain_path && strcmp(global_ssl.issuers_chain_path, path) == 0)
+ return 0;
+ /* overwrite old issuers_chain_path */
+ free(global_ssl.issuers_chain_path);
+ global_ssl.issuers_chain_path = strdup(path);
+ ssl_free_global_issuers();
+
+ n = scandir(path, &de_list, 0, alphasort);
+ if (n < 0) {
+ memprintf(err, "%sglobal statement '%s': unable to scan directory '%s' : %s.\n",
+ err && *err ? *err : "", args[0], path, strerror(errno));
+ return -1;
+ }
+ for (i = 0; i < n; i++) {
+ struct dirent *de = de_list[i];
+ BIO *in = NULL;
+ char *warn = NULL;
+
+ snprintf(fp, sizeof(fp), "%s/%s", path, de->d_name);
+ free(de);
+ if (stat(fp, &buf) != 0) {
+ ha_warning("unable to stat certificate from file '%s' : %s.\n", fp, strerror(errno));
+ goto next;
+ }
+ if (!S_ISREG(buf.st_mode))
+ goto next;
+
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto next;
+ if (BIO_read_filename(in, fp) <= 0)
+ goto next;
+ ssl_load_global_issuer_from_BIO(in, fp, &warn);
+ if (warn) {
+ ha_warning("%s", warn);
+ ha_free(&warn);
+ }
+ next:
+ if (in)
+ BIO_free(in);
+ }
+ free(de_list);
+
+ return 0;
+}
+
+/* parse the "ssl-mode-async" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_async(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+#ifdef SSL_MODE_ASYNC
+ global_ssl.async = 1;
+ global.ssl_used_async_engines = nb_engines;
+ return 0;
+#else
+ memprintf(err, "'%s': openssl library does not support async mode", args[0]);
+ return -1;
+#endif
+}
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+/* parse the "ssl-engine" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_engine(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *algo;
+ int ret = -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a valid engine name as an argument.", args[0]);
+ return ret;
+ }
+
+ if (*(args[2]) == 0) {
+ /* if no list of algorithms is given, it defaults to ALL */
+ algo = strdup("ALL");
+ goto add_engine;
+ }
+
+ /* otherwise the expected format is ssl-engine <engine_name> algo <list of algo> */
+ if (strcmp(args[2], "algo") != 0) {
+ memprintf(err, "global statement '%s' expects to have algo keyword.", args[0]);
+ return ret;
+ }
+
+ if (*(args[3]) == 0) {
+ memprintf(err, "global statement '%s' expects algorithm names as an argument.", args[0]);
+ return ret;
+ }
+ algo = strdup(args[3]);
+
+add_engine:
+ if (ssl_init_single_engine(args[1], algo)==0) {
+ openssl_engines_initialized++;
+ ret = 0;
+ }
+ free(algo);
+ return ret;
+}
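+
+/* Illustrative usage (engine name and algorithm list are examples and
+ * depend on the engines actually available to OpenSSL):
+ *   ssl-engine qat algo RSA,EC
+ */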
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+/* parse the "ssl-propquery" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_propquery(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret = -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a property string as an argument.", args[0]);
+ return ret;
+ }
+
+ if (EVP_set_default_properties(NULL, args[1]))
+ ret = 0;
+
+ return ret;
+}
+
+/* parse the "ssl-provider" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_provider(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret = -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a valid engine provider name as an argument.", args[0]);
+ return ret;
+ }
+
+ if (ssl_init_provider(args[1]) == 0)
+ ret = 0;
+
+ return ret;
+}
+
+/* parse the "ssl-provider-path" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_provider_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a directory path as an argument.", args[0]);
+ return -1;
+ }
+
+ OSSL_PROVIDER_set_default_search_path(NULL, args[1]);
+
+ return 0;
+}
+#endif
+
+/* parse the "ssl-default-bind-ciphers" / "ssl-default-server-ciphers" keywords
+ * in global section. Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ciphers(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+
+ target = (args[0][12] == 'b') ? &global_ssl.listen_default_ciphers : &global_ssl.connect_default_ciphers;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a cipher suite as an argument.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+}
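+
+/* Illustrative usage (the cipher string is an example; it is stored
+ * verbatim here and only validated when the SSL contexts are built):
+ *   ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256
+ */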
+
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+/* parse the "ssl-default-bind-ciphersuites" / "ssl-default-server-ciphersuites" keywords
+ * in global section. Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ciphersuites(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+
+ target = (args[0][12] == 'b') ? &global_ssl.listen_default_ciphersuites : &global_ssl.connect_default_ciphersuites;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a cipher suite as an argument.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+}
+#endif
+
+#if defined(SSL_CTX_set1_curves_list)
+/*
+ * parse the "ssl-default-bind-curves" keyword in a global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_curves(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+ target = &global_ssl.listen_default_curves;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a curves suite as an arguments.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+}
+#endif
+/* parse various global tune.ssl settings consisting of positive integers.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_int(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int *target;
+
+ if (strcmp(args[0], "tune.ssl.cachesize") == 0)
+ target = &global.tune.sslcachesize;
+ else if (strcmp(args[0], "tune.ssl.maxrecord") == 0)
+ target = (int *)&global_ssl.max_record;
+ else if (strcmp(args[0], "tune.ssl.hard-maxrecord") == 0)
+ target = (int *)&global_ssl.hard_max_record;
+ else if (strcmp(args[0], "tune.ssl.ssl-ctx-cache-size") == 0)
+ target = &global_ssl.ctx_cache;
+ else if (strcmp(args[0], "maxsslconn") == 0)
+ target = &global.maxsslconn;
+ else if (strcmp(args[0], "tune.ssl.capture-buffer-size") == 0)
+ target = &global_ssl.capture_buffer_size;
+ else if (strcmp(args[0], "tune.ssl.capture-cipherlist-size") == 0) {
+ target = &global_ssl.capture_buffer_size;
+ ha_warning("parsing [%s:%d]: '%s' is deprecated and will be removed in version 2.7. Please use 'tune.ssl.capture-buffer-size' instead.\n",
+ file, line, args[0]);
+ }
+ else {
+ memprintf(err, "'%s' keyword not unhandled (please report this bug).", args[0]);
+ return -1;
+ }
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument.", args[0]);
+ return -1;
+ }
+
+ *target = atoi(args[1]);
+ if (*target < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int ssl_parse_global_capture_buffer(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret;
+
+ ret = ssl_parse_global_int(args, section_type, curpx, defpx, file, line, err);
+ if (ret != 0)
+ return ret;
+
+ if (pool_head_ssl_capture) {
+ memprintf(err, "'%s' is already configured.", args[0]);
+ return -1;
+ }
+
+ pool_head_ssl_capture = create_pool("ssl-capture", sizeof(struct ssl_capture) + global_ssl.capture_buffer_size, MEM_F_SHARED);
+ if (!pool_head_ssl_capture) {
+ memprintf(err, "Out of memory error.");
+ return -1;
+ }
+ return 0;
+}
+
+/* init the SSLKEYLOGFILE pool */
+#ifdef HAVE_SSL_KEYLOG
+static int ssl_parse_global_keylog(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global_ssl.keylog = 1;
+ else if (strcmp(args[1], "off") == 0)
+ global_ssl.keylog = 0;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+
+ if (pool_head_ssl_keylog) /* already configured */
+ return 0;
+
+ pool_head_ssl_keylog = create_pool("ssl-keylogfile", sizeof(struct ssl_keylog), MEM_F_SHARED);
+ if (!pool_head_ssl_keylog) {
+ memprintf(err, "Out of memory error.");
+ return -1;
+ }
+
+ pool_head_ssl_keylog_str = create_pool("ssl-keylogfile-str", sizeof(char) * SSL_KEYLOG_MAX_SECRET_SIZE, MEM_F_SHARED);
+ if (!pool_head_ssl_keylog_str) {
+ memprintf(err, "Out of memory error.");
+ return -1;
+ }
+
+ return 0;
+}
+#else
+static int ssl_parse_global_keylog(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ memprintf(err, "'%s' requires at least OpenSSL 1.1.1.", args[0]);
+ return -1;
+}
+#endif
+
+/* parse "ssl.force-private-cache".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_private_cache(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+
+ global_ssl.private_cache = 1;
+ return 0;
+}
+
+/* parse "ssl.lifetime".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_lifetime(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects ssl sessions <lifetime> in seconds as argument.", args[0]);
+ return -1;
+ }
+
+ res = parse_time_err(args[1], &global_ssl.life_time, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to <%s> (maximum value is 2147483647 s or ~68 years).",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to <%s> (minimum non-null value is 1 s).",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.", *res, args[0]);
+ return -1;
+ }
+ return 0;
+}
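+
+/* Example: "tune.ssl.lifetime 300" limits SSL session reuse to 300 seconds;
+ * parse_time_err() also accepts suffixed values such as "5m".
+ */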
+
+#ifndef OPENSSL_NO_DH
+/* parse "ssl-dh-param-file".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_dh_param_file(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a file path as an argument.", args[0]);
+ return -1;
+ }
+
+ if (ssl_sock_load_global_dh_param_from_file(args[1])) {
+ memprintf(err, "'%s': unable to load DH parameters from file <%s>.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+/* parse "ssl.default-dh-param".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_default_dh(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument.", args[0]);
+ return -1;
+ }
+
+ global_ssl.default_dh_param = atoi(args[1]);
+ if (global_ssl.default_dh_param < 1024) {
+ memprintf(err, "'%s' expects a value >= 1024.", args[0]);
+ return -1;
+ }
+ return 0;
+}
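+
+/* Example: "tune.ssl.default-dh-param 2048" allows 2048-bit DH parameters
+ * for DHE key exchange when none are provided with the certificate.
+ */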
+#endif
+
+/*
+ * parse "ssl-load-extra-files".
+ * multiple arguments are allowed: "bundle", "sctl", "ocsp", "issuer", "key", "all", "none"
+ */
+static int ssl_parse_global_extra_files(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int i;
+ int gf = SSL_GF_NONE;
+
+ if (*(args[1]) == 0)
+ goto err_arg;
+
+ for (i = 1; *args[i]; i++) {
+
+ if (strcmp("bundle", args[i]) == 0) {
+ gf |= SSL_GF_BUNDLE;
+
+ } else if (strcmp("sctl", args[i]) == 0) {
+ gf |= SSL_GF_SCTL;
+
+ } else if (strcmp("ocsp", args[i]) == 0){
+ gf |= SSL_GF_OCSP;
+
+ } else if (strcmp("issuer", args[i]) == 0){
+ gf |= SSL_GF_OCSP_ISSUER;
+
+ } else if (strcmp("key", args[i]) == 0) {
+ gf |= SSL_GF_KEY;
+
+ } else if (strcmp("none", args[i]) == 0) {
+ if (gf != SSL_GF_NONE)
+ goto err_alone;
+ gf = SSL_GF_NONE;
+ i++;
+ break;
+
+ } else if (strcmp("all", args[i]) == 0) {
+ if (gf != SSL_GF_NONE)
+ goto err_alone;
+ gf = SSL_GF_ALL;
+ i++;
+ break;
+ } else {
+ goto err_arg;
+ }
+ }
+ /* break from loop but there are still arguments */
+ if (*args[i])
+ goto err_alone;
+
+ global_ssl.extra_files = gf;
+
+ return 0;
+
+err_alone:
+ memprintf(err, "'%s' 'none' and 'all' can be only used alone", args[0]);
+ return -1;
+
+err_arg:
+ memprintf(err, "'%s' expects one or multiple arguments (none, all, bundle, sctl, ocsp, issuer).", args[0]);
+ return -1;
+}
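+
+/* Example, following the parser above: "ssl-load-extra-files bundle ocsp"
+ * yields gf == (SSL_GF_BUNDLE | SSL_GF_OCSP); "none" and "all" must be the
+ * only argument on the line.
+ */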
+
+
+/* parse 'ssl-load-extra-del-ext' */
+static int ssl_parse_global_extra_noext(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ global_ssl.extra_files_noext = 1;
+ return 0;
+}
+
+/***************************** Bind keyword Parsing ********************************************/
+
+/* for ca-file and ca-verify-file */
+static int ssl_bind_parse_ca_file_common(char **args, int cur_arg, char **ca_file_p, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAfile path", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/') && global_ssl.ca_base)
+ memprintf(ca_file_p, "%s/%s", global_ssl.ca_base, args[cur_arg + 1]);
+ else
+ memprintf(ca_file_p, "%s", args[cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(*ca_file_p, !from_cli, CAFILE_CERT)) {
+ memprintf(err, "'%s' : unable to load %s", args[cur_arg], *ca_file_p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* parse the "ca-file" bind keyword */
+static int ssl_bind_parse_ca_file(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ return ssl_bind_parse_ca_file_common(args, cur_arg, &conf->ca_file, from_cli, err);
+}
+static int bind_parse_ca_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ca_file(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ca-verify-file" bind keyword */
+static int ssl_bind_parse_ca_verify_file(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ return ssl_bind_parse_ca_file_common(args, cur_arg, &conf->ca_verify_file, from_cli, err);
+}
+static int bind_parse_ca_verify_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ca_verify_file(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ca-sign-file" bind keyword */
+static int bind_parse_ca_sign_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAfile path", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/') && global_ssl.ca_base)
+ memprintf(&conf->ca_sign_file, "%s/%s", global_ssl.ca_base, args[cur_arg + 1]);
+ else
+ memprintf(&conf->ca_sign_file, "%s", args[cur_arg + 1]);
+
+ return 0;
+}
+
+/* parse the "ca-sign-pass" bind keyword */
+static int bind_parse_ca_sign_pass(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAkey password", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ memprintf(&conf->ca_sign_pass, "%s", args[cur_arg + 1]);
+ return 0;
+}
+
+/* parse the "ciphers" bind keyword */
+static int ssl_bind_parse_ciphers(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(conf->ciphers);
+ conf->ciphers = strdup(args[cur_arg + 1]);
+ return 0;
+}
+static int bind_parse_ciphers(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ciphers(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+/* parse the "ciphersuites" bind keyword */
+static int ssl_bind_parse_ciphersuites(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(conf->ciphersuites);
+ conf->ciphersuites = strdup(args[cur_arg + 1]);
+ return 0;
+}
+static int bind_parse_ciphersuites(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ciphersuites(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+#endif
+
+/* parse the "crt" bind keyword. Returns a set of ERR_* flags possibly with an error in <err>. */
+static int bind_parse_crt(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char path[MAXPATHLEN];
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing certificate location", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/') && global_ssl.crt_base) {
+ if ((strlen(global_ssl.crt_base) + 1 + strlen(args[cur_arg + 1]) + 1) > sizeof(path) ||
+ snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, args[cur_arg + 1]) >= sizeof(path)) {
+ memprintf(err, "'%s' : path too long", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return ssl_sock_load_cert(path, conf, err);
+ }
+
+ return ssl_sock_load_cert(args[cur_arg + 1], conf, err);
+}
+
+/* parse the "crt-list" bind keyword. Returns a set of ERR_* flags possibly with an error in <err>. */
+static int bind_parse_crt_list(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int err_code;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing certificate location", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ err_code = ssl_sock_load_cert_list_file(args[cur_arg + 1], 0, conf, px, err);
+ if (err_code)
+ memprintf(err, "'%s' : %s", args[cur_arg], *err);
+
+ return err_code;
+}
+
+/* parse the "crl-file" bind keyword */
+static int ssl_bind_parse_crl_file(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#ifndef X509_V_FLAG_CRL_CHECK
+ memprintf(err, "'%s' : library does not support CRL verify", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CRLfile path", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/') && global_ssl.ca_base)
+ memprintf(&conf->crl_file, "%s/%s", global_ssl.ca_base, args[cur_arg + 1]);
+ else
+ memprintf(&conf->crl_file, "%s", args[cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(conf->crl_file, !from_cli, CAFILE_CRL)) {
+ memprintf(err, "'%s' : unable to load %s", args[cur_arg], conf->crl_file);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+#endif
+}
+static int bind_parse_crl_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_crl_file(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "curves" bind keyword keyword */
+static int ssl_bind_parse_curves(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if defined(SSL_CTX_set1_curves_list)
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing curve suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ conf->curves = strdup(args[cur_arg + 1]);
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support curve suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+static int bind_parse_curves(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_curves(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ecdhe" bind keyword keyword */
+static int ssl_bind_parse_ecdhe(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if !defined(SSL_CTX_set_tmp_ecdh)
+ memprintf(err, "'%s' : library does not support elliptic curve Diffie-Hellman (too old)", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#elif defined(OPENSSL_NO_ECDH)
+ memprintf(err, "'%s' : library does not support elliptic curve Diffie-Hellman (disabled via OPENSSL_NO_ECDH)", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing named curve", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->ecdhe = strdup(args[cur_arg + 1]);
+
+ return 0;
+#endif
+}
+static int bind_parse_ecdhe(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ecdhe(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "crt-ignore-err" and "ca-ignore-err" bind keywords */
+static int bind_parse_ignore_err(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int code;
+ char *p = args[cur_arg + 1];
+ unsigned long long *ignerr = conf->crt_ignerr_bitfield;
+
+ if (!*p) {
+ memprintf(err, "'%s' : missing error IDs list", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg], "ca-ignore-err") == 0)
+ ignerr = conf->ca_ignerr_bitfield;
+
+ if (strcmp(p, "all") == 0) {
+ cert_ignerr_bitfield_set_all(ignerr);
+ return 0;
+ }
+
+ while (p) {
+ code = atoi(p);
+ if ((code <= 0) || (code > SSL_MAX_VFY_ERROR_CODE)) {
+ memprintf(err, "'%s' : ID '%d' out of range (1..%d) in error IDs list '%s'",
+ args[cur_arg], code, SSL_MAX_VFY_ERROR_CODE, args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ cert_ignerr_bitfield_set(ignerr, code);
+ p = strchr(p, ',');
+ if (p)
+ p++;
+ }
+
+ return 0;
+}
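+
+/* Example: "crt-ignore-err all" ignores every verify error on the leaf
+ * certificate, while "ca-ignore-err 2,9" only ignores error IDs 2 and 9
+ * (a comma-separated list, as parsed above).
+ */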
+
+/* parse tls_method_options "no-xxx" and "force-xxx" */
+static int parse_tls_method_options(char *arg, struct tls_version_filter *methods, char **err)
+{
+ uint16_t v;
+ char *p;
+ p = strchr(arg, '-');
+ if (!p)
+ goto fail;
+ p++;
+ if (strcmp(p, "sslv3") == 0)
+ v = CONF_SSLV3;
+ else if (strcmp(p, "tlsv10") == 0)
+ v = CONF_TLSV10;
+ else if (strcmp(p, "tlsv11") == 0)
+ v = CONF_TLSV11;
+ else if (strcmp(p, "tlsv12") == 0)
+ v = CONF_TLSV12;
+ else if (strcmp(p, "tlsv13") == 0)
+ v = CONF_TLSV13;
+ else
+ goto fail;
+ if (!strncmp(arg, "no-", 3))
+ methods->flags |= methodVersions[v].flag;
+ else if (!strncmp(arg, "force-", 6))
+ methods->min = methods->max = v;
+ else
+ goto fail;
+ return 0;
+ fail:
+ memprintf(err, "'%s' : option not implemented", arg);
+ return ERR_ALERT | ERR_FATAL;
+}
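+
+/* Example: "no-tlsv11" only sets methodVersions[CONF_TLSV11].flag in
+ * methods->flags, while "force-tlsv12" pins both methods->min and
+ * methods->max to CONF_TLSV12.
+ */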
+
+static int bind_parse_tls_method_options(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return parse_tls_method_options(args[cur_arg], &conf->ssl_conf.ssl_methods, err);
+}
+
+static int srv_parse_tls_method_options(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ return parse_tls_method_options(args[*cur_arg], &newsrv->ssl_ctx.methods, err);
+}
+
+/* parse tls_method min/max: "ssl-min-ver" and "ssl-max-ver" */
+static int parse_tls_method_minmax(char **args, int cur_arg, struct tls_version_filter *methods, char **err)
+{
+ uint16_t i, v = 0;
+ char *argv = args[cur_arg + 1];
+ if (!*argv) {
+ memprintf(err, "'%s' : missing the ssl/tls version", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (strcmp(argv, methodVersions[i].name) == 0)
+ v = i;
+ if (!v) {
+ memprintf(err, "'%s' : unknown ssl/tls version", args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (strcmp("ssl-min-ver", args[cur_arg]) == 0)
+ methods->min = v;
+ else if (strcmp("ssl-max-ver", args[cur_arg]) == 0)
+ methods->max = v;
+ else {
+ memprintf(err, "'%s' : option not implemented", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
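+
+/* Example: "ssl-min-ver TLSv1.2" stores the matching methodVersions[] index
+ * in methods->min (assuming the version is spelled exactly as in
+ * methodVersions[].name).
+ */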
+
+static int ssl_bind_parse_tls_method_minmax(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ int ret;
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10101000L) && !defined(OPENSSL_IS_BORINGSSL)
+ ha_warning("crt-list: ssl-min-ver and ssl-max-ver are not supported with this Openssl version (skipped).\n");
+#endif
+ ret = parse_tls_method_minmax(args, cur_arg, &conf->ssl_methods_cfg, err);
+ if (ret != ERR_NONE)
+ return ret;
+
+ conf->ssl_methods.min = conf->ssl_methods_cfg.min;
+ conf->ssl_methods.max = conf->ssl_methods_cfg.max;
+
+ return ret;
+}
+static int bind_parse_tls_method_minmax(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return parse_tls_method_minmax(args, cur_arg, &conf->ssl_conf.ssl_methods, err);
+}
+
+static int srv_parse_tls_method_minmax(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ return parse_tls_method_minmax(args, *cur_arg, &newsrv->ssl_ctx.methods, err);
+}
+
+/* parse the "no-tls-tickets" bind keyword */
+static int bind_parse_no_tls_tickets(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->ssl_options |= BC_SSL_O_NO_TLS_TICKETS;
+ return 0;
+}
+
+/* parse the "allow-0rtt" bind keyword */
+static int ssl_bind_parse_allow_0rtt(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ conf->early_data = 1;
+ return 0;
+}
+
+static int bind_parse_allow_0rtt(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->ssl_conf.early_data = 1;
+ return 0;
+}
+
+/* parse the "npn" bind keyword */
+static int ssl_bind_parse_npn(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ char *p1, *p2;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing the comma-delimited NPN protocol suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(conf->npn_str);
+
+ /* the NPN string is built as a suite of (<len> <name>)*,
+ * so we reuse each comma to store the next <len> and need
+ * one more for the end of the string.
+ */
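+ /* e.g. "spdy/2,http/1.1" becomes "\x06spdy/2\x08http/1.1" */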
+ conf->npn_len = strlen(args[cur_arg + 1]) + 1;
+ conf->npn_str = calloc(1, conf->npn_len + 1);
+ if (!conf->npn_str) {
+ memprintf(err, "'%s' : out of memory", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ memcpy(conf->npn_str + 1, args[cur_arg + 1], conf->npn_len);
+
+ /* replace commas with the name length */
+ p1 = conf->npn_str;
+ p2 = p1 + 1;
+ while (1) {
+ p2 = memchr(p1 + 1, ',', conf->npn_str + conf->npn_len - (p1 + 1));
+ if (!p2)
+ p2 = p1 + 1 + strlen(p1 + 1);
+
+ if (p2 - (p1 + 1) > 255) {
+ *p2 = '\0';
+ memprintf(err, "'%s' : NPN protocol name too long : '%s'", args[cur_arg], p1 + 1);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ *p1 = p2 - (p1 + 1);
+ p1 = p2;
+
+ if (!*p2)
+ break;
+
+ *(p2++) = '\0';
+ }
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support TLS NPN extension", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+static int bind_parse_npn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_npn(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+
+/* Parses an ALPN string and converts it to the right format for the SSL API */
+int ssl_sock_parse_alpn(char *arg, char **alpn_str, int *alpn_len, char **err)
+{
+ char *p1, *p2, *alpn = NULL;
+ int len, ret = 0;
+
+ *alpn_str = NULL;
+ *alpn_len = 0;
+
+ if (!*arg) {
+ memprintf(err, "missing the comma-delimited ALPN protocol suite");
+ goto error;
+ }
+
+ /* the ALPN string is built as a suite of (<len> <name>)*,
+ * so we reuse each comma to store the next <len> and need
+ * one more for the end of the string.
+ */
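+ /* e.g. "h2,http/1.1" becomes "\x02h2\x08http/1.1" */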
+ len = strlen(arg) + 1;
+ alpn = calloc(1, len+1);
+ if (!alpn) {
+ memprintf(err, "'%s' : out of memory", arg);
+ goto error;
+ }
+ memcpy(alpn+1, arg, len);
+
+ /* replace commas with the name length */
+ p1 = alpn;
+ p2 = p1 + 1;
+ while (1) {
+ p2 = memchr(p1 + 1, ',', alpn + len - (p1 + 1));
+ if (!p2)
+ p2 = p1 + 1 + strlen(p1 + 1);
+
+ if (p2 - (p1 + 1) > 255) {
+ *p2 = '\0';
+ memprintf(err, "ALPN protocol name too long : '%s'", p1 + 1);
+ goto error;
+ }
+
+ *p1 = p2 - (p1 + 1);
+ p1 = p2;
+
+ if (!*p2)
+ break;
+
+ *(p2++) = '\0';
+ }
+
+ *alpn_str = alpn;
+ *alpn_len = len;
+
+ out:
+ return ret;
+
+ error:
+ free(alpn);
+ ret = ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* parse the "alpn" bind keyword */
+static int ssl_bind_parse_alpn(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ int ret;
+
+ free(conf->alpn_str);
+
+ ret = ssl_sock_parse_alpn(args[cur_arg + 1], &conf->alpn_str, &conf->alpn_len, err);
+ if (ret)
+ memprintf(err, "'%s' : %s", args[cur_arg], *err);
+ return ret;
+#else
+ memprintf(err, "'%s' : library does not support TLS ALPN extension", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+static int bind_parse_alpn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_alpn(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ssl" bind keyword */
+static int bind_parse_ssl(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->options |= BC_O_USE_SSL;
+
+ if (global_ssl.listen_default_ciphers && !conf->ssl_conf.ciphers)
+ conf->ssl_conf.ciphers = strdup(global_ssl.listen_default_ciphers);
+#if defined(SSL_CTX_set1_curves_list)
+ if (global_ssl.listen_default_curves && !conf->ssl_conf.curves)
+ conf->ssl_conf.curves = strdup(global_ssl.listen_default_curves);
+#endif
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (global_ssl.listen_default_ciphersuites && !conf->ssl_conf.ciphersuites)
+ conf->ssl_conf.ciphersuites = strdup(global_ssl.listen_default_ciphersuites);
+#endif
+ conf->ssl_options |= global_ssl.listen_default_ssloptions;
+ conf->ssl_conf.ssl_methods.flags |= global_ssl.listen_default_sslmethods.flags;
+ if (!conf->ssl_conf.ssl_methods.min)
+ conf->ssl_conf.ssl_methods.min = global_ssl.listen_default_sslmethods.min;
+ if (!conf->ssl_conf.ssl_methods.max)
+ conf->ssl_conf.ssl_methods.max = global_ssl.listen_default_sslmethods.max;
+
+ return 0;
+}
+
+/* parse the "prefer-client-ciphers" bind keyword */
+static int bind_parse_pcc(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->ssl_options |= BC_SSL_O_PREF_CLIE_CIPH;
+ return 0;
+}
+
+/* parse the "generate-certificates" bind keyword */
+static int bind_parse_generate_certs(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+ conf->options |= BC_O_GENERATE_CERTS;
+#else
+ memprintf(err, "%sthis version of openssl cannot generate SSL certificates.\n",
+ err && *err ? *err : "");
+#endif
+ return 0;
+}
+
+/* parse the "strict-sni" bind keyword */
+static int bind_parse_strict_sni(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->strict_sni = 1;
+ return 0;
+}
+
+/* parse the "tls-ticket-keys" bind keyword */
+static int bind_parse_tls_ticket_keys(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ FILE *f = NULL;
+ int i = 0;
+ char thisline[LINESIZE];
+ struct tls_keys_ref *keys_ref = NULL;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing TLS ticket keys file path", args[cur_arg]);
+ goto fail;
+ }
+
+ keys_ref = tlskeys_ref_lookup(args[cur_arg + 1]);
+ if (keys_ref) {
+ keys_ref->refcount++;
+ conf->keys_ref = keys_ref;
+ return 0;
+ }
+
+ keys_ref = calloc(1, sizeof(*keys_ref));
+ if (!keys_ref) {
+ memprintf(err, "'%s' : allocation error", args[cur_arg+1]);
+ goto fail;
+ }
+
+ keys_ref->tlskeys = malloc(TLS_TICKETS_NO * sizeof(union tls_sess_key));
+ if (!keys_ref->tlskeys) {
+ memprintf(err, "'%s' : allocation error", args[cur_arg+1]);
+ goto fail;
+ }
+
+ if ((f = fopen(args[cur_arg + 1], "r")) == NULL) {
+ memprintf(err, "'%s' : unable to load ssl tickets keys file", args[cur_arg+1]);
+ goto fail;
+ }
+
+ keys_ref->filename = strdup(args[cur_arg + 1]);
+ if (!keys_ref->filename) {
+ memprintf(err, "'%s' : allocation error", args[cur_arg+1]);
+ goto fail;
+ }
+
+ keys_ref->key_size_bits = 0;
+ while (fgets(thisline, sizeof(thisline), f) != NULL) {
+ int len = strlen(thisline);
+ int dec_size;
+
+ /* strip trailing newline characters, guarding against empty lines */
+ if (len > 0 && thisline[len - 1] == '\n')
+ thisline[--len] = 0;
+
+ if (len > 0 && thisline[len - 1] == '\r')
+ thisline[--len] = 0;
+
+ dec_size = base64dec(thisline, len, (char *) (keys_ref->tlskeys + i % TLS_TICKETS_NO), sizeof(union tls_sess_key));
+ if (dec_size < 0) {
+ memprintf(err, "'%s' : unable to decode base64 key on line %d", args[cur_arg+1], i + 1);
+ goto fail;
+ }
+ else if (!keys_ref->key_size_bits && (dec_size == sizeof(struct tls_sess_key_128))) {
+ keys_ref->key_size_bits = 128;
+ }
+ else if (!keys_ref->key_size_bits && (dec_size == sizeof(struct tls_sess_key_256))) {
+ keys_ref->key_size_bits = 256;
+ }
+ else if (((dec_size != sizeof(struct tls_sess_key_128)) && (dec_size != sizeof(struct tls_sess_key_256)))
+ || ((dec_size == sizeof(struct tls_sess_key_128) && (keys_ref->key_size_bits != 128)))
+ || ((dec_size == sizeof(struct tls_sess_key_256) && (keys_ref->key_size_bits != 256)))) {
+ memprintf(err, "'%s' : wrong sized key on line %d", args[cur_arg+1], i + 1);
+ goto fail;
+ }
+ i++;
+ }
+
+ if (i < TLS_TICKETS_NO) {
+ memprintf(err, "'%s' : please supply at least %d keys in the tls-tickets-file", args[cur_arg+1], TLS_TICKETS_NO);
+ goto fail;
+ }
+
+ fclose(f);
+
+ /* Use penultimate key for encryption, handle when TLS_TICKETS_NO = 1 */
+ i -= 2;
+ keys_ref->tls_ticket_enc_index = i < 0 ? 0 : i % TLS_TICKETS_NO;
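+ /* e.g. with TLS_TICKETS_NO == 3 and exactly 3 keys loaded, i ends up
+ * as 1: encrypting with the middle key keeps the neighbouring keys
+ * usable for decryption across a rotation.
+ */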
+ keys_ref->unique_id = -1;
+ keys_ref->refcount = 1;
+ HA_RWLOCK_INIT(&keys_ref->lock);
+ conf->keys_ref = keys_ref;
+
+ LIST_INSERT(&tlskeys_reference, &keys_ref->list);
+
+ return 0;
+
+ fail:
+ if (f)
+ fclose(f);
+ if (keys_ref) {
+ free(keys_ref->filename);
+ free(keys_ref->tlskeys);
+ free(keys_ref);
+ }
+ return ERR_ALERT | ERR_FATAL;
+
+#else
+ memprintf(err, "'%s' : TLS ticket callback extension not supported", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif /* SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB */
+}
+
+/* parse the "verify" bind keyword */
+static int ssl_bind_parse_verify(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing verify method", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg + 1], "none") == 0)
+ conf->verify = SSL_SOCK_VERIFY_NONE;
+ else if (strcmp(args[cur_arg + 1], "optional") == 0)
+ conf->verify = SSL_SOCK_VERIFY_OPTIONAL;
+ else if (strcmp(args[cur_arg + 1], "required") == 0)
+ conf->verify = SSL_SOCK_VERIFY_REQUIRED;
+ else {
+ memprintf(err, "'%s' : unknown verify method '%s', only 'none', 'optional', and 'required' are supported\n",
+ args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+static int bind_parse_verify(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_verify(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "no-ca-names" bind keyword */
+static int ssl_bind_parse_no_ca_names(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ conf->no_ca_names = 1;
+ return 0;
+}
+static int bind_parse_no_ca_names(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_no_ca_names(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/***************************** "server" keywords Parsing ********************************************/
+
+/* parse the "npn" bind keyword */
+static int srv_parse_npn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ char *p1, *p2;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing the comma-delimited NPN protocol suite", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.npn_str);
+
+ /* the NPN string is built as a suite of (<len> <name>)*,
+ * so we reuse each comma to store the next <len> and need
+ * one more for the end of the string.
+ */
+ newsrv->ssl_ctx.npn_len = strlen(args[*cur_arg + 1]) + 1;
+ newsrv->ssl_ctx.npn_str = calloc(1, newsrv->ssl_ctx.npn_len + 1);
+ if (!newsrv->ssl_ctx.npn_str) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ memcpy(newsrv->ssl_ctx.npn_str + 1, args[*cur_arg + 1],
+ newsrv->ssl_ctx.npn_len);
+
+ /* replace commas with the name length */
+ p1 = newsrv->ssl_ctx.npn_str;
+ p2 = p1 + 1;
+ while (1) {
+ p2 = memchr(p1 + 1, ',', newsrv->ssl_ctx.npn_str +
+ newsrv->ssl_ctx.npn_len - (p1 + 1));
+ if (!p2)
+ p2 = p1 + 1 + strlen(p1 + 1);
+
+ if (p2 - (p1 + 1) > 255) {
+ *p2 = '\0';
+ memprintf(err, "'%s' : NPN protocol name too long : '%s'", args[*cur_arg], p1 + 1);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ *p1 = p2 - (p1 + 1);
+ p1 = p2;
+
+ if (!*p2)
+ break;
+
+ *(p2++) = '\0';
+ }
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support TLS NPN extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+static int parse_alpn(char *alpn, char **out_alpn_str, int *out_alpn_len, char **err)
+{
+ free(*out_alpn_str);
+ return ssl_sock_parse_alpn(alpn, out_alpn_str, out_alpn_len, err);
+}
+#endif
+
+/* parse the "alpn" server keyword */
+static int srv_parse_alpn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ int ret = parse_alpn(args[*cur_arg + 1],
+ &newsrv->ssl_ctx.alpn_str,
+ &newsrv->ssl_ctx.alpn_len, err);
+ if (ret)
+ memprintf(err, "'%s' : %s", args[*cur_arg], *err);
+ return ret;
+#else
+ memprintf(err, "'%s' : library does not support TLS ALPN extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+/* parse the "check-alpn" server keyword */
+static int srv_parse_check_alpn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ int ret = parse_alpn(args[*cur_arg + 1],
+ &newsrv->check.alpn_str,
+ &newsrv->check.alpn_len, err);
+ if (ret)
+ memprintf(err, "'%s' : %s", args[*cur_arg], *err);
+ return ret;
+#else
+ memprintf(err, "'%s' : library does not support TLS ALPN extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+/* parse the "ca-file" server keyword */
+static int srv_parse_ca_file(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ const int create_if_none = newsrv->flags & SRV_F_DYNAMIC ? 0 : 1;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAfile path", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[*cur_arg + 1] != '/') && global_ssl.ca_base)
+ memprintf(&newsrv->ssl_ctx.ca_file, "%s/%s", global_ssl.ca_base, args[*cur_arg + 1]);
+ else
+ memprintf(&newsrv->ssl_ctx.ca_file, "%s", args[*cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(newsrv->ssl_ctx.ca_file, create_if_none, CAFILE_CERT)) {
+ memprintf(err, "'%s' : unable to load %s", args[*cur_arg], newsrv->ssl_ctx.ca_file);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "check-sni" server keyword */
+static int srv_parse_check_sni(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing SNI", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->check.sni = strdup(args[*cur_arg + 1]);
+ if (!newsrv->check.sni) {
+ memprintf(err, "'%s' : failed to allocate memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* common function to init ssl_ctx */
+static int ssl_sock_init_srv(struct server *s)
+{
+ if (global_ssl.connect_default_ciphers && !s->ssl_ctx.ciphers)
+ s->ssl_ctx.ciphers = strdup(global_ssl.connect_default_ciphers);
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (global_ssl.connect_default_ciphersuites && !s->ssl_ctx.ciphersuites) {
+ s->ssl_ctx.ciphersuites = strdup(global_ssl.connect_default_ciphersuites);
+ if (!s->ssl_ctx.ciphersuites)
+ return 1;
+ }
+#endif
+ s->ssl_ctx.options |= global_ssl.connect_default_ssloptions;
+ s->ssl_ctx.methods.flags |= global_ssl.connect_default_sslmethods.flags;
+
+ if (!s->ssl_ctx.methods.min)
+ s->ssl_ctx.methods.min = global_ssl.connect_default_sslmethods.min;
+
+ if (!s->ssl_ctx.methods.max)
+ s->ssl_ctx.methods.max = global_ssl.connect_default_sslmethods.max;
+
+ return 0;
+}
+
+/* parse the "check-ssl" server keyword */
+static int srv_parse_check_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->check.use_ssl = 1;
+ if (ssl_sock_init_srv(newsrv)) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "ciphers" server keyword */
+static int srv_parse_ciphers(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.ciphers);
+ newsrv->ssl_ctx.ciphers = strdup(args[*cur_arg + 1]);
+
+ if (!newsrv->ssl_ctx.ciphers) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+/* parse the "ciphersuites" server keyword */
+static int srv_parse_ciphersuites(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.ciphersuites);
+ newsrv->ssl_ctx.ciphersuites = strdup(args[*cur_arg + 1]);
+
+ if (!newsrv->ssl_ctx.ciphersuites) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+#endif
+
+/* parse the "crl-file" server keyword */
+static int srv_parse_crl_file(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifndef X509_V_FLAG_CRL_CHECK
+ memprintf(err, "'%s' : library does not support CRL verify", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ const int create_if_none = newsrv->flags & SRV_F_DYNAMIC ? 0 : 1;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CRLfile path", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[*cur_arg + 1] != '/') && global_ssl.ca_base)
+ memprintf(&newsrv->ssl_ctx.crl_file, "%s/%s", global_ssl.ca_base, args[*cur_arg + 1]);
+ else
+ memprintf(&newsrv->ssl_ctx.crl_file, "%s", args[*cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(newsrv->ssl_ctx.crl_file, create_if_none, CAFILE_CRL)) {
+ memprintf(err, "'%s' : unable to load %s", args[*cur_arg], newsrv->ssl_ctx.crl_file);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+#endif
+}
+
+/* parse the "crt" server keyword */
+static int srv_parse_crt(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing certificate file path", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[*cur_arg + 1] != '/') && global_ssl.crt_base)
+ memprintf(&newsrv->ssl_ctx.client_crt, "%s/%s", global_ssl.crt_base, args[*cur_arg + 1]);
+ else
+ memprintf(&newsrv->ssl_ctx.client_crt, "%s", args[*cur_arg + 1]);
+
+ return 0;
+}
+
+/* parse the "no-check-ssl" server keyword */
+static int srv_parse_no_check_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->check.use_ssl = -1;
+ ha_free(&newsrv->ssl_ctx.ciphers);
+ newsrv->ssl_ctx.options &= ~global_ssl.connect_default_ssloptions;
+ return 0;
+}
+
+/* parse the "no-send-proxy-v2-ssl" server keyword */
+static int srv_parse_no_send_proxy_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts &= ~SRV_PP_V2;
+ newsrv->pp_opts &= ~SRV_PP_V2_SSL;
+ return 0;
+}
+
+/* parse the "no-send-proxy-v2-ssl-cn" server keyword */
+static int srv_parse_no_send_proxy_cn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts &= ~SRV_PP_V2;
+ newsrv->pp_opts &= ~SRV_PP_V2_SSL;
+ newsrv->pp_opts &= ~SRV_PP_V2_SSL_CN;
+ return 0;
+}
+
+/* parse the "no-ssl" server keyword */
+static int srv_parse_no_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ /* if the default-server has use_ssl set, prepare the ssl settings */
+ if (newsrv->use_ssl == 1) {
+ if (ssl_sock_init_srv(newsrv)) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ ha_free(&newsrv->ssl_ctx.ciphers);
+ }
+ newsrv->use_ssl = -1;
+ return 0;
+}
+
+/* parse the "allow-0rtt" server keyword */
+static int srv_parse_allow_0rtt(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options |= SRV_SSL_O_EARLY_DATA;
+ return 0;
+}
+
+/* parse the "no-ssl-reuse" server keyword */
+static int srv_parse_no_ssl_reuse(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options |= SRV_SSL_O_NO_REUSE;
+ return 0;
+}
+
+/* parse the "no-tls-tickets" server keyword */
+static int srv_parse_no_tls_tickets(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options |= SRV_SSL_O_NO_TLS_TICKETS;
+ return 0;
+}
+/* parse the "send-proxy-v2-ssl" server keyword */
+static int srv_parse_send_proxy_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts |= SRV_PP_V2;
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ return 0;
+}
+
+/* parse the "send-proxy-v2-ssl-cn" server keyword */
+static int srv_parse_send_proxy_cn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts |= SRV_PP_V2;
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_CN;
+ return 0;
+}
+
+/* parse the "sni" server keyword */
+static int srv_parse_sni(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifndef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ memprintf(err, "'%s' : the current SSL library doesn't support the SNI TLS extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' : missing sni expression", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->sni_expr);
+ newsrv->sni_expr = strdup(arg);
+ if (!newsrv->sni_expr) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+#endif
+}
+
+/* parse the "ssl" server keyword */
+static int srv_parse_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->use_ssl = 1;
+ if (ssl_sock_init_srv(newsrv)) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "ssl-reuse" server keyword */
+static int srv_parse_ssl_reuse(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options &= ~SRV_SSL_O_NO_REUSE;
+ return 0;
+}
+
+/* parse the "tls-tickets" server keyword */
+static int srv_parse_tls_tickets(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options &= ~SRV_SSL_O_NO_TLS_TICKETS;
+ return 0;
+}
+
+/* parse the "verify" server keyword */
+static int srv_parse_verify(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing verify method", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[*cur_arg + 1], "none") == 0)
+ newsrv->ssl_ctx.verify = SSL_SOCK_VERIFY_NONE;
+ else if (strcmp(args[*cur_arg + 1], "required") == 0)
+ newsrv->ssl_ctx.verify = SSL_SOCK_VERIFY_REQUIRED;
+ else {
+ memprintf(err, "'%s' : unknown verify method '%s', only 'none' and 'required' are supported\n",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "verifyhost" server keyword */
+static int srv_parse_verifyhost(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing hostname to verify against", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.verify_host);
+ newsrv->ssl_ctx.verify_host = strdup(args[*cur_arg + 1]);
+
+ if (!newsrv->ssl_ctx.verify_host) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "ssl-default-bind-options" keyword in global section */
+static int ssl_parse_default_bind_options(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err) {
+ int i = 1;
+
+ if (*(args[i]) == 0) {
+ memprintf(err, "global statement '%s' expects an option as an argument.", args[0]);
+ return -1;
+ }
+ while (*(args[i])) {
+ if (strcmp(args[i], "no-tls-tickets") == 0)
+ global_ssl.listen_default_ssloptions |= BC_SSL_O_NO_TLS_TICKETS;
+ else if (strcmp(args[i], "prefer-client-ciphers") == 0)
+ global_ssl.listen_default_ssloptions |= BC_SSL_O_PREF_CLIE_CIPH;
+ else if (strcmp(args[i], "ssl-min-ver") == 0 || strcmp(args[i], "ssl-max-ver") == 0) {
+ if (!parse_tls_method_minmax(args, i, &global_ssl.listen_default_sslmethods, err))
+ i++;
+ else {
+ memprintf(err, "%s on global statement '%s'.", *err, args[0]);
+ return -1;
+ }
+ }
+ else if (parse_tls_method_options(args[i], &global_ssl.listen_default_sslmethods, err)) {
+ memprintf(err, "unknown option '%s' on global statement '%s'.", args[i], args[0]);
+ return -1;
+ }
+ i++;
+ }
+ return 0;
+}
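+
+/* Example, combining the branches above:
+ *
+ *     ssl-default-bind-options no-sslv3 prefer-client-ciphers ssl-min-ver TLSv1.2
+ *
+ * options are handled one by one, and ssl-min-ver/ssl-max-ver consume the
+ * following argument (the version spelling must match methodVersions[].name).
+ */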
+
+/* parse the "ssl-default-server-options" keyword in global section */
+static int ssl_parse_default_server_options(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err) {
+ int i = 1;
+
+ if (*(args[i]) == 0) {
+ memprintf(err, "global statement '%s' expects an option as an argument.", args[0]);
+ return -1;
+ }
+ while (*(args[i])) {
+ if (strcmp(args[i], "no-tls-tickets") == 0)
+ global_ssl.connect_default_ssloptions |= SRV_SSL_O_NO_TLS_TICKETS;
+ else if (strcmp(args[i], "ssl-min-ver") == 0 || strcmp(args[i], "ssl-max-ver") == 0) {
+ if (!parse_tls_method_minmax(args, i, &global_ssl.connect_default_sslmethods, err))
+ i++;
+ else {
+ memprintf(err, "%s on global statement '%s'.", *err, args[0]);
+ return -1;
+ }
+ }
+ else if (parse_tls_method_options(args[i], &global_ssl.connect_default_sslmethods, err)) {
+ memprintf(err, "unknown option '%s' on global statement '%s'.", args[i], args[0]);
+ return -1;
+ }
+ i++;
+ }
+ return 0;
+}
+
+/* parse the "ca-base" / "crt-base" keywords in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ca_crt_base(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+
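+ /* args[0] is either "ca-base" or "crt-base": the second character
+ * ('a' vs 'r') is enough to tell them apart */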
+ target = (args[0][1] == 'a') ? &global_ssl.ca_base : &global_ssl.crt_base;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*target) {
+ memprintf(err, "'%s' already specified.", args[0]);
+ return -1;
+ }
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a directory path as an argument.", args[0]);
+ return -1;
+ }
+ *target = strdup(args[1]);
+ return 0;
+}
+
+/* parse the "ssl-skip-self-issued-ca" keyword in global section. */
+static int ssl_parse_skip_self_issued_ca(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+#ifdef SSL_CTX_build_cert_chain
+ global_ssl.skip_self_issued_ca = 1;
+ return 0;
+#else
+ memprintf(err, "global statement '%s' requires at least OpenSSL 1.0.2.", args[0]);
+ return -1;
+#endif
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+
+/* the <ssl_crtlist_kws> keywords are used for crt-list parsing, they *MUST* be safe
+ * with their proxy argument NULL and must only fill the ssl_bind_conf */
+struct ssl_crtlist_kw ssl_crtlist_kws[] = {
+ { "allow-0rtt", ssl_bind_parse_allow_0rtt, 0 }, /* allow 0-RTT */
+ { "alpn", ssl_bind_parse_alpn, 1 }, /* set ALPN supported protocols */
+ { "ca-file", ssl_bind_parse_ca_file, 1 }, /* set CAfile to process ca-names and verify on client cert */
+ { "ca-verify-file", ssl_bind_parse_ca_verify_file, 1 }, /* set CAverify file to process verify on client cert */
+ { "ciphers", ssl_bind_parse_ciphers, 1 }, /* set SSL cipher suite */
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ { "ciphersuites", ssl_bind_parse_ciphersuites, 1 }, /* set TLS 1.3 cipher suite */
+#endif
+ { "crl-file", ssl_bind_parse_crl_file, 1 }, /* set certificate revocation list file use on client cert verify */
+ { "curves", ssl_bind_parse_curves, 1 }, /* set SSL curve suite */
+ { "ecdhe", ssl_bind_parse_ecdhe, 1 }, /* defines named curve for elliptic curve Diffie-Hellman */
+ { "no-ca-names", ssl_bind_parse_no_ca_names, 0 }, /* do not send ca names to clients (ca_file related) */
+ { "npn", ssl_bind_parse_npn, 1 }, /* set NPN supported protocols */
+ { "ssl-min-ver", ssl_bind_parse_tls_method_minmax,1 }, /* minimum version */
+ { "ssl-max-ver", ssl_bind_parse_tls_method_minmax,1 }, /* maximum version */
+ { "verify", ssl_bind_parse_verify, 1 }, /* set SSL verify method */
+ { NULL, NULL, 0 },
+};
+
+/* no initcall for ssl_bind_kws, these ones are parsed in the parser loop */
+
+static struct bind_kw_list bind_kws = { "SSL", { }, {
+ { "allow-0rtt", bind_parse_allow_0rtt, 0 }, /* Allow 0RTT */
+ { "alpn", bind_parse_alpn, 1 }, /* set ALPN supported protocols */
+ { "ca-file", bind_parse_ca_file, 1 }, /* set CAfile to process ca-names and verify on client cert */
+ { "ca-verify-file", bind_parse_ca_verify_file, 1 }, /* set CAverify file to process verify on client cert */
+ { "ca-ignore-err", bind_parse_ignore_err, 1 }, /* set error IDs to ignore on verify depth > 0 */
+ { "ca-sign-file", bind_parse_ca_sign_file, 1 }, /* set CAFile used to generate and sign server certs */
+ { "ca-sign-pass", bind_parse_ca_sign_pass, 1 }, /* set CAKey passphrase */
+ { "ciphers", bind_parse_ciphers, 1 }, /* set SSL cipher suite */
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ { "ciphersuites", bind_parse_ciphersuites, 1 }, /* set TLS 1.3 cipher suite */
+#endif
+ { "crl-file", bind_parse_crl_file, 1 }, /* set certificate revocation list file use on client cert verify */
+ { "crt", bind_parse_crt, 1 }, /* load SSL certificates from this location */
+ { "crt-ignore-err", bind_parse_ignore_err, 1 }, /* set error IDs to ignore on verify depth == 0 */
+ { "crt-list", bind_parse_crt_list, 1 }, /* load a list of crt from this location */
+ { "curves", bind_parse_curves, 1 }, /* set SSL curve suite */
+ { "ecdhe", bind_parse_ecdhe, 1 }, /* defines named curve for elliptic curve Diffie-Hellman */
+ { "force-sslv3", bind_parse_tls_method_options, 0 }, /* force SSLv3 */
+ { "force-tlsv10", bind_parse_tls_method_options, 0 }, /* force TLSv10 */
+ { "force-tlsv11", bind_parse_tls_method_options, 0 }, /* force TLSv11 */
+ { "force-tlsv12", bind_parse_tls_method_options, 0 }, /* force TLSv12 */
+ { "force-tlsv13", bind_parse_tls_method_options, 0 }, /* force TLSv13 */
+ { "generate-certificates", bind_parse_generate_certs, 0 }, /* enable the server certificates generation */
+ { "no-ca-names", bind_parse_no_ca_names, 0 }, /* do not send ca names to clients (ca_file related) */
+ { "no-sslv3", bind_parse_tls_method_options, 0 }, /* disable SSLv3 */
+ { "no-tlsv10", bind_parse_tls_method_options, 0 }, /* disable TLSv10 */
+ { "no-tlsv11", bind_parse_tls_method_options, 0 }, /* disable TLSv11 */
+ { "no-tlsv12", bind_parse_tls_method_options, 0 }, /* disable TLSv12 */
+ { "no-tlsv13", bind_parse_tls_method_options, 0 }, /* disable TLSv13 */
+ { "no-tls-tickets", bind_parse_no_tls_tickets, 0 }, /* disable session resumption tickets */
+ { "ssl", bind_parse_ssl, 0 }, /* enable SSL processing */
+ { "ssl-min-ver", bind_parse_tls_method_minmax, 1 }, /* minimum version */
+ { "ssl-max-ver", bind_parse_tls_method_minmax, 1 }, /* maximum version */
+ { "strict-sni", bind_parse_strict_sni, 0 }, /* refuse negotiation if sni doesn't match a certificate */
+ { "tls-ticket-keys", bind_parse_tls_ticket_keys, 1 }, /* set file to load TLS ticket keys from */
+ { "verify", bind_parse_verify, 1 }, /* set SSL verify method */
+ { "npn", bind_parse_npn, 1 }, /* set NPN supported protocols */
+ { "prefer-client-ciphers", bind_parse_pcc, 0 }, /* prefer client ciphers */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct srv_kw_list srv_kws = { "SSL", { }, {
+ { "allow-0rtt", srv_parse_allow_0rtt, 0, 1, 1 }, /* Allow using early data on this server */
+ { "alpn", srv_parse_alpn, 1, 1, 1 }, /* Set ALPN supported protocols */
+ { "ca-file", srv_parse_ca_file, 1, 1, 1 }, /* set CAfile to process verify server cert */
+ { "check-alpn", srv_parse_check_alpn, 1, 1, 1 }, /* Set ALPN used for checks */
+ { "check-sni", srv_parse_check_sni, 1, 1, 1 }, /* set SNI */
+ { "check-ssl", srv_parse_check_ssl, 0, 1, 1 }, /* enable SSL for health checks */
+ { "ciphers", srv_parse_ciphers, 1, 1, 1 }, /* select the cipher suite */
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ { "ciphersuites", srv_parse_ciphersuites, 1, 1, 1 }, /* select the cipher suite */
+#endif
+ { "crl-file", srv_parse_crl_file, 1, 1, 1 }, /* set certificate revocation list file use on server cert verify */
+ { "crt", srv_parse_crt, 1, 1, 1 }, /* set client certificate */
+ { "force-sslv3", srv_parse_tls_method_options, 0, 1, 1 }, /* force SSLv3 */
+ { "force-tlsv10", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv10 */
+ { "force-tlsv11", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv11 */
+ { "force-tlsv12", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv12 */
+ { "force-tlsv13", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv13 */
+ { "no-check-ssl", srv_parse_no_check_ssl, 0, 1, 0 }, /* disable SSL for health checks */
+ { "no-send-proxy-v2-ssl", srv_parse_no_send_proxy_ssl, 0, 1, 0 }, /* do not send PROXY protocol header v2 with SSL info */
+ { "no-send-proxy-v2-ssl-cn", srv_parse_no_send_proxy_cn, 0, 1, 0 }, /* do not send PROXY protocol header v2 with CN */
+ { "no-ssl", srv_parse_no_ssl, 0, 1, 0 }, /* disable SSL processing */
+ { "no-ssl-reuse", srv_parse_no_ssl_reuse, 0, 1, 1 }, /* disable session reuse */
+ { "no-sslv3", srv_parse_tls_method_options, 0, 0, 1 }, /* disable SSLv3 */
+ { "no-tlsv10", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv10 */
+ { "no-tlsv11", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv11 */
+ { "no-tlsv12", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv12 */
+ { "no-tlsv13", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv13 */
+ { "no-tls-tickets", srv_parse_no_tls_tickets, 0, 1, 1 }, /* disable session resumption tickets */
+ { "npn", srv_parse_npn, 1, 1, 1 }, /* Set NPN supported protocols */
+ { "send-proxy-v2-ssl", srv_parse_send_proxy_ssl, 0, 1, 1 }, /* send PROXY protocol header v2 with SSL info */
+ { "send-proxy-v2-ssl-cn", srv_parse_send_proxy_cn, 0, 1, 1 }, /* send PROXY protocol header v2 with CN */
+ { "sni", srv_parse_sni, 1, 1, 1 }, /* send SNI extension */
+ { "ssl", srv_parse_ssl, 0, 1, 1 }, /* enable SSL processing */
+ { "ssl-min-ver", srv_parse_tls_method_minmax, 1, 1, 1 }, /* minimum version */
+ { "ssl-max-ver", srv_parse_tls_method_minmax, 1, 1, 1 }, /* maximum version */
+ { "ssl-reuse", srv_parse_ssl_reuse, 0, 1, 0 }, /* enable session reuse */
+ { "tls-tickets", srv_parse_tls_tickets, 0, 1, 1 }, /* enable session resumption tickets */
+ { "verify", srv_parse_verify, 1, 1, 1 }, /* set SSL verify method */
+ { "verifyhost", srv_parse_verifyhost, 1, 1, 1 }, /* require that SSL cert verifies for hostname */
+ { NULL, NULL, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "ca-base", ssl_parse_global_ca_crt_base },
+ { CFG_GLOBAL, "crt-base", ssl_parse_global_ca_crt_base },
+ { CFG_GLOBAL, "issuers-chain-path", ssl_load_global_issuers_from_path },
+ { CFG_GLOBAL, "maxsslconn", ssl_parse_global_int },
+ { CFG_GLOBAL, "ssl-default-bind-options", ssl_parse_default_bind_options },
+ { CFG_GLOBAL, "ssl-default-server-options", ssl_parse_default_server_options },
+#ifndef OPENSSL_NO_DH
+ { CFG_GLOBAL, "ssl-dh-param-file", ssl_parse_global_dh_param_file },
+#endif
+ { CFG_GLOBAL, "ssl-mode-async", ssl_parse_global_ssl_async },
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+ { CFG_GLOBAL, "ssl-engine", ssl_parse_global_ssl_engine },
+#endif
+#ifdef HAVE_SSL_PROVIDERS
+ { CFG_GLOBAL, "ssl-propquery", ssl_parse_global_ssl_propquery },
+ { CFG_GLOBAL, "ssl-provider", ssl_parse_global_ssl_provider },
+ { CFG_GLOBAL, "ssl-provider-path", ssl_parse_global_ssl_provider_path },
+#endif
+ { CFG_GLOBAL, "ssl-skip-self-issued-ca", ssl_parse_skip_self_issued_ca },
+ { CFG_GLOBAL, "tune.ssl.cachesize", ssl_parse_global_int },
+#ifndef OPENSSL_NO_DH
+ { CFG_GLOBAL, "tune.ssl.default-dh-param", ssl_parse_global_default_dh },
+#endif
+ { CFG_GLOBAL, "tune.ssl.force-private-cache", ssl_parse_global_private_cache },
+ { CFG_GLOBAL, "tune.ssl.lifetime", ssl_parse_global_lifetime },
+ { CFG_GLOBAL, "tune.ssl.maxrecord", ssl_parse_global_int },
+ { CFG_GLOBAL, "tune.ssl.hard-maxrecord", ssl_parse_global_int },
+ { CFG_GLOBAL, "tune.ssl.ssl-ctx-cache-size", ssl_parse_global_int },
+ { CFG_GLOBAL, "tune.ssl.capture-cipherlist-size", ssl_parse_global_capture_buffer },
+ { CFG_GLOBAL, "tune.ssl.capture-buffer-size", ssl_parse_global_capture_buffer },
+ { CFG_GLOBAL, "tune.ssl.keylog", ssl_parse_global_keylog },
+ { CFG_GLOBAL, "ssl-default-bind-ciphers", ssl_parse_global_ciphers },
+ { CFG_GLOBAL, "ssl-default-server-ciphers", ssl_parse_global_ciphers },
+#if defined(SSL_CTX_set1_curves_list)
+ { CFG_GLOBAL, "ssl-default-bind-curves", ssl_parse_global_curves },
+#endif
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ { CFG_GLOBAL, "ssl-default-bind-ciphersuites", ssl_parse_global_ciphersuites },
+ { CFG_GLOBAL, "ssl-default-server-ciphersuites", ssl_parse_global_ciphersuites },
+#endif
+ { CFG_GLOBAL, "ssl-load-extra-files", ssl_parse_global_extra_files },
+ { CFG_GLOBAL, "ssl-load-extra-del-ext", ssl_parse_global_extra_noext },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/cfgparse-tcp.c b/src/cfgparse-tcp.c
new file mode 100644
index 0000000..13d433e
--- /dev/null
+++ b/src/cfgparse-tcp.c
@@ -0,0 +1,296 @@
+/*
+ * Configuration parsing for TCP (bind and server keywords)
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/server.h>
+#include <haproxy/tools.h>
+
+
+#ifdef IPV6_V6ONLY
+/* parse the "v4v6" bind keyword */
+static int bind_parse_v4v6(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->settings.options |= RX_O_V4V6;
+ return 0;
+}
+
+/* parse the "v6only" bind keyword */
+static int bind_parse_v6only(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->settings.options |= RX_O_V6ONLY;
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_HAP_TRANSPARENT
+/* parse the "transparent" bind keyword */
+static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->settings.options |= RX_O_FOREIGN;
+ return 0;
+}
+#endif
+
+#if defined(TCP_DEFER_ACCEPT) || defined(SO_ACCEPTFILTER)
+/* parse the "defer-accept" bind keyword */
+static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+
+ list_for_each_entry(l, &conf->listeners, by_bind) {
+ if (l->rx.addr.ss_family == AF_INET || l->rx.addr.ss_family == AF_INET6)
+ l->options |= LI_O_DEF_ACCEPT;
+ }
+
+ return 0;
+}
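+
+/* LI_O_DEF_ACCEPT is assumed to translate to TCP_DEFER_ACCEPT on Linux (or
+ * an accept filter on BSD) when the socket is set up, so that accept() only
+ * wakes up once the client has actually sent data.
+ */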
+#endif
+
+#ifdef TCP_FASTOPEN
+/* parse the "tfo" bind keyword */
+static int bind_parse_tfo(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+
+ list_for_each_entry(l, &conf->listeners, by_bind) {
+ if (l->rx.addr.ss_family == AF_INET || l->rx.addr.ss_family == AF_INET6)
+ l->options |= LI_O_TCP_FO;
+ }
+
+ return 0;
+}
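+
+/* LI_O_TCP_FO is assumed to enable TCP Fast Open when the socket is set up,
+ * letting returning clients carry data in the SYN and save a round trip.
+ */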
+#endif
+
+#ifdef TCP_MAXSEG
+/* parse the "mss" bind keyword */
+static int bind_parse_mss(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+ int mss;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing MSS value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ mss = atoi(args[cur_arg + 1]);
+ if (!mss || abs(mss) > 65535) {
+ memprintf(err, "'%s' : expects an MSS with and absolute value between 1 and 65535", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind) {
+ if (l->rx.addr.ss_family == AF_INET || l->rx.addr.ss_family == AF_INET6)
+ l->maxseg = mss;
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef TCP_USER_TIMEOUT
+/* parse the "tcp-ut" bind keyword */
+static int bind_parse_tcp_ut(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ const char *ptr = NULL;
+ struct listener *l;
+ unsigned int timeout;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing TCP User Timeout value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ ptr = parse_time_err(args[cur_arg + 1], &timeout, TIME_UNIT_MS);
+ if (ptr == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[cur_arg+1], args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[cur_arg+1], args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr) {
+ memprintf(err, "'%s' : expects a positive delay in milliseconds", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind) {
+ if (l->rx.addr.ss_family == AF_INET || l->rx.addr.ss_family == AF_INET6)
+ l->tcp_ut = timeout;
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef SO_BINDTODEVICE
+/* parse the "interface" bind keyword */
+static int bind_parse_interface(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing interface name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.interface = strdup(args[cur_arg + 1]);
+ return 0;
+}
+#endif
+
+#ifdef USE_NS
+/* parse the "namespace" bind keyword */
+static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char *namespace = NULL;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ namespace = args[cur_arg + 1];
+
+ conf->settings.netns = netns_store_lookup(namespace, strlen(namespace));
+
+ if (conf->settings.netns == NULL)
+ conf->settings.netns = netns_store_insert(namespace);
+
+ if (conf->settings.netns == NULL) {
+ ha_alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+#endif
+
+#ifdef TCP_USER_TIMEOUT
+/* parse the "tcp-ut" server keyword */
+static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ const char *ptr = NULL;
+ unsigned int timeout;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing TCP User Timeout value", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ ptr = parse_time_err(args[*cur_arg + 1], &timeout, TIME_UNIT_MS);
+ if (ptr == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr) {
+ memprintf(err, "'%s' : expects a positive delay in milliseconds", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (newsrv->addr.ss_family == AF_INET || newsrv->addr.ss_family == AF_INET6)
+ newsrv->tcp_ut = timeout;
+
+ return 0;
+}
+#endif
+
+
+/************************************************************************/
+/* All supported bind keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct bind_kw_list bind_kws = { "TCP", { }, {
+#if defined(TCP_DEFER_ACCEPT) || defined(SO_ACCEPTFILTER)
+ { "defer-accept", bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */
+#endif
+#ifdef SO_BINDTODEVICE
+ { "interface", bind_parse_interface, 1 }, /* specifically bind to this interface */
+#endif
+#ifdef TCP_MAXSEG
+ { "mss", bind_parse_mss, 1 }, /* set MSS of listening socket */
+#endif
+#ifdef USE_NS
+ { "namespace", bind_parse_namespace, 1 }, /* bind the socket in this namespace */
+#endif
+#ifdef TCP_USER_TIMEOUT
+ { "tcp-ut", bind_parse_tcp_ut, 1 }, /* set User Timeout on listening socket */
+#endif
+#ifdef TCP_FASTOPEN
+ { "tfo", bind_parse_tfo, 0 }, /* enable TCP_FASTOPEN of listening socket */
+#endif
+#ifdef CONFIG_HAP_TRANSPARENT
+ { "transparent", bind_parse_transparent, 0 }, /* transparently bind to the specified addresses */
+#endif
+#ifdef IPV6_V6ONLY
+ { "v4v6", bind_parse_v4v6, 0 }, /* force socket to bind to IPv4+IPv6 */
+ { "v6only", bind_parse_v6only, 0 }, /* force socket to bind to IPv6 only */
+#endif
+ /* the versions with the NULL parse function */
+ { "defer-accept", NULL, 0 },
+ { "interface", NULL, 1 },
+ { "mss", NULL, 1 },
+ { "namespace", NULL, 1 },
+ { "tcp-ut", NULL, 1 },
+ { "tfo", NULL, 0 },
+ { "transparent", NULL, 0 },
+ { "v4v6", NULL, 0 },
+ { "v6only", NULL, 0 },
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+static struct srv_kw_list srv_kws = { "TCP", { }, {
+#ifdef TCP_USER_TIMEOUT
+ { "tcp-ut", srv_parse_tcp_ut, 1, 1, 0 }, /* set TCP user timeout on server */
+#endif
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/cfgparse-unix.c b/src/cfgparse-unix.c
new file mode 100644
index 0000000..b1fb1e2
--- /dev/null
+++ b/src/cfgparse-unix.c
@@ -0,0 +1,135 @@
+/*
+ * Configuration parsing for UNIX sockets (bind and server keywords)
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/server.h>
+#include <haproxy/tools.h>
+
+/* parse the "mode" bind keyword */
+static int bind_parse_mode(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char *endptr;
+
+ conf->settings.ux.mode = strtol(args[cur_arg + 1], &endptr, 8);
+
+ if (!*args[cur_arg + 1] || *endptr) {
+ memprintf(err, "'%s' : missing or invalid mode '%s' (octal integer expected)", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "gid" bind keyword */
+static int bind_parse_gid(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.gid = atol(args[cur_arg + 1]);
+ return 0;
+}
+
+/* parse the "group" bind keyword */
+static int bind_parse_group(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct group *group;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing group name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ group = getgrnam(args[cur_arg + 1]);
+ if (!group) {
+ memprintf(err, "'%s' : unknown group name '%s'", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.gid = group->gr_gid;
+ return 0;
+}
+
+/* parse the "uid" bind keyword */
+static int bind_parse_uid(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.uid = atol(args[cur_arg + 1]);
+ return 0;
+}
+
+/* parse the "user" bind keyword */
+static int bind_parse_user(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct passwd *user;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing user name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ user = getpwnam(args[cur_arg + 1]);
+ if (!user) {
+ memprintf(err, "'%s' : unknown user name '%s'", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.uid = user->pw_uid;
+ return 0;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct bind_kw_list bind_kws = { "UNIX", { }, {
+ { "gid", bind_parse_gid, 1 }, /* set the socket's gid */
+ { "group", bind_parse_group, 1 }, /* set the socket's gid from the group name */
+ { "mode", bind_parse_mode, 1 }, /* set the socket's mode (eg: 0644)*/
+ { "uid", bind_parse_uid, 1 }, /* set the socket's uid */
+ { "user", bind_parse_user, 1 }, /* set the socket's uid from the user name */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
diff --git a/src/cfgparse.c b/src/cfgparse.c
new file mode 100644
index 0000000..89c715d
--- /dev/null
+++ b/src/cfgparse.c
@@ -0,0 +1,4570 @@
+/*
+ * Configuration parser
+ *
+ * Copyright 2000-2011 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* This is to have crypt() and sched_setaffinity() defined on Linux */
+#define _GNU_SOURCE
+
+#ifdef USE_LIBCRYPT
+#ifdef USE_CRYPT_H
+/* some platforms such as Solaris need this */
+#include <crypt.h>
+#endif
+#endif /* USE_LIBCRYPT */
+
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+#include <errno.h>
+#ifdef USE_CPU_AFFINITY
+#include <sched.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/backend.h>
+#include <haproxy/capture.h>
+#include <haproxy/cfgcond.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/chunk.h>
+#include <haproxy/clock.h>
+#ifdef USE_CPU_AFFINITY
+#include <haproxy/cpuset.h>
+#endif
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/lb_chash.h>
+#include <haproxy/lb_fas.h>
+#include <haproxy/lb_fwlc.h>
+#include <haproxy/lb_fwrr.h>
+#include <haproxy/lb_map.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/sink.h>
+#include <haproxy/mailers.h>
+#include <haproxy/namespace.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/obj_type-t.h>
+#include <haproxy/peers-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+
+
+/* Used to chain configuration sections definitions. This list
+ * stores struct cfg_section
+ */
+struct list sections = LIST_HEAD_INIT(sections);
+
+struct list postparsers = LIST_HEAD_INIT(postparsers);
+
+extern struct proxy *mworker_proxy;
+
+char *cursection = NULL;
+int cfg_maxpconn = 0; /* # of simultaneous connections per proxy (-N) */
+int cfg_maxconn = 0; /* # of simultaneous connections, (-n) */
+char *cfg_scope = NULL; /* the current scope during the configuration parsing */
+
+/* how to handle default paths */
+static enum default_path_mode {
+ DEFAULT_PATH_CURRENT = 0, /* "current": paths are relative to CWD (this is the default) */
+ DEFAULT_PATH_CONFIG, /* "config": paths are relative to config file */
+ DEFAULT_PATH_PARENT, /* "parent": paths are relative to config file's ".." */
+ DEFAULT_PATH_ORIGIN, /* "origin": paths are relative to default_path_origin */
+} default_path_mode;
+
+static char initial_cwd[PATH_MAX];
+static char current_cwd[PATH_MAX];
+
+/* List head of all known configuration keywords */
+struct cfg_kw_list cfg_keywords = {
+ .list = LIST_HEAD_INIT(cfg_keywords.list)
+};
+
+/*
+ * converts <str> to a list of listeners which are dynamically allocated.
+ * The format is "{addr|'*'}:port[-end][,{addr|'*'}:port[-end]]*", where :
+ * - <addr> can be empty or "*" to indicate INADDR_ANY ;
+ * - <port> is a numerical port from 1 to 65535 ;
+ * - <end> indicates to use the range from <port> to <end> instead (inclusive).
+ * This can be repeated as many times as necessary, separated by a comma.
+ * Function returns 1 for success or 0 if error. In case of errors, if <err> is
+ * not NULL, it must be a valid pointer to either NULL or a freeable area that
+ * will be replaced with an error message.
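+ *
+ * Example (illustrative, not taken from the source): the string
+ *   "127.0.0.1:80,*:8080-8090"
+ * describes one listener on 127.0.0.1:80 plus one listener per port from
+ * 8080 to 8090 on all addresses.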
+ */
+int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, const char *file, int line, char **err)
+{
+ struct protocol *proto;
+ char *next, *dupstr;
+ int port, end;
+
+ next = dupstr = strdup(str);
+
+ while (next && *next) {
+ struct sockaddr_storage *ss2;
+ int fd = -1;
+
+ str = next;
+ /* 1) look for the end of the first address */
+ if ((next = strchr(str, ',')) != NULL) {
+ *next++ = 0;
+ }
+
+ ss2 = str2sa_range(str, NULL, &port, &end, &fd, &proto, err,
+ (curproxy == global.cli_fe || curproxy == mworker_proxy) ? NULL : global.unix_bind.prefix,
+ NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_PORT_RANGE |
+ PA_O_SOCKET_FD | PA_O_STREAM | PA_O_XPRT);
+ if (!ss2)
+ goto fail;
+
+ /* OK the address looks correct */
+ if (proto->proto_type == PROTO_TYPE_DGRAM)
+ bind_conf->options |= BC_O_USE_SOCK_DGRAM;
+ else
+ bind_conf->options |= BC_O_USE_SOCK_STREAM;
+
+ if (proto->xprt_type == PROTO_TYPE_DGRAM)
+ bind_conf->options |= BC_O_USE_XPRT_DGRAM;
+ else
+ bind_conf->options |= BC_O_USE_XPRT_STREAM;
+
+ if (!create_listeners(bind_conf, ss2, port, end, fd, proto, err)) {
+ memprintf(err, "%s for address '%s'.\n", *err, str);
+ goto fail;
+ }
+ } /* end while(next) */
+ free(dupstr);
+ return 1;
+ fail:
+ free(dupstr);
+ return 0;
+}
+
+/*
+ * converts <str> to a list of datagram-oriented listeners which are dynamically
+ * allocated.
+ * The format is "{addr|'*'}:port[-end][,{addr|'*'}:port[-end]]*", where :
+ * - <addr> can be empty or "*" to indicate INADDR_ANY ;
+ * - <port> is a numerical port from 1 to 65535 ;
+ * - <end> indicates to use the range from <port> to <end> instead (inclusive).
+ * This can be repeated as many times as necessary, separated by a comma.
+ * Function returns 1 for success or 0 if error. In case of errors, if <err> is
+ * not NULL, it must be a valid pointer to either NULL or a freeable area that
+ * will be replaced with an error message.
+ */
+int str2receiver(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, const char *file, int line, char **err)
+{
+ struct protocol *proto;
+ char *next, *dupstr;
+ int port, end;
+
+ next = dupstr = strdup(str);
+
+ while (next && *next) {
+ struct sockaddr_storage *ss2;
+ int fd = -1;
+
+ str = next;
+ /* 1) look for the end of the first address */
+ if ((next = strchr(str, ',')) != NULL) {
+ *next++ = 0;
+ }
+
+ ss2 = str2sa_range(str, NULL, &port, &end, &fd, &proto, err,
+ curproxy == global.cli_fe ? NULL : global.unix_bind.prefix,
+ NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_PORT_RANGE |
+ PA_O_SOCKET_FD | PA_O_DGRAM | PA_O_XPRT);
+ if (!ss2)
+ goto fail;
+
+ /* OK the address looks correct */
+ if (!create_listeners(bind_conf, ss2, port, end, fd, proto, err)) {
+ memprintf(err, "%s for address '%s'.\n", *err, str);
+ goto fail;
+ }
+ } /* end while(next) */
+ free(dupstr);
+ return 1;
+ fail:
+ free(dupstr);
+ return 0;
+}
+
+/*
+ * Sends a warning if proxy <proxy> does not have at least one of the
+ * capabilities in <cap>. An optional <hint> may be added at the end
+ * of the warning to help the user. Returns 1 if a warning was emitted
+ * or 0 if the condition is valid.
+ */
+int warnifnotcap(struct proxy *proxy, int cap, const char *file, int line, const char *arg, const char *hint)
+{
+ char *msg;
+
+ switch (cap) {
+ case PR_CAP_BE: msg = "no backend"; break;
+ case PR_CAP_FE: msg = "no frontend"; break;
+ case PR_CAP_BE|PR_CAP_FE: msg = "neither frontend nor backend"; break;
+ default: msg = "not enough"; break;
+ }
+
+ if (!(proxy->cap & cap)) {
+ ha_warning("parsing [%s:%d] : '%s' ignored because %s '%s' has %s capability.%s\n",
+ file, line, arg, proxy_type_str(proxy), proxy->id, msg, hint ? hint : "");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Sends an alert if proxy <proxy> does not have at least one of the
+ * capabilities in <cap>. An optional <hint> may be added at the end
+ * of the alert to help the user. Returns 1 if an alert was emitted
+ * or 0 if the condition is valid.
+ */
+int failifnotcap(struct proxy *proxy, int cap, const char *file, int line, const char *arg, const char *hint)
+{
+ char *msg;
+
+ switch (cap) {
+ case PR_CAP_BE: msg = "no backend"; break;
+ case PR_CAP_FE: msg = "no frontend"; break;
+ case PR_CAP_BE|PR_CAP_FE: msg = "neither frontend nor backend"; break;
+ default: msg = "not enough"; break;
+ }
+
+ if (!(proxy->cap & cap)) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed because %s '%s' has %s capability.%s\n",
+ file, line, arg, proxy_type_str(proxy), proxy->id, msg, hint ? hint : "");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Report an error in <msg> when there are too many arguments. This version is
+ * intended to be used by keyword parsers so that the message will be included
+ * in the general error message. <index> is the position of the current keyword
+ * in <args>. Return 0 if the number of arguments is correct, otherwise build a
+ * message and return 1. Fill <err_code> with ERR_ALERT and ERR_FATAL if it is
+ * not NULL. <msg> may also be NULL, in which case no message is built (useful
+ * to only perform the check). <msg> and <err_code> are only affected on error.
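+ *
+ * Example (illustrative): with args = {"timeout", "client", "5s", "extra"},
+ * maxarg = 1 and index = 1, the unexpected fourth word makes this function
+ * return 1 with the message:
+ *   "'timeout client' cannot handle unexpected argument 'extra'."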
+ */
+int too_many_args_idx(int maxarg, int index, char **args, char **msg, int *err_code)
+{
+ int i;
+
+ if (!*args[index + maxarg + 1])
+ return 0;
+
+ if (msg) {
+ *msg = NULL;
+ memprintf(msg, "%s", args[0]);
+ for (i = 1; i <= index; i++)
+ memprintf(msg, "%s %s", *msg, args[i]);
+
+ memprintf(msg, "'%s' cannot handle unexpected argument '%s'.", *msg, args[index + maxarg + 1]);
+ }
+ if (err_code)
+ *err_code |= ERR_ALERT | ERR_FATAL;
+
+ return 1;
+}
+
+/*
+ * same as too_many_args_idx with a 0 index
+ */
+int too_many_args(int maxarg, char **args, char **msg, int *err_code)
+{
+ return too_many_args_idx(maxarg, 0, args, msg, err_code);
+}
+
+/*
+ * Report a fatal alert when there are too many arguments.
+ * <index> is the position of the current keyword in <args>.
+ * Return 0 if the number of arguments is correct, otherwise emit an alert and return 1.
+ * Fill <err_code> with ERR_ALERT and ERR_FATAL.
+ */
+int alertif_too_many_args_idx(int maxarg, int index, const char *file, int linenum, char **args, int *err_code)
+{
+ char *kw = NULL;
+ int i;
+
+ if (!*args[index + maxarg + 1])
+ return 0;
+
+ memprintf(&kw, "%s", args[0]);
+ for (i = 1; i <= index; i++) {
+ memprintf(&kw, "%s %s", kw, args[i]);
+ }
+
+ ha_alert("parsing [%s:%d] : '%s' cannot handle unexpected argument '%s'.\n", file, linenum, kw, args[index + maxarg + 1]);
+ free(kw);
+ *err_code |= ERR_ALERT | ERR_FATAL;
+ return 1;
+}
+
+/*
+ * same as alertif_too_many_args_idx with a 0 index
+ */
+int alertif_too_many_args(int maxarg, const char *file, int linenum, char **args, int *err_code)
+{
+ return alertif_too_many_args_idx(maxarg, 0, file, linenum, args, err_code);
+}
+
+
+/* Report it if a request ACL condition uses some keywords that are incompatible
+ * with the place where the ACL is used. It returns either 0 or ERR_WARN so that
+ * its result can be or'ed with err_code. Note that <cond> may be NULL and then
+ * will be ignored.
+ */
+int warnif_cond_conflicts(const struct acl_cond *cond, unsigned int where, const char *file, int line)
+{
+ const struct acl *acl;
+ const char *kw;
+
+ if (!cond)
+ return 0;
+
+ acl = acl_cond_conflicts(cond, where);
+ if (acl) {
+ if (acl->name && *acl->name)
+ ha_warning("parsing [%s:%d] : acl '%s' will never match because it only involves keywords that are incompatible with '%s'\n",
+ file, line, acl->name, sample_ckp_names(where));
+ else
+ ha_warning("parsing [%s:%d] : anonymous acl will never match because it uses keyword '%s' which is incompatible with '%s'\n",
+ file, line, LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw, sample_ckp_names(where));
+ return ERR_WARN;
+ }
+ if (!acl_cond_kw_conflicts(cond, where, &acl, &kw))
+ return 0;
+
+ if (acl->name && *acl->name)
+ ha_warning("parsing [%s:%d] : acl '%s' involves keywords '%s' which is incompatible with '%s'\n",
+ file, line, acl->name, kw, sample_ckp_names(where));
+ else
+ ha_warning("parsing [%s:%d] : anonymous acl involves keyword '%s' which is incompatible with '%s'\n",
+ file, line, kw, sample_ckp_names(where));
+ return ERR_WARN;
+}
+
+/* Report it if an ACL uses a L6 sample fetch from an HTTP proxy. It returns
+ * either 0 or ERR_WARN so that its result can be or'ed with err_code. Note that
+ * <cond> may be NULL and then will be ignored.
+ */
+int warnif_tcp_http_cond(const struct proxy *px, const struct acl_cond *cond)
+{
+ if (!cond || px->mode != PR_MODE_HTTP)
+ return 0;
+
+ if (cond->use & (SMP_USE_L6REQ|SMP_USE_L6RES)) {
+ ha_warning("Proxy '%s': L6 sample fetches ignored on HTTP proxies (declared at %s:%d).\n",
+ px->id, cond->file, cond->line);
+ return ERR_WARN;
+ }
+ return 0;
+}
+
+/* try to find in <list> the word that looks closest to <word> by counting
+ * transitions between letters, digits and other characters. Will return the
+ * best matching word if found, otherwise NULL. An optional array of extra
+ * words to compare may be passed in <extra>, but it must then be terminated
+ * by a NULL entry. If unused it may be NULL.
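+ *
+ * For instance (illustrative), a mistyped keyword "maxcon" in the global
+ * section would typically be matched to "maxconn", since the two words only
+ * differ by one trailing character in their fingerprints.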
+ */
+const char *cfg_find_best_match(const char *word, const struct list *list, int section, const char **extra)
+{
+ uint8_t word_sig[1024]; // 0..25=letter, 26=digit, 27=other, 28=begin, 29=end
+ uint8_t list_sig[1024];
+ const struct cfg_kw_list *kwl;
+ int index;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].section != section)
+ continue;
+
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = kwl->kw[index].kw;
+ }
+ }
+ }
+
+ while (extra && *extra) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = *extra;
+ }
+ extra++;
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+ return best_ptr;
+}
+
+/* Parse a string representing a process number or a set of processes. It must
+ * be "all", "odd", "even", a number between 1 and <max> or a range with
+ * two such numbers delimited by a dash ('-'). On success, it returns
+ * 0, otherwise it returns non-zero with an error message in <err>.
+ *
+ * Note: this function can also be used to parse a thread number or a set of
+ * threads.
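+ *
+ * Examples (illustrative), with max = 8:
+ *   "all"      -> bits 1..8        "odd" -> bits 1,3,5,7
+ *   "2-5"      -> bits 2..5        "3-"  -> bits 3..8
+ *   "auto:1-4" -> bits 1..4 with *autoinc set to 1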
+ */
+int parse_process_number(const char *arg, unsigned long *proc, int max, int *autoinc, char **err)
+{
+ if (autoinc) {
+ *autoinc = 0;
+ if (strncmp(arg, "auto:", 5) == 0) {
+ arg += 5;
+ *autoinc = 1;
+ }
+ }
+
+ if (strcmp(arg, "all") == 0)
+ *proc |= ~0UL;
+ else if (strcmp(arg, "odd") == 0)
+ *proc |= ~0UL/3UL; /* 0x555....555 */
+ else if (strcmp(arg, "even") == 0)
+ *proc |= (~0UL/3UL) << 1; /* 0xAAA...AAA */
+ else {
+ const char *p, *dash = NULL;
+ unsigned int low, high;
+
+ for (p = arg; *p; p++) {
+ if (*p == '-' && !dash)
+ dash = p;
+ else if (!isdigit((unsigned char)*p)) {
+ memprintf(err, "'%s' is not a valid number/range.", arg);
+ return -1;
+ }
+ }
+
+ low = high = str2uic(arg);
+ if (dash)
+ high = ((!*(dash+1)) ? max : str2uic(dash + 1));
+
+ if (high < low) {
+ unsigned int swap = low;
+ low = high;
+ high = swap;
+ }
+
+ if (low < 1 || low > max || high > max) {
+ memprintf(err, "'%s' is not a valid number/range."
+ " It supports numbers from 1 to %d.\n",
+ arg, max);
+ return 1;
+ }
+
+ for (;low <= high; low++)
+ *proc |= 1UL << (low-1);
+ }
+ *proc &= ~0UL >> (LONGBITS - max);
+
+ return 0;
+}
+
+#ifdef USE_CPU_AFFINITY
+/* Parse cpu sets. Each CPU set is either a unique number between 0 and
+ * ha_cpuset_size() - 1 or a range with two such numbers delimited by a dash
+ * ('-'). If <comma_allowed> is set, each CPU set can be a list of unique
+ * numbers or ranges separated by a comma. It is also possible to specify
+ * multiple cpu numbers or ranges in distinct argument in <args>. On success,
+ * it returns 0, otherwise it returns non-zero with an error message in <err>.
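+ *
+ * Examples (illustrative): "0-3" selects CPUs 0 to 3; with <comma_allowed>
+ * set, "0,2,4-7" selects CPUs 0, 2 and 4 to 7; "4-" selects CPUs 4 up to
+ * ha_cpuset_size() - 1.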
+ */
+unsigned long parse_cpu_set(const char **args, struct hap_cpuset *cpu_set,
+ int comma_allowed, char **err)
+{
+ int cur_arg = 0;
+ const char *arg;
+
+ ha_cpuset_zero(cpu_set);
+
+ arg = args[cur_arg];
+ while (*arg) {
+ const char *dash, *comma;
+ unsigned int low, high;
+
+ if (!isdigit((unsigned char)*arg)) {
+ memprintf(err, "'%s' is not a CPU range.", arg);
+ return -1;
+ }
+
+ low = high = str2uic(arg);
+
+ comma = comma_allowed ? strchr(arg, ',') : NULL;
+ dash = strchr(arg, '-');
+
+ if (dash && (!comma || dash < comma))
+ high = *(dash+1) ? str2uic(dash + 1) : ha_cpuset_size() - 1;
+
+ if (high < low) {
+ unsigned int swap = low;
+ low = high;
+ high = swap;
+ }
+
+ if (high >= ha_cpuset_size()) {
+ memprintf(err, "supports CPU numbers from 0 to %d.",
+ ha_cpuset_size() - 1);
+ return 1;
+ }
+
+ while (low <= high)
+ ha_cpuset_set(cpu_set, low++);
+
+ /* if a comma is present, parse the rest of the arg, else
+ * skip to the next arg */
+ arg = comma ? comma + 1 : args[++cur_arg];
+ }
+ return 0;
+}
+#endif
+
+/* Allocate and initialize the frontend of a "peers" section found in
+ * file <file> at line <linenum> with <id> as ID.
+ * Return 0 if succeeded, -1 if not.
+ * Note that this function may be called from "default-server"
+ * or "peer" lines.
+ */
+static int init_peers_frontend(const char *file, int linenum,
+ const char *id, struct peers *peers)
+{
+ struct proxy *p;
+
+ if (peers->peers_fe) {
+ p = peers->peers_fe;
+ goto out;
+ }
+
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ return -1;
+ }
+
+ init_new_proxy(p);
+ peers_setup_frontend(p);
+ p->parent = peers;
+ /* Finally store this frontend. */
+ peers->peers_fe = p;
+
+ out:
+ if (id && !p->id)
+ p->id = strdup(id);
+ free(p->conf.file);
+ p->conf.args.file = p->conf.file = strdup(file);
+ if (linenum != -1)
+ p->conf.args.line = p->conf.line = linenum;
+
+ return 0;
+}
+
+/* Return the unique bind_conf of proxy <p>, allocating it first if it does
+ * not exist yet. When one already exists, only its ->arg member is replaced;
+ * ->file and ->line are kept unchanged so that error messages keep pointing
+ * to the original declaration.
+ */
+static struct bind_conf *bind_conf_uniq_alloc(struct proxy *p,
+ const char *file, int line,
+ const char *arg, struct xprt_ops *xprt)
+{
+ struct bind_conf *bind_conf;
+
+ if (!LIST_ISEMPTY(&p->conf.bind)) {
+ bind_conf = LIST_ELEM((&p->conf.bind)->n, typeof(bind_conf), by_fe);
+ /*
+ * We keep bind_conf->file and bind_conf->line unchanged
+ * to make them available for error messages
+ */
+ if (arg) {
+ free(bind_conf->arg);
+ bind_conf->arg = strdup(arg);
+ }
+ }
+ else {
+ bind_conf = bind_conf_alloc(p, file, line, arg, xprt);
+ }
+
+ return bind_conf;
+}
+
+/*
+ * Allocate a new struct peer parsed at line <linenum> in file <file>
+ * to be added to <peers>.
+ * Returns the new allocated structure if succeeded, NULL if not.
+ */
+static struct peer *cfg_peers_add_peer(struct peers *peers,
+ const char *file, int linenum,
+ const char *id, int local)
+{
+ struct peer *p;
+
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ return NULL;
+ }
+
+ /* the peers are linked backwards first */
+ peers->count++;
+ p->peers = peers;
+ p->next = peers->remote;
+ peers->remote = p;
+ p->conf.file = strdup(file);
+ p->conf.line = linenum;
+ p->last_change = now.tv_sec;
+ p->xprt = xprt_get(XPRT_RAW);
+ p->sock_init_arg = NULL;
+ HA_SPIN_INIT(&p->lock);
+ if (id)
+ p->id = strdup(id);
+ if (local) {
+ p->local = 1;
+ peers->local = p;
+ }
+
+ return p;
+}
+
+/*
+ * Parse a line in a "peers" section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
+ */
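+/* Illustrative sketch of a "peers" section accepted by this parser (not
+ * taken from the source):
+ *   peers mypeers
+ *       peer hap1 192.168.0.1:1024
+ *       peer hap2 192.168.0.2:1024
+ *       table t1 type string size 10m store gpc0
+ */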
+int cfg_parse_peers(const char *file, int linenum, char **args, int kwm)
+{
+ static struct peers *curpeers = NULL;
+ struct peer *newpeer = NULL;
+ const char *err;
+ struct bind_conf *bind_conf;
+ struct listener *l;
+ int err_code = 0;
+ char *errmsg = NULL;
+ static int bind_line, peer_line;
+
+ if (strcmp(args[0], "bind") == 0 || strcmp(args[0], "default-bind") == 0) {
+ int cur_arg;
+ struct bind_conf *bind_conf;
+ int ret;
+
+ cur_arg = 1;
+
+ if (init_peers_frontend(file, linenum, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ bind_conf = bind_conf_uniq_alloc(curpeers->peers_fe, file, linenum,
+ args[1], xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : '%s %s' : cannot allocate memory.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+ if (*args[0] == 'b') {
+ struct listener *l;
+
+ if (peer_line) {
+ ha_alert("parsing [%s:%d] : mixing \"peer\" and \"bind\" line is forbidden\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&bind_conf->listeners)) {
+ ha_alert("parsing [%s:%d] : One listener per \"peers\" section is authorized but another is already configured at [%s:%d].\n", file, linenum, bind_conf->file, bind_conf->line);
+ err_code |= ERR_FATAL;
+ }
+
+ if (!str2listener(args[1], curpeers->peers_fe, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[1]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+ /*
+ * Newly allocated listener is at the end of the list
+ */
+ l = LIST_ELEM(bind_conf->listeners.p, typeof(l), by_bind);
+ l->maxaccept = 1;
+ l->accept = session_accept_fd;
+ l->analysers |= curpeers->peers_fe->fe_req_ana;
+ l->default_target = curpeers->peers_fe->default_target;
+ l->options |= LI_O_UNLIMITED; /* don't make the peers subject to global limits */
+ global.maxsock++; /* for the listening socket */
+
+ bind_line = 1;
+ if (cfg_peers->local) {
+ newpeer = cfg_peers->local;
+ }
+ else {
+ /* This peer is local.
+ * Note that we do not set the peer ID. This latter is initialized
+ * when parsing "peer" or "server" line.
+ */
+ newpeer = cfg_peers_add_peer(curpeers, file, linenum, NULL, 1);
+ if (!newpeer) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+ newpeer->addr = l->rx.addr;
+ newpeer->proto = l->rx.proto;
+ cur_arg++;
+ }
+
+ ret = bind_parse_args_list(bind_conf, args, cur_arg, cursection, file, linenum);
+ err_code |= ret;
+ if (ret != 0)
+ goto out;
+ }
+ else if (strcmp(args[0], "default-server") == 0) {
+ if (init_peers_frontend(file, -1, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ err_code |= parse_server(file, linenum, args, curpeers->peers_fe, NULL,
+ SRV_PARSE_DEFAULT_SERVER|SRV_PARSE_IN_PEER_SECTION|SRV_PARSE_INITIAL_RESOLVE);
+ }
+ else if (strcmp(args[0], "log") == 0) {
+ if (init_peers_frontend(file, linenum, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (!parse_logsrv(args, &curpeers->peers_fe->logsrvs, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "peers") == 0) { /* new peers section */
+ /* Initialize these static variables when entering a new "peers" section*/
+ bind_line = peer_line = 0;
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for peers section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ for (curpeers = cfg_peers; curpeers != NULL; curpeers = curpeers->next) {
+ /* "peers" section names must be unique: reject any duplicate */
+ if (strcmp(curpeers->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: peers section '%s' has the same name as another peers section declared at %s:%d.\n",
+ file, linenum, args[1], curpeers->conf.file, curpeers->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ if ((curpeers = calloc(1, sizeof(*curpeers))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curpeers->next = cfg_peers;
+ cfg_peers = curpeers;
+ curpeers->conf.file = strdup(file);
+ curpeers->conf.line = linenum;
+ curpeers->last_change = now.tv_sec;
+ curpeers->id = strdup(args[1]);
+ curpeers->disabled = 0;
+ }
+ else if (strcmp(args[0], "peer") == 0 ||
+ strcmp(args[0], "server") == 0) { /* peer or server definition */
+ int local_peer, peer;
+ int parse_addr = 0;
+
+ peer = *args[0] == 'p';
+ local_peer = strcmp(args[1], localpeer) == 0;
+ /* The local peer may have already partially been parsed on a "bind" line. */
+ if (*args[0] == 'p') {
+ if (bind_line) {
+ ha_alert("parsing [%s:%d] : mixing \"peer\" and \"bind\" line is forbidden\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ peer_line = 1;
+ }
+ if (cfg_peers->local && !cfg_peers->local->id && local_peer) {
+ /* The local peer has already been initialized on a "bind" line.
+ * Let's use it and store its ID.
+ */
+ newpeer = cfg_peers->local;
+ newpeer->id = strdup(localpeer);
+ }
+ else {
+ if (local_peer && cfg_peers->local) {
+ ha_alert("parsing [%s:%d] : '%s %s' : local peer name already referenced at %s:%d. %s\n",
+ file, linenum, args[0], args[1],
+ curpeers->peers_fe->conf.file, curpeers->peers_fe->conf.line, cfg_peers->local->id);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+ newpeer = cfg_peers_add_peer(curpeers, file, linenum, args[1], local_peer);
+ if (!newpeer) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+
+ /* Line number and peer ID are updated only if this peer is the local one. */
+ if (init_peers_frontend(file,
+ newpeer->local ? linenum: -1,
+ newpeer->local ? newpeer->id : NULL,
+ curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* This initializes curpeers->peers_fe->srv.
+ * The server address is parsed only if we are parsing a "peer" line,
+ * or if we are parsing a "server" line and the current peer is not the local one.
+ */
+ parse_addr = (peer || !local_peer) ? SRV_PARSE_PARSE_ADDR : 0;
+ err_code |= parse_server(file, linenum, args, curpeers->peers_fe, NULL,
+ SRV_PARSE_IN_PEER_SECTION|parse_addr|SRV_PARSE_INITIAL_RESOLVE);
+ if (!curpeers->peers_fe->srv) {
+ /* Remove the newly allocated peer. */
+ if (newpeer != curpeers->local) {
+ struct peer *p;
+
+ p = curpeers->remote;
+ curpeers->remote = curpeers->remote->next;
+ free(p->id);
+ free(p);
+ }
+ goto out;
+ }
+
+ if (curpeers->peers_fe->srv->init_addr_methods || curpeers->peers_fe->srv->resolvers_id ||
+ curpeers->peers_fe->srv->do_check || curpeers->peers_fe->srv->do_agent) {
+ ha_warning("parsing [%s:%d] : '%s %s' : init_addr, resolvers, check and agent are ignored for peers.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_WARN;
+ }
+
+ /* If the peer address has just been parsed, let's copy it to <newpeer>
+ * and initializes ->proto.
+ */
+ if (peer || !local_peer) {
+ newpeer->addr = curpeers->peers_fe->srv->addr;
+ newpeer->proto = protocol_lookup(newpeer->addr.ss_family, PROTO_TYPE_STREAM, 0);
+ }
+
+ newpeer->xprt = xprt_get(XPRT_RAW);
+ newpeer->sock_init_arg = NULL;
+ HA_SPIN_INIT(&newpeer->lock);
+
+ newpeer->srv = curpeers->peers_fe->srv;
+ if (!newpeer->local)
+ goto out;
+
+ /* The lines above are reserved to "peer" lines. */
+ if (*args[0] == 's')
+ goto out;
+
+ bind_conf = bind_conf_uniq_alloc(curpeers->peers_fe, file, linenum, args[2], xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : '%s %s' : Cannot allocate memory.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&bind_conf->listeners)) {
+ ha_alert("parsing [%s:%d] : One listener per \"peers\" section is authorized but another is already configured at [%s:%d].\n", file, linenum, bind_conf->file, bind_conf->line);
+ err_code |= ERR_FATAL;
+ }
+
+ if (!str2listener(args[2], curpeers->peers_fe, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[2]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ /*
+ * Newly allocated listener is at the end of the list
+ */
+ l = LIST_ELEM(bind_conf->listeners.p, typeof(l), by_bind);
+ l->maxaccept = 1;
+ l->accept = session_accept_fd;
+ l->analysers |= curpeers->peers_fe->fe_req_ana;
+ l->default_target = curpeers->peers_fe->default_target;
+ l->options |= LI_O_UNLIMITED; /* don't make the peers subject to global limits */
+ global.maxsock++; /* for the listening socket */
+ }
+ else if (strcmp(args[0], "table") == 0) {
+ struct stktable *t, *other;
+ char *id;
+ size_t prefix_len;
+
+ /* Line number and peer ID are updated only if this peer is the local one. */
+ if (init_peers_frontend(file, -1, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ other = stktable_find_by_name(args[1]);
+ if (other) {
+ ha_alert("parsing [%s:%d] : stick-table name '%s' conflicts with table declared in %s '%s' at %s:%d.\n",
+ file, linenum, args[1],
+ other->proxy ? proxy_cap_str(other->proxy->cap) : "peers",
+ other->proxy ? other->id : other->peers.p->id,
+ other->conf.file, other->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* Build the stick-table name, concatenating the "peers" section name
+ * followed by a '/' character and the table name argument.
+ */
+ chunk_reset(&trash);
+ if (!chunk_strcpy(&trash, curpeers->id)) {
+ ha_alert("parsing [%s:%d]: '%s %s' : stick-table name too long.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ prefix_len = trash.data;
+ if (!chunk_memcat(&trash, "/", 1) || !chunk_strcat(&trash, args[1])) {
+ ha_alert("parsing [%s:%d]: '%s %s' : stick-table name too long.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ t = calloc(1, sizeof *t);
+ id = strdup(trash.area);
+ if (!t || !id) {
+ ha_alert("parsing [%s:%d]: '%s %s' : memory allocation failed\n",
+ file, linenum, args[0], args[1]);
+ free(t);
+ free(id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= parse_stick_table(file, linenum, args, t, id, id + prefix_len, curpeers);
+ if (err_code & ERR_FATAL) {
+ free(t);
+ free(id);
+ goto out;
+ }
+
+ stktable_store_name(t);
+ t->next = stktables_list;
+ stktables_list = t;
+ }
+ else if (strcmp(args[0], "disabled") == 0) { /* disables this peers section */
+ curpeers->disabled |= PR_FL_DISABLED;
+ }
+ else if (strcmp(args[0], "enabled") == 0) { /* enables this peers section (used to revert a disabled default) */
+ curpeers->disabled = 0;
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
+
+/*
+ * Parse a line in a "mailers" section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
+ */
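+/* Illustrative sketch of a "mailers" section accepted by this parser (not
+ * taken from the source):
+ *   mailers mymailers
+ *       timeout mail 20s
+ *       mailer smtp1 192.168.0.1:587
+ */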
+int cfg_parse_mailers(const char *file, int linenum, char **args, int kwm)
+{
+ static struct mailers *curmailers = NULL;
+ struct mailer *newmailer = NULL;
+ const char *err;
+ int err_code = 0;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "mailers") == 0) { /* new mailers section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for mailers section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ for (curmailers = mailers; curmailers != NULL; curmailers = curmailers->next) {
+ /* "mailers" section names must be unique: reject any duplicate */
+ if (strcmp(curmailers->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: mailers section '%s' has the same name as another mailers section declared at %s:%d.\n",
+ file, linenum, args[1], curmailers->conf.file, curmailers->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ if ((curmailers = calloc(1, sizeof(*curmailers))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curmailers->next = mailers;
+ mailers = curmailers;
+ curmailers->conf.file = strdup(file);
+ curmailers->conf.line = linenum;
+ curmailers->id = strdup(args[1]);
+ curmailers->timeout.mail = DEF_MAILALERTTIME; /* XXX: Would like to skip to the next alert, if any, ASAP.
+ * But we need enough time so that timeouts don't occur
+ * during TCP processing. For now just use an arbitrary default. */
+ }
+ else if (strcmp(args[0], "mailer") == 0) { /* mailer definition */
+ struct sockaddr_storage *sk;
+ int port1, port2;
+ struct protocol *proto;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> and <addr>[:<port>] as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in server name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((newmailer = calloc(1, sizeof(*newmailer))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* the mailers are linked backwards first */
+ curmailers->count++;
+ newmailer->next = curmailers->mailer_list;
+ curmailers->mailer_list = newmailer;
+ newmailer->mailers = curmailers;
+ newmailer->conf.file = strdup(file);
+ newmailer->conf.line = linenum;
+
+ newmailer->id = strdup(args[1]);
+
+ sk = str2sa_range(args[2], NULL, &port1, &port2, NULL, &proto,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (proto->sock_prot != IPPROTO_TCP) {
+ ha_alert("parsing [%s:%d] : '%s %s' : TCP not supported for this address family.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ newmailer->addr = *sk;
+ newmailer->proto = proto;
+ newmailer->xprt = xprt_get(XPRT_RAW);
+ newmailer->sock_init_arg = NULL;
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects 'mail' and <time> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[1], "mail") == 0) {
+ const char *res;
+ unsigned int timeout_mail;
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects <time> as argument.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &timeout_mail, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s %s>.\n",
+ file, linenum, *res, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curmailers->timeout.mail = timeout_mail;
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'mail' and <time> as arguments got '%s'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
+
+void free_email_alert(struct proxy *p)
+{
+ ha_free(&p->email_alert.mailers.name);
+ ha_free(&p->email_alert.from);
+ ha_free(&p->email_alert.to);
+ ha_free(&p->email_alert.myhostname);
+}
+
+
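+/* Parse a line in a "namespace_list" section. Illustrative example (not
+ * taken from the source):
+ *   namespace_list
+ *       namespace ns1 ns2
+ * Each listed namespace must already exist on the system, otherwise opening
+ * it fails and an alert is emitted.
+ */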
+int
+cfg_parse_netns(const char *file, int linenum, char **args, int kwm)
+{
+#ifdef USE_NS
+ const char *err;
+ const char *item = args[0];
+
+ if (strcmp(item, "namespace_list") == 0) {
+ return 0;
+ }
+ else if (strcmp(item, "namespace") == 0) {
+ size_t idx = 1;
+ const char *current;
+ while (*(current = args[idx++])) {
+ err = invalid_char(current);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, item, current);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (netns_store_lookup(current, strlen(current))) {
+ ha_alert("parsing [%s:%d]: Namespace '%s' is already added.\n",
+ file, linenum, current);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (!netns_store_insert(current)) {
+ ha_alert("parsing [%s:%d]: Cannot open namespace '%s'.\n",
+ file, linenum, current);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+ }
+
+ return 0;
+#else
+ ha_alert("parsing [%s:%d]: namespace support is not compiled in.",
+ file, linenum);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
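+/* Parse a line in a "userlist" section. Illustrative example of the accepted
+ * syntax (not taken from the source):
+ *   userlist admins
+ *       group adm users alice,bob
+ *       user alice insecure-password mypassword groups adm
+ *       user bob password $5$ab...   (a crypt(3)-style hash)
+ */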
+int
+cfg_parse_users(const char *file, int linenum, char **args, int kwm)
+{
+
+ int err_code = 0;
+ const char *err;
+
+ if (strcmp(args[0], "userlist") == 0) { /* new userlist */
+ struct userlist *newul;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (newul = userlist; newul; newul = newul->next)
+ if (strcmp(newul->name, args[1]) == 0) {
+ ha_warning("parsing [%s:%d]: ignoring duplicated userlist '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ newul = calloc(1, sizeof(*newul));
+ if (!newul) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ newul->name = strdup(args[1]);
+ if (!newul->name) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ free(newul);
+ goto out;
+ }
+
+ newul->next = userlist;
+ userlist = newul;
+
+ } else if (strcmp(args[0], "group") == 0) { /* new group */
+ int cur_arg;
+ const char *err;
+ struct auth_groups *ag;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!userlist)
+ goto out;
+
+ for (ag = userlist->groups; ag; ag = ag->next)
+ if (strcmp(ag->name, args[1]) == 0) {
+ ha_warning("parsing [%s:%d]: ignoring duplicated group '%s' in userlist '%s'.\n",
+ file, linenum, args[1], userlist->name);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ ag = calloc(1, sizeof(*ag));
+ if (!ag) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ ag->name = strdup(args[1]);
+ if (!ag->name) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ free(ag);
+ goto out;
+ }
+
+ cur_arg = 2;
+
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "users") == 0) {
+ ag->groupusers = strdup(args[cur_arg + 1]);
+ cur_arg += 2;
+ continue;
+ } else {
+ ha_alert("parsing [%s:%d]: '%s' only supports 'users' option.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(ag->groupusers);
+ free(ag->name);
+ free(ag);
+ goto out;
+ }
+ }
+
+ ag->next = userlist->groups;
+ userlist->groups = ag;
+
+ } else if (strcmp(args[0], "user") == 0) { /* new user */
+ struct auth_users *newuser;
+ int cur_arg;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (!userlist)
+ goto out;
+
+ for (newuser = userlist->users; newuser; newuser = newuser->next)
+ if (strcmp(newuser->user, args[1]) == 0) {
+ ha_warning("parsing [%s:%d]: ignoring duplicated user '%s' in userlist '%s'.\n",
+ file, linenum, args[1], userlist->name);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ newuser = calloc(1, sizeof(*newuser));
+ if (!newuser) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ newuser->user = strdup(args[1]);
+
+ newuser->next = userlist->users;
+ userlist->users = newuser;
+
+ cur_arg = 2;
+
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "password") == 0) {
+#ifdef USE_LIBCRYPT
+ if (!crypt("", args[cur_arg + 1])) {
+ ha_alert("parsing [%s:%d]: the encrypted password used for user '%s' is not supported by crypt(3).\n",
+ file, linenum, newuser->user);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+#else
+ ha_warning("parsing [%s:%d]: no crypt(3) support compiled, encrypted passwords will not work.\n",
+ file, linenum);
+ err_code |= ERR_ALERT;
+#endif
+ newuser->pass = strdup(args[cur_arg + 1]);
+ cur_arg += 2;
+ continue;
+ } else if (strcmp(args[cur_arg], "insecure-password") == 0) {
+ newuser->pass = strdup(args[cur_arg + 1]);
+ newuser->flags |= AU_O_INSECURE;
+ cur_arg += 2;
+ continue;
+ } else if (strcmp(args[cur_arg], "groups") == 0) {
+ newuser->u.groups_names = strdup(args[cur_arg + 1]);
+ cur_arg += 2;
+ continue;
+ } else {
+ ha_alert("parsing [%s:%d]: '%s' only supports 'password', 'insecure-password' and 'groups' options.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ } else {
+ ha_alert("parsing [%s:%d]: unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "users");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+out:
+ return err_code;
+}
+
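+/* Parse a configuration scope declaration: a "[name]" token alone on its
+ * line, optionally followed by blanks or a comment. On success the scope
+ * name is stored into <cfg_scope>. Illustrative example (not taken from the
+ * source):
+ *   [transactions]   # following lines belong to the "transactions" scope
+ */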
+int
+cfg_parse_scope(const char *file, int linenum, char *line)
+{
+ char *beg, *end, *scope = NULL;
+ int err_code = 0;
+ const char *err;
+
+ beg = line + 1;
+ end = strchr(beg, ']');
+
+ /* Detect end of scope declaration */
+ if (!end || end == beg) {
+ ha_alert("parsing [%s:%d] : empty scope name is forbidden.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* Get scope name and check its validity */
+ scope = my_strndup(beg, end-beg);
+ err = invalid_char(scope);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in a scope name.\n",
+ file, linenum, *err);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* Be sure to have a scope declaration alone on its line */
+ line = end+1;
+ while (isspace((unsigned char)*line))
+ line++;
+ if (*line && *line != '#' && *line != '\n' && *line != '\r') {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted after scope declaration.\n",
+ file, linenum, *line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* We have a valid scope declaration, save it */
+ free(cfg_scope);
+ cfg_scope = scope;
+ scope = NULL;
+
+ out:
+ free(scope);
+ return err_code;
+}
+
+int
+cfg_parse_track_sc_num(unsigned int *track_sc_num,
+ const char *arg, const char *end, char **errmsg)
+{
+ const char *p;
+ unsigned int num;
+
+ p = arg;
+ num = read_uint64(&arg, end);
+
+ if (arg != end) {
+ memprintf(errmsg, "Wrong track-sc number '%s'", p);
+ return -1;
+ }
+
+ if (num >= MAX_SESS_STKCTR) {
+ memprintf(errmsg, "%u track-sc number exceeding "
+ "%d (MAX_SESS_STKCTR-1) value", num, MAX_SESS_STKCTR - 1);
+ return -1;
+ }
+
+ *track_sc_num = num;
+ return 0;
+}
+
+/*
+ * Detect a global section after a non-global one and output a diagnostic
+ * warning.
+ */
+static void check_section_position(char *section_name,
+ const char *file, int linenum,
+ int *non_global_parsed)
+{
+ if (strcmp(section_name, "global") == 0) {
+ if (*non_global_parsed == 1)
+ _ha_diag_warning("parsing [%s:%d] : global section detected after a non-global one, the prevalence of their statements is unspecified\n", file, linenum);
+ }
+ else if (*non_global_parsed == 0) {
+ *non_global_parsed = 1;
+ }
+}
+
+/* apply the current default_path setting for config file <file>, and
+ * optionally replace the current path to <origin> if not NULL while the
+ * default-path mode is set to "origin". Errors are returned into an
+ * allocated string passed to <err> if it's not NULL. Returns 0 on failure
+ * or non-zero on success.
+ */
+static int cfg_apply_default_path(const char *file, const char *origin, char **err)
+{
+ const char *beg, *end;
+
+ /* make path start at <beg> and end before <end>, and switch it to ""
+ * if no slash was passed.
+ */
+ beg = file;
+ end = strrchr(beg, '/');
+ if (!end)
+ end = beg;
+
+ if (!*initial_cwd) {
+ if (getcwd(initial_cwd, sizeof(initial_cwd)) == NULL) {
+ if (err)
+ memprintf(err, "Impossible to retrieve startup directory name: %s", strerror(errno));
+ return 0;
+ }
+ }
+ else if (chdir(initial_cwd) == -1) {
+ if (err)
+ memprintf(err, "Impossible to get back to initial directory '%s': %s", initial_cwd, strerror(errno));
+ return 0;
+ }
+
+ /* OK now we're (back) to initial_cwd */
+
+ switch (default_path_mode) {
+ case DEFAULT_PATH_CURRENT:
+ /* current_cwd never set, nothing to do */
+ return 1;
+
+ case DEFAULT_PATH_ORIGIN:
+ /* current_cwd set in the config */
+ if (origin &&
+		    snprintf(current_cwd, sizeof(current_cwd), "%s", origin) >= sizeof(current_cwd)) {
+ if (err)
+ memprintf(err, "Absolute path too long: '%s'", origin);
+ return 0;
+ }
+ break;
+
+ case DEFAULT_PATH_CONFIG:
+ if (end - beg >= sizeof(current_cwd)) {
+ if (err)
+ memprintf(err, "Config file path too long, cannot use for relative paths: '%s'", file);
+ return 0;
+ }
+ memcpy(current_cwd, beg, end - beg);
+ current_cwd[end - beg] = 0;
+ break;
+
+ case DEFAULT_PATH_PARENT:
+ if (end - beg + 3 >= sizeof(current_cwd)) {
+ if (err)
+ memprintf(err, "Config file path too long, cannot use for relative paths: '%s'", file);
+ return 0;
+ }
+ memcpy(current_cwd, beg, end - beg);
+ if (end > beg)
+ memcpy(current_cwd + (end - beg), "/..\0", 4);
+ else
+ memcpy(current_cwd + (end - beg), "..\0", 3);
+ break;
+ }
+
+ if (*current_cwd && chdir(current_cwd) == -1) {
+ if (err)
+			memprintf(err, "Impossible to change to directory '%s': %s", current_cwd, strerror(errno));
+ return 0;
+ }
+
+ return 1;
+}
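+/* Worked example for the "parent" mode above (paths are hypothetical): with
+ * file "/etc/haproxy/conf/site.cfg", <beg>..<end> covers "/etc/haproxy/conf",
+ * so current_cwd becomes "/etc/haproxy/conf/.." and relative paths resolve
+ * from "/etc/haproxy".
+ */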
+
+/* parses a global "default-path" directive. */
+static int cfg_parse_global_def_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret = -1;
+
+ /* "current", "config", "parent", "origin <path>" */
+
+ if (strcmp(args[1], "current") == 0)
+ default_path_mode = DEFAULT_PATH_CURRENT;
+ else if (strcmp(args[1], "config") == 0)
+ default_path_mode = DEFAULT_PATH_CONFIG;
+ else if (strcmp(args[1], "parent") == 0)
+ default_path_mode = DEFAULT_PATH_PARENT;
+ else if (strcmp(args[1], "origin") == 0)
+ default_path_mode = DEFAULT_PATH_ORIGIN;
+ else {
+ memprintf(err, "%s default-path mode '%s' for '%s', supported modes include 'current', 'config', 'parent', and 'origin'.", *args[1] ? "unsupported" : "missing", args[1], args[0]);
+ goto end;
+ }
+
+ if (default_path_mode == DEFAULT_PATH_ORIGIN) {
+ if (!*args[2]) {
+ memprintf(err, "'%s %s' expects a directory as an argument.", args[0], args[1]);
+ goto end;
+ }
+ if (!cfg_apply_default_path(file, args[2], err)) {
+ memprintf(err, "couldn't set '%s' to origin '%s': %s.", args[0], args[2], *err);
+ goto end;
+ }
+ }
+ else if (!cfg_apply_default_path(file, NULL, err)) {
+ memprintf(err, "couldn't set '%s' to '%s': %s.", args[0], args[1], *err);
+ goto end;
+ }
+
+ /* note that once applied, the path is immediately updated */
+
+ ret = 0;
+ end:
+ return ret;
+}
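+/* Illustrative configuration for the parser above (the path is hypothetical):
+ *
+ *   global
+ *       default-path origin /etc/haproxy
+ *
+ * or simply "default-path config" to resolve relative paths from the
+ * directory containing the configuration file.
+ */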
+
+/*
+ * This function reads and parses the configuration file given in the argument.
+ * Returns the error code, 0 if OK, -1 if the config file couldn't be opened,
+ * or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the first two can stop processing, the other two are just
+ * indicators.
+ */
+int readcfgfile(const char *file)
+{
+ char *thisline = NULL;
+ int linesize = LINESIZE;
+ FILE *f = NULL;
+ int linenum = 0;
+ int err_code = 0;
+ struct cfg_section *cs = NULL, *pcs = NULL;
+ struct cfg_section *ics;
+ int readbytes = 0;
+ char *outline = NULL;
+ size_t outlen = 0;
+ size_t outlinesize = 0;
+ int fatal = 0;
+ int missing_lf = -1;
+ int nested_cond_lvl = 0;
+ enum nested_cond_state nested_conds[MAXNESTEDCONDS];
+ int non_global_section_parsed = 0;
+ char *errmsg = NULL;
+
+ global.cfg_curr_line = 0;
+ global.cfg_curr_file = file;
+
+ if ((thisline = malloc(sizeof(*thisline) * linesize)) == NULL) {
+ ha_alert("Out of memory trying to allocate a buffer for a configuration line.\n");
+ err_code = -1;
+ goto err;
+ }
+
+ if ((f = fopen(file,"r")) == NULL) {
+ err_code = -1;
+ goto err;
+ }
+
+ /* change to the new dir if required */
+ if (!cfg_apply_default_path(file, NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d]: failed to apply default-path: %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ err_code = -1;
+ goto err;
+ }
+
+next_line:
+ while (fgets(thisline + readbytes, linesize - readbytes, f) != NULL) {
+ int arg, kwm = KWM_STD;
+ char *end;
+ char *args[MAX_LINE_ARGS + 1];
+ char *line = thisline;
+
+ if (missing_lf != -1) {
+ ha_alert("parsing [%s:%d]: Stray NUL character at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ missing_lf = -1;
+ break;
+ }
+
+ linenum++;
+ global.cfg_curr_line = linenum;
+
+ if (fatal >= 50) {
+ ha_alert("parsing [%s:%d]: too many fatal errors (%d), stopping now.\n", file, linenum, fatal);
+ break;
+ }
+
+ end = line + strlen(line);
+
+ if (end-line == linesize-1 && *(end-1) != '\n') {
+ /* Check if we reached the limit and the last char is not \n.
+ * Watch out for the last line without the terminating '\n'!
+ */
+ char *newline;
+ int newlinesize = linesize * 2;
+
+ newline = realloc(thisline, sizeof(*thisline) * newlinesize);
+ if (newline == NULL) {
+ ha_alert("parsing [%s:%d]: line too long, cannot allocate memory.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ linenum--;
+ continue;
+ }
+
+ readbytes = linesize - 1;
+ linesize = newlinesize;
+ thisline = newline;
+ linenum--;
+ continue;
+ }
+
+ readbytes = 0;
+
+ if (end > line && *(end-1) == '\n') {
+ /* kill trailing LF */
+ *(end - 1) = 0;
+ }
+ else {
+ /* mark this line as truncated */
+ missing_lf = end - line;
+ }
+
+ /* skip leading spaces */
+ while (isspace((unsigned char)*line))
+ line++;
+
+		if (*line == '[') { /* this is the beginning of a scope */
+ err_code |= cfg_parse_scope(file, linenum, line);
+ goto next_line;
+ }
+
+ while (1) {
+ uint32_t err;
+ const char *errptr;
+
+ arg = sizeof(args) / sizeof(*args);
+ outlen = outlinesize;
+ err = parse_line(line, outline, &outlen, args, &arg,
+ PARSE_OPT_ENV | PARSE_OPT_DQUOTE | PARSE_OPT_SQUOTE |
+ PARSE_OPT_BKSLASH | PARSE_OPT_SHARP | PARSE_OPT_WORD_EXPAND,
+ &errptr);
+
+ if (err & PARSE_ERR_QUOTE) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: unmatched quote at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_BRACE) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: unmatched brace in environment variable name at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_VARNAME) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: forbidden first char in environment variable name at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_HEX) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: truncated or invalid hexadecimal sequence at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_WRONG_EXPAND) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: truncated or invalid word expansion sequence at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & (PARSE_ERR_TOOLARGE|PARSE_ERR_OVERLAP)) {
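+				/* round the needed size up to the next multiple of 1 kB */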
+ outlinesize = (outlen + 1023) & -1024;
+ outline = my_realloc2(outline, outlinesize);
+ if (outline == NULL) {
+ ha_alert("parsing [%s:%d]: line too long, cannot allocate memory.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ fatal++;
+ outlinesize = 0;
+ goto err;
+ }
+ /* try again */
+ continue;
+ }
+
+ if (err & PARSE_ERR_TOOMANY) {
+ /* only check this *after* being sure the output is allocated */
+ ha_alert("parsing [%s:%d]: too many words, truncating after word %d, position %ld: <%s>.\n",
+ file, linenum, MAX_LINE_ARGS, (long)(args[MAX_LINE_ARGS-1] - outline + 1), args[MAX_LINE_ARGS-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ /* everything's OK */
+ break;
+ }
+
+ /* empty line */
+ if (!**args)
+ continue;
+
+ /* check for config macros */
+ if (*args[0] == '.') {
+ if (strcmp(args[0], ".if") == 0) {
+ const char *errptr = NULL;
+ char *errmsg = NULL;
+ int cond;
+ char *w;
+
+ /* remerge all words into a single expression */
+ for (w = *args; (w += strlen(w)) < outline + outlen - 1; *w = ' ')
+ ;
+
+ nested_cond_lvl++;
+ if (nested_cond_lvl >= MAXNESTEDCONDS) {
+ ha_alert("parsing [%s:%d]: too many nested '.if', max is %d.\n", file, linenum, MAXNESTEDCONDS);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_cond_lvl > 1 &&
+ (nested_conds[nested_cond_lvl - 1] == NESTED_COND_IF_DROP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_ELIF_DROP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_ELIF_SKIP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_ELSE_DROP)) {
+ nested_conds[nested_cond_lvl] = NESTED_COND_IF_SKIP;
+ goto next_line;
+ }
+
+ cond = cfg_eval_condition(args + 1, &errmsg, &errptr);
+ if (cond < 0) {
+ size_t newpos = sanitize_for_printing(args[1], errptr - args[1], 76);
+
+ ha_alert("parsing [%s:%d]: %s in '.if' at position %d:\n .if %s\n %*s\n",
+ file, linenum, errmsg,
+ (int)(errptr-args[1]+1), args[1], (int)(newpos+5), "^");
+
+ free(errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (cond)
+ nested_conds[nested_cond_lvl] = NESTED_COND_IF_TAKE;
+ else
+ nested_conds[nested_cond_lvl] = NESTED_COND_IF_DROP;
+
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".elif") == 0) {
+ const char *errptr = NULL;
+ char *errmsg = NULL;
+ int cond;
+ char *w;
+
+ /* remerge all words into a single expression */
+ for (w = *args; (w += strlen(w)) < outline + outlen - 1; *w = ' ')
+ ;
+
+ if (!nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: lone '.elif' with no matching '.if'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_DROP) {
+ ha_alert("parsing [%s:%d]: '.elif' after '.else' is not permitted.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_IF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_SKIP) {
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELIF_SKIP;
+ goto next_line;
+ }
+
+ cond = cfg_eval_condition(args + 1, &errmsg, &errptr);
+ if (cond < 0) {
+ size_t newpos = sanitize_for_printing(args[1], errptr - args[1], 74);
+
+ ha_alert("parsing [%s:%d]: %s in '.elif' at position %d:\n .elif %s\n %*s\n",
+ file, linenum, errmsg,
+ (int)(errptr-args[1]+1), args[1], (int)(newpos+7), "^");
+
+ free(errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (cond)
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELIF_TAKE;
+ else
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELIF_DROP;
+
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".else") == 0) {
+ if (*args[1]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ break;
+ }
+
+ if (!nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: lone '.else' with no matching '.if'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_DROP) {
+ ha_alert("parsing [%s:%d]: '.else' after '.else' is not permitted.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_IF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_SKIP) {
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELSE_DROP;
+ } else {
+ /* otherwise we take the "else" */
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELSE_TAKE;
+ }
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".endif") == 0) {
+ if (*args[1]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ break;
+ }
+
+ if (!nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: lone '.endif' with no matching '.if'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ break;
+ }
+ nested_cond_lvl--;
+ goto next_line;
+ }
+ }
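+		/* Illustrative snippet of the conditional syntax handled above
+		 * (the predicate is only an example):
+		 *
+		 *   .if defined(USE_FOO)
+		 *       <lines parsed>
+		 *   .elif defined(USE_BAR)
+		 *       <lines parsed only if the .if was false>
+		 *   .else
+		 *       <lines parsed if no previous branch was taken>
+		 *   .endif
+		 */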
+
+ if (nested_cond_lvl &&
+ (nested_conds[nested_cond_lvl] == NESTED_COND_IF_DROP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_DROP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_DROP)) {
+ /* The current block is masked out by the conditions */
+ goto next_line;
+ }
+
+ /* .warning/.error/.notice/.diag */
+ if (*args[0] == '.') {
+ if (strcmp(args[0], ".alert") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_alert("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+ else if (strcmp(args[0], ".warning") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_warning("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".notice") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_notice("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".diag") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_diag_warning("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ goto next_line;
+ }
+ else {
+ ha_alert("parsing [%s:%d]: unknown directive '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ break;
+ }
+ }
+
+ /* check for keyword modifiers "no" and "default" */
+ if (strcmp(args[0], "no") == 0) {
+ char *tmp;
+
+ kwm = KWM_NO;
+ tmp = args[0];
+			for (arg=0; *args[arg+1]; arg++)
+				args[arg] = args[arg+1];	// shift args left to drop the "no" keyword
+			*tmp = '\0';				// reuse the old "no" storage as an empty string
+			args[arg] = tmp;			// ... which terminates the shifted list
+ }
+ else if (strcmp(args[0], "default") == 0) {
+ kwm = KWM_DEF;
+			for (arg=0; *args[arg+1]; arg++)
+				args[arg] = args[arg+1];	// shift args left to drop the "default" keyword
+ }
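+		/* e.g. (illustrative keyword) "no option redispatch" ends up
+		 * with kwm == KWM_NO and args[] = { "option", "redispatch", "" }.
+		 */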
+
+ if (kwm != KWM_STD && strcmp(args[0], "option") != 0 &&
+ strcmp(args[0], "log") != 0 && strcmp(args[0], "busy-polling") != 0 &&
+ strcmp(args[0], "set-dumpable") != 0 && strcmp(args[0], "strict-limits") != 0 &&
+ strcmp(args[0], "insecure-fork-wanted") != 0 &&
+ strcmp(args[0], "numa-cpu-mapping") != 0) {
+ ha_alert("parsing [%s:%d]: negation/default currently "
+ "supported only for options, log, busy-polling, "
+ "set-dumpable, strict-limits, insecure-fork-wanted "
+ "and numa-cpu-mapping.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ }
+
+ /* detect section start */
+ list_for_each_entry(ics, &sections, list) {
+ if (strcmp(args[0], ics->section_name) == 0) {
+ cursection = ics->section_name;
+ pcs = cs;
+ cs = ics;
+ free(global.cfg_curr_section);
+ global.cfg_curr_section = strdup(*args[1] ? args[1] : args[0]);
+
+ if (global.mode & MODE_DIAG) {
+ check_section_position(args[0], file, linenum,
+ &non_global_section_parsed);
+ }
+
+ break;
+ }
+ }
+
+ if (pcs && pcs->post_section_parser) {
+ int status;
+
+ status = pcs->post_section_parser();
+ err_code |= status;
+ if (status & ERR_FATAL)
+ fatal++;
+
+ if (err_code & ERR_ABORT)
+ goto err;
+ }
+ pcs = NULL;
+
+ if (!cs) {
+ ha_alert("parsing [%s:%d]: unknown keyword '%s' out of section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ } else {
+ int status;
+
+ status = cs->section_parser(file, linenum, args, kwm);
+ err_code |= status;
+ if (status & ERR_FATAL)
+ fatal++;
+
+ if (err_code & ERR_ABORT)
+ goto err;
+ }
+ }
+
+ if (missing_lf != -1) {
+ ha_alert("parsing [%s:%d]: Missing LF on last line, file might have been truncated at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ ha_free(&global.cfg_curr_section);
+ if (cs && cs->post_section_parser)
+ err_code |= cs->post_section_parser();
+
+ if (nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: non-terminated '.if' block.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ }
+
+ if (*initial_cwd && chdir(initial_cwd) == -1) {
+ ha_alert("Impossible to get back to initial directory '%s' : %s\n", initial_cwd, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+err:
+ ha_free(&cfg_scope);
+ cursection = NULL;
+ free(thisline);
+ free(outline);
+ global.cfg_curr_line = 0;
+ global.cfg_curr_file = NULL;
+
+ if (f)
+ fclose(f);
+
+ return err_code;
+}
+
+#if defined(USE_THREAD) && defined USE_CPU_AFFINITY
+#if defined(__linux__)
+
+/* filter keeping only directory names matching the pattern node<X> */
+static int numa_filter(const struct dirent *dir)
+{
+ char *endptr;
+
+ /* dir name must start with "node" prefix */
+ if (strncmp(dir->d_name, "node", 4))
+ return 0;
+
+ /* dir name must be at least 5 characters long */
+ if (!dir->d_name[4])
+ return 0;
+
+ /* dir name must end with a numeric id */
+ if (strtol(&dir->d_name[4], &endptr, 10) < 0 || *endptr)
+ return 0;
+
+ /* all tests succeeded */
+ return 1;
+}
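+/* e.g. this accepts "node0" or "node12" and rejects "node", "node1a" or
+ * "cpu0" (illustrative entry names).
+ */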
+
+/* Parse a Linux sysfs cpumap string into a numeric cpu mask. The cpumap
+ * string is a list of 32-bit hexadecimal words separated by commas, with the
+ * most-significant word first and one bit per cpu number.
+ */
+static void parse_cpumap(char *cpumap_str, struct hap_cpuset *cpu_set)
+{
+ unsigned long cpumap;
+ char *start, *endptr, *comma;
+ int i, j;
+
+ ha_cpuset_zero(cpu_set);
+
+ i = 0;
+ do {
+ /* reverse-search for a comma, parse the string after the comma
+ * or at the beginning if no comma found
+ */
+ comma = strrchr(cpumap_str, ',');
+ start = comma ? comma + 1 : cpumap_str;
+
+ cpumap = strtoul(start, &endptr, 16);
+ for (j = 0; cpumap; cpumap >>= 1, ++j) {
+ if (cpumap & 0x1)
+ ha_cpuset_set(cpu_set, j + i * 32);
+ }
+
+ if (comma)
+ *comma = '\0';
+ ++i;
+ } while (comma);
+}
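+/* Usage sketch (hypothetical input): parsing "00000001,80000000" sets cpu 31
+ * (bit 31 of the last, least-significant word) and cpu 32 (bit 0 of the
+ * preceding 32-bit word).
+ */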
+
+/* Read the first line of a file from <path> into the trash buffer.
+ * Returns 0 on success, otherwise non-zero.
+ */
+static int read_file_to_trash(const char *path)
+{
+ FILE *file;
+ int ret = 1;
+
+ file = fopen(path, "r");
+ if (file) {
+ if (fgets(trash.area, trash.size, file))
+ ret = 0;
+
+ fclose(file);
+ }
+
+ return ret;
+}
+
+/* Inspect the cpu topology of the machine on startup. If a multi-socket
+ * machine is detected, try to bind on the first node with active cpu. This is
+ * done to prevent an impact on the overall performance when the topology of
+ * the machine is unknown. This function is not called if one of the conditions
+ * is met :
+ * - a non-null nbthread directive is active
+ * - a restrictive cpu-map directive is active
+ * - a restrictive affinity is already applied, for example via taskset
+ *
+ * Returns the count of cpus selected. If no automatic binding was required or
+ * an error occurred and the topology is unknown, 0 is returned.
+ */
+static int numa_detect_topology()
+{
+ struct dirent **node_dirlist;
+ int node_dirlist_size;
+
+ struct hap_cpuset active_cpus, node_cpu_set;
+ const char *parse_cpu_set_args[2];
+ char cpumap_path[PATH_MAX];
+ char *err = NULL;
+
+ /* node_cpu_set count is used as return value */
+ ha_cpuset_zero(&node_cpu_set);
+
+ /* 1. count the sysfs node<X> directories */
+ node_dirlist = NULL;
+ node_dirlist_size = scandir(NUMA_DETECT_SYSTEM_SYSFS_PATH"/node", &node_dirlist, numa_filter, alphasort);
+ if (node_dirlist_size <= 1)
+ goto free_scandir_entries;
+
+	/* 2. read and parse the list of currently online cpus */
+ if (read_file_to_trash(NUMA_DETECT_SYSTEM_SYSFS_PATH"/cpu/online")) {
+ ha_notice("Cannot read online CPUs list, will not try to refine binding\n");
+ goto free_scandir_entries;
+ }
+
+ parse_cpu_set_args[0] = trash.area;
+ parse_cpu_set_args[1] = "\0";
+ if (parse_cpu_set(parse_cpu_set_args, &active_cpus, 1, &err)) {
+ ha_notice("Cannot read online CPUs list: '%s'. Will not try to refine binding\n", err);
+ free(err);
+ goto free_scandir_entries;
+ }
+
+	/* 3. loop through the node dirs and find the first one with active cpus */
+ while (node_dirlist_size--) {
+ const char *node = node_dirlist[node_dirlist_size]->d_name;
+ ha_cpuset_zero(&node_cpu_set);
+
+ snprintf(cpumap_path, PATH_MAX, "%s/node/%s/cpumap",
+ NUMA_DETECT_SYSTEM_SYSFS_PATH, node);
+
+ if (read_file_to_trash(cpumap_path)) {
+ ha_notice("Cannot read CPUs list of '%s', will not select them to refine binding\n", node);
+ free(node_dirlist[node_dirlist_size]);
+ continue;
+ }
+
+ parse_cpumap(trash.area, &node_cpu_set);
+ ha_cpuset_and(&node_cpu_set, &active_cpus);
+
+		/* 4. set affinity on the first found node with active cpus */
+ if (!ha_cpuset_count(&node_cpu_set)) {
+ free(node_dirlist[node_dirlist_size]);
+ continue;
+ }
+
+ ha_diag_warning("Multi-socket cpu detected, automatically binding on active CPUs of '%s' (%u active cpu(s))\n", node, ha_cpuset_count(&node_cpu_set));
+ if (sched_setaffinity(getpid(), sizeof(node_cpu_set.cpuset), &node_cpu_set.cpuset) == -1) {
+ ha_warning("Cannot set the cpu affinity for this multi-cpu machine\n");
+
+ /* clear the cpuset used as return value */
+ ha_cpuset_zero(&node_cpu_set);
+ }
+
+ free(node_dirlist[node_dirlist_size]);
+ break;
+ }
+
+ free_scandir_entries:
+ while (node_dirlist_size-- > 0)
+ free(node_dirlist[node_dirlist_size]);
+ free(node_dirlist);
+
+ return ha_cpuset_count(&node_cpu_set);
+}
+
+#elif defined(__FreeBSD__)
+static int numa_detect_topology()
+{
+ struct hap_cpuset node_cpu_set;
+ int ndomains = 0, i;
+ size_t len = sizeof(ndomains);
+
+ if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) {
+		ha_notice("Cannot assess the number of CPU domains\n");
+ return 0;
+ }
+
+ BUG_ON(ndomains > MAXMEMDOM);
+ ha_cpuset_zero(&node_cpu_set);
+
+	/*
+	 * Retrieve the first valid CPU domain that has active cpus, bind the
+	 * process to it, and return the number of cpus in that domain.
+	 */
+	for (i = 0; i < ndomains; i++) {
+ struct hap_cpuset dom;
+ ha_cpuset_zero(&dom);
+ if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_DOMAIN, i, sizeof(dom.cpuset), &dom.cpuset) == -1)
+ continue;
+
+ if (!ha_cpuset_count(&dom))
+ continue;
+
+ ha_cpuset_assign(&node_cpu_set, &dom);
+
+ ha_diag_warning("Multi-socket cpu detected, automatically binding on active CPUs of '%d' (%u active cpu(s))\n", i, ha_cpuset_count(&node_cpu_set));
+ if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(node_cpu_set.cpuset), &node_cpu_set.cpuset) == -1) {
+ ha_warning("Cannot set the cpu affinity for this multi-cpu machine\n");
+
+ /* clear the cpuset used as return value */
+ ha_cpuset_zero(&node_cpu_set);
+ }
+ break;
+ }
+
+ return ha_cpuset_count(&node_cpu_set);
+}
+
+#else
+static int numa_detect_topology()
+{
+ return 0;
+}
+
+#endif
+#endif /* USE_THREAD && USE_CPU_AFFINITY */
+
+/*
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the first two can stop processing, the other two are just
+ * indicators.
+ */
+int check_config_validity()
+{
+ int cfgerr = 0;
+ struct proxy *curproxy = NULL;
+ struct proxy *init_proxies_list = NULL;
+ struct stktable *t;
+ struct server *newsrv = NULL;
+ int err_code = 0;
+ unsigned int next_pxid = 1;
+ struct bind_conf *bind_conf;
+ char *err;
+ struct cfg_postparser *postparser;
+ struct resolvers *curr_resolvers = NULL;
+ int i;
+ int diag_no_cluster_secret = 0;
+
+ bind_conf = NULL;
+ /*
+ * Now, check for the integrity of all that we have collected.
+ */
+
+ /* will be needed further to delay some tasks */
+ clock_update_date(0,1);
+
+ if (!global.tune.max_http_hdr)
+ global.tune.max_http_hdr = MAX_HTTP_HDR;
+
+ if (!global.tune.cookie_len)
+ global.tune.cookie_len = CAPTURE_LEN;
+
+ if (!global.tune.requri_len)
+ global.tune.requri_len = REQURI_LEN;
+
+ if (!global.nbthread) {
+ /* nbthread not set, thus automatic. In this case, and only if
+ * running on a single process, we enable the same number of
+		 * threads as the number of CPUs the process is bound to. This
+		 * makes it easy to control the number of threads using taskset.
+ */
+ global.nbthread = 1;
+
+#if defined(USE_THREAD)
+ {
+ int numa_cores = 0;
+#if defined(USE_CPU_AFFINITY)
+ if (global.numa_cpu_mapping && !thread_cpu_mask_forced())
+ numa_cores = numa_detect_topology();
+#endif
+ global.nbthread = numa_cores ? numa_cores :
+ thread_cpus_enabled_at_boot;
+
+ if (global.nbthread > MAX_THREADS) {
+				ha_diag_warning("nbthread not set, found %d CPUs, limiting to %d threads. Please set 'nbthread' in the global section to silence this warning.\n",
+ global.nbthread, MAX_THREADS);
+ global.nbthread = MAX_THREADS;
+ }
+ }
+ all_threads_mask = nbits(global.nbthread);
+#endif
+ }
+
+ if (!global.nbtgroups)
+ global.nbtgroups = 1;
+
+ if (thread_map_to_groups() < 0) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ pool_head_requri = create_pool("requri", global.tune.requri_len , MEM_F_SHARED);
+
+ pool_head_capture = create_pool("capture", global.tune.cookie_len, MEM_F_SHARED);
+
+ /* Post initialisation of the users and groups lists. */
+ err_code = userlist_postinit();
+ if (err_code != ERR_NONE)
+ goto out;
+
+ /* first, we will invert the proxy list order */
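+	/* (the parser prepends each new proxy, so the list is currently in
+	 * reverse declaration order; this restores file order before the
+	 * automatic ID numbering below)
+	 */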
+ curproxy = NULL;
+ while (proxies_list) {
+ struct proxy *next;
+
+ next = proxies_list->next;
+ proxies_list->next = curproxy;
+ curproxy = proxies_list;
+ if (!next)
+ break;
+ proxies_list = next;
+ }
+
+ /* starting to initialize the main proxies list */
+ init_proxies_list = proxies_list;
+
+init_proxies_list_stage1:
+ for (curproxy = init_proxies_list; curproxy; curproxy = curproxy->next) {
+ struct switching_rule *rule;
+ struct server_rule *srule;
+ struct sticking_rule *mrule;
+ struct logsrv *tmplogsrv;
+ unsigned int next_id;
+
+ if (!(curproxy->cap & PR_CAP_INT) && curproxy->uuid < 0) {
+ /* proxy ID not set, use automatic numbering with first
+ * spare entry starting with next_pxid. We don't assign
+ * numbers for internal proxies as they may depend on
+ * build or config options and we don't want them to
+ * possibly reuse existing IDs.
+ */
+ next_pxid = get_next_id(&used_proxy_id, next_pxid);
+ curproxy->conf.id.key = curproxy->uuid = next_pxid;
+ eb32_insert(&used_proxy_id, &curproxy->conf.id);
+ }
+
+ if (curproxy->mode == PR_MODE_HTTP && global.tune.bufsize >= (256 << 20) && ONLY_ONCE()) {
+ ha_alert("global.tune.bufsize must be below 256 MB when HTTP is in use (current value = %d).\n",
+ global.tune.bufsize);
+ cfgerr++;
+ }
+
+ /* next IDs are shifted even if the proxy is disabled, this
+ * guarantees that a proxy that is temporarily disabled in the
+ * configuration doesn't cause a renumbering. Internal proxies
+ * that are not assigned a static ID must never shift the IDs
+ * either since they may appear in any order (Lua, logs, etc).
+ * The GLOBAL proxy that carries the stats socket has its ID
+ * forced to zero.
+ */
+ if (curproxy->uuid >= 0)
+ next_pxid++;
+
+ if (curproxy->flags & PR_FL_DISABLED) {
+ /* ensure we don't keep listeners uselessly bound. We
+ * can't disable their listeners yet (fdtab not
+ * allocated yet) but let's skip them.
+ */
+ if (curproxy->table) {
+ ha_free(&curproxy->table->peers.name);
+ curproxy->table->peers.p = NULL;
+ }
+ continue;
+ }
+
+ /* The current proxy is referencing a default proxy. We must
+ * finalize its config, but only once. If the default proxy is
+ * ready (PR_FL_READY) it means it was already fully configured.
+ */
+ if (curproxy->defpx) {
+ if (!(curproxy->defpx->flags & PR_FL_READY)) {
+ /* check validity for 'tcp-request' layer 4/5/6/7 rules */
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_req.l4_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_req.l5_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_req.inspect_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_rep.inspect_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->http_req_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->http_res_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->http_after_res_rules, curproxy->defpx, &err_code);
+
+ err = NULL;
+ i = smp_resolve_args(curproxy->defpx, &err);
+ cfgerr += i;
+ if (i) {
+ indent_msg(&err, 8);
+ ha_alert("%s%s\n", i > 1 ? "multiple argument resolution errors:" : "", err);
+ ha_free(&err);
+ }
+ else
+ cfgerr += acl_find_targets(curproxy->defpx);
+
+ /* default proxy is now ready. Set the right FE/BE capabilities */
+ curproxy->defpx->flags |= PR_FL_READY;
+ }
+ }
+
+ /* check and reduce the bind-proc of each listener */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ unsigned long mask;
+ struct listener *li;
+
+ /* HTTP frontends with "h2" as ALPN/NPN will work in
+ * HTTP/2 and absolutely require buffers 16kB or larger.
+ */
+#ifdef USE_OPENSSL
+ if (curproxy->mode == PR_MODE_HTTP && global.tune.bufsize < 16384) {
+#ifdef OPENSSL_NPN_NEGOTIATED
+ /* check NPN */
+ if (bind_conf->ssl_conf.npn_str && strstr(bind_conf->ssl_conf.npn_str, "\002h2")) {
+ ha_alert("HTTP frontend '%s' enables HTTP/2 via NPN at [%s:%d], so global.tune.bufsize must be at least 16384 bytes (%d now).\n",
+ curproxy->id, bind_conf->file, bind_conf->line, global.tune.bufsize);
+ cfgerr++;
+ }
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ /* check ALPN */
+ if (bind_conf->ssl_conf.alpn_str && strstr(bind_conf->ssl_conf.alpn_str, "\002h2")) {
+ ha_alert("HTTP frontend '%s' enables HTTP/2 via ALPN at [%s:%d], so global.tune.bufsize must be at least 16384 bytes (%d now).\n",
+ curproxy->id, bind_conf->file, bind_conf->line, global.tune.bufsize);
+ cfgerr++;
+ }
+#endif
+ } /* HTTP && bufsize < 16384 */
+#endif
+
+ /* detect and address thread affinity inconsistencies */
+ err = NULL;
+ if (thread_resolve_group_mask(bind_conf->bind_tgroup, bind_conf->bind_thread,
+ &bind_conf->bind_tgroup, &bind_conf->bind_thread, &err) < 0) {
+ ha_alert("Proxy '%s': %s in 'bind %s' at [%s:%d].\n",
+ curproxy->id, err, bind_conf->arg, bind_conf->file, bind_conf->line);
+ free(err);
+ cfgerr++;
+ } else if (!((mask = bind_conf->bind_thread) & all_threads_mask)) {
+ unsigned long new_mask = 0;
+
+ while (mask) {
+ new_mask |= mask & all_threads_mask;
+ mask >>= global.nbthread;
+ }
+
+ bind_conf->bind_thread = new_mask;
+ ha_warning("Proxy '%s': the thread range specified on the 'thread' directive of 'bind %s' at [%s:%d] only refers to thread numbers out of the range defined by the global 'nbthread' directive. The thread numbers were remapped to existing threads instead (mask 0x%lx).\n",
+ curproxy->id, bind_conf->arg, bind_conf->file, bind_conf->line, new_mask);
+ }
+
+ /* apply thread masks and groups to all receivers */
+ list_for_each_entry(li, &bind_conf->listeners, by_bind) {
+ if (bind_conf->settings.shards <= 1) {
+ li->rx.bind_thread = bind_conf->bind_thread;
+ li->rx.bind_tgroup = bind_conf->bind_tgroup;
+ } else {
+ struct listener *new_li;
+ int shard, shards, todo, done, bit;
+ ulong mask;
+
+ shards = bind_conf->settings.shards;
+ todo = my_popcountl(bind_conf->bind_thread);
+
+ /* no more shards than total threads */
+ if (shards > todo)
+ shards = todo;
+
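+					/* Worked example (hypothetical numbers): with 8 bound
+					 * threads and shards = 3, the loop below assigns
+					 * consecutive runs of 3, 3 and 2 threads to the three
+					 * shards.
+					 */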
+ shard = done = bit = 0;
+ new_li = li;
+
+ while (1) {
+ mask = 0;
+ while (done < todo) {
+ /* enlarge mask to cover next bit of bind_thread */
+ while (!(bind_conf->bind_thread & (1UL << bit)))
+ bit++;
+ mask |= (1UL << bit);
+ bit++;
+ done += shards;
+ }
+
+ new_li->rx.bind_thread = bind_conf->bind_thread & mask;
+ new_li->rx.bind_tgroup = bind_conf->bind_tgroup;
+ done -= todo;
+
+ shard++;
+ if (shard >= shards)
+ break;
+
+ /* create another listener for new shards */
+ new_li = clone_listener(li);
+ if (!new_li) {
+ ha_alert("Out of memory while trying to allocate extra listener for shard %d in %s %s\n",
+ shard, proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+ }
+ }
+ }
+ }
+
+ switch (curproxy->mode) {
+ case PR_MODE_TCP:
+ cfgerr += proxy_cfg_ensure_no_http(curproxy);
+ break;
+
+ case PR_MODE_HTTP:
+ curproxy->http_needed = 1;
+ break;
+
+ case PR_MODE_CLI:
+ cfgerr += proxy_cfg_ensure_no_http(curproxy);
+ break;
+
+ case PR_MODE_SYSLOG:
+ /* this mode is initialized as the classic tcp proxy */
+ cfgerr += proxy_cfg_ensure_no_http(curproxy);
+ break;
+
+ case PR_MODE_PEERS:
+ case PR_MODES:
+			/* should not happen, but gcc warns about missing switch cases */
+ ha_alert("%s '%s' cannot initialize this proxy mode (peers) in this way. NOTE: PLEASE REPORT THIS TO DEVELOPERS AS YOU'RE NOT SUPPOSED TO BE ABLE TO CREATE A CONFIGURATION TRIGGERING THIS!\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!(curproxy->cap & PR_CAP_INT) && (curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->conf.listeners)) {
+ ha_warning("%s '%s' has no 'bind' directive. Please declare it as a backend if this was intended.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ if (curproxy->lbprm.algo & BE_LB_KIND) {
+ if (curproxy->options & PR_O_TRANSP) {
+ ha_alert("%s '%s' cannot use both transparent and balance mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+#ifdef WE_DONT_SUPPORT_SERVERLESS_LISTENERS
+ else if (curproxy->srv == NULL) {
+ ha_alert("%s '%s' needs at least 1 server in balance mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+#endif
+ else if (curproxy->options & PR_O_DISPATCH) {
+ ha_warning("dispatch address of %s '%s' will be ignored in balance mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (!(curproxy->options & (PR_O_TRANSP | PR_O_DISPATCH))) {
+ /* If no LB algo is set in a backend, and we're not in
+ * transparent mode, dispatch mode nor proxy mode, we
+ * want to use balance roundrobin by default.
+ */
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ }
+ }
+
+ if (curproxy->options & PR_O_DISPATCH)
+ curproxy->options &= ~PR_O_TRANSP;
+ else if (curproxy->options & PR_O_TRANSP)
+ curproxy->options &= ~PR_O_DISPATCH;
+
+ if ((curproxy->tcpcheck_rules.flags & TCPCHK_RULES_UNUSED_HTTP_RS)) {
+ ha_warning("%s '%s' uses http-check rules without 'option httpchk', so the rules are ignored.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if ((curproxy->options2 & PR_O2_CHK_ANY) == PR_O2_TCPCHK_CHK &&
+ (curproxy->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) != TCPCHK_RULES_HTTP_CHK) {
+ if (curproxy->options & PR_O_DISABLE404) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option httpchk').\n",
+ "disable-on-404", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~PR_O_DISABLE404;
+ }
+ if (curproxy->options2 & PR_O2_CHK_SNDST) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option httpchk').\n",
+ "send-state", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~PR_O2_CHK_SNDST;
+ }
+ }
+
+ if ((curproxy->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
+ if (!global.external_check) {
+ ha_alert("Proxy '%s' : '%s' unable to find required 'global.external-check'.\n",
+ curproxy->id, "option external-check");
+ cfgerr++;
+ }
+ if (!curproxy->check_command) {
+ ha_alert("Proxy '%s' : '%s' unable to find required 'external-check command'.\n",
+ curproxy->id, "option external-check");
+ cfgerr++;
+ }
+ if (!(global.tune.options & GTUNE_INSECURE_FORK)) {
+ ha_warning("Proxy '%s' : 'insecure-fork-wanted' not enabled in the global section, '%s' will likely fail.\n",
+ curproxy->id, "option external-check");
+ err_code |= ERR_WARN;
+ }
+ }
+
+ if (curproxy->email_alert.set) {
+ if (!(curproxy->email_alert.mailers.name && curproxy->email_alert.from && curproxy->email_alert.to)) {
+				ha_warning("'email-alert' will be ignored for %s '%s' (the presence of any of "
+					   "'email-alert from', 'email-alert level', 'email-alert mailers', "
+ "'email-alert myhostname', or 'email-alert to' "
+ "requires each of 'email-alert from', 'email-alert mailers' and 'email-alert to' "
+ "to be present).\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ free_email_alert(curproxy);
+ }
+ if (!curproxy->email_alert.myhostname)
+ curproxy->email_alert.myhostname = strdup(hostname);
+ }
+
+ if (curproxy->check_command) {
+ int clear = 0;
+ if ((curproxy->options2 & PR_O2_CHK_ANY) != PR_O2_EXT_CHK) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option external-check').\n",
+ "external-check command", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ clear = 1;
+ }
+ if (curproxy->check_command[0] != '/' && !curproxy->check_path) {
+ ha_alert("Proxy '%s': '%s' does not have a leading '/' and 'external-check path' is not set.\n",
+ curproxy->id, "external-check command");
+ cfgerr++;
+ }
+ if (clear) {
+ ha_free(&curproxy->check_command);
+ }
+ }
+
+ if (curproxy->check_path) {
+ if ((curproxy->options2 & PR_O2_CHK_ANY) != PR_O2_EXT_CHK) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option external-check').\n",
+ "external-check path", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ ha_free(&curproxy->check_path);
+ }
+ }
+
+ /* if a default backend was specified, let's find it */
+ if (curproxy->defbe.name) {
+ struct proxy *target;
+
+ target = proxy_be_by_name(curproxy->defbe.name);
+ if (!target) {
+ ha_alert("Proxy '%s': unable to find required default_backend: '%s'.\n",
+ curproxy->id, curproxy->defbe.name);
+ cfgerr++;
+ } else if (target == curproxy) {
+ ha_alert("Proxy '%s': loop detected for default_backend: '%s'.\n",
+ curproxy->id, curproxy->defbe.name);
+ cfgerr++;
+ } else if (target->mode != curproxy->mode &&
+ !(curproxy->mode == PR_MODE_TCP && target->mode == PR_MODE_HTTP)) {
+
+ ha_alert("%s %s '%s' (%s:%d) tries to use incompatible %s %s '%s' (%s:%d) as its default backend (see 'mode').\n",
+ proxy_mode_str(curproxy->mode), proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ proxy_mode_str(target->mode), proxy_type_str(target), target->id,
+ target->conf.file, target->conf.line);
+ cfgerr++;
+ } else {
+ free(curproxy->defbe.name);
+ curproxy->defbe.be = target;
+ /* Emit a warning if this proxy also has some servers */
+ if (curproxy->srv) {
+ ha_warning("In proxy '%s', the 'default_backend' rule always has precedence over the servers, which will never be used.\n",
+ curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ }
+
+ /* find the target proxy for 'use_backend' rules */
+ list_for_each_entry(rule, &curproxy->switching_rules, list) {
+ struct proxy *target;
+ struct logformat_node *node;
+ char *pxname;
+
+ /* Try to parse the string as a log format expression. If the result
+ * of the parsing is only one entry containing a simple string, then
+ * it's a standard string corresponding to a static rule, thus the
+ * parsing is cancelled and be.name is restored to be resolved.
+ */
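+			/* e.g. (illustrative rules) "use_backend static" remains a
+			 * static rule, while "use_backend bk_%[req.hdr(host),lower]"
+			 * is kept as a dynamic log-format expression.
+			 */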
+ pxname = rule->be.name;
+ LIST_INIT(&rule->be.expr);
+ curproxy->conf.args.ctx = ARGC_UBK;
+ curproxy->conf.args.file = rule->file;
+ curproxy->conf.args.line = rule->line;
+ err = NULL;
+ if (!parse_logformat_string(pxname, curproxy, &rule->be.expr, 0, SMP_VAL_FE_HRQ_HDR, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse use_backend rule '%s' : %s.\n",
+ rule->file, rule->line, pxname, err);
+ free(err);
+ cfgerr++;
+ continue;
+ }
+ node = LIST_NEXT(&rule->be.expr, struct logformat_node *, list);
+
+ if (!LIST_ISEMPTY(&rule->be.expr)) {
+ if (node->type != LOG_FMT_TEXT || node->list.n != &rule->be.expr) {
+ rule->dynamic = 1;
+ free(pxname);
+ continue;
+ }
+ /* Only one element in the list, a simple string: free the expression and
+ * fall back to static rule
+ */
+ LIST_DELETE(&node->list);
+ free(node->arg);
+ free(node);
+ }
+
+ rule->dynamic = 0;
+ rule->be.name = pxname;
+
+ target = proxy_be_by_name(rule->be.name);
+ if (!target) {
+ ha_alert("Proxy '%s': unable to find required use_backend: '%s'.\n",
+ curproxy->id, rule->be.name);
+ cfgerr++;
+ } else if (target == curproxy) {
+ ha_alert("Proxy '%s': loop detected for use_backend: '%s'.\n",
+ curproxy->id, rule->be.name);
+ cfgerr++;
+ } else if (target->mode != curproxy->mode &&
+ !(curproxy->mode == PR_MODE_TCP && target->mode == PR_MODE_HTTP)) {
+
+ ha_alert("%s %s '%s' (%s:%d) tries to use incompatible %s %s '%s' (%s:%d) in a 'use_backend' rule (see 'mode').\n",
+ proxy_mode_str(curproxy->mode), proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ proxy_mode_str(target->mode), proxy_type_str(target), target->id,
+ target->conf.file, target->conf.line);
+ cfgerr++;
+ } else {
+ ha_free(&rule->be.name);
+ rule->be.backend = target;
+ }
+ err_code |= warnif_tcp_http_cond(curproxy, rule->cond);
+ }
+
+ /* find the target server for 'use_server' rules */
+ list_for_each_entry(srule, &curproxy->server_rules, list) {
+ struct server *target;
+ struct logformat_node *node;
+ char *server_name;
+
+ /* We try to parse the string as a log format expression. If the result of the parsing
+ * is only one entry containing a single string, then it's a standard string corresponding
+ * to a static rule, thus the parsing is cancelled and we fall back to setting srv.ptr.
+ */
+ server_name = srule->srv.name;
+ LIST_INIT(&srule->expr);
+ curproxy->conf.args.ctx = ARGC_USRV;
+ err = NULL;
+ if (!parse_logformat_string(server_name, curproxy, &srule->expr, 0, SMP_VAL_FE_HRQ_HDR, &err)) {
+			ha_alert("Parsing [%s:%d]: use-server rule failed to parse log-format '%s' : %s.\n",
+ srule->file, srule->line, server_name, err);
+ free(err);
+ cfgerr++;
+ continue;
+ }
+ node = LIST_NEXT(&srule->expr, struct logformat_node *, list);
+
+ if (!LIST_ISEMPTY(&srule->expr)) {
+ if (node->type != LOG_FMT_TEXT || node->list.n != &srule->expr) {
+ srule->dynamic = 1;
+ free(server_name);
+ continue;
+ }
+ /* Only one element in the list, a simple string: free the expression and
+ * fall back to static rule
+ */
+ LIST_DELETE(&node->list);
+ free(node->arg);
+ free(node);
+ }
+
+ srule->dynamic = 0;
+ srule->srv.name = server_name;
+ target = findserver(curproxy, srule->srv.name);
+ err_code |= warnif_tcp_http_cond(curproxy, srule->cond);
+
+ if (!target) {
+ ha_alert("%s '%s' : unable to find server '%s' referenced in a 'use-server' rule.\n",
+ proxy_type_str(curproxy), curproxy->id, srule->srv.name);
+ cfgerr++;
+ continue;
+ }
+ ha_free(&srule->srv.name);
+ srule->srv.ptr = target;
+ target->flags |= SRV_F_NON_PURGEABLE;
+ }
+
+ /* find the target table for 'stick' rules */
+ list_for_each_entry(mrule, &curproxy->sticking_rules, list) {
+ struct stktable *target;
+
+ curproxy->be_req_ana |= AN_REQ_STICKING_RULES;
+ if (mrule->flags & STK_IS_STORE)
+ curproxy->be_rsp_ana |= AN_RES_STORE_RULES;
+
+ if (mrule->table.name)
+ target = stktable_find_by_name(mrule->table.name);
+ else
+ target = curproxy->table;
+
+ if (!target) {
+ ha_alert("Proxy '%s': unable to find stick-table '%s'.\n",
+ curproxy->id, mrule->table.name ? mrule->table.name : curproxy->id);
+ cfgerr++;
+ }
+ else if (!stktable_compatible_sample(mrule->expr, target->type)) {
+ ha_alert("Proxy '%s': type of fetch not usable with type of stick-table '%s'.\n",
+ curproxy->id, mrule->table.name ? mrule->table.name : curproxy->id);
+ cfgerr++;
+ }
+ else {
+ ha_free(&mrule->table.name);
+ mrule->table.t = target;
+ stktable_alloc_data_type(target, STKTABLE_DT_SERVER_ID, NULL, NULL);
+ stktable_alloc_data_type(target, STKTABLE_DT_SERVER_KEY, NULL, NULL);
+ if (!in_proxies_list(target->proxies_list, curproxy)) {
+ curproxy->next_stkt_ref = target->proxies_list;
+ target->proxies_list = curproxy;
+ }
+ }
+ err_code |= warnif_tcp_http_cond(curproxy, mrule->cond);
+ }
+
+ /* find the target table for 'store response' rules */
+ list_for_each_entry(mrule, &curproxy->storersp_rules, list) {
+ struct stktable *target;
+
+ curproxy->be_rsp_ana |= AN_RES_STORE_RULES;
+
+ if (mrule->table.name)
+ target = stktable_find_by_name(mrule->table.name);
+ else
+ target = curproxy->table;
+
+ if (!target) {
+ ha_alert("Proxy '%s': unable to find store table '%s'.\n",
+ curproxy->id, mrule->table.name ? mrule->table.name : curproxy->id);
+ cfgerr++;
+ }
+ else if (!stktable_compatible_sample(mrule->expr, target->type)) {
+ ha_alert("Proxy '%s': type of fetch not usable with type of stick-table '%s'.\n",
+ curproxy->id, mrule->table.name ? mrule->table.name : curproxy->id);
+ cfgerr++;
+ }
+ else {
+ ha_free(&mrule->table.name);
+ mrule->table.t = target;
+ stktable_alloc_data_type(target, STKTABLE_DT_SERVER_ID, NULL, NULL);
+ stktable_alloc_data_type(target, STKTABLE_DT_SERVER_KEY, NULL, NULL);
+ if (!in_proxies_list(target->proxies_list, curproxy)) {
+ curproxy->next_stkt_ref = target->proxies_list;
+ target->proxies_list = curproxy;
+ }
+ }
+ }
+
+ /* check validity for 'tcp-request' layer 4/5/6/7 rules */
+ cfgerr += check_action_rules(&curproxy->tcp_req.l4_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->tcp_req.l5_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->tcp_req.inspect_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->tcp_rep.inspect_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->http_req_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->http_res_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->http_after_res_rules, curproxy, &err_code);
+
+		/* warn if 'switch-mode http' is used on a TCP listener with servers but no backend */
+ if (!curproxy->defbe.name && LIST_ISEMPTY(&curproxy->switching_rules) && curproxy->srv) {
+ if ((curproxy->options & PR_O_HTTP_UPG) && curproxy->mode == PR_MODE_TCP)
+ ha_warning("Proxy '%s' : 'switch-mode http' configured for a %s %s with no backend. "
+ "Incoming connections upgraded to HTTP cannot be routed to TCP servers\n",
+ curproxy->id, proxy_mode_str(curproxy->mode), proxy_type_str(curproxy));
+ }
+
+ if (curproxy->table && curproxy->table->peers.name) {
+ struct peers *curpeers;
+
+ for (curpeers = cfg_peers; curpeers; curpeers = curpeers->next) {
+ if (strcmp(curpeers->id, curproxy->table->peers.name) == 0) {
+ ha_free(&curproxy->table->peers.name);
+ curproxy->table->peers.p = curpeers;
+ break;
+ }
+ }
+
+ if (!curpeers) {
+ ha_alert("Proxy '%s': unable to find sync peers '%s'.\n",
+ curproxy->id, curproxy->table->peers.name);
+ ha_free(&curproxy->table->peers.name);
+ curproxy->table->peers.p = NULL;
+ cfgerr++;
+ }
+ else if (curpeers->disabled) {
+ /* silently disable this peers section */
+ curproxy->table->peers.p = NULL;
+ }
+ else if (!curpeers->peers_fe) {
+ ha_alert("Proxy '%s': unable to find local peer '%s' in peers section '%s'.\n",
+ curproxy->id, localpeer, curpeers->id);
+ curproxy->table->peers.p = NULL;
+ cfgerr++;
+ }
+ }
+
+
+ if (curproxy->email_alert.mailers.name) {
+ struct mailers *curmailers = mailers;
+
+ for (curmailers = mailers; curmailers; curmailers = curmailers->next) {
+ if (strcmp(curmailers->id, curproxy->email_alert.mailers.name) == 0)
+ break;
+ }
+ if (!curmailers) {
+ ha_alert("Proxy '%s': unable to find mailers '%s'.\n",
+ curproxy->id, curproxy->email_alert.mailers.name);
+ free_email_alert(curproxy);
+ cfgerr++;
+ }
+ else {
+ err = NULL;
+ if (init_email_alert(curmailers, curproxy, &err)) {
+ ha_alert("Proxy '%s': %s.\n", curproxy->id, err);
+ free(err);
+ cfgerr++;
+ }
+ }
+ }
+
+ if (curproxy->uri_auth && !(curproxy->uri_auth->flags & STAT_CONVDONE) &&
+ !LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules) &&
+ (curproxy->uri_auth->userlist || curproxy->uri_auth->auth_realm )) {
+ ha_alert("%s '%s': stats 'auth'/'realm' and 'http-request' can't be used at the same time.\n",
+ "proxy", curproxy->id);
+ cfgerr++;
+ goto out_uri_auth_compat;
+ }
+
+ if (curproxy->uri_auth && curproxy->uri_auth->userlist &&
+ (!(curproxy->uri_auth->flags & STAT_CONVDONE) ||
+ LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules))) {
+ const char *uri_auth_compat_req[10];
+ struct act_rule *rule;
+ i = 0;
+
+ /* build the ACL condition from scratch. We're relying on anonymous ACLs for that */
+ uri_auth_compat_req[i++] = "auth";
+
+ if (curproxy->uri_auth->auth_realm) {
+ uri_auth_compat_req[i++] = "realm";
+ uri_auth_compat_req[i++] = curproxy->uri_auth->auth_realm;
+ }
+
+ uri_auth_compat_req[i++] = "unless";
+ uri_auth_compat_req[i++] = "{";
+ uri_auth_compat_req[i++] = "http_auth(.internal-stats-userlist)";
+ uri_auth_compat_req[i++] = "}";
+ uri_auth_compat_req[i++] = "";
+
+ rule = parse_http_req_cond(uri_auth_compat_req, "internal-stats-auth-compat", 0, curproxy);
+ if (!rule) {
+ cfgerr++;
+ break;
+ }
+
+ LIST_APPEND(&curproxy->uri_auth->http_req_rules, &rule->list);
+
+ if (curproxy->uri_auth->auth_realm) {
+ ha_free(&curproxy->uri_auth->auth_realm);
+ }
+ curproxy->uri_auth->flags |= STAT_CONVDONE;
+ }
+out_uri_auth_compat:
+
+ /* check whether we have a log server that uses RFC5424 log format */
+ list_for_each_entry(tmplogsrv, &curproxy->logsrvs, list) {
+ if (tmplogsrv->format == LOG_FORMAT_RFC5424) {
+ if (!curproxy->conf.logformat_sd_string) {
+ /* set the default logformat_sd_string */
+ curproxy->conf.logformat_sd_string = default_rfc5424_sd_log_format;
+ }
+ break;
+ }
+ }
+
+ /* compile the log format */
+ if (!(curproxy->cap & PR_CAP_FE)) {
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = NULL;
+ ha_free(&curproxy->conf.lfs_file);
+ curproxy->conf.lfs_line = 0;
+
+ if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ free(curproxy->conf.logformat_sd_string);
+ curproxy->conf.logformat_sd_string = NULL;
+ ha_free(&curproxy->conf.lfsd_file);
+ curproxy->conf.lfsd_line = 0;
+ }
+
+ if (curproxy->conf.logformat_string) {
+ curproxy->conf.args.ctx = ARGC_LOG;
+ curproxy->conf.args.file = curproxy->conf.lfs_file;
+ curproxy->conf.args.line = curproxy->conf.lfs_line;
+ err = NULL;
+ if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse log-format : %s.\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ if (curproxy->conf.logformat_sd_string) {
+ curproxy->conf.args.ctx = ARGC_LOGSD;
+ curproxy->conf.args.file = curproxy->conf.lfsd_file;
+ curproxy->conf.args.line = curproxy->conf.lfsd_line;
+ err = NULL;
+ if (!parse_logformat_string(curproxy->conf.logformat_sd_string, curproxy, &curproxy->logformat_sd,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n",
+					  curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err);
+ free(err);
+ cfgerr++;
+ } else if (!add_to_logformat_list(NULL, NULL, LF_SEPARATOR, &curproxy->logformat_sd, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n",
+					  curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ if (curproxy->conf.uniqueid_format_string) {
+ int where = 0;
+
+ curproxy->conf.args.ctx = ARGC_UIF;
+ curproxy->conf.args.file = curproxy->conf.uif_file;
+ curproxy->conf.args.line = curproxy->conf.uif_line;
+ err = NULL;
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ if (!parse_logformat_string(curproxy->conf.uniqueid_format_string, curproxy, &curproxy->format_unique_id,
+ LOG_OPT_HTTP|LOG_OPT_MERGE_SPACES, where, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse unique-id : %s.\n",
+ curproxy->conf.uif_file, curproxy->conf.uif_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ if (curproxy->conf.error_logformat_string) {
+ curproxy->conf.args.ctx = ARGC_LOG;
+ curproxy->conf.args.file = curproxy->conf.elfs_file;
+ curproxy->conf.args.line = curproxy->conf.elfs_line;
+ err = NULL;
+ if (!parse_logformat_string(curproxy->conf.error_logformat_string, curproxy, &curproxy->logformat_error,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse error-log-format : %s.\n",
+ curproxy->conf.elfs_file, curproxy->conf.elfs_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ /* "balance hash" needs to compile its expression */
+ if ((curproxy->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_SMP) {
+ int idx = 0;
+ const char *args[] = {
+ curproxy->lbprm.arg_str,
+ NULL,
+ };
+
+ err = NULL;
+ curproxy->conf.args.ctx = ARGC_USRV; // same context as use_server.
+ curproxy->lbprm.expr =
+ sample_parse_expr((char **)args, &idx,
+ curproxy->conf.file, curproxy->conf.line,
+ &err, &curproxy->conf.args, NULL);
+
+ if (!curproxy->lbprm.expr) {
+ ha_alert("%s '%s' [%s:%d]: failed to parse 'balance hash' expression '%s' in : %s.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ curproxy->lbprm.arg_str, err);
+ ha_free(&err);
+ cfgerr++;
+ }
+ else if (!(curproxy->lbprm.expr->fetch->val & SMP_VAL_BE_SET_SRV)) {
+ ha_alert("%s '%s' [%s:%d]: error detected while parsing 'balance hash' expression '%s' "
+ "which requires information from %s, which is not available here.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ curproxy->lbprm.arg_str, sample_src_names(curproxy->lbprm.expr->fetch->use));
+ cfgerr++;
+ }
+ else if (curproxy->mode == PR_MODE_HTTP && (curproxy->lbprm.expr->fetch->use & SMP_USE_L6REQ)) {
+ ha_warning("%s '%s' [%s:%d]: L6 sample fetch <%s> will be ignored in 'balance hash' expression in HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ curproxy->lbprm.arg_str);
+ }
+ else
+ curproxy->http_needed |= !!(curproxy->lbprm.expr->fetch->use & SMP_USE_HTTP_ANY);
+ }
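+		/* e.g. (illustrative expression) "balance hash req.hdr(x-user-id)"
+		 * is compiled once here and evaluated per request at load-balancing
+		 * time.
+		 */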
+
+ /* only now we can check if some args remain unresolved.
+ * This must be done after the users and groups resolution.
+ */
+ err = NULL;
+ i = smp_resolve_args(curproxy, &err);
+ cfgerr += i;
+ if (i) {
+ indent_msg(&err, 8);
+ ha_alert("%s%s\n", i > 1 ? "multiple argument resolution errors:" : "", err);
+ ha_free(&err);
+ } else
+ cfgerr += acl_find_targets(curproxy);
+
+ if (!(curproxy->cap & PR_CAP_INT) && (curproxy->mode == PR_MODE_TCP || curproxy->mode == PR_MODE_HTTP) &&
+ (((curproxy->cap & PR_CAP_FE) && !curproxy->timeout.client) ||
+ ((curproxy->cap & PR_CAP_BE) && (curproxy->srv) &&
+ (!curproxy->timeout.connect ||
+ (!curproxy->timeout.server && (curproxy->mode == PR_MODE_HTTP || !curproxy->timeout.tunnel)))))) {
+		ha_warning("missing timeouts for %s '%s'.\n"
+			   " | While not strictly invalid, such a configuration will certainly cause\n"
+			   " | various problems. To fix this, please ensure that all of the following\n"
+			   " | timeouts are set to a non-zero value: 'client', 'connect', 'server'.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+	/* Historically, the tarpit and queue timeouts were inherited from the
+	 * connect timeout ("contimeout"). We must still support older
+	 * configurations, so let's find out whether these parameters were set
+	 * or must be copied from the connect timeout.
+ */
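+	/* For example, with only "timeout connect 5s" set and no explicit
+	 * "timeout tarpit" or "timeout queue", both inherit the 5s value
+	 * through the two assignments below.
+	 */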
+ if (!curproxy->timeout.tarpit)
+ curproxy->timeout.tarpit = curproxy->timeout.connect;
+ if ((curproxy->cap & PR_CAP_BE) && !curproxy->timeout.queue)
+ curproxy->timeout.queue = curproxy->timeout.connect;
+
+ if ((curproxy->tcpcheck_rules.flags & TCPCHK_RULES_UNUSED_TCP_RS)) {
+ ha_warning("%s '%s' uses tcp-check rules without 'option tcp-check', so the rules are ignored.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ /* ensure that cookie capture length is not too large */
+ if (curproxy->capture_len >= global.tune.cookie_len) {
+ ha_warning("truncating capture length to %d bytes for %s '%s'.\n",
+ global.tune.cookie_len - 1, proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->capture_len = global.tune.cookie_len - 1;
+ }
+
+ /* The small pools required for the capture lists */
+ if (curproxy->nb_req_cap) {
+ curproxy->req_cap_pool = create_pool("ptrcap",
+ curproxy->nb_req_cap * sizeof(char *),
+ MEM_F_SHARED);
+ }
+
+ if (curproxy->nb_rsp_cap) {
+ curproxy->rsp_cap_pool = create_pool("ptrcap",
+ curproxy->nb_rsp_cap * sizeof(char *),
+ MEM_F_SHARED);
+ }
+
+ switch (curproxy->load_server_state_from_file) {
+ case PR_SRV_STATE_FILE_UNSPEC:
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_NONE;
+ break;
+ case PR_SRV_STATE_FILE_GLOBAL:
+ if (!global.server_state_file) {
+			ha_warning("backend '%s' is configured to load its server state file from the global 'server-state-file' directive, but 'server-state-file' is not set!\n",
+ curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ break;
+ }
+
+ /* first, we will invert the servers list order */
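+	/* (servers are prepended to this list during parsing, so this classic
+	 * in-place reversal of the singly-linked list restores their
+	 * declaration order) */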
+ newsrv = NULL;
+ while (curproxy->srv) {
+ struct server *next;
+
+ next = curproxy->srv->next;
+ curproxy->srv->next = newsrv;
+ newsrv = curproxy->srv;
+ if (!next)
+ break;
+ curproxy->srv = next;
+ }
+
+	/* Check for server name conflicts, which cause trouble in the stats.
+	 * We only emit a warning for the first conflict affecting each server,
+	 * in order to avoid a combinatorial explosion if all servers have the
+	 * same name. We do that only for servers which do not have an explicit
+	 * ID, because these IDs were also made to distinguish them and we don't
+	 * want to annoy people who correctly manage them.
+ */
+ for (newsrv = curproxy->srv; newsrv; newsrv = newsrv->next) {
+ struct server *other_srv;
+
+ if (newsrv->puid)
+ continue;
+
+ for (other_srv = curproxy->srv; other_srv && other_srv != newsrv; other_srv = other_srv->next) {
+ if (!other_srv->puid && strcmp(other_srv->id, newsrv->id) == 0) {
+ ha_alert("parsing [%s:%d] : %s '%s', another server named '%s' was already defined at line %d, please use distinct names.\n",
+ newsrv->conf.file, newsrv->conf.line,
+ proxy_type_str(curproxy), curproxy->id,
+ newsrv->id, other_srv->conf.line);
+ cfgerr++;
+ break;
+ }
+ }
+ }
+
+ /* assign automatic UIDs to servers which don't have one yet */
+ next_id = 1;
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ if (!newsrv->puid) {
+			/* server ID not set, use automatic numbering with the
+			 * first spare entry starting from next_id.
+ */
+ next_id = get_next_id(&curproxy->conf.used_server_id, next_id);
+ newsrv->conf.id.key = newsrv->puid = next_id;
+ eb32_insert(&curproxy->conf.used_server_id, &newsrv->conf.id);
+ }
+ newsrv->conf.name.key = newsrv->id;
+ ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name);
+
+ next_id++;
+ newsrv = newsrv->next;
+ }
+
+ curproxy->lbprm.wmult = 1; /* default weight multiplier */
+ curproxy->lbprm.wdiv = 1; /* default weight divider */
+
+ /*
+	 * If this server supports a maxconn parameter, it needs a dedicated
+	 * task to fill the emptied slots when a connection leaves.
+	 * Also, resolve deferred tracking dependencies if needed.
+ */
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ set_usermsgs_ctx(newsrv->conf.file, newsrv->conf.line, &newsrv->obj_type);
+
+ srv_minmax_conn_apply(newsrv);
+
+		/* this will also properly set the transport layer for
+		 * production traffic and checks.
+		 * If the default-server has use_ssl set, prepare the SSL
+		 * initialization without activating it. */
+ if (newsrv->use_ssl == 1 || newsrv->check.use_ssl == 1 ||
+ (newsrv->proxy->options & PR_O_TCPCHK_SSL) ||
+ ((newsrv->flags & SRV_F_DEFSRV_USE_SSL) && newsrv->use_ssl != 1)) {
+ if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv)
+ cfgerr += xprt_get(XPRT_SSL)->prepare_srv(newsrv);
+ }
+
+ if ((newsrv->flags & SRV_F_FASTOPEN) &&
+ ((curproxy->retry_type & (PR_RE_DISCONNECTED | PR_RE_TIMEOUT)) !=
+ (PR_RE_DISCONNECTED | PR_RE_TIMEOUT)))
+			ha_warning("server has tfo activated, the backend should be configured with at least 'conn-failure', 'empty-response' and 'response-timeout', otherwise we won't be able to retry the connection on failure.\n");
+
+ if (newsrv->trackit) {
+ if (srv_apply_track(newsrv, curproxy)) {
+ ++cfgerr;
+ goto next_srv;
+ }
+ }
+
+ next_srv:
+ reset_usermsgs_ctx();
+ newsrv = newsrv->next;
+ }
+
+ /*
+ * Try to generate dynamic cookies for servers now.
+ * It couldn't be done earlier, since at the time we parsed
+ * the server line, we may not have known yet that we
+ * should use dynamic cookies, or the secret key may not
+ * have been provided yet.
+ */
+ if (curproxy->ck_opts & PR_CK_DYNAMIC) {
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ srv_set_dyncookie(newsrv);
+ newsrv = newsrv->next;
+ }
+
+ }
+ /* We have to initialize the server lookup mechanism depending
+ * on what LB algorithm was chosen.
+ */
+
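+	/* Summary of the switch below (a reading aid, not new behaviour):
+	 * RR kind: static -> server map, random -> consistent-hash tree,
+	 * otherwise -> dynamic round-robin tree; CB kind: least-conn tree or
+	 * first-available tree; HI kind: consistent-hash tree or static map
+	 * depending on the hash type.
+	 */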
+ curproxy->lbprm.algo &= ~(BE_LB_LKUP | BE_LB_PROP_DYN);
+ switch (curproxy->lbprm.algo & BE_LB_KIND) {
+ case BE_LB_KIND_RR:
+ if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_RR_STATIC) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_MAP;
+ init_server_map(curproxy);
+ } else if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_RR_RANDOM) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_CHTREE | BE_LB_PROP_DYN;
+ if (chash_init_server_tree(curproxy) < 0) {
+ cfgerr++;
+ }
+ } else {
+ curproxy->lbprm.algo |= BE_LB_LKUP_RRTREE | BE_LB_PROP_DYN;
+ fwrr_init_server_groups(curproxy);
+ }
+ break;
+
+ case BE_LB_KIND_CB:
+ if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_CB_LC) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_LCTREE | BE_LB_PROP_DYN;
+ fwlc_init_server_tree(curproxy);
+ } else {
+ curproxy->lbprm.algo |= BE_LB_LKUP_FSTREE | BE_LB_PROP_DYN;
+ fas_init_server_tree(curproxy);
+ }
+ break;
+
+ case BE_LB_KIND_HI:
+ if ((curproxy->lbprm.algo & BE_LB_HASH_TYPE) == BE_LB_HASH_CONS) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_CHTREE | BE_LB_PROP_DYN;
+ if (chash_init_server_tree(curproxy) < 0) {
+ cfgerr++;
+ }
+ } else {
+ curproxy->lbprm.algo |= BE_LB_LKUP_MAP;
+ init_server_map(curproxy);
+ }
+ break;
+ }
+ HA_RWLOCK_INIT(&curproxy->lbprm.lock);
+
+ if (curproxy->options & PR_O_LOGASAP)
+ curproxy->to_log &= ~LW_BYTES;
+
+ if (!(curproxy->cap & PR_CAP_INT) && (curproxy->mode == PR_MODE_TCP || curproxy->mode == PR_MODE_HTTP) &&
+ (curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->logsrvs) &&
+ (!LIST_ISEMPTY(&curproxy->logformat) || !LIST_ISEMPTY(&curproxy->logformat_sd))) {
+ ha_warning("log format ignored for %s '%s' since it has no log address.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (curproxy->mode != PR_MODE_HTTP && !(curproxy->options & PR_O_HTTP_UPG)) {
+ int optnum;
+
+ if (curproxy->uri_auth) {
+ ha_warning("'stats' statement ignored for %s '%s' as it requires HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->uri_auth = NULL;
+ }
+
+ if (curproxy->capture_name) {
+ ha_warning("'capture' statement ignored for %s '%s' as it requires HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_req_rules)) {
+ ha_warning("'http-request' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_res_rules)) {
+ ha_warning("'http-response' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_after_res_rules)) {
+ ha_warning("'http-after-response' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->redirect_rules)) {
+ ha_warning("'redirect' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (curproxy->options & (PR_O_FWDFOR | PR_O_FF_ALWAYS)) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ "forwardfor", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~(PR_O_FWDFOR | PR_O_FF_ALWAYS);
+ }
+
+ if (curproxy->options & PR_O_ORGTO) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ "originalto", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~PR_O_ORGTO;
+ }
+
+ for (optnum = 0; cfg_opts[optnum].name; optnum++) {
+ if (cfg_opts[optnum].mode == PR_MODE_HTTP &&
+ (curproxy->cap & cfg_opts[optnum].cap) &&
+ (curproxy->options & cfg_opts[optnum].val)) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ cfg_opts[optnum].name, proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~cfg_opts[optnum].val;
+ }
+ }
+
+ for (optnum = 0; cfg_opts2[optnum].name; optnum++) {
+ if (cfg_opts2[optnum].mode == PR_MODE_HTTP &&
+ (curproxy->cap & cfg_opts2[optnum].cap) &&
+ (curproxy->options2 & cfg_opts2[optnum].val)) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ cfg_opts2[optnum].name, proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options2 &= ~cfg_opts2[optnum].val;
+ }
+ }
+
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (curproxy->conn_src.bind_hdr_occ) {
+ curproxy->conn_src.bind_hdr_occ = 0;
+ ha_warning("%s '%s' : ignoring use of header %s as source IP in non-HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id, curproxy->conn_src.bind_hdr_name);
+ err_code |= ERR_WARN;
+ }
+#endif
+ }
+
+ /*
+ * ensure that we're not cross-dressing a TCP server into HTTP.
+ */
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ if ((curproxy->mode != PR_MODE_HTTP) && newsrv->rdr_len) {
+ ha_alert("%s '%s' : server cannot have cookie or redirect prefix in non-HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+
+ if ((curproxy->mode != PR_MODE_HTTP) && newsrv->cklen) {
+ ha_warning("%s '%s' : ignoring cookie for server '%s' as HTTP mode is disabled.\n",
+ proxy_type_str(curproxy), curproxy->id, newsrv->id);
+ err_code |= ERR_WARN;
+ }
+
+ if ((newsrv->flags & SRV_F_MAPPORTS) && (curproxy->options2 & PR_O2_RDPC_PRST)) {
+ ha_warning("%s '%s' : RDP cookie persistence will not work for server '%s' because it lacks an explicit port number.\n",
+ proxy_type_str(curproxy), curproxy->id, newsrv->id);
+ err_code |= ERR_WARN;
+ }
+
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (curproxy->mode != PR_MODE_HTTP && newsrv->conn_src.bind_hdr_occ) {
+ newsrv->conn_src.bind_hdr_occ = 0;
+ ha_warning("%s '%s' : server %s cannot use header %s as source IP in non-HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id, newsrv->id, newsrv->conn_src.bind_hdr_name);
+ err_code |= ERR_WARN;
+ }
+#endif
+
+ if ((curproxy->mode != PR_MODE_HTTP) && (curproxy->options & PR_O_REUSE_MASK) != PR_O_REUSE_NEVR)
+ curproxy->options &= ~PR_O_REUSE_MASK;
+
+ newsrv = newsrv->next;
+ }
+
+ /* Check filter configuration, if any */
+ cfgerr += flt_check(curproxy);
+
+ if (curproxy->cap & PR_CAP_FE) {
+ if (!curproxy->accept)
+ curproxy->accept = frontend_accept;
+
+ if (!LIST_ISEMPTY(&curproxy->tcp_req.inspect_rules) ||
+ (curproxy->defpx && !LIST_ISEMPTY(&curproxy->defpx->tcp_req.inspect_rules)))
+ curproxy->fe_req_ana |= AN_REQ_INSPECT_FE;
+
+ if (curproxy->mode == PR_MODE_HTTP) {
+ curproxy->fe_req_ana |= AN_REQ_WAIT_HTTP | AN_REQ_HTTP_PROCESS_FE;
+ curproxy->fe_rsp_ana |= AN_RES_WAIT_HTTP | AN_RES_HTTP_PROCESS_FE;
+ }
+
+ if (curproxy->mode == PR_MODE_CLI) {
+ curproxy->fe_req_ana |= AN_REQ_WAIT_CLI;
+ curproxy->fe_rsp_ana |= AN_RES_WAIT_CLI;
+ }
+
+ /* both TCP and HTTP must check switching rules */
+ curproxy->fe_req_ana |= AN_REQ_SWITCHING_RULES;
+
+ /* Add filters analyzers if needed */
+ if (!LIST_ISEMPTY(&curproxy->filter_configs)) {
+ curproxy->fe_req_ana |= AN_REQ_FLT_START_FE | AN_REQ_FLT_XFER_DATA | AN_REQ_FLT_END;
+ curproxy->fe_rsp_ana |= AN_RES_FLT_START_FE | AN_RES_FLT_XFER_DATA | AN_RES_FLT_END;
+ }
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ if (!LIST_ISEMPTY(&curproxy->tcp_req.inspect_rules) ||
+ (curproxy->defpx && !LIST_ISEMPTY(&curproxy->defpx->tcp_req.inspect_rules)))
+ curproxy->be_req_ana |= AN_REQ_INSPECT_BE;
+
+ if (!LIST_ISEMPTY(&curproxy->tcp_rep.inspect_rules) ||
+ (curproxy->defpx && !LIST_ISEMPTY(&curproxy->defpx->tcp_rep.inspect_rules)))
+ curproxy->be_rsp_ana |= AN_RES_INSPECT;
+
+ if (curproxy->mode == PR_MODE_HTTP) {
+ curproxy->be_req_ana |= AN_REQ_WAIT_HTTP | AN_REQ_HTTP_INNER | AN_REQ_HTTP_PROCESS_BE;
+ curproxy->be_rsp_ana |= AN_RES_WAIT_HTTP | AN_RES_HTTP_PROCESS_BE;
+ }
+
+		/* If the backend requires RDP cookie persistence, we have to
+ * enable the corresponding analyser.
+ */
+ if (curproxy->options2 & PR_O2_RDPC_PRST)
+ curproxy->be_req_ana |= AN_REQ_PRST_RDP_COOKIE;
+
+ /* Add filters analyzers if needed */
+ if (!LIST_ISEMPTY(&curproxy->filter_configs)) {
+ curproxy->be_req_ana |= AN_REQ_FLT_START_BE | AN_REQ_FLT_XFER_DATA | AN_REQ_FLT_END;
+ curproxy->be_rsp_ana |= AN_RES_FLT_START_BE | AN_RES_FLT_XFER_DATA | AN_RES_FLT_END;
+ }
+ }
+
+ /* Check the mux protocols, if any, for each listener and server
+ * attached to the current proxy */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ int mode = (1 << (curproxy->mode == PR_MODE_HTTP));
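+		/* yields 1 for TCP mode or 2 for HTTP mode, the bit-mask form
+		 * expected by conn_get_best_mux_entry() below */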
+ const struct mux_proto_list *mux_ent;
+
+ if (!bind_conf->mux_proto) {
+ /* No protocol was specified. If we're using QUIC at the transport
+ * layer, we'll instantiate it as a mux as well. If QUIC is not
+ * compiled in, this will remain NULL.
+ */
+ if (bind_conf->xprt && bind_conf->xprt == xprt_get(XPRT_QUIC))
+ bind_conf->mux_proto = get_mux_proto(ist("quic"));
+ }
+
+ if (!bind_conf->mux_proto)
+ continue;
+
+ /* it is possible that an incorrect mux was referenced
+ * due to the proxy's mode not being taken into account
+ * on first pass. Let's adjust it now.
+ */
+ mux_ent = conn_get_best_mux_entry(bind_conf->mux_proto->token, PROTO_SIDE_FE, mode);
+
+ if (!mux_ent || !isteq(mux_ent->token, bind_conf->mux_proto->token)) {
+ ha_alert("%s '%s' : MUX protocol '%.*s' is not usable for 'bind %s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)bind_conf->mux_proto->token.len,
+ bind_conf->mux_proto->token.ptr,
+ bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr++;
+ } else {
+ if ((mux_ent->mux->flags & MX_FL_FRAMED) && !(bind_conf->options & BC_O_USE_SOCK_DGRAM)) {
+ ha_alert("%s '%s' : frame-based MUX protocol '%.*s' is incompatible with stream transport of 'bind %s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)bind_conf->mux_proto->token.len,
+ bind_conf->mux_proto->token.ptr,
+ bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr++;
+ }
+ else if (!(mux_ent->mux->flags & MX_FL_FRAMED) && !(bind_conf->options & BC_O_USE_SOCK_STREAM)) {
+ ha_alert("%s '%s' : stream-based MUX protocol '%.*s' is incompatible with framed transport of 'bind %s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)bind_conf->mux_proto->token.len,
+ bind_conf->mux_proto->token.ptr,
+ bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr++;
+ }
+ }
+
+ /* update the mux */
+ bind_conf->mux_proto = mux_ent;
+ }
+ for (newsrv = curproxy->srv; newsrv; newsrv = newsrv->next) {
+ int mode = (1 << (curproxy->mode == PR_MODE_HTTP));
+ const struct mux_proto_list *mux_ent;
+
+ if (!newsrv->mux_proto)
+ continue;
+
+ /* it is possible that an incorrect mux was referenced
+ * due to the proxy's mode not being taken into account
+ * on first pass. Let's adjust it now.
+ */
+ mux_ent = conn_get_best_mux_entry(newsrv->mux_proto->token, PROTO_SIDE_BE, mode);
+
+ if (!mux_ent || !isteq(mux_ent->token, newsrv->mux_proto->token)) {
+ ha_alert("%s '%s' : MUX protocol '%.*s' is not usable for server '%s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)newsrv->mux_proto->token.len,
+ newsrv->mux_proto->token.ptr,
+ newsrv->id, newsrv->conf.file, newsrv->conf.line);
+ cfgerr++;
+ }
+
+ /* update the mux */
+ newsrv->mux_proto = mux_ent;
+ }
+
+ /* Allocate default tcp-check rules for proxies without
+ * explicit rules.
+ */
+ if (curproxy->cap & PR_CAP_BE) {
+ if (!(curproxy->options2 & PR_O2_CHK_ANY)) {
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curproxy->tcpcheck_rules;
+
+ curproxy->options2 |= PR_O2_TCPCHK_CHK;
+
+ rs = find_tcpcheck_ruleset("*tcp-check");
+ if (!rs) {
+ rs = create_tcpcheck_ruleset("*tcp-check");
+ if (rs == NULL) {
+ ha_alert("config: %s '%s': out of memory.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+ }
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = &rs->rules;
+ rules->flags = 0;
+ }
+ }
+ }
+
+ /*
+	 * We have just initialized the main proxies list;
+	 * we must also configure the log-forward proxies list.
+ */
+ if (init_proxies_list == proxies_list) {
+ init_proxies_list = cfg_log_forward;
+ /* check if list is not null to avoid infinite loop */
+ if (init_proxies_list)
+ goto init_proxies_list_stage1;
+ }
+
+ if (init_proxies_list == cfg_log_forward) {
+ init_proxies_list = sink_proxies_list;
+ /* check if list is not null to avoid infinite loop */
+ if (init_proxies_list)
+ goto init_proxies_list_stage1;
+ }
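+	/* at this point the main, log-forward and sink proxy lists have all
+	 * completed the first initialization stage above */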
+
+ /***********************************************************/
+ /* At this point, target names have already been resolved. */
+ /***********************************************************/
+
+	/* we must finish initializing certain things on the servers */
+
+ list_for_each_entry(newsrv, &servers_list, global_list) {
+ /* initialize idle conns lists */
+ if (srv_init_per_thr(newsrv) == -1) {
+ ha_alert("parsing [%s:%d] : failed to allocate per-thread lists for server '%s'.\n",
+ newsrv->conf.file, newsrv->conf.line, newsrv->id);
+ cfgerr++;
+ continue;
+ }
+
+ if (newsrv->max_idle_conns != 0) {
+ newsrv->curr_idle_thr = calloc(global.nbthread, sizeof(*newsrv->curr_idle_thr));
+ if (!newsrv->curr_idle_thr) {
+ ha_alert("parsing [%s:%d] : failed to allocate idle connection tasks for server '%s'.\n",
+ newsrv->conf.file, newsrv->conf.line, newsrv->id);
+ cfgerr++;
+ continue;
+ }
+
+ }
+ }
+
+ idle_conn_task = task_new_anywhere();
+ if (!idle_conn_task) {
+ ha_alert("parsing : failed to allocate global idle connection task.\n");
+ cfgerr++;
+ }
+ else {
+ idle_conn_task->process = srv_cleanup_idle_conns;
+ idle_conn_task->context = NULL;
+
+ for (i = 0; i < global.nbthread; i++) {
+ idle_conns[i].cleanup_task = task_new_on(i);
+ if (!idle_conns[i].cleanup_task) {
+ ha_alert("parsing : failed to allocate idle connection tasks for thread '%d'.\n", i);
+ cfgerr++;
+ break;
+ }
+
+ idle_conns[i].cleanup_task->process = srv_cleanup_toremove_conns;
+ idle_conns[i].cleanup_task->context = NULL;
+ HA_SPIN_INIT(&idle_conns[i].idle_conns_lock);
+ MT_LIST_INIT(&idle_conns[i].toremove_conns);
+ }
+ }
+
+ /* perform the final checks before creating tasks */
+
+ /* starting to initialize the main proxies list */
+ init_proxies_list = proxies_list;
+
+init_proxies_list_stage2:
+ for (curproxy = init_proxies_list; curproxy; curproxy = curproxy->next) {
+ struct listener *listener;
+ unsigned int next_id;
+
+ /* Configure SSL for each bind line.
+ * Note: if configuration fails at some point, the ->ctx member
+ * remains NULL so that listeners can later detach.
+ */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ if (bind_conf->xprt->prepare_bind_conf &&
+ bind_conf->xprt->prepare_bind_conf(bind_conf) < 0)
+ cfgerr++;
+ }
+
+ /* adjust this proxy's listeners */
+ next_id = 1;
+ list_for_each_entry(listener, &curproxy->conf.listeners, by_fe) {
+ if (!listener->luid) {
+				/* listener ID not set, use automatic numbering with
+				 * the first spare entry starting from next_id.
+ */
+ next_id = get_next_id(&curproxy->conf.used_listener_id, next_id);
+ listener->conf.id.key = listener->luid = next_id;
+ eb32_insert(&curproxy->conf.used_listener_id, &listener->conf.id);
+ }
+ next_id++;
+
+ /* enable separate counters */
+ if (curproxy->options2 & PR_O2_SOCKSTAT) {
+ listener->counters = calloc(1, sizeof(*listener->counters));
+ if (!listener->name)
+ memprintf(&listener->name, "sock-%d", listener->luid);
+ }
+
+ if (curproxy->options & PR_O_TCP_NOLING)
+ listener->options |= LI_O_NOLINGER;
+ if (!listener->maxaccept)
+ listener->maxaccept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+
+ /* listener accept callback */
+ listener->accept = session_accept_fd;
+#ifdef USE_QUIC
+ /* override the accept callback for QUIC listeners. */
+ if (listener->flags & LI_F_QUIC_LISTENER) {
+ if (!global.cluster_secret) {
+ diag_no_cluster_secret = 1;
+ if (listener->bind_conf->options & BC_O_QUIC_FORCE_RETRY) {
+ ha_alert("QUIC listener with quic-force-retry requires global cluster-secret to be set.\n");
+ cfgerr++;
+ }
+ }
+
+ li_init_per_thr(listener);
+ }
+#endif
+
+ listener->analysers |= curproxy->fe_req_ana;
+ listener->default_target = curproxy->default_target;
+
+ if (!LIST_ISEMPTY(&curproxy->tcp_req.l4_rules))
+ listener->options |= LI_O_TCP_L4_RULES;
+
+ if (!LIST_ISEMPTY(&curproxy->tcp_req.l5_rules))
+ listener->options |= LI_O_TCP_L5_RULES;
+
+ /* smart accept mode is automatic in HTTP mode */
+ if ((curproxy->options2 & PR_O2_SMARTACC) ||
+ ((curproxy->mode == PR_MODE_HTTP || (listener->bind_conf->options & BC_O_USE_SSL)) &&
+ !(curproxy->no_options2 & PR_O2_SMARTACC)))
+ listener->options |= LI_O_NOQUICKACK;
+ }
+
+ /* Release unused SSL configs */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ if (!(bind_conf->options & BC_O_USE_SSL) && bind_conf->xprt->destroy_bind_conf)
+ bind_conf->xprt->destroy_bind_conf(bind_conf);
+ }
+
+ /* create the task associated with the proxy */
+ curproxy->task = task_new_anywhere();
+ if (curproxy->task) {
+ curproxy->task->context = curproxy;
+ curproxy->task->process = manage_proxy;
+ curproxy->flags |= PR_FL_READY;
+ } else {
+ ha_alert("Proxy '%s': no more memory when trying to allocate the management task\n",
+ curproxy->id);
+ cfgerr++;
+ }
+ }
+
+	/*
+	 * We have just initialized the main proxies list;
+	 * we must also configure the log-forward proxies list.
+	 */
+ if (init_proxies_list == proxies_list) {
+ init_proxies_list = cfg_log_forward;
+ /* check if list is not null to avoid infinite loop */
+ if (init_proxies_list)
+ goto init_proxies_list_stage2;
+ }
+
+ if (diag_no_cluster_secret) {
+ ha_diag_warning("Generating a random cluster secret. "
+				"You should define your own in the configuration to ensure consistency "
+ "after reload/restart or across your whole cluster.\n");
+ }
+
+ /*
+ * Recount currently required checks.
+ */
+
+ for (curproxy=proxies_list; curproxy; curproxy=curproxy->next) {
+ int optnum;
+
+ for (optnum = 0; cfg_opts[optnum].name; optnum++)
+ if (curproxy->options & cfg_opts[optnum].val)
+ global.last_checks |= cfg_opts[optnum].checks;
+
+ for (optnum = 0; cfg_opts2[optnum].name; optnum++)
+ if (curproxy->options2 & cfg_opts2[optnum].val)
+ global.last_checks |= cfg_opts2[optnum].checks;
+ }
+
+ if (cfg_peers) {
+ struct peers *curpeers = cfg_peers, **last;
+ struct peer *p, *pb;
+
+ /* Remove all peers sections which don't have a valid listener,
+ * which are not used by any table, or which are bound to more
+ * than one process.
+ */
+ last = &cfg_peers;
+ while (*last) {
+ struct stktable *t;
+ curpeers = *last;
+
+ if (curpeers->disabled) {
+ /* the "disabled" keyword was present */
+ if (curpeers->peers_fe)
+ stop_proxy(curpeers->peers_fe);
+ curpeers->peers_fe = NULL;
+ }
+ else if (!curpeers->peers_fe || !curpeers->peers_fe->id) {
+ ha_warning("Removing incomplete section 'peers %s' (no peer named '%s').\n",
+ curpeers->id, localpeer);
+ if (curpeers->peers_fe)
+ stop_proxy(curpeers->peers_fe);
+ curpeers->peers_fe = NULL;
+ }
+ else {
+ /* Initializes the transport layer of the server part of all the peers belonging to
+ * <curpeers> section if required.
+ * Note that ->srv is used by the local peer of a new process to connect to the local peer
+ * of an old process.
+ */
+ curpeers->peers_fe->flags |= PR_FL_READY;
+ p = curpeers->remote;
+ while (p) {
+ if (p->srv) {
+ if (p->srv->use_ssl == 1 && xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv)
+ cfgerr += xprt_get(XPRT_SSL)->prepare_srv(p->srv);
+ }
+ p = p->next;
+ }
+ /* Configure the SSL bindings of the local peer if required. */
+ if (!LIST_ISEMPTY(&curpeers->peers_fe->conf.bind)) {
+ struct list *l;
+ struct bind_conf *bind_conf;
+
+ l = &curpeers->peers_fe->conf.bind;
+ bind_conf = LIST_ELEM(l->n, typeof(bind_conf), by_fe);
+
+ if (curpeers->local->srv) {
+ if (curpeers->local->srv->use_ssl == 1 && !(bind_conf->options & BC_O_USE_SSL)) {
+					ha_warning("Peers section '%s': the local peer has a non-SSL listener but an SSL server configured at line %s:%d.\n",
+ curpeers->peers_fe->id, curpeers->local->conf.file, curpeers->local->conf.line);
+ }
+ else if (curpeers->local->srv->use_ssl != 1 && (bind_conf->options & BC_O_USE_SSL)) {
+					ha_warning("Peers section '%s': the local peer has an SSL listener but a non-SSL server configured at line %s:%d.\n",
+ curpeers->peers_fe->id, curpeers->local->conf.file, curpeers->local->conf.line);
+ }
+ }
+
+ if (bind_conf->xprt->prepare_bind_conf &&
+ bind_conf->xprt->prepare_bind_conf(bind_conf) < 0)
+ cfgerr++;
+ }
+ if (!peers_init_sync(curpeers) || !peers_alloc_dcache(curpeers)) {
+ ha_alert("Peers section '%s': out of memory, giving up on peers.\n",
+ curpeers->id);
+ cfgerr++;
+ break;
+ }
+ last = &curpeers->next;
+ continue;
+ }
+
+ /* clean what has been detected above */
+ p = curpeers->remote;
+ while (p) {
+ pb = p->next;
+ free(p->id);
+ free(p);
+ p = pb;
+ }
+
+ /* Destroy and unlink this curpeers section.
+ * Note: curpeers is backed up into *last.
+ */
+ free(curpeers->id);
+ curpeers = curpeers->next;
+			/* Reset any reference to this peers section in the list of stick-tables */
+ for (t = stktables_list; t; t = t->next) {
+ if (t->peers.p && t->peers.p == *last)
+ t->peers.p = NULL;
+ }
+ free(*last);
+ *last = curpeers;
+ }
+ }
+
+ for (t = stktables_list; t; t = t->next) {
+ if (t->proxy)
+ continue;
+ if (!stktable_init(t)) {
+ ha_alert("Proxy '%s': failed to initialize stick-table.\n", t->id);
+ cfgerr++;
+ }
+ }
+
+ /* initialize stick-tables on backend capable proxies. This must not
+ * be done earlier because the data size may be discovered while parsing
+ * other proxies.
+ */
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if ((curproxy->flags & PR_FL_DISABLED) || !curproxy->table)
+ continue;
+
+ if (!stktable_init(curproxy->table)) {
+ ha_alert("Proxy '%s': failed to initialize stick-table.\n", curproxy->id);
+ cfgerr++;
+ }
+ }
+
+ if (mailers) {
+ struct mailers *curmailers = mailers, **last;
+ struct mailer *m, *mb;
+
+ /* Remove all mailers sections which don't have a valid listener.
+ * This can happen when a mailers section is never referenced.
+ */
+ last = &mailers;
+ while (*last) {
+ curmailers = *last;
+ if (curmailers->users) {
+ last = &curmailers->next;
+ continue;
+ }
+
+ ha_warning("Removing incomplete section 'mailers %s'.\n",
+ curmailers->id);
+
+ m = curmailers->mailer_list;
+ while (m) {
+ mb = m->next;
+ free(m->id);
+ free(m);
+ m = mb;
+ }
+
+ /* Destroy and unlink this curmailers section.
+ * Note: curmailers is backed up into *last.
+ */
+ free(curmailers->id);
+ curmailers = curmailers->next;
+ free(*last);
+ *last = curmailers;
+ }
+ }
+
+	/* Update server_state_file_name to the backend name if the backend is
+	 * supposed to use a locally defined server-state file and none has
+	 * been provided */
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if (curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_LOCAL &&
+ curproxy->server_state_file_name == NULL)
+ curproxy->server_state_file_name = strdup(curproxy->id);
+ }
+
+ list_for_each_entry(curr_resolvers, &sec_resolvers, list) {
+ if (LIST_ISEMPTY(&curr_resolvers->nameservers)) {
+ ha_warning("resolvers '%s' [%s:%d] has no nameservers configured!\n",
+ curr_resolvers->id, curr_resolvers->conf.file,
+ curr_resolvers->conf.line);
+ err_code |= ERR_WARN;
+ }
+ }
+
+ list_for_each_entry(postparser, &postparsers, list) {
+ if (postparser->func)
+ cfgerr += postparser->func();
+ }
+
+ if (cfgerr > 0)
+ err_code |= ERR_ALERT | ERR_FATAL;
+ out:
+ return err_code;
+}
+
+/*
+ * Registers the CFG keyword list <kwl> as a list of valid keywords for
+ * subsequent parsing sessions.
+ */
+void cfg_register_keywords(struct cfg_kw_list *kwl)
+{
+ LIST_APPEND(&cfg_keywords.list, &kwl->list);
+}
+
+/*
+ * Unregisters the CFG keyword list <kwl> from the list of valid keywords.
+ */
+void cfg_unregister_keywords(struct cfg_kw_list *kwl)
+{
+ LIST_DELETE(&kwl->list);
+ LIST_INIT(&kwl->list);
+}
+
+/* this function registers a new section in the haproxy configuration file.
+ * <section_name> is the name of this new section and <section_parser>
+ * is the parser to be called. If two section declarations have the same
+ * name, only the first one declared is used.
+ */
+int cfg_register_section(char *section_name,
+ int (*section_parser)(const char *, int, char **, int),
+ int (*post_section_parser)())
+{
+ struct cfg_section *cs;
+
+ list_for_each_entry(cs, &sections, list) {
+ if (strcmp(cs->section_name, section_name) == 0) {
+ ha_alert("register section '%s': already registered.\n", section_name);
+ return 0;
+ }
+ }
+
+ cs = calloc(1, sizeof(*cs));
+ if (!cs) {
+ ha_alert("register section '%s': out of memory.\n", section_name);
+ return 0;
+ }
+
+ cs->section_name = section_name;
+ cs->section_parser = section_parser;
+ cs->post_section_parser = post_section_parser;
+
+ LIST_APPEND(&sections, &cs->list);
+
+ return 1;
+}
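+
+/* Illustrative usage (with a hypothetical section name and parser):
+ *
+ *     cfg_register_section("mysection", cfg_parse_mysection, NULL);
+ *
+ * In practice this is done through the REGISTER_CONFIG_SECTION() initcall
+ * macro, as at the end of this file.
+ */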
+
+/* this function registers a new function which will be called once the haproxy
+ * configuration file has been parsed. It's useful for checking dependencies
+ * between sections or resolving items once everything is parsed.
+ */
+int cfg_register_postparser(char *name, int (*func)())
+{
+ struct cfg_postparser *cp;
+
+ cp = calloc(1, sizeof(*cp));
+ if (!cp) {
+ ha_alert("register postparser '%s': out of memory.\n", name);
+ return 0;
+ }
+ cp->name = name;
+ cp->func = func;
+
+ LIST_APPEND(&postparsers, &cp->list);
+
+ return 1;
+}
+
+/*
+ * free all config section entries
+ */
+void cfg_unregister_sections(void)
+{
+ struct cfg_section *cs, *ics;
+
+ list_for_each_entry_safe(cs, ics, &sections, list) {
+ LIST_DELETE(&cs->list);
+ free(cs);
+ }
+}
+
+void cfg_backup_sections(struct list *backup_sections)
+{
+ struct cfg_section *cs, *ics;
+
+ list_for_each_entry_safe(cs, ics, &sections, list) {
+ LIST_DELETE(&cs->list);
+ LIST_APPEND(backup_sections, &cs->list);
+ }
+}
+
+void cfg_restore_sections(struct list *backup_sections)
+{
+ struct cfg_section *cs, *ics;
+
+ list_for_each_entry_safe(cs, ics, backup_sections, list) {
+ LIST_DELETE(&cs->list);
+ LIST_APPEND(&sections, &cs->list);
+ }
+}
+
+/* dumps all registered keywords by section on stdout */
+void cfg_dump_registered_keywords()
+{
+ /* CFG_GLOBAL, CFG_LISTEN, CFG_USERLIST, CFG_PEERS, CFG_CRTLIST */
+ const char* sect_names[] = { "", "global", "listen", "userlist", "peers", "crt-list", 0 };
+ int section;
+ int index;
+
+ for (section = 1; sect_names[section]; section++) {
+ struct cfg_kw_list *kwl;
+ const struct cfg_keyword *kwp, *kwn;
+
+ printf("%s\n", sect_names[section]);
+
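+		/* reading aid: this dumps keywords in lexicographic order
+		 * without allocating; each pass scans all keyword lists for the
+		 * smallest keyword strictly greater than the last one printed */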
+ for (kwn = kwp = NULL;; kwp = kwn) {
+ list_for_each_entry(kwl, &cfg_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++)
+ if (kwl->kw[index].section == section &&
+ strordered(kwp ? kwp->kw : NULL, kwl->kw[index].kw, kwn != kwp ? kwn->kw : NULL))
+ kwn = &kwl->kw[index];
+ }
+ if (kwn == kwp)
+ break;
+ printf("\t%s\n", kwn->kw);
+ }
+
+ if (section == CFG_LISTEN) {
+ /* there are plenty of other keywords there */
+ extern struct list tcp_req_conn_keywords, tcp_req_sess_keywords,
+ tcp_req_cont_keywords, tcp_res_cont_keywords;
+ extern struct bind_kw_list bind_keywords;
+ extern struct srv_kw_list srv_keywords;
+ struct bind_kw_list *bkwl;
+ struct srv_kw_list *skwl;
+ const struct bind_kw *bkwp, *bkwn;
+ const struct srv_kw *skwp, *skwn;
+ const struct cfg_opt *coptp, *coptn;
+
+			/* display the non-SSL keywords */
+ for (bkwn = bkwp = NULL;; bkwp = bkwn) {
+ list_for_each_entry(bkwl, &bind_keywords.list, list) {
+ if (strcmp(bkwl->scope, "SSL") == 0) /* skip SSL keywords */
+ continue;
+ for (index = 0; bkwl->kw[index].kw != NULL; index++) {
+ if (strordered(bkwp ? bkwp->kw : NULL,
+ bkwl->kw[index].kw,
+ bkwn != bkwp ? bkwn->kw : NULL))
+ bkwn = &bkwl->kw[index];
+ }
+ }
+ if (bkwn == bkwp)
+ break;
+
+ if (!bkwn->skip)
+ printf("\tbind <addr> %s\n", bkwn->kw);
+ else
+ printf("\tbind <addr> %s +%d\n", bkwn->kw, bkwn->skip);
+ }
+#if defined(USE_OPENSSL)
+ /* displays the "ssl" keywords */
+ for (bkwn = bkwp = NULL;; bkwp = bkwn) {
+ list_for_each_entry(bkwl, &bind_keywords.list, list) {
+ if (strcmp(bkwl->scope, "SSL") != 0) /* skip non-SSL keywords */
+ continue;
+ for (index = 0; bkwl->kw[index].kw != NULL; index++) {
+ if (strordered(bkwp ? bkwp->kw : NULL,
+ bkwl->kw[index].kw,
+ bkwn != bkwp ? bkwn->kw : NULL))
+ bkwn = &bkwl->kw[index];
+ }
+ }
+ if (bkwn == bkwp)
+ break;
+
+ if (strcmp(bkwn->kw, "ssl") == 0) /* skip "bind <addr> ssl ssl" */
+ continue;
+
+ if (!bkwn->skip)
+ printf("\tbind <addr> ssl %s\n", bkwn->kw);
+ else
+ printf("\tbind <addr> ssl %s +%d\n", bkwn->kw, bkwn->skip);
+ }
+#endif
+ for (skwn = skwp = NULL;; skwp = skwn) {
+ list_for_each_entry(skwl, &srv_keywords.list, list) {
+ for (index = 0; skwl->kw[index].kw != NULL; index++)
+ if (strordered(skwp ? skwp->kw : NULL,
+ skwl->kw[index].kw,
+ skwn != skwp ? skwn->kw : NULL))
+ skwn = &skwl->kw[index];
+ }
+ if (skwn == skwp)
+ break;
+
+ if (!skwn->skip)
+ printf("\tserver <name> <addr> %s\n", skwn->kw);
+ else
+ printf("\tserver <name> <addr> %s +%d\n", skwn->kw, skwn->skip);
+ }
+
+ for (coptn = coptp = NULL;; coptp = coptn) {
+ for (index = 0; cfg_opts[index].name; index++)
+ if (strordered(coptp ? coptp->name : NULL,
+ cfg_opts[index].name,
+ coptn != coptp ? coptn->name : NULL))
+ coptn = &cfg_opts[index];
+
+ for (index = 0; cfg_opts2[index].name; index++)
+ if (strordered(coptp ? coptp->name : NULL,
+ cfg_opts2[index].name,
+ coptn != coptp ? coptn->name : NULL))
+ coptn = &cfg_opts2[index];
+ if (coptn == coptp)
+ break;
+
+ printf("\toption %s [ ", coptn->name);
+ if (coptn->cap & PR_CAP_FE)
+ printf("FE ");
+ if (coptn->cap & PR_CAP_BE)
+ printf("BE ");
+ if (coptn->mode == PR_MODE_HTTP)
+ printf("HTTP ");
+ printf("]\n");
+ }
+
+ dump_act_rules(&tcp_req_conn_keywords, "\ttcp-request connection ");
+ dump_act_rules(&tcp_req_sess_keywords, "\ttcp-request session ");
+ dump_act_rules(&tcp_req_cont_keywords, "\ttcp-request content ");
+ dump_act_rules(&tcp_res_cont_keywords, "\ttcp-response content ");
+ dump_act_rules(&http_req_keywords.list, "\thttp-request ");
+ dump_act_rules(&http_res_keywords.list, "\thttp-response ");
+ dump_act_rules(&http_after_res_keywords.list, "\thttp-after-response ");
+ }
+ if (section == CFG_CRTLIST) {
+			/* displays the keywords available for crt-lists */
+ extern struct ssl_crtlist_kw ssl_crtlist_kws[] __maybe_unused;
+ const struct ssl_crtlist_kw *sbkwp __maybe_unused, *sbkwn __maybe_unused;
+
+#if defined(USE_OPENSSL)
+ for (sbkwn = sbkwp = NULL;; sbkwp = sbkwn) {
+ for (index = 0; ssl_crtlist_kws[index].kw != NULL; index++) {
+ if (strordered(sbkwp ? sbkwp->kw : NULL,
+ ssl_crtlist_kws[index].kw,
+ sbkwn != sbkwp ? sbkwn->kw : NULL))
+ sbkwn = &ssl_crtlist_kws[index];
+ }
+ if (sbkwn == sbkwp)
+ break;
+ if (!sbkwn->skip)
+ printf("\t%s\n", sbkwn->kw);
+ else
+ printf("\t%s +%d\n", sbkwn->kw, sbkwn->skip);
+ }
+#endif
+
+ }
+ }
+}
+
+/* these are the config sections handled by default */
+REGISTER_CONFIG_SECTION("listen", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("frontend", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("backend", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("defaults", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("global", cfg_parse_global, NULL);
+REGISTER_CONFIG_SECTION("userlist", cfg_parse_users, NULL);
+REGISTER_CONFIG_SECTION("peers", cfg_parse_peers, NULL);
+REGISTER_CONFIG_SECTION("mailers", cfg_parse_mailers, NULL);
+REGISTER_CONFIG_SECTION("namespace_list", cfg_parse_netns, NULL);
+
+static struct cfg_kw_list cfg_kws = {{ },{
+ { CFG_GLOBAL, "default-path", cfg_parse_global_def_path },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/channel.c b/src/channel.c
new file mode 100644
index 0000000..9970575
--- /dev/null
+++ b/src/channel.c
@@ -0,0 +1,591 @@
+/*
+ * Channel management functions.
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/channel.h>
+
+
+/* Schedule up to <bytes> more bytes to be forwarded via the channel without
+ * notifying the owner task. Any data pending in the buffer are scheduled to be
+ * sent as well, within the limit of the number of bytes to forward. This must
+ * be the only method to use to schedule bytes to be forwarded. If the requested
+ * number is too large, it is automatically adjusted. The number of bytes taken
+ * into account is returned. Directly touching ->to_forward will cause lockups
+ * when buf->o goes down to zero if nobody is ready to push the remaining data.
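+ *
+ * Illustrative example: with 10 input bytes pending and a request to forward
+ * 100 bytes, the 10 pending bytes are advanced immediately and to_forward is
+ * credited with the remaining 90.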
+ */
+unsigned long long __channel_forward(struct channel *chn, unsigned long long bytes)
+{
+ unsigned int budget;
+ unsigned int forwarded;
+
+ /* This is more of a safety measure as it's not supposed to happen in
+ * regular code paths.
+ */
+ if (unlikely(chn->to_forward == CHN_INFINITE_FORWARD)) {
+ c_adv(chn, ci_data(chn));
+ return bytes;
+ }
+
+ /* Bound the transferred size to a 32-bit count since all our values
+ * are 32-bit, and we don't want to reach CHN_INFINITE_FORWARD.
+ */
+ budget = MIN(bytes, CHN_INFINITE_FORWARD - 1);
+
+ /* transfer as much as we can of buf->i */
+ forwarded = MIN(ci_data(chn), budget);
+ c_adv(chn, forwarded);
+ budget -= forwarded;
+
+ if (!budget)
+ return forwarded;
+
+	/* Now we must ensure chn->to_forward stays below CHN_INFINITE_FORWARD,
+	 * which also implies it won't overflow. This takes fewer operations in
+	 * 64-bit.
+ */
+ bytes = (unsigned long long)chn->to_forward + budget;
+ if (bytes >= CHN_INFINITE_FORWARD)
+ bytes = CHN_INFINITE_FORWARD - 1;
+ budget = bytes - chn->to_forward;
+
+ chn->to_forward += budget;
+ forwarded += budget;
+ return forwarded;
+}
+
+/* writes <len> bytes from message <msg> to the channel's buffer. Returns -1 in
+ * case of success, -2 if the message is larger than the buffer size, or the
+ * number of bytes available otherwise. The send limit is automatically
+ * adjusted to the amount of data written. FIXME-20060521: handle unaligned
+ * data. Note: this function appends data to the buffer's output and possibly
+ * overwrites any pending input data which are assumed not to exist.
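+ *
+ * Illustrative example: ret = co_inject(chn, "PING\r\n", 6) returns -1 once
+ * the 6 bytes are copied; a non-negative ret means nothing was written and
+ * only <ret> contiguous bytes of room were available.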
+ */
+int co_inject(struct channel *chn, const char *msg, int len)
+{
+ int max;
+
+ if (len == 0)
+ return -1;
+
+ if (len < 0 || len > c_size(chn)) {
+ /* we can't write this chunk and will never be able to, because
+ * it is larger than the buffer. This must be reported as an
+ * error. Then we return -2 so that writers that don't care can
+ * ignore it and go on, and others can check for this value.
+ */
+ return -2;
+ }
+
+ c_realign_if_empty(chn);
+ max = b_contig_space(&chn->buf);
+ if (len > max)
+ return max;
+
+ memcpy(co_tail(chn), msg, len);
+ b_add(&chn->buf, len);
+ c_adv(chn, len);
+ chn->total += len;
+ return -1;
+}
+
+/* Tries to copy character <c> into the channel's buffer after some length
+ * controls. The chn->o and to_forward pointers are updated. If the channel
+ * input is closed, -2 is returned. If there is not enough room left in the
+ * buffer, -1 is returned. Otherwise the number of bytes copied is returned
+ * (1). Channel flag READ_PARTIAL is updated if some data can be transferred.
+ */
+int ci_putchr(struct channel *chn, char c)
+{
+ if (unlikely(channel_input_closed(chn)))
+ return -2;
+
+ if (!channel_may_recv(chn))
+ return -1;
+
+ *ci_tail(chn) = c;
+
+ b_add(&chn->buf, 1);
+ chn->flags |= CF_READ_PARTIAL;
+
+ if (chn->to_forward >= 1) {
+ if (chn->to_forward != CHN_INFINITE_FORWARD)
+ chn->to_forward--;
+ c_adv(chn, 1);
+ }
+
+ chn->total++;
+ return 1;
+}
+
+/* Tries to copy block <blk> at once into the channel's buffer after length
+ * controls. The chn->o and to_forward pointers are updated. If the channel
+ * input is closed, -2 is returned. If the block is too large for this buffer,
+ * -3 is returned. If there is not enough room left in the buffer, -1 is
+ * returned. Otherwise the number of bytes copied is returned (0 being a valid
+ * number). Channel flag READ_PARTIAL is updated if some data can be
+ * transferred.
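+ *
+ * Illustrative example: ret = ci_putblk(chn, buf, n) returns n when all the
+ * bytes were copied (possibly wrapping inside the buffer), -1 when the caller
+ * should wait for room before retrying, and -3 when <n> bytes can never fit.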
+ */
+int ci_putblk(struct channel *chn, const char *blk, int len)
+{
+ int max;
+
+ if (unlikely(channel_input_closed(chn)))
+ return -2;
+
+ if (len < 0)
+ return -3;
+
+ max = channel_recv_limit(chn);
+ if (unlikely(len > max - c_data(chn))) {
+ /* we can't write this chunk right now because the buffer is
+ * almost full or because the block is too large. Returns
+ * -3 if block is too large for this buffer. Or -1 if the
+ * room left is not large enough.
+ */
+ if (len > max)
+ return -3;
+
+ return -1;
+ }
+
+ if (unlikely(len == 0))
+ return 0;
+
+ /* OK so the data fits in the buffer in one or two blocks */
+ max = b_contig_space(&chn->buf);
+ memcpy(ci_tail(chn), blk, MIN(len, max));
+ if (len > max)
+ memcpy(c_orig(chn), blk + max, len - max);
+
+ b_add(&chn->buf, len);
+ channel_add_input(chn, len);
+ return len;
+}
+
+/* Locates the longest part of the channel's output buffer that is composed
+ * exclusively of characters not in the <delim> set, and delimited by one of
+ * these characters, and returns the initial part and the first of such
+ * delimiters. A single escape character in <escape> may be specified so that
+ * when not 0 and found, the character that follows it is never taken as a
+ * delimiter. Note that <delim> cannot contain the zero byte, hence this
+ * function is not usable with byte zero as a delimiter.
+ *
+ * Return values :
+ * >0 : number of bytes read. Includes the sep if present before len or end.
+ * =0 : no sep before end found. <str> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. One of the delimiters is waited for as long as neither the buffer
+ * nor the output are full. If either of them is full, the string may be
+ * returned as is, without the delimiter.
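+ *
+ * Illustrative example: co_getdelim(chn, buf, sizeof(buf), ";,", '\\') copies
+ * output bytes into <buf> up to and including the first unescaped ';' or ','.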
+ */
+int co_getdelim(const struct channel *chn, char *str, int len, const char *delim, char escape)
+{
+ uchar delim_map[256 / 8];
+ int found, escaped;
+ uint pos, bit;
+ int ret, max;
+ uchar b;
+ char *p;
+
+ ret = 0;
+ max = len;
+
+ /* closed or empty + imminent close = -1; empty = 0 */
+ if (unlikely((chn->flags & CF_SHUTW) || channel_is_empty(chn))) {
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW))
+ ret = -1;
+ goto out;
+ }
+
+ p = co_head(chn);
+
+ if (max > co_data(chn)) {
+ max = co_data(chn);
+ str[max-1] = 0;
+ }
+
+ /* create the byte map */
+ memset(delim_map, 0, sizeof(delim_map));
+ while ((b = *delim)) {
+ pos = b >> 3;
+ bit = b & 7;
+ delim_map[pos] |= 1 << bit;
+ delim++;
+ }
+
+ found = escaped = 0;
+ while (max) {
+ *str++ = b = *p;
+ ret++;
+ max--;
+
+ if (escape && (escaped || *p == escape)) {
+ escaped = !escaped;
+ goto skip;
+ }
+
+ pos = b >> 3;
+ bit = b & 7;
+ if (delim_map[pos] & (1 << bit)) {
+ found = 1;
+ break;
+ }
+ skip:
+ p = b_next(&chn->buf, p);
+ }
+
+ if (ret > 0 && ret < len &&
+ (ret < co_data(chn) || channel_may_recv(chn)) &&
+ !found &&
+ !(chn->flags & (CF_SHUTW|CF_SHUTW_NOW)))
+ ret = 0;
+ out:
+ if (max)
+ *str = 0;
+ return ret;
+}
+
+/* Gets one text word out of a channel's buffer from a stream connector.
+ * Return values :
+ * >0 : number of bytes read. Includes the sep if present before len or end.
+ * =0 : no sep before end found. <str> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. The line separator is waited for as long as neither the buffer
+ * nor the output are full. If either of them is full, the string may be
+ * returned as is, without the line separator.
+ */
+int co_getword(const struct channel *chn, char *str, int len, char sep)
+{
+ int ret, max;
+ char *p;
+
+ ret = 0;
+ max = len;
+
+ /* closed or empty + imminent close = -1; empty = 0 */
+ if (unlikely((chn->flags & CF_SHUTW) || channel_is_empty(chn))) {
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW))
+ ret = -1;
+ goto out;
+ }
+
+ p = co_head(chn);
+
+ if (max > co_data(chn)) {
+ max = co_data(chn);
+ str[max-1] = 0;
+ }
+ while (max) {
+ *str++ = *p;
+ ret++;
+ max--;
+
+ if (*p == sep)
+ break;
+ p = b_next(&chn->buf, p);
+ }
+ if (ret > 0 && ret < len &&
+ (ret < co_data(chn) || channel_may_recv(chn)) &&
+ *(str-1) != sep &&
+ !(chn->flags & (CF_SHUTW|CF_SHUTW_NOW)))
+ ret = 0;
+ out:
+ if (max)
+ *str = 0;
+ return ret;
+}
+
+/* Gets one text line out of a channel's buffer from a stream connector.
+ * Return values :
+ * >0 : number of bytes read. Includes the \n if present before len or end.
+ * =0 : no '\n' before end found. <str> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. The '\n' is waited for as long as neither the buffer nor the
+ * output are full. If either of them is full, the string may be returned
+ * as is, without the '\n'.
+ */
+int co_getline(const struct channel *chn, char *str, int len)
+{
+ int ret, max;
+ char *p;
+
+ ret = 0;
+ max = len;
+
+ /* closed or empty + imminent close = -1; empty = 0 */
+ if (unlikely((chn->flags & CF_SHUTW) || channel_is_empty(chn))) {
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW))
+ ret = -1;
+ goto out;
+ }
+
+ p = co_head(chn);
+
+ if (max > co_data(chn)) {
+ max = co_data(chn);
+ str[max-1] = 0;
+ }
+ while (max) {
+ *str++ = *p;
+ ret++;
+ max--;
+
+ if (*p == '\n')
+ break;
+ p = b_next(&chn->buf, p);
+ }
+ if (ret > 0 && ret < len &&
+ (ret < co_data(chn) || channel_may_recv(chn)) &&
+ *(str-1) != '\n' &&
+ !(chn->flags & (CF_SHUTW|CF_SHUTW_NOW)))
+ ret = 0;
+ out:
+ if (max)
+ *str = 0;
+ return ret;
+}
+
+/* Gets one char of data from a channel's buffer.
+ * Return values :
+ * 1 : number of bytes read, equal to requested size.
+ * =0 : not enough data available. <c> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it.
+ */
+int co_getchar(const struct channel *chn, char *c)
+{
+ if (chn->flags & CF_SHUTW)
+ return -1;
+
+ if (unlikely(co_data(chn) == 0)) {
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW))
+ return -1;
+ return 0;
+ }
+
+ *c = *(co_head(chn));
+ return 1;
+}
+
+/* Gets one full block of data at once from a channel's buffer, optionally from
+ * a specific offset. Return values :
+ * >0 : number of bytes read, equal to requested size.
+ * =0 : not enough data available. <blk> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it.
+ */
+int co_getblk(const struct channel *chn, char *blk, int len, int offset)
+{
+ if (chn->flags & CF_SHUTW)
+ return -1;
+
+ if (len + offset > co_data(chn)) {
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW))
+ return -1;
+ return 0;
+ }
+
+ return b_getblk(&chn->buf, blk, len, offset);
+}
+
+/* Gets one or two blocks of data at once from a channel's output buffer.
+ * Return values :
+ * >0 : number of blocks filled (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available. <blk*> are left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. Unused buffers are left in an undefined state.
+ */
+int co_getblk_nc(const struct channel *chn, const char **blk1, size_t *len1, const char **blk2, size_t *len2)
+{
+ if (unlikely(co_data(chn) == 0)) {
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW))
+ return -1;
+ return 0;
+ }
+
+ return b_getblk_nc(&chn->buf, blk1, len1, blk2, len2, 0, co_data(chn));
+}
+
+/* Gets one text line out of a channel's output buffer from a stream connector.
+ * Return values :
+ * >0 : number of blocks returned (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available.
+ * <0 : no more bytes readable because output is shut.
+ * The '\n' is waited for as long as neither the buffer nor the output are
+ * full. If either of them is full, the string may be returned as is, without
+ * the '\n'. Unused buffers are left in an undefined state.
+ */
+int co_getline_nc(const struct channel *chn,
+ const char **blk1, size_t *len1,
+ const char **blk2, size_t *len2)
+{
+ int retcode;
+ int l;
+
+ retcode = co_getblk_nc(chn, blk1, len1, blk2, len2);
+ if (unlikely(retcode <= 0))
+ return retcode;
+
+ for (l = 0; l < *len1 && (*blk1)[l] != '\n'; l++);
+ if (l < *len1 && (*blk1)[l] == '\n') {
+ *len1 = l + 1;
+ return 1;
+ }
+
+ if (retcode >= 2) {
+ for (l = 0; l < *len2 && (*blk2)[l] != '\n'; l++);
+ if (l < *len2 && (*blk2)[l] == '\n') {
+ *len2 = l + 1;
+ return 2;
+ }
+ }
+
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW)) {
+ /* If we have found no LF and the buffer is shut, then
+ * the resulting string is made of the concatenation of
+ * the pending blocks (1 or 2).
+ */
+ return retcode;
+ }
+
+ /* No LF yet and not shut yet */
+ return 0;
+}
+
+/* Gets one full block of data at once from a channel's input buffer.
+ * This function can return the data split in one or two blocks.
+ * Return values :
+ * >0 : number of blocks returned (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available.
+ * <0 : no more bytes readable because input is shut.
+ */
+int ci_getblk_nc(const struct channel *chn,
+ char **blk1, size_t *len1,
+ char **blk2, size_t *len2)
+{
+ if (unlikely(ci_data(chn) == 0)) {
+ if (chn->flags & CF_SHUTR)
+ return -1;
+ return 0;
+ }
+
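+	/* the pending input data wrap past the end of the storage area:
+	 * return them as two blocks, the tail part up to the wrapping point
+	 * first, then the part that continues at the buffer's origin */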
+ if (unlikely(ci_head(chn) + ci_data(chn) > c_wrap(chn))) {
+ *blk1 = ci_head(chn);
+ *len1 = c_wrap(chn) - ci_head(chn);
+ *blk2 = c_orig(chn);
+ *len2 = ci_data(chn) - *len1;
+ return 2;
+ }
+
+ *blk1 = ci_head(chn);
+ *len1 = ci_data(chn);
+ return 1;
+}
+
+/* Gets one text line out of a channel's input buffer from a stream connector.
+ * Return values :
+ * >0 : number of blocks returned (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available.
+ * <0 : no more bytes readable because output is shut.
+ * The '\n' is waited for as long as neither the buffer nor the input are
+ * full. If either of them is full, the string may be returned as is, without
+ * the '\n'. Unused buffers are left in an undefined state.
+ */
+int ci_getline_nc(const struct channel *chn,
+ char **blk1, size_t *len1,
+ char **blk2, size_t *len2)
+{
+ int retcode;
+ int l;
+
+ retcode = ci_getblk_nc(chn, blk1, len1, blk2, len2);
+ if (unlikely(retcode <= 0))
+ return retcode;
+
+ for (l = 0; l < *len1 && (*blk1)[l] != '\n'; l++);
+ if (l < *len1 && (*blk1)[l] == '\n') {
+ *len1 = l + 1;
+ return 1;
+ }
+
+ if (retcode >= 2) {
+ for (l = 0; l < *len2 && (*blk2)[l] != '\n'; l++);
+ if (l < *len2 && (*blk2)[l] == '\n') {
+ *len2 = l + 1;
+ return 2;
+ }
+ }
+
+ if (chn->flags & CF_SHUTW) {
+ /* If we have found no LF and the buffer is shut, then
+ * the resulting string is made of the concatenation of
+ * the pending blocks (1 or 2).
+ */
+ return retcode;
+ }
+
+ /* No LF yet and not shut yet */
+ return 0;
+}
+
+/* Inserts <str> followed by "\r\n" at position <pos> relative to channel <c>'s
+ * input head. The <len> argument informs about the length of string <str> so
+ * that we don't have to measure it. <str> must be a valid pointer and must not
+ * include the trailing "\r\n".
+ *
+ * The number of bytes added is returned on success. 0 is returned on failure.
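+ *
+ * Illustrative example: inserting "X-Hdr: 1" (len = 8) at pos 0 shifts the
+ * existing input right by len + 2 = 10 bytes and prepends "X-Hdr: 1\r\n".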
+ */
+int ci_insert_line2(struct channel *c, int pos, const char *str, int len)
+{
+ struct buffer *b = &c->buf;
+ char *dst = c_ptr(c, pos);
+ int delta;
+
+ delta = len + 2;
+
+ if (__b_tail(b) + delta >= b_wrap(b))
+ return 0; /* no space left */
+
+ if (b_data(b) &&
+ b_tail(b) + delta > b_head(b) &&
+ b_head(b) >= b_tail(b))
+ return 0; /* no space left before wrapping data */
+
+ /* first, protect the end of the buffer */
+ memmove(dst + delta, dst, b_tail(b) - dst);
+
+ /* now, copy str over dst */
+ memcpy(dst, str, len);
+ dst[len] = '\r';
+ dst[len + 1] = '\n';
+
+ b_add(b, delta);
+ return delta;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/check.c b/src/check.c
new file mode 100644
index 0000000..a4cd851
--- /dev/null
+++ b/src/check.c
@@ -0,0 +1,2363 @@
+/*
+ * Health-checks functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/chunk.h>
+#include <haproxy/dgram.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/extcheck.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/mailers.h>
+#include <haproxy/port_range.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/regex.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+#include <haproxy/vars.h>
+
+/* trace source and events */
+static void check_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * check - check
+ *
+ * CHECK_EV_* macros are defined in <haproxy/check.h>
+ */
+static const struct trace_event check_trace_events[] = {
+ { .mask = CHK_EV_TASK_WAKE, .name = "task_wake", .desc = "Check task woken up" },
+ { .mask = CHK_EV_HCHK_START, .name = "hchck_start", .desc = "Health-check started" },
+ { .mask = CHK_EV_HCHK_WAKE, .name = "hchck_wake", .desc = "Health-check woken up" },
+ { .mask = CHK_EV_HCHK_RUN, .name = "hchck_run", .desc = "Health-check running" },
+ { .mask = CHK_EV_HCHK_END, .name = "hchck_end", .desc = "Health-check terminated" },
+ { .mask = CHK_EV_HCHK_SUCC, .name = "hchck_succ", .desc = "Health-check success" },
+ { .mask = CHK_EV_HCHK_ERR, .name = "hchck_err", .desc = "Health-check failure" },
+
+ { .mask = CHK_EV_TCPCHK_EVAL, .name = "tcp_check_eval", .desc = "tcp-check rules evaluation" },
+ { .mask = CHK_EV_TCPCHK_ERR, .name = "tcp_check_err", .desc = "tcp-check evaluation error" },
+ { .mask = CHK_EV_TCPCHK_CONN, .name = "tcp_check_conn", .desc = "tcp-check connection rule" },
+ { .mask = CHK_EV_TCPCHK_SND, .name = "tcp_check_send", .desc = "tcp-check send rule" },
+ { .mask = CHK_EV_TCPCHK_EXP, .name = "tcp_check_expect", .desc = "tcp-check expect rule" },
+ { .mask = CHK_EV_TCPCHK_ACT, .name = "tcp_check_action", .desc = "tcp-check action rule" },
+
+ { .mask = CHK_EV_RX_DATA, .name = "rx_data", .desc = "receipt of data" },
+ { .mask = CHK_EV_RX_BLK, .name = "rx_blk", .desc = "receipt blocked" },
+ { .mask = CHK_EV_RX_ERR, .name = "rx_err", .desc = "receipt error" },
+
+ { .mask = CHK_EV_TX_DATA, .name = "tx_data", .desc = "transmission of data" },
+ { .mask = CHK_EV_TX_BLK, .name = "tx_blk", .desc = "transmission blocked" },
+ { .mask = CHK_EV_TX_ERR, .name = "tx_err", .desc = "transmission error" },
+
+ {}
+};
+
+static const struct name_desc check_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the check */ },
+ /* arg2 */ { },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc check_trace_decoding[] = {
+#define CHK_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define CHK_VERB_MINIMAL 2
+ { .name="minimal", .desc="report info on streams and connectors" },
+#define CHK_VERB_SIMPLE 3
+ { .name="simple", .desc="add info on request and response channels" },
+#define CHK_VERB_ADVANCED 4
+ { .name="advanced", .desc="add info on channel's buffer for data and developer levels only" },
+#define CHK_VERB_COMPLETE 5
+ { .name="complete", .desc="add info on channel's buffer" },
+ { /* end */ }
+};
+
+struct trace_source trace_check = {
+ .name = IST("check"),
+ .desc = "Health-check",
+ .arg_def = TRC_ARG1_CHK, // TRACE()'s first argument is always a check
+ .default_cb = check_trace,
+ .known_events = check_trace_events,
+ .lockon_args = check_trace_lockon_args,
+ .decoding = check_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_check
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+
+/* Dummy frontend used to create all checks sessions. */
+struct proxy checks_fe;
+
+
+static inline void check_trace_buf(const struct buffer *buf, size_t ofs, size_t len)
+{
+ size_t block1, block2;
+ int line, ptr, newptr;
+
+ block1 = b_contig_data(buf, ofs);
+ block2 = 0;
+ if (block1 > len)
+ block1 = len;
+ block2 = len - block1;
+
+ ofs = b_peek_ofs(buf, ofs);
+
+ line = 0;
+ ptr = ofs;
+ while (ptr < ofs + block1) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), ofs + block1, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+
+ line = ptr = 0;
+ while (ptr < block2) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), block2, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+}
+
+/* trace source and events */
+static void check_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct check *check = a1;
+ const struct server *srv = (check ? check->server : NULL);
+ const size_t *val = a4;
+ const char *res;
+
+ if (!check || src->verbosity < CHK_VERB_CLEAN)
+ return;
+
+ if (srv) {
+ chunk_appendf(&trace_buf, " : [%c] SRV=%s",
+ ((check->type == PR_O2_EXT_CHK) ? 'E' : (check->state & CHK_ST_AGENT ? 'A' : 'H')),
+ srv->id);
+
+ chunk_appendf(&trace_buf, " status=%d/%d %s",
+ (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
+ (check->health >= check->rise) ? check->fall : check->rise,
+ (check->health >= check->rise) ? (srv->uweight ? "UP" : "DRAIN") : "DOWN");
+ }
+ else
+ chunk_appendf(&trace_buf, " : [EMAIL]");
+
+ switch (check->result) {
+ case CHK_RES_NEUTRAL: res = "-"; break;
+ case CHK_RES_FAILED: res = "FAIL"; break;
+ case CHK_RES_PASSED: res = "PASS"; break;
+ case CHK_RES_CONDPASS: res = "COND"; break;
+ default: res = "UNK"; break;
+ }
+
+ if (src->verbosity == CHK_VERB_CLEAN)
+ return;
+
+ chunk_appendf(&trace_buf, " - last=%s(%d)/%s(%d)",
+ get_check_status_info(check->status), check->status,
+ res, check->result);
+
+ /* Display the value of the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (unsigned long)*val);
+
+ chunk_appendf(&trace_buf, " check=%p(0x%08x)", check, check->state);
+
+ if (src->verbosity == CHK_VERB_MINIMAL)
+ return;
+
+
+ if (check->sc) {
+ struct connection *conn = sc_conn(check->sc);
+
+ chunk_appendf(&trace_buf, " - conn=%p(0x%08x)", conn, conn ? conn->flags : 0);
+ chunk_appendf(&trace_buf, " sc=%p(0x%08x)", check->sc, check->sc->flags);
+ }
+
+ if (mask & CHK_EV_TCPCHK) {
+ const char *type;
+
+ switch (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) {
+ case TCPCHK_RULES_PGSQL_CHK: type = "PGSQL"; break;
+ case TCPCHK_RULES_REDIS_CHK: type = "REDIS"; break;
+ case TCPCHK_RULES_SMTP_CHK: type = "SMTP"; break;
+ case TCPCHK_RULES_HTTP_CHK: type = "HTTP"; break;
+ case TCPCHK_RULES_MYSQL_CHK: type = "MYSQL"; break;
+ case TCPCHK_RULES_LDAP_CHK: type = "LDAP"; break;
+ case TCPCHK_RULES_SSL3_CHK: type = "SSL3"; break;
+ case TCPCHK_RULES_AGENT_CHK: type = "AGENT"; break;
+ case TCPCHK_RULES_SPOP_CHK: type = "SPOP"; break;
+ case TCPCHK_RULES_TCP_CHK: type = "TCP"; break;
+ default: type = "???"; break;
+ }
+ if (check->current_step)
+ chunk_appendf(&trace_buf, " - tcp-check=(%s,%d)", type, tcpcheck_get_step_id(check, NULL));
+ else
+ chunk_appendf(&trace_buf, " - tcp-check=(%s,-)", type);
+ }
+
+ /* Display bi and bo buffer info (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER) {
+ const struct buffer *buf = NULL;
+
+ chunk_appendf(&trace_buf, " bi=%u@%p+%u/%u",
+ (unsigned int)b_data(&check->bi), b_orig(&check->bi),
+ (unsigned int)b_head_ofs(&check->bi), (unsigned int)b_size(&check->bi));
+ chunk_appendf(&trace_buf, " bo=%u@%p+%u/%u",
+ (unsigned int)b_data(&check->bo), b_orig(&check->bo),
+ (unsigned int)b_head_ofs(&check->bo), (unsigned int)b_size(&check->bo));
+
+ if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_RX)))
+ buf = (b_is_null(&check->bi) ? NULL : &check->bi);
+ else if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_TX)))
+ buf = (b_is_null(&check->bo) ? NULL : &check->bo);
+
+ if (buf) {
+ if ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
+ int full = (src->verbosity == CHK_VERB_COMPLETE);
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htxbuf(buf), full);
+ }
+ else {
+ int max = ((src->verbosity == CHK_VERB_COMPLETE) ? 1024 : 256);
+
+ chunk_memcat(&trace_buf, "\n", 1);
+ if (b_data(buf) > max) {
+ check_trace_buf(buf, 0, max);
+ chunk_memcat(&trace_buf, "  ...\n", 6);
+ }
+ else
+ check_trace_buf(buf, 0, b_data(buf));
+ }
+
+ }
+ }
+
+}
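
To make the verbosity levels concrete, a "minimal"-level line emitted by this callback for a healthy HTTP health-check could look roughly like the following; every value is illustrative rather than captured from a real run:

 : [H] SRV=srv1 status=3/3 UP - last=L7OK(20)/PASS(3) check=0x55d2c0a19e40(0x00000060)

Higher verbosities then append the conn/sc pointers, the current tcp-check step, and finally the bi/bo buffer dumps.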
+
+
+/**************************************************************************/
+/************************ Handle check results ****************************/
+/**************************************************************************/
+struct check_status {
+ short result; /* one of SRV_CHK_* */
+ char *info; /* human readable short info */
+ char *desc; /* long description */
+};
+
+struct analyze_status {
+ char *desc; /* description */
+ unsigned char lr[HANA_OBS_SIZE]; /* result for l4/l7: 0 = ignore, 1 = error, 2 = OK */
+};
+
+static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
+ [HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
+ [HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
+ [HCHK_STATUS_START] = { /* SPECIAL STATUS*/ },
+
+ /* Below we have finished checks */
+ [HCHK_STATUS_CHECKED] = { CHK_RES_NEUTRAL, "CHECKED", "No status change" },
+ [HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
+
+ [HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
+
+ [HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
+ [HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
+ [HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
+
+ [HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
+ [HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
+ [HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
+
+ [HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
+ [HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
+
+ [HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
+
+ [HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
+ [HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
+ [HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
+
+ [HCHK_STATUS_PROCERR] = { CHK_RES_FAILED, "PROCERR", "External check error" },
+ [HCHK_STATUS_PROCTOUT] = { CHK_RES_FAILED, "PROCTOUT", "External check timeout" },
+ [HCHK_STATUS_PROCOK] = { CHK_RES_PASSED, "PROCOK", "External check passed" },
+};
+
+static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
+ [HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
+
+ [HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
+ [HANA_STATUS_L4_ERR] = { "L4 unsuccessful connection", { 1, 1 }},
+
+ [HANA_STATUS_HTTP_OK] = { "Correct http response", { 0, 2 }},
+ [HANA_STATUS_HTTP_STS] = { "Wrong http response", { 0, 1 }},
+ [HANA_STATUS_HTTP_HDRRSP] = { "Invalid http response (headers)", { 0, 1 }},
+ [HANA_STATUS_HTTP_RSP] = { "Invalid http response", { 0, 1 }},
+
+ [HANA_STATUS_HTTP_READ_ERROR] = { "Read error (http)", { 0, 1 }},
+ [HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)", { 0, 1 }},
+ [HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }},
+};
+
+/* Checks whether <err> is a real errno error or one that can be ignored, and
+ * returns 0 for the ignorable ones or <err> for real ones.
+ */
+static inline int unclean_errno(int err)
+{
+ if (err == EAGAIN || err == EWOULDBLOCK || err == EINPROGRESS ||
+ err == EISCONN || err == EALREADY)
+ return 0;
+ return err;
+}
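
A quick illustration of why this filter exists: a non-blocking connect() routinely "fails" with EINPROGRESS, which is not worth reporting. A hypothetical wrapper, assuming the usual socket headers already included above:

/* Hypothetical: attempt a non-blocking connect, reporting real errors only */
static int try_connect(int fd, const struct sockaddr *sa, socklen_t salen)
{
        if (connect(fd, sa, salen) == -1 && unclean_errno(errno))
                return -1; /* genuine failure (ECONNREFUSED, ENETUNREACH, ...) */
        return 0;          /* connected, or connection still in progress */
}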
+
+/* Converts check_status code to result code */
+short get_check_status_result(short check_status)
+{
+ if (check_status < HCHK_STATUS_SIZE)
+ return check_statuses[check_status].result;
+ else
+ return check_statuses[HCHK_STATUS_UNKNOWN].result;
+}
+
+/* Converts check_status code to description */
+const char *get_check_status_description(short check_status) {
+
+ const char *desc;
+
+ if (check_status < HCHK_STATUS_SIZE)
+ desc = check_statuses[check_status].desc;
+ else
+ desc = NULL;
+
+ if (desc && *desc)
+ return desc;
+ else
+ return check_statuses[HCHK_STATUS_UNKNOWN].desc;
+}
+
+/* Converts check_status code to short info */
+const char *get_check_status_info(short check_status)
+{
+ const char *info;
+
+ if (check_status < HCHK_STATUS_SIZE)
+ info = check_statuses[check_status].info;
+ else
+ info = NULL;
+
+ if (info && *info)
+ return info;
+ else
+ return check_statuses[HCHK_STATUS_UNKNOWN].info;
+}
+
+/* Convert analyze_status to description */
+const char *get_analyze_status(short analyze_status) {
+
+ const char *desc;
+
+ if (analyze_status < HANA_STATUS_SIZE)
+ desc = analyze_statuses[analyze_status].desc;
+ else
+ desc = NULL;
+
+ if (desc && *desc)
+ return desc;
+ else
+ return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
+}
+
+/* Sets check->status, updates check->duration and fills check->result with an
+ * adequate CHK_RES_* value. The new check->health is computed based on the
+ * result.
+ *
+ * Shows information in logs about failed health check if server is UP or
+ * succeeded health checks if server is DOWN.
+ */
+void set_server_check_status(struct check *check, short status, const char *desc)
+{
+ struct server *s = check->server;
+ short prev_status = check->status;
+ int report = 0;
+
+ TRACE_POINT(CHK_EV_HCHK_RUN, check);
+
+ if (status == HCHK_STATUS_START) {
+ check->result = CHK_RES_UNKNOWN; /* no result yet */
+ check->desc[0] = '\0';
+ check->start = now;
+ return;
+ }
+
+ if (!check->status)
+ return;
+
+ if (desc && *desc) {
+ strncpy(check->desc, desc, HCHK_DESC_LEN-1);
+ check->desc[HCHK_DESC_LEN-1] = '\0';
+ } else
+ check->desc[0] = '\0';
+
+ check->status = status;
+ if (check_statuses[status].result)
+ check->result = check_statuses[status].result;
+
+ if (status == HCHK_STATUS_HANA)
+ check->duration = -1;
+ else if (!tv_iszero(&check->start)) {
+ /* set_server_check_status() may be called more than once */
+ check->duration = tv_ms_elapsed(&check->start, &now);
+ tv_zero(&check->start);
+ }
+
+ /* no change is expected if no state change occurred */
+ if (check->result == CHK_RES_NEUTRAL)
+ return;
+
+ /* If the check was really just sending a mail, it won't have an
+ * associated server, so we're done now.
+ */
+ if (!s)
+ return;
+ report = 0;
+
+
+ switch (check->result) {
+ case CHK_RES_FAILED:
+ /* Failure to connect to the agent as a secondary check should not
+ * cause the server to be marked down.
+ */
+ if ((!(check->state & CHK_ST_AGENT) ||
+ (check->status >= HCHK_STATUS_L57DATA)) &&
+ (check->health > 0)) {
+ _HA_ATOMIC_INC(&s->counters.failed_checks);
+ report = 1;
+ check->health--;
+ if (check->health < check->rise)
+ check->health = 0;
+ }
+ break;
+
+ case CHK_RES_PASSED:
+ case CHK_RES_CONDPASS:
+ if (check->health < check->rise + check->fall - 1) {
+ report = 1;
+ check->health++;
+
+ if (check->health >= check->rise)
+ check->health = check->rise + check->fall - 1; /* OK now */
+ }
+
+ /* clear consecutive_errors if observing is enabled */
+ if (s->onerror)
+ s->consecutive_errors = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ if (s->proxy->options2 & PR_O2_LOGHCHKS &&
+ (status != prev_status || report)) {
+ chunk_printf(&trash,
+ "%s check for %sserver %s/%s %s%s",
+ (check->state & CHK_ST_AGENT) ? "Agent" : "Health",
+ s->flags & SRV_F_BACKUP ? "backup " : "",
+ s->proxy->id, s->id,
+ (check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
+ (check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
+
+ srv_append_status(&trash, s, check, -1, 0);
+
+ chunk_appendf(&trash, ", status: %d/%d %s",
+ (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
+ (check->health >= check->rise) ? check->fall : check->rise,
+ (check->health >= check->rise) ? (s->uweight ? "UP" : "DRAIN") : "DOWN");
+
+ ha_warning("%s.\n", trash.area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
+ send_email_alert(s, LOG_INFO, "%s", trash.area);
+ }
+}
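
The health counter arithmetic above can be restated compactly: the counter lives in [0, rise+fall-1], values below <rise> mean DOWN and values at or above it mean UP. A standalone model with hypothetical names follows; with rise=2 and fall=3, a DOWN server needs 2 consecutive successes to come UP, and an UP server needs 3 consecutive failures to go DOWN.

/* Hypothetical model of the health counter updates performed above */
static void health_update(int *health, int rise, int fall, int passed)
{
        if (passed) {
                if (*health < rise + fall - 1) {
                        (*health)++;
                        if (*health >= rise)
                                *health = rise + fall - 1; /* fully UP now */
                }
        } else if (*health > 0) {
                (*health)--;
                if (*health < rise)
                        *health = 0; /* fully DOWN now */
        }
}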
+
+/* Marks the check <check>'s server down if the current check is already failed
+ * and the server is not down yet nor in maintenance.
+ */
+void check_notify_failure(struct check *check)
+{
+ struct server *s = check->server;
+
+ /* The agent secondary check should only cause a server to be marked
+ * as down if check->status is HCHK_STATUS_L7STS, which indicates
+ * that the agent returned "fail", "stopped" or "down".
+ * The implication here is that failure to connect to the agent
+ * as a secondary check should not cause the server to be marked
+ * down. */
+ if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
+ return;
+
+ if (check->health > 0)
+ return;
+
+ TRACE_STATE("health-check failed, set server DOWN", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ /* We only report a reason for the check if we did not do so previously */
+ srv_set_stopped(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
+}
+
+/* Marks the check <check> as valid and tries to set its server up, provided
+ * it isn't in maintenance, it is not tracking a down server and other checks
+ * comply. The rule is simple : by default, a server is up, unless any of the
+ * following conditions is true :
+ * - health check failed (check->health < rise)
+ * - agent check failed (agent->health < rise)
+ * - the server tracks a down server (track && track->state == STOPPED)
+ * Note that if the server has a slowstart, it will switch to STARTING instead
+ * of RUNNING. Also, only the health checks support the nolb mode, so the
+ * agent's success may not take the server out of this mode.
+ */
+void check_notify_success(struct check *check)
+{
+ struct server *s = check->server;
+
+ if (s->next_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (s->track && s->track->next_state == SRV_ST_STOPPED)
+ return;
+
+ if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
+ return;
+
+ if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
+ return;
+
+ if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING)
+ return;
+
+ TRACE_STATE("health-check succeeded, set server RUNNING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
+}
+
+/* Marks the check <check> as valid and tries to set its server into stopping mode
+ * if it was running or starting, and provided it isn't in maintenance and other
+ * checks comply. The conditions for the server to be marked in stopping mode are
+ * the same as for it to be turned up. Also, only the health checks support the
+ * nolb mode.
+ */
+void check_notify_stopping(struct check *check)
+{
+ struct server *s = check->server;
+
+ if (s->next_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (check->state & CHK_ST_AGENT)
+ return;
+
+ if (s->track && s->track->next_state == SRV_ST_STOPPED)
+ return;
+
+ if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
+ return;
+
+ if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
+ return;
+
+ TRACE_STATE("health-check condionnaly succeeded, set server STOPPING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ srv_set_stopping(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
+}
+
+/* note: use health_adjust() only, which first checks that the observe mode is
+ * enabled. This will take the server lock if needed.
+ */
+void __health_adjust(struct server *s, short status)
+{
+ int failed;
+ int expire;
+
+ if (s->observe >= HANA_OBS_SIZE)
+ return;
+
+ if (status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
+ return;
+
+ switch (analyze_statuses[status].lr[s->observe - 1]) {
+ case 1:
+ failed = 1;
+ break;
+
+ case 2:
+ failed = 0;
+ break;
+
+ default:
+ return;
+ }
+
+ if (!failed) {
+ /* good: clear consecutive_errors */
+ s->consecutive_errors = 0;
+ return;
+ }
+
+ _HA_ATOMIC_INC(&s->consecutive_errors);
+
+ if (s->consecutive_errors < s->consecutive_errors_limit)
+ return;
+
+ chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
+ s->consecutive_errors, get_analyze_status(status));
+
+ if (s->check.fastinter)
+ expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter));
+ else
+ expire = TICK_ETERNITY;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+
+ switch (s->onerror) {
+ case HANA_ONERR_FASTINTER:
+ /* force fastinter - nothing to do here as all modes force it */
+ break;
+
+ case HANA_ONERR_SUDDTH:
+ /* simulate a pre-fatal failed health check */
+ if (s->check.health > s->check.rise)
+ s->check.health = s->check.rise + 1;
+
+ /* fall through */
+
+ case HANA_ONERR_FAILCHK:
+ /* simulate a failed health check */
+ set_server_check_status(&s->check, HCHK_STATUS_HANA,
+ trash.area);
+ check_notify_failure(&s->check);
+ break;
+
+ case HANA_ONERR_MARKDWN:
+ /* mark server down */
+ s->check.health = s->check.rise;
+ set_server_check_status(&s->check, HCHK_STATUS_HANA,
+ trash.area);
+ check_notify_failure(&s->check);
+ break;
+
+ default:
+ /* write a warning? */
+ break;
+ }
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+
+ s->consecutive_errors = 0;
+ _HA_ATOMIC_INC(&s->counters.failed_hana);
+
+ if (tick_isset(expire) && tick_is_lt(expire, s->check.task->expire)) {
+ /* requeue check task with new expire */
+ task_schedule(s->check.task, expire);
+ }
+}
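
Stripped of the locking and the onerror actions, the observe logic reduces to a run-length counter over classified events: an OK resets it, an ignored event leaves it untouched, and only <consecutive_errors_limit> straight errors trigger a reaction. A hypothetical condensed model:

enum obs_verdict { OBS_IGNORE = 0, OBS_ERROR = 1, OBS_OK = 2 };

/* Returns 1 when the onerror action must fire, 0 otherwise. */
static int observe_event(int *consecutive_errors, int limit,
                         enum obs_verdict verdict)
{
        if (verdict == OBS_OK) {
                *consecutive_errors = 0;
                return 0;
        }
        if (verdict != OBS_ERROR)
                return 0;               /* ignored event */
        if (++(*consecutive_errors) < limit)
                return 0;
        *consecutive_errors = 0;        /* reset after reacting */
        return 1;
}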
+
+/* Checks the connection. If an error has already been reported or the socket is
+ * closed, keep errno intact as it is supposed to contain the valid error code.
+ * If no error is reported, check the socket's error queue using getsockopt().
+ * Warning, this must be done only once when returning from poll, and never
+ * after an I/O error was attempted, otherwise the error queue might contain
+ * inconsistent errors. If an error is detected, the CO_FL_ERROR flag is set on the
+ * socket. Returns non-zero if an error was reported, zero if everything is
+ * clean (including a properly closed socket).
+ */
+static int retrieve_errno_from_socket(struct connection *conn)
+{
+ int skerr;
+ socklen_t lskerr = sizeof(skerr);
+
+ if (conn->flags & CO_FL_ERROR && (unclean_errno(errno) || !conn->ctrl))
+ return 1;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
+ errno = skerr;
+
+ errno = unclean_errno(errno);
+
+ if (!errno) {
+ /* we could not retrieve an error, that does not mean there is
+ * none. Just don't change anything and only report the prior
+ * error if any.
+ */
+ if (conn->flags & CO_FL_ERROR)
+ return 1;
+ else
+ return 0;
+ }
+
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
+ return 1;
+}
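
The getsockopt(SO_ERROR) dance is the portable way to read a deferred connect() error exactly once; outside of the connection framework it boils down to this minimal sketch (hypothetical helper name):

#include <errno.h>
#include <sys/socket.h>

/* Returns the pending socket error (clearing it), or 0 if none. Call it
 * once after poll() reports the socket and before any further I/O, or
 * the queued error may be lost or become stale.
 */
static int pending_sock_err(int fd)
{
        int skerr = 0;
        socklen_t len = sizeof(skerr);

        if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &len) == -1)
                return errno; /* getsockopt itself failed */
        return skerr;
}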
+
+/* Tries to collect as much information as possible on the connection status,
+ * and adjust the server status accordingly. It may make use of <errno_bck>
+ * if non-zero when the caller is absolutely certain of its validity (eg:
+ * checked just after a syscall). If the caller doesn't have a valid errno,
+ * it can pass zero, and retrieve_errno_from_socket() will be called to try
+ * to extract errno from the socket. If no error is reported, it will consider
+ * the <expired> flag. This is intended to be used when a connection error was
+ * reported in conn->flags or when a timeout was reported in <expired>. The
+ * function takes care of not updating a server status which was already set.
+ * All situations where at least one of <expired> or CO_FL_ERROR are set
+ * produce a status.
+ */
+void chk_report_conn_err(struct check *check, int errno_bck, int expired)
+{
+ struct stconn *sc = check->sc;
+ struct connection *conn = sc_conn(sc);
+ const char *err_msg;
+ struct buffer *chk;
+ int step;
+
+ if (check->result != CHK_RES_UNKNOWN) {
+ return;
+ }
+
+ errno = unclean_errno(errno_bck);
+ if (conn && errno)
+ retrieve_errno_from_socket(conn);
+
+ if (conn && !(conn->flags & CO_FL_ERROR) &&
+ sc && !sc_ep_test(sc, SE_FL_ERROR) && !expired)
+ return;
+
+ TRACE_ENTER(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check, 0, 0, (size_t[]){expired});
+
+ /* we'll try to build a meaningful error message depending on the
+ * context of the error possibly present in conn->err_code, and the
+ * socket error possibly collected above. This is useful to know the
+ * exact step of the L6 layer (eg: SSL handshake).
+ */
+ chk = get_trash_chunk();
+
+ if (check->type == PR_O2_TCPCHK_CHK &&
+ (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
+ step = tcpcheck_get_step_id(check, NULL);
+ if (!step) {
+ TRACE_DEVEL("initial connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ chunk_printf(chk, " at initial connection step of tcp-check");
+ }
+ else {
+ chunk_printf(chk, " at step %d of tcp-check", step);
+ /* we were looking for a string */
+ if (check->current_step && check->current_step->action == TCPCHK_ACT_CONNECT) {
+ if (check->current_step->connect.port)
+ chunk_appendf(chk, " (connect port %d)" ,check->current_step->connect.port);
+ else
+ chunk_appendf(chk, " (connect)");
+ TRACE_DEVEL("connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ }
+ else if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT) {
+ struct tcpcheck_expect *expect = &check->current_step->expect;
+
+ switch (expect->type) {
+ case TCPCHK_EXPECT_STRING:
+ chunk_appendf(chk, " (expect string '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
+ break;
+ case TCPCHK_EXPECT_BINARY:
+ chunk_appendf(chk, " (expect binary '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ chunk_appendf(chk, " (expect regex)");
+ break;
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ chunk_appendf(chk, " (expect binary regex)");
+ break;
+ case TCPCHK_EXPECT_STRING_LF:
+ chunk_appendf(chk, " (expect log-format string)");
+ break;
+ case TCPCHK_EXPECT_BINARY_LF:
+ chunk_appendf(chk, " (expect log-format binary)");
+ break;
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ chunk_appendf(chk, " (expect HTTP status codes)");
+ break;
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ chunk_appendf(chk, " (expect HTTP status regex)");
+ break;
+ case TCPCHK_EXPECT_HTTP_HEADER:
+ chunk_appendf(chk, " (expect HTTP header pattern)");
+ break;
+ case TCPCHK_EXPECT_HTTP_BODY:
+ chunk_appendf(chk, " (expect HTTP body content '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
+ break;
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ chunk_appendf(chk, " (expect HTTP body regex)");
+ break;
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ chunk_appendf(chk, " (expect log-format HTTP body)");
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+ chunk_appendf(chk, " (expect custom function)");
+ break;
+ case TCPCHK_EXPECT_UNDEF:
+ chunk_appendf(chk, " (undefined expect!)");
+ break;
+ }
+ TRACE_DEVEL("expect rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ }
+ else if (check->current_step && check->current_step->action == TCPCHK_ACT_SEND) {
+ chunk_appendf(chk, " (send)");
+ TRACE_DEVEL("send rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ }
+
+ if (check->current_step && check->current_step->comment)
+ chunk_appendf(chk, " comment: '%s'", check->current_step->comment);
+ }
+ }
+
+ if (conn && conn->err_code) {
+ if (unclean_errno(errno))
+ chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno),
+ chk->area);
+ else
+ chunk_printf(&trash, "%s%s", conn_err_code_str(conn),
+ chk->area);
+ err_msg = trash.area;
+ }
+ else {
+ if (unclean_errno(errno)) {
+ chunk_printf(&trash, "%s%s", strerror(errno),
+ chk->area);
+ err_msg = trash.area;
+ }
+ else {
+ err_msg = chk->area;
+ }
+ }
+
+ if (check->state & CHK_ST_PORT_MISS) {
+ /* NOTE: this is reported after <fall> tries */
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
+ }
+
+ if (!sc || !conn || !conn->ctrl) {
+ /* error before any connection attempt (connection allocation error or no control layer) */
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
+ }
+ else if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ /* L4 not established (yet) */
+ if (conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR))
+ set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
+ else if (expired)
+ set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
+
+ /*
+ * might be due to a server IP change.
+ * Let's trigger a DNS resolution if none are currently running.
+ */
+ if (check->server)
+ resolv_trigger_resolution(check->server->resolv_requester);
+
+ }
+ else if (conn->flags & CO_FL_WAIT_L6_CONN) {
+ /* L6 not established (yet) */
+ if (conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR))
+ set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
+ else if (expired)
+ set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
+ }
+ else if (conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR)) {
+ /* I/O error after connection was established and before we could diagnose */
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
+ }
+ else if (expired) {
+ enum healthcheck_status tout = HCHK_STATUS_L7TOUT;
+
+ /* connection established but expired check */
+ if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT &&
+ check->current_step->expect.tout_status != HCHK_STATUS_UNKNOWN)
+ tout = check->current_step->expect.tout_status;
+ set_server_check_status(check, tout, err_msg);
+ }
+
+ TRACE_LEAVE(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ return;
+}
+
+
+/* Builds the server state header used by HTTP health-checks */
+int httpchk_build_status_header(struct server *s, struct buffer *buf)
+{
+ int sv_state;
+ int ratio;
+ char addr[46];
+ char port[6];
+ const char *srv_hlt_st[7] = { "DOWN", "DOWN %d/%d",
+ "UP %d/%d", "UP",
+ "NOLB %d/%d", "NOLB",
+ "no check" };
+
+ if (!(s->check.state & CHK_ST_ENABLED))
+ sv_state = 6;
+ else if (s->cur_state != SRV_ST_STOPPED) {
+ if (s->check.health == s->check.rise + s->check.fall - 1)
+ sv_state = 3; /* UP */
+ else
+ sv_state = 2; /* going down */
+
+ if (s->cur_state == SRV_ST_STOPPING)
+ sv_state += 2;
+ } else {
+ if (s->check.health)
+ sv_state = 1; /* going up */
+ else
+ sv_state = 0; /* DOWN */
+ }
+
+ chunk_appendf(buf, srv_hlt_st[sv_state],
+ (s->cur_state != SRV_ST_STOPPED) ? (s->check.health - s->check.rise + 1) : (s->check.health),
+ (s->cur_state != SRV_ST_STOPPED) ? (s->check.fall) : (s->check.rise));
+
+ addr_to_str(&s->addr, addr, sizeof(addr));
+ if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
+ snprintf(port, sizeof(port), "%u", s->svc_port);
+ else
+ *port = 0;
+
+ chunk_appendf(buf, "; address=%s; port=%s; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
+ addr, port, s->proxy->id, s->id,
+ global.node,
+ (s->cur_eweight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
+ (s->proxy->lbprm.tot_weight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
+ s->cur_sess, s->proxy->beconn - s->proxy->queue.length,
+ s->queue.length);
+
+ if ((s->cur_state == SRV_ST_STARTING) &&
+ now.tv_sec < s->last_change + s->slowstart &&
+ now.tv_sec >= s->last_change) {
+ ratio = MAX(1, 100 * (now.tv_sec - s->last_change) / s->slowstart);
+ chunk_appendf(buf, "; throttle=%d%%", ratio);
+ }
+
+ return b_data(buf);
+}
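
For reference, this is the kind of string the function could produce for a server that is UP but has lost one check (sv_state 2, "UP %d/%d"); it is typically carried by the X-Haproxy-Server-State header when "http-check send-state" is enabled, and every figure below is illustrative:

UP 2/3; address=192.0.2.10; port=8080; name=bk_web/srv1; node=lb1; weight=1/2; scur=13/22; qcur=0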
+
+/**************************************************************************/
+/***************** Health-checks based on connections *********************/
+/**************************************************************************/
+/* This function is used only for server health-checks. It handles connection
+ * status updates including errors. If necessary, it wakes the check task up.
+ * It returns 0 in normal cases, <0 if at least one close() has happened on the
+ * connection (eg: reconnect). It relies on tcpcheck_main().
+ */
+int wake_srv_chk(struct stconn *sc)
+{
+ struct connection *conn;
+ struct check *check = __sc_check(sc);
+ struct email_alertq *q = container_of(check, typeof(*q), check);
+ int ret = 0;
+
+ TRACE_ENTER(CHK_EV_HCHK_WAKE, check);
+ if (check->result != CHK_RES_UNKNOWN)
+ goto end;
+
+ if (check->server)
+ HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+ else
+ HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
+
+ /* we may have to make progress on the TCP checks */
+ ret = tcpcheck_main(check);
+
+ sc = check->sc;
+ conn = sc_conn(sc);
+
+ if (unlikely(!conn || !sc || conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR))) {
+ /* We may get error reports bypassing the I/O handlers, typically
+ * the case when sending a pure TCP check which fails, then the I/O
+ * handlers above are not called. This is completely handled by the
+ * main processing task so let's simply wake it up. If we get here,
+ * we expect errno to still be valid.
+ */
+ TRACE_ERROR("report connection error", CHK_EV_HCHK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ chk_report_conn_err(check, errno, 0);
+ task_wakeup(check->task, TASK_WOKEN_IO);
+ }
+
+ if (check->result != CHK_RES_UNKNOWN || ret == -1) {
+ /* Check complete or aborted. Wake the check task up to be sure
+ * the result is handled ASAP. */
+ ret = -1;
+ task_wakeup(check->task, TASK_WOKEN_IO);
+ }
+
+ if (check->server)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+ else
+ HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
+
+ end:
+ TRACE_LEAVE(CHK_EV_HCHK_WAKE, check);
+ return ret;
+}
+
+/* This function checks if any I/O is wanted, and if so, attempts to do so */
+struct task *srv_chk_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct stconn *sc = ctx;
+
+ wake_srv_chk(sc);
+ return NULL;
+}
+
+/* manages a server health-check that uses a connection. Returns
+ * the time the task accepts to wait, or TIME_ETERNITY for infinity.
+ *
+ * Please do NOT place any return statement in this function and only leave
+ * via the out_unlock label.
+ */
+struct task *process_chk_conn(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+ struct proxy *proxy = check->proxy;
+ struct stconn *sc;
+ struct connection *conn;
+ int rv;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(CHK_EV_TASK_WAKE, check);
+
+ if (check->server)
+ HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+
+ if (unlikely(check->state & CHK_ST_PURGE)) {
+ TRACE_STATE("health-check state to purge", CHK_EV_TASK_WAKE, check);
+ }
+ else if (!(check->state & (CHK_ST_INPROGRESS))) {
+ /* no check currently running */
+ if (!expired) /* woke up too early */ {
+ TRACE_STATE("health-check wake up too early", CHK_EV_TASK_WAKE, check);
+ goto out_unlock;
+ }
+
+ /* we don't send any health-checks when the proxy is
+ * stopped, the server should not be checked or the check
+ * is disabled.
+ */
+ if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
+ (proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ TRACE_STATE("health-check paused or disabled", CHK_EV_TASK_WAKE, check);
+ goto reschedule;
+ }
+
+ /* we'll initiate a new check */
+ set_server_check_status(check, HCHK_STATUS_START, NULL);
+
+ check->state |= CHK_ST_INPROGRESS;
+ TRACE_STATE("init new health-check", CHK_EV_TASK_WAKE|CHK_EV_HCHK_START, check);
+
+ task_set_affinity(t, tid_bit);
+
+ check->current_step = NULL;
+
+ check->sc = sc_new_from_check(check, SC_FL_NONE);
+ if (!check->sc) {
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, NULL);
+ goto end;
+ }
+ tcpcheck_main(check);
+ expired = 0;
+ }
+
+ /* there was a test running.
+ * First, let's check whether there was an uncaught error,
+ * which can happen on connect timeout or error.
+ */
+ if (check->result == CHK_RES_UNKNOWN && likely(!(check->state & CHK_ST_PURGE))) {
+ sc = check->sc;
+ conn = (sc ? sc_conn(sc) : NULL);
+
+ /* Here the connection must be defined. Otherwise the
+ * error would have already been detected
+ */
+ if ((conn && ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR))) || expired) {
+ TRACE_ERROR("report connection error", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ chk_report_conn_err(check, 0, expired);
+ }
+ else {
+ if (check->state & CHK_ST_CLOSE_CONN) {
+ TRACE_DEVEL("closing current connection", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
+ check->state &= ~CHK_ST_CLOSE_CONN;
+ conn = NULL;
+ if (!sc_reset_endp(check->sc)) {
+ /* error will be handled by tcpcheck_main().
+ * On success, remove all flags except SE_FL_DETACHED
+ */
+ sc_ep_clr(check->sc, ~SE_FL_DETACHED);
+ }
+ tcpcheck_main(check);
+ }
+ if (check->result == CHK_RES_UNKNOWN) {
+ TRACE_DEVEL("health-check not expired", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
+ goto out_unlock; /* timeout not reached, wait again */
+ }
+ }
+ }
+
+ /* check complete or aborted */
+ TRACE_STATE("health-check complete or aborted", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END, check);
+
+ check->current_step = NULL;
+ sc = check->sc;
+ conn = (sc ? sc_conn(sc) : NULL);
+
+ if (conn && conn->xprt) {
+ /* The check was aborted and the connection was not yet closed.
+ * This can happen upon timeout, or when an external event such
+ * as a failed response coupled with "observe layer7" caused the
+ * server state to be suddenly changed.
+ */
+ sc_conn_drain_and_shut(sc);
+ }
+
+ if (sc) {
+ sc_destroy(sc);
+ sc = check->sc = NULL;
+ conn = NULL;
+ }
+
+ if (check->sess != NULL) {
+ vars_prune(&check->vars, check->sess, NULL);
+ session_free(check->sess);
+ check->sess = NULL;
+ }
+
+ end:
+ if (check->server && likely(!(check->state & CHK_ST_PURGE))) {
+ if (check->result == CHK_RES_FAILED) {
+ /* a failure or timeout detected */
+ TRACE_DEVEL("report failure", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ check_notify_failure(check);
+ }
+ else if (check->result == CHK_RES_CONDPASS) {
+ /* check is OK but asks for stopping mode */
+ TRACE_DEVEL("report conditional success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ check_notify_stopping(check);
+ }
+ else if (check->result == CHK_RES_PASSED) {
+ /* a success was detected */
+ TRACE_DEVEL("report success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ check_notify_success(check);
+ }
+ }
+
+ if (LIST_INLIST(&check->buf_wait.list))
+ LIST_DEL_INIT(&check->buf_wait.list);
+
+ task_set_affinity(t, MAX_THREADS_MASK);
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+ check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
+
+ if (check->server) {
+ rv = 0;
+ if (global.spread_checks > 0) {
+ rv = srv_getinter(check) * global.spread_checks / 100;
+ rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
+ }
+ t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
+ }
+
+ reschedule:
+ while (tick_is_expired(t->expire, now_ms))
+ t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
+ out_unlock:
+ if (check->server)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+
+ TRACE_LEAVE(CHK_EV_TASK_WAKE, check);
+
+ /* Free the check if set to PURGE. After this, the check instance may be
+ * freed via the srv_drop invocation, so it must not be accessed after
+ * this point.
+ */
+ if (unlikely(check->state & CHK_ST_PURGE)) {
+ free_check(check);
+ if (check->server)
+ srv_drop(check->server);
+
+ t = NULL;
+ }
+
+ return t;
+}
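
The requeue computed just before the reschedule label applies global.spread_checks as a symmetric percentage jitter around the configured interval. The same arithmetic as a standalone sketch; the helper name is hypothetical and rand() merely stands in for ha_random32():

#include <stdlib.h>

/* Returns <inter> jittered by +/- <spread> percent (0..100). */
static int jittered_interval(int inter, int spread)
{
        int rv = inter * spread / 100;        /* maximum deviation */
        double r = rand() / (double)RAND_MAX; /* uniform in [0, 1] */

        rv -= (int)(2 * rv * r);              /* now in [-rv, +rv] */
        return inter + rv;
}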
+
+
+/**************************************************************************/
+/************************** Init/deinit checks ****************************/
+/**************************************************************************/
+/*
+ * Tries to grab a buffer and to re-enable processing on check <target>. The
+ * check flags are used to figure out which buffer was requested. It returns 1 if the
+ * allocation succeeds, in which case the I/O tasklet is woken up, or 0 if it's
+ * impossible to wake up and we prefer to be woken up later.
+ */
+int check_buf_available(void *target)
+{
+ struct check *check = target;
+
+ BUG_ON(!check->sc);
+
+ if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi)) {
+ TRACE_STATE("unblocking check, input buffer allocated", CHK_EV_TCPCHK_EXP|CHK_EV_RX_BLK, check);
+ check->state &= ~CHK_ST_IN_ALLOC;
+ tasklet_wakeup(check->sc->wait_event.tasklet);
+ return 1;
+ }
+ if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo)) {
+ TRACE_STATE("unblocking check, output buffer allocated", CHK_EV_TCPCHK_SND|CHK_EV_TX_BLK, check);
+ check->state &= ~CHK_ST_OUT_ALLOC;
+ tasklet_wakeup(check->sc->wait_event.tasklet);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Allocate a buffer. If it fails, it adds the check in buffer wait queue.
+ */
+struct buffer *check_get_buf(struct check *check, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&check->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ check->buf_wait.target = check;
+ check->buf_wait.wakeup_cb = check_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &check->buf_wait.list);
+ }
+ return buf;
+}
+
+/*
+ * Release a buffer, if any, and try to wake up entities waiting in the buffer
+ * wait queue.
+ */
+void check_release_buf(struct check *check, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(check->buf_wait.target, 1);
+ }
+}
+
+const char *init_check(struct check *check, int type)
+{
+ check->type = type;
+
+ check->bi = BUF_NULL;
+ check->bo = BUF_NULL;
+ LIST_INIT(&check->buf_wait.list);
+ return NULL;
+}
+
+/* Liberates the resources allocated for a check.
+ *
+ * This function must only be run by the thread owning the check.
+ */
+void free_check(struct check *check)
+{
+ /* For agent-check, free the rules / vars from the server. This is not
+ * done for health-check : the proxy is the owner of the rules / vars
+ * in this case.
+ */
+ if (check->state & CHK_ST_AGENT) {
+ free_tcpcheck_vars(&check->tcpcheck_rules->preset_vars);
+ ha_free(&check->tcpcheck_rules);
+ }
+
+ task_destroy(check->task);
+
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+ if (check->sc) {
+ sc_destroy(check->sc);
+ check->sc = NULL;
+ }
+}
+
+/* This function must be used in order to free a started check. The check will
+ * be scheduled for one final execution in order to properly close and free all
+ * check elements.
+ *
+ * Non thread-safe.
+ */
+void check_purge(struct check *check)
+{
+ check->state |= CHK_ST_PURGE;
+ task_wakeup(check->task, TASK_WOKEN_OTHER);
+}
+
+/* manages a server health-check. Returns the time the task accepts to wait, or
+ * TIME_ETERNITY for infinity.
+ */
+struct task *process_chk(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+
+ if (check->type == PR_O2_EXT_CHK)
+ return process_chk_proc(t, context, state);
+ return process_chk_conn(t, context, state);
+
+}
+
+
+int start_check_task(struct check *check, int mininter,
+ int nbcheck, int srvpos)
+{
+ struct task *t;
+
+ /* task for the check. Process-based checks exclusively run on thread 1. */
+ if (check->type == PR_O2_EXT_CHK)
+ t = task_new_on(0);
+ else
+ t = task_new_anywhere();
+
+ if (!t)
+ goto fail_alloc_task;
+
+ check->task = t;
+ t->process = process_chk;
+ t->context = check;
+
+ if (mininter < srv_getinter(check))
+ mininter = srv_getinter(check);
+
+ if (global.max_spread_checks && mininter > global.max_spread_checks)
+ mininter = global.max_spread_checks;
+
+ /* check this every ms */
+ t->expire = tick_add(now_ms, MS_TO_TICKS(mininter * srvpos / nbcheck));
+ check->start = now;
+ task_queue(t);
+
+ return 1;
+
+ fail_alloc_task:
+ ha_alert("Starting [%s:%s] check: out of memory.\n",
+ check->server->proxy->id, check->server->id);
+ return 0;
+}
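
The initial expiry computed above staggers the first run of each check evenly across one interval so that they do not all fire at once. A tiny illustration with assumed numbers:

/* With mininter = 2000 ms and nbcheck = 4, the first runs land at:
 *   srvpos 0 -> +0 ms,    srvpos 1 -> +500 ms,
 *   srvpos 2 -> +1000 ms, srvpos 3 -> +1500 ms
 */
static int first_check_offset(int mininter, int nbcheck, int srvpos)
{
        return mininter * srvpos / nbcheck;
}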
+
+/*
+ * Start health-check.
+ * Returns 0 if OK, ERR_FATAL on error, and prints the error in this case.
+ */
+static int start_checks()
+{
+
+ struct proxy *px;
+ struct server *s;
+ int nbcheck=0, mininter=0, srvpos=0;
+
+ /* 0- init the dummy frontend used to create all checks sessions */
+ init_new_proxy(&checks_fe);
+ checks_fe.id = strdup("CHECKS-FE");
+ checks_fe.cap = PR_CAP_FE | PR_CAP_BE;
+ checks_fe.mode = PR_MODE_TCP;
+ checks_fe.maxconn = 0;
+ checks_fe.conn_retries = CONN_RETRIES;
+ checks_fe.options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
+ checks_fe.timeout.client = TICK_ETERNITY;
+
+ /* 1- count the checkers to run simultaneously.
+ * We also determine the minimum interval among all of those which
+ * have an interval larger than SRV_CHK_INTER_THRES. This interval
+ * will be used to spread their start-up date. Those which have
+ * a shorter interval will start independently and will not dictate
+ * too short an interval for all others.
+ */
+ for (px = proxies_list; px; px = px->next) {
+ for (s = px->srv; s; s = s->next) {
+ if (s->check.state & CHK_ST_CONFIGURED) {
+ nbcheck++;
+ if ((srv_getinter(&s->check) >= SRV_CHK_INTER_THRES) &&
+ (!mininter || mininter > srv_getinter(&s->check)))
+ mininter = srv_getinter(&s->check);
+ }
+
+ if (s->agent.state & CHK_ST_CONFIGURED) {
+ nbcheck++;
+ if ((srv_getinter(&s->agent) >= SRV_CHK_INTER_THRES) &&
+ (!mininter || mininter > srv_getinter(&s->agent)))
+ mininter = srv_getinter(&s->agent);
+ }
+ }
+ }
+
+ if (!nbcheck)
+ return ERR_NONE;
+
+ srand((unsigned)time(NULL));
+
+ /* 2- start them as far as possible from each other. For this, we will
+ * start them after their interval is set to the min interval divided
+ * by the number of servers, weighted by the server's position in the
+ * list.
+ */
+ for (px = proxies_list; px; px = px->next) {
+ if ((px->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
+ if (init_pid_list()) {
+ ha_alert("Starting [%s] check: out of memory.\n", px->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ for (s = px->srv; s; s = s->next) {
+ /* A task for the main check */
+ if (s->check.state & CHK_ST_CONFIGURED) {
+ if (s->check.type == PR_O2_EXT_CHK) {
+ if (!prepare_external_check(&s->check))
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (!start_check_task(&s->check, mininter, nbcheck, srvpos))
+ return ERR_ALERT | ERR_FATAL;
+ srvpos++;
+ }
+
+ /* A task for an auxiliary agent check */
+ if (s->agent.state & CHK_ST_CONFIGURED) {
+ if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) {
+ return ERR_ALERT | ERR_FATAL;
+ }
+ srvpos++;
+ }
+ }
+ }
+ return ERR_NONE;
+}
+
+
+/*
+ * Return value:
+ * the port to be used for the health check
+ * 0 in case no port could be found for the check
+ */
+static int srv_check_healthcheck_port(struct check *chk)
+{
+ int i = 0;
+ struct server *srv = NULL;
+
+ srv = chk->server;
+
+ /* by default, we use the health check port configured */
+ if (chk->port > 0)
+ return chk->port;
+
+ /* try to get the port from check_core.addr if check.port not set */
+ i = get_host_port(&chk->addr);
+ if (i > 0)
+ return i;
+
+ /* try to get the port from server address */
+ /* prevent MAPPORTS from working at this point, since checks could
+ * not be performed in such a case (MAPPORTS imposes relative ports
+ * based on live traffic)
+ */
+ if (srv->flags & SRV_F_MAPPORTS)
+ return 0;
+
+ i = srv->svc_port; /* by default */
+ if (i > 0)
+ return i;
+
+ return 0;
+}
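
In short, the lookup order is: explicit check port, then a port carried by the check address, then the server's own service port, MAPPORTS servers yielding 0 because their effective port only exists relative to live traffic. A few assumed configuration lines and the port this function would pick:

/* server srv1 10.0.0.5:8080 check port 9000          -> 9000 (explicit check port)
 * server srv2 10.0.0.6:8080 check addr 10.9.9.9:9100 -> 9100 (port in check address)
 * server srv3 10.0.0.7:8080 check                    -> 8080 (service port)
 * server srv4 10.0.0.8:+100 check                    -> 0    (MAPPORTS, no usable port)
 */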
+
+/* Initializes a health-check attached to the server <srv>. Non-zero is returned
+ * if an error occurred.
+ */
+int init_srv_check(struct server *srv)
+{
+ const char *err;
+ struct tcpcheck_rule *r;
+ int ret = ERR_NONE;
+ int check_type;
+
+ if (!srv->do_check || !(srv->proxy->cap & PR_CAP_BE))
+ goto out;
+
+ check_type = srv->check.tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK;
+
+ if (!(srv->flags & SRV_F_DYNAMIC)) {
+ /* If neither a port nor an addr was specified and no check
+ * transport layer is forced, then the transport layer used by
+ * the checks is the same as for the production traffic.
+ * Otherwise we use raw_sock by default, unless one is
+ * specified.
+ */
+ if (!srv->check.port && !is_addr(&srv->check.addr)) {
+ if (!srv->check.use_ssl && srv->use_ssl != -1) {
+ srv->check.use_ssl = srv->use_ssl;
+ srv->check.xprt = srv->xprt;
+ }
+ else if (srv->check.use_ssl == 1)
+ srv->check.xprt = xprt_get(XPRT_SSL);
+ srv->check.send_proxy |= (srv->pp_opts);
+ }
+ else if (srv->check.use_ssl == 1)
+ srv->check.xprt = xprt_get(XPRT_SSL);
+ }
+ else {
+ /* For dynamic servers, check-ssl and check-send-proxy must be
+ * explicitly defined even if the check port was not
+ * overridden.
+ */
+ if (srv->check.use_ssl == 1)
+ srv->check.xprt = xprt_get(XPRT_SSL);
+ }
+
+ /* Inherit the mux protocol from the server if not already defined for
+ * the check
+ */
+ if (srv->mux_proto && !srv->check.mux_proto &&
+ ((srv->mux_proto->mode == PROTO_MODE_HTTP && check_type == TCPCHK_RULES_HTTP_CHK) ||
+ (srv->mux_proto->mode == PROTO_MODE_TCP && check_type != TCPCHK_RULES_HTTP_CHK))) {
+ srv->check.mux_proto = srv->mux_proto;
+ }
+ /* test that check proto is valid if explicitly defined */
+ else if (srv->check.mux_proto &&
+ ((srv->check.mux_proto->mode == PROTO_MODE_HTTP && check_type != TCPCHK_RULES_HTTP_CHK) ||
+ (srv->check.mux_proto->mode == PROTO_MODE_TCP && check_type == TCPCHK_RULES_HTTP_CHK))) {
+ ha_alert("config: %s '%s': server '%s' uses an incompatible MUX protocol for the selected check type\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* validate <srv> server health-check settings */
+
+ /* We need at least a service port, a check port or the first tcp-check
+ * rule must be a 'connect' one when checking an IPv4/IPv6 server.
+ */
+ if ((srv_check_healthcheck_port(&srv->check) != 0) ||
+ (!is_inet_addr(&srv->check.addr) && (is_addr(&srv->check.addr) || !is_inet_addr(&srv->addr))))
+ goto init;
+
+ if (!srv->proxy->tcpcheck_rules.list || LIST_ISEMPTY(srv->proxy->tcpcheck_rules.list)) {
+ ha_alert("config: %s '%s': server '%s' has neither service port nor check port.\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* search the first action (connect / send / expect) in the list */
+ r = get_first_tcpcheck_rule(&srv->proxy->tcpcheck_rules);
+ if (!r || (r->action != TCPCHK_ACT_CONNECT) || (!r->connect.port && !get_host_port(&r->connect.addr))) {
+ ha_alert("config: %s '%s': server '%s' has neither service port nor check port "
+ "nor tcp_check rule 'connect' with port information.\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* scan the tcp-check ruleset to ensure a port has been configured */
+ list_for_each_entry(r, srv->proxy->tcpcheck_rules.list, list) {
+ if ((r->action == TCPCHK_ACT_CONNECT) && (!r->connect.port && !get_host_port(&r->connect.addr))) {
+ ha_alert("config: %s '%s': server '%s' has neither service port nor check port, "
+ "and a tcp_check rule 'connect' with no port information.\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+
+ init:
+ err = init_check(&srv->check, srv->proxy->options2 & PR_O2_CHK_ANY);
+ if (err) {
+ ha_alert("config: %s '%s': unable to init check for server '%s' (%s).\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ srv->check.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED;
+ srv_take(srv);
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resource limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ out:
+ return ret;
+}
+
+/* Initializes an agent-check attached to the server <srv>. Non-zero is returned
+ * if an error occurred.
+ */
+int init_srv_agent_check(struct server *srv)
+{
+ struct tcpcheck_rule *chk;
+ const char *err;
+ int ret = ERR_NONE;
+
+ if (!srv->do_agent || !(srv->proxy->cap & PR_CAP_BE))
+ goto out;
+
+ /* If there is no connect rule preceding all send / expect rules, an
+ * implicit one is inserted before all others.
+ */
+ chk = get_first_tcpcheck_rule(srv->agent.tcpcheck_rules);
+ if (!chk || chk->action != TCPCHK_ACT_CONNECT) {
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ ha_alert("%s '%s': unable to add implicit tcp-check connect rule"
+ " to agent-check for server '%s' (out of memory).\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chk->action = TCPCHK_ACT_CONNECT;
+ chk->connect.options = (TCPCHK_OPT_DEFAULT_CONNECT|TCPCHK_OPT_IMPLICIT);
+ LIST_INSERT(srv->agent.tcpcheck_rules->list, &chk->list);
+ }
+
+ /* <chk> is always defined here and it is a CONNECT action. If there is
+ * a preset variable, it means there is an agent string defined and data
+ * will be sent after the connect.
+ */
+ if (!LIST_ISEMPTY(&srv->agent.tcpcheck_rules->preset_vars))
+ chk->connect.options |= TCPCHK_OPT_HAS_DATA;
+
+
+ err = init_check(&srv->agent, PR_O2_TCPCHK_CHK);
+ if (err) {
+ ha_alert("config: %s '%s': unable to init agent-check for server '%s' (%s).\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (!srv->agent.inter)
+ srv->agent.inter = srv->check.inter;
+
+ srv->agent.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_AGENT;
+ srv_take(srv);
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resource limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ out:
+ return ret;
+}
+
+static void deinit_srv_check(struct server *srv)
+{
+ if (srv->check.state & CHK_ST_CONFIGURED)
+ free_check(&srv->check);
+ srv->check.state &= ~CHK_ST_CONFIGURED & ~CHK_ST_ENABLED;
+ srv->do_check = 0;
+}
+
+
+static void deinit_srv_agent_check(struct server *srv)
+{
+ if (srv->agent.state & CHK_ST_CONFIGURED)
+ free_check(&srv->agent);
+
+ srv->agent.state &= ~CHK_ST_CONFIGURED & ~CHK_ST_ENABLED & ~CHK_ST_AGENT;
+ srv->do_agent = 0;
+}
+
+REGISTER_POST_SERVER_CHECK(init_srv_check);
+REGISTER_POST_SERVER_CHECK(init_srv_agent_check);
+REGISTER_POST_CHECK(start_checks);
+
+REGISTER_SERVER_DEINIT(deinit_srv_check);
+REGISTER_SERVER_DEINIT(deinit_srv_agent_check);
+
+
+/**************************************************************************/
+/************************** Check sample fetches **************************/
+/**************************************************************************/
+
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+
+/**************************************************************************/
+/************************ Check's parsing functions ***********************/
+/**************************************************************************/
+/* Parse the "addr" server keyword */
+static int srv_parse_addr(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct sockaddr_storage *sk;
+ int port1, port2, err_code = 0;
+
+
+ if (!*args[*cur_arg+1]) {
+ memprintf(errmsg, "'%s' expects <ipv4|ipv6> as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ sk = str2sa_range(args[*cur_arg+1], NULL, &port1, &port2, NULL, NULL, errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(errmsg, "'%s' : %s", args[*cur_arg], *errmsg);
+ goto error;
+ }
+
+ srv->check.addr = *sk;
+ /* if agentaddr was never set, we can use addr */
+ if (!(srv->flags & SRV_F_AGENTADDR))
+ srv->agent.addr = *sk;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "agent-addr" server keyword */
+static int srv_parse_agent_addr(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct sockaddr_storage sk;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects an address as argument.", args[*cur_arg]);
+ goto error;
+ }
+ memset(&sk, 0, sizeof(sk));
+ if (str2ip(args[*cur_arg + 1], &sk) == NULL) {
+ memprintf(errmsg, "parsing agent-addr failed. Check if '%s' is correct address.", args[*cur_arg+1]);
+ goto error;
+ }
+ set_srv_agent_addr(srv, &sk);
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "agent-check" server keyword */
+static int srv_parse_agent_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ int err_code = 0;
+
+ if (srv->do_agent)
+ goto out;
+
+ if (!(curpx->cap & PR_CAP_BE)) {
+ memprintf(errmsg, "'%s' ignored because %s '%s' has no backend capability",
+ args[*cur_arg], proxy_type_str(curpx), curpx->id);
+ return ERR_WARN;
+ }
+
+ if (!rules) {
+ rules = calloc(1, sizeof(*rules));
+ if (!rules) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+ LIST_INIT(&rules->preset_vars);
+ srv->agent.tcpcheck_rules = rules;
+ }
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*agent-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*agent-check");
+ if (rs == NULL) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-lf", "%[var(check.agent_string)]", ""},
+ 1, curpx, &rs->rules, srv->conf.file, srv->conf.line, errmsg);
+ if (!chk) {
+ memprintf(errmsg, "'%s': %s", args[*cur_arg], *errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_AGENT_CHK,
+ srv->conf.file, srv->conf.line, errmsg);
+ if (!chk) {
+ memprintf(errmsg, "'%s': %s", args[*cur_arg], *errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_agent_expect_reply;
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_AGENT_CHK;
+ srv->do_agent = 1;
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
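+
+/* The shared "*agent-check" ruleset built above behaves roughly like the
+ * following explicit tcp-check sequence (sketch for illustration only):
+ *   tcp-check send-lf %[var(check.agent_string)]
+ *   tcp-check expect custom    # evaluated by tcpcheck_agent_expect_reply()
+ */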
+
+/* Parse the "agent-inter" server keyword */
+static int srv_parse_agent_inter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->agent.inter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "agent-port" server keyword */
+static int srv_parse_agent_port(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a port number as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resources limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ set_srv_agent_port(srv, atol(args[*cur_arg + 1]));
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+int set_srv_agent_send(struct server *srv, const char *send)
+{
+ struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
+ struct tcpcheck_var *var = NULL;
+ char *str;
+
+ str = strdup(send);
+ var = create_tcpcheck_var(ist("check.agent_string"));
+ if (str == NULL || var == NULL)
+ goto error;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = str;
+ var->data.u.str.data = strlen(str);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+
+ return 1;
+
+ error:
+ free(str);
+ free(var);
+ return 0;
+}
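+
+/* Usage sketch (illustrative): preset the agent string, checking for the
+ * out-of-memory failure reported by a 0 return value:
+ *   if (!set_srv_agent_send(srv, "75%"))
+ *       goto oom;
+ * Any previously preset string is released and replaced.
+ */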
+
+/* Parse the "agent-send" server keyword */
+static int srv_parse_agent_send(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ if (!rules) {
+ rules = calloc(1, sizeof(*rules));
+ if (!rules) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+ LIST_INIT(&rules->preset_vars);
+ srv->agent.tcpcheck_rules = rules;
+ }
+
+ if (!set_srv_agent_send(srv, args[*cur_arg+1])) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "no-agent-send" server keyword */
+static int srv_parse_no_agent_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ deinit_srv_agent_check(srv);
+ return 0;
+}
+
+/* Parse the "check" server keyword */
+static int srv_parse_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ if (!(curpx->cap & PR_CAP_BE)) {
+ memprintf(errmsg, "'%s' ignored because %s '%s' has no backend capability",
+ args[*cur_arg], proxy_type_str(curpx), curpx->id);
+ return ERR_WARN;
+ }
+
+ srv->do_check = 1;
+ return 0;
+}
+
+/* Parse the "check-send-proxy" server keyword */
+static int srv_parse_check_send_proxy(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ srv->check.send_proxy = 1;
+ return 0;
+}
+
+/* Parse the "check-via-socks4" server keyword */
+static int srv_parse_check_via_socks4(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ srv->check.via_socks4 = 1;
+ return 0;
+}
+
+/* Parse the "no-check" server keyword */
+static int srv_parse_no_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ deinit_srv_check(srv);
+ return 0;
+}
+
+/* Parse the "no-check-send-proxy" server keyword */
+static int srv_parse_no_check_send_proxy(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ srv->check.send_proxy = 0;
+ return 0;
+}
+
+/* parse the "check-proto" server keyword */
+static int srv_parse_check_proto(char **args, int *cur_arg,
+ struct proxy *px, struct server *newsrv, char **err)
+{
+ int err_code = 0;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[*cur_arg]);
+ goto error;
+ }
+ newsrv->check.mux_proto = get_mux_proto(ist(args[*cur_arg + 1]));
+ if (!newsrv->check.mux_proto) {
+ memprintf(err, "'%s' : unknown MUX protocol '%s'", args[*cur_arg], args[*cur_arg+1]);
+ goto error;
+ }
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parse the "rise" server keyword */
+static int srv_parse_check_rise(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(errmsg, "'%s' expects an integer argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ srv->check.rise = atol(args[*cur_arg+1]);
+ if (srv->check.rise <= 0) {
+ memprintf(errmsg, "'%s' has to be > 0.", args[*cur_arg]);
+ goto error;
+ }
+
+ if (srv->check.health)
+ srv->check.health = srv->check.rise;
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "fall" server keyword */
+static int srv_parse_check_fall(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(errmsg, "'%s' expects an integer argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ srv->check.fall = atol(args[*cur_arg+1]);
+ if (srv->check.fall <= 0) {
+ memprintf(errmsg, "'%s' has to be > 0.", args[*cur_arg]);
+ goto error;
+ }
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "inter" server keyword */
+static int srv_parse_check_inter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->check.inter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parse the "fastinter" server keyword */
+static int srv_parse_check_fastinter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->check.fastinter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parse the "downinter" server keyword */
+static int srv_parse_check_downinter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->check.downinter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "port" server keyword */
+static int srv_parse_check_port(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a port number as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resources limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ srv->check.port = atol(args[*cur_arg+1]);
+ /* if agentport was never set, we can use port */
+ if (!(srv->flags & SRV_F_AGENTPORT))
+ srv->agent.port = srv->check.port;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+static struct srv_kw_list srv_kws = { "CHK", { }, {
+ { "addr", srv_parse_addr, 1, 1, 1 }, /* IP address to send health to or to probe from agent-check */
+ { "agent-addr", srv_parse_agent_addr, 1, 1, 1 }, /* Enable an auxiliary agent check */
+ { "agent-check", srv_parse_agent_check, 0, 1, 1 }, /* Enable agent checks */
+ { "agent-inter", srv_parse_agent_inter, 1, 1, 1 }, /* Set the interval between two agent checks */
+ { "agent-port", srv_parse_agent_port, 1, 1, 1 }, /* Set the TCP port used for agent checks. */
+ { "agent-send", srv_parse_agent_send, 1, 1, 1 }, /* Set string to send to agent. */
+ { "check", srv_parse_check, 0, 1, 1 }, /* Enable health checks */
+ { "check-proto", srv_parse_check_proto, 1, 1, 1 }, /* Set the mux protocol for health checks */
+ { "check-send-proxy", srv_parse_check_send_proxy, 0, 1, 1 }, /* Enable PROXY protocol for health checks */
+ { "check-via-socks4", srv_parse_check_via_socks4, 0, 1, 1 }, /* Enable socks4 proxy for health checks */
+ { "no-agent-check", srv_parse_no_agent_check, 0, 1, 0 }, /* Do not enable any auxiliary agent check */
+ { "no-check", srv_parse_no_check, 0, 1, 0 }, /* Disable health checks */
+ { "no-check-send-proxy", srv_parse_no_check_send_proxy, 0, 1, 0 }, /* Disable PROXY protocol for health checks */
+ { "rise", srv_parse_check_rise, 1, 1, 1 }, /* Set rise value for health checks */
+ { "fall", srv_parse_check_fall, 1, 1, 1 }, /* Set fall value for health checks */
+ { "inter", srv_parse_check_inter, 1, 1, 1 }, /* Set inter value for health checks */
+ { "fastinter", srv_parse_check_fastinter, 1, 1, 1 }, /* Set fastinter value for health checks */
+ { "downinter", srv_parse_check_downinter, 1, 1, 1 }, /* Set downinter value for health checks */
+ { "port", srv_parse_check_port, 1, 1, 1 }, /* Set the TCP port used for health checks. */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
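+
+/* Note: the three numeric fields after each parser are assumed (from the
+ * srv_kw layout) to be <skip>, the number of arguments consumed, then
+ * <default_ok> and <dynamic_ok>, i.e. whether the keyword is valid in a
+ * "default-server" line and on dynamically added servers respectively.
+ */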
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/chunk.c b/src/chunk.c
new file mode 100644
index 0000000..d67e9f1
--- /dev/null
+++ b/src/chunk.c
@@ -0,0 +1,317 @@
+/*
+ * Chunk management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/tools.h>
+
+/* trash chunks used for various conversions */
+static THREAD_LOCAL struct buffer *trash_chunk;
+static THREAD_LOCAL struct buffer trash_chunk1;
+static THREAD_LOCAL struct buffer trash_chunk2;
+
+/* trash buffers used for various conversions */
+static int trash_size __read_mostly;
+static THREAD_LOCAL char *trash_buf1;
+static THREAD_LOCAL char *trash_buf2;
+
+/* the trash pool for reentrant allocations */
+struct pool_head *pool_head_trash __read_mostly = NULL;
+
+/* this is used to drain data, and as a temporary buffer for sprintf()... */
+THREAD_LOCAL struct buffer trash = { };
+
+/*
+ * Returns a pre-allocated and initialized trash chunk that can be used for any
+ * type of conversion. Two chunks and their respective buffers are alternately
+ * returned so that it is always possible to iterate over data transformations
+ * without losing the data being transformed. The blocks are initialized to the
+ * size of a standard buffer, so they should suffice for everything. For
+ * convenience, a zero is always emitted at the beginning of the string so that
+ * it may be used as an empty string as well.
+ */
+struct buffer *get_trash_chunk(void)
+{
+ char *trash_buf;
+
+ if (trash_chunk == &trash_chunk1) {
+ trash_chunk = &trash_chunk2;
+ trash_buf = trash_buf2;
+ }
+ else {
+ trash_chunk = &trash_chunk1;
+ trash_buf = trash_buf1;
+ }
+ *trash_buf = 0;
+ chunk_init(trash_chunk, trash_buf, trash_size);
+ return trash_chunk;
+}
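+
+/* Usage sketch (illustrative): since two chunks alternate, one conversion can
+ * safely consume the result of the previous one:
+ *   struct buffer *t1 = get_trash_chunk();
+ *   chunk_printf(t1, "%s", raw);
+ *   struct buffer *t2 = get_trash_chunk(); // distinct from t1
+ *   chunk_printf(t2, "<%s>", t1->area);
+ * A third call would recycle t1, so at most two results may be chained.
+ */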
+
+/* (re)allocates the trash buffers. Returns 0 in case of failure. It is
+ * possible to call this function multiple times if the trash size changes.
+ */
+static int alloc_trash_buffers(int bufsize)
+{
+ chunk_init(&trash, my_realloc2(trash.area, bufsize), bufsize);
+ trash_size = bufsize;
+ trash_buf1 = (char *)my_realloc2(trash_buf1, bufsize);
+ trash_buf2 = (char *)my_realloc2(trash_buf2, bufsize);
+ return trash.area && trash_buf1 && trash_buf2;
+}
+
+static int alloc_trash_buffers_per_thread(void)
+{
+ return alloc_trash_buffers(global.tune.bufsize);
+}
+
+static void free_trash_buffers_per_thread(void)
+{
+ chunk_destroy(&trash);
+ ha_free(&trash_buf2);
+ ha_free(&trash_buf1);
+}
+
+/* Initialize the trash buffers. It returns 0 if an error occurred. */
+int init_trash_buffers(int first)
+{
+ pool_destroy(pool_head_trash);
+ pool_head_trash = create_pool("trash",
+ sizeof(struct buffer) + global.tune.bufsize,
+ MEM_F_EXACT);
+ if (!pool_head_trash || !alloc_trash_buffers(global.tune.bufsize))
+ return 0;
+ return 1;
+}
+
+/*
+ * Allocate a trash chunk from the reentrant pool. The buffer starts at the
+ * end of the chunk. This chunk must be freed using free_trash_chunk(). This
+ * call may fail and the caller is responsible for checking that the returned
+ * pointer is not NULL.
+ */
+struct buffer *alloc_trash_chunk(void)
+{
+ struct buffer *chunk;
+
+ chunk = pool_alloc(pool_head_trash);
+ if (chunk) {
+ char *buf = (char *)chunk + sizeof(struct buffer);
+ *buf = 0;
+ chunk_init(chunk, buf,
+ pool_head_trash->size - sizeof(struct buffer));
+ }
+ return chunk;
+}
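+
+/* Usage sketch (illustrative): the allocation may fail and must be checked,
+ * and the chunk must always be released with free_trash_chunk():
+ *   struct buffer *tmp = alloc_trash_chunk();
+ *   if (!tmp)
+ *       return 0;
+ *   chunk_printf(tmp, "fd=%d", fd);
+ *   ...
+ *   free_trash_chunk(tmp);
+ */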
+
+/*
+ * Does an snprintf() at the beginning of chunk <chk>, limited to at most
+ * chk->size chars. Returns the new chunk size on success, 0 if the chunk
+ * has no storage, or < 0 if the output was truncated.
+ */
+int chunk_printf(struct buffer *chk, const char *fmt, ...)
+{
+ va_list argp;
+ int ret;
+
+ if (!chk->area || !chk->size)
+ return 0;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(chk->area, chk->size, fmt, argp);
+ va_end(argp);
+
+ if (ret >= chk->size)
+ return -1;
+
+ chk->data = ret;
+ return chk->data;
+}
+
+/*
+ * Does an snprintf() at the end of chunk <chk>, limited to the room left in
+ * the chunk (chk->size - chk->data chars). In case of truncation, nothing is
+ * appended. Returns the new chunk size.
+ */
+int chunk_appendf(struct buffer *chk, const char *fmt, ...)
+{
+ va_list argp;
+ int ret;
+
+ if (!chk->area || !chk->size)
+ return 0;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(chk->area + chk->data, chk->size - chk->data, fmt,
+ argp);
+ if (ret >= chk->size - chk->data)
+ /* do not copy anything in case of truncation */
+ chk->area[chk->data] = 0;
+ else
+ chk->data += ret;
+ va_end(argp);
+ return chk->data;
+}
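+
+/* Usage sketch (illustrative): building a message incrementally on the shared
+ * trash buffer:
+ *   chunk_reset(&trash);
+ *   chunk_appendf(&trash, "server %s", srv->id);
+ *   chunk_appendf(&trash, " is %s", up ? "UP" : "DOWN");
+ */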
+
+/*
+ * Encode chunk <src> into chunk <dst>, within the limit of dst->size chars,
+ * replacing non-printable or HTML-special characters with "&#%u;". If <dst>
+ * would overflow, it is restored to its original length. Returns the new
+ * chunk size.
+ */
+int chunk_htmlencode(struct buffer *dst, struct buffer *src)
+{
+ int i, l;
+ int olen, free;
+ char c;
+
+ olen = dst->data;
+
+ for (i = 0; i < src->data; i++) {
+ free = dst->size - dst->data;
+
+ if (!free) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ c = src->area[i];
+
+ if (!isascii((unsigned char)c) || !isprint((unsigned char)c) || c == '&' || c == '"' || c == '\'' || c == '<' || c == '>') {
+ l = snprintf(dst->area + dst->data, free, "&#%u;",
+ (unsigned char)c);
+
+ if (free < l) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ dst->data += l;
+ } else {
+ dst->area[dst->data] = c;
+ dst->data++;
+ }
+ }
+
+ return dst->data;
+}
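+
+/* For example (illustrative), encoding the string:
+ *   <a href="x">
+ * produces:
+ *   &#60;a href=&#34;x&#34;&#62;
+ * since '<', '"' and '>' are 60, 34 and 62 in ASCII.
+ */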
+
+/*
+ * Encode chunk <src> into chunk <dst>, within the limit of dst->size chars,
+ * replacing non-printable characters, '<', '>' and the character passed in
+ * <qc> with "<%02X>". If <dst> would overflow, it is restored to its original
+ * length. Returns the new chunk size.
+ */
+int chunk_asciiencode(struct buffer *dst, struct buffer *src, char qc)
+{
+ int i, l;
+ int olen, free;
+ char c;
+
+ olen = dst->data;
+
+ for (i = 0; i < src->data; i++) {
+ free = dst->size - dst->data;
+
+ if (!free) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ c = src->area[i];
+
+ if (!isascii((unsigned char)c) || !isprint((unsigned char)c) || c == '<' || c == '>' || c == qc) {
+ l = snprintf(dst->area + dst->data, free, "<%02X>",
+ (unsigned char)c);
+
+ if (free < l) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ dst->data += l;
+ } else {
+ dst->area[dst->data] = c;
+ dst->data++;
+ }
+ }
+
+ return dst->data;
+}
+
+/* Compares the string in chunk <chk> with the string in <str> which must be
+ * zero-terminated. Return is the same as with strcmp(). Neither is allowed
+ * to be null.
+ */
+int chunk_strcmp(const struct buffer *chk, const char *str)
+{
+ const char *s1 = chk->area;
+ int len = chk->data;
+ int diff = 0;
+
+ do {
+ if (--len < 0) {
+ diff = (unsigned char)0 - (unsigned char)*str;
+ break;
+ }
+ diff = (unsigned char)*(s1++) - (unsigned char)*(str++);
+ } while (!diff);
+ return diff;
+}
+
+/* Case-insensitively compares the string in chunk <chk> with the string in
+ * <str> which must be zero-terminated. Return is the same as with strcmp().
+ * Neither is allowed to be null.
+ */
+int chunk_strcasecmp(const struct buffer *chk, const char *str)
+{
+ const char *s1 = chk->area;
+ int len = chk->data;
+ int diff = 0;
+
+ do {
+ if (--len < 0) {
+ diff = (unsigned char)0 - (unsigned char)*str;
+ break;
+ }
+ diff = (unsigned char)*s1 - (unsigned char)*str;
+ if (unlikely(diff)) {
+ unsigned int l = (unsigned char)*s1;
+ unsigned int r = (unsigned char)*str;
+
+ l -= 'a';
+ r -= 'a';
+
+ if (likely(l <= (unsigned char)'z' - 'a'))
+ l -= 'a' - 'A';
+ if (likely(r <= (unsigned char)'z' - 'a'))
+ r -= 'a' - 'A';
+ diff = l - r;
+ }
+ s1++; str++;
+ } while (!diff);
+ return diff;
+}
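+
+/* Note on the folding above (sketch): both characters are shifted by 'a' so
+ * that the lowercase range test is a single unsigned comparison, and the
+ * constant offset cancels out in the final subtraction. For instance,
+ * comparing the chunk "ABC" with "abd" folds the first two pairs to equality
+ * and ends with 'C' - 'D' < 0, the same ordering strcasecmp() would report.
+ */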
+
+REGISTER_PER_THREAD_ALLOC(alloc_trash_buffers_per_thread);
+REGISTER_PER_THREAD_FREE(free_trash_buffers_per_thread);
+REGISTER_POST_DEINIT(free_trash_buffers_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/cli.c b/src/cli.c
new file mode 100644
index 0000000..1dc37f6
--- /dev/null
+++ b/src/cli.c
@@ -0,0 +1,3210 @@
+/*
+ * Functions dedicated to statistics output and the stats socket
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <net/if.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/base64.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/compression.h>
+#include <haproxy/dns-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/mworker.h>
+#include <haproxy/mworker-t.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/pipe.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample-t.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+#define PAYLOAD_PATTERN "<<"
+
+static struct applet cli_applet;
+static struct applet mcli_applet;
+
+static const char cli_permission_denied_msg[] =
+ "Permission denied\n"
+ "";
+
+
+static THREAD_LOCAL char *dynamic_usage_msg = NULL;
+
+/* List head of cli keywords */
+static struct cli_kw_list cli_keywords = {
+ .list = LIST_HEAD_INIT(cli_keywords.list)
+};
+
+extern const char *stat_status_codes[];
+
+struct proxy *mworker_proxy; /* CLI proxy of the master */
+
+/* CLI context for the "show env" command */
+struct show_env_ctx {
+ char **var; /* first variable to show */
+ int show_one; /* stop after showing the first one */
+};
+
+/* CLI context for the "show fd" command */
+struct show_fd_ctx {
+ int fd; /* first FD to show */
+ int show_one; /* stop after showing one FD */
+};
+
+/* CLI context for the "show cli sockets" command */
+struct show_sock_ctx {
+ struct bind_conf *bind_conf;
+ struct listener *listener;
+};
+
+static int cmp_kw_entries(const void *a, const void *b)
+{
+ const struct cli_kw *l = *(const struct cli_kw **)a;
+ const struct cli_kw *r = *(const struct cli_kw **)b;
+
+ return strcmp(l->usage ? l->usage : "", r->usage ? r->usage : "");
+}
+
+/* This shows the help message and lists the commands supported at the current
+ * level. If <args> is not NULL, only the commands matching its first words
+ * are listed; if nothing matches or <args> is NULL, all commands are listed.
+ */
+static char *cli_gen_usage_msg(struct appctx *appctx, char * const *args)
+{
+ struct cli_kw *entries[CLI_MAX_HELP_ENTRIES];
+ struct cli_kw_list *kw_list;
+ struct cli_kw *kw;
+ struct buffer *tmp = get_trash_chunk();
+ struct buffer out;
+ struct { struct cli_kw *kw; int dist; } matches[CLI_MAX_MATCHES], swp;
+ int idx;
+ int ishelp = 0;
+ int length = 0;
+ int help_entries = 0;
+
+ ha_free(&dynamic_usage_msg);
+
+ if (args && *args && strcmp(*args, "help") == 0) {
+ args++;
+ ishelp = 1;
+ }
+
+ /* first, let's measure the longest match */
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+ if (kw->level & ~appctx->cli_level & (ACCESS_MASTER_ONLY|ACCESS_EXPERT|ACCESS_EXPERIMENTAL))
+ continue;
+ if (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ (appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) ==
+ (ACCESS_MASTER_ONLY|ACCESS_MASTER))
+ continue;
+
+ /* OK this command is visible */
+ for (idx = 0; idx < CLI_PREFIX_KW_NB; idx++) {
+ if (!kw->str_kw[idx])
+ break; // end of keyword
+ if (!args || !args[idx] || !*args[idx])
+ break; // end of command line
+ if (strcmp(kw->str_kw[idx], args[idx]) != 0)
+ break;
+ if (idx + 1 > length)
+ length = idx + 1;
+ }
+ }
+ }
+
+ /* now <length> equals the number of exactly matching words */
+ chunk_reset(tmp);
+ if (ishelp) // this is the help message.
+ chunk_strcat(tmp, "The following commands are valid at this level:\n");
+ else if (!length && (!args || !*args || !**args)) // no match
+ chunk_strcat(tmp, "Unknown command. Please enter one of the following commands only:\n");
+ else // partial match
+ chunk_strcat(tmp, "Unknown command, but maybe one of the following ones is a better match:\n");
+
+ for (idx = 0; idx < CLI_MAX_MATCHES; idx++) {
+ matches[idx].kw = NULL;
+ matches[idx].dist = INT_MAX;
+ }
+
+ /* In case of partial match we'll look for the best matching entries
+ * starting from position <length>
+ */
+ if (args && args[length] && *args[length]) {
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+ if (kw->level & ~appctx->cli_level & (ACCESS_MASTER_ONLY|ACCESS_EXPERT|ACCESS_EXPERIMENTAL))
+ continue;
+ if (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ ((appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) ==
+ (ACCESS_MASTER_ONLY|ACCESS_MASTER)))
+ continue;
+
+ for (idx = 0; idx < length; idx++) {
+ if (!kw->str_kw[idx])
+ break; // end of keyword
+ if (!args || !args[idx] || !*args[idx])
+ break; // end of command line
+ if (strcmp(kw->str_kw[idx], args[idx]) != 0)
+ break;
+ }
+
+ /* extra non-matching words are fuzzy-matched */
+ if (kw->usage && idx == length && args[idx] && *args[idx]) {
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ int dist = 0;
+ int totlen = 0;
+ int i;
+
+ /* this one matches, let's compute the distance between the two
+ * on the remaining words. For this we're computing the signature
+ * of everything that remains and the cumulated length of the
+ * strings.
+ */
+ memset(word_sig, 0, sizeof(word_sig));
+ for (i = idx; i < CLI_PREFIX_KW_NB && args[i] && *args[i]; i++) {
+ update_word_fingerprint(word_sig, args[i]);
+ totlen += strlen(args[i]);
+ }
+
+ memset(list_sig, 0, sizeof(list_sig));
+ for (i = idx; i < CLI_PREFIX_KW_NB && kw->str_kw[i]; i++) {
+ update_word_fingerprint(list_sig, kw->str_kw[i]);
+ totlen += strlen(kw->str_kw[i]);
+ }
+
+ dist = word_fingerprint_distance(word_sig, list_sig);
+
+ /* insert this one at its place if relevant, in order to keep only
+ * the best matches.
+ */
+ swp.kw = kw; swp.dist = dist;
+ if (dist < 5*totlen/2 && dist < matches[CLI_MAX_MATCHES-1].dist) {
+ matches[CLI_MAX_MATCHES-1] = swp;
+ for (idx = CLI_MAX_MATCHES - 1; --idx >= 0;) {
+ if (matches[idx+1].dist >= matches[idx].dist)
+ break;
+ matches[idx+1] = matches[idx];
+ matches[idx] = swp;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (matches[0].kw) {
+ /* we have fuzzy matches, let's propose them */
+ for (idx = 0; idx < CLI_MAX_MATCHES; idx++) {
+ kw = matches[idx].kw;
+ if (!kw)
+ break;
+
+ /* stop the dump if some words look very unlikely candidates */
+ if (matches[idx].dist > 5*matches[0].dist/2)
+ break;
+
+ if (help_entries < CLI_MAX_HELP_ENTRIES)
+ entries[help_entries++] = kw;
+ }
+ }
+
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ /* no full dump if we've already found nice candidates */
+ if (matches[0].kw)
+ break;
+
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+
+ /* in a worker or normal process, don't display master-only commands
+ * nor expert/experimental mode commands if not in this mode.
+ */
+ if (kw->level & ~appctx->cli_level & (ACCESS_MASTER_ONLY|ACCESS_EXPERT|ACCESS_EXPERIMENTAL))
+ continue;
+
+ /* in master, if the CLI doesn't have the
+ * ACCESS_MCLI_DEBUG level, don't display commands that have
+ * neither the master bit nor the master-only bit.
+ */
+ if (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ ((appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) ==
+ (ACCESS_MASTER_ONLY|ACCESS_MASTER)))
+ continue;
+
+ for (idx = 0; idx < length; idx++) {
+ if (!kw->str_kw[idx])
+ break; // end of keyword
+ if (!args || !args[idx] || !*args[idx])
+ break; // end of command line
+ if (strcmp(kw->str_kw[idx], args[idx]) != 0)
+ break;
+ }
+
+ if (kw->usage && idx == length && help_entries < CLI_MAX_HELP_ENTRIES)
+ entries[help_entries++] = kw;
+ }
+ }
+
+ qsort(entries, help_entries, sizeof(*entries), cmp_kw_entries);
+
+ for (idx = 0; idx < help_entries; idx++)
+ chunk_appendf(tmp, " %s\n", entries[idx]->usage);
+
+ /* always show the prompt/help/quit commands */
+ chunk_strcat(tmp,
+ " help [<command>] : list matching or all commands\n"
+ " prompt : toggle interactive mode with prompt\n"
+ " quit : disconnect\n");
+
+ chunk_init(&out, NULL, 0);
+ chunk_dup(&out, tmp);
+ dynamic_usage_msg = out.area;
+
+ cli_msg(appctx, LOG_INFO, dynamic_usage_msg);
+ return dynamic_usage_msg;
+}
+
+struct cli_kw* cli_find_kw(char **args)
+{
+ struct cli_kw_list *kw_list;
+ struct cli_kw *kw;/* current cli_kw */
+ char **tmp_args;
+ const char **tmp_str_kw;
+ int found = 0;
+
+ if (LIST_ISEMPTY(&cli_keywords.list))
+ return NULL;
+
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ kw = &kw_list->kw[0];
+ while (*kw->str_kw) {
+ tmp_args = args;
+ tmp_str_kw = kw->str_kw;
+ while (*tmp_str_kw) {
+ if (strcmp(*tmp_str_kw, *tmp_args) == 0) {
+ found = 1;
+ } else {
+ found = 0;
+ break;
+ }
+ tmp_args++;
+ tmp_str_kw++;
+ }
+ if (found)
+ return (kw);
+ kw++;
+ }
+ }
+ return NULL;
+}
+
+struct cli_kw* cli_find_kw_exact(char **args)
+{
+ struct cli_kw_list *kw_list;
+ int found = 0;
+ int i;
+ int j;
+
+ if (LIST_ISEMPTY(&cli_keywords.list))
+ return NULL;
+
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ for (i = 0; kw_list->kw[i].str_kw[0]; i++) {
+ found = 1;
+ for (j = 0; j < CLI_PREFIX_KW_NB; j++) {
+ if (args[j] == NULL && kw_list->kw[i].str_kw[j] == NULL) {
+ break;
+ }
+ if (args[j] == NULL || kw_list->kw[i].str_kw[j] == NULL) {
+ found = 0;
+ break;
+ }
+ if (strcmp(args[j], kw_list->kw[i].str_kw[j]) != 0) {
+ found = 0;
+ break;
+ }
+ }
+ if (found)
+ return &kw_list->kw[i];
+ }
+ }
+ return NULL;
+}
+
+void cli_register_kw(struct cli_kw_list *kw_list)
+{
+ LIST_APPEND(&cli_keywords.list, &kw_list->list);
+}
+
+/* list all known keywords on stdout, one per line */
+void cli_list_keywords(void)
+{
+ struct cli_kw_list *kw_list;
+ struct cli_kw *kwp, *kwn, *kw;
+ int idx;
+
+ for (kwn = kwp = NULL;; kwp = kwn) {
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ /* note: we sort based on the usage message when available,
+ * otherwise we fall back to the first keyword.
+ */
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+ if (strordered(kwp ? kwp->usage ? kwp->usage : kwp->str_kw[0] : NULL,
+ kw->usage ? kw->usage : kw->str_kw[0],
+ kwn != kwp ? kwn->usage ? kwn->usage : kwn->str_kw[0] : NULL))
+ kwn = kw;
+ }
+ }
+
+ if (kwn == kwp)
+ break;
+
+ for (idx = 0; kwn->str_kw[idx]; idx++) {
+ printf("%s ", kwn->str_kw[idx]);
+ }
+ if (kwn->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER))
+ printf("[MASTER] ");
+ if (!(kwn->level & ACCESS_MASTER_ONLY))
+ printf("[WORKER] ");
+ if (kwn->level & ACCESS_EXPERT)
+ printf("[EXPERT] ");
+ if (kwn->level & ACCESS_EXPERIMENTAL)
+ printf("[EXPERIM] ");
+ printf("\n");
+ }
+}
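+
+/* Example output (illustrative only, the tags depend on each keyword's
+ * level):
+ *   show sess [WORKER]
+ *   show startup-logs [MASTER] [WORKER]
+ */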
+
+/* allocate a new stats frontend named <name>, and return it
+ * (or NULL in case of lack of memory).
+ */
+static struct proxy *cli_alloc_fe(const char *name, const char *file, int line)
+{
+ struct proxy *fe;
+
+ fe = calloc(1, sizeof(*fe));
+ if (!fe)
+ return NULL;
+
+ init_new_proxy(fe);
+ fe->next = proxies_list;
+ proxies_list = fe;
+ fe->last_change = now.tv_sec;
+ fe->id = strdup("GLOBAL");
+ fe->cap = PR_CAP_FE|PR_CAP_INT;
+ fe->maxconn = 10; /* default to 10 concurrent connections */
+ fe->timeout.client = MS_TO_TICKS(10000); /* default timeout of 10 seconds */
+ fe->conf.file = strdup(file);
+ fe->conf.line = line;
+ fe->accept = frontend_accept;
+ fe->default_target = &cli_applet.obj_type;
+
+ /* the stats frontend is the only one able to assign ID #0 */
+ fe->conf.id.key = fe->uuid = 0;
+ eb32_insert(&used_proxy_id, &fe->conf.id);
+ return fe;
+}
+
+/* This function parses a "stats" statement in the "global" section. It returns
+ * -1 if there is any error, otherwise zero. If it returns -1, it will write an
+ * error message into the <err> buffer which will be preallocated. The trailing
+ * '\n' must not be written. The function must be called with <args> pointing to
+ * the first word after "stats".
+ */
+static int cli_parse_global(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct bind_conf *bind_conf;
+ struct listener *l;
+
+ if (strcmp(args[1], "socket") == 0) {
+ int cur_arg;
+
+ if (*args[2] == 0) {
+ memprintf(err, "'%s %s' in global section expects an address or a path to a UNIX socket", args[0], args[1]);
+ return -1;
+ }
+
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", file, line)) == NULL) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a frontend", args[0], args[1]);
+ return -1;
+ }
+ }
+
+ bind_conf = bind_conf_alloc(global.cli_fe, file, line, args[2], xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a bind_conf", args[0], args[1]);
+ return -1;
+ }
+ bind_conf->level &= ~ACCESS_LVL_MASK;
+ bind_conf->level |= ACCESS_LVL_OPER; /* default access level */
+
+ if (!str2listener(args[2], global.cli_fe, bind_conf, file, line, err)) {
+ memprintf(err, "parsing [%s:%d] : '%s %s' : %s\n",
+ file, line, args[0], args[1], err && *err ? *err : "error");
+ return -1;
+ }
+
+ cur_arg = 3;
+ while (*args[cur_arg]) {
+ struct bind_kw *kw;
+ const char *best;
+ int code;
+
+ kw = bind_find_kw(args[cur_arg]);
+ if (kw) {
+ if (!kw->parse) {
+ memprintf(err, "'%s %s' : '%s' option is not implemented in this version (check build options).",
+ args[0], args[1], args[cur_arg]);
+ return -1;
+ }
+
+ code = kw->parse(args, cur_arg, global.cli_fe, bind_conf, err);
+
+ /* FIXME: this is ugly, we don't have a way to collect warnings,
+ * yet some important bind keywords may report warnings that we
+ * must display.
+ */
+ if (((code & (ERR_WARN|ERR_FATAL|ERR_ALERT)) == ERR_WARN) && err && *err) {
+ indent_msg(err, 2);
+ ha_warning("parsing [%s:%d] : '%s %s' : %s\n", file, line, args[0], args[1], *err);
+ ha_free(err);
+ }
+
+ if (code & ~ERR_WARN) {
+ if (err && *err)
+ memprintf(err, "'%s %s' : '%s'", args[0], args[1], *err);
+ else
+ memprintf(err, "'%s %s' : error encountered while processing '%s'",
+ args[0], args[1], args[cur_arg]);
+ return -1;
+ }
+
+ cur_arg += 1 + kw->skip;
+ continue;
+ }
+
+ best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ memprintf(err, "'%s %s' : unknown keyword '%s'. Did you mean '%s' maybe ?",
+ args[0], args[1], args[cur_arg], best);
+ else
+ memprintf(err, "'%s %s' : unknown keyword '%s'.",
+ args[0], args[1], args[cur_arg]);
+ return -1;
+ }
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ l->accept = session_accept_fd;
+ l->default_target = global.cli_fe->default_target;
+ l->options |= LI_O_UNLIMITED; /* don't make the peers subject to global limits */
+ l->nice = -64; /* we want to boost priority for local stats */
+ global.maxsock++; /* for the listening socket */
+ }
+ }
+ else if (strcmp(args[1], "timeout") == 0) {
+ unsigned timeout;
+ const char *res = parse_time_err(args[2], &timeout, TIME_UNIT_MS);
+
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s %s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[2], args[0], args[1]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s %s' (minimum non-null value is 1 ms)",
+ args[2], args[0], args[1]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "'%s %s' : unexpected character '%c'", args[0], args[1], *res);
+ return -1;
+ }
+
+ if (!timeout) {
+ memprintf(err, "'%s %s' expects a positive value", args[0], args[1]);
+ return -1;
+ }
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", file, line)) == NULL) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a frontend", args[0], args[1]);
+ return -1;
+ }
+ }
+ global.cli_fe->timeout.client = MS_TO_TICKS(timeout);
+ }
+ else if (strcmp(args[1], "maxconn") == 0) {
+ int maxconn = atol(args[2]);
+
+ if (maxconn <= 0) {
+ memprintf(err, "'%s %s' expects a positive value", args[0], args[1]);
+ return -1;
+ }
+
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", file, line)) == NULL) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a frontend", args[0], args[1]);
+ return -1;
+ }
+ }
+ global.cli_fe->maxconn = maxconn;
+ }
+ else if (strcmp(args[1], "bind-process") == 0) { /* enable the socket only on some processes */
+ int cur_arg = 2;
+ unsigned long set = 0;
+
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", file, line)) == NULL) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a frontend", args[0], args[1]);
+ return -1;
+ }
+ }
+
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "all") == 0) {
+ set = 0;
+ break;
+ }
+ if (parse_process_number(args[cur_arg], &set, 1, NULL, err)) {
+ memprintf(err, "'%s %s' : %s", args[0], args[1], *err);
+ return -1;
+ }
+ cur_arg++;
+ }
+ }
+ else {
+ memprintf(err, "'%s' only supports 'socket', 'maxconn', 'bind-process' and 'timeout' (got '%s')", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
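+
+/* Illustrative "global" section statements accepted by this parser (the
+ * exact bind options such as "level" depend on the registered bind
+ * keywords):
+ *   global
+ *       stats socket /var/run/haproxy.sock level admin
+ *       stats timeout 2m
+ *       stats maxconn 10
+ */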
+
+/*
+ * This function exports the bound addresses of a <frontend> in the environment
+ * variable <varname>. Those addresses are separated by semicolons and prefixed
+ * with their type (abns@, unix@, sockpair@ etc)
+ * Return -1 upon error, 0 otherwise
+ */
+int listeners_setenv(struct proxy *frontend, const char *varname)
+{
+ struct buffer *trash = get_trash_chunk();
+ struct bind_conf *bind_conf;
+
+ if (frontend) {
+ list_for_each_entry(bind_conf, &frontend->conf.bind, by_fe) {
+ struct listener *l;
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ char addr[46];
+ char port[6];
+
+ /* separate listener by semicolons */
+ if (trash->data)
+ chunk_appendf(trash, ";");
+
+ if (l->rx.addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un;
+
+ un = (struct sockaddr_un *)&l->rx.addr;
+ if (un->sun_path[0] == '\0') {
+ chunk_appendf(trash, "abns@%s", un->sun_path+1);
+ } else {
+ chunk_appendf(trash, "unix@%s", un->sun_path);
+ }
+ } else if (l->rx.addr.ss_family == AF_INET) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(trash, "ipv4@%s:%s", addr, port);
+ } else if (l->rx.addr.ss_family == AF_INET6) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(trash, "ipv6@[%s]:%s", addr, port);
+ } else if (l->rx.addr.ss_family == AF_CUST_SOCKPAIR) {
+ chunk_appendf(trash, "sockpair@%d", ((struct sockaddr_in *)&l->rx.addr)->sin_addr.s_addr);
+ }
+ }
+ }
+ trash->area[trash->data++] = '\0';
+ if (setenv(varname, trash->area, 1) < 0)
+ return -1;
+ }
+
+ return 0;
+}
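+
+/* For example (illustrative), a CLI bound to a unix socket and a TCP address
+ * would export something like:
+ *   HAPROXY_CLI=unix@/var/run/haproxy.sock;ipv4@127.0.0.1:9999
+ */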
+
+int cli_socket_setenv(void)
+{
+ if (listeners_setenv(global.cli_fe, "HAPROXY_CLI") < 0)
+ return -1;
+ if (listeners_setenv(mworker_proxy, "HAPROXY_MASTER_CLI") < 0)
+ return -1;
+
+ return 0;
+}
+
+REGISTER_CONFIG_POSTPARSER("cli", cli_socket_setenv);
+
+/* Verifies that the CLI has a level at least as high as <level> (typically
+ * ACCESS_LVL_ADMIN). Returns 1 if OK, otherwise 0. In case of failure, an
+ * error message is prepared and the appctx's state is adjusted to print it,
+ * so the caller can simply stop processing when 0 is returned.
+ */
+int cli_has_level(struct appctx *appctx, int level)
+{
+ if ((appctx->cli_level & ACCESS_LVL_MASK) < level) {
+ cli_err(appctx, cli_permission_denied_msg);
+ return 0;
+ }
+ return 1;
+}
+
+/* same as cli_has_level but for the CLI proxy and without error message */
+int pcli_has_level(struct stream *s, int level)
+{
+ if ((s->pcli_flags & ACCESS_LVL_MASK) < level) {
+ return 0;
+ }
+ return 1;
+}
+
+/* Returns severity_output for the current session if set, or default for the socket */
+static int cli_get_severity_output(struct appctx *appctx)
+{
+ if (appctx->cli_severity_output)
+ return appctx->cli_severity_output;
+ return strm_li(appctx_strm(appctx))->bind_conf->severity_output;
+}
+
+/* Processes the CLI interpreter on the stats socket. This function is called
+ * from the CLI's IO handler running in an appctx context. The function returns
+ * 1 if the request was understood, otherwise zero (in which case an error
+ * message will be displayed). It is called with appctx->st0
+ * set to CLI_ST_GETREQ and presets ->st2 to 0 so that parsers don't have to do
+ * it. It will possibly leave st0 at CLI_ST_CALLBACK if the keyword needs to
+ * have its own I/O handler called again. Most of the time, parsers will only
+ * set st0 to CLI_ST_PRINT and put their message to be displayed into cli.msg.
+ * If a keyword parser is NULL and an I/O handler is declared, the I/O handler
+ * will automatically be used.
+ */
+static int cli_parse_request(struct appctx *appctx)
+{
+ char *args[MAX_CLI_ARGS + 1], *p, *end, *payload = NULL;
+ int i = 0;
+ struct cli_kw *kw;
+
+ appctx->_st2 = 0;
+
+ /* temporary for 2.6: let's make sure we clean the whole shared
+ * context.
+ */
+ if (sizeof(appctx->ctx) > sizeof(appctx->svc))
+ memset(&appctx->ctx, 0, sizeof(appctx->ctx));
+ else
+ memset(&appctx->svc, 0, sizeof(appctx->svc));
+
+ p = appctx->chunk->area;
+ end = p + appctx->chunk->data;
+
+ /*
+ * Get pointers on words.
+ * One extra slot is reserved to store a pointer on a null byte.
+ */
+ while (i < MAX_CLI_ARGS && p < end) {
+ int j, k;
+
+ /* skip leading spaces/tabs */
+ p += strspn(p, " \t");
+ if (!*p)
+ break;
+
+ if (strcmp(p, PAYLOAD_PATTERN) == 0) {
+ /* payload pattern recognized here, this is not an arg anymore,
+ * the payload starts at the first byte that follows the zero
+ * after the pattern.
+ */
+ payload = p + strlen(PAYLOAD_PATTERN) + 1;
+ break;
+ }
+
+ args[i] = p;
+ while (1) {
+ p += strcspn(p, " \t\\");
+ /* chars escaped using backslashes (\) */
+ if (*p == '\\') {
+ if (!*++p)
+ break;
+ if (!*++p)
+ break;
+ } else {
+ break;
+ }
+ }
+ *p++ = 0;
+
+ /* unescape backslashes (\) */
+ for (j = 0, k = 0; args[i][k]; k++) {
+ if (args[i][k] == '\\') {
+ if (args[i][k + 1] == '\\')
+ k++;
+ else
+ continue;
+ }
+ args[i][j] = args[i][k];
+ j++;
+ }
+ args[i][j] = 0;
+
+ i++;
+ }
+ /* fill unused slots */
+ p = appctx->chunk->area + appctx->chunk->data;
+ for (; i < MAX_CLI_ARGS + 1; i++)
+ args[i] = p;
+
+ kw = cli_find_kw(args);
+ if (!kw ||
+ (kw->level & ~appctx->cli_level & ACCESS_MASTER_ONLY) ||
+ (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ (appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) == (ACCESS_MASTER_ONLY|ACCESS_MASTER))) {
+ /* keyword not found in this mode */
+ cli_gen_usage_msg(appctx, args);
+ return 0;
+ }
+
+ /* don't handle expert mode commands if not in this mode. */
+ if (kw->level & ~appctx->cli_level & ACCESS_EXPERT) {
+ cli_err(appctx, "This command is restricted to expert mode only.\n");
+ return 0;
+ }
+
+ if (kw->level & ~appctx->cli_level & ACCESS_EXPERIMENTAL) {
+ cli_err(appctx, "This command is restricted to experimental mode only.\n");
+ return 0;
+ }
+
+ if (kw->level == ACCESS_EXPERT)
+ mark_tainted(TAINTED_CLI_EXPERT_MODE);
+ else if (kw->level == ACCESS_EXPERIMENTAL)
+ mark_tainted(TAINTED_CLI_EXPERIMENTAL_MODE);
+
+ appctx->io_handler = kw->io_handler;
+ appctx->io_release = kw->io_release;
+
+ if (kw->parse && kw->parse(args, payload, appctx, kw->private) != 0)
+ goto fail;
+
+ /* kw->parse could set its own io_handler or io_release handler */
+ if (!appctx->io_handler)
+ goto fail;
+
+ appctx->st0 = CLI_ST_CALLBACK;
+ return 1;
+fail:
+ appctx->io_handler = NULL;
+ appctx->io_release = NULL;
+ return 1;
+}
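+
+/* Illustrative payload usage on the CLI (the command shown is only an
+ * example): a line ending in "<<" switches to payload mode and an empty line
+ * terminates the payload:
+ *   set ssl cert /etc/haproxy/site.pem <<
+ *   -----BEGIN CERTIFICATE-----
+ *   ...
+ *   <empty line>
+ */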
+
+/* prepends then outputs the argument msg with a syslog-type severity depending on severity_output value */
+static int cli_output_msg(struct channel *chn, const char *msg, int severity, int severity_output)
+{
+ struct buffer *tmp;
+
+ if (likely(severity_output == CLI_SEVERITY_NONE))
+ return ci_putblk(chn, msg, strlen(msg));
+
+ tmp = get_trash_chunk();
+ chunk_reset(tmp);
+
+ if (severity < 0 || severity > 7) {
+ ha_warning("socket command feedback with invalid severity %d", severity);
+ chunk_printf(tmp, "[%d]: ", severity);
+ }
+ else {
+ switch (severity_output) {
+ case CLI_SEVERITY_NUMBER:
+ chunk_printf(tmp, "[%d]: ", severity);
+ break;
+ case CLI_SEVERITY_STRING:
+ chunk_printf(tmp, "[%s]: ", log_levels[severity]);
+ break;
+ default:
+ ha_warning("Unrecognized severity output %d", severity_output);
+ }
+ }
+ chunk_appendf(tmp, "%s", msg);
+
+ return ci_putblk(chn, tmp->area, strlen(tmp->area));
+}
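+
+/* For example (illustrative), a message emitted with severity LOG_ERR (3)
+ * would be prefixed as follows depending on severity_output:
+ *   CLI_SEVERITY_NUMBER: "[3]: Can't find backend."
+ *   CLI_SEVERITY_STRING: "[err]: Can't find backend."
+ */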
+
+/* This I/O handler runs as an applet embedded in a stream connector. It is
+ * used to process I/O from/to the stats unix socket. The system relies on a
+ * state machine handling requests and various responses. We read a request,
+ * then we process it and send the response, and we possibly display a prompt.
+ * Then we can read again. The state is stored in appctx->st0 and is one of the
+ * CLI_ST_* constants. appctx->st1 is used to indicate whether prompt is enabled
+ * or not.
+ */
+static void cli_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct bind_conf *bind_conf = strm_li(__sc_strm(sc))->bind_conf;
+ int reql;
+ int len;
+
+ if (unlikely(sc->state == SC_ST_DIS || sc->state == SC_ST_CLO))
+ goto out;
+
+ /* Check if the input buffer is available. */
+ if (res->buf.size == 0) {
+ /* buf.size==0 means we failed to get a buffer and were
+ * already subscribed to a wait list to get a buffer.
+ */
+ goto out;
+ }
+
+ while (1) {
+ if (appctx->st0 == CLI_ST_INIT) {
+ /* CLI/stats not initialized yet */
+ memset(&appctx->ctx, 0, sizeof(appctx->ctx));
+ /* reset severity to default at init */
+ appctx->cli_severity_output = bind_conf->severity_output;
+ applet_reset_svcctx(appctx);
+ appctx->st0 = CLI_ST_GETREQ;
+ appctx->cli_level = bind_conf->level;
+ }
+ else if (appctx->st0 == CLI_ST_END) {
+ /* Let's close for real now. We just close the request
+ * side, the conditions below will complete if needed.
+ */
+ sc_shutw(sc);
+ free_trash_chunk(appctx->chunk);
+ appctx->chunk = NULL;
+ break;
+ }
+ else if (appctx->st0 == CLI_ST_GETREQ) {
+ char *str;
+
+ /* use a trash chunk to store received data */
+ if (!appctx->chunk) {
+ appctx->chunk = alloc_trash_chunk();
+ if (!appctx->chunk) {
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+ }
+
+ str = appctx->chunk->area + appctx->chunk->data;
+
+ /* ensure we have some output room left in the event we
+ * would want to return some info right after parsing.
+ */
+ if (buffer_almost_full(sc_ib(sc))) {
+ sc_need_room(sc);
+ break;
+ }
+
+ /* payload doesn't take escapes nor does it end on semi-colons, so
+ * we use the regular getline. Normal mode however must stop on
+ * LFs and semi-colons that are not prefixed by a backslash. Note
+ * that we reserve one byte at the end to insert a trailing nul byte.
+ */
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)
+ reql = co_getline(sc_oc(sc), str,
+ appctx->chunk->size - appctx->chunk->data - 1);
+ else
+ reql = co_getdelim(sc_oc(sc), str,
+ appctx->chunk->size - appctx->chunk->data - 1,
+ "\n;", '\\');
+
+ if (reql <= 0) { /* closed or EOL not found */
+ if (reql == 0)
+ break;
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+
+ if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)) {
+ /* seek for a possible unescaped semi-colon. If we find
+ * one, we replace it with an LF and skip only this part.
+ */
+ for (len = 0; len < reql; len++) {
+ if (str[len] == '\\') {
+ len++;
+ continue;
+ }
+ if (str[len] == ';') {
+ str[len] = '\n';
+ reql = len + 1;
+ break;
+ }
+ }
+ }
+
+ /* now it is time to check that we have a full line,
+ * remove the trailing \n and possibly \r, then cut the
+ * line.
+ */
+ len = reql - 1;
+ if (str[len] != '\n') {
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+
+ if (len && str[len-1] == '\r')
+ len--;
+
+ str[len] = '\0';
+ appctx->chunk->data += len;
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) {
+ appctx->chunk->area[appctx->chunk->data] = '\n';
+ appctx->chunk->area[appctx->chunk->data + 1] = 0;
+ appctx->chunk->data++;
+ }
+
+ appctx->st0 = CLI_ST_PROMPT;
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) {
+ /* empty line */
+ if (!len) {
+ /* remove the last two \n */
+ appctx->chunk->data -= 2;
+ appctx->chunk->area[appctx->chunk->data] = 0;
+ cli_parse_request(appctx);
+ chunk_reset(appctx->chunk);
+ /* NB: cli_parse_request() may have put
+ * another CLI_ST_* into appctx->st0.
+ */
+
+ appctx->st1 &= ~APPCTX_CLI_ST1_PAYLOAD;
+ }
+ }
+ else {
+ /*
+ * Look for the "payload start" pattern at the end of a line
+ * Its location is not remembered here, this is just to switch
+ * to a gathering mode.
+ */
+ if (strcmp(appctx->chunk->area + appctx->chunk->data - strlen(PAYLOAD_PATTERN), PAYLOAD_PATTERN) == 0) {
+ appctx->st1 |= APPCTX_CLI_ST1_PAYLOAD;
+ appctx->chunk->data++; // keep the trailing \0 after '<<'
+ }
+ else {
+ /* no payload, the command is complete: parse the request */
+ cli_parse_request(appctx);
+ chunk_reset(appctx->chunk);
+ }
+ }
+
+ /* re-adjust req buffer */
+ co_skip(sc_oc(sc), reql);
+ req->flags |= CF_READ_DONTWAIT; /* we plan to read small requests */
+ }
+ else { /* output functions */
+ struct cli_print_ctx *ctx;
+ const char *msg;
+ int sev;
+
+ switch (appctx->st0) {
+ case CLI_ST_PROMPT:
+ break;
+ case CLI_ST_PRINT: /* print const message in msg */
+ case CLI_ST_PRINT_ERR: /* print const error in msg */
+ case CLI_ST_PRINT_DYN: /* print dyn message in msg, free */
+ case CLI_ST_PRINT_FREE: /* print dyn error in err, free */
+ /* the message is in the svcctx */
+ ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ if (appctx->st0 == CLI_ST_PRINT || appctx->st0 == CLI_ST_PRINT_ERR) {
+ sev = appctx->st0 == CLI_ST_PRINT_ERR ?
+ LOG_ERR : ctx->severity;
+ msg = ctx->msg;
+ }
+ else if (appctx->st0 == CLI_ST_PRINT_DYN || appctx->st0 == CLI_ST_PRINT_FREE) {
+ sev = appctx->st0 == CLI_ST_PRINT_FREE ?
+ LOG_ERR : ctx->severity;
+ msg = ctx->err;
+ if (!msg) {
+ sev = LOG_ERR;
+ msg = "Out of memory.\n";
+ }
+ }
+ else {
+ sev = LOG_ERR;
+ msg = "Internal error.\n";
+ }
+
+ if (cli_output_msg(res, msg, sev, cli_get_severity_output(appctx)) != -1) {
+ if (appctx->st0 == CLI_ST_PRINT_FREE ||
+ appctx->st0 == CLI_ST_PRINT_DYN) {
+ ha_free(&ctx->err);
+ }
+ appctx->st0 = CLI_ST_PROMPT;
+ }
+ else
+ sc_need_room(sc);
+ break;
+
+ case CLI_ST_CALLBACK: /* use custom pointer */
+ if (appctx->io_handler)
+ if (appctx->io_handler(appctx)) {
+ appctx->st0 = CLI_ST_PROMPT;
+ if (appctx->io_release) {
+ appctx->io_release(appctx);
+ appctx->io_release = NULL;
+ }
+ }
+ break;
+ default: /* abnormal state */
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ break;
+ }
+
+ /* The post-command prompt is either LF alone or LF + '> ' in interactive mode */
+ if (appctx->st0 == CLI_ST_PROMPT) {
+ const char *prompt = "";
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PROMPT) {
+ /*
+ * when entering a payload with interactive mode, change the prompt
+ * to emphasize that more data can still be sent
+ */
+ if (appctx->chunk->data && appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)
+ prompt = "+ ";
+ else
+ prompt = "\n> ";
+ }
+ else {
+ if (!(appctx->st1 & (APPCTX_CLI_ST1_PAYLOAD|APPCTX_CLI_ST1_NOLF)))
+ prompt = "\n";
+ }
+
+ if (applet_putstr(appctx, prompt) != -1) {
+ applet_reset_svcctx(appctx);
+ appctx->st0 = CLI_ST_GETREQ;
+ }
+ }
+
+ /* If the output functions are still there, it means they require more room. */
+ if (appctx->st0 >= CLI_ST_OUTPUT) {
+ applet_wont_consume(appctx);
+ break;
+ }
+
+ /* Now we close the output if one of the writers did so,
+ * or if we're not in interactive mode and the request
+ * buffer is empty. This still allows pipelined requests
+ * to be sent in non-interactive mode.
+ */
+ if (((res->flags & (CF_SHUTW|CF_SHUTW_NOW))) ||
+ (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && !co_data(req) && (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)))) {
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+
+ /* switch state back to GETREQ to read next requests */
+ applet_reset_svcctx(appctx);
+ appctx->st0 = CLI_ST_GETREQ;
+ applet_will_consume(appctx);
+
+ /* reactivate the \n at the end of the response for the next command */
+ appctx->st1 &= ~APPCTX_CLI_ST1_NOLF;
+
+ /* this forces us to yield between pipelined commands and
+ * avoid extremely long latencies (e.g. "del map" etc). In
+ * addition this increases the likelihood that the stream
+ * refills the buffer with new bytes in non-interactive
+ * mode, avoiding to close on apparently empty commands.
+ */
+ if (co_data(sc_oc(sc))) {
+ appctx_wakeup(appctx);
+ goto out;
+ }
+ }
+ }
+
+ if ((res->flags & CF_SHUTR) && (sc->state == SC_ST_EST)) {
+ DPRINTF(stderr, "%s@%d: sc to buf closed. req=%08x, res=%08x, st=%d\n",
+ __FUNCTION__, __LINE__, req->flags, res->flags, sc->state);
+ /* Other side has closed, let's abort if we have no more processing to do
+ * and nothing more to consume. This is comparable to a broken pipe, so
+ * we forward the close to the request side so that it flows upstream to
+ * the client.
+ */
+ sc_shutw(sc);
+ }
+
+ if ((req->flags & CF_SHUTW) && (sc->state == SC_ST_EST) && (appctx->st0 < CLI_ST_OUTPUT)) {
+ DPRINTF(stderr, "%s@%d: buf to sc closed. req=%08x, res=%08x, st=%d\n",
+ __FUNCTION__, __LINE__, req->flags, res->flags, sc->state);
+ /* We have no more processing to do, and nothing more to send, and
+ * the client side has closed. So we'll forward this state downstream
+ * on the response buffer.
+ */
+ sc_shutr(sc);
+ res->flags |= CF_READ_NULL;
+ }
+
+ out:
+ DPRINTF(stderr, "%s@%d: st=%d, rqf=%x, rpf=%x, rqh=%lu, rqs=%lu, rh=%lu, rs=%lu\n",
+ __FUNCTION__, __LINE__,
+ sc->state, req->flags, res->flags, ci_data(req), co_data(req), ci_data(res), co_data(res));
+}
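+
+/* Illustrative example (not part of the code): in interactive mode, each
+ * completed command is followed by "\n> ", while entering a multi-line
+ * payload switches the prompt to "+ " until the empty line terminating the
+ * payload is received. In non-interactive mode only a bare LF is emitted,
+ * unless the command suppressed it via APPCTX_CLI_ST1_NOLF.
+ */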
+
+/* This is called when the stream connector is closed. For instance, upon an
+ * external abort, we won't call the i/o handler anymore so we may need to
+ * remove back references to the stream currently being dumped.
+ */
+static void cli_release_handler(struct appctx *appctx)
+{
+ free_trash_chunk(appctx->chunk);
+ appctx->chunk = NULL;
+
+ if (appctx->io_release) {
+ appctx->io_release(appctx);
+ appctx->io_release = NULL;
+ }
+ else if (appctx->st0 == CLI_ST_PRINT_FREE || appctx->st0 == CLI_ST_PRINT_DYN) {
+ struct cli_print_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ ha_free(&ctx->err);
+ }
+}
+
+/* This function dumps all environment variables to the buffer. It returns 0
+ * if the output buffer is full and it needs to be called again, otherwise
+ * non-zero. It takes its context from the show_env_ctx in svcctx, and will
+ * start from ->var and dump only one variable if ->show_one is set.
+ */
+static int cli_io_handler_show_env(struct appctx *appctx)
+{
+ struct show_env_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ char **var = ctx->var;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ chunk_reset(&trash);
+
+ /* iterate over the remaining environment variables, stopping when the
+ * output buffer is full.
+ */
+ while (*var) {
+ chunk_printf(&trash, "%s\n", *var);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ if (ctx->show_one)
+ break;
+ var++;
+ ctx->var = var;
+ }
+
+ /* dump complete */
+ return 1;
+}
+
+/* This function dumps all file descriptors states (or the requested one) to
+ * the buffer. It returns 0 if the output buffer is full and it needs to be
+ * called again, otherwise non-zero. It takes its context from the show_fd_ctx
+ * in svcctx, only dumps one entry if ->show_one is non-zero, and (re)starts
+ * from ->fd.
+ */
+static int cli_io_handler_show_fd(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct show_fd_ctx *fdctx = appctx->svcctx;
+ int fd = fdctx->fd;
+ int ret = 1;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto end;
+
+ chunk_reset(&trash);
+
+ /* isolate the threads once per round. We're limited to a buffer worth
+ * of output anyway, it cannot last very long.
+ */
+ thread_isolate();
+
+ /* iterate over all file descriptors, stopping when the output buffer
+ * is full.
+ */
+ while (fd >= 0 && fd < global.maxsock) {
+ struct fdtab fdt;
+ const struct listener *li = NULL;
+ const struct server *sv = NULL;
+ const struct proxy *px = NULL;
+ const struct connection *conn = NULL;
+ const struct mux_ops *mux = NULL;
+ const struct xprt_ops *xprt = NULL;
+ const void *ctx = NULL;
+ const void *xprt_ctx = NULL;
+ uint32_t conn_flags = 0;
+ uint8_t conn_err = 0;
+ int is_back = 0;
+ int suspicious = 0;
+
+ fdt = fdtab[fd];
+
+ /* When DEBUG_FD is set, we also report closed FDs that have a
+ * non-null event count to detect stuck ones.
+ */
+ if (!fdt.owner) {
+#ifdef DEBUG_FD
+ if (!fdt.event_count)
+#endif
+ goto skip; // closed
+ }
+ else if (fdt.iocb == sock_conn_iocb) {
+ conn = (const struct connection *)fdt.owner;
+ conn_flags = conn->flags;
+ conn_err = conn->err_code;
+ mux = conn->mux;
+ ctx = conn->ctx;
+ xprt = conn->xprt;
+ xprt_ctx = conn->xprt_ctx;
+ li = objt_listener(conn->target);
+ sv = objt_server(conn->target);
+ px = objt_proxy(conn->target);
+ is_back = conn_is_back(conn);
+ if (atleast2(fdt.thread_mask))
+ suspicious = 1;
+ if (conn->handle.fd != fd)
+ suspicious = 1;
+ }
+ else if (fdt.iocb == sock_accept_iocb)
+ li = fdt.owner;
+
+ if (!fdt.thread_mask)
+ suspicious = 1;
+
+ chunk_printf(&trash,
+ " %5d : st=0x%06x(%c%c %c%c%c%c%c W:%c%c%c R:%c%c%c) ref=%#x gid=%d tmask=0x%lx umask=0x%lx prmsk=0x%lx pwmsk=0x%lx owner=%p iocb=%p(",
+ fd,
+ fdt.state,
+ (fdt.state & FD_CLONED) ? 'C' : 'c',
+ (fdt.state & FD_LINGER_RISK) ? 'L' : 'l',
+ (fdt.state & FD_POLL_HUP) ? 'H' : 'h',
+ (fdt.state & FD_POLL_ERR) ? 'E' : 'e',
+ (fdt.state & FD_POLL_OUT) ? 'O' : 'o',
+ (fdt.state & FD_POLL_PRI) ? 'P' : 'p',
+ (fdt.state & FD_POLL_IN) ? 'I' : 'i',
+ (fdt.state & FD_EV_SHUT_W) ? 'S' : 's',
+ (fdt.state & FD_EV_READY_W) ? 'R' : 'r',
+ (fdt.state & FD_EV_ACTIVE_W) ? 'A' : 'a',
+ (fdt.state & FD_EV_SHUT_R) ? 'S' : 's',
+ (fdt.state & FD_EV_READY_R) ? 'R' : 'r',
+ (fdt.state & FD_EV_ACTIVE_R) ? 'A' : 'a',
+ (fdt.refc_tgid >> 4) & 0xffff,
+ (fdt.refc_tgid) & 0xffff,
+ fdt.thread_mask, fdt.update_mask,
+ polled_mask[fd].poll_recv,
+ polled_mask[fd].poll_send,
+ fdt.owner,
+ fdt.iocb);
+ resolve_sym_name(&trash, NULL, fdt.iocb);
+
+ if (!fdt.owner) {
+ chunk_appendf(&trash, ")");
+ }
+ else if (fdt.iocb == sock_conn_iocb) {
+ chunk_appendf(&trash, ") back=%d cflg=0x%08x cerr=%d", is_back, conn_flags, conn_err);
+
+ if (conn->handle.fd != fd) {
+ chunk_appendf(&trash, " fd=%d(BOGUS)", conn->handle.fd);
+ suspicious = 1;
+ } else {
+ struct sockaddr_storage sa;
+ socklen_t salen;
+
+ salen = sizeof(sa);
+ if (getsockname(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ chunk_appendf(&trash, " fam=ipv4 lport=%d", ntohs(((const struct sockaddr_in *)&sa)->sin_port));
+ else if (sa.ss_family == AF_INET6)
+ chunk_appendf(&trash, " fam=ipv6 lport=%d", ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port));
+ else if (sa.ss_family == AF_UNIX)
+ chunk_appendf(&trash, " fam=unix");
+ }
+
+ salen = sizeof(sa);
+ if (getpeername(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ chunk_appendf(&trash, " rport=%d", ntohs(((const struct sockaddr_in *)&sa)->sin_port));
+ else if (sa.ss_family == AF_INET6)
+ chunk_appendf(&trash, " rport=%d", ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port));
+ }
+ }
+
+ if (px)
+ chunk_appendf(&trash, " px=%s", px->id);
+ else if (sv)
+ chunk_appendf(&trash, " sv=%s/%s", sv->proxy->id, sv->id);
+ else if (li)
+ chunk_appendf(&trash, " fe=%s", li->bind_conf->frontend->id);
+
+ if (mux) {
+ chunk_appendf(&trash, " mux=%s ctx=%p", mux->name, ctx);
+ if (!ctx)
+ suspicious = 1;
+ if (mux->show_fd)
+ suspicious |= mux->show_fd(&trash, fdt.owner);
+ }
+ else
+ chunk_appendf(&trash, " nomux");
+
+ chunk_appendf(&trash, " xprt=%s", xprt ? xprt->name : "");
+ if (xprt) {
+ if (xprt_ctx || xprt->show_fd)
+ chunk_appendf(&trash, " xprt_ctx=%p", xprt_ctx);
+ if (xprt->show_fd)
+ suspicious |= xprt->show_fd(&trash, conn, xprt_ctx);
+ }
+ }
+ else if (fdt.iocb == sock_accept_iocb) {
+ struct sockaddr_storage sa;
+ socklen_t salen;
+
+ chunk_appendf(&trash, ") l.st=%s fe=%s",
+ listener_state_str(li),
+ li->bind_conf->frontend->id);
+
+ salen = sizeof(sa);
+ if (getsockname(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ chunk_appendf(&trash, " fam=ipv4 lport=%d", ntohs(((const struct sockaddr_in *)&sa)->sin_port));
+ else if (sa.ss_family == AF_INET6)
+ chunk_appendf(&trash, " fam=ipv6 lport=%d", ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port));
+ else if (sa.ss_family == AF_UNIX)
+ chunk_appendf(&trash, " fam=unix");
+ }
+ }
+ else
+ chunk_appendf(&trash, ")");
+
+#ifdef DEBUG_FD
+ chunk_appendf(&trash, " evcnt=%u", fdtab[fd].event_count);
+ if (fdtab[fd].event_count >= 1000000)
+ suspicious = 1;
+#endif
+ chunk_appendf(&trash, "%s\n", suspicious ? " !" : "");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ fdctx->fd = fd;
+ ret = 0;
+ break;
+ }
+ skip:
+ if (fdctx->show_one)
+ break;
+
+ fd++;
+ }
+
+ end:
+ /* dump complete */
+
+ thread_release();
+ return ret;
+}
+
+/* This function dumps some activity counters used by developers and support to
+ * rule out some hypothesis during bug reports. It returns 0 if the output
+ * buffer is full and it needs to be called again, otherwise non-zero. It dumps
+ * everything at once in the buffer and is not designed to do it in multiple
+ * passes.
+ */
+static int cli_io_handler_show_activity(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int thr;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ chunk_reset(&trash);
+
+#undef SHOW_TOT
+#define SHOW_TOT(t, x) \
+ do { \
+ unsigned int _v[MAX_THREADS]; \
+ unsigned int _tot; \
+ const unsigned int _nbt = global.nbthread; \
+ _tot = t = 0; \
+ do { \
+ _tot += _v[t] = (x); \
+ } while (++t < _nbt); \
+ if (_nbt == 1) { \
+ chunk_appendf(&trash, " %u\n", _tot); \
+ break; \
+ } \
+ chunk_appendf(&trash, " %u [", _tot); \
+ for (t = 0; t < _nbt; t++) \
+ chunk_appendf(&trash, " %u", _v[t]); \
+ chunk_appendf(&trash, " ]\n"); \
+ } while (0)
+
+#undef SHOW_AVG
+#define SHOW_AVG(t, x) \
+ do { \
+ unsigned int _v[MAX_THREADS]; \
+ unsigned int _tot; \
+ const unsigned int _nbt = global.nbthread; \
+ _tot = t = 0; \
+ do { \
+ _tot += _v[t] = (x); \
+ } while (++t < _nbt); \
+ if (_nbt == 1) { \
+ chunk_appendf(&trash, " %u\n", _tot); \
+ break; \
+ } \
+ chunk_appendf(&trash, " %u [", (_tot + _nbt/2) / _nbt); \
+ for (t = 0; t < _nbt; t++) \
+ chunk_appendf(&trash, " %u", _v[t]); \
+ chunk_appendf(&trash, " ]\n"); \
+ } while (0)
+
+ chunk_appendf(&trash, "thread_id: %u (%u..%u)\n", tid + 1, 1, global.nbthread);
+ chunk_appendf(&trash, "date_now: %lu.%06lu\n", (long)now.tv_sec, (long)now.tv_usec);
+ chunk_appendf(&trash, "ctxsw:"); SHOW_TOT(thr, activity[thr].ctxsw);
+ chunk_appendf(&trash, "tasksw:"); SHOW_TOT(thr, activity[thr].tasksw);
+ chunk_appendf(&trash, "empty_rq:"); SHOW_TOT(thr, activity[thr].empty_rq);
+ chunk_appendf(&trash, "long_rq:"); SHOW_TOT(thr, activity[thr].long_rq);
+ chunk_appendf(&trash, "loops:"); SHOW_TOT(thr, activity[thr].loops);
+ chunk_appendf(&trash, "wake_tasks:"); SHOW_TOT(thr, activity[thr].wake_tasks);
+ chunk_appendf(&trash, "wake_signal:"); SHOW_TOT(thr, activity[thr].wake_signal);
+ chunk_appendf(&trash, "poll_io:"); SHOW_TOT(thr, activity[thr].poll_io);
+ chunk_appendf(&trash, "poll_exp:"); SHOW_TOT(thr, activity[thr].poll_exp);
+ chunk_appendf(&trash, "poll_drop_fd:"); SHOW_TOT(thr, activity[thr].poll_drop_fd);
+ chunk_appendf(&trash, "poll_skip_fd:"); SHOW_TOT(thr, activity[thr].poll_skip_fd);
+ chunk_appendf(&trash, "conn_dead:"); SHOW_TOT(thr, activity[thr].conn_dead);
+ chunk_appendf(&trash, "stream_calls:"); SHOW_TOT(thr, activity[thr].stream_calls);
+ chunk_appendf(&trash, "pool_fail:"); SHOW_TOT(thr, activity[thr].pool_fail);
+ chunk_appendf(&trash, "buf_wait:"); SHOW_TOT(thr, activity[thr].buf_wait);
+ chunk_appendf(&trash, "cpust_ms_tot:"); SHOW_TOT(thr, activity[thr].cpust_total / 2);
+ chunk_appendf(&trash, "cpust_ms_1s:"); SHOW_TOT(thr, read_freq_ctr(&activity[thr].cpust_1s) / 2);
+ chunk_appendf(&trash, "cpust_ms_15s:"); SHOW_TOT(thr, read_freq_ctr_period(&activity[thr].cpust_15s, 15000) / 2);
+ chunk_appendf(&trash, "avg_loop_us:"); SHOW_AVG(thr, swrate_avg(activity[thr].avg_loop_us, TIME_STATS_SAMPLES));
+ chunk_appendf(&trash, "accepted:"); SHOW_TOT(thr, activity[thr].accepted);
+ chunk_appendf(&trash, "accq_pushed:"); SHOW_TOT(thr, activity[thr].accq_pushed);
+ chunk_appendf(&trash, "accq_full:"); SHOW_TOT(thr, activity[thr].accq_full);
+#ifdef USE_THREAD
+ chunk_appendf(&trash, "accq_ring:"); SHOW_TOT(thr, (accept_queue_rings[thr].tail - accept_queue_rings[thr].head + ACCEPT_QUEUE_SIZE) % ACCEPT_QUEUE_SIZE);
+ chunk_appendf(&trash, "fd_takeover:"); SHOW_TOT(thr, activity[thr].fd_takeover);
+#endif
+
+#if defined(DEBUG_DEV)
+ /* keep these ones at the end */
+ chunk_appendf(&trash, "ctr0:"); SHOW_TOT(thr, activity[thr].ctr0);
+ chunk_appendf(&trash, "ctr1:"); SHOW_TOT(thr, activity[thr].ctr1);
+ chunk_appendf(&trash, "ctr2:"); SHOW_TOT(thr, activity[thr].ctr2);
+#endif
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ chunk_reset(&trash);
+ chunk_printf(&trash, "[output too large, cannot dump]\n");
+ }
+
+#undef SHOW_AVG
+#undef SHOW_TOT
+ /* dump complete */
+ return 1;
+}
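+
+/* As an illustration (hypothetical numbers), with two threads each counter
+ * is printed as the total followed by the per-thread values between
+ * brackets:
+ *   ctxsw: 1234 [ 600 634 ]
+ * while a single-threaded process only prints the total.
+ */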
+
+/*
+ * CLI IO handler for `show cli sockets`.
+ * Uses the svcctx as a show_sock_ctx to store/retrieve the bind_conf and the
+ * listener pointers.
+ */
+static int cli_io_handler_show_cli_sock(struct appctx *appctx)
+{
+ struct show_sock_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct bind_conf *bind_conf = ctx->bind_conf;
+
+ if (!global.cli_fe)
+ goto done;
+
+ chunk_reset(&trash);
+
+ if (!bind_conf) {
+ /* first call */
+ if (applet_putstr(appctx, "# socket lvl processes\n") == -1)
+ goto full;
+ bind_conf = LIST_ELEM(global.cli_fe->conf.bind.n, typeof(bind_conf), by_fe);
+ }
+
+ list_for_each_entry_from(bind_conf, &global.cli_fe->conf.bind, by_fe) {
+ struct listener *l = ctx->listener;
+
+ if (!l)
+ l = LIST_ELEM(bind_conf->listeners.n, typeof(l), by_bind);
+
+ list_for_each_entry_from(l, &bind_conf->listeners, by_bind) {
+ char addr[46];
+ char port[6];
+
+ if (l->rx.addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un;
+
+ un = (struct sockaddr_un *)&l->rx.addr;
+ if (un->sun_path[0] == '\0') {
+ chunk_appendf(&trash, "abns@%s ", un->sun_path+1);
+ } else {
+ chunk_appendf(&trash, "unix@%s ", un->sun_path);
+ }
+ } else if (l->rx.addr.ss_family == AF_INET) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(&trash, "ipv4@%s:%s ", addr, port);
+ } else if (l->rx.addr.ss_family == AF_INET6) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(&trash, "ipv6@[%s]:%s ", addr, port);
+ } else if (l->rx.addr.ss_family == AF_CUST_SOCKPAIR) {
+ chunk_appendf(&trash, "sockpair@%d ", ((struct sockaddr_in *)&l->rx.addr)->sin_addr.s_addr);
+ } else
+ chunk_appendf(&trash, "unknown ");
+
+ if ((bind_conf->level & ACCESS_LVL_MASK) == ACCESS_LVL_ADMIN)
+ chunk_appendf(&trash, "admin ");
+ else if ((bind_conf->level & ACCESS_LVL_MASK) == ACCESS_LVL_OPER)
+ chunk_appendf(&trash, "operator ");
+ else if ((bind_conf->level & ACCESS_LVL_MASK) == ACCESS_LVL_USER)
+ chunk_appendf(&trash, "user ");
+ else
+ chunk_appendf(&trash, " ");
+
+ chunk_appendf(&trash, "all\n");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ctx->bind_conf = bind_conf;
+ ctx->listener = l;
+ goto full;
+ }
+ }
+ }
+ done:
+ return 1;
+ full:
+ return 0;
+}
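+
+/* Example "show cli sockets" output (illustrative values):
+ *   # socket lvl processes
+ *   unix@/var/run/haproxy.sock admin all
+ *   ipv4@127.0.0.1:9999 operator all
+ */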
+
+
+/* parse a "show env" CLI request. Returns 0 if it needs to continue, 1 if it
+ * wants to stop here. It reserves a show_env_ctx where it puts the variable
+ * to be dumped as well as a flag if a single variable is requested,
+ * otherwise it puts environ there.
+ */
+static int cli_parse_show_env(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_env_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ extern char **environ;
+ char **var;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ var = environ;
+
+ if (*args[2]) {
+ int len = strlen(args[2]);
+
+ for (; *var; var++) {
+ if (strncmp(*var, args[2], len) == 0 &&
+ (*var)[len] == '=')
+ break;
+ }
+ if (!*var)
+ return cli_err(appctx, "Variable not found\n");
+
+ ctx->show_one = 1;
+ }
+ ctx->var = var;
+ return 0;
+}
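+
+/* Typical usage (illustrative): "show env" dumps every variable known to
+ * the process, while "show env PATH" dumps only PATH, or reports
+ * "Variable not found" if it is not set.
+ */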
+
+/* parse a "show fd" CLI request. Returns 0 if it needs to continue, 1 if it
+ * wants to stop here. It sets a show_fd_ctx context where, if a specific fd is
+ * requested, it puts the FD number into ->fd and sets ->show_one to 1.
+ */
+static int cli_parse_show_fd(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_fd_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (*args[2]) {
+ ctx->fd = atoi(args[2]);
+ ctx->show_one = 1;
+ }
+ return 0;
+}
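+
+/* Typical usage (illustrative): "show fd" dumps all file descriptors in
+ * use, while "show fd 42" restricts the dump to file descriptor 42.
+ */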
+
+/* parse a "set timeout" CLI request. It always returns 1. */
+static int cli_parse_set_timeout(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct stream *s = appctx_strm(appctx);
+
+ if (strcmp(args[2], "cli") == 0) {
+ unsigned timeout;
+ const char *res;
+
+ if (!*args[3])
+ return cli_err(appctx, "Expects an integer value.\n");
+
+ res = parse_time_err(args[3], &timeout, TIME_UNIT_S);
+ if (res || timeout < 1)
+ return cli_err(appctx, "Invalid timeout value.\n");
+
+ s->req.rto = s->res.wto = 1 + MS_TO_TICKS(timeout*1000);
+ task_wakeup(s->task, TASK_WOKEN_MSG); // recompute timeouts
+ return 1;
+ }
+
+ return cli_err(appctx, "'set timeout' only supports 'cli'.\n");
+}
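+
+/* Example (illustrative, assuming a stats socket at /var/run/haproxy.sock):
+ *   $ echo "set timeout cli 60" | socat stdio /var/run/haproxy.sock
+ * sets the current CLI session's timeout to 60 seconds.
+ */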
+
+/* parse a "set maxconn global" command. It always returns 1. */
+static int cli_parse_set_maxconn_global(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int v;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Expects an integer value.\n");
+
+ v = atoi(args[3]);
+ if (v > global.hardmaxconn)
+ return cli_err(appctx, "Value out of range.\n");
+
+ /* check for unlimited values */
+ if (v <= 0)
+ v = global.hardmaxconn;
+
+ global.maxconn = v;
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ return 1;
+}
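+
+/* Example (illustrative): "set maxconn global 1000" lowers the per-process
+ * connection limit to 1000; a value of 0 resets it to the hard limit set
+ * at startup.
+ */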
+
+static int set_severity_output(int *target, char *argument)
+{
+ if (strcmp(argument, "none") == 0) {
+ *target = CLI_SEVERITY_NONE;
+ return 1;
+ }
+ else if (strcmp(argument, "number") == 0) {
+ *target = CLI_SEVERITY_NUMBER;
+ return 1;
+ }
+ else if (strcmp(argument, "string") == 0) {
+ *target = CLI_SEVERITY_STRING;
+ return 1;
+ }
+ return 0;
+}
+
+/* parse a "set severity-output" command. */
+static int cli_parse_set_severity_output(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (*args[2] && set_severity_output(&appctx->cli_severity_output, args[2]))
+ return 0;
+
+ return cli_err(appctx, "one of 'none', 'number', 'string' is a required argument\n");
+}
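+
+/* Example (illustrative): after "set severity-output string", subsequent
+ * replies on this session carry their severity level as a string prefix,
+ * while "number" uses the numeric syslog level and "none" restores bare
+ * messages.
+ */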
+
+
+/* show the level of the current CLI session */
+static int cli_parse_show_lvl(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if ((appctx->cli_level & ACCESS_LVL_MASK) == ACCESS_LVL_ADMIN)
+ return cli_msg(appctx, LOG_INFO, "admin\n");
+ else if ((appctx->cli_level & ACCESS_LVL_MASK) == ACCESS_LVL_OPER)
+ return cli_msg(appctx, LOG_INFO, "operator\n");
+ else if ((appctx->cli_level & ACCESS_LVL_MASK) == ACCESS_LVL_USER)
+ return cli_msg(appctx, LOG_INFO, "user\n");
+ else
+ return cli_msg(appctx, LOG_INFO, "unknown\n");
+}
+
+/* parse and set the CLI level dynamically */
+static int cli_parse_set_lvl(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ /* this will ask the applet to not output a \n after the command */
+ if (strcmp(args[1], "-") == 0)
+ appctx->st1 |= APPCTX_CLI_ST1_NOLF;
+
+ if (strcmp(args[0], "operator") == 0) {
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER)) {
+ return 1;
+ }
+ appctx->cli_level &= ~ACCESS_LVL_MASK;
+ appctx->cli_level |= ACCESS_LVL_OPER;
+
+ } else if (strcmp(args[0], "user") == 0) {
+ if (!cli_has_level(appctx, ACCESS_LVL_USER)) {
+ return 1;
+ }
+ appctx->cli_level &= ~ACCESS_LVL_MASK;
+ appctx->cli_level |= ACCESS_LVL_USER;
+ }
+ appctx->cli_level &= ~(ACCESS_EXPERT|ACCESS_EXPERIMENTAL);
+ return 1;
+}
+
+
+/* parse and set the CLI expert/experimental-mode dynamically */
+static int cli_parse_expert_experimental_mode(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int level;
+ char *level_str;
+ char *output = NULL;
+
+ /* this will ask the applet to not output a \n after the command */
+ if (*args[1] && *args[2] && strcmp(args[2], "-") == 0)
+ appctx->st1 |= APPCTX_CLI_ST1_NOLF;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (strcmp(args[0], "expert-mode") == 0) {
+ level = ACCESS_EXPERT;
+ level_str = "expert-mode";
+ }
+ else if (strcmp(args[0], "experimental-mode") == 0) {
+ level = ACCESS_EXPERIMENTAL;
+ level_str = "experimental-mode";
+ }
+ else if (strcmp(args[0], "mcli-debug-mode") == 0) {
+ level = ACCESS_MCLI_DEBUG;
+ level_str = "mcli-debug-mode";
+ }
+ else {
+ return 1;
+ }
+
+ if (!*args[1]) {
+ memprintf(&output, "%s is %s\n", level_str,
+ (appctx->cli_level & level) ? "ON" : "OFF");
+ return cli_dynmsg(appctx, LOG_INFO, output);
+ }
+
+ appctx->cli_level &= ~level;
+ if (strcmp(args[1], "on") == 0)
+ appctx->cli_level |= level;
+ return 1;
+}
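+
+/* Example (illustrative): "experimental-mode on" enables experimental
+ * commands for the current session, "experimental-mode off" disables them,
+ * and "experimental-mode" alone reports whether it is ON or OFF.
+ * "expert-mode" and "mcli-debug-mode" follow the same syntax.
+ */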
+
+/* shows HAProxy version */
+static int cli_parse_show_version(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *msg = NULL;
+
+ return cli_dynmsg(appctx, LOG_INFO, memprintf(&msg, "%s\n", haproxy_version));
+}
+
+int cli_parse_default(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ return 0;
+}
+
+/* parse a "set rate-limit" command. It always returns 1. */
+static int cli_parse_set_ratelimit(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int v;
+ int *res;
+ int mul = 1;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (strcmp(args[2], "connections") == 0 && strcmp(args[3], "global") == 0)
+ res = &global.cps_lim;
+ else if (strcmp(args[2], "sessions") == 0 && strcmp(args[3], "global") == 0)
+ res = &global.sps_lim;
+#ifdef USE_OPENSSL
+ else if (strcmp(args[2], "ssl-sessions") == 0 && strcmp(args[3], "global") == 0)
+ res = &global.ssl_lim;
+#endif
+ else if (strcmp(args[2], "http-compression") == 0 && strcmp(args[3], "global") == 0) {
+ res = &global.comp_rate_lim;
+ mul = 1024;
+ }
+ else {
+ return cli_err(appctx,
+ "'set rate-limit' only supports :\n"
+ " - 'connections global' to set the per-process maximum connection rate\n"
+ " - 'sessions global' to set the per-process maximum session rate\n"
+#ifdef USE_OPENSSL
+ " - 'ssl-sessions global' to set the per-process maximum SSL session rate\n"
+#endif
+ " - 'http-compression global' to set the per-process maximum compression speed in kB/s\n");
+ }
+
+ if (!*args[4])
+ return cli_err(appctx, "Expects an integer value.\n");
+
+ v = atoi(args[4]);
+ if (v < 0)
+ return cli_err(appctx, "Value out of range.\n");
+
+ *res = v * mul;
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ return 1;
+}
+
+/* parse the "expose-fd" argument on the bind lines */
+static int bind_parse_expose_fd(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing fd type", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (strcmp(args[cur_arg + 1], "listeners") == 0) {
+ conf->level |= ACCESS_FD_LISTENERS;
+ } else {
+ memprintf(err, "'%s' only supports 'listeners' (got '%s')",
+ args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "level" argument on the bind lines */
+static int bind_parse_level(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing level", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg + 1], "user") == 0) {
+ conf->level &= ~ACCESS_LVL_MASK;
+ conf->level |= ACCESS_LVL_USER;
+ } else if (strcmp(args[cur_arg + 1], "operator") == 0) {
+ conf->level &= ~ACCESS_LVL_MASK;
+ conf->level |= ACCESS_LVL_OPER;
+ } else if (strcmp(args[cur_arg + 1], "admin") == 0) {
+ conf->level &= ~ACCESS_LVL_MASK;
+ conf->level |= ACCESS_LVL_ADMIN;
+ } else {
+ memprintf(err, "'%s' only supports 'user', 'operator', and 'admin' (got '%s')",
+ args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+static int bind_parse_severity_output(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing severity format", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (set_severity_output(&conf->severity_output, args[cur_arg+1]))
+ return 0;
+ else {
+ memprintf(err, "'%s' only supports 'none', 'number', and 'string' (got '%s')",
+ args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+}
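+
+/* These bind keywords typically appear on a "stats socket" line in the
+ * global section, e.g. (illustrative):
+ *   stats socket /var/run/haproxy.sock level admin expose-fd listeners
+ */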
+
+/* Send all the bound sockets, always returns 1 */
+static int _getsocks(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *cmsgbuf = NULL;
+ unsigned char *tmpbuf = NULL;
+ struct cmsghdr *cmsg;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct connection *remote = sc_conn(sc_opposite(sc));
+ struct msghdr msghdr;
+ struct iovec iov;
+ struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
+ const char *ns_name, *if_name;
+ unsigned char ns_nlen, if_nlen;
+ int nb_queued;
+ int cur_fd = 0;
+ int *tmpfd;
+ int tot_fd_nb = 0;
+ int fd = -1;
+ int curoff = 0;
+ int old_fcntl = -1;
+ int ret;
+
+ if (!remote) {
+ ha_warning("Only works on real connections\n");
+ goto out;
+ }
+
+ fd = remote->handle.fd;
+
+ /* Temporarily set the FD in blocking mode; that will make our life easier */
+ old_fcntl = fcntl(fd, F_GETFL);
+ if (old_fcntl < 0) {
+ ha_warning("Couldn't get the flags for the unix socket\n");
+ goto out;
+ }
+ cmsgbuf = malloc(CMSG_SPACE(sizeof(int) * MAX_SEND_FD));
+ if (!cmsgbuf) {
+ ha_warning("Failed to allocate memory to send sockets\n");
+ goto out;
+ }
+ if (fcntl(fd, F_SETFL, old_fcntl &~ O_NONBLOCK) == -1) {
+ ha_warning("Cannot make the unix socket blocking\n");
+ goto out;
+ }
+ setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv));
+ iov.iov_base = &tot_fd_nb;
+ iov.iov_len = sizeof(tot_fd_nb);
+ if (!(strm_li(s)->bind_conf->level & ACCESS_FD_LISTENERS))
+ goto out;
+ memset(&msghdr, 0, sizeof(msghdr));
+ /*
+ * First, calculate the total number of FDs, so that we can let
+ * the caller know how many to expect.
+ */
+ for (cur_fd = 0; cur_fd < global.maxsock; cur_fd++)
+ tot_fd_nb += !!(fdtab[cur_fd].state & FD_EXPORTED);
+
+ if (tot_fd_nb == 0)
+ goto out;
+
+ /* First send the total number of file descriptors, so that the
+ * receiving end knows what to expect.
+ */
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+ ret = sendmsg(fd, &msghdr, 0);
+ if (ret != sizeof(tot_fd_nb)) {
+ ha_warning("Failed to send the number of sockets to send\n");
+ goto out;
+ }
+
+ /* Now send the fds */
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int) * MAX_SEND_FD);
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_len = CMSG_LEN(MAX_SEND_FD * sizeof(int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ tmpfd = (int *)CMSG_DATA(cmsg);
+
+ /* For each socket, a message is sent, containing the following:
+ * Size of the namespace name (or 0 if none), as an unsigned char.
+ * The namespace name, if any
+ * Size of the interface name (or 0 if none), as an unsigned char
+ * The interface name, if any
+ * 32 bits of zeroes (used to be listener options).
+ */
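+ /* Hypothetical encoding example (assuming a 4-byte int): a listener
+ * bound in namespace "blue" on interface "eth0" would produce:
+ * 0x04 'b' 'l' 'u' 'e' 0x04 'e' 't' 'h' '0' 0x00 0x00 0x00 0x00
+ * while one with neither namespace nor interface produces just:
+ * 0x00 0x00 0x00 0x00 0x00 0x00
+ */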
+ /* We will send sockets in batches of MAX_SEND_FD, so allocate a
+ * buffer big enough to store the socket information.
+ */
+ tmpbuf = malloc(MAX_SEND_FD * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int)));
+ if (tmpbuf == NULL) {
+ ha_warning("Failed to allocate memory to transfer socket information\n");
+ goto out;
+ }
+
+ nb_queued = 0;
+ iov.iov_base = tmpbuf;
+ for (cur_fd = 0; cur_fd < global.maxsock; cur_fd++) {
+ if (!(fdtab[cur_fd].state & FD_EXPORTED))
+ continue;
+
+ ns_name = if_name = "";
+ ns_nlen = if_nlen = 0;
+
+ /* for now we can only retrieve namespaces and interfaces from
+ * pure listeners.
+ */
+ if (fdtab[cur_fd].iocb == sock_accept_iocb) {
+ const struct listener *l = fdtab[cur_fd].owner;
+
+ if (l->rx.settings->interface) {
+ if_name = l->rx.settings->interface;
+ if_nlen = strlen(if_name);
+ }
+
+#ifdef USE_NS
+ if (l->rx.settings->netns) {
+ ns_name = l->rx.settings->netns->node.key;
+ ns_nlen = l->rx.settings->netns->name_len;
+ }
+#endif
+ }
+
+ /* put the FD into the CMSG_DATA */
+ tmpfd[nb_queued++] = cur_fd;
+
+ /* first block is <ns_name_len> <ns_name> */
+ tmpbuf[curoff++] = ns_nlen;
+ if (ns_nlen)
+ memcpy(tmpbuf + curoff, ns_name, ns_nlen);
+ curoff += ns_nlen;
+
+ /* second block is <if_name_len> <if_name> */
+ tmpbuf[curoff++] = if_nlen;
+ if (if_nlen)
+ memcpy(tmpbuf + curoff, if_name, if_nlen);
+ curoff += if_nlen;
+
+ /* we used to send the listener options here before 2.3 */
+ memset(tmpbuf + curoff, 0, sizeof(int));
+ curoff += sizeof(int);
+
+ /* there's a limit to how many FDs may be sent at once */
+ if (nb_queued == MAX_SEND_FD) {
+ iov.iov_len = curoff;
+ if (sendmsg(fd, &msghdr, 0) != curoff) {
+ ha_warning("Failed to transfer sockets\n");
+ goto out; /* clean up and restore the socket's flags */
+ }
+
+ /* Wait for an ack */
+ do {
+ ret = recv(fd, &tot_fd_nb, sizeof(tot_fd_nb), 0);
+ } while (ret == -1 && errno == EINTR);
+
+ if (ret <= 0) {
+ ha_warning("Unexpected error while transferring sockets\n");
+ goto out; /* clean up and restore the socket's flags */
+ }
+ curoff = 0;
+ nb_queued = 0;
+ }
+ }
+
+ /* flush pending stuff */
+ if (nb_queued) {
+ iov.iov_len = curoff;
+ cmsg->cmsg_len = CMSG_LEN(nb_queued * sizeof(int));
+ msghdr.msg_controllen = CMSG_SPACE(nb_queued * sizeof(int));
+ if (sendmsg(fd, &msghdr, 0) != curoff) {
+ ha_warning("Failed to transfer sockets\n");
+ goto out;
+ }
+ }
+
+out:
+ /* restore the socket's initial flags; only warn on failure, since
+ * retrying the same call here could not succeed either
+ */
+ if (fd >= 0 && old_fcntl >= 0 && fcntl(fd, F_SETFL, old_fcntl) == -1)
+ ha_warning("Cannot make the unix socket non-blocking\n");
+ appctx->st0 = CLI_ST_END;
+ free(cmsgbuf);
+ free(tmpbuf);
+ return 1;
+}
+
+static int cli_parse_simple(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (*args[0] == 'h')
+ /* help */
+ cli_gen_usage_msg(appctx, args);
+ else if (*args[0] == 'p')
+ /* prompt */
+ appctx->st1 ^= APPCTX_CLI_ST1_PROMPT;
+ else if (*args[0] == 'q')
+ /* quit */
+ appctx->st0 = CLI_ST_END;
+
+ return 1;
+}
+
+void pcli_write_prompt(struct stream *s)
+{
+ struct buffer *msg = get_trash_chunk();
+ struct channel *oc = sc_oc(s->scf);
+
+ if (!(s->pcli_flags & PCLI_F_PROMPT))
+ return;
+
+ if (s->pcli_flags & PCLI_F_PAYLOAD) {
+ chunk_appendf(msg, "+ ");
+ } else {
+ if (s->pcli_next_pid == 0)
+ chunk_appendf(msg, "master%s",
+ (proc_self->failedreloads > 0) ? "[ReloadFailed]" : "");
+ else
+ chunk_appendf(msg, "%d", s->pcli_next_pid);
+
+ if (s->pcli_flags & (ACCESS_EXPERIMENTAL|ACCESS_EXPERT|ACCESS_MCLI_DEBUG)) {
+ chunk_appendf(msg, "(");
+
+ if (s->pcli_flags & ACCESS_EXPERIMENTAL)
+ chunk_appendf(msg, "x");
+
+ if (s->pcli_flags & ACCESS_EXPERT)
+ chunk_appendf(msg, "e");
+
+ if (s->pcli_flags & ACCESS_MCLI_DEBUG)
+ chunk_appendf(msg, "d");
+
+ chunk_appendf(msg, ")");
+ }
+
+ chunk_appendf(msg, "> ");
+
+
+ }
+ co_inject(oc, msg->area, msg->data);
+}
+
+
+/* The pcli_* functions are used for the CLI proxy in the master */
+
+void pcli_reply_and_close(struct stream *s, const char *msg)
+{
+ struct buffer *buf = get_trash_chunk();
+
+ chunk_initstr(buf, msg);
+ stream_retnclose(s, buf);
+}
+
+static enum obj_type *pcli_pid_to_server(int proc_pid)
+{
+ struct mworker_proc *child;
+
+ /* return the mCLI applet of the master */
+ if (proc_pid == 0)
+ return &mcli_applet.obj_type;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->pid == proc_pid){
+ return &child->srv->obj_type;
+ }
+ }
+ return NULL;
+}
+
+/* Take a CLI prefix as argument (e.g.: @!1234, @master, @1)
+ * Return:
+ * 0: master
+ * > 0: pid of a worker
+ * < 0: didn't find a worker
+ */
+static int pcli_prefix_to_pid(const char *prefix)
+{
+ int proc_pid;
+ struct mworker_proc *child;
+ char *errtol = NULL;
+
+ if (*prefix != '@') /* not a prefix, should not happen */
+ return -1;
+
+ prefix++;
+ if (!*prefix) /* sent @ alone, return the master */
+ return 0;
+
+ if (strcmp("master", prefix) == 0) {
+ return 0;
+ } else if (*prefix == '!') {
+ prefix++;
+ if (!*prefix)
+ return -1;
+
+ proc_pid = strtol(prefix, &errtol, 10);
+ if (*errtol != '\0')
+ return -1;
+ list_for_each_entry(child, &proc_list, list) {
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+ if (child->pid == proc_pid){
+ return child->pid;
+ }
+ }
+ } else {
+ struct mworker_proc *chosen = NULL;
+ /* this is a relative pid */
+
+ proc_pid = strtol(prefix, &errtol, 10);
+ if (*errtol != '\0')
+ return -1;
+
+ if (proc_pid == 0) /* return the master */
+ return 0;
+
+ if (proc_pid != 1) /* only the "@1" relative PID is supported */
+ return -1;
+
+ /* choose the right process: the current one is the one with the
+ lowest number of reloads */
+ list_for_each_entry(child, &proc_list, list) {
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+ if (child->reloads == 0)
+ return child->pid;
+ else if (chosen == NULL || child->reloads < chosen->reloads)
+ chosen = child;
+ }
+ if (chosen)
+ return chosen->pid;
+ }
+ return -1;
+}
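+
+/* Examples (illustrative): "@" and "@master" return 0, "@!1234" returns
+ * 1234 if a worker with that exact PID exists, and "@1" returns the PID of
+ * the current worker, i.e. the one with the fewest reloads.
+ */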
+
+/* Return:
+ * >= 0 : number of words to trim from the start of the command
+ * = -1 : error
+ */
+
+int pcli_find_and_exec_kw(struct stream *s, char **args, int argl, char **errmsg, int *next_pid)
+{
+ if (argl < 1)
+ return 0;
+
+ /* there is a prefix */
+ if (args[0][0] == '@') {
+ int target_pid = pcli_prefix_to_pid(args[0]);
+
+ if (target_pid == -1) {
+ memprintf(errmsg, "Can't find the target PID matching the prefix '%s'\n", args[0]);
+ return -1;
+ }
+
+ /* if the prefix is alone, define a default target */
+ if (argl == 1)
+ s->pcli_next_pid = target_pid;
+ else
+ *next_pid = target_pid;
+ return 1;
+ } else if (strcmp("prompt", args[0]) == 0) {
+ s->pcli_flags ^= PCLI_F_PROMPT;
+ return argl; /* return the number of elements in the array */
+
+ } else if (strcmp("quit", args[0]) == 0) {
+ channel_shutr_now(&s->req);
+ channel_shutw_now(&s->res);
+ return argl; /* return the number of elements in the array */
+ } else if (strcmp(args[0], "operator") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_OPER)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_LVL_MASK;
+ s->pcli_flags |= ACCESS_LVL_OPER;
+ return argl;
+
+ } else if (strcmp(args[0], "user") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_USER)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_LVL_MASK;
+ s->pcli_flags |= ACCESS_LVL_USER;
+ return argl;
+
+ } else if (strcmp(args[0], "expert-mode") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+
+ s->pcli_flags &= ~ACCESS_EXPERT;
+ if ((argl > 1) && (strcmp(args[1], "on") == 0))
+ s->pcli_flags |= ACCESS_EXPERT;
+ return argl;
+
+ } else if (strcmp(args[0], "experimental-mode") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_EXPERIMENTAL;
+ if ((argl > 1) && (strcmp(args[1], "on") == 0))
+ s->pcli_flags |= ACCESS_EXPERIMENTAL;
+ return argl;
+ } else if (strcmp(args[0], "mcli-debug-mode") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_MCLI_DEBUG;
+ if ((argl > 1) && (strcmp(args[1], "on") == 0))
+ s->pcli_flags |= ACCESS_MCLI_DEBUG;
+ return argl;
+ }
+
+ return 0;
+}
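+
+/* For instance (illustrative), "@!1234 show info" returns 1 so that only
+ * the "@!1234" prefix is stripped before the command is forwarded, while
+ * "user" alone returns argl because the whole command is consumed locally.
+ */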
+
+/*
+ * Parse the CLI request:
+ * - It does basically the same as cli_io_handler, but acting as a proxy
+ * - It can execute a command and strip non-forwardable commands
+ *
+ * Return:
+ * - the number of characters to forward, or
+ * - -1 if there is an error or not enough data
+ */
+int pcli_parse_request(struct stream *s, struct channel *req, char **errmsg, int *next_pid)
+{
+ char *str;
+ char *end;
+ char *args[MAX_CLI_ARGS + 1]; /* +1 for storing a NULL */
+ int argl; /* number of args */
+ char *p;
+ char *trim = NULL;
+ char *payload = NULL;
+ int wtrim = 0; /* number of words to trim */
+ int reql = 0;
+ int ret;
+ int i = 0;
+
+ /* we cannot deal with a wrapping buffer, so let's take care of this
+ * first.
+ */
+ if (b_head(&req->buf) + b_data(&req->buf) > b_wrap(&req->buf))
+ b_slow_realign(&req->buf, trash.area, co_data(req));
+
+ str = (char *)ci_head(req);
+ end = (char *)ci_stop(req);
+
+ p = str;
+
+ if (!(s->pcli_flags & PCLI_F_PAYLOAD)) {
+
+ /* Looks for the end of one command */
+ while (p+reql < end) {
+ /* handle escaping */
+ if (p[reql] == '\\') {
+ reql+=2;
+ continue;
+ }
+ if (p[reql] == ';' || p[reql] == '\n') {
+ /* found the end of the command */
+ p[reql] = '\n';
+ reql++;
+ break;
+ }
+ reql++;
+ }
+ } else {
+ while (p+reql < end) {
+ if (p[reql] == '\n') {
+ /* found the end of the line */
+ reql++;
+ break;
+ }
+ reql++;
+ }
+ }
+
+ /* set end to first byte after the end of the command */
+ end = p + reql;
+
+ /* there is no end to this command yet, we need more data to parse! */
+ if (!reql || *(end-1) != '\n') {
+ return -1;
+ }
+
+ if (s->pcli_flags & PCLI_F_PAYLOAD) {
+ if (reql == 1) /* last line of the payload */
+ s->pcli_flags &= ~PCLI_F_PAYLOAD;
+ return reql;
+ }
+
+ *(end-1) = '\0';
+
+ /* splits the command in words */
+ while (i < MAX_CLI_ARGS && p < end) {
+ /* skip leading spaces/tabs */
+ p += strspn(p, " \t");
+ if (!*p)
+ break;
+
+ args[i] = p;
+ while (1) {
+ p += strcspn(p, " \t\\");
+ /* handle chars escaped with backslashes (\) */
+ if (*p == '\\') {
+ if (!*++p)
+ break;
+ if (!*++p)
+ break;
+ } else {
+ break;
+ }
+ }
+ *p++ = 0;
+ i++;
+ }
+
+ argl = i;
+
+ for (; i < MAX_CLI_ARGS + 1; i++)
+ args[i] = NULL;
+
+ wtrim = pcli_find_and_exec_kw(s, args, argl, errmsg, next_pid);
+
+ /* Words are terminated by \0; we need to replace those \0s with spaces
+ before forwarding the command */
+ p = str;
+ while (p < end-1) {
+ if (*p == '\0')
+ *p = ' ';
+ p++;
+ }
+
+ payload = strstr(str, PAYLOAD_PATTERN);
+ if (payload && (end - 1) == (payload + strlen(PAYLOAD_PATTERN))) {
+ /* the payload pattern is at the end of the command */
+ s->pcli_flags |= PCLI_F_PAYLOAD;
+ }
+
+ *(end-1) = '\n';
+
+ if (wtrim > 0) {
+ trim = &args[wtrim][0];
+ if (trim == NULL) /* if this was the last word in the table */
+ trim = end;
+
+ b_del(&req->buf, trim - str);
+
+ ret = end - trim;
+ } else if (wtrim < 0) {
+ /* parsing error */
+ return -1;
+ } else {
+ /* the whole string */
+ ret = end - str;
+ }
+
+ if (ret > 1) {
+
+ /* the mcli-debug-mode is only sent to the applet of the master */
+ if ((s->pcli_flags & ACCESS_MCLI_DEBUG) && *next_pid <= 0) {
+ ci_insert_line2(req, 0, "mcli-debug-mode on -", strlen("mcli-debug-mode on -"));
+ ret += strlen("mcli-debug-mode on -") + 2;
+ }
+ if (s->pcli_flags & ACCESS_EXPERIMENTAL) {
+ ci_insert_line2(req, 0, "experimental-mode on -", strlen("experimental-mode on -"));
+ ret += strlen("experimental-mode on -") + 2;
+ }
+ if (s->pcli_flags & ACCESS_EXPERT) {
+ ci_insert_line2(req, 0, "expert-mode on -", strlen("expert-mode on -"));
+ ret += strlen("expert-mode on -") + 2;
+ }
+
+ if (pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ goto end;
+ } else if (pcli_has_level(s, ACCESS_LVL_OPER)) {
+ ci_insert_line2(req, 0, "operator -", strlen("operator -"));
+ ret += strlen("operator -") + 2;
+ } else if (pcli_has_level(s, ACCESS_LVL_USER)) {
+ ci_insert_line2(req, 0, "user -", strlen("user -"));
+ ret += strlen("user -") + 2;
+ }
+ }
+end:
+
+ return ret;
+}
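+
+/* As an illustration, for a session lowered to the operator level, a
+ * command such as "show info\n" is forwarded preceded by an "operator -"
+ * line, so that the target CLI drops its privileges before executing the
+ * command.
+ */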
+
+int pcli_wait_for_request(struct stream *s, struct channel *req, int an_bit)
+{
+ int next_pid = -1;
+ int to_forward;
+ char *errmsg = NULL;
+
+ /* Don't read the next command if still processing the response of the
+ * current one. Just wait. At this stage, errors should be handled by
+ * the response analyzer.
+ */
+ if (s->res.analysers & AN_RES_WAIT_CLI)
+ return 0;
+
+ if ((s->pcli_flags & ACCESS_LVL_MASK) == ACCESS_LVL_NONE)
+ s->pcli_flags |= strm_li(s)->bind_conf->level & ACCESS_LVL_MASK;
+
+read_again:
+ /* if the channel is closed for read, we won't receive any more data
+ from the client, but we don't want to forward this close to the
+ server */
+ channel_dont_close(req);
+
+ /* We don't know yet to which server we will connect */
+ channel_dont_connect(req);
+
+ req->flags |= CF_READ_DONTWAIT;
+
+ /* need more data */
+ if (!ci_data(req))
+ goto missing_data;
+
+ /* If there is data available for analysis, log the end of the idle time. */
+ if (c_data(req) && s->logs.t_idle == -1)
+ s->logs.t_idle = tv_ms_elapsed(&s->logs.tv_accept, &now) - s->logs.t_handshake;
+
+ to_forward = pcli_parse_request(s, req, &errmsg, &next_pid);
+ if (to_forward > 0) {
+ int target_pid;
+ /* enough data */
+
+ /* forward only 1 command */
+ channel_forward(req, to_forward);
+
+ if (!(s->pcli_flags & PCLI_F_PAYLOAD)) {
+ /* we send only 1 command per request, and we write close after it */
+ channel_shutw_now(req);
+ } else {
+ pcli_write_prompt(s);
+ }
+
+ s->res.flags |= CF_WAKE_ONCE; /* need to be called again */
+ s->res.analysers |= AN_RES_WAIT_CLI;
+
+ if (!(s->flags & SF_ASSIGNED)) {
+ if (next_pid > -1)
+ target_pid = next_pid;
+ else
+ target_pid = s->pcli_next_pid;
+ /* we can connect now */
+ s->target = pcli_pid_to_server(target_pid);
+
+ if (!s->target)
+ goto server_disconnect;
+
+ s->flags |= (SF_DIRECT | SF_ASSIGNED);
+ channel_auto_connect(req);
+ }
+
+ } else if (to_forward == 0) {
+ /* we trimmed things but we might have other commands to consume */
+ pcli_write_prompt(s);
+ goto read_again;
+ } else if (to_forward == -1) {
+ if (errmsg) {
+ /* there was an error during the parsing */
+ pcli_reply_and_close(s, errmsg);
+ s->req.analysers &= ~AN_REQ_WAIT_CLI;
+ return 0;
+ }
+ goto missing_data;
+ }
+
+ return 0;
+
+send_help:
+ b_reset(&req->buf);
+ b_putblk(&req->buf, "help\n", 5);
+ goto read_again;
+
+missing_data:
+ if (req->flags & CF_SHUTR) {
+ /* There is no more request, or only a partial one, and we
+ * received a close from the client, so we can leave */
+ channel_shutw_now(&s->res);
+ s->req.analysers &= ~AN_REQ_WAIT_CLI;
+ return 1;
+ }
+ else if (channel_full(req, global.tune.maxrewrite)) {
+ /* buffer is full and we didn't catch the end of a command */
+ goto send_help;
+ }
+ return 0;
+
+server_disconnect:
+ pcli_reply_and_close(s, "Can't connect to the target CLI!\n");
+ return 0;
+}
+
+int pcli_wait_for_response(struct stream *s, struct channel *rep, int an_bit)
+{
+ struct proxy *fe = strm_fe(s);
+ struct proxy *be = s->be;
+
+ if ((rep->flags & (CF_READ_ERROR|CF_READ_TIMEOUT|CF_WRITE_ERROR|CF_WRITE_TIMEOUT)) ||
+ ((rep->flags & CF_SHUTW) && (rep->to_forward || co_data(rep)))) {
+ pcli_reply_and_close(s, "Can't connect to the target CLI!\n");
+ s->req.analysers &= ~AN_REQ_WAIT_CLI;
+ s->res.analysers &= ~AN_RES_WAIT_CLI;
+ return 0;
+ }
+ rep->flags |= CF_READ_DONTWAIT; /* try to get back here ASAP */
+ rep->flags |= CF_NEVER_WAIT;
+
+ /* don't forward the close */
+ channel_dont_close(&s->res);
+ channel_dont_close(&s->req);
+
+ if (s->pcli_flags & PCLI_F_PAYLOAD) {
+ s->res.analysers &= ~AN_RES_WAIT_CLI;
+ s->req.flags |= CF_WAKE_ONCE; /* need to be called again if there is some command left in the request */
+ return 0;
+ }
+
+ /* forward the data */
+ if (ci_data(rep)) {
+ c_adv(rep, ci_data(rep));
+ return 0;
+ }
+
+ if ((rep->flags & (CF_SHUTR|CF_READ_NULL))) {
+ /* stream cleanup */
+
+ pcli_write_prompt(s);
+
+ s->scb->flags |= SC_FL_NOLINGER | SC_FL_NOHALF;
+ sc_shutr(s->scb);
+ sc_shutw(s->scb);
+
+ /*
+ * starting from here, this is the same code as
+ * http_end_txn_clean_session().
+ *
+ * It allows to do frontend keepalive while reconnecting to a
+ * new server for each request.
+ */
+
+ if (s->flags & SF_BE_ASSIGNED) {
+ HA_ATOMIC_DEC(&be->beconn);
+ if (unlikely(s->srv_conn))
+ sess_change_server(s, NULL);
+ }
+
+ s->logs.t_close = tv_ms_elapsed(&s->logs.tv_accept, &now);
+ stream_process_counters(s);
+
+ /* don't count other requests' data */
+ s->logs.bytes_in -= ci_data(&s->req);
+ s->logs.bytes_out -= ci_data(&s->res);
+
+ /* we may need to know the position in the queue */
+ pendconn_free(s);
+
+ /* let's do a final log if we need it */
+ if (!LIST_ISEMPTY(&fe->logformat) && s->logs.logwait &&
+ !(s->flags & SF_MONITOR) &&
+ (!(fe->options & PR_O_NULLNOLOG) || s->req.total)) {
+ s->do_log(s);
+ }
+
+ /* stop tracking content-based counters */
+ stream_stop_content_counters(s);
+ stream_update_time_stats(s);
+
+ s->logs.accept_date = date; /* user-visible date for logging */
+ s->logs.tv_accept = now; /* corrected date for internal use */
+ s->logs.t_handshake = 0; /* There is no handshake in a keep-alive connection. */
+ s->logs.t_idle = -1;
+ tv_zero(&s->logs.tv_request);
+ s->logs.t_queue = -1;
+ s->logs.t_connect = -1;
+ s->logs.t_data = -1;
+ s->logs.t_close = 0;
+ s->logs.prx_queue_pos = 0; /* we get the number of pending conns before us */
+ s->logs.srv_queue_pos = 0; /* we will get this number soon */
+
+ s->logs.bytes_in = s->req.total = ci_data(&s->req);
+ s->logs.bytes_out = s->res.total = ci_data(&s->res);
+
+ stream_del_srv_conn(s);
+ if (objt_server(s->target)) {
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+ if (may_dequeue_tasks(__objt_server(s->target), be))
+ process_srv_queue(__objt_server(s->target));
+ }
+
+ s->target = NULL;
+
+ /* only release our endpoint if we don't intend to reuse the
+ * connection.
+ */
+ if (!sc_conn_ready(s->scb)) {
+ s->srv_conn = NULL;
+ if (sc_reset_endp(s->scb) < 0) {
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ if (s->srv_error)
+ s->srv_error(s, s->scb);
+ return 1;
+ }
+ se_fl_clr(s->scb->sedesc, ~SE_FL_DETACHED);
+ }
+
+ sockaddr_free(&s->scb->dst);
+
+ sc_set_state(s->scb, SC_ST_INI);
+ s->scb->flags &= SC_FL_ISBACK | SC_FL_DONT_WAKE; /* we're in the context of process_stream */
+ s->req.flags &= ~(CF_SHUTW|CF_SHUTW_NOW|CF_AUTO_CONNECT|CF_WRITE_ERROR|CF_STREAMER|CF_STREAMER_FAST|CF_NEVER_WAIT|CF_WROTE_DATA);
+ s->res.flags &= ~(CF_SHUTR|CF_SHUTR_NOW|CF_READ_ATTACHED|CF_READ_ERROR|CF_READ_NOEXP|CF_STREAMER|CF_STREAMER_FAST|CF_WRITE_PARTIAL|CF_NEVER_WAIT|CF_WROTE_DATA|CF_READ_NULL);
+ s->flags &= ~(SF_DIRECT|SF_ASSIGNED|SF_BE_ASSIGNED|SF_FORCE_PRST|SF_IGNORE_PRST);
+ s->flags &= ~(SF_CURR_SESS|SF_REDIRECTABLE|SF_SRV_REUSED);
+ s->flags &= ~(SF_ERR_MASK|SF_FINST_MASK|SF_REDISP);
+ s->conn_retries = 0; /* used for logging too */
+ s->conn_exp = TICK_ETERNITY;
+ s->conn_err_type = STRM_ET_NONE;
+ /* reinitialise the current rule list pointer to NULL. We are sure that
+ * any rulelist match the NULL pointer.
+ */
+ s->current_rule_list = NULL;
+
+ s->be = strm_fe(s);
+ s->logs.logwait = strm_fe(s)->to_log;
+ s->logs.level = 0;
+ stream_del_srv_conn(s);
+ s->target = NULL;
+ /* re-init store persistence */
+ s->store_count = 0;
+ s->uniq_id = global.req_count++;
+
+ s->req.flags |= CF_READ_DONTWAIT; /* one read is usually enough */
+
+ s->req.flags |= CF_WAKE_ONCE; /* need to be called again if there is some command left in the request */
+
+ s->res.analysers &= ~AN_RES_WAIT_CLI;
+
+ /* We must trim any excess data from the response buffer, because we
+ * may have blocked an invalid response from a server that we don't
+ * want to accidentally forward once we disable the analysers, nor do
+ * we want those data to come along with next response. A typical
+ * example of such data would be from a buggy server responding to
+ * a HEAD with some data, or sending more than the advertised
+ * content-length.
+ */
+ if (unlikely(ci_data(&s->res)))
+ b_set_data(&s->res.buf, co_data(&s->res));
+
+ /* Now we can realign the response buffer */
+ c_realign_if_empty(&s->res);
+
+ s->req.rto = strm_fe(s)->timeout.client;
+ s->req.wto = TICK_ETERNITY;
+
+ s->res.rto = TICK_ETERNITY;
+ s->res.wto = strm_fe(s)->timeout.client;
+
+ s->req.rex = TICK_ETERNITY;
+ s->req.wex = TICK_ETERNITY;
+ s->req.analyse_exp = TICK_ETERNITY;
+ s->res.rex = TICK_ETERNITY;
+ s->res.wex = TICK_ETERNITY;
+ s->res.analyse_exp = TICK_ETERNITY;
+ s->scb->hcto = TICK_ETERNITY;
+
+ /* we're removing the analysers, we MUST re-enable events detection.
+ * We don't enable close on the response channel since it's either
+ * already closed, or in keep-alive with an idle connection handler.
+ */
+ channel_auto_read(&s->req);
+ channel_auto_close(&s->req);
+ channel_auto_read(&s->res);
+
+
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * The mworker functions are used to initialize the CLI in the master process
+ */
+
+/*
+ * Stop the mworker proxy
+ */
+void mworker_cli_proxy_stop()
+{
+ if (mworker_proxy)
+ stop_proxy(mworker_proxy);
+}
+
+/*
+ * Create the mworker CLI proxy
+ */
+int mworker_cli_proxy_create()
+{
+ struct mworker_proc *child;
+ char *msg = NULL;
+ char *errmsg = NULL;
+
+ mworker_proxy = alloc_new_proxy("MASTER", PR_CAP_LISTEN|PR_CAP_INT, &errmsg);
+ if (!mworker_proxy)
+ goto error_proxy;
+
+ mworker_proxy->mode = PR_MODE_CLI;
+ mworker_proxy->maxconn = 10; /* default to 10 concurrent connections */
+ mworker_proxy->timeout.client = 0; /* no timeout */
+ mworker_proxy->conf.file = strdup("MASTER");
+ mworker_proxy->conf.line = 0;
+ mworker_proxy->accept = frontend_accept;
+ mworker_proxy->lbprm.algo = BE_LB_ALGO_NONE;
+
+ /* The default target is not initialized to the CLI applet here; this
+ * must be done in the request parsing code */
+ mworker_proxy->default_target = NULL;
+
+ /* create all servers using the mworker_proc list */
+ list_for_each_entry(child, &proc_list, list) {
+ struct server *newsrv = NULL;
+ struct sockaddr_storage *sk;
+ int port1, port2, port;
+ struct protocol *proto;
+
+ /* only the workers support the master CLI */
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+
+ newsrv = new_server(mworker_proxy);
+ if (!newsrv)
+ goto error;
+
+ /* we don't know the new pid yet */
+ if (child->pid == -1)
+ memprintf(&msg, "cur-%d", 1);
+ else
+ memprintf(&msg, "old-%d", child->pid);
+
+ newsrv->next = mworker_proxy->srv;
+ mworker_proxy->srv = newsrv;
+ newsrv->conf.file = strdup(msg);
+ newsrv->id = strdup(msg);
+ newsrv->conf.line = 0;
+
+ memprintf(&msg, "sockpair@%d", child->ipc_fd[0]);
+ if ((sk = str2sa_range(msg, &port, &port1, &port2, NULL, &proto,
+ &errmsg, NULL, NULL, PA_O_STREAM)) == 0) {
+ goto error;
+ }
+ ha_free(&msg);
+
+ if (!proto->connect) {
+ goto error;
+ }
+
+ /* no port specified */
+ newsrv->flags |= SRV_F_MAPPORTS;
+ newsrv->addr = *sk;
+ /* don't let the server participate in load balancing */
+ newsrv->iweight = 0;
+ newsrv->uweight = 0;
+ srv_lb_commit_status(newsrv);
+
+ child->srv = newsrv;
+ }
+
+ mworker_proxy->next = proxies_list;
+ proxies_list = mworker_proxy;
+
+ return 0;
+
+error:
+
+ list_for_each_entry(child, &proc_list, list) {
+ free((char *)child->srv->conf.file); /* cast because of const char * */
+ free(child->srv->id);
+ ha_free(&child->srv);
+ }
+ free_proxy(mworker_proxy);
+ free(msg);
+
+error_proxy:
+ ha_alert("%s\n", errmsg);
+ free(errmsg);
+
+ return -1;
+}
+
+/*
+ * Create a new listener for the master CLI proxy
+ */
+int mworker_cli_proxy_new_listener(char *line)
+{
+ struct bind_conf *bind_conf;
+ struct listener *l;
+ char *err = NULL;
+ char *args[MAX_LINE_ARGS + 1];
+ int arg;
+ int cur_arg;
+
+ arg = 1;
+ args[0] = line;
+
+ /* args is a bind configuration with spaces replaced by commas */
+ while (*line && arg < MAX_LINE_ARGS) {
+
+ if (*line == ',') {
+ *line++ = '\0';
+ while (*line == ',')
+ line++;
+ args[arg++] = line;
+ }
+ line++;
+ }
+
+ args[arg] = "\0";
+
+ bind_conf = bind_conf_alloc(mworker_proxy, "master-socket", 0, "", xprt_get(XPRT_RAW));
+ if (!bind_conf)
+ goto err;
+
+ bind_conf->level &= ~ACCESS_LVL_MASK;
+ bind_conf->level |= ACCESS_LVL_ADMIN;
+ bind_conf->level |= ACCESS_MASTER | ACCESS_MASTER_ONLY;
+
+ if (!str2listener(args[0], mworker_proxy, bind_conf, "master-socket", 0, &err)) {
+ ha_alert("Cannot create the listener of the master CLI\n");
+ goto err;
+ }
+
+ cur_arg = 1;
+
+ while (*args[cur_arg]) {
+ struct bind_kw *kw;
+ const char *best;
+
+ kw = bind_find_kw(args[cur_arg]);
+ if (kw) {
+ if (!kw->parse) {
+ memprintf(&err, "'%s %s' : '%s' option is not implemented in this version (check build options).",
+ args[0], args[1], args[cur_arg]);
+ goto err;
+ }
+
+ if (kw->parse(args, cur_arg, global.cli_fe, bind_conf, &err) != 0) {
+ if (err)
+ memprintf(&err, "'%s %s' : '%s'", args[0], args[1], err);
+ else
+ memprintf(&err, "'%s %s' : error encountered while processing '%s'",
+ args[0], args[1], args[cur_arg]);
+ goto err;
+ }
+
+ cur_arg += 1 + kw->skip;
+ continue;
+ }
+
+ best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ memprintf(&err, "'%s %s' : unknown keyword '%s'. Did you mean '%s' maybe?",
+ args[0], args[1], args[cur_arg], best);
+ else
+ memprintf(&err, "'%s %s' : unknown keyword '%s'.",
+ args[0], args[1], args[cur_arg]);
+ goto err;
+ }
+
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ l->accept = session_accept_fd;
+ l->default_target = mworker_proxy->default_target;
+ /* don't make this listener subject to global limits and don't close it in the master */
+ l->options |= LI_O_UNLIMITED;
+ l->rx.flags |= RX_F_MWORKER; /* we are keeping this FD in the master */
+ l->nice = -64; /* we want to boost priority for local stats */
+ global.maxsock++; /* for the listening socket */
+ }
+ global.maxsock += mworker_proxy->maxconn;
+
+ return 0;
+
+err:
+ ha_alert("%s\n", err);
+ free(err);
+ free(bind_conf);
+ return -1;
+
+}
+
+/*
+ * Create a new CLI socket using a socketpair for a worker process
+ * <mworker_proc> is the process structure, and <proc> is the process number
+ */
+int mworker_cli_sockpair_new(struct mworker_proc *mworker_proc, int proc)
+{
+ struct bind_conf *bind_conf;
+ struct listener *l;
+ char *path = NULL;
+ char *err = NULL;
+
+ /* master pipe to ensure the master is still alive */
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, mworker_proc->ipc_fd) < 0) {
+ ha_alert("Cannot create worker socketpair.\n");
+ return -1;
+ }
+
+ /* XXX: we might want to use a separate frontend at some point */
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", "master-socket", 0)) == NULL) {
+ ha_alert("out of memory trying to allocate the stats frontend\n");
+ goto error;
+ }
+ }
+
+ bind_conf = bind_conf_alloc(global.cli_fe, "master-socket", 0, "", xprt_get(XPRT_RAW));
+ if (!bind_conf)
+ goto error;
+
+ bind_conf->level &= ~ACCESS_LVL_MASK;
+ bind_conf->level |= ACCESS_LVL_ADMIN; /* TODO: need to lower the rights with a CLI keyword */
+ bind_conf->level |= ACCESS_FD_LISTENERS;
+
+ if (!memprintf(&path, "sockpair@%d", mworker_proc->ipc_fd[1])) {
+ ha_alert("Cannot allocate listener.\n");
+ goto error;
+ }
+
+ if (!str2listener(path, global.cli_fe, bind_conf, "master-socket", 0, &err)) {
+ free(path);
+ ha_alert("Cannot create a CLI sockpair listener for process #%d\n", proc);
+ goto error;
+ }
+ ha_free(&path);
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ l->accept = session_accept_fd;
+ l->default_target = global.cli_fe->default_target;
+ l->options |= (LI_O_UNLIMITED | LI_O_NOSTOP);
+ HA_ATOMIC_INC(&unstoppable_jobs);
+ /* it's a sockpair but we don't want to keep the fd in the master */
+ l->rx.flags &= ~RX_F_INHERITED;
+ l->nice = -64; /* we want to boost priority for local stats */
+ global.maxsock++; /* for the listening socket */
+ }
+
+ return 0;
+
+error:
+ close(mworker_proc->ipc_fd[0]);
+ close(mworker_proc->ipc_fd[1]);
+ free(err);
+
+ return -1;
+}
+
+static struct applet cli_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<CLI>", /* used for logging */
+ .fct = cli_io_handler,
+ .release = cli_release_handler,
+};
+
+/* master CLI */
+static struct applet mcli_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<MCLI>", /* used for logging */
+ .fct = cli_io_handler,
+ .release = cli_release_handler,
+};
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "help", NULL }, NULL, cli_parse_simple, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "prompt", NULL }, NULL, cli_parse_simple, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "quit", NULL }, NULL, cli_parse_simple, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "_getsocks", NULL }, NULL, _getsocks, NULL },
+ { { "expert-mode", NULL }, NULL, cli_parse_expert_experimental_mode, NULL, NULL, NULL, ACCESS_MASTER }, // not listed
+ { { "experimental-mode", NULL }, NULL, cli_parse_expert_experimental_mode, NULL, NULL, NULL, ACCESS_MASTER }, // not listed
+ { { "mcli-debug-mode", NULL }, NULL, cli_parse_expert_experimental_mode, NULL, NULL, NULL, ACCESS_MASTER_ONLY }, // not listed
+ { { "set", "maxconn", "global", NULL }, "set maxconn global <value> : change the per-process maxconn setting", cli_parse_set_maxconn_global, NULL },
+ { { "set", "rate-limit", NULL }, "set rate-limit <setting> <value> : change a rate limiting value", cli_parse_set_ratelimit, NULL },
+ { { "set", "severity-output", NULL }, "set severity-output [none|number|string]: set presence of severity level in feedback information", cli_parse_set_severity_output, NULL, NULL },
+ { { "set", "timeout", NULL }, "set timeout [cli] <delay> : change a timeout setting", cli_parse_set_timeout, NULL, NULL },
+ { { "show", "env", NULL }, "show env [var] : dump environment variables known to the process", cli_parse_show_env, cli_io_handler_show_env, NULL },
+ { { "show", "cli", "sockets", NULL }, "show cli sockets : dump list of cli sockets", cli_parse_default, cli_io_handler_show_cli_sock, NULL, NULL, ACCESS_MASTER },
+ { { "show", "cli", "level", NULL }, "show cli level : display the level of the current CLI session", cli_parse_show_lvl, NULL, NULL, NULL, ACCESS_MASTER},
+ { { "show", "fd", NULL }, "show fd [num] : dump list of file descriptors in use or a specific one", cli_parse_show_fd, cli_io_handler_show_fd, NULL },
+ { { "show", "activity", NULL }, "show activity : show per-thread activity stats (for support/developers)", cli_parse_default, cli_io_handler_show_activity, NULL },
+ { { "show", "version", NULL }, "show version : show version of the current process", cli_parse_show_version, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "operator", NULL }, "operator : lower the level of the current CLI session to operator", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER},
+ { { "user", NULL }, "user : lower the level of the current CLI session to user", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER},
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "stats", cli_parse_global },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+static struct bind_kw_list bind_kws = { "STAT", { }, {
+ { "level", bind_parse_level, 1 }, /* set the unix socket admin level */
+ { "expose-fd", bind_parse_expose_fd, 1 }, /* set the unix socket expose fd rights */
+ { "severity-output", bind_parse_severity_output, 1 }, /* set the severity output format */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
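+
+/* Illustrative configuration sketch (not part of this file): the three bind
+ * keywords registered above typically appear on a "stats socket" line in the
+ * global section, e.g.:
+ *
+ *     stats socket /var/run/haproxy.sock mode 600 level admin expose-fd listeners severity-output string
+ */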
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/clock.c b/src/clock.c
new file mode 100644
index 0000000..3090a02
--- /dev/null
+++ b/src/clock.c
@@ -0,0 +1,405 @@
+/*
+ * General time-keeping code and variables
+ *
+ * Copyright 2000-2021 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/time.h>
+#include <signal.h>
+#include <time.h>
+
+#ifdef USE_THREAD
+#include <pthread.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/clock.h>
+#include <haproxy/signal-t.h>
+#include <haproxy/time.h>
+#include <haproxy/tinfo-t.h>
+#include <haproxy/tools.h>
+
+struct timeval start_date; /* the process's start date in wall-clock time */
+volatile ullong global_now; /* common monotonic date between all threads (32:32) */
+volatile uint global_now_ms; /* common monotonic date in milliseconds (may wrap) */
+
+THREAD_ALIGNED(64) static ullong now_offset; /* global offset between system time and global time */
+
+THREAD_LOCAL uint now_ms; /* internal monotonic date in milliseconds (may wrap) */
+THREAD_LOCAL struct timeval now; /* internal monotonic date derived from real clock */
+THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */
+
+static THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
+static THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
+static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
+static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
+static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */
+static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */
+
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+static clockid_t per_thread_clock_id[MAX_THREADS];
+#endif
+
+/* returns the system's monotonic time in nanoseconds if supported, otherwise zero */
+uint64_t now_mono_time(void)
+{
+ uint64_t ret = 0;
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_MONOTONIC_CLOCK)
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+#endif
+ return ret;
+}
+
+/* returns the current thread's cumulated CPU time in nanoseconds if supported, otherwise zero */
+uint64_t now_cpu_time(void)
+{
+ uint64_t ret = 0;
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+ struct timespec ts;
+ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
+ ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+#endif
+ return ret;
+}
+
+/* returns another thread's cumulated CPU time in nanoseconds if supported, otherwise zero */
+uint64_t now_cpu_time_thread(int thr)
+{
+ uint64_t ret = 0;
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+ struct timespec ts;
+ clock_gettime(per_thread_clock_id[thr], &ts);
+ ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+#endif
+ return ret;
+}
+
+/* set the clock source for the local thread */
+void clock_set_local_source(void)
+{
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+#ifdef USE_THREAD
+ pthread_getcpuclockid(pthread_self(), &per_thread_clock_id[tid]);
+#else
+ per_thread_clock_id[tid] = CLOCK_THREAD_CPUTIME_ID;
+#endif
+#endif
+}
+
+/* registers a timer <tmr> of type timer_t delivering signal <sig> with value
+ * <val>. It tries on the current thread's clock ID first and falls back to
+ * CLOCK_REALTIME. Returns 1 on success, 0 on failure.
+ */
+int clock_setup_signal_timer(void *tmr, int sig, int val)
+{
+ int ret = 0;
+
+#if defined(USE_RT) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+ struct sigevent sev = { };
+ timer_t *timer = tmr;
+ sigset_t set;
+
+ /* unblock the WDTSIG signal we intend to use */
+ sigemptyset(&set);
+ sigaddset(&set, WDTSIG);
+ ha_sigmask(SIG_UNBLOCK, &set, NULL);
+
+ /* this timer will signal WDTSIG when it fires, with tid in the si_int
+ * field (important since any thread will receive the signal).
+ */
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = sig;
+ sev.sigev_value.sival_int = val;
+ if (timer_create(per_thread_clock_id[tid], &sev, timer) != -1 ||
+ timer_create(CLOCK_REALTIME, &sev, timer) != -1)
+ ret = 1;
+#endif
+ return ret;
+}
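+
+/* Illustrative sketch (not from this file): a timer created above would then
+ * be armed with the standard POSIX timer_settime() call, e.g. to make it
+ * fire once after one second:
+ *
+ *     struct itimerspec its = { .it_value = { .tv_sec = 1 } };
+ *     timer_settime(*(timer_t *)tmr, 0, &its, NULL); // delivers <sig> after 1s
+ */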
+
+/* clock_update_date: sets <date> to system time, and sets <now> to something as
+ * close as possible to real time, following a monotonic function. The main
+ * principle consists in detecting backwards and forwards time jumps and adjust
+ * an offset to correct them. This function should be called once after each
+ * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should
+ * be passed in <max_wait>, and the return value in <interrupted> (a non-zero
+ * value means that we have not expired the timeout).
+ *
+ * clock_init_process_date() must have been called once first, and
+ * clock_init_thread_date() must also have been called once for each thread.
+ *
+ * An offset is used to adjust the current time (date), to derive a monotonic
+ * local time (now). The offset is not critical, as it is only updated after a
+ * clock jump is detected. From this point all threads will apply it to their
+ * locally measured time, and will then agree around a common monotonic
+ * global_now value that serves to further refine their local time. As it is
+ * not possible to atomically update a timeval, both global_now and the
+ * now_offset values are instead stored as 64-bit integers made of two 32 bit
+ * values for the tv_sec and tv_usec parts. The offset is made of two signed
+ * ints so that the clock can be adjusted in the two directions.
+ */
+void clock_update_date(int max_wait, int interrupted)
+{
+ struct timeval min_deadline, max_deadline, tmp_now;
+ uint old_now_ms;
+ ullong old_now;
+ ullong new_now;
+ ullong ofs, ofs_new;
+ uint sec_ofs, usec_ofs;
+
+ gettimeofday(&date, NULL);
+
+ /* compute the minimum and maximum local date we may have reached based
+ * on our past date and the associated timeout. There are three possible
+ * extremities:
+ * - the new date cannot be older than before_poll
+ * - if not interrupted, the new date cannot be older than
+ * before_poll+max_wait
+ * - in any case the new date cannot be newer than
+ * before_poll+max_wait+some margin (100ms used here).
+ * In case of violation, we'll ignore the current date and instead
+ * restart from the last date we knew.
+ */
+ _tv_ms_add(&min_deadline, &before_poll, max_wait);
+ _tv_ms_add(&max_deadline, &before_poll, max_wait + 100);
+
+ ofs = HA_ATOMIC_LOAD(&now_offset);
+
+ if (unlikely(__tv_islt(&date, &before_poll) || // big jump backwards
+ (!interrupted && __tv_islt(&date, &min_deadline)) || // small jump backwards
+ __tv_islt(&max_deadline, &date))) { // big jump forwards
+ if (!interrupted)
+ _tv_ms_add(&now, &now, max_wait);
+ } else {
+ /* The date is still within expectations. Let's apply the
+		 * now_offset to the system date. Note: ofs is made of two
+ * independent signed ints.
+ */
+ now.tv_sec = date.tv_sec + (int)(ofs >> 32); // note: may be positive or negative
+ now.tv_usec = date.tv_usec + (int)ofs; // note: may be positive or negative
+ if ((int)now.tv_usec < 0) {
+ now.tv_usec += 1000000;
+ now.tv_sec -= 1;
+ } else if (now.tv_usec >= 1000000) {
+ now.tv_usec -= 1000000;
+ now.tv_sec += 1;
+ }
+ }
+
+ /* now that we have bounded the local time, let's check if it's
+ * realistic regarding the global date, which only moves forward,
+ * otherwise catch up.
+ */
+ old_now = global_now;
+ old_now_ms = global_now_ms;
+
+ do {
+ tmp_now.tv_sec = (unsigned int)(old_now >> 32);
+ tmp_now.tv_usec = old_now & 0xFFFFFFFFU;
+
+ if (__tv_islt(&now, &tmp_now))
+ now = tmp_now;
+
+ /* now <now> is expected to be the most accurate date,
+ * equal to <global_now> or newer.
+ */
+ new_now = ((ullong)now.tv_sec << 32) + (uint)now.tv_usec;
+ now_ms = __tv_to_ms(&now);
+
+ /* let's try to update the global <now> (both in timeval
+ * and ms forms) or loop again.
+ */
+ } while (((new_now != old_now && !_HA_ATOMIC_CAS(&global_now, &old_now, new_now)) ||
+ (now_ms != old_now_ms && !_HA_ATOMIC_CAS(&global_now_ms, &old_now_ms, now_ms))) &&
+ __ha_cpu_relax());
+
+ /* <now> and <now_ms> are now updated to the last value of global_now
+ * and global_now_ms, which were also monotonically updated. We can
+ * compute the latest offset, we don't care who writes it last, the
+ * variations will not break the monotonic property.
+ */
+
+ sec_ofs = now.tv_sec - date.tv_sec;
+ usec_ofs = now.tv_usec - date.tv_usec;
+ if ((int)usec_ofs < 0) {
+ usec_ofs += 1000000;
+ sec_ofs -= 1;
+ }
+ ofs_new = ((ullong)sec_ofs << 32) + usec_ofs;
+ if (ofs_new != ofs)
+ HA_ATOMIC_STORE(&now_offset, ofs_new);
+}
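+
+/* Example (illustrative only): the 32:32 encoding used above for global_now
+ * and now_offset stores tv_sec in the high 32 bits and tv_usec in the low 32
+ * bits, so packing and unpacking look like:
+ *
+ *     ullong packed = ((ullong)tv.tv_sec << 32) + (uint)tv.tv_usec;
+ *     tv.tv_sec  = (uint)(packed >> 32);
+ *     tv.tv_usec = (uint)packed;
+ */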
+
+/* must be called once at boot to initialize some global variables */
+void clock_init_process_date(void)
+{
+ now_offset = 0;
+ gettimeofday(&date, NULL);
+ now = after_poll = before_poll = date;
+ global_now = ((ullong)date.tv_sec << 32) + (uint)date.tv_usec;
+ global_now_ms = now.tv_sec * 1000 + now.tv_usec / 1000;
+ th_ctx->idle_pct = 100;
+ clock_update_date(0, 1);
+}
+
+/* must be called once per thread to initialize their thread-local variables.
+ * Note that other threads might also be initializing and running in parallel.
+ */
+void clock_init_thread_date(void)
+{
+ ullong old_now;
+
+ gettimeofday(&date, NULL);
+ after_poll = before_poll = date;
+
+ old_now = _HA_ATOMIC_LOAD(&global_now);
+ now.tv_sec = old_now >> 32;
+ now.tv_usec = (uint)old_now;
+ th_ctx->idle_pct = 100;
+ th_ctx->prev_cpu_time = now_cpu_time();
+ clock_update_date(0, 1);
+}
+
+/* report the average CPU idle percentage over all running threads, between 0 and 100 */
+uint clock_report_idle(void)
+{
+ uint total = 0;
+ uint rthr = 0;
+ uint thr;
+
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ if (!(all_threads_mask & (1UL << thr)))
+ continue;
+ total += HA_ATOMIC_LOAD(&ha_thread_ctx[thr].idle_pct);
+ rthr++;
+ }
+ return rthr ? total / rthr : 0;
+}
+
+/* Update the idle time value twice a second, to be called after
+ * clock_update_date() when called after poll(), and currently called only by
+ * clock_leaving_poll() below. It relies on <before_poll> to be updated to
+ * the system time before calling poll().
+ */
+static inline void clock_measure_idle(void)
+{
+ /* Let's compute the idle to work ratio. We worked between after_poll
+ * and before_poll, and slept between before_poll and date. The idle_pct
+ * is updated at most twice every second. Note that the current second
+ * rarely changes so we avoid a multiply when not needed.
+ */
+ int delta;
+
+ if ((delta = date.tv_sec - before_poll.tv_sec))
+ delta *= 1000000;
+ idle_time += delta + (date.tv_usec - before_poll.tv_usec);
+
+ if ((delta = date.tv_sec - after_poll.tv_sec))
+ delta *= 1000000;
+ samp_time += delta + (date.tv_usec - after_poll.tv_usec);
+
+ after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
+ if (samp_time < 500000)
+ return;
+
+ HA_ATOMIC_STORE(&th_ctx->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
+ idle_time = samp_time = 0;
+}
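+
+/* Worked example: with idle_time=300000us and samp_time=600000us over a
+ * 600ms sample, the store above computes (100*300000 + 300000) / 600000 = 50,
+ * i.e. the thread reports being idle 50% of the time.
+ */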
+
+/* Collect date and time information after leaving poll(). <timeout> must be
+ * set to the maximum sleep time passed to poll (in milliseconds), and
+ * <interrupted> must be zero if the poller reached the timeout or non-zero
+ * otherwise, which generally is provided by the poller's return value.
+ */
+void clock_leaving_poll(int timeout, int interrupted)
+{
+ clock_measure_idle();
+ th_ctx->prev_cpu_time = now_cpu_time();
+ th_ctx->prev_mono_time = now_mono_time();
+}
+
+/* Collect date and time information before calling poll(). This will be used
+ * to count the run time of the past loop and the sleep time of the next poll.
+ * It also compares the elapsed and CPU times during the activity period to
+ * estimate the amount of stolen time, which is reported if higher than half
+ * a millisecond.
+ */
+void clock_entering_poll(void)
+{
+ uint64_t new_mono_time;
+ uint64_t new_cpu_time;
+ uint32_t run_time;
+ int64_t stolen;
+
+ gettimeofday(&before_poll, NULL);
+
+ run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec);
+
+ new_cpu_time = now_cpu_time();
+ new_mono_time = now_mono_time();
+
+ if (th_ctx->prev_cpu_time && th_ctx->prev_mono_time) {
+ new_cpu_time -= th_ctx->prev_cpu_time;
+ new_mono_time -= th_ctx->prev_mono_time;
+ stolen = new_mono_time - new_cpu_time;
+ if (unlikely(stolen >= 500000)) {
+ stolen /= 500000;
+ /* more than half a millisecond difference might
+ * indicate an undesired preemption.
+ */
+ report_stolen_time(stolen);
+ }
+ }
+
+ /* update the average runtime */
+ activity_count_runtime(run_time);
+}
+
+/* returns the current date as returned by gettimeofday() in ISO+microsecond
+ * format. It uses a thread-local static variable that the reader can consume
+ * for as long as it wants until next call. Thus, do not call it from a signal
+ * handler. If <pad> is non-0, a trailing space will be added. It will always
+ * return exactly 32 or 33 characters (depending on padding) and will always be
+ * zero-terminated, thus it will always fit into a 34 bytes buffer.
+ * This also always includes the local timezone (in +/-HH:MM format).
+ */
+char *timeofday_as_iso_us(int pad)
+{
+ struct timeval new_date;
+ struct tm tm;
+ const char *offset;
+ char c;
+
+ gettimeofday(&new_date, NULL);
+ if (new_date.tv_sec != iso_time_sec || !new_date.tv_sec) {
+ get_localtime(new_date.tv_sec, &tm);
+ offset = get_gmt_offset(new_date.tv_sec, &tm);
+ if (unlikely(strftime(iso_time_str, sizeof(iso_time_str), "%Y-%m-%dT%H:%M:%S.000000+00:00", &tm) != 32))
+ strcpy(iso_time_str, "YYYY-mm-ddTHH:MM:SS.000000-00:00"); // make the failure visible but respect format.
+ iso_time_str[26] = offset[0];
+ iso_time_str[27] = offset[1];
+ iso_time_str[28] = offset[2];
+ iso_time_str[30] = offset[3];
+ iso_time_str[31] = offset[4];
+ iso_time_sec = new_date.tv_sec;
+ }
+
+ /* utoa_pad adds a trailing 0 so we save the char for restore */
+ c = iso_time_str[26];
+ utoa_pad(new_date.tv_usec, iso_time_str + 20, 7);
+ iso_time_str[26] = c;
+ if (pad) {
+ iso_time_str[32] = ' ';
+ iso_time_str[33] = 0;
+ }
+ return iso_time_str;
+}
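+
+/* Example output (illustrative): "2022-04-01T12:34:56.001234+02:00 " with
+ * <pad> set. Characters 20..25 are the microseconds patched in by utoa_pad()
+ * on every call, and characters 26..31 are the +/-HH:MM offset copied above.
+ */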
diff --git a/src/compression.c b/src/compression.c
new file mode 100644
index 0000000..3ce2a60
--- /dev/null
+++ b/src/compression.c
@@ -0,0 +1,740 @@
+/*
+ * HTTP compression.
+ *
+ * Copyright 2012 Exceliance, David Du Colombier <dducolombier@exceliance.fr>
+ * William Lallemand <wlallemand@exceliance.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+
+#if defined(USE_ZLIB)
+/* Note: the crappy zlib and openssl libs both define the "free_func" type.
+ * That's a very clever idea to use such a generic name in general purpose
+ * libraries, really... The zlib one is easier to redefine than openssl's,
+ * so let's only fix this one.
+ */
+#define free_func zlib_free_func
+#include <zlib.h>
+#undef free_func
+#endif /* USE_ZLIB */
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/compression-t.h>
+#include <haproxy/compression.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global.h>
+#include <haproxy/pool.h>
+#include <haproxy/stream.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+#if defined(USE_ZLIB)
+__decl_spinlock(comp_pool_lock);
+#endif
+
+#ifdef USE_ZLIB
+
+static void *alloc_zlib(void *opaque, unsigned int items, unsigned int size);
+static void free_zlib(void *opaque, void *ptr);
+
+/* zlib allocation */
+static struct pool_head *zlib_pool_deflate_state __read_mostly = NULL;
+static struct pool_head *zlib_pool_window __read_mostly = NULL;
+static struct pool_head *zlib_pool_prev __read_mostly = NULL;
+static struct pool_head *zlib_pool_head __read_mostly = NULL;
+static struct pool_head *zlib_pool_pending_buf __read_mostly = NULL;
+
+long zlib_used_memory = 0;
+
+static int global_tune_zlibmemlevel = 8; /* zlib memlevel */
+static int global_tune_zlibwindowsize = MAX_WBITS; /* zlib window size */
+
+#endif
+
+unsigned int compress_min_idle = 0;
+
+static int identity_init(struct comp_ctx **comp_ctx, int level);
+static int identity_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out);
+static int identity_flush(struct comp_ctx *comp_ctx, struct buffer *out);
+static int identity_finish(struct comp_ctx *comp_ctx, struct buffer *out);
+static int identity_end(struct comp_ctx **comp_ctx);
+
+#if defined(USE_SLZ)
+
+static int rfc1950_init(struct comp_ctx **comp_ctx, int level);
+static int rfc1951_init(struct comp_ctx **comp_ctx, int level);
+static int rfc1952_init(struct comp_ctx **comp_ctx, int level);
+static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out);
+static int rfc195x_flush(struct comp_ctx *comp_ctx, struct buffer *out);
+static int rfc195x_finish(struct comp_ctx *comp_ctx, struct buffer *out);
+static int rfc195x_end(struct comp_ctx **comp_ctx);
+
+#elif defined(USE_ZLIB)
+
+static int gzip_init(struct comp_ctx **comp_ctx, int level);
+static int raw_def_init(struct comp_ctx **comp_ctx, int level);
+static int deflate_init(struct comp_ctx **comp_ctx, int level);
+static int deflate_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out);
+static int deflate_flush(struct comp_ctx *comp_ctx, struct buffer *out);
+static int deflate_finish(struct comp_ctx *comp_ctx, struct buffer *out);
+static int deflate_end(struct comp_ctx **comp_ctx);
+
+#endif /* USE_ZLIB */
+
+
+const struct comp_algo comp_algos[] =
+{
+ { "identity", 8, "identity", 8, identity_init, identity_add_data, identity_flush, identity_finish, identity_end },
+#if defined(USE_SLZ)
+ { "deflate", 7, "deflate", 7, rfc1950_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
+ { "raw-deflate", 11, "deflate", 7, rfc1951_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
+ { "gzip", 4, "gzip", 4, rfc1952_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
+#elif defined(USE_ZLIB)
+ { "deflate", 7, "deflate", 7, deflate_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
+ { "raw-deflate", 11, "deflate", 7, raw_def_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
+ { "gzip", 4, "gzip", 4, gzip_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
+#endif /* USE_ZLIB */
+	{ NULL, 0, NULL, 0, NULL, NULL, NULL, NULL, NULL }
+};
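+
+/* Illustrative configuration sketch: the cfg_name column above is what gets
+ * referenced from a proxy section, e.g.:
+ *
+ *     compression algo gzip
+ *     compression type text/html text/plain application/json
+ */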
+
+/*
+ * Add a content-type to the configuration.
+ * Returns 0 in case of success, 1 in case of allocation failure.
+ */
+int comp_append_type(struct comp *comp, const char *type)
+{
+ struct comp_type *comp_type;
+
+ comp_type = calloc(1, sizeof(*comp_type));
+ if (!comp_type)
+ return 1;
+ comp_type->name_len = strlen(type);
+ comp_type->name = strdup(type);
+ comp_type->next = comp->types;
+ comp->types = comp_type;
+ return 0;
+}
+
+/*
+ * Add an algorithm in the configuration
+ * Returns 0 in case of success, -1 if the <algo> is unmanaged, 1 in case of
+ * allocation failure.
+ */
+int comp_append_algo(struct comp *comp, const char *algo)
+{
+ struct comp_algo *comp_algo;
+ int i;
+
+ for (i = 0; comp_algos[i].cfg_name; i++) {
+ if (strcmp(algo, comp_algos[i].cfg_name) == 0) {
+ comp_algo = calloc(1, sizeof(*comp_algo));
+ if (!comp_algo)
+ return 1;
+ memmove(comp_algo, &comp_algos[i], sizeof(struct comp_algo));
+ comp_algo->next = comp->algos;
+ comp->algos = comp_algo;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+#if defined(USE_ZLIB) || defined(USE_SLZ)
+DECLARE_STATIC_POOL(pool_comp_ctx, "comp_ctx", sizeof(struct comp_ctx));
+
+/*
+ * Alloc the comp_ctx
+ */
+static inline int init_comp_ctx(struct comp_ctx **comp_ctx)
+{
+#ifdef USE_ZLIB
+ z_stream *strm;
+
+ if (global.maxzlibmem > 0 && (global.maxzlibmem - zlib_used_memory) < sizeof(struct comp_ctx))
+ return -1;
+#endif
+
+ *comp_ctx = pool_alloc(pool_comp_ctx);
+ if (*comp_ctx == NULL)
+ return -1;
+#if defined(USE_SLZ)
+ (*comp_ctx)->direct_ptr = NULL;
+ (*comp_ctx)->direct_len = 0;
+ (*comp_ctx)->queued = BUF_NULL;
+#elif defined(USE_ZLIB)
+ _HA_ATOMIC_ADD(&zlib_used_memory, sizeof(struct comp_ctx));
+ __ha_barrier_atomic_store();
+
+ strm = &(*comp_ctx)->strm;
+ strm->zalloc = alloc_zlib;
+ strm->zfree = free_zlib;
+ strm->opaque = *comp_ctx;
+#endif
+ return 0;
+}
+
+/*
+ * Dealloc the comp_ctx
+ */
+static inline int deinit_comp_ctx(struct comp_ctx **comp_ctx)
+{
+ if (!*comp_ctx)
+ return 0;
+
+ pool_free(pool_comp_ctx, *comp_ctx);
+ *comp_ctx = NULL;
+
+#ifdef USE_ZLIB
+ _HA_ATOMIC_SUB(&zlib_used_memory, sizeof(struct comp_ctx));
+ __ha_barrier_atomic_store();
+#endif
+ return 0;
+}
+#endif
+
+
+/****************************
+ **** Identity algorithm ****
+ ****************************/
+
+/*
+ * Init the identity algorithm
+ */
+static int identity_init(struct comp_ctx **comp_ctx, int level)
+{
+ return 0;
+}
+
+/*
+ * Process data
+ * Return size of consumed data or -1 on error
+ */
+static int identity_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out)
+{
+ char *out_data = b_tail(out);
+ int out_len = b_room(out);
+
+ if (out_len < in_len)
+ return -1;
+
+ memcpy(out_data, in_data, in_len);
+
+ b_add(out, in_len);
+
+ return in_len;
+}
+
+static int identity_flush(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return 0;
+}
+
+static int identity_finish(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return 0;
+}
+
+/*
+ * Deinit the algorithm
+ */
+static int identity_end(struct comp_ctx **comp_ctx)
+{
+ return 0;
+}
+
+
+#ifdef USE_SLZ
+
+/* SLZ's gzip format (RFC1952). Returns < 0 on error. */
+static int rfc1952_init(struct comp_ctx **comp_ctx, int level)
+{
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ (*comp_ctx)->cur_lvl = !!level;
+ return slz_rfc1952_init(&(*comp_ctx)->strm, !!level);
+}
+
+/* SLZ's raw deflate format (RFC1951). Returns < 0 on error. */
+static int rfc1951_init(struct comp_ctx **comp_ctx, int level)
+{
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ (*comp_ctx)->cur_lvl = !!level;
+ return slz_rfc1951_init(&(*comp_ctx)->strm, !!level);
+}
+
+/* SLZ's zlib format (RFC1950). Returns < 0 on error. */
+static int rfc1950_init(struct comp_ctx **comp_ctx, int level)
+{
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ (*comp_ctx)->cur_lvl = !!level;
+ return slz_rfc1950_init(&(*comp_ctx)->strm, !!level);
+}
+
+/* Return the size of consumed data or -1. The output buffer is unused at this
+ * point, we only keep a reference to the input data, or a copy of it if the
+ * reference is already in use.
+ */
+static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out)
+{
+ static THREAD_LOCAL struct buffer tmpbuf = BUF_NULL;
+
+ if (in_len <= 0)
+ return 0;
+
+ if (comp_ctx->direct_ptr && b_is_null(&comp_ctx->queued)) {
+ /* data already being pointed to, we're in front of fragmented
+ * data and need a buffer now. We reuse the same buffer, as it's
+ * not used out of the scope of a series of add_data()*, end().
+ */
+ if (b_alloc(&tmpbuf) == NULL)
+ return -1; /* no memory */
+ b_reset(&tmpbuf);
+ memcpy(b_tail(&tmpbuf), comp_ctx->direct_ptr, comp_ctx->direct_len);
+ b_add(&tmpbuf, comp_ctx->direct_len);
+ comp_ctx->direct_ptr = NULL;
+ comp_ctx->direct_len = 0;
+ comp_ctx->queued = tmpbuf;
+ /* fall through buffer copy */
+ }
+
+ if (!b_is_null(&comp_ctx->queued)) {
+ /* data already pending */
+ memcpy(b_tail(&comp_ctx->queued), in_data, in_len);
+ b_add(&comp_ctx->queued, in_len);
+ return in_len;
+ }
+
+ comp_ctx->direct_ptr = in_data;
+ comp_ctx->direct_len = in_len;
+ return in_len;
+}
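+
+/* Illustrative call sequence (assumption): a single add_data() call only
+ * records a reference; a second one before any flush first copies the
+ * referenced bytes into the thread-local buffer, then appends to it:
+ *
+ *     rfc195x_add_data(ctx, p1, n1, out); // direct_ptr=p1, nothing copied
+ *     rfc195x_add_data(ctx, p2, n2, out); // p1 copied to <queued>, p2 appended
+ *     rfc195x_flush(ctx, out);            // everything encoded, state wiped
+ */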
+
+/* Compresses the data accumulated using add_data(), and optionally sends the
+ * format-specific trailer if <finish> is non-zero. <out> is expected to have a
+ * large enough free non-wrapping space as verified by http_comp_buffer_init().
+ * The number of bytes emitted is reported.
+ */
+static int rfc195x_flush_or_finish(struct comp_ctx *comp_ctx, struct buffer *out, int finish)
+{
+ struct slz_stream *strm = &comp_ctx->strm;
+ const char *in_ptr;
+ int in_len;
+ int out_len;
+
+ in_ptr = comp_ctx->direct_ptr;
+ in_len = comp_ctx->direct_len;
+
+ if (!b_is_null(&comp_ctx->queued)) {
+ in_ptr = b_head(&comp_ctx->queued);
+ in_len = b_data(&comp_ctx->queued);
+ }
+
+ out_len = b_data(out);
+
+ if (in_ptr)
+ b_add(out, slz_encode(strm, b_tail(out), in_ptr, in_len, !finish));
+
+ if (finish)
+ b_add(out, slz_finish(strm, b_tail(out)));
+
+ out_len = b_data(out) - out_len;
+
+ /* very important, we must wipe the data we've just flushed */
+ comp_ctx->direct_len = 0;
+ comp_ctx->direct_ptr = NULL;
+ comp_ctx->queued = BUF_NULL;
+
+ /* Verify compression rate limiting and CPU usage */
+ if ((global.comp_rate_lim > 0 && (read_freq_ctr(&global.comp_bps_out) > global.comp_rate_lim)) || /* rate */
+ (th_ctx->idle_pct < compress_min_idle)) { /* idle */
+ if (comp_ctx->cur_lvl > 0)
+ strm->level = --comp_ctx->cur_lvl;
+ }
+ else if (comp_ctx->cur_lvl < global.tune.comp_maxlevel && comp_ctx->cur_lvl < 1) {
+ strm->level = ++comp_ctx->cur_lvl;
+ }
+
+ /* and that's all */
+ return out_len;
+}
+
+static int rfc195x_flush(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return rfc195x_flush_or_finish(comp_ctx, out, 0);
+}
+
+static int rfc195x_finish(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return rfc195x_flush_or_finish(comp_ctx, out, 1);
+}
+
+/* we just need to free the comp_ctx here, nothing was allocated */
+static int rfc195x_end(struct comp_ctx **comp_ctx)
+{
+ deinit_comp_ctx(comp_ctx);
+ return 0;
+}
+
+#elif defined(USE_ZLIB) /* ! USE_SLZ */
+
+/*
+ * This is a tricky allocation function for zlib,
+ * based on the allocation order in deflateInit2().
+ */
+static void *alloc_zlib(void *opaque, unsigned int items, unsigned int size)
+{
+ struct comp_ctx *ctx = opaque;
+ static THREAD_LOCAL char round = 0; /* order in deflateInit2 */
+ void *buf = NULL;
+ struct pool_head *pool = NULL;
+
+ if (global.maxzlibmem > 0 && (global.maxzlibmem - zlib_used_memory) < (long)(items * size))
+ goto end;
+
+ switch (round) {
+ case 0:
+ if (zlib_pool_deflate_state == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_deflate_state == NULL)
+ zlib_pool_deflate_state = create_pool("zlib_state", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_deflate_state;
+ ctx->zlib_deflate_state = buf = pool_alloc(pool);
+ break;
+
+ case 1:
+ if (zlib_pool_window == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_window == NULL)
+ zlib_pool_window = create_pool("zlib_window", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_window;
+ ctx->zlib_window = buf = pool_alloc(pool);
+ break;
+
+ case 2:
+ if (zlib_pool_prev == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_prev == NULL)
+ zlib_pool_prev = create_pool("zlib_prev", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_prev;
+ ctx->zlib_prev = buf = pool_alloc(pool);
+ break;
+
+ case 3:
+ if (zlib_pool_head == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_head == NULL)
+ zlib_pool_head = create_pool("zlib_head", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_head;
+ ctx->zlib_head = buf = pool_alloc(pool);
+ break;
+
+ case 4:
+ if (zlib_pool_pending_buf == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_pending_buf == NULL)
+ zlib_pool_pending_buf = create_pool("zlib_pending_buf", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_pending_buf;
+ ctx->zlib_pending_buf = buf = pool_alloc(pool);
+ break;
+ }
+ if (buf != NULL) {
+ _HA_ATOMIC_ADD(&zlib_used_memory, pool->size);
+ __ha_barrier_atomic_store();
+ }
+
+end:
+
+ /* deflateInit2() first allocates and checks the deflate_state, then if
+	 * it succeeds, it allocates the 4 other areas at once and checks them
+ * at the end. So we want to correctly count the rounds depending on when
+ * zlib is supposed to abort.
+ */
+ if (buf || round)
+ round = (round + 1) % 5;
+ return buf;
+}
+
+static void free_zlib(void *opaque, void *ptr)
+{
+ struct comp_ctx *ctx = opaque;
+ struct pool_head *pool = NULL;
+
+ if (ptr == ctx->zlib_window)
+ pool = zlib_pool_window;
+ else if (ptr == ctx->zlib_deflate_state)
+ pool = zlib_pool_deflate_state;
+ else if (ptr == ctx->zlib_prev)
+ pool = zlib_pool_prev;
+ else if (ptr == ctx->zlib_head)
+ pool = zlib_pool_head;
+ else if (ptr == ctx->zlib_pending_buf)
+ pool = zlib_pool_pending_buf;
+ else {
+ // never matched, just to silence gcc
+ ABORT_NOW();
+ return;
+ }
+
+ pool_free(pool, ptr);
+ _HA_ATOMIC_SUB(&zlib_used_memory, pool->size);
+ __ha_barrier_atomic_store();
+}
+
+/**************************
+****   gzip algorithm  ****
+***************************/
+static int gzip_init(struct comp_ctx **comp_ctx, int level)
+{
+ z_stream *strm;
+
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ strm = &(*comp_ctx)->strm;
+
+ if (deflateInit2(strm, level, Z_DEFLATED, global_tune_zlibwindowsize + 16, global_tune_zlibmemlevel, Z_DEFAULT_STRATEGY) != Z_OK) {
+ deinit_comp_ctx(comp_ctx);
+ return -1;
+ }
+
+ (*comp_ctx)->cur_lvl = level;
+
+ return 0;
+}
+
+/* Raw deflate algorithm */
+static int raw_def_init(struct comp_ctx **comp_ctx, int level)
+{
+ z_stream *strm;
+
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ strm = &(*comp_ctx)->strm;
+
+ if (deflateInit2(strm, level, Z_DEFLATED, -global_tune_zlibwindowsize, global_tune_zlibmemlevel, Z_DEFAULT_STRATEGY) != Z_OK) {
+ deinit_comp_ctx(comp_ctx);
+ return -1;
+ }
+
+ (*comp_ctx)->cur_lvl = level;
+ return 0;
+}
+
+/**************************
+**** Deflate algorithm ****
+***************************/
+
+static int deflate_init(struct comp_ctx **comp_ctx, int level)
+{
+ z_stream *strm;
+
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ strm = &(*comp_ctx)->strm;
+
+ if (deflateInit2(strm, level, Z_DEFLATED, global_tune_zlibwindowsize, global_tune_zlibmemlevel, Z_DEFAULT_STRATEGY) != Z_OK) {
+ deinit_comp_ctx(comp_ctx);
+ return -1;
+ }
+
+ (*comp_ctx)->cur_lvl = level;
+
+ return 0;
+}
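+
+/* Note on the three init functions above: zlib encodes the output format in
+ * the windowBits argument of deflateInit2(): adding 16 selects the gzip
+ * wrapper, negating it selects raw deflate, and a plain positive value
+ * selects the zlib (RFC1950) wrapper.
+ */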
+
+/* Return the size of consumed data or -1 */
+static int deflate_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out)
+{
+ int ret;
+ z_stream *strm = &comp_ctx->strm;
+ char *out_data = b_tail(out);
+ int out_len = b_room(out);
+
+ if (in_len <= 0)
+ return 0;
+
+ if (out_len <= 0)
+ return -1;
+
+ strm->next_in = (unsigned char *)in_data;
+ strm->avail_in = in_len;
+ strm->next_out = (unsigned char *)out_data;
+ strm->avail_out = out_len;
+
+ ret = deflate(strm, Z_NO_FLUSH);
+ if (ret != Z_OK)
+ return -1;
+
+	/* deflate has updated avail_out, account for the bytes produced */
+ b_add(out, out_len - strm->avail_out);
+
+ return in_len - strm->avail_in;
+}
+
+static int deflate_flush_or_finish(struct comp_ctx *comp_ctx, struct buffer *out, int flag)
+{
+ int ret;
+ int out_len = 0;
+ z_stream *strm = &comp_ctx->strm;
+
+ strm->next_in = NULL;
+ strm->avail_in = 0;
+ strm->next_out = (unsigned char *)b_tail(out);
+ strm->avail_out = b_room(out);
+
+ ret = deflate(strm, flag);
+ if (ret != Z_OK && ret != Z_STREAM_END)
+ return -1;
+
+ out_len = b_room(out) - strm->avail_out;
+ b_add(out, out_len);
+
+ /* compression limit */
+ if ((global.comp_rate_lim > 0 && (read_freq_ctr(&global.comp_bps_out) > global.comp_rate_lim)) || /* rate */
+ (th_ctx->idle_pct < compress_min_idle)) { /* idle */
+ /* decrease level */
+ if (comp_ctx->cur_lvl > 0) {
+ comp_ctx->cur_lvl--;
+ deflateParams(&comp_ctx->strm, comp_ctx->cur_lvl, Z_DEFAULT_STRATEGY);
+ }
+
+ } else if (comp_ctx->cur_lvl < global.tune.comp_maxlevel) {
+ /* increase level */
+		comp_ctx->cur_lvl++;
+ deflateParams(&comp_ctx->strm, comp_ctx->cur_lvl, Z_DEFAULT_STRATEGY);
+ }
+
+ return out_len;
+}
+
+static int deflate_flush(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return deflate_flush_or_finish(comp_ctx, out, Z_SYNC_FLUSH);
+}
+
+static int deflate_finish(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return deflate_flush_or_finish(comp_ctx, out, Z_FINISH);
+}
+
+static int deflate_end(struct comp_ctx **comp_ctx)
+{
+ z_stream *strm = &(*comp_ctx)->strm;
+ int ret;
+
+ ret = deflateEnd(strm);
+
+ deinit_comp_ctx(comp_ctx);
+
+ return ret;
+}
+
+/* config parser for global "tune.zlibmemlevel" */
+static int zlib_parse_global_memlevel(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a numeric value between 1 and 9.", args[0]);
+ return -1;
+ }
+
+ global_tune_zlibmemlevel = atoi(args[1]);
+ if (global_tune_zlibmemlevel < 1 || global_tune_zlibmemlevel > 9) {
+ memprintf(err, "'%s' expects a numeric value between 1 and 9.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+
+/* config parser for global "tune.zlibwindowsize" */
+static int zlib_parse_global_windowsize(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a numeric value between 8 and 15.", args[0]);
+ return -1;
+ }
+
+ global_tune_zlibwindowsize = atoi(args[1]);
+ if (global_tune_zlibwindowsize < 8 || global_tune_zlibwindowsize > 15) {
+ memprintf(err, "'%s' expects a numeric value between 8 and 15.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+#endif /* USE_ZLIB */
+
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+#ifdef USE_ZLIB
+ { CFG_GLOBAL, "tune.zlib.memlevel", zlib_parse_global_memlevel },
+ { CFG_GLOBAL, "tune.zlib.windowsize", zlib_parse_global_windowsize },
+#endif
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
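+
+/* Illustrative global-section sketch for the two keywords registered above:
+ *
+ *     global
+ *         tune.zlib.memlevel   8
+ *         tune.zlib.windowsize 15
+ */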
+
+static void comp_register_build_opts(void)
+{
+ char *ptr = NULL;
+ int i;
+
+#ifdef USE_ZLIB
+ memprintf(&ptr, "Built with zlib version : " ZLIB_VERSION);
+ memprintf(&ptr, "%s\nRunning on zlib version : %s", ptr, zlibVersion());
+#elif defined(USE_SLZ)
+ memprintf(&ptr, "Built with libslz for stateless compression.");
+#else
+ memprintf(&ptr, "Built without compression support (neither USE_ZLIB nor USE_SLZ are set).");
+#endif
+ memprintf(&ptr, "%s\nCompression algorithms supported :", ptr);
+
+ for (i = 0; comp_algos[i].cfg_name; i++)
+ memprintf(&ptr, "%s%s %s(\"%s\")", ptr, (i == 0 ? "" : ","), comp_algos[i].cfg_name, comp_algos[i].ua_name);
+
+ if (i == 0)
+ memprintf(&ptr, "%s none", ptr);
+
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, comp_register_build_opts);
diff --git a/src/connection.c b/src/connection.c
new file mode 100644
index 0000000..5a459fd
--- /dev/null
+++ b/src/connection.c
@@ -0,0 +1,2454 @@
+/*
+ * Connection management functions
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/hash.h>
+#include <haproxy/list.h>
+#include <haproxy/log-t.h>
+#include <haproxy/namespace.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+DECLARE_POOL(pool_head_connection, "connection", sizeof(struct connection));
+DECLARE_POOL(pool_head_conn_hash_node, "conn_hash_node", sizeof(struct conn_hash_node));
+DECLARE_POOL(pool_head_sockaddr, "sockaddr", sizeof(struct sockaddr_storage));
+DECLARE_POOL(pool_head_authority, "authority", PP2_AUTHORITY_MAX);
+
+struct idle_conns idle_conns[MAX_THREADS] = { };
+struct xprt_ops *registered_xprt[XPRT_ENTRIES] = { NULL, };
+
+/* List head of all known muxes for PROTO */
+struct mux_proto_list mux_proto_list = {
+ .list = LIST_HEAD_INIT(mux_proto_list.list)
+};
+
+struct mux_stopping_data mux_stopping_data[MAX_THREADS];
+
+/* disables sending of proxy-protocol-v2's LOCAL command */
+static int pp2_never_send_local;
+
+void conn_delete_from_tree(struct eb64_node *node)
+{
+ eb64_delete(node);
+}
+
+int conn_create_mux(struct connection *conn)
+{
+ if (conn_is_back(conn)) {
+ struct server *srv;
+ struct stconn *sc = conn->ctx;
+ struct session *sess = conn->owner;
+
+ if (conn->flags & CO_FL_ERROR)
+ goto fail;
+
+ if (sess && obj_type(sess->origin) == OBJ_TYPE_CHECK) {
+ if (conn_install_mux_chk(conn, conn->ctx, sess) < 0)
+ goto fail;
+ }
+ else if (conn_install_mux_be(conn, conn->ctx, sess, NULL) < 0)
+ goto fail;
+ srv = objt_server(conn->target);
+
+		/* If we're doing http-reuse always, and the connection is not
+		 * private and has available streams (e.g. an http2 connection),
+		 * add it to the available list, so that others can use it right
+ * away. If the connection is private, add it in the session
+ * server list.
+ */
+ if (srv && ((srv->proxy->options & PR_O_REUSE_MASK) == PR_O_REUSE_ALWS) &&
+ !(conn->flags & CO_FL_PRIVATE) && conn->mux->avail_streams(conn) > 0)
+ eb64_insert(&srv->per_thr[tid].avail_conns, &conn->hash_node->node);
+ else if (conn->flags & CO_FL_PRIVATE) {
+			/* If it fails now, the same will be done in the mux->detach() callback */
+ session_add_conn(sess, conn, conn->target);
+ }
+ return 0;
+fail:
+ /* let the upper layer know the connection failed */
+ sc->app_ops->wake(sc);
+ return -1;
+ } else
+ return conn_complete_session(conn);
+
+}
+
+/* This is used at the end of the socket IOCB to possibly create the mux if it
+ * was not done yet, or wake it up if flags changed compared to old_flags or if
+ * need_wake insists on this. It returns <0 if the connection was destroyed and
+ * must not be used, >=0 otherwise.
+ */
+int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake)
+{
+ int ret = 0;
+
+ /* If we don't yet have a mux, that means we were waiting for
+ * information to create one, typically from the ALPN. If we're
+ * done with the handshake, attempt to create one.
+ */
+ if (unlikely(!conn->mux) && !(conn->flags & CO_FL_WAIT_XPRT)) {
+ ret = conn_create_mux(conn);
+ if (ret < 0)
+ goto done;
+ }
+
+ /* The wake callback is normally used to notify the data layer about
+ * data layer activity (successful send/recv), connection establishment,
+ * shutdown and fatal errors. We need to consider the following
+ * situations to wake up the data layer :
+ * - change among the CO_FL_NOTIFY_DONE flags :
+ * SOCK_{RD,WR}_SH, ERROR,
+ * - absence of any of {L4,L6}_CONN and CONNECTED, indicating the
+ * end of handshake and transition to CONNECTED
+ * - raise of CONNECTED with HANDSHAKE down
+ * - end of HANDSHAKE with CONNECTED set
+ * - regular data layer activity
+ *
+ * One tricky case is the wake up on read0 or error on an idle
+ * backend connection, that can happen on a connection that is still
+ * polled while at the same moment another thread is about to perform a
+ * takeover. The solution against this is to remove the connection from
+ * the idle list if it was in it, and possibly reinsert it at the end
+	 * if the connection remains valid. The cost is non-zero (locked tree
+ * removal) but remains low given that this is extremely rarely called.
+ * In any case it's guaranteed by the FD's thread_mask that we're
+ * called from the same thread the connection is queued in.
+ *
+ * Note that the wake callback is allowed to release the connection and
+ * the fd (and return < 0 in this case).
+ */
+ if ((forced_wake ||
+ ((conn->flags ^ old_flags) & CO_FL_NOTIFY_DONE) ||
+ ((old_flags & CO_FL_WAIT_XPRT) && !(conn->flags & CO_FL_WAIT_XPRT))) &&
+ conn->mux && conn->mux->wake) {
+ uint conn_in_list = conn_get_idle_flag(conn);
+ struct server *srv = objt_server(conn->target);
+
+ if (conn_in_list) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(&conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ ret = conn->mux->wake(conn);
+ if (ret < 0)
+ goto done;
+
+ if (conn_in_list) {
+ struct eb_root *root = (conn_in_list == CO_FL_SAFE_LIST) ?
+ &srv->per_thr[tid].safe_conns :
+ &srv->per_thr[tid].idle_conns;
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ eb64_insert(root, &conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ }
+ done:
+ return ret;
+}
+
+/* Change the mux for the connection.
+ * The caller should make sure he's not subscribed to the underlying XPRT.
+ */
+int conn_upgrade_mux_fe(struct connection *conn, void *ctx, struct buffer *buf,
+ struct ist mux_proto, int mode)
+{
+ struct bind_conf *bind_conf = __objt_listener(conn->target)->bind_conf;
+ const struct mux_ops *old_mux, *new_mux;
+ void *old_mux_ctx;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+
+ if (!mux_proto.len) {
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+ }
+ new_mux = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_FE, mode);
+ old_mux = conn->mux;
+
+ /* No mux found */
+ if (!new_mux)
+ return -1;
+
+ /* Same mux, nothing to do */
+ if (old_mux == new_mux)
+ return 0;
+
+ old_mux_ctx = conn->ctx;
+ conn->mux = new_mux;
+ conn->ctx = ctx;
+ if (new_mux->init(conn, bind_conf->frontend, conn->owner, buf) == -1) {
+ /* The mux upgrade failed, so restore the old mux */
+ conn->ctx = old_mux_ctx;
+ conn->mux = old_mux;
+ return -1;
+ }
+
+ /* The mux was upgraded, destroy the old one */
+ *buf = BUF_NULL;
+ old_mux->destroy(old_mux_ctx);
+ return 0;
+}
+
+/* installs the best mux for incoming connection <conn> using the upper context
+ * <ctx>. If the mux protocol is forced, we use it to find the best
+ * mux. Otherwise we use the ALPN name, if any. Returns < 0 on error.
+ */
+int conn_install_mux_fe(struct connection *conn, void *ctx)
+{
+ struct bind_conf *bind_conf = __objt_listener(conn->target)->bind_conf;
+ const struct mux_ops *mux_ops;
+
+ if (bind_conf->mux_proto)
+ mux_ops = bind_conf->mux_proto->mux;
+ else {
+ struct ist mux_proto;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+ int mode;
+
+ if (bind_conf->frontend->mode == PR_MODE_HTTP)
+ mode = PROTO_MODE_HTTP;
+ else
+ mode = PROTO_MODE_TCP;
+
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+ mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_FE, mode);
+ if (!mux_ops)
+ return -1;
+ }
+ return conn_install_mux(conn, mux_ops, ctx, bind_conf->frontend, conn->owner);
+}
+
+/* installs the best mux for outgoing connection <conn> using the upper context
+ * <ctx>. If the server mux protocol is forced, we use it to find the best mux.
+ * It's also possible to specify an alternative mux protocol <force_mux_ops>,
+ * in which case it will be used instead of the default server mux protocol.
+ *
+ * Returns < 0 on error.
+ */
+int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess,
+ const struct mux_ops *force_mux_ops)
+{
+ struct server *srv = objt_server(conn->target);
+ struct proxy *prx = objt_proxy(conn->target);
+ const struct mux_ops *mux_ops;
+
+ if (srv)
+ prx = srv->proxy;
+
+ if (!prx) // target must be either proxy or server
+ return -1;
+
+ if (srv && srv->mux_proto && likely(!force_mux_ops)) {
+ mux_ops = srv->mux_proto->mux;
+ }
+ else if (srv && unlikely(force_mux_ops)) {
+ mux_ops = force_mux_ops;
+ }
+ else {
+ struct ist mux_proto;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+ int mode;
+
+ if (prx->mode == PR_MODE_HTTP)
+ mode = PROTO_MODE_HTTP;
+ else
+ mode = PROTO_MODE_TCP;
+
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+
+ mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_BE, mode);
+ if (!mux_ops)
+ return -1;
+ }
+ return conn_install_mux(conn, mux_ops, ctx, prx, sess);
+}
+
+/* installs the best mux for outgoing connection <conn> for a check using the
+ * upper context <ctx>. If the mux protocol is forced by the check, we use it to
+ * find the best mux. Returns < 0 on error.
+ */
+int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess)
+{
+ struct check *check = objt_check(sess->origin);
+ struct server *srv = objt_server(conn->target);
+ struct proxy *prx = objt_proxy(conn->target);
+ const struct mux_ops *mux_ops;
+
+ if (!check) // Check must be defined
+ return -1;
+
+ if (srv)
+ prx = srv->proxy;
+
+ if (!prx) // target must be either proxy or server
+ return -1;
+
+ if (check->mux_proto)
+ mux_ops = check->mux_proto->mux;
+ else {
+ struct ist mux_proto;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+ int mode;
+
+ if ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK)
+ mode = PROTO_MODE_HTTP;
+ else
+ mode = PROTO_MODE_TCP;
+
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+
+ mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_BE, mode);
+ if (!mux_ops)
+ return -1;
+ }
+ return conn_install_mux(conn, mux_ops, ctx, prx, sess);
+}
+
+/* Set the ALPN of connection <conn> to <alpn>. If force is false, <alpn> must
+ * be a subset or identical to the registered protos for the parent SSL_CTX.
+ * In this case <alpn> must be a single protocol value, not a list.
+ *
+ * Returns 0 if ALPN is updated else -1.
+ */
+int conn_update_alpn(struct connection *conn, const struct ist alpn, int force)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ size_t alpn_len = istlen(alpn);
+ char *ctx_alpn_str = NULL;
+ int ctx_alpn_len = 0, found = 0;
+
+	/* if <force> is not set, first check whether <alpn> is a subset of, or
+	 * identical to, the ALPN registered on the parent SSL_CTX.
+ */
+ if (!force) {
+ /* retrieve the SSL_CTX according to the connection side. */
+ if (conn_is_back(conn)) {
+ if (obj_type(conn->target) == OBJ_TYPE_SERVER) {
+ struct server *srv = __objt_server(conn->target);
+ ctx_alpn_str = srv->ssl_ctx.alpn_str;
+ ctx_alpn_len = srv->ssl_ctx.alpn_len;
+ }
+ }
+ else {
+ struct session *sess = conn->owner;
+ struct listener *li = sess->listener;
+
+ if (li->bind_conf && li->bind_conf->options & BC_O_USE_SSL) {
+ ctx_alpn_str = li->bind_conf->ssl_conf.alpn_str;
+ ctx_alpn_len = li->bind_conf->ssl_conf.alpn_len;
+ }
+ }
+
+ if (ctx_alpn_str) {
+ /* search if ALPN is present in SSL_CTX ALPN before
+ * using it.
+ */
+ while (ctx_alpn_len) {
+ /* skip ALPN whose size is not 8 */
+ if (*ctx_alpn_str != alpn_len - 1) {
+ ctx_alpn_len -= *ctx_alpn_str + 1;
+ }
+ else {
+ if (isteqi(ist2(ctx_alpn_str, alpn_len), alpn)) {
+ found = 1;
+ break;
+ }
+ }
+ ctx_alpn_str += *ctx_alpn_str + 1;
+
+ /* This indicates an invalid ALPN formatted
+ * string and should never happen. */
+ BUG_ON(ctx_alpn_len < 0);
+ }
+ }
+ }
+
+ if (found || force) {
+ ssl_sock_set_alpn(conn, (const uchar *)istptr(alpn), istlen(alpn));
+ return 0;
+ }
+
+#endif
+ return -1;
+}
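+
+/* Example (illustrative): ALPN protocol lists use the TLS wire format of
+ * length-prefixed names, which is what the walk above parses, e.g.:
+ *
+ *     const char alpn_list[] = "\x02h2\x08http/1.1"; // "h2" then "http/1.1"
+ */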
+
+/* Initializes all required fields for a new connection. Note that it does the
+ * minimum acceptable initialization for a connection that already exists and
+ * is about to be reused. It also leaves the addresses untouched, which makes
+ * it usable across connection retries to reset a connection to a known state.
+ */
+void conn_init(struct connection *conn, void *target)
+{
+ conn->obj_type = OBJ_TYPE_CONN;
+ conn->flags = CO_FL_NONE;
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ conn->owner = NULL;
+ conn->send_proxy_ofs = 0;
+ conn->handle.fd = DEAD_FD_MAGIC;
+ conn->err_code = CO_ER_NONE;
+ conn->target = target;
+ conn->destroy_cb = NULL;
+ conn->proxy_netns = NULL;
+ MT_LIST_INIT(&conn->toremove_list);
+ if (conn_is_back(conn))
+ LIST_INIT(&conn->session_list);
+ else
+ LIST_INIT(&conn->stopping_list);
+ conn->subs = NULL;
+ conn->src = NULL;
+ conn->dst = NULL;
+ conn->proxy_authority = IST_NULL;
+ conn->proxy_unique_id = IST_NULL;
+ conn->hash_node = NULL;
+ conn->xprt = NULL;
+}
+
+/* Tries to allocate a new connection and initializes its main fields. The
+ * connection is returned on success, NULL on failure. The connection must
+ * be released using pool_free() or conn_free().
+ */
+struct connection *conn_new(void *target)
+{
+ struct connection *conn;
+ struct conn_hash_node *hash_node;
+
+ conn = pool_alloc(pool_head_connection);
+ if (unlikely(!conn))
+ return NULL;
+
+ conn_init(conn, target);
+
+ if (conn_is_back(conn)) {
+ if (obj_type(target) == OBJ_TYPE_SERVER)
+ srv_use_conn(__objt_server(target), conn);
+
+ hash_node = conn_alloc_hash_node(conn);
+ if (unlikely(!hash_node)) {
+ pool_free(pool_head_connection, conn);
+ return NULL;
+ }
+
+ conn->hash_node = hash_node;
+ }
+
+ return conn;
+}
+
+/* Releases a connection previously allocated by conn_new() */
+void conn_free(struct connection *conn)
+{
+ /* If the connection is owned by the session, remove it from its list
+ */
+ if (conn_is_back(conn) && LIST_INLIST(&conn->session_list)) {
+ session_unown_conn(conn->owner, conn);
+ }
+ else if (!(conn->flags & CO_FL_PRIVATE)) {
+ if (obj_type(conn->target) == OBJ_TYPE_SERVER)
+ srv_release_conn(__objt_server(conn->target), conn);
+ }
+
+ /* Remove the conn from toremove_list.
+ *
+ * This is needed to prevent a double-free in case the connection was
+	 * already scheduled for cleaning but is freed earlier via another
+ * call.
+ */
+ MT_LIST_DELETE(&conn->toremove_list);
+
+ sockaddr_free(&conn->src);
+ sockaddr_free(&conn->dst);
+
+ pool_free(pool_head_authority, istptr(conn->proxy_authority));
+ conn->proxy_authority = IST_NULL;
+
+ pool_free(pool_head_uniqueid, istptr(conn->proxy_unique_id));
+ conn->proxy_unique_id = IST_NULL;
+
+ pool_free(pool_head_conn_hash_node, conn->hash_node);
+ conn->hash_node = NULL;
+
+ conn_force_unsubscribe(conn);
+ pool_free(pool_head_connection, conn);
+}
+
+struct conn_hash_node *conn_alloc_hash_node(struct connection *conn)
+{
+ struct conn_hash_node *hash_node = NULL;
+
+ hash_node = pool_zalloc(pool_head_conn_hash_node);
+ if (unlikely(!hash_node))
+ return NULL;
+
+ hash_node->conn = conn;
+
+ return hash_node;
+}
+
+/* Allocates a struct sockaddr from the pool if needed, assigns it to *sap and
+ * returns it. If <sap> is NULL, the address is always allocated and returned.
+ * if <sap> is non-null, an address will only be allocated if it points to a
+ * NULL pointer. In this case the allocated address will be assigned there.
+ * If <orig> is non-null and <len> positive, the address in <orig> will be copied
+ * into the allocated address. In both situations the new pointer is returned.
+ */
+struct sockaddr_storage *sockaddr_alloc(struct sockaddr_storage **sap, const struct sockaddr_storage *orig, socklen_t len)
+{
+ struct sockaddr_storage *sa;
+
+ if (sap && *sap)
+ return *sap;
+
+ sa = pool_alloc(pool_head_sockaddr);
+ if (sa && orig && len > 0)
+ memcpy(sa, orig, len);
+ if (sap)
+ *sap = sa;
+ return sa;
+}
+
+/* Releases the struct sockaddr potentially pointed to by <sap> to the pool. It
+ * may be NULL or may point to NULL. If <sap> is not NULL, a NULL is placed
+ * there.
+ */
+void sockaddr_free(struct sockaddr_storage **sap)
+{
+ if (!sap)
+ return;
+ pool_free(pool_head_sockaddr, *sap);
+ *sap = NULL;
+}
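+
+/* Illustrative usage sketch (peer_addr is a hypothetical variable): copying
+ * a peer address into a connection, then releasing it later:
+ *
+ *     if (!sockaddr_alloc(&conn->dst, &peer_addr, sizeof(peer_addr)))
+ *         return -1;              // pool exhausted
+ *     ...
+ *     sockaddr_free(&conn->dst);  // safe even if conn->dst is already NULL
+ */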
+
+/* Try to add a handshake pseudo-XPRT. If the connection's first XPRT is
+ * raw_sock, then just use the new XPRT as the connection XPRT, otherwise
+ * call the xprt's add_xprt() method.
+ * Returns 0 on success, or non-zero on failure.
+ */
+int xprt_add_hs(struct connection *conn)
+{
+ void *xprt_ctx = NULL;
+ const struct xprt_ops *ops = xprt_get(XPRT_HANDSHAKE);
+ void *nextxprt_ctx = NULL;
+ const struct xprt_ops *nextxprt_ops = NULL;
+
+ if (conn->flags & CO_FL_ERROR)
+ return -1;
+ if (ops->init(conn, &xprt_ctx) < 0)
+ return -1;
+ if (conn->xprt == xprt_get(XPRT_RAW)) {
+ nextxprt_ctx = conn->xprt_ctx;
+ nextxprt_ops = conn->xprt;
+ conn->xprt_ctx = xprt_ctx;
+ conn->xprt = ops;
+ } else {
+ if (conn->xprt->add_xprt(conn, conn->xprt_ctx, xprt_ctx, ops,
+ &nextxprt_ctx, &nextxprt_ops) != 0) {
+ ops->close(conn, xprt_ctx);
+ return -1;
+ }
+ }
+ if (ops->add_xprt(conn, xprt_ctx, nextxprt_ctx, nextxprt_ops, NULL, NULL) != 0) {
+ ops->close(conn, xprt_ctx);
+ return -1;
+ }
+ return 0;
+}
+
+/* returns a human-readable error code for conn->err_code, or NULL if the code
+ * is unknown.
+ */
+const char *conn_err_code_str(struct connection *c)
+{
+ switch (c->err_code) {
+ case CO_ER_NONE: return "Success";
+
+ case CO_ER_CONF_FDLIM: return "Reached configured maxconn value";
+ case CO_ER_PROC_FDLIM: return "Too many sockets on the process";
+ case CO_ER_SYS_FDLIM: return "Too many sockets on the system";
+ case CO_ER_SYS_MEMLIM: return "Out of system buffers";
+ case CO_ER_NOPROTO: return "Protocol or address family not supported";
+ case CO_ER_SOCK_ERR: return "General socket error";
+ case CO_ER_PORT_RANGE: return "Source port range exhausted";
+ case CO_ER_CANT_BIND: return "Can't bind to source address";
+ case CO_ER_FREE_PORTS: return "Out of local source ports on the system";
+ case CO_ER_ADDR_INUSE: return "Local source address already in use";
+
+ case CO_ER_PRX_EMPTY: return "Connection closed while waiting for PROXY protocol header";
+ case CO_ER_PRX_ABORT: return "Connection error while waiting for PROXY protocol header";
+ case CO_ER_PRX_TIMEOUT: return "Timeout while waiting for PROXY protocol header";
+ case CO_ER_PRX_TRUNCATED: return "Truncated PROXY protocol header received";
+ case CO_ER_PRX_NOT_HDR: return "Received something which does not look like a PROXY protocol header";
+ case CO_ER_PRX_BAD_HDR: return "Received an invalid PROXY protocol header";
+ case CO_ER_PRX_BAD_PROTO: return "Received an unhandled protocol in the PROXY protocol header";
+
+ case CO_ER_CIP_EMPTY: return "Connection closed while waiting for NetScaler Client IP header";
+ case CO_ER_CIP_ABORT: return "Connection error while waiting for NetScaler Client IP header";
+ case CO_ER_CIP_TIMEOUT: return "Timeout while waiting for a NetScaler Client IP header";
+ case CO_ER_CIP_TRUNCATED: return "Truncated NetScaler Client IP header received";
+ case CO_ER_CIP_BAD_MAGIC: return "Received an invalid NetScaler Client IP magic number";
+ case CO_ER_CIP_BAD_PROTO: return "Received an unhandled protocol in the NetScaler Client IP header";
+
+ case CO_ER_SSL_EMPTY: return "Connection closed during SSL handshake";
+ case CO_ER_SSL_ABORT: return "Connection error during SSL handshake";
+ case CO_ER_SSL_TIMEOUT: return "Timeout during SSL handshake";
+ case CO_ER_SSL_TOO_MANY: return "Too many SSL connections";
+ case CO_ER_SSL_NO_MEM: return "Out of memory when initializing an SSL connection";
+ case CO_ER_SSL_RENEG: return "Rejected a client-initiated SSL renegotiation attempt";
+ case CO_ER_SSL_CA_FAIL: return "SSL client CA chain cannot be verified";
+ case CO_ER_SSL_CRT_FAIL: return "SSL client certificate not trusted";
+ case CO_ER_SSL_MISMATCH: return "Server presented an SSL certificate different from the configured one";
+ case CO_ER_SSL_MISMATCH_SNI: return "Server presented an SSL certificate different from the expected one";
+ case CO_ER_SSL_HANDSHAKE: return "SSL handshake failure";
+ case CO_ER_SSL_HANDSHAKE_HB: return "SSL handshake failure after heartbeat";
+ case CO_ER_SSL_KILLED_HB: return "Stopped a TLSv1 heartbeat attack (CVE-2014-0160)";
+ case CO_ER_SSL_NO_TARGET: return "Attempt to use SSL on an unknown target (internal error)";
+ case CO_ER_SSL_EARLY_FAILED: return "Server refused early data";
+
+ case CO_ER_SOCKS4_SEND: return "SOCKS4 Proxy write error during handshake";
+ case CO_ER_SOCKS4_RECV: return "SOCKS4 Proxy read error during handshake";
+ case CO_ER_SOCKS4_DENY: return "SOCKS4 Proxy denied the request";
+ case CO_ER_SOCKS4_ABORT: return "SOCKS4 Proxy handshake aborted by server";
+
+ case CO_ERR_SSL_FATAL: return "SSL fatal error";
+ }
+ return NULL;
+}
+
+/* Send a message over an established connection. It makes use of send() and
+ * returns the same return code and errno. If the socket layer is not ready yet
+ * then -1 is returned and ENOTSOCK is set into errno. If the fd is not marked
+ * as ready, or if EAGAIN or ENOTCONN is returned, then we return 0. If called
+ * with a zero-length message, -1 is returned with errno set to EMSGSIZE. The
+ * purpose is to simplify some rare attempts to directly write on the socket
+ * from above the connection (typically send_proxy). In case of EAGAIN, the fd
+ * is marked as "cant_send". It automatically retries on EINTR. Other errors
+ * cause the connection to be marked as in error state. It takes similar
+ * arguments as send() except the first one which is the connection instead of
+ * the file descriptor. <flags> only supports CO_SFL_MSG_MORE.
+ */
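+/* Illustrative call (a sketch, not part of this patch): sending a
+ * pre-formatted header in one shot; a negative return means the connection
+ * is now marked in error, a short positive return means a partial send:
+ *
+ *   ret = conn_ctrl_send(conn, line, len, 0);
+ *   if (ret < 0)
+ *       ... abort, CO_FL_ERROR is set on the connection ...
+ */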
+int conn_ctrl_send(struct connection *conn, const void *buf, int len, int flags)
+{
+ const struct buffer buffer = b_make((char*)buf, len, 0, len);
+ const struct xprt_ops *xprt = xprt_get(XPRT_RAW);
+ int ret;
+
+ ret = -1;
+ errno = ENOTSOCK;
+
+ if (conn->flags & CO_FL_SOCK_WR_SH)
+ goto fail;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ errno = EMSGSIZE;
+ if (!len)
+ goto fail;
+
+ /* snd_buf() already takes care of updating conn->flags and handling
+ * the FD polling status.
+ */
+ ret = xprt->snd_buf(conn, NULL, &buffer, buffer.data, flags);
+ if (conn->flags & CO_FL_ERROR)
+ ret = -1;
+ return ret;
+ fail:
+ conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH | CO_FL_ERROR;
+ return ret;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The event subscriber <es> is not allowed to change from a previous call as
+ * long as at least one event is still subscribed. The <event_type> must only
+ * be a combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+int conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(conn->subs && conn->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ conn->subs = NULL;
+
+ if (conn_ctrl_ready(conn) && conn->ctrl->ignore_events)
+ conn->ctrl->ignore_events(conn, event_type);
+
+ return 0;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>.
+ * The <es> struct is not allowed to differ from the one passed during a
+ * previous call to subscribe(). If the connection's ctrl layer is ready,
+ * the wait_event is immediately woken up and the subscription is cancelled.
+ * It always returns zero.
+ */
+int conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ int ret = 0;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(conn->subs && conn->subs != es);
+
+ if (conn->subs && (conn->subs->events & event_type) == event_type)
+ return 0;
+
+ if (conn_ctrl_ready(conn) && conn->ctrl->check_events) {
+ ret = conn->ctrl->check_events(conn, event_type);
+ if (ret)
+ tasklet_wakeup(es->tasklet);
+ }
+
+ es->events = (es->events | event_type) & ~ret;
+ conn->subs = es->events ? es : NULL;
+ return 0;
+}
+
+/* Drains possibly pending incoming data on the connection and updates the flags
+ * accordingly. This is used to know whether we need to disable lingering on
+ * close. Returns non-zero if it is safe to close without disabling lingering,
+ * otherwise zero. The CO_FL_SOCK_RD_SH flag may also be updated if the incoming
+ * shutdown was reported by the ->drain() function.
+ */
+int conn_ctrl_drain(struct connection *conn)
+{
+ int ret = 0;
+
+ if (!conn_ctrl_ready(conn) || conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))
+ ret = 1;
+ else if (conn->ctrl->drain) {
+ ret = conn->ctrl->drain(conn);
+ if (ret)
+ conn->flags |= CO_FL_SOCK_RD_SH;
+ }
+ return ret;
+}
+
+/*
+ * Get the data length from a TLV header: a 16-bit big-endian value split
+ * across the <length_hi> and <length_lo> bytes
+ */
+static inline size_t get_tlv_length(const struct tlv *src)
+{
+ return (src->length_hi << 8) | src->length_lo;
+}
+
+/* This handshake handler waits for a PROXY protocol header at the beginning
+ * of the raw data stream. The header looks like this :
+ *
+ *   "PROXY" <SP> PROTO <SP> SRC3 <SP> DST3 <SP> SRC4 <SP> DST4 "\r\n"
+ *
+ * There must be exactly one space between each field. Fields are :
+ * - PROTO : layer 4 protocol, which must be "TCP4" or "TCP6".
+ * - SRC3 : layer 3 (eg: IP) source address in standard text form
+ * - DST3 : layer 3 (eg: IP) destination address in standard text form
+ * - SRC4 : layer 4 (eg: TCP port) source address in standard text form
+ * - DST4 : layer 4 (eg: TCP port) destination address in standard text form
+ *
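+ * For example, a valid TCP4 line looks like this (illustrative values):
+ *
+ *   "PROXY TCP4 192.168.0.1 192.168.0.11 56324 443\r\n"
+ *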
+ * This line MUST be at the beginning of the buffer and MUST NOT wrap.
+ *
+ * The header line is small and in all cases smaller than the smallest normal
+ * TCP MSS. So it MUST always be delivered as one segment, which ensures we
+ * can safely use MSG_PEEK and avoid buffering.
+ *
+ * Once the data is fetched, the values are set in the connection's address
+ * fields, and data are removed from the socket's buffer. The function returns
+ * zero if it needs to wait for more data or if it fails, or 1 if it completed
+ * and removed itself.
+ */
+int conn_recv_proxy(struct connection *conn, int flag)
+{
+ struct session *sess = conn->owner;
+ char *line, *end;
+ struct proxy_hdr_v2 *hdr_v2;
+ const char v2sig[] = PP2_SIGNATURE;
+ size_t total_v2_len;
+ size_t tlv_offset = 0;
+ int ret;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ goto not_ready;
+
+ while (1) {
+ ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(conn->handle.fd);
+ goto not_ready;
+ }
+ goto recv_abort;
+ }
+ trash.data = ret;
+ break;
+ }
+
+ if (!trash.data) {
+ /* client shutdown */
+ conn->err_code = CO_ER_PRX_EMPTY;
+ goto fail;
+ }
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (trash.data < 6)
+ goto missing;
+
+ line = trash.area;
+ end = trash.area + trash.data;
+
+ /* Decode a possible proxy request, fail early if it does not match */
+ if (strncmp(line, "PROXY ", 6) != 0)
+ goto not_v1;
+
+ line += 6;
+ if (trash.data < 9) /* shortest possible line */
+ goto missing;
+
+ if (memcmp(line, "TCP4 ", 5) == 0) {
+ u32 src3, dst3, sport, dport;
+
+ line += 5;
+
+ src3 = inetaddr_host_lim_ret(line, end, &line);
+ if (line == end)
+ goto missing;
+ if (*line++ != ' ')
+ goto bad_header;
+
+ dst3 = inetaddr_host_lim_ret(line, end, &line);
+ if (line == end)
+ goto missing;
+ if (*line++ != ' ')
+ goto bad_header;
+
+ sport = read_uint((const char **)&line, end);
+ if (line == end)
+ goto missing;
+ if (*line++ != ' ')
+ goto bad_header;
+
+ dport = read_uint((const char **)&line, end);
+ if (line > end - 2)
+ goto missing;
+ if (*line++ != '\r')
+ goto bad_header;
+ if (*line++ != '\n')
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->src)->sin_addr.s_addr = htonl(src3);
+ ((struct sockaddr_in *)sess->src)->sin_port = htons(sport);
+
+ ((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = htonl(dst3);
+ ((struct sockaddr_in *)sess->dst)->sin_port = htons(dport);
+ }
+ else if (memcmp(line, "TCP6 ", 5) == 0) {
+ u32 sport, dport;
+ char *src_s;
+ char *dst_s, *sport_s, *dport_s;
+ struct in6_addr src3, dst3;
+
+ line += 5;
+
+ src_s = line;
+ dst_s = sport_s = dport_s = NULL;
+ while (1) {
+ if (line > end - 2) {
+ goto missing;
+ }
+ else if (*line == '\r') {
+ *line = 0;
+ line++;
+ if (*line++ != '\n')
+ goto bad_header;
+ break;
+ }
+
+ if (*line == ' ') {
+ *line = 0;
+ if (!dst_s)
+ dst_s = line + 1;
+ else if (!sport_s)
+ sport_s = line + 1;
+ else if (!dport_s)
+ dport_s = line + 1;
+ }
+ line++;
+ }
+
+ if (!dst_s || !sport_s || !dport_s)
+ goto bad_header;
+
+ sport = read_uint((const char **)&sport_s,dport_s - 1);
+ if (*sport_s != 0)
+ goto bad_header;
+
+ dport = read_uint((const char **)&dport_s,line - 2);
+ if (*dport_s != 0)
+ goto bad_header;
+
+ if (inet_pton(AF_INET6, src_s, (void *)&src3) != 1)
+ goto bad_header;
+
+ if (inet_pton(AF_INET6, dst_s, (void *)&dst3) != 1)
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, &src3, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)sess->src)->sin6_port = htons(sport);
+
+ ((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, &dst3, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)sess->dst)->sin6_port = htons(dport);
+ }
+ else if (memcmp(line, "UNKNOWN\r\n", 9) == 0) {
+ /* This can be a UNIX socket forwarded by an haproxy upstream */
+ line += 9;
+ }
+ else {
+ /* The protocol does not match something known (TCP4/TCP6/UNKNOWN) */
+ conn->err_code = CO_ER_PRX_BAD_PROTO;
+ goto fail;
+ }
+
+ trash.data = line - trash.area;
+ goto eat_header;
+
+ not_v1:
+ /* try PPv2 */
+ if (trash.data < PP2_HEADER_LEN)
+ goto missing;
+
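+ /* A PROXY protocol v2 header starts with the 12-byte signature
+ * "\r\n\r\n\0\r\nQUIT\n" (PP2_SIGNATURE), followed by one version/command
+ * byte, one family/transport byte, and the 16-bit network-order length of
+ * the remaining payload (addresses, then optional TLVs).
+ */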
+ hdr_v2 = (struct proxy_hdr_v2 *) trash.area;
+
+ if (memcmp(hdr_v2->sig, v2sig, PP2_SIGNATURE_LEN) != 0 ||
+ (hdr_v2->ver_cmd & PP2_VERSION_MASK) != PP2_VERSION) {
+ conn->err_code = CO_ER_PRX_NOT_HDR;
+ goto fail;
+ }
+
+ total_v2_len = PP2_HEADER_LEN + ntohs(hdr_v2->len);
+ if (trash.data < total_v2_len)
+ goto missing;
+
+ switch (hdr_v2->ver_cmd & PP2_CMD_MASK) {
+ case 0x01: /* PROXY command */
+ switch (hdr_v2->fam) {
+ case 0x11: /* TCPv4 */
+ if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET)
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ ((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_v2->addr.ip4.src_addr;
+ ((struct sockaddr_in *)sess->src)->sin_port = hdr_v2->addr.ip4.src_port;
+ ((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_v2->addr.ip4.dst_addr;
+ ((struct sockaddr_in *)sess->dst)->sin_port = hdr_v2->addr.ip4.dst_port;
+ tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET;
+ break;
+ case 0x21: /* TCPv6 */
+ if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET6)
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ ((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, hdr_v2->addr.ip6.src_addr, 16);
+ ((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_v2->addr.ip6.src_port;
+ ((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, hdr_v2->addr.ip6.dst_addr, 16);
+ ((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_v2->addr.ip6.dst_port;
+ tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET6;
+ break;
+ }
+
+ /* TLV parsing */
+ while (tlv_offset < total_v2_len) {
+ struct tlv *tlv_packet;
+ struct ist tlv;
+
+ /* Verify that we have at least TLV_HEADER_SIZE bytes left */
+ if (tlv_offset + TLV_HEADER_SIZE > total_v2_len)
+ goto bad_header;
+
+ tlv_packet = (struct tlv *) &trash.area[tlv_offset];
+ tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet));
+ tlv_offset += istlen(tlv) + TLV_HEADER_SIZE;
+
+ /* Verify that the TLV length does not exceed the total PROXYv2 length */
+ if (tlv_offset > total_v2_len)
+ goto bad_header;
+
+ switch (tlv_packet->type) {
+ case PP2_TYPE_CRC32C: {
+ uint32_t n_crc32c;
+
+ /* Verify that this TLV is exactly 4 bytes long */
+ if (istlen(tlv) != 4)
+ goto bad_header;
+
+ n_crc32c = read_n32(istptr(tlv));
+ write_n32(istptr(tlv), 0); // compute with CRC==0
+
+ if (hash_crc32c(trash.area, total_v2_len) != n_crc32c)
+ goto bad_header;
+ break;
+ }
+#ifdef USE_NS
+ case PP2_TYPE_NETNS: {
+ const struct netns_entry *ns;
+
+ ns = netns_store_lookup(istptr(tlv), istlen(tlv));
+ if (ns)
+ conn->proxy_netns = ns;
+ break;
+ }
+#endif
+ case PP2_TYPE_AUTHORITY: {
+ if (istlen(tlv) > PP2_AUTHORITY_MAX)
+ goto bad_header;
+ conn->proxy_authority = ist2(pool_alloc(pool_head_authority), 0);
+ if (!isttest(conn->proxy_authority))
+ goto fail;
+ if (istcpy(&conn->proxy_authority, tlv, PP2_AUTHORITY_MAX) < 0) {
+ /* This is impossible, because we verified that the TLV value fits. */
+ my_unreachable();
+ goto fail;
+ }
+ break;
+ }
+ case PP2_TYPE_UNIQUE_ID: {
+ if (istlen(tlv) > UNIQUEID_LEN)
+ goto bad_header;
+ conn->proxy_unique_id = ist2(pool_alloc(pool_head_uniqueid), 0);
+ if (!isttest(conn->proxy_unique_id))
+ goto fail;
+ if (istcpy(&conn->proxy_unique_id, tlv, UNIQUEID_LEN) < 0) {
+ /* This is impossible, because we verified that the TLV value fits. */
+ my_unreachable();
+ goto fail;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* Verify that the PROXYv2 header ends at a TLV boundary.
+ * This check cannot fail, because the TLV parsing already
+ * verifies that a TLV does not exceed the total length and
+ * also that there is space for a TLV header.
+ */
+ BUG_ON(tlv_offset != total_v2_len);
+
+ /* unsupported protocol, keep local connection address */
+ break;
+ case 0x00: /* LOCAL command */
+ /* keep local connection address for LOCAL */
+ break;
+ default:
+ goto bad_header; /* not a supported command */
+ }
+
+ trash.data = total_v2_len;
+ goto eat_header;
+
+ eat_header:
+ /* remove the PROXY line from the request. For this we re-read the
+ * exact line at once. If we don't get the exact same result, we
+ * fail.
+ */
+ while (1) {
+ ssize_t len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
+
+ if (len2 < 0 && errno == EINTR)
+ continue;
+ if (len2 != trash.data)
+ goto recv_abort;
+ break;
+ }
+
+ conn->flags &= ~flag;
+ conn->flags |= CO_FL_RCVD_PROXY;
+ return 1;
+
+ not_ready:
+ return 0;
+
+ missing:
+ /* Missing data. Since we're using MSG_PEEK, we can only poll again if
+ * we have not read anything. Otherwise we need to fail because we won't
+ * be able to poll anymore.
+ */
+ conn->err_code = CO_ER_PRX_TRUNCATED;
+ goto fail;
+
+ bad_header:
+ /* This is not a valid proxy protocol header */
+ conn->err_code = CO_ER_PRX_BAD_HDR;
+ goto fail;
+
+ recv_abort:
+ conn->err_code = CO_ER_PRX_ABORT;
+ conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto fail;
+
+ fail:
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+}
+
+/* This callback is used to send a valid PROXY protocol line to a socket being
+ * established. It returns 0 if it fails in a fatal way or needs to poll to go
+ * further, otherwise it returns non-zero and removes itself from the connection's
+ * flags (the bit is provided in <flag> by the caller). It is designed to be
+ * called by the connection handler and relies on it to commit polling changes.
+ * Note that it can emit a PROXY line by relying on the other end's address
+ * when the connection is attached to a stream connector, or by resolving the
+ * local address otherwise (also called a LOCAL line).
+ */
+int conn_send_proxy(struct connection *conn, unsigned int flag)
+{
+ if (!conn_ctrl_ready(conn))
+ goto out_error;
+
+ /* If we have a PROXY line to send, we'll use this to validate the
+ * connection, in which case the connection is validated only once
+ * we've sent the whole proxy line. Otherwise we use connect().
+ */
+ if (conn->send_proxy_ofs) {
+ struct stconn *sc;
+ int ret;
+
+ /* If there is no mux attached to the connection, it means the
+ * connection context is a stream connector.
+ */
+ sc = conn->mux ? conn_get_first_sc(conn) : conn->ctx;
+
+ /* The target server expects a PROXY line to be sent first.
+ * If the send_proxy_ofs is negative, it corresponds to the
+ * offset to start sending from the end of the proxy string
+ * (which is recomputed every time since it's constant). If
+ * it is positive, it means we have to send from the start.
+ * We can only send a "normal" PROXY line when the connection
+ * is attached to a stream connector. Otherwise we can only
+ * send a LOCAL line (eg: for use with health checks).
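+ *
+ * Illustrative example: with a 44-byte PROXY line, the first
+ * call sets send_proxy_ofs to -44; if conn_ctrl_send() later
+ * reports 20 bytes sent, the offset moves to -24, and the
+ * connection is only validated once it reaches zero.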
+ */
+
+ if (sc && sc_strm(sc)) {
+ ret = make_proxy_line(trash.area, trash.size,
+ objt_server(conn->target),
+ sc_conn(sc_opposite(sc)),
+ __sc_strm(sc));
+ }
+ else {
+ /* The target server expects a LOCAL line to be sent first. Retrieving
+ * local or remote addresses may fail until the connection is established.
+ */
+ if (!conn_get_src(conn) || !conn_get_dst(conn))
+ goto out_wait;
+
+ ret = make_proxy_line(trash.area, trash.size,
+ objt_server(conn->target), conn,
+ NULL);
+ }
+
+ if (!ret)
+ goto out_error;
+
+ if (conn->send_proxy_ofs > 0)
+ conn->send_proxy_ofs = -ret; /* first call */
+
+ /* we have to send trash from (ret+sp for -sp bytes). If the
+ * data layer has a pending write, we'll also set MSG_MORE.
+ */
+ ret = conn_ctrl_send(conn,
+ trash.area + ret + conn->send_proxy_ofs,
+ -conn->send_proxy_ofs,
+ (conn->subs && conn->subs->events & SUB_RETRY_SEND) ? CO_SFL_MSG_MORE : 0);
+
+ if (ret < 0)
+ goto out_error;
+
+ conn->send_proxy_ofs += ret; /* becomes zero once complete */
+ if (conn->send_proxy_ofs != 0)
+ goto out_wait;
+
+ /* OK we've sent the whole line, we're connected */
+ }
+
+ /* The connection is ready now, simply return and let the connection
+ * handler notify upper layers if needed.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ conn->flags &= ~flag;
+ return 1;
+
+ out_error:
+ /* Write error on the file descriptor */
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+
+ out_wait:
+ return 0;
+}
+
+/* This handshake handler waits for a NetScaler Client IP insertion header
+ * at the beginning of the raw data stream. The header format is
+ * described in doc/netscaler-client-ip-insertion-protocol.txt
+ *
+ * This line MUST be at the beginning of the buffer and MUST NOT be
+ * fragmented.
+ *
+ * The header line is small and in all cases smaller than the smallest normal
+ * TCP MSS. So it MUST always be delivered as one segment, which ensures we
+ * can safely use MSG_PEEK and avoid buffering.
+ *
+ * Once the data is fetched, the values are set in the connection's address
+ * fields, and data are removed from the socket's buffer. The function returns
+ * zero if it needs to wait for more data or if it fails, or 1 if it completed
+ * and removed itself.
+ */
+int conn_recv_netscaler_cip(struct connection *conn, int flag)
+{
+ struct session *sess = conn->owner;
+ char *line;
+ uint32_t hdr_len;
+ uint8_t ip_ver;
+ int ret;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ goto not_ready;
+
+ while (1) {
+ ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(conn->handle.fd);
+ goto not_ready;
+ }
+ goto recv_abort;
+ }
+ trash.data = ret;
+ break;
+ }
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (!trash.data) {
+ /* client shutdown */
+ conn->err_code = CO_ER_CIP_EMPTY;
+ goto fail;
+ }
+
+ /* Fail if the buffer is not large enough to contain either the
+ * CIP magic and header length (legacy protocol), or the CIP magic,
+ * CIP length, CIP type and header length (standard protocol) */
+ if (trash.data < 12)
+ goto missing;
+
+ line = trash.area;
+
+ /* Decode a possible NetScaler Client IP request, fail early if
+ * it does not match */
+ if (ntohl(read_u32(line)) != __objt_listener(conn->target)->bind_conf->ns_cip_magic)
+ goto bad_magic;
+
+ /* Legacy CIP protocol */
+ if ((trash.area[8] & 0xD0) == 0x40) {
+ hdr_len = ntohl(read_u32((line+4)));
+ line += 8;
+ }
+ /* Standard CIP protocol */
+ else if (trash.area[8] == 0x00) {
+ hdr_len = ntohs(read_u32((line+10)));
+ line += 12;
+ }
+ /* Unknown CIP protocol */
+ else {
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ /* Fail if buffer length is not large enough to contain
+ * a minimal IP header */
+ if (trash.data < 20)
+ goto missing;
+
+ /* Get IP version from the first four bits */
+ ip_ver = (*line & 0xf0) >> 4;
+
+ if (ip_ver == 4) {
+ struct ip *hdr_ip4;
+ struct my_tcphdr *hdr_tcp;
+
+ hdr_ip4 = (struct ip *)line;
+
+ if (trash.data < 40 || trash.data < hdr_len) {
+ /* Fail if buffer length is not large enough to contain
+ * IPv4 header, TCP header */
+ goto missing;
+ }
+ else if (hdr_ip4->ip_p != IPPROTO_TCP) {
+ /* The protocol does not include a TCP header */
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ hdr_tcp = (struct my_tcphdr *)(line + (hdr_ip4->ip_hl * 4));
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_ip4->ip_src.s_addr;
+ ((struct sockaddr_in *)sess->src)->sin_port = hdr_tcp->source;
+
+ ((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_ip4->ip_dst.s_addr;
+ ((struct sockaddr_in *)sess->dst)->sin_port = hdr_tcp->dest;
+ }
+ else if (ip_ver == 6) {
+ struct ip6_hdr *hdr_ip6;
+ struct my_tcphdr *hdr_tcp;
+
+ hdr_ip6 = (struct ip6_hdr *)line;
+
+ if (trash.data < 60 || trash.data < hdr_len) {
+ /* Fail if buffer length is not large enough to contain
+ * IPv6 header, TCP header */
+ goto missing;
+ }
+ else if (hdr_ip6->ip6_nxt != IPPROTO_TCP) {
+ /* The protocol does not include a TCP header */
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ hdr_tcp = (struct my_tcphdr *)(line + sizeof(struct ip6_hdr));
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *)sess->src)->sin6_addr = hdr_ip6->ip6_src;
+ ((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_tcp->source;
+
+ ((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *)sess->dst)->sin6_addr = hdr_ip6->ip6_dst;
+ ((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_tcp->dest;
+ }
+ else {
+ /* The protocol does not match something known (IPv4/IPv6) */
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ line += hdr_len;
+ trash.data = line - trash.area;
+
+ /* remove the NetScaler Client IP header from the request. For this
+ * we re-read the exact line at once. If we don't get the exact same
+ * result, we fail.
+ */
+ while (1) {
+ int len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
+ if (len2 < 0 && errno == EINTR)
+ continue;
+ if (len2 != trash.data)
+ goto recv_abort;
+ break;
+ }
+
+ conn->flags &= ~flag;
+ return 1;
+
+ not_ready:
+ return 0;
+
+ missing:
+ /* Missing data. Since we're using MSG_PEEK, we can only poll again if
+ * we have not read anything. Otherwise we need to fail because we won't
+ * be able to poll anymore.
+ */
+ conn->err_code = CO_ER_CIP_TRUNCATED;
+ goto fail;
+
+ bad_magic:
+ conn->err_code = CO_ER_CIP_BAD_MAGIC;
+ goto fail;
+
+ recv_abort:
+ conn->err_code = CO_ER_CIP_ABORT;
+ conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto fail;
+
+ fail:
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+}
+
+
+int conn_send_socks4_proxy_request(struct connection *conn)
+{
+ struct socks4_request req_line;
+
+ if (!conn_ctrl_ready(conn))
+ goto out_error;
+
+ if (!conn_get_dst(conn))
+ goto out_error;
+
+ req_line.version = 0x04;
+ req_line.command = 0x01;
+ req_line.port = get_net_port(conn->dst);
+ req_line.ip = is_inet_addr(conn->dst);
+ memcpy(req_line.user_id, "HAProxy\0", 8);
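+
+ /* Per the SOCKS4 protocol, the CONNECT request built above is laid out
+ * as: VN(1) | CD(1) | DSTPORT(2) | DSTIP(4) | USERID + NUL, here with
+ * the fixed 8-byte user id "HAProxy\0".
+ */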
+
+ if (conn->send_proxy_ofs > 0) {
+ /*
+ * This is the first call to send the request
+ */
+ conn->send_proxy_ofs = -(int)sizeof(req_line);
+ }
+
+ if (conn->send_proxy_ofs < 0) {
+ int ret = 0;
+
+ /* we are sending the socks4_req_line here. If the data layer
+ * has a pending write, we'll also set MSG_MORE.
+ */
+ ret = conn_ctrl_send(
+ conn,
+ ((char *)(&req_line)) + (sizeof(req_line)+conn->send_proxy_ofs),
+ -conn->send_proxy_ofs,
+ (conn->subs && conn->subs->events & SUB_RETRY_SEND) ? CO_SFL_MSG_MORE : 0);
+
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Before send remain is [%d], sent [%d]\n",
+ conn_fd(conn), -conn->send_proxy_ofs, ret);
+
+ if (ret < 0) {
+ goto out_error;
+ }
+
+ conn->send_proxy_ofs += ret; /* becomes zero once complete */
+ if (conn->send_proxy_ofs != 0) {
+ goto out_wait;
+ }
+ }
+
+ /* OK we've the whole request sent */
+ conn->flags &= ~CO_FL_SOCKS4_SEND;
+
+ /* The connection is ready now, simply return and let the connection
+ * handler notify upper layers if needed.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (conn->flags & CO_FL_SEND_PROXY) {
+ /*
+ * Reset send_proxy_ofs for the upcoming PROXY protocol line: the
+ * same "send_proxy_ofs" field is reused, and the SOCKS4 handshake
+ * must complete before the PROXY protocol header is sent.
+ */
+ conn->send_proxy_ofs = 1;
+ }
+ return 1;
+
+ out_error:
+ /* Write error on the file descriptor */
+ conn->flags |= CO_FL_ERROR;
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_SEND;
+ }
+ return 0;
+
+ out_wait:
+ return 0;
+}
+
+int conn_recv_socks4_proxy_response(struct connection *conn)
+{
+ char line[SOCKS4_HS_RSP_LEN];
+ int ret;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ goto not_ready;
+
+ while (1) {
+ /* The SOCKS4 proxy will respond with 8 bytes, 0x00 | 0x5A | 0x00 0x00 | 0x00 0x00 0x00 0x00.
+ * Try to peek at them before all 8 bytes are ready.
+ */
+ ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, MSG_PEEK);
+
+ if (ret == 0) {
+ /* the socket has been closed or shutdown for send */
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d], looks like the socket has been closed or shutdown for send\n",
+ conn->handle.fd, ret, errno);
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_RECV;
+ }
+ goto fail;
+ }
+
+ if (ret > 0) {
+ if (ret == SOCKS4_HS_RSP_LEN) {
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received 8 bytes, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
+ conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
+ } else {
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], first byte is [%02X], last byte is [%02X]\n", conn->handle.fd, ret, line[0], line[ret-1]);
+ }
+ } else {
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d]\n", conn->handle.fd, ret, errno);
+ }
+
+ if (ret < 0) {
+ if (errno == EINTR) {
+ continue;
+ }
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(conn->handle.fd);
+ goto not_ready;
+ }
+ goto recv_abort;
+ }
+ break;
+ }
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (ret < SOCKS4_HS_RSP_LEN) {
+ /* Missing data. Since we're using MSG_PEEK, we can only poll again if
+ * we are not able to read enough data.
+ */
+ goto not_ready;
+ }
+
+ /*
+ * Based on the SOCKS4 protocol:
+ *
+ * +----+----+----+----+----+----+----+----+
+ * | VN | CD | DSTPORT | DSTIP |
+ * +----+----+----+----+----+----+----+----+
+ * # of bytes: 1 1 2 4
+ * VN is the version of the reply code and should be 0. CD is the result
+ * code with one of the following values:
+ * 90: request granted
+ * 91: request rejected or failed
+ * 92: request rejected because SOCKS server cannot connect to identd on the client
+ * 93: request rejected because the client program and identd report different user-ids
+ * The remaining fields are ignored.
+ */
+ if (line[1] != 90) {
+ conn->flags &= ~CO_FL_SOCKS4_RECV;
+
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: FAIL, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
+ conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_DENY;
+ }
+ goto fail;
+ }
+
+ /* remove the 8 bytes response from the stream */
+ while (1) {
+ ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, 0);
+ if (ret < 0 && errno == EINTR) {
+ continue;
+ }
+ if (ret != SOCKS4_HS_RSP_LEN) {
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_RECV;
+ }
+ goto fail;
+ }
+ break;
+ }
+
+ conn->flags &= ~CO_FL_SOCKS4_RECV;
+ return 1;
+
+ not_ready:
+ return 0;
+
+ recv_abort:
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_ABORT;
+ }
+ conn->flags |= (CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH);
+ goto fail;
+
+ fail:
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+}
+
+/* registers proto mux list <list>. Modifies the list element! */
+void register_mux_proto(struct mux_proto_list *list)
+{
+ LIST_APPEND(&mux_proto_list.list, &list->list);
+}
+
+/* Lists the known proto mux on <out>. This function is used by "haproxy -vv"
+ * and is suitable for early boot just after the "REGISTER" stage because it
+ * doesn't depend on anything to be already allocated.
+ */
+void list_mux_proto(FILE *out)
+{
+ struct mux_proto_list *item;
+ struct ist proto;
+ char *mode, *side;
+ int done;
+
+ fprintf(out, "Available multiplexer protocols :\n"
+ "(protocols marked as <default> cannot be specified using 'proto' keyword)\n");
+ list_for_each_entry(item, &mux_proto_list.list, list) {
+ proto = item->token;
+
+ if (item->mode == PROTO_MODE_ANY)
+ mode = "TCP|HTTP";
+ else if (item->mode == PROTO_MODE_TCP)
+ mode = "TCP";
+ else if (item->mode == PROTO_MODE_HTTP)
+ mode = "HTTP";
+ else
+ mode = "NONE";
+
+ if (item->side == PROTO_SIDE_BOTH)
+ side = "FE|BE";
+ else if (item->side == PROTO_SIDE_FE)
+ side = "FE";
+ else if (item->side == PROTO_SIDE_BE)
+ side = "BE";
+ else
+ side = "NONE";
+
+ fprintf(out, " %10s : mode=%-5s side=%-6s mux=%-5s flags=",
+ (proto.len ? proto.ptr : "<default>"), mode, side, item->mux->name);
+
+ done = 0;
+
+ /* note: the block below could be simplified using macros, but for only
+ * 4 flags it's not worth it.
+ */
+ if (item->mux->flags & MX_FL_HTX)
+ done |= fprintf(out, "%sHTX", done ? "|" : "");
+
+ if (item->mux->flags & MX_FL_HOL_RISK)
+ done |= fprintf(out, "%sHOL_RISK", done ? "|" : "");
+
+ if (item->mux->flags & MX_FL_NO_UPG)
+ done |= fprintf(out, "%sNO_UPG", done ? "|" : "");
+
+ if (item->mux->flags & MX_FL_FRAMED)
+ done |= fprintf(out, "%sFRAMED", done ? "|" : "");
+
+ fprintf(out, "\n");
+ }
+}
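+
+/* Example of the resulting "haproxy -vv" output (illustrative, the exact
+ * list and alignment depend on the build options):
+ *
+ *   Available multiplexer protocols :
+ *   (protocols marked as <default> cannot be specified using 'proto' keyword)
+ *            h2 : mode=HTTP  side=FE|BE  mux=H2    flags=HTX|HOL_RISK|NO_UPG
+ *     <default> : mode=HTTP  side=FE|BE  mux=H1    flags=HTX
+ */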
+
+/* Makes a PROXY protocol line from the two addresses. The output is sent to
+ * buffer <buf> for a maximum size of <buf_len> (including the trailing zero).
+ * It returns the number of bytes composing this line (including the trailing
+ * LF), or zero in case of failure (eg: not enough space). It supports TCP4,
+ * TCP6 and "UNKNOWN" formats. If any of <src> or <dst> is null, UNKNOWN is
+ * emitted as well.
+ */
+static int make_proxy_line_v1(char *buf, int buf_len, const struct sockaddr_storage *src, const struct sockaddr_storage *dst)
+{
+ int ret = 0;
+ char * protocol;
+ char src_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+ char dst_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+ in_port_t src_port;
+ in_port_t dst_port;
+
+ if ( !src
+ || !dst
+ || (src->ss_family != AF_INET && src->ss_family != AF_INET6)
+ || (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
+ /* unknown family combination */
+ ret = snprintf(buf, buf_len, "PROXY UNKNOWN\r\n");
+ if (ret >= buf_len)
+ return 0;
+
+ return ret;
+ }
+
+ /* IPv4 for both src and dst */
+ if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
+ protocol = "TCP4";
+ if (!inet_ntop(AF_INET, &((struct sockaddr_in *)src)->sin_addr, src_str, sizeof(src_str)))
+ return 0;
+ src_port = ((struct sockaddr_in *)src)->sin_port;
+ if (!inet_ntop(AF_INET, &((struct sockaddr_in *)dst)->sin_addr, dst_str, sizeof(dst_str)))
+ return 0;
+ dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ }
+ /* IPv6 for at least one of src and dst */
+ else {
+ struct in6_addr tmp;
+
+ protocol = "TCP6";
+
+ if (src->ss_family == AF_INET) {
+ /* Convert src to IPv6 */
+ v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
+ src_port = ((struct sockaddr_in *)src)->sin_port;
+ }
+ else {
+ tmp = ((struct sockaddr_in6 *)src)->sin6_addr;
+ src_port = ((struct sockaddr_in6 *)src)->sin6_port;
+ }
+
+ if (!inet_ntop(AF_INET6, &tmp, src_str, sizeof(src_str)))
+ return 0;
+
+ if (dst->ss_family == AF_INET) {
+ /* Convert dst to IPv6 */
+ v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
+ dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ }
+ else {
+ tmp = ((struct sockaddr_in6 *)dst)->sin6_addr;
+ dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
+ }
+
+ if (!inet_ntop(AF_INET6, &tmp, dst_str, sizeof(dst_str)))
+ return 0;
+ }
+
+ ret = snprintf(buf, buf_len, "PROXY %s %s %s %u %u\r\n", protocol, src_str, dst_str, ntohs(src_port), ntohs(dst_port));
+ if (ret >= buf_len)
+ return 0;
+
+ return ret;
+}
+
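+/* Writes a TLV of <type> carrying <length> bytes of <value> into <dest> and
+ * returns the number of bytes consumed (header plus value), or zero if it
+ * does not fit in <dest_len>. For example (illustrative), emitting a 2-byte
+ * ALPN value "h2" consumes 2 + sizeof(struct tlv) = 5 bytes.
+ */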
+static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const char *value)
+{
+ struct tlv *tlv;
+
+ if (!dest || (length + sizeof(*tlv) > dest_len))
+ return 0;
+
+ tlv = (struct tlv *)dest;
+
+ tlv->type = type;
+ tlv->length_hi = length >> 8;
+ tlv->length_lo = length & 0x00ff;
+ memcpy(tlv->value, value, length);
+ return length + sizeof(*tlv);
+}
+
+/* Note: <remote> is explicitly allowed to be NULL */
+static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
+{
+ const char pp2_signature[] = PP2_SIGNATURE;
+ void *tlv_crc32c_p = NULL;
+ int ret = 0;
+ struct proxy_hdr_v2 *hdr = (struct proxy_hdr_v2 *)buf;
+ struct sockaddr_storage null_addr = { .ss_family = 0 };
+ const struct sockaddr_storage *src = &null_addr;
+ const struct sockaddr_storage *dst = &null_addr;
+ const char *value;
+ int value_len;
+
+ if (buf_len < PP2_HEADER_LEN)
+ return 0;
+ memcpy(hdr->sig, pp2_signature, PP2_SIGNATURE_LEN);
+
+ if (strm) {
+ src = sc_src(strm->scf);
+ dst = sc_dst(strm->scf);
+ }
+ else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
+ src = conn_src(remote);
+ dst = conn_dst(remote);
+ }
+
+ /* At least one of src or dst is not of AF_INET or AF_INET6 */
+ if ( !src
+ || !dst
+ || (!pp2_never_send_local && conn_is_back(remote)) // locally initiated connection
+ || (src->ss_family != AF_INET && src->ss_family != AF_INET6)
+ || (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
+ if (buf_len < PP2_HDR_LEN_UNSPEC)
+ return 0;
+ hdr->ver_cmd = PP2_VERSION | PP2_CMD_LOCAL;
+ hdr->fam = PP2_FAM_UNSPEC | PP2_TRANS_UNSPEC;
+ ret = PP2_HDR_LEN_UNSPEC;
+ }
+ else {
+ hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
+ /* IPv4 for both src and dst */
+ if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
+ if (buf_len < PP2_HDR_LEN_INET)
+ return 0;
+ hdr->fam = PP2_FAM_INET | PP2_TRANS_STREAM;
+ hdr->addr.ip4.src_addr = ((struct sockaddr_in *)src)->sin_addr.s_addr;
+ hdr->addr.ip4.src_port = ((struct sockaddr_in *)src)->sin_port;
+ hdr->addr.ip4.dst_addr = ((struct sockaddr_in *)dst)->sin_addr.s_addr;
+ hdr->addr.ip4.dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ ret = PP2_HDR_LEN_INET;
+ }
+ /* IPv6 for at least one of src and dst */
+ else {
+ struct in6_addr tmp;
+
+ if (buf_len < PP2_HDR_LEN_INET6)
+ return 0;
+ hdr->fam = PP2_FAM_INET6 | PP2_TRANS_STREAM;
+ if (src->ss_family == AF_INET) {
+ v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
+ memcpy(hdr->addr.ip6.src_addr, &tmp, 16);
+ hdr->addr.ip6.src_port = ((struct sockaddr_in *)src)->sin_port;
+ }
+ else {
+ memcpy(hdr->addr.ip6.src_addr, &((struct sockaddr_in6 *)src)->sin6_addr, 16);
+ hdr->addr.ip6.src_port = ((struct sockaddr_in6 *)src)->sin6_port;
+ }
+ if (dst->ss_family == AF_INET) {
+ v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
+ memcpy(hdr->addr.ip6.dst_addr, &tmp, 16);
+ hdr->addr.ip6.dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ }
+ else {
+ memcpy(hdr->addr.ip6.dst_addr, &((struct sockaddr_in6 *)dst)->sin6_addr, 16);
+ hdr->addr.ip6.dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
+ }
+
+ ret = PP2_HDR_LEN_INET6;
+ }
+ }
+
+ if (srv->pp_opts & SRV_PP_V2_CRC32C) {
+ uint32_t zero_crc32c = 0;
+
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ tlv_crc32c_p = (void *)((struct tlv *)&buf[ret])->value;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_CRC32C, sizeof(zero_crc32c), (const char *)&zero_crc32c);
+ }
+
+ if (remote && conn_get_alpn(remote, &value, &value_len)) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_ALPN, value_len, value);
+ }
+
+ if (srv->pp_opts & SRV_PP_V2_AUTHORITY) {
+ value = NULL;
+ if (remote && isttest(remote->proxy_authority)) {
+ value = istptr(remote->proxy_authority);
+ value_len = istlen(remote->proxy_authority);
+ }
+#ifdef USE_OPENSSL
+ else {
+ if ((value = ssl_sock_get_sni(remote)))
+ value_len = strlen(value);
+ }
+#endif
+ if (value) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_AUTHORITY, value_len, value);
+ }
+ }
+
+ if (strm && (srv->pp_opts & SRV_PP_V2_UNIQUE_ID)) {
+ struct session* sess = strm_sess(strm);
+ struct ist unique_id = stream_generate_unique_id(strm, &sess->fe->format_unique_id);
+
+ value = unique_id.ptr;
+ value_len = unique_id.len;
+
+ if (value_len >= 0) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_UNIQUE_ID, value_len, value);
+ }
+ }
+
+#ifdef USE_OPENSSL
+ if (srv->pp_opts & SRV_PP_V2_SSL) {
+ struct tlv_ssl *tlv;
+ int ssl_tlv_len = 0;
+
+ if ((buf_len - ret) < sizeof(struct tlv_ssl))
+ return 0;
+ tlv = (struct tlv_ssl *)&buf[ret];
+ memset(tlv, 0, sizeof(struct tlv_ssl));
+ ssl_tlv_len += sizeof(struct tlv_ssl);
+ tlv->tlv.type = PP2_TYPE_SSL;
+ if (conn_is_ssl(remote)) {
+ tlv->client |= PP2_CLIENT_SSL;
+ value = ssl_sock_get_proto_version(remote);
+ if (value) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_SUBTYPE_SSL_VERSION, strlen(value), value);
+ }
+ if (ssl_sock_get_cert_used_sess(remote)) {
+ tlv->client |= PP2_CLIENT_CERT_SESS;
+ tlv->verify = htonl(ssl_sock_get_verify_result(remote));
+ if (ssl_sock_get_cert_used_conn(remote))
+ tlv->client |= PP2_CLIENT_CERT_CONN;
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_CN) {
+ struct buffer *cn_trash = get_trash_chunk();
+ if (ssl_sock_get_remote_common_name(remote, cn_trash) > 0) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CN,
+ cn_trash->data,
+ cn_trash->area);
+ }
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_KEY_ALG) {
+ struct buffer *pkey_trash = get_trash_chunk();
+ if (ssl_sock_get_pkey_algo(remote, pkey_trash) > 0) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_KEY_ALG,
+ pkey_trash->data,
+ pkey_trash->area);
+ }
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_SIG_ALG) {
+ value = ssl_sock_get_cert_sig(remote);
+ if (value) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_SIG_ALG, strlen(value), value);
+ }
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_CIPHER) {
+ value = ssl_sock_get_cipher_name(remote);
+ if (value) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CIPHER, strlen(value), value);
+ }
+ }
+ }
+ tlv->tlv.length_hi = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) >> 8;
+ tlv->tlv.length_lo = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) & 0x00ff;
+ ret += ssl_tlv_len;
+ }
+#endif
+
+#ifdef USE_NS
+ if (remote && (remote->proxy_netns)) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_NETNS, remote->proxy_netns->name_len, remote->proxy_netns->node.key);
+ }
+#endif
+
+ hdr->len = htons((uint16_t)(ret - PP2_HEADER_LEN));
+
+ if (tlv_crc32c_p) {
+ write_u32(tlv_crc32c_p, htonl(hash_crc32c(buf, ret)));
+ }
+
+ return ret;
+}
+
+/* Note: <remote> is explicitly allowed to be NULL */
+int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
+{
+ int ret = 0;
+
+ if (srv && (srv->pp_opts & SRV_PP_V2)) {
+ ret = make_proxy_line_v2(buf, buf_len, srv, remote, strm);
+ }
+ else {
+ const struct sockaddr_storage *src = NULL;
+ const struct sockaddr_storage *dst = NULL;
+
+ if (strm) {
+ src = sc_src(strm->scf);
+ dst = sc_dst(strm->scf);
+ }
+ else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
+ src = conn_src(remote);
+ dst = conn_dst(remote);
+ }
+
+ if (src && dst)
+ ret = make_proxy_line_v1(buf, buf_len, src, dst);
+ else
+ ret = make_proxy_line_v1(buf, buf_len, NULL, NULL);
+ }
+
+ return ret;
+}
+
+/* returns 0 on success */
+static int cfg_parse_pp2_never_send_local(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+ pp2_never_send_local = 1;
+ return 0;
+}
+
+/* Extracts some info from the connection and appends it to buffer <buf>. The
+ * connection's pointer, its direction, target (fe/be/srv), xprt/ctrl, and the
+ * source and destination addresses when set, are printed in a compact
+ * human-readable format fitting on a single line. This is handy to complete
+ * traces or debug output.
+ * It is permitted to pass a NULL conn pointer. The number of characters emitted
+ * is returned. A prefix <pfx> might be prepended before the first field if not
+ * NULL.
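+ *
+ * Example output (illustrative):
+ *   conn=0x7f3a4c(IN) fe=fe_main RAW/tcpv4 src=127.0.0.1:41234 dst=127.0.0.1:8080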
+ */
+int conn_append_debug_info(struct buffer *buf, const struct connection *conn, const char *pfx)
+{
+ const struct listener *li;
+ const struct server *sv;
+ const struct proxy *px;
+ char addr[40];
+ int old_len = buf->data;
+
+ if (!conn)
+ return 0;
+
+ chunk_appendf(buf, "%sconn=%p(%s)", pfx ? pfx : "", conn, conn_is_back(conn) ? "OUT" : "IN");
+
+ if ((li = objt_listener(conn->target)))
+ chunk_appendf(buf, " fe=%s", li->bind_conf->frontend->id);
+ else if ((sv = objt_server(conn->target)))
+ chunk_appendf(buf, " sv=%s/%s", sv->proxy->id, sv->id);
+ else if ((px = objt_proxy(conn->target)))
+ chunk_appendf(buf, " be=%s", px->id);
+
+ chunk_appendf(buf, " %s/%s", conn_get_xprt_name(conn), conn_get_ctrl_name(conn));
+
+ if (conn->src && addr_to_str(conn->src, addr, sizeof(addr)))
+ chunk_appendf(buf, " src=%s:%d", addr, get_host_port(conn->src));
+
+ if (conn->dst && addr_to_str(conn->dst, addr, sizeof(addr)))
+ chunk_appendf(buf, " dst=%s:%d", addr, get_host_port(conn->dst));
+
+ return buf->data - old_len;
+}
+
+/* return the major HTTP version as 1 or 2 depending on how the request arrived
+ * before being processed.
+ *
+ * WARNING: Should be updated if a new major HTTP version is added.
+ */
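+/* Illustrative configuration usage (a sketch):
+ *   acl is_h2 fc_http_major eq 2
+ *   use_backend be_h2 if is_h2
+ */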
+static int
+smp_fetch_fc_http_major(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = NULL;
+ const char *mux_name = NULL;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ /* No connection or a connection with a RAW mux */
+ if (!conn || (conn->mux && !(conn->mux->flags & MX_FL_HTX)))
+ return 0;
+
+ /* No mux installed yet, this may change */
+ if (!conn->mux) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ mux_name = conn_get_mux_name(conn);
+
+ smp->data.type = SMP_T_SINT;
+ if (strcmp(mux_name, "QUIC") == 0)
+ smp->data.u.sint = 3;
+ else if (strcmp(mux_name, "H2") == 0)
+ smp->data.u.sint = 2;
+ else
+ smp->data.u.sint = 1;
+
+ return 1;
+}
+
+/* fetch if the received connection used a PROXY protocol header */
+int smp_fetch_fc_rcvd_proxy(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ conn = objt_conn(smp->sess->origin);
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (conn->flags & CO_FL_RCVD_PROXY) ? 1 : 0;
+
+ return 1;
+}
+
+/* fetch the authority TLV from a PROXY protocol header */
+int smp_fetch_fc_pp_authority(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ conn = objt_conn(smp->sess->origin);
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!isttest(conn->proxy_authority))
+ return 0;
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = istptr(conn->proxy_authority);
+ smp->data.u.str.data = istlen(conn->proxy_authority);
+
+ return 1;
+}
+
+/* fetch the unique ID TLV from a PROXY protocol header */
+int smp_fetch_fc_pp_unique_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ conn = objt_conn(smp->sess->origin);
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!isttest(conn->proxy_unique_id))
+ return 0;
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = istptr(conn->proxy_unique_id);
+ smp->data.u.str.data = istlen(conn->proxy_unique_id);
+
+ return 1;
+}
+
+/* fetch the error code of a connection */
+int smp_fetch_fc_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (unsigned long long int)conn->err_code;
+
+ return 1;
+}
+
+/* fetch a string representation of the error code of a connection */
+int smp_fetch_fc_err_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ const char *err_code_str;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ err_code_str = conn_err_code_str(conn);
+
+ if (!err_code_str)
+ return 0;
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char*)err_code_str;
+ smp->data.u.str.data = strlen(err_code_str);
+
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types must be declared as the lowest
+ * common denominator, the type that can be cast into all other ones. For
+ * instance v4/v6 must be declared v4.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "bc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+ { "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV },
+ { "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+ { "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+ { "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
+ { "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+ { "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
+ { "fc_pp_unique_id", smp_fetch_fc_pp_unique_id, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "pp2-never-send-local", cfg_parse_pp2_never_send_local },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* private function to handle sockaddr as input for connection hash */
+static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss,
+ char *buf, size_t *idx,
+ enum conn_hash_params_t *hash_flags,
+ enum conn_hash_params_t param_type_addr,
+ enum conn_hash_params_t param_type_port)
+{
+ struct sockaddr_in *addr;
+ struct sockaddr_in6 *addr6;
+
+ switch (ss->ss_family) {
+ case AF_INET:
+ addr = (struct sockaddr_in *)ss;
+
+ conn_hash_update(buf, idx,
+ &addr->sin_addr, sizeof(addr->sin_addr),
+ hash_flags, param_type_addr);
+
+ if (addr->sin_port) {
+ conn_hash_update(buf, idx,
+ &addr->sin_port, sizeof(addr->sin_port),
+ hash_flags, param_type_port);
+ }
+
+ break;
+
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)ss;
+
+ conn_hash_update(buf, idx,
+ &addr6->sin6_addr, sizeof(addr6->sin6_addr),
+ hash_flags, param_type_addr);
+
+ if (addr6->sin6_port) {
+ conn_hash_update(buf, idx,
+ &addr6->sin6_port, sizeof(addr6->sin6_port),
+ hash_flags, param_type_port);
+ }
+
+ break;
+ }
+}
+
+/* Compute the 64-bit prehash of the <size> bytes starting at <buf>. It is
+ * used as a stable input for the connection hash calculation.
+ */
+uint64_t conn_hash_prehash(char *buf, size_t size)
+{
+ return XXH64(buf, size, 0);
+}
+
+/* Append <data> into <buf> at <idx> offset in preparation for the connection
+ * hash calculation. <idx> is incremented beyond data <size>. At the same
+ * time, <flags> are updated with <type> for the hash header.
+ */
+void conn_hash_update(char *buf, size_t *idx,
+ const void *data, size_t size,
+ enum conn_hash_params_t *flags,
+ enum conn_hash_params_t type)
+{
+ memcpy(&buf[*idx], data, size);
+ *idx += size;
+ *flags |= type;
+}
+
+uint64_t conn_hash_digest(char *buf, size_t bufsize,
+ enum conn_hash_params_t flags)
+{
+ const uint64_t flags_u64 = (uint64_t)flags;
+ const uint64_t hash = XXH64(buf, bufsize, 0);
+
+ return (flags_u64 << CONN_HASH_PAYLOAD_LEN) | CONN_HASH_GET_PAYLOAD(hash);
+}
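+
+/* The digest thus packs the parameter flags in the upper bits and the
+ * truncated XXH64 payload in the lower CONN_HASH_PAYLOAD_LEN bits:
+ *
+ *   [ 63 ... PAYLOAD_LEN ] [ PAYLOAD_LEN-1 ... 0 ]
+ *        param flags              hash payload
+ */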
+
+/* Generate the hash of a connection with <params> as input. Each non-null
+ * field of params is taken into account for the hash calculation.
+ */
+uint64_t conn_calculate_hash(const struct conn_hash_params *params)
+{
+ char *buf;
+ size_t idx = 0;
+ uint64_t hash = 0;
+ enum conn_hash_params_t hash_flags = 0;
+
+ buf = trash.area;
+
+ conn_hash_update(buf, &idx, &params->target, sizeof(params->target), &hash_flags, 0);
+
+ if (params->sni_prehash) {
+ conn_hash_update(buf, &idx,
+ &params->sni_prehash, sizeof(params->sni_prehash),
+ &hash_flags, CONN_HASH_PARAMS_TYPE_SNI);
+ }
+
+ if (params->dst_addr) {
+ conn_calculate_hash_sockaddr(params->dst_addr,
+ buf, &idx, &hash_flags,
+ CONN_HASH_PARAMS_TYPE_DST_ADDR,
+ CONN_HASH_PARAMS_TYPE_DST_PORT);
+ }
+
+ if (params->src_addr) {
+ conn_calculate_hash_sockaddr(params->src_addr,
+ buf, &idx, &hash_flags,
+ CONN_HASH_PARAMS_TYPE_SRC_ADDR,
+ CONN_HASH_PARAMS_TYPE_SRC_PORT);
+ }
+
+ if (params->proxy_prehash) {
+ conn_hash_update(buf, &idx,
+ &params->proxy_prehash, sizeof(params->proxy_prehash),
+ &hash_flags, CONN_HASH_PARAMS_TYPE_PROXY);
+ }
+
+ hash = conn_hash_digest(buf, idx, hash_flags);
+ return hash;
+}
+
+/* Handler of the task of mux_stopping_data.
+ * Called on soft-stop.
+ */
+static struct task *mux_stopping_process(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn, *back;
+
+ list_for_each_entry_safe(conn, back, &mux_stopping_data[tid].list, stopping_list) {
+ if (conn->mux && conn->mux->wake)
+ conn->mux->wake(conn);
+ }
+
+ return t;
+}
+
+static int allocate_mux_cleanup(void)
+{
+ /* allocates the thread bound mux_stopping_data task */
+ mux_stopping_data[tid].task = task_new_here();
+ if (!mux_stopping_data[tid].task) {
+ ha_alert("Failed to allocate the task for connection cleanup on thread %d.\n", tid);
+ return 0;
+ }
+
+ mux_stopping_data[tid].task->process = mux_stopping_process;
+ LIST_INIT(&mux_stopping_data[tid].list);
+
+ return 1;
+}
+REGISTER_PER_THREAD_ALLOC(allocate_mux_cleanup);
+
+static int deallocate_mux_cleanup(void)
+{
+ task_destroy(mux_stopping_data[tid].task);
+ return 1;
+}
+REGISTER_PER_THREAD_FREE(deallocate_mux_cleanup);
+
+static void deinit_idle_conns(void)
+{
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ if (idle_conns[i].cleanup_task)
+ task_destroy(idle_conns[i].cleanup_task);
+ }
+}
+REGISTER_POST_DEINIT(deinit_idle_conns);
diff --git a/src/cpuset.c b/src/cpuset.c
new file mode 100644
index 0000000..f7b6602
--- /dev/null
+++ b/src/cpuset.c
@@ -0,0 +1,120 @@
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <haproxy/compat.h>
+#include <haproxy/cpuset.h>
+#include <haproxy/intops.h>
+
+struct cpu_map cpu_map;
+
+void ha_cpuset_zero(struct hap_cpuset *set)
+{
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_ZERO(&set->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ set->cpuset = 0;
+#endif
+}
+
+int ha_cpuset_set(struct hap_cpuset *set, int cpu)
+{
+ if (cpu >= ha_cpuset_size())
+ return 1;
+
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_SET(cpu, &set->cpuset);
+ return 0;
+
+#elif defined(CPUSET_USE_ULONG)
+ /* use an unsigned long constant so that shifts by cpu >= 32 are defined */
+ set->cpuset |= (0x1UL << cpu);
+ return 0;
+#endif
+}
+
+int ha_cpuset_clr(struct hap_cpuset *set, int cpu)
+{
+ if (cpu >= ha_cpuset_size())
+ return 1;
+
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_CLR(cpu, &set->cpuset);
+ return 0;
+
+#elif defined(CPUSET_USE_ULONG)
+ set->cpuset &= ~(0x1UL << cpu);
+ return 0;
+#endif
+}
+
+void ha_cpuset_and(struct hap_cpuset *dst, struct hap_cpuset *src)
+{
+#if defined(CPUSET_USE_CPUSET)
+ CPU_AND(&dst->cpuset, &dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_AND(&dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ dst->cpuset &= src->cpuset;
+#endif
+}
+
+int ha_cpuset_count(const struct hap_cpuset *set)
+{
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ return CPU_COUNT(&set->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ return my_popcountl(set->cpuset);
+#endif
+}
+
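+/* Returns the 1-based index of the lowest CPU present in <set>, or 0 if the
+ * set is empty; e.g. (illustrative) a set containing only CPU 2 returns 3.
+ */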
+int ha_cpuset_ffs(const struct hap_cpuset *set)
+{
+#if defined(CPUSET_USE_CPUSET)
+ int n;
+
+ if (!CPU_COUNT(&set->cpuset))
+ return 0;
+
+ for (n = 0; !CPU_ISSET(n, &set->cpuset); ++n)
+ ;
+
+ return n + 1;
+
+#elif defined(CPUSET_USE_FREEBSD_CPUSET)
+ return CPU_FFS(&set->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ if (!set->cpuset)
+ return 0;
+
+ return my_ffsl(set->cpuset);
+#endif
+}
+
+void ha_cpuset_assign(struct hap_cpuset *dst, struct hap_cpuset *src)
+{
+#if defined(CPUSET_USE_CPUSET)
+ CPU_ZERO(&dst->cpuset);
+ CPU_OR(&dst->cpuset, &dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_COPY(&src->cpuset, &dst->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ dst->cpuset = src->cpuset;
+#endif
+}
+
+int ha_cpuset_size()
+{
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ return CPU_SETSIZE;
+
+#elif defined(CPUSET_USE_ULONG)
+ return LONGBITS;
+
+#endif
+}
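+
+/* Minimal usage sketch of the API above (illustrative only, the function and
+ * argument names are arbitrary): build the set {1,3}, intersect it with a
+ * caller-provided set of usable CPUs and return the first match:
+ *
+ *	static int pick_first_cpu(struct hap_cpuset *usable)
+ *	{
+ *		struct hap_cpuset wanted;
+ *
+ *		ha_cpuset_zero(&wanted);
+ *		ha_cpuset_set(&wanted, 1);
+ *		ha_cpuset_set(&wanted, 3);
+ *		ha_cpuset_and(&wanted, usable);
+ *		return ha_cpuset_ffs(&wanted) - 1; // ffs is 1-based, 0 if empty
+ *	}
+ */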
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..6d510e9
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,1467 @@
+/*
+ * Process debugging functions.
+ *
+ * Copyright 2000-2019 Willy Tarreau <willy@haproxy.org>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#ifdef USE_EPOLL
+#include <sys/epoll.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/buf.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/debug.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <import/ist.h>
+
+
+/* mask of threads still having to dump, used to respect ordering. Only used
+ * when USE_THREAD_DUMP is set.
+ */
+volatile unsigned long threads_to_dump = 0;
+unsigned int panic_started = 0;
+unsigned int debug_commands_issued = 0;
+
+/* dumps a backtrace of the current thread that is appended to buffer <buf>.
+ * Lines are prefixed with the string <prefix> which may be empty (used for
+ * indenting). It is recommended to use this at a function's tail so that
+ * the function does not appear in the call stack. The <dump> argument
+ * indicates what dump state to start from, and should usually be zero. It
+ * may be among the following values:
+ * - 0: search usual callers before step 1, or directly jump to 2
+ * - 1: skip usual callers before step 2
+ * - 2: dump until polling loop, scheduler, or main() (excluded)
+ * - 3: end
+ * - 4-7: like 0 but stops *after* main.
+ */
+void ha_dump_backtrace(struct buffer *buf, const char *prefix, int dump)
+{
+ struct buffer bak;
+ char pfx2[100];
+ void *callers[100];
+ int j, nptrs;
+ const void *addr;
+
+ nptrs = my_backtrace(callers, sizeof(callers)/sizeof(*callers));
+ if (!nptrs)
+ return;
+
+ if (snprintf(pfx2, sizeof(pfx2), "%s| ", prefix) >= sizeof(pfx2))
+ pfx2[0] = 0;
+
+ /* A call to backtrace_symbols_fd(callers, nptrs, STDOUT_FILENO) would
+ * produce similar output to the following:
+ */
+ chunk_appendf(buf, "%scall trace(%d):\n", prefix, nptrs);
+ for (j = 0; (j < nptrs || (dump & 3) < 2); j++) {
+ if (j == nptrs && !(dump & 3)) {
+ /* we failed to spot the starting point of the
+ * dump, let's start over dumping everything we
+ * have.
+ */
+ dump += 2;
+ j = 0;
+ }
+ bak = *buf;
+ dump_addr_and_bytes(buf, pfx2, callers[j], 8);
+ addr = resolve_sym_name(buf, ": ", callers[j]);
+ if ((dump & 3) == 0) {
+ /* dump not started, will start *after*
+ * ha_thread_dump_all_to_trash, ha_panic and ha_backtrace_to_stderr
+ */
+ if (addr == ha_thread_dump_all_to_trash || addr == ha_panic ||
+ addr == ha_backtrace_to_stderr)
+ dump++;
+ *buf = bak;
+ continue;
+ }
+
+ if ((dump & 3) == 1) {
+ /* starting */
+ if (addr == ha_thread_dump_all_to_trash || addr == ha_panic ||
+ addr == ha_backtrace_to_stderr) {
+ *buf = bak;
+ continue;
+ }
+ dump++;
+ }
+
+ if ((dump & 3) == 2) {
+ /* still dumping */
+ if (dump == 6) {
+ /* we only stop *after* main and we must send the LF */
+ if (addr == main) {
+ j = nptrs;
+ dump++;
+ }
+ }
+ else if (addr == run_poll_loop || addr == main || addr == run_tasks_from_lists) {
+ dump++;
+ *buf = bak;
+ break;
+ }
+ }
+ /* OK, line dumped */
+ chunk_appendf(buf, "\n");
+ }
+}
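+/* Note: ha_backtrace_to_stderr() below passes <dump>=4, i.e. the usual
+ * callers are skipped but the dump only stops after main() has been emitted,
+ * which keeps the process entry point visible in the trace.
+ */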
+
+/* dump a backtrace of current thread's stack to stderr. */
+void ha_backtrace_to_stderr(void)
+{
+ char area[2048];
+ struct buffer b = b_make(area, sizeof(area), 0, 0);
+
+ ha_dump_backtrace(&b, " ", 4);
+ if (b.data)
+ DISGUISE(write(2, b.area, b.data));
+}
+
+/* Dumps to the buffer some known information for the desired thread, and
+ * optionally extra info for the current thread. The dump will be appended to
+ * the buffer, so the caller is responsible for preliminary initializing it.
+ * The calling thread ID needs to be passed in <calling_tid> to display a star
+ * in front of the calling thread's line (usually it's tid). Any stuck thread
+ * is also prefixed with a '>'.
+ */
+void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
+{
+ unsigned long thr_bit = 1UL << thr;
+ unsigned long long p = ha_thread_ctx[thr].prev_cpu_time;
+ unsigned long long n = now_cpu_time_thread(thr);
+ int stuck = !!(ha_thread_ctx[thr].flags & TH_FL_STUCK);
+
+ chunk_appendf(buf,
+ "%c%cThread %-2u: id=0x%llx act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
+ " %2u/%-2u stuck=%d prof=%d",
+ (thr == calling_tid) ? '*' : ' ', stuck ? '>' : ' ', thr + 1,
+ ha_get_pthread_id(thr),
+ thread_has_tasks(),
+ !!(global_tasks_mask & thr_bit),
+ !eb_is_empty(&ha_thread_ctx[thr].timers),
+ !eb_is_empty(&ha_thread_ctx[thr].rqueue),
+ !(LIST_ISEMPTY(&ha_thread_ctx[thr].tasklets[TL_URGENT]) &&
+ LIST_ISEMPTY(&ha_thread_ctx[thr].tasklets[TL_NORMAL]) &&
+ LIST_ISEMPTY(&ha_thread_ctx[thr].tasklets[TL_BULK]) &&
+ MT_LIST_ISEMPTY(&ha_thread_ctx[thr].shared_tasklet_list)),
+ ha_thread_ctx[thr].tasks_in_list,
+ ha_thread_ctx[thr].rq_total,
+ ha_thread_info[thr].tg->tgid, ha_thread_info[thr].ltid + 1,
+ stuck,
+ !!(task_profiling_mask & thr_bit));
+
+ chunk_appendf(buf,
+ " harmless=%d wantrdv=%d",
+ !!(threads_harmless_mask & thr_bit),
+ !!(threads_want_rdv_mask & thr_bit));
+
+ chunk_appendf(buf, "\n");
+ chunk_appendf(buf, " cpu_ns: poll=%llu now=%llu diff=%llu\n", p, n, n-p);
+
+ /* this is the end of what we can dump from outside the current thread */
+
+ if (thr != tid)
+ return;
+
+ chunk_appendf(buf, " curr_task=");
+ ha_task_dump(buf, th_ctx->current, " ");
+
+ if (stuck) {
+ /* We only emit the backtrace for stuck threads in order not to
+ * waste precious output buffer space with non-interesting data.
+ * Please leave this as the last instruction in this function
+ * so that the compiler uses tail merging and the current
+ * function does not appear in the stack.
+ */
+ ha_dump_backtrace(buf, " ", 0);
+ }
+}
+
+
+/* dumps into the buffer some information related to task <task> (which may
+ * either be a task or a tasklet), and prepends each line except the first one
+ * with <pfx>. The buffer is only appended and the first output starts by the
+ * pointer itself. The caller is responsible for making sure the task is not
+ * going to vanish during the dump.
+ */
+void ha_task_dump(struct buffer *buf, const struct task *task, const char *pfx)
+{
+ const struct stream *s = NULL;
+ const struct appctx __maybe_unused *appctx = NULL;
+ struct hlua __maybe_unused *hlua = NULL;
+ const struct stconn *sc;
+
+ if (!task) {
+ chunk_appendf(buf, "0\n");
+ return;
+ }
+
+ if (TASK_IS_TASKLET(task))
+ chunk_appendf(buf,
+ "%p (tasklet) calls=%u\n",
+ task,
+ task->calls);
+ else
+ chunk_appendf(buf,
+ "%p (task) calls=%u last=%llu%s\n",
+ task,
+ task->calls,
+ task->wake_date ? (unsigned long long)(now_mono_time() - task->wake_date) : 0,
+ task->wake_date ? " ns ago" : "");
+
+ chunk_appendf(buf, "%s fct=%p(", pfx, task->process);
+ resolve_sym_name(buf, NULL, task->process);
+ chunk_appendf(buf,") ctx=%p", task->context);
+
+ if (task->process == task_run_applet && (appctx = task->context))
+ chunk_appendf(buf, "(%s)\n", appctx->applet->name);
+ else
+ chunk_appendf(buf, "\n");
+
+ if (task->process == process_stream && task->context)
+ s = (struct stream *)task->context;
+ else if (task->process == task_run_applet && task->context && (sc = appctx_sc((struct appctx *)task->context)))
+ s = sc_strm(sc);
+ else if (task->process == sc_conn_io_cb && task->context)
+ s = sc_strm(((struct stconn *)task->context));
+
+ if (s)
+ stream_dump(buf, s, pfx, '\n');
+
+#ifdef USE_LUA
+ hlua = NULL;
+ if (s && (hlua = s->hlua)) {
+ chunk_appendf(buf, "%sCurrent executing Lua from a stream analyser -- ", pfx);
+ }
+ else if (task->process == hlua_process_task && (hlua = task->context)) {
+ chunk_appendf(buf, "%sCurrent executing a Lua task -- ", pfx);
+ }
+ else if (task->process == task_run_applet && (appctx = task->context) &&
+ (appctx->applet->fct == hlua_applet_tcp_fct)) {
+ chunk_appendf(buf, "%sCurrent executing a Lua TCP service -- ", pfx);
+ }
+ else if (task->process == task_run_applet && (appctx = task->context) &&
+ (appctx->applet->fct == hlua_applet_http_fct)) {
+ chunk_appendf(buf, "%sCurrent executing a Lua HTTP service -- ", pfx);
+ }
+
+ if (hlua && hlua->T) {
+ chunk_appendf(buf, "stack traceback:\n ");
+ append_prefixed_str(buf, hlua_traceback(hlua->T, "\n "), pfx, '\n', 0);
+ b_putchr(buf, '\n');
+ }
+ else
+ b_putchr(buf, '\n');
+#endif
+}
+
+/* This function dumps the state of all threads. It returns 0 if the output
+ * buffer is full and it needs to be called again, otherwise non-zero.
+ */
+static int cli_io_handler_show_threads(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int thr;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ if (appctx->st0)
+ thr = appctx->st1;
+ else
+ thr = 0;
+
+ chunk_reset(&trash);
+ ha_thread_dump_all_to_trash();
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ appctx->st1 = thr;
+ return 0;
+ }
+ return 1;
+}
+
+#if defined(HA_HAVE_DUMP_LIBS)
+/* parse a "show libs" command. It returns 1 if it emits anything otherwise zero. */
+static int debug_parse_cli_show_libs(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ chunk_reset(&trash);
+ if (dump_libs(&trash, 1))
+ return cli_msg(appctx, LOG_INFO, trash.area);
+ else
+ return 0;
+}
+#endif
+
+/* dumps the state of all threads into the trash and onto fd #2, then aborts. */
+void ha_panic()
+{
+ if (HA_ATOMIC_FETCH_ADD(&panic_started, 1) != 0) {
+ /* a panic dump is already in progress, let's not disturb it,
+ * we'll be called via signal DEBUGSIG. By returning we may be
+ * able to leave a current signal handler (e.g. the WDT's) first,
+ * which ensures more reliable signal delivery.
+ */
+ return;
+ }
+ chunk_reset(&trash);
+ chunk_appendf(&trash, "Thread %u is about to kill the process.\n", tid + 1);
+ ha_thread_dump_all_to_trash();
+ DISGUISE(write(2, trash.area, trash.data));
+ for (;;)
+ abort();
+}
+
+/* Complain with message <msg> on stderr. If <counter> is not NULL, it is
+ * atomically incremented, and the message is only printed when the counter
+ * was zero, so that the message is only printed once. <taint> is only checked
+ * on bit 1, and will taint the process either for a bug (2) or warn (0).
+ */
+void complain(int *counter, const char *msg, int taint)
+{
+ if (counter && _HA_ATOMIC_FETCH_ADD(counter, 1))
+ return;
+ DISGUISE(write(2, msg, strlen(msg)));
+ if (taint & 2)
+ mark_tainted(TAINTED_BUG);
+ else
+ mark_tainted(TAINTED_WARN);
+}
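+/* Typical use (hypothetical example, <warn_once> is an arbitrary name): emit
+ * a given diagnostic at most once over the process' lifetime:
+ *
+ *	static int warn_once;
+ *	complain(&warn_once, "suspicious event, please report it\n", 0);
+ */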
+
+/* parse a "debug dev exit" command. It always returns 1, though it should never return. */
+static int debug_parse_cli_exit(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int code = atoi(args[3]);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ exit(code);
+ return 1;
+}
+
+/* parse a "debug dev bug" command. It always returns 1, though it should never return.
+ * Note: we make sure not to make the function static so that it appears in the trace.
+ */
+int debug_parse_cli_bug(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ BUG_ON(one > zero);
+ return 1;
+}
+
+/* parse a "debug dev warn" command. It always returns 1.
+ * Note: we make sure not to make the function static so that it appears in the trace.
+ */
+int debug_parse_cli_warn(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ WARN_ON(one > zero);
+ return 1;
+}
+
+/* parse a "debug dev check" command. It always returns 1.
+ * Note: we make sure not to make the function static so that it appears in the trace.
+ */
+int debug_parse_cli_check(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ CHECK_IF(one > zero);
+ return 1;
+}
+
+/* parse a "debug dev close" command. It always returns 1. */
+static int debug_parse_cli_close(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int fd;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing file descriptor number.\n");
+
+ fd = atoi(args[3]);
+ if (fd < 0 || fd >= global.maxsock)
+ return cli_err(appctx, "File descriptor out of range.\n");
+
+ if (!fdtab[fd].owner)
+ return cli_msg(appctx, LOG_INFO, "File descriptor was already closed.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ fd_delete(fd);
+ return 1;
+}
+
+/* parse a "debug dev delay" command. It always returns 1. */
+static int debug_parse_cli_delay(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int delay = atoi(args[3]);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ usleep((long)delay * 1000);
+ return 1;
+}
+
+/* parse a "debug dev log" command. It always returns 1. */
+static int debug_parse_cli_log(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int arg;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ chunk_reset(&trash);
+ for (arg = 3; *args[arg]; arg++) {
+ if (arg > 3)
+ chunk_strcat(&trash, " ");
+ chunk_strcat(&trash, args[arg]);
+ }
+
+ send_log(NULL, LOG_INFO, "%s\n", trash.area);
+ return 1;
+}
+
+/* parse a "debug dev loop" command. It always returns 1. */
+static int debug_parse_cli_loop(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct timeval deadline, curr;
+ int loop = atoi(args[3]);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ gettimeofday(&curr, NULL);
+ tv_ms_add(&deadline, &curr, loop);
+
+ while (tv_ms_cmp(&curr, &deadline) < 0)
+ gettimeofday(&curr, NULL);
+
+ return 1;
+}
+
+/* parse a "debug dev panic" command. It always returns 1, though it should never return. */
+static int debug_parse_cli_panic(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ ha_panic();
+ return 1;
+}
+
+/* parse a "debug dev exec" command. It always returns 1. */
+#if defined(DEBUG_DEV)
+static int debug_parse_cli_exec(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int pipefd[2];
+ int arg;
+ int pid;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ chunk_reset(&trash);
+ for (arg = 3; *args[arg]; arg++) {
+ if (arg > 3)
+ chunk_strcat(&trash, " ");
+ chunk_strcat(&trash, args[arg]);
+ }
+
+ thread_isolate();
+ if (pipe(pipefd) < 0)
+ goto fail_pipe;
+
+ if (fd_set_cloexec(pipefd[0]) == -1)
+ goto fail_fcntl;
+
+ if (fd_set_cloexec(pipefd[1]) == -1)
+ goto fail_fcntl;
+
+ pid = fork();
+
+ if (pid < 0)
+ goto fail_fork;
+ else if (pid == 0) {
+ /* child */
+ char *cmd[4] = { "/bin/sh", "-c", 0, 0 };
+
+ close(0);
+ dup2(pipefd[1], 1);
+ dup2(pipefd[1], 2);
+
+ cmd[2] = trash.area;
+ execvp(cmd[0], cmd);
+ printf("execvp() failed\n");
+ exit(1);
+ }
+
+ /* parent */
+ thread_release();
+ close(pipefd[1]);
+ chunk_reset(&trash);
+ while (1) {
+ ssize_t ret = read(pipefd[0], trash.area + trash.data, trash.size - 20 - trash.data);
+ if (ret <= 0)
+ break;
+ trash.data += ret;
+ if (trash.data + 20 == trash.size) {
+ chunk_strcat(&trash, "\n[[[TRUNCATED]]]\n");
+ break;
+ }
+ }
+ close(pipefd[0]);
+ waitpid(pid, NULL, WNOHANG);
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_INFO, trash.area);
+
+ fail_fork:
+ fail_fcntl:
+ close(pipefd[0]);
+ close(pipefd[1]);
+ fail_pipe:
+ thread_release();
+ return cli_err(appctx, "Failed to execute command.\n");
+}
+#endif
+
+/* parse a "debug dev hex" command. It always returns 1. */
+static int debug_parse_cli_hex(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ unsigned long start, len;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing memory address to dump from.\n");
+
+ start = strtoul(args[3], NULL, 0);
+ if (!start)
+ return cli_err(appctx, "Will not dump from NULL address.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ /* by default, dump ~128 bytes up to the next 16-byte boundary */
+ len = strtoul(args[4], NULL, 0);
+ if (!len)
+ len = ((start + 128) & -16) - start;
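+ /* e.g. start=0x1001 yields len=127 so that the dump ends exactly
+ * on the next 16-byte boundary (0x1080)
+ */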
+
+ chunk_reset(&trash);
+ dump_hex(&trash, " ", (const void *)start, len, 1);
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse a "debug dev sym <addr>" command. It always returns 1. */
+static int debug_parse_cli_sym(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ unsigned long addr;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing memory address to be resolved.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ addr = strtoul(args[3], NULL, 0);
+ chunk_printf(&trash, "%#lx resolves to ", addr);
+ resolve_sym_name(&trash, NULL, (const void *)addr);
+ chunk_appendf(&trash, "\n");
+
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse a "debug dev tkill" command. It always returns 1. */
+static int debug_parse_cli_tkill(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int thr = 0;
+ int sig = SIGABRT;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (*args[3])
+ thr = atoi(args[3]);
+
+ if (thr < 0 || thr > global.nbthread)
+ return cli_err(appctx, "Thread number out of range (use 0 for current).\n");
+
+ if (*args[4])
+ sig = atoi(args[4]);
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ if (thr)
+ ha_tkill(thr - 1, sig);
+ else
+ raise(sig);
+ return 1;
+}
+
+/* parse a "debug dev write" command. It always returns 1. */
+static int debug_parse_cli_write(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ unsigned long len;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing output size.\n");
+
+ len = strtoul(args[3], NULL, 0);
+ if (len >= trash.size)
+ return cli_err(appctx, "Output too large, must be <tune.bufsize.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ chunk_reset(&trash);
+ trash.data = len;
+ memset(trash.area, '.', trash.data);
+ trash.area[trash.data] = 0;
+ for (len = 64; len < trash.data; len += 64)
+ trash.area[len] = '\n';
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse a "debug dev stream" command */
+/*
+ * debug dev stream [strm=<ptr>] [strm.f[{+-=^}<flags>]] [strm.x[{+-=^}<exp>]] \
+ * [txn.f[{+-=^}<flags>]] [req.f[{+-=^}<flags>]] [res.f[{+-=^}<flags>]] \
+ * [req.r[{+-=^}<exp>]] [res.r[{+-=^}<exp>]] [req.w[{+-=^}<exp>]] \
+ * [res.w[{+-=^}<exp>]] [scf.s[=<state>]] [scb.s[=<state>]] [wake]
+ */
+static int debug_parse_cli_stream(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct stream *s = appctx_strm(appctx);
+ int arg;
+ void *ptr;
+ int size;
+ const char *word, *end;
+ struct ist name;
+ char *msg = NULL;
+ char *endarg;
+ unsigned long long old, new;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ptr = NULL; size = 0;
+
+ if (!*args[3]) {
+ return cli_err(appctx,
+ "Usage: debug dev stream { <obj> <op> <value> | wake }*\n"
+ " <obj> = {strm | strm.f | strm.x | scf.s | scb.s |\n"
+ " txn.f | req.f | req.r | req.w | res.f | res.r | res.w}\n"
+ " <op> = {'' (show) | '=' (assign) | '^' (xor) | '+' (or) | '-' (andnot)}\n"
+ " <value> = 'now' | 64-bit dec/hex integer (0x prefix supported)\n"
+ " 'wake' wakes the stream asssigned to 'strm' (default: current)\n"
+ );
+ }
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ for (arg = 3; *args[arg]; arg++) {
+ old = 0;
+ end = word = args[arg];
+ while (*end && *end != '=' && *end != '^' && *end != '+' && *end != '-')
+ end++;
+ name = ist2(word, end - word);
+ if (isteq(name, ist("strm"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s; size = sizeof(s);
+ } else if (isteq(name, ist("strm.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->flags; size = sizeof(s->flags);
+ } else if (isteq(name, ist("strm.x"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->conn_exp; size = sizeof(s->conn_exp);
+ } else if (isteq(name, ist("txn.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->txn->flags; size = sizeof(s->txn->flags);
+ } else if (isteq(name, ist("req.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->req.flags; size = sizeof(s->req.flags);
+ } else if (isteq(name, ist("res.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->res.flags; size = sizeof(s->res.flags);
+ } else if (isteq(name, ist("req.r"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->req.rex; size = sizeof(s->req.rex);
+ } else if (isteq(name, ist("res.r"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->res.rex; size = sizeof(s->res.rex);
+ } else if (isteq(name, ist("req.w"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->req.wex; size = sizeof(s->req.wex);
+ } else if (isteq(name, ist("res.w"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->res.wex; size = sizeof(s->res.wex);
+ } else if (isteq(name, ist("scf.s"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->scf->state; size = sizeof(s->scf->state);
+ } else if (isteq(name, ist("scb.s"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->scb->state; size = sizeof(s->scb->state);
+ } else if (isteq(name, ist("wake"))) {
+ if (s && may_access(s) && may_access((void *)s + sizeof(*s) - 1))
+ task_wakeup(s->task, TASK_WOKEN_TIMER|TASK_WOKEN_IO|TASK_WOKEN_MSG);
+ continue;
+ } else
+ return cli_dynerr(appctx, memprintf(&msg, "Unsupported field name: '%s'.\n", word));
+
+ /* read previous value */
+ if ((s || ptr == &s) && ptr && may_access(ptr) && may_access(ptr + size - 1)) {
+ if (size == 8)
+ old = read_u64(ptr);
+ else if (size == 4)
+ old = read_u32(ptr);
+ else if (size == 2)
+ old = read_u16(ptr);
+ else
+ old = *(const uint8_t *)ptr;
+ } else {
+ memprintf(&msg,
+ "%sSkipping inaccessible pointer %p for field '%.*s'.\n",
+ msg ? msg : "", ptr, (int)(end - word), word);
+ continue;
+ }
+
+ /* parse the new value */
+ new = strtoll(end + 1, &endarg, 0);
+ if (end[1] && *endarg) {
+ if (strcmp(end + 1, "now") == 0)
+ new = now_ms;
+ else {
+ memprintf(&msg,
+ "%sIgnoring unparsable value '%s' for field '%.*s'.\n",
+ msg ? msg : "", end + 1, (int)(end - word), word);
+ continue;
+ }
+ }
+
+ switch (*end) {
+ case '\0': /* show */
+ memprintf(&msg, "%s%.*s=%#llx ", msg ? msg : "", (int)(end - word), word, old);
+ new = old; // do not change the value
+ break;
+
+ case '=': /* set */
+ break;
+
+ case '^': /* XOR */
+ new = old ^ new;
+ break;
+
+ case '+': /* OR */
+ new = old | new;
+ break;
+
+ case '-': /* AND NOT */
+ new = old & ~new;
+ break;
+
+ default:
+ break;
+ }
+
+ /* write the new value */
+ if (new != old) {
+ if (size == 8)
+ write_u64(ptr, new);
+ else if (size == 4)
+ write_u32(ptr, new);
+ else if (size == 2)
+ write_u16(ptr, new);
+ else
+ *(uint8_t *)ptr = new;
+ }
+ }
+
+ if (msg && *msg)
+ return cli_dynmsg(appctx, LOG_INFO, msg);
+ return 1;
+}
+
+static struct task *debug_task_handler(struct task *t, void *ctx, unsigned int state)
+{
+ unsigned long *tctx = ctx; // [0] = #tasks, [1] = inter, [2+] = { tl | (tsk+1) }
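+ /* entries are tagged on bit 0: a task pointer is stored as (tsk+1)
+ * while a tasklet pointer keeps bit 0 clear (pointers are at least
+ * 2-byte aligned), so the type is recovered from the pointer alone
+ */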
+ unsigned long inter = tctx[1];
+ unsigned long rnd;
+
+ t->expire = tick_add(now_ms, inter);
+
+ /* half of the calls will wake up another entry */
+ rnd = statistical_prng();
+ if (rnd & 1) {
+ rnd >>= 1;
+ rnd %= tctx[0];
+ rnd = tctx[rnd + 2];
+
+ if (rnd & 1)
+ task_wakeup((struct task *)(rnd - 1), TASK_WOKEN_MSG);
+ else
+ tasklet_wakeup((struct tasklet *)rnd);
+ }
+ return t;
+}
+
+static struct task *debug_tasklet_handler(struct task *t, void *ctx, unsigned int state)
+{
+ unsigned long *tctx = ctx; // [0] = #tasks, [1] = inter, [2+] = { tl | (tsk+1) }
+ unsigned long rnd;
+ int i;
+
+ /* wake up two random entries */
+ for (i = 0; i < 2; i++) {
+ rnd = statistical_prng() % tctx[0];
+ rnd = tctx[rnd + 2];
+
+ if (rnd & 1)
+ task_wakeup((struct task *)(rnd - 1), TASK_WOKEN_MSG);
+ else
+ tasklet_wakeup((struct tasklet *)rnd);
+ }
+ return t;
+}
+
+/* parse a "debug dev sched" command
+ * debug dev sched {task|tasklet} [count=<count>] [mask=<mask>] [single=<single>] [inter=<inter>]
+ */
+static int debug_parse_cli_sched(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int arg;
+ void *ptr;
+ int size;
+ const char *word, *end;
+ struct ist name;
+ char *msg = NULL;
+ char *endarg;
+ unsigned long long new;
+ unsigned long count = 0;
+ unsigned long thrid = 0;
+ unsigned int inter = 0;
+ unsigned long mask, tmask;
+ unsigned long i;
+ int mode = 0; // 0 = tasklet; 1 = task
+ int single = 0;
+ unsigned long *tctx; // [0] = #tasks, [1] = inter, [2+] = { tl | (tsk+1) }
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ptr = NULL; size = 0;
+ mask = all_threads_mask;
+
+ if (strcmp(args[3], "task") != 0 && strcmp(args[3], "tasklet") != 0) {
+ return cli_err(appctx,
+ "Usage: debug dev sched {task|tasklet} { <obj> = <value> }*\n"
+ " <obj> = {count | mask | inter | single }\n"
+ " <value> = 64-bit dec/hex integer (0x prefix supported)\n"
+ );
+ }
+
+ mode = strcmp(args[3], "task") == 0;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ for (arg = 4; *args[arg]; arg++) {
+ end = word = args[arg];
+ while (*end && *end != '=' && *end != '^' && *end != '+' && *end != '-')
+ end++;
+ name = ist2(word, end - word);
+ if (isteq(name, ist("count"))) {
+ ptr = &count; size = sizeof(count);
+ } else if (isteq(name, ist("mask"))) {
+ ptr = &mask; size = sizeof(mask);
+ } else if (isteq(name, ist("tid"))) {
+ ptr = &thrid; size = sizeof(thrid);
+ } else if (isteq(name, ist("inter"))) {
+ ptr = &inter; size = sizeof(inter);
+ } else if (isteq(name, ist("single"))) {
+ ptr = &single; size = sizeof(single);
+ } else
+ return cli_dynerr(appctx, memprintf(&msg, "Unsupported setting: '%s'.\n", word));
+
+ /* parse the new value */
+ new = strtoll(end + 1, &endarg, 0);
+ if (end[1] && *endarg) {
+ memprintf(&msg,
+ "%sIgnoring unparsable value '%s' for field '%.*s'.\n",
+ msg ? msg : "", end + 1, (int)(end - word), word);
+ continue;
+ }
+
+ /* write the new value */
+ if (size == 8)
+ write_u64(ptr, new);
+ else if (size == 4)
+ write_u32(ptr, new);
+ else if (size == 2)
+ write_u16(ptr, new);
+ else
+ *(uint8_t *)ptr = new;
+ }
+
+ tctx = calloc(count + 2, sizeof(*tctx));
+ if (!tctx)
+ goto fail;
+
+ tctx[0] = (unsigned long)count;
+ tctx[1] = (unsigned long)inter;
+
+ mask &= all_threads_mask;
+ if (!mask)
+ mask = tid_bit;
+
+ tmask = 0;
+ for (i = 0; i < count; i++) {
+ if (single || mode == 0) {
+ /* look for next bit matching a bit in mask or loop back to zero */
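+ /* (wrap detection uses two's complement: mask & -tmask is
+ * non-zero as long as mask still has a set bit at or above
+ * the current tmask position)
+ */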
+ for (tmask <<= 1; !(mask & tmask); ) {
+ if (!(mask & -tmask))
+ tmask = 1;
+ else
+ tmask <<= 1;
+ }
+ } else {
+ /* multi-threaded task */
+ tmask = mask;
+ }
+
+ /* now, if single or mask was set, tmask corresponds to the
+ * valid thread mask to use, otherwise it remains zero.
+ */
+ //printf("%lu: mode=%d mask=%#lx\n", i, mode, tmask);
+ if (mode == 0) {
+ struct tasklet *tl = tasklet_new();
+
+ if (!tl)
+ goto fail;
+
+ if (tmask)
+ tl->tid = my_ffsl(tmask) - 1;
+ tl->process = debug_tasklet_handler;
+ tl->context = tctx;
+ tctx[i + 2] = (unsigned long)tl;
+ } else {
+ struct task *task = task_new(tmask ? tmask : tid_bit);
+
+ if (!task)
+ goto fail;
+
+ task->process = debug_task_handler;
+ task->context = tctx;
+ tctx[i + 2] = (unsigned long)task + 1;
+ }
+ }
+
+ /* start the tasks and tasklets */
+ for (i = 0; i < count; i++) {
+ unsigned long ctx = tctx[i + 2];
+
+ if (ctx & 1)
+ task_wakeup((struct task *)(ctx - 1), TASK_WOKEN_INIT);
+ else
+ tasklet_wakeup((struct tasklet *)ctx);
+ }
+
+ if (msg && *msg)
+ return cli_dynmsg(appctx, LOG_INFO, msg);
+ return 1;
+
+ fail:
+ /* free partially allocated entries */
+ for (i = 0; tctx && i < count; i++) {
+ unsigned long ctx = tctx[i + 2];
+
+ if (!ctx)
+ break;
+
+ if (ctx & 1)
+ task_destroy((struct task *)(ctx - 1));
+ else
+ tasklet_free((struct tasklet *)ctx);
+ }
+
+ free(tctx);
+ return cli_err(appctx, "Not enough memory");
+}
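+/* Example invocation (illustrative values): create 8 self-waking tasks bound
+ * to the first four threads, each re-armed every 10ms:
+ *   debug dev sched task count=8 mask=0xf inter=10
+ */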
+
+/* CLI state for "debug dev fd" */
+struct dev_fd_ctx {
+ int start_fd;
+};
+
+/* CLI parser for the "debug dev fd" command. The current FD to restart from is
+ * stored in a struct dev_fd_ctx pointed to by svcctx.
+ */
+static int debug_parse_cli_fd(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct dev_fd_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ /* start at fd #0 */
+ ctx->start_fd = 0;
+ return 0;
+}
+
+/* CLI I/O handler for the "debug dev fd" command. Dumps all FDs that are
+ * accessible from the process but not known from fdtab. The FD number to
+ * restart from is stored in a struct dev_fd_ctx pointed to by svcctx.
+ */
+static int debug_iohandler_fd(struct appctx *appctx)
+{
+ struct dev_fd_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct sockaddr_storage sa;
+ struct stat statbuf;
+ socklen_t salen, vlen;
+ int ret1, ret2, port;
+ char *addrstr;
+ int ret = 1;
+ int i, fd;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto end;
+
+ chunk_reset(&trash);
+
+ thread_isolate();
+
+ /* scan all file descriptors up to the configured limit, reporting
+ * those the process knows about but which are absent from fdtab.
+ */
+ for (fd = ctx->start_fd; fd < global.maxsock; fd++) {
+ /* check for FD's existence */
+ ret1 = fcntl(fd, F_GETFD, 0);
+ if (ret1 == -1)
+ continue; // not known to the process
+ if (fdtab[fd].owner)
+ continue; // well-known
+
+ /* OK, we're seeing an orphan, let's try to retrieve as much
+ * information as possible about it.
+ */
+ chunk_printf(&trash, "%5d", fd);
+
+ if (fstat(fd, &statbuf) != -1) {
+ chunk_appendf(&trash, " type=%s mod=%04o dev=%#llx siz=%#llx uid=%lld gid=%lld fs=%#llx ino=%#llx",
+ isatty(fd) ? "tty.":
+ S_ISREG(statbuf.st_mode) ? "file":
+ S_ISDIR(statbuf.st_mode) ? "dir.":
+ S_ISCHR(statbuf.st_mode) ? "chr.":
+ S_ISBLK(statbuf.st_mode) ? "blk.":
+ S_ISFIFO(statbuf.st_mode) ? "pipe":
+ S_ISLNK(statbuf.st_mode) ? "link":
+ S_ISSOCK(statbuf.st_mode) ? "sock":
+#ifdef USE_EPOLL
+ epoll_wait(fd, NULL, 0, 0) != -1 || errno != EBADF ? "epol":
+#endif
+ "????",
+ (uint)statbuf.st_mode & 07777,
+
+ (ullong)statbuf.st_rdev,
+ (ullong)statbuf.st_size,
+ (ullong)statbuf.st_uid,
+ (ullong)statbuf.st_gid,
+
+ (ullong)statbuf.st_dev,
+ (ullong)statbuf.st_ino);
+ }
+
+ chunk_appendf(&trash, " getfd=%s+%#x",
+ (ret1 & FD_CLOEXEC) ? "cloex" : "",
+ ret1 &~ FD_CLOEXEC);
+
+ /* FD options */
+ ret2 = fcntl(fd, F_GETFL, 0);
+ if (ret2 != -1) {
+ chunk_appendf(&trash, " getfl=%s",
+ (ret2 & 3) >= 2 ? "O_RDWR" :
+ (ret2 & 1) ? "O_WRONLY" : "O_RDONLY");
+
+ for (i = 2; i < 32; i++) {
+ if (!(ret2 & (1UL << i)))
+ continue;
+ switch (1UL << i) {
+ case O_CREAT: chunk_appendf(&trash, ",O_CREAT"); break;
+ case O_EXCL: chunk_appendf(&trash, ",O_EXCL"); break;
+ case O_NOCTTY: chunk_appendf(&trash, ",O_NOCTTY"); break;
+ case O_TRUNC: chunk_appendf(&trash, ",O_TRUNC"); break;
+ case O_APPEND: chunk_appendf(&trash, ",O_APPEND"); break;
+#ifdef O_ASYNC
+ case O_ASYNC: chunk_appendf(&trash, ",O_ASYNC"); break;
+#endif
+#ifdef O_DIRECT
+ case O_DIRECT: chunk_appendf(&trash, ",O_DIRECT"); break;
+#endif
+#ifdef O_NOATIME
+ case O_NOATIME: chunk_appendf(&trash, ",O_NOATIME"); break;
+#endif
+ }
+ }
+ }
+
+ vlen = sizeof(ret2);
+ ret1 = getsockopt(fd, SOL_SOCKET, SO_TYPE, &ret2, &vlen);
+ if (ret1 != -1)
+ chunk_appendf(&trash, " so_type=%d", ret2);
+
+ vlen = sizeof(ret2);
+ ret1 = getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ret2, &vlen);
+ if (ret1 != -1)
+ chunk_appendf(&trash, " so_accept=%d", ret2);
+
+ vlen = sizeof(ret2);
+ ret1 = getsockopt(fd, SOL_SOCKET, SO_ERROR, &ret2, &vlen);
+ if (ret1 != -1)
+ chunk_appendf(&trash, " so_error=%d", ret2);
+
+ salen = sizeof(sa);
+ if (getsockname(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ port = ntohs(((const struct sockaddr_in *)&sa)->sin_port);
+ else if (sa.ss_family == AF_INET6)
+ port = ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port);
+ else
+ port = 0;
+ addrstr = sa2str(&sa, port, 0);
+ chunk_appendf(&trash, " laddr=%s", addrstr);
+ free(addrstr);
+ }
+
+ salen = sizeof(sa);
+ if (getpeername(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ port = ntohs(((const struct sockaddr_in *)&sa)->sin_port);
+ else if (sa.ss_family == AF_INET6)
+ port = ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port);
+ else
+ port = 0;
+ addrstr = sa2str(&sa, port, 0);
+ chunk_appendf(&trash, " raddr=%s", addrstr);
+ free(addrstr);
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ctx->start_fd = fd;
+ ret = 0;
+ break;
+ }
+ }
+
+ thread_release();
+ end:
+ return ret;
+}
+
+#if defined(DEBUG_MEM_STATS)
+
+/* CLI state for "debug dev memstats" */
+struct dev_mem_ctx {
+ struct mem_stats *start, *stop; /* begin/end of dump */
+ int show_all; /* show all entries if non-null */
+};
+
+/* CLI parser for the "debug dev memstats" command. Sets a dev_mem_ctx shown above. */
+static int debug_parse_cli_memstats(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct dev_mem_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ extern __attribute__((__weak__)) struct mem_stats __start_mem_stats;
+ extern __attribute__((__weak__)) struct mem_stats __stop_mem_stats;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (strcmp(args[3], "reset") == 0) {
+ struct mem_stats *ptr;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ for (ptr = &__start_mem_stats; ptr < &__stop_mem_stats; ptr++) {
+ _HA_ATOMIC_STORE(&ptr->calls, 0);
+ _HA_ATOMIC_STORE(&ptr->size, 0);
+ }
+ return 1;
+ }
+
+ if (strcmp(args[3], "all") == 0)
+ ctx->show_all = 1;
+
+ /* otherwise proceed with the dump from p0 to p1 */
+ ctx->start = &__start_mem_stats;
+ ctx->stop = &__stop_mem_stats;
+ return 0;
+}
+
+/* CLI I/O handler for the "debug dev memstats" command using a dev_mem_ctx
+ * found in appctx->svcctx. Dumps all mem_stats structs referenced by pointers
+ * located between ->start and ->stop. Dumps all entries if ->show_all != 0,
+ * otherwise only non-zero calls.
+ */
+static int debug_iohandler_memstats(struct appctx *appctx)
+{
+ struct dev_mem_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct mem_stats *ptr = ctx->start;
+ int ret = 1;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto end;
+
+ chunk_reset(&trash);
+
+ /* iterate over all mem_stats entries between ->start and ->stop,
+ * resuming from the last visited one if the buffer filled up.
+ */
+ for (; ptr != ctx->stop; ptr++) {
+ const char *type;
+ const char *name;
+ const char *p;
+
+ if (!ptr->size && !ptr->calls && !ctx->show_all)
+ continue;
+
+ /* basename only */
+ for (p = name = ptr->file; *p; p++) {
+ if (*p == '/')
+ name = p + 1;
+ }
+
+ switch (ptr->type) {
+ case MEM_STATS_TYPE_CALLOC: type = "CALLOC"; break;
+ case MEM_STATS_TYPE_FREE: type = "FREE"; break;
+ case MEM_STATS_TYPE_MALLOC: type = "MALLOC"; break;
+ case MEM_STATS_TYPE_REALLOC: type = "REALLOC"; break;
+ case MEM_STATS_TYPE_STRDUP: type = "STRDUP"; break;
+ default: type = "UNSET"; break;
+ }
+
+ //chunk_printf(&trash,
+ // "%20s:%-5d %7s size: %12lu calls: %9lu size/call: %6lu\n",
+ // name, ptr->line, type,
+ // (unsigned long)ptr->size, (unsigned long)ptr->calls,
+ // (unsigned long)(ptr->calls ? (ptr->size / ptr->calls) : 0));
+
+ chunk_printf(&trash, "%s:%d", name, ptr->line);
+ while (trash.data < 25)
+ trash.area[trash.data++] = ' ';
+ chunk_appendf(&trash, "%7s size: %12lu calls: %9lu size/call: %6lu\n",
+ type,
+ (unsigned long)ptr->size, (unsigned long)ptr->calls,
+ (unsigned long)(ptr->calls ? (ptr->size / ptr->calls) : 0));
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ctx->start = ptr;
+ ret = 0;
+ break;
+ }
+ }
+
+ end:
+ return ret;
+}
+
+#endif
+
+#ifndef USE_THREAD_DUMP
+
+/* This function dumps all threads' state to the trash. This version is the
+ * most basic one, which doesn't inspect other threads.
+ */
+void ha_thread_dump_all_to_trash()
+{
+ unsigned int thr;
+
+ for (thr = 0; thr < global.nbthread; thr++)
+ ha_thread_dump(&trash, thr, tid);
+}
+
+#else /* below USE_THREAD_DUMP is set */
+
+/* ID of the thread requesting the dump */
+static unsigned int thread_dump_tid;
+
+/* points to the buffer where the dump functions should write. It must
+ * have already been initialized by the requester. Nothing is done if
+ * it's NULL.
+ */
+struct buffer *thread_dump_buffer = NULL;
+
+void ha_thread_dump_all_to_trash()
+{
+ unsigned long old;
+
+ while (1) {
+ old = 0;
+ if (HA_ATOMIC_CAS(&threads_to_dump, &old, all_threads_mask))
+ break;
+ ha_thread_relax();
+ }
+
+ thread_dump_buffer = &trash;
+ thread_dump_tid = tid;
+ ha_tkillall(DEBUGSIG);
+}
+
+/* handles DEBUGSIG to dump the state of the thread it's working on */
+void debug_handler(int sig, siginfo_t *si, void *arg)
+{
+ /* first, let's check it's really for us and that we didn't just get
+ * a spurious DEBUGSIG.
+ */
+ if (!(threads_to_dump & tid_bit))
+ return;
+
+ /* There are 4 phases in the dump process:
+ * 1- wait for our turn, i.e. when all lower bits are gone.
+ * 2- perform the action if our bit is set
+ * 3- remove our bit to let the next one go, unless we're
+ * the last one and have to put them all as a signal
+ * 4- wait for our bit to re-appear, then clear it and quit.
+ */
+
+ /* wait for all previous threads to finish first */
+ while (threads_to_dump & (tid_bit - 1))
+ ha_thread_relax();
+
+ /* dump if needed */
+ if (threads_to_dump & tid_bit) {
+ if (thread_dump_buffer)
+ ha_thread_dump(thread_dump_buffer, tid, thread_dump_tid);
+ if ((threads_to_dump & all_threads_mask) == tid_bit) {
+ /* last one */
+ HA_ATOMIC_STORE(&threads_to_dump, all_threads_mask);
+ thread_dump_buffer = NULL;
+ }
+ else
+ HA_ATOMIC_AND(&threads_to_dump, ~tid_bit);
+ }
+
+ /* now wait for all others to finish dumping. The last one will set all
+ * bits again to broadcast the leaving condition so we'll see ourselves
+ * present again. This way the threads_to_dump variable never passes to
+ * zero until all visitors have stopped waiting.
+ */
+ while (!(threads_to_dump & tid_bit))
+ ha_thread_relax();
+ HA_ATOMIC_AND(&threads_to_dump, ~tid_bit);
+
+ /* mark the current thread as stuck to detect it upon next invocation
+ * if it didn't move.
+ */
+ if (!((threads_harmless_mask|sleeping_thread_mask) & tid_bit))
+ th_ctx->flags |= TH_FL_STUCK;
+}
+
+static int init_debug_per_thread()
+{
+ sigset_t set;
+
+ /* unblock the DEBUGSIG signal we intend to use */
+ sigemptyset(&set);
+ sigaddset(&set, DEBUGSIG);
+ ha_sigmask(SIG_UNBLOCK, &set, NULL);
+ return 1;
+}
+
+static int init_debug()
+{
+ struct sigaction sa;
+ void *callers[1];
+
+ /* calling backtrace() will access libgcc at runtime. We don't want to
+ * do it after the chroot, so let's perform a first call to have it
+ * ready in memory for later use.
+ */
+ my_backtrace(callers, sizeof(callers)/sizeof(*callers));
+ sa.sa_handler = NULL;
+ sa.sa_sigaction = debug_handler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(DEBUGSIG, &sa, NULL);
+ return ERR_NONE;
+}
+
+REGISTER_POST_CHECK(init_debug);
+REGISTER_PER_THREAD_INIT(init_debug_per_thread);
+
+#endif /* USE_THREAD_DUMP */
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ {{ "debug", "dev", "bug", NULL }, "debug dev bug : call BUG_ON() and crash", debug_parse_cli_bug, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "check", NULL }, "debug dev check : call CHECK_IF() and possibly crash", debug_parse_cli_check, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "close", NULL }, "debug dev close <fd> : close this file descriptor", debug_parse_cli_close, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "delay", NULL }, "debug dev delay [ms] : sleep this long", debug_parse_cli_delay, NULL, NULL, NULL, ACCESS_EXPERT },
+#if defined(DEBUG_DEV)
+ {{ "debug", "dev", "exec", NULL }, "debug dev exec [cmd] ... : show this command's output", debug_parse_cli_exec, NULL, NULL, NULL, ACCESS_EXPERT },
+#endif
+ {{ "debug", "dev", "fd", NULL }, "debug dev fd : scan for rogue/unhandled FDs", debug_parse_cli_fd, debug_iohandler_fd, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "exit", NULL }, "debug dev exit [code] : immediately exit the process", debug_parse_cli_exit, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "hex", NULL }, "debug dev hex <addr> [len] : dump a memory area", debug_parse_cli_hex, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "log", NULL }, "debug dev log [msg] ... : send this msg to global logs", debug_parse_cli_log, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "loop", NULL }, "debug dev loop [ms] : loop this long", debug_parse_cli_loop, NULL, NULL, NULL, ACCESS_EXPERT },
+#if defined(DEBUG_MEM_STATS)
+ {{ "debug", "dev", "memstats", NULL }, "debug dev memstats [reset|all] : dump/reset memory statistics", debug_parse_cli_memstats, debug_iohandler_memstats, NULL, NULL, ACCESS_EXPERT },
+#endif
+ {{ "debug", "dev", "panic", NULL }, "debug dev panic : immediately trigger a panic", debug_parse_cli_panic, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "sched", NULL }, "debug dev sched {task|tasklet} [k=v]* : stress the scheduler", debug_parse_cli_sched, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "stream",NULL }, "debug dev stream [k=v]* : show/manipulate stream flags", debug_parse_cli_stream,NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "sym", NULL }, "debug dev sym <addr> : resolve symbol address", debug_parse_cli_sym, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "tkill", NULL }, "debug dev tkill [thr] [sig] : send signal to thread", debug_parse_cli_tkill, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "warn", NULL }, "debug dev warn : call WARN_ON() and possibly crash", debug_parse_cli_warn, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "write", NULL }, "debug dev write [size] : write that many bytes in return", debug_parse_cli_write, NULL, NULL, NULL, ACCESS_EXPERT },
+#if defined(HA_HAVE_DUMP_LIBS)
+ {{ "show", "libs", NULL, NULL }, "show libs : show loaded object files and libraries", debug_parse_cli_show_libs, NULL, NULL },
+#endif
+ {{ "show", "threads", NULL, NULL }, "show threads : show some threads debugging information", NULL, cli_io_handler_show_threads, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/dgram.c b/src/dgram.c
new file mode 100644
index 0000000..54823d1
--- /dev/null
+++ b/src/dgram.c
@@ -0,0 +1,30 @@
+/*
+ * Datagram processing functions
+ *
+ * Copyright 2014 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/fd.h>
+#include <haproxy/dgram.h>
+
+/* datagram handler callback */
+void dgram_fd_handler(int fd)
+{
+ struct dgram_conn *dgram = fdtab[fd].owner;
+
+ if (unlikely(!dgram))
+ return;
+
+ if (fd_recv_ready(fd))
+ dgram->data->recv(dgram);
+ if (fd_send_ready(fd))
+ dgram->data->send(dgram);
+
+ return;
+}
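+
+/* Note: a user of this handler makes a struct dgram_conn the fd's owner via
+ * fd_insert(), with ->data pointing to a struct dgram_data_cb providing the
+ * recv() and send() callbacks; see dns_dgram_cb in src/dns.c for a concrete
+ * instance.
+ */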
diff --git a/src/dict.c b/src/dict.c
new file mode 100644
index 0000000..a225081
--- /dev/null
+++ b/src/dict.c
@@ -0,0 +1,127 @@
+#include <string.h>
+
+#include <import/eb32tree.h>
+#include <import/ebistree.h>
+#include <haproxy/dict.h>
+#include <haproxy/thread.h>
+
+struct dict *new_dict(const char *name)
+{
+ struct dict *dict;
+
+ dict = malloc(sizeof *dict);
+ if (!dict)
+ return NULL;
+
+ dict->name = name;
+ dict->values = EB_ROOT_UNIQUE;
+ HA_RWLOCK_INIT(&dict->rwlock);
+
+ return dict;
+}
+
+/*
+ * Allocate a new dictionary entry whose string value is a strdup() of <s>.
+ * Returns the newly allocated entry on success, NULL otherwise.
+ */
+static struct dict_entry *new_dict_entry(char *s)
+{
+ struct dict_entry *de;
+
+ de = calloc(1, sizeof *de);
+ if (!de)
+ return NULL;
+
+ de->value.key = strdup(s);
+ if (!de->value.key)
+ goto err;
+
+ de->len = strlen(s);
+ de->refcount = 1;
+
+ return de;
+
+ err:
+ ha_free(&de->value.key);
+ de->len = 0;
+ free(de);
+ return NULL;
+}
+
+/*
+ * Release the memory allocated for <de> dictionary entry.
+ */
+static void free_dict_entry(struct dict_entry *de)
+{
+ de->refcount = 0;
+ ha_free(&de->value.key);
+ free(de);
+}
+
+/*
+ * Simple function to look up dictionary entries with <s> as value.
+ */
+static struct dict_entry *__dict_lookup(struct dict *d, const char *s)
+{
+ struct dict_entry *de;
+ struct ebpt_node *node;
+
+ de = NULL;
+ node = ebis_lookup(&d->values, s);
+ if (node)
+ de = container_of(node, struct dict_entry, value);
+
+ return de;
+}
+
+/*
+ * Insert an entry in <d> dictionary with <s> as value.
+ */
+struct dict_entry *dict_insert(struct dict *d, char *s)
+{
+ struct dict_entry *de;
+ struct ebpt_node *n;
+
+ HA_RWLOCK_RDLOCK(DICT_LOCK, &d->rwlock);
+ de = __dict_lookup(d, s);
+ HA_RWLOCK_RDUNLOCK(DICT_LOCK, &d->rwlock);
+ if (de) {
+ HA_ATOMIC_INC(&de->refcount);
+ return de;
+ }
+
+ de = new_dict_entry(s);
+ if (!de)
+ return NULL;
+
+ HA_RWLOCK_WRLOCK(DICT_LOCK, &d->rwlock);
+ n = ebis_insert(&d->values, &de->value);
+ HA_RWLOCK_WRUNLOCK(DICT_LOCK, &d->rwlock);
+ if (n != &de->value) {
+ /* another thread won the insertion race: drop our copy and
+ * take a reference on the winning entry instead.
+ */
+ free_dict_entry(de);
+ de = container_of(n, struct dict_entry, value);
+ HA_ATOMIC_INC(&de->refcount);
+ }
+
+ return de;
+}
+
+
+/*
+ * Unreference a dict entry previously acquired with <dict_insert>.
+ * If this is the last live reference to the entry, it is
+ * removed from the dictionary and freed.
+ */
+void dict_entry_unref(struct dict *d, struct dict_entry *de)
+{
+ if (!de)
+ return;
+
+ if (HA_ATOMIC_SUB_FETCH(&de->refcount, 1) != 0)
+ return;
+
+ HA_RWLOCK_WRLOCK(DICT_LOCK, &d->rwlock);
+ ebpt_delete(&de->value);
+ HA_RWLOCK_WRUNLOCK(DICT_LOCK, &d->rwlock);
+
+ free_dict_entry(de);
+}
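+
+/* Minimal usage sketch (illustrative only, the dictionary name and values
+ * below are arbitrary):
+ *
+ *	struct dict *d = new_dict("example");
+ *	struct dict_entry *de1 = dict_insert(d, "srv1"); // created, refcount=1
+ *	struct dict_entry *de2 = dict_insert(d, "srv1"); // same entry, refcount=2
+ *
+ *	dict_entry_unref(d, de2); // refcount drops back to 1
+ *	dict_entry_unref(d, de1); // last reference: entry deleted and freed
+ */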
diff --git a/src/dns.c b/src/dns.c
new file mode 100644
index 0000000..8bf8dcd
--- /dev/null
+++ b/src/dns.c
@@ -0,0 +1,1350 @@
+/*
+ * Name server resolution
+ *
+ * Copyright 2020 HAProxy Technologies
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/dgram.h>
+#include <haproxy/dns.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/log.h>
+#include <haproxy/ring.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+
+static THREAD_LOCAL char *dns_msg_trash;
+
+DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
+DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
+DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
+
+/* Opens a UDP socket on the nameserver's IP/Port, if required. Returns 0 on
+ * success, -1 otherwise. ns->dgram must be defined.
+ */
+static int dns_connect_nameserver(struct dns_nameserver *ns)
+{
+ struct dgram_conn *dgram = &ns->dgram->conn;
+ int fd;
+
+ /* Already connected */
+ if (dgram->t.sock.fd != -1)
+ return 0;
+
+ /* Create a UDP socket and connect it to the nameserver's IP/Port */
+ if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+ send_log(NULL, LOG_WARNING,
+ "DNS : section '%s': can't create socket for nameserver '%s'.\n",
+ ns->counters->pid, ns->id);
+ return -1;
+ }
+ if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
+ send_log(NULL, LOG_WARNING,
+ "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
+ ns->counters->pid, ns->id);
+ close(fd);
+ return -1;
+ }
+
+ /* Make the socket non blocking */
+ fd_set_nonblock(fd);
+
+ /* Add the fd in the fd list and update its parameters */
+ dgram->t.sock.fd = fd;
+ fd_insert(fd, dgram, dgram_fd_handler, MAX_THREADS_MASK);
+ fd_want_recv(fd);
+ return 0;
+}
+
+/* Sends a message to a name server.
+ * Returns the message length on success,
+ * -1 on error, or 0 if the output ring
+ * buffer is full.
+ */
+int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (ns->dgram) {
+ struct dgram_conn *dgram = &ns->dgram->conn;
+ int fd;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+ fd = dgram->t.sock.fd;
+ if (fd == -1) {
+ if (dns_connect_nameserver(ns) == -1) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ fd = dgram->t.sock.fd;
+ }
+
+ ret = send(fd, buf, len, 0);
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ struct ist myist;
+
+ myist = ist2(buf, len);
+ ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ if (!ret) {
+ ns->counters->snd_error++;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ fd_cant_send(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return ret;
+ }
+ ns->counters->snd_error++;
+ fd_delete(fd);
+ dgram->t.sock.fd = -1;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ ns->counters->sent++;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ }
+ else if (ns->stream) {
+ struct ist myist;
+
+ myist = ist2(buf, len);
+ ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ if (!ret) {
+ ns->counters->snd_error++;
+ return -1;
+ }
+ task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
+ return ret;
+ }
+
+ return ret;
+}
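+/* Hypothetical caller sketch (the names below are illustrative only): a
+ * negative return is a definitive send error, anything else means the
+ * message was accepted, possibly just queued in the ring:
+ *
+ *	if (dns_send_nameserver(ns, pkt, pkt_len) < 0)
+ *		resolv_retry_next_ns(); // e.g. account the error, try another server
+ */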
+
+void dns_session_free(struct dns_session *);
+
+/* Receives a DNS message.
+ * Returns the message length on success,
+ * 0 if no more messages are available,
+ * or -1 on error.
+ */
+ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
+{
+ ssize_t ret = -1;
+
+ if (ns->dgram) {
+ struct dgram_conn *dgram = &ns->dgram->conn;
+ int fd;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+ fd = dgram->t.sock.fd;
+ if (fd == -1) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+
+ if ((ret = recv(fd, data, size, 0)) < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return 0;
+ }
+ fd_delete(fd);
+ dgram->t.sock.fd = -1;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ }
+ else if (ns->stream) {
+ struct dns_stream_server *dss = ns->stream;
+ struct dns_session *ds;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+
+ if (!LIST_ISEMPTY(&dss->wait_sess)) {
+ ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
+ ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
+ memcpy(data, ds->rx_msg.area, ret);
+
+ ds->rx_msg.len = 0;
+
+ /* This barrier is here to ensure that all data is
+ * stored before the appctx detects that the element
+ * is out of the list.
+ */
+ __ha_barrier_store();
+
+ LIST_DEL_INIT(&ds->waiter);
+
+ if (ds->appctx) {
+ /* This second barrier is here to ensure that
+ * the woken up appctx won't miss that the element
+ * was removed from the list.
+ */
+ __ha_barrier_store();
+
+ /* wake up the appctx because it may have other
+ * messages to receive
+ */
+ appctx_wakeup(ds->appctx);
+
+ /* the dns_session could already be in the free_sess
+ * list, so remove it from there first */
+ LIST_DEL_INIT(&ds->list);
+
+ /* decrease nb_queries to free a slot for a new query on that sess */
+ ds->nb_queries--;
+ if (ds->nb_queries) {
+ /* unanswered pipelined requests remain in
+ * this session, but since the counter was
+ * just decreased the session can no longer
+ * be full of pipelined requests, so we can
+ * add it to the free_sess list to receive
+ * a new request
+ */
+ LIST_INSERT(&ds->dss->free_sess, &ds->list);
+ }
+ else {
+ /* there are no more pipelined requests
+ * in this session, so we move it to the
+ * idle_sess list */
+ LIST_INSERT(&ds->dss->idle_sess, &ds->list);
+
+ /* update the counter of idle sessions */
+ ds->dss->idle_conns++;
+
+ /* Note: no need to update max_active_conns
+ * here since we increased the idle count */
+ }
+ }
+ else {
+ /* there is no appctx left for this session,
+ * which means it is ready to die
+ */
+ dns_session_free(ds);
+ }
+
+
+ }
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ }
+
+ return ret;
+}
+
+static void dns_resolve_recv(struct dgram_conn *dgram)
+{
+ struct dns_nameserver *ns;
+ int fd;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+
+ fd = dgram->t.sock.fd;
+
+ /* check if ready for reading */
+ if ((fd == -1) || !fd_recv_ready(fd)) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ /* no need to go further if we can't retrieve the nameserver */
+ if ((ns = dgram->owner) == NULL) {
+ _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ fd_stop_recv(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+
+ ns->process_responses(ns);
+}
+
+/* Called when a dns network socket is ready to send data */
+static void dns_resolve_send(struct dgram_conn *dgram)
+{
+ int fd;
+ struct dns_nameserver *ns;
+ struct ring *ring;
+ struct buffer *buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+
+ fd = dgram->t.sock.fd;
+
+ /* check if ready for sending */
+ if ((fd == -1) || !fd_send_ready(fd)) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ /* no need to go further if we can't retrieve the nameserver */
+ if ((ns = dgram->owner) == NULL) {
+ _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ fd_stop_send(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ ring = ns->dgram->ring_req;
+ buf = &ring->buf;
+
+ HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
+ ofs = ns->dgram->ofs_req;
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ofs == ~0)) {
+ ofs = 0;
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs -= ring->ofs;
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ while (ofs + 1 < b_data(buf)) {
+ int ret;
+
+ cnt = 1;
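+ /* each ring message is preceded by a one-byte reader counter
+ * (accounted for by cnt) and its varint-encoded length
+ */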
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+ if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
+
+ ret = send(fd, dns_msg_trash, len, 0);
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_send(fd);
+ goto out;
+ }
+ ns->counters->snd_error++;
+ fd_delete(fd);
+ fd = dgram->t.sock.fd = -1;
+ goto out;
+ }
+ ns->counters->sent++;
+
+ ofs += cnt + len;
+ }
+
+ /* we don't want/need to be woken up any more for sending
+ * since all the ring content has been sent */
+ fd_stop_send(fd);
+
+out:
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ ns->dgram->ofs_req = ofs;
+ HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+
+}
+
+/* proto_udp callback functions for a DNS resolution */
+struct dgram_data_cb dns_dgram_cb = {
+ .recv = dns_resolve_recv,
+ .send = dns_resolve_send,
+};
+
+int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
+{
+ struct dns_dgram_server *dgram;
+
+ if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
+ return -1;
+
+ /* Leave dgram partially initialized, no FD attached for
+ * now. */
+ dgram->conn.owner = ns;
+ dgram->conn.data = &dns_dgram_cb;
+ dgram->conn.t.sock.fd = -1;
+ dgram->conn.addr.to = *sk;
+ HA_SPIN_INIT(&dgram->conn.lock);
+ ns->dgram = dgram;
+
+ dgram->ofs_req = ~0; /* init ring offset */
+ dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
+ if (!dgram->ring_req) {
+ ha_alert("memory allocation error initializing the ring for nameserver.\n");
+ goto out;
+ }
+
+	/* attach ourselves as a reader, marking the nameserver attached to the ring */
+	if (!ring_attach(dgram->ring_req)) {
+		ha_alert("nameserver has too many watchers (> 255) on its ring. This is a bug and should never happen.\n");
+ goto out;
+ }
+ return 0;
+out:
+ if (dgram->ring_req)
+ ring_free(dgram->ring_req);
+
+ free(dgram);
+
+ return -1;
+}
+
+/*
+ * I/O handler pushing messages to the DNS TCP server.
+ * It takes its context from appctx->svcctx.
+ */
+static void dns_session_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct dns_session *ds = appctx->svcctx;
+ struct ring *ring = &ds->ring;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ int available_room;
+ size_t len, cnt, ofs;
+ int ret = 0;
+
+ /* if stopping was requested, close immediately */
+ if (unlikely(stopping))
+ goto close;
+
+	/* we want to be sure not to miss that we have been woken up for a shutdown */
+ __ha_barrier_load();
+
+	/* this means the connection was requested to shut down,
+	 * for instance on idle expiration */
+ if (ds->shutdown)
+ goto close;
+
+ /* an error was detected */
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto close;
+
+	/* connection closed by the server side: skip the data write and drain data from the channel */
+ if ((sc_oc(sc)->flags & CF_SHUTW)) {
+ goto read;
+ }
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ return;
+ }
+
+
+ ofs = ds->ofs;
+
+ HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ofs == ~0)) {
+ ofs = 0;
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ }
+
+ /* in this loop, ofs always points to the counter byte that precedes
+ * the message so that we can take our reference there if we have to
+ * stop before the end (ret=0).
+ */
+ if (sc_opposite(sc)->state == SC_ST_EST) {
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs -= ring->ofs;
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ struct dns_query *query;
+ uint16_t original_qid;
+ uint16_t new_qid;
+
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ /* retrieve available room on output channel */
+ available_room = channel_recv_max(sc_ic(sc));
+
+ /* tx_msg_offset null means we are at the start of a new message */
+ if (!ds->tx_msg_offset) {
+ uint16_t slen;
+
+ /* check if there is enough room to put message len and query id */
+ if (available_room < sizeof(slen) + sizeof(new_qid)) {
+ sc_need_room(sc);
+ ret = 0;
+ break;
+ }
+
+				/* put the msg len into the channel */
+ slen = (uint16_t)msg_len;
+ slen = htons(slen);
+ applet_putblk(appctx, (char *)&slen, sizeof(slen));
+ available_room -= sizeof(slen);
+
+ /* backup original query id */
+ len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
+ if (!len) {
+ /* should never happen since messages are atomically
+					 * written into the ring
+ */
+ ret = 0;
+ break;
+ }
+
+				/* generate a new query id */
+ new_qid = ++ds->query_counter;
+ new_qid = htons(new_qid);
+
+ /* put new query id into the channel */
+ applet_putblk(appctx, (char *)&new_qid, sizeof(new_qid));
+ available_room -= sizeof(new_qid);
+
+ /* keep query id mapping */
+
+ query = pool_alloc(dns_query_pool);
+ if (query) {
+ query->qid.key = new_qid;
+ query->original_qid = original_qid;
+ query->expire = tick_add(now_ms, 5000);
+ LIST_INIT(&query->list);
+ if (LIST_ISEMPTY(&ds->queries)) {
+ /* enable task to handle expire */
+ ds->task_exp->expire = query->expire;
+						/* ensure this will be executed by the same
+						 * thread as dns_session_release, so that
+						 * session_release remains free to destroy
+						 * the task */
+ task_queue(ds->task_exp);
+ }
+ LIST_APPEND(&ds->queries, &query->list);
+ eb32_insert(&ds->query_ids, &query->qid);
+ ds->onfly_queries++;
+ }
+
+				/* update tx_msg_offset so forwarding can resume when the message spans several (e.g. 16kB) channel chunks */
+ ds->tx_msg_offset = sizeof(original_qid);
+
+ }
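+			/* At this point the output channel holds, for this query,
+			 * the standard DNS-over-TCP framing (RFC 1035, 4.2.2):
+			 *
+			 *   [ 2-byte len ][ 2-byte new qid ][ rest of the query... ]
+			 *
+			 * The remaining bytes are copied below starting at
+			 * tx_msg_offset, which was just set past the original id.
+			 */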
+
+			/* check if any room remains available on the output channel */
+ if (unlikely(!available_room)) {
+ sc_need_room(sc);
+ ret = 0;
+ break;
+ }
+
+ chunk_reset(&trash);
+ if ((msg_len - ds->tx_msg_offset) > available_room) {
+				/* the remaining msg data is too large to be written to the output channel at once */
+
+ len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
+
+				/* update offset to complete message forwarding later */
+ ds->tx_msg_offset += len;
+ }
+ else {
+				/* the remaining msg data can be written to the output channel at once */
+ len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
+
+ /* reset tx_msg_offset to mark forward fully processed */
+ ds->tx_msg_offset = 0;
+ }
+ trash.data += len;
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* should never happen since we
+ * check available_room is large
+ * enough here.
+ */
+ ret = 0;
+ break;
+ }
+
+ if (ds->tx_msg_offset) {
+				/* msg was not fully processed, we must stay awake to drain the pending data */
+
+ sc_need_room(sc);
+ ret = 0;
+ break;
+ }
+ /* switch to next message */
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ ds->ofs = ofs;
+ }
+ HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
+
+ if (ret) {
+		/* let's be woken up once a new request to write arrives */
+ HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
+ BUG_ON(LIST_INLIST(&appctx->wait_entry));
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
+ applet_have_no_more_data(appctx);
+ }
+
+read:
+
+	/* if the session is not a waiter, it means there is no committed
+	 * message in rx_buf and we are free to use it.
+	 * Note: we need a load barrier here so as not to miss the
+	 * removal from the list.
+	 */
+
+ __ha_barrier_load();
+ if (!LIST_INLIST_ATOMIC(&ds->waiter)) {
+ while (1) {
+ uint16_t query_id;
+ struct eb32_node *eb;
+ struct dns_query *query;
+
+ if (!ds->rx_msg.len) {
+				/* the next message length is not yet fully available in the channel */
+ if (co_data(sc_oc(sc)) < 2)
+ break;
+
+ /* retrieve message len */
+ co_getblk(sc_oc(sc), (char *)&msg_len, 2, 0);
+
+ /* mark as consumed */
+ co_skip(sc_oc(sc), 2);
+
+ /* store message len */
+ ds->rx_msg.len = ntohs(msg_len);
+ }
+
+ if (!co_data(sc_oc(sc))) {
+ /* we need more data but nothing is available */
+ break;
+ }
+
+ if (co_data(sc_oc(sc)) + ds->rx_msg.offset < ds->rx_msg.len) {
+ /* message only partially available */
+
+ /* read available data */
+ co_getblk(sc_oc(sc), ds->rx_msg.area + ds->rx_msg.offset, co_data(sc_oc(sc)), 0);
+
+ /* update message offset */
+ ds->rx_msg.offset += co_data(sc_oc(sc));
+
+ /* consume all pending data from the channel */
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+
+ /* we need to wait for more data */
+ break;
+ }
+
+			/* enough data is available in the channel to read the message until the end */
+
+ /* read from the channel until the end of the message */
+ co_getblk(sc_oc(sc), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
+
+ /* consume all data until the end of the message from the channel */
+ co_skip(sc_oc(sc), ds->rx_msg.len - ds->rx_msg.offset);
+
+			/* reset the reader offset to 0 for the next message read */
+ ds->rx_msg.offset = 0;
+
+			/* try to remap the query id to the original one */
+ memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
+ eb = eb32_lookup(&ds->query_ids, query_id);
+ if (!eb) {
+				/* an unknown query id means there is no matching
+				 * pending request: perhaps a server bug, or the
+				 * query already timed out
+				 */
+ ds->rx_msg.len = 0;
+ continue;
+ }
+
+ /* re-map the original query id set by the requester */
+ query = eb32_entry(eb, struct dns_query, qid);
+ memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
+
+ /* remove query ids mapping from pending queries list/tree */
+ eb32_delete(&query->qid);
+ LIST_DELETE(&query->list);
+ pool_free(dns_query_pool, query);
+ ds->onfly_queries--;
+
+			/* the dns_session is also queued on the wait_sess
+			 * list, from which the task processing responses
+			 * will pop the available ones
+			 */
+ HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
+
+ BUG_ON(LIST_INLIST(&ds->waiter));
+ LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
+
+ /* awake the task processing the responses */
+ task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
+
+ break;
+ }
+
+ if (!LIST_INLIST(&ds->waiter)) {
+			/* there is no more pending data to read and the connection was closed by the server side */
+ if (!co_data(sc_oc(sc)) && (sc_oc(sc)->flags & CF_SHUTW)) {
+ goto close;
+ }
+ }
+
+ }
+
+ return;
+close:
+ sc_shutw(sc);
+ sc_shutr(sc);
+ sc_ic(sc)->flags |= CF_READ_NULL;
+}
+
+void dns_queries_flush(struct dns_session *ds)
+{
+ struct dns_query *query, *queryb;
+
+ list_for_each_entry_safe(query, queryb, &ds->queries, list) {
+ eb32_delete(&query->qid);
+ LIST_DELETE(&query->list);
+ pool_free(dns_query_pool, query);
+ }
+}
+
+void dns_session_free(struct dns_session *ds)
+{
+ if (ds->rx_msg.area)
+ pool_free(dns_msg_buf, ds->rx_msg.area);
+ if (ds->tx_ring_area)
+ pool_free(dns_msg_buf, ds->tx_ring_area);
+ if (ds->task_exp)
+ task_destroy(ds->task_exp);
+
+ dns_queries_flush(ds);
+
+	/* Ensure this session is removed from external lists.
+	 * Note: we are under the lock of the dns_stream_server
+	 * which owns the heads of those lists.
+	 */
+ LIST_DEL_INIT(&ds->waiter);
+ LIST_DEL_INIT(&ds->list);
+
+ ds->dss->cur_conns--;
+	/* Note: it is useless to update
+	 * max_active_conns here because
+	 * we are decreasing the value
+	 */
+
+ BUG_ON(!LIST_ISEMPTY(&ds->list));
+ BUG_ON(!LIST_ISEMPTY(&ds->waiter));
+ BUG_ON(!LIST_ISEMPTY(&ds->queries));
+ BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
+ BUG_ON(!eb_is_empty(&ds->query_ids));
+ pool_free(dns_session_pool, ds);
+}
+
+static struct appctx *dns_session_create(struct dns_session *ds);
+
+static int dns_session_init(struct appctx *appctx)
+{
+ struct dns_session *ds = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+
+ if (!sockaddr_alloc(&addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
+ goto error;
+
+ if (appctx_finalize_startup(appctx, ds->dss->srv->proxy, &BUF_NULL) == -1)
+ goto error;
+
+ s = appctx_strm(appctx);
+ s->scb->dst = addr;
+ s->scb->flags |= SC_FL_NOLINGER;
+ s->target = &ds->dss->srv->obj_type;
+ s->flags = SF_ASSIGNED;
+
+ s->do_log = NULL;
+ s->uniq_id = 0;
+
+ s->res.flags |= CF_READ_DONTWAIT;
+	/* force rto and rex to eternity so that we never expire on idle
+	 * recv (same behaviour as the syslog ring applet this code is
+	 * modeled on).
+	 */
+ s->res.rto = TICK_ETERNITY;
+ s->res.rex = TICK_ETERNITY;
+
+ ds->appctx = appctx;
+ return 0;
+
+ error:
+ return -1;
+}
+
+/*
+ * Function releasing a DNS TCP session.
+ */
+static void dns_session_release(struct appctx *appctx)
+{
+ struct dns_session *ds = appctx->svcctx;
+ struct dns_stream_server *dss __maybe_unused;
+
+ if (!ds)
+ return;
+
+	/* We do not call ring_appctx_detach here
+	 * because we want to keep the reader counters
+	 * so we can retry the conn with a different appctx.
+	 */
+ HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
+
+ dss = ds->dss;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+ LIST_DEL_INIT(&ds->list);
+
+ if (stopping) {
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+ if (!ds->nb_queries) {
+		/* this is an idle session */
+		/* Note: it is useless to update max_active_sess
+		 * here because we only decrease idle_conns while
+		 * dns_session_free decreases cur_conns
+		 */
+
+ ds->dss->idle_conns--;
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+ if (ds->onfly_queries == ds->nb_queries) {
+		/* the session can be released because
+		 * all queries AND their responses
+		 * are in flight */
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+	/* if there is no pending complete response message, make sure
+	 * to reset the rx message offsets since the session was closed
+	 * with an incomplete pending response
+	 */
+ if (!LIST_INLIST(&ds->waiter))
+ ds->rx_msg.len = ds->rx_msg.offset = 0;
+
+	/* we flush pending sent queries because we will
+	 * never receive their responses
+	 */
+ ds->nb_queries -= ds->onfly_queries;
+ dns_queries_flush(ds);
+
+ /* reset offset to be sure to start from message start */
+ ds->tx_msg_offset = 0;
+
+ /* here the ofs and the attached counter
+ * are kept unchanged
+ */
+
+	/* Create a new appctx. Note that we are creating it
+	 * from within the release callback itself! */
+ ds->appctx = dns_session_create(ds);
+ if (!ds->appctx) {
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+ if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
+ LIST_INSERT(&ds->dss->free_sess, &ds->list);
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+}
+
+/* DNS tcp session applet */
+static struct applet dns_session_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<STRMDNS>", /* used for logging */
+ .fct = dns_session_io_handler,
+ .init = dns_session_init,
+ .release = dns_session_release,
+};
+
+/*
+ * Function used to create an appctx for a DNS session.
+ * It sets its context into appctx->svcctx.
+ */
+static struct appctx *dns_session_create(struct dns_session *ds)
+{
+ struct appctx *appctx;
+
+ appctx = appctx_new_here(&dns_session_applet, NULL);
+ if (!appctx)
+ goto out_close;
+ appctx->svcctx = (void *)ds;
+
+ if (appctx_init(appctx) == -1) {
+ ha_alert("out of memory in dns_session_create().\n");
+ goto out_free_appctx;
+ }
+
+ return appctx;
+
+ /* Error unrolling */
+ out_free_appctx:
+ appctx_free_on_early_error(appctx);
+ out_close:
+ return NULL;
+}
+
+/* Task processing the expiration of unanswered queries. It is expected to
+ * run on the same thread as the appctx handler. Queries are appended to the
+ * list in submission order, so the list is sorted by expiration date and we
+ * can stop at the first non-expired entry.
+ */
+static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
+{
+ struct dns_session *ds = (struct dns_session *)context;
+ struct dns_query *query, *queryb;
+
+ t->expire = TICK_ETERNITY;
+
+ list_for_each_entry_safe(query, queryb, &ds->queries, list) {
+ if (tick_is_expired(query->expire, now_ms)) {
+ eb32_delete(&query->qid);
+ LIST_DELETE(&query->list);
+ pool_free(dns_query_pool, query);
+ ds->onfly_queries--;
+ }
+ else {
+ t->expire = query->expire;
+ break;
+ }
+ }
+
+ return t;
+}
+
+/* Task processing expiration of idle sessions */
+static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
+{
+ struct dns_stream_server *dss = (struct dns_stream_server *)context;
+ struct dns_session *ds, *dsb;
+ int target = 0;
+ int cur_active_conns;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+
+
+ cur_active_conns = dss->cur_conns - dss->idle_conns;
+ if (cur_active_conns > dss->max_active_conns)
+ dss->max_active_conns = cur_active_conns;
+
+ target = (dss->max_active_conns - cur_active_conns) / 2;
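+	/* Example with hypothetical numbers: with max_active_conns=10 and
+	 * cur_active_conns=4, target = (10 - 4) / 2 = 3, so at most three
+	 * idle sessions are shut down during this 5s period, keeping some
+	 * headroom in case the load rises again.
+	 */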
+ list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
+ if (!target)
+ break;
+
+		/* remove the conn from the idle list to ensure it won't be reused */
+ LIST_DEL_INIT(&ds->list);
+
+ /* force session shutdown */
+ ds->shutdown = 1;
+
+ /* to be sure that the appctx won't miss shutdown */
+ __ha_barrier_store();
+
+ /* wake appctx to perform the shutdown */
+ appctx_wakeup(ds->appctx);
+ }
+
+ /* reset max to current active conns */
+ dss->max_active_conns = cur_active_conns;
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+
+ t->expire = tick_add(now_ms, 5000);
+
+ return t;
+}
+
+struct dns_session *dns_session_new(struct dns_stream_server *dss)
+{
+ struct dns_session *ds;
+
+ if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
+ return NULL;
+
+ ds = pool_zalloc(dns_session_pool);
+ if (!ds)
+ return NULL;
+
+ ds->ofs = ~0;
+ ds->dss = dss;
+ LIST_INIT(&ds->list);
+ LIST_INIT(&ds->queries);
+ LIST_INIT(&ds->waiter);
+ ds->rx_msg.offset = ds->rx_msg.len = 0;
+ ds->rx_msg.area = NULL;
+ ds->tx_ring_area = NULL;
+ ds->task_exp = NULL;
+ ds->appctx = NULL;
+ ds->shutdown = 0;
+ ds->nb_queries = 0;
+ ds->query_ids = EB_ROOT_UNIQUE;
+ ds->rx_msg.area = pool_alloc(dns_msg_buf);
+ if (!ds->rx_msg.area)
+ goto error;
+
+ ds->tx_ring_area = pool_alloc(dns_msg_buf);
+ if (!ds->tx_ring_area)
+ goto error;
+
+ ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
+	/* never fails because it is the first watcher attached to the ring */
+ DISGUISE(ring_attach(&ds->ring));
+
+ if ((ds->task_exp = task_new_here()) == NULL)
+ goto error;
+
+ ds->task_exp->process = dns_process_query_exp;
+ ds->task_exp->context = ds;
+
+ ds->appctx = dns_session_create(ds);
+ if (!ds->appctx)
+ goto error;
+
+ dss->cur_conns++;
+
+ return ds;
+
+error:
+ if (ds->task_exp)
+ task_destroy(ds->task_exp);
+ if (ds->rx_msg.area)
+ pool_free(dns_msg_buf, ds->rx_msg.area);
+ if (ds->tx_ring_area)
+ pool_free(dns_msg_buf, ds->tx_ring_area);
+
+ pool_free(dns_session_pool, ds);
+
+ return NULL;
+}
+
+/*
+ * Task used to consume pending messages from the nameserver ring
+ * and forward them to a dns_session ring.
+ * Note: if no free slot is found, a new dns_session is allocated.
+ */
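+/* A rough sketch of the selection order implemented below:
+ *
+ *   1. try the first session on dss->free_sess (it has a free pipeline slot);
+ *   2. else pop a session from dss->idle_sess (its ring is empty);
+ *   3. else allocate a brand new dns_session via dns_session_new().
+ *
+ * Sessions move between these lists as their pipelined query count evolves.
+ */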
+static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
+{
+ struct dns_nameserver *ns = (struct dns_nameserver *)context;
+ struct dns_stream_server *dss = ns->stream;
+ struct ring *ring = dss->ring_req;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs;
+ struct dns_session *ds, *ads;
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+
+ ofs = dss->ofs_req;
+
+ HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ofs == ~0)) {
+ ofs = 0;
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs -= ring->ofs;
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ while (ofs + 1 < b_data(buf)) {
+ struct ist myist;
+
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+ if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
+
+ myist = ist2(dns_msg_trash, len);
+
+ ads = NULL;
+		/* try to push the request into an active session with a free slot */
+ if (!LIST_ISEMPTY(&dss->free_sess)) {
+ ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
+
+ if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
+ ds->nb_queries++;
+ if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
+ LIST_DEL_INIT(&ds->list);
+ ads = ds;
+ }
+ else {
+			/* it means we were unable to put a request in this slot,
+			 * it may be close to full so we put it at the end
+			 * of the free conn list */
+ LIST_DEL_INIT(&ds->list);
+ LIST_APPEND(&dss->free_sess, &ds->list);
+ }
+ }
+
+ if (!ads) {
+			/* try to push the request onto an idle session; this one should have enough free space */
+ if (!LIST_ISEMPTY(&dss->idle_sess)) {
+ ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
+
+ /* ring is empty so this ring_write should never fail */
+ ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ ds->nb_queries++;
+ LIST_DEL_INIT(&ds->list);
+
+ ds->dss->idle_conns--;
+
+ /* we may have to update the max_active_conns */
+ if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
+ ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
+
+				/* since we might not find a free session for the
+				 * next request, and since that request may be large
+				 * and fill the ring buffer, we prefer to queue this
+				 * session at the end of the free list. */
+ LIST_APPEND(&dss->free_sess, &ds->list);
+ ads = ds;
+ }
+ }
+
+		/* we didn't find an available session with enough room */
+ if (!ads) {
+ /* allocate a new session */
+ ads = dns_session_new(dss);
+ if (ads) {
+ /* ring is empty so this ring_write should never fail */
+ ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ ads->nb_queries++;
+ LIST_INSERT(&dss->free_sess, &ads->list);
+ }
+ else
+ ns->counters->snd_error++;
+ }
+
+ if (ads)
+ ns->counters->sent++;
+
+ ofs += cnt + len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ dss->ofs_req = ofs;
+ HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
+
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return t;
+}
+
+/*
+ * Task used to consume responses.
+ * Note: the upper layer callback is called.
+ */
+static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
+{
+ struct dns_nameserver *ns = (struct dns_nameserver *)context;
+
+ ns->process_responses(ns);
+
+ return t;
+}
+
+/* Function used to initialize a TCP nameserver */
+int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
+{
+ struct dns_stream_server *dss = NULL;
+
+ dss = calloc(1, sizeof(*dss));
+ if (!dss) {
+ ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ dss->srv = srv;
+ dss->maxconn = srv->maxconn;
+
+ dss->ofs_req = ~0; /* init ring offset */
+ dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
+ if (!dss->ring_req) {
+ ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+	/* Create the task handling requests for the resolver target */
+	if ((dss->task_req = task_new_anywhere()) == NULL) {
+		ha_alert("memory allocation error initializing the request task for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ /* Update task's parameters */
+ dss->task_req->process = dns_process_req;
+ dss->task_req->context = ns;
+
+	/* attach the task as a reader, marking the server attached to the ring */
+	if (!ring_attach(dss->ring_req)) {
+		ha_alert("server '%s': too many watchers on the ring. This should never happen.\n", srv->id);
+ goto out;
+ }
+
+	/* Create the task handling responses for the resolver target */
+	if ((dss->task_rsp = task_new_anywhere()) == NULL) {
+		ha_alert("memory allocation error initializing the response task for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ /* Update task's parameters */
+ dss->task_rsp->process = dns_process_rsp;
+ dss->task_rsp->context = ns;
+
+	/* Create the task handling expiration of idle connections */
+	if ((dss->task_idle = task_new_anywhere()) == NULL) {
+		ha_alert("memory allocation error initializing the idle task for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ /* Update task's parameters */
+ dss->task_idle->process = dns_process_idle_exp;
+ dss->task_idle->context = dss;
+ dss->task_idle->expire = tick_add(now_ms, 5000);
+
+	/* start the task immediately so idle conns get freed right away */
+ task_queue(dss->task_idle);
+
+ LIST_INIT(&dss->free_sess);
+ LIST_INIT(&dss->idle_sess);
+ LIST_INIT(&dss->wait_sess);
+ HA_SPIN_INIT(&dss->lock);
+ ns->stream = dss;
+ return 0;
+out:
+ if (dss && dss->task_rsp)
+ task_destroy(dss->task_rsp);
+ if (dss && dss->task_req)
+ task_destroy(dss->task_req);
+ if (dss && dss->ring_req)
+ ring_free(dss->ring_req);
+
+ free(dss);
+ return -1;
+}
+
+int init_dns_buffers()
+{
+ dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
+ if (!dns_msg_trash)
+ return 0;
+
+ return 1;
+}
+
+void deinit_dns_buffers()
+{
+ ha_free(&dns_msg_trash);
+}
+
+REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
+REGISTER_PER_THREAD_FREE(deinit_dns_buffers);
diff --git a/src/dynbuf.c b/src/dynbuf.c
new file mode 100644
index 0000000..0b12c75
--- /dev/null
+++ b/src/dynbuf.c
@@ -0,0 +1,129 @@
+/*
+ * Buffer management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+
+struct pool_head *pool_head_buffer __read_mostly;
+
+/* perform minimal initializations, report 0 in case of error, 1 if OK. */
+int init_buffer()
+{
+ void *buffer;
+ int thr;
+ int done;
+
+ pool_head_buffer = create_pool("buffer", global.tune.bufsize, MEM_F_SHARED|MEM_F_EXACT);
+ if (!pool_head_buffer)
+ return 0;
+
+ for (thr = 0; thr < MAX_THREADS; thr++)
+ LIST_INIT(&ha_thread_ctx[thr].buffer_wq);
+
+
+ /* The reserved buffer is what we leave behind us. Thus we always need
+ * at least one extra buffer in minavail otherwise we'll end up waking
+ * up tasks with no memory available, causing a lot of useless wakeups.
+ * That means that we always want to have at least 3 buffers available
+	 * (2 for the current session, one for the next session that might be
+	 * needed to release a server connection).
+ */
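+	/* Illustrative values only: with tune.reserved_bufs=2, minavail
+	 * becomes MAX(2, 3) = 3, and the loop below pre-allocates
+	 * minavail-1 = 2 buffers then releases them to the pool so they
+	 * are guaranteed to be available later.
+	 */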
+ pool_head_buffer->minavail = MAX(global.tune.reserved_bufs, 3);
+ if (global.tune.buf_limit)
+ pool_head_buffer->limit = global.tune.buf_limit;
+
+ for (done = 0; done < pool_head_buffer->minavail - 1; done++) {
+ buffer = pool_alloc_nocache(pool_head_buffer);
+ if (!buffer)
+ return 0;
+ pool_free(pool_head_buffer, buffer);
+ }
+ return 1;
+}
+
+/*
+ * Dumps part or all of a buffer.
+ */
+void buffer_dump(FILE *o, struct buffer *b, int from, int to)
+{
+ fprintf(o, "Dumping buffer %p\n", b);
+ fprintf(o, " orig=%p size=%u head=%u tail=%u data=%u\n",
+ b_orig(b), (unsigned int)b_size(b), (unsigned int)b_head_ofs(b), (unsigned int)b_tail_ofs(b), (unsigned int)b_data(b));
+
+ fprintf(o, "Dumping contents from byte %d to byte %d\n", from, to);
+ fprintf(o, " 0 1 2 3 4 5 6 7 8 9 a b c d e f\n");
+ /* dump hexa */
+ while (from < to) {
+ int i;
+
+ fprintf(o, " %04x: ", from);
+ for (i = 0; ((from + i) < to) && (i < 16) ; i++) {
+ fprintf(o, "%02x ", (unsigned char)b_orig(b)[from + i]);
+ if (i == 7)
+ fprintf(o, "- ");
+ }
+ if (to - from < 16) {
+ int j = 0;
+
+ for (j = 0; j < from + 16 - to; j++)
+ fprintf(o, " ");
+ if (j > 8)
+ fprintf(o, " ");
+ }
+ fprintf(o, " ");
+ for (i = 0; (from + i < to) && (i < 16) ; i++) {
+ fprintf(o, "%c", isprint((unsigned char)b_orig(b)[from + i]) ? b_orig(b)[from + i] : '.') ;
+ if ((i == 15) && ((from + i) != to-1))
+ fprintf(o, "\n");
+ }
+ from += i;
+ }
+ fprintf(o, "\n--\n");
+ fflush(o);
+}
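+/* A typical (hypothetical) invocation, e.g. from a debugging code path:
+ *
+ *   buffer_dump(stderr, &b, 0, b_data(&b));
+ *
+ * dumps the whole used part of buffer <b> as a hex + ASCII listing.
+ */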
+
+/* see offer_buffers() for details */
+void __offer_buffers(void *from, unsigned int count)
+{
+ struct buffer_wait *wait, *wait_back;
+
+ /* For now, we consider that all objects need 1 buffer, so we can stop
+ * waking up them once we have enough of them to eat all the available
+	 * waking them up once we have enough of them to eat all the available
+ * other tasks, but that's a rough estimate. Similarly, for each cached
+ * event we'll need 1 buffer.
+ */
+ list_for_each_entry_safe(wait, wait_back, &th_ctx->buffer_wq, list) {
+ if (!count)
+ break;
+
+ if (wait->target == from || !wait->wakeup_cb(wait->target))
+ continue;
+
+ LIST_DEL_INIT(&wait->list);
+ count--;
+ }
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/eb32sctree.c b/src/eb32sctree.c
new file mode 100644
index 0000000..af6a539
--- /dev/null
+++ b/src/eb32sctree.c
@@ -0,0 +1,472 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on 32bit nodes.
+ * Version 6.0.6 with backports from v7-dev
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult eb32sctree.h for more details about those functions */
+
+#include <import/eb32sctree.h>
+
+
+/* This function is used to build a tree of duplicates by adding a new node to
+ * a subtree of at least 2 entries.
+ */
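+/* As a reading aid: a "dup tree" is the subtree gathering all nodes carrying
+ * the same key; its internal nodes have bit < 0. The new entry is inserted
+ * at the rightmost hole so that duplicates are visited in insertion order,
+ * and every node crossed on the way down gets its scope mask extended with
+ * <scope> so that scoped lookups can still reach the new leaf.
+ */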
+struct eb32sc_node *eb32sc_insert_dup(struct eb_node *sub, struct eb_node *new, unsigned long scope)
+{
+ struct eb32sc_node *eb32;
+ struct eb_node *head = sub;
+ eb_troot_t *new_left = eb_dotag(&new->branches, EB_LEFT);
+ eb_troot_t *new_rght = eb_dotag(&new->branches, EB_RGHT);
+ eb_troot_t *new_leaf = eb_dotag(&new->branches, EB_LEAF);
+
+ /* first, identify the deepest hole on the right branch */
+ while (eb_gettag(head->branches.b[EB_RGHT]) != EB_LEAF) {
+ struct eb_node *last = head;
+
+ head = container_of(eb_untag(head->branches.b[EB_RGHT], EB_NODE),
+ struct eb_node, branches);
+
+ if (unlikely(head->bit > last->bit + 1)) {
+ /* there's a hole here, we must assign the top of the
+ * following sub-tree to <sub> and mark all intermediate
+ * nodes with the scope mask.
+ */
+ do {
+ eb32 = container_of(sub, struct eb32sc_node, node);
+ if (!(eb32->node_s & scope))
+ eb32->node_s |= scope;
+
+ sub = container_of(eb_untag(sub->branches.b[EB_RGHT], EB_NODE),
+ struct eb_node, branches);
+ } while (sub != head);
+ }
+
+ eb32 = container_of(head, struct eb32sc_node, node);
+ if (!(eb32->node_s & scope))
+ eb32->node_s |= scope;
+ }
+
+ /* Here we have a leaf attached to (head)->b[EB_RGHT] */
+ if (head->bit < -1) {
+ /* A hole exists just before the leaf, we insert there */
+ new->bit = -1;
+ sub = container_of(eb_untag(head->branches.b[EB_RGHT], EB_LEAF),
+ struct eb_node, branches);
+ head->branches.b[EB_RGHT] = eb_dotag(&new->branches, EB_NODE);
+
+ new->node_p = sub->leaf_p;
+ new->leaf_p = new_rght;
+ sub->leaf_p = new_left;
+ new->branches.b[EB_LEFT] = eb_dotag(&sub->branches, EB_LEAF);
+ new->branches.b[EB_RGHT] = new_leaf;
+ eb32 = container_of(new, struct eb32sc_node, node);
+ eb32->node_s = container_of(sub, struct eb32sc_node, node)->leaf_s | scope;
+ return eb32;
+ } else {
+ int side;
+ /* No hole was found before a leaf. We have to insert above
+ * <sub>. Note that we cannot be certain that <sub> is attached
+ * to the right of its parent, as this is only true if <sub>
+ * is inside the dup tree, not at the head.
+ */
+ new->bit = sub->bit - 1; /* install at the lowest level */
+ side = eb_gettag(sub->node_p);
+ head = container_of(eb_untag(sub->node_p, side), struct eb_node, branches);
+ head->branches.b[side] = eb_dotag(&new->branches, EB_NODE);
+
+ new->node_p = sub->node_p;
+ new->leaf_p = new_rght;
+ sub->node_p = new_left;
+ new->branches.b[EB_LEFT] = eb_dotag(&sub->branches, EB_NODE);
+ new->branches.b[EB_RGHT] = new_leaf;
+ eb32 = container_of(new, struct eb32sc_node, node);
+ eb32->node_s = container_of(sub, struct eb32sc_node, node)->node_s | scope;
+ return eb32;
+ }
+}
+
+/* Insert eb32sc_node <new> into subtree starting at node root <root>. Only
+ * new->key needs be set with the key. The eb32sc_node is returned. This
+ * implementation does NOT support unique trees.
+ */
+struct eb32sc_node *eb32sc_insert(struct eb_root *root, struct eb32sc_node *new, unsigned long scope)
+{
+ struct eb32sc_node *old;
+ unsigned int side;
+ eb_troot_t *troot, **up_ptr;
+ u32 newkey; /* caching the key saves approximately one cycle */
+ eb_troot_t *new_left, *new_rght;
+ eb_troot_t *new_leaf;
+ int old_node_bit;
+ unsigned long old_scope;
+
+ side = EB_LEFT;
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL)) {
+ /* Tree is empty, insert the leaf part below the left branch */
+ root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
+ new->node.leaf_p = eb_dotag(root, EB_LEFT);
+ new->node.node_p = NULL; /* node part unused */
+ new->node_s = scope;
+ new->leaf_s = scope;
+ return new;
+ }
+
+	/* The tree descent is fairly easy:
+ * - first, check if we have reached a leaf node
+ * - second, check if we have gone too far
+ * - third, reiterate
+	 * Everywhere, we use <new> for the node we are inserting, <root>
+ * for the node we attach it to, and <old> for the node we are
+ * displacing below <new>. <troot> will always point to the future node
+ * (tagged with its type). <side> carries the side the node <new> is
+ * attached to below its parent, which is also where previous node
+ * was attached. <newkey> carries the key being inserted.
+ */
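+	/* Worked example (illustrative): inserting key 0b1010 into a tree
+	 * already holding 0b1000 gives flsnz(0b1010 ^ 0b1000) = flsnz(0b0010)
+	 * = 2, so the node inserted further down gets bit 2 - EB_NODE_BITS = 1:
+	 * the keys diverge on bit 1, and the descent stops once fewer bits
+	 * than that remain in common.
+	 */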
+ newkey = new->key;
+
+ while (1) {
+ if (eb_gettag(troot) == EB_LEAF) {
+ /* insert above a leaf */
+ old = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32sc_node, node.branches);
+ new->node.node_p = old->node.leaf_p;
+ up_ptr = &old->node.leaf_p;
+ old_scope = old->leaf_s;
+ break;
+ }
+
+ /* OK we're walking down this link */
+ old = container_of(eb_untag(troot, EB_NODE),
+ struct eb32sc_node, node.branches);
+ old_node_bit = old->node.bit;
+
+ /* our new node will be found through this one, we must mark it */
+ if ((old->node_s | scope) != old->node_s)
+ old->node_s |= scope;
+
+ /* Stop going down when we don't have common bits anymore. We
+ * also stop in front of a duplicates tree because it means we
+ * have to insert above.
+ */
+
+ if ((old_node_bit < 0) || /* we're above a duplicate tree, stop here */
+ (((new->key ^ old->key) >> old_node_bit) >= EB_NODE_BRANCHES)) {
+ /* The tree did not contain the key, so we insert <new> before the node
+ * <old>, and set ->bit to designate the lowest bit position in <new>
+ * which applies to ->branches.b[].
+ */
+ new->node.node_p = old->node.node_p;
+ up_ptr = &old->node.node_p;
+ old_scope = old->node_s;
+ break;
+ }
+
+ /* walk down */
+ root = &old->node.branches;
+ side = (newkey >> old_node_bit) & EB_NODE_BRANCH_MASK;
+ troot = root->b[side];
+ }
+
+ new_left = eb_dotag(&new->node.branches, EB_LEFT);
+ new_rght = eb_dotag(&new->node.branches, EB_RGHT);
+ new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
+
+ /* We need the common higher bits between new->key and old->key.
+ * What differences are there between new->key and the node here ?
+ * NOTE that bit(new) is always < bit(root) because highest
+ * bit of new->key and old->key are identical here (otherwise they
+ * would sit on different branches).
+ */
+
+ // note that if EB_NODE_BITS > 1, we should check that it's still >= 0
+ new->node.bit = flsnz(new->key ^ old->key) - EB_NODE_BITS;
+ new->leaf_s = scope;
+ new->node_s = old_scope | scope;
+
+ if (new->key == old->key) {
+ new->node.bit = -1; /* mark as new dup tree, just in case */
+
+ if (eb_gettag(troot) != EB_LEAF) {
+ /* there was already a dup tree below */
+ return eb32sc_insert_dup(&old->node, &new->node, scope);
+ }
+ /* otherwise fall through */
+ }
+
+ if (new->key >= old->key) {
+ new->node.branches.b[EB_LEFT] = troot;
+ new->node.branches.b[EB_RGHT] = new_leaf;
+ new->node.leaf_p = new_rght;
+ *up_ptr = new_left;
+ }
+ else {
+ new->node.branches.b[EB_LEFT] = new_leaf;
+ new->node.branches.b[EB_RGHT] = troot;
+ new->node.leaf_p = new_left;
+ *up_ptr = new_rght;
+ }
+
+ /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
+ * parent is already set to <new>, and the <root>'s branch is still in
+ * <side>. Update the root's leaf till we have it. Note that we can also
+ * find the side by checking the side of new->node.node_p.
+ */
+
+ root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
+ return new;
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x> and matches scope <scope>. NULL is returned
+ * if no key matches.
+ */
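+/* For instance (hypothetical values): with per-thread scope bits, a caller
+ * owning bit 0x4 may call eb32sc_lookup_ge(&root, now, 0x4) to fetch the
+ * first entry with key >= now visible to that thread, skipping whole
+ * subtrees whose accumulated node_s mask does not contain 0x4.
+ */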
+struct eb32sc_node *eb32sc_lookup_ge(struct eb_root *root, u32 x, unsigned long scope)
+{
+ struct eb32sc_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32sc_node, node.branches);
+ if ((node->leaf_s & scope) && node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb32sc_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if ((node->node_s & scope) && node->key >= x)
+ troot = eb_dotag(&node->node.branches, EB_LEFT);
+ else
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((node->node_s & scope) && (node->key >> node->node.bit) > (x >> node->node.bit))
+ troot = eb_dotag(&node->node.branches, EB_LEFT);
+ else
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ return eb32sc_next_with_parent(troot, scope);
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root> which is
+ * equal to or greater than <x>, matching scope <scope>. If not found, it loops
+ * back to the beginning of the tree. NULL is returned if no key matches.
+ */
+struct eb32sc_node *eb32sc_lookup_ge_or_first(struct eb_root *root, u32 x, unsigned long scope)
+{
+ struct eb32sc_node *eb32;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ eb32 = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32sc_node, node.branches);
+ if ((eb32->leaf_s & scope) && eb32->key >= x)
+ return eb32;
+ /* return next */
+ troot = eb32->node.leaf_p;
+ break;
+ }
+ eb32 = container_of(eb_untag(troot, EB_NODE),
+ struct eb32sc_node, node.branches);
+
+ if (eb32->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if ((eb32->node_s & scope) && eb32->key >= x)
+ troot = eb_dotag(&eb32->node.branches, EB_LEFT);
+ else
+ troot = eb32->node.node_p;
+ break;
+ }
+
+ if (((x ^ eb32->key) >> eb32->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((eb32->node_s & scope) && (eb32->key >> eb32->node.bit) > (x >> eb32->node.bit))
+ troot = eb_dotag(&eb32->node.branches, EB_LEFT);
+ else
+ troot = eb32->node.node_p;
+ break;
+ }
+ troot = eb32->node.branches.b[(x >> eb32->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ eb32 = eb32sc_next_with_parent(troot, scope);
+ if (!eb32)
+ eb32 = eb32sc_walk_down_left(root->b[EB_LEFT], scope);
+
+ return eb32;
+}
+
+/* Removes a leaf node from the tree if it was still in it. Marks the node
+ * as unlinked.
+ */
+void eb32sc_delete(struct eb32sc_node *eb32)
+{
+ struct eb_node *node = &eb32->node;
+ unsigned int pside, gpside, sibtype;
+ struct eb_node *parent;
+ struct eb_root *gparent;
+ unsigned long scope;
+
+ if (!node->leaf_p)
+ return;
+
+ /* we need the parent, our side, and the grand parent */
+ pside = eb_gettag(node->leaf_p);
+ parent = eb_root_to_node(eb_untag(node->leaf_p, pside));
+
+ /* We likely have to release the parent link, unless it's the root,
+ * in which case we only set our branch to NULL. Note that we can
+ * only be attached to the root by its left branch.
+ */
+
+ if (eb_clrtag(parent->branches.b[EB_RGHT]) == NULL) {
+ /* we're just below the root, it's trivial. */
+ parent->branches.b[EB_LEFT] = NULL;
+ goto delete_unlink;
+ }
+
+ /* To release our parent, we have to identify our sibling, and reparent
+ * it directly to/from the grand parent. Note that the sibling can
+ * either be a link or a leaf.
+ */
+
+ gpside = eb_gettag(parent->node_p);
+ gparent = eb_untag(parent->node_p, gpside);
+
+ gparent->b[gpside] = parent->branches.b[!pside];
+ sibtype = eb_gettag(gparent->b[gpside]);
+
+ if (sibtype == EB_LEAF) {
+ eb_root_to_node(eb_untag(gparent->b[gpside], EB_LEAF))->leaf_p =
+ eb_dotag(gparent, gpside);
+ } else {
+ eb_root_to_node(eb_untag(gparent->b[gpside], EB_NODE))->node_p =
+ eb_dotag(gparent, gpside);
+ }
+ /* Mark the parent unused. Note that we do not check if the parent is
+ * our own node, but that's not a problem because if it is, it will be
+ * marked unused at the same time, which we'll use below to know we can
+ * safely remove it.
+ */
+ parent->node_p = NULL;
+
+ /* The parent node has been detached, and is currently unused. It may
+ * belong to another node, so we cannot remove it that way. Also, our
+	 * own node part might still be used, so we can use this spare node
+ * to replace ours if needed.
+ */
+
+ /* If our link part is unused, we can safely exit now */
+ if (!node->node_p)
+ goto delete_unlink;
+
+ /* From now on, <node> and <parent> are necessarily different, and the
+ * <node>'s node part is in use. By definition, <parent> is at least
+ * below <node>, so keeping its key for the bit string is OK. However
+ * its scope must be enlarged to cover the new branch it absorbs.
+ */
+
+ parent->node_p = node->node_p;
+ parent->branches = node->branches;
+ parent->bit = node->bit;
+
+ /* We must now update the new node's parent... */
+ gpside = eb_gettag(parent->node_p);
+ gparent = eb_untag(parent->node_p, gpside);
+ gparent->b[gpside] = eb_dotag(&parent->branches, EB_NODE);
+
+ /* ... and its branches */
+ scope = 0;
+ for (pside = 0; pside <= 1; pside++) {
+ if (eb_gettag(parent->branches.b[pside]) == EB_NODE) {
+ eb_root_to_node(eb_untag(parent->branches.b[pside], EB_NODE))->node_p =
+ eb_dotag(&parent->branches, pside);
+ scope |= container_of(eb_untag(parent->branches.b[pside], EB_NODE), struct eb32sc_node, node.branches)->node_s;
+ } else {
+ eb_root_to_node(eb_untag(parent->branches.b[pside], EB_LEAF))->leaf_p =
+ eb_dotag(&parent->branches, pside);
+ scope |= container_of(eb_untag(parent->branches.b[pside], EB_LEAF), struct eb32sc_node, node.branches)->leaf_s;
+ }
+ }
+ container_of(parent, struct eb32sc_node, node)->node_s = scope;
+
+ delete_unlink:
+ /* Now the node has been completely unlinked */
+ node->leaf_p = NULL;
+ return; /* tree is not empty yet */
+}
diff --git a/src/eb32tree.c b/src/eb32tree.c
new file mode 100644
index 0000000..38ddab0
--- /dev/null
+++ b/src/eb32tree.c
@@ -0,0 +1,218 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on 32bit nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult eb32tree.h for more details about those functions */
+
+#include <import/eb32tree.h>
+
+struct eb32_node *eb32_insert(struct eb_root *root, struct eb32_node *new)
+{
+ return __eb32_insert(root, new);
+}
+
+struct eb32_node *eb32i_insert(struct eb_root *root, struct eb32_node *new)
+{
+ return __eb32i_insert(root, new);
+}
+
+struct eb32_node *eb32_lookup(struct eb_root *root, u32 x)
+{
+ return __eb32_lookup(root, x);
+}
+
+struct eb32_node *eb32i_lookup(struct eb_root *root, s32 x)
+{
+ return __eb32i_lookup(root, x);
+}
+
+/*
+ * Find the last occurrence of the highest key in the tree <root>, which is
+ * equal to or less than <x>. NULL is returned if no key matches.
+ */
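+/* Quick example (illustrative): with keys {3, 5, 9} stored in the tree,
+ * eb32_lookup_le(&root, 6) returns the node keyed 5, eb32_lookup_le(&root, 2)
+ * returns NULL, and the dual eb32_lookup_ge(&root, 6) further below
+ * returns 9.
+ */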
+struct eb32_node *eb32_lookup_le(struct eb_root *root, u32 x)
+{
+ struct eb32_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ if (node->key <= x)
+ return node;
+ /* return prev */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb32_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the rightmost node, or
+ * we don't and we skip the whole subtree to return the
+ * prev node before the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * prev node without first trying to escape from the
+ * tree.
+ */
+ if (node->key <= x) {
+ troot = node->node.branches.b[EB_RGHT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_RGHT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ }
+ /* return prev */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * small and we need to get its highest value, or it is
+ * too large, and we need to get the prev value.
+ */
+ if ((node->key >> node->node.bit) < (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_RGHT];
+ return eb32_entry(eb_walk_down(troot, EB_RGHT), struct eb32_node, node);
+ }
+
+ /* Further values will be too high here, so return the prev
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report previous node before the
+ * current one which is not above. <troot> is already initialised to
+ * the parent's branches.
+ */
+ while (eb_gettag(troot) == EB_LEFT) {
+ /* Walking up from left branch. We must ensure that we never
+ * walk beyond root.
+ */
+ if (unlikely(eb_clrtag((eb_untag(troot, EB_LEFT))->b[EB_RGHT]) == NULL))
+ return NULL;
+ troot = (eb_root_to_node(eb_untag(troot, EB_LEFT)))->node_p;
+ }
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_RGHT))->b[EB_LEFT];
+ node = eb32_entry(eb_walk_down(troot, EB_RGHT), struct eb32_node, node);
+ return node;
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x>. NULL is returned if no key matches.
+ */
+struct eb32_node *eb32_lookup_ge(struct eb_root *root, u32 x)
+{
+ struct eb32_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ if (node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb32_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if (node->key >= x) {
+ troot = node->node.branches.b[EB_LEFT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ }
+ /* return next */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((node->key >> node->node.bit) > (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_LEFT];
+ return eb32_entry(eb_walk_down(troot, EB_LEFT), struct eb32_node, node);
+ }
+
+ /* Further values will be too low here, so return the next
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ while (eb_gettag(troot) != EB_LEFT)
+ /* Walking up from right branch, so we cannot be below root */
+ troot = (eb_root_to_node(eb_untag(troot, EB_RGHT)))->node_p;
+
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_LEFT))->b[EB_RGHT];
+ if (eb_clrtag(troot) == NULL)
+ return NULL;
+
+ node = eb32_entry(eb_walk_down(troot, EB_LEFT), struct eb32_node, node);
+ return node;
+}
diff --git a/src/eb64tree.c b/src/eb64tree.c
new file mode 100644
index 0000000..b908d4d
--- /dev/null
+++ b/src/eb64tree.c
@@ -0,0 +1,218 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on 64bit nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult eb64tree.h for more details about those functions */
+
+#include <import/eb64tree.h>
+
+struct eb64_node *eb64_insert(struct eb_root *root, struct eb64_node *new)
+{
+ return __eb64_insert(root, new);
+}
+
+struct eb64_node *eb64i_insert(struct eb_root *root, struct eb64_node *new)
+{
+ return __eb64i_insert(root, new);
+}
+
+struct eb64_node *eb64_lookup(struct eb_root *root, u64 x)
+{
+ return __eb64_lookup(root, x);
+}
+
+struct eb64_node *eb64i_lookup(struct eb_root *root, s64 x)
+{
+ return __eb64i_lookup(root, x);
+}
+
+/*
+ * Find the last occurrence of the highest key in the tree <root>, which is
+ * equal to or less than <x>. NULL is returned if no key matches.
+ */
+struct eb64_node *eb64_lookup_le(struct eb_root *root, u64 x)
+{
+ struct eb64_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ if (node->key <= x)
+ return node;
+ /* return prev */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb64_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the rightmost node, or
+ * we don't and we skip the whole subtree to return the
+ * prev node before the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * prev node without first trying to escape from the
+ * tree.
+ */
+ if (node->key <= x) {
+ troot = node->node.branches.b[EB_RGHT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_RGHT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ }
+ /* return prev */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * small and we need to get its highest value, or it is
+ * too large, and we need to get the prev value.
+ */
+ if ((node->key >> node->node.bit) < (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_RGHT];
+ return eb64_entry(eb_walk_down(troot, EB_RGHT), struct eb64_node, node);
+ }
+
+ /* Further values will be too high here, so return the prev
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report previous node before the
+ * current one which is not above. <troot> is already initialised to
+ * the parent's branches.
+ */
+ while (eb_gettag(troot) == EB_LEFT) {
+ /* Walking up from left branch. We must ensure that we never
+ * walk beyond root.
+ */
+ if (unlikely(eb_clrtag((eb_untag(troot, EB_LEFT))->b[EB_RGHT]) == NULL))
+ return NULL;
+ troot = (eb_root_to_node(eb_untag(troot, EB_LEFT)))->node_p;
+ }
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_RGHT))->b[EB_LEFT];
+ node = eb64_entry(eb_walk_down(troot, EB_RGHT), struct eb64_node, node);
+ return node;
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x>. NULL is returned if no key matches.
+ */
+struct eb64_node *eb64_lookup_ge(struct eb_root *root, u64 x)
+{
+ struct eb64_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ if (node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb64_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if (node->key >= x) {
+ troot = node->node.branches.b[EB_LEFT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ }
+ /* return next */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((node->key >> node->node.bit) > (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_LEFT];
+ return eb64_entry(eb_walk_down(troot, EB_LEFT), struct eb64_node, node);
+ }
+
+ /* Further values will be too low here, so return the next
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ while (eb_gettag(troot) != EB_LEFT)
+ /* Walking up from right branch, so we cannot be below root */
+ troot = (eb_root_to_node(eb_untag(troot, EB_RGHT)))->node_p;
+
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_LEFT))->b[EB_RGHT];
+ if (eb_clrtag(troot) == NULL)
+ return NULL;
+
+ node = eb64_entry(eb_walk_down(troot, EB_LEFT), struct eb64_node, node);
+ return node;
+}
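+
+/* Editor's note: a minimal usage sketch of the range lookups above; it is
+ * not part of the original file, and the tree/node names are illustrative
+ * assumptions. It inserts two keys then performs >= and <= lookups.
+ */
+static void eb64_range_example(void)
+{
+ struct eb_root example_root = EB_ROOT;
+ struct eb64_node n1 = { }, n2 = { };
+ struct eb64_node *res;
+
+ n1.key = 10;
+ n2.key = 20;
+ eb64_insert(&example_root, &n1);
+ eb64_insert(&example_root, &n2);
+
+ res = eb64_lookup_ge(&example_root, 15); /* -> &n2 (key 20) */
+ res = eb64_lookup_le(&example_root, 15); /* -> &n1 (key 10) */
+ (void)res;
+}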
diff --git a/src/ebimtree.c b/src/ebimtree.c
new file mode 100644
index 0000000..1ac444a
--- /dev/null
+++ b/src/ebimtree.c
@@ -0,0 +1,44 @@
+/*
+ * Elastic Binary Trees - exported functions for Indirect Multi-Byte data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebimtree.h for more details about those functions */
+
+#include <import/ebimtree.h>
+#include <import/ebpttree.h>
+
+/* Find the first occurrence of a key of <len> bytes in the tree <root>.
+ * If none can be found, return NULL.
+ */
+struct ebpt_node *
+ebim_lookup(struct eb_root *root, const void *x, unsigned int len)
+{
+ return __ebim_lookup(root, x, len);
+}
+
+/* Insert ebpt_node <new> into subtree starting at node root <root>.
+ * Only new->key needs to be set with the key. The ebpt_node is returned.
+ * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * len is specified in bytes.
+ */
+struct ebpt_node *
+ebim_insert(struct eb_root *root, struct ebpt_node *new, unsigned int len)
+{
+ return __ebim_insert(root, new, len);
+}
diff --git a/src/ebistree.c b/src/ebistree.c
new file mode 100644
index 0000000..193950d
--- /dev/null
+++ b/src/ebistree.c
@@ -0,0 +1,42 @@
+/*
+ * Elastic Binary Trees - exported functions for Indirect String data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebistree.h for more details about those functions */
+
+#include <import/ebistree.h>
+
+/* Find the first occurrence of a zero-terminated string <x> in the tree <root>.
+ * It's the caller's responsibility to use this function only on trees which
+ * only contain zero-terminated strings. If none can be found, return NULL.
+ */
+struct ebpt_node *ebis_lookup(struct eb_root *root, const char *x)
+{
+ return __ebis_lookup(root, x);
+}
+
+/* Insert ebpt_node <new> into subtree starting at node root <root>. Only
+ * new->key needs to be set with the zero-terminated string key. The ebpt_node is
+ * returned. If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * caller is responsible for properly terminating the key with a zero.
+ */
+struct ebpt_node *ebis_insert(struct eb_root *root, struct ebpt_node *new)
+{
+ return __ebis_insert(root, new);
+}
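+
+/* Editor's note: an illustrative sketch of the indirect string tree API
+ * above; it is not part of the original file and the names are assumed.
+ * Each ebpt_node only stores a pointer to a caller-owned zero-terminated
+ * key.
+ */
+static void ebis_example(void)
+{
+ struct eb_root strings = EB_ROOT_UNIQUE; /* unique keys only */
+ struct ebpt_node a = { }, b = { };
+
+ a.key = (void *)"alpha";
+ b.key = (void *)"beta";
+ ebis_insert(&strings, &a);
+ ebis_insert(&strings, &b);
+
+ /* returns &b; NULL would indicate a missing key */
+ ebis_lookup(&strings, "beta");
+}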
diff --git a/src/ebmbtree.c b/src/ebmbtree.c
new file mode 100644
index 0000000..a3de9a1
--- /dev/null
+++ b/src/ebmbtree.c
@@ -0,0 +1,77 @@
+/*
+ * Elastic Binary Trees - exported functions for Multi-Byte data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebmbtree.h for more details about those functions */
+
+#include <import/ebmbtree.h>
+
+/* Find the first occurrence of a key of <len> bytes in the tree <root>.
+ * If none can be found, return NULL.
+ */
+struct ebmb_node *
+ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
+{
+ return __ebmb_lookup(root, x, len);
+}
+
+/* Insert ebmb_node <new> into subtree starting at node root <root>.
+ * Only new->key needs to be set with the key. The ebmb_node is returned.
+ * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * len is specified in bytes.
+ */
+struct ebmb_node *
+ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
+{
+ return __ebmb_insert(root, new, len);
+}
+
+/* Find the first occurrence of the longest prefix matching a key <x> in the
+ * tree <root>. It's the caller's responsibility to ensure that key <x> is at
+ * least as long as the keys in the tree. If none can be found, return NULL.
+ */
+struct ebmb_node *
+ebmb_lookup_longest(struct eb_root *root, const void *x)
+{
+ return __ebmb_lookup_longest(root, x);
+}
+
+/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
+ * tree <root>. If none can be found, return NULL.
+ */
+struct ebmb_node *
+ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
+{
+ return __ebmb_lookup_prefix(root, x, pfx);
+}
+
+/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
+ * Only new->key and new->pfx need to be set with the key and its prefix length.
+ * Note that bits between <pfx> and <len> are theoretically ignored and should be
+ * zero, as it is not certain yet that they will always be ignored everywhere
+ * (e.g. in bit compare functions).
+ * The ebmb_node is returned.
+ * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * len is specified in bytes.
+ */
+struct ebmb_node *
+ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
+{
+ return __ebmb_insert_prefix(root, new, len);
+}
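+
+/* Editor's note: an illustrative sketch of the prefix API above; it is not
+ * part of the original file and the names are assumed. A node is allocated
+ * with room behind the flexible key[] member for a 4-byte (IPv4-sized) key,
+ * inserted as 10.0.0.0/8, then matched with a longest-prefix lookup.
+ */
+#include <stdlib.h>
+
+static void ebmb_prefix_example(void)
+{
+ struct eb_root nets = EB_ROOT;
+ const unsigned char ip[4] = { 10, 0, 3, 7 };
+ struct ebmb_node *n;
+
+ n = calloc(1, sizeof(*n) + 4); /* room for the 4-byte key */
+ if (!n)
+ return;
+ n->key[0] = 10; /* 10.0.0.0 */
+ n->node.pfx = 8; /* prefix length in bits */
+ ebmb_insert_prefix(&nets, n, 4);
+
+ /* returns <n>, the most specific prefix covering <ip> */
+ ebmb_lookup_longest(&nets, ip);
+
+ free(n);
+}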
diff --git a/src/ebpttree.c b/src/ebpttree.c
new file mode 100644
index 0000000..558d334
--- /dev/null
+++ b/src/ebpttree.c
@@ -0,0 +1,208 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on pointer nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebpttree.h for more details about those functions */
+
+#include <import/ebpttree.h>
+
+struct ebpt_node *ebpt_insert(struct eb_root *root, struct ebpt_node *new)
+{
+ return __ebpt_insert(root, new);
+}
+
+struct ebpt_node *ebpt_lookup(struct eb_root *root, void *x)
+{
+ return __ebpt_lookup(root, x);
+}
+
+/*
+ * Find the last occurrence of the highest key in the tree <root>, which is
+ * equal to or less than <x>. NULL is returned if no key matches.
+ */
+struct ebpt_node *ebpt_lookup_le(struct eb_root *root, void *x)
+{
+ struct ebpt_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its prev one if the former is too large.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ if (node->key <= x)
+ return node;
+ /* return prev */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct ebpt_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the rightmost node, or
+ * we don't and we skip the whole subtree to return the
+ * prev node before the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * prev node without first trying to escape from the
+ * tree.
+ */
+ if (node->key <= x) {
+ troot = node->node.branches.b[EB_RGHT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_RGHT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ }
+ /* return prev */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if ((((ptr_t)x ^ (ptr_t)node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * small and we need to get its highest value, or it is
+ * too large, and we need to get the prev value.
+ */
+ if (((ptr_t)node->key >> node->node.bit) < ((ptr_t)x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_RGHT];
+ return ebpt_entry(eb_walk_down(troot, EB_RGHT), struct ebpt_node, node);
+ }
+
+ /* Further values will be too high here, so return the prev
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[((ptr_t)x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report previous node before the
+ * current one which is not above. <troot> is already initialised to
+ * the parent's branches.
+ */
+ while (eb_gettag(troot) == EB_LEFT) {
+ /* Walking up from left branch. We must ensure that we never
+ * walk beyond root.
+ */
+ if (unlikely(eb_clrtag((eb_untag(troot, EB_LEFT))->b[EB_RGHT]) == NULL))
+ return NULL;
+ troot = (eb_root_to_node(eb_untag(troot, EB_LEFT)))->node_p;
+ }
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_RGHT))->b[EB_LEFT];
+ node = ebpt_entry(eb_walk_down(troot, EB_RGHT), struct ebpt_node, node);
+ return node;
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x>. NULL is returned if no key matches.
+ */
+struct ebpt_node *ebpt_lookup_ge(struct eb_root *root, void *x)
+{
+ struct ebpt_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ if (node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct ebpt_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if (node->key >= x) {
+ troot = node->node.branches.b[EB_LEFT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ }
+ /* return next */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if ((((ptr_t)x ^ (ptr_t)node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if (((ptr_t)node->key >> node->node.bit) > ((ptr_t)x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_LEFT];
+ return ebpt_entry(eb_walk_down(troot, EB_LEFT), struct ebpt_node, node);
+ }
+
+ /* Further values will be too low here, so return the next
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[((ptr_t)x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ while (eb_gettag(troot) != EB_LEFT)
+ /* Walking up from right branch, so we cannot be below root */
+ troot = (eb_root_to_node(eb_untag(troot, EB_RGHT)))->node_p;
+
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_LEFT))->b[EB_RGHT];
+ if (eb_clrtag(troot) == NULL)
+ return NULL;
+
+ node = ebpt_entry(eb_walk_down(troot, EB_LEFT), struct ebpt_node, node);
+ return node;
+}
diff --git a/src/ebsttree.c b/src/ebsttree.c
new file mode 100644
index 0000000..a4fbe33
--- /dev/null
+++ b/src/ebsttree.c
@@ -0,0 +1,42 @@
+/*
+ * Elastic Binary Trees - exported functions for String data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebsttree.h for more details about those functions */
+
+#include <import/ebsttree.h>
+
+/* Find the first occurrence of a zero-terminated string <x> in the tree <root>.
+ * It's the caller's responsibility to use this function only on trees which
+ * only contain zero-terminated strings. If none can be found, return NULL.
+ */
+struct ebmb_node *ebst_lookup(struct eb_root *root, const char *x)
+{
+ return __ebst_lookup(root, x);
+}
+
+/* Insert ebmb_node <new> into subtree starting at node root <root>. Only
+ * new->key needs to be set with the zero-terminated string key. The ebmb_node is
+ * returned. If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * caller is responsible for properly terminating the key with a zero.
+ */
+struct ebmb_node *ebst_insert(struct eb_root *root, struct ebmb_node *new)
+{
+ return __ebst_insert(root, new);
+}
diff --git a/src/ebtree.c b/src/ebtree.c
new file mode 100644
index 0000000..db27875
--- /dev/null
+++ b/src/ebtree.c
@@ -0,0 +1,50 @@
+/*
+ * Elastic Binary Trees - exported generic functions
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <import/ebtree.h>
+
+void eb_delete(struct eb_node *node)
+{
+ __eb_delete(node);
+}
+
+/* used by insertion primitives */
+struct eb_node *eb_insert_dup(struct eb_node *sub, struct eb_node *new)
+{
+ return __eb_insert_dup(sub, new);
+}
+
+/* compares memory blocks m1 and m2 for up to <len> bytes. Immediately stops at
+ * the first non-matching byte. It returns 0 on full match, non-zero otherwise.
+ * One byte will always be checked so this must not be called with len==0. It
+ * takes 2+5cy/B on x86_64 and is ~29 bytes long.
+ */
+int eb_memcmp(const void *m1, const void *m2, size_t len)
+{
+ const char *p1 = (const char *)m1 + len;
+ const char *p2 = (const char *)m2 + len;
+ ssize_t ofs = -len;
+ char diff;
+
+ do {
+ diff = p1[ofs] - p2[ofs];
+ } while (!diff && ++ofs);
+ return diff;
+}
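+
+/* Editor's note: an illustrative sketch of eb_memcmp() semantics; it is not
+ * part of the original file. Unlike memcmp(), it must never be called with
+ * len==0 since it always reads at least one byte.
+ */
+static void eb_memcmp_example(void)
+{
+ int eq = eb_memcmp("abc", "abc", 3); /* 0: full match */
+ int ne = eb_memcmp("abc", "abd", 3); /* non-zero: 'c' != 'd' */
+
+ (void)eq; (void)ne;
+}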
diff --git a/src/errors.c b/src/errors.c
new file mode 100644
index 0000000..bd3c271
--- /dev/null
+++ b/src/errors.c
@@ -0,0 +1,380 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <syslog.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet-t.h>
+#include <haproxy/buf.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/ring.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+/* A global buffer used to store all startup alerts/warnings. It can then be
+ * retrieved via the CLI. */
+static struct ring *startup_logs = NULL;
+
+/* A thread-local buffer used to store all alerts/warnings. It can be used to
+ * retrieve them for CLI commands after startup.
+ */
+#define USER_MESSAGES_BUFSIZE 1024
+static THREAD_LOCAL struct buffer usermsgs_buf = BUF_NULL;
+
+/* A thread-local context used for stderr output via ha_alert/warning/notice/diag.
+ */
+#define USERMSGS_CTX_BUFSIZE PATH_MAX
+static THREAD_LOCAL struct usermsgs_ctx usermsgs_ctx = { .str = BUF_NULL, };
+
+/* Put msg in usermsgs_buf.
+ *
+ * The message should not be terminated by a newline because this function
+ * appends one itself.
+ *
+ * If there is not enough room in the buffer, the message is silently discarded.
+ * Do not forget to frequently clear the buffer.
+ */
+static void usermsgs_put(const struct ist *msg)
+{
+ /* Allocate the buffer if not already done. */
+ if (unlikely(b_is_null(&usermsgs_buf))) {
+ usermsgs_buf.area = malloc(USER_MESSAGES_BUFSIZE * sizeof(char));
+ usermsgs_buf.size = USER_MESSAGES_BUFSIZE;
+ }
+
+ if (likely(!b_is_null(&usermsgs_buf))) {
+ if (b_room(&usermsgs_buf) >= msg->len + 2) {
+ /* Insert the message + newline. */
+ b_putblk(&usermsgs_buf, msg->ptr, msg->len);
+ b_putchr(&usermsgs_buf, '\n');
+ /* Terminate with a NUL just past the data (not accounted in the buffer). */
+ *b_tail(&usermsgs_buf) = '\0';
+ }
+ }
+}
+
+/* Clear the user messages log buffer.
+ *
+ * <prefix> sets the thread-local context prepended to every output
+ * following this call. It can be NULL if not necessary.
+ */
+void usermsgs_clr(const char *prefix)
+{
+ if (likely(!b_is_null(&usermsgs_buf))) {
+ b_reset(&usermsgs_buf);
+ usermsgs_buf.area[0] = '\0';
+ }
+
+ usermsgs_ctx.prefix = prefix;
+}
+
+/* Check if the user messages buffer is empty. */
+int usermsgs_empty(void)
+{
+ return !!(b_is_null(&usermsgs_buf) || !b_data(&usermsgs_buf));
+}
+
+/* Return the messages log buffer content. */
+const char *usermsgs_str(void)
+{
+ if (unlikely(b_is_null(&usermsgs_buf)))
+ return "";
+
+ return b_head(&usermsgs_buf);
+}
+
+/* Set thread-local context info used to prefix forthcoming stderr output during
+ * configuration parsing.
+ *
+ * <file> and <line> specify the location of the parsed configuration.
+ *
+ * <obj> can be of various types. If not NULL, the string prefix generated will
+ * depend on its type.
+ */
+void set_usermsgs_ctx(const char *file, int line, enum obj_type *obj)
+{
+ usermsgs_ctx.file = file;
+ usermsgs_ctx.line = line;
+ usermsgs_ctx.obj = obj;
+}
+
+/* Set thread-local context info used to prefix forthcoming stderr output. It will
+ * be set as a complement to possibly already defined file/line.
+ *
+ * <obj> can be of various types. If not NULL, the string prefix generated will
+ * depend on its type.
+ */
+void register_parsing_obj(enum obj_type *obj)
+{
+ usermsgs_ctx.obj = obj;
+}
+
+/* Reset thread-local context info for stderr output. */
+void reset_usermsgs_ctx(void)
+{
+ usermsgs_ctx.file = NULL;
+ usermsgs_ctx.line = 0;
+ usermsgs_ctx.obj = NULL;
+}
+
+static void generate_usermsgs_ctx_str(void)
+{
+ struct usermsgs_ctx *ctx = &usermsgs_ctx;
+ void *area;
+ int ret;
+
+ if (unlikely(b_is_null(&ctx->str))) {
+ area = calloc(USERMSGS_CTX_BUFSIZE, sizeof(*area));
+ if (area)
+ ctx->str = b_make(area, USERMSGS_CTX_BUFSIZE, 0, 0);
+ }
+
+ if (likely(!b_is_null(&ctx->str))) {
+ b_reset(&ctx->str);
+
+ if (ctx->prefix) {
+ ret = snprintf(b_tail(&ctx->str), b_room(&ctx->str),
+ "%s : ", ctx->prefix);
+ b_add(&ctx->str, MIN(ret, b_room(&ctx->str)));
+ }
+
+ if (ctx->file) {
+ ret = snprintf(b_tail(&ctx->str), b_room(&ctx->str),
+ "[%s:%d] : ", ctx->file, ctx->line);
+ b_add(&ctx->str, MIN(ret, b_room(&ctx->str)));
+ }
+
+ switch (obj_type(ctx->obj)) {
+ case OBJ_TYPE_SERVER:
+ ret = snprintf(b_tail(&ctx->str), b_room(&ctx->str),
+ "'server %s/%s' : ",
+ __objt_server(ctx->obj)->proxy->id,
+ __objt_server(ctx->obj)->id);
+ b_add(&ctx->str, MIN(ret, b_room(&ctx->str)));
+ break;
+
+ case OBJ_TYPE_NONE:
+ default:
+ break;
+ }
+
+ if (!b_data(&ctx->str))
+ snprintf(b_tail(&ctx->str), b_room(&ctx->str), "%s", "");
+ }
+}
+
+/* Generic function to display messages prefixed by a label */
+static void print_message(int use_usermsgs_ctx, const char *label, const char *fmt, va_list argp)
+{
+ struct ist msg_ist = IST_NULL;
+ char *head, *parsing_str, *msg;
+ char prefix[11]; // '[' + 8 chars + ']' + 0.
+
+ *prefix = '[';
+ strncpy(prefix + 1, label, sizeof(prefix) - 2);
+ msg = prefix + strlen(prefix);
+ *msg++ = ']';
+ while (msg < prefix + sizeof(prefix) - 1)
+ *msg++ = ' ';
+ *msg = 0;
+
+ head = parsing_str = msg = NULL;
+ memprintf(&head, "%s (%u) : ", prefix, (uint)getpid());
+ memvprintf(&msg, fmt, argp);
+
+ /* trim the trailing '\n' */
+ msg_ist = ist(msg);
+ if (msg_ist.len > 0 && msg_ist.ptr[msg_ist.len - 1] == '\n')
+ msg_ist.len--;
+
+ if (use_usermsgs_ctx) {
+ generate_usermsgs_ctx_str();
+ parsing_str = b_head(&usermsgs_ctx.str);
+ reset_usermsgs_ctx();
+ }
+ else {
+ parsing_str = "";
+ }
+
+ if (global.mode & MODE_STARTING) {
+ if (unlikely(!startup_logs))
+ startup_logs = ring_new(STARTUP_LOG_SIZE);
+
+ if (likely(startup_logs)) {
+ struct ist m[3];
+
+ m[0] = ist(head);
+ m[1] = ist(parsing_str);
+ m[2] = msg_ist;
+
+ ring_write(startup_logs, ~0, 0, 0, m, 3);
+ }
+ }
+ else {
+ usermsgs_put(&msg_ist);
+ }
+
+ fprintf(stderr, "%s%s%s", head, parsing_str, msg);
+ fflush(stderr);
+
+ free(head);
+ free(msg);
+}
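+
+/* Editor's note: for illustration, a message emitted through ha_alert()
+ * by pid 1234 is rendered on stderr as:
+ *
+ *   [ALERT]    (1234) : <optional parsing context> : <message>
+ *
+ * since the label is padded into a fixed 10-character prefix.
+ */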
+
+static void print_message_args(int use_usermsgs_ctx, const char *label, const char *fmt, ...)
+{
+ va_list argp;
+ va_start(argp, fmt);
+ print_message(use_usermsgs_ctx, label, fmt, argp);
+ va_end(argp);
+}
+
+/*
+ * Displays the message on stderr with the pid. Overrides the quiet
+ * mode during startup.
+ */
+void ha_alert(const char *fmt, ...)
+{
+ va_list argp;
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE) ||
+ !(global.mode & MODE_STARTING)) {
+ if (!(warned & WARN_EXEC_PATH) && (global.mode & MODE_STARTING)) {
+ const char *path = get_exec_path();
+
+ warned |= WARN_EXEC_PATH;
+ print_message_args(0, "NOTICE", "haproxy version is %s\n", haproxy_version);
+ if (path)
+ print_message_args(0, "NOTICE", "path to executable is %s\n", path);
+ }
+ va_start(argp, fmt);
+ print_message(1, "ALERT", fmt, argp);
+ va_end(argp);
+ }
+}
+
+/*
+ * Displays the message on stderr with the pid.
+ */
+void ha_warning(const char *fmt, ...)
+{
+ va_list argp;
+
+ warned |= WARN_ANY;
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE) ||
+ !(global.mode & MODE_STARTING)) {
+ if (!(warned & WARN_EXEC_PATH) && (global.mode & MODE_STARTING)) {
+ const char *path = get_exec_path();
+
+ warned |= WARN_EXEC_PATH;
+ print_message_args(0, "NOTICE", "haproxy version is %s\n", haproxy_version);
+ if (path)
+ print_message_args(0, "NOTICE", "path to executable is %s\n", path);
+ }
+ va_start(argp, fmt);
+ print_message(1, "WARNING", fmt, argp);
+ va_end(argp);
+ }
+}
+
+/*
+ * Variant of _ha_diag_warning with va_list.
+ * Use it only if MODE_DIAG has been previously checked.
+ */
+void _ha_vdiag_warning(const char *fmt, va_list argp)
+{
+ print_message(1, "DIAG", fmt, argp);
+}
+
+/*
+ * Output a diagnostic warning.
+ * Use it only if MODE_DIAG has been previously checked.
+ */
+void _ha_diag_warning(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ _ha_vdiag_warning(fmt, argp);
+ va_end(argp);
+}
+
+/*
+ * Output a diagnostic warning. Does nothing if MODE_DIAG is not set.
+ */
+void ha_diag_warning(const char *fmt, ...)
+{
+ va_list argp;
+
+ if (global.mode & MODE_DIAG) {
+ va_start(argp, fmt);
+ _ha_vdiag_warning(fmt, argp);
+ va_end(argp);
+ }
+}
+
+/*
+ * Displays the message on stderr with the pid.
+ */
+void ha_notice(const char *fmt, ...)
+{
+ va_list argp;
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE) ||
+ !(global.mode & MODE_STARTING)) {
+ va_start(argp, fmt);
+ print_message(1, "NOTICE", fmt, argp);
+ va_end(argp);
+ }
+}
+
+/*
+ * Displays the message on <out> only if quiet mode is not set.
+ */
+void qfprintf(FILE *out, const char *fmt, ...)
+{
+ va_list argp;
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
+ va_start(argp, fmt);
+ vfprintf(out, fmt, argp);
+ fflush(out);
+ va_end(argp);
+ }
+}
+
+
+/* parse the "show startup-logs" command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_show_startup_logs(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (!startup_logs)
+ return cli_msg(appctx, LOG_INFO, "\n"); // nothing to print
+
+ return ring_attach_cli(startup_logs, appctx, 0);
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "startup-logs", NULL }, "show startup-logs : report logs emitted during HAProxy startup", cli_parse_show_startup_logs, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+
+static void deinit_errors_buffers()
+{
+ ring_free(_HA_ATOMIC_XCHG(&startup_logs, NULL));
+ ha_free(&usermsgs_buf.area);
+ ha_free(&usermsgs_ctx.str.area);
+}
+
+/* errors may be emitted from threads and even before forking, hence the two deinit hooks */
+REGISTER_PER_THREAD_FREE(deinit_errors_buffers);
+REGISTER_POST_DEINIT(deinit_errors_buffers);
diff --git a/src/ev_epoll.c b/src/ev_epoll.c
new file mode 100644
index 0000000..3fc868d
--- /dev/null
+++ b/src/ev_epoll.c
@@ -0,0 +1,407 @@
+/*
+ * FD polling functions for Linux epoll
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <sys/epoll.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/ticks.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+
+/* private data */
+static THREAD_LOCAL struct epoll_event *epoll_events = NULL;
+static int epoll_fd[MAX_THREADS] __read_mostly; // per-thread epoll_fd
+
+#ifndef EPOLLRDHUP
+/* EPOLLRDHUP was defined late in libc, and it appeared in kernel 2.6.17 */
+#define EPOLLRDHUP 0x2000
+#endif
+
+/*
+ * Immediately remove file descriptor from epoll set upon close.
+ * Since we forked, some fds share their open file description with the other
+ * process, and epoll may send us events even though this process closed the fd
+ * (see man 7 epoll, "Questions and answers", Q 6).
+ */
+static void __fd_clo(int fd)
+{
+ if (unlikely(fdtab[fd].state & FD_CLONED)) {
+ unsigned long m = polled_mask[fd].poll_recv | polled_mask[fd].poll_send;
+ struct epoll_event ev;
+ int i;
+
+ for (i = global.nbthread - 1; i >= 0; i--)
+ if (m & (1UL << i))
+ epoll_ctl(epoll_fd[i], EPOLL_CTL_DEL, fd, &ev);
+ }
+}
+
+static void _update_fd(int fd)
+{
+ int en, opcode;
+ struct epoll_event ev = { };
+
+ en = fdtab[fd].state;
+
+ /* Try to force EPOLLET on FDs that support it */
+ if (fdtab[fd].state & FD_ET_POSSIBLE) {
+ /* already done ? */
+ if (polled_mask[fd].poll_recv & polled_mask[fd].poll_send & tid_bit)
+ return;
+
+ /* enable ET polling in both directions */
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
+ opcode = EPOLL_CTL_ADD;
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLET;
+ goto done;
+ }
+
+ /* if we're already polling or are going to poll for this FD and it's
+ * neither active nor ready, force it to be active so that we don't
+ * needlessly unsubscribe then re-subscribe it.
+ */
+ if (!(en & FD_EV_READY_R) &&
+ ((en & FD_EV_ACTIVE_W) ||
+ ((polled_mask[fd].poll_send | polled_mask[fd].poll_recv) & tid_bit)))
+ en |= FD_EV_ACTIVE_R;
+
+ if ((polled_mask[fd].poll_send | polled_mask[fd].poll_recv) & tid_bit) {
+ if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) {
+ /* fd removed from poll list */
+ opcode = EPOLL_CTL_DEL;
+ if (polled_mask[fd].poll_recv & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ if (polled_mask[fd].poll_send & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+ }
+ else {
+ if (((en & FD_EV_ACTIVE_R) != 0) ==
+ ((polled_mask[fd].poll_recv & tid_bit) != 0) &&
+ ((en & FD_EV_ACTIVE_W) != 0) ==
+ ((polled_mask[fd].poll_send & tid_bit) != 0))
+ return;
+ if (en & FD_EV_ACTIVE_R) {
+ if (!(polled_mask[fd].poll_recv & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
+ } else {
+ if (polled_mask[fd].poll_recv & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ }
+ if (en & FD_EV_ACTIVE_W) {
+ if (!(polled_mask[fd].poll_send & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
+ } else {
+ if (polled_mask[fd].poll_send & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+ }
+ /* fd status changed */
+ opcode = EPOLL_CTL_MOD;
+ }
+ }
+ else if ((fdtab[fd].thread_mask & tid_bit) && (en & FD_EV_ACTIVE_RW)) {
+ /* new fd in the poll list */
+ opcode = EPOLL_CTL_ADD;
+ if (en & FD_EV_ACTIVE_R)
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
+ if (en & FD_EV_ACTIVE_W)
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
+ }
+ else {
+ return;
+ }
+
+ /* construct the epoll events based on new state */
+ if (en & FD_EV_ACTIVE_R)
+ ev.events |= EPOLLIN | EPOLLRDHUP;
+
+ if (en & FD_EV_ACTIVE_W)
+ ev.events |= EPOLLOUT;
+
+ done:
+ ev.data.fd = fd;
+ epoll_ctl(epoll_fd[tid], opcode, fd, &ev);
+}
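+
+/* Editor's note: a minimal standalone sketch of the edge-triggered
+ * registration performed above; it is not part of the original file and
+ * <fd> is an assumed non-blocking socket. With EPOLLET, an fd is only
+ * reported again after a new readiness transition, so the handler must
+ * drain it until EAGAIN before returning to epoll_wait().
+ */
+static void epoll_et_sketch(int fd)
+{
+ struct epoll_event ev = { };
+ int ep = epoll_create1(0);
+
+ if (ep < 0)
+ return;
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLET;
+ ev.data.fd = fd;
+ epoll_ctl(ep, EPOLL_CTL_ADD, fd, &ev);
+ /* ... epoll_wait(ep, ...), then read()/write() until EAGAIN ... */
+ close(ep);
+}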
+
+/*
+ * Linux epoll() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int fd;
+ int count;
+ int updt_idx;
+ int wait_time;
+ int old_fd;
+
+ /* first, scan the update list to find polling changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ if (!fd_grab_tgid(fd, 1)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+ fd_nbupdt = 0;
+
+ /* Scan the shared update list */
+ for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+
+ if (!fd_grab_tgid(fd, 1)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ if (!(fdtab[fd].update_mask & tid_bit)) {
+ fd_drop_tgid(fd);
+ continue;
+ }
+
+ done_update_polling(fd);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ clock_entering_poll();
+
+ do {
+ int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
+
+ status = epoll_wait(epoll_fd[tid], epoll_events, global.tune.maxpollevents, timeout);
+ clock_update_date(timeout, status);
+
+ if (status) {
+ activity[tid].poll_io++;
+ break;
+ }
+ if (timeout || !wait_time)
+ break;
+ if (tick_isset(exp) && tick_is_expired(exp, now_ms))
+ break;
+ } while (1);
+
+ clock_leaving_poll(wait_time, status);
+
+ thread_harmless_end();
+ thread_idle_end();
+
+ if (sleeping_thread_mask & tid_bit)
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
+
+ /* process polled events */
+
+ for (count = 0; count < status; count++) {
+ unsigned int n, e;
+
+ e = epoll_events[count].events;
+ fd = epoll_events[count].data.fd;
+
+ if ((e & EPOLLRDHUP) && !(cur_poller.flags & HAP_POLL_F_RDHUP))
+ _HA_ATOMIC_OR(&cur_poller.flags, HAP_POLL_F_RDHUP);
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ n = ((e & EPOLLIN) ? FD_EV_READY_R : 0) |
+ ((e & EPOLLOUT) ? FD_EV_READY_W : 0) |
+ ((e & EPOLLRDHUP) ? FD_EV_SHUT_R : 0) |
+ ((e & EPOLLHUP) ? FD_EV_SHUT_RW : 0) |
+ ((e & EPOLLERR) ? FD_EV_ERR_RW : 0);
+
+ fd_update_events(fd, n);
+ }
+ /* the caller will take care of cached events */
+}
+
+static int init_epoll_per_thread()
+{
+ int fd;
+
+ epoll_events = calloc(1, sizeof(struct epoll_event) * global.tune.maxpollevents);
+ if (epoll_events == NULL)
+ goto fail_alloc;
+
+ if (MAX_THREADS > 1 && tid) {
+ epoll_fd[tid] = epoll_create(global.maxsock + 1);
+ if (epoll_fd[tid] < 0)
+ goto fail_fd;
+ }
+
+ /* we may have to unregister some events initially registered on the
+ * original fd when it was alone, and/or to register events on the new
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+ for (fd = 0; fd < global.maxsock; fd++)
+ updt_fd_polling(fd);
+
+ return 1;
+ fail_fd:
+ free(epoll_events);
+ fail_alloc:
+ return 0;
+}
+
+static void deinit_epoll_per_thread()
+{
+ if (MAX_THREADS > 1 && tid)
+ close(epoll_fd[tid]);
+
+ ha_free(&epoll_events);
+}
+
+/*
+ * Initialization of the epoll() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ p->private = NULL;
+
+ epoll_fd[tid] = epoll_create(global.maxsock + 1);
+ if (epoll_fd[tid] < 0)
+ goto fail_fd;
+
+ hap_register_per_thread_init(init_epoll_per_thread);
+ hap_register_per_thread_deinit(deinit_epoll_per_thread);
+
+ return 1;
+
+ fail_fd:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the epoll() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ if (epoll_fd[tid] >= 0) {
+ close(epoll_fd[tid]);
+ epoll_fd[tid] = -1;
+ }
+
+ p->private = NULL;
+ p->pref = 0;
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ int fd;
+
+ fd = epoll_create(global.maxsock + 1);
+ if (fd < 0)
+ return 0;
+ close(fd);
+ return 1;
+}
+
+/*
+ * Recreate the epoll file descriptor after a fork(). Returns 1 if OK,
+ * otherwise 0. This ensures that processes do not share their epoll_fd;
+ * sharing it caused side effects such as epoll_wait() returning an FD
+ * which had previously been deleted.
+ */
+static int _do_fork(struct poller *p)
+{
+ if (epoll_fd[tid] >= 0)
+ close(epoll_fd[tid]);
+ epoll_fd[tid] = epoll_create(global.maxsock + 1);
+ if (epoll_fd[tid] < 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+ int i;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ epoll_fd[i] = -1;
+
+ p = &pollers[nbpollers++];
+
+ p->name = "epoll";
+ p->pref = 300;
+ p->flags = HAP_POLL_F_ERRHUP; // note: RDHUP might be dynamically added
+ p->private = NULL;
+
+ p->clo = __fd_clo;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+ p->fork = _do_fork;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ev_evports.c b/src/ev_evports.c
new file mode 100644
index 0000000..acbb76f
--- /dev/null
+++ b/src/ev_evports.c
@@ -0,0 +1,447 @@
+/*
+ * FD polling functions for SunOS event ports.
+ *
+ * Copyright 2018 Joyent, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <poll.h>
+#include <port.h>
+#include <errno.h>
+#include <syslog.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+/*
+ * Private data:
+ */
+static int evports_fd[MAX_THREADS]; // per-thread evports_fd
+static THREAD_LOCAL port_event_t *evports_evlist = NULL;
+static THREAD_LOCAL int evports_evlist_max = 0;
+
+/*
+ * Convert the "state" member of "fdtab" into an event ports event mask.
+ */
+static inline int evports_state_to_events(int state)
+{
+ int events = 0;
+
+ if (state & FD_EV_ACTIVE_W)
+ events |= POLLOUT;
+ if (state & FD_EV_ACTIVE_R)
+ events |= POLLIN;
+
+ return (events);
+}
+
+/*
+ * Associate or dissociate this file descriptor with the event port, using the
+ * specified event mask.
+ */
+static inline void evports_resync_fd(int fd, int events)
+{
+ if (events == 0)
+ port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd);
+ else
+ port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL);
+}
+
+static void _update_fd(int fd)
+{
+ int en;
+ int events;
+
+ en = fdtab[fd].state;
+
+ if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) {
+ if (!(polled_mask[fd].poll_recv & tid_bit) &&
+ !(polled_mask[fd].poll_send & tid_bit)) {
+ /* fd was not watched, it's still not */
+ return;
+ }
+ /* fd totally removed from poll list */
+ events = 0;
+ if (polled_mask[fd].poll_recv & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ if (polled_mask[fd].poll_send & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+ events = evports_state_to_events(en);
+ if (en & FD_EV_ACTIVE_R) {
+ if (!(polled_mask[fd].poll_recv & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
+ } else {
+ if (polled_mask[fd].poll_recv & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ }
+ if (en & FD_EV_ACTIVE_W) {
+ if (!(polled_mask[fd].poll_send & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
+ } else {
+ if (polled_mask[fd].poll_send & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+ }
+
+ }
+ evports_resync_fd(fd, events);
+}
+
+/*
+ * Event Ports poller. This routine interacts with the file descriptor
+ * management data structures and routines; see the large block comment in
+ * "src/fd.c" for more information.
+ */
+
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int i;
+ int wait_time;
+ struct timespec timeout_ts;
+ unsigned int nevlist;
+ int fd, old_fd;
+ int status;
+
+ /*
+ * Scan the list of file descriptors with an updated status:
+ */
+ for (i = 0; i < fd_nbupdt; i++) {
+ fd = fd_updt[i];
+
+ if (!fd_grab_tgid(fd, 1)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+ fd_nbupdt = 0;
+
+ /* Scan the shared update list */
+ for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+
+ if (!fd_grab_tgid(fd, 1)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ if (!(fdtab[fd].update_mask & tid_bit)) {
+ fd_drop_tgid(fd);
+ continue;
+ }
+
+ done_update_polling(fd);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ clock_entering_poll();
+
+ do {
+ int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
+ int interrupted = 0;
+ nevlist = 1; /* desired number of events to be retrieved */
+ timeout_ts.tv_sec = (timeout / 1000);
+ timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
+
+ status = port_getn(evports_fd[tid],
+ evports_evlist,
+ evports_evlist_max,
+ &nevlist, /* updated to the number of events retrieved */
+ &timeout_ts);
+ if (status != 0) {
+ int e = errno;
+ switch (e) {
+ case ETIME:
+ /*
+ * Though the manual page has not historically made it
+ * clear, port_getn() can return -1 with an errno of
+ * ETIME and still have returned some number of events.
+ */
+ /* nevlist >= 0 */
+ break;
+ default:
+ nevlist = 0;
+ interrupted = 1;
+ break;
+ }
+ }
+ clock_update_date(timeout, nevlist);
+
+ if (nevlist || interrupted)
+ break;
+ if (timeout || !wait_time)
+ break;
+ if (tick_isset(exp) && tick_is_expired(exp, now_ms))
+ break;
+ } while(1);
+
+ clock_leaving_poll(wait_time, nevlist);
+
+ thread_harmless_end();
+ thread_idle_end();
+
+ if (sleeping_thread_mask & tid_bit)
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
+
+ if (nevlist > 0)
+ activity[tid].poll_io++;
+
+ for (i = 0; i < nevlist; i++) {
+ unsigned int n = 0;
+ int events, rebind_events;
+ int ret;
+
+ fd = evports_evlist[i].portev_object;
+ events = evports_evlist[i].portev_events;
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ /*
+ * By virtue of receiving an event for this file descriptor, it
+ * is no longer associated with the port in question. Store
+ * the previous event mask so that we may reassociate after
+ * processing is complete.
+ */
+ rebind_events = evports_state_to_events(fdtab[fd].state);
+ /* rebind_events != 0 */
+
+ /*
+ * Set bits based on the events we received from the port:
+ */
+ n = ((events & POLLIN) ? FD_EV_READY_R : 0) |
+ ((events & POLLOUT) ? FD_EV_READY_W : 0) |
+ ((events & POLLHUP) ? FD_EV_SHUT_RW : 0) |
+ ((events & POLLERR) ? FD_EV_ERR_RW : 0);
+
+ /*
+ * Call connection processing callbacks. Note that it's
+ * possible for this processing to alter the required event
+ * port association; i.e., the "state" member of the "fdtab"
+ * entry. If it changes, the fd will be placed on the updated
+ * list for processing the next time we are called.
+ */
+ ret = fd_update_events(fd, n);
+
+ /* polling will be on this instance if the FD was migrated */
+ if (ret == FD_UPDT_MIGRATED)
+ continue;
+
+ /*
+ * This file descriptor was closed during the processing of
+ * polled events. No need to reassociate.
+ */
+ if (ret == FD_UPDT_CLOSED)
+ continue;
+
+ /*
+ * Reassociate with the port, using the same event mask as
+ * before. This call will not result in a dissociation as we
+ * asserted that _some_ events needed to be rebound above.
+ *
+ * Reassociating with the same mask allows us to mimic the
+ * level-triggered behaviour of poll(2). In the event that we
+ * are interested in the same events on the next turn of the
+ * loop, this represents no extra work.
+ *
+ * If this additional port_associate(3C) call becomes a
+ * performance problem, we would need to verify that we can
+ * correctly interact with the file descriptor cache and update
+ * list (see "src/fd.c") to avoid reassociating here, or to use
+ * a different events mask.
+ */
+ evports_resync_fd(fd, rebind_events);
+ }
+}
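+
+/* Editor's note: a minimal standalone sketch of the one-shot association
+ * model handled above; it is not part of the original file and <fd> is an
+ * assumed readable descriptor. Each delivered event dissociates the fd
+ * from the port, so it must be re-associated to keep polling it.
+ */
+static void evports_sketch(int fd)
+{
+ port_event_t pe;
+ uint_t nget = 1;
+ int port = port_create();
+
+ if (port == -1)
+ return;
+ port_associate(port, PORT_SOURCE_FD, fd, POLLIN, NULL);
+ port_getn(port, &pe, 1, &nget, NULL); /* <fd> is now dissociated */
+ port_associate(port, PORT_SOURCE_FD, fd, POLLIN, NULL); /* re-arm */
+ close(port);
+}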
+
+static int init_evports_per_thread()
+{
+ int fd;
+
+ evports_evlist_max = global.tune.maxpollevents;
+ evports_evlist = calloc(evports_evlist_max, sizeof(*evports_evlist));
+ if (evports_evlist == NULL) {
+ goto fail_alloc;
+ }
+
+ if (MAX_THREADS > 1 && tid) {
+ if ((evports_fd[tid] = port_create()) == -1) {
+ goto fail_fd;
+ }
+ }
+
+ /* we may have to unregister some events initially registered on the
+ * original fd when it was alone, and/or to register events on the new
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+ for (fd = 0; fd < global.maxsock; fd++)
+ updt_fd_polling(fd);
+
+ return 1;
+
+ fail_fd:
+ ha_free(&evports_evlist);
+ evports_evlist_max = 0;
+ fail_alloc:
+ return 0;
+}
+
+static void deinit_evports_per_thread()
+{
+ if (MAX_THREADS > 1 && tid)
+ close(evports_fd[tid]);
+
+ ha_free(&evports_evlist);
+ evports_evlist_max = 0;
+}
+
+/*
+ * Initialisation of the event ports poller.
+ * Returns 0 in case of failure, non-zero in case of success.
+ */
+static int _do_init(struct poller *p)
+{
+ p->private = NULL;
+
+ if ((evports_fd[tid] = port_create()) == -1) {
+ goto fail;
+ }
+
+ hap_register_per_thread_init(init_evports_per_thread);
+ hap_register_per_thread_deinit(deinit_evports_per_thread);
+
+ return 1;
+
+fail:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the event ports poller.
+ * All resources are released and the poller is marked as inoperative.
+ */
+static void _do_term(struct poller *p)
+{
+ if (evports_fd[tid] != -1) {
+ close(evports_fd[tid]);
+ evports_fd[tid] = -1;
+ }
+
+ p->private = NULL;
+ p->pref = 0;
+
+ ha_free(&evports_evlist);
+ evports_evlist_max = 0;
+}
+
+/*
+ * Run-time check to make sure we can allocate the resources needed for
+ * the poller to function correctly.
+ * Returns 1 on success, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ int fd;
+
+ if ((fd = port_create()) == -1) {
+ return 0;
+ }
+
+ close(fd);
+ return 1;
+}
+
+/*
+ * Close and recreate the event port after fork(). Returns 1 on success,
+ * otherwise 0. If this function fails, "_do_term()" must be called to
+ * clean up the poller.
+ */
+static int _do_fork(struct poller *p)
+{
+ if (evports_fd[tid] != -1) {
+ close(evports_fd[tid]);
+ }
+
+ if ((evports_fd[tid] = port_create()) == -1) {
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+ int i;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ evports_fd[i] = -1;
+
+ p = &pollers[nbpollers++];
+
+ p->name = "evports";
+ p->pref = 300;
+ p->flags = HAP_POLL_F_ERRHUP;
+ p->private = NULL;
+
+ p->clo = NULL;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+ p->fork = _do_fork;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c
new file mode 100644
index 0000000..a3591ad
--- /dev/null
+++ b/src/ev_kqueue.c
@@ -0,0 +1,386 @@
+/*
+ * FD polling functions for FreeBSD kqueue()
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <sys/event.h>
+#include <sys/time.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+
+/* private data */
+static int kqueue_fd[MAX_THREADS] __read_mostly; // per-thread kqueue_fd
+static THREAD_LOCAL struct kevent *kev = NULL;
+static struct kevent *kev_out = NULL; // Trash buffer for kevent() to write the eventlist in
+
+static int _update_fd(int fd, int start)
+{
+ int en;
+ int changes = start;
+
+ en = fdtab[fd].state;
+
+ if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) {
+ if (!(polled_mask[fd].poll_recv & tid_bit) &&
+ !(polled_mask[fd].poll_send & tid_bit)) {
+ /* fd was not watched, it's still not */
+ return changes;
+ }
+ /* fd totally removed from poll list */
+ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+ if (polled_mask[fd].poll_recv & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ if (polled_mask[fd].poll_send & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+
+ if (en & FD_EV_ACTIVE_R) {
+ if (!(polled_mask[fd].poll_recv & tid_bit)) {
+ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
+ }
+ }
+ else if (polled_mask[fd].poll_recv & tid_bit) {
+ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+ HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ }
+
+ if (en & FD_EV_ACTIVE_W) {
+ if (!(polled_mask[fd].poll_send & tid_bit)) {
+ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
+ }
+ }
+ else if (polled_mask[fd].poll_send & tid_bit) {
+ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+ }
+
+ }
+ return changes;
+}
+
+/*
+ * kqueue() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int count, fd, wait_time;
+ struct timespec timeout_ts;
+ int updt_idx;
+ int changes = 0;
+ int old_fd;
+
+ timeout_ts.tv_sec = 0;
+ timeout_ts.tv_nsec = 0;
+ /* first, scan the update list to find changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ if (!fd_grab_tgid(fd, 1)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
+
+ if (fdtab[fd].owner)
+ changes = _update_fd(fd, changes);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+ /* Scan the global update list */
+ for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+
+ if (!fd_grab_tgid(fd, 1)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ if (!(fdtab[fd].update_mask & tid_bit)) {
+ fd_drop_tgid(fd);
+ continue;
+ }
+
+ done_update_polling(fd);
+
+ if (fdtab[fd].owner)
+ changes = _update_fd(fd, changes);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ if (changes) {
+#ifdef EV_RECEIPT
+ kev[0].flags |= EV_RECEIPT;
+#else
+ /* If EV_RECEIPT isn't defined, just add an invalid entry,
+ * so that we get an error and kevent() stops before scanning
+ * the kqueue.
+ */
+ EV_SET(&kev[changes++], -1, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+#endif
+ kevent(kqueue_fd[tid], kev, changes, kev_out, changes, &timeout_ts);
+ }
+ fd_nbupdt = 0;
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ fd = global.tune.maxpollevents;
+ clock_entering_poll();
+
+ do {
+ int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
+
+ timeout_ts.tv_sec = (timeout / 1000);
+ timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
+
+ status = kevent(kqueue_fd[tid], // int kq
+ NULL, // const struct kevent *changelist
+ 0, // int nchanges
+ kev, // struct kevent *eventlist
+ fd, // int nevents
+ &timeout_ts); // const struct timespec *timeout
+ clock_update_date(timeout, status);
+
+ if (status) {
+ activity[tid].poll_io++;
+ break;
+ }
+ if (timeout || !wait_time)
+ break;
+ if (tick_isset(exp) && tick_is_expired(exp, now_ms))
+ break;
+ } while (1);
+
+ clock_leaving_poll(wait_time, status);
+
+ thread_harmless_end();
+ thread_idle_end();
+
+ if (sleeping_thread_mask & tid_bit)
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
+
+ for (count = 0; count < status; count++) {
+ unsigned int n = 0;
+
+ fd = kev[count].ident;
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ if (kev[count].filter == EVFILT_READ) {
+ if (kev[count].data || !(kev[count].flags & EV_EOF))
+ n |= FD_EV_READY_R;
+ if (kev[count].flags & EV_EOF)
+ n |= FD_EV_SHUT_R;
+ }
+ else if (kev[count].filter == EVFILT_WRITE) {
+ n |= FD_EV_READY_W;
+ if (kev[count].flags & EV_EOF)
+ n |= FD_EV_ERR_RW;
+ }
+
+ fd_update_events(fd, n);
+ }
+}
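+
+/* Editor's note: a minimal standalone sketch of the kevent() batching used
+ * above; it is not part of the original file and <fd> is an assumed
+ * readable descriptor. A single kevent() call can both submit filter
+ * changes and retrieve pending events.
+ */
+static void kqueue_sketch(int fd)
+{
+ struct kevent ch, evlist[1];
+ int kq = kqueue();
+
+ if (kq < 0)
+ return;
+ EV_SET(&ch, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ /* submit the change and wait for at most one event in one call */
+ kevent(kq, &ch, 1, evlist, 1, NULL);
+ close(kq);
+}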
+
+
+static int init_kqueue_per_thread()
+{
+ int fd;
+
+ /* we can have up to two events per fd, so allocate enough to store
+ * two events per fd, plus an extra one used when EV_RECEIPT isn't
+ * defined to add an invalid entry and trigger an error, avoiding a
+ * useless scan of the kqueue.
+ */
+ kev = calloc(1, sizeof(struct kevent) * (2 * global.maxsock + 1));
+ if (kev == NULL)
+ goto fail_alloc;
+
+ if (MAX_THREADS > 1 && tid) {
+ kqueue_fd[tid] = kqueue();
+ if (kqueue_fd[tid] < 0)
+ goto fail_fd;
+ }
+
+ /* we may have to unregister some events initially registered on the
+ * original fd when it was alone, and/or to register events on the new
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+ for (fd = 0; fd < global.maxsock; fd++)
+ updt_fd_polling(fd);
+
+ return 1;
+ fail_fd:
+ free(kev);
+ fail_alloc:
+ return 0;
+}
+
+static void deinit_kqueue_per_thread()
+{
+ if (MAX_THREADS > 1 && tid)
+ close(kqueue_fd[tid]);
+
+ ha_free(&kev);
+}
+
+/*
+ * Initialization of the kqueue() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ p->private = NULL;
+
+	/* we can have up to two events per fd, so allocate enough room to
+	 * store 2*maxsock events, plus an extra one in case EV_RECEIPT isn't
+	 * defined, so that we can add an invalid entry and get an error, to
+	 * avoid uselessly scanning the kqueue.
+	 */
+ kev_out = calloc(1, sizeof(struct kevent) * (2 * global.maxsock + 1));
+ if (!kev_out)
+ goto fail_alloc;
+
+ kqueue_fd[tid] = kqueue();
+ if (kqueue_fd[tid] < 0)
+ goto fail_fd;
+
+ hap_register_per_thread_init(init_kqueue_per_thread);
+ hap_register_per_thread_deinit(deinit_kqueue_per_thread);
+ return 1;
+
+ fail_fd:
+ ha_free(&kev_out);
+ fail_alloc:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the kqueue() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ if (kqueue_fd[tid] >= 0) {
+ close(kqueue_fd[tid]);
+ kqueue_fd[tid] = -1;
+ }
+
+ p->private = NULL;
+ p->pref = 0;
+ if (kev_out) {
+ ha_free(&kev_out);
+ }
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ int fd;
+
+ fd = kqueue();
+ if (fd < 0)
+ return 0;
+ close(fd);
+ return 1;
+}
+
+/*
+ * Recreate the kqueue file descriptor after a fork(). Returns 1 if OK,
+ * otherwise 0. Note that some pollers need to be reopened after a fork()
+ * (such as kqueue), and some others may fail to do so in a chroot.
+ */
+static int _do_fork(struct poller *p)
+{
+ kqueue_fd[tid] = kqueue();
+ if (kqueue_fd[tid] < 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+ int i;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ kqueue_fd[i] = -1;
+
+ p = &pollers[nbpollers++];
+
+ p->name = "kqueue";
+ p->pref = 300;
+ p->flags = HAP_POLL_F_RDHUP | HAP_POLL_F_ERRHUP;
+ p->private = NULL;
+
+ p->clo = NULL;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+ p->fork = _do_fork;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ev_poll.c b/src/ev_poll.c
new file mode 100644
index 0000000..3882a4e
--- /dev/null
+++ b/src/ev_poll.c
@@ -0,0 +1,343 @@
+/*
+ * FD polling functions for generic poll()
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE // for POLLRDHUP on Linux
+
+#include <unistd.h>
+#include <poll.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+
+#ifndef POLLRDHUP
+/* POLLRDHUP was defined late in libc, and it appeared in kernel 2.6.17 */
+#define POLLRDHUP 0
+#endif
+
+static int maxfd; /* # of the highest fd + 1 */
+static unsigned int *fd_evts[2];
+
+/* private data */
+static THREAD_LOCAL int nbfd = 0;
+static THREAD_LOCAL struct pollfd *poll_events = NULL;
+
+static void __fd_clo(int fd)
+{
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+}
+
+static void _update_fd(int fd, int *max_add_fd)
+{
+ int en;
+
+ en = fdtab[fd].state;
+
+ /* we have a single state for all threads, which is why we
+ * don't check the tid_bit. First thread to see the update
+ * takes it for every other one.
+ */
+ if (!(en & FD_EV_ACTIVE_RW)) {
+ if (!(polled_mask[fd].poll_recv | polled_mask[fd].poll_send)) {
+ /* fd was not watched, it's still not */
+ return;
+ }
+ /* fd totally removed from poll list */
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, 0);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, 0);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+ if (!(en & FD_EV_ACTIVE_R)) {
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ if (polled_mask[fd].poll_recv & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ } else {
+ hap_fd_set(fd, fd_evts[DIR_RD]);
+ if (!(polled_mask[fd].poll_recv & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
+ }
+
+ if (!(en & FD_EV_ACTIVE_W)) {
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ if (polled_mask[fd].poll_send & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+		} else {
+ hap_fd_set(fd, fd_evts[DIR_WR]);
+ if (!(polled_mask[fd].poll_send & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
+ }
+
+ if (fd > *max_add_fd)
+ *max_add_fd = fd;
+ }
+}
+
+/*
+ * Poll() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int fd;
+ int wait_time;
+ int updt_idx;
+ int fds, count;
+ int sr, sw;
+ int old_maxfd, new_maxfd, max_add_fd;
+ unsigned rn, wn; /* read new, write new */
+ int old_fd;
+
+ max_add_fd = -1;
+
+ /* first, scan the update list to find changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
+ if (!fdtab[fd].owner) {
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+ _update_fd(fd, &max_add_fd);
+ }
+
+ /* Now scan the global update list */
+ for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+ if (fdtab[fd].update_mask & tid_bit) {
+ /* Cheat a bit, as the state is global to all pollers
+ * we don't need every thread to take care of the
+ * update.
+ */
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~all_threads_mask);
+ done_update_polling(fd);
+ } else
+ continue;
+ if (!fdtab[fd].owner)
+ continue;
+ _update_fd(fd, &max_add_fd);
+ }
+
+ /* maybe we added at least one fd larger than maxfd */
+ for (old_maxfd = maxfd; old_maxfd <= max_add_fd; ) {
+ if (_HA_ATOMIC_CAS(&maxfd, &old_maxfd, max_add_fd + 1))
+ break;
+ }
+
+ /* maxfd doesn't need to be precise but it needs to cover *all* active
+ * FDs. Thus we only shrink it if we have such an opportunity. The algo
+ * is simple : look for the previous used place, try to update maxfd to
+ * point to it, abort if maxfd changed in the mean time.
+ */
+ old_maxfd = maxfd;
+ do {
+ new_maxfd = old_maxfd;
+ while (new_maxfd - 1 >= 0 && !fdtab[new_maxfd - 1].owner)
+ new_maxfd--;
+ if (new_maxfd >= old_maxfd)
+ break;
+ } while (!_HA_ATOMIC_CAS(&maxfd, &old_maxfd, new_maxfd));
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ fd_nbupdt = 0;
+
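+	/* build the pollfd array from the shared read/write bitmaps, one
+	 * word at a time, skipping words with no bit set.
+	 */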
+ nbfd = 0;
+ for (fds = 0; (fds * 8*sizeof(**fd_evts)) < maxfd; fds++) {
+ rn = fd_evts[DIR_RD][fds];
+ wn = fd_evts[DIR_WR][fds];
+
+ if (!(rn|wn))
+ continue;
+
+ for (count = 0, fd = fds * 8*sizeof(**fd_evts); count < 8*sizeof(**fd_evts) && fd < maxfd; count++, fd++) {
+ sr = (rn >> count) & 1;
+ sw = (wn >> count) & 1;
+ if ((sr|sw)) {
+ if (!fdtab[fd].owner) {
+ /* should normally not happen here except
+ * due to rare thread concurrency
+ */
+ continue;
+ }
+
+ if (!(fdtab[fd].thread_mask & tid_bit)) {
+ continue;
+ }
+
+ poll_events[nbfd].fd = fd;
+ poll_events[nbfd].events = (sr ? (POLLIN | POLLRDHUP) : 0) | (sw ? POLLOUT : 0);
+ nbfd++;
+ }
+ }
+ }
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ clock_entering_poll();
+ status = poll(poll_events, nbfd, wait_time);
+ clock_update_date(wait_time, status);
+ clock_leaving_poll(wait_time, status);
+
+ thread_harmless_end();
+ thread_idle_end();
+
+ if (sleeping_thread_mask & tid_bit)
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
+
+ if (status > 0)
+ activity[tid].poll_io++;
+
+ for (count = 0; status > 0 && count < nbfd; count++) {
+ unsigned int n;
+ int e = poll_events[count].revents;
+
+ fd = poll_events[count].fd;
+
+ if ((e & POLLRDHUP) && !(cur_poller.flags & HAP_POLL_F_RDHUP))
+ _HA_ATOMIC_OR(&cur_poller.flags, HAP_POLL_F_RDHUP);
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ if (!(e & ( POLLOUT | POLLIN | POLLERR | POLLHUP | POLLRDHUP )))
+ continue;
+
+ /* ok, we found one active fd */
+ status--;
+
+ n = ((e & POLLIN) ? FD_EV_READY_R : 0) |
+ ((e & POLLOUT) ? FD_EV_READY_W : 0) |
+ ((e & POLLRDHUP) ? FD_EV_SHUT_R : 0) |
+ ((e & POLLHUP) ? FD_EV_SHUT_RW : 0) |
+ ((e & POLLERR) ? FD_EV_ERR_RW : 0);
+
+ fd_update_events(fd, n);
+ }
+}
+
+
+static int init_poll_per_thread()
+{
+ poll_events = calloc(1, sizeof(struct pollfd) * global.maxsock);
+ if (poll_events == NULL)
+ return 0;
+ return 1;
+}
+
+static void deinit_poll_per_thread()
+{
+ ha_free(&poll_events);
+}
+
+/*
+ * Initialization of the poll() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ __label__ fail_swevt, fail_srevt;
+ int fd_evts_bytes;
+
+ p->private = NULL;
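+	/* round up to a whole number of words so that each direction's
+	 * bitmap covers at least global.maxsock bits.
+	 */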
+ fd_evts_bytes = (global.maxsock + sizeof(**fd_evts) * 8 - 1) / (sizeof(**fd_evts) * 8) * sizeof(**fd_evts);
+
+ if ((fd_evts[DIR_RD] = calloc(1, fd_evts_bytes)) == NULL)
+ goto fail_srevt;
+ if ((fd_evts[DIR_WR] = calloc(1, fd_evts_bytes)) == NULL)
+ goto fail_swevt;
+
+ hap_register_per_thread_init(init_poll_per_thread);
+ hap_register_per_thread_deinit(deinit_poll_per_thread);
+
+ return 1;
+
+ fail_swevt:
+ free(fd_evts[DIR_RD]);
+ fail_srevt:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the poll() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ free(fd_evts[DIR_WR]);
+ free(fd_evts[DIR_RD]);
+ p->private = NULL;
+ p->pref = 0;
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+ p = &pollers[nbpollers++];
+
+ p->name = "poll";
+ p->pref = 200;
+ p->flags = HAP_POLL_F_ERRHUP;
+ p->private = NULL;
+
+ p->clo = __fd_clo;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ev_select.c b/src/ev_select.c
new file mode 100644
index 0000000..3880d0d
--- /dev/null
+++ b/src/ev_select.c
@@ -0,0 +1,332 @@
+/*
+ * FD polling functions for generic select()
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+
+/* private data */
+static int maxfd; /* # of the highest fd + 1 */
+static unsigned int *fd_evts[2];
+static THREAD_LOCAL fd_set *tmp_evts[2];
+
+/* Immediately remove the entry upon close() */
+static void __fd_clo(int fd)
+{
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+}
+
+static void _update_fd(int fd, int *max_add_fd)
+{
+ int en;
+
+ en = fdtab[fd].state;
+
+ /* we have a single state for all threads, which is why we
+ * don't check the tid_bit. First thread to see the update
+ * takes it for every other one.
+ */
+ if (!(en & FD_EV_ACTIVE_RW)) {
+ if (!(polled_mask[fd].poll_recv | polled_mask[fd].poll_send)) {
+ /* fd was not watched, it's still not */
+ return;
+ }
+ /* fd totally removed from poll list */
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, 0);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, 0);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+ if (!(en & FD_EV_ACTIVE_R)) {
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ if (polled_mask[fd].poll_recv & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
+ } else {
+ hap_fd_set(fd, fd_evts[DIR_RD]);
+ if (!(polled_mask[fd].poll_recv & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
+ }
+
+ if (!(en & FD_EV_ACTIVE_W)) {
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ if (polled_mask[fd].poll_send & tid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
+ } else {
+ hap_fd_set(fd, fd_evts[DIR_WR]);
+ if (!(polled_mask[fd].poll_send & tid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
+ }
+
+ if (fd > *max_add_fd)
+ *max_add_fd = fd;
+ }
+}
+
+/*
+ * Select() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int fd, i;
+ struct timeval delta;
+ int delta_ms;
+ int fds;
+ int updt_idx;
+ char count;
+ int readnotnull, writenotnull;
+ int old_maxfd, new_maxfd, max_add_fd;
+ int old_fd;
+
+ max_add_fd = -1;
+
+ /* first, scan the update list to find changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
+ if (!fdtab[fd].owner) {
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+ _update_fd(fd, &max_add_fd);
+ }
+ /* Now scan the global update list */
+ for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+ if (fdtab[fd].update_mask & tid_bit) {
+ /* Cheat a bit, as the state is global to all pollers
+ * we don't need every thread to take care of the
+ * update.
+ */
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~all_threads_mask);
+ done_update_polling(fd);
+ } else
+ continue;
+ if (!fdtab[fd].owner)
+ continue;
+ _update_fd(fd, &max_add_fd);
+ }
+
+
+ /* maybe we added at least one fd larger than maxfd */
+ for (old_maxfd = maxfd; old_maxfd <= max_add_fd; ) {
+ if (_HA_ATOMIC_CAS(&maxfd, &old_maxfd, max_add_fd + 1))
+ break;
+ }
+
+ /* maxfd doesn't need to be precise but it needs to cover *all* active
+ * FDs. Thus we only shrink it if we have such an opportunity. The algo
+ * is simple : look for the previous used place, try to update maxfd to
+ * point to it, abort if maxfd changed in the mean time.
+ */
+ old_maxfd = maxfd;
+ do {
+ new_maxfd = old_maxfd;
+ while (new_maxfd - 1 >= 0 && !fdtab[new_maxfd - 1].owner)
+ new_maxfd--;
+ if (new_maxfd >= old_maxfd)
+ break;
+ } while (!_HA_ATOMIC_CAS(&maxfd, &old_maxfd, new_maxfd));
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ fd_nbupdt = 0;
+
+ /* let's restore fdset state */
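+	/* copy the shared bitmaps into the thread-local fd_sets one int at
+	 * a time, noting on the fly whether any bit is set per direction.
+	 */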
+ readnotnull = 0; writenotnull = 0;
+ for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
+ readnotnull |= (*(((int*)tmp_evts[DIR_RD])+i) = *(((int*)fd_evts[DIR_RD])+i)) != 0;
+ writenotnull |= (*(((int*)tmp_evts[DIR_WR])+i) = *(((int*)fd_evts[DIR_WR])+i)) != 0;
+ }
+
+ /* now let's wait for events */
+ delta_ms = wake ? 0 : compute_poll_timeout(exp);
+ delta.tv_sec = (delta_ms / 1000);
+ delta.tv_usec = (delta_ms % 1000) * 1000;
+ clock_entering_poll();
+ status = select(maxfd,
+ readnotnull ? tmp_evts[DIR_RD] : NULL,
+ writenotnull ? tmp_evts[DIR_WR] : NULL,
+ NULL,
+ &delta);
+ clock_update_date(delta_ms, status);
+ clock_leaving_poll(delta_ms, status);
+
+ thread_harmless_end();
+ thread_idle_end();
+
+ if (sleeping_thread_mask & tid_bit)
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
+
+ if (status <= 0)
+ return;
+
+ activity[tid].poll_io++;
+
+ for (fds = 0; (fds * BITS_PER_INT) < maxfd; fds++) {
+ if ((((int *)(tmp_evts[DIR_RD]))[fds] | ((int *)(tmp_evts[DIR_WR]))[fds]) == 0)
+ continue;
+
+ for (count = BITS_PER_INT, fd = fds * BITS_PER_INT; count && fd < maxfd; count--, fd++) {
+ unsigned int n = 0;
+
+ if (FD_ISSET(fd, tmp_evts[DIR_RD]))
+ n |= FD_EV_READY_R;
+
+ if (FD_ISSET(fd, tmp_evts[DIR_WR]))
+ n |= FD_EV_READY_W;
+
+ if (!n)
+ continue;
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+
+ fd_update_events(fd, n);
+ }
+ }
+}
+
+static int init_select_per_thread()
+{
+ int fd_set_bytes;
+
+ fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
+ tmp_evts[DIR_RD] = calloc(1, fd_set_bytes);
+ if (tmp_evts[DIR_RD] == NULL)
+ goto fail;
+ tmp_evts[DIR_WR] = calloc(1, fd_set_bytes);
+ if (tmp_evts[DIR_WR] == NULL)
+ goto fail;
+ return 1;
+ fail:
+ free(tmp_evts[DIR_RD]);
+ free(tmp_evts[DIR_WR]);
+ return 0;
+}
+
+static void deinit_select_per_thread()
+{
+ ha_free(&tmp_evts[DIR_WR]);
+ ha_free(&tmp_evts[DIR_RD]);
+}
+
+/*
+ * Initialization of the select() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ int fd_set_bytes;
+
+ p->private = NULL;
+
+ if (global.maxsock > FD_SETSIZE)
+ goto fail_srevt;
+
+ fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
+
+ if ((fd_evts[DIR_RD] = calloc(1, fd_set_bytes)) == NULL)
+ goto fail_srevt;
+ if ((fd_evts[DIR_WR] = calloc(1, fd_set_bytes)) == NULL)
+ goto fail_swevt;
+
+ hap_register_per_thread_init(init_select_per_thread);
+ hap_register_per_thread_deinit(deinit_select_per_thread);
+
+ return 1;
+
+ fail_swevt:
+ free(fd_evts[DIR_RD]);
+ fail_srevt:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the select() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ free(fd_evts[DIR_WR]);
+ free(fd_evts[DIR_RD]);
+ p->private = NULL;
+ p->pref = 0;
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ if (global.maxsock > FD_SETSIZE)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+ p = &pollers[nbpollers++];
+
+ p->name = "select";
+ p->pref = 150;
+ p->flags = 0;
+ p->private = NULL;
+
+ p->clo = __fd_clo;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/extcheck.c b/src/extcheck.c
new file mode 100644
index 0000000..0843431
--- /dev/null
+++ b/src/extcheck.c
@@ -0,0 +1,678 @@
+/*
+ * External health-checks functions.
+ *
+ * Copyright 2000-2009,2020 Willy Tarreau <w@1wt.eu>
+ * Copyright 2014 Horms Solutions Ltd, Simon Horman <horms@verge.net.au>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/proxy.h>
+#include <haproxy/server.h>
+#include <haproxy/signal.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+static struct list pid_list = LIST_HEAD_INIT(pid_list);
+static struct pool_head *pool_head_pid_list __read_mostly;
+__decl_spinlock(pid_list_lock);
+
+struct extcheck_env {
+ char *name; /* environment variable name */
+ int vmaxlen; /* value maximum length, used to determine the required memory allocation */
+};
+
+/* environment variables memory requirement for different types of data */
+#define EXTCHK_SIZE_EVAL_INIT 0 /* size determined during the init phase,
+ * such environment variables are not updatable. */
+#define EXTCHK_SIZE_ULONG 20 /* max string length for an unsigned long value */
+#define EXTCHK_SIZE_UINT 11 /* max string length for an unsigned int value */
+#define EXTCHK_SIZE_ADDR 256 /* max string length for an IPv4/IPv6/UNIX address */
+
+/* external checks environment variables */
+enum {
+ EXTCHK_PATH = 0,
+
+ /* Proxy specific environment variables */
+ EXTCHK_HAPROXY_PROXY_NAME, /* the backend name */
+ EXTCHK_HAPROXY_PROXY_ID, /* the backend id */
+ EXTCHK_HAPROXY_PROXY_ADDR, /* the first bind address if available (or empty) */
+ EXTCHK_HAPROXY_PROXY_PORT, /* the first bind port if available (or empty) */
+
+ /* Server specific environment variables */
+ EXTCHK_HAPROXY_SERVER_NAME, /* the server name */
+ EXTCHK_HAPROXY_SERVER_ID, /* the server id */
+ EXTCHK_HAPROXY_SERVER_ADDR, /* the server address */
+ EXTCHK_HAPROXY_SERVER_PORT, /* the server port if available (or empty) */
+ EXTCHK_HAPROXY_SERVER_MAXCONN, /* the server max connections */
+ EXTCHK_HAPROXY_SERVER_CURCONN, /* the current number of connections on the server */
+ EXTCHK_HAPROXY_SERVER_SSL, /* "1" if the server supports SSL, otherwise zero */
+ EXTCHK_HAPROXY_SERVER_PROTO, /* the server's configured proto, if any */
+
+ EXTCHK_SIZE
+};
+
+const struct extcheck_env extcheck_envs[EXTCHK_SIZE] = {
+ [EXTCHK_PATH] = { "PATH", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_NAME] = { "HAPROXY_PROXY_NAME", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_ID] = { "HAPROXY_PROXY_ID", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_ADDR] = { "HAPROXY_PROXY_ADDR", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_PORT] = { "HAPROXY_PROXY_PORT", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_NAME] = { "HAPROXY_SERVER_NAME", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_ID] = { "HAPROXY_SERVER_ID", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_ADDR] = { "HAPROXY_SERVER_ADDR", EXTCHK_SIZE_ADDR },
+ [EXTCHK_HAPROXY_SERVER_PORT] = { "HAPROXY_SERVER_PORT", EXTCHK_SIZE_UINT },
+ [EXTCHK_HAPROXY_SERVER_MAXCONN] = { "HAPROXY_SERVER_MAXCONN", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_CURCONN] = { "HAPROXY_SERVER_CURCONN", EXTCHK_SIZE_ULONG },
+ [EXTCHK_HAPROXY_SERVER_SSL] = { "HAPROXY_SERVER_SSL", EXTCHK_SIZE_UINT },
+ [EXTCHK_HAPROXY_SERVER_PROTO] = { "HAPROXY_SERVER_PROTO", EXTCHK_SIZE_EVAL_INIT },
+};
+
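+/* Blocks SIGCHLD so that the pid_list cannot be modified by the signal
+ * handler while it is being manipulated or while forking.
+ */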
+void block_sigchld(void)
+{
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGCHLD);
+ assert(ha_sigmask(SIG_BLOCK, &set, NULL) == 0);
+}
+
+void unblock_sigchld(void)
+{
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGCHLD);
+ assert(ha_sigmask(SIG_UNBLOCK, &set, NULL) == 0);
+}
+
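+/* Adds the process <pid> to the tracked list and attaches it to the check
+ * task <t>. Returns the new element, or NULL on allocation failure.
+ */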
+static struct pid_list *pid_list_add(pid_t pid, struct task *t)
+{
+ struct pid_list *elem;
+ struct check *check = t->context;
+
+ elem = pool_alloc(pool_head_pid_list);
+ if (!elem)
+ return NULL;
+ elem->pid = pid;
+ elem->t = t;
+ elem->exited = 0;
+ check->curpid = elem;
+ LIST_INIT(&elem->list);
+
+ HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
+ LIST_INSERT(&pid_list, &elem->list);
+ HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
+
+ return elem;
+}
+
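+/* Removes <elem> from the list. If the process has not exited yet, it is
+ * sent a SIGTERM first.
+ */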
+static void pid_list_del(struct pid_list *elem)
+{
+ struct check *check;
+
+ if (!elem)
+ return;
+
+ HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
+ LIST_DELETE(&elem->list);
+ HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
+
+ if (!elem->exited)
+ kill(elem->pid, SIGTERM);
+
+ check = elem->t->context;
+ check->curpid = NULL;
+ pool_free(pool_head_pid_list, elem);
+}
+
+/* Called from inside SIGCHLD handler, SIGCHLD is blocked */
+static void pid_list_expire(pid_t pid, int status)
+{
+ struct pid_list *elem;
+
+ HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
+ list_for_each_entry(elem, &pid_list, list) {
+ if (elem->pid == pid) {
+ elem->t->expire = now_ms;
+ elem->status = status;
+ elem->exited = 1;
+ task_wakeup(elem->t, TASK_WOKEN_IO);
+ break;
+ }
+ }
+ HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
+}
+
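+/* SIGCHLD handler: reaps all terminated children and wakes up the check
+ * tasks waiting on them.
+ */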
+static void sigchld_handler(struct sig_handler *sh)
+{
+ pid_t pid;
+ int status;
+
+ while ((pid = waitpid(0, &status, WNOHANG)) > 0)
+ pid_list_expire(pid, status);
+}
+
+int init_pid_list(void)
+{
+ if (pool_head_pid_list != NULL)
+ /* Nothing to do */
+ return 0;
+
+ if (!signal_register_fct(SIGCHLD, sigchld_handler, SIGCHLD)) {
+ ha_alert("Failed to set signal handler for external health checks: %s. Aborting.\n",
+ strerror(errno));
+ return 1;
+ }
+
+ pool_head_pid_list = create_pool("pid_list", sizeof(struct pid_list), MEM_F_SHARED);
+ if (pool_head_pid_list == NULL) {
+ ha_alert("Failed to allocate memory pool for external health checks: %s. Aborting.\n",
+ strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+
+/* helper macro to set an environment variable and jump to a specific label on failure. */
+#define EXTCHK_SETENV(check, envidx, value, fail) { if (extchk_setenv(check, envidx, value)) goto fail; }
+
+/*
+ * Helper function to allocate enough memory to store an environment variable's
+ * name and value. It also checks whether the environment variable is
+ * updatable, and silently skips the update if it is not.
+ */
+static int extchk_setenv(struct check *check, int idx, const char *value)
+{
+ int len, ret;
+ char *envname;
+ int vmaxlen;
+
+ if (idx < 0 || idx >= EXTCHK_SIZE) {
+ ha_alert("Illegal environment variable index %d. Aborting.\n", idx);
+ return 1;
+ }
+
+ envname = extcheck_envs[idx].name;
+ vmaxlen = extcheck_envs[idx].vmaxlen;
+
+ /* Check if the environment variable is already set, and silently reject
+ * the update if this one is not updatable. */
+ if ((vmaxlen == EXTCHK_SIZE_EVAL_INIT) && (check->envp[idx]))
+ return 0;
+
+ /* Instead of sending NOT_USED, sending an empty value is preferable */
+ if (strcmp(value, "NOT_USED") == 0) {
+ value = "";
+ }
+
+ len = strlen(envname) + 1;
+ if (vmaxlen == EXTCHK_SIZE_EVAL_INIT)
+ len += strlen(value);
+ else
+ len += vmaxlen;
+
+ if (!check->envp[idx])
+ check->envp[idx] = malloc(len + 1);
+
+ if (!check->envp[idx]) {
+ ha_alert("Failed to allocate memory for the environment variable '%s'. Aborting.\n", envname);
+ return 1;
+ }
+ ret = snprintf(check->envp[idx], len + 1, "%s=%s", envname, value);
+ if (ret < 0) {
+ ha_alert("Failed to store the environment variable '%s'. Reason : %s. Aborting.\n", envname, strerror(errno));
+ return 1;
+ }
+ else if (ret > len) {
+ ha_alert("Environment variable '%s' was truncated. Aborting.\n", envname);
+ return 1;
+ }
+ return 0;
+}
+
+int prepare_external_check(struct check *check)
+{
+ struct server *s = check->server;
+ struct proxy *px = s->proxy;
+ struct listener *listener = NULL, *l;
+ int i;
+ const char *path = px->check_path ? px->check_path : DEF_CHECK_PATH;
+ char buf[256];
+ const char *svmode = NULL;
+
+ list_for_each_entry(l, &px->conf.listeners, by_fe)
+ /* Use the first INET, INET6 or UNIX listener */
+ if (l->rx.addr.ss_family == AF_INET ||
+ l->rx.addr.ss_family == AF_INET6 ||
+ l->rx.addr.ss_family == AF_UNIX) {
+ listener = l;
+ break;
+ }
+
+ check->curpid = NULL;
+ check->envp = calloc((EXTCHK_SIZE + 1), sizeof(*check->envp));
+ if (!check->envp) {
+		ha_alert("Failed to allocate memory for environment variables. Aborting.\n");
+ goto err;
+ }
+
+ check->argv = calloc(6, sizeof(*check->argv));
+ if (!check->argv) {
+ ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
+ goto err;
+ }
+
+ check->argv[0] = px->check_command;
+
+ if (!listener) {
+ check->argv[1] = strdup("NOT_USED");
+ check->argv[2] = strdup("NOT_USED");
+ }
+ else if (listener->rx.addr.ss_family == AF_INET ||
+ listener->rx.addr.ss_family == AF_INET6) {
+ addr_to_str(&listener->rx.addr, buf, sizeof(buf));
+ check->argv[1] = strdup(buf);
+ port_to_str(&listener->rx.addr, buf, sizeof(buf));
+ check->argv[2] = strdup(buf);
+ }
+ else if (listener->rx.addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un;
+
+ un = (struct sockaddr_un *)&listener->rx.addr;
+ check->argv[1] = strdup(un->sun_path);
+ check->argv[2] = strdup("NOT_USED");
+ }
+ else {
+ ha_alert("Starting [%s:%s] check: unsupported address family.\n", px->id, s->id);
+ goto err;
+ }
+
+ /* args 3 and 4 are the address, they're replaced on each check */
+ check->argv[3] = calloc(EXTCHK_SIZE_ADDR, sizeof(*check->argv[3]));
+ check->argv[4] = calloc(EXTCHK_SIZE_UINT, sizeof(*check->argv[4]));
+
+ for (i = 0; i < 5; i++) {
+ if (!check->argv[i]) {
+ ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
+ goto err;
+ }
+ }
+
+ EXTCHK_SETENV(check, EXTCHK_PATH, path, err);
+ /* Add proxy environment variables */
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_NAME, px->id, err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ID, ultoa_r(px->uuid, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ADDR, check->argv[1], err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_PORT, check->argv[2], err);
+ /* Add server environment variables */
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_NAME, s->id, err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ID, ultoa_r(s->puid, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3], err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4], err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_MAXCONN, ultoa_r(s->maxconn, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_SSL, s->use_ssl ? "1" : "0", err);
+
+ switch (px->mode) {
+ case PR_MODE_CLI: svmode = "cli"; break;
+ case PR_MODE_SYSLOG: svmode = "syslog"; break;
+ case PR_MODE_PEERS: svmode = "peers"; break;
+ case PR_MODE_HTTP: svmode = (s->mux_proto) ? s->mux_proto->token.ptr : "h1"; break;
+ case PR_MODE_TCP: svmode = "tcp"; break;
+ /* all valid cases must be enumerated above, below is to avoid a warning */
+ case PR_MODES: svmode = "?"; break;
+ }
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PROTO, svmode, err);
+
+ /* Ensure that we don't leave any hole in check->envp */
+ for (i = 0; i < EXTCHK_SIZE; i++)
+ if (!check->envp[i])
+ EXTCHK_SETENV(check, i, "", err);
+
+ return 1;
+err:
+ if (check->envp) {
+ for (i = 0; i < EXTCHK_SIZE; i++)
+ free(check->envp[i]);
+ ha_free(&check->envp);
+ }
+
+ if (check->argv) {
+ for (i = 1; i < 5; i++)
+ free(check->argv[i]);
+ ha_free(&check->argv);
+ }
+ return 0;
+}
+
+/*
+ * establish a server health-check that makes use of a process.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * Blocks and then unblocks SIGCHLD
+ */
+static int connect_proc_chk(struct task *t)
+{
+ char buf[256];
+ struct check *check = t->context;
+ struct server *s = check->server;
+ struct proxy *px = s->proxy;
+ int status;
+ pid_t pid;
+
+ status = SF_ERR_RESOURCE;
+
+ block_sigchld();
+
+ pid = fork();
+ if (pid < 0) {
+ ha_alert("Failed to fork process for external health check%s: %s. Aborting.\n",
+ (global.tune.options & GTUNE_INSECURE_FORK) ?
+ "" : " (likely caused by missing 'insecure-fork-wanted')",
+ strerror(errno));
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
+ goto out;
+ }
+ if (pid == 0) {
+ /* Child */
+ extern char **environ;
+ struct rlimit limit;
+ int fd;
+
+ /* close all FDs. Keep stdin/stdout/stderr in verbose mode */
+ fd = (global.mode & (MODE_QUIET|MODE_VERBOSE)) == MODE_QUIET ? 0 : 3;
+
+ my_closefrom(fd);
+
+ /* restore the initial FD limits */
+ limit.rlim_cur = rlim_fd_cur_at_boot;
+ limit.rlim_max = rlim_fd_max_at_boot;
+ if (raise_rlim_nofile(NULL, &limit) != 0) {
+ getrlimit(RLIMIT_NOFILE, &limit);
+ ha_warning("External check: failed to restore initial FD limits (cur=%u max=%u), using cur=%u max=%u\n",
+ rlim_fd_cur_at_boot, rlim_fd_max_at_boot,
+ (unsigned int)limit.rlim_cur, (unsigned int)limit.rlim_max);
+ }
+
+ environ = check->envp;
+
+ /* Update some environment variables and command args: curconn, server addr and server port */
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)), fail);
+
+ if (s->addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un = (struct sockaddr_un *)&s->addr;
+ strlcpy2(check->argv[3], un->sun_path, EXTCHK_SIZE_ADDR);
+ memcpy(check->argv[4], "NOT_USED", 9);
+ } else {
+ addr_to_str(&s->addr, check->argv[3], EXTCHK_SIZE_ADDR);
+ *check->argv[4] = 0; // just in case the address family changed
+ if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
+ snprintf(check->argv[4], EXTCHK_SIZE_UINT, "%u", s->svc_port);
+ }
+
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3], fail);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4], fail);
+
+ haproxy_unblock_signals();
+ execvp(px->check_command, check->argv);
+ ha_alert("Failed to exec process for external health check: %s. Aborting.\n",
+ strerror(errno));
+ fail:
+ exit(-1);
+ }
+
+ /* Parent */
+ if (check->result == CHK_RES_UNKNOWN) {
+ if (pid_list_add(pid, t) != NULL) {
+ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
+
+ if (px->timeout.check && px->timeout.connect) {
+ int t_con = tick_add(now_ms, px->timeout.connect);
+ t->expire = tick_first(t->expire, t_con);
+ }
+ status = SF_ERR_NONE;
+ goto out;
+ }
+ else {
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
+ }
+ kill(pid, SIGTERM); /* process creation error */
+ }
+ else
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
+
+out:
+ unblock_sigchld();
+ return status;
+}
+
+/*
+ * Manages a server health-check that uses an external process. Returns the
+ * task itself, with its expiration date updated to the next wakeup.
+ *
+ * Please do NOT place any return statement in this function and only leave
+ * via the out_unlock label.
+ */
+struct task *process_chk_proc(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+ struct server *s = check->server;
+ int rv;
+ int ret;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+ if (!(check->state & CHK_ST_INPROGRESS)) {
+ /* no check currently running */
+ if (!expired) /* woke up too early */
+ goto out_unlock;
+
+		/* we don't send any health-checks when the proxy is
+		 * stopped, when the server should not be checked, or when
+		 * the check is disabled.
+		 */
+ if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
+ (s->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ goto reschedule;
+
+ /* we'll initiate a new check */
+ set_server_check_status(check, HCHK_STATUS_START, NULL);
+
+ check->state |= CHK_ST_INPROGRESS;
+
+ ret = connect_proc_chk(t);
+ if (ret == SF_ERR_NONE) {
+			/* the process was forked, we allow up to min(inter,
+			 * timeout.connect) for it to report its status, but
+			 * only when timeout.check is set as it may be too
+			 * short for a full check otherwise.
+			 */
+ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
+
+ if (s->proxy->timeout.check && s->proxy->timeout.connect) {
+ int t_con = tick_add(now_ms, s->proxy->timeout.connect);
+ t->expire = tick_first(t->expire, t_con);
+ }
+ task_set_affinity(t, tid_bit);
+ goto reschedule;
+ }
+
+ /* here, we failed to start the check */
+
+ check->state &= ~CHK_ST_INPROGRESS;
+ check_notify_failure(check);
+
+		/* we allow up to min(inter, timeout.connect) for a connection
+		 * to establish, but only when timeout.check is set as it may
+		 * be too short for a full check otherwise.
+		 */
+ while (tick_is_expired(t->expire, now_ms)) {
+ int t_con;
+
+ t_con = tick_add(t->expire, s->proxy->timeout.connect);
+ t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
+
+ if (s->proxy->timeout.check)
+ t->expire = tick_first(t->expire, t_con);
+ }
+ }
+ else {
+ /* there was a test running.
+ * First, let's check whether there was an uncaught error,
+ * which can happen on connect timeout or error.
+ */
+ if (check->result == CHK_RES_UNKNOWN) {
+ /* good connection is enough for pure TCP check */
+ struct pid_list *elem = check->curpid;
+ int status = HCHK_STATUS_UNKNOWN;
+
+ if (elem->exited) {
+				status = elem->status; /* save it in case it changes between the uses below */
+ if (!WIFEXITED(status))
+ check->code = -1;
+ else
+ check->code = WEXITSTATUS(status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ status = HCHK_STATUS_PROCERR;
+ else
+ status = HCHK_STATUS_PROCOK;
+ } else if (expired) {
+ status = HCHK_STATUS_PROCTOUT;
+ ha_warning("kill %d\n", (int)elem->pid);
+ kill(elem->pid, SIGTERM);
+ }
+ set_server_check_status(check, status, NULL);
+ }
+
+ if (check->result == CHK_RES_FAILED) {
+ /* a failure or timeout detected */
+ check_notify_failure(check);
+ }
+ else if (check->result == CHK_RES_CONDPASS) {
+ /* check is OK but asks for stopping mode */
+ check_notify_stopping(check);
+ }
+ else if (check->result == CHK_RES_PASSED) {
+ /* a success was detected */
+ check_notify_success(check);
+ }
+ task_set_affinity(t, 1);
+ check->state &= ~CHK_ST_INPROGRESS;
+
+ pid_list_del(check->curpid);
+
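+		/* apply a random offset of up to +/- global.spread_checks
+		 * percent to the interval so that checks don't resynchronize.
+		 */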
+ rv = 0;
+ if (global.spread_checks > 0) {
+ rv = srv_getinter(check) * global.spread_checks / 100;
+ rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
+ }
+ t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
+ }
+
+ reschedule:
+ while (tick_is_expired(t->expire, now_ms))
+ t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
+
+ out_unlock:
+ HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+ return t;
+}
+
+/* Parses the "external-check" proxy keyword */
+int proxy_parse_extcheck(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ int cur_arg, ret = 0;
+
+ cur_arg = 1;
+ if (!*(args[cur_arg])) {
+		memprintf(errmsg, "missing argument after '%s'.", args[0]);
+ goto error;
+ }
+
+ if (strcmp(args[cur_arg], "command") == 0) {
+ if (too_many_args(2, args, errmsg, NULL))
+ goto error;
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "missing argument after '%s'.", args[cur_arg]);
+ goto error;
+ }
+ free(curpx->check_command);
+ curpx->check_command = strdup(args[cur_arg+1]);
+ }
+ else if (strcmp(args[cur_arg], "path") == 0) {
+ if (too_many_args(2, args, errmsg, NULL))
+ goto error;
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "missing argument after '%s'.", args[cur_arg]);
+ goto error;
+ }
+ free(curpx->check_path);
+ curpx->check_path = strdup(args[cur_arg+1]);
+ }
+ else {
+		memprintf(errmsg, "'%s' only supports 'command' and 'path', but got '%s'.",
+			  args[0], args[1]);
+ goto error;
+ }
+
+ ret = (*errmsg != NULL); /* Handle warning */
+ return ret;
+
+error:
+ return -1;
+}
+
+int proxy_parse_external_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ int err_code = 0;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_EXT_CHK;
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ out:
+ return err_code;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "external-check", proxy_parse_extcheck },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/fcgi-app.c b/src/fcgi-app.c
new file mode 100644
index 0000000..1ece72b
--- /dev/null
+++ b/src/fcgi-app.c
@@ -0,0 +1,1152 @@
+/*
+ * Functions about FCGI applications and filters.
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/errors.h>
+#include <haproxy/fcgi-app.h>
+#include <haproxy/filters.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/server-t.h>
+#include <haproxy/session.h>
+#include <haproxy/sink.h>
+#include <haproxy/tools.h>
+
+
+/* Global list of all FCGI applications */
+static struct fcgi_app *fcgi_apps = NULL;
+
+struct flt_ops fcgi_flt_ops;
+const char *fcgi_flt_id = "FCGI filter";
+
+DECLARE_STATIC_POOL(pool_head_fcgi_flt_ctx, "fcgi_flt_ctx", sizeof(struct fcgi_flt_ctx));
+DECLARE_STATIC_POOL(pool_head_fcgi_param_rule, "fcgi_param_rule", sizeof(struct fcgi_param_rule));
+DECLARE_STATIC_POOL(pool_head_fcgi_hdr_rule, "fcgi_hdr_rule", sizeof(struct fcgi_hdr_rule));
+
+/**************************************************************************/
+/***************************** Utils **************************************/
+/**************************************************************************/
+/* Makes an FCGI parameter name (prefixed by ':fcgi-') from <name> (in
+ * lowercase). All non-alphanumeric characters are replaced by an underscore
+ * ('_'). The result is copied into <dst>, and the corresponding ist is
+ * returned.
+ */
+static struct ist fcgi_param_name(char *dst, const struct ist name)
+{
+ size_t ofs1, ofs2;
+
+ memcpy(dst, ":fcgi-", 6);
+ ofs1 = 6;
+ for (ofs2 = 0; ofs2 < name.len; ofs2++) {
+ if (isalnum((unsigned char)name.ptr[ofs2]))
+ dst[ofs1++] = ist_lc[(unsigned char)name.ptr[ofs2]];
+ else
+ dst[ofs1++] = '_';
+ }
+ return ist2(dst, ofs1);
+}
+
+/* Returns a pointer to the FCGI application matching the name <name>. NULL is
+ * returned if no match is found.
+ */
+struct fcgi_app *fcgi_app_find_by_name(const char *name)
+{
+ struct fcgi_app *app;
+
+ for (app = fcgi_apps; app != NULL; app = app->next) {
+ if (strcmp(app->name, name) == 0)
+ return app;
+ }
+
+ return NULL;
+}
+
+struct fcgi_flt_conf *find_px_fcgi_conf(struct proxy *px)
+{
+ struct flt_conf *fconf;
+
+ list_for_each_entry(fconf, &px->filter_configs, list) {
+ if (fconf->id == fcgi_flt_id)
+ return fconf->conf;
+ }
+ return NULL;
+}
+
+struct fcgi_flt_ctx *find_strm_fcgi_ctx(struct stream *s)
+{
+ struct filter *filter;
+
+ if (!s)
+ return NULL;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_ID(filter) == fcgi_flt_id)
+ return FLT_CONF(filter);
+ }
+ return NULL;
+}
+
+struct fcgi_app *get_px_fcgi_app(struct proxy *px)
+{
+ struct fcgi_flt_conf *fcgi_conf = find_px_fcgi_conf(px);
+
+ if (fcgi_conf)
+ return fcgi_conf->app;
+ return NULL;
+}
+
+struct fcgi_app *get_strm_fcgi_app(struct stream *s)
+{
+ struct fcgi_flt_ctx *fcgi_ctx = find_strm_fcgi_ctx(s);
+
+ if (fcgi_ctx)
+ return fcgi_ctx->app;
+ return NULL;
+}
+
+static void fcgi_release_rule_conf(struct fcgi_rule_conf *rule)
+{
+ if (!rule)
+ return;
+ free(rule->name);
+ free(rule->value);
+ if (rule->cond) {
+ prune_acl_cond(rule->cond);
+ free(rule->cond);
+ }
+ free(rule);
+}
+
+static void fcgi_release_rule(struct fcgi_rule *rule)
+{
+ if (!rule)
+ return;
+
+ if (!LIST_ISEMPTY(&rule->value)) {
+ struct logformat_node *lf, *lfb;
+
+ list_for_each_entry_safe(lf, lfb, &rule->value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ }
+ /* ->cond and ->name are not owned by the rule */
+ free(rule);
+}
+
+/**************************************************************************/
+/*********************** FCGI Sample fetches ******************************/
+/**************************************************************************/
+
+static int smp_fetch_fcgi_docroot(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct fcgi_app *app = get_strm_fcgi_app(smp->strm);
+
+ if (!app)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = app->docroot.ptr;
+ smp->data.u.str.data = app->docroot.len;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_fcgi_index(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct fcgi_app *app = get_strm_fcgi_app(smp->strm);
+
+ if (!app || !istlen(app->index))
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = app->index.ptr;
+ smp->data.u.str.data = app->index.len;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/**************************************************************************/
+/************************** FCGI filter ***********************************/
+/**************************************************************************/
+static int fcgi_flt_init(struct proxy *px, struct flt_conf *fconf)
+{
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+static void fcgi_flt_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct fcgi_flt_conf *fcgi_conf = fconf->conf;
+ struct fcgi_rule *rule, *back;
+
+ if (!fcgi_conf)
+ return;
+
+ free(fcgi_conf->name);
+
+ list_for_each_entry_safe(rule, back, &fcgi_conf->param_rules, list) {
+ LIST_DELETE(&rule->list);
+ fcgi_release_rule(rule);
+ }
+
+ list_for_each_entry_safe(rule, back, &fcgi_conf->hdr_rules, list) {
+ LIST_DELETE(&rule->list);
+ fcgi_release_rule(rule);
+ }
+
+ free(fcgi_conf);
+}
+
+static int fcgi_flt_check(struct proxy *px, struct flt_conf *fconf)
+{
+ struct fcgi_flt_conf *fcgi_conf = fconf->conf;
+ struct fcgi_rule_conf *crule, *back;
+ struct fcgi_rule *rule = NULL;
+ struct flt_conf *f;
+ char *errmsg = NULL;
+
+ fcgi_conf->app = fcgi_app_find_by_name(fcgi_conf->name);
+ if (!fcgi_conf->app) {
+ ha_alert("proxy '%s' : fcgi-app '%s' not found.\n",
+ px->id, fcgi_conf->name);
+ goto err;
+ }
+
+ list_for_each_entry(f, &px->filter_configs, list) {
+ if (f->id == http_comp_flt_id || f->id == cache_store_flt_id)
+ continue;
+ else if ((f->id == fconf->id) && f->conf != fcgi_conf) {
+ ha_alert("proxy '%s' : only one fcgi-app supported per backend.\n",
+ px->id);
+ goto err;
+ }
+ else if (f->id != fconf->id) {
+			/* An implicit declaration is only allowed for the
+			 * compression and cache filters. For any other filter,
+			 * an explicit declaration is required. */
+ ha_alert("config: proxy '%s': require an explicit filter declaration "
+ "to use the fcgi-app '%s'.\n", px->id, fcgi_conf->name);
+ goto err;
+ }
+ }
+
+ list_for_each_entry_safe(crule, back, &fcgi_conf->app->conf.rules, list) {
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ ha_alert("proxy '%s' : out of memory.\n", px->id);
+ goto err;
+ }
+ rule->type = crule->type;
+ rule->name = ist(crule->name);
+ rule->cond = crule->cond;
+ LIST_INIT(&rule->value);
+
+ if (crule->value) {
+ if (!parse_logformat_string(crule->value, px, &rule->value, LOG_OPT_HTTP,
+ SMP_VAL_BE_HRQ_HDR, &errmsg)) {
+ ha_alert("proxy '%s' : %s.\n", px->id, errmsg);
+ goto err;
+ }
+ }
+
+ if (rule->type == FCGI_RULE_SET_PARAM || rule->type == FCGI_RULE_UNSET_PARAM)
+ LIST_APPEND(&fcgi_conf->param_rules, &rule->list);
+ else /* FCGI_RULE_PASS_HDR/FCGI_RULE_HIDE_HDR */
+ LIST_APPEND(&fcgi_conf->hdr_rules, &rule->list);
+ rule = NULL;
+ }
+ return 0;
+
+ err:
+ free(errmsg);
+ free(rule);
+ return 1;
+}
+
+static int fcgi_flt_start(struct stream *s, struct filter *filter)
+{
+ struct fcgi_flt_conf *fcgi_conf = FLT_CONF(filter);
+ struct fcgi_flt_ctx *fcgi_ctx;
+
+ fcgi_ctx = pool_alloc(pool_head_fcgi_flt_ctx);
+ if (fcgi_ctx == NULL) {
+ // FIXME: send a warning
+ return 0;
+ }
+ fcgi_ctx->filter = filter;
+ fcgi_ctx->app = fcgi_conf->app;
+ filter->ctx = fcgi_ctx;
+
+ s->req.analysers |= AN_REQ_HTTP_BODY;
+ return 1;
+}
+
+static void fcgi_flt_stop(struct stream *s, struct filter *filter)
+{
+ struct flt_fcgi_ctx *fcgi_ctx = filter->ctx;
+
+ if (!fcgi_ctx)
+ return;
+ pool_free(pool_head_fcgi_flt_ctx, fcgi_ctx);
+ filter->ctx = NULL;
+}
+
+static int fcgi_flt_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct session *sess = strm_sess(s);
+ struct buffer *value;
+ struct fcgi_flt_conf *fcgi_conf = FLT_CONF(filter);
+ struct fcgi_rule *rule;
+ struct fcgi_param_rule *param_rule;
+ struct fcgi_hdr_rule *hdr_rule;
+ struct ebpt_node *node, *next;
+ struct eb_root param_rules = EB_ROOT;
+ struct eb_root hdr_rules = EB_ROOT;
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ int ret;
+
+ htx = htxbuf(&msg->chn->buf);
+
+ if (msg->chn->flags & CF_ISRESP) {
+ struct htx_sl *sl;
+
+ /* Remove the header "Status:" from the response */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("status"), &ctx, 1))
+ http_remove_header(htx, &ctx);
+
+ /* Add the header "Date:" if not found */
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("date"), &ctx, 1)) {
+ struct tm tm;
+
+ get_gmtime(date.tv_sec, &tm);
+ trash.data = strftime(trash.area, trash.size, "%a, %d %b %Y %T %Z", &tm);
+ if (trash.data)
+ http_add_header(htx, ist("date"), ist2(trash.area, trash.data));
+ }
+
+ /* Add the header "Content-Length:" if possible */
+ sl = http_get_stline(htx);
+ if (s->txn->meth != HTTP_METH_HEAD && sl &&
+ (msg->flags & (HTTP_MSGF_XFER_LEN|HTTP_MSGF_CNT_LEN|HTTP_MSGF_TE_CHNK)) == HTTP_MSGF_XFER_LEN &&
+ (htx->flags & HTX_FL_EOM)) {
+ struct htx_blk * blk;
+ char *end;
+ size_t len = 0;
+
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ end = ultoa_o(len, trash.area, trash.size);
+ if (http_add_header(htx, ist("content-length"), ist2(trash.area, end-trash.area))) {
+ sl->flags |= HTX_SL_F_CLEN;
+ msg->flags |= HTTP_MSGF_CNT_LEN;
+ }
+ }
+
+ return 1;
+ }
+
+ /* Analyze the request's headers */
+
+ value = alloc_trash_chunk();
+ if (!value)
+ goto end;
+
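+	/* index matching rules by name in temporary ebtrees so that, for a
+	 * given parameter or header, only the last matching rule is kept;
+	 * the retained rules are then applied in one pass below.
+	 */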
+ list_for_each_entry(rule, &fcgi_conf->param_rules, list) {
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ /* the rule does not match */
+ if (!ret)
+ continue;
+ }
+
+ param_rule = NULL;
+ node = ebis_lookup_len(&param_rules, rule->name.ptr, rule->name.len);
+ if (node) {
+ param_rule = container_of(node, struct fcgi_param_rule, node);
+ ebpt_delete(node);
+ }
+ else {
+ param_rule = pool_alloc(pool_head_fcgi_param_rule);
+ if (param_rule == NULL)
+ goto param_rule_err;
+ }
+
+ param_rule->node.key = rule->name.ptr;
+ param_rule->name = rule->name;
+ param_rule->value = &rule->value;
+ ebis_insert(&param_rules, &param_rule->node);
+ }
+
+ list_for_each_entry(rule, &fcgi_conf->hdr_rules, list) {
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ /* the rule does not match */
+ if (!ret)
+ continue;
+ }
+
+ hdr_rule = NULL;
+ node = ebis_lookup_len(&hdr_rules, rule->name.ptr, rule->name.len);
+ if (node) {
+ hdr_rule = container_of(node, struct fcgi_hdr_rule, node);
+ ebpt_delete(node);
+ }
+ else {
+ hdr_rule = pool_alloc(pool_head_fcgi_hdr_rule);
+ if (hdr_rule == NULL)
+ goto hdr_rule_err;
+ }
+
+ hdr_rule->node.key = rule->name.ptr;
+ hdr_rule->name = rule->name;
+ hdr_rule->pass = (rule->type == FCGI_RULE_PASS_HDR);
+ ebis_insert(&hdr_rules, &hdr_rule->node);
+ }
+
+ node = ebpt_first(&param_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ param_rule = container_of(node, struct fcgi_param_rule, node);
+ node = next;
+
+ b_reset(value);
+ value->data = build_logline(s, value->area, value->size, param_rule->value);
+ if (!value->data) {
+ pool_free(pool_head_fcgi_param_rule, param_rule);
+ continue;
+ }
+ if (!http_add_header(htx, param_rule->name, ist2(value->area, value->data)))
+ goto rewrite_err;
+ pool_free(pool_head_fcgi_param_rule, param_rule);
+ }
+
+ node = ebpt_first(&hdr_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ hdr_rule = container_of(node, struct fcgi_hdr_rule, node);
+ node = next;
+
+ if (!hdr_rule->pass) {
+ ctx.blk = NULL;
+ while (http_find_header(htx, hdr_rule->name, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ }
+ pool_free(pool_head_fcgi_hdr_rule, hdr_rule);
+ }
+
+ goto end;
+
+ rewrite_err:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ hdr_rule_err:
+ node = ebpt_first(&hdr_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ hdr_rule = container_of(node, struct fcgi_hdr_rule, node);
+ node = next;
+ pool_free(pool_head_fcgi_hdr_rule, hdr_rule);
+ }
+ param_rule_err:
+ node = ebpt_first(&param_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ param_rule = container_of(node, struct fcgi_param_rule, node);
+ node = next;
+ pool_free(pool_head_fcgi_param_rule, param_rule);
+ }
+ end:
+ free_trash_chunk(value);
+ return 1;
+}
+
+struct flt_ops fcgi_flt_ops = {
+ .init = fcgi_flt_init,
+ .check = fcgi_flt_check,
+ .deinit = fcgi_flt_deinit,
+
+ .attach = fcgi_flt_start,
+ .detach = fcgi_flt_stop,
+
+ .http_headers = fcgi_flt_http_headers,
+};
+
+/**************************************************************************/
+/*********************** FCGI Config parsing ******************************/
+/**************************************************************************/
+static int
+parse_fcgi_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct flt_conf *f, *back;
+ struct fcgi_flt_conf *fcgi_conf = NULL;
+ char *name = NULL;
+ int pos = *cur_arg;
+
+	/* Get the fcgi-app name */
+ if (!*args[pos + 1]) {
+ memprintf(err, "%s : expects a <name> argument", args[pos]);
+ goto err;
+ }
+ name = strdup(args[pos + 1]);
+ if (!name) {
+ memprintf(err, "%s '%s' : out of memory", args[pos], args[pos + 1]);
+ goto err;
+ }
+ pos += 2;
+
+ /* Check if an fcgi-app filter with the same name already exists */
+ list_for_each_entry_safe(f, back, &px->filter_configs, list) {
+ if (f->id != fcgi_flt_id)
+ continue;
+ fcgi_conf = f->conf;
+ if (strcmp(name, fcgi_conf->name) != 0) {
+ fcgi_conf = NULL;
+ continue;
+ }
+
+ /* Place the filter at its right position */
+ LIST_DELETE(&f->list);
+ free(f);
+ ha_free(&name);
+ break;
+ }
+
+ /* No other fcgi-app filter found, create configuration for the explicit one */
+ if (!fcgi_conf) {
+ fcgi_conf = calloc(1, sizeof(*fcgi_conf));
+ if (!fcgi_conf) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto err;
+ }
+ fcgi_conf->name = name;
+ LIST_INIT(&fcgi_conf->param_rules);
+ LIST_INIT(&fcgi_conf->hdr_rules);
+ }
+
+ fconf->id = fcgi_flt_id;
+ fconf->conf = fcgi_conf;
+ fconf->ops = &fcgi_flt_ops;
+
+ *cur_arg = pos;
+ return 0;
+ err:
+ free(name);
+ return -1;
+}
+
+/* Parses the "use-fcgi-app" proxy keyword */
+static int proxy_parse_use_fcgi_app(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct flt_conf *fconf = NULL;
+ struct fcgi_flt_conf *fcgi_conf = NULL;
+ int retval = 0;
+
+ if ((curpx->cap & PR_CAP_DEF) || !(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "'%s' only available in backend or listen section", args[0]);
+ retval = -1;
+ goto end;
+ }
+
+ if (!*(args[1])) {
+ memprintf(err, "'%s' expects <name> as argument", args[0]);
+ retval = -1;
+ goto end;
+ }
+
+	/* Check if an FCGI filter was already registered with this name;
+	 * if that's the case, it must be used. */
+ list_for_each_entry(fconf, &curpx->filter_configs, list) {
+ if (fconf->id == fcgi_flt_id) {
+ fcgi_conf = fconf->conf;
+ if (fcgi_conf && strcmp((char *)fcgi_conf->name, args[1]) == 0)
+ goto end;
+ memprintf(err, "'%s' : only one fcgi-app supported per backend", args[0]);
+ retval = -1;
+ goto end;
+ }
+ }
+
+ /* Create the FCGI filter config */
+ fcgi_conf = calloc(1, sizeof(*fcgi_conf));
+ if (!fcgi_conf)
+ goto err;
+ fcgi_conf->name = strdup(args[1]);
+ LIST_INIT(&fcgi_conf->param_rules);
+ LIST_INIT(&fcgi_conf->hdr_rules);
+
+ /* Register the filter */
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf)
+ goto err;
+ fconf->id = fcgi_flt_id;
+ fconf->conf = fcgi_conf;
+ fconf->ops = &fcgi_flt_ops;
+ LIST_APPEND(&curpx->filter_configs, &fconf->list);
+
+ end:
+ return retval;
+ err:
+ if (fcgi_conf) {
+ free(fcgi_conf->name);
+ free(fcgi_conf);
+ }
+ memprintf(err, "out of memory");
+ retval = -1;
+ goto end;
+}
+
+/* Finishes the parsing of FCGI applications for proxies and servers */
+static int cfg_fcgi_apps_postparser()
+{
+ struct fcgi_app *curapp;
+ struct proxy *px;
+ struct server *srv;
+ struct logsrv *logsrv;
+ int err_code = 0;
+
+ for (px = proxies_list; px; px = px->next) {
+ struct fcgi_flt_conf *fcgi_conf = find_px_fcgi_conf(px);
+ int nb_fcgi_srv = 0;
+
+ if (px->mode == PR_MODE_TCP && fcgi_conf) {
+ ha_alert("proxy '%s': FCGI application cannot be used in non-HTTP mode.\n",
+ px->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* By default, for FCGI-ready backends, HTTP request header names
+ * are restricted and the "delete" policy is set
+ */
+ if (fcgi_conf && !(px->options2 & PR_O2_RSTRICT_REQ_HDR_NAMES_MASK))
+ px->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_DEL;
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ if (srv->mux_proto && isteq(srv->mux_proto->token, ist("fcgi"))) {
+ nb_fcgi_srv++;
+ if (fcgi_conf)
+ continue;
+ ha_alert("proxy '%s': FCGI server '%s' has no FCGI app configured.\n",
+ px->id, srv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+ if (fcgi_conf && !nb_fcgi_srv) {
+ ha_alert("proxy '%s': FCGI app configured but no FCGI server found.\n",
+ px->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+
+ for (curapp = fcgi_apps; curapp != NULL; curapp = curapp->next) {
+ if (!istlen(curapp->docroot)) {
+ ha_alert("fcgi-app '%s': no docroot configured.\n",
+ curapp->name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ if (!(curapp->flags & (FCGI_APP_FL_MPXS_CONNS|FCGI_APP_FL_GET_VALUES))) {
+ if (curapp->maxreqs > 1) {
+ ha_warning("fcgi-app '%s': multiplexing not supported, "
+ "ignore the option 'max-reqs'.\n",
+ curapp->name);
+ err_code |= ERR_WARN;
+ }
+ curapp->maxreqs = 1;
+ }
+
+ list_for_each_entry(logsrv, &curapp->logsrvs, list) {
+ if (logsrv->type == LOG_TARGET_BUFFER) {
+ struct sink *sink = sink_find(logsrv->ring_name);
+
+ if (!sink || sink->type != SINK_TYPE_BUFFER) {
+ ha_alert("fcgi-app '%s' : log server uses unknown ring named '%s'.\n",
+ curapp->name, logsrv->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ logsrv->sink = sink;
+ }
+ }
+ }
+
+ end:
+ return err_code;
+}
+
+static int fcgi_app_add_rule(struct fcgi_app *curapp, enum fcgi_rule_type type, char *name, char *value,
+ struct acl_cond *cond, char **err)
+{
+ struct fcgi_rule_conf *rule;
+
+ /* Allocate a new rule and append it to the application's rule list */
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ goto err;
+ LIST_INIT(&rule->list);
+ rule->type = type;
+ if (type == FCGI_RULE_SET_PARAM || type == FCGI_RULE_UNSET_PARAM) {
+ struct ist fname = fcgi_param_name(trash.area, ist(name));
+ rule->name = my_strndup(fname.ptr, fname.len);
+ }
+ else { /* FCGI_RULE_PASS_HDR/FCGI_RULE_HIDE_HDR */
+ struct ist fname = ist2bin_lc(trash.area, ist(name));
+ rule->name = my_strndup(fname.ptr, fname.len);
+ }
+ if (!rule->name)
+ goto err;
+
+ if (value) {
+ rule->value = strdup(value);
+ if (!rule->value)
+ goto err;
+ }
+ rule->cond = cond;
+ LIST_APPEND(&curapp->conf.rules, &rule->list);
+ return 1;
+
+ err:
+ if (rule) {
+ free(rule->name);
+ free(rule->value);
+ free(rule);
+ }
+ if (cond) {
+ prune_acl_cond(cond);
+ free(cond);
+ }
+ memprintf(err, "out of memory");
+ return 0;
+}
+
+/* Parses "fcgi-app" section */
+static int cfg_parse_fcgi_app(const char *file, int linenum, char **args, int kwm)
+{
+ static struct fcgi_app *curapp = NULL;
+ struct acl_cond *cond = NULL;
+ char *name, *value = NULL;
+ enum fcgi_rule_type type;
+ int err_code = 0;
+ const char *err;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "fcgi-app") == 0) { /* new fcgi-app */
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (curapp = fcgi_apps; curapp != NULL; curapp = curapp->next) {
+ if (strcmp(curapp->name, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: fcgi-app section '%s' has the same name as another one declared at %s:%d.\n",
+ file, linenum, args[1], curapp->conf.file, curapp->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ curapp = calloc(1, sizeof(*curapp));
+ if (!curapp) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curapp->next = fcgi_apps;
+ fcgi_apps = curapp;
+ curapp->flags = FCGI_APP_FL_KEEP_CONN;
+ curapp->docroot = ist(NULL);
+ curapp->index = ist(NULL);
+ curapp->pathinfo_re = NULL;
+ curapp->name = strdup(args[1]);
+ curapp->maxreqs = 1;
+ curapp->conf.file = strdup(file);
+ curapp->conf.line = linenum;
+ LIST_INIT(&curapp->acls);
+ LIST_INIT(&curapp->logsrvs);
+ LIST_INIT(&curapp->conf.args.list);
+ LIST_INIT(&curapp->conf.rules);
+
+ /* Set info about authentication */
+ if (!fcgi_app_add_rule(curapp, FCGI_RULE_SET_PARAM, "REMOTE_USER", "%[http_auth_user]", NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_SET_PARAM, "AUTH_TYPE", "%[http_auth_type]", NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ /* Hide hop-by-hop headers by default */
+ if (!fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "connection", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "keep-alive", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "authorization", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "proxy", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "proxy-authorization", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "proxy-authenticate", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "te", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "trailers", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "transfer-encoding", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "upgrade", NULL, NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "docroot") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <path> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ istfree(&curapp->docroot);
+ curapp->docroot = ist(strdup(args[1]));
+ if (!isttest(curapp->docroot)) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ }
+ else if (strcmp(args[0], "path-info") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <regex> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ regex_free(curapp->pathinfo_re);
+ curapp->pathinfo_re = regex_comp(args[1], 1, 1, &errmsg);
+ if (!curapp->pathinfo_re) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "index") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <filename> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ istfree(&curapp->index);
+ curapp->index = ist(strdup(args[1]));
+ if (!isttest(curapp->index)) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ }
+ else if (strcmp(args[0], "acl") == 0) {
+ const char *err;
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in acl name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcasecmp(args[1], "or") == 0) {
+ ha_alert("parsing [%s:%d] : acl name '%s' will never match. 'or' is used to express a "
+ "logical disjunction within a condition.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (parse_acl((const char **)args+1, &curapp->acls, &errmsg, &curapp->conf.args, file, linenum) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing ACL '%s' : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "set-param") == 0) {
+ if (!*(args[1]) || !*(args[2])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> and <value> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_SET_PARAM;
+ name = args[1];
+ value = args[2];
+ cond = NULL;
+ args += 3;
+
+ parse_cond_rule:
+ if (!*(args[0])) /* No condition */
+ goto add_rule;
+
+ if (strcmp(args[0], "if") == 0)
+ cond = parse_acl_cond((const char **)args+1, &curapp->acls, ACL_COND_IF, &errmsg, &curapp->conf.args,
+ file, linenum);
+ else if (strcmp(args[0], "unless") == 0)
+ cond = parse_acl_cond((const char **)args+1, &curapp->acls, ACL_COND_UNLESS, &errmsg, &curapp->conf.args,
+ file, linenum);
+ if (!cond) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ name, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ add_rule:
+ if (!fcgi_app_add_rule(curapp, type, name, value, cond, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ name, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+#if 0 /* Disabled for now */
+ else if (!strcmp(args[0], "unset-param")) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_UNSET_PARAM;
+ name = args[1];
+ value = NULL;
+ cond = NULL;
+ args += 2;
+ goto parse_cond_rule;
+ }
+#endif
+ else if (strcmp(args[0], "pass-header") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_PASS_HDR;
+ name = args[1];
+ value = NULL;
+ cond = NULL;
+ args += 2;
+ goto parse_cond_rule;
+ }
+#if 0 /* Disabled for now */
+ else if (!strcmp(args[0], "hide-header")) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_HIDE_HDR;
+ name = args[1];
+ value = NULL;
+ cond = NULL;
+ args += 2;
+ goto parse_cond_rule;
+ }
+#endif
+ else if (strcmp(args[0], "option") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (strcmp(args[1], "keep-conn") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD)
+ curapp->flags |= FCGI_APP_FL_KEEP_CONN;
+ else if (kwm == KWM_NO)
+ curapp->flags &= ~FCGI_APP_FL_KEEP_CONN;
+ }
+ else if (strcmp(args[1], "get-values") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD)
+ curapp->flags |= FCGI_APP_FL_GET_VALUES;
+ else if (kwm == KWM_NO)
+ curapp->flags &= ~FCGI_APP_FL_GET_VALUES;
+ }
+ else if (strcmp(args[1], "mpxs-conns") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD)
+ curapp->flags |= FCGI_APP_FL_MPXS_CONNS;
+ else if (kwm == KWM_NO)
+ curapp->flags &= ~FCGI_APP_FL_MPXS_CONNS;
+ }
+ else if (strcmp(args[1], "max-reqs") == 0) {
+ if (kwm != KWM_STD) {
+ ha_alert("parsing [%s:%d]: negation/default is not supported for option '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (!*(args[2])) {
+ ha_alert("parsing [%s:%d]: option '%s' expects an integer argument.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(1, 1, file, linenum, args, &err_code))
+ goto out;
+
+ curapp->maxreqs = atol(args[2]);
+ if (!curapp->maxreqs) {
+ ha_alert("parsing [%s:%d]: option '%s' expects a strictly positive integer argument.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown option '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "log-stderr") == 0) {
+ if (!parse_logsrv(args, &curapp->logsrvs, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d]: unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "fcgi-app");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
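+
+/* Illustrative configuration sketch (assumed syntax, built only from the
+ * keywords parsed above; see doc/configuration.txt for the authoritative
+ * reference):
+ *
+ *     fcgi-app php-fpm
+ *         docroot /var/www/html
+ *         index index.php
+ *         option keep-conn
+ *
+ *     backend be-php
+ *         mode http
+ *         use-fcgi-app php-fpm
+ *         server php 127.0.0.1:9000 proto fcgi
+ *
+ * The postparser above then verifies that the app has a docroot and that the
+ * backend declares at least one server using the "fcgi" mux protocol.
+ */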
+
+
+/**************************************************************************/
+/*********************** FCGI Deinit functions ****************************/
+/**************************************************************************/
+void fcgi_apps_deinit()
+{
+ struct fcgi_app *curapp, *nextapp;
+ struct logsrv *log, *logb;
+
+ for (curapp = fcgi_apps; curapp != NULL; curapp = nextapp) {
+ struct fcgi_rule_conf *rule, *back;
+
+ free(curapp->name);
+ istfree(&curapp->docroot);
+ istfree(&curapp->index);
+ regex_free(curapp->pathinfo_re);
+ free(curapp->conf.file);
+
+ list_for_each_entry_safe(log, logb, &curapp->logsrvs, list) {
+ LIST_DELETE(&log->list);
+ free(log);
+ }
+
+ list_for_each_entry_safe(rule, back, &curapp->conf.rules, list) {
+ LIST_DELETE(&rule->list);
+ fcgi_release_rule_conf(rule);
+ }
+
+ nextapp = curapp->next;
+ free(curapp);
+ }
+}
+
+
+/**************************************************************************/
+/*************** Keywords definition and registration *********************/
+/**************************************************************************/
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "use-fcgi-app", proxy_parse_use_fcgi_app },
+ { 0, NULL, NULL },
+}};
+
+// FIXME: Add rep.fcgi smp_fetch
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "fcgi.docroot", smp_fetch_fcgi_docroot, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "fcgi.index", smp_fetch_fcgi_index, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { /* END */ }
+}};
+
+/* Declare the filter parser for "fcgi-app" keyword */
+static struct flt_kw_list filter_kws = { "FCGI", { }, {
+ { "fcgi-app", parse_fcgi_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
+
+INITCALL1(STG_REGISTER, hap_register_post_deinit, fcgi_apps_deinit);
+
+REGISTER_CONFIG_SECTION("fcgi-app", cfg_parse_fcgi_app, NULL);
+REGISTER_CONFIG_POSTPARSER("fcgi-apps", cfg_fcgi_apps_postparser);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fcgi.c b/src/fcgi.c
new file mode 100644
index 0000000..1d1a82b
--- /dev/null
+++ b/src/fcgi.c
@@ -0,0 +1,294 @@
+/*
+ * FastCGI protocol processing
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <haproxy/buf.h>
+#include <haproxy/fcgi.h>
+#include <haproxy/istbuf.h>
+
+/* Encodes the header of an FCGI record into the chunk <out>. It returns
+ * non-zero on success and 0 on failure (buffer full). <out> is a chunk, so the
+ * wrapping is not handled by this function. It is the caller's responsibility
+ * to ensure enough contiguous space is available.
+ */
+int fcgi_encode_record_hdr(struct buffer *out, const struct fcgi_header *h)
+{
+ size_t len = out->data;
+
+ if (len + 8 >= b_size(out))
+ return 0;
+
+ out->area[len++] = h->vsn;
+ out->area[len++] = h->type;
+ out->area[len++] = ((h->id >> 8) & 0xff);
+ out->area[len++] = (h->id & 0xff);
+ out->area[len++] = ((h->len >> 8) & 0xff);
+ out->area[len++] = (h->len & 0xff);
+ out->area[len++] = h->padding;
+ out->area[len++] = 0; /* rsv */
+
+ out->data = len;
+ return 1;
+}
+
+/* Decodes a FCGI record header from offset <o> of buffer <in> into descriptor
+ * <h>. The buffer may wrap so each byte read must be checked. The header is
+ * formed like this :
+ *
+ * b0 b1 b2 b3 b4 b5 b6 b7
+ * +-----+------+-----+-----+------+------+--------+-----+
+ * | vsn | type | id1 | id0 | len1 | len0 | padlen | rsv |
+ * +-----+------+-----+-----+------+------+--------+-----+
+ *
+ * Returns zero if some bytes are missing, otherwise the number of read bytes.
+ */
+size_t fcgi_decode_record_hdr(const struct buffer *in, size_t o, struct fcgi_header *h)
+{
+ if (b_data(in) < o + 8)
+ return 0;
+
+ h->vsn = (uint8_t)(*b_peek(in, o));
+ h->type = (uint8_t)(*b_peek(in, o+1));
+ h->id = ((uint8_t)(*b_peek(in, o+2)) << 8) + (uint8_t)(*b_peek(in, o+3));
+ h->len = ((uint8_t)(*b_peek(in, o+4)) << 8) + (uint8_t)(*b_peek(in, o+5));
+ h->padding = (uint8_t)(*b_peek(in, o+6));
+ /* ignore rsv */
+
+ return 8;
+}
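+
+/* Minimal round-trip sketch for the two functions above. It is illustrative
+ * only, hence compiled out; the buffer initializer and the
+ * FCGI_BEGIN_REQUEST record type are assumptions about the surrounding API.
+ */
+#if 0
+static void example_record_hdr_roundtrip(void)
+{
+ char storage[16];
+ struct buffer out = { .area = storage, .size = sizeof(storage) };
+ struct fcgi_header hdr = { .vsn = 1, .type = FCGI_BEGIN_REQUEST,
+ .id = 1, .len = 8, .padding = 0 };
+ struct fcgi_header dec;
+
+ /* 8 bytes are emitted; decoding them back must yield <hdr> */
+ if (fcgi_encode_record_hdr(&out, &hdr))
+ fcgi_decode_record_hdr(&out, 0, &dec);
+}
+#endif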
+
+/* Encodes the payload part of a BEGIN_REQUEST record into the chunk <out>. It
+ * returns non-zero on success and 0 on failure (buffer full). <out> is a chunk,
+ * so the wrapping is not handled by this function. It is the caller's
+ * responsibility to ensure enough contiguous space is available.
+ */
+int fcgi_encode_begin_request(struct buffer *out, const struct fcgi_begin_request *r)
+{
+ size_t len = out->data;
+
+ if (len + 8 >= b_size(out))
+ return 0;
+
+ out->area[len++] = ((r->role >> 8) & 0xff);
+ out->area[len++] = (r->role & 0xff);
+ out->area[len++] = r->flags;
+ out->area[len++] = 0; /* rsv */
+ out->area[len++] = 0;
+ out->area[len++] = 0;
+ out->area[len++] = 0;
+ out->area[len++] = 0;
+
+ out->data = len;
+ return 1;
+}
+
+/* Encodes a parameter, part of the payload of a PARAM record, into the chunk
+ * <out>. It returns non-zero on success and 0 on failure (buffer full). <out>
+ * is a chunk, so the wrapping is not handled by this function. It is the
+ * caller's responsibility to ensure enough contiguous space is available. The
+ * parameter's name is converted to upper case and non-alphanumeric characters
+ * are replaced by an underscore.
+ */
+int fcgi_encode_param(struct buffer *out, const struct fcgi_param *p)
+{
+ size_t off, len = out->data;
+ int nbytes, vbytes;
+
+ nbytes = (!(p->n.len >> 7) ? 1 : 4);
+ vbytes = (!(p->v.len >> 7) ? 1 : 4);
+ if ((len + nbytes + p->n.len + vbytes + p->v.len) >= b_size(out))
+ return 0;
+
+ if (nbytes == 1)
+ out->area[len++] = (p->n.len & 0xff);
+ else {
+ out->area[len++] = (((p->n.len >> 24) & 0xff) | 0x80);
+ out->area[len++] = ((p->n.len >> 16) & 0xff);
+ out->area[len++] = ((p->n.len >> 8) & 0xff);
+ out->area[len++] = (p->n.len & 0xff);
+ }
+
+ if (vbytes == 1)
+ out->area[len++] = (p->v.len & 0xff);
+ else {
+ out->area[len++] = (((p->v.len >> 24) & 0xff) | 0x80);
+ out->area[len++] = ((p->v.len >> 16) & 0xff);
+ out->area[len++] = ((p->v.len >> 8) & 0xff);
+ out->area[len++] = (p->v.len & 0xff);
+ }
+
+ for (off = 0; off < p->n.len; off++) {
+ if (isalnum((unsigned char)p->n.ptr[off]))
+ out->area[len++] = ist_uc[(unsigned char)p->n.ptr[off]];
+ else
+ out->area[len++] = '_';
+ }
+ if (p->v.len) {
+ ist2bin(out->area + len, p->v);
+ len += p->v.len;
+ }
+
+ out->data = len;
+ return 1;
+}
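+
+/* Worked example of the length encoding above: a 15-byte name fits the short
+ * form and is encoded as the single byte 0x0F (high bit clear), while a
+ * 300-byte value (0x12C) uses the long form and takes the four bytes
+ * 0x80 0x00 0x01 0x2C (high bit of the first byte set, length on the
+ * remaining 31 bits).
+ */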
+
+/* Decodes a parameter of a PARAM record from offset <o> of buffer <in> into the
+ * FCGI param <p>. The buffer may wrap so each byte read must be checked.
+ * Returns zero if some bytes are missing, otherwise the number of read bytes.
+ */
+size_t fcgi_decode_param(const struct buffer *in, size_t o, struct fcgi_param *p)
+{
+ size_t data = b_data(in);
+ size_t nlen, vlen, len = 0;
+ uint8_t b0, b1, b2, b3;
+
+ if (data < o + 1)
+ return 0;
+ b0 = *b_peek(in, o++);
+ if (!(b0 >> 7)) {
+ nlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = *b_peek(in, o++);
+ b2 = *b_peek(in, o++);
+ b3 = *b_peek(in, o++);
+ nlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < o + 1)
+ return 0;
+ b0 = *b_peek(in, o++);
+ if (!(b0 >> 7)) {
+ vlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = *b_peek(in, o++);
+ b2 = *b_peek(in, o++);
+ b3 = *b_peek(in, o++);
+ vlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < o + nlen + vlen)
+ return 0;
+
+ p->n = ist2(b_peek(in, o), nlen);
+ p->v = ist2(b_peek(in, o + nlen), vlen);
+ len += nlen + vlen;
+
+ return len;
+}
+
+
+/* Decodes a parameter of a PARAM record from offset <o> of buffer <in> into the
+ * FCGI param <p>. To call this function, the buffer must not wrap. Returns zero
+ * if some bytes are missing, otherwise the number of read bytes.
+ */
+size_t fcgi_aligned_decode_param(const struct buffer *in, size_t o, struct fcgi_param *p)
+{
+ size_t data = b_data(in);
+ size_t nlen, vlen, len = 0;
+ uint8_t b0, b1, b2, b3;
+
+ if (data < o + 1)
+ return 0;
+ b0 = in->area[o++];
+ if (!(b0 >> 7)) {
+ nlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = in->area[o++];
+ b2 = in->area[o++];
+ b3 = in->area[o++];
+ nlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < o + 1)
+ return 0;
+ b0 = in->area[o++];
+ if (!(b0 >> 7)) {
+ vlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = in->area[o++];
+ b2 = in->area[o++];
+ b3 = in->area[o++];
+ vlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < o + nlen + vlen)
+ return 0;
+
+ p->n = ist2(in->area + o, nlen);
+ p->v = ist2(in->area + o + nlen, vlen);
+ len += nlen + vlen;
+
+ return len;
+}
+
+/* Decodes the payload of an END_REQUEST record from offset <o> of buffer <in>
+ * into the end-request descriptor <rec>. The buffer may wrap so each byte
+ * read must be checked. Returns zero if some bytes are missing, otherwise the
+ * number of read bytes.
+ */
+size_t fcgi_decode_end_request(const struct buffer *in, size_t o, struct fcgi_end_request *rec)
+{
+ uint8_t b0, b1, b2, b3;
+
+ if (b_data(in) < o + 8)
+ return 0;
+
+ b0 = *b_peek(in, o++);
+ b1 = *b_peek(in, o++);
+ b2 = *b_peek(in, o++);
+ b3 = *b_peek(in, o++);
+ rec->status = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ rec->errcode = *b_peek(in, o++);
+ o += 3; /* ignore rsv */
+
+ return 8;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fd.c b/src/fd.c
new file mode 100644
index 0000000..0dbaef8
--- /dev/null
+++ b/src/fd.c
@@ -0,0 +1,1214 @@
+/*
+ * File descriptors management functions.
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * There is no direct link between the FD and the updates list. There is only a
+ * bit in the fdtab[] to indicate that a file descriptor is already present in
+ * the updates list. Once an fd is present in the updates list, it will have to
+ * be considered even if its changes are reverted in the middle or if the fd is
+ * replaced.
+ *
+ * The event state for an FD, as found in fdtab[].state, is maintained for each
+ * direction. The state field is built this way, with R bits in the low nibble
+ * and W bits in the high nibble for ease of access and debugging :
+ *
+ * 7 6 5 4 3 2 1 0
+ * [ 0 | 0 | RW | AW | 0 | 0 | RR | AR ]
+ *
+ * A* = active *R = read
+ * R* = ready *W = write
+ *
+ * An FD is marked "active" when there is a desire to use it.
+ * An FD is marked "ready" when it has not faced a new EAGAIN since last wake-up
+ * (it is a cache of the last EAGAIN regardless of polling changes). Each poller
+ * has its own "polled" state for the same fd, as stored in the polled_mask.
+ *
+ * We have 4 possible states for each direction based on these 2 flags :
+ *
+ * +---+---+----------+---------------------------------------------+
+ * | R | A | State | Description |
+ * +---+---+----------+---------------------------------------------+
+ * | 0 | 0 | DISABLED | No activity desired, not ready. |
+ * | 0 | 1 | ACTIVE | Activity desired. |
+ * | 1 | 0 | STOPPED | End of activity. |
+ * | 1 | 1 | READY | Activity desired and reported. |
+ * +---+---+----------+---------------------------------------------+
+ *
+ * The transitions are pretty simple :
+ * - fd_want_*() : set flag A
+ * - fd_stop_*() : clear flag A
+ * - fd_cant_*() : clear flag R (when facing EAGAIN)
+ * - fd_may_*() : set flag R (upon return from poll())
+ *
+ * Each poller then computes its own polled state :
+ * if (A) { if (!R) P := 1 } else { P := 0 }
+ *
+ * The state transitions look like the diagram below.
+ *
+ * may +----------+
+ * ,----| DISABLED | (READY=0, ACTIVE=0)
+ * | +----------+
+ * | want | ^
+ * | | |
+ * | v | stop
+ * | +----------+
+ * | | ACTIVE | (READY=0, ACTIVE=1)
+ * | +----------+
+ * | | ^
+ * | may | |
+ * | v | EAGAIN (can't)
+ * | +--------+
+ * | | READY | (READY=1, ACTIVE=1)
+ * | +--------+
+ * | stop | ^
+ * | | |
+ * | v | want
+ * | +---------+
+ * `--->| STOPPED | (READY=1, ACTIVE=0)
+ * +---------+
+ */
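+
+/* Concrete walk-through of the diagram above for the read direction:
+ * fd_want_recv() on a DISABLED fd makes it ACTIVE; once poll() reports input,
+ * fd_may_recv() turns it READY; a read() facing EAGAIN calls fd_cant_recv(),
+ * dropping it back to ACTIVE (pollers then re-enable polling since A && !R);
+ * fd_stop_recv() from READY leaves it STOPPED until the next fd_want_recv().
+ */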
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/uio.h>
+
+#if defined(USE_POLL)
+#include <poll.h>
+#include <errno.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/port_range.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+
+
+struct fdtab *fdtab __read_mostly = NULL; /* array of all the file descriptors */
+struct polled_mask *polled_mask __read_mostly = NULL; /* Array for the polled_mask of each fd */
+struct fdinfo *fdinfo __read_mostly = NULL; /* less-often used infos for file descriptors */
+int totalconn; /* total # of terminated sessions */
+int actconn; /* # of active sessions */
+
+struct poller pollers[MAX_POLLERS] __read_mostly;
+struct poller cur_poller __read_mostly;
+int nbpollers = 0;
+
+volatile struct fdlist update_list; // Global update list
+
+THREAD_LOCAL int *fd_updt = NULL; // FD updates list
+THREAD_LOCAL int fd_nbupdt = 0; // number of updates in the list
+THREAD_LOCAL int poller_rd_pipe = -1; // Pipe to wake the thread
+int poller_wr_pipe[MAX_THREADS] __read_mostly; // Pipe to wake the threads
+
+volatile int ha_used_fds = 0; // Number of FD we're currently using
+static struct fdtab *fdtab_addr; /* address of the allocated area containing fdtab */
+
+#define _GET_NEXT(fd, off) ((volatile struct fdlist_entry *)(void *)((char *)(&fdtab[fd]) + off))->next
+#define _GET_PREV(fd, off) ((volatile struct fdlist_entry *)(void *)((char *)(&fdtab[fd]) + off))->prev
+/* adds fd <fd> to fd list <list> if it was not yet in it */
+void fd_add_to_fd_list(volatile struct fdlist *list, int fd, int off)
+{
+ int next;
+ int new;
+ int old;
+ int last;
+
+redo_next:
+ next = _GET_NEXT(fd, off);
+ /* Check that we're not already in the cache, and if not, lock us. */
+ if (next > -2)
+ goto done;
+ if (next == -2)
+ goto redo_next;
+ if (!_HA_ATOMIC_CAS(&_GET_NEXT(fd, off), &next, -2))
+ goto redo_next;
+ __ha_barrier_atomic_store();
+
+ new = fd;
+redo_last:
+ /* First, insert in the linked list */
+ last = list->last;
+ old = -1;
+
+ _GET_PREV(fd, off) = -2;
+ /* Make sure the "prev" store is visible before we update the last entry */
+ __ha_barrier_store();
+
+ if (unlikely(last == -1)) {
+ /* list is empty, try to add ourselves alone so that list->last=fd */
+ if (unlikely(!_HA_ATOMIC_CAS(&list->last, &old, new)))
+ goto redo_last;
+
+ /* list->first was necessarily -1, we're guaranteed to be alone here */
+ list->first = fd;
+ } else {
+ /* adding ourselves past the last element
+ * The CAS will only succeed if its next is -1,
+ * which means it's in the cache, and the last element.
+ */
+ if (unlikely(!_HA_ATOMIC_CAS(&_GET_NEXT(last, off), &old, new)))
+ goto redo_last;
+
+ /* Then, update the last entry */
+ list->last = fd;
+ }
+ __ha_barrier_store();
+ /* since we're alone at the end of the list and still locked(-2),
+ * we know no one tried to add past us. Mark the end of list.
+ */
+ _GET_PREV(fd, off) = last;
+ _GET_NEXT(fd, off) = -1;
+ __ha_barrier_store();
+done:
+ return;
+}
+
+/* removes fd <fd> from fd list <list> */
+void fd_rm_from_fd_list(volatile struct fdlist *list, int fd, int off)
+{
+#if defined(HA_HAVE_CAS_DW) || defined(HA_CAS_IS_8B)
+ volatile union {
+ struct fdlist_entry ent;
+ uint64_t u64;
+ uint32_t u32[2];
+ } cur_list, next_list;
+#endif
+ int old;
+ int new = -2;
+ int prev;
+ int next;
+ int last;
+lock_self:
+#if (defined(HA_CAS_IS_8B) || defined(HA_HAVE_CAS_DW))
+ next_list.ent.next = next_list.ent.prev = -2;
+ cur_list.ent = *(volatile struct fdlist_entry *)(((char *)&fdtab[fd]) + off);
+ /* First, attempt to lock our own entries */
+ do {
+ /* The FD is not in the FD cache, give up */
+ if (unlikely(cur_list.ent.next <= -3))
+ return;
+ if (unlikely(cur_list.ent.prev == -2 || cur_list.ent.next == -2))
+ goto lock_self;
+ } while (
+#ifdef HA_CAS_IS_8B
+ unlikely(!_HA_ATOMIC_CAS(((uint64_t *)&_GET_NEXT(fd, off)), (uint64_t *)&cur_list.u64, next_list.u64))
+#else
+ unlikely(!_HA_ATOMIC_DWCAS(((long *)&_GET_NEXT(fd, off)), (uint32_t *)&cur_list.u32, (const uint32_t *)&next_list.u32))
+#endif
+ );
+ next = cur_list.ent.next;
+ prev = cur_list.ent.prev;
+
+#else
+lock_self_next:
+ next = _GET_NEXT(fd, off);
+ if (next == -2)
+ goto lock_self_next;
+ if (next <= -3)
+ goto done;
+ if (unlikely(!_HA_ATOMIC_CAS(&_GET_NEXT(fd, off), &next, -2)))
+ goto lock_self_next;
+lock_self_prev:
+ prev = _GET_PREV(fd, off);
+ if (prev == -2)
+ goto lock_self_prev;
+ if (unlikely(!_HA_ATOMIC_CAS(&_GET_PREV(fd, off), &prev, -2)))
+ goto lock_self_prev;
+#endif
+ __ha_barrier_atomic_store();
+
+ /* Now, lock the entries of our neighbours */
+ if (likely(prev != -1)) {
+redo_prev:
+ old = fd;
+
+ if (unlikely(!_HA_ATOMIC_CAS(&_GET_NEXT(prev, off), &old, new))) {
+ if (unlikely(old == -2)) {
+ /* Neighbour already locked, give up and
+ * retry once it's done
+ */
+ _GET_PREV(fd, off) = prev;
+ __ha_barrier_store();
+ _GET_NEXT(fd, off) = next;
+ __ha_barrier_store();
+ goto lock_self;
+ }
+ goto redo_prev;
+ }
+ }
+ if (likely(next != -1)) {
+redo_next:
+ old = fd;
+ if (unlikely(!_HA_ATOMIC_CAS(&_GET_PREV(next, off), &old, new))) {
+ if (unlikely(old == -2)) {
+ /* Neighbour already locked, give up and
+ * retry once it's done
+ */
+ if (prev != -1) {
+ _GET_NEXT(prev, off) = fd;
+ __ha_barrier_store();
+ }
+ _GET_PREV(fd, off) = prev;
+ __ha_barrier_store();
+ _GET_NEXT(fd, off) = next;
+ __ha_barrier_store();
+ goto lock_self;
+ }
+ goto redo_next;
+ }
+ }
+ if (list->first == fd)
+ list->first = next;
+ __ha_barrier_store();
+ last = list->last;
+ while (unlikely(last == fd && (!_HA_ATOMIC_CAS(&list->last, &last, prev))))
+ __ha_compiler_barrier();
+ /* Make sure we let other threads know we're no longer in cache,
+ * before releasing our neighbours.
+ */
+ __ha_barrier_store();
+ if (likely(prev != -1))
+ _GET_NEXT(prev, off) = next;
+ __ha_barrier_store();
+ if (likely(next != -1))
+ _GET_PREV(next, off) = prev;
+ __ha_barrier_store();
+ /* Ok, now we're out of the fd cache */
+ _GET_NEXT(fd, off) = -(next + 4);
+ __ha_barrier_store();
+done:
+ return;
+}
+
+#undef _GET_NEXT
+#undef _GET_PREV
+
+/* deletes the FD once nobody uses it anymore, as detected by the caller by its
+ * thread_mask being zero and its running mask turning to zero. There is no
+ * protection against concurrent accesses, it's up to the caller to make sure
+ * only the last thread will call it. This is only for internal use, please use
+ * fd_delete() instead.
+ */
+void _fd_delete_orphan(int fd)
+{
+ uint fd_disown;
+
+ fd_disown = fdtab[fd].state & FD_DISOWN;
+ if (fdtab[fd].state & FD_LINGER_RISK) {
+ /* this is generally set when connecting to servers */
+ DISGUISE(setsockopt(fd, SOL_SOCKET, SO_LINGER,
+ (struct linger *) &nolinger, sizeof(struct linger)));
+ }
+
+ /* It's expected that a close() will result in the FD disappearing from
+ * pollers, but some pollers may have some internal bookkeeping to be
+ * done prior to the call (e.g. remove references from internal tables).
+ */
+ if (cur_poller.clo)
+ cur_poller.clo(fd);
+
+ /* now we're about to reset some of this FD's fields. We don't want
+ * anyone to grab it anymore and we need to make sure those which could
+ * possibly have stumbled upon it right now are leaving before we
+ * proceed. This is done in two steps. First we reset the tgid so that
+ * fd_take_tgid() and fd_grab_tgid() fail, then we wait for existing
+ * ref counts to drop. Past this point we're alone dealing with the
+ * FD's thead/running/update/polled masks.
+ */
+ fd_reset_tgid(fd);
+
+ while (_HA_ATOMIC_LOAD(&fdtab[fd].refc_tgid) != 0) // refc==0 ?
+ __ha_cpu_relax();
+
+ /* we don't want this FD anymore in the global list */
+ fd_rm_from_fd_list(&update_list, fd, offsetof(struct fdtab, update));
+
+ /* no more updates on this FD are relevant anymore */
+ HA_ATOMIC_STORE(&fdtab[fd].update_mask, 0);
+ if (fd_nbupdt > 0 && fd_updt[fd_nbupdt - 1] == fd)
+ fd_nbupdt--;
+
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ polled_mask[fd].poll_recv = polled_mask[fd].poll_send = 0;
+
+ fdtab[fd].state = 0;
+
+#ifdef DEBUG_FD
+ fdtab[fd].event_count = 0;
+#endif
+ fdinfo[fd].port_range = NULL;
+ fdtab[fd].owner = NULL;
+
+ /* perform the close() call last as it's what unlocks the instant reuse
+ * of this FD by any other thread.
+ */
+ if (!fd_disown)
+ close(fd);
+ _HA_ATOMIC_DEC(&ha_used_fds);
+}
+
+/* Deletes an FD from the fdsets. The file descriptor is also closed, possibly
+ * asynchronously. Only the owning thread may do this.
+ */
+void fd_delete(int fd)
+{
+ /* This must never happen and would definitely indicate a bug, in
+ * addition to overwriting some unexpected memory areas.
+ */
+ BUG_ON(fd < 0 || fd >= global.maxsock);
+
+ /* the tgid cannot change before a complete close so we should never
+ * face the situation where we try to close an fd that was reassigned.
+ */
+ BUG_ON(fd_tgid(fd) != 1 && !thread_isolated());
+
+ /* we must postpone removal of an FD that may currently be in use
+ * by another thread. This can happen in the following two situations:
+ * - after a takeover, the owning thread closes the connection but
+ * the previous one just woke up from the poller and entered
+ * the FD handler iocb. That thread holds an entry in running_mask
+ * and requires removal protection.
+ * - multiple threads are accepting connections on a listener, and
+ * one of them (or even a separate one) decides to unbind the
+ * listener under the listener's lock while other ones still hold
+ * the running bit.
+ * In both situations the FD is marked as unused (thread_mask = 0) and
+ * will not take new bits in its running_mask so we have the guarantee
+ * that the last thread eliminating running_mask is the one allowed to
+ * safely delete the FD. Most of the time it will be the current thread.
+ * We still need to set and check the one-shot flag FD_MUST_CLOSE
+ * to take care of the rare cases where a thread wakes up on late I/O
+ * before the thread_mask is zero, and sets its bit in the running_mask
+ * just after the current thread finishes clearing its own bit, hence
+ * the two threads see themselves as last ones (which they really are).
+ */
+
+ HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_MUST_CLOSE);
+ HA_ATOMIC_STORE(&fdtab[fd].thread_mask, 0);
+ if (fd_clr_running(fd) == tid_bit) {
+ if (HA_ATOMIC_BTR(&fdtab[fd].state, FD_MUST_CLOSE_BIT)) {
+ _fd_delete_orphan(fd);
+ }
+ }
+}
+
+/* makes the new fd non-blocking and clears all other O_* flags; this is meant
+ * to be used on new FDs. Returns -1 on failure. The result is disguised at the
+ * end because some callers need to be able to ignore it regardless of the libc
+ * attributes.
+ */
+int fd_set_nonblock(int fd)
+{
+ int ret = fcntl(fd, F_SETFL, O_NONBLOCK);
+
+ return DISGUISE(ret);
+}
+
+/* sets the close-on-exec flag on fd; returns -1 on failure. The result is
+ * disguised at the end because some callers need to be able to ignore it
+ * regardless of the libc attributes.
+ */
+int fd_set_cloexec(int fd)
+{
+ int flags, ret;
+
+ flags = fcntl(fd, F_GETFD);
+ flags |= FD_CLOEXEC;
+ ret = fcntl(fd, F_SETFD, flags);
+ return DISGUISE(ret);
+}
+
+/*
+ * Take over an FD belonging to another thread.
+ * <expected_owner> is the fd's expected current owner.
+ * Returns 0 on success, and -1 on failure.
+ */
+int fd_takeover(int fd, void *expected_owner)
+{
+ unsigned long old;
+
+ /* protect ourselves against a delete then an insert for the same fd,
+ * if it happens, then the owner will no longer be the expected
+ * connection.
+ */
+ if (fdtab[fd].owner != expected_owner)
+ return -1;
+
+ /* we must be alone to work on this idle FD. If not, it means that its
+ * poller is currently waking up and is about to use it, likely to
+ * close it on shut/error, but maybe also to process any unexpectedly
+ * pending data. It's also possible that the FD was closed and
+ * reassigned to another thread group, so let's be careful.
+ */
+ if (unlikely(!fd_grab_tgid(fd, 1)))
+ return -1;
+
+ old = 0;
+ if (!HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old, tid_bit)) {
+ fd_drop_tgid(fd);
+ return -1;
+ }
+
+ /* success, from now on it's ours */
+ HA_ATOMIC_STORE(&fdtab[fd].thread_mask, tid_bit);
+
+ /* Make sure the FD doesn't have the active bit. It is possible that
+ * the fd is polled by the thread that used to own it, the new thread
+ * is supposed to call subscribe() later, to activate polling.
+ */
+ fd_stop_recv(fd);
+
+ /* we're done with it */
+ HA_ATOMIC_AND(&fdtab[fd].running_mask, ~tid_bit);
+
+ /* no more changes planned */
+ fd_drop_tgid(fd);
+ return 0;
+}
+
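+/* Programs an update of the polling state for <fd>. When the calling thread
+ * is the only one concerned, the fd is queued in the thread-local update
+ * list; otherwise it is added to the shared update list and, if no other
+ * concerned thread is awake, a sleeping one is woken up to handle it.
+ */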
+void updt_fd_polling(const int fd)
+{
+ uint tgrp = fd_take_tgid(fd);
+
+ /* closed ? may happen */
+ if (!tgrp)
+ return;
+
+ fd_drop_tgid(fd);
+
+ if (all_threads_mask == 1UL || (fdtab[fd].thread_mask & all_threads_mask) == tid_bit) {
+ if (HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
+ return;
+
+ fd_updt[fd_nbupdt++] = fd;
+ } else {
+ unsigned long update_mask = fdtab[fd].update_mask;
+ do {
+ if (update_mask == fdtab[fd].thread_mask)
+ return;
+ } while (!_HA_ATOMIC_CAS(&fdtab[fd].update_mask, &update_mask, fdtab[fd].thread_mask));
+
+ fd_add_to_fd_list(&update_list, fd, offsetof(struct fdtab, update));
+
+ if (fd_active(fd) &&
+ !(fdtab[fd].thread_mask & tid_bit) &&
+ (fdtab[fd].thread_mask & ~tid_bit & all_threads_mask & ~sleeping_thread_mask) == 0) {
+ /* we need to wake up one thread to handle it immediately */
+ int thr = my_ffsl(fdtab[fd].thread_mask & ~tid_bit & all_threads_mask) - 1;
+
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
+ wake_thread(thr);
+ }
+ }
+}
+
+/* Update events seen for FD <fd> and its state if needed. This should be
+ * called by the poller, passing FD_EV_*_{R,W,RW} in <evts>. FD_EV_ERR_*
+ * doesn't need to also pass FD_EV_SHUT_*, it's implied. ERR and SHUT are
+ * allowed to be reported regardless of R/W readiness. Returns one of
+ * FD_UPDT_*.
+ */
+int fd_update_events(int fd, uint evts)
+{
+ unsigned long locked;
+ uint old, new;
+ uint new_flags, must_stop;
+ ulong rmask, tmask;
+
+ th_ctx->flags &= ~TH_FL_STUCK; // this thread is still running
+
+ if (unlikely(!fd_grab_tgid(fd, 1))) {
+ /* the FD changed to another tgid, we can't safely
+ * check it anymore. The bits in the masks are not
+ * ours anymore and we're not allowed to touch them.
+ * Ours have already been cleared and the FD was
+ * closed in between so we can safely leave now.
+ */
+ activity[tid].poll_drop_fd++;
+ return FD_UPDT_CLOSED;
+ }
+
+ /* Do not take running_mask if not strictly needed (will trigger a
+ * cosmetic BUG_ON() in fd_insert() anyway if done).
+ */
+ tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+ if (!(tmask & tid_bit))
+ goto do_update;
+
+ HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
+
+ /* From this point, our bit may possibly be in thread_mask, but it may
+ * still vanish, either because a takeover completed just before taking
+ * the bit above with the new owner deleting the FD, or because a
+ * takeover started just before taking the bit. In order to make sure a
+ * started takeover is complete, we need to verify that all bits of
+ * running_mask are present in thread_mask, since takeover first takes
+ * running then atomically replaces thread_mask. Once it's stable, if
+ * our bit remains there, no further takeover may happen because we
+ * hold running, but if our bit is not there it means we've lost the
+ * takeover race and have to decline touching the FD. Regarding the
+ * risk of deletion, our bit in running_mask prevents fd_delete() from
+ * finalizing the close, and the caller will leave the FD with a zero
+ * thread_mask and the FD_MUST_CLOSE flag set. It will then be our
+ * responsibility to close it.
+ */
+ do {
+ rmask = _HA_ATOMIC_LOAD(&fdtab[fd].running_mask);
+ tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+ rmask &= ~tid_bit;
+ } while (rmask & ~tmask);
+
+ /* Now tmask is stable. Do nothing if the FD was taken over under us */
+
+ if (!(tmask & tid_bit)) {
+ /* a takeover has started */
+ activity[tid].poll_skip_fd++;
+
+ if (fd_clr_running(fd) == tid_bit)
+ goto closed_or_migrated;
+
+ goto do_update;
+ }
+
+ /* with running we're safe now, we can drop the reference */
+ fd_drop_tgid(fd);
+
+ locked = (tmask != tid_bit);
+
+ /* OK now we are guaranteed that our thread_mask was present and
+ * that we're allowed to update the FD.
+ */
+
+ new_flags =
+ ((evts & FD_EV_READY_R) ? FD_POLL_IN : 0) |
+ ((evts & FD_EV_READY_W) ? FD_POLL_OUT : 0) |
+ ((evts & FD_EV_SHUT_R) ? FD_POLL_HUP : 0) |
+ ((evts & FD_EV_ERR_RW) ? FD_POLL_ERR : 0);
+
+ /* SHUTW reported while FD was active for writes is an error */
+ if ((fdtab[fd].state & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W))
+ new_flags |= FD_POLL_ERR;
+
+ /* compute the inactive events reported late that must be stopped */
+ must_stop = 0;
+ if (unlikely(!fd_active(fd))) {
+ /* both sides stopped */
+ must_stop = FD_POLL_IN | FD_POLL_OUT;
+ }
+ else if (unlikely(!fd_recv_active(fd) && (evts & (FD_EV_READY_R | FD_EV_SHUT_R | FD_EV_ERR_RW)))) {
+ /* only send remains */
+ must_stop = FD_POLL_IN;
+ }
+ else if (unlikely(!fd_send_active(fd) && (evts & (FD_EV_READY_W | FD_EV_SHUT_W | FD_EV_ERR_RW)))) {
+ /* only recv remains */
+ must_stop = FD_POLL_OUT;
+ }
+
+ if (new_flags & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
+ new_flags |= FD_EV_READY_R;
+
+ if (new_flags & (FD_POLL_OUT | FD_POLL_ERR))
+ new_flags |= FD_EV_READY_W;
+
+ old = fdtab[fd].state;
+ new = (old & ~FD_POLL_UPDT_MASK) | new_flags;
+
+ if (unlikely(locked)) {
+ /* Locked FDs (those with more than 2 threads) are atomically updated */
+ while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)))
+ new = (old & ~FD_POLL_UPDT_MASK) | new_flags;
+ } else {
+ if (new != old)
+ fdtab[fd].state = new;
+ }
+
+ if (fdtab[fd].iocb && fd_active(fd)) {
+ fdtab[fd].iocb(fd);
+ }
+
+ /*
+ * We entered iocb with running set and with the valid tgid.
+ * Since then, this is what could have happened:
+ * - another thread tried to close the FD (e.g. timeout task from
+ * another one that owns it). We still have running set, but not
+ * tmask. We must call fd_clr_running() then _fd_delete_orphan()
+ * if we were the last one.
+ *
+ * - the iocb tried to close the FD => bit no more present in running,
+ * nothing to do. If it managed to close it, the poller's ->clo()
+ * has already been called.
+ *
+ * - after we closed, the FD was reassigned to another thread in
+ * another group => running not present, tgid differs, nothing to
+ * do because if it got reassigned it indicates it was already
+ * closed.
+ *
+ * There's no risk of takeover of the valid FD here during this period.
+ * Also if we still have running, immediately after we release it, the
+ * events above might instantly happen due to another thread taking
+ * over.
+ *
+ * As such, the only cases where the FD is still relevant are:
+ * - tgid still set and running still set (most common)
+ * - tgid still valid but running cleared due to fd_delete(): we may
+ * still need to stop polling otherwise we may keep it enabled
+ * while waiting for other threads to close it.
+ * And given that we may need to program a tentative update in case we
+ * don't immediately close, it's easier to grab the tgid during the
+ * whole check.
+ */
+
+ if (!fd_grab_tgid(fd, tgid))
+ return FD_UPDT_CLOSED;
+
+ tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+
+ /* another thread might have attempted to close this FD in the mean
+ * time (e.g. timeout task) striking on a previous thread and closing.
+ * This is detected by us being the last owners of a running_mask bit,
+ * and the thread_mask being zero. At the moment we release the running
+ * bit, a takeover may also happen, so in practice we check for our loss
+ * of the thread_mask bit, i.e. both thread_mask and running_mask being 0
+ * after we remove ourselves last. There is no risk the FD gets reassigned
+ * to a different group since it's not released until the real close()
+ * in _fd_delete_orphan().
+ */
+ if (fd_clr_running(fd) == tid_bit && !(tmask & tid_bit))
+ goto closed_or_migrated;
+
+ /* we had to stop this FD and it still must be stopped after the I/O
+ * cb's changes, so let's program an update for this.
+ */
+ if (must_stop && !(fdtab[fd].update_mask & tid_bit)) {
+ if (((must_stop & FD_POLL_IN) && !fd_recv_active(fd)) ||
+ ((must_stop & FD_POLL_OUT) && !fd_send_active(fd)))
+ if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
+ fd_updt[fd_nbupdt++] = fd;
+ }
+
+ fd_drop_tgid(fd);
+ return FD_UPDT_DONE;
+
+ closed_or_migrated:
+ /* We only come here once we've last dropped running and the FD is
+ * not for us as per !(tmask & tid_bit). It may imply we're
+ * responsible for closing it. Otherwise it's just a migration.
+ */
+ if (HA_ATOMIC_BTR(&fdtab[fd].state, FD_MUST_CLOSE_BIT)) {
+ fd_drop_tgid(fd);
+ _fd_delete_orphan(fd);
+ return FD_UPDT_CLOSED;
+ }
+
+ /* So we were alone, no close bit, at best the FD was migrated, at
+ * worst it's in the process of being closed by another thread. We must
+ * be ultra-careful as it can be re-inserted by yet another thread as
+ * the result of socket() or accept(). Let's just tell the poller the
+ * FD was lost. If it was closed it was already removed and this will
+ * only cost an update for nothing.
+ */
+
+ do_update:
+ /* The FD is not closed but we don't want the poller to wake up for
+ * it anymore.
+ */
+ if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
+ fd_updt[fd_nbupdt++] = fd;
+
+ fd_drop_tgid(fd);
+ return FD_UPDT_MIGRATED;
+}
+
+/* Tries to send <npfx> parts from <prefix> followed by <nmsg> parts from <msg>
+ * optionally followed by a newline if <nl> is non-null, to file descriptor
+ * <fd>. The message is sent atomically using writev(). It may be truncated to
+ * <maxlen> bytes if <maxlen> is non-null. There is no distinction between the
+ * two lists, it's just a convenience to help the caller prepend some prefixes
+ * when necessary. It takes the fd's lock to make sure no other thread will
+ * write to the same fd in parallel. Returns the number of bytes sent, or <=0
+ * on failure. A limit to 31 total non-empty segments is enforced. The caller
+ * is responsible for taking care of making the fd non-blocking.
+ */
+ssize_t fd_write_frag_line(int fd, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg, int nl)
+{
+ struct iovec iovec[32];
+ size_t sent = 0;
+ int vec = 0;
+ int attempts = 0;
+
+ if (!maxlen)
+ maxlen = ~0;
+
+ /* keep one char for a possible trailing '\n' in any case */
+ maxlen--;
+
+ /* make an iovec from the concatenation of all parts of the original
+ * message. Skip empty fields and truncate the whole message to maxlen,
+ * leaving one spare iovec for the '\n'.
+ */
+ while (vec < (sizeof(iovec) / sizeof(iovec[0]) - 1)) {
+ if (!npfx) {
+ pfx = msg;
+ npfx = nmsg;
+ nmsg = 0;
+ if (!npfx)
+ break;
+ }
+
+ iovec[vec].iov_base = pfx->ptr;
+ iovec[vec].iov_len = MIN(maxlen, pfx->len);
+ maxlen -= iovec[vec].iov_len;
+ if (iovec[vec].iov_len)
+ vec++;
+ pfx++; npfx--;
+ }
+
+ if (nl) {
+ iovec[vec].iov_base = "\n";
+ iovec[vec].iov_len = 1;
+ vec++;
+ }
+
+ /* make sure we never interleave writes and we never block. This means
+ * we prefer to fail on collision than to block. But we don't want to
+ * lose too many logs so we just perform a few lock attempts then give
+ * up.
+ */
+
+ while (HA_ATOMIC_BTS(&fdtab[fd].state, FD_EXCL_SYSCALL_BIT)) {
+ if (++attempts >= 200) {
+ /* so that the caller knows the message couldn't be delivered */
+ sent = -1;
+ errno = EAGAIN;
+ goto leave;
+ }
+ ha_thread_relax();
+ }
+
+ if (unlikely(!(fdtab[fd].state & FD_INITIALIZED))) {
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_INITIALIZED);
+ if (!isatty(fd))
+ fd_set_nonblock(fd);
+ }
+ sent = writev(fd, iovec, vec);
+ HA_ATOMIC_BTR(&fdtab[fd].state, FD_EXCL_SYSCALL_BIT);
+
+ leave:
+ /* sent > 0 if the message was delivered */
+ return sent;
+}
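+
+/* Minimal usage sketch of the function above, compiled out since it is
+ * illustrative only: atomically emit "log: message\n" on stderr. The IST()
+ * initializer is assumed from ist.h; the one-element arrays are arbitrary.
+ */
+#if 0
+static void example_frag_line(void)
+{
+ struct ist pfx[1] = { IST("log: ") };
+ struct ist msg[1] = { IST("message") };
+
+ /* maxlen=0 means no truncation; the last argument appends the '\n' */
+ fd_write_frag_line(STDERR_FILENO, 0, pfx, 1, msg, 1, 1);
+}
+#endif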
+
+#if defined(USE_CLOSEFROM)
+void my_closefrom(int start)
+{
+ closefrom(start);
+}
+
+#elif defined(USE_POLL)
+/* This is a portable implementation of closefrom(). It closes all open file
+ * descriptors starting at <start> and above. It relies on the fact that poll()
+ * will return POLLNVAL for each invalid (hence closed) file descriptor passed
+ * in argument in order to skip them. It acts with batches of FDs and will
+ * typically perform one poll() call per 1024 FDs so the overhead is low in
+ * case all FDs have to be closed.
+ */
+void my_closefrom(int start)
+{
+ struct pollfd poll_events[1024];
+ struct rlimit limit;
+ int nbfds, fd, ret, idx;
+ int step, next;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) == 0)
+ step = nbfds = limit.rlim_cur;
+ else
+ step = nbfds = 0;
+
+ if (nbfds <= 0) {
+ /* set safe limit */
+ nbfds = 1024;
+ step = 256;
+ }
+
+ if (step > sizeof(poll_events) / sizeof(poll_events[0]))
+ step = sizeof(poll_events) / sizeof(poll_events[0]);
+
+ while (start < nbfds) {
+ next = (start / step + 1) * step;
+
+ for (fd = start; fd < next && fd < nbfds; fd++) {
+ poll_events[fd - start].fd = fd;
+ poll_events[fd - start].events = 0;
+ }
+
+ do {
+ ret = poll(poll_events, fd - start, 0);
+ if (ret >= 0)
+ break;
+ } while (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ENOMEM);
+
+ if (ret)
+ ret = fd - start;
+
+ for (idx = 0; idx < ret; idx++) {
+ if (poll_events[idx].revents & POLLNVAL)
+ continue; /* already closed */
+
+ fd = poll_events[idx].fd;
+ close(fd);
+ }
+ start = next;
+ }
+}
+
+#else // defined(USE_POLL)
+
+/* This is a portable implementation of closefrom(). It closes all open file
+ * descriptors starting at <start> and above. This is a naive version for use
+ * when the operating system provides no alternative.
+ */
+void my_closefrom(int start)
+{
+ struct rlimit limit;
+ int nbfds;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) == 0)
+ nbfds = limit.rlim_cur;
+ else
+ nbfds = 0;
+
+ if (nbfds <= 0)
+ nbfds = 1024; /* safe limit */
+
+ while (start < nbfds)
+ close(start++);
+}
+#endif // defined(USE_POLL)
+
+/* Sets the RLIMIT_NOFILE setting to <new_limit> and returns the previous one
+ * in <old_limit> if the pointer is not NULL, even if setrlimit() fails. The
+ * two pointers may point to the same variable as the copy happens after
+ * setting the new value. The value is only changed if at least one of the new
+ * limits is strictly higher than the current one, otherwise returns 0 without
+ * changing anything. The getrlimit() or setrlimit() syscall return value is
+ * returned and errno is preserved.
+ */
+int raise_rlim_nofile(struct rlimit *old_limit, struct rlimit *new_limit)
+{
+ struct rlimit limit = { };
+ int ret = 0;
+
+ ret = getrlimit(RLIMIT_NOFILE, &limit);
+
+ if (ret == 0 &&
+ (limit.rlim_max < new_limit->rlim_max ||
+ limit.rlim_cur < new_limit->rlim_cur)) {
+ ret = setrlimit(RLIMIT_NOFILE, new_limit);
+ }
+
+ if (old_limit)
+ *old_limit = limit;
+
+ return ret;
+}
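+
+/* Illustrative call of the function above, compiled out; the target values
+ * are arbitrary. It tries to raise RLIMIT_NOFILE to global.maxsock while
+ * keeping a copy of the previous limits in case the raise is refused.
+ */
+#if 0
+static void example_raise_nofile(void)
+{
+ struct rlimit prev, want = {
+ .rlim_cur = global.maxsock,
+ .rlim_max = global.maxsock,
+ };
+
+ if (raise_rlim_nofile(&prev, &want) != 0) {
+ /* raise refused: keep running with <prev>, errno is preserved */
+ }
+}
+#endif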
+
+/* Computes the bounded poll() timeout based on the next expiration timer <next>
+ * by bounding it to MAX_DELAY_MS. <next> may equal TICK_ETERNITY. The pollers
+ * just need to call this function right before polling to get their timeout
+ * value. Timeouts that are already expired (possibly due to a pending event)
+ * are accounted for in activity.poll_exp.
+ */
+int compute_poll_timeout(int next)
+{
+ int wait_time;
+
+ if (!tick_isset(next))
+ wait_time = MAX_DELAY_MS;
+ else if (tick_is_expired(next, now_ms)) {
+ activity[tid].poll_exp++;
+ wait_time = 0;
+ }
+ else {
+ wait_time = TICKS_TO_MS(tick_remain(now_ms, next)) + 1;
+ if (wait_time > MAX_DELAY_MS)
+ wait_time = MAX_DELAY_MS;
+ }
+ return wait_time;
+}
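+
+/* Sketch of typical poller usage (names assumed): right before blocking,
+ * bound the wait time on the next task expiration.
+ */
+#if 0
+static void example_poll_loop(struct pollfd *fds, int nbfd, int next_exp)
+{
+ int wait_time = compute_poll_timeout(next_exp);
+
+ poll(fds, nbfd, wait_time); /* never sleeps more than MAX_DELAY_MS */
+}
+#endif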
+
+/* disable the specified poller */
+void disable_poller(const char *poller_name)
+{
+ int p;
+
+ for (p = 0; p < nbpollers; p++)
+ if (strcmp(pollers[p].name, poller_name) == 0)
+ pollers[p].pref = 0;
+}
+
+void poller_pipe_io_handler(int fd)
+{
+ char buf[1024];
+ /* Flush the pipe */
+ while (read(fd, buf, sizeof(buf)) > 0);
+ fd_cant_recv(fd);
+}
+
+/* Allocates the per-thread fd_updt list; thus it needs to be called early after
+ * thread creation.
+ */
+static int alloc_pollers_per_thread()
+{
+ fd_updt = calloc(global.maxsock, sizeof(*fd_updt));
+ return fd_updt != NULL;
+}
+
+/* Initialize the pollers per thread. */
+static int init_pollers_per_thread()
+{
+ int mypipe[2];
+
+ if (pipe(mypipe) < 0)
+ return 0;
+
+ poller_rd_pipe = mypipe[0];
+ poller_wr_pipe[tid] = mypipe[1];
+ fd_set_nonblock(poller_rd_pipe);
+ fd_insert(poller_rd_pipe, poller_pipe_io_handler, poller_pipe_io_handler, tid_bit);
+ fd_insert(poller_wr_pipe[tid], poller_pipe_io_handler, poller_pipe_io_handler, tid_bit);
+ fd_want_recv(poller_rd_pipe);
+ fd_stop_both(poller_wr_pipe[tid]);
+ return 1;
+}
+
+/* Deinitialize the pollers per thread */
+static void deinit_pollers_per_thread()
+{
+ /* rd and wr are initialized at the same place, but only rd is set to -1,
+ * so we rely on rd to decide whether to close both. */
+ if (poller_rd_pipe > -1) {
+ fd_delete(poller_rd_pipe);
+ poller_rd_pipe = -1;
+ fd_delete(poller_wr_pipe[tid]);
+ poller_wr_pipe[tid] = -1;
+ }
+}
+
+/* Release the pollers per thread, to be called late */
+static void free_pollers_per_thread()
+{
+ ha_free(&fd_updt);
+}
+
+/*
+ * Initialize the pollers till the best one is found.
+ * If none works, returns 0, otherwise 1.
+ */
+int init_pollers()
+{
+ int p;
+ struct poller *bp;
+
+ if ((fdtab_addr = calloc(global.maxsock, sizeof(*fdtab) + 64)) == NULL) {
+ ha_alert("Not enough memory to allocate %d entries for fdtab!\n", global.maxsock);
+ goto fail_tab;
+ }
+
+ /* always provide an aligned fdtab */
+ fdtab = (struct fdtab*)((((size_t)fdtab_addr) + 63) & -(size_t)64);
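+	/* e.g. with this rounding, an fdtab_addr of 0x1010 yields
+	 * (0x1010 + 63) & ~63 = 0x1040, while an already aligned 0x1000 is
+	 * kept as-is; the extra room allocated above (64 bytes per entry)
+	 * guarantees that the aligned pointer still leaves space for
+	 * global.maxsock entries.
+	 */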
+
+ if ((polled_mask = calloc(global.maxsock, sizeof(*polled_mask))) == NULL) {
+ ha_alert("Not enough memory to allocate %d entries for polled_mask!\n", global.maxsock);
+ goto fail_polledmask;
+ }
+
+ if ((fdinfo = calloc(global.maxsock, sizeof(*fdinfo))) == NULL) {
+ ha_alert("Not enough memory to allocate %d entries for fdinfo!\n", global.maxsock);
+ goto fail_info;
+ }
+
+ update_list.first = update_list.last = -1;
+
+ for (p = 0; p < global.maxsock; p++) {
+ /* Mark the fd as out of the fd cache */
+ fdtab[p].update.next = -3;
+ }
+
+ do {
+ bp = NULL;
+ for (p = 0; p < nbpollers; p++)
+ if (!bp || (pollers[p].pref > bp->pref))
+ bp = &pollers[p];
+
+ if (!bp || bp->pref == 0)
+ break;
+
+ if (bp->init(bp)) {
+ memcpy(&cur_poller, bp, sizeof(*bp));
+ return 1;
+ }
+ } while (!bp || bp->pref == 0);
+
+ free(fdinfo);
+ fail_info:
+ free(polled_mask);
+ fail_polledmask:
+ free(fdtab_addr);
+ fail_tab:
+ return 0;
+}
+
+/*
+ * Deinitialize the pollers.
+ */
+void deinit_pollers() {
+
+ struct poller *bp;
+ int p;
+
+ for (p = 0; p < nbpollers; p++) {
+ bp = &pollers[p];
+
+ if (bp && bp->pref)
+ bp->term(bp);
+ }
+
+ ha_free(&fdinfo);
+ ha_free(&fdtab_addr);
+ ha_free(&polled_mask);
+}
+
+/*
+ * Lists the known pollers on <out>.
+ * Should be performed only before initialization.
+ */
+int list_pollers(FILE *out)
+{
+ int p;
+ int last, next;
+ int usable;
+ struct poller *bp;
+
+ fprintf(out, "Available polling systems :\n");
+
+ usable = 0;
+ bp = NULL;
+ last = next = -1;
+ while (1) {
+ for (p = 0; p < nbpollers; p++) {
+ if ((next < 0 || pollers[p].pref > next)
+ && (last < 0 || pollers[p].pref < last)) {
+ next = pollers[p].pref;
+ if (!bp || (pollers[p].pref > bp->pref))
+ bp = &pollers[p];
+ }
+ }
+
+ if (next == -1)
+ break;
+
+ for (p = 0; p < nbpollers; p++) {
+ if (pollers[p].pref == next) {
+ fprintf(out, " %10s : ", pollers[p].name);
+ if (pollers[p].pref == 0)
+ fprintf(out, "disabled, ");
+ else
+ fprintf(out, "pref=%3d, ", pollers[p].pref);
+ if (pollers[p].test(&pollers[p])) {
+ fprintf(out, " test result OK");
+ if (next > 0)
+ usable++;
+ } else {
+ fprintf(out, " test result FAILED");
+ if (bp == &pollers[p])
+ bp = NULL;
+ }
+ fprintf(out, "\n");
+ }
+ }
+ last = next;
+ next = -1;
+ };
+ fprintf(out, "Total: %d (%d usable), will use %s.\n", nbpollers, usable, bp ? bp->name : "none");
+ return 0;
+}
+
+/*
+ * Some pollers may lose their connection after a fork(). It may be necessary
+ * to re-initialize part of them. Returns 0 in case of failure,
+ * otherwise 1. The fork() function may be NULL if unused. In case of error,
+ * the current poller is destroyed and the caller is responsible for trying
+ * another one by calling init_pollers() again.
+ */
+int fork_poller()
+{
+ int fd;
+ for (fd = 0; fd < global.maxsock; fd++) {
+ if (fdtab[fd].owner) {
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_CLONED);
+ }
+ }
+
+ if (cur_poller.fork) {
+ if (cur_poller.fork(&cur_poller))
+ return 1;
+ cur_poller.term(&cur_poller);
+ return 0;
+ }
+ return 1;
+}
+
+/* config parser for global "tune.fd.edge-triggered", accepts "on" or "off" */
+static int cfg_parse_tune_fd_edge_triggered(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_FD_ET;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_FD_ET;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.fd.edge-triggered", cfg_parse_tune_fd_edge_triggered, KWF_EXPERIMENTAL },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+REGISTER_PER_THREAD_ALLOC(alloc_pollers_per_thread);
+REGISTER_PER_THREAD_INIT(init_pollers_per_thread);
+REGISTER_PER_THREAD_DEINIT(deinit_pollers_per_thread);
+REGISTER_PER_THREAD_FREE(free_pollers_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/filters.c b/src/filters.c
new file mode 100644
index 0000000..d91f4a7
--- /dev/null
+++ b/src/filters.c
@@ -0,0 +1,1133 @@
+/*
+ * Stream filters related variables and functions.
+ *
+ * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/buf-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/compression.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/flt_http_comp.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+
+#define TRACE_SOURCE &trace_strm
+
+/* Pool used to allocate filters */
+DECLARE_STATIC_POOL(pool_head_filter, "filter", sizeof(struct filter));
+
+static int handle_analyzer_result(struct stream *s, struct channel *chn, unsigned int an_bit, int ret);
+
+/* - RESUME_FILTER_LOOP and RESUME_FILTER_END must always be used together.
+ * The first one begins a loop and the second one ends it.
+ *
+ * - BREAK_EXECUTION must be used to break the loop and set the filter from
+ * which to resume the next time.
+ *
+ * Here is an example:
+ *
+ * RESUME_FILTER_LOOP(stream, channel) {
+ * ...
+ * if (cond)
+ * BREAK_EXECUTION(stream, channel, label);
+ * ...
+ * } RESUME_FILTER_END;
+ * ...
+ * label:
+ * ...
+ *
+ */
+#define RESUME_FILTER_LOOP(strm, chn) \
+ do { \
+ struct filter *filter; \
+ \
+ if (strm_flt(strm)->current[CHN_IDX(chn)]) { \
+ filter = strm_flt(strm)->current[CHN_IDX(chn)]; \
+ strm_flt(strm)->current[CHN_IDX(chn)] = NULL; \
+ goto resume_execution; \
+ } \
+ \
+	list_for_each_entry(filter, &strm_flt(strm)->filters, list) { \
+ resume_execution:
+
+#define RESUME_FILTER_END \
+ } \
+ } while(0)
+
+#define BREAK_EXECUTION(strm, chn, label) \
+ do { \
+ strm_flt(strm)->current[CHN_IDX(chn)] = filter; \
+ goto label; \
+ } while (0)
+
+
+/* List head of all known filter keywords */
+static struct flt_kw_list flt_keywords = {
+ .list = LIST_HEAD_INIT(flt_keywords.list)
+};
+
+/*
+ * Registers the filter keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void
+flt_register_keywords(struct flt_kw_list *kwl)
+{
+ LIST_APPEND(&flt_keywords.list, &kwl->list);
+}
+
+/*
+ * Returns a pointer to the filter keyword <kw>, or NULL if not found. If the
+ * keyword is found with a NULL ->parse() function, then an attempt is made to
+ * find one with a valid ->parse() function. This way it is possible to declare
+ * platform-dependent, known keywords as NULL, then only declare them as valid
+ * if some options are met. Note that if the requested keyword contains an
+ * opening parenthesis, everything from this point is ignored.
+ */
+struct flt_kw *
+flt_find_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct flt_kw_list *kwl;
+ struct flt_kw *ret = NULL;
+
+ kwend = strchr(kw, '(');
+ if (!kwend)
+ kwend = kw + strlen(kw);
+
+ list_for_each_entry(kwl, &flt_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0) {
+ if (kwl->kw[index].parse)
+				return &kwl->kw[index]; /* found it! */
+ else
+ ret = &kwl->kw[index]; /* may be OK */
+ }
+ }
+ }
+ return ret;
+}
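+
+/* Example (illustrative): with a registered keyword "trace", both
+ * flt_find_kw("trace") and flt_find_kw("trace(arg)") return the same
+ * entry, since everything starting at the opening parenthesis is
+ * ignored. An entry declared with a NULL ->parse() is only returned
+ * when no list provides a parsable variant of the same keyword.
+ */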
+
+/*
+ * Dumps all registered "filter" keywords to the <out> string pointer. The
+ * unsupported keywords are only dumped if their supported form was not found.
+ * If <out> is NULL, the output is emitted using a more compact format on stdout.
+ */
+void
+flt_dump_kws(char **out)
+{
+ struct flt_kw_list *kwl;
+ const struct flt_kw *kwp, *kw;
+ const char *scope = NULL;
+ int index;
+
+ if (out)
+ *out = NULL;
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &flt_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((kwl->kw[index].parse ||
+ flt_find_kw(kwl->kw[index].kw) == &kwl->kw[index])
+ && strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL)) {
+ kw = &kwl->kw[index];
+ scope = kwl->scope;
+ }
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ if (out)
+ memprintf(out, "%s[%4s] %s%s\n", *out ? *out : "",
+ scope,
+ kw->kw,
+ kw->parse ? "" : " (not supported)");
+ else
+ printf("%s [%s]\n",
+ kw->kw, scope);
+ }
+}
+
+/*
+ * Lists the known filters on <out>
+ */
+void
+list_filters(FILE *out)
+{
+ char *filters, *p, *f;
+
+ fprintf(out, "Available filters :\n");
+ flt_dump_kws(&filters);
+ for (p = filters; (f = strtok_r(p,"\n",&p));)
+ fprintf(out, "\t%s\n", f);
+ free(filters);
+}
+
+/*
+ * Parses the "filter" keyword. All keywords must be handled by filters
+ * themselves
+ */
+static int
+parse_filter(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line, char **err)
+{
+ struct flt_conf *fconf = NULL;
+
+ /* Filter cannot be defined on a default proxy */
+ if (curpx == defpx) {
+ memprintf(err, "parsing [%s:%d] : %s is not allowed in a 'default' section.",
+ file, line, args[0]);
+ return -1;
+ }
+ if (strcmp(args[0], "filter") == 0) {
+ struct flt_kw *kw;
+ int cur_arg;
+
+ if (!*args[1]) {
+ memprintf(err,
+ "parsing [%s:%d] : missing argument for '%s' in %s '%s'.",
+ file, line, args[0], proxy_type_str(curpx), curpx->id);
+ goto error;
+ }
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf) {
+ memprintf(err, "'%s' : out of memory", args[0]);
+ goto error;
+ }
+
+ cur_arg = 1;
+ kw = flt_find_kw(args[cur_arg]);
+ if (kw) {
+ if (!kw->parse) {
+ memprintf(err, "parsing [%s:%d] : '%s' : "
+ "'%s' option is not implemented in this version (check build options).",
+ file, line, args[0], args[cur_arg]);
+ goto error;
+ }
+ if (kw->parse(args, &cur_arg, curpx, fconf, err, kw->private) != 0) {
+ if (err && *err)
+ memprintf(err, "'%s' : '%s'",
+ args[0], *err);
+ else
+ memprintf(err, "'%s' : error encountered while processing '%s'",
+ args[0], args[cur_arg]);
+ goto error;
+ }
+ }
+ else {
+ flt_dump_kws(err);
+ indent_msg(err, 4);
+ memprintf(err, "'%s' : unknown keyword '%s'.%s%s",
+ args[0], args[cur_arg],
+ err && *err ? " Registered keywords :" : "", err && *err ? *err : "");
+ goto error;
+ }
+ if (*args[cur_arg]) {
+ memprintf(err, "'%s %s' : unknown keyword '%s'.",
+ args[0], args[1], args[cur_arg]);
+ goto error;
+ }
+ if (fconf->ops == NULL) {
+ memprintf(err, "'%s %s' : no callbacks defined.",
+ args[0], args[1]);
+ goto error;
+ }
+
+ LIST_APPEND(&curpx->filter_configs, &fconf->list);
+ }
+ return 0;
+
+ error:
+ free(fconf);
+ return -1;
+}
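+
+/* Configuration sketch (illustrative): the "filter" keyword is followed
+ * by a registered filter name and that filter's own arguments, e.g.:
+ *
+ *	frontend www
+ *	    bind :8080
+ *	    filter trace name MY-TRACE
+ *	    filter compression
+ */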
+
+/*
+ * Calls 'init' callback for all filters attached to a proxy. This happens after
+ * the configuration parsing. Filters can finish filling their config. Returns
+ * (ERR_ALERT|ERR_FATAL) if an error occurs, 0 otherwise.
+ */
+static int
+flt_init(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->ops->init && fconf->ops->init(proxy, fconf) < 0)
+ return ERR_ALERT|ERR_FATAL;
+ }
+ return 0;
+}
+
+/*
+ * Calls 'init_per_thread' callback for all filters attached to a proxy, for
+ * each thread. This happens after the thread creation. Filters can finish
+ * filling their config. Returns (ERR_ALERT|ERR_FATAL) if an error occurs, 0
+ * otherwise.
+ */
+static int
+flt_init_per_thread(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->ops->init_per_thread && fconf->ops->init_per_thread(proxy, fconf) < 0)
+ return ERR_ALERT|ERR_FATAL;
+ }
+ return 0;
+}
+
+/* Calls flt_init() for all proxies, see above */
+static int
+flt_init_all()
+{
+ struct proxy *px;
+ int err_code = ERR_NONE;
+
+ for (px = proxies_list; px; px = px->next) {
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) {
+ flt_deinit(px);
+ continue;
+ }
+ err_code |= flt_init(px);
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize filters for proxy '%s'.\n",
+ px->id);
+ return err_code;
+ }
+ }
+ return 0;
+}
+
+/* Calls flt_init_per_thread() for all proxies, see above. Be careful here, it
+ * returns 0 if an error occurred. This is the opposite of flt_init_all. */
+static int
+flt_init_all_per_thread()
+{
+ struct proxy *px;
+ int err_code = 0;
+
+ for (px = proxies_list; px; px = px->next) {
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ err_code = flt_init_per_thread(px);
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize filters for proxy '%s' for thread %u.\n",
+ px->id, tid);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Calls 'check' callback for all filters attached to a proxy. This happens
+ * after the configuration parsing but before filters initialization. Returns
+ * the number of encountered errors.
+ */
+int
+flt_check(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+ int err = 0;
+
+ err += check_implicit_http_comp_flt(proxy);
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->ops->check)
+ err += fconf->ops->check(proxy, fconf);
+ }
+ return err;
+}
+
+/*
+ * Calls 'deinit' callback for all filters attached to a proxy. This happens when
+ * HAProxy is stopped.
+ */
+void
+flt_deinit(struct proxy *proxy)
+{
+ struct flt_conf *fconf, *back;
+
+ list_for_each_entry_safe(fconf, back, &proxy->filter_configs, list) {
+ if (fconf->ops->deinit)
+ fconf->ops->deinit(proxy, fconf);
+ LIST_DELETE(&fconf->list);
+ free(fconf);
+ }
+}
+
+/*
+ * Calls 'deinit_per_thread' callback for all filters attached to a proxy, for
+ * each thread. This happens before exiting a thread.
+ */
+void
+flt_deinit_per_thread(struct proxy *proxy)
+{
+ struct flt_conf *fconf, *back;
+
+ list_for_each_entry_safe(fconf, back, &proxy->filter_configs, list) {
+ if (fconf->ops->deinit_per_thread)
+ fconf->ops->deinit_per_thread(proxy, fconf);
+ }
+}
+
+
+/* Calls flt_deinit_per_thread() for all proxies, see above */
+static void
+flt_deinit_all_per_thread()
+{
+ struct proxy *px;
+
+ for (px = proxies_list; px; px = px->next)
+ flt_deinit_per_thread(px);
+}
+
+/* Attaches a filter to a stream. Returns -1 if an error occurs, 0 otherwise. */
+static int
+flt_stream_add_filter(struct stream *s, struct flt_conf *fconf, unsigned int flags)
+{
+ struct filter *f;
+
+ if (IS_HTX_STRM(s) && !(fconf->flags & FLT_CFG_FL_HTX))
+ return 0;
+
+ f = pool_zalloc(pool_head_filter);
+ if (!f) /* not enough memory */
+ return -1;
+ f->config = fconf;
+ f->flags |= flags;
+
+ if (FLT_OPS(f)->attach) {
+ int ret = FLT_OPS(f)->attach(s, f);
+ if (ret <= 0) {
+ pool_free(pool_head_filter, f);
+ return ret;
+ }
+ }
+
+ LIST_APPEND(&strm_flt(s)->filters, &f->list);
+ strm_flt(s)->flags |= STRM_FLT_FL_HAS_FILTERS;
+ return 0;
+}
+
+/*
+ * Called when a stream is created. It attaches all frontend filters to the
+ * stream. Returns -1 if an error occurs, 0 otherwise.
+ */
+int
+flt_stream_init(struct stream *s)
+{
+ struct flt_conf *fconf;
+
+ memset(strm_flt(s), 0, sizeof(*strm_flt(s)));
+ LIST_INIT(&strm_flt(s)->filters);
+ list_for_each_entry(fconf, &strm_fe(s)->filter_configs, list) {
+ if (flt_stream_add_filter(s, fconf, 0) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Called when a stream is closed or when analysis ends (for an HTTP stream,
+ * this happens after each request/response exchange). When analysis ends,
+ * backend filters are removed. When the stream is closed, all filters
+ * attached to the stream are removed.
+ */
+void
+flt_stream_release(struct stream *s, int only_backend)
+{
+ struct filter *filter, *back;
+
+ list_for_each_entry_safe(filter, back, &strm_flt(s)->filters, list) {
+ if (!only_backend || (filter->flags & FLT_FL_IS_BACKEND_FILTER)) {
+ if (FLT_OPS(filter)->detach)
+ FLT_OPS(filter)->detach(s, filter);
+ LIST_DELETE(&filter->list);
+ pool_free(pool_head_filter, filter);
+ }
+ }
+ if (LIST_ISEMPTY(&strm_flt(s)->filters))
+ strm_flt(s)->flags &= ~STRM_FLT_FL_HAS_FILTERS;
+}
+
+/*
+ * Calls 'stream_start' for all filters attached to a stream. This happens when
+ * the stream is created, just after calling the flt_stream_init()
+ * function. Returns -1 if an error occurs, 0 otherwise.
+ */
+int
+flt_stream_start(struct stream *s)
+{
+ struct filter *filter;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->stream_start && FLT_OPS(filter)->stream_start(s, filter) < 0)
+ return -1;
+ }
+ if (strm_li(s) && (strm_li(s)->analysers & AN_REQ_FLT_START_FE)) {
+ s->req.flags |= CF_FLT_ANALYZE;
+ s->req.analysers |= AN_REQ_FLT_END;
+ }
+ return 0;
+}
+
+/*
+ * Calls 'stream_stop' for all filters attached to a stream. This happens when
+ * the stream is stopped, just before calling flt_stream_release function.
+ */
+void
+flt_stream_stop(struct stream *s)
+{
+ struct filter *filter;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->stream_stop)
+ FLT_OPS(filter)->stream_stop(s, filter);
+ }
+}
+
+/*
+ * Calls 'check_timeouts' for all filters attached to a stream. This happens when
+ * the stream is woken up because of an expired timer.
+ */
+void
+flt_stream_check_timeouts(struct stream *s)
+{
+ struct filter *filter;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->check_timeouts)
+ FLT_OPS(filter)->check_timeouts(s, filter);
+ }
+}
+
+/*
+ * Called when a backend is set for a stream. If the frontend and the backend
+ * are not the same, this function attaches all backend filters to the
+ * stream. Returns -1 if an error occurs, 0 otherwise.
+ */
+int
+flt_set_stream_backend(struct stream *s, struct proxy *be)
+{
+ struct flt_conf *fconf;
+ struct filter *filter;
+
+ if (strm_fe(s) == be)
+ goto end;
+
+ list_for_each_entry(fconf, &be->filter_configs, list) {
+ if (flt_stream_add_filter(s, fconf, FLT_FL_IS_BACKEND_FILTER) < 0)
+ return -1;
+ }
+
+ end:
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->stream_set_backend &&
+ FLT_OPS(filter)->stream_set_backend(s, filter, be) < 0)
+ return -1;
+ }
+ if (be->be_req_ana & AN_REQ_FLT_START_BE) {
+ s->req.flags |= CF_FLT_ANALYZE;
+ s->req.analysers |= AN_REQ_FLT_END;
+ }
+ if ((strm_fe(s)->fe_rsp_ana | be->be_rsp_ana) & (AN_RES_FLT_START_FE|AN_RES_FLT_START_BE)) {
+ s->res.flags |= CF_FLT_ANALYZE;
+ s->res.analysers |= AN_RES_FLT_END;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Calls 'http_end' callback for all filters attached to a stream. All filters
+ * are called here, but only if there is at least one "data" filter. This
+ * function is called when all data were parsed and forwarded. 'http_end'
+ * callback is resumable, so this function returns a negative value if an error
+ * occurs, 0 if it needs to wait for some reason, any other value otherwise.
+ */
+int
+flt_http_end(struct stream *s, struct http_msg *msg)
+{
+ unsigned long long *strm_off = &FLT_STRM_OFF(s, msg->chn);
+ unsigned int offset = 0;
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ RESUME_FILTER_LOOP(s, msg->chn) {
+ unsigned long long flt_off = FLT_OFF(filter, msg->chn);
+ offset = flt_off - *strm_off;
+
+		/* Call http_end for data filters only. But the filter offset
+		 * is still valid for all filters. */
+ if (!IS_DATA_FILTER(filter, msg->chn))
+ continue;
+
+ if (FLT_OPS(filter)->http_end) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->http_end(s, filter, msg);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, msg->chn, end);
+ }
+ } RESUME_FILTER_END;
+
+ c_adv(msg->chn, offset);
+ *strm_off += offset;
+
+end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'http_reset' callback for all filters attached to a stream. This
+ * happens when a 100-continue response is received.
+ */
+void
+flt_http_reset(struct stream *s, struct http_msg *msg)
+{
+ struct filter *filter;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->http_reset) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ FLT_OPS(filter)->http_reset(s, filter, msg);
+ }
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+}
+
+/*
+ * Calls 'http_reply' callback for all filters attached to a stream when
+ * HAProxy decides to stop the HTTP message processing.
+ */
+void
+flt_http_reply(struct stream *s, short status, const struct buffer *msg)
+{
+ struct filter *filter;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->http_reply) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ FLT_OPS(filter)->http_reply(s, filter, status, msg);
+ }
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+}
+
+/*
+ * Calls 'http_payload' callback for all "data" filters attached to a
+ * stream. This function is called when some data can be forwarded in the
+ * AN_REQ_HTTP_XFER_BODY and AN_RES_HTTP_XFER_BODY analyzers. It takes care to
+ * update the filters and the stream offset to be sure that a filter cannot
+ * forward more data than its predecessors. A filter can choose to not forward
+ * all data. Returns a negative value if an error occurs, else the number of
+ * forwarded bytes.
+ */
+int
+flt_http_payload(struct stream *s, struct http_msg *msg, unsigned int len)
+{
+ struct filter *filter;
+ unsigned long long *strm_off = &FLT_STRM_OFF(s, msg->chn);
+ unsigned int out = co_data(msg->chn);
+ int ret, data;
+
+ strm_flt(s)->flags &= ~STRM_FLT_FL_HOLD_HTTP_HDRS;
+
+ ret = data = len - out;
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ unsigned long long *flt_off = &FLT_OFF(filter, msg->chn);
+ unsigned int offset = *flt_off - *strm_off;
+
+		/* Call http_payload for data filters only. Forward all data
+		 * for the others and update their filter offset
+		 */
+ if (!IS_DATA_FILTER(filter, msg->chn)) {
+ *flt_off += data - offset;
+ continue;
+ }
+
+ if (FLT_OPS(filter)->http_payload) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->http_payload(s, filter, msg, out + offset, data - offset);
+ if (ret < 0)
+ goto end;
+ data = ret + *flt_off - *strm_off;
+ *flt_off += ret;
+ }
+ }
+
+ /* If nothing was forwarded yet, we take care to hold the headers if
+ * following conditions are met :
+ *
+ * - *strm_off == 0 (nothing forwarded yet)
+ * - ret == 0 (no data forwarded at all on this turn)
+	 *     - STRM_FLT_FL_HOLD_HTTP_HDRS flag set (at least one filter wants to hold the headers)
+ *
+ * Be careful, STRM_FLT_FL_HOLD_HTTP_HDRS is removed before each http_payload loop.
+ * Thus, it must explicitly be set when necessary. We must do that to hold the headers
+ * when there is no payload.
+ */
+ if (!ret && !*strm_off && (strm_flt(s)->flags & STRM_FLT_FL_HOLD_HTTP_HDRS))
+ goto end;
+
+ ret = data;
+ *strm_off += ret;
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
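+
+/* Offset bookkeeping sketch (illustrative): with a stream offset of 100
+ * and two data filters A (offset 130) and B (offset 100), a call with
+ * len=150 and out=100 proposes data=50 new bytes. A already saw 30 of
+ * them, so it is offered the remaining 20 starting at out+30. If A
+ * forwards them all, B is then offered the full 50 bytes, and the
+ * stream offset finally advances by what the last filter forwarded.
+ */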
+
+/*
+ * Calls 'channel_start_analyze' callback for all filters attached to a
+ * stream. This function is called when we start to analyze a request or a
+ * response. For frontend filters, it is called before all other analyzers. For
+ * backend ones, it is called before all backend
+ * analyzers. 'channel_start_analyze' callback is resumable, so this function
+ * returns 0 if an error occurs or if it needs to wait, any other value
+ * otherwise.
+ */
+int
+flt_start_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+ /* If this function is called, this means there is at least one filter,
+ * so we do not need to check the filter list's emptiness. */
+
+ /* Set flag on channel to tell that the channel is filtered */
+ chn->flags |= CF_FLT_ANALYZE;
+ chn->analysers |= ((chn->flags & CF_ISRESP) ? AN_RES_FLT_END : AN_REQ_FLT_END);
+
+ RESUME_FILTER_LOOP(s, chn) {
+ if (!(chn->flags & CF_ISRESP)) {
+ if (an_bit == AN_REQ_FLT_START_BE &&
+ !(filter->flags & FLT_FL_IS_BACKEND_FILTER))
+ continue;
+ }
+ else {
+ if (an_bit == AN_RES_FLT_START_BE &&
+ !(filter->flags & FLT_FL_IS_BACKEND_FILTER))
+ continue;
+ }
+
+ FLT_OFF(filter, chn) = 0;
+ if (FLT_OPS(filter)->channel_start_analyze) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_start_analyze(s, filter, chn);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, end);
+ }
+ } RESUME_FILTER_END;
+
+ end:
+ ret = handle_analyzer_result(s, chn, an_bit, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'channel_pre_analyze' callback for all filters attached to a
+ * stream. This function is called BEFORE each analyzer attached to a channel,
+ * except analyzers responsible for data sending. 'channel_pre_analyze'
+ * callback is resumable, so this function returns 0 if an error occurs or if it
+ * needs to wait, any other value otherwise.
+ *
+ * Note this function can be called many times for the same analyzer. In fact,
+ * it is called until the analyzer finishes its processing.
+ */
+int
+flt_pre_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+ RESUME_FILTER_LOOP(s, chn) {
+ if (FLT_OPS(filter)->channel_pre_analyze && (filter->pre_analyzers & an_bit)) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_pre_analyze(s, filter, chn, an_bit);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, check_result);
+ filter->pre_analyzers &= ~an_bit;
+ }
+ } RESUME_FILTER_END;
+
+ check_result:
+ ret = handle_analyzer_result(s, chn, 0, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'channel_post_analyze' callback for all filters attached to a
+ * stream. This function is called AFTER each analyzer attached to a channel,
+ * except analyzers responsible for data sending. 'channel_post_analyze'
+ * callback is NOT resumable, so this function returns 0 if an error occurs,
+ * any other value otherwise.
+ *
+ * Here, AFTER means when the analyzer finishes its processing.
+ */
+int
+flt_post_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ struct filter *filter;
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->channel_post_analyze && (filter->post_analyzers & an_bit)) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_post_analyze(s, filter, chn, an_bit);
+ if (ret < 0)
+ break;
+ filter->post_analyzers &= ~an_bit;
+ }
+ }
+ ret = handle_analyzer_result(s, chn, 0, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * This function is the AN_REQ/RES_FLT_HTTP_HDRS analyzer, used to filter HTTP
+ * headers of a request or a response. Returns 0 if an error occurs or if it
+ * needs to wait, any other value otherwise.
+ */
+int
+flt_analyze_http_headers(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ struct http_msg *msg;
+ int ret = 1;
+
+ msg = ((chn->flags & CF_ISRESP) ? &s->txn->rsp : &s->txn->req);
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+
+ RESUME_FILTER_LOOP(s, chn) {
+ if (FLT_OPS(filter)->http_headers) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->http_headers(s, filter, msg);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, check_result);
+ }
+ } RESUME_FILTER_END;
+
+ if (HAS_DATA_FILTERS(s, chn)) {
+ size_t data = http_get_hdrs_size(htxbuf(&chn->buf));
+ struct filter *f;
+
+ list_for_each_entry(f, &strm_flt(s)->filters, list)
+ FLT_OFF(f, chn) = data;
+ }
+
+ check_result:
+ ret = handle_analyzer_result(s, chn, an_bit, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'channel_end_analyze' callback for all filters attached to a
+ * stream. This function is called when we stop to analyze a request or a
+ * response. It is called after all other analyzers. 'channel_end_analyze'
+ * callback is resumable, so this function returns 0 if an error occurs or if it
+ * needs to wait, any other value otherwise.
+ */
+int
+flt_end_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+	/* Check if all filters attached to the stream have finished their
+ * processing on this channel. */
+ if (!(chn->flags & CF_FLT_ANALYZE))
+ goto sync;
+
+ RESUME_FILTER_LOOP(s, chn) {
+ FLT_OFF(filter, chn) = 0;
+ unregister_data_filter(s, chn, filter);
+
+ if (FLT_OPS(filter)->channel_end_analyze) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_end_analyze(s, filter, chn);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, end);
+ }
+ } RESUME_FILTER_END;
+
+ end:
+	/* We don't remove this analyzer yet because we need to synchronize
+	 * both channels. So here, we just remove the flag CF_FLT_ANALYZE. */
+ ret = handle_analyzer_result(s, chn, 0, ret);
+ if (ret) {
+ chn->flags &= ~CF_FLT_ANALYZE;
+
+ /* Pretend there is an activity on both channels. Flag on the
+ * current one will be automatically removed, so only the other
+ * one will remain. This is a way to be sure that
+ * 'channel_end_analyze' callback will have a chance to be
+ * called at least once for the other side to finish the current
+		 * processing. Of course, it is the filter's responsibility to
+		 * wake up the stream if it chooses to loop on this callback. */
+ s->req.flags |= CF_WAKE_ONCE;
+ s->res.flags |= CF_WAKE_ONCE;
+ }
+
+ sync:
+	/* Now we can check if filters have finished their work on both
+ * channels */
+ if (!(s->req.flags & CF_FLT_ANALYZE) && !(s->res.flags & CF_FLT_ANALYZE)) {
+		/* Sync channels by removing this analyzer for both channels */
+ s->req.analysers &= ~AN_REQ_FLT_END;
+ s->res.analysers &= ~AN_RES_FLT_END;
+
+ /* Remove backend filters from the list */
+ flt_stream_release(s, 1);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ }
+ else {
+ DBG_TRACE_DEVEL("waiting for sync", STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ }
+ return ret;
+}
+
+
+/*
+ * Calls 'tcp_payload' callback for all "data" filters attached to a
+ * stream. This function is called when some data can be forwarded in the
+ * AN_REQ_FLT_XFER_DATA and AN_RES_FLT_XFER_DATA analyzers. It takes care to
+ * update the filters and the stream offset to be sure that a filter cannot
+ * forward more data than its predecessors. A filter can choose to not forward
+ * all data. Returns a negative value if an error occurs, else the number of
+ * forwarded bytes.
+ */
+int
+flt_tcp_payload(struct stream *s, struct channel *chn, unsigned int len)
+{
+ struct filter *filter;
+ unsigned long long *strm_off = &FLT_STRM_OFF(s, chn);
+ unsigned int out = co_data(chn);
+ int ret, data;
+
+ ret = data = len - out;
+ DBG_TRACE_ENTER(STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ unsigned long long *flt_off = &FLT_OFF(filter, chn);
+ unsigned int offset = *flt_off - *strm_off;
+
+		/* Call tcp_payload for data filters only. Forward all data
+		 * for the others and update their filter offset
+		 */
+ if (!IS_DATA_FILTER(filter, chn)) {
+ *flt_off += data - offset;
+ continue;
+ }
+
+ if (FLT_OPS(filter)->tcp_payload) {
+
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->tcp_payload(s, filter, chn, out + offset, data - offset);
+ if (ret < 0)
+ goto end;
+ data = ret + *flt_off - *strm_off;
+ *flt_off += ret;
+ }
+ }
+
+ /* Only forward data if the last filter decides to forward something */
+ if (ret > 0) {
+ ret = data;
+ *strm_off += ret;
+ }
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Called when TCP data must be filtered on a channel. This function is the
+ * AN_REQ/RES_FLT_XFER_DATA analyzer. When called, it is responsible for
+ * forwarding data when the proxy is not in http mode. Behind the scenes, it
+ * calls the 'tcp_payload' callback for all "data" filters attached to the
+ * stream. Returns 0 if an error occurs or if it needs to
+ * wait, any other value otherwise.
+ */
+int
+flt_xfer_data(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ unsigned int len;
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+
+ /* If there is no "data" filters, we do nothing */
+ if (!HAS_DATA_FILTERS(s, chn))
+ goto end;
+
+	/* Be sure that the output is still open, otherwise stop the data
+	 * filtering. */
+ if ((chn->flags & (CF_READ_ERROR|CF_READ_TIMEOUT|CF_WRITE_ERROR|CF_WRITE_TIMEOUT)) ||
+ ((chn->flags & CF_SHUTW) && (chn->to_forward || co_data(chn))))
+ goto end;
+
+ if (s->flags & SF_HTX) {
+ struct htx *htx = htxbuf(&chn->buf);
+ len = htx->data;
+ }
+ else
+ len = c_data(chn);
+
+ ret = flt_tcp_payload(s, chn, len);
+ if (ret < 0)
+ goto end;
+ c_adv(chn, ret);
+
+	/* Stop waiting for data if the input is closed and no data is pending,
+	 * or if the output is closed. */
+ if (chn->flags & CF_SHUTW) {
+ ret = 1;
+ goto end;
+ }
+ if (chn->flags & CF_SHUTR) {
+ if (((s->flags & SF_HTX) && htx_is_empty(htxbuf(&chn->buf))) || c_empty(chn)) {
+ ret = 1;
+ goto end;
+ }
+ }
+
+ /* Wait for data */
+ DBG_TRACE_DEVEL("waiting for more data", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ return 0;
+ end:
+ /* Terminate the data filtering. If <ret> is negative, an error was
+ * encountered during the filtering. */
+ ret = handle_analyzer_result(s, chn, an_bit, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Handles the result of filters' analyzers. It returns 0 if an error occurs or if
+ * it needs to wait, any other value otherwise.
+ */
+static int
+handle_analyzer_result(struct stream *s, struct channel *chn,
+ unsigned int an_bit, int ret)
+{
+ int finst;
+ int status = 0;
+
+ if (ret < 0)
+ goto return_bad_req;
+ else if (!ret)
+ goto wait;
+
+ /* End of job, return OK */
+ if (an_bit) {
+ chn->analysers &= ~an_bit;
+ chn->analyse_exp = TICK_ETERNITY;
+ }
+ return 1;
+
+ return_bad_req:
+ /* An error occurs */
+ channel_abort(&s->req);
+ channel_abort(&s->res);
+
+ if (!(chn->flags & CF_ISRESP)) {
+ s->req.analysers &= AN_REQ_FLT_END;
+ finst = SF_FINST_R;
+ status = 400;
+ /* FIXME: incr counters */
+ }
+ else {
+ s->res.analysers &= AN_RES_FLT_END;
+ finst = SF_FINST_H;
+ status = 502;
+ /* FIXME: incr counters */
+ }
+
+ if (IS_HTX_STRM(s)) {
+ /* Do not do that when we are waiting for the next request */
+ if (s->txn->status > 0)
+ http_reply_and_close(s, s->txn->status, NULL);
+ else {
+ s->txn->status = status;
+ http_reply_and_close(s, status, http_error_message(s));
+ }
+ }
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= finst;
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_FLT_ANA|STRM_EV_FLT_ERR, s);
+ return 0;
+
+ wait:
+ if (!(chn->flags & CF_ISRESP))
+ channel_dont_connect(chn);
+ DBG_TRACE_DEVEL("wairing for more data", STRM_EV_FLT_ANA, s);
+ return 0;
+}
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled. */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "filter", parse_filter },
+ { 0, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+REGISTER_POST_CHECK(flt_init_all);
+REGISTER_PER_THREAD_INIT(flt_init_all_per_thread);
+REGISTER_PER_THREAD_DEINIT(flt_deinit_all_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fix.c b/src/fix.c
new file mode 100644
index 0000000..abf3119
--- /dev/null
+++ b/src/fix.c
@@ -0,0 +1,264 @@
+/*
+ * Financial Information eXchange Protocol
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/intops.h>
+#include <haproxy/fix.h>
+/*
+ * Return the corresponding numerical tag id if <str> looks like a valid FIX
+ * protocol tag ID. Otherwise, 0 is returned (0 is an invalid id).
+ *
+ * If <version> is given, it must be one of the defined FIX version strings (see
+ * FIX_X_Y macros). In this case, the function will also check tag ID ranges. If
+ * no <version> is provided, any strictly positive integer is valid.
+ *
+ * The tag ID range depends on the FIX protocol version:
+ * - FIX.4.0: 1-140
+ * - FIX.4.1: 1-211
+ * - FIX.4.2: 1-446
+ * - FIX.4.3: 1-659
+ * - FIX.4.4: 1-956
+ * - FIX.5.0: 1-1139
+ * - FIX.5.0SP1: 1-1426
+ * - FIX.5.0SP2: 1-1621
+ * range 10000 to 19999 is for "user defined tags"
+ */
+unsigned int fix_check_id(const struct ist str, const struct ist version)
+{
+ const char *s, *end;
+ unsigned int ret;
+
+ s = istptr(str);
+ end = istend(str);
+ ret = read_uint(&s, end);
+
+ /* we did not consume all characters from <str>, this is an error */
+ if (s != end)
+ return 0;
+
+ /* field ID can't be 0 */
+ if (ret == 0)
+ return 0;
+
+ /* we can leave now if version was not provided */
+ if (!isttest(version))
+ return ret;
+
+ /* we can leave now if this is a "user defined tag id" */
+ if (ret >= 10000 && ret <= 19999)
+ return ret;
+
+ /* now perform checking per FIX version */
+ if (istissame(FIX_4_0, version) && (ret <= 140))
+ return ret;
+ else if (istissame(FIX_4_1, version) && (ret <= 211))
+ return ret;
+ else if (istissame(FIX_4_2, version) && (ret <= 446))
+ return ret;
+ else if (istissame(FIX_4_3, version) && (ret <= 659))
+ return ret;
+ else if (istissame(FIX_4_4, version) && (ret <= 956))
+ return ret;
+ /* version string is the same for all 5.0 versions, so we can only take
+ * into consideration the biggest range
+ */
+ else if (istissame(FIX_5_0, version) && (ret <= 1621))
+ return ret;
+
+ return 0;
+}
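+
+/* Examples (illustrative):
+ *   fix_check_id(ist("35"), IST_NULL)    -> 35    (any positive id)
+ *   fix_check_id(ist("35"), FIX_4_2)     -> 35    (within 1-446)
+ *   fix_check_id(ist("700"), FIX_4_2)    -> 0     (out of range)
+ *   fix_check_id(ist("10042"), FIX_4_2)  -> 10042 (user defined tag)
+ *   fix_check_id(ist("12a"), IST_NULL)   -> 0     (not fully numeric)
+ */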
+
+/*
+ * Parses a FIX message <msg> and performs the following sanity checks:
+ *
+ * - checks tag ids and values are not empty
+ * - checks tag ids are numerical values
+ * - checks the first tag is BeginString with a valid version
+ * - checks the second tag is BodyLength with the right body length
+ * - checks the third tag is MsgType
+ * - checks the last tag is CheckSum with a valid checksum
+ *
+ * Returns:
+ * FIX_INVALID_MESSAGE if the message is invalid
+ * FIX_NEED_MORE_DATA if we need more data to fully validate the message
+ * FIX_VALID_MESSAGE if the message looks valid
+ */
+int fix_validate_message(const struct ist msg)
+{
+ struct ist parser, version;
+ unsigned int tagnum, bodylen;
+ unsigned char checksum;
+ char *body;
+ int ret = FIX_INVALID_MESSAGE;
+
+ if (istlen(msg) < FIX_MSG_MINSIZE) {
+ ret = FIX_NEED_MORE_DATA;
+ goto end;
+ }
+
+	/* Parse the whole message to compute the checksum and check that all
+	 * tag ids are properly set. Here we are sure to have the first 2 tags.
+	 * Thus the version and the body length can be checked.
+	 */
+ parser = msg;
+ version = IST_NULL;
+ checksum = tagnum = bodylen = 0;
+ body = NULL;
+ while (istlen(parser) > 0) {
+ struct ist tag, value;
+ unsigned int tagid;
+ const char *p, *end;
+
+ /* parse the tag ID and its value and perform first sanity checks */
+ value = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+ /* end of value not found */
+ if (istend(value) == istend(parser)) {
+ ret = FIX_NEED_MORE_DATA;
+ goto end;
+ }
+ /* empty tag or empty value are forbidden */
+		if (istptr(parser) == istptr(value) || !istlen(value))
+ goto end;
+
+ /* value points on '='. get the tag and skip '=' */
+ tag = ist2(istptr(parser), istptr(value) - istptr(parser));
+ value = istnext(value);
+
+ /* Check the tag id */
+ tagid = fix_check_id(tag, version);
+ if (!tagid)
+ goto end;
+ tagnum++;
+
+ if (tagnum == 1) {
+ /* the first tag must be BeginString */
+ if (tagid != FIX_TAG_BeginString)
+ goto end;
+
+ version = fix_version(value);
+ if (!isttest(version))
+ goto end;
+ }
+ else if (tagnum == 2) {
+			/* the second tag must be BodyLength */
+ if (tagid != FIX_TAG_BodyLength)
+ goto end;
+
+ p = istptr(value);
+ end = istend(value);
+ bodylen = read_uint(&p, end);
+
+			/* we did not consume all characters from <value> or there
+			 * is no body, this is an error. There is at least the
+			 * message type in the body.
+			 */
+ if (p != end || !bodylen)
+ goto end;
+
+ body = istend(value) + 1;
+ }
+ else if (tagnum == 3) {
+ /* the third tag must be MsgType */
+ if (tagid != FIX_TAG_MsgType)
+ goto end;
+ }
+ else if (tagnum > 3 && tagid == FIX_TAG_CheckSum) {
+ /* CheckSum tag should be the last one and is not taken into account
+ * to compute the checksum itself and the body length. The value is
+ * a three-octet representation of the checksum decimal value.
+ */
+ if (bodylen != istptr(parser) - body)
+ goto end;
+
+ if (istlen(value) != 3)
+ goto end;
+ if (checksum != strl2ui(istptr(value), istlen(value)))
+ goto end;
+
+ /* End of the message, exit from the loop */
+ ret = FIX_VALID_MESSAGE;
+ goto end;
+ }
+
+ /* compute checksum of tag=value<delim> */
+ for (p = istptr(tag) ; p < istend(tag) ; ++p)
+ checksum += *p;
+ checksum += '=';
+ for (p = istptr(value) ; p < istend(value) ; ++p)
+ checksum += *p;
+ checksum += FIX_DELIMITER;
+
+ /* move the parser after the value and its delimiter */
+ parser = istadv(parser, istlen(tag) + istlen(value) + 2);
+ }
+
+ if (body) {
+		/* We started to read the body but did not reach the CheckSum tag */
+ ret = FIX_NEED_MORE_DATA;
+ }
+
+ end:
+ return ret;
+}
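+
+/* Message layout sketch (illustrative; <SOH> stands for FIX_DELIMITER
+ * and the CheckSum value is a placeholder):
+ *
+ *   8=FIX.4.2<SOH>9=14<SOH>35=0<SOH>112=TEST<SOH>10=nnn<SOH>
+ *
+ * BeginString (8) and BodyLength (9) are not part of the body, which
+ * here runs from "35=0" up to the CheckSum tag (10) and is 14 bytes
+ * long. The checksum is the byte sum of everything before "10=",
+ * modulo 256, encoded on three decimal digits.
+ */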
+
+
+/*
+ * Iterates over a FIX message <msg> and returns the value of <tagid>.
+ *
+ * Returns the corresponding value if <tagid> is found. If <tagid> is not found
+ * because more data is required, the message with a length set to 0 is
+ * returned. If <tagid> is not found in the message or if the message is
+ * invalid, IST_NULL is returned.
+ *
+ * Note: Only simple sanity checks are performed on tags and values (not empty).
+ *
+ * A tag looks like
+ * <tagid>=<value>FIX_DELIMITER with <tagid> and <value> not empty.
+ */
+struct ist fix_tag_value(const struct ist msg, unsigned int tagid)
+{
+ struct ist parser, t, v;
+ unsigned int id;
+
+ parser = msg;
+ while (istlen(parser) > 0) {
+ v = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+ /* delimiter not found, need more data */
+ if (istend(v) == istend(parser))
+ break;
+
+ /* empty tag or empty value, invalid */
+ if (istptr(parser) == istptr(v) || !istlen(v))
+ goto not_found_or_invalid;
+
+ t = ist2(istptr(parser), istptr(v) - istptr(parser));
+ v = istnext(v);
+
+ id = fix_check_id(t, IST_NULL);
+ if (!id)
+ goto not_found_or_invalid;
+ if (id == tagid) {
+ /* <tagId> found, return the corresponding value */
+ return v;
+ }
+
+ /* CheckSum tag is the last one, no <tagid> found */
+ if (id == FIX_TAG_CheckSum)
+ goto not_found_or_invalid;
+
+ parser = istadv(parser, istlen(t) + istlen(v) + 2);
+ }
+ /* not enough data to find <tagid> */
+ return ist2(istptr(msg), 0);
+
+ not_found_or_invalid:
+ return IST_NULL;
+}
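+
+/* Usage sketch (illustrative): extracting the MsgType (tag 35) from a
+ * buffer assumed to hold a FIX message:
+ *
+ *	struct ist mtype = fix_tag_value(msg, FIX_TAG_MsgType);
+ *
+ *	if (!isttest(mtype))
+ *		... invalid message or tag not found ...
+ *	else if (!istlen(mtype))
+ *		... need more data ...
+ *	else
+ *		... mtype holds the tag value ...
+ */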
diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c
new file mode 100644
index 0000000..f2f04ee
--- /dev/null
+++ b/src/flt_http_comp.c
@@ -0,0 +1,872 @@
+/*
+ * Stream filters related variables and functions.
+ *
+ * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/compression.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/filters.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/list.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+
+#define COMP_STATE_PROCESSING 0x01
+
+const char *http_comp_flt_id = "compression filter";
+
+struct flt_ops comp_ops;
+
+struct comp_state {
+ struct comp_ctx *comp_ctx; /* compression context */
+ struct comp_algo *comp_algo; /* compression algorithm if not NULL */
+ unsigned int flags; /* COMP_STATE_* */
+};
+
+/* Pools used to allocate comp_state structs */
+DECLARE_STATIC_POOL(pool_head_comp_state, "comp_state", sizeof(struct comp_state));
+
+static THREAD_LOCAL struct buffer tmpbuf;
+static THREAD_LOCAL struct buffer zbuf;
+
+static int select_compression_request_header(struct comp_state *st,
+ struct stream *s,
+ struct http_msg *msg);
+static int select_compression_response_header(struct comp_state *st,
+ struct stream *s,
+ struct http_msg *msg);
+static int set_compression_response_header(struct comp_state *st,
+ struct stream *s,
+ struct http_msg *msg);
+
+static int htx_compression_buffer_init(struct htx *htx, struct buffer *out);
+static int htx_compression_buffer_add_data(struct comp_state *st, const char *data, size_t len,
+ struct buffer *out);
+static int htx_compression_buffer_end(struct comp_state *st, struct buffer *out, int end);
+
+/***********************************************************************/
+static int
+comp_flt_init(struct proxy *px, struct flt_conf *fconf)
+{
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+static int
+comp_flt_init_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ if (b_alloc(&tmpbuf) == NULL)
+ return -1;
+ if (b_alloc(&zbuf) == NULL)
+ return -1;
+ return 0;
+}
+
+static void
+comp_flt_deinit_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ if (tmpbuf.size)
+ b_free(&tmpbuf);
+ if (zbuf.size)
+ b_free(&zbuf);
+}
+
+static int
+comp_strm_init(struct stream *s, struct filter *filter)
+{
+ struct comp_state *st;
+
+ st = pool_alloc(pool_head_comp_state);
+ if (st == NULL)
+ return -1;
+
+ st->comp_algo = NULL;
+ st->comp_ctx = NULL;
+ st->flags = 0;
+ filter->ctx = st;
+
+ /* Register post-analyzer on AN_RES_WAIT_HTTP because we need to
+ * analyze response headers before http-response rules execution
+ * to be sure we can use res.comp and res.comp_algo sample
+ * fetches */
+ filter->post_analyzers |= AN_RES_WAIT_HTTP;
+ return 1;
+}
+
+static void
+comp_strm_deinit(struct stream *s, struct filter *filter)
+{
+ struct comp_state *st = filter->ctx;
+
+ if (!st)
+ return;
+
+ /* release any possible compression context */
+ if (st->comp_algo)
+ st->comp_algo->end(&st->comp_ctx);
+ pool_free(pool_head_comp_state, st);
+ filter->ctx = NULL;
+}
+
+static int
+comp_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct comp_state *st = filter->ctx;
+
+ if (!strm_fe(s)->comp && !s->be->comp)
+ goto end;
+
+ if (!(msg->chn->flags & CF_ISRESP))
+ select_compression_request_header(st, s, msg);
+ else {
+ /* Response headers have already been checked in
+ * comp_http_post_analyze callback. */
+ if (st->comp_algo) {
+ if (!set_compression_response_header(st, s, msg))
+ goto end;
+ register_data_filter(s, msg->chn, filter);
+ st->flags |= COMP_STATE_PROCESSING;
+ }
+ }
+
+ end:
+ return 1;
+}
+
+static int
+comp_http_post_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn, unsigned an_bit)
+{
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct comp_state *st = filter->ctx;
+
+ if (an_bit != AN_RES_WAIT_HTTP)
+ goto end;
+
+ if (!strm_fe(s)->comp && !s->be->comp)
+ goto end;
+
+ select_compression_response_header(st, s, msg);
+
+ end:
+ return 1;
+}
+
+static int
+comp_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct comp_state *st = filter->ctx;
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_ret htxret = htx_find_offset(htx, offset);
+ struct htx_blk *blk, *next;
+ int ret, consumed = 0, to_forward = 0, last = 0;
+
+ blk = htxret.blk;
+ offset = htxret.ret;
+ for (next = NULL; blk && len; blk = next) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+
+ next = htx_get_next_blk(htx, blk);
+ while (next && htx_get_blk_type(next) == HTX_BLK_UNUSED)
+ next = htx_get_next_blk(htx, next);
+
+ if (!(st->flags & COMP_STATE_PROCESSING))
+ goto consume;
+
+ if (htx_compression_buffer_init(htx, &trash) < 0) {
+ msg->chn->flags |= CF_WAKE_WRITE;
+ goto end;
+ }
+
+ switch (type) {
+ case HTX_BLK_DATA:
+ /* it is the last data block */
+ last = ((!next && (htx->flags & HTX_FL_EOM)) || (next && htx_get_blk_type(next) != HTX_BLK_DATA));
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ if (v.len > len) {
+ last = 0;
+ v.len = len;
+ }
+
+ ret = htx_compression_buffer_add_data(st, v.ptr, v.len, &trash);
+ if (ret < 0 || htx_compression_buffer_end(st, &trash, last) < 0)
+ goto error;
+ BUG_ON(v.len != ret);
+
+ if (ret == sz && !b_data(&trash))
+ next = htx_remove_blk(htx, blk);
+ else {
+ blk = htx_replace_blk_value(htx, blk, v, ist2(b_head(&trash), b_data(&trash)));
+ next = htx_get_next_blk(htx, blk);
+ }
+
+ len -= ret;
+ consumed += ret;
+ to_forward += b_data(&trash);
+ if (last)
+ st->flags &= ~COMP_STATE_PROCESSING;
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ if (htx_compression_buffer_end(st, &trash, 1) < 0)
+ goto error;
+ if (b_data(&trash)) {
+ struct htx_blk *last = htx_add_last_data(htx, ist2(b_head(&trash), b_data(&trash)));
+ if (!last)
+ goto error;
+ blk = htx_get_next_blk(htx, last);
+ if (!blk)
+ goto error;
+ next = htx_get_next_blk(htx, blk);
+ to_forward += b_data(&trash);
+ }
+ st->flags &= ~COMP_STATE_PROCESSING;
+ /* fall through */
+
+ default:
+ consume:
+ sz -= offset;
+ if (sz > len)
+ sz = len;
+ consumed += sz;
+ to_forward += sz;
+ len -= sz;
+ break;
+ }
+
+ offset = 0;
+ }
+
+ end:
+ if (to_forward != consumed)
+ flt_update_offsets(filter, msg->chn, to_forward - consumed);
+
+ if (st->comp_ctx && st->comp_ctx->cur_lvl > 0) {
+ update_freq_ctr(&global.comp_bps_in, consumed);
+ _HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_in, consumed);
+ _HA_ATOMIC_ADD(&s->be->be_counters.comp_in, consumed);
+ update_freq_ctr(&global.comp_bps_out, to_forward);
+ _HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_out, to_forward);
+ _HA_ATOMIC_ADD(&s->be->be_counters.comp_out, to_forward);
+ } else {
+ _HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_byp, consumed);
+ _HA_ATOMIC_ADD(&s->be->be_counters.comp_byp, consumed);
+ }
+ return to_forward;
+
+ error:
+ return -1;
+}
+
+
+static int
+comp_http_end(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct comp_state *st = filter->ctx;
+
+ if (!(msg->chn->flags & CF_ISRESP) || !st || !st->comp_algo)
+ goto end;
+
+ if (strm_fe(s)->mode == PR_MODE_HTTP)
+ _HA_ATOMIC_INC(&strm_fe(s)->fe_counters.p.http.comp_rsp);
+ if ((s->flags & SF_BE_ASSIGNED) && (s->be->mode == PR_MODE_HTTP))
+ _HA_ATOMIC_INC(&s->be->be_counters.p.http.comp_rsp);
+ end:
+ return 1;
+}
+
+/***********************************************************************/
+static int
+set_compression_response_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_sl *sl;
+ struct http_hdr_ctx ctx;
+
+ sl = http_get_stline(htx);
+ if (!sl)
+ goto error;
+
+ /* add "Transfer-Encoding: chunked" header */
+ if (!(msg->flags & HTTP_MSGF_TE_CHNK)) {
+ if (!http_add_header(htx, ist("Transfer-Encoding"), ist("chunked")))
+ goto error;
+ msg->flags |= HTTP_MSGF_TE_CHNK;
+ sl->flags |= (HTX_SL_F_XFER_ENC|HTX_SL_F_CHNK);
+ }
+
+ /* remove Content-Length header */
+ if (msg->flags & HTTP_MSGF_CNT_LEN) {
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Content-Length"), &ctx, 1))
+ http_remove_header(htx, &ctx);
+ msg->flags &= ~HTTP_MSGF_CNT_LEN;
+ sl->flags &= ~HTX_SL_F_CLEN;
+ }
+
+ /* convert "ETag" header to a weak ETag */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
+ if (ctx.value.ptr[0] == '"') {
+			/* This is a strong ETag. Convert it to a weak one. */
+ struct ist v = ist2(trash.area, 0);
+ if (istcat(&v, ist("W/"), trash.size) == -1 || istcat(&v, ctx.value, trash.size) == -1)
+ goto error;
+
+ if (!http_replace_header_value(htx, &ctx, v))
+ goto error;
+ }
+ }
+
+ if (!http_add_header(htx, ist("Vary"), ist("Accept-Encoding")))
+ goto error;
+
+ /*
+ * Add Content-Encoding header when it's not identity encoding.
+ * RFC 2616 : Identity encoding: This content-coding is used only in the
+ * Accept-Encoding header, and SHOULD NOT be used in the Content-Encoding
+ * header.
+ */
+ if (st->comp_algo->cfg_name_len != 8 || memcmp(st->comp_algo->cfg_name, "identity", 8) != 0) {
+ struct ist v = ist2(st->comp_algo->ua_name, st->comp_algo->ua_name_len);
+
+ if (!http_add_header(htx, ist("Content-Encoding"), v))
+ goto error;
+ }
+
+ return 1;
+
+ error:
+ st->comp_algo->end(&st->comp_ctx);
+ st->comp_algo = NULL;
+ return 0;
+}
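+
+/* Example (illustrative): a strong validator ETag: "xyz" is rewritten
+ * above as the weak ETag: W/"xyz", since the compressed payload is no
+ * longer byte-identical to the original representation.
+ */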
+
+/*
+ * Selects a compression algorithm depending on the client request.
+ */
+static int
+select_compression_request_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct http_hdr_ctx ctx;
+ struct comp_algo *comp_algo = NULL;
+ struct comp_algo *comp_algo_back = NULL;
+
+ /* Disable compression for older user agents announcing themselves as "Mozilla/4"
+ * unless they are known good (MSIE 6 with XP SP2, or MSIE 7 and later).
+ * See http://zoompf.com/2012/02/lose-the-wait-http-compression for more details.
+ */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("User-Agent"), &ctx, 1) &&
+ ctx.value.len >= 9 &&
+ memcmp(ctx.value.ptr, "Mozilla/4", 9) == 0 &&
+ (ctx.value.len < 31 ||
+ memcmp(ctx.value.ptr + 25, "MSIE ", 5) != 0 ||
+ *(ctx.value.ptr + 30) < '6' ||
+ (*(ctx.value.ptr + 30) == '6' &&
+ (ctx.value.len < 54 || memcmp(ctx.value.ptr + 51, "SV1", 3) != 0)))) {
+ st->comp_algo = NULL;
+ return 0;
+ }
+
+ /* search for the algo in the backend in priority or the frontend */
+ if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) ||
+ (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) {
+ int best_q = 0;
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Accept-Encoding"), &ctx, 0)) {
+ const char *qval;
+ int q;
+ int toklen;
+
+ /* try to isolate the token from the optional q-value */
+ toklen = 0;
+ while (toklen < ctx.value.len && HTTP_IS_TOKEN(*(ctx.value.ptr + toklen)))
+ toklen++;
+
+ qval = ctx.value.ptr + toklen;
+ while (1) {
+ while (qval < istend(ctx.value) && HTTP_IS_LWS(*qval))
+ qval++;
+
+ if (qval >= istend(ctx.value) || *qval != ';') {
+ qval = NULL;
+ break;
+ }
+ qval++;
+
+ while (qval < istend(ctx.value) && HTTP_IS_LWS(*qval))
+ qval++;
+
+ if (qval >= istend(ctx.value)) {
+ qval = NULL;
+ break;
+ }
+ if (strncmp(qval, "q=", MIN(istend(ctx.value) - qval, 2)) == 0)
+ break;
+
+ while (qval < istend(ctx.value) && *qval != ';')
+ qval++;
+ }
+
+ /* here we have qval pointing to the first "q=" attribute or NULL if not found */
+ q = qval ? http_parse_qvalue(qval + 2, NULL) : 1000;
+
+ if (q <= best_q)
+ continue;
+
+ for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
+ if (*(ctx.value.ptr) == '*' ||
+ word_match(ctx.value.ptr, toklen, comp_algo->ua_name, comp_algo->ua_name_len)) {
+ st->comp_algo = comp_algo;
+ best_q = q;
+ break;
+ }
+ }
+ }
+ }
+
+ /* remove all occurrences of the header when "compression offload" is set */
+ if (st->comp_algo) {
+ if ((s->be->comp && s->be->comp->offload) ||
+ (strm_fe(s)->comp && strm_fe(s)->comp->offload)) {
+ http_remove_header(htx, &ctx);
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Accept-Encoding"), &ctx, 1))
+ http_remove_header(htx, &ctx);
+ }
+ return 1;
+ }
+
+ /* identity is implicit and does not require any header */
+ if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) ||
+ (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) {
+ for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
+ if (comp_algo->cfg_name_len == 8 && memcmp(comp_algo->cfg_name, "identity", 8) == 0) {
+ st->comp_algo = comp_algo;
+ return 1;
+ }
+ }
+ }
+
+ st->comp_algo = NULL;
+ return 0;
+}
+
+/*
+ * Selects a compression algorithm depending on the server response.
+ */
+static int
+select_compression_response_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct http_txn *txn = s->txn;
+ struct http_hdr_ctx ctx;
+ struct comp_type *comp_type;
+
+ /* no common compression algorithm was found in request header */
+ if (st->comp_algo == NULL)
+ goto fail;
+
+ /* compression already in progress */
+ if (msg->flags & HTTP_MSGF_COMPRESSING)
+ goto fail;
+
+ /* HTTP < 1.1 should not be compressed */
+ if (!(msg->flags & HTTP_MSGF_VER_11) || !(txn->req.flags & HTTP_MSGF_VER_11))
+ goto fail;
+
+ if (txn->meth == HTTP_METH_HEAD)
+ goto fail;
+
+ /* compress 200,201,202,203 responses only */
+ if ((txn->status != 200) &&
+ (txn->status != 201) &&
+ (txn->status != 202) &&
+ (txn->status != 203))
+ goto fail;
+
+ if (!(msg->flags & HTTP_MSGF_XFER_LEN) || msg->flags & HTTP_MSGF_BODYLESS)
+ goto fail;
+
+ /* content is already compressed */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Content-Encoding"), &ctx, 1))
+ goto fail;
+
+ /* no compression when Cache-Control: no-transform is present in the message */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Cache-Control"), &ctx, 0)) {
+ if (word_match(ctx.value.ptr, ctx.value.len, "no-transform", 12))
+ goto fail;
+ }
+
+ /* no compression when ETag is malformed */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
+ if (http_get_etag_type(ctx.value) == ETAG_INVALID)
+ goto fail;
+ }
+ /* no compression when multiple ETags are present
+ * Note: Do not reset ctx.blk!
+ */
+ if (http_find_header(htx, ist("ETag"), &ctx, 1))
+ goto fail;
+
+ comp_type = NULL;
+
+ /* we don't want to compress multipart content-types, nor content-types that are
+ * not listed in the "compression type" directive if any. If no content-type was
+ * found but configuration requires one, we don't compress either. Backend has
+ * the priority.
+ */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Content-Type"), &ctx, 1)) {
+ if (ctx.value.len >= 9 && strncasecmp("multipart", ctx.value.ptr, 9) == 0)
+ goto fail;
+
+ if ((s->be->comp && (comp_type = s->be->comp->types)) ||
+ (strm_fe(s)->comp && (comp_type = strm_fe(s)->comp->types))) {
+ for (; comp_type; comp_type = comp_type->next) {
+ if (ctx.value.len >= comp_type->name_len &&
+ strncasecmp(ctx.value.ptr, comp_type->name, comp_type->name_len) == 0)
+ /* this Content-Type should be compressed */
+ break;
+ }
+ /* this Content-Type should not be compressed */
+ if (comp_type == NULL)
+ goto fail;
+ }
+ }
+ else { /* no content-type header */
+ if ((s->be->comp && s->be->comp->types) ||
+ (strm_fe(s)->comp && strm_fe(s)->comp->types))
+ goto fail; /* a content-type was required */
+ }
+
+ /* limit compression rate */
+ if (global.comp_rate_lim > 0)
+ if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim)
+ goto fail;
+
+ /* limit cpu usage */
+ if (th_ctx->idle_pct < compress_min_idle)
+ goto fail;
+
+ /* initialize compression */
+ if (st->comp_algo->init(&st->comp_ctx, global.tune.comp_maxlevel) < 0)
+ goto fail;
+ msg->flags |= HTTP_MSGF_COMPRESSING;
+ return 1;
+
+ fail:
+ st->comp_algo = NULL;
+ return 0;
+}
+
+/***********************************************************************/
+static int
+htx_compression_buffer_init(struct htx *htx, struct buffer *out)
+{
+ /* The output stream requires at least 10 bytes for the gzip header,
+ * plus at least 8 bytes for the gzip trailer (crc+len), plus at most
+ * 5 bytes per 32kB block and 2 bytes to close the stream.
+ */
+ if (htx_free_space(htx) < 20 + 5 * ((htx->data + 32767) >> 15))
+ return -1;
+ b_reset(out);
+ return 0;
+}
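+/* For example, with 64kB of HTX data the check above requires
+ * 20 + 5 * 2 = 30 bytes of free space before compression may start. */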
+
+static int
+htx_compression_buffer_add_data(struct comp_state *st, const char *data, size_t len,
+ struct buffer *out)
+{
+ return st->comp_algo->add_data(st->comp_ctx, data, len, out);
+}
+
+static int
+htx_compression_buffer_end(struct comp_state *st, struct buffer *out, int end)
+{
+ if (end)
+ return st->comp_algo->finish(st->comp_ctx, out);
+ else
+ return st->comp_algo->flush(st->comp_ctx, out);
+}
+
+
+/***********************************************************************/
+struct flt_ops comp_ops = {
+ .init = comp_flt_init,
+ .init_per_thread = comp_flt_init_per_thread,
+ .deinit_per_thread = comp_flt_deinit_per_thread,
+
+ .attach = comp_strm_init,
+ .detach = comp_strm_deinit,
+
+ .channel_post_analyze = comp_http_post_analyze,
+
+ .http_headers = comp_http_headers,
+ .http_payload = comp_http_payload,
+ .http_end = comp_http_end,
+};
+
+static int
+parse_compression_options(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct comp *comp;
+ int ret = 0;
+
+ if (proxy->comp == NULL) {
+ if ((comp = calloc(1, sizeof(*comp))) == NULL) {
+ memprintf(err, "'%s' : out of memory.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ proxy->comp = comp;
+ }
+ else
+ comp = proxy->comp;
+
+ if (strcmp(args[1], "algo") == 0) {
+ struct comp_ctx *ctx;
+ int cur_arg = 2;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "parsing [%s:%d] : '%s' expects <algorithm>.",
+ file, line, args[0]);
+ ret = -1;
+ goto end;
+ }
+ while (*(args[cur_arg])) {
+ int retval = comp_append_algo(comp, args[cur_arg]);
+ if (retval) {
+ if (retval < 0)
+ memprintf(err, "'%s' : '%s' is not a supported algorithm.",
+ args[0], args[cur_arg]);
+ else
+ memprintf(err, "'%s' : out of memory while parsing algo '%s'.",
+ args[0], args[cur_arg]);
+ ret = -1;
+ goto end;
+ }
+
+ if (proxy->comp->algos->init(&ctx, 9) == 0)
+ proxy->comp->algos->end(&ctx);
+ else {
+ memprintf(err, "'%s' : Can't init '%s' algorithm.",
+ args[0], args[cur_arg]);
+ ret = -1;
+ goto end;
+ }
+ cur_arg++;
+ continue;
+ }
+ }
+ else if (strcmp(args[1], "offload") == 0) {
+ if (proxy->cap & PR_CAP_DEF) {
+ memprintf(err, "'%s' : '%s' ignored in 'defaults' section.",
+ args[0], args[1]);
+ ret = 1;
+ }
+ comp->offload = 1;
+ }
+ else if (strcmp(args[1], "type") == 0) {
+ int cur_arg = 2;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "'%s' expects <type>.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ while (*(args[cur_arg])) {
+ if (comp_append_type(comp, args[cur_arg])) {
+ memprintf(err, "'%s': out of memory.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ cur_arg++;
+ continue;
+ }
+ }
+ else {
+ memprintf(err, "'%s' expects 'algo', 'type' or 'offload'",
+ args[0]);
+ ret = -1;
+ goto end;
+ }
+
+ end:
+ return ret;
+}
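+/* A typical configuration exercising this parser could look like this
+ * (sketch):
+ *
+ *   compression algo gzip
+ *   compression type text/html text/plain application/json
+ *   compression offload
+ *
+ * "algo" registers the algorithms to negotiate, "type" restricts the
+ * eligible content-types, and "offload" makes HAProxy strip the
+ * Accept-Encoding header so that servers never compress themselves.
+ */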
+
+static int
+parse_http_comp_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct flt_conf *fc, *back;
+
+ list_for_each_entry_safe(fc, back, &px->filter_configs, list) {
+ if (fc->id == http_comp_flt_id) {
+ memprintf(err, "%s: Proxy supports only one compression filter\n", px->id);
+ return -1;
+ }
+ }
+
+ fconf->id = http_comp_flt_id;
+ fconf->conf = NULL;
+ fconf->ops = &comp_ops;
+ (*cur_arg)++;
+
+ return 0;
+}
+
+
+int
+check_implicit_http_comp_flt(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+ int explicit = 0;
+ int comp = 0;
+ int err = 0;
+
+ if (proxy->comp == NULL)
+ goto end;
+ if (!LIST_ISEMPTY(&proxy->filter_configs)) {
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->id == http_comp_flt_id)
+ comp = 1;
+ else if (fconf->id == cache_store_flt_id) {
+ if (comp) {
+ ha_alert("config: %s '%s': unable to enable the compression filter "
+ "before any cache filter.\n",
+ proxy_type_str(proxy), proxy->id);
+ err++;
+ goto end;
+ }
+ }
+ else if (fconf->id == fcgi_flt_id)
+ continue;
+ else
+ explicit = 1;
+ }
+ }
+ if (comp)
+ goto end;
+ else if (explicit) {
+ ha_alert("config: %s '%s': require an explicit filter declaration to use "
+ "HTTP compression\n", proxy_type_str(proxy), proxy->id);
+ err++;
+ goto end;
+ }
+
+ /* Implicit declaration of the compression filter is always the last
+ * one */
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf) {
+ ha_alert("config: %s '%s': out of memory\n",
+ proxy_type_str(proxy), proxy->id);
+ err++;
+ goto end;
+ }
+ fconf->id = http_comp_flt_id;
+ fconf->conf = NULL;
+ fconf->ops = &comp_ops;
+ LIST_APPEND(&proxy->filter_configs, &fconf->list);
+ end:
+ return err;
+}
+
+/*
+ * boolean, returns true if compression is used (either gzip or deflate) in the
+ * response.
+ */
+static int
+smp_fetch_res_comp(const struct arg *args, struct sample *smp, const char *kw,
+ void *private)
+{
+ struct http_txn *txn = smp->strm ? smp->strm->txn : NULL;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (txn && (txn->rsp.flags & HTTP_MSGF_COMPRESSING));
+ return 1;
+}
+
+/*
+ * string, returns algo
+ */
+static int
+smp_fetch_res_comp_algo(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct http_txn *txn = smp->strm ? smp->strm->txn : NULL;
+ struct filter *filter;
+ struct comp_state *st;
+
+ if (!txn || !(txn->rsp.flags & HTTP_MSGF_COMPRESSING))
+ return 0;
+
+ list_for_each_entry(filter, &strm_flt(smp->strm)->filters, list) {
+ if (FLT_ID(filter) != http_comp_flt_id)
+ continue;
+
+ if (!(st = filter->ctx))
+ break;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = st->comp_algo->cfg_name;
+ smp->data.u.str.data = st->comp_algo->cfg_name_len;
+ return 1;
+ }
+ return 0;
+}
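+/* Example usage of these sample fetches in a configuration (sketch):
+ *
+ *   http-response set-header X-Comp-Algo %[res.comp_algo] if { res.comp }
+ */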
+
+/* Declare the config parser for "compression" keyword */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "compression", parse_compression_options },
+ { 0, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* Declare the filter parser for "compression" keyword */
+static struct flt_kw_list filter_kws = { "COMP", { }, {
+ { "compression", parse_http_comp_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "res.comp", smp_fetch_res_comp, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP },
+ { "res.comp_algo", smp_fetch_res_comp_algo, 0, NULL, SMP_T_STR, SMP_USE_HRSHP },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
diff --git a/src/flt_spoe.c b/src/flt_spoe.c
new file mode 100644
index 0000000..1c511ae
--- /dev/null
+++ b/src/flt_spoe.c
@@ -0,0 +1,4785 @@
+/*
+ * Stream processing offload engine management.
+ *
+ * Copyright 2016 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <ctype.h>
+#include <errno.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/applet.h>
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/filters.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session.h>
+#include <haproxy/signal.h>
+#include <haproxy/sink.h>
+#include <haproxy/spoe.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+
+
+#if defined(DEBUG_SPOE) || defined(DEBUG_FULL)
+#define SPOE_PRINTF(x...) fprintf(x)
+#define SPOE_DEBUG_STMT(statement) statement
+#else
+#define SPOE_PRINTF(x...)
+#define SPOE_DEBUG_STMT(statement)
+#endif
+
+/* 4 bytes are reserved for the frame size, so that a frame and its size can
+ * be written together in a buffer */
+#define MAX_FRAME_SIZE (global.tune.bufsize - 4)
+
+/* The minimum size for a frame */
+#define MIN_FRAME_SIZE 256
+
+/* Reserved for the metadata and the frame type.
+ * So <MAX_FRAME_SIZE> - <FRAME_HDR_SIZE> is the maximum payload size */
+#define FRAME_HDR_SIZE 32
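+
+/* For example, with the default 16kB buffers, MAX_FRAME_SIZE is
+ * 16384 - 4 = 16380 bytes, leaving at most 16380 - 32 = 16348 bytes
+ * of payload per frame. */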
+
+/* Helper to get SPOE ctx inside an appctx */
+#define SPOE_APPCTX(appctx) ((struct spoe_appctx *)((appctx)->svcctx))
+
+/* SPOE filter id. Used to identify SPOE filters */
+const char *spoe_filter_id = "SPOE filter";
+
+/* Set if the handler for SIGUSR1 is registered */
+static int sighandler_registered = 0;
+
+/* proxy used during the parsing */
+struct proxy *curproxy = NULL;
+
+/* The name of the SPOE engine, used during the parsing */
+char *curengine = NULL;
+
+/* SPOE agent/group/message used during the parsing */
+struct spoe_agent *curagent = NULL;
+struct spoe_group *curgrp = NULL;
+struct spoe_message *curmsg = NULL;
+
+/* list of SPOE messages and placeholders used during the parsing */
+struct list curmsgs;
+struct list curgrps;
+struct list curmphs;
+struct list curgphs;
+struct list curvars;
+
+/* list of log servers used during the parsing */
+struct list curlogsrvs;
+
+/* agent's proxy flags (PR_O_* and PR_O2_*) used during parsing */
+int curpxopts;
+int curpxopts2;
+
+/* Pools used to allocate SPOE structs */
+DECLARE_STATIC_POOL(pool_head_spoe_ctx, "spoe_ctx", sizeof(struct spoe_context));
+DECLARE_STATIC_POOL(pool_head_spoe_appctx, "spoe_appctx", sizeof(struct spoe_appctx));
+
+struct flt_ops spoe_ops;
+
+static int spoe_queue_context(struct spoe_context *ctx);
+static int spoe_acquire_buffer(struct buffer *buf, struct buffer_wait *buffer_wait);
+static void spoe_release_buffer(struct buffer *buf, struct buffer_wait *buffer_wait);
+static struct appctx *spoe_create_appctx(struct spoe_config *conf);
+
+/********************************************************************
+ * helper functions/globals
+ ********************************************************************/
+static void
+spoe_release_placeholder(struct spoe_placeholder *ph)
+{
+ if (!ph)
+ return;
+ free(ph->id);
+ free(ph);
+}
+
+static void
+spoe_release_message(struct spoe_message *msg)
+{
+ struct spoe_arg *arg, *argback;
+ struct acl *acl, *aclback;
+
+ if (!msg)
+ return;
+ free(msg->id);
+ free(msg->conf.file);
+ list_for_each_entry_safe(arg, argback, &msg->args, list) {
+ release_sample_expr(arg->expr);
+ free(arg->name);
+ LIST_DELETE(&arg->list);
+ free(arg);
+ }
+ list_for_each_entry_safe(acl, aclback, &msg->acls, list) {
+ LIST_DELETE(&acl->list);
+ prune_acl(acl);
+ free(acl);
+ }
+ if (msg->cond) {
+ prune_acl_cond(msg->cond);
+ free(msg->cond);
+ }
+ free(msg);
+}
+
+static void
+spoe_release_group(struct spoe_group *grp)
+{
+ if (!grp)
+ return;
+ free(grp->id);
+ free(grp->conf.file);
+ free(grp);
+}
+
+static void
+spoe_release_agent(struct spoe_agent *agent)
+{
+ struct spoe_message *msg, *msgback;
+ struct spoe_group *grp, *grpback;
+ int i;
+
+ if (!agent)
+ return;
+ free(agent->id);
+ free(agent->conf.file);
+ free(agent->var_pfx);
+ free(agent->var_on_error);
+ free(agent->var_t_process);
+ free(agent->var_t_total);
+ list_for_each_entry_safe(msg, msgback, &agent->messages, list) {
+ LIST_DELETE(&msg->list);
+ spoe_release_message(msg);
+ }
+ list_for_each_entry_safe(grp, grpback, &agent->groups, list) {
+ LIST_DELETE(&grp->list);
+ spoe_release_group(grp);
+ }
+ if (agent->rt) {
+ for (i = 0; i < global.nbthread; ++i) {
+ free(agent->rt[i].engine_id);
+ HA_SPIN_DESTROY(&agent->rt[i].lock);
+ }
+ }
+ free(agent->rt);
+ free(agent);
+}
+
+static const char *spoe_frm_err_reasons[SPOE_FRM_ERRS] = {
+ [SPOE_FRM_ERR_NONE] = "normal",
+ [SPOE_FRM_ERR_IO] = "I/O error",
+ [SPOE_FRM_ERR_TOUT] = "a timeout occurred",
+ [SPOE_FRM_ERR_TOO_BIG] = "frame is too big",
+ [SPOE_FRM_ERR_INVALID] = "invalid frame received",
+ [SPOE_FRM_ERR_NO_VSN] = "version value not found",
+ [SPOE_FRM_ERR_NO_FRAME_SIZE] = "max-frame-size value not found",
+ [SPOE_FRM_ERR_NO_CAP] = "capabilities value not found",
+ [SPOE_FRM_ERR_BAD_VSN] = "unsupported version",
+ [SPOE_FRM_ERR_BAD_FRAME_SIZE] = "max-frame-size too big or too small",
+ [SPOE_FRM_ERR_FRAG_NOT_SUPPORTED] = "fragmentation not supported",
+ [SPOE_FRM_ERR_INTERLACED_FRAMES] = "invalid interlaced frames",
+ [SPOE_FRM_ERR_FRAMEID_NOTFOUND] = "frame-id not found",
+ [SPOE_FRM_ERR_RES] = "resource allocation error",
+ [SPOE_FRM_ERR_UNKNOWN] = "an unknown error occurred",
+};
+
+static const char *spoe_event_str[SPOE_EV_EVENTS] = {
+ [SPOE_EV_ON_CLIENT_SESS] = "on-client-session",
+ [SPOE_EV_ON_TCP_REQ_FE] = "on-frontend-tcp-request",
+ [SPOE_EV_ON_TCP_REQ_BE] = "on-backend-tcp-request",
+ [SPOE_EV_ON_HTTP_REQ_FE] = "on-frontend-http-request",
+ [SPOE_EV_ON_HTTP_REQ_BE] = "on-backend-http-request",
+
+ [SPOE_EV_ON_SERVER_SESS] = "on-server-session",
+ [SPOE_EV_ON_TCP_RSP] = "on-tcp-response",
+ [SPOE_EV_ON_HTTP_RSP] = "on-http-response",
+};
+
+
+#if defined(DEBUG_SPOE) || defined(DEBUG_FULL)
+
+static const char *spoe_ctx_state_str[SPOE_CTX_ST_ERROR+1] = {
+ [SPOE_CTX_ST_NONE] = "NONE",
+ [SPOE_CTX_ST_READY] = "READY",
+ [SPOE_CTX_ST_ENCODING_MSGS] = "ENCODING_MSGS",
+ [SPOE_CTX_ST_SENDING_MSGS] = "SENDING_MSGS",
+ [SPOE_CTX_ST_WAITING_ACK] = "WAITING_ACK",
+ [SPOE_CTX_ST_DONE] = "DONE",
+ [SPOE_CTX_ST_ERROR] = "ERROR",
+};
+
+static const char *spoe_appctx_state_str[SPOE_APPCTX_ST_END+1] = {
+ [SPOE_APPCTX_ST_CONNECT] = "CONNECT",
+ [SPOE_APPCTX_ST_CONNECTING] = "CONNECTING",
+ [SPOE_APPCTX_ST_IDLE] = "IDLE",
+ [SPOE_APPCTX_ST_PROCESSING] = "PROCESSING",
+ [SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY] = "SENDING_FRAG_NOTIFY",
+ [SPOE_APPCTX_ST_WAITING_SYNC_ACK] = "WAITING_SYNC_ACK",
+ [SPOE_APPCTX_ST_DISCONNECT] = "DISCONNECT",
+ [SPOE_APPCTX_ST_DISCONNECTING] = "DISCONNECTING",
+ [SPOE_APPCTX_ST_EXIT] = "EXIT",
+ [SPOE_APPCTX_ST_END] = "END",
+};
+
+#endif
+
+/* Used to generate a unique id for an engine. On success, it returns an
+ * allocated string, so it is the caller's responsibility to release it. If the
+ * allocation failed, it returns NULL. */
+static char *
+generate_pseudo_uuid(void)
+{
+ ha_generate_uuid(&trash);
+ return my_strndup(trash.area, trash.data);
+}
+
+
+static inline void
+spoe_update_stat_time(struct timeval *tv, long *t)
+{
+ if (*t == -1)
+ *t = tv_ms_elapsed(tv, &now);
+ else
+ *t += tv_ms_elapsed(tv, &now);
+ tv_zero(tv);
+}
+
+/********************************************************************
+ * Functions that encode/decode SPOE frames
+ ********************************************************************/
+/* Helper to get static string length, excluding the terminating null byte */
+#define SLEN(str) (sizeof(str)-1)
+
+/* Predefined key used in HELLO/DISCONNECT frames */
+#define SUPPORTED_VERSIONS_KEY "supported-versions"
+#define VERSION_KEY "version"
+#define MAX_FRAME_SIZE_KEY "max-frame-size"
+#define CAPABILITIES_KEY "capabilities"
+#define ENGINE_ID_KEY "engine-id"
+#define HEALTHCHECK_KEY "healthcheck"
+#define STATUS_CODE_KEY "status-code"
+#define MSG_KEY "message"
+
+struct spoe_version {
+ char *str;
+ int min;
+ int max;
+};
+
+/* All supported versions */
+static struct spoe_version supported_versions[] = {
+ /* 1.0 is now unsupported because of a bug in the handling of frame flags */
+ {"2.0", 2000, 2000},
+ {NULL, 0, 0}
+};
+
+/* Comma-separated list of supported versions */
+#define SUPPORTED_VERSIONS_VAL "2.0"
+
+/* Convert a string to a SPOE version value. The string must follow the format
+ * "MAJOR.MINOR". It will be concerted into the integer (1000 * MAJOR + MINOR).
+ * If an error occurred, -1 is returned. */
+static int
+spoe_str_to_vsn(const char *str, size_t len)
+{
+ const char *p, *end;
+ int maj, min, vsn;
+
+ p = str;
+ end = str+len;
+ maj = min = 0;
+ vsn = -1;
+
+ /* skip leading spaces */
+ while (p < end && isspace((unsigned char)*p))
+ p++;
+
+ /* parse Major number, until the '.' (check <end> first to avoid
+ * reading past the buffer when no '.' is present) */
+ while (p < end && *p != '.') {
+ if (*p < '0' || *p > '9')
+ goto out;
+ maj *= 10;
+ maj += (*p - '0');
+ p++;
+ }
+
+ /* check Major version and the presence of the '.' */
+ if (!maj || p >= end)
+ goto out;
+
+ p++; /* skip the '.' */
+ if (p >= end || *p < '0' || *p > '9') /* Minor number is missing */
+ goto out;
+
+ /* Parse Minor number */
+ while (p < end) {
+ if (*p < '0' || *p > '9')
+ break;
+ min *= 10;
+ min += (*p - '0');
+ p++;
+ }
+
+ /* check Minor number */
+ if (min > 999)
+ goto out;
+
+ /* skip trailing spaces */
+ while (p < end && isspace((unsigned char)*p))
+ p++;
+ if (p != end)
+ goto out;
+
+ vsn = maj * 1000 + min;
+ out:
+ return vsn;
+}
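+
+/* Examples: spoe_str_to_vsn("2.0", 3) returns 2000 and " 2.1 " yields
+ * 2001, while "2" (no minor) or ".5" (no major) both return -1. */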
+
+/* Encode the HELLO frame sent by HAProxy to an agent. It returns the number of
+ * encoded bytes in the frame on success, 0 if an encoding error occurred and -1
+ * if a fatal error occurred. */
+static int
+spoe_prepare_hahello_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ struct buffer *chk;
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *p, *end;
+ unsigned int flags = SPOE_FRM_FL_FIN;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_HAPROXY_HELLO;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* No stream-id and frame-id for HELLO frames */
+ *p++ = 0; *p++ = 0;
+
+ /* There are 3 mandatory items: "supported-versions", "max-frame-size"
+ * and "capabilities" */
+
+ /* "supported-versions" K/V item */
+ sz = SLEN(SUPPORTED_VERSIONS_KEY);
+ if (spoe_encode_buffer(SUPPORTED_VERSIONS_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_STR;
+ sz = SLEN(SUPPORTED_VERSIONS_VAL);
+ if (spoe_encode_buffer(SUPPORTED_VERSIONS_VAL, sz, &p, end) == -1)
+ goto too_big;
+
+ /* "max-fram-size" K/V item */
+ sz = SLEN(MAX_FRAME_SIZE_KEY);
+ if (spoe_encode_buffer(MAX_FRAME_SIZE_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_UINT32;
+ if (encode_varint(SPOE_APPCTX(appctx)->max_frame_size, &p, end) == -1)
+ goto too_big;
+
+ /* "capabilities" K/V item */
+ sz = SLEN(CAPABILITIES_KEY);
+ if (spoe_encode_buffer(CAPABILITIES_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_STR;
+ chk = get_trash_chunk();
+ if (agent != NULL && (agent->flags & SPOE_FL_PIPELINING)) {
+ memcpy(chk->area, "pipelining", 10);
+ chk->data += 10;
+ }
+ if (agent != NULL && (agent->flags & SPOE_FL_ASYNC)) {
+ if (chk->data) chk->area[chk->data++] = ',';
+ memcpy(chk->area+chk->data, "async", 5);
+ chk->data += 5;
+ }
+ if (agent != NULL && (agent->flags & SPOE_FL_RCV_FRAGMENTATION)) {
+ if (chk->data) chk->area[chk->data++] = ',';
+ memcpy(chk->area+chk->data, "fragmentation", 13);
+ chk->data += 13;
+ }
+ if (spoe_encode_buffer(chk->area, chk->data, &p, end) == -1)
+ goto too_big;
+
+ /* (optional) "engine-id" K/V item, if present */
+ if (agent != NULL && agent->rt[tid].engine_id != NULL) {
+ sz = SLEN(ENGINE_ID_KEY);
+ if (spoe_encode_buffer(ENGINE_ID_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_STR;
+ sz = strlen(agent->rt[tid].engine_id);
+ if (spoe_encode_buffer(agent->rt[tid].engine_id, sz, &p, end) == -1)
+ goto too_big;
+ }
+
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
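+
+/* On the wire, the resulting frame roughly looks like this (sketch,
+ * varints shown as single bytes for small values):
+ *
+ *   HAPROXY-HELLO | FLAGS=FIN (4 bytes, network order) | 0 | 0
+ *   | "supported-versions" STR "2.0"
+ *   | "max-frame-size" UINT32 <size>
+ *   | "capabilities" STR "pipelining,async,fragmentation"
+ *   [ | "engine-id" STR <uuid> ]
+ */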
+
+/* Encode the DISCONNECT frame sent by HAProxy to an agent. It returns the number of
+ * encoded bytes in the frame on success, 0 if an encoding error occurred and -1
+ * if a fatal error occurred. */
+static int
+spoe_prepare_hadiscon_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ const char *reason;
+ char *p, *end;
+ unsigned int flags = SPOE_FRM_FL_FIN;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_HAPROXY_DISCON;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* No stream-id and frame-id for DISCONNECT frames */
+ *p++ = 0; *p++ = 0;
+
+ if (SPOE_APPCTX(appctx)->status_code >= SPOE_FRM_ERRS)
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_UNKNOWN;
+
+ /* There are 2 mandatory items: "status-code" and "message" */
+
+ /* "status-code" K/V item */
+ sz = SLEN(STATUS_CODE_KEY);
+ if (spoe_encode_buffer(STATUS_CODE_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_UINT32;
+ if (encode_varint(SPOE_APPCTX(appctx)->status_code, &p, end) == -1)
+ goto too_big;
+
+ /* "message" K/V item */
+ sz = SLEN(MSG_KEY);
+ if (spoe_encode_buffer(MSG_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ /* Get the message corresponding to the status code */
+ reason = spoe_frm_err_reasons[SPOE_APPCTX(appctx)->status_code];
+
+ *p++ = SPOE_DATA_T_STR;
+ sz = strlen(reason);
+ if (spoe_encode_buffer(reason, sz, &p, end) == -1)
+ goto too_big;
+
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
+
+/* Encode the NOTIFY frame sent by HAProxy to an agent. It returns the number of
+ * encoded bytes in the frame on success, 0 if an encoding error occurred and -1
+ * if a fatal error occurred. */
+static int
+spoe_prepare_hanotify_frame(struct appctx *appctx, struct spoe_context *ctx,
+ char *frame, size_t size)
+{
+ char *p, *end;
+ unsigned int stream_id, frame_id;
+ unsigned int flags = SPOE_FRM_FL_FIN;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ stream_id = ctx->stream_id;
+ frame_id = ctx->frame_id;
+
+ if (ctx->flags & SPOE_CTX_FL_FRAGMENTED) {
+ /* The fragmentation is not supported by the applet */
+ if (!(SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_FRAGMENTATION)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+ flags = ctx->frag_ctx.flags;
+ }
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_HAPROXY_NOTIFY;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* Set stream-id and frame-id */
+ if (encode_varint(stream_id, &p, end) == -1)
+ goto too_big;
+ if (encode_varint(frame_id, &p, end) == -1)
+ goto too_big;
+
+ /* Copy encoded messages, if possible */
+ sz = b_data(&ctx->buffer);
+ if (p + sz >= end)
+ goto too_big;
+ memcpy(p, b_head(&ctx->buffer), sz);
+ p += sz;
+
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
+
+/* Encode the next part of a fragmented frame sent by HAProxy to an agent. It
+ * returns the number of encoded bytes in the frame on success, 0 if an encoding
+ * error occurred and -1 if a fatal error occurred. */
+static int
+spoe_prepare_hafrag_frame(struct appctx *appctx, struct spoe_context *ctx,
+ char *frame, size_t size)
+{
+ char *p, *end;
+ unsigned int stream_id, frame_id;
+ unsigned int flags;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ /* <ctx> is null when the stream has aborted the processing of a
+ * fragmented frame. In this case, we must notify the corresponding
+ * agent using ids stored in <frag_ctx>. */
+ if (ctx == NULL) {
+ flags = (SPOE_FRM_FL_FIN|SPOE_FRM_FL_ABRT);
+ stream_id = SPOE_APPCTX(appctx)->frag_ctx.cursid;
+ frame_id = SPOE_APPCTX(appctx)->frag_ctx.curfid;
+ }
+ else {
+ flags = ctx->frag_ctx.flags;
+ stream_id = ctx->stream_id;
+ frame_id = ctx->frame_id;
+ }
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_UNSET;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* Set stream-id and frame-id */
+ if (encode_varint(stream_id, &p, end) == -1)
+ goto too_big;
+ if (encode_varint(frame_id, &p, end) == -1)
+ goto too_big;
+
+ if (ctx == NULL)
+ goto end;
+
+ /* Copy encoded messages, if possible */
+ sz = b_data(&ctx->buffer);
+ if (p + sz >= end)
+ goto too_big;
+ memcpy(p, b_head(&ctx->buffer), sz);
+ p += sz;
+
+ end:
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
+
+/* Decode and process the HELLO frame sent by an agent. It returns the number of
+ * read bytes on success, 0 if a decoding error occurred, and -1 if a fatal
+ * error occurred. */
+static int
+spoe_handle_agenthello_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *p, *end;
+ int vsn, max_frame_size;
+ unsigned int flags;
+
+ p = frame;
+ end = frame + size;
+
+ /* Check frame type */
+ if (*p++ != SPOE_FRM_T_AGENT_HELLO) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ if (size < 7 /* TYPE + METADATA */) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Retrieve flags */
+ memcpy((char *)&flags, p, 4);
+ flags = ntohl(flags);
+ p += 4;
+
+ /* Fragmentation is not supported for HELLO frame */
+ if (!(flags & SPOE_FRM_FL_FIN)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+
+ /* stream-id and frame-id must be cleared */
+ if (*p != 0 || *(p+1) != 0) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ p += 2;
+
+ /* There are 3 mandatory items: "version", "max-frame-size" and
+ * "capabilities" */
+
+ /* Loop on K/V items */
+ vsn = max_frame_size = flags = 0;
+ while (p < end) {
+ char *str;
+ uint64_t sz;
+ int ret;
+
+ /* Decode the item key */
+ ret = spoe_decode_buffer(&p, end, &str, &sz);
+ if (ret == -1 || !sz) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Check "version" K/V item */
+ if (sz >= strlen(VERSION_KEY) && !memcmp(str, VERSION_KEY, strlen(VERSION_KEY))) {
+ int i, type = *p++;
+
+ /* The value must be a string */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_STR) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ vsn = spoe_str_to_vsn(str, sz);
+ if (vsn == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_BAD_VSN;
+ return -1;
+ }
+ for (i = 0; supported_versions[i].str != NULL; ++i) {
+ if (vsn >= supported_versions[i].min &&
+ vsn <= supported_versions[i].max)
+ break;
+ }
+ if (supported_versions[i].str == NULL) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_BAD_VSN;
+ return -1;
+ }
+ }
+ /* Check "max-frame-size" K/V item */
+ else if (sz >= strlen(MAX_FRAME_SIZE_KEY) && !memcmp(str, MAX_FRAME_SIZE_KEY, strlen(MAX_FRAME_SIZE_KEY))) {
+ int type = *p++;
+
+ /* The value must be an integer */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT64 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT64) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (decode_varint(&p, end, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (sz < MIN_FRAME_SIZE ||
+ sz > SPOE_APPCTX(appctx)->max_frame_size) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_BAD_FRAME_SIZE;
+ return -1;
+ }
+ max_frame_size = sz;
+ }
+ /* Check "capabilities" K/V item */
+ else if (sz >= strlen(CAPABILITIES_KEY) && !memcmp(str, CAPABILITIES_KEY, strlen(CAPABILITIES_KEY))) {
+ int type = *p++;
+
+ /* The value must be a string */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_STR) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ while (sz) {
+ char *delim;
+
+ /* Skip leading spaces (check <sz> first to stay within the buffer) */
+ for (; sz && isspace((unsigned char)*str); str++, sz--);
+
+ if (sz >= 10 && !strncmp(str, "pipelining", 10)) {
+ str += 10; sz -= 10;
+ if (!sz || isspace((unsigned char)*str) || *str == ',')
+ flags |= SPOE_APPCTX_FL_PIPELINING;
+ }
+ else if (sz >= 5 && !strncmp(str, "async", 5)) {
+ str += 5; sz -= 5;
+ if (!sz || isspace((unsigned char)*str) || *str == ',')
+ flags |= SPOE_APPCTX_FL_ASYNC;
+ }
+ else if (sz >= 13 && !strncmp(str, "fragmentation", 13)) {
+ str += 13; sz -= 13;
+ if (!sz || isspace((unsigned char)*str) || *str == ',')
+ flags |= SPOE_APPCTX_FL_FRAGMENTATION;
+ }
+
+ /* Get the next comma or break */
+ if (!sz || (delim = memchr(str, ',', sz)) == NULL)
+ break;
+ delim++;
+ sz -= (delim - str);
+ str = delim;
+ }
+ }
+ else {
+ /* Silently ignore unknown item */
+ if (spoe_skip_data(&p, end) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ }
+ }
+
+ /* Final checks */
+ if (!vsn) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NO_VSN;
+ return -1;
+ }
+ if (!max_frame_size) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NO_FRAME_SIZE;
+ return -1;
+ }
+ if (!agent)
+ flags &= ~(SPOE_APPCTX_FL_PIPELINING|SPOE_APPCTX_FL_ASYNC);
+ else {
+ if ((flags & SPOE_APPCTX_FL_PIPELINING) && !(agent->flags & SPOE_FL_PIPELINING))
+ flags &= ~SPOE_APPCTX_FL_PIPELINING;
+ if ((flags & SPOE_APPCTX_FL_ASYNC) && !(agent->flags & SPOE_FL_ASYNC))
+ flags &= ~SPOE_APPCTX_FL_ASYNC;
+ }
+
+ SPOE_APPCTX(appctx)->version = (unsigned int)vsn;
+ SPOE_APPCTX(appctx)->max_frame_size = (unsigned int)max_frame_size;
+ SPOE_APPCTX(appctx)->flags |= flags;
+
+ return (p - frame);
+}
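+
+/* For instance, an agent replying with version "2.0", max-frame-size
+ * 4096 and capabilities "pipelining,async" leaves the applet with
+ * vsn=2000, a 4096-byte frame limit and both feature flags set,
+ * provided the agent section enabled them too. */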
+
+/* Decode the DISCONNECT frame sent by an agent. It returns the number of read
+ * bytes on success, 0 if the frame can be ignored and -1 if an error
+ * occurred. */
+static int
+spoe_handle_agentdiscon_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ char *p, *end;
+ unsigned int flags;
+
+ p = frame;
+ end = frame + size;
+
+ /* Check frame type */
+ if (*p++ != SPOE_FRM_T_AGENT_DISCON) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ if (size < 7 /* TYPE + METADATA */) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Retrieve flags */
+ memcpy((char *)&flags, p, 4);
+ flags = ntohl(flags);
+ p += 4;
+
+ /* Fragmentation is not supported for DISCONNECT frame */
+ if (!(flags & SPOE_FRM_FL_FIN)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+
+ /* stream-id and frame-id must be cleared */
+ if (*p != 0 || *(p+1) != 0) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ p += 2;
+
+ /* There are 2 mandatory items: "status-code" and "message" */
+
+ /* Loop on K/V items */
+ while (p < end) {
+ char *str;
+ uint64_t sz;
+ int ret;
+
+ /* Decode the item key */
+ ret = spoe_decode_buffer(&p, end, &str, &sz);
+ if (ret == -1 || !sz) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Check "status-code" K/V item */
+ if (sz >= strlen(STATUS_CODE_KEY) && !memcmp(str, STATUS_CODE_KEY, strlen(STATUS_CODE_KEY))) {
+ int type = *p++;
+
+ /* The value must be an integer */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT64 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT64) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (decode_varint(&p, end, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ SPOE_APPCTX(appctx)->status_code = sz;
+ }
+
+ /* Check "message" K/V item */
+ else if (sz >= strlen(MSG_KEY) && !memcmp(str, MSG_KEY, strlen(MSG_KEY))) {
+ int type = *p++;
+
+ /* The value must be a string */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_STR) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ ret = spoe_decode_buffer(&p, end, &str, &sz);
+ if (ret == -1 || sz > 255) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+#if defined(DEBUG_SPOE) || defined(DEBUG_FULL)
+ SPOE_APPCTX(appctx)->reason = str;
+ SPOE_APPCTX(appctx)->rlen = sz;
+#endif
+ }
+ else {
+ /* Silently ignore unknown item */
+ if (spoe_skip_data(&p, end) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ }
+ }
+
+ return (p - frame);
+}
+
+
+/* Decode the ACK frame sent by an agent. It returns the number of read bytes on
+ * success, 0 if the frame can be ignored and -1 if an error occurred. */
+static int
+spoe_handle_agentack_frame(struct appctx *appctx, struct spoe_context **ctx,
+ char *frame, size_t size)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *p, *end;
+ uint64_t stream_id, frame_id;
+ int len;
+ unsigned int flags;
+
+ p = frame;
+ end = frame + size;
+ *ctx = NULL;
+
+ /* Check frame type */
+ if (*p++ != SPOE_FRM_T_AGENT_ACK) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ if (size < 7 /* TYPE + METADATA */) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Retrieve flags */
+ memcpy((char *)&flags, p, 4);
+ flags = ntohl(flags);
+ p += 4;
+
+ /* Fragmentation is not supported for now */
+ if (!(flags & SPOE_FRM_FL_FIN)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+
+ /* Get the stream-id and the frame-id */
+ if (decode_varint(&p, end, &stream_id) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (decode_varint(&p, end, &frame_id) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Try to find the corresponding SPOE context */
+ if (SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_ASYNC) {
+ list_for_each_entry((*ctx), &agent->rt[tid].waiting_queue, list) {
+ if ((*ctx)->stream_id == (unsigned int)stream_id &&
+ (*ctx)->frame_id == (unsigned int)frame_id)
+ goto found;
+ }
+ }
+ else {
+ list_for_each_entry((*ctx), &SPOE_APPCTX(appctx)->waiting_queue, list) {
+ if ((*ctx)->stream_id == (unsigned int)stream_id &&
+ (*ctx)->frame_id == (unsigned int)frame_id)
+ goto found;
+ }
+ }
+
+ if (SPOE_APPCTX(appctx)->frag_ctx.ctx &&
+ SPOE_APPCTX(appctx)->frag_ctx.cursid == (unsigned int)stream_id &&
+ SPOE_APPCTX(appctx)->frag_ctx.curfid == (unsigned int)frame_id) {
+
+ /* ABRT bit is set for an unfinished fragmented frame */
+ if (flags & SPOE_FRM_FL_ABRT) {
+ *ctx = SPOE_APPCTX(appctx)->frag_ctx.ctx;
+ (*ctx)->state = SPOE_CTX_ST_ERROR;
+ (*ctx)->status_code = SPOE_CTX_ERR_FRAG_FRAME_ABRT;
+ /* Ignore the payload */
+ goto end;
+ }
+ /* TODO: Handle more flags for fragmented frames: RESUME, FINISH... */
+ /* For now, we ignore the ack */
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* No Stream found, ignore the frame */
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - Ignore ACK frame"
+ " - stream-id=%u - frame-id=%u\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx,
+ (unsigned int)stream_id, (unsigned int)frame_id);
+
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAMEID_NOTFOUND;
+ if (appctx->st0 == SPOE_APPCTX_ST_WAITING_SYNC_ACK) {
+ /* Report an error if we are waiting for the ack of another
+ * frame, but not if no frame is waiting for an ack any more
+ * (timeout)
+ */
+ if (!LIST_ISEMPTY(&SPOE_APPCTX(appctx)->waiting_queue) ||
+ SPOE_APPCTX(appctx)->frag_ctx.ctx)
+ return -1;
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ SPOE_APPCTX(appctx)->cur_fpa = 0;
+ }
+ return 0;
+
+ found:
+ if (!spoe_acquire_buffer(&SPOE_APPCTX(appctx)->buffer,
+ &SPOE_APPCTX(appctx)->buffer_wait)) {
+ *ctx = NULL;
+ return 1; /* Retry later */
+ }
+
+ /* Copy encoded actions */
+ len = (end - p);
+ memcpy(b_head(&SPOE_APPCTX(appctx)->buffer), p, len);
+ b_set_data(&SPOE_APPCTX(appctx)->buffer, len);
+ p += len;
+
+ /* Transfer the buffer ownership to the SPOE context */
+ (*ctx)->buffer = SPOE_APPCTX(appctx)->buffer;
+ SPOE_APPCTX(appctx)->buffer = BUF_NULL;
+
+ (*ctx)->state = SPOE_CTX_ST_DONE;
+
+ end:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - ACK frame received"
+ " - ctx=%p - stream-id=%u - frame-id=%u - flags=0x%08x\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx, *ctx, (*ctx)->stream_id,
+ (*ctx)->frame_id, flags);
+ return (p - frame);
+}
+
+/* This function is used in cfgparse.c and declared in proto/checks.h. It
+ * prepares the request to send to agents during a healthcheck. It returns 0 on
+ * success and -1 if an error occurred. */
+int
+spoe_prepare_healthcheck_request(char **req, int *len)
+{
+ struct appctx appctx;
+ struct spoe_appctx spoe_appctx;
+ char *frame, *end, buf[MAX_FRAME_SIZE+4];
+ size_t sz;
+ int ret;
+
+ memset(&appctx, 0, sizeof(appctx));
+ memset(&spoe_appctx, 0, sizeof(spoe_appctx));
+ memset(buf, 0, sizeof(buf));
+
+ appctx.svcctx = &spoe_appctx;
+ SPOE_APPCTX(&appctx)->max_frame_size = MAX_FRAME_SIZE;
+
+ frame = buf+4; /* Reserve the first 4 bytes for the frame size */
+ end = frame + MAX_FRAME_SIZE;
+
+ ret = spoe_prepare_hahello_frame(&appctx, frame, MAX_FRAME_SIZE);
+ if (ret <= 0)
+ return -1;
+ frame += ret;
+
+ /* Add "healthcheck" K/V item */
+ sz = SLEN(HEALTHCHECK_KEY);
+ if (spoe_encode_buffer(HEALTHCHECK_KEY, sz, &frame, end) == -1)
+ return -1;
+ *frame++ = (SPOE_DATA_T_BOOL | SPOE_DATA_FL_TRUE);
+
+ *len = frame - buf;
+ sz = htonl(*len - 4);
+ memcpy(buf, (char *)&sz, 4);
+
+ if ((*req = malloc(*len)) == NULL)
+ return -1;
+ memcpy(*req, buf, *len);
+ return 0;
+}
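+
+/* The resulting healthcheck request is thus a length-prefixed HELLO
+ * frame (sketch):
+ *
+ *   [ frame length (4 bytes, network order) ]
+ *   [ HAPROXY-HELLO frame ... "healthcheck" BOOL true ]
+ *
+ * to which the agent is expected to reply with an AGENT-HELLO, or a
+ * DISCONNECT frame on error. */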
+
+/* This function is used in checks.c and declared in proto/checks.h. It decodes
+ * the response received from an agent during a healthcheck. It returns 0 on
+ * success and -1 if an error occurred. */
+int
+spoe_handle_healthcheck_response(char *frame, size_t size, char *err, int errlen)
+{
+ struct appctx appctx;
+ struct spoe_appctx spoe_appctx;
+
+ memset(&appctx, 0, sizeof(appctx));
+ memset(&spoe_appctx, 0, sizeof(spoe_appctx));
+
+ appctx.svcctx = &spoe_appctx;
+ SPOE_APPCTX(&appctx)->max_frame_size = MAX_FRAME_SIZE;
+
+ if (*frame == SPOE_FRM_T_AGENT_DISCON) {
+ spoe_handle_agentdiscon_frame(&appctx, frame, size);
+ goto error;
+ }
+ if (spoe_handle_agenthello_frame(&appctx, frame, size) <= 0)
+ goto error;
+
+ return 0;
+
+ error:
+ if (SPOE_APPCTX(&appctx)->status_code >= SPOE_FRM_ERRS)
+ SPOE_APPCTX(&appctx)->status_code = SPOE_FRM_ERR_UNKNOWN;
+ strncpy(err, spoe_frm_err_reasons[SPOE_APPCTX(&appctx)->status_code], errlen);
+ return -1;
+}
+
+/* Send a SPOE frame to an agent. It returns -1 when an error occurred, 0 when
+ * the frame can be ignored, 1 to retry later, and the frame length on
+ * success. */
+static int
+spoe_send_frame(struct appctx *appctx, char *buf, size_t framesz)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int ret;
+ uint32_t netint;
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ netint = htonl(framesz);
+ memcpy(buf, (char *)&netint, 4);
+ ret = applet_putblk(appctx, buf, framesz+4);
+ if (ret <= 0) {
+ if ((ret == -3 && b_is_null(&sc_ic(sc)->buf)) || ret == -1) {
+ /* WT: is this still needed for the case ret==-3 ? */
+ sc_need_room(sc);
+ return 1; /* retry */
+ }
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ return -1; /* error */
+ }
+ return framesz;
+}
+
+/* Receive a SPOE frame from an agent. It returns -1 when an error occurred, 0
+ * when the frame can be ignored, 1 to retry later and the frame length on
+ * success. */
+static int
+spoe_recv_frame(struct appctx *appctx, char *buf, size_t framesz)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int ret;
+ uint32_t netint;
+
+ ret = co_getblk(sc_oc(sc), (char *)&netint, 4, 0);
+ if (ret > 0) {
+ framesz = ntohl(netint);
+ if (framesz > SPOE_APPCTX(appctx)->max_frame_size) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return -1;
+ }
+ ret = co_getblk(sc_oc(sc), buf, framesz, 4);
+ }
+ if (ret <= 0) {
+ if (ret == 0) {
+ return 1; /* retry */
+ }
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ return -1; /* error */
+ }
+ return framesz;
+}
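+
+/* Both directions thus use the same simple framing: a 4-byte length in
+ * network order followed by that many bytes of frame payload. */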
+
+/********************************************************************
+ * Functions that manage the SPOE applet
+ ********************************************************************/
+static int
+spoe_wakeup_appctx(struct appctx *appctx)
+{
+ applet_will_consume(appctx);
+ applet_have_more_data(appctx);
+ appctx_wakeup(appctx);
+ return 1;
+}
+
+/* Callback function that catches applet timeouts. If a timeout occurred, we
+ * set the <appctx->st1> flag and the SPOE applet is woken up. */
+static struct task *
+spoe_process_appctx(struct task * task, void *context, unsigned int state)
+{
+ struct appctx *appctx = context;
+
+ appctx->st1 = SPOE_APPCTX_ERR_NONE;
+ if (tick_is_expired(task->expire, now_ms)) {
+ task->expire = TICK_ETERNITY;
+ appctx->st1 = SPOE_APPCTX_ERR_TOUT;
+ }
+ spoe_wakeup_appctx(appctx);
+ return task;
+}
+
+static int
+spoe_init_appctx(struct appctx *appctx)
+{
+ struct spoe_appctx *spoe_appctx = SPOE_APPCTX(appctx);
+ struct spoe_agent *agent = spoe_appctx->agent;
+ struct task *task;
+ struct stream *s;
+
+ if ((task = task_new_here()) == NULL)
+ goto out_error;
+ task->process = spoe_process_appctx;
+ task->context = appctx;
+
+ if (appctx_finalize_startup(appctx, &agent->spoe_conf->agent_fe, &BUF_NULL) == -1)
+ goto out_free_task;
+
+ spoe_appctx->owner = appctx;
+ spoe_appctx->task = task;
+
+ LIST_INIT(&spoe_appctx->buffer_wait.list);
+ spoe_appctx->buffer_wait.target = appctx;
+ spoe_appctx->buffer_wait.wakeup_cb = (int (*)(void *))spoe_wakeup_appctx;
+
+ s = appctx_strm(appctx);
+ stream_set_backend(s, agent->b.be);
+
+ /* applet is waiting for data */
+ applet_need_more_data(appctx);
+
+ s->do_log = NULL;
+ s->res.flags |= CF_READ_DONTWAIT;
+
+ HA_SPIN_LOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+ LIST_APPEND(&agent->rt[tid].applets, &spoe_appctx->list);
+ HA_SPIN_UNLOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+ _HA_ATOMIC_INC(&agent->counters.applets);
+
+ appctx->st0 = SPOE_APPCTX_ST_CONNECT;
+ task_wakeup(spoe_appctx->task, TASK_WOKEN_INIT);
+ return 0;
+ out_free_task:
+ task_destroy(task);
+ out_error:
+ return -1;
+}
+
+/* Callback function that releases a SPOE applet. This happens when the
+ * connection with the agent is closed. */
+static void
+spoe_release_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_appctx *spoe_appctx = SPOE_APPCTX(appctx);
+ struct spoe_agent *agent;
+ struct spoe_context *ctx, *back;
+
+ if (spoe_appctx == NULL)
+ return;
+
+ appctx->svcctx = NULL;
+ agent = spoe_appctx->agent;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx);
+
+ /* Remove applet from the list of running applets */
+ _HA_ATOMIC_DEC(&agent->counters.applets);
+ HA_SPIN_LOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+ if (!LIST_ISEMPTY(&spoe_appctx->list)) {
+ LIST_DELETE(&spoe_appctx->list);
+ LIST_INIT(&spoe_appctx->list);
+ }
+ HA_SPIN_UNLOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+
+ /* Shutdown the server connection, if needed */
+ if (appctx->st0 != SPOE_APPCTX_ST_END) {
+ if (appctx->st0 == SPOE_APPCTX_ST_IDLE) {
+ eb32_delete(&spoe_appctx->node);
+ _HA_ATOMIC_DEC(&agent->counters.idles);
+ }
+
+ appctx->st0 = SPOE_APPCTX_ST_END;
+ if (spoe_appctx->status_code == SPOE_FRM_ERR_NONE)
+ spoe_appctx->status_code = SPOE_FRM_ERR_IO;
+
+ sc_shutw(sc);
+ sc_shutr(sc);
+ sc_ic(sc)->flags |= CF_READ_NULL;
+ }
+
+ /* Destroy the task attached to this applet */
+ task_destroy(spoe_appctx->task);
+
+ /* Report an error to all streams in the appctx waiting queue */
+ list_for_each_entry_safe(ctx, back, &spoe_appctx->waiting_queue, list) {
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+ spoe_update_stat_time(&ctx->stats.tv_wait, &ctx->stats.t_waiting);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+
+ /* If the applet was processing a fragmented frame, report an error to
+ * the corresponding stream. */
+ if (spoe_appctx->frag_ctx.ctx) {
+ ctx = spoe_appctx->frag_ctx.ctx;
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+
+ if (!LIST_ISEMPTY(&agent->rt[tid].applets)) {
+ /* If there are still some running applets, remove the reference
+ * to the current one from streams in the async waiting queue. In
+ * async mode, the ACK may be received from another appctx.
+ */
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].waiting_queue, list) {
+ if (ctx->spoe_appctx == spoe_appctx)
+ ctx->spoe_appctx = NULL;
+ }
+ goto end;
+ }
+ else {
+ /* It is the last running applet and the sending and async
+ * waiting queues are not empty. So try to start a new applet if
+ * HAProxy is not stopping. On success, we remove the reference
+ * to the current appctx from streams in the async waiting queue.
+ * In async mode, the ACK may be received from another appctx.
+ */
+ if (!stopping &&
+ (!LIST_ISEMPTY(&agent->rt[tid].sending_queue) || !LIST_ISEMPTY(&agent->rt[tid].waiting_queue)) &&
+ spoe_create_appctx(agent->spoe_conf)) {
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].waiting_queue, list) {
+ if (ctx->spoe_appctx == spoe_appctx)
+ ctx->spoe_appctx = NULL;
+ }
+ goto end;
+ }
+
+ /* Otherwise, report an error to all streams in the sending and
+ * async waiting queues.
+ */
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].sending_queue, list) {
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.tv_queue, &ctx->stats.t_queue);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].waiting_queue, list) {
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+ spoe_update_stat_time(&ctx->stats.tv_wait, &ctx->stats.t_waiting);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+ }
+
+ end:
+ /* Release allocated memory */
+ spoe_release_buffer(&spoe_appctx->buffer,
+ &spoe_appctx->buffer_wait);
+ pool_free(pool_head_spoe_appctx, spoe_appctx);
+
+ /* Update runtime agent info */
+ agent->rt[tid].frame_size = agent->max_frame_size;
+ list_for_each_entry(spoe_appctx, &agent->rt[tid].applets, list)
+ HA_ATOMIC_UPDATE_MIN(&agent->rt[tid].frame_size, spoe_appctx->max_frame_size);
+}
+
+static int
+spoe_handle_connect_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *frame, *buf;
+ int ret;
+
+ if (sc_state_in(sc->state, SC_SB_CER|SC_SB_DIS|SC_SB_CLO)) {
+ /* closed */
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ goto exit;
+ }
+
+ if (!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST)) {
+ /* not connected yet */
+ applet_have_more_data(appctx);
+ task_wakeup(__sc_strm(sc)->task, TASK_WOKEN_MSG);
+ goto stop;
+ }
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - Connection timed out\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx);
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ goto exit;
+ }
+
+ if (SPOE_APPCTX(appctx)->task->expire == TICK_ETERNITY)
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.hello);
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ buf = trash.area; frame = buf+4;
+ ret = spoe_prepare_hahello_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1)
+ ret = spoe_send_frame(appctx, buf, ret);
+
+ switch (ret) {
+ case -1: /* error */
+ case 0: /* ignore => an error, cannot be ignored */
+ goto exit;
+
+ case 1: /* retry later */
+ goto stop;
+
+ default:
+ /* HELLO frame successfully sent, now wait for the
+ * reply. */
+ appctx->st0 = SPOE_APPCTX_ST_CONNECTING;
+ goto next;
+ }
+
+ next:
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+static int
+spoe_handle_connecting_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *frame;
+ int ret;
+
+
+ if (sc->state == SC_ST_CLO || sc_opposite(sc)->state == SC_ST_CLO) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ goto exit;
+ }
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - Connection timed out\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx);
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ goto exit;
+ }
+
+ frame = trash.area; trash.data = 0;
+ ret = spoe_recv_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1) {
+ if (*frame == SPOE_FRM_T_AGENT_DISCON) {
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECTING;
+ goto next;
+ }
+ trash.data = ret + 4;
+ ret = spoe_handle_agenthello_frame(appctx, frame, ret);
+ }
+
+ switch (ret) {
+ case -1: /* error */
+ case 0: /* ignore => an error, cannot be ignored */
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ goto next;
+
+ case 1: /* retry later */
+ goto stop;
+
+ default:
+ _HA_ATOMIC_INC(&agent->counters.idles);
+ appctx->st0 = SPOE_APPCTX_ST_IDLE;
+ SPOE_APPCTX(appctx)->node.key = 0;
+ eb32_insert(&agent->rt[tid].idle_applets, &SPOE_APPCTX(appctx)->node);
+
+ /* Update runtime agent info */
+ HA_ATOMIC_UPDATE_MIN(&agent->rt[tid].frame_size, SPOE_APPCTX(appctx)->max_frame_size);
+ goto next;
+ }
+
+ next:
+ /* Do not forget to remove processed frame from the output buffer */
+ if (trash.data)
+ co_skip(sc_oc(sc), trash.data);
+
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.idle);
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+
+static int
+spoe_handle_sending_frame_appctx(struct appctx *appctx, int *skip)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ struct spoe_context *ctx = NULL;
+ char *frame, *buf;
+ int ret;
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ buf = trash.area; frame = buf+4;
+
+ if (appctx->st0 == SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY) {
+ ctx = SPOE_APPCTX(appctx)->frag_ctx.ctx;
+ ret = spoe_prepare_hafrag_frame(appctx, ctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ }
+ else if (LIST_ISEMPTY(&agent->rt[tid].sending_queue)) {
+ *skip = 1;
+ ret = 1;
+ goto end;
+ }
+ else {
+ ctx = LIST_NEXT(&agent->rt[tid].sending_queue, typeof(ctx), list);
+ ret = spoe_prepare_hanotify_frame(appctx, ctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+
+ }
+
+ if (ret > 1)
+ ret = spoe_send_frame(appctx, buf, ret);
+
+ switch (ret) {
+ case -1: /* error */
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ goto end;
+
+ case 0: /* ignore */
+ if (ctx == NULL)
+ goto abort_frag_frame;
+
+ spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.tv_queue, &ctx->stats.t_queue);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (SPOE_APPCTX(appctx)->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ *skip = 1;
+ break;
+
+ case 1: /* retry */
+ *skip = 1;
+ break;
+
+ default:
+ if (ctx == NULL)
+ goto abort_frag_frame;
+
+ spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.tv_queue, &ctx->stats.t_queue);
+ ctx->spoe_appctx = SPOE_APPCTX(appctx);
+ if (!(ctx->flags & SPOE_CTX_FL_FRAGMENTED) ||
+ (ctx->frag_ctx.flags & SPOE_FRM_FL_FIN))
+ goto no_frag_frame_sent;
+ else
+ goto frag_frame_sent;
+ }
+ goto end;
+
+ frag_frame_sent:
+ appctx->st0 = SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY;
+ *skip = 1;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = ctx;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = ctx->stream_id;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = ctx->frame_id;
+ ctx->state = SPOE_CTX_ST_ENCODING_MSGS;
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ goto end;
+
+ no_frag_frame_sent:
+ if (SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_ASYNC) {
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ LIST_APPEND(&agent->rt[tid].waiting_queue, &ctx->list);
+ }
+ else if (SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_PIPELINING) {
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ LIST_APPEND(&SPOE_APPCTX(appctx)->waiting_queue, &ctx->list);
+ }
+ else {
+ appctx->st0 = SPOE_APPCTX_ST_WAITING_SYNC_ACK;
+ *skip = 1;
+ LIST_APPEND(&SPOE_APPCTX(appctx)->waiting_queue, &ctx->list);
+ }
+ _HA_ATOMIC_INC(&agent->counters.nb_waiting);
+ ctx->stats.tv_wait = now;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = NULL;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = 0;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = 0;
+ SPOE_APPCTX(appctx)->cur_fpa++;
+
+ ctx->state = SPOE_CTX_ST_WAITING_ACK;
+ goto end;
+
+ abort_frag_frame:
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = NULL;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = 0;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = 0;
+ goto end;
+
+ end:
+ return ret;
+}
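+
+/* The frame helpers above and below share the same return convention:
+ * -1 means a fatal error, 0 means the frame must be ignored (itself an
+ * error for some frame types), 1 means the operation must be retried
+ * later, and any greater value is a size in bytes. Once a NOTIFY frame
+ * is sent, the stream context is parked on a waiting queue that depends
+ * on the negotiated mode: the per-thread queue in async mode, the
+ * applet's own queue in pipelining mode, and the applet's queue with
+ * further sends blocked in sync mode.
+ */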
+
+static int
+spoe_handle_receiving_frame_appctx(struct appctx *appctx, int *skip)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ struct spoe_context *ctx = NULL;
+ char *frame;
+ int ret;
+
+ frame = trash.area; trash.data = 0;
+ ret = spoe_recv_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1) {
+ if (*frame == SPOE_FRM_T_AGENT_DISCON) {
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECTING;
+ ret = -1;
+ goto end;
+ }
+ trash.data = ret + 4;
+ ret = spoe_handle_agentack_frame(appctx, &ctx, frame, ret);
+ }
+ switch (ret) {
+ case -1: /* error */
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ break;
+
+ case 0: /* ignore */
+ break;
+
+ case 1: /* retry */
+ *skip = 1;
+ break;
+
+ default:
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+ spoe_update_stat_time(&ctx->stats.tv_wait, &ctx->stats.t_waiting);
+ ctx->stats.tv_response = now;
+ if (ctx->spoe_appctx) {
+ ctx->spoe_appctx->cur_fpa--;
+ ctx->spoe_appctx = NULL;
+ }
+ if (appctx->st0 == SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY &&
+ ctx == SPOE_APPCTX(appctx)->frag_ctx.ctx) {
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = NULL;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = 0;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = 0;
+ }
+ else if (appctx->st0 == SPOE_APPCTX_ST_WAITING_SYNC_ACK)
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ break;
+ }
+
+ /* Do not forget to remove the processed frame from the output buffer */
+ if (trash.data)
+ co_skip(sc_oc(appctx_sc(appctx)), trash.data);
+ end:
+ return ret;
+}
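+
+/* When an ACK is received, the matching stream context is unlinked from
+ * its waiting queue and its owner stream is woken up; <cur_fpa>, the
+ * per-applet count of frames awaiting acknowledgement, is decremented so
+ * that the sending loop may issue new frames up to <agent->max_fpa>.
+ */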
+
+static int
+spoe_handle_processing_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct server *srv = objt_server(__sc_strm(sc)->target);
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ int ret, skip_sending = 0, skip_receiving = 0, active_s = 0, active_r = 0, close_asap = 0;
+
+ if (sc->state == SC_ST_CLO || sc_opposite(sc)->state == SC_ST_CLO) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ goto exit;
+ }
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ appctx->st1 = SPOE_APPCTX_ERR_NONE;
+ goto next;
+ }
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - process: fpa=%u/%u - appctx-state=%s - weight=%u - flags=0x%08x\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx, SPOE_APPCTX(appctx)->cur_fpa,
+ agent->max_fpa, spoe_appctx_state_str[appctx->st0],
+ SPOE_APPCTX(appctx)->node.key, SPOE_APPCTX(appctx)->flags);
+
+
+ /* Close the applet ASAP because some sessions are waiting for a free
+ * connection slot. It is only an issue in multithreaded mode.
+ */
+ close_asap = (global.nbthread > 1 &&
+ (agent->b.be->queue.length ||
+ (srv && (srv->queue.length || (srv->maxconn && srv->served >= srv_dynamic_maxconn(srv))))));
+
+ /* Don"t try to send new frame we are waiting for at lease a ack, in
+ * sync mode or if applet must be closed ASAP
+ */
+ if (appctx->st0 == SPOE_APPCTX_ST_WAITING_SYNC_ACK || (close_asap && SPOE_APPCTX(appctx)->cur_fpa))
+ skip_sending = 1;
+
+ /* receiving_frame loop */
+ while (!skip_receiving) {
+ ret = spoe_handle_receiving_frame_appctx(appctx, &skip_receiving);
+ switch (ret) {
+ case -1: /* error */
+ goto next;
+
+ case 0: /* ignore */
+ active_r = 1;
+ break;
+
+ case 1: /* retry */
+ break;
+
+ default:
+ active_r = 1;
+ break;
+ }
+ }
+
+ /* send_frame loop */
+ while (!skip_sending && SPOE_APPCTX(appctx)->cur_fpa < agent->max_fpa) {
+ ret = spoe_handle_sending_frame_appctx(appctx, &skip_sending);
+ switch (ret) {
+ case -1: /* error */
+ goto next;
+
+ case 0: /* ignore */
+ if (SPOE_APPCTX(appctx)->node.key)
+ SPOE_APPCTX(appctx)->node.key--;
+ active_s++;
+ break;
+
+ case 1: /* retry */
+ break;
+
+ default:
+ if (SPOE_APPCTX(appctx)->node.key)
+ SPOE_APPCTX(appctx)->node.key--;
+ active_s++;
+ break;
+ }
+
+ /* if the applet must be closed ASAP, don't send more than one frame */
+ if (close_asap)
+ break;
+ }
+
+ if (active_s || active_r) {
+ update_freq_ctr(&agent->rt[tid].processing_per_sec, active_s);
+ SPOE_APPCTX(appctx)->task->expire = tick_add_ifset(now_ms, agent->timeout.idle);
+ }
+
+ if (appctx->st0 == SPOE_APPCTX_ST_PROCESSING && SPOE_APPCTX(appctx)->cur_fpa < agent->max_fpa) {
+ /* If the applet must be closed, don't switch it to the IDLE
+ * state; instead close it once the last waiting frame is
+ * acknowledged.
+ */
+ if (close_asap) {
+ if (SPOE_APPCTX(appctx)->cur_fpa)
+ goto out;
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NONE;
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ appctx->st1 = SPOE_APPCTX_ERR_NONE;
+ goto next;
+ }
+ _HA_ATOMIC_INC(&agent->counters.idles);
+ appctx->st0 = SPOE_APPCTX_ST_IDLE;
+ eb32_insert(&agent->rt[tid].idle_applets, &SPOE_APPCTX(appctx)->node);
+ }
+
+ out:
+ return 1;
+
+ next:
+ SPOE_APPCTX(appctx)->task->expire = tick_add_ifset(now_ms, agent->timeout.idle);
+ return 0;
+
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
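+
+/* A single processing pass thus interleaves one receive loop and one send
+ * loop: ACKs are always drained first since they release in-flight frame
+ * credits, then new NOTIFY frames are sent until <max_fpa> in-flight
+ * frames is reached (or a single frame when the applet must be closed
+ * ASAP to free a connection slot).
+ */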
+
+static int
+spoe_handle_disconnect_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *frame, *buf;
+ int ret;
+
+ if (sc->state == SC_ST_CLO || sc_opposite(sc)->state == SC_ST_CLO)
+ goto exit;
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT)
+ goto exit;
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ buf = trash.area; frame = buf+4;
+ ret = spoe_prepare_hadiscon_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1)
+ ret = spoe_send_frame(appctx, buf, ret);
+
+ switch (ret) {
+ case -1: /* error */
+ case 0: /* ignore => an error, cannot be ignored */
+ goto exit;
+
+ case 1: /* retry */
+ goto stop;
+
+ default:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - disconnected by HAProxy (%d): %s\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx,
+ SPOE_APPCTX(appctx)->status_code,
+ spoe_frm_err_reasons[SPOE_APPCTX(appctx)->status_code]);
+
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECTING;
+ goto next;
+ }
+
+ next:
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.idle);
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+static int
+spoe_handle_disconnecting_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ char *frame;
+ int ret;
+
+ if (sc->state == SC_ST_CLO || sc_opposite(sc)->state == SC_ST_CLO) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ goto exit;
+ }
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ goto exit;
+ }
+
+ frame = trash.area; trash.data = 0;
+ ret = spoe_recv_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1) {
+ trash.data = ret + 4;
+ ret = spoe_handle_agentdiscon_frame(appctx, frame, ret);
+ }
+
+ switch (ret) {
+ case -1: /* error */
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - error on frame (%s)\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_agent *)SPOE_APPCTX(appctx)->agent)->id,
+ __FUNCTION__, appctx,
+ spoe_frm_err_reasons[SPOE_APPCTX(appctx)->status_code]);
+ goto exit;
+
+ case 0: /* ignore */
+ goto next;
+
+ case 1: /* retry */
+ goto stop;
+
+ default:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - disconnected by peer (%d): %.*s\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_agent *)SPOE_APPCTX(appctx)->agent)->id,
+ __FUNCTION__, appctx, SPOE_APPCTX(appctx)->status_code,
+ SPOE_APPCTX(appctx)->rlen, SPOE_APPCTX(appctx)->reason);
+ goto exit;
+ }
+
+ next:
+ /* Do not forget to remove the processed frame from the output buffer */
+ if (trash.data)
+ co_skip(sc_oc(sc), trash.data);
+
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+/* I/O Handler processing messages exchanged with the agent */
+static void
+spoe_handle_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_agent *agent;
+
+ if (SPOE_APPCTX(appctx) == NULL)
+ return;
+
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NONE;
+ agent = SPOE_APPCTX(appctx)->agent;
+
+ switchstate:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - appctx-state=%s\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, appctx, spoe_appctx_state_str[appctx->st0]);
+
+ switch (appctx->st0) {
+ case SPOE_APPCTX_ST_CONNECT:
+ if (spoe_handle_connect_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_CONNECTING:
+ if (spoe_handle_connecting_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_IDLE:
+ _HA_ATOMIC_DEC(&agent->counters.idles);
+ eb32_delete(&SPOE_APPCTX(appctx)->node);
+ if (stopping &&
+ LIST_ISEMPTY(&agent->rt[tid].sending_queue) &&
+ LIST_ISEMPTY(&SPOE_APPCTX(appctx)->waiting_queue)) {
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.idle);
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ goto switchstate;
+ }
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ /* fall through */
+
+ case SPOE_APPCTX_ST_PROCESSING:
+ case SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY:
+ case SPOE_APPCTX_ST_WAITING_SYNC_ACK:
+ if (spoe_handle_processing_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_DISCONNECT:
+ if (spoe_handle_disconnect_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_DISCONNECTING:
+ if (spoe_handle_disconnecting_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_EXIT:
+ appctx->st0 = SPOE_APPCTX_ST_END;
+ SPOE_APPCTX(appctx)->task->expire = TICK_ETERNITY;
+
+ sc_shutw(sc);
+ sc_shutr(sc);
+ sc_ic(sc)->flags |= CF_READ_NULL;
+ /* fall through */
+
+ case SPOE_APPCTX_ST_END:
+ return;
+ }
+ out:
+ if (stopping)
+ spoe_wakeup_appctx(appctx);
+
+ if (SPOE_APPCTX(appctx)->task->expire != TICK_ETERNITY)
+ task_queue(SPOE_APPCTX(appctx)->task);
+}
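+
+/* Informal sketch of the applet states driven by the I/O handler above
+ * (error paths omitted):
+ *
+ *   CONNECT -> CONNECTING -> IDLE <-> { PROCESSING, SENDING_FRAG_NOTIFY,
+ *                                       WAITING_SYNC_ACK }
+ *                              |
+ *                              v
+ *          DISCONNECT -> DISCONNECTING -> EXIT -> END
+ */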
+
+struct applet spoe_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SPOE>", /* used for logging */
+ .fct = spoe_handle_appctx,
+ .init = spoe_init_appctx,
+ .release = spoe_release_appctx,
+};
+
+/* Create a SPOE applet. On success, the created applet is returned, else
+ * NULL. */
+static struct appctx *
+spoe_create_appctx(struct spoe_config *conf)
+{
+ struct spoe_appctx *spoe_appctx;
+ struct appctx *appctx;
+
+ spoe_appctx = pool_zalloc(pool_head_spoe_appctx);
+ if (spoe_appctx == NULL)
+ goto out_error;
+
+ spoe_appctx->agent = conf->agent;
+ spoe_appctx->version = 0;
+ spoe_appctx->max_frame_size = conf->agent->max_frame_size;
+ spoe_appctx->flags = 0;
+ spoe_appctx->status_code = SPOE_FRM_ERR_NONE;
+ spoe_appctx->buffer = BUF_NULL;
+ spoe_appctx->cur_fpa = 0;
+ LIST_INIT(&spoe_appctx->list);
+ LIST_INIT(&spoe_appctx->waiting_queue);
+
+
+ if ((appctx = appctx_new_here(&spoe_applet, NULL)) == NULL)
+ goto out_free_spoe_appctx;
+
+ appctx->svcctx = spoe_appctx;
+ if (appctx_init(appctx) == -1)
+ goto out_free_appctx;
+
+ appctx_wakeup(appctx);
+ return appctx;
+
+ /* Error unrolling */
+ out_free_appctx:
+ appctx_free_on_early_error(appctx);
+ out_free_spoe_appctx:
+ pool_free(pool_head_spoe_appctx, spoe_appctx);
+ out_error:
+ return NULL;
+}
+
+static int
+spoe_queue_context(struct spoe_context *ctx)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ struct appctx *appctx;
+ struct spoe_appctx *spoe_appctx;
+
+ /* Check if we need to create a new SPOE applet or not. */
+ if (!eb_is_empty(&agent->rt[tid].idle_applets) &&
+ (agent->rt[tid].processing == 1 || agent->rt[tid].processing < read_freq_ctr(&agent->rt[tid].processing_per_sec)))
+ goto end;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - try to create new SPOE appctx\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id, __FUNCTION__,
+ ctx->strm);
+
+ /* Do not try to create a new applet if there is no server up for the
+ * agent's backend. */
+ if (!agent->b.be->srv_act && !agent->b.be->srv_bck) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - cannot create SPOE appctx: no server up\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, ctx->strm);
+ goto end;
+ }
+
+ /* Do not try to create a new applet if we have reached the maximum
+ * number of connections per second */
+ if (agent->cps_max > 0) {
+ if (!freq_ctr_remain(&agent->rt[tid].conn_per_sec, agent->cps_max, 0)) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - cannot create SPOE appctx: max CPS reached\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, ctx->strm);
+ goto end;
+ }
+ }
+
+ appctx = spoe_create_appctx(conf);
+ if (appctx == NULL) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - failed to create SPOE appctx\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, ctx->strm);
+ send_log(&conf->agent_fe, LOG_EMERG,
+ "SPOE: [%s] failed to create SPOE applet\n",
+ agent->id);
+
+ goto end;
+ }
+
+ /* Increase the per-process number of cumulated connections */
+ if (agent->cps_max > 0)
+ update_freq_ctr(&agent->rt[tid].conn_per_sec, 1);
+
+ end:
+ /* The only reason to return an error is when there is no applet */
+ if (LIST_ISEMPTY(&agent->rt[tid].applets)) {
+ ctx->status_code = SPOE_CTX_ERR_RES;
+ return -1;
+ }
+
+ /* Add the SPOE context to the sending queue if the stream has no applet
+ * already assigned, then try to wake up an idle applet. Otherwise, don't
+ * queue it. */
+ _HA_ATOMIC_INC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.tv_request, &ctx->stats.t_request);
+ ctx->stats.tv_queue = now;
+ if (ctx->spoe_appctx)
+ return 1;
+ LIST_APPEND(&agent->rt[tid].sending_queue, &ctx->list);
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - Add stream in sending queue"
+ " - applets=%u - idles=%u - processing=%u\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id, __FUNCTION__,
+ ctx->strm, agent->counters.applets, agent->counters.idles,
+ agent->rt[tid].processing);
+
+ /* Finally, try to wake up an IDLE applet. */
+ if (!eb_is_empty(&agent->rt[tid].idle_applets)) {
+ struct eb32_node *node;
+
+ node = eb32_first(&agent->rt[tid].idle_applets);
+ spoe_appctx = eb32_entry(node, struct spoe_appctx, node);
+ if (node && spoe_appctx) {
+ eb32_delete(&spoe_appctx->node);
+ spoe_appctx->node.key++;
+ eb32_insert(&agent->rt[tid].idle_applets, &spoe_appctx->node);
+ spoe_wakeup_appctx(spoe_appctx->owner);
+ }
+ }
+ return 1;
+}
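+
+/* Idle applets are kept in a per-thread eb32 tree keyed by their number
+ * of in-flight frames, so eb32_first() returns the least loaded one, and
+ * bumping the key before re-inserting spreads the load. A standalone
+ * sketch of this pattern (illustrative only, not part of this patch):
+ *
+ *     struct eb_root idle = EB_ROOT;
+ *     struct eb32_node a = { .key = 0 }, b = { .key = 2 };
+ *     struct eb32_node *node;
+ *
+ *     eb32_insert(&idle, &a);
+ *     eb32_insert(&idle, &b);
+ *     node = eb32_first(&idle);   <- returns &a, the least loaded entry
+ *     eb32_delete(node);
+ *     node->key++;                <- account for the work just assigned
+ *     eb32_insert(&idle, node);
+ */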
+
+/***************************************************************************
+ * Functions that encode SPOE messages
+ **************************************************************************/
+/* Encode a SPOE message. Info in <ctx->frag_ctx>, if any, is used to handle
+ * fragmented content. It returns 0 if the message was encoded and the next
+ * one can be processed, or -1 if the message is too big. */
+static int
+spoe_encode_message(struct stream *s, struct spoe_context *ctx,
+ struct spoe_message *msg, int dir,
+ char **buf, char *end)
+{
+ struct sample *smp;
+ struct spoe_arg *arg;
+ int ret;
+
+ if (msg->cond) {
+ ret = acl_exec_cond(msg->cond, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (msg->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ /* the rule does not match */
+ if (!ret)
+ goto next;
+ }
+
+ /* Resume encoding of a SPOE argument */
+ if (ctx->frag_ctx.curarg != NULL) {
+ arg = ctx->frag_ctx.curarg;
+ goto encode_argument;
+ }
+
+ if (ctx->frag_ctx.curoff != UINT_MAX)
+ goto encode_msg_payload;
+
+ /* Check if there is enough space for the message name and the
+ * number of arguments. It implies <msg->id_len> is encoded on 2
+ * bytes, at most (< 2288). */
+ if (*buf + 2 + msg->id_len + 1 > end)
+ goto too_big;
+
+ /* Encode the message name */
+ if (spoe_encode_buffer(msg->id, msg->id_len, buf, end) == -1)
+ goto too_big;
+
+ /* Set the number of arguments for this message */
+ **buf = msg->nargs;
+ (*buf)++;
+
+ ctx->frag_ctx.curoff = 0;
+ encode_msg_payload:
+
+ /* Loop on arguments */
+ list_for_each_entry(arg, &msg->args, list) {
+ ctx->frag_ctx.curarg = arg;
+ ctx->frag_ctx.curoff = UINT_MAX;
+ ctx->frag_ctx.curlen = 0;
+
+ encode_argument:
+ if (ctx->frag_ctx.curoff != UINT_MAX)
+ goto encode_arg_value;
+
+ /* Encode the argument name as a string. It can be NULL */
+ if (spoe_encode_buffer(arg->name, arg->name_len, buf, end) == -1)
+ goto too_big;
+
+ ctx->frag_ctx.curoff = 0;
+ encode_arg_value:
+
+ /* Fetch the argument value */
+ smp = sample_process(s->be, s->sess, s, dir|SMP_OPT_FINAL, arg->expr, NULL);
+ if (smp) {
+ smp->ctx.a[0] = &ctx->frag_ctx.curlen;
+ smp->ctx.a[1] = &ctx->frag_ctx.curoff;
+ }
+ ret = spoe_encode_data(smp, buf, end);
+ if (ret == -1 || ctx->frag_ctx.curoff)
+ goto too_big;
+ }
+
+ next:
+ return 0;
+
+ too_big:
+ return -1;
+}
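+
+/* Encoding is resumable: <ctx->frag_ctx> records the current message,
+ * argument and offset, with UINT_MAX used as a sentinel meaning "not
+ * started yet". When a fragment fills up, spoe_encode_message() returns
+ * -1 and a later call resumes exactly where encoding stopped.
+ */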
+
+/* Encode a list of SPOE messages. Info in <ctx->frag_ctx>, if any, is used to
+ * handle fragmented content. On success it returns 1. If an error occurred, -1
+ * is returned. If nothing has been encoded, it returns 0 (this is only possible
+ * for an unfragmented payload). */
+static int
+spoe_encode_messages(struct stream *s, struct spoe_context *ctx,
+ struct list *messages, int dir, int type)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ struct spoe_message *msg;
+ char *p, *end;
+
+ p = b_head(&ctx->buffer);
+ end = p + agent->rt[tid].frame_size - FRAME_HDR_SIZE;
+
+ if (type == SPOE_MSGS_BY_EVENT) { /* Loop on messages by event */
+ /* Resume encoding of a SPOE message */
+ if (ctx->frag_ctx.curmsg != NULL) {
+ msg = ctx->frag_ctx.curmsg;
+ goto encode_evt_message;
+ }
+
+ list_for_each_entry(msg, messages, by_evt) {
+ ctx->frag_ctx.curmsg = msg;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = UINT_MAX;
+
+ encode_evt_message:
+ if (spoe_encode_message(s, ctx, msg, dir, &p, end) == -1)
+ goto too_big;
+ }
+ }
+ else if (type == SPOE_MSGS_BY_GROUP) { /* Loop on messages by group */
+ /* Resume encoding of a SPOE message */
+ if (ctx->frag_ctx.curmsg != NULL) {
+ msg = ctx->frag_ctx.curmsg;
+ goto encode_grp_message;
+ }
+
+ list_for_each_entry(msg, messages, by_grp) {
+ ctx->frag_ctx.curmsg = msg;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = UINT_MAX;
+
+ encode_grp_message:
+ if (spoe_encode_message(s, ctx, msg, dir, &p, end) == -1)
+ goto too_big;
+ }
+ }
+ else
+ goto skip;
+
+
+ /* nothing has been encoded for an unfragmented payload */
+ if (!(ctx->flags & SPOE_CTX_FL_FRAGMENTED) && p == b_head(&ctx->buffer))
+ goto skip;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - encode %s messages - spoe_appctx=%p"
+ "- max_size=%u - encoded=%ld\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ agent->id, __FUNCTION__, s,
+ ((ctx->flags & SPOE_CTX_FL_FRAGMENTED) ? "last fragment of" : "unfragmented"),
+ ctx->spoe_appctx, (agent->rt[tid].frame_size - FRAME_HDR_SIZE),
+ p - b_head(&ctx->buffer));
+
+ b_set_data(&ctx->buffer, p - b_head(&ctx->buffer));
+ ctx->frag_ctx.curmsg = NULL;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = 0;
+ ctx->frag_ctx.flags = SPOE_FRM_FL_FIN;
+
+ return 1;
+
+ too_big:
+ /* Return an error if fragmentation is unsupported or if nothing has
+ * been encoded because it's too big and not splittable. */
+ if (!(agent->flags & SPOE_FL_SND_FRAGMENTATION) || p == b_head(&ctx->buffer)) {
+ ctx->status_code = SPOE_CTX_ERR_TOO_BIG;
+ return -1;
+ }
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - encode fragmented messages - spoe_appctx=%p"
+ " - curmsg=%p - curarg=%p - curoff=%u"
+ " - max_size=%u - encoded=%ld\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ agent->id, __FUNCTION__, s, ctx->spoe_appctx,
+ ctx->frag_ctx.curmsg, ctx->frag_ctx.curarg, ctx->frag_ctx.curoff,
+ (agent->rt[tid].frame_size - FRAME_HDR_SIZE), p - b_head(&ctx->buffer));
+
+ b_set_data(&ctx->buffer, p - b_head(&ctx->buffer));
+ ctx->flags |= SPOE_CTX_FL_FRAGMENTED;
+ ctx->frag_ctx.flags &= ~SPOE_FRM_FL_FIN;
+ return 1;
+
+ skip:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - skip the frame because nothing has been encoded\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ agent->id, __FUNCTION__, s);
+ return 0;
+}
+
+
+/***************************************************************************
+ * Functions that handle SPOE actions
+ **************************************************************************/
+/* Helper function to set a variable */
+static void
+spoe_set_var(struct spoe_context *ctx, char *scope, char *name, int len,
+ struct sample *smp)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ char varname[64];
+
+ memset(varname, 0, sizeof(varname));
+ len = snprintf(varname, sizeof(varname), "%s.%s.%.*s",
+ scope, agent->var_pfx, len, name);
+ if (agent->flags & SPOE_FL_FORCE_SET_VAR)
+ vars_set_by_name(varname, len, smp);
+ else
+ vars_set_by_name_ifexist(varname, len, smp);
+}
+
+/* Helper function to unset a variable */
+static void
+spoe_unset_var(struct spoe_context *ctx, char *scope, char *name, int len,
+ struct sample *smp)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ char varname[64];
+
+ memset(varname, 0, sizeof(varname));
+ len = snprintf(varname, sizeof(varname), "%s.%s.%.*s",
+ scope, agent->var_pfx, len, name);
+ vars_unset_by_name_ifexist(varname, len, smp);
+}
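+
+/* As a concrete example of the naming scheme above: with an agent whose
+ * var-prefix is "myagent" (hypothetical name), a SET-VAR action on the
+ * "txn" scope for the name "score" produces the variable
+ * "txn.myagent.score", which the configuration can then read back, e.g.
+ * through a fetch such as var(txn.myagent.score).
+ */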
+
+
+static inline int
+spoe_decode_action_set_var(struct stream *s, struct spoe_context *ctx,
+ char **buf, char *end, int dir)
+{
+ char *str, *scope, *p = *buf;
+ struct sample smp;
+ uint64_t sz;
+ int ret;
+
+ if (p + 2 >= end)
+ goto skip;
+
+ /* SET-VAR requires 3 arguments */
+ if (*p++ != 3)
+ goto skip;
+
+ switch (*p++) {
+ case SPOE_SCOPE_PROC: scope = "proc"; break;
+ case SPOE_SCOPE_SESS: scope = "sess"; break;
+ case SPOE_SCOPE_TXN : scope = "txn"; break;
+ case SPOE_SCOPE_REQ : scope = "req"; break;
+ case SPOE_SCOPE_RES : scope = "res"; break;
+ default: goto skip;
+ }
+
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1)
+ goto skip;
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+
+ if (spoe_decode_data(&p, end, &smp) == -1)
+ goto skip;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - set-var '%s.%s.%.*s'\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->id,
+ __FUNCTION__, s, scope,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->var_pfx,
+ (int)sz, str);
+
+ if (smp.data.type == SMP_T_ANY)
+ spoe_unset_var(ctx, scope, str, sz, &smp);
+ else
+ spoe_set_var(ctx, scope, str, sz, &smp);
+
+ ret = (p - *buf);
+ *buf = p;
+ return ret;
+ skip:
+ return 0;
+}
+
+static inline int
+spoe_decode_action_unset_var(struct stream *s, struct spoe_context *ctx,
+ char **buf, char *end, int dir)
+{
+ char *str, *scope, *p = *buf;
+ struct sample smp;
+ uint64_t sz;
+ int ret;
+
+ if (p + 2 >= end)
+ goto skip;
+
+ /* UNSET-VAR requires 2 arguments */
+ if (*p++ != 2)
+ goto skip;
+
+ switch (*p++) {
+ case SPOE_SCOPE_PROC: scope = "proc"; break;
+ case SPOE_SCOPE_SESS: scope = "sess"; break;
+ case SPOE_SCOPE_TXN : scope = "txn"; break;
+ case SPOE_SCOPE_REQ : scope = "req"; break;
+ case SPOE_SCOPE_RES : scope = "res"; break;
+ default: goto skip;
+ }
+
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1)
+ goto skip;
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - unset-var '%s.%s.%.*s'\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->id,
+ __FUNCTION__, s, scope,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->var_pfx,
+ (int)sz, str);
+
+ spoe_unset_var(ctx, scope, str, sz, &smp);
+
+ ret = (p - *buf);
+ *buf = p;
+ return ret;
+ skip:
+ return 0;
+}
+
+/* Process SPOE actions for a specific event. It returns 1 on success. If an
+ * error occurred, 0 is returned. */
+static int
+spoe_process_actions(struct stream *s, struct spoe_context *ctx, int dir)
+{
+ char *p, *end;
+ int ret;
+
+ p = b_head(&ctx->buffer);
+ end = p + b_data(&ctx->buffer);
+
+ while (p < end) {
+ enum spoe_action_type type;
+
+ type = *p++;
+ switch (type) {
+ case SPOE_ACT_T_SET_VAR:
+ ret = spoe_decode_action_set_var(s, ctx, &p, end, dir);
+ if (!ret)
+ goto skip;
+ break;
+
+ case SPOE_ACT_T_UNSET_VAR:
+ ret = spoe_decode_action_unset_var(s, ctx, &p, end, dir);
+ if (!ret)
+ goto skip;
+ break;
+
+ default:
+ goto skip;
+ }
+ }
+
+ return 1;
+ skip:
+ return 0;
+}
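+
+/* The ACK payload decoded above is a sequence of actions, each one
+ * starting with a one-byte action type followed by a one-byte argument
+ * count, then the arguments themselves (a scope byte, an encoded name
+ * and, for SET-VAR, an encoded typed value).
+ */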
+
+/***************************************************************************
+ * Functions that process SPOE events
+ **************************************************************************/
+static void
+spoe_update_stats(struct stream *s, struct spoe_agent *agent,
+ struct spoe_context *ctx, int dir)
+{
+ if (!tv_iszero(&ctx->stats.tv_start)) {
+ spoe_update_stat_time(&ctx->stats.tv_start, &ctx->stats.t_process);
+ ctx->stats.t_total += ctx->stats.t_process;
+ tv_zero(&ctx->stats.tv_request);
+ tv_zero(&ctx->stats.tv_queue);
+ tv_zero(&ctx->stats.tv_wait);
+ tv_zero(&ctx->stats.tv_response);
+ }
+
+ if (agent->var_t_process) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ smp.data.u.sint = ctx->stats.t_process;
+ smp.data.type = SMP_T_SINT;
+
+ spoe_set_var(ctx, "txn", agent->var_t_process,
+ strlen(agent->var_t_process), &smp);
+ }
+
+ if (agent->var_t_total) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ smp.data.u.sint = ctx->stats.t_total;
+ smp.data.type = SMP_T_SINT;
+
+ spoe_set_var(ctx, "txn", agent->var_t_total,
+ strlen(agent->var_t_total), &smp);
+ }
+}
+
+static void
+spoe_handle_processing_error(struct stream *s, struct spoe_agent *agent,
+ struct spoe_context *ctx, int dir)
+{
+ if (agent->eps_max > 0)
+ update_freq_ctr(&agent->rt[tid].err_per_sec, 1);
+
+ if (agent->var_on_error) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ smp.data.u.sint = ctx->status_code;
+ smp.data.type = SMP_T_BOOL;
+
+ spoe_set_var(ctx, "txn", agent->var_on_error,
+ strlen(agent->var_on_error), &smp);
+ }
+
+ ctx->state = ((agent->flags & SPOE_FL_CONT_ON_ERR)
+ ? SPOE_CTX_ST_READY
+ : SPOE_CTX_ST_NONE);
+}
+
+static inline int
+spoe_start_processing(struct spoe_agent *agent, struct spoe_context *ctx, int dir)
+{
+ /* If processing has already started for this SPOE context, retry
+ * later. */
+ if (ctx->flags & SPOE_CTX_FL_PROCESS)
+ return 0;
+
+ agent->rt[tid].processing++;
+ ctx->stats.tv_start = now;
+ ctx->stats.tv_request = now;
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+
+ ctx->status_code = 0;
+
+ /* Set the right flag to prevent request and response processing
+ * at the same time. */
+ ctx->flags |= ((dir == SMP_OPT_DIR_REQ)
+ ? SPOE_CTX_FL_REQ_PROCESS
+ : SPOE_CTX_FL_RSP_PROCESS);
+ return 1;
+}
+
+static inline void
+spoe_stop_processing(struct spoe_agent *agent, struct spoe_context *ctx)
+{
+ struct spoe_appctx *sa = ctx->spoe_appctx;
+
+ if (!(ctx->flags & SPOE_CTX_FL_PROCESS))
+ return;
+ _HA_ATOMIC_INC(&agent->counters.nb_processed);
+ if (sa) {
+ if (sa->frag_ctx.ctx == ctx) {
+ sa->frag_ctx.ctx = NULL;
+ spoe_wakeup_appctx(sa->owner);
+ }
+ else
+ sa->cur_fpa--;
+ }
+
+ /* Reset the flag to allow next processing */
+ agent->rt[tid].processing--;
+ ctx->flags &= ~(SPOE_CTX_FL_PROCESS|SPOE_CTX_FL_FRAGMENTED);
+
+ /* Reset processing timer */
+ ctx->process_exp = TICK_ETERNITY;
+
+ spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
+
+ ctx->spoe_appctx = NULL;
+ ctx->frag_ctx.curmsg = NULL;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = 0;
+ ctx->frag_ctx.flags = 0;
+
+ if (!LIST_ISEMPTY(&ctx->list)) {
+ if (ctx->state == SPOE_CTX_ST_SENDING_MSGS)
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ else
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ }
+}
+
+/* Process a list of SPOE messages. First, this function encodes the messages
+ * and sends them to an agent in a NOTIFY frame. Then, it waits for an ACK
+ * frame to process the corresponding actions. It returns 0 while processing
+ * is in progress and 1 when it is finished. If an error occurred, -1 is
+ * returned. */
+static int
+spoe_process_messages(struct stream *s, struct spoe_context *ctx,
+ struct list *messages, int dir, int type)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ int ret = 1;
+
+ if (ctx->state == SPOE_CTX_ST_ERROR)
+ goto end;
+
+ if (tick_is_expired(ctx->process_exp, now_ms) && ctx->state != SPOE_CTX_ST_DONE) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - failed to process messages: timeout\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ agent->id, __FUNCTION__, s);
+ ctx->status_code = SPOE_CTX_ERR_TOUT;
+ goto end;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_READY) {
+ if (agent->eps_max > 0) {
+ if (!freq_ctr_remain(&agent->rt[tid].err_per_sec, agent->eps_max, 0)) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - skip processing of messages: max EPS reached\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ agent->id, __FUNCTION__, s);
+ goto skip;
+ }
+ }
+
+ if (!tick_isset(ctx->process_exp)) {
+ ctx->process_exp = tick_add_ifset(now_ms, agent->timeout.processing);
+ s->task->expire = tick_first((tick_is_expired(s->task->expire, now_ms) ? 0 : s->task->expire),
+ ctx->process_exp);
+ }
+ ret = spoe_start_processing(agent, ctx, dir);
+ if (!ret)
+ goto out;
+
+ ctx->state = SPOE_CTX_ST_ENCODING_MSGS;
+ /* fall through */
+ }
+
+ if (ctx->state == SPOE_CTX_ST_ENCODING_MSGS) {
+ if (tv_iszero(&ctx->stats.tv_request))
+ ctx->stats.tv_request = now;
+ if (!spoe_acquire_buffer(&ctx->buffer, &ctx->buffer_wait))
+ goto out;
+ ret = spoe_encode_messages(s, ctx, messages, dir, type);
+ if (ret < 0)
+ goto end;
+ if (!ret)
+ goto skip;
+ if (spoe_queue_context(ctx) < 0)
+ goto end;
+ ctx->state = SPOE_CTX_ST_SENDING_MSGS;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_SENDING_MSGS) {
+ if (ctx->spoe_appctx)
+ spoe_wakeup_appctx(ctx->spoe_appctx->owner);
+ ret = 0;
+ goto out;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_WAITING_ACK) {
+ ret = 0;
+ goto out;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_DONE) {
+ spoe_process_actions(s, ctx, dir);
+ ret = 1;
+ ctx->frame_id++;
+ ctx->state = SPOE_CTX_ST_READY;
+ spoe_update_stat_time(&ctx->stats.tv_response, &ctx->stats.t_response);
+ goto end;
+ }
+
+ out:
+ return ret;
+
+ skip:
+ tv_zero(&ctx->stats.tv_start);
+ ctx->state = SPOE_CTX_ST_READY;
+ spoe_stop_processing(agent, ctx);
+ return 1;
+
+ end:
+ spoe_update_stats(s, agent, ctx, dir);
+ spoe_stop_processing(agent, ctx);
+ if (ctx->status_code) {
+ _HA_ATOMIC_INC(&agent->counters.nb_errors);
+ spoe_handle_processing_error(s, agent, ctx, dir);
+ ret = 1;
+ }
+ return ret;
+}
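+
+/* Informal sketch of the per-stream context states walked through above:
+ *
+ *   READY -> ENCODING_MSGS -> SENDING_MSGS -> WAITING_ACK -> DONE -> READY
+ *
+ * with ERROR reachable from any state and NONE used once no further
+ * processing is allowed.
+ */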
+
+/* Process a SPOE group, i.e. the list of messages attached to the group
+ * <group>. See spoe_process_messages() for details. */
+static int
+spoe_process_group(struct stream *s, struct spoe_context *ctx,
+ struct spoe_group *group, int dir)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ int ret;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - ctx-state=%s - Process messages for group=%s\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state],
+ group->id);
+
+ if (LIST_ISEMPTY(&group->messages))
+ return 1;
+
+ ret = spoe_process_messages(s, ctx, &group->messages, dir, SPOE_MSGS_BY_GROUP);
+ if (ret && ctx->stats.t_process != -1) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - <GROUP:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu %u/%u\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s, group->id, s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed,
+ agent->rt[tid].processing, read_freq_ctr(&agent->rt[tid].processing_per_sec));
+ if (ctx->status_code || !(conf->agent_fe.options2 & PR_O2_NOLOGNORM))
+ send_log(&conf->agent_fe, (!ctx->status_code ? LOG_NOTICE : LOG_WARNING),
+ "SPOE: [%s] <GROUP:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu\n",
+ agent->id, group->id, s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed);
+ }
+ return ret;
+}
+
+/* Process a SPOE event, i.e. the list of messages attached to the event <ev>.
+ * See spoe_process_messages() for details. */
+static int
+spoe_process_event(struct stream *s, struct spoe_context *ctx,
+ enum spoe_event ev)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ int dir, ret;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - ctx-state=%s - Process messages for event=%s\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state],
+ spoe_event_str[ev]);
+
+ dir = ((ev < SPOE_EV_ON_SERVER_SESS) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+
+ if (LIST_ISEMPTY(&(ctx->events[ev])))
+ return 1;
+
+ ret = spoe_process_messages(s, ctx, &(ctx->events[ev]), dir, SPOE_MSGS_BY_EVENT);
+ if (ret && ctx->stats.t_process != -1) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - <EVENT:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu %u/%u\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s, spoe_event_str[ev], s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed,
+ agent->rt[tid].processing, read_freq_ctr(&agent->rt[tid].processing_per_sec));
+ if (ctx->status_code || !(conf->agent_fe.options2 & PR_O2_NOLOGNORM))
+ send_log(&conf->agent_fe, (!ctx->status_code ? LOG_NOTICE : LOG_WARNING),
+ "SPOE: [%s] <EVENT:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu\n",
+ agent->id, spoe_event_str[ev], s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed);
+ }
+ return ret;
+}
+
+/***************************************************************************
+ * Functions that create/destroy SPOE contexts
+ **************************************************************************/
+static int
+spoe_acquire_buffer(struct buffer *buf, struct buffer_wait *buffer_wait)
+{
+ if (buf->size)
+ return 1;
+
+ if (LIST_INLIST(&buffer_wait->list))
+ LIST_DEL_INIT(&buffer_wait->list);
+
+ if (b_alloc(buf))
+ return 1;
+
+ LIST_APPEND(&th_ctx->buffer_wq, &buffer_wait->list);
+ return 0;
+}
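+
+/* Buffer allocation may fail under memory pressure; in that case the
+ * context registers itself on the thread's buffer_wq and its wakeup_cb
+ * (spoe_wakeup_context below) is invoked once a buffer is offered back
+ * through offer_buffers(), so that processing can resume transparently.
+ */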
+
+static void
+spoe_release_buffer(struct buffer *buf, struct buffer_wait *buffer_wait)
+{
+ if (LIST_INLIST(&buffer_wait->list))
+ LIST_DEL_INIT(&buffer_wait->list);
+
+ /* Release the buffer if needed */
+ if (buf->size) {
+ b_free(buf);
+ offer_buffers(buffer_wait->target, 1);
+ }
+}
+
+static int
+spoe_wakeup_context(struct spoe_context *ctx)
+{
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ return 1;
+}
+
+static struct spoe_context *
+spoe_create_context(struct stream *s, struct filter *filter)
+{
+ struct spoe_config *conf = FLT_CONF(filter);
+ struct spoe_context *ctx;
+
+ ctx = pool_zalloc(pool_head_spoe_ctx);
+ if (ctx == NULL) {
+ return NULL;
+ }
+ ctx->filter = filter;
+ ctx->state = SPOE_CTX_ST_NONE;
+ ctx->status_code = SPOE_CTX_ERR_NONE;
+ ctx->flags = 0;
+ ctx->events = conf->agent->events;
+ ctx->groups = &conf->agent->groups;
+ ctx->buffer = BUF_NULL;
+ LIST_INIT(&ctx->buffer_wait.list);
+ ctx->buffer_wait.target = ctx;
+ ctx->buffer_wait.wakeup_cb = (int (*)(void *))spoe_wakeup_context;
+ LIST_INIT(&ctx->list);
+
+ ctx->stream_id = 0;
+ ctx->frame_id = 1;
+ ctx->process_exp = TICK_ETERNITY;
+
+ tv_zero(&ctx->stats.tv_start);
+ tv_zero(&ctx->stats.tv_request);
+ tv_zero(&ctx->stats.tv_queue);
+ tv_zero(&ctx->stats.tv_wait);
+ tv_zero(&ctx->stats.tv_response);
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+ ctx->stats.t_total = 0;
+
+ ctx->strm = s;
+ ctx->state = SPOE_CTX_ST_READY;
+ filter->ctx = ctx;
+
+ return ctx;
+}
+
+static void
+spoe_destroy_context(struct filter *filter)
+{
+ struct spoe_config *conf = FLT_CONF(filter);
+ struct spoe_context *ctx = filter->ctx;
+
+ if (!ctx)
+ return;
+
+ spoe_stop_processing(conf->agent, ctx);
+ pool_free(pool_head_spoe_ctx, ctx);
+ filter->ctx = NULL;
+}
+
+static void
+spoe_reset_context(struct spoe_context *ctx)
+{
+ ctx->state = SPOE_CTX_ST_READY;
+ ctx->flags &= ~(SPOE_CTX_FL_PROCESS|SPOE_CTX_FL_FRAGMENTED);
+
+ tv_zero(&ctx->stats.tv_start);
+ tv_zero(&ctx->stats.tv_request);
+ tv_zero(&ctx->stats.tv_queue);
+ tv_zero(&ctx->stats.tv_wait);
+ tv_zero(&ctx->stats.tv_response);
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+ ctx->stats.t_total = 0;
+}
+
+
+/***************************************************************************
+ * Hooks that manage the filter lifecycle (init/check/deinit)
+ **************************************************************************/
+/* Signal handler: on soft stop, wake up all SPOE applets */
+static void
+spoe_sig_stop(struct sig_handler *sh)
+{
+ struct proxy *p;
+
+ p = proxies_list;
+ while (p) {
+ struct flt_conf *fconf;
+
+ list_for_each_entry(fconf, &p->filter_configs, list) {
+ struct spoe_config *conf;
+ struct spoe_agent *agent;
+ struct spoe_appctx *spoe_appctx;
+ int i;
+
+ if (fconf->id != spoe_filter_id)
+ continue;
+
+ conf = fconf->conf;
+ agent = conf->agent;
+
+ for (i = 0; i < global.nbthread; ++i) {
+ HA_SPIN_LOCK(SPOE_APPLET_LOCK, &agent->rt[i].lock);
+ list_for_each_entry(spoe_appctx, &agent->rt[i].applets, list)
+ spoe_wakeup_appctx(spoe_appctx->owner);
+ HA_SPIN_UNLOCK(SPOE_APPLET_LOCK, &agent->rt[i].lock);
+ }
+ }
+ p = p->next;
+ }
+}
+
+
+/* Initialize the SPOE filter. Returns -1 on error, else 0. */
+static int
+spoe_init(struct proxy *px, struct flt_conf *fconf)
+{
+ struct spoe_config *conf = fconf->conf;
+
+ /* conf->agent_fe was already initialized during the config
+ * parsing. Finish initialization. */
+ conf->agent_fe.last_change = now.tv_sec;
+ conf->agent_fe.cap = PR_CAP_FE;
+ conf->agent_fe.mode = PR_MODE_TCP;
+ conf->agent_fe.maxconn = 0;
+ conf->agent_fe.options2 |= PR_O2_INDEPSTR;
+ conf->agent_fe.conn_retries = CONN_RETRIES;
+ conf->agent_fe.accept = frontend_accept;
+ conf->agent_fe.srv = NULL;
+ conf->agent_fe.timeout.client = TICK_ETERNITY;
+ conf->agent_fe.fe_req_ana = AN_REQ_SWITCHING_RULES;
+
+ if (!sighandler_registered) {
+ signal_register_fct(0, spoe_sig_stop, 0);
+ sighandler_registered = 1;
+ }
+
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+/* Free resources allocated by the SPOE filter. */
+static void
+spoe_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct spoe_config *conf = fconf->conf;
+
+ if (conf) {
+ struct spoe_agent *agent = conf->agent;
+
+ spoe_release_agent(agent);
+ free(conf->id);
+ free(conf);
+ }
+ fconf->conf = NULL;
+}
+
+/* Check configuration of a SPOE filter for a specified proxy.
+ * Return 1 on error, else 0. */
+static int
+spoe_check(struct proxy *px, struct flt_conf *fconf)
+{
+ struct flt_conf *f;
+ struct spoe_config *conf = fconf->conf;
+ struct proxy *target;
+ struct logsrv *logsrv;
+ int i;
+
+ /* Check all SPOE filters for proxy <px> to be sure all SPOE agent names
+ * are unique */
+ list_for_each_entry(f, &px->filter_configs, list) {
+ struct spoe_config *c = f->conf;
+
+ /* This is not an SPOE filter */
+ if (f->id != spoe_filter_id)
+ continue;
+ /* This is the current SPOE filter */
+ if (f == fconf)
+ continue;
+
+ /* Check the engine id. It must be unique */
+ if (strcmp(conf->id, c->id) == 0) {
+ ha_alert("Proxy %s : duplicated name for SPOE engine '%s'.\n",
+ px->id, conf->id);
+ return 1;
+ }
+ }
+
+ target = proxy_be_by_name(conf->agent->b.name);
+ if (target == NULL) {
+ ha_alert("Proxy %s : unknown backend '%s' used by SPOE agent '%s'"
+ " declared at %s:%d.\n",
+ px->id, conf->agent->b.name, conf->agent->id,
+ conf->agent->conf.file, conf->agent->conf.line);
+ return 1;
+ }
+ if (target->mode != PR_MODE_TCP) {
+ ha_alert("Proxy %s : backend '%s' used by SPOE agent '%s' declared"
+ " at %s:%d does not support HTTP mode.\n",
+ px->id, target->id, conf->agent->id,
+ conf->agent->conf.file, conf->agent->conf.line);
+ return 1;
+ }
+
+ if ((conf->agent->rt = calloc(global.nbthread, sizeof(*conf->agent->rt))) == NULL) {
+ ha_alert("Proxy %s : out of memory initializing SPOE agent '%s' declared at %s:%d.\n",
+ px->id, conf->agent->id, conf->agent->conf.file, conf->agent->conf.line);
+ return 1;
+ }
+ for (i = 0; i < global.nbthread; ++i) {
+ conf->agent->rt[i].engine_id = NULL;
+ conf->agent->rt[i].frame_size = conf->agent->max_frame_size;
+ conf->agent->rt[i].processing = 0;
+ LIST_INIT(&conf->agent->rt[i].applets);
+ LIST_INIT(&conf->agent->rt[i].sending_queue);
+ LIST_INIT(&conf->agent->rt[i].waiting_queue);
+ HA_SPIN_INIT(&conf->agent->rt[i].lock);
+ }
+
+ list_for_each_entry(logsrv, &conf->agent_fe.logsrvs, list) {
+ if (logsrv->type == LOG_TARGET_BUFFER) {
+ struct sink *sink = sink_find(logsrv->ring_name);
+
+ if (!sink || sink->type != SINK_TYPE_BUFFER) {
+ ha_alert("Proxy %s : log server used by SPOE agent '%s' declared"
+ " at %s:%d uses unknown ring named '%s'.\n",
+ px->id, conf->agent->id, conf->agent->conf.file,
+ conf->agent->conf.line, logsrv->ring_name);
+ return 1;
+ }
+ logsrv->sink = sink;
+ }
+ }
+
+ ha_free(&conf->agent->b.name);
+ conf->agent->b.be = target;
+ return 0;
+}
+
+/* Initializes the SPOE filter for a proxy for a specific thread.
+ * Returns a negative value if an error occurs. */
+static int
+spoe_init_per_thread(struct proxy *p, struct flt_conf *fconf)
+{
+ struct spoe_config *conf = fconf->conf;
+ struct spoe_agent *agent = conf->agent;
+
+ agent->rt[tid].engine_id = generate_pseudo_uuid();
+ if (agent->rt[tid].engine_id == NULL)
+ return -1;
+ return 0;
+}
+
+/**************************************************************************
+ * Hooks attached to a stream
+ *************************************************************************/
+/* Called when a filter instance is created and attached to a stream. It creates
+ * the context that will be used to process this stream. */
+static int
+spoe_start(struct stream *s, struct filter *filter)
+{
+ struct spoe_config *conf = FLT_CONF(filter);
+ struct spoe_agent *agent = conf->agent;
+ struct spoe_context *ctx;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s);
+
+ if ((ctx = spoe_create_context(s, filter)) == NULL) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - failed to create SPOE context\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s);
+ send_log(&conf->agent_fe, LOG_EMERG,
+ "SPOE: [%s] failed to create SPOE context\n",
+ agent->id);
+ return 0;
+ }
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_TCP_REQ_FE]))
+ filter->pre_analyzers |= AN_REQ_INSPECT_FE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_TCP_REQ_BE]))
+ filter->pre_analyzers |= AN_REQ_INSPECT_BE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_TCP_RSP]))
+ filter->pre_analyzers |= AN_RES_INSPECT;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_HTTP_REQ_FE]))
+ filter->pre_analyzers |= AN_REQ_HTTP_PROCESS_FE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_HTTP_REQ_BE]))
+ filter->pre_analyzers |= AN_REQ_HTTP_PROCESS_BE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_HTTP_RSP]))
+ filter->pre_analyzers |= AN_RES_HTTP_PROCESS_FE;
+
+ return 1;
+}
+
+/* Called when a filter instance is detached from a stream. It releases the
+ * attached SPOE context. */
+static void
+spoe_stop(struct stream *s, struct filter *filter)
+{
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s);
+ spoe_destroy_context(filter);
+}
+
+
+/*
+ * Called when the stream is woken up because of an expired timer.
+ */
+static void
+spoe_check_timeouts(struct stream *s, struct filter *filter)
+{
+ struct spoe_context *ctx = filter->ctx;
+
+ if (tick_is_expired(ctx->process_exp, now_ms))
+ s->pending_events |= TASK_WOKEN_MSG;
+}
+
+/* Called when we are ready to filter data on a channel */
+static int
+spoe_start_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct spoe_context *ctx = filter->ctx;
+ int ret = 1;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p - ctx-state=%s"
+ " - ctx-flags=0x%08x\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state], ctx->flags);
+
+ if (ctx->state == SPOE_CTX_ST_NONE)
+ goto out;
+
+ if (!(chn->flags & CF_ISRESP)) {
+ if (filter->pre_analyzers & AN_REQ_INSPECT_FE)
+ chn->analysers |= AN_REQ_INSPECT_FE;
+ if (filter->pre_analyzers & AN_REQ_INSPECT_BE)
+ chn->analysers |= AN_REQ_INSPECT_BE;
+
+ if (ctx->flags & SPOE_CTX_FL_CLI_CONNECTED)
+ goto out;
+
+ ctx->stream_id = s->uniq_id;
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_CLIENT_SESS);
+ if (!ret)
+ goto out;
+ ctx->flags |= SPOE_CTX_FL_CLI_CONNECTED;
+ }
+ else {
+ if (filter->pre_analyzers & AN_RES_INSPECT)
+ chn->analysers |= AN_RES_INSPECT;
+
+ if (ctx->flags & SPOE_CTX_FL_SRV_CONNECTED)
+ goto out;
+
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_SERVER_SESS);
+ if (!ret) {
+ channel_dont_read(chn);
+ channel_dont_close(chn);
+ goto out;
+ }
+ ctx->flags |= SPOE_CTX_FL_SRV_CONNECTED;
+ }
+
+ out:
+ return ret;
+}
+
+/* Called before a processing happens on a given channel */
+static int
+spoe_chn_pre_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn, unsigned an_bit)
+{
+ struct spoe_context *ctx = filter->ctx;
+ int ret = 1;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p - ctx-state=%s"
+ " - ctx-flags=0x%08x - ana=0x%08x\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state],
+ ctx->flags, an_bit);
+
+ if (ctx->state == SPOE_CTX_ST_NONE)
+ goto out;
+
+ switch (an_bit) {
+ case AN_REQ_INSPECT_FE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_TCP_REQ_FE);
+ break;
+ case AN_REQ_INSPECT_BE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_TCP_REQ_BE);
+ break;
+ case AN_RES_INSPECT:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_TCP_RSP);
+ break;
+ case AN_REQ_HTTP_PROCESS_FE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_HTTP_REQ_FE);
+ break;
+ case AN_REQ_HTTP_PROCESS_BE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_HTTP_REQ_BE);
+ break;
+ case AN_RES_HTTP_PROCESS_FE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_HTTP_RSP);
+ break;
+ }
+
+ out:
+ if (!ret && (chn->flags & CF_ISRESP)) {
+ channel_dont_read(chn);
+ channel_dont_close(chn);
+ }
+ return ret;
+}
+
+/* Called when the filtering on the channel ends. */
+static int
+spoe_end_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct spoe_context *ctx = filter->ctx;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p - ctx-state=%s"
+ " - ctx-flags=0x%08x\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state], ctx->flags);
+
+ if (!(ctx->flags & SPOE_CTX_FL_PROCESS)) {
+ spoe_reset_context(ctx);
+ }
+
+ return 1;
+}
+
+/********************************************************************
+ * Functions that manage the filter initialization
+ ********************************************************************/
+struct flt_ops spoe_ops = {
+ /* Manage SPOE filter, called for each filter declaration */
+ .init = spoe_init,
+ .deinit = spoe_deinit,
+ .check = spoe_check,
+ .init_per_thread = spoe_init_per_thread,
+
+ /* Handle start/stop of SPOE */
+ .attach = spoe_start,
+ .detach = spoe_stop,
+ .check_timeouts = spoe_check_timeouts,
+
+ /* Handle channels activity */
+ .channel_start_analyze = spoe_start_analyze,
+ .channel_pre_analyze = spoe_chn_pre_analyze,
+ .channel_end_analyze = spoe_end_analyze,
+};
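+
+/* For reference, a minimal engine configuration of the kind handled by
+ * the parser below; section and backend names are hypothetical examples:
+ *
+ *   [my-engine]
+ *
+ *   spoe-agent my-agent
+ *       messages    check-client-ip
+ *       option      var-prefix my
+ *       timeout     hello      2s
+ *       timeout     idle       2m
+ *       timeout     processing 10ms
+ *       use-backend spoe-backend
+ */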
+
+
+static int
+cfg_parse_spoe_agent(const char *file, int linenum, char **args, int kwm)
+{
+ const char *err;
+ int i, err_code = 0;
+
+ if ((cfg_scope == NULL && curengine != NULL) ||
+ (cfg_scope != NULL && curengine == NULL) ||
+ (curengine != NULL && cfg_scope != NULL && strcmp(curengine, cfg_scope) != 0))
+ goto out;
+
+ if (strcmp(args[0], "spoe-agent") == 0) { /* new spoe-agent section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for spoe-agent section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curagent != NULL) {
+ ha_alert("parsing [%s:%d] : another spoe-agent section previously defined.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if ((curagent = calloc(1, sizeof(*curagent))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curagent->id = strdup(args[1]);
+
+ curagent->conf.file = strdup(file);
+ curagent->conf.line = linenum;
+
+ curagent->timeout.hello = TICK_ETERNITY;
+ curagent->timeout.idle = TICK_ETERNITY;
+ curagent->timeout.processing = TICK_ETERNITY;
+
+ curagent->var_pfx = NULL;
+ curagent->var_on_error = NULL;
+ curagent->var_t_process = NULL;
+ curagent->var_t_total = NULL;
+ curagent->flags = (SPOE_FL_ASYNC | SPOE_FL_PIPELINING | SPOE_FL_SND_FRAGMENTATION);
+ curagent->cps_max = 0;
+ curagent->eps_max = 0;
+ curagent->max_frame_size = MAX_FRAME_SIZE;
+ curagent->max_fpa = 20;
+
+ for (i = 0; i < SPOE_EV_EVENTS; ++i)
+ LIST_INIT(&curagent->events[i]);
+ LIST_INIT(&curagent->groups);
+ LIST_INIT(&curagent->messages);
+ }
+ else if (strcmp(args[0], "use-backend") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects a backend name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ free(curagent->b.name);
+ curagent->b.name = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "messages") == 0) {
+ int cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_placeholder *ph = NULL;
+
+ list_for_each_entry(ph, &curmphs, list) {
+ if (strcmp(ph->id, args[cur_arg]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-message '%s' already used.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((ph = calloc(1, sizeof(*ph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ ph->id = strdup(args[cur_arg]);
+ LIST_APPEND(&curmphs, &ph->list);
+ cur_arg++;
+ }
+ }
+ else if (strcmp(args[0], "groups") == 0) {
+ int cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_placeholder *ph = NULL;
+
+ list_for_each_entry(ph, &curgphs, list) {
+ if (strcmp(ph->id, args[cur_arg]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-group '%s' already used.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((ph = calloc(1, sizeof(*ph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ ph->id = strdup(args[cur_arg]);
+ LIST_APPEND(&curgphs, &ph->list);
+ cur_arg++;
+ }
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ unsigned int *tv = NULL;
+ const char *res;
+ unsigned timeout;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : 'timeout' expects 'hello', 'idle' and 'processing'.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ if (strcmp(args[1], "hello") == 0)
+ tv = &curagent->timeout.hello;
+ else if (strcmp(args[1], "idle") == 0)
+ tv = &curagent->timeout.idle;
+ else if (strcmp(args[1], "processing") == 0)
+ tv = &curagent->timeout.processing;
+ else {
+ ha_alert("parsing [%s:%d] : 'timeout' supports 'hello', 'idle' or 'processing' (got %s).\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : 'timeout %s' expects an integer value (in milliseconds).\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d] : unexpected character '%c' in 'timeout %s'.\n",
+ file, linenum, *res, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ *tv = MS_TO_TICKS(timeout);
+ }
+ else if (strcmp(args[0], "option") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[1], "pipelining") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curagent->flags &= ~SPOE_FL_PIPELINING;
+ else
+ curagent->flags |= SPOE_FL_PIPELINING;
+ goto out;
+ }
+ else if (strcmp(args[1], "async") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curagent->flags &= ~SPOE_FL_ASYNC;
+ else
+ curagent->flags |= SPOE_FL_ASYNC;
+ goto out;
+ }
+ else if (strcmp(args[1], "send-frag-payload") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curagent->flags &= ~SPOE_FL_SND_FRAGMENTATION;
+ else
+ curagent->flags |= SPOE_FL_SND_FRAGMENTATION;
+ goto out;
+ }
+ else if (strcmp(args[1], "dontlog-normal") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curpxopts2 &= ~PR_O2_NOLOGNORM;
+ else
+ curpxopts2 |= PR_O2_NOLOGNORM;
+ goto out;
+ }
+
+ /* The following options do not support negation */
+ if (kwm == 1) {
+ ha_alert("parsing [%s:%d]: negation is not supported for option '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[1], "var-prefix") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_pfx = strdup(args[2]);
+ }
+ else if (strcmp(args[1], "force-set-var") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->flags |= SPOE_FL_FORCE_SET_VAR;
+ }
+ else if (strcmp(args[1], "continue-on-error") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->flags |= SPOE_FL_CONT_ON_ERR;
+ }
+ else if (strcmp(args[1], "set-on-error") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_on_error = strdup(args[2]);
+ }
+ else if (strcmp(args[1], "set-process-time") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_t_process = strdup(args[2]);
+ }
+ else if (strcmp(args[1], "set-total-time") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_t_total = strdup(args[2]);
+ }
+ else {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "maxconnrate") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->cps_max = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxerrrate") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->eps_max = atol(args[1]);
+ }
+ else if (strcmp(args[0], "max-frame-size") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->max_frame_size = atol(args[1]);
+ if (curagent->max_frame_size < MIN_FRAME_SIZE ||
+ curagent->max_frame_size > MAX_FRAME_SIZE) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument in the range [%d, %d].\n",
+ file, linenum, args[0], MIN_FRAME_SIZE, MAX_FRAME_SIZE);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "max-waiting-frames") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->max_fpa = atol(args[1]);
+ if (curagent->max_fpa < 1) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "register-var-names") == 0) {
+ int cur_arg;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects one or more variable names.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_var_placeholder *vph;
+
+ if ((vph = calloc(1, sizeof(*vph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if ((vph->name = strdup(args[cur_arg])) == NULL) {
+ free(vph);
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ LIST_APPEND(&curvars, &vph->list);
+ cur_arg++;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) {
+ char *errmsg = NULL;
+
+ if (!parse_logsrv(args, &curlogsrvs, (kwm == 1), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (*args[0]) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in spoe-agent section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ return err_code;
+}
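+
+/* For illustration only: a hypothetical "spoe-agent" section exercising some
+ * of the keywords handled by the parser above (all ids and values below are
+ * made up for the example):
+ *
+ *   spoe-agent my-agent
+ *       option var-prefix   my
+ *       option pipelining
+ *       timeout hello       2s
+ *       timeout idle        30s
+ *       timeout processing  500ms
+ *       max-frame-size      16384
+ *       register-var-names  errors
+ *       log                 global
+ */
+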
+static int
+cfg_parse_spoe_group(const char *file, int linenum, char **args, int kwm)
+{
+ struct spoe_group *grp;
+ const char *err;
+ int err_code = 0;
+
+ if ((cfg_scope == NULL && curengine != NULL) ||
+ (cfg_scope != NULL && curengine == NULL) ||
+ (curengine != NULL && cfg_scope != NULL && strcmp(curengine, cfg_scope) != 0))
+ goto out;
+
+ if (strcmp(args[0], "spoe-group") == 0) { /* new spoe-group section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for spoe-group section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ list_for_each_entry(grp, &curgrps, list) {
+ if (strcmp(grp->id, args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-group section '%s' has the same"
+ " name as another one declared at %s:%d.\n",
+ file, linenum, args[1], grp->conf.file, grp->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((curgrp = calloc(1, sizeof(*curgrp))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curgrp->id = strdup(args[1]);
+ curgrp->conf.file = strdup(file);
+ curgrp->conf.line = linenum;
+ LIST_INIT(&curgrp->phs);
+ LIST_INIT(&curgrp->messages);
+ LIST_APPEND(&curgrps, &curgrp->list);
+ }
+ else if (strcmp(args[0], "messages") == 0) {
+ int cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_placeholder *ph = NULL;
+
+ list_for_each_entry(ph, &curgrp->phs, list) {
+ if (strcmp(ph->id, args[cur_arg]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-message '%s' already used.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((ph = calloc(1, sizeof(*ph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ ph->id = strdup(args[cur_arg]);
+ LIST_APPEND(&curgrp->phs, &ph->list);
+ cur_arg++;
+ }
+ }
+ else if (*args[0]) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in spoe-group section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ return err_code;
+}
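+
+/* For illustration only, a hypothetical "spoe-group" section matching the
+ * parser above (the group and message ids are made up):
+ *
+ *   spoe-group my-group
+ *       messages check-client-ip log-request
+ */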
+
+static int
+cfg_parse_spoe_message(const char *file, int linenum, char **args, int kwm)
+{
+ struct spoe_message *msg;
+ struct spoe_arg *arg;
+ const char *err;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if ((cfg_scope == NULL && curengine != NULL) ||
+ (cfg_scope != NULL && curengine == NULL) ||
+ (curengine != NULL && cfg_scope != NULL && strcmp(curengine, cfg_scope) != 0))
+ goto out;
+
+ if (strcmp(args[0], "spoe-message") == 0) { /* new spoe-message section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for spoe-message section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ list_for_each_entry(msg, &curmsgs, list) {
+ if (strcmp(msg->id, args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-message section '%s' has the same"
+ " name as another one declared at %s:%d.\n",
+ file, linenum, args[1], msg->conf.file, msg->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((curmsg = calloc(1, sizeof(*curmsg))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curmsg->id = strdup(args[1]);
+ curmsg->id_len = strlen(curmsg->id);
+ curmsg->event = SPOE_EV_NONE;
+ curmsg->conf.file = strdup(file);
+ curmsg->conf.line = linenum;
+ curmsg->nargs = 0;
+ LIST_INIT(&curmsg->args);
+ LIST_INIT(&curmsg->acls);
+ LIST_INIT(&curmsg->by_evt);
+ LIST_INIT(&curmsg->by_grp);
+ LIST_APPEND(&curmsgs, &curmsg->list);
+ }
+ else if (strcmp(args[0], "args") == 0) {
+ int cur_arg = 1;
+
+ curproxy->conf.args.ctx = ARGC_SPOE;
+ curproxy->conf.args.file = file;
+ curproxy->conf.args.line = linenum;
+ while (*args[cur_arg]) {
+ char *delim = strchr(args[cur_arg], '=');
+ int idx = 0;
+
+ if ((arg = calloc(1, sizeof(*arg))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (!delim) {
+ arg->name = NULL;
+ arg->name_len = 0;
+ delim = args[cur_arg];
+ }
+ else {
+ arg->name = my_strndup(args[cur_arg], delim - args[cur_arg]);
+ arg->name_len = delim - args[cur_arg];
+ delim++;
+ }
+ arg->expr = sample_parse_expr((char*[]){delim, NULL},
+ &idx, file, linenum, &errmsg,
+ &curproxy->conf.args, NULL);
+ if (arg->expr == NULL) {
+ ha_alert("parsing [%s:%d] : '%s': %s.\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(arg->name);
+ free(arg);
+ goto out;
+ }
+ curmsg->nargs++;
+ LIST_APPEND(&curmsg->args, &arg->list);
+ cur_arg++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+ else if (strcmp(args[0], "acl") == 0) {
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in acl name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcasecmp(args[1], "or") == 0) {
+ ha_alert("parsing [%s:%d] : acl name '%s' will never match. 'or' is used to express a "
+ "logical disjunction within a condition.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (parse_acl((const char **)args + 1, &curmsg->acls, &errmsg, &curproxy->conf.args, file, linenum) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing ACL '%s' : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "event") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing event name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /* if (alertif_too_many_args(1, file, linenum, args, &err_code)) */
+ /* goto out; */
+
+ if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_CLIENT_SESS]) == 0)
+ curmsg->event = SPOE_EV_ON_CLIENT_SESS;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_SERVER_SESS]) == 0)
+ curmsg->event = SPOE_EV_ON_SERVER_SESS;
+
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_TCP_REQ_FE]) == 0)
+ curmsg->event = SPOE_EV_ON_TCP_REQ_FE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_TCP_REQ_BE]) == 0)
+ curmsg->event = SPOE_EV_ON_TCP_REQ_BE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_TCP_RSP]) == 0)
+ curmsg->event = SPOE_EV_ON_TCP_RSP;
+
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_HTTP_REQ_FE]) == 0)
+ curmsg->event = SPOE_EV_ON_HTTP_REQ_FE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_HTTP_REQ_BE]) == 0)
+ curmsg->event = SPOE_EV_ON_HTTP_REQ_BE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_HTTP_RSP]) == 0)
+ curmsg->event = SPOE_EV_ON_HTTP_RSP;
+ else {
+ ha_alert("parsing [%s:%d] : unknown event '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[2], "if") == 0 || strcmp(args[2], "unless") == 0) {
+ struct acl_cond *cond;
+
+ cond = build_acl_cond(file, linenum, &curmsg->acls,
+ curproxy, (const char **)args+2,
+ &errmsg);
+ if (cond == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while "
+ "parsing an 'event %s' condition : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curmsg->cond = cond;
+ }
+ else if (*args[2]) {
+ ha_alert("parsing [%s:%d]: 'event %s' expects either 'if' "
+ "or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[1], args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (*args[0]) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in spoe-message section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ free(errmsg);
+ return err_code;
+}
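+
+/* For illustration only, a hypothetical "spoe-message" section matching the
+ * parser above. The event name assumes the usual "on-client-session" entry of
+ * spoe_event_str[]; the message, arg and ACL names are made up:
+ *
+ *   spoe-message check-client-ip
+ *       args ip=src
+ *       acl  internal src 10.0.0.0/8
+ *       event on-client-session if !internal
+ */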
+
+/* Return -1 on error, else 0 */
+static int
+parse_spoe_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct list backup_sections;
+ struct spoe_config *conf;
+ struct spoe_message *msg, *msgback;
+ struct spoe_group *grp, *grpback;
+ struct spoe_placeholder *ph, *phback;
+ struct spoe_var_placeholder *vph, *vphback;
+ struct logsrv *logsrv, *logsrvback;
+ char *file = NULL, *engine = NULL;
+ int ret, pos = *cur_arg + 1;
+
+ LIST_INIT(&curmsgs);
+ LIST_INIT(&curgrps);
+ LIST_INIT(&curmphs);
+ LIST_INIT(&curgphs);
+ LIST_INIT(&curvars);
+ LIST_INIT(&curlogsrvs);
+ curpxopts = 0;
+ curpxopts2 = 0;
+
+ conf = calloc(1, sizeof(*conf));
+ if (conf == NULL) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+ conf->proxy = px;
+
+ while (*args[pos]) {
+ if (strcmp(args[pos], "config") == 0) {
+ if (!*args[pos+1]) {
+ memprintf(err, "'%s' : '%s' option without value",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ file = args[pos+1];
+ pos += 2;
+ }
+ else if (strcmp(args[pos], "engine") == 0) {
+ if (!*args[pos+1]) {
+ memprintf(err, "'%s' : '%s' option without value",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ engine = args[pos+1];
+ pos += 2;
+ }
+ else {
+ memprintf(err, "unknown keyword '%s'", args[pos]);
+ goto error;
+ }
+ }
+ if (file == NULL) {
+ memprintf(err, "'%s' : missing config file", args[*cur_arg]);
+ goto error;
+ }
+
+ /* backup sections and register SPOE sections */
+ LIST_INIT(&backup_sections);
+ cfg_backup_sections(&backup_sections);
+ cfg_register_section("spoe-agent", cfg_parse_spoe_agent, NULL);
+ cfg_register_section("spoe-group", cfg_parse_spoe_group, NULL);
+ cfg_register_section("spoe-message", cfg_parse_spoe_message, NULL);
+
+ /* Parse SPOE filter configuration file */
+ curengine = engine;
+ curproxy = px;
+ curagent = NULL;
+ curmsg = NULL;
+ ret = readcfgfile(file);
+ curproxy = NULL;
+
+ /* unregister SPOE sections and restore previous sections */
+ cfg_unregister_sections();
+ cfg_restore_sections(&backup_sections);
+
+ if (ret == -1) {
+ memprintf(err, "Could not open configuration file %s : %s",
+ file, strerror(errno));
+ goto error;
+ }
+ if (ret & (ERR_ABORT|ERR_FATAL)) {
+ memprintf(err, "Error(s) found in configuration file %s", file);
+ goto error;
+ }
+
+ /* Check SPOE agent */
+ if (curagent == NULL) {
+ memprintf(err, "No SPOE agent found in file %s", file);
+ goto error;
+ }
+ if (curagent->b.name == NULL) {
+ memprintf(err, "No backend declared for SPOE agent '%s' declared at %s:%d",
+ curagent->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ }
+ if (curagent->timeout.hello == TICK_ETERNITY ||
+ curagent->timeout.idle == TICK_ETERNITY ||
+ curagent->timeout.processing == TICK_ETERNITY) {
+ ha_warning("Proxy '%s': missing timeouts for SPOE agent '%s' declare at %s:%d.\n"
+ " | While not properly invalid, you will certainly encounter various problems\n"
+ " | with such a configuration. To fix this, please ensure that all following\n"
+ " | timeouts are set to a non-zero value: 'hello', 'idle', 'processing'.\n",
+ px->id, curagent->id, curagent->conf.file, curagent->conf.line);
+ }
+ if (curagent->var_pfx == NULL) {
+ char *tmp = curagent->id;
+
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ memprintf(err, "Invalid variable prefix '%s' for SPOE agent '%s' declared at %s:%d. "
+ "Use 'option var-prefix' to set it. Only [a-zA-Z0-9_.] chars are supported.\n",
+ curagent->id, curagent->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ }
+ tmp++;
+ }
+ curagent->var_pfx = strdup(curagent->id);
+ }
+
+ if (curagent->var_on_error) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "txn.%s.%s",
+ curagent->var_pfx, curagent->var_on_error);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, curagent->var_on_error, *err);
+ goto error;
+ }
+ }
+
+ if (curagent->var_t_process) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "txn.%s.%s",
+ curagent->var_pfx, curagent->var_t_process);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, curagent->var_t_process, *err);
+ goto error;
+ }
+ }
+
+ if (curagent->var_t_total) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "txn.%s.%s",
+ curagent->var_pfx, curagent->var_t_total);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, curagent->var_t_total, *err);
+ goto error;
+ }
+ }
+
+ if (LIST_ISEMPTY(&curmphs) && LIST_ISEMPTY(&curgphs)) {
+ ha_warning("Proxy '%s': No message/group used by SPOE agent '%s' declared at %s:%d.\n",
+ px->id, curagent->id, curagent->conf.file, curagent->conf.line);
+ goto finish;
+ }
+
+ /* Replace placeholders with the corresponding messages for the SPOE
+ * agent */
+ list_for_each_entry(ph, &curmphs, list) {
+ list_for_each_entry(msg, &curmsgs, list) {
+ struct spoe_arg *arg;
+ unsigned int where;
+
+ if (strcmp(msg->id, ph->id) == 0) {
+ if ((px->cap & (PR_CAP_FE|PR_CAP_BE)) == (PR_CAP_FE|PR_CAP_BE)) {
+ if (msg->event == SPOE_EV_ON_TCP_REQ_BE)
+ msg->event = SPOE_EV_ON_TCP_REQ_FE;
+ if (msg->event == SPOE_EV_ON_HTTP_REQ_BE)
+ msg->event = SPOE_EV_ON_HTTP_REQ_FE;
+ }
+ if (!(px->cap & PR_CAP_FE) && (msg->event == SPOE_EV_ON_CLIENT_SESS ||
+ msg->event == SPOE_EV_ON_TCP_REQ_FE ||
+ msg->event == SPOE_EV_ON_HTTP_REQ_FE)) {
+ ha_warning("Proxy '%s': frontend event used on a backend proxy at %s:%d.\n",
+ px->id, msg->conf.file, msg->conf.line);
+ goto next_mph;
+ }
+ if (msg->event == SPOE_EV_NONE) {
+ ha_warning("Proxy '%s': Ignore SPOE message '%s' without event at %s:%d.\n",
+ px->id, msg->id, msg->conf.file, msg->conf.line);
+ goto next_mph;
+ }
+
+ where = 0;
+ switch (msg->event) {
+ case SPOE_EV_ON_CLIENT_SESS:
+ where |= SMP_VAL_FE_CON_ACC;
+ break;
+
+ case SPOE_EV_ON_TCP_REQ_FE:
+ where |= SMP_VAL_FE_REQ_CNT;
+ break;
+
+ case SPOE_EV_ON_HTTP_REQ_FE:
+ where |= SMP_VAL_FE_HRQ_HDR;
+ break;
+
+ case SPOE_EV_ON_TCP_REQ_BE:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_REQ_CNT;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_REQ_CNT;
+ break;
+
+ case SPOE_EV_ON_HTTP_REQ_BE:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ break;
+
+ case SPOE_EV_ON_SERVER_SESS:
+ where |= SMP_VAL_BE_SRV_CON;
+ break;
+
+ case SPOE_EV_ON_TCP_RSP:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_RES_CNT;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_RES_CNT;
+ break;
+
+ case SPOE_EV_ON_HTTP_RSP:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRS_HDR;
+ break;
+
+ default:
+ break;
+ }
+
+ list_for_each_entry(arg, &msg->args, list) {
+ if (!(arg->expr->fetch->val & where)) {
+ memprintf(err, "Ignore SPOE message '%s' at %s:%d: "
+ "some args extract information from '%s', "
+ "none of which is available here ('%s')",
+ msg->id, msg->conf.file, msg->conf.line,
+ sample_ckp_names(arg->expr->fetch->use),
+ sample_ckp_names(where));
+ goto error;
+ }
+ }
+
+ msg->agent = curagent;
+ LIST_APPEND(&curagent->events[msg->event], &msg->by_evt);
+ goto next_mph;
+ }
+ }
+ memprintf(err, "SPOE agent '%s' try to use undefined SPOE message '%s' at %s:%d",
+ curagent->id, ph->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ next_mph:
+ continue;
+ }
+
+ /* Replace placeholders with the corresponding groups for the SPOE
+ * agent */
+ list_for_each_entry(ph, &curgphs, list) {
+ list_for_each_entry_safe(grp, grpback, &curgrps, list) {
+ if (strcmp(grp->id, ph->id) == 0) {
+ grp->agent = curagent;
+ LIST_DELETE(&grp->list);
+ LIST_APPEND(&curagent->groups, &grp->list);
+ goto next_aph;
+ }
+ }
+ memprintf(err, "SPOE agent '%s' try to use undefined SPOE group '%s' at %s:%d",
+ curagent->id, ph->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ next_aph:
+ continue;
+ }
+
+ /* Replace placeholders with the corresponding message for each SPOE
+ * group of the SPOE agent */
+ list_for_each_entry(grp, &curagent->groups, list) {
+ list_for_each_entry_safe(ph, phback, &grp->phs, list) {
+ list_for_each_entry(msg, &curmsgs, list) {
+ if (strcmp(msg->id, ph->id) == 0) {
+ if (msg->group != NULL) {
+ memprintf(err, "SPOE message '%s' already belongs to "
+ "the SPOE group '%s' declare at %s:%d",
+ msg->id, msg->group->id,
+ msg->group->conf.file,
+ msg->group->conf.line);
+ goto error;
+ }
+
+ /* Scopes for arguments are not checked for now. We will check
+ * them only if a rule uses the corresponding SPOE group. */
+ msg->agent = curagent;
+ msg->group = grp;
+ LIST_DELETE(&ph->list);
+ LIST_APPEND(&grp->messages, &msg->by_grp);
+ goto next_mph_grp;
+ }
+ }
+ memprintf(err, "SPOE group '%s' try to use undefined SPOE message '%s' at %s:%d",
+ grp->id, ph->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ next_mph_grp:
+ continue;
+ }
+ }
+
+ finish:
+ /* move curmsgs to the agent message list */
+ curmsgs.n->p = &curagent->messages;
+ curmsgs.p->n = &curagent->messages;
+ curagent->messages = curmsgs;
+ LIST_INIT(&curmsgs);
+
+ conf->id = strdup(engine ? engine : curagent->id);
+ conf->agent = curagent;
+ curagent->spoe_conf = conf;
+
+ /* Start agent's proxy initialization here. It will be finished during
+ * the filter init. */
+ memset(&conf->agent_fe, 0, sizeof(conf->agent_fe));
+ init_new_proxy(&conf->agent_fe);
+ conf->agent_fe.id = conf->agent->id;
+ conf->agent_fe.parent = conf->agent;
+ conf->agent_fe.options |= curpxopts;
+ conf->agent_fe.options2 |= curpxopts2;
+
+ list_for_each_entry_safe(logsrv, logsrvback, &curlogsrvs, list) {
+ LIST_DELETE(&logsrv->list);
+ LIST_APPEND(&conf->agent_fe.logsrvs, &logsrv->list);
+ }
+
+ list_for_each_entry_safe(ph, phback, &curmphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(ph, phback, &curgphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(vph, vphback, &curvars, list) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "proc.%s.%s",
+ curagent->var_pfx, vph->name);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, vph->name, *err);
+ goto error;
+ }
+
+ LIST_DELETE(&vph->list);
+ free(vph->name);
+ free(vph);
+ }
+ list_for_each_entry_safe(grp, grpback, &curgrps, list) {
+ LIST_DELETE(&grp->list);
+ spoe_release_group(grp);
+ }
+ *cur_arg = pos;
+ fconf->id = spoe_filter_id;
+ fconf->ops = &spoe_ops;
+ fconf->conf = conf;
+ return 0;
+
+ error:
+ spoe_release_agent(curagent);
+ list_for_each_entry_safe(ph, phback, &curmphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(ph, phback, &curgphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(vph, vphback, &curvars, list) {
+ LIST_DELETE(&vph->list);
+ free(vph->name);
+ free(vph);
+ }
+ list_for_each_entry_safe(grp, grpback, &curgrps, list) {
+ LIST_DELETE(&grp->list);
+ spoe_release_group(grp);
+ }
+ list_for_each_entry_safe(msg, msgback, &curmsgs, list) {
+ LIST_DELETE(&msg->list);
+ spoe_release_message(msg);
+ }
+ list_for_each_entry_safe(logsrv, logsrvback, &curlogsrvs, list) {
+ LIST_DELETE(&logsrv->list);
+ free(logsrv);
+ }
+ free(conf);
+ return -1;
+}
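+
+/* For illustration only: the kind of "filter" line consumed by the parser
+ * above, using the "config" and "engine" options it handles (the engine id
+ * and file path are made up):
+ *
+ *   filter spoe engine my-engine config /etc/haproxy/spoe-my-engine.conf
+ */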
+
+/* Send the messages of a SPOE group. This is the action_ptr callback of a
+ * rule associated with a "send-spoe-group" action.
+ *
+ * It returns ACT_RET_CONT if processing is finished (with or without error),
+ * or ACT_RET_YIELD if the action is still in progress. */
+static enum act_return
+spoe_send_group(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct filter *filter;
+ struct spoe_agent *agent = NULL;
+ struct spoe_group *group = NULL;
+ struct spoe_context *ctx = NULL;
+ int ret, dir;
+
+ list_for_each_entry(filter, &s->strm_flt.filters, list) {
+ if (filter->config == rule->arg.act.p[0]) {
+ agent = rule->arg.act.p[2];
+ group = rule->arg.act.p[3];
+ ctx = filter->ctx;
+ break;
+ }
+ }
+ if (agent == NULL || group == NULL || ctx == NULL)
+ return ACT_RET_CONT;
+ if (ctx->state == SPOE_CTX_ST_NONE)
+ return ACT_RET_CONT;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_SES: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: dir = SMP_OPT_DIR_RES; break;
+ default:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - internal error while execute spoe-send-group\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s);
+ send_log(px, LOG_ERR, "SPOE: [%s] internal error while execute spoe-send-group\n",
+ agent->id);
+ return ACT_RET_CONT;
+ }
+
+ ret = spoe_process_group(s, ctx, group, dir);
+ if (ret == 1)
+ return ACT_RET_CONT;
+ else if (ret == 0) {
+ if (flags & ACT_OPT_FINAL) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - failed to process group '%s': interrupted by caller\n",
+ (int)now.tv_sec, (int)now.tv_usec,
+ agent->id, __FUNCTION__, s, group->id);
+ ctx->status_code = SPOE_CTX_ERR_INTERRUPT;
+ spoe_stop_processing(agent, ctx);
+ spoe_handle_processing_error(s, agent, ctx, dir);
+ return ACT_RET_CONT;
+ }
+ return ACT_RET_YIELD;
+ }
+ else
+ return ACT_RET_CONT;
+}
+
+/* Check an "send-spoe-group" action. Here, we'll try to find the real SPOE
+ * group associated to <rule>. The format of an rule using 'send-spoe-group'
+ * action should be:
+ *
+ * (http|tcp)-(request|response) send-spoe-group <engine-id> <group-id>
+ *
+ * So, we'll loop on each configured SPOE filter for the proxy <px> to find the
+ * SPOE engine matching <engine-id>. Then we'll look for the group
+ * matching <group-id>. Finally, we'll check all messages referenced by the SPOE
+ * group.
+ *
+ * The function returns 1 on success; otherwise it returns 0 and <err> is
+ * filled.
+ */
+static int
+check_send_spoe_group(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct flt_conf *fconf;
+ struct spoe_config *conf;
+ struct spoe_agent *agent = NULL;
+ struct spoe_group *group;
+ struct spoe_message *msg;
+ char *engine_id = rule->arg.act.p[0];
+ char *group_id = rule->arg.act.p[1];
+ unsigned int where = 0;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_SES: where = SMP_VAL_FE_SES_ACC; break;
+ case ACT_F_TCP_REQ_CNT: where = SMP_VAL_FE_REQ_CNT; break;
+ case ACT_F_TCP_RES_CNT: where = SMP_VAL_BE_RES_CNT; break;
+ case ACT_F_HTTP_REQ: where = SMP_VAL_FE_HRQ_HDR; break;
+ case ACT_F_HTTP_RES: where = SMP_VAL_BE_HRS_HDR; break;
+ default:
+ memprintf(err,
+ "internal error, unexpected rule->from=%d, please report this bug!",
+ rule->from);
+ goto error;
+ }
+
+ /* Try to find the SPOE engine by checking all SPOE filters for proxy
+ * <px> */
+ list_for_each_entry(fconf, &px->filter_configs, list) {
+ conf = fconf->conf;
+
+ /* This is not an SPOE filter */
+ if (fconf->id != spoe_filter_id)
+ continue;
+
+ /* This is the right engine */
+ if (strcmp(conf->id, engine_id) == 0) {
+ agent = conf->agent;
+ break;
+ }
+ }
+ if (agent == NULL) {
+ memprintf(err, "unable to find SPOE engine '%s' used by the send-spoe-group '%s'",
+ engine_id, group_id);
+ goto error;
+ }
+
+ /* Try to find the right group */
+ list_for_each_entry(group, &agent->groups, list) {
+ /* This is the right group */
+ if (strcmp(group->id, group_id) == 0)
+ break;
+ }
+ if (&group->list == &agent->groups) {
+ memprintf(err, "unable to find SPOE group '%s' into SPOE engine '%s' configuration",
+ group_id, engine_id);
+ goto error;
+ }
+
+ /* Ok, we found the group, we need to check messages and their
+ * arguments */
+ list_for_each_entry(msg, &group->messages, by_grp) {
+ struct spoe_arg *arg;
+
+ list_for_each_entry(arg, &msg->args, list) {
+ if (!(arg->expr->fetch->val & where)) {
+ memprintf(err, "Invalid SPOE message '%s' used by SPOE group '%s' at %s:%d: "
+ "some args extract information from '%s',"
+ "none of which is available here ('%s')",
+ msg->id, group->id, msg->conf.file, msg->conf.line,
+ sample_ckp_names(arg->expr->fetch->use),
+ sample_ckp_names(where));
+ goto error;
+ }
+ }
+ }
+
+ free(engine_id);
+ free(group_id);
+ rule->arg.act.p[0] = fconf; /* Associate filter config with the rule */
+ rule->arg.act.p[1] = conf; /* Associate SPOE config with the rule */
+ rule->arg.act.p[2] = agent; /* Associate SPOE agent with the rule */
+ rule->arg.act.p[3] = group; /* Associate SPOE group with the rule */
+ return 1;
+
+ error:
+ free(engine_id);
+ free(group_id);
+ return 0;
+}
+
+/* Parse 'send-spoe-group' action following the format:
+ *
+ * ... send-spoe-group <engine-id> <group-id>
+ *
+ * It returns ACT_RET_PRS_ERR if it fails, and <err> is filled with an error
+ * message. Otherwise it returns ACT_RET_PRS_OK; the engine and group ids are
+ * saved and used later, when the rule is checked.
+ */
+static enum act_parse_ret
+parse_send_spoe_group(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ if (!*args[*orig_arg] || !*args[*orig_arg+1] ||
+ (*args[*orig_arg+2] && strcmp(args[*orig_arg+2], "if") != 0 && strcmp(args[*orig_arg+2], "unless") != 0)) {
+ memprintf(err, "expects 2 arguments: <engine-id> <group-id>");
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.act.p[0] = strdup(args[*orig_arg]); /* Copy the SPOE engine id */
+ rule->arg.act.p[1] = strdup(args[*orig_arg+1]); /* Copy the SPOE group id */
+
+ (*orig_arg) += 2;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = spoe_send_group;
+ rule->check_ptr = check_send_spoe_group;
+ return ACT_RET_PRS_OK;
+}
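+
+/* For illustration only, a hypothetical rule using this action, following the
+ * format documented above (ids and ACL name are made up):
+ *
+ *   http-request send-spoe-group my-engine my-group if some_acl
+ */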
+
+
+/* Declare the filter parser for "spoe" keyword */
+static struct flt_kw_list flt_kws = { "SPOE", { }, {
+ { "spoe", parse_spoe_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &flt_kws);
+
+/* Declare the action parsers for the "send-spoe-group" keyword */
+static struct action_kw_list tcp_req_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_action_kws);
+
+static struct action_kw_list tcp_res_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_action_kws);
+
+static struct action_kw_list http_req_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_action_kws);
+
+static struct action_kw_list http_res_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_action_kws);
diff --git a/src/flt_trace.c b/src/flt_trace.c
new file mode 100644
index 0000000..5aabcb2
--- /dev/null
+++ b/src/flt_trace.c
@@ -0,0 +1,675 @@
+/*
+ * Stream filters related variables and functions.
+ *
+ * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+
+#include <haproxy/api.h>
+#include <haproxy/channel-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/stream.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+const char *trace_flt_id = "trace filter";
+
+struct flt_ops trace_ops;
+
+#define TRACE_F_QUIET 0x00000001
+#define TRACE_F_RAND_FWD 0x00000002
+#define TRACE_F_HEXDUMP 0x00000004
+
+struct trace_config {
+ struct proxy *proxy;
+ char *name;
+ unsigned int flags;
+};
+
+#define FLT_TRACE(conf, fmt, ...) \
+ do { \
+ if (!(conf->flags & TRACE_F_QUIET)) \
+ fprintf(stderr, "%d.%06d [%-20s] " fmt "\n", \
+ (int)now.tv_sec, (int)now.tv_usec, (conf)->name,\
+ ##__VA_ARGS__); \
+ } while (0)
+
+#define FLT_STRM_TRACE(conf, strm, fmt, ...) \
+ do { \
+ if (!(conf->flags & TRACE_F_QUIET)) \
+ fprintf(stderr, "%d.%06d [%-20s] [strm %p(%x) 0x%08x 0x%08x] " fmt "\n", \
+ (int)now.tv_sec, (int)now.tv_usec, (conf)->name, \
+ strm, (strm ? ((struct stream *)strm)->uniq_id : ~0U), \
+ (strm ? strm->req.analysers : 0), (strm ? strm->res.analysers : 0), \
+ ##__VA_ARGS__); \
+ } while (0)
+
+
+static const char *
+channel_label(const struct channel *chn)
+{
+ return (chn->flags & CF_ISRESP) ? "RESPONSE" : "REQUEST";
+}
+
+static const char *
+proxy_mode(const struct stream *s)
+{
+ struct proxy *px = (s->flags & SF_BE_ASSIGNED ? s->be : strm_fe(s));
+
+ return ((px->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+}
+
+static const char *
+stream_pos(const struct stream *s)
+{
+ return (s->flags & SF_BE_ASSIGNED) ? "backend" : "frontend";
+}
+
+static const char *
+filter_type(const struct filter *f)
+{
+ return (f->flags & FLT_FL_IS_BACKEND_FILTER) ? "backend" : "frontend";
+}
+
+static void
+trace_hexdump(struct ist ist)
+{
+ int i, j, padding;
+
+ padding = ((ist.len % 16) ? (16 - ist.len % 16) : 0);
+ for (i = 0; i < ist.len + padding; i++) {
+ if (!(i % 16))
+ fprintf(stderr, "\t0x%06x: ", i);
+ else if (!(i % 8))
+ fprintf(stderr, " ");
+
+ if (i < ist.len)
+ fprintf(stderr, "%02x ", (unsigned char)*(ist.ptr+i));
+ else
+ fprintf(stderr, " ");
+
+ /* print ASCII dump */
+ if (i % 16 == 15) {
+ fprintf(stderr, " |");
+ for(j = i - 15; j <= i && j < ist.len; j++)
+ fprintf(stderr, "%c", (isprint((unsigned char)*(ist.ptr+j)) ? *(ist.ptr+j) : '.'));
+ fprintf(stderr, "|\n");
+ }
+ }
+}
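+
+/* For illustration, a line of the dump produced above would look like this
+ * (sketched by hand, not captured from a real run):
+ *
+ *   0x000000: 48 54 54 50 2f 31 2e 31  20 32 30 30 20 4f 4b 0d  |HTTP/1.1 200 OK.|
+ */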
+
+static void
+trace_raw_hexdump(struct buffer *buf, unsigned int offset, unsigned int len)
+{
+ unsigned char p[len];
+ int block1, block2;
+
+ block1 = len;
+ if (block1 > b_contig_data(buf, offset))
+ block1 = b_contig_data(buf, offset);
+ block2 = len - block1;
+
+ memcpy(p, b_peek(buf, offset), block1);
+ memcpy(p+block1, b_orig(buf), block2);
+ trace_hexdump(ist2(p, len));
+}
+
+static void
+trace_htx_hexdump(struct htx *htx, unsigned int offset, unsigned int len)
+{
+ struct htx_blk *blk;
+
+ for (blk = htx_get_first_blk(htx); blk && len; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+
+ if (offset >= sz) {
+ offset -= sz;
+ continue;
+ }
+
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ offset = 0;
+
+ v = isttrim(v, len);
+ len -= v.len;
+ if (type == HTX_BLK_DATA)
+ trace_hexdump(v);
+ }
+}
+
+static unsigned int
+trace_get_htx_datalen(struct htx *htx, unsigned int offset, unsigned int len)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = htx_find_offset(htx, offset);
+ uint32_t data = 0;
+
+ blk = htxret.blk;
+ if (blk && htxret.ret && htx_get_blk_type(blk) == HTX_BLK_DATA) {
+ data += htxret.ret;
+ blk = htx_get_next_blk(htx, blk);
+ }
+ while (blk) {
+ if (htx_get_blk_type(blk) == HTX_BLK_UNUSED)
+ goto next;
+ else if (htx_get_blk_type(blk) != HTX_BLK_DATA)
+ break;
+ data += htx_get_blksz(blk);
+ next:
+ blk = htx_get_next_blk(htx, blk);
+ }
+ return data;
+}
+
+/***************************************************************************
+ * Hooks that manage the filter lifecycle (init/check/deinit)
+ **************************************************************************/
+/* Initialize the filter. Returns -1 on error, else 0. */
+static int
+trace_init(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ if (conf->name)
+ memprintf(&conf->name, "%s/%s", conf->name, px->id);
+ else
+ memprintf(&conf->name, "TRACE/%s", px->id);
+
+ fconf->flags |= FLT_CFG_FL_HTX;
+ fconf->conf = conf;
+
+ FLT_TRACE(conf, "filter initialized [quiet=%s - fwd random=%s - hexdump=%s]",
+ ((conf->flags & TRACE_F_QUIET) ? "true" : "false"),
+ ((conf->flags & TRACE_F_RAND_FWD) ? "true" : "false"),
+ ((conf->flags & TRACE_F_HEXDUMP) ? "true" : "false"));
+ return 0;
+}
+
+/* Free resources allocated by the trace filter. */
+static void
+trace_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ if (conf) {
+ FLT_TRACE(conf, "filter deinitialized");
+ free(conf->name);
+ free(conf);
+ }
+ fconf->conf = NULL;
+}
+
+/* Check configuration of a trace filter for a specified proxy.
+ * Return 1 on error, else 0. */
+static int
+trace_check(struct proxy *px, struct flt_conf *fconf)
+{
+ return 0;
+}
+
+/* Initialize the filter for each thread. Return -1 on error, else 0. */
+static int
+trace_init_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ FLT_TRACE(conf, "filter initialized for thread tid %u", tid);
+ return 0;
+}
+
+/* Free resources allocated by the trace filter for each thread. */
+static void
+trace_deinit_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ if (conf)
+ FLT_TRACE(conf, "filter deinitialized for thread tid %u", tid);
+}
+
+/**************************************************************************
+ * Hooks to handle start/stop of streams
+ *************************************************************************/
+/* Called when a filter instance is created and attached to a stream */
+static int
+trace_attach(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: filter-type=%s",
+ __FUNCTION__, filter_type(filter));
+
+ return 1;
+}
+
+/* Called when a filter instance is detached from a stream, just before its
+ * destruction */
+static void
+trace_detach(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: filter-type=%s",
+ __FUNCTION__, filter_type(filter));
+}
+
+/* Called when a stream is created */
+static int
+trace_stream_start(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s",
+ __FUNCTION__);
+ return 0;
+}
+
+
+/* Called when a backend is set for a stream */
+static int
+trace_stream_set_backend(struct stream *s, struct filter *filter,
+ struct proxy *be)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: backend=%s",
+ __FUNCTION__, be->id);
+ return 0;
+}
+
+/* Called when a stream is destroyed */
+static void
+trace_stream_stop(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s",
+ __FUNCTION__);
+}
+
+/* Called when the stream is woken up because of an expired timer */
+static void
+trace_check_timeouts(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s",
+ __FUNCTION__);
+}
+
+/**************************************************************************
+ * Hooks to handle channels activity
+ *************************************************************************/
+/* Called when analyze starts for a given channel */
+static int
+trace_chn_start_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s));
+ filter->pre_analyzers |= (AN_REQ_ALL | AN_RES_ALL);
+ filter->post_analyzers |= (AN_REQ_ALL | AN_RES_ALL);
+ register_data_filter(s, chn, filter);
+ return 1;
+}
+
+/* Called before a processing happens on a given channel */
+static int
+trace_chn_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn, unsigned an_bit)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ char *ana;
+
+ switch (an_bit) {
+ case AN_REQ_INSPECT_FE:
+ ana = "AN_REQ_INSPECT_FE";
+ break;
+ case AN_REQ_WAIT_HTTP:
+ ana = "AN_REQ_WAIT_HTTP";
+ break;
+ case AN_REQ_HTTP_BODY:
+ ana = "AN_REQ_HTTP_BODY";
+ break;
+ case AN_REQ_HTTP_PROCESS_FE:
+ ana = "AN_REQ_HTTP_PROCESS_FE";
+ break;
+ case AN_REQ_SWITCHING_RULES:
+ ana = "AN_REQ_SWITCHING_RULES";
+ break;
+ case AN_REQ_INSPECT_BE:
+ ana = "AN_REQ_INSPECT_BE";
+ break;
+ case AN_REQ_HTTP_PROCESS_BE:
+ ana = "AN_REQ_HTTP_PROCESS_BE";
+ break;
+ case AN_REQ_SRV_RULES:
+ ana = "AN_REQ_SRV_RULES";
+ break;
+ case AN_REQ_HTTP_INNER:
+ ana = "AN_REQ_HTTP_INNER";
+ break;
+ case AN_REQ_HTTP_TARPIT:
+ ana = "AN_REQ_HTTP_TARPIT";
+ break;
+ case AN_REQ_STICKING_RULES:
+ ana = "AN_REQ_STICKING_RULES";
+ break;
+ case AN_REQ_PRST_RDP_COOKIE:
+ ana = "AN_REQ_PRST_RDP_COOKIE";
+ break;
+ case AN_REQ_HTTP_XFER_BODY:
+ ana = "AN_REQ_HTTP_XFER_BODY";
+ break;
+ case AN_RES_INSPECT:
+ ana = "AN_RES_INSPECT";
+ break;
+ case AN_RES_WAIT_HTTP:
+ ana = "AN_RES_WAIT_HTTP";
+ break;
+ case AN_RES_HTTP_PROCESS_FE: // AN_RES_HTTP_PROCESS_BE
+ ana = "AN_RES_HTTP_PROCESS_FE/BE";
+ break;
+ case AN_RES_STORE_RULES:
+ ana = "AN_RES_STORE_RULES";
+ break;
+ case AN_RES_HTTP_XFER_BODY:
+ ana = "AN_RES_HTTP_XFER_BODY";
+ break;
+ default:
+ ana = "unknown";
+ }
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "analyzer=%s - step=%s",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s),
+ ana, ((chn->analysers & an_bit) ? "PRE" : "POST"));
+ return 1;
+}
+
+/* Called when analyze ends for a given channel */
+static int
+trace_chn_end_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s));
+ return 1;
+}
+
+/**************************************************************************
+ * Hooks to filter HTTP messages
+ *************************************************************************/
+static int
+trace_http_headers(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_sl *sl = http_get_stline(htx);
+ int32_t pos;
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)\t%.*s %.*s %.*s",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s),
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+ FLT_STRM_TRACE(conf, s, "\t%.*s: %.*s",
+ (int)n.len, n.ptr, (int)v.len, v.ptr);
+ }
+ return 1;
+}
+
+static int
+trace_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ int ret = len;
+
+ if (ret && (conf->flags & TRACE_F_RAND_FWD)) {
+ unsigned int data = trace_get_htx_datalen(htxbuf(&msg->chn->buf), offset, len);
+
+ if (data) {
+ ret = ha_random() % (ret+1);
+ if (!ret || ret >= data)
+ ret = len;
+ }
+ }
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "offset=%u - len=%u - forward=%d",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s),
+ offset, len, ret);
+
+ if (conf->flags & TRACE_F_HEXDUMP)
+ trace_htx_hexdump(htxbuf(&msg->chn->buf), offset, ret);
+
+ if (ret != len)
+ task_wakeup(s->task, TASK_WOKEN_MSG);
+ return ret;
+}
+
+static int
+trace_http_end(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s));
+ return 1;
+}
+
+static void
+trace_http_reset(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s));
+}
+
+static void
+trace_http_reply(struct stream *s, struct filter *filter, short status,
+ const struct buffer *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__, "-", proxy_mode(s), stream_pos(s));
+}
+
+/**************************************************************************
+ * Hooks to filter TCP data
+ *************************************************************************/
+static int
+trace_tcp_payload(struct stream *s, struct filter *filter, struct channel *chn,
+ unsigned int offset, unsigned int len)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ int ret = len;
+
+ if (s->flags & SF_HTX) {
+ if (ret && (conf->flags & TRACE_F_RAND_FWD)) {
+ unsigned int data = trace_get_htx_datalen(htxbuf(&chn->buf), offset, len);
+
+ if (data) {
+ ret = ha_random() % (ret+1);
+ if (!ret || ret >= data)
+ ret = len;
+ }
+ }
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "offset=%u - len=%u - forward=%d",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s),
+ offset, len, ret);
+
+ if (conf->flags & TRACE_F_HEXDUMP)
+ trace_htx_hexdump(htxbuf(&chn->buf), offset, ret);
+ }
+ else {
+
+ if (ret && (conf->flags & TRACE_F_RAND_FWD))
+ ret = ha_random() % (ret+1);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "offset=%u - len=%u - forward=%d",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s),
+ offset, len, ret);
+
+ if (conf->flags & TRACE_F_HEXDUMP)
+ trace_raw_hexdump(&chn->buf, offset, ret);
+ }
+
+ if (ret != len)
+ task_wakeup(s->task, TASK_WOKEN_MSG);
+ return ret;
+}
+/********************************************************************
+ * Functions that manage the filter initialization
+ ********************************************************************/
+struct flt_ops trace_ops = {
+ /* Manage trace filter, called for each filter declaration */
+ .init = trace_init,
+ .deinit = trace_deinit,
+ .check = trace_check,
+ .init_per_thread = trace_init_per_thread,
+ .deinit_per_thread = trace_deinit_per_thread,
+
+ /* Handle start/stop of streams */
+ .attach = trace_attach,
+ .detach = trace_detach,
+ .stream_start = trace_stream_start,
+ .stream_set_backend = trace_stream_set_backend,
+ .stream_stop = trace_stream_stop,
+ .check_timeouts = trace_check_timeouts,
+
+ /* Handle channels activity */
+ .channel_start_analyze = trace_chn_start_analyze,
+ .channel_pre_analyze = trace_chn_analyze,
+ .channel_post_analyze = trace_chn_analyze,
+ .channel_end_analyze = trace_chn_end_analyze,
+
+ /* Filter HTTP requests and responses */
+ .http_headers = trace_http_headers,
+ .http_payload = trace_http_payload,
+ .http_end = trace_http_end,
+ .http_reset = trace_http_reset,
+ .http_reply = trace_http_reply,
+
+ /* Filter TCP data */
+ .tcp_payload = trace_tcp_payload,
+};
+
+/* Return -1 on error, else 0 */
+static int
+parse_trace_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct trace_config *conf;
+ int pos = *cur_arg;
+
+ conf = calloc(1, sizeof(*conf));
+ if (!conf) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ return -1;
+ }
+ conf->proxy = px;
+ conf->flags = 0;
+ if (strcmp(args[pos], "trace") == 0) {
+ pos++;
+
+ while (*args[pos]) {
+ if (strcmp(args[pos], "name") == 0) {
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : '%s' option without value",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ conf->name = strdup(args[pos + 1]);
+ if (!conf->name) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+ pos++;
+ }
+ else if (strcmp(args[pos], "quiet") == 0)
+ conf->flags |= TRACE_F_QUIET;
+ else if (strcmp(args[pos], "random-parsing") == 0)
+ ; // ignore
+ else if (strcmp(args[pos], "random-forwarding") == 0)
+ conf->flags |= TRACE_F_RAND_FWD;
+ else if (strcmp(args[pos], "hexdump") == 0)
+ conf->flags |= TRACE_F_HEXDUMP;
+ else
+ break;
+ pos++;
+ }
+ *cur_arg = pos;
+ fconf->id = trace_flt_id;
+ fconf->ops = &trace_ops;
+ }
+
+ fconf->conf = conf;
+ return 0;
+
+ error:
+ free(conf->name);
+ free(conf);
+ return -1;
+}
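+
+/* For illustration only: a "filter" line accepted by the parser above,
+ * combining the options it handles (the name is made up):
+ *
+ *   filter trace name MY-TRACE random-forwarding hexdump
+ */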
+
+/* Declare the filter parser for "trace" keyword */
+static struct flt_kw_list flt_kws = { "TRACE", { }, {
+ { "trace", parse_trace_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &flt_kws);
diff --git a/src/freq_ctr.c b/src/freq_ctr.c
new file mode 100644
index 0000000..54aa78f
--- /dev/null
+++ b/src/freq_ctr.c
@@ -0,0 +1,104 @@
+/*
+ * Event rate calculation functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/tools.h>
+
+/* Returns the total number of events over the current + last period, including
+ * a number of already pending events <pend>. The average frequency will be
+ * obtained by dividing the output by <period>. This is essentially made to
+ * ease implementation of higher-level read functions.
+ *
+ * As a special case, if pend < 0, it's assumed there are no pending
+ * events and a flapping correction must be applied at the end. This is used by
+ * read_freq_ctr_period() to avoid reporting ups and downs on low-frequency
+ * events when the past value is <= 1.
+ */
+ullong freq_ctr_total(const struct freq_ctr *ctr, uint period, int pend)
+{
+ ullong curr, past, old_curr, old_past;
+ uint tick, old_tick;
+ int remain;
+
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ past = HA_ATOMIC_LOAD(&ctr->prev_ctr);
+
+ while (1) {
+ if (tick & 0x1) // change in progress
+ goto redo0;
+
+ old_tick = tick;
+ old_curr = curr;
+ old_past = past;
+
+ /* now let's load the values a second time and make sure they
+ * did not change, which will indicate it was a stable reading.
+ */
+
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ if (tick & 0x1) // change in progress
+ goto redo0;
+
+ if (tick != old_tick)
+ goto redo1;
+
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ if (curr != old_curr)
+ goto redo2;
+
+ past = HA_ATOMIC_LOAD(&ctr->prev_ctr);
+ if (past != old_past)
+ goto redo3;
+
+ /* all values match between two loads, they're stable, let's
+ * quit now.
+ */
+ break;
+ redo0:
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ redo1:
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ redo2:
+ past = HA_ATOMIC_LOAD(&ctr->prev_ctr);
+ redo3:
+ __ha_cpu_relax();
+ };
+
+ remain = tick + period - HA_ATOMIC_LOAD(&global_now_ms);
+ if (unlikely(remain < 0)) {
+ /* We're past the first period, check if we can still report a
+ * part of last period or if we're too far away.
+ */
+ remain += period;
+ past = (remain >= 0) ? curr : 0;
+ curr = 0;
+ }
+
+ if (pend < 0) {
+ /* enable flapping correction at very low rates */
+ pend = 0;
+ if (!curr && past <= 1)
+ return past * period;
+ }
+
+ /* compute the total number of confirmed events over the period */
+ return past * remain + (curr + pend) * period;
+}
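+
+/* Worked example with made-up numbers: for period=1000 (ms) with 400ms left
+ * in the current period (remain=400), past=100, curr=50 and pend=0, the
+ * function returns 100*400 + 50*1000 = 90000, i.e. an average of 90 events
+ * per period once divided by <period>.
+ */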
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/frontend.c b/src/frontend.c
new file mode 100644
index 0000000..7b71357
--- /dev/null
+++ b/src/frontend.c
@@ -0,0 +1,312 @@
+/*
+ * Frontend variables and functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/log.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+
+
+/* Finish a stream accept() for a proxy (TCP or HTTP). It returns a negative
+ * value in case of a critical failure which must cause the listener to be
+ * disabled, and a positive or null value in case of success.
+ */
+int frontend_accept(struct stream *s)
+{
+ const struct sockaddr_storage *src, *dst;
+ struct session *sess = s->sess;
+ struct connection *conn = objt_conn(sess->origin);
+ struct listener *l = sess->listener;
+ struct proxy *fe = sess->fe;
+
+ if ((fe->mode == PR_MODE_TCP || fe->mode == PR_MODE_HTTP)
+ && (!LIST_ISEMPTY(&fe->logsrvs))) {
+ if (likely(!LIST_ISEMPTY(&fe->logformat))) {
+ /* we have the client ip */
+ if (s->logs.logwait & LW_CLIP)
+ if (!(s->logs.logwait &= ~(LW_CLIP|LW_INIT)))
+ s->do_log(s);
+ }
+ else if (conn) {
+ src = sc_src(s->scf);
+ if (!src)
+ send_log(fe, LOG_INFO, "Connect from unknown source to listener %d (%s/%s)\n",
+ l->luid, fe->id, (fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+ else {
+ char pn[INET6_ADDRSTRLEN], sn[INET6_ADDRSTRLEN];
+ int port;
+
+ switch (addr_to_str(src, pn, sizeof(pn))) {
+ case AF_INET:
+ case AF_INET6:
+ dst = sc_dst(s->scf);
+ if (dst) {
+ addr_to_str(dst, sn, sizeof(sn));
+ port = get_host_port(dst);
+ } else {
+ strcpy(sn, "undetermined address");
+ port = 0;
+ }
+ send_log(fe, LOG_INFO, "Connect from %s:%d to %s:%d (%s/%s)\n",
+ pn, get_host_port(src),
+ sn, port,
+ fe->id, (fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+ break;
+ case AF_UNIX:
+ /* UNIX socket, only the destination is known */
+ send_log(fe, LOG_INFO, "Connect to unix:%d (%s/%s)\n",
+ l->luid,
+ fe->id, (fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+ break;
+ }
+ }
+ }
+ }
+
+ if (unlikely((global.mode & MODE_DEBUG) && conn &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ char pn[INET6_ADDRSTRLEN];
+ char alpn[16] = "<none>";
+ const char *alpn_str = NULL;
+ int alpn_len;
+
+ /* try to report the ALPN value when available (also works for NPN) */
+ if (conn == sc_conn(s->scf)) {
+ if (conn_get_alpn(conn, &alpn_str, &alpn_len) && alpn_str) {
+ int len = MIN(alpn_len, sizeof(alpn) - 1);
+ memcpy(alpn, alpn_str, len);
+ alpn[len] = 0;
+ }
+ }
+
+ src = sc_src(s->scf);
+ if (!src) {
+ chunk_printf(&trash, "%08x:%s.accept(%04x)=%04x from [listener:%d] ALPN=%s\n",
+ s->uniq_id, fe->id, (unsigned short)l->rx.fd, (unsigned short)conn->handle.fd,
+ l->luid, alpn);
+ }
+ else switch (addr_to_str(src, pn, sizeof(pn))) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_printf(&trash, "%08x:%s.accept(%04x)=%04x from [%s:%d] ALPN=%s\n",
+ s->uniq_id, fe->id, (unsigned short)l->rx.fd, (unsigned short)conn->handle.fd,
+ pn, get_host_port(src), alpn);
+ break;
+ case AF_UNIX:
+ /* UNIX socket, only the destination is known */
+ chunk_printf(&trash, "%08x:%s.accept(%04x)=%04x from [unix:%d] ALPN=%s\n",
+ s->uniq_id, fe->id, (unsigned short)l->rx.fd, (unsigned short)conn->handle.fd,
+ l->luid, alpn);
+ break;
+ }
+
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+
+ if (fe->mode == PR_MODE_HTTP)
+ s->req.flags |= CF_READ_DONTWAIT; /* one read is usually enough */
+
+ if (unlikely(fe->nb_req_cap > 0)) {
+ if ((s->req_cap = pool_zalloc(fe->req_cap_pool)) == NULL)
+ goto out_return; /* no memory */
+ }
+
+ if (unlikely(fe->nb_rsp_cap > 0)) {
+ if ((s->res_cap = pool_zalloc(fe->rsp_cap_pool)) == NULL)
+ goto out_free_reqcap; /* no memory */
+ }
+
+ if ((fe->http_needed || IS_HTX_STRM(s)) && !http_create_txn(s))
+ goto out_free_rspcap;
+
+ /* everything's OK, let's go on */
+ return 1;
+
+ /* Error unrolling */
+ out_free_rspcap:
+ pool_free(fe->rsp_cap_pool, s->res_cap);
+ out_free_reqcap:
+ pool_free(fe->req_cap_pool, s->req_cap);
+ out_return:
+ return -1;
+}
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* set temp integer to the id of the frontend */
+static int
+smp_fetch_fe_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->sess->fe->uuid;
+ return 1;
+}
+
+/* set string to the name of the frontend */
+static int
+smp_fetch_fe_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.str.area = (char *)smp->sess->fe->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+
+/* set string to the name of the default backend */
+static int
+smp_fetch_fe_defbe(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->sess->fe->defbe.be)
+ return 0;
+ smp->data.u.str.area = (char *)smp->sess->fe->defbe.be->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+
+/* set temp integer to the number of HTTP requests per second reaching the frontend.
+ * Accepts exactly 1 argument. The argument is a frontend; other types will
+ * cause undefined behaviour.
+ */
+static int
+smp_fetch_fe_req_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&px->fe_req_per_sec);
+ return 1;
+}
+
+/* set temp integer to the number of connections per second reaching the frontend.
+ * Accepts exactly 1 argument. The argument is a frontend; other types will
+ * cause undefined behaviour.
+ */
+static int
+smp_fetch_fe_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&px->fe_sess_per_sec);
+ return 1;
+}
+
+/* set temp integer to the number of concurrent connections on the frontend
+ * Accepts exactly 1 argument. The argument is a frontend; other types will
+ * cause undefined behaviour.
+ */
+static int
+smp_fetch_fe_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->feconn;
+ return 1;
+}
+
+static int
+smp_fetch_fe_client_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = TICKS_TO_MS(smp->sess->fe->timeout.client);
+ return 1;
+}
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "fe_client_timeout", smp_fetch_fe_client_timeout, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "fe_conn", smp_fetch_fe_conn, ARG1(1,FE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "fe_defbe", smp_fetch_fe_defbe, 0, NULL, SMP_T_STR, SMP_USE_FTEND, },
+ { "fe_id", smp_fetch_fe_id, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "fe_name", smp_fetch_fe_name, 0, NULL, SMP_T_STR, SMP_USE_FTEND, },
+ { "fe_req_rate", smp_fetch_fe_req_rate, ARG1(1,FE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "fe_sess_rate", smp_fetch_fe_sess_rate, ARG1(1,FE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
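+
+/* Hypothetical configuration example (illustration only, section and backend
+ * names assumed) showing how these sample fetches may be used:
+ *
+ *   frontend fe_main
+ *       http-request set-header X-Frontend %[fe_name]
+ *       use_backend bk_slow if { fe_sess_rate(fe_main) gt 100 }
+ */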
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/h1.c b/src/h1.c
new file mode 100644
index 0000000..88a54c4
--- /dev/null
+++ b/src/h1.c
@@ -0,0 +1,1278 @@
+/*
+ * HTTP/1 protocol analyzer
+ *
+ * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+
+#include <import/sha1.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+#include <haproxy/h1.h>
+#include <haproxy/http-hdr.h>
+#include <haproxy/tools.h>
+
+/* Parse the Content-Length header field of an HTTP/1 request. The function
+ * checks all possible occurrences of a comma-delimited value, and verifies
+ * that none of them differs from a previous value. It returns <0 if a value
+ * differs, 0 if the whole header can be dropped (i.e. already known), or >0
+ * if the value can be indexed (first one). In the last case, the value might
+ * be adjusted and the caller must only add the updated value.
+ */
+int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
+{
+ char *e, *n;
+ long long cl;
+ int not_first = !!(h1m->flags & H1_MF_CLEN);
+ struct ist word;
+
+ word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
+ e = value->ptr + value->len;
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (unlikely(HTTP_IS_LWS(*word.ptr)))
+ continue;
+
+ /* digits only now */
+ for (cl = 0, n = word.ptr; n < e; n++) {
+ unsigned int c = *n - '0';
+ if (unlikely(c > 9)) {
+ /* non-digit */
+ if (unlikely(n == word.ptr)) // spaces only
+ goto fail;
+ break;
+ }
+ if (unlikely(cl > ULLONG_MAX / 10ULL))
+ goto fail; /* multiply overflow */
+ cl = cl * 10ULL;
+ if (unlikely(cl + c < cl))
+ goto fail; /* addition overflow */
+ cl = cl + c;
+ }
+
+ /* keep a copy of the exact cleaned value */
+ word.len = n - word.ptr;
+
+ /* skip trailing LWS till next comma or EOL */
+ for (; n < e; n++) {
+ if (!HTTP_IS_LWS(*n)) {
+ if (unlikely(*n != ','))
+ goto fail;
+ break;
+ }
+ }
+
+ /* if duplicate, must be equal */
+ if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
+ goto fail;
+
+ /* OK, store this result as the one to be indexed */
+ h1m->flags |= H1_MF_CLEN;
+ h1m->curr_len = h1m->body_len = cl;
+ *value = word;
+ word.ptr = n;
+ }
+ /* here we've reached the end with a single value or a series of
+ * identical values, all matching previous series if any. The last
+ * parsed value was sent back into <value>. We just have to decide
+ * if this occurrence has to be indexed (it's the first one) or
+ * silently skipped (it's not the first one)
+ */
+ return !not_first;
+ fail:
+ return -1;
+}
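+
+/* Illustration (not part of the parser): a first "Content-Length: 42" is
+ * indexed and returns 1; a later duplicate such as "Content-Length: 42" or
+ * "Content-Length: 42, 42" returns 0 so it can be dropped; a mismatch such
+ * as "Content-Length: 42, 43" returns -1 and must abort the message.
+ */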
+
+/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
+ * "chunked" encoding to perform some checks (it must be the last encoding for
+ * the request and must not be performed twice for any message). The
+ * H1_MF_TE_CHUNKED is set if a valid "chunked" encoding is found. The
+ * H1_MF_TE_OTHER flag is set if any other encoding is found. The H1_MF_XFER_ENC
+ * flag is always set. The H1_MF_CHNK is set when "chunked" encoding is the last
+ * one. Note that transfer codings are case-insensitive (cf RFC7230#4). This
+ * function returns <0 if an error is found, 0 if the whole header can be dropped
+ * (not used yet), or >0 if the value can be indexed.
+ */
+int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
+{
+ char *e, *n;
+ struct ist word;
+
+ h1m->flags |= H1_MF_XFER_ENC;
+
+ word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
+ e = istend(value);
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (HTTP_IS_LWS(*word.ptr))
+ continue;
+
+ n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
+ word.len = n - word.ptr;
+
+ /* trim trailing blanks */
+ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
+ word.len--;
+
+ h1m->flags &= ~H1_MF_CHNK;
+ if (isteqi(word, ist("chunked"))) {
+ if (h1m->flags & H1_MF_TE_CHUNKED) {
+ /* cf RFC7230#3.3.1 : A sender MUST NOT apply
+ * chunked more than once to a message body
+ * (i.e., chunking an already chunked message is
+ * not allowed)
+ */
+ goto fail;
+ }
+ h1m->flags |= (H1_MF_TE_CHUNKED|H1_MF_CHNK);
+ }
+ else {
+ if ((h1m->flags & (H1_MF_RESP|H1_MF_TE_CHUNKED)) == H1_MF_TE_CHUNKED) {
+ /* cf RFC7230#3.3.1 : If any transfer coding
+ * other than chunked is applied to a request
+ * payload body, the sender MUST apply chunked
+ * as the final transfer coding to ensure that
+ * the message is properly framed.
+ */
+ goto fail;
+ }
+ h1m->flags |= H1_MF_TE_OTHER;
+ }
+
+ word.ptr = n;
+ }
+
+ return 1;
+ fail:
+ return -1;
+}
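+
+/* Illustration: in a request, "Transfer-Encoding: gzip, chunked" is accepted
+ * since "chunked" comes last, while "Transfer-Encoding: chunked, gzip" fails
+ * because another coding follows "chunked", and applying "chunked" twice
+ * fails as well.
+ */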
+
+/* Validate the authority and the host header value for the CONNECT method. If
+ * there is a host header, its value is normalized. 0 is returned on success,
+ * -1 if the authority is invalid and -2 if the host is invalid.
+ */
+static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr)
+{
+ struct ist uri_host, uri_port, host, host_port;
+
+ if (!isttest(authority))
+ goto invalid_authority;
+ uri_host = authority;
+ uri_port = http_get_host_port(authority);
+ if (!istlen(uri_port))
+ goto invalid_authority;
+ uri_host.len -= (istlen(uri_port) + 1);
+
+ if (!host_hdr || !isttest(*host_hdr))
+ goto end;
+
+ /* Get the port of the host header value, if any */
+ host = *host_hdr;
+ host_port = http_get_host_port(*host_hdr);
+ if (isttest(host_port))
+ host.len -= (istlen(host_port) + 1);
+
+ if (istlen(host_port)) {
+ if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
+ goto invalid_host;
+ if (http_is_default_port(IST_NULL, uri_port))
+ *host_hdr = host; /* normalize */
+ }
+ else {
+ if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
+ goto invalid_host;
+ }
+
+ end:
+ return 0;
+
+ invalid_authority:
+ return -1;
+
+ invalid_host:
+ return -2;
+}
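+
+/* Illustration: "CONNECT example.com:443 HTTP/1.1" with a matching
+ * "Host: example.com:443" passes; an authority without an explicit port
+ * returns -1, and a host value such as "other.com:443" returns -2.
+ */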
+
+
+/* Validate the authority and the host header value for non-CONNECT methods,
+ * when an absolute-URI is detected but does not exactly match the host
+ * value. The idea is to detect default ports (http or https). Both authority
+ * and host are defined here. 0 is returned on success, -1 if the host does
+ * not match the authority.
+ */
+static int h1_validate_mismatch_authority(struct ist scheme, struct ist authority, struct ist host_hdr)
+{
+ struct ist uri_host, uri_port, host, host_port;
+
+ if (!isttest(scheme))
+ goto mismatch;
+
+ uri_host = authority;
+ uri_port = http_get_host_port(authority);
+ if (isttest(uri_port))
+ uri_host.len -= (istlen(uri_port) + 1);
+
+ host = host_hdr;
+ host_port = http_get_host_port(host_hdr);
+ if (isttest(host_port))
+ host.len -= (istlen(host_port) + 1);
+
+ if (!isttest(uri_port) && !isttest(host_port)) {
+ /* No port on both: we already know the authority does not match
+ * the host value
+ */
+ goto mismatch;
+ }
+ else if (isttest(uri_port) && !http_is_default_port(scheme, uri_port)) {
+ /* here there is no port for the host value and the port for the
+ * authority is not the default one
+ */
+ goto mismatch;
+ }
+ else if (isttest(host_port) && !http_is_default_port(scheme, host_port)) {
+ /* here there is no port for the authority and the port for the
+ * host value is not the default one
+ */
+ goto mismatch;
+ }
+ else {
+ /* the authority or the host value contain a default port and
+ * there is no port on the other value
+ */
+ if (!isteqi(uri_host, host))
+ goto mismatch;
+ }
+
+ return 0;
+
+ mismatch:
+ return -1;
+}
+
+
+/* Parse the Connection: header of an HTTP/1 request, looking for "close",
+ * "keep-alive", and "upgrade" values, and updating h1m->flags according to
+ * what was found there. Note that flags are only added, not removed, so the
+ * function is safe for being called multiple times if multiple occurrences
+ * are found. If the flag H1_MF_CLEAN_CONN_HDR is set, the header value is
+ * cleaned up by removing the "keep-alive" and "close" values. To do so, the
+ * header value is rewritten in place and its length is updated.
+ */
+void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
+{
+ char *e, *n, *p;
+ struct ist word;
+
+ word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
+ p = value->ptr;
+ e = value->ptr + value->len;
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
+ value->len = 0;
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (HTTP_IS_LWS(*word.ptr))
+ continue;
+
+ n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
+ word.len = n - word.ptr;
+
+ /* trim trailing blanks */
+ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
+ word.len--;
+
+ if (isteqi(word, ist("keep-alive"))) {
+ h1m->flags |= H1_MF_CONN_KAL;
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
+ goto skip_val;
+ }
+ else if (isteqi(word, ist("close"))) {
+ h1m->flags |= H1_MF_CONN_CLO;
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
+ goto skip_val;
+ }
+ else if (isteqi(word, ist("upgrade")))
+ h1m->flags |= H1_MF_CONN_UPG;
+
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
+ if (value->ptr + value->len == p) {
+ /* no rewrite done till now */
+ value->len = n - value->ptr;
+ }
+ else {
+ if (value->len)
+ value->ptr[value->len++] = ',';
+ istcat(value, word, e - value->ptr);
+ }
+ }
+
+ skip_val:
+ word.ptr = p = n;
+ }
+}
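+
+/* Illustration: with H1_MF_CLEAN_CONN_HDR set, the value
+ * "keep-alive, upgrade" is rewritten in place as "upgrade", while the flags
+ * H1_MF_CONN_KAL and H1_MF_CONN_UPG are both set on the parser.
+ */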
+
+/* Parse the Upgrade: header of an HTTP/1 request.
+ * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
+ */
+void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
+{
+ char *e, *n;
+ struct ist word;
+
+ h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
+
+ word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
+ e = istend(value);
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (HTTP_IS_LWS(*word.ptr))
+ continue;
+
+ n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
+ word.len = n - word.ptr;
+
+ /* trim trailing blanks */
+ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
+ word.len--;
+
+ if (isteqi(word, ist("websocket")))
+ h1m->flags |= H1_MF_UPG_WEBSOCKET;
+
+ word.ptr = n;
+ }
+}
+
+/* Macros used in the HTTP/1 parser, to check for the expected presence of
+ * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
+ */
+
+/* Expects to find an LF at <ptr>. If not, set <state> to <where> and jump to
+ * <bad>.
+ */
+#define EXPECT_LF_HERE(ptr, bad, state, where) \
+ do { \
+ if (unlikely(*(ptr) != '\n')) { \
+ state = (where); \
+ goto bad; \
+ } \
+ } while (0)
+
+/* Increments pointer <ptr>, continues to label <more> if it's still below
+ * pointer <end>, or goes to <stop> and sets <state> to <where> if the end
+ * of buffer was reached.
+ */
+#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where) \
+ do { \
+ if (likely(++(ptr) < (end))) \
+ goto more; \
+ else { \
+ state = (where); \
+ goto stop; \
+ } \
+ } while (0)
+
+/* This function parses a contiguous HTTP/1 headers block starting at <start>
+ * and ending before <stop>, at once, and converts it into a list of (name,value)
+ * pairs representing header fields into the array <hdr> of size <hdr_num>,
+ * whose last entry will have an empty name and an empty value. If <hdr_num> is
+ * too small to represent the whole message, an error is returned. Some
+ * protocol elements such as content-length and transfer-encoding will be
+ * parsed and stored into h1m as well. <hdr> may be null, in which case only
+ * the parsing state will be updated. This may be used to restart the parsing
+ * where it stopped for example.
+ *
+ * For now it's limited to the response. If the header block is incomplete,
+ * 0 is returned, waiting to be called again with more data to try it again.
+ * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
+ * and h1m->next to zero on the first call, the parser will do the rest. If
+ * an incomplete message is seen, the caller only needs to present h1m->state
+ * and h1m->next again, with an empty header list so that the parser can start
+ * again. In this case, it will detect that it interrupted a previous session
+ * and will first look for the end of the message before reparsing it again and
+ * indexing it at the same time. This ensures that incomplete messages fed 1
+ * character at a time are never processed entirely more than exactly twice,
+ * and that there is no need to store all the internal state and pre-parsed
+ * headers or start line between calls.
+ *
+ * A pointer to a start line descriptor may be passed in <slp>, in which case
+ * the parser will fill it with whatever it found.
+ *
+ * The code is derived from the main HTTP/1 parser but was simplified and
+ * optimized to process responses produced or forwarded by haproxy. The caller
+ * is responsible for ensuring that the message doesn't wrap, and should ensure
+ * it is complete to avoid having to retry the operation after a failed
+ * attempt. The message is not supposed to be invalid, which is why a few
+ * properties such as the character set used in the header field names are not
+ * checked. In case of an unparsable response message, a negative value will be
+ * returned with h1m->err_pos and h1m->err_state matching the location and
+ * state where the error was met. Leading blank lines are tolerated but not
+ * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
+ * parsed and the start line is skipped. It is not required to set h1m->state
+ * nor h1m->next in this case.
+ *
+ * This function returns :
+ * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
+ * set) with the state the error occurred in and h1m->err_pos with the
+ * position relative to <start>
+ * -2 if the output is full (hdr_num reached). err_state and err_pos also
+ * indicate where it failed.
+ * 0 in case of missing data.
+ * > 0 on success, it then corresponds to the number of bytes read since
+ * <start> so that the caller can go on with the payload.
+ */
+int h1_headers_to_hdr_list(char *start, const char *stop,
+ struct http_hdr *hdr, unsigned int hdr_num,
+ struct h1m *h1m, union h1_sl *slp)
+{
+ enum h1m_state state;
+ register char *ptr;
+ register const char *end;
+ unsigned int hdr_count;
+ unsigned int skip; /* number of bytes skipped at the beginning */
+ unsigned int sol; /* start of line */
+ unsigned int col; /* position of the colon */
+ unsigned int eol; /* end of line */
+ unsigned int sov; /* start of value */
+ union h1_sl sl;
+ int skip_update;
+ int restarting;
+ int host_idx;
+ struct ist n, v; /* header name and value during parsing */
+
+ skip = 0; // do it only once to keep track of the leading CRLF.
+
+ try_again:
+ hdr_count = sol = col = eol = sov = 0;
+ sl.st.status = 0;
+ skip_update = restarting = 0;
+ host_idx = -1;
+
+ if (h1m->flags & H1_MF_HDRS_ONLY) {
+ state = H1_MSG_HDR_FIRST;
+ h1m->next = 0;
+ }
+ else {
+ state = h1m->state;
+ if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
+ restarting = 1;
+ }
+
+ ptr = start + h1m->next;
+ end = stop;
+
+ if (unlikely(ptr >= end))
+ goto http_msg_ood;
+
+ /* don't update output if hdr is NULL or if we're restarting */
+ if (!hdr || restarting)
+ skip_update = 1;
+
+ switch (state) {
+ case H1_MSG_RQBEFORE:
+ http_msg_rqbefore:
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ /* we have a start of message, we may have skipped some
+ * heading CRLF. Skip them now.
+ */
+ skip += ptr - start;
+ start = ptr;
+
+ sol = 0;
+ sl.rq.m.ptr = ptr;
+ hdr_count = 0;
+ state = H1_MSG_RQMETH;
+ goto http_msg_rqmeth;
+ }
+
+ if (unlikely(!HTTP_IS_CRLF(*ptr))) {
+ state = H1_MSG_RQBEFORE;
+ goto http_msg_invalid;
+ }
+
+ if (unlikely(*ptr == '\n'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
+ /* stop here */
+
+ case H1_MSG_RQBEFORE_CR:
+ http_msg_rqbefore_cr:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
+ /* stop here */
+
+ case H1_MSG_RQMETH:
+ http_msg_rqmeth:
+ if (likely(HTTP_IS_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.rq.m.len = ptr - sl.rq.m.ptr;
+ sl.rq.meth = find_http_meth(start, sl.rq.m.len);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
+ }
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ /* HTTP 0.9 request */
+ sl.rq.m.len = ptr - sl.rq.m.ptr;
+ sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
+ http_msg_req09_uri:
+ sl.rq.u.ptr = ptr;
+ http_msg_req09_uri_e:
+ sl.rq.u.len = ptr - sl.rq.u.ptr;
+ http_msg_req09_ver:
+ sl.rq.v = ist2(ptr, 0);
+ goto http_msg_rqline_eol;
+ }
+ state = H1_MSG_RQMETH;
+ goto http_msg_invalid;
+
+ case H1_MSG_RQMETH_SP:
+ http_msg_rqmeth_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.rq.u.ptr = ptr;
+ goto http_msg_rquri;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_uri;
+
+ case H1_MSG_RQURI:
+ http_msg_rquri:
+#ifdef HA_UNALIGNED_LE
+ /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
+ while (ptr <= end - sizeof(int)) {
+ int x = *(int *)ptr - 0x21212121;
+ if (x & 0x80808080)
+ break;
+
+ x -= 0x5e5e5e5e;
+ if (!(x & 0x80808080))
+ break;
+
+ ptr += sizeof(int);
+ }
+#endif
+ if (ptr >= end) {
+ state = H1_MSG_RQURI;
+ goto http_msg_ood;
+ }
+ http_msg_rquri2:
+ if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.rq.u.len = ptr - sl.rq.u.ptr;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
+ }
+ if (likely((unsigned char)*ptr >= 128)) {
+ /* non-ASCII chars are forbidden unless option
+ * accept-invalid-http-request is enabled in the frontend.
+ * In any case, we capture the faulty char.
+ */
+ if (h1m->err_pos < -1)
+ goto invalid_char;
+ if (h1m->err_pos == -1)
+ h1m->err_pos = ptr - start + skip;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
+ }
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_uri_e;
+ }
+
+ /* OK forbidden chars, 0..31 or 127 */
+ invalid_char:
+ state = H1_MSG_RQURI;
+ goto http_msg_invalid;
+
+ case H1_MSG_RQURI_SP:
+ http_msg_rquri_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.rq.v.ptr = ptr;
+ goto http_msg_rqver;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_ver;
+
+
+ case H1_MSG_RQVER:
+ http_msg_rqver:
+ if (likely(HTTP_IS_VER_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ sl.rq.v.len = ptr - sl.rq.v.ptr;
+ http_msg_rqline_eol:
+ /* We have seen the end of line. Note that we do not
+ * necessarily have the \n yet, but at least we know that we
+ * have EITHER \r OR \n, otherwise the request would not be
+ * complete. We can then record the request length and return
+ * to the caller which will be able to register it.
+ */
+
+ if (likely(!skip_update)) {
+ if ((sl.rq.v.len == 8) &&
+ (*(sl.rq.v.ptr + 5) > '1' ||
+ (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_RQVER;
+ goto http_output_full;
+ }
+ if (!(h1m->flags & H1_MF_NO_PHDR))
+ http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
+
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_RQVER;
+ goto http_output_full;
+ }
+ if (!(h1m->flags & H1_MF_NO_PHDR))
+ http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
+ }
+
+ sol = ptr - start;
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
+ goto http_msg_rqline_end;
+ }
+
+ /* neither an HTTP_VER token nor a CRLF */
+ state = H1_MSG_RQVER;
+ goto http_msg_invalid;
+
+ case H1_MSG_RQLINE_END:
+ http_msg_rqline_end:
+ /* check for HTTP/0.9 request : no version information
+ * available. sol must point to the first of CR or LF. However
+ * since we don't save these elements between calls, if we come
+ * here from a restart, we don't necessarily know. Thus in this
+ * case we simply start over.
+ */
+ if (restarting)
+ goto restart;
+
+ if (unlikely(sl.rq.v.len == 0))
+ goto http_msg_last_lf;
+
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
+ /* stop here */
+
+ /*
+ * Common states below
+ */
+ case H1_MSG_RPBEFORE:
+ http_msg_rpbefore:
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ /* we have a start of message, we may have skipped some
+ * heading CRLF. Skip them now.
+ */
+ skip += ptr - start;
+ start = ptr;
+
+ sol = 0;
+ sl.st.v.ptr = ptr;
+ hdr_count = 0;
+ state = H1_MSG_RPVER;
+ goto http_msg_rpver;
+ }
+
+ if (unlikely(!HTTP_IS_CRLF(*ptr))) {
+ state = H1_MSG_RPBEFORE;
+ goto http_msg_invalid;
+ }
+
+ if (unlikely(*ptr == '\n'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
+ /* stop here */
+
+ case H1_MSG_RPBEFORE_CR:
+ http_msg_rpbefore_cr:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
+ /* stop here */
+
+ case H1_MSG_RPVER:
+ http_msg_rpver:
+ if (likely(HTTP_IS_VER_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.st.v.len = ptr - sl.st.v.ptr;
+
+ if ((sl.st.v.len == 8) &&
+ (*(sl.st.v.ptr + 5) > '1' ||
+ (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
+ }
+ state = H1_MSG_RPVER;
+ goto http_msg_invalid;
+
+ case H1_MSG_RPVER_SP:
+ http_msg_rpver_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.st.status = 0;
+ sl.st.c.ptr = ptr;
+ goto http_msg_rpcode;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
+ /* so it's a CR/LF, this is invalid */
+ state = H1_MSG_RPVER_SP;
+ goto http_msg_invalid;
+
+ case H1_MSG_RPCODE:
+ http_msg_rpcode:
+ if (likely(HTTP_IS_DIGIT(*ptr))) {
+ sl.st.status = sl.st.status * 10 + *ptr - '0';
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
+ }
+
+ if (unlikely(!HTTP_IS_LWS(*ptr))) {
+ state = H1_MSG_RPCODE;
+ goto http_msg_invalid;
+ }
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.st.c.len = ptr - sl.st.c.ptr;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
+ }
+
+ /* so it's a CR/LF, so there is no reason phrase */
+ sl.st.c.len = ptr - sl.st.c.ptr;
+
+ http_msg_rsp_reason:
+ sl.st.r = ist2(ptr, 0);
+ goto http_msg_rpline_eol;
+
+ case H1_MSG_RPCODE_SP:
+ http_msg_rpcode_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.st.r.ptr = ptr;
+ goto http_msg_rpreason;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
+ /* so it's a CR/LF, so there is no reason phrase */
+ goto http_msg_rsp_reason;
+
+ case H1_MSG_RPREASON:
+ http_msg_rpreason:
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
+ sl.st.r.len = ptr - sl.st.r.ptr;
+ http_msg_rpline_eol:
+ /* We have seen the end of line. Note that we do not
+ * necessarily have the \n yet, but at least we know that we
+ * have EITHER \r OR \n, otherwise the response would not be
+ * complete. We can then record the response length and return
+ * to the caller which will be able to register it.
+ */
+
+ if (likely(!skip_update)) {
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_RPREASON;
+ goto http_output_full;
+ }
+ if (!(h1m->flags & H1_MF_NO_PHDR))
+ http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
+ }
+
+ sol = ptr - start;
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
+ goto http_msg_rpline_end;
+
+ case H1_MSG_RPLINE_END:
+ http_msg_rpline_end:
+ /* sol must point to the first of CR or LF. */
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
+ /* stop here */
+
+ case H1_MSG_HDR_FIRST:
+ http_msg_hdr_first:
+ sol = ptr - start;
+ if (likely(!HTTP_IS_CRLF(*ptr))) {
+ goto http_msg_hdr_name;
+ }
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
+ goto http_msg_last_lf;
+
+ case H1_MSG_HDR_NAME:
+ http_msg_hdr_name:
+ /* assumes sol points to the first char */
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ if (!skip_update) {
+ /* turn it to lower case if needed */
+ if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
+ *ptr = tolower((unsigned char)*ptr);
+ }
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
+ }
+
+ if (likely(*ptr == ':')) {
+ col = ptr - start;
+ if (col <= sol) {
+ state = H1_MSG_HDR_NAME;
+ goto http_msg_invalid;
+ }
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
+ }
+
+ if (likely(h1m->err_pos < -1) || *ptr == '\n') {
+ state = H1_MSG_HDR_NAME;
+ goto http_msg_invalid;
+ }
+
+ if (h1m->err_pos == -1) /* capture the error pointer */
+ h1m->err_pos = ptr - start + skip; /* >= 0 now */
+
+ /* and we still accept this non-token character */
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
+
+ case H1_MSG_HDR_L1_SP:
+ http_msg_hdr_l1_sp:
+ /* assumes sol points to the first char */
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
+
+ /* header value can be basically anything except CR/LF */
+ sov = ptr - start;
+
+ if (likely(!HTTP_IS_CRLF(*ptr))) {
+ goto http_msg_hdr_val;
+ }
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
+ goto http_msg_hdr_l1_lf;
+
+ case H1_MSG_HDR_L1_LF:
+ http_msg_hdr_l1_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
+
+ case H1_MSG_HDR_L1_LWS:
+ http_msg_hdr_l1_lws:
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ if (!skip_update) {
+ /* replace HT,CR,LF with spaces */
+ for (; start + sov < ptr; sov++)
+ start[sov] = ' ';
+ }
+ goto http_msg_hdr_l1_sp;
+ }
+ /* we had a header consisting only of spaces ! */
+ eol = sov;
+ goto http_msg_complete_header;
+
+ case H1_MSG_HDR_VAL:
+ http_msg_hdr_val:
+ /* assumes sol points to the first char, and sov
+ * points to the first character of the value.
+ */
+
+ /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
+ * and lower. In fact since most of the time is spent in the loop, we
+ * also remove the sign bit test so that bytes 0x8e..0x0d break the
+ * loop, but we don't care since they're very rare in header values.
+ */
+#ifdef HA_UNALIGNED_LE64
+ while (ptr <= end - sizeof(long)) {
+ if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
+ goto http_msg_hdr_val2;
+ ptr += sizeof(long);
+ }
+#endif
+#ifdef HA_UNALIGNED_LE
+ while (ptr <= end - sizeof(int)) {
+ if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
+ goto http_msg_hdr_val2;
+ ptr += sizeof(int);
+ }
+#endif
+ if (ptr >= end) {
+ state = H1_MSG_HDR_VAL;
+ goto http_msg_ood;
+ }
+ http_msg_hdr_val2:
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
+
+ eol = ptr - start;
+ /* Note: we could also copy eol into ->eoh so that we have the
+ * real header end in case it ends with lots of LWS, but is this
+ * really needed ?
+ */
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
+ goto http_msg_hdr_l2_lf;
+
+ case H1_MSG_HDR_L2_LF:
+ http_msg_hdr_l2_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
+
+ case H1_MSG_HDR_L2_LWS:
+ http_msg_hdr_l2_lws:
+ if (unlikely(HTTP_IS_SPHT(*ptr))) {
+ if (!skip_update) {
+ /* LWS: replace HT,CR,LF with spaces */
+ for (; start + eol < ptr; eol++)
+ start[eol] = ' ';
+ }
+ goto http_msg_hdr_val;
+ }
+ http_msg_complete_header:
+ /*
+ * It was a new header, so the last one is finished. Assumes
+ * <sol> points to the first char of the name, <col> to the
+ * colon, <sov> points to the first character of the value and
+ * <eol> to the first CR or LF so we know how the line ends. We
+ * will trim spaces around the value. It's possible to do it by
+ * adjusting <eol> and <sov> which are no more used after this.
+ * We can add the header field to the list.
+ */
+ if (likely(!skip_update)) {
+ while (sov < eol && HTTP_IS_LWS(start[sov]))
+ sov++;
+
+ while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
+ eol--;
+
+
+ n = ist2(start + sol, col - sol);
+ v = ist2(start + sov, eol - sov);
+
+ do {
+ int ret;
+
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_HDR_L2_LWS;
+ goto http_output_full;
+ }
+
+ if (isteqi(n, ist("transfer-encoding"))) {
+ ret = h1_parse_xfer_enc_header(h1m, v);
+ if (ret < 0) {
+ state = H1_MSG_HDR_L2_LWS;
+ ptr = v.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ else if (ret == 0) {
+ /* skip it */
+ break;
+ }
+ }
+ else if (isteqi(n, ist("content-length"))) {
+ ret = h1_parse_cont_len_header(h1m, &v);
+
+ if (ret < 0) {
+ state = H1_MSG_HDR_L2_LWS;
+ ptr = v.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ else if (ret == 0) {
+ /* skip it */
+ break;
+ }
+ }
+ else if (isteqi(n, ist("connection"))) {
+ h1_parse_connection_header(h1m, &v);
+ if (!v.len) {
+ /* skip it */
+ break;
+ }
+ }
+ else if (isteqi(n, ist("upgrade"))) {
+ h1_parse_upgrade_header(h1m, v);
+ }
+ else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
+ if (host_idx == -1)
+ host_idx = hdr_count;
+ else {
+ if (!isteqi(v, hdr[host_idx].v)) {
+ state = H1_MSG_HDR_L2_LWS;
+ ptr = v.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ /* if the same host, skip it */
+ break;
+ }
+ }
+
+ http_set_hdr(&hdr[hdr_count++], n, v);
+ } while (0);
+ }
+
+ sol = ptr - start;
+
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ goto http_msg_hdr_name;
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
+ goto http_msg_last_lf;
+
+ case H1_MSG_LAST_LF:
+ http_msg_last_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
+ ptr++;
+ /* <ptr> now points to the first byte of payload. If needed sol
+ * still points to the first of either CR or LF of the empty
+ * line ending the headers block.
+ */
+ if (likely(!skip_update)) {
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_LAST_LF;
+ goto http_output_full;
+ }
+ http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
+ }
+
+ /* reaching here we've parsed the whole message. We may detect
+ * that we were already continuing an interrupted parsing pass
+ * so we were silently looking for the end of message not
+ * updating anything before deciding to parse it fully at once.
+ * It's guaranteed that we won't match this test twice in a row
+ * since <restarting> will have been reset to zero.
+ */
+ if (restarting)
+ goto restart;
+
+
+ if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
+ struct http_uri_parser parser = http_uri_parser_init(sl.rq.u);
+ struct ist scheme, authority;
+ int ret;
+
+ scheme = http_parse_scheme(&parser);
+ authority = http_parse_authority(&parser, 1);
+ if (sl.rq.meth == HTTP_METH_CONNECT) {
+ struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
+
+ ret = h1_validate_connect_authority(authority, host);
+ if (ret < 0) {
+ if (h1m->err_pos < -1) {
+ state = H1_MSG_LAST_LF;
+ /* WT: gcc seems to see a path where sl.rq.u.ptr was used
+ * uninitialized, but it doesn't know that the function is
+ * called with initial states making this impossible.
+ */
+ ALREADY_CHECKED(sl.rq.u.ptr);
+ ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ if (h1m->err_pos == -1) /* capture the error pointer */
+ h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
+ }
+ }
+ else if (host_idx != -1 && istlen(authority)) {
+ struct ist host = hdr[host_idx].v;
+
+ /* For non-CONNECT method, the authority must match the host header value */
+ if (!isteqi(authority, host)) {
+ ret = h1_validate_mismatch_authority(scheme, authority, host);
+ if (ret < 0) {
+ if (h1m->err_pos < -1) {
+ state = H1_MSG_LAST_LF;
+ ptr = host.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ if (h1m->err_pos == -1) /* capture the error pointer */
+ h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
+ }
+ }
+ }
+ }
+
+ state = H1_MSG_DATA;
+ if (h1m->flags & H1_MF_XFER_ENC) {
+ if (h1m->flags & H1_MF_CLEN) {
+ /* T-E + C-L: force close and remove C-L */
+ h1m->flags |= H1_MF_CONN_CLO;
+ h1m->flags &= ~H1_MF_CLEN;
+ hdr_count = http_del_hdr(hdr, ist("content-length"));
+ }
+ else if (!(h1m->flags & H1_MF_VER_11)) {
+ /* T-E + HTTP/1.0: force close */
+ h1m->flags |= H1_MF_CONN_CLO;
+ }
+
+ if (h1m->flags & H1_MF_CHNK)
+ state = H1_MSG_CHUNK_SIZE;
+ else if (!(h1m->flags & H1_MF_RESP)) {
+ /* cf RFC7230#3.3.3 : transfer-encoding in
+ * request without chunked encoding is invalid.
+ */
+ goto http_msg_invalid;
+ }
+ }
+
+ break;
+
+ default:
+ /* impossible states */
+ goto http_msg_invalid;
+ }
+
+ /* Now we've left the headers state and are either in H1_MSG_DATA or
+ * H1_MSG_CHUNK_SIZE.
+ */
+
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->state = state;
+ h1m->next = ptr - start + skip;
+ return h1m->next;
+
+ http_msg_ood:
+ /* out of data at <ptr> during state <state> */
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->state = state;
+ h1m->next = ptr - start + skip;
+ return 0;
+
+ http_msg_invalid:
+ /* invalid message, error at <ptr> */
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->err_state = h1m->state = state;
+ h1m->err_pos = h1m->next = ptr - start + skip;
+ return -1;
+
+ http_output_full:
+ /* no more room to store the current header, error at <ptr> */
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->err_state = h1m->state = state;
+ h1m->err_pos = h1m->next = ptr - start + skip;
+ return -2;
+
+ restart:
+ h1m->flags &= H1_MF_RESTART_MASK;
+ h1m->curr_len = h1m->body_len = h1m->next = 0;
+ if (h1m->flags & H1_MF_RESP)
+ h1m->state = H1_MSG_RPBEFORE;
+ else
+ h1m->state = H1_MSG_RQBEFORE;
+ goto try_again;
+}
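+
+/* Usage sketch (illustrative only, <buf>/<len> assumed): parsing a complete
+ * response head held in a flat buffer, using the h1m_init_res() initializer
+ * from h1.h:
+ *
+ *   struct http_hdr hdr[32];
+ *   union h1_sl sl;
+ *   struct h1m h1m;
+ *   int ret;
+ *
+ *   h1m_init_res(&h1m);
+ *   ret = h1_headers_to_hdr_list(buf, buf + len, hdr,
+ *                                sizeof(hdr) / sizeof(hdr[0]), &h1m, &sl);
+ *   // ret > 0: bytes consumed, hdr[] filled; 0: incomplete; <0: error
+ */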
+
+/* This function performs a very minimal parsing of the trailers block present
+ * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of
+ * bytes to delete to skip the trailers. It may return 0 if it's missing some
+ * input data, or < 0 in case of parse error (in which case the caller may have
+ * to decide how to proceed, possibly eating everything).
+ */
+int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
+{
+ const char *stop = b_peek(buf, ofs + max);
+ int count = ofs;
+
+ while (1) {
+ const char *p1 = NULL, *p2 = NULL;
+ const char *start = b_peek(buf, count);
+ const char *ptr = start;
+
+ /* scan current line and stop at LF or CRLF */
+ while (1) {
+ if (ptr == stop)
+ return 0;
+
+ if (*ptr == '\n') {
+ if (!p1)
+ p1 = ptr;
+ p2 = ptr;
+ break;
+ }
+
+ if (*ptr == '\r') {
+ if (p1)
+ return -1;
+ p1 = ptr;
+ }
+
+ ptr = b_next(buf, ptr);
+ }
+
+ /* after LF; point to beginning of next line */
+ p2 = b_next(buf, p2);
+ count += b_dist(buf, start, p2);
+
+ /* LF/CRLF at beginning of line => end of trailers at p2.
+ * Everything was scheduled for forwarding, there's nothing left
+ * from this message. */
+ if (p1 == start)
+ break;
+ /* OK, next line then */
+ }
+ return count - ofs;
+}
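+
+/* Illustration: starting right after the last chunk's CRLF, the trailers
+ * block "X-Trailer: 1\r\n\r\n" makes the function return 16, while an empty
+ * trailers block "\r\n" returns 2, and a block missing its final empty line
+ * returns 0 until more data arrives.
+ */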
+
+/* Generate a random key for a WebSocket handshake, in accordance with
+ * RFC 6455. The key is 128 bits long, encoded as a base64 string into the
+ * <key_out> parameter (25 bytes long).
+ */
+void h1_generate_random_ws_input_key(char key_out[25])
+{
+ /* generate a random websocket key */
+ const uint64_t rand1 = ha_random64(), rand2 = ha_random64();
+ char key[16];
+
+ memcpy(key, &rand1, 8);
+ memcpy(&key[8], &rand2, 8);
+ a2base64(key, 16, key_out, 25);
+}
+
+#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
+
+/*
+ * Calculate the WebSocket handshake response key from <key_in>. Following the
+ * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
+ * as a 29 bytes long string.
+ */
+void h1_calculate_ws_output_key(const char *key, char *result)
+{
+ blk_SHA_CTX sha1_ctx;
+ char hash_in[60], hash_out[20];
+
+ /* concatenate the key with a fixed suffix */
+ memcpy(hash_in, key, 24);
+ memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
+
+ /* sha1 the result */
+ blk_SHA1_Init(&sha1_ctx);
+ blk_SHA1_Update(&sha1_ctx, hash_in, 60);
+ blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
+
+ /* encode the hash in base64 */
+ a2base64(hash_out, 20, result, 29);
+}
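+
+/* Usage sketch (illustrative only): a WebSocket handshake would generate the
+ * request key on one side and derive the expected accept key on the other:
+ *
+ *   char req_key[25], acc_key[29];
+ *
+ *   h1_generate_random_ws_input_key(req_key);      // Sec-WebSocket-Key
+ *   h1_calculate_ws_output_key(req_key, acc_key);  // Sec-WebSocket-Accept
+ */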
diff --git a/src/h1_htx.c b/src/h1_htx.c
new file mode 100644
index 0000000..cb41448
--- /dev/null
+++ b/src/h1_htx.c
@@ -0,0 +1,1072 @@
+/*
+ * Functions to manipulate H1 messages using the internal representation.
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/tools.h>
+
+/* Estimate the size of the HTX headers after the parsing, including the EOH. */
+static size_t h1_eval_htx_hdrs_size(const struct http_hdr *hdrs)
+{
+ size_t sz = 0;
+ int i;
+
+ for (i = 0; hdrs[i].n.len; i++)
+ sz += sizeof(struct htx_blk) + hdrs[i].n.len + hdrs[i].v.len;
+ sz += sizeof(struct htx_blk) + 1;
+ return sz;
+}
+
+/* Estimate the size of the HTX request after the parsing. */
+static size_t h1_eval_htx_size(const struct ist p1, const struct ist p2, const struct ist p3,
+ const struct http_hdr *hdrs)
+{
+ size_t sz;
+
+ /* size of the HTX start-line */
+ sz = sizeof(struct htx_blk) + sizeof(struct htx_sl) + p1.len + p2.len + p3.len;
+ sz += h1_eval_htx_hdrs_size(hdrs);
+ return sz;
+}
+
+/* Check the validity of the request version. If the version is valid, it
+ * returns 1. Otherwise, it returns 0.
+ */
+static int h1_process_req_vsn(struct h1m *h1m, union h1_sl *sl)
+{
+ /* RFC7230#2.6 has enforced the format of the HTTP version string to be
+ * exactly one digit "." one digit. This check may be disabled using
+ * option accept-invalid-http-request.
+ */
+ if (h1m->err_pos == -2) { /* PR_O2_REQBUG_OK not set */
+ if (sl->rq.v.len != 8)
+ return 0;
+
+ if (!istnmatch(sl->rq.v, ist("HTTP/"), 5) ||
+ !isdigit((unsigned char)*(sl->rq.v.ptr + 5)) ||
+ *(sl->rq.v.ptr + 6) != '.' ||
+ !isdigit((unsigned char)*(sl->rq.v.ptr + 7)))
+ return 0;
+ }
+ else if (!sl->rq.v.len) {
+ /* try to convert HTTP/0.9 requests to HTTP/1.0 */
+
+ /* RFC 1945 allows only GET for HTTP/0.9 requests */
+ if (sl->rq.meth != HTTP_METH_GET)
+ return 0;
+
+ /* HTTP/0.9 requests *must* have a request URI, per RFC 1945 */
+ if (!sl->rq.u.len)
+ return 0;
+
+ /* Add HTTP version */
+ sl->rq.v = ist("HTTP/1.0");
+ return 1;
+ }
+
+ if ((sl->rq.v.len == 8) &&
+ ((*(sl->rq.v.ptr + 5) > '1') ||
+ ((*(sl->rq.v.ptr + 5) == '1') && (*(sl->rq.v.ptr + 7) >= '1'))))
+ h1m->flags |= H1_MF_VER_11;
+ return 1;
+}
+
+/* Check the validity of the response version. If the version is valid, it
+ * returns 1. Otherwise, it returns 0.
+ */
+static int h1_process_res_vsn(struct h1m *h1m, union h1_sl *sl)
+{
+ /* RFC7230#2.6 has enforced the format of the HTTP version string to be
+ * exactly one digit "." one digit. This check may be disabled using
+ * option accept-invalid-http-response.
+ */
+ if (h1m->err_pos == -2) { /* PR_O2_RSPBUG_OK not set */
+ if (sl->st.v.len != 8)
+ return 0;
+
+ if (*(sl->st.v.ptr + 4) != '/' ||
+ !isdigit((unsigned char)*(sl->st.v.ptr + 5)) ||
+ *(sl->st.v.ptr + 6) != '.' ||
+ !isdigit((unsigned char)*(sl->st.v.ptr + 7)))
+ return 0;
+ }
+
+ if ((sl->st.v.len == 8) &&
+ ((*(sl->st.v.ptr + 5) > '1') ||
+ ((*(sl->st.v.ptr + 5) == '1') && (*(sl->st.v.ptr + 7) >= '1'))))
+ h1m->flags |= H1_MF_VER_11;
+
+ return 1;
+}
+
+/* Convert H1M flags to HTX start-line flags. */
+static unsigned int h1m_htx_sl_flags(struct h1m *h1m)
+{
+ unsigned int flags = HTX_SL_F_NONE;
+
+ if (h1m->flags & H1_MF_RESP)
+ flags |= HTX_SL_F_IS_RESP;
+ if (h1m->flags & H1_MF_VER_11)
+ flags |= HTX_SL_F_VER_11;
+ if (h1m->flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m->flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m->flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m->flags & H1_MF_CLEN) {
+ flags |= HTX_SL_F_CLEN;
+ if (h1m->body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ else
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ if (h1m->flags & H1_MF_CONN_UPG)
+ flags |= HTX_SL_F_CONN_UPG;
+ return flags;
+}
+
+/* Postprocess the parsed headers for a request and convert them into an htx
+ * message. It returns 1 on success, -1 if a parsing error occurs, or -2 if
+ * more room is needed while the htx message is not empty. Parsing errors are
+ * reported by setting the htx flag HTX_FL_PARSING_ERROR and filling the
+ * h1m->err_pos and h1m->err_state fields.
+ */
+static int h1_postparse_req_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *htx,
+ struct http_hdr *hdrs, size_t max)
+{
+ struct htx_sl *sl;
+ struct ist meth, uri, vsn;
+ unsigned int flags;
+
+ /* <h1sl> is always defined for a request */
+ meth = h1sl->rq.m;
+ uri = h1sl->rq.u;
+ vsn = h1sl->rq.v;
+
+ /* Be sure the message, once converted into HTX, will not exceed the max
+ * size allowed.
+ */
+ if (h1_eval_htx_size(meth, uri, vsn, hdrs) > max) {
+ if (htx_is_empty(htx))
+ goto error;
+ goto output_full;
+ }
+
+ /* By default, requests always have a known length */
+ h1m->flags |= H1_MF_XFER_LEN;
+
+ if (h1sl->rq.meth == HTTP_METH_CONNECT) {
+ h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
+ h1m->curr_len = h1m->body_len = 0;
+ }
+
+
+ flags = h1m_htx_sl_flags(h1m);
+ if ((flags & (HTX_SL_F_CONN_UPG|HTX_SL_F_BODYLESS)) == HTX_SL_F_CONN_UPG) {
+ int i;
+
+ for (i = 0; hdrs[i].n.len; i++) {
+ if (isteqi(hdrs[i].n, ist("upgrade")))
+ hdrs[i].v = IST_NULL;
+ }
+ h1m->flags &=~ H1_MF_CONN_UPG;
+ flags &= ~HTX_SL_F_CONN_UPG;
+ }
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth, uri, vsn);
+ if (!sl || !htx_add_all_headers(htx, hdrs))
+ goto error;
+ sl->info.req.meth = h1sl->rq.meth;
+
+ /* Check if the uri contains an authority. Also check if it contains an
+ * explicit scheme and if it is "http" or "https". */
+ if (h1sl->rq.meth == HTTP_METH_CONNECT)
+ sl->flags |= HTX_SL_F_HAS_AUTHORITY;
+ else if (uri.len && uri.ptr[0] != '/' && uri.ptr[0] != '*') {
+ sl->flags |= (HTX_SL_F_HAS_AUTHORITY|HTX_SL_F_HAS_SCHM);
+ if (uri.len > 4 && (uri.ptr[0] | 0x20) == 'h')
+ sl->flags |= ((uri.ptr[4] == ':') ? HTX_SL_F_SCHM_HTTP : HTX_SL_F_SCHM_HTTPS);
+
+ /* absolute-form target URI present, proceed to scheme-based
+ * normalization */
+ http_scheme_based_normalize(htx);
+ }
+
+ /* If body length cannot be determined, set htx->extra to
+ * ULLONG_MAX. This value is impossible in other cases.
+ */
+ htx->extra = ((h1m->flags & H1_MF_XFER_LEN) ? h1m->curr_len : ULLONG_MAX);
+
+ end:
+ return 1;
+ output_full:
+ h1m_init_req(h1m);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ return -2;
+ error:
+ h1m->err_pos = h1m->next;
+ h1m->err_state = h1m->state;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+}
+
+/* Postprocess the parsed headers for a response and convert them into an htx
+ * message. It returns 1 on success, -1 if a parsing error occurs, or -2 if
+ * more room is needed while the htx message is not empty. Parsing errors are
+ * reported by setting the htx flag HTX_FL_PARSING_ERROR and filling the
+ * h1m->err_pos and h1m->err_state fields.
+ */
+static int h1_postparse_res_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *htx,
+ struct http_hdr *hdrs, size_t max)
+{
+ struct htx_sl *sl;
+ struct ist vsn, status, reason;
+ unsigned int flags;
+ uint16_t code = 0;
+
+ if (h1sl) {
+ /* For HTTP responses, the start-line was parsed */
+ code = h1sl->st.status;
+ vsn = h1sl->st.v;
+ status = h1sl->st.c;
+ reason = h1sl->st.r;
+ }
+ else {
+ /* For FCGI responses, there is no start-line but the "Status"
+ * header must be parsed, if found.
+ */
+ int hdr;
+
+ vsn = ((h1m->flags & H1_MF_VER_11) ? ist("HTTP/1.1") : ist("HTTP/1.0"));
+ for (hdr = 0; hdrs[hdr].n.len; hdr++) {
+ if (isteqi(hdrs[hdr].n, ist("status"))) {
+ code = http_parse_status_val(hdrs[hdr].v, &status, &reason);
+ }
+ else if (isteqi(hdrs[hdr].n, ist("location"))) {
+ code = 302;
+ status = ist("302");
+ reason = ist("Moved Temporarily");
+ }
+ }
+ if (!code) {
+ code = 200;
+ status = ist("200");
+ reason = ist("OK");
+ }
+ /* FIXME: Check the codes 1xx ? */
+ }
+
+ /* Be sure the message, once converted into HTX, will not exceed the max
+ * size allowed.
+ */
+ if (h1_eval_htx_size(vsn, status, reason, hdrs) > max) {
+ if (htx_is_empty(htx))
+ goto error;
+ goto output_full;
+ }
+
+ if ((h1m->flags & (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) && code != 101)
+ h1m->flags &= ~(H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET);
+
+ if (((h1m->flags & H1_MF_METH_CONNECT) && code >= 200 && code < 300) || code == 101) {
+ h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
+ h1m->flags |= H1_MF_XFER_LEN;
+ h1m->curr_len = h1m->body_len = 0;
+ }
+ else if ((h1m->flags & H1_MF_METH_HEAD) || (code >= 100 && code < 200) ||
+ (code == 204) || (code == 304)) {
+ /* Responses known to have no body. */
+ h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
+ h1m->flags |= H1_MF_XFER_LEN;
+ h1m->curr_len = h1m->body_len = 0;
+ }
+ else if (h1m->flags & (H1_MF_CLEN|H1_MF_CHNK)) {
+ /* Responses with a known body length. */
+ h1m->flags |= H1_MF_XFER_LEN;
+ }
+
+ flags = h1m_htx_sl_flags(h1m);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, vsn, status, reason);
+ if (!sl || !htx_add_all_headers(htx, hdrs))
+ goto error;
+ sl->info.res.status = code;
+
+ /* If body length cannot be determined, set htx->extra to
+ * ULLONG_MAX. This value is impossible in other cases.
+ */
+ htx->extra = ((h1m->flags & H1_MF_XFER_LEN) ? h1m->curr_len : ULLONG_MAX);
+
+ end:
+ return 1;
+ output_full:
+ h1m_init_res(h1m);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ return -2;
+ error:
+ h1m->err_pos = h1m->next;
+ h1m->err_state = h1m->state;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+}
+
+/* Parse HTTP/1 headers. It returns the number of bytes parsed on success, 0 if
+ * headers are incomplete, -1 if an error occurred or -2 if it needs more space
+ * to proceed while the output buffer is not empty. Parsing errors are reported
+ * by setting the htx flag HTX_FL_PARSING_ERROR and filling h1m->err_pos and
+ * h1m->err_state fields. This function is responsible for updating the parser
+ * state <h1m> and the start-line <h1sl> if not NULL. For requests, <h1sl>
+ * must always be provided. For responses, <h1sl> may be NULL and the <h1m>
+ * flags H1_MF_METH_CONNECT or H1_MF_METH_HEAD may be set.
+ */
+int h1_parse_msg_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max)
+{
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ int total = 0, ret = 0;
+
+ if (!max || !b_data(srcbuf))
+ goto end;
+
+ /* Realign the input buffer if necessary */
+ if (b_head(srcbuf) + b_data(srcbuf) > b_wrap(srcbuf))
+ b_slow_realign_ofs(srcbuf, trash.area, 0);
+
+ if (!h1sl) {
+ /* If there no start-line, be sure to only parse the headers */
+ h1m->flags |= H1_MF_HDRS_ONLY;
+ }
+ ret = h1_headers_to_hdr_list(b_peek(srcbuf, ofs), b_tail(srcbuf),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), h1m, h1sl);
+ if (ret <= 0) {
+ /* Incomplete or invalid message. If the input buffer only
+ * contains headers and is full, which is detected by the
+ * buffer being full while the offset is zero, it's an error
+ * because the headers are too large to be handled by the
+ * parser. */
+ if (ret < 0 || (!ret && !ofs && !buf_room_for_htx_data(srcbuf)))
+ goto error;
+ goto end;
+ }
+ total = ret;
+
+ /* message headers fully parsed, do some checks to prepare the body
+ * parsing.
+ */
+
+ if (!(h1m->flags & H1_MF_RESP)) {
+ if (!h1_process_req_vsn(h1m, h1sl)) {
+ h1m->err_pos = h1sl->rq.v.ptr - b_head(srcbuf);
+ h1m->err_state = h1m->state;
+ goto vsn_error;
+ }
+ ret = h1_postparse_req_hdrs(h1m, h1sl, dsthtx, hdrs, max);
+ if (ret < 0)
+ return ret;
+ }
+ else {
+ if (h1sl && !h1_process_res_vsn(h1m, h1sl)) {
+ h1m->err_pos = h1sl->st.v.ptr - b_head(srcbuf);
+ h1m->err_state = h1m->state;
+ goto vsn_error;
+ }
+ ret = h1_postparse_res_hdrs(h1m, h1sl, dsthtx, hdrs, max);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* Switch messages without any payload to DONE state */
+ if (((h1m->flags & H1_MF_CLEN) && h1m->body_len == 0) ||
+ ((h1m->flags & (H1_MF_XFER_LEN|H1_MF_CLEN|H1_MF_CHNK)) == H1_MF_XFER_LEN)) {
+ h1m->state = H1_MSG_DONE;
+ dsthtx->flags |= HTX_FL_EOM;
+ }
+
+ end:
+ return total;
+ error:
+ h1m->err_pos = h1m->next;
+ h1m->err_state = h1m->state;
+ vsn_error:
+ dsthtx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+
+}
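+
+/* Illustrative sketch (not part of the original file, kept out of the build):
+ * a minimal caller of h1_parse_msg_hdrs(). All objects are assumed to be
+ * owned by the caller; only the return-value handling follows from the
+ * contract documented above.
+ */
+#if 0
+static int example_parse_hdrs(struct h1m *h1m, union h1_sl *h1sl,
+ struct htx *htx, struct buffer *buf, size_t max)
+{
+ int ret = h1_parse_msg_hdrs(h1m, h1sl, htx, buf, 0, max);
+
+ if (ret > 0)
+ return ret; /* <ret> bytes of headers were consumed */
+ if (ret == 0 || ret == -2)
+ return 0; /* incomplete input (0) or full output (-2): retry later */
+ return -1; /* parsing error: HTX_FL_PARSING_ERROR is set on <htx> */
+}
+#endif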
+
+/* Copy data from <srcbuf> into a DATA block in <dsthtx>. If possible, a
+ * zero-copy is performed. It returns the number of bytes copied.
+ */
+static size_t h1_copy_msg_data(struct htx **dsthtx, struct buffer *srcbuf, size_t ofs,
+ size_t count, size_t max, struct buffer *htxbuf)
+{
+ struct htx *tmp_htx = *dsthtx;
+ size_t block1, block2, ret = 0;
+
+ /* Be prepared to create at least one HTX block by reserving its size
+ * and adjust <count> accordingly.
+ */
+ if (max <= sizeof(struct htx_blk))
+ goto end;
+ max -= sizeof(struct htx_blk);
+ if (count > max)
+ count = max;
+
+ /* very often with large files we'll face the following
+ * situation :
+ * - htx is empty and points to <htxbuf>
+ * - count == srcbuf->data
+ * - srcbuf->head == sizeof(struct htx)
+ * => we can swap the buffers and place an htx header into
+ * the target buffer instead
+ */
+ if (unlikely(htx_is_empty(tmp_htx) && count == b_data(srcbuf) &&
+ !ofs && b_head_ofs(srcbuf) == sizeof(struct htx))) {
+ void *raw_area = srcbuf->area;
+ void *htx_area = htxbuf->area;
+ struct htx_blk *blk;
+
+ srcbuf->area = htx_area;
+ htxbuf->area = raw_area;
+ tmp_htx = (struct htx *)htxbuf->area;
+ tmp_htx->size = htxbuf->size - sizeof(*tmp_htx);
+ htx_reset(tmp_htx);
+ b_set_data(htxbuf, b_size(htxbuf));
+
+ blk = htx_add_blk(tmp_htx, HTX_BLK_DATA, count);
+ blk->info += count;
+
+ *dsthtx = tmp_htx;
+ /* nothing else to do, the old buffer now contains an
+ * empty pre-initialized HTX header
+ */
+ return count;
+ }
+
+ /* * First block is the copy of contiguous data starting at offset <ofs>
+ * with <count> as max. <max> is updated accordingly
+ *
+ * * Second block is the remaining (count - block1) if <max> is large
+ * enough. Another HTX block is reserved.
+ */
+ block1 = b_contig_data(srcbuf, ofs);
+ block2 = 0;
+ if (block1 > count)
+ block1 = count;
+ max -= block1;
+
+ if (max > sizeof(struct htx_blk)) {
+ block2 = count - block1;
+ max -= sizeof(struct htx_blk);
+ if (block2 > max)
+ block2 = max;
+ }
+
+ ret = htx_add_data(tmp_htx, ist2(b_peek(srcbuf, ofs), block1));
+ if (ret == block1 && block2)
+ ret += htx_add_data(tmp_htx, ist2(b_orig(srcbuf), block2));
+ end:
+ return ret;
+}
+
+static const char hextable[] = {
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+};
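+
+/* For example (illustrative only): hextable['7'] is 7 and hextable['a'] ==
+ * hextable['A'] == 10, while any non-hex byte yields -1, which the parser
+ * below catches with its (c & 0xF0) test once the value is widened to int.
+ */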
+
+/* Generic function to parse the current HTTP chunk. It may be used to parse
+ * any kind of chunk, including incomplete HTTP chunks or chunks split because
+ * the buffer wraps. This version tries to perform zero-copy on large chunks
+ * if possible.
+ */
+static size_t h1_parse_chunk(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t *max,
+ struct buffer *htxbuf)
+{
+ uint64_t chksz;
+ size_t sz, used, lmax, total = 0;
+ int ret = 0;
+
+ lmax = *max;
+ switch (h1m->state) {
+ case H1_MSG_DATA:
+ new_chunk:
+ used = htx_used_space(*dsthtx);
+ if (b_data(srcbuf) == ofs || lmax <= sizeof(struct htx_blk))
+ break;
+
+ sz = b_data(srcbuf) - ofs;
+ if (unlikely(sz > h1m->curr_len))
+ sz = h1m->curr_len;
+ sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, lmax, htxbuf);
+ lmax -= htx_used_space(*dsthtx) - used;
+ ofs += sz;
+ total += sz;
+ h1m->curr_len -= sz;
+ if (h1m->curr_len)
+ break;
+
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ /* fall through */
+
+ case H1_MSG_CHUNK_CRLF:
+ ret = h1_skip_chunk_crlf(srcbuf, ofs, b_data(srcbuf));
+ if (ret <= 0)
+ break;
+ ofs += ret;
+ total += ret;
+
+ /* Don't parse the next chunk here so that the optimized handling of contiguous chunks may be tried first */
+ h1m->state = H1_MSG_CHUNK_SIZE;
+ break;
+
+ case H1_MSG_CHUNK_SIZE:
+ ret = h1_parse_chunk_size(srcbuf, ofs, b_data(srcbuf), &chksz);
+ if (ret <= 0)
+ break;
+ h1m->state = ((!chksz) ? H1_MSG_TRAILERS : H1_MSG_DATA);
+ h1m->curr_len = chksz;
+ h1m->body_len += chksz;
+ ofs += ret;
+ total += ret;
+
+ if (h1m->curr_len) {
+ h1m->state = H1_MSG_DATA;
+ goto new_chunk;
+ }
+ h1m->state = H1_MSG_TRAILERS;
+ break;
+
+ default:
+ /* unexpected */
+ ret = -1;
+ break;
+ }
+
+ if (ret < 0) {
+ (*dsthtx)->flags |= HTX_FL_PARSING_ERROR;
+ h1m->err_state = h1m->state;
+ h1m->err_pos = ofs;
+ total = 0;
+ }
+
+ /* Don't forget to update htx->extra */
+ (*dsthtx)->extra = h1m->curr_len;
+ *max = lmax;
+ return total;
+}
+
+/* Parses full contiguous HTTP chunks. This version is optimized for small
+ * chunks and does not perform zero-copy. It must be called in the
+ * H1_MSG_CHUNK_SIZE state. Be careful when changing anything in this
+ * function: it is really sensitive, and any change may have an impact on
+ * performance.
+ */
+static size_t h1_parse_full_contig_chunks(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t *max,
+ struct buffer *htxbuf)
+{
+ char *start, *end, *dptr;
+ ssize_t dpos, ridx, save;
+ size_t lmax, total = 0;
+ uint64_t chksz;
+ struct htx_ret htxret;
+
+ lmax = *max;
+ if (lmax <= sizeof(struct htx_blk))
+ goto out;
+
+ /* source info :
+ * start : pointer at <ofs> position
+ * end : pointer marking the end of data to parse
+ * ridx : the reverse index (negative) marking the parser position (end[ridx])
+ */
+ ridx = -b_contig_data(srcbuf, ofs);
+ if (!ridx)
+ goto out;
+ start = b_peek(srcbuf, ofs);
+ end = start - ridx;
+
+ /* Reserve the maximum possible size for the data */
+ htxret = htx_reserve_max_data(*dsthtx);
+ if (!htxret.blk)
+ goto out;
+
+ /* destination info :
+ * dptr : pointer on the beginning of the data
+ * dpos : current position where to copy data
+ */
+ dptr = htx_get_blk_ptr(*dsthtx, htxret.blk);
+ dpos = htxret.ret;
+
+ /* An empty DATA block is not possible, thus if <dpos> is the beginning
+ * of the block, it means it is a new block. We can remove the block size
+ * from <max>. Then we must adjust it if it exceeds the free size in the
+ * block.
+ */
+ if (!dpos)
+ lmax -= sizeof(struct htx_blk);
+ if (lmax > htx_get_blksz(htxret.blk) - dpos)
+ lmax = htx_get_blksz(htxret.blk) - dpos;
+
+ while (1) {
+ /* The chunk size is in the following form, though we are only
+ * interested in the size and CRLF :
+ * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
+ */
+ chksz = 0;
+ save = ridx; /* Save the parser position to rewind if necessary */
+ while (1) {
+ int c;
+
+ if (!ridx)
+ goto end_parsing;
+
+ /* Convert current character */
+ c = hextable[(unsigned char)end[ridx]];
+
+ /* not a hex digit anymore */
+ if (c & 0xF0)
+ break;
+
+ /* Update current chunk size */
+ chksz = (chksz << 4) + c;
+
+ if (unlikely(chksz & 0xF0000000000000ULL)) {
+ /* Don't accept more than 13 hex digits (2^52 - 1)
+ * so as to never feed possibly bogus values to
+ * languages that use floats for their integers
+ */
+ goto parsing_error;
+ }
+ ++ridx;
+ }
+
+ if (unlikely(chksz > lmax))
+ goto end_parsing;
+
+ if (unlikely(ridx == save)) {
+ /* empty size not allowed */
+ goto parsing_error;
+ }
+
+ /* Skip spaces */
+ while (HTTP_IS_SPHT(end[ridx])) {
+ if (!++ridx)
+ goto end_parsing;
+ }
+
+ /* Up to there, we know that at least one byte is present. Check
+ * for the end of chunk size.
+ */
+ while (1) {
+ if (likely(end[ridx] == '\r')) {
+ /* Parse CRLF */
+ if (!++ridx)
+ goto end_parsing;
+ if (unlikely(end[ridx] != '\n')) {
+ /* CR must be followed by LF */
+ goto parsing_error;
+ }
+
+ /* done */
+ ++ridx;
+ break;
+ }
+ else if (end[ridx] == '\n') {
+ /* Parse LF only, nothing more to do */
+ ++ridx;
+ break;
+ }
+ else if (likely(end[ridx] == ';')) {
+ /* chunk extension, ends at next CRLF */
+ if (!++ridx)
+ goto end_parsing;
+ while (!HTTP_IS_CRLF(end[ridx])) {
+ if (!++ridx)
+ goto end_parsing;
+ }
+ /* we have a CRLF now, loop above */
+ continue;
+ }
+ else {
+ /* all other characters are unexpected */
+ goto parsing_error;
+ }
+ }
+
+ /* Exit if it is the last chunk */
+ if (unlikely(!chksz)) {
+ h1m->state = H1_MSG_TRAILERS;
+ save = ridx;
+ goto end_parsing;
+ }
+
+ /* Now check if the whole chunk is here (including the CRLF at
+ * the end), otherwise we switch to the H1_MSG_DATA state.
+ */
+ if (chksz + 2 > -ridx) {
+ h1m->curr_len = chksz;
+ h1m->body_len += chksz;
+ h1m->state = H1_MSG_DATA;
+ (*dsthtx)->extra = h1m->curr_len;
+ save = ridx;
+ goto end_parsing;
+ }
+
+ memcpy(dptr + dpos, end + ridx, chksz);
+ h1m->body_len += chksz;
+ lmax -= chksz;
+ dpos += chksz;
+ ridx += chksz;
+
+ /* Parse CRLF or LF (always present) */
+ if (likely(end[ridx] == '\r'))
+ ++ridx;
+ if (end[ridx] != '\n') {
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ goto parsing_error;
+ }
+ ++ridx;
+ }
+
+ end_parsing:
+ ridx = save;
+
+ /* Adjust the HTX block size or remove the block if nothing was copied
+ * (empty HTX DATA blocks are not supported).
+ */
+ if (!dpos)
+ htx_remove_blk(*dsthtx, htxret.blk);
+ else
+ htx_change_blk_value_len(*dsthtx, htxret.blk, dpos);
+ total = end + ridx - start;
+ *max = lmax;
+
+ out:
+ return total;
+
+ parsing_error:
+ (*dsthtx)->flags |= HTX_FL_PARSING_ERROR;
+ h1m->err_state = h1m->state;
+ h1m->err_pos = ofs + end + ridx - start;
+ return 0;
+}
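+
+/* Illustrative example (not part of the original file): with a contiguous
+ * input such as "4\r\nWiki\r\n5\r\npedia\r\n0\r\n", the fast path above copies
+ * "Wikipedia" into a single HTX DATA block and leaves the parser in the
+ * H1_MSG_TRAILERS state, without going through the generic state machine.
+ */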
+
+/* Parse HTTP chunks. This function relies on an optimized function to parse
+ * contiguous chunks if possible. Otherwise, when a chunk is incomplete or when
+ * the underlying buffer is wrapping, a generic function is used.
+ */
+static size_t h1_parse_msg_chunks(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max,
+ struct buffer *htxbuf)
+{
+ size_t ret, total = 0;
+
+ while (ofs < b_data(srcbuf)) {
+ ret = 0;
+
+ /* First parse full contiguous chunks. It is only possible if we
+ * are waiting for the next chunk size.
+ */
+ if (h1m->state == H1_MSG_CHUNK_SIZE) {
+ ret = h1_parse_full_contig_chunks(h1m, dsthtx, srcbuf, ofs, &max, htxbuf);
+ /* exit on error */
+ if (!ret && (*dsthtx)->flags & HTX_FL_PARSING_ERROR) {
+ total = 0;
+ break;
+ }
+ /* otherwise give the generic parser a chance on the remaining data */
+ total += ret;
+ ofs += ret;
+ ret = 0;
+ }
+
+ /* If some data remains, try to parse it using the generic
+ * function handling incomplete chunks and split chunks
+ * because of a wrapping buffer.
+ */
+ if (h1m->state < H1_MSG_TRAILERS && ofs < b_data(srcbuf)) {
+ ret = h1_parse_chunk(h1m, dsthtx, srcbuf, ofs, &max, htxbuf);
+ total += ret;
+ ofs += ret;
+ }
+
+ /* nothing more was parsed or parsing was stopped on incomplete
+ * chunk, we can exit, handling parsing error if necessary.
+ */
+ if (!ret || h1m->state != H1_MSG_CHUNK_SIZE) {
+ if ((*dsthtx)->flags & HTX_FL_PARSING_ERROR)
+ total = 0;
+ break;
+ }
+ }
+
+ return total;
+}
+
+/* Parse HTTP/1 body. It returns the number of bytes parsed if > 0, or 0 if it
+ * couldn't proceed. Parsing errors are reported by setting the htx flags
+ * HTX_FL_PARSING_ERROR and filling the h1m->err_pos and h1m->err_state fields.
+ * This function is responsible for updating the parser state <h1m>.
+ */
+size_t h1_parse_msg_data(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max,
+ struct buffer *htxbuf)
+{
+ size_t sz, total = 0;
+
+ if (b_data(srcbuf) == ofs || max <= sizeof(struct htx_blk))
+ return 0;
+
+ if (h1m->flags & H1_MF_CLEN) {
+ /* content-length: read only h1m->body_len */
+ sz = b_data(srcbuf) - ofs;
+ if (unlikely(sz > h1m->curr_len))
+ sz = h1m->curr_len;
+ sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, max, htxbuf);
+ h1m->curr_len -= sz;
+ (*dsthtx)->extra = h1m->curr_len;
+ total += sz;
+ if (!h1m->curr_len) {
+ h1m->state = H1_MSG_DONE;
+ (*dsthtx)->flags |= HTX_FL_EOM;
+ }
+ }
+ else if (h1m->flags & H1_MF_CHNK) {
+ /* te:chunked : parse chunks */
+ total += h1_parse_msg_chunks(h1m, dsthtx, srcbuf, ofs, max, htxbuf);
+ }
+ else if (h1m->flags & H1_MF_XFER_LEN) {
+ /* XFER_LEN is set but neither CLEN nor CHNK, meaning there is no
+ * body. Switch the message to the DONE state.
+ */
+ h1m->state = H1_MSG_DONE;
+ (*dsthtx)->flags |= HTX_FL_EOM;
+ }
+ else {
+ /* no content length, read till SHUTW */
+ sz = b_data(srcbuf) - ofs;
+ sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, max, htxbuf);
+ total += sz;
+ }
+
+ return total;
+}
+
+/* Parse HTTP/1 trailers. It returns the number of bytes parsed on success, 0 if
+ * trailers are incomplete, -1 if an error occurred or -2 if it needs more space
+ * to proceed while the output buffer is not empty. Parsing errors are reported
+ * by setting the htx flags HTX_FL_PARSING_ERROR and filling h1m->err_pos and
+ * h1m->err_state fields. This function is responsible for updating the parser
+ * state <h1m>.
+ */
+int h1_parse_msg_tlrs(struct h1m *h1m, struct htx *dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max)
+{
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ struct h1m tlr_h1m;
+ int ret = 0;
+
+ if (b_data(srcbuf) == ofs) {
+ /* Nothing to parse */
+ goto end;
+ }
+ if (!max) {
+ /* No more room */
+ goto output_full;
+ }
+
+ /* Realign the input buffer if necessary */
+ if (b_peek(srcbuf, ofs) > b_tail(srcbuf))
+ b_slow_realign_ofs(srcbuf, trash.area, 0);
+
+ tlr_h1m.flags = (H1_MF_NO_PHDR|H1_MF_HDRS_ONLY);
+ ret = h1_headers_to_hdr_list(b_peek(srcbuf, ofs), b_tail(srcbuf),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), &tlr_h1m, NULL);
+ if (ret <= 0) {
+ /* Incomplete or invalid trailers. If the input buffer only
+ * contains trailers and is full, which is detected by it being
+ * full and the offset being zero, it's an error because the
+ * trailers are too large to be handled by the parser. */
+ if (ret < 0 || (!ret && !ofs && !buf_room_for_htx_data(srcbuf)))
+ goto error;
+ goto end;
+ }
+
+ /* message trailers fully parsed. */
+ if (h1_eval_htx_hdrs_size(hdrs) > max) {
+ if (htx_is_empty(dsthtx))
+ goto error;
+ goto output_full;
+ }
+
+ if (!htx_add_all_trailers(dsthtx, hdrs))
+ goto error;
+
+ h1m->state = H1_MSG_DONE;
+ dsthtx->flags |= HTX_FL_EOM;
+
+ end:
+ return ret;
+ output_full:
+ return -2;
+ error:
+ h1m->err_state = h1m->state;
+ h1m->err_pos = h1m->next;
+ dsthtx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+}
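+
+/* Illustrative sketch (not part of the original file, kept out of the build):
+ * how the three entry points above may be chained by a caller, assuming, as
+ * in <haproxy/h1-t.h>, that header states sort before body states. <ofs> and
+ * <max> are assumed to be maintained by the caller between calls.
+ */
+#if 0
+static int example_parse_msg(struct h1m *h1m, union h1_sl *h1sl,
+ struct htx *htx, struct buffer *buf,
+ size_t ofs, size_t max, struct buffer *htxbuf)
+{
+ while (h1m->state != H1_MSG_DONE) {
+ int ret;
+
+ if (h1m->state <= H1_MSG_LAST_LF)
+ ret = h1_parse_msg_hdrs(h1m, h1sl, htx, buf, ofs, max);
+ else if (h1m->state < H1_MSG_TRAILERS)
+ ret = (int)h1_parse_msg_data(h1m, &htx, buf, ofs, max, htxbuf);
+ else
+ ret = h1_parse_msg_tlrs(h1m, htx, buf, ofs, max);
+ if (ret <= 0)
+ break; /* incomplete (0), error (-1) or output full (-2) */
+ ofs += ret;
+ }
+ return h1m->state == H1_MSG_DONE;
+}
+#endif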
+
+/* Appends the H1 representation of the request line <sl> to the chunk <chk>. It
+ * returns 1 if data are successfully appended, otherwise it returns 0.
+ */
+int h1_format_htx_reqline(const struct htx_sl *sl, struct buffer *chk)
+{
+ struct ist uri;
+ size_t sz = chk->data;
+
+ uri = h1_get_uri(sl);
+ if (!chunk_memcat(chk, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)) ||
+ !chunk_memcat(chk, " ", 1) ||
+ !chunk_memcat(chk, uri.ptr, uri.len) ||
+ !chunk_memcat(chk, " ", 1))
+ goto full;
+
+ if (sl->flags & HTX_SL_F_VER_11) {
+ if (!chunk_memcat(chk, "HTTP/1.1", 8))
+ goto full;
+ }
+ else {
+ if (!chunk_memcat(chk, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)))
+ goto full;
+ }
+
+ if (!chunk_memcat(chk, "\r\n", 2))
+ goto full;
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
+
+/* Appends the H1 representation of the status line <sl> to the chunk <chk>. It
+ * returns 1 if data are successfully appended, otherwise it returns 0.
+ */
+int h1_format_htx_stline(const struct htx_sl *sl, struct buffer *chk)
+{
+ size_t sz = chk->data;
+
+ if (HTX_SL_LEN(sl) + 4 > b_room(chk))
+ return 0;
+
+ if (sl->flags & HTX_SL_F_VER_11) {
+ if (!chunk_memcat(chk, "HTTP/1.1", 8))
+ goto full;
+ }
+ else {
+ if (!chunk_memcat(chk, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl)))
+ goto full;
+ }
+ if (!chunk_memcat(chk, " ", 1) ||
+ !chunk_memcat(chk, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl)) ||
+ !chunk_memcat(chk, " ", 1) ||
+ !chunk_memcat(chk, HTX_SL_RES_RPTR(sl), HTX_SL_RES_RLEN(sl)) ||
+ !chunk_memcat(chk, "\r\n", 2))
+ goto full;
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
+
+/* Appends the H1 representation of the header <n> with the value <v> to the
+ * chunk <chk>. It returns 1 if data are successfully appended, otherwise it
+ * returns 0.
+ */
+int h1_format_htx_hdr(const struct ist n, const struct ist v, struct buffer *chk)
+{
+ size_t sz = chk->data;
+
+ if (n.len + v.len + 4 > b_room(chk))
+ return 0;
+
+ if (!chunk_memcat(chk, n.ptr, n.len) ||
+ !chunk_memcat(chk, ": ", 2) ||
+ !chunk_memcat(chk, v.ptr, v.len) ||
+ !chunk_memcat(chk, "\r\n", 2))
+ goto full;
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
+
+/* Appends the H1 representation of the data <data> to the chunk <chk>. If
+ * <chunked> is non-zero, it emits HTTP/1 chunk-encoded data. It returns 1 if
+ * data are successfully appended, otherwise it returns 0.
+ */
+int h1_format_htx_data(const struct ist data, struct buffer *chk, int chunked)
+{
+ size_t sz = chk->data;
+
+ if (chunked) {
+ uint32_t chksz;
+ char tmp[10];
+ char *beg, *end;
+
+ chksz = data.len;
+
+ beg = end = tmp+10;
+ *--beg = '\n';
+ *--beg = '\r';
+ do {
+ *--beg = hextab[chksz & 0xF];
+ } while (chksz >>= 4);
+
+ if (!chunk_memcat(chk, beg, end - beg) ||
+ !chunk_memcat(chk, data.ptr, data.len) ||
+ !chunk_memcat(chk, "\r\n", 2))
+ goto full;
+ }
+ else {
+ if (!chunk_memcat(chk, data.ptr, data.len))
+ return 0;
+ }
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
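+
+/* For example (illustrative only): h1_format_htx_data(ist("Wikipedia"), chk, 1)
+ * appends "9\r\nWikipedia\r\n" to <chk> since the payload is 9 bytes long,
+ * while a zero <chunked> argument appends the nine bytes verbatim.
+ */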
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/h2.c b/src/h2.c
new file mode 100644
index 0000000..f794262
--- /dev/null
+++ b/src/h2.c
@@ -0,0 +1,814 @@
+/*
+ * HTTP/2 protocol processing
+ *
+ * Copyright 2017 Willy Tarreau <w@1wt.eu>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <haproxy/api.h>
+#include <haproxy/global.h>
+#include <haproxy/h2.h>
+#include <haproxy/http-hdr-t.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <import/ist.h>
+
+
+struct h2_frame_definition h2_frame_definition[H2_FT_ENTRIES] = {
+ [H2_FT_DATA ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 0, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_HEADERS ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 1, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_PRIORITY ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 5, .max_len = 5, },
+ [H2_FT_RST_STREAM ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 4, .max_len = 4, },
+ [H2_FT_SETTINGS ] = { .dir = 3, .min_id = 0, .max_id = 0, .min_len = 0, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_PUSH_PROMISE ] = { .dir = 0, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 4, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_PING ] = { .dir = 3, .min_id = 0, .max_id = 0, .min_len = 8, .max_len = 8, },
+ [H2_FT_GOAWAY ] = { .dir = 3, .min_id = 0, .max_id = 0, .min_len = 8, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_WINDOW_UPDATE] = { .dir = 3, .min_id = 0, .max_id = H2_MAX_STREAM_ID, .min_len = 4, .max_len = 4, },
+ [H2_FT_CONTINUATION ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 0, .max_len = H2_MAX_FRAME_LEN, },
+};
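+
+/* Illustrative sketch (not part of the original file, kept out of the build):
+ * how a decoder might check a received frame header against the table above.
+ * The type, length and stream id are assumed to be decoded by the caller.
+ */
+#if 0
+static int example_frame_hdr_valid(enum h2_ft ft, uint32_t len, uint32_t sid)
+{
+ const struct h2_frame_definition *fd;
+
+ if (ft >= H2_FT_ENTRIES)
+ return 1; /* unknown frame types must be ignored, not rejected */
+ fd = &h2_frame_definition[ft];
+ return len >= fd->min_len && len <= fd->max_len &&
+ sid >= fd->min_id && sid <= fd->max_id;
+}
+#endif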
+
+/* Looks into <ist> for forbidden characters for header values (0x00, 0x0A,
+ * 0x0D), starting at pointer <start> which must be within <ist>. Returns
+ * non-zero if such a character is found, 0 otherwise. Since a match is
+ * unlikely, it's recommended to first check for the presence of control
+ * chars using ist_find_ctl() before calling this function.
+ */
+static int has_forbidden_char(const struct ist ist, const char *start)
+{
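+ /* bytes <= 0x0d are tested against a bitmask in which bits 0, 10
+ * and 13 are set, matching NUL (0x00), LF (0x0a) and CR (0x0d)
+ */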
+ do {
+ if ((uint8_t)*start <= 0x0d &&
+ (1U << (uint8_t)*start) & ((1<<13) | (1<<10) | (1<<0)))
+ return 1;
+ start++;
+ } while (start < istend(ist));
+ return 0;
+}
+
+/* Prepare the request line into <htx> from pseudo headers stored in <phdr[]>.
+ * <fields> indicates what was found so far. This should be called once at the
+ * detection of the first general header field or at the end of the request if
+ * no general header field was found yet. Returns the created start line on
+ * success, or NULL on failure. Upon success, <msgf> is updated with a few
+ * H2_MSGF_* flags indicating what was found while parsing.
+ *
+ * The rules below deserve a bit of explanation. There tends to be some
+ * confusion regarding H2's authority vs the Host header. They are different
+ * though may sometimes be exchanged. In H2, the request line is broken into :
+ * - :method
+ * - :scheme
+ * - :authority
+ * - :path
+ *
+ * An equivalent HTTP/1.x absolute-form request would then look like :
+ * <:method> <:scheme>://<:authority><:path> HTTP/x.y
+ *
+ * Except for CONNECT which doesn't have scheme nor path and looks like :
+ * <:method> <:authority> HTTP/x.y
+ *
+ * It's worth noting that H2 still supports an encoding to map H1 origin-form
+ * and asterisk-form requests. These ones do not specify the authority. However
+ * in H2 they must still specify the scheme, which is not present in H1. Also,
+ * when encoding an absolute-form H1 request without a path, the path
+ * automatically becomes "/" except for the OPTIONS method where it
+ * becomes "*".
+ *
+ * As such it is explicitly permitted for an H2 client to send a request
+ * featuring a Host header and no :authority, though it's not the recommended
+ * way to use H2 for a client. It is however the only permitted way to encode
+ * an origin-form H1 request over H2. Thus we need to respect such differences
+ * as much as possible when re-encoding the H2 request into HTX.
+ */
+static struct htx_sl *h2_prepare_htx_reqline(uint32_t fields, struct ist *phdr, struct htx *htx, unsigned int *msgf)
+{
+ struct ist uri, meth_sl;
+ unsigned int flags = HTX_SL_F_NONE;
+ struct htx_sl *sl;
+ size_t i;
+
+ if ((fields & H2_PHDR_FND_METH) && isteq(phdr[H2_PHDR_IDX_METH], ist("CONNECT"))) {
+ if (fields & H2_PHDR_FND_PROT) {
+ /* rfc 8441 Extended Connect Protocol
+ * #4 :scheme and :path must be present, as well as
+ * :authority like all h2 requests
+ */
+ if (!(fields & H2_PHDR_FND_SCHM)) {
+ /* missing scheme */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_PATH)) {
+ /* missing path */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_AUTH)) {
+ /* missing authority */
+ goto fail;
+ }
+
+ flags |= HTX_SL_F_HAS_SCHM;
+ if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("http")))
+ flags |= HTX_SL_F_SCHM_HTTP;
+ else if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("https")))
+ flags |= HTX_SL_F_SCHM_HTTPS;
+ else if (!http_validate_scheme(phdr[H2_PHDR_IDX_SCHM]))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+
+ meth_sl = ist("GET");
+
+ *msgf |= H2_MSGF_EXT_CONNECT;
+ /* no ES on the HEADERS frame but no body either for
+ * Extended CONNECT */
+ *msgf &= ~H2_MSGF_BODY;
+ }
+ else {
+ /* RFC 7540 #8.2.6 regarding CONNECT: ":scheme" and ":path"
+ * MUST be omitted ; ":authority" contains the host and port
+ * to connect to.
+ */
+ if (fields & H2_PHDR_FND_SCHM) {
+ /* scheme not allowed */
+ goto fail;
+ }
+ else if (fields & H2_PHDR_FND_PATH) {
+ /* path not allowed */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_AUTH)) {
+ /* missing authority */
+ goto fail;
+ }
+
+ meth_sl = phdr[H2_PHDR_IDX_METH];
+ }
+
+ *msgf |= H2_MSGF_BODY_TUNNEL;
+ }
+ else if ((fields & (H2_PHDR_FND_METH|H2_PHDR_FND_SCHM|H2_PHDR_FND_PATH)) !=
+ (H2_PHDR_FND_METH|H2_PHDR_FND_SCHM|H2_PHDR_FND_PATH)) {
+ /* RFC 7540 #8.1.2.3 : all requests MUST include exactly one
+ * valid value for the ":method", ":scheme" and ":path" phdr
+ * unless it is a CONNECT request.
+ */
+ if (!(fields & H2_PHDR_FND_METH)) {
+ /* missing method */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_SCHM)) {
+ /* missing scheme */
+ goto fail;
+ }
+ else {
+ /* missing path */
+ goto fail;
+ }
+ }
+ else { /* regular methods */
+ /* RFC3986#6.2.2.1: scheme is case-insensitive. We need to
+ * classify the scheme as "present/http", "present/https",
+ * "present/other", "absent" so as to decide whether or not
+ * we're facing a normalized URI that will have to be encoded
+ * in origin or absolute form. Indeed, 7540#8.1.2.3 says that
+ * clients should use the absolute form, thus we cannot infer
+ * whether or not the client wanted to use a proxy here.
+ */
+ flags |= HTX_SL_F_HAS_SCHM;
+ if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("http")))
+ flags |= HTX_SL_F_SCHM_HTTP;
+ else if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("https")))
+ flags |= HTX_SL_F_SCHM_HTTPS;
+ else if (!http_validate_scheme(phdr[H2_PHDR_IDX_SCHM]))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+
+ meth_sl = phdr[H2_PHDR_IDX_METH];
+ }
+
+ if (fields & H2_PHDR_FND_PATH) {
+ /* 7540#8.1.2.3: :path must not be empty, and must be either
+ * '*' or an RFC3986 "path-absolute" starting with a "/" but
+ * not with "//".
+ * However, this "path-absolute" was a mistake which was
+ * later fixed in http2bis as "absolute-path" to match
+ * HTTP/1, thus also allowing "//".
+ */
+ if (unlikely(!phdr[H2_PHDR_IDX_PATH].len))
+ goto fail;
+ else if (unlikely(phdr[H2_PHDR_IDX_PATH].ptr[0] != '/')) {
+ if (!isteq(phdr[H2_PHDR_IDX_PATH], ist("*")))
+ goto fail;
+ }
+ }
+
+ if (!(flags & HTX_SL_F_HAS_SCHM)) {
+ /* no scheme, use authority only (CONNECT) */
+ uri = phdr[H2_PHDR_IDX_AUTH];
+ flags |= HTX_SL_F_HAS_AUTHORITY;
+ }
+ else if (fields & H2_PHDR_FND_AUTH) {
+ /* authority is present, let's use the absolute form. We simply
+ * use the trash to concatenate them since all of them MUST fit
+ * in a bufsize, as that's where they come from.
+ */
+ uri = ist2bin(trash.area, phdr[H2_PHDR_IDX_SCHM]);
+ istcat(&uri, ist("://"), trash.size);
+ istcat(&uri, phdr[H2_PHDR_IDX_AUTH], trash.size);
+ if (!isteq(phdr[H2_PHDR_IDX_PATH], ist("*")))
+ istcat(&uri, phdr[H2_PHDR_IDX_PATH], trash.size);
+ flags |= HTX_SL_F_HAS_AUTHORITY;
+
+ if (flags & (HTX_SL_F_SCHM_HTTP|HTX_SL_F_SCHM_HTTPS)) {
+ /* we don't know if it was originally an absolute or a
+ * relative request because newer versions of HTTP use
+ * the absolute URI format by default, which we call
+ * the normalized URI format internally. This is the
+ * strongly recommended way of sending a request for
+ * a regular client, so we cannot distinguish this
+ * from a request intended for a proxy. For other
+ * schemes however there is no doubt.
+ */
+ flags |= HTX_SL_F_NORMALIZED_URI;
+ }
+ }
+ else {
+ /* usual schemes with or without authority, use origin form */
+ uri = phdr[H2_PHDR_IDX_PATH];
+ if (fields & H2_PHDR_FND_AUTH)
+ flags |= HTX_SL_F_HAS_AUTHORITY;
+ }
+
+ /* The method is a non-empty token (RFC7231#4.1) */
+ if (!meth_sl.len)
+ goto fail;
+ for (i = 0; i < meth_sl.len; i++) {
+ if (!HTTP_IS_TOKEN(meth_sl.ptr[i]))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ }
+
+ /* make sure the final URI isn't empty. Note that 7540#8.1.2.3 states
+ * that :path must not be empty.
+ */
+ if (!uri.len)
+ goto fail;
+
+ /* The final URI must not contain LWS nor CTL characters */
+ for (i = 0; i < uri.len; i++) {
+ unsigned char c = uri.ptr[i];
+ if (HTTP_IS_LWS(c) || HTTP_IS_CTL(c))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ }
+
+ /* Set HTX start-line flags */
+ flags |= HTX_SL_F_VER_11; // V2 in fact
+ flags |= HTX_SL_F_XFER_LEN; // xfer len always known with H2
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth_sl, uri, ist("HTTP/2.0"));
+ if (!sl)
+ goto fail;
+
+ sl->info.req.meth = find_http_meth(meth_sl.ptr, meth_sl.len);
+ if (sl->info.req.meth == HTTP_METH_HEAD)
+ *msgf |= H2_MSGF_BODYLESS_RSP;
+ return sl;
+ fail:
+ return NULL;
+}
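+
+/* Worked example (illustrative only): with the pseudo-headers
+ * :method=GET :scheme=https :authority=example.org :path=/idx
+ * the function above emits the absolute-form URI "https://example.org/idx"
+ * and sets HTX_SL_F_NORMALIZED_URI, while a plain CONNECT carrying only
+ * :method=CONNECT :authority=example.org:443
+ * keeps "example.org:443" as the URI of the start line.
+ */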
+
+/* Takes an H2 request present in the headers list <list> terminated by a name
+ * being <NULL,0> and emits the equivalent HTX request according to the rules
+ * documented in RFC7540 #8.1.2. The output contents are emitted in <htx>, and
+ * non-zero is returned if some bytes were emitted. In case of error, a
+ * negative error code is returned.
+ *
+ * Upon success, <msgf> is filled with a few H2_MSGF_* flags indicating what
+ * was found while parsing. The caller must set it to zero, or to H2_MSGF_BODY
+ * if a body is detected (!ES).
+ *
+ * The headers list <list> must be composed of :
+ * - n.name != NULL, n.len > 0 : literal header name
+ * - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
+ * among H2_PHDR_IDX_*
+ * - n.name ignored, n.len == 0 : end of list
+ * - in all cases except the end of list, v.ptr and v.len must designate a
+ * valid value.
+ *
+ * The Cookie header will be reassembled at the end, and for this, the <list>
+ * will be used to create a linked list, so its contents may be destroyed.
+ */
+int h2_make_htx_request(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len)
+{
+ struct ist phdr_val[H2_PHDR_NUM_ENTRIES];
+ uint32_t fields; /* bit mask of H2_PHDR_FND_* */
+ uint32_t idx;
+ int ck, lck; /* cookie index and last cookie index */
+ int phdr;
+ int ret;
+ int i;
+ struct htx_sl *sl = NULL;
+ unsigned int sl_flags = 0;
+ const char *ctl;
+
+ lck = ck = -1; // no cookie for now
+ fields = 0;
+ for (idx = 0; list[idx].n.len != 0; idx++) {
+ if (!isttest(list[idx].n)) {
+ /* this is an indexed pseudo-header */
+ phdr = list[idx].n.len;
+ }
+ else {
+ /* this can be any type of header */
+ /* RFC7540#8.1.2: upper case not allowed in header field names.
+ * #10.3: header names must be valid (i.e. match a token).
+ * For pseudo-headers we check from 2nd char and for other ones
+ * from the first char, because HTTP_IS_TOKEN() also excludes
+ * the colon.
+ */
+ phdr = h2_str_to_phdr(list[idx].n);
+
+ for (i = !!phdr; i < list[idx].n.len; i++)
+ if ((uint8_t)(list[idx].n.ptr[i] - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
+ goto fail;
+ }
+
+ /* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
+ * rejecting NUL, CR and LF characters.
+ */
+ ctl = ist_find_ctl(list[idx].v);
+ if (unlikely(ctl) && has_forbidden_char(list[idx].v, ctl))
+ goto fail;
+
+ if (phdr > 0 && phdr < H2_PHDR_NUM_ENTRIES) {
+ /* insert a pseudo header by its index (in phdr) and value (in value) */
+ if (fields & ((1 << phdr) | H2_PHDR_FND_NONE)) {
+ if (fields & H2_PHDR_FND_NONE) {
+ /* pseudo header field after regular headers */
+ goto fail;
+ }
+ else {
+ /* repeated pseudo header field */
+ goto fail;
+ }
+ }
+ fields |= 1 << phdr;
+ phdr_val[phdr] = list[idx].v;
+ continue;
+ }
+ else if (phdr != 0) {
+ /* invalid pseudo header -- should never happen here */
+ goto fail;
+ }
+
+ /* regular header field in (name,value) */
+ if (unlikely(!(fields & H2_PHDR_FND_NONE))) {
+ /* no more pseudo-headers, time to build the request line */
+ sl = h2_prepare_htx_reqline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ fields |= H2_PHDR_FND_NONE;
+
+ /* The http2bis draft recommends dropping Host in favor of :authority when
+ * the latter is present. This is required to make sure there is no
+ * discrepancy between the authority and the host header, especially
+ * since routing rules usually involve Host. Here we already know if
+ * :authority was found so we can emit it right now and mark the host
+ * as filled so that it's skipped later.
+ */
+ if (fields & H2_PHDR_FND_AUTH) {
+ if (!htx_add_header(htx, ist("host"), phdr_val[H2_PHDR_IDX_AUTH]))
+ goto fail;
+ fields |= H2_PHDR_FND_HOST;
+ }
+ }
+
+ if (isteq(list[idx].n, ist("host"))) {
+ if (fields & H2_PHDR_FND_HOST)
+ continue;
+
+ fields |= H2_PHDR_FND_HOST;
+ }
+
+ if (isteq(list[idx].n, ist("content-length"))) {
+ ret = http_parse_cont_len_header(&list[idx].v, body_len,
+ *msgf & H2_MSGF_BODY_CL);
+ if (ret < 0)
+ goto fail;
+
+ *msgf |= H2_MSGF_BODY_CL;
+ sl_flags |= HTX_SL_F_CLEN;
+ if (ret == 0)
+ continue; // skip this duplicate
+ }
+
+ /* these ones are forbidden in requests (RFC7540#8.1.2.2) */
+ if (isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ goto fail;
+
+ if (isteq(list[idx].n, ist("te")) && !isteq(list[idx].v, ist("trailers")))
+ goto fail;
+
+ /* cookie requires special processing at the end */
+ if (isteq(list[idx].n, ist("cookie"))) {
+ http_cookie_register(list, idx, &ck, &lck);
+ continue;
+ }
+
+ if (!htx_add_header(htx, list[idx].n, list[idx].v))
+ goto fail;
+ }
+
+ /* RFC7540#8.1.2.1 mandates to reject response pseudo-headers (:status) */
+ if (fields & H2_PHDR_FND_STAT)
+ goto fail;
+
+ /* Let's dump the request now if not yet emitted. */
+ if (!(fields & H2_PHDR_FND_NONE)) {
+ sl = h2_prepare_htx_reqline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ }
+
+ if (*msgf & H2_MSGF_BODY_TUNNEL)
+ *msgf &= ~(H2_MSGF_BODY|H2_MSGF_BODY_CL);
+
+ if (!(*msgf & H2_MSGF_BODY) || ((*msgf & H2_MSGF_BODY_CL) && *body_len == 0) ||
+ (*msgf & H2_MSGF_BODY_TUNNEL)) {
+ /* Request without body or tunnel requested */
+ sl_flags |= HTX_SL_F_BODYLESS;
+ htx->flags |= HTX_FL_EOM;
+ }
+
+ if (*msgf & H2_MSGF_EXT_CONNECT) {
+ if (!htx_add_header(htx, ist("upgrade"), phdr_val[H2_PHDR_IDX_PROT]))
+ goto fail;
+ if (!htx_add_header(htx, ist("connection"), ist("upgrade")))
+ goto fail;
+ sl_flags |= HTX_SL_F_CONN_UPG;
+ }
+
+ /* update the start line with last detected header info */
+ sl->flags |= sl_flags;
+
+ /* complete with a missing Host header if needed (this test may hold
+ * when no regular header was found at all).
+ */
+ if ((fields & (H2_PHDR_FND_HOST|H2_PHDR_FND_AUTH)) == H2_PHDR_FND_AUTH) {
+ /* missing Host field, use :authority instead */
+ if (!htx_add_header(htx, ist("host"), phdr_val[H2_PHDR_IDX_AUTH]))
+ goto fail;
+ }
+
+ /* now we may have to build a cookie list. We'll dump the values of all
+ * visited headers.
+ */
+ if (ck >= 0) {
+ if (http_cookie_merge(htx, list, ck))
+ goto fail;
+ }
+
+ /* now send the end of headers marker */
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ /* proceed to scheme-based normalization on target-URI */
+ if (fields & H2_PHDR_FND_SCHM)
+ http_scheme_based_normalize(htx);
+
+ ret = 1;
+ return ret;
+
+ fail:
+ return -1;
+}
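+
+/* Illustrative sketch (not part of the original file, kept out of the build):
+ * building a minimal <list> for h2_make_htx_request(). As documented above,
+ * pseudo-headers use a NULL name with the H2_PHDR_IDX_* value in n.len.
+ */
+#if 0
+static int example_build_request(struct htx *htx)
+{
+ struct http_hdr list[5];
+ unsigned int msgf = 0; /* zero: the ES flag was seen, no body expected */
+ unsigned long long body_len = 0;
+
+ list[0].n = ist2(NULL, H2_PHDR_IDX_METH); list[0].v = ist("GET");
+ list[1].n = ist2(NULL, H2_PHDR_IDX_SCHM); list[1].v = ist("https");
+ list[2].n = ist2(NULL, H2_PHDR_IDX_PATH); list[2].v = ist("/");
+ list[3].n = ist("user-agent"); list[3].v = ist("demo");
+ list[4].n = ist2(NULL, 0); list[4].v = IST_NULL; /* end of list */
+
+ return h2_make_htx_request(list, htx, &msgf, &body_len);
+}
+#endif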
+
+/* Prepare the status line into <htx> from pseudo headers stored in <phdr[]>.
+ * <fields> indicates what was found so far. This should be called once at the
+ * detection of the first general header field or at the end of the message if
+ * no general header field was found yet. Returns the created start line on
+ * success, or NULL on failure. Upon success, <msgf> is updated with a few
+ * H2_MSGF_* flags indicating what was found while parsing.
+ */
+static struct htx_sl *h2_prepare_htx_stsline(uint32_t fields, struct ist *phdr, struct htx *htx, unsigned int *msgf)
+{
+ unsigned int status, flags = HTX_SL_F_IS_RESP;
+ struct htx_sl *sl;
+ struct ist stat;
+
+ /* only :status is allowed as a pseudo header */
+ if (!(fields & H2_PHDR_FND_STAT))
+ goto fail;
+
+ if (phdr[H2_PHDR_IDX_STAT].len != 3)
+ goto fail;
+
+ /* if Extended CONNECT is used, convert status code from 200 to htx 101
+ * following rfc 8441 */
+ if (unlikely(*msgf & H2_MSGF_EXT_CONNECT) &&
+ isteq(phdr[H2_PHDR_IDX_STAT], ist("200"))) {
+ stat = ist("101");
+ status = 101;
+ }
+ else {
+ unsigned char h, t, u;
+
+ stat = phdr[H2_PHDR_IDX_STAT];
+
+ h = stat.ptr[0] - '0';
+ t = stat.ptr[1] - '0';
+ u = stat.ptr[2] - '0';
+ if (h > 9 || t > 9 || u > 9)
+ goto fail;
+ status = h * 100 + t * 10 + u;
+ }
+
+ /* 101 responses are not supported in H2, so return an error.
+ * On 1xx responses there is no ES on the HEADERS frame, but there is no
+ * body either. So remove the flag H2_MSGF_BODY and add H2_MSGF_RSP_1XX to
+ * notify the decoder that another HEADERS frame is expected.
+ * 204/304 responses have no body by definition. So remove the flag
+ * H2_MSGF_BODY and set H2_MSGF_BODYLESS_RSP.
+ *
+ * Note however that there is a special condition for Extended CONNECT.
+ * In this case, we explicitly convert it to HTX 101 to mimic the
+ * GET+Upgrade HTTP/1.1 mechanism.
+ */
+ if (status == 101) {
+ if (!(*msgf & H2_MSGF_EXT_CONNECT))
+ goto fail;
+ }
+ else if (status < 200) {
+ *msgf |= H2_MSGF_RSP_1XX;
+ *msgf &= ~H2_MSGF_BODY;
+ }
+ else if (status == 204 || status == 304) {
+ *msgf &= ~H2_MSGF_BODY;
+ *msgf |= H2_MSGF_BODYLESS_RSP;
+ }
+
+ /* Set HTX start-line flags */
+ flags |= HTX_SL_F_VER_11; // V2 in fact
+ flags |= HTX_SL_F_XFER_LEN; // xfer len always known with H2
+
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/2.0"), stat, ist(""));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = status;
+ return sl;
+ fail:
+ return NULL;
+}
+
+/* Takes an H2 response present in the headers list <list> terminated by a name
+ * being <NULL,0> and emits the equivalent HTX response according to the rules
+ * documented in RFC7540 #8.1.2. The output contents are emitted in <htx>, and
+ * a positive value is returned if some bytes were emitted. In case of error, a
+ * negative error code is returned.
+ *
+ * Upon success, <msgf> is filled with a few H2_MSGF_* flags indicating what
+ * was found while parsing. The caller must set it to zero, or to H2_MSGF_BODY
+ * if a body is detected (!ES).
+ *
+ * The headers list <list> must be composed of :
+ * - n.name != NULL, n.len > 0 : literal header name
+ * - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
+ * among H2_PHDR_IDX_*
+ * - n.name ignored, n.len == 0 : end of list
+ * - in all cases except the end of list, v.ptr and v.len must designate a
+ * valid value.
+ *
+ * <upgrade_protocol> is only used if the htx status code is 101 indicating a
+ * response to an upgrade or h2-equivalent request.
+ */
+int h2_make_htx_response(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len, char *upgrade_protocol)
+{
+ struct ist phdr_val[H2_PHDR_NUM_ENTRIES];
+ uint32_t fields; /* bit mask of H2_PHDR_FND_* */
+ uint32_t idx;
+ int phdr;
+ int ret;
+ int i;
+ struct htx_sl *sl = NULL;
+ unsigned int sl_flags = 0;
+ const char *ctl;
+
+ fields = 0;
+ for (idx = 0; list[idx].n.len != 0; idx++) {
+ if (!isttest(list[idx].n)) {
+ /* this is an indexed pseudo-header */
+ phdr = list[idx].n.len;
+ }
+ else {
+ /* this can be any type of header */
+ /* RFC7540#8.1.2: upper case not allowed in header field names.
+ * #10.3: header names must be valid (i.e. match a token).
+ * For pseudo-headers we check from 2nd char and for other ones
+ * from the first char, because HTTP_IS_TOKEN() also excludes
+ * the colon.
+ */
+ phdr = h2_str_to_phdr(list[idx].n);
+
+ for (i = !!phdr; i < list[idx].n.len; i++)
+ if ((uint8_t)(list[idx].n.ptr[i] - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
+ goto fail;
+ }
+
+ /* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
+ * rejecting NUL, CR and LF characters.
+ */
+ ctl = ist_find_ctl(list[idx].v);
+ if (unlikely(ctl) && has_forbidden_char(list[idx].v, ctl))
+ goto fail;
+
+ if (phdr > 0 && phdr < H2_PHDR_NUM_ENTRIES) {
+ /* insert a pseudo header by its index (in phdr) and value (in value) */
+ if (fields & ((1 << phdr) | H2_PHDR_FND_NONE)) {
+ if (fields & H2_PHDR_FND_NONE) {
+ /* pseudo header field after regular headers */
+ goto fail;
+ }
+ else {
+ /* repeated pseudo header field */
+ goto fail;
+ }
+ }
+ fields |= 1 << phdr;
+ phdr_val[phdr] = list[idx].v;
+ continue;
+ }
+ else if (phdr != 0) {
+ /* invalid pseudo header -- should never happen here */
+ goto fail;
+ }
+
+ /* regular header field in (name,value) */
+ if (!(fields & H2_PHDR_FND_NONE)) {
+ /* no more pseudo-headers, time to build the status line */
+ sl = h2_prepare_htx_stsline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ fields |= H2_PHDR_FND_NONE;
+ }
+
+ if (isteq(list[idx].n, ist("content-length"))) {
+ ret = http_parse_cont_len_header(&list[idx].v, body_len,
+ *msgf & H2_MSGF_BODY_CL);
+ if (ret < 0)
+ goto fail;
+
+ *msgf |= H2_MSGF_BODY_CL;
+ sl_flags |= HTX_SL_F_CLEN;
+ if (ret == 0)
+ continue; // skip this duplicate
+ }
+
+ /* these ones are forbidden in responses (RFC7540#8.1.2.2) */
+ if (isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ goto fail;
+
+ if (!htx_add_header(htx, list[idx].n, list[idx].v))
+ goto fail;
+ }
+
+ /* RFC7540#8.1.2.1 mandates to reject request pseudo-headers */
+ if (fields & (H2_PHDR_FND_AUTH|H2_PHDR_FND_METH|H2_PHDR_FND_PATH|H2_PHDR_FND_SCHM))
+ goto fail;
+
+ /* Let's dump the request now if not yet emitted. */
+ if (!(fields & H2_PHDR_FND_NONE)) {
+ sl = h2_prepare_htx_stsline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ }
+
+ if (sl->info.res.status == 101 && upgrade_protocol) {
+ if (!htx_add_header(htx, ist("connection"), ist("upgrade")))
+ goto fail;
+ if (!htx_add_header(htx, ist("upgrade"), ist(upgrade_protocol)))
+ goto fail;
+ sl_flags |= HTX_SL_F_CONN_UPG;
+ }
+
+ if ((*msgf & H2_MSGF_BODY_TUNNEL) &&
+ ((sl->info.res.status >= 200 && sl->info.res.status < 300) || sl->info.res.status == 101))
+ *msgf &= ~(H2_MSGF_BODY|H2_MSGF_BODY_CL);
+ else
+ *msgf &= ~H2_MSGF_BODY_TUNNEL;
+
+ if (!(*msgf & H2_MSGF_BODY) || ((*msgf & H2_MSGF_BODY_CL) && *body_len == 0) ||
+ (*msgf & H2_MSGF_BODY_TUNNEL)) {
+ /* Response without body or tunnel successfully established */
+ sl_flags |= HTX_SL_F_BODYLESS;
+ htx->flags |= HTX_FL_EOM;
+ }
+
+ /* update the start line with last detected header info */
+ sl->flags |= sl_flags;
+
+ if ((*msgf & (H2_MSGF_BODY|H2_MSGF_BODY_TUNNEL|H2_MSGF_BODY_CL)) == H2_MSGF_BODY) {
+ /* FIXME: Do we need to signal anything when we have a body and
+ * no content-length, to have the equivalent of H1's chunked
+ * encoding?
+ */
+ }
+
+ /* now send the end of headers marker */
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ ret = 1;
+ return ret;
+
+ fail:
+ return -1;
+}
+
+/* Takes an H2 headers list <list> terminated by a name being <NULL,0> and emits
+ * the equivalent HTX trailers blocks. The output contents are emitted in <htx>,
+ * and a positive value is returned if some bytes were emitted. In case of
+ * error, a negative error code is returned. The caller must have verified that
+ * the message in the buffer is compatible with receipt of trailers.
+ *
+ * The headers list <list> must be composed of :
+ * - n.name != NULL, n.len > 0 : literal header name
+ * - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
+ * among H2_PHDR_IDX_* (illegal here)
+ * - n.name ignored, n.len == 0 : end of list
+ * - in all cases except the end of list, v.ptr and v.len must designate a
+ * valid value.
+ */
+int h2_make_htx_trailers(struct http_hdr *list, struct htx *htx)
+{
+ const char *ctl;
+ uint32_t idx;
+ int i;
+
+ for (idx = 0; list[idx].n.len != 0; idx++) {
+ if (!isttest(list[idx].n)) {
+ /* This is an indexed pseudo-header (RFC7540#8.1.2.1) */
+ goto fail;
+ }
+
+ /* RFC7540#8.1.2: upper case not allowed in header field names.
+ * #10.3: header names must be valid (i.e. match a token). This
+ * also catches pseudo-headers which are forbidden in trailers.
+ */
+ for (i = 0; i < list[idx].n.len; i++)
+ if ((uint8_t)(list[idx].n.ptr[i] - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
+ goto fail;
+
+ /* these ones are forbidden in trailers (RFC7540#8.1.2.2) */
+ if (isteq(list[idx].n, ist("host")) ||
+ isteq(list[idx].n, ist("content-length")) ||
+ isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("te")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ goto fail;
+
+ /* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
+ * rejecting NUL, CR and LF characters.
+ */
+ ctl = ist_find_ctl(list[idx].v);
+ if (unlikely(ctl) && has_forbidden_char(list[idx].v, ctl))
+ goto fail;
+
+ if (!htx_add_trailer(htx, list[idx].n, list[idx].v))
+ goto fail;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOT))
+ goto fail;
+
+ return 1;
+
+ fail:
+ return -1;
+}
diff --git a/src/h3.c b/src/h3.c
new file mode 100644
index 0000000..efa4068
--- /dev/null
+++ b/src/h3.c
@@ -0,0 +1,1545 @@
+/*
+ * HTTP/3 protocol processing
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <import/ist.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/h3.h>
+#include <haproxy/h3_stats.h>
+#include <haproxy/http.h>
+#include <haproxy/http-hdr-t.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/intops.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/pool.h>
+#include <haproxy/qmux_http.h>
+#include <haproxy/qpack-dec.h>
+#include <haproxy/qpack-enc.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+/* trace source and events */
+static void h3_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct trace_event h3_trace_events[] = {
+#define H3_EV_RX_FRAME (1ULL << 0)
+ { .mask = H3_EV_RX_FRAME, .name = "rx_frame", .desc = "receipt of any H3 frame" },
+#define H3_EV_RX_DATA (1ULL << 1)
+ { .mask = H3_EV_RX_DATA, .name = "rx_data", .desc = "receipt of H3 DATA frame" },
+#define H3_EV_RX_HDR (1ULL << 2)
+ { .mask = H3_EV_RX_HDR, .name = "rx_hdr", .desc = "receipt of H3 HEADERS frame" },
+#define H3_EV_RX_SETTINGS (1ULL << 3)
+ { .mask = H3_EV_RX_SETTINGS, .name = "rx_settings", .desc = "receipt of H3 SETTINGS frame" },
+#define H3_EV_TX_DATA (1ULL << 4)
+ { .mask = H3_EV_TX_DATA, .name = "tx_data", .desc = "transmission of H3 DATA frame" },
+#define H3_EV_TX_HDR (1ULL << 5)
+ { .mask = H3_EV_TX_HDR, .name = "tx_hdr", .desc = "transmission of H3 HEADERS frame" },
+#define H3_EV_TX_SETTINGS (1ULL << 6)
+ { .mask = H3_EV_TX_SETTINGS, .name = "tx_settings", .desc = "transmission of H3 SETTINGS frame" },
+#define H3_EV_H3S_NEW (1ULL << 7)
+ { .mask = H3_EV_H3S_NEW, .name = "h3s_new", .desc = "new H3 stream" },
+#define H3_EV_H3S_END (1ULL << 8)
+ { .mask = H3_EV_H3S_END, .name = "h3s_end", .desc = "H3 stream terminated" },
+ { }
+};
+
+static const struct name_desc h3_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="qcs", .desc="QUIC stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc h3_trace_decoding[] = {
+#define H3_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define H3_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only qcc/qcs state and flags, no real decoding" },
+ { /* end */ }
+};
+
+struct trace_source trace_h3 = {
+ .name = IST("h3"),
+ .desc = "HTTP/3 transcoder",
+ .arg_def = TRC_ARG1_CONN, /* TRACE()'s first argument is always a connection */
+ .default_cb = h3_trace,
+ .known_events = h3_trace_events,
+ .lockon_args = h3_trace_lockon_args,
+ .decoding = h3_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+#define TRACE_SOURCE &trace_h3
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+#if defined(DEBUG_H3)
+#define h3_debug_printf fprintf
+#define h3_debug_hexdump debug_hexdump
+#else
+#define h3_debug_printf(...) do { } while (0)
+#define h3_debug_hexdump(...) do { } while (0)
+#endif
+
+#define H3_CF_SETTINGS_SENT 0x00000001 /* SETTINGS frame already sent on local control stream */
+#define H3_CF_SETTINGS_RECV 0x00000002 /* SETTINGS frame already received on remote control stream */
+#define H3_CF_UNI_CTRL_SET 0x00000004 /* Remote H3 Control stream opened */
+#define H3_CF_UNI_QPACK_DEC_SET 0x00000008 /* Remote QPACK decoder stream opened */
+#define H3_CF_UNI_QPACK_ENC_SET 0x00000010 /* Remote QPACK encoder stream opened */
+
+/* Default settings */
+static uint64_t h3_settings_qpack_max_table_capacity = 0;
+static uint64_t h3_settings_qpack_blocked_streams = 4096;
+static uint64_t h3_settings_max_field_section_size = QUIC_VARINT_8_BYTE_MAX; /* Unlimited */
+
+struct h3c {
+ struct qcc *qcc;
+ struct qcs *ctrl_strm; /* Control stream */
+ enum h3_err err;
+ uint32_t flags;
+
+ /* Settings */
+ uint64_t qpack_max_table_capacity;
+ uint64_t qpack_blocked_streams;
+ uint64_t max_field_section_size;
+
+ uint64_t id_goaway; /* stream ID used for a GOAWAY frame */
+
+ struct buffer_wait buf_wait; /* wait list for buffer allocations */
+ /* Stats counters */
+ struct h3_counters *prx_counters;
+};
+
+DECLARE_STATIC_POOL(pool_head_h3c, "h3c", sizeof(struct h3c));
+
+#define H3_SF_UNI_INIT 0x00000001 /* stream type not parsed for unidirectional stream */
+#define H3_SF_UNI_NO_H3 0x00000002 /* unidirectional stream does not carry H3 frames */
+#define H3_SF_HAVE_CLEN 0x00000004 /* content-length header is present */
+
+struct h3s {
+ struct h3c *h3c;
+
+ enum h3s_t type;
+ enum h3s_st_req st_req; /* only used for request streams */
+ uint64_t demux_frame_len;
+ uint64_t demux_frame_type;
+
+ unsigned long long body_len; /* known request body length from content-length header if present */
+ unsigned long long data_len; /* total length of all parsed DATA */
+
+ int flags;
+};
+
+DECLARE_STATIC_POOL(pool_head_h3s, "h3s", sizeof(struct h3s));
+
+/* Initialize a uni-stream <qcs> by reading its type from <b>.
+ *
+ * Returns the count of consumed bytes or a negative error code.
+ */
+static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs,
+ struct buffer *b)
+{
+ /* decode unidirectional stream type */
+ struct h3s *h3s = qcs->ctx;
+ uint64_t type;
+ size_t len = 0, ret;
+
+ TRACE_ENTER(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+
+ BUG_ON_HOT(!quic_stream_is_uni(qcs->id) ||
+ h3s->flags & H3_SF_UNI_INIT);
+
+ ret = b_quic_dec_int(&type, b, &len);
+ if (!ret) {
+ /* not enough data to decode uni stream type, retry later */
+ TRACE_DATA("cannot decode uni stream type due to incomplete data", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ goto out;
+ }
+
+ switch (type) {
+ case H3_UNI_S_T_CTRL:
+ if (h3c->flags & H3_CF_UNI_CTRL_SET) {
+ qcc_emit_cc_app(qcs->qcc, H3_STREAM_CREATION_ERROR, 1);
+ return -1;
+ }
+ h3c->flags |= H3_CF_UNI_CTRL_SET;
+ h3s->type = H3S_T_CTRL;
+ break;
+
+ case H3_UNI_S_T_PUSH:
+ /* TODO not supported for the moment */
+ h3s->type = H3S_T_PUSH;
+ break;
+
+ case H3_UNI_S_T_QPACK_DEC:
+ if (h3c->flags & H3_CF_UNI_QPACK_DEC_SET) {
+ qcc_emit_cc_app(qcs->qcc, H3_STREAM_CREATION_ERROR, 1);
+ return -1;
+ }
+ h3c->flags |= H3_CF_UNI_QPACK_DEC_SET;
+ h3s->type = H3S_T_QPACK_DEC;
+ h3s->flags |= H3_SF_UNI_NO_H3;
+ break;
+
+ case H3_UNI_S_T_QPACK_ENC:
+ if (h3c->flags & H3_CF_UNI_QPACK_ENC_SET) {
+ qcc_emit_cc_app(qcs->qcc, H3_STREAM_CREATION_ERROR, 1);
+ return -1;
+ }
+ h3c->flags |= H3_CF_UNI_QPACK_ENC_SET;
+ h3s->type = H3S_T_QPACK_ENC;
+ h3s->flags |= H3_SF_UNI_NO_H3;
+ break;
+
+ default:
+ /* draft-ietf-quic-http34 9. Extensions to HTTP/3
+ *
+ * Implementations MUST [...] abort reading on unidirectional
+ * streams that have unknown or unsupported types.
+ */
+ qcs->flags |= QC_SF_READ_ABORTED;
+ return -1;
+ }
+
+ h3s->flags |= H3_SF_UNI_INIT;
+
+ out:
+ TRACE_LEAVE(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ return len;
+}
+
+/* Parse a buffer <b> for a <qcs> uni-stream which does not contain H3 frames.
+ * This may be used for QPACK encoder/decoder streams for example. <fin> is set
+ * if this is the last frame of the stream.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_parse_uni_stream_no_h3(struct qcs *qcs, struct buffer *b, int fin)
+{
+ struct h3s *h3s = qcs->ctx;
+
+ BUG_ON_HOT(!quic_stream_is_uni(qcs->id) ||
+ !(h3s->flags & H3_SF_UNI_NO_H3));
+
+ switch (h3s->type) {
+ case H3S_T_QPACK_DEC:
+ if (qpack_decode_dec(b, fin, qcs))
+ return -1;
+ break;
+ case H3S_T_QPACK_ENC:
+ if (qpack_decode_enc(b, fin, qcs))
+ return -1;
+ break;
+ case H3S_T_UNKNOWN:
+ default:
+ /* Unknown stream should be flagged with QC_SF_READ_ABORTED. */
+ ABORT_NOW();
+ }
+
+ /* TODO adjust return code */
+ return 0;
+}
+
+/* Decode an H3 frame header from the <b> buffer. The frame type is stored in
+ * <ftype> and its length in <flen>.
+ *
+ * Returns the size of the H3 frame header. Note that the input buffer is not
+ * consumed.
+ */
+static inline size_t h3_decode_frm_header(uint64_t *ftype, uint64_t *flen,
+ struct buffer *b)
+{
+ size_t hlen;
+
+ hlen = 0;
+ if (!b_quic_dec_int(ftype, b, &hlen) ||
+ !b_quic_dec_int(flen, b, &hlen)) {
+ return 0;
+ }
+
+ return hlen;
+}
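+
+/* For example (illustrative only): the two bytes 0x01 0x04 decode as an H3
+ * HEADERS frame (type 0x01) of length 4, both fields being QUIC
+ * variable-length integers whose two high bits encode the width (00 here,
+ * i.e. a single byte).
+ */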
+
+/* Check if H3 frame of type <ftype> is valid when received on stream <qcs>.
+ *
+ * Returns a boolean. If false, a connection error H3_FRAME_UNEXPECTED should
+ * be reported.
+ */
+static int h3_is_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype)
+{
+ struct h3s *h3s = qcs->ctx;
+ const uint64_t id = qcs->id;
+
+ BUG_ON_HOT(h3s->type == H3S_T_UNKNOWN);
+
+ switch (ftype) {
+ case H3_FT_DATA:
+ return h3s->type != H3S_T_CTRL && (h3s->st_req == H3S_ST_REQ_HEADERS ||
+ h3s->st_req == H3S_ST_REQ_DATA);
+
+ case H3_FT_HEADERS:
+ return h3s->type != H3S_T_CTRL && h3s->st_req != H3S_ST_REQ_TRAILERS;
+
+ case H3_FT_CANCEL_PUSH:
+ case H3_FT_GOAWAY:
+ case H3_FT_MAX_PUSH_ID:
+ /* Only allowed for control stream. First frame of control
+ * stream MUST be SETTINGS.
+ */
+ return h3s->type == H3S_T_CTRL &&
+ (h3c->flags & H3_CF_SETTINGS_RECV);
+
+ case H3_FT_SETTINGS:
+ /* draft-ietf-quic-http34 7.2.4. SETTINGS
+ *
+ * If an endpoint receives a second SETTINGS frame on the control
+ * stream, the endpoint MUST respond with a connection error of type
+ * H3_FRAME_UNEXPECTED.
+ */
+ return h3s->type == H3S_T_CTRL &&
+ !(h3c->flags & H3_CF_SETTINGS_RECV);
+
+ case H3_FT_PUSH_PROMISE:
+ return h3s->type != H3S_T_CTRL &&
+ (id & QCS_ID_SRV_INTIATOR_BIT);
+
+ default:
+ /* draft-ietf-quic-http34 9. Extensions to HTTP/3
+ *
+ * Implementations MUST discard frames [...] that have unknown
+ * or unsupported types.
+ */
+ return h3s->type != H3S_T_CTRL || (h3c->flags & H3_CF_SETTINGS_RECV);
+ }
+}
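+
+/* Informative summary of the rules implemented above:
+ *
+ * - request stream: HEADERS first, then DATA and trailing HEADERS;
+ *   SETTINGS, GOAWAY, CANCEL_PUSH and MAX_PUSH_ID are rejected.
+ * - control stream: the first frame MUST be SETTINGS and a second
+ *   SETTINGS is rejected; GOAWAY, CANCEL_PUSH and MAX_PUSH_ID are only
+ *   valid once SETTINGS was received.
+ * - PUSH_PROMISE is only tolerated on server-initiated streams.
+ * - unknown frame types are valid (and discarded later) anywhere, except
+ *   as the first frame of the control stream.
+ */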
+
+/* Check on stream <qcs> that the cumulated length of all received DATA frames
+ * does not exceed a previously parsed content-length header value. <fin> must
+ * be set for the last data of the stream, in which case the sum of the DATA
+ * frame lengths must be exactly equal to the content-length.
+ *
+ * This must only be called for a stream with the H3_SF_HAVE_CLEN flag.
+ *
+ * Returns 0 if valid, else non-zero.
+ */
+static int h3_check_body_size(struct qcs *qcs, int fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ int ret = 0;
+ TRACE_ENTER(H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+
+ /* Reserved for streams with a previously parsed content-length header. */
+ BUG_ON(!(h3s->flags & H3_SF_HAVE_CLEN));
+
+ /* RFC 9114 4.1.2. Malformed Requests and Responses
+ *
+ * A request or response that is defined as having content when it
+ * contains a Content-Length header field (Section 8.6 of [HTTP]) is
+ * malformed if the value of the Content-Length header field does not
+ * equal the sum of the DATA frame lengths received.
+ *
+ * TODO for backend support
+ * A response that is
+ * defined as never having content, even when a Content-Length is
+ * present, can have a non-zero Content-Length header field even though
+ * no content is included in DATA frames.
+ */
+ if (h3s->data_len > h3s->body_len ||
+ (fin && h3s->data_len < h3s->body_len)) {
+ TRACE_ERROR("Content-length does not match DATA frame size", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs);
+ ret = -1;
+ }
+
+ TRACE_LEAVE(H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ return ret;
+}
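+
+/* Worked example (informative): with "content-length: 10", DATA frames of
+ * 4 then 6 bytes are valid since data_len equals body_len on the final
+ * frame. Frames of 4 then 7 bytes fail immediately (data_len > body_len),
+ * and a single 4-byte frame followed by FIN fails as well
+ * (fin && data_len < body_len).
+ */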
+
+/* Parse from buffer <buf> an H3 HEADERS frame of length <len>. Data are copied
+ * into a local HTX buffer and transferred to the stream connector layer. <fin>
+ * must be set if this is the last data to transfer from this stream.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf,
+ uint64_t len, char fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ struct buffer htx_buf = BUF_NULL;
+ struct buffer *tmp = get_trash_chunk();
+ struct htx *htx = NULL;
+ struct htx_sl *sl;
+ struct http_hdr list[global.tune.max_http_hdr];
+ unsigned int flags = HTX_SL_F_NONE;
+ struct ist meth = IST_NULL, path = IST_NULL;
+ struct ist scheme = IST_NULL, authority = IST_NULL;
+ int hdr_idx, ret;
+ int cookie = -1, last_cookie = -1, i;
+
+ /* RFC 9114 4.1.2. Malformed Requests and Responses
+ *
+ * A malformed request or response is one that is an otherwise valid
+ * sequence of frames but is invalid due to:
+ * - the presence of prohibited fields or pseudo-header fields,
+ * - the absence of mandatory pseudo-header fields,
+ * - invalid values for pseudo-header fields,
+ * - pseudo-header fields after fields,
+ * - an invalid sequence of HTTP messages,
+ * - the inclusion of uppercase field names, or
+ * - the inclusion of invalid characters in field names or values.
+ *
+ * [...]
+ *
+ * Intermediaries that process HTTP requests or responses (i.e., any
+ * intermediary not acting as a tunnel) MUST NOT forward a malformed
+ * request or response. Malformed requests or responses that are
+ * detected MUST be treated as a stream error of type H3_MESSAGE_ERROR.
+ */
+
+ TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+
+ /* TODO support trailer parsing in this function */
+
+ /* TODO support buffer wrapping */
+ BUG_ON(b_head(buf) + len >= b_wrap(buf));
+ ret = qpack_decode_fs((const unsigned char *)b_head(buf), len, tmp,
+ list, sizeof(list) / sizeof(list[0]));
+ if (ret < 0) {
+ TRACE_ERROR("QPACK decoding error", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = -ret;
+ len = -1;
+ goto out;
+ }
+
+ qc_get_buf(qcs, &htx_buf);
+ BUG_ON(!b_size(&htx_buf)); /* TODO */
+ htx = htx_from_buf(&htx_buf);
+
+ /* first treat pseudo-header to build the start line */
+ hdr_idx = 0;
+ while (1) {
+ /* RFC 9114 4.3. HTTP Control Data
+ *
+ * Endpoints MUST treat a request or response that contains
+ * undefined or invalid pseudo-header fields as malformed.
+ *
+ * All pseudo-header fields MUST appear in the header section before
+ * regular header fields. Any request or response that contains a
+ * pseudo-header field that appears in a header section after a regular
+ * header field MUST be treated as malformed.
+ */
+
+ /* Stop at first non pseudo-header. */
+ if (!istmatch(list[hdr_idx].n, ist(":")))
+ break;
+
+		/* pseudo-header. A malformed name containing an uppercase
+		 * character or an invalid token is rejected by the else clause.
+ */
+ if (isteq(list[hdr_idx].n, ist(":method"))) {
+ if (isttest(meth)) {
+ TRACE_ERROR("duplicated method pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ meth = list[hdr_idx].v;
+ }
+ else if (isteq(list[hdr_idx].n, ist(":path"))) {
+ if (isttest(path)) {
+ TRACE_ERROR("duplicated path pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ path = list[hdr_idx].v;
+ }
+ else if (isteq(list[hdr_idx].n, ist(":scheme"))) {
+ if (isttest(scheme)) {
+ /* duplicated pseudo-header */
+ TRACE_ERROR("duplicated scheme pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ scheme = list[hdr_idx].v;
+ }
+ else if (isteq(list[hdr_idx].n, ist(":authority"))) {
+ if (isttest(authority)) {
+ TRACE_ERROR("duplicated authority pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ authority = list[hdr_idx].v;
+ }
+ else {
+ TRACE_ERROR("unknown pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+
+ ++hdr_idx;
+ }
+
+ if (!istmatch(meth, ist("CONNECT"))) {
+ /* RFC 9114 4.3.1. Request Pseudo-Header Fields
+ *
+ * All HTTP/3 requests MUST include exactly one value for the :method,
+ * :scheme, and :path pseudo-header fields, unless the request is a
+ * CONNECT request; see Section 4.4.
+ */
+ if (!isttest(meth) || !isttest(scheme) || !isttest(path)) {
+ TRACE_ERROR("missing mandatory pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ }
+
+ flags |= HTX_SL_F_VER_11;
+ flags |= HTX_SL_F_XFER_LEN;
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth, path, ist("HTTP/3.0"));
+ if (!sl) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (fin)
+ sl->flags |= HTX_SL_F_BODYLESS;
+
+ sl->info.req.meth = find_http_meth(meth.ptr, meth.len);
+
+ if (isttest(authority))
+ htx_add_header(htx, ist("host"), authority);
+
+ /* now treat standard headers */
+ while (1) {
+ if (isteq(list[hdr_idx].n, ist("")))
+ break;
+
+ if (istmatch(list[hdr_idx].n, ist(":"))) {
+ TRACE_ERROR("pseudo-header field after fields", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+
+ for (i = 0; i < list[hdr_idx].n.len; ++i) {
+ const char c = list[hdr_idx].n.ptr[i];
+			if ((uint8_t)(c - 'A') <= 'Z' - 'A' || !HTTP_IS_TOKEN(c)) {
+ TRACE_ERROR("invalid characters in field name", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ }
+
+ if (isteq(list[hdr_idx].n, ist("cookie"))) {
+ http_cookie_register(list, hdr_idx, &cookie, &last_cookie);
+ ++hdr_idx;
+ continue;
+ }
+ else if (isteq(list[hdr_idx].n, ist("content-length"))) {
+ ret = http_parse_cont_len_header(&list[hdr_idx].v,
+ &h3s->body_len,
+ h3s->flags & H3_SF_HAVE_CLEN);
+ if (ret < 0) {
+ TRACE_ERROR("invalid content-length", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ else if (!ret) {
+ /* Skip duplicated value. */
+ ++hdr_idx;
+ continue;
+ }
+
+ h3s->flags |= H3_SF_HAVE_CLEN;
+			/* This will fail if the current frame is the last one
+			 * and the content-length is not zero.
+			 */
+ if (h3_check_body_size(qcs, fin)) {
+ len = -1;
+ goto out;
+ }
+ }
+ else if (isteq(list[hdr_idx].n, ist("connection")) ||
+ isteq(list[hdr_idx].n, ist("proxy-connection")) ||
+ isteq(list[hdr_idx].n, ist("keep-alive")) ||
+ isteq(list[hdr_idx].n, ist("transfer-encoding"))) {
+ /* RFC 9114 4.2. HTTP Fields
+ *
+ * HTTP/3 does not use the Connection header field to indicate
+ * connection-specific fields; in this protocol, connection-
+ * specific metadata is conveyed by other means. An endpoint
+ * MUST NOT generate an HTTP/3 field section containing
+ * connection-specific fields; any message containing
+ * connection-specific fields MUST be treated as malformed.
+ */
+ TRACE_ERROR("invalid connection header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+ else if (isteq(list[hdr_idx].n, ist("te")) &&
+ !isteq(list[hdr_idx].v, ist("trailers"))) {
+ /* RFC 9114 4.2. HTTP Fields
+ *
+ * The only exception to this is the TE header field, which MAY
+ * be present in an HTTP/3 request header; when it is, it MUST
+ * NOT contain any value other than "trailers".
+ */
+ TRACE_ERROR("invalid te header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ len = -1;
+ goto out;
+ }
+
+ htx_add_header(htx, list[hdr_idx].n, list[hdr_idx].v);
+ ++hdr_idx;
+ }
+
+ if (cookie >= 0) {
+ if (http_cookie_merge(htx, list, cookie)) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+
+ htx_add_endof(htx, HTX_BLK_EOH);
+ if (fin)
+ htx->flags |= HTX_FL_EOM;
+
+ htx_to_buf(htx, &htx_buf);
+ htx = NULL;
+
+ if (!qc_attach_sc(qcs, &htx_buf)) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ /* RFC 9114 5.2. Connection Shutdown
+ *
+ * The GOAWAY frame contains an identifier that
+ * indicates to the receiver the range of requests or pushes that were
+ * or might be processed in this connection. The server sends a client-
+ * initiated bidirectional stream ID; the client sends a push ID.
+ * Requests or pushes with the indicated identifier or greater are
+ * rejected (Section 4.1.1) by the sender of the GOAWAY. This
+ * identifier MAY be zero if no requests or pushes were processed.
+ */
+ if (qcs->id >= h3c->id_goaway)
+ h3c->id_goaway = qcs->id + 4;
+
+ out:
+	/* HTX may be non-NULL if an error occurred before the previous htx_to_buf(). */
+ if (htx)
+ htx_to_buf(htx, &htx_buf);
+
+ /* buffer is transferred to the stream connector and set to NULL
+ * except on stream creation error.
+ */
+ if (b_size(&htx_buf)) {
+ b_free(&htx_buf);
+ offer_buffers(NULL, 1);
+ }
+
+ TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ return len;
+}
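+
+/* Informative example of the checks enforced above: the field section
+ *
+ *   :method=GET :scheme=https :path=/ :authority=example.org user-agent=foo
+ *
+ * is accepted, whereas repeating ":path", placing ":scheme" after
+ * "user-agent", or adding "transfer-encoding: chunked" would each cause
+ * the request to be rejected as malformed.
+ */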
+
+/* Copy from buffer <buf> an H3 DATA frame of length <len> into the HTX buffer
+ * of QUIC stream <qcs>. <fin> must be set if this is the last data to transfer
+ * from this stream.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf,
+ uint64_t len, char fin)
+{
+ struct buffer *appbuf;
+ struct htx *htx = NULL;
+ size_t htx_sent = 0;
+ int htx_space;
+ char *head;
+
+ TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs);
+
+ appbuf = qc_get_buf(qcs, &qcs->rx.app_buf);
+ BUG_ON(!appbuf);
+ htx = htx_from_buf(appbuf);
+
+ if (len > b_data(buf)) {
+ len = b_data(buf);
+ fin = 0;
+ }
+
+ head = b_head(buf);
+ retry:
+ htx_space = htx_free_data_space(htx);
+ if (!htx_space) {
+ qcs->flags |= QC_SF_DEM_FULL;
+ goto out;
+ }
+
+ if (len > htx_space) {
+ len = htx_space;
+ fin = 0;
+ }
+
+ if (head + len > b_wrap(buf)) {
+ size_t contig = b_wrap(buf) - head;
+ htx_sent = htx_add_data(htx, ist2(b_head(buf), contig));
+ if (htx_sent < contig) {
+ qcs->flags |= QC_SF_DEM_FULL;
+ goto out;
+ }
+
+ len -= contig;
+ head = b_orig(buf);
+ goto retry;
+ }
+
+ htx_sent += htx_add_data(htx, ist2(head, len));
+ if (htx_sent < len) {
+ qcs->flags |= QC_SF_DEM_FULL;
+ goto out;
+ }
+
+ if (fin && len == htx_sent)
+ htx->flags |= HTX_FL_EOM;
+
+ out:
+ htx_to_buf(htx, appbuf);
+
+ TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs);
+ return htx_sent;
+}
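+
+/* Informative example of the wrapping logic above: with a 16-byte circular
+ * buffer whose head is at offset 12 and which holds 8 bytes of payload, a
+ * first pass copies the 4 contiguous bytes up to the wrapping point, then
+ * <head> is reset to the buffer origin and the retry iteration copies the
+ * remaining 4 bytes.
+ */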
+
+/* Parse a SETTINGS frame of length <len> of payload <buf>.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf,
+ size_t len)
+{
+ struct buffer b;
+ uint64_t id, value;
+ size_t ret = 0;
+ long mask = 0; /* used to detect duplicated settings identifier */
+
+ TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_SETTINGS, h3c->qcc->conn);
+
+ /* Work on a copy of <buf>. */
+ b = b_make(b_orig(buf), b_size(buf), b_head_ofs(buf), len);
+
+ while (b_data(&b)) {
+ if (!b_quic_dec_int(&id, &b, &ret) || !b_quic_dec_int(&value, &b, &ret)) {
+ h3c->err = H3_FRAME_ERROR;
+ return -1;
+ }
+
+ h3_debug_printf(stderr, "%s id: %llu value: %llu\n",
+ __func__, (unsigned long long)id, (unsigned long long)value);
+
+ /* draft-ietf-quic-http34 7.2.4. SETTINGS
+ *
+ * The same setting identifier MUST NOT occur more than once in the
+ * SETTINGS frame. A receiver MAY treat the presence of duplicate
+ * setting identifiers as a connection error of type H3_SETTINGS_ERROR.
+ */
+
+		/* Skip the duplicate check for identifiers too large to fit
+		 * in the mask, which is the case for GREASE values.
+		 */
+ if (id < sizeof(mask)) {
+ if (ha_bit_test(id, &mask)) {
+ h3c->err = H3_SETTINGS_ERROR;
+ return -1;
+ }
+ ha_bit_set(id, &mask);
+ }
+
+ switch (id) {
+ case H3_SETTINGS_QPACK_MAX_TABLE_CAPACITY:
+ h3c->qpack_max_table_capacity = value;
+ break;
+ case H3_SETTINGS_MAX_FIELD_SECTION_SIZE:
+ h3c->max_field_section_size = value;
+ break;
+ case H3_SETTINGS_QPACK_BLOCKED_STREAMS:
+ h3c->qpack_blocked_streams = value;
+ break;
+
+ case H3_SETTINGS_RESERVED_0:
+ case H3_SETTINGS_RESERVED_2:
+ case H3_SETTINGS_RESERVED_3:
+ case H3_SETTINGS_RESERVED_4:
+ case H3_SETTINGS_RESERVED_5:
+ /* draft-ietf-quic-http34 7.2.4.1. Defined SETTINGS Parameters
+ *
+ * Setting identifiers which were defined in [HTTP2] where there is no
+ * corresponding HTTP/3 setting have also been reserved
+ * (Section 11.2.2). These reserved settings MUST NOT be sent, and
+ * their receipt MUST be treated as a connection error of type
+ * H3_SETTINGS_ERROR.
+ */
+ h3c->err = H3_SETTINGS_ERROR;
+ return -1;
+ default:
+ /* MUST be ignored */
+ break;
+ }
+ }
+
+ TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_SETTINGS, h3c->qcc->conn);
+ return ret;
+}
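+
+/* Informative example: assuming the identifiers of RFC 9114 7.2.4.1
+ * (QPACK_MAX_TABLE_CAPACITY=0x01, MAX_FIELD_SECTION_SIZE=0x06,
+ * QPACK_BLOCKED_STREAMS=0x07), the 4-byte SETTINGS payload
+ *
+ *   0x01 0x00 0x07 0x20
+ *
+ * decodes as QPACK_MAX_TABLE_CAPACITY=0 and QPACK_BLOCKED_STREAMS=32,
+ * while repeating identifier 0x01 in the same frame would be rejected
+ * with H3_SETTINGS_ERROR.
+ */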
+
+/* Decode the contents of stream <qcs> from buffer <b>. <fin> must be set to
+ * indicate that we received the last data of the stream.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ ssize_t total = 0, ret;
+
+ h3_debug_printf(stderr, "%s: STREAM ID: %lu\n", __func__, qcs->id);
+
+ if (quic_stream_is_uni(qcs->id) && !(h3s->flags & H3_SF_UNI_INIT)) {
+ ret = h3_init_uni_stream(h3c, qcs, b);
+ if (ret < 0) {
+ return -1;
+ }
+ else if (!ret) {
+ /* not enough data to initialize uni stream, retry later */
+ return 0;
+ }
+
+ total += ret;
+ }
+
+ if (quic_stream_is_uni(qcs->id) && (h3s->flags & H3_SF_UNI_NO_H3)) {
+ /* For non-h3 STREAM, parse it and return immediately. */
+ if ((ret = h3_parse_uni_stream_no_h3(qcs, b, fin)) < 0)
+ return -1;
+
+ total += ret;
+ return total;
+ }
+
+ /* RFC 9114 6.2.1. Control Streams
+ *
+ * The sender MUST NOT close the control stream, and the receiver MUST NOT
+ * request that the sender close the control stream. If either control
+ * stream is closed at any point, this MUST be treated as a connection
+ * error of type H3_CLOSED_CRITICAL_STREAM.
+ */
+ if (h3s->type == H3S_T_CTRL && fin) {
+ qcc_emit_cc_app(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ return -1;
+ }
+
+ if (!b_data(b) && fin && quic_stream_is_bidi(qcs->id)) {
+ qcs_http_handle_standalone_fin(qcs);
+ return 0;
+ }
+
+ while (b_data(b) && !(qcs->flags & QC_SF_DEM_FULL)) {
+ uint64_t ftype, flen;
+ char last_stream_frame = 0;
+
+ if (!h3s->demux_frame_len) {
+ /* Switch to a new frame. */
+ size_t hlen = h3_decode_frm_header(&ftype, &flen, b);
+ if (!hlen)
+ break;
+
+ h3_debug_printf(stderr, "%s: ftype: %lu, flen: %lu\n",
+ __func__, ftype, flen);
+
+ h3s->demux_frame_type = ftype;
+ h3s->demux_frame_len = flen;
+ total += hlen;
+
+ /* Check that content-length is not exceeded on a new DATA frame. */
+ if (ftype == H3_FT_DATA) {
+ h3s->data_len += flen;
+ if (h3s->flags & H3_SF_HAVE_CLEN && h3_check_body_size(qcs, fin)) {
+ qcc_emit_cc_app(qcs->qcc, h3c->err, 1);
+ return -1;
+ }
+ }
+
+ if (!h3_is_frame_valid(h3c, qcs, ftype)) {
+ qcc_emit_cc_app(qcs->qcc, H3_FRAME_UNEXPECTED, 1);
+ return -1;
+ }
+
+ if (!b_data(b))
+ break;
+ }
+
+ flen = h3s->demux_frame_len;
+ ftype = h3s->demux_frame_type;
+
+ /* Do not demux incomplete frames except H3 DATA which can be
+		 * fragmented into multiple HTX blocks.
+ */
+ if (flen > b_data(b) && ftype != H3_FT_DATA) {
+ /* Reject frames bigger than bufsize.
+ *
+ * TODO HEADERS should in complement be limited with H3
+ * SETTINGS_MAX_FIELD_SECTION_SIZE parameter to prevent
+ * excessive decompressed size.
+ */
+ if (flen > QC_S_RX_BUF_SZ) {
+ qcc_emit_cc_app(qcs->qcc, H3_EXCESSIVE_LOAD, 1);
+ return -1;
+ }
+ break;
+ }
+
+ /* Check content-length equality with DATA frames length on the last frame. */
+ if (fin && h3s->flags & H3_SF_HAVE_CLEN && h3_check_body_size(qcs, fin)) {
+ qcc_emit_cc_app(qcs->qcc, h3c->err, 1);
+ return -1;
+ }
+
+ last_stream_frame = (fin && flen == b_data(b));
+
+ h3_inc_frame_type_cnt(h3c->prx_counters, ftype);
+ switch (ftype) {
+ case H3_FT_DATA:
+ ret = h3_data_to_htx(qcs, b, flen, last_stream_frame);
+ /* TODO handle error reporting. Stream closure required. */
+ if (ret < 0) { ABORT_NOW(); }
+ h3s->st_req = H3S_ST_REQ_DATA;
+ break;
+ case H3_FT_HEADERS:
+ ret = h3_headers_to_htx(qcs, b, flen, last_stream_frame);
+ if (ret < 0) {
+ /* TODO for some error, it may be preferable to
+ * only close the stream once RESET_STREAM is
+ * supported.
+ */
+ qcc_emit_cc_app(qcs->qcc, h3c->err, 1);
+ return -1;
+ }
+ h3s->st_req = (h3s->st_req == H3S_ST_REQ_BEFORE) ?
+ H3S_ST_REQ_HEADERS : H3S_ST_REQ_TRAILERS;
+ break;
+ case H3_FT_CANCEL_PUSH:
+ case H3_FT_PUSH_PROMISE:
+ case H3_FT_MAX_PUSH_ID:
+ case H3_FT_GOAWAY:
+ /* Not supported */
+ ret = flen;
+ break;
+ case H3_FT_SETTINGS:
+ ret = h3_parse_settings_frm(qcs->qcc->ctx, b, flen);
+ if (ret < 0) {
+ qcc_emit_cc_app(qcs->qcc, h3c->err, 1);
+ return -1;
+ }
+ h3c->flags |= H3_CF_SETTINGS_RECV;
+ break;
+ default:
+ /* draft-ietf-quic-http34 9. Extensions to HTTP/3
+ *
+ * Implementations MUST discard frames [...] that have unknown
+ * or unsupported types.
+ */
+ h3_debug_printf(stderr, "ignore unknown frame type 0x%lx\n", ftype);
+ ret = flen;
+ break;
+ }
+
+ if (ret) {
+ BUG_ON(h3s->demux_frame_len < ret);
+ h3s->demux_frame_len -= ret;
+ b_del(b, ret);
+ total += ret;
+ }
+ }
+
+ /* TODO may be useful to wakeup the MUX if blocked due to full buffer.
+ * However, currently, io-cb of MUX does not handle Rx.
+ */
+
+ return total;
+}
+
+/* Returns the TX buffer of <qcs> for data sending. Its area may be NULL if
+ * the allocation failed.
+ */
+static struct buffer *mux_get_buf(struct qcs *qcs)
+{
+ if (!b_size(&qcs->tx.buf))
+ b_alloc(&qcs->tx.buf);
+
+ return &qcs->tx.buf;
+}
+
+/* Function used to emit stream data on the <qcs> control uni-stream. */
+static int h3_control_send(struct qcs *qcs, void *ctx)
+{
+ int ret;
+ struct h3c *h3c = ctx;
+ unsigned char data[(2 + 3) * 2 * QUIC_VARINT_MAX_SIZE]; /* enough for 3 settings */
+ struct buffer pos, *res;
+ size_t frm_len;
+
+ TRACE_ENTER(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs);
+
+ BUG_ON_HOT(h3c->flags & H3_CF_SETTINGS_SENT);
+
+ ret = 0;
+ pos = b_make((char *)data, sizeof(data), 0, 0);
+
+ frm_len = quic_int_getsize(H3_SETTINGS_QPACK_MAX_TABLE_CAPACITY) +
+ quic_int_getsize(h3_settings_qpack_max_table_capacity) +
+ quic_int_getsize(H3_SETTINGS_QPACK_BLOCKED_STREAMS) +
+ quic_int_getsize(h3_settings_qpack_blocked_streams);
+ if (h3_settings_max_field_section_size) {
+ frm_len += quic_int_getsize(H3_SETTINGS_MAX_FIELD_SECTION_SIZE) +
+ quic_int_getsize(h3_settings_max_field_section_size);
+ }
+
+ b_quic_enc_int(&pos, H3_UNI_S_T_CTRL);
+ /* Build a SETTINGS frame */
+ b_quic_enc_int(&pos, H3_FT_SETTINGS);
+ b_quic_enc_int(&pos, frm_len);
+ b_quic_enc_int(&pos, H3_SETTINGS_QPACK_MAX_TABLE_CAPACITY);
+ b_quic_enc_int(&pos, h3_settings_qpack_max_table_capacity);
+ b_quic_enc_int(&pos, H3_SETTINGS_QPACK_BLOCKED_STREAMS);
+ b_quic_enc_int(&pos, h3_settings_qpack_blocked_streams);
+ if (h3_settings_max_field_section_size) {
+ b_quic_enc_int(&pos, H3_SETTINGS_MAX_FIELD_SECTION_SIZE);
+ b_quic_enc_int(&pos, h3_settings_max_field_section_size);
+ }
+
+ res = mux_get_buf(qcs);
+ if (b_room(res) < b_data(&pos)) {
+ // TODO the mux should be put in blocked state, with
+ // the stream in state waiting for settings to be sent
+ ABORT_NOW();
+ }
+
+ ret = b_force_xfer(res, &pos, b_data(&pos));
+ if (ret > 0)
+ h3c->flags |= H3_CF_SETTINGS_SENT;
+
+ TRACE_LEAVE(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs);
+ return ret;
+}
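+
+/* Informative sketch of the bytes emitted by the function above, assuming
+ * settings values of 0 so that every field fits in a 1-byte varint:
+ *
+ *   0x00        stream type  (H3_UNI_S_T_CTRL)
+ *   0x04        frame type   (H3_FT_SETTINGS)
+ *   0x04        frame length (4 bytes of payload)
+ *   0x01 0x00   QPACK_MAX_TABLE_CAPACITY = 0
+ *   0x07 0x00   QPACK_BLOCKED_STREAMS = 0
+ */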
+
+static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx)
+{
+ struct buffer outbuf;
+ struct buffer headers_buf = BUF_NULL;
+ struct buffer *res;
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct htx_sl *sl;
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ int frame_length_size; /* size in bytes of frame length varint field */
+ int ret = 0;
+ int hdr;
+ int status = 0;
+
+ TRACE_ENTER(H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+
+ sl = NULL;
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOH)
+ break;
+
+ if (type == HTX_BLK_RES_SL) {
+ /* start-line -> HEADERS h3 frame */
+ BUG_ON(sl);
+ sl = htx_get_blk_ptr(htx, blk);
+ /* TODO should be on h3 layer */
+ status = sl->info.res.status;
+ }
+ else if (type == HTX_BLK_HDR) {
+ if (unlikely(hdr >= sizeof(list) / sizeof(list[0]) - 1))
+ goto err;
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+ hdr++;
+ }
+ else {
+ ABORT_NOW();
+ goto err;
+ }
+ }
+
+ BUG_ON(!sl);
+
+ list[hdr].n = ist("");
+
+ res = mux_get_buf(qcs);
+
+	/* Reserve at least 5 bytes: 1 for the frame type plus up to 4 for the length varint */
+ if (b_room(res) < 5)
+ ABORT_NOW();
+
+ b_reset(&outbuf);
+ outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0);
+ /* Start the headers after frame type + length */
+ headers_buf = b_make(b_head(res) + 5, b_size(res) - 5, 0, 0);
+
+ if (qpack_encode_field_section_line(&headers_buf))
+ ABORT_NOW();
+ if (qpack_encode_int_status(&headers_buf, status))
+ ABORT_NOW();
+
+ for (hdr = 0; hdr < sizeof(list) / sizeof(list[0]); ++hdr) {
+ if (isteq(list[hdr].n, ist("")))
+ break;
+
+ /* RFC 9114 4.2. HTTP Fields
+ *
+ * An intermediary transforming an HTTP/1.x message to HTTP/3
+ * MUST remove connection-specific header fields as discussed in
+ * Section 7.6.1 of [HTTP], or their messages will be treated by
+ * other HTTP/3 endpoints as malformed.
+ */
+ if (isteq(list[hdr].n, ist("connection")) ||
+ isteq(list[hdr].n, ist("proxy-connection")) ||
+ isteq(list[hdr].n, ist("keep-alive")) ||
+ isteq(list[hdr].n, ist("transfer-encoding"))) {
+ continue;
+ }
+ else if (isteq(list[hdr].n, ist("te"))) {
+			/* The "te" header may only be forwarded with the
+			 * "trailers" value; keep only that value when present,
+			 * otherwise the header must be dropped.
+			 */
+ const struct ist v = istist(list[hdr].v, ist("trailers"));
+ if (!isttest(v) || (v.len > 8 && v.ptr[8] != ','))
+ continue;
+ list[hdr].v = ist("trailers");
+ }
+
+ if (qpack_encode_header(&headers_buf, list[hdr].n, list[hdr].v))
+ ABORT_NOW();
+ }
+
+ /* Now that all headers are encoded, we are certain that res buffer is
+ * big enough
+ */
+ frame_length_size = quic_int_getsize(b_data(&headers_buf));
+ res->head += 4 - frame_length_size;
+ b_putchr(res, 0x01); /* h3 HEADERS frame type */
+ if (!b_quic_enc_int(res, b_data(&headers_buf)))
+ ABORT_NOW();
+ b_add(res, b_data(&headers_buf));
+
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ TRACE_LEAVE(H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ return ret;
+
+ err:
+ TRACE_DEVEL("leaving on error", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ return 0;
+}
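+
+/* Informative layout for the buffer juggling above: the QPACK output is
+ * encoded starting at offset 5, which reserves 1 byte for the frame type
+ * plus a worst-case 4-byte length varint. Once the real length is known,
+ * the head is advanced by (4 - frame_length_size) so that type, length
+ * and payload end up contiguous:
+ *
+ *   [ gap ][ 0x01 ][ length varint ][ QPACK field section ... ]
+ *           ^ b_head(res) after the adjustment
+ */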
+
+/* Returns the total number of bytes sent. */
+static int h3_resp_data_send(struct qcs *qcs, struct htx *htx, size_t count)
+{
+ struct buffer outbuf;
+ struct buffer *res;
+ size_t total = 0;
+ int bsize, fsize, hsize;
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+
+ TRACE_ENTER(H3_EV_TX_DATA, qcs->qcc->conn, qcs);
+
+ new_frame:
+ if (!count || htx_is_empty(htx))
+ goto end;
+
+ blk = htx_get_head_blk(htx);
+ type = htx_get_blk_type(blk);
+ fsize = bsize = htx_get_blksz(blk);
+
+ if (type != HTX_BLK_DATA)
+ goto end;
+
+ res = mux_get_buf(qcs);
+
+ if (fsize > count)
+ fsize = count;
+
+ /* h3 DATA headers : 1-byte frame type + varint frame length */
+ hsize = 1 + QUIC_VARINT_MAX_SIZE;
+
+ while (1) {
+ b_reset(&outbuf);
+ outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0);
+ if (b_size(&outbuf) > hsize || !b_space_wraps(res))
+ break;
+ b_slow_realign(res, trash.area, b_data(res));
+ }
+
+ /* Not enough room for headers and at least one data byte, block the
+ * stream. It is expected that the stream connector layer will subscribe
+ * on SEND.
+ */
+ if (b_size(&outbuf) <= hsize) {
+ qcs->flags |= QC_SF_BLK_MROOM;
+ goto end;
+ }
+
+ if (b_size(&outbuf) < hsize + fsize)
+ fsize = b_size(&outbuf) - hsize;
+ BUG_ON(fsize <= 0);
+
+ b_putchr(&outbuf, 0x00); /* h3 frame type = DATA */
+ b_quic_enc_int(&outbuf, fsize); /* h3 frame length */
+
+ b_putblk(&outbuf, htx_get_blk_ptr(htx, blk), fsize);
+ total += fsize;
+ count -= fsize;
+
+ if (fsize == bsize)
+ htx_remove_blk(htx, blk);
+ else
+ htx_cut_data_blk(htx, blk, fsize);
+
+ /* commit the buffer */
+ b_add(res, b_data(&outbuf));
+ goto new_frame;
+
+ end:
+ TRACE_LEAVE(H3_EV_TX_DATA, qcs->qcc->conn, qcs);
+ return total;
+}
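+
+/* Informative example: a 200-byte HTX data block sent through the function
+ * above with enough room produces a single DATA frame on the wire:
+ *
+ *   0x00        frame type (H3_FT_DATA)
+ *   0x40 0xc8   frame length varint (200 on 2 bytes)
+ *   ...         200 payload bytes
+ *
+ * When the output buffer is smaller, the block is split over several such
+ * frames, one per new_frame iteration.
+ */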
+
+/* Emit from <htx> up to <count> bytes of HEADERS/DATA H3 frames on stream
+ * <qcs>. Returns the number of HTX bytes consumed.
+ */
+static size_t h3_snd_buf(struct qcs *qcs, struct htx *htx, size_t count)
+{
+ size_t total = 0;
+ enum htx_blk_type btype;
+ struct htx_blk *blk;
+ uint32_t bsize;
+ int32_t idx;
+ int ret;
+
+ h3_debug_printf(stderr, "%s\n", __func__);
+
+ while (count && !htx_is_empty(htx) && !(qcs->flags & QC_SF_BLK_MROOM)) {
+ idx = htx_get_head(htx);
+ blk = htx_get_blk(htx, idx);
+ btype = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+
+ /* Not implemented : QUIC on backend side */
+ BUG_ON(btype == HTX_BLK_REQ_SL);
+
+ switch (btype) {
+ case HTX_BLK_RES_SL:
+ /* start-line -> HEADERS h3 frame */
+ ret = h3_resp_headers_send(qcs, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto out;
+ }
+ break;
+
+ case HTX_BLK_DATA:
+ ret = h3_resp_data_send(qcs, htx, count);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto out;
+ }
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ /* TODO trailers */
+
+ default:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+ }
+
+ out:
+ return total;
+}
+
+/* Notify about a closure on <qcs> stream requested by the remote peer.
+ *
+ * Stream channel <side> is explained relative to our endpoint : WR for
+ * STOP_SENDING or RD for RESET_STREAM reception. Callback decode_qcs() is used
+ * instead for closure performed using a STREAM frame with FIN bit.
+ *
+ * The main objective of this function is to check if closure is valid
+ * according to HTTP/3 specification.
+ *
+ * Returns 0 on success else non-zero. A CONNECTION_CLOSE is generated on
+ * error.
+ */
+static int h3_close(struct qcs *qcs, enum qcc_app_ops_close_side side)
+{
+ struct h3s *h3s = qcs->ctx;
+	struct h3c *h3c = h3s->h3c;
+
+	/* RFC 9114 6.2.1. Control Streams
+	 *
+	 * The sender MUST NOT close the control stream, and the receiver
+	 * MUST NOT request that the sender close the control stream. If
+	 * either control stream is closed at any point, this MUST be
+	 * treated as a connection error of type H3_CLOSED_CRITICAL_STREAM.
+	 */
+ if (qcs == h3c->ctrl_strm) {
+ TRACE_ERROR("closure detected on control stream", H3_EV_H3S_END, qcs->qcc->conn, qcs);
+ qcc_emit_cc_app(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int h3_attach(struct qcs *qcs, void *conn_ctx)
+{
+ struct h3s *h3s;
+
+ TRACE_ENTER(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+
+ h3s = pool_alloc(pool_head_h3s);
+ if (!h3s)
+ return 1;
+
+ qcs->ctx = h3s;
+ h3s->h3c = conn_ctx;
+
+ h3s->demux_frame_len = 0;
+ h3s->demux_frame_type = 0;
+ h3s->body_len = 0;
+ h3s->data_len = 0;
+ h3s->flags = 0;
+
+ if (quic_stream_is_bidi(qcs->id)) {
+ h3s->type = H3S_T_REQ;
+ h3s->st_req = H3S_ST_REQ_BEFORE;
+ qcs_wait_http_req(qcs);
+ }
+ else {
+ /* stream type must be decoded for unidirectional streams */
+ h3s->type = H3S_T_UNKNOWN;
+ }
+
+ TRACE_LEAVE(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ return 0;
+}
+
+static void h3_detach(struct qcs *qcs)
+{
+ struct h3s *h3s = qcs->ctx;
+
+ TRACE_ENTER(H3_EV_H3S_END, qcs->qcc->conn, qcs);
+
+ pool_free(pool_head_h3s, h3s);
+ qcs->ctx = NULL;
+
+ TRACE_LEAVE(H3_EV_H3S_END, qcs->qcc->conn, qcs);
+}
+
+/* Finalize the HTTP/3 connection <ctx> by opening the local control stream
+ * and sending the initial SETTINGS frame on it.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int h3_finalize(void *ctx)
+{
+ struct h3c *h3c = ctx;
+ struct qcs *qcs;
+
+ qcs = qcc_init_stream_local(h3c->qcc, 0);
+ if (!qcs)
+ return 0;
+
+ h3_control_send(qcs, h3c);
+ h3c->ctrl_strm = qcs;
+
+ return 1;
+}
+
+/* Generate a GOAWAY frame for <h3c> connection on the control stream.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int h3_send_goaway(struct h3c *h3c)
+{
+ struct qcs *qcs = h3c->ctrl_strm;
+ struct buffer pos, *res;
+ unsigned char data[3 * QUIC_VARINT_MAX_SIZE];
+ size_t frm_len = quic_int_getsize(h3c->id_goaway);
+
+ if (!qcs)
+ return 1;
+
+ pos = b_make((char *)data, sizeof(data), 0, 0);
+
+ b_quic_enc_int(&pos, H3_FT_GOAWAY);
+ b_quic_enc_int(&pos, frm_len);
+ b_quic_enc_int(&pos, h3c->id_goaway);
+
+ res = mux_get_buf(qcs);
+ if (!res || b_room(res) < b_data(&pos)) {
+ /* Do not try forcefully to emit GOAWAY if no space left. */
+ return 1;
+ }
+
+ b_force_xfer(res, &pos, b_data(&pos));
+
+ return 0;
+}
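+
+/* Informative example: with id_goaway set to 8, the function above emits
+ * on the control stream:
+ *
+ *   0x07   frame type (H3_FT_GOAWAY)
+ *   0x01   frame length (1-byte varint identifier)
+ *   0x08   stream identifier: requests with an id >= 8 are rejected
+ */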
+
+/* Initialize the HTTP/3 context for <qcc> mux.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int h3_init(struct qcc *qcc)
+{
+ struct h3c *h3c;
+ struct quic_conn *qc = qcc->conn->handle.qc;
+
+ h3c = pool_alloc(pool_head_h3c);
+ if (!h3c)
+ goto fail_no_h3;
+
+ h3c->qcc = qcc;
+ h3c->ctrl_strm = NULL;
+ h3c->err = H3_NO_ERROR;
+ h3c->flags = 0;
+ h3c->id_goaway = 0;
+
+ qcc->ctx = h3c;
+ /* TODO cleanup only ref to quic_conn */
+ h3c->prx_counters =
+ EXTRA_COUNTERS_GET(qc->li->bind_conf->frontend->extra_counters_fe,
+ &h3_stats_module);
+ LIST_INIT(&h3c->buf_wait.list);
+
+ return 1;
+
+ fail_no_h3:
+ return 0;
+}
+
+/* Send an HTTP/3 GOAWAY followed by a CONNECTION_CLOSE_APP. */
+static void h3_shutdown(void *ctx)
+{
+ struct h3c *h3c = ctx;
+
+ /* RFC 9114 5.2. Connection Shutdown
+ *
+ * Even when a connection is not idle, either endpoint can decide to
+ * stop using the connection and initiate a graceful connection close.
+ * Endpoints initiate the graceful shutdown of an HTTP/3 connection by
+ * sending a GOAWAY frame.
+ */
+ h3_send_goaway(h3c);
+
+ /* RFC 9114 5.2. Connection Shutdown
+ *
+ * An endpoint that completes a
+ * graceful shutdown SHOULD use the H3_NO_ERROR error code when closing
+ * the connection.
+ */
+ qcc_emit_cc_app(h3c->qcc, H3_NO_ERROR, 0);
+}
+
+static void h3_release(void *ctx)
+{
+ struct h3c *h3c = ctx;
+ pool_free(pool_head_h3c, h3c);
+}
+
+/* Increment the h3 error code counters for <error_code> value */
+static void h3_stats_inc_err_cnt(void *ctx, int err_code)
+{
+ struct h3c *h3c = ctx;
+
+ h3_inc_err_cnt(h3c->prx_counters, err_code);
+}
+
+/* h3 trace handler */
+static void h3_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct qcc *qcc = conn ? conn->ctx : NULL;
+ const struct qcs *qcs = a2;
+
+ if (!qcc)
+ return;
+
+ if (src->verbosity > H3_VERB_CLEAN) {
+ chunk_appendf(&trace_buf, " : qcc=%p(F)", qcc);
+ if (qcc->conn->handle.qc)
+ chunk_appendf(&trace_buf, " qc=%p", qcc->conn->handle.qc);
+
+ if (qcs)
+ chunk_appendf(&trace_buf, " qcs=%p(%llu)", qcs, (ull)qcs->id);
+ }
+}
+
+/* HTTP/3 application layer operations */
+const struct qcc_app_ops h3_ops = {
+ .init = h3_init,
+ .attach = h3_attach,
+ .decode_qcs = h3_decode_qcs,
+ .snd_buf = h3_snd_buf,
+ .close = h3_close,
+ .detach = h3_detach,
+ .finalize = h3_finalize,
+ .shutdown = h3_shutdown,
+ .inc_err_cnt = h3_stats_inc_err_cnt,
+ .release = h3_release,
+};
diff --git a/src/h3_stats.c b/src/h3_stats.c
new file mode 100644
index 0000000..c96093f
--- /dev/null
+++ b/src/h3_stats.c
@@ -0,0 +1,276 @@
+#include <haproxy/h3.h>
+#include <haproxy/stats.h>
+
+enum {
+ /* h3 frame type counters */
+ H3_ST_DATA,
+ H3_ST_HEADERS,
+ H3_ST_CANCEL_PUSH,
+ H3_ST_PUSH_PROMISE,
+ H3_ST_MAX_PUSH_ID,
+ H3_ST_GOAWAY,
+ H3_ST_SETTINGS,
+ /* h3 error counters */
+ H3_ST_H3_NO_ERROR,
+ H3_ST_H3_GENERAL_PROTOCOL_ERROR,
+ H3_ST_H3_INTERNAL_ERROR,
+ H3_ST_H3_STREAM_CREATION_ERROR,
+ H3_ST_H3_CLOSED_CRITICAL_STREAM,
+ H3_ST_H3_FRAME_UNEXPECTED,
+ H3_ST_H3_FRAME_ERROR,
+ H3_ST_H3_EXCESSIVE_LOAD,
+ H3_ST_H3_ID_ERROR,
+ H3_ST_H3_SETTINGS_ERROR,
+ H3_ST_H3_MISSING_SETTINGS,
+ H3_ST_H3_REQUEST_REJECTED,
+ H3_ST_H3_REQUEST_CANCELLED,
+ H3_ST_H3_REQUEST_INCOMPLETE,
+ H3_ST_H3_MESSAGE_ERROR,
+ H3_ST_H3_CONNECT_ERROR,
+ H3_ST_H3_VERSION_FALLBACK,
+ /* QPACK error counters */
+ H3_ST_QPACK_DECOMPRESSION_FAILED,
+ H3_ST_QPACK_ENCODER_STREAM_ERROR,
+ H3_ST_QPACK_DECODER_STREAM_ERROR,
+ H3_STATS_COUNT /* must be the last */
+};
+
+static struct name_desc h3_stats[] = {
+ /* h3 frame type counters */
+ [H3_ST_DATA] = { .name = "h3_data",
+ .desc = "Total number of DATA frames received" },
+ [H3_ST_HEADERS] = { .name = "h3_headers",
+ .desc = "Total number of HEADERS frames received" },
+ [H3_ST_CANCEL_PUSH] = { .name = "h3_cancel_push",
+ .desc = "Total number of CANCEL_PUSH frames received" },
+ [H3_ST_PUSH_PROMISE] = { .name = "h3_push_promise",
+ .desc = "Total number of PUSH_PROMISE frames received" },
+ [H3_ST_MAX_PUSH_ID] = { .name = "h3_max_push_id",
+ .desc = "Total number of MAX_PUSH_ID frames received" },
+ [H3_ST_GOAWAY] = { .name = "h3_goaway",
+ .desc = "Total number of GOAWAY frames received" },
+ [H3_ST_SETTINGS] = { .name = "h3_settings",
+ .desc = "Total number of SETTINGS frames received" },
+ /* h3 error counters */
+ [H3_ST_H3_NO_ERROR] = { .name = "h3_no_error",
+ .desc = "Total number of H3_NO_ERROR errors received" },
+ [H3_ST_H3_GENERAL_PROTOCOL_ERROR] = { .name = "h3_general_protocol_error",
+ .desc = "Total number of H3_GENERAL_PROTOCOL_ERROR errors received" },
+ [H3_ST_H3_INTERNAL_ERROR] = { .name = "h3_internal_error",
+ .desc = "Total number of H3_INTERNAL_ERROR errors received" },
+ [H3_ST_H3_STREAM_CREATION_ERROR] = { .name = "h3_stream_creation_error",
+ .desc = "Total number of H3_STREAM_CREATION_ERROR errors received" },
+ [H3_ST_H3_CLOSED_CRITICAL_STREAM] = { .name = "h3_closed_critical_stream",
+ .desc = "Total number of H3_CLOSED_CRITICAL_STREAM errors received" },
+ [H3_ST_H3_FRAME_UNEXPECTED] = { .name = "h3_frame_unexpected",
+ .desc = "Total number of H3_FRAME_UNEXPECTED errors received" },
+ [H3_ST_H3_FRAME_ERROR] = { .name = "h3_frame_error",
+ .desc = "Total number of H3_FRAME_ERROR errors received" },
+ [H3_ST_H3_EXCESSIVE_LOAD] = { .name = "h3_excessive_load",
+ .desc = "Total number of H3_EXCESSIVE_LOAD errors received" },
+ [H3_ST_H3_ID_ERROR] = { .name = "h3_id_error",
+ .desc = "Total number of H3_ID_ERROR errors received" },
+ [H3_ST_H3_SETTINGS_ERROR] = { .name = "h3_settings_error",
+ .desc = "Total number of H3_SETTINGS_ERROR errors received" },
+ [H3_ST_H3_MISSING_SETTINGS] = { .name = "h3_missing_settings",
+ .desc = "Total number of H3_MISSING_SETTINGS errors received" },
+ [H3_ST_H3_REQUEST_REJECTED] = { .name = "h3_request_rejected",
+ .desc = "Total number of H3_REQUEST_REJECTED errors received" },
+ [H3_ST_H3_REQUEST_CANCELLED] = { .name = "h3_request_cancelled",
+ .desc = "Total number of H3_REQUEST_CANCELLED errors received" },
+ [H3_ST_H3_REQUEST_INCOMPLETE] = { .name = "h3_request_incomplete",
+ .desc = "Total number of H3_REQUEST_INCOMPLETE errors received" },
+ [H3_ST_H3_MESSAGE_ERROR] = { .name = "h3_message_error",
+ .desc = "Total number of H3_MESSAGE_ERROR errors received" },
+ [H3_ST_H3_CONNECT_ERROR] = { .name = "h3_connect_error",
+ .desc = "Total number of H3_CONNECT_ERROR errors received" },
+ [H3_ST_H3_VERSION_FALLBACK] = { .name = "h3_version_fallback",
+ .desc = "Total number of H3_VERSION_FALLBACK errors received" },
+ /* QPACK error counters */
+	[H3_ST_QPACK_DECOMPRESSION_FAILED] = { .name = "qpack_decompression_failed",
+ .desc = "Total number of QPACK_DECOMPRESSION_FAILED errors received" },
+ [H3_ST_QPACK_ENCODER_STREAM_ERROR] = { .name = "qpack_encoder_stream_error",
+ .desc = "Total number of QPACK_ENCODER_STREAM_ERROR errors received" },
+ [H3_ST_QPACK_DECODER_STREAM_ERROR] = { .name = "qpack_decoder_stream_error",
+ .desc = "Total number of QPACK_DECODER_STREAM_ERROR errors received" },
+};
+
+static struct h3_counters {
+ /* h3 frame type counters */
+ long long h3_data; /* total number of DATA frames received */
+ long long h3_headers; /* total number of HEADERS frames received */
+ long long h3_cancel_push; /* total number of CANCEL_PUSH frames received */
+ long long h3_push_promise; /* total number of PUSH_PROMISE frames received */
+ long long h3_max_push_id; /* total number of MAX_PUSH_ID frames received */
+ long long h3_goaway; /* total number of GOAWAY frames received */
+ long long h3_settings; /* total number of SETTINGS frames received */
+ /* h3 error counters */
+ long long h3_no_error; /* total number of H3_NO_ERROR errors received */
+ long long h3_general_protocol_error; /* total number of H3_GENERAL_PROTOCOL_ERROR errors received */
+ long long h3_internal_error; /* total number of H3_INTERNAL_ERROR errors received */
+ long long h3_stream_creation_error; /* total number of H3_STREAM_CREATION_ERROR errors received */
+ long long h3_closed_critical_stream; /* total number of H3_CLOSED_CRITICAL_STREAM errors received */
+ long long h3_frame_unexpected; /* total number of H3_FRAME_UNEXPECTED errors received */
+ long long h3_frame_error; /* total number of H3_FRAME_ERROR errors received */
+ long long h3_excessive_load; /* total number of H3_EXCESSIVE_LOAD errors received */
+ long long h3_id_error; /* total number of H3_ID_ERROR errors received */
+ long long h3_settings_error; /* total number of H3_SETTINGS_ERROR errors received */
+ long long h3_missing_settings; /* total number of H3_MISSING_SETTINGS errors received */
+ long long h3_request_rejected; /* total number of H3_REQUEST_REJECTED errors received */
+ long long h3_request_cancelled; /* total number of H3_REQUEST_CANCELLED errors received */
+ long long h3_request_incomplete; /* total number of H3_REQUEST_INCOMPLETE errors received */
+ long long h3_message_error; /* total number of H3_MESSAGE_ERROR errors received */
+ long long h3_connect_error; /* total number of H3_CONNECT_ERROR errors received */
+ long long h3_version_fallback; /* total number of H3_VERSION_FALLBACK errors received */
+ /* QPACK error counters */
+ long long qpack_decompression_failed; /* total number of QPACK_DECOMPRESSION_FAILED errors received */
+ long long qpack_encoder_stream_error; /* total number of QPACK_ENCODER_STREAM_ERROR errors received */
+ long long qpack_decoder_stream_error; /* total number of QPACK_DECODER_STREAM_ERROR errors received */
+} h3_counters;
+
+static void h3_fill_stats(void *data, struct field *stats)
+{
+ struct h3_counters *counters = data;
+
+ /* h3 frame type counters */
+ stats[H3_ST_DATA] = mkf_u64(FN_COUNTER, counters->h3_data);
+ stats[H3_ST_HEADERS] = mkf_u64(FN_COUNTER, counters->h3_headers);
+ stats[H3_ST_CANCEL_PUSH] = mkf_u64(FN_COUNTER, counters->h3_cancel_push);
+ stats[H3_ST_PUSH_PROMISE] = mkf_u64(FN_COUNTER, counters->h3_push_promise);
+ stats[H3_ST_MAX_PUSH_ID] = mkf_u64(FN_COUNTER, counters->h3_max_push_id);
+ stats[H3_ST_GOAWAY] = mkf_u64(FN_COUNTER, counters->h3_goaway);
+ stats[H3_ST_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_settings);
+ /* h3 error counters */
+ stats[H3_ST_H3_NO_ERROR] = mkf_u64(FN_COUNTER, counters->h3_no_error);
+ stats[H3_ST_H3_GENERAL_PROTOCOL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_general_protocol_error);
+ stats[H3_ST_H3_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_internal_error);
+ stats[H3_ST_H3_STREAM_CREATION_ERROR] = mkf_u64(FN_COUNTER, counters->h3_stream_creation_error);
+ stats[H3_ST_H3_CLOSED_CRITICAL_STREAM] = mkf_u64(FN_COUNTER, counters->h3_closed_critical_stream);
+ stats[H3_ST_H3_FRAME_UNEXPECTED] = mkf_u64(FN_COUNTER, counters->h3_frame_unexpected);
+ stats[H3_ST_H3_FRAME_ERROR] = mkf_u64(FN_COUNTER, counters->h3_frame_error);
+ stats[H3_ST_H3_EXCESSIVE_LOAD] = mkf_u64(FN_COUNTER, counters->h3_excessive_load);
+ stats[H3_ST_H3_ID_ERROR] = mkf_u64(FN_COUNTER, counters->h3_id_error);
+ stats[H3_ST_H3_SETTINGS_ERROR] = mkf_u64(FN_COUNTER, counters->h3_settings_error);
+ stats[H3_ST_H3_MISSING_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_missing_settings);
+ stats[H3_ST_H3_REQUEST_REJECTED] = mkf_u64(FN_COUNTER, counters->h3_request_rejected);
+ stats[H3_ST_H3_REQUEST_CANCELLED] = mkf_u64(FN_COUNTER, counters->h3_request_cancelled);
+ stats[H3_ST_H3_REQUEST_INCOMPLETE] = mkf_u64(FN_COUNTER, counters->h3_request_incomplete);
+ stats[H3_ST_H3_MESSAGE_ERROR] = mkf_u64(FN_COUNTER, counters->h3_message_error);
+ stats[H3_ST_H3_CONNECT_ERROR] = mkf_u64(FN_COUNTER, counters->h3_connect_error);
+ stats[H3_ST_H3_VERSION_FALLBACK] = mkf_u64(FN_COUNTER, counters->h3_version_fallback);
+ /* QPACK error counters */
+ stats[H3_ST_QPACK_DECOMPRESSION_FAILED] = mkf_u64(FN_COUNTER, counters->qpack_decompression_failed);
+ stats[H3_ST_QPACK_ENCODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_encoder_stream_error);
+ stats[H3_ST_QPACK_DECODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_decoder_stream_error);
+}
+
+struct stats_module h3_stats_module = {
+ .name = "h3",
+ .fill_stats = h3_fill_stats,
+ .stats = h3_stats,
+ .stats_count = H3_STATS_COUNT,
+ .counters = &h3_counters,
+ .counters_size = sizeof(h3_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &h3_stats_module);
+
+void h3_inc_err_cnt(struct h3_counters *ctrs, int error_code)
+{
+ switch (error_code) {
+ case H3_NO_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_no_error);
+ break;
+ case H3_GENERAL_PROTOCOL_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_general_protocol_error);
+ break;
+ case H3_INTERNAL_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_internal_error);
+ break;
+ case H3_STREAM_CREATION_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_stream_creation_error);
+ break;
+ case H3_CLOSED_CRITICAL_STREAM:
+ HA_ATOMIC_INC(&ctrs->h3_closed_critical_stream);
+ break;
+ case H3_FRAME_UNEXPECTED:
+ HA_ATOMIC_INC(&ctrs->h3_frame_unexpected);
+ break;
+ case H3_FRAME_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_frame_error);
+ break;
+ case H3_EXCESSIVE_LOAD:
+ HA_ATOMIC_INC(&ctrs->h3_excessive_load);
+ break;
+ case H3_ID_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_id_error);
+ break;
+ case H3_SETTINGS_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_settings_error);
+ break;
+ case H3_MISSING_SETTINGS:
+ HA_ATOMIC_INC(&ctrs->h3_missing_settings);
+ break;
+ case H3_REQUEST_REJECTED:
+ HA_ATOMIC_INC(&ctrs->h3_request_rejected);
+ break;
+ case H3_REQUEST_CANCELLED:
+ HA_ATOMIC_INC(&ctrs->h3_request_cancelled);
+ break;
+ case H3_REQUEST_INCOMPLETE:
+ HA_ATOMIC_INC(&ctrs->h3_request_incomplete);
+ break;
+ case H3_MESSAGE_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_message_error);
+ break;
+ case H3_CONNECT_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_connect_error);
+ break;
+ case H3_VERSION_FALLBACK:
+ HA_ATOMIC_INC(&ctrs->h3_version_fallback);
+ break;
+ case QPACK_DECOMPRESSION_FAILED:
+ HA_ATOMIC_INC(&ctrs->qpack_decompression_failed);
+ break;
+ case QPACK_ENCODER_STREAM_ERROR:
+ HA_ATOMIC_INC(&ctrs->qpack_encoder_stream_error);
+ break;
+ case QPACK_DECODER_STREAM_ERROR:
+ HA_ATOMIC_INC(&ctrs->qpack_decoder_stream_error);
+ break;
+ default:
+ break;
+
+ }
+}
+
+void h3_inc_frame_type_cnt(struct h3_counters *ctrs, int frm_type)
+{
+ switch (frm_type) {
+ case H3_FT_DATA:
+ HA_ATOMIC_INC(&ctrs->h3_data);
+ break;
+ case H3_FT_HEADERS:
+ HA_ATOMIC_INC(&ctrs->h3_headers);
+ break;
+ case H3_FT_CANCEL_PUSH:
+ HA_ATOMIC_INC(&ctrs->h3_cancel_push);
+ break;
+ case H3_FT_PUSH_PROMISE:
+ HA_ATOMIC_INC(&ctrs->h3_push_promise);
+ break;
+ case H3_FT_MAX_PUSH_ID:
+ HA_ATOMIC_INC(&ctrs->h3_max_push_id);
+ break;
+ case H3_FT_GOAWAY:
+ HA_ATOMIC_INC(&ctrs->h3_goaway);
+ break;
+ case H3_FT_SETTINGS:
+ HA_ATOMIC_INC(&ctrs->h3_settings);
+ break;
+ default:
+ break;
+ }
+}
diff --git a/src/haproxy.c b/src/haproxy.c
new file mode 100644
index 0000000..7f59af6
--- /dev/null
+++ b/src/haproxy.c
@@ -0,0 +1,3674 @@
+/*
+ * HAProxy : High Availability-enabled HTTP/TCP proxy
+ * Copyright 2000-2023 Willy Tarreau <willy@haproxy.org>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Please refer to RFC7230 - RFC7235 for information about the HTTP protocol, and
+ * RFC6265 for information about cookies usage. More generally, the IETF HTTP
+ * Working Group's web site should be consulted for protocol related changes :
+ *
+ * http://ftp.ics.uci.edu/pub/ietf/http/
+ *
+ * Pending bugs (may not be fixed because never reproduced) :
+ * - solaris only : sometimes, an HTTP proxy with only a dispatch address causes
+ * the proxy to terminate (no core) if the client breaks the connection during
+ * the response. Seen on 1.1.8pre4, but never reproduced. May not be related to
+ * the snprintf() bug since requests were simple (GET / HTTP/1.0), but may be
+ * related to missing setsid() (fixed in 1.1.15)
+ * - a proxy with an invalid config will prevent the startup even if disabled.
+ *
+ * ChangeLog has moved to the CHANGELOG file.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <sys/resource.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <syslog.h>
+#include <grp.h>
+
+#ifdef USE_THREAD
+#include <pthread.h>
+#endif
+
+#ifdef USE_CPU_AFFINITY
+#include <sched.h>
+#if defined(__FreeBSD__) || defined(__DragonFly__)
+#include <sys/param.h>
+#ifdef __FreeBSD__
+#include <sys/cpuset.h>
+#endif
+#endif
+#endif
+
+#if defined(USE_PRCTL)
+#include <sys/prctl.h>
+#endif
+
+#if defined(USE_PROCCTL)
+#include <sys/procctl.h>
+#endif
+
+#ifdef DEBUG_FULL
+#include <assert.h>
+#endif
+#if defined(USE_SYSTEMD)
+#include <systemd/sd-daemon.h>
+#endif
+
+#include <import/sha1.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/base64.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgcond.h>
+#include <haproxy/cfgdiag.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/connection.h>
+#ifdef USE_CPU_AFFINITY
+#include <haproxy/cpuset.h>
+#endif
+#include <haproxy/dns.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/filters.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/mworker.h>
+#include <haproxy/namespace.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/pattern.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/signal.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/vars.h>
+#include <haproxy/version.h>
+
+
+/* array of init calls for older platforms */
+DECLARE_INIT_STAGES;
+
+/* create a read_mostly section to hold variables which are accessed a lot
+ * but which almost never change. The purpose is to isolate them in their
+ * own cache lines where they don't risk being perturbed by write accesses
+ * to neighbor variables. We need to create an empty aligned variable for
+ * this. The fact that the variable is of size zero means that it will be
+ * eliminated at link time if no other variable uses it, but alignment will
+ * be respected.
+ */
+empty_t __read_mostly_align HA_SECTION("read_mostly") ALIGNED(64);
+
+#ifdef BUILD_FEATURES
+const char *build_features = BUILD_FEATURES;
+#else
+const char *build_features = "";
+#endif
+
+/* list of config files */
+static struct list cfg_cfgfiles = LIST_HEAD_INIT(cfg_cfgfiles);
+int pid; /* current process id */
+
+volatile unsigned long sleeping_thread_mask = 0; /* Threads that are about to sleep in poll() */
+volatile unsigned long stopping_thread_mask = 0; /* Threads acknowledged stopping */
+
+/* global options */
+struct global global = {
+ .hard_stop_after = TICK_ETERNITY,
+ .close_spread_time = TICK_ETERNITY,
+ .close_spread_end = TICK_ETERNITY,
+ .numa_cpu_mapping = 1,
+ .nbthread = 0,
+ .req_count = 0,
+ .logsrvs = LIST_HEAD_INIT(global.logsrvs),
+ .maxzlibmem = DEFAULT_MAXZLIBMEM * 1024U * 1024U,
+ .comp_rate_lim = 0,
+ .ssl_server_verify = SSL_SERVER_VERIFY_REQUIRED,
+ .unix_bind = {
+ .ux = {
+ .uid = -1,
+ .gid = -1,
+ .mode = 0,
+ }
+ },
+ .tune = {
+ .options = GTUNE_LISTENER_MQ,
+ .bufsize = (BUFSIZE + 2*sizeof(void *) - 1) & -(2*sizeof(void *)),
+ .maxrewrite = MAXREWRITE,
+ .reserved_bufs = RESERVED_BUFS,
+ .pattern_cache = DEFAULT_PAT_LRU_SIZE,
+ .pool_low_ratio = 20,
+ .pool_high_ratio = 25,
+ .max_http_hdr = MAX_HTTP_HDR,
+#ifdef USE_OPENSSL
+ .sslcachesize = SSLCACHESIZE,
+#endif
+ .comp_maxlevel = 1,
+#ifdef DEFAULT_IDLE_TIMER
+ .idle_timer = DEFAULT_IDLE_TIMER,
+#else
+ .idle_timer = 1000, /* 1 second */
+#endif
+#ifdef USE_QUIC
+ .quic_backend_max_idle_timeout = QUIC_TP_DFLT_BACK_MAX_IDLE_TIMEOUT,
+ .quic_frontend_max_idle_timeout = QUIC_TP_DFLT_FRONT_MAX_IDLE_TIMEOUT,
+ .quic_frontend_max_streams_bidi = QUIC_TP_DFLT_FRONT_MAX_STREAMS_BIDI,
+ .quic_retry_threshold = QUIC_DFLT_RETRY_THRESHOLD,
+ .quic_streams_buf = 30,
+#endif /* USE_QUIC */
+ },
+#ifdef USE_OPENSSL
+#ifdef DEFAULT_MAXSSLCONN
+ .maxsslconn = DEFAULT_MAXSSLCONN,
+#endif
+#endif
+ /* others NULL OK */
+};
+
+/*********************************************************************/
+
+int stopping; /* non zero means stopping in progress */
+int killed; /* non zero means a hard-stop is triggered */
+int jobs = 0; /* number of active jobs (conns, listeners, active tasks, ...) */
+int unstoppable_jobs = 0; /* number of active jobs that can't be stopped during a soft stop */
+int active_peers = 0; /* number of active peers (connection attempts and connected) */
+int connected_peers = 0; /* number of connected peers (verified ones) */
+int arg_mode = 0; /* MODE_DEBUG etc as passed on command line ... */
+char *change_dir = NULL; /* set when -C is passed */
+char *check_condition = NULL; /* check condition passed to -cc */
+
+/* Here we store information about the pids of the processes we may pause
+ * or kill. We will send them a signal every 10 ms until we can bind to all
+ * our ports. With 200 retries, that's about 2 seconds.
+ */
+#define MAX_START_RETRIES 200
+static int *oldpids = NULL;
+static int oldpids_sig; /* use USR1 or TERM */
+
+/* Path to the unix socket we use to retrieve listener sockets from the old process */
+static const char *old_unixsocket;
+
+int atexit_flag = 0;
+
+int nb_oldpids = 0;
+const int zero = 0;
+const int one = 1;
+const struct linger nolinger = { .l_onoff = 1, .l_linger = 0 };
+
+char hostname[MAX_HOSTNAME_LEN];
+char *localpeer = NULL;
+static char *kwd_dump = NULL; // list of keyword dumps to produce
+
+static char **old_argv = NULL; /* previous argv but cleaned up */
+
+struct list proc_list = LIST_HEAD_INIT(proc_list);
+
+int master = 0; /* 1 if in master, 0 if in child */
+unsigned int rlim_fd_cur_at_boot = 0;
+unsigned int rlim_fd_max_at_boot = 0;
+
+/* per-boot randomness */
+unsigned char boot_seed[20]; /* per-boot random seed (160 bits initially) */
+
+/* takes the thread config in argument or NULL for any thread */
+static void *run_thread_poll_loop(void *data);
+
+/* bitfield of a few warnings to emit just once (WARN_*) */
+unsigned int warned = 0;
+
+/* set if experimental features have been used for the current process */
+unsigned int tainted = 0;
+
+unsigned int experimental_directives_allowed = 0;
+
+int check_kw_experimental(struct cfg_keyword *kw, const char *file, int linenum,
+ char **errmsg)
+{
+ if (kw->flags & KWF_EXPERIMENTAL) {
+ if (!experimental_directives_allowed) {
+ memprintf(errmsg, "parsing [%s:%d] : '%s' directive is experimental, must be allowed via a global 'expose-experimental-directives'",
+ file, linenum, kw->kw);
+ return 1;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+ }
+
+ return 0;
+}
+
+/* master CLI configuration (-S flag) */
+struct list mworker_cli_conf = LIST_HEAD_INIT(mworker_cli_conf);
+
+/* These are strings to be reported in the output of "haproxy -vv". They may
+ * either be constants (in which case must_free must be zero) or dynamically
+ * allocated strings to pass to free() on exit, and in this case must_free
+ * must be non-zero.
+ */
+struct list build_opts_list = LIST_HEAD_INIT(build_opts_list);
+struct build_opts_str {
+ struct list list;
+ const char *str;
+ int must_free;
+};
+
+/*********************************************************************/
+/* general purpose functions ***************************************/
+/*********************************************************************/
+
+/* used to register some build option strings at boot. Set must_free to
+ * non-zero if the string must be freed upon exit.
+ */
+void hap_register_build_opts(const char *str, int must_free)
+{
+ struct build_opts_str *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->str = str;
+ b->must_free = must_free;
+ LIST_APPEND(&build_opts_list, &b->list);
+}
+
+#define VERSION_MAX_ELTS 7
+
+/* This function splits an haproxy version string into an array of integers.
+ * The syntax of the supported version string is the following:
+ *
+ * <a>[.<b>[.<c>[.<d>]]][-{dev,pre,rc}<f>][-*][-<g>]
+ *
+ * This validates for example:
+ * 1.2.1-pre2, 1.2.1, 1.2.10.1, 1.3.16-rc1, 1.4-dev3, 1.5-dev18, 1.5-dev18-43
+ * 2.4-dev18-f6818d-20
+ *
+ * The result is set in an array of <VERSION_MAX_ELTS> elements. Each letter has
+ * one fixed place in the array. The tags take a numeric value called <e> which
+ * defaults to 3. "dev" is 1, "rc" and "pre" are 2. Numbers not encountered are
+ * considered as zero (hence 1.5 and 1.5.0 are the same).
+ *
+ * The resulting values are:
+ * 1.2.1-pre2 1, 2, 1, 0, 2, 2, 0
+ * 1.2.1 1, 2, 1, 0, 3, 0, 0
+ * 1.2.10.1 1, 2, 10, 1, 3, 0, 0
+ * 1.3.16-rc1 1, 3, 16, 0, 2, 1, 0
+ * 1.4-dev3 1, 4, 0, 0, 1, 3, 0
+ * 1.5-dev18 1, 5, 0, 0, 1, 18, 0
+ * 1.5-dev18-43 1, 5, 0, 0, 1, 18, 43
+ * 2.4-dev18-f6818d-20 2, 4, 0, 0, 1, 18, 20
+ *
+ * The function returns non-zero if the conversion succeeded, or zero if it
+ * failed.
+ */
+int split_version(const char *version, unsigned int *value)
+{
+ const char *p, *s;
+ char *error;
+ int nelts;
+
+ /* Initialize array with zeroes */
+ for (nelts = 0; nelts < VERSION_MAX_ELTS; nelts++)
+ value[nelts] = 0;
+ value[4] = 3;
+
+ p = version;
+
+ /* If the version number is empty, return false */
+ if (*p == '\0')
+ return 0;
+
+ /* Convert first number <a> */
+ value[0] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error == '-')
+ goto split_version_tag;
+ if (*error != '.')
+ return 0;
+
+	/* Convert second number <b> */
+ value[1] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error == '-')
+ goto split_version_tag;
+ if (*error != '.')
+ return 0;
+
+	/* Convert third number <c> */
+ value[2] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error == '-')
+ goto split_version_tag;
+ if (*error != '.')
+ return 0;
+
+	/* Convert fourth number <d> */
+ value[3] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error != '-')
+ return 0;
+
+ split_version_tag:
+ /* Check for commit number */
+ if (*p >= '0' && *p <= '9')
+ goto split_version_commit;
+
+ /* Read tag */
+ if (strncmp(p, "dev", 3) == 0) { value[4] = 1; p += 3; }
+ else if (strncmp(p, "rc", 2) == 0) { value[4] = 2; p += 2; }
+ else if (strncmp(p, "pre", 3) == 0) { value[4] = 2; p += 3; }
+ else
+ goto split_version_commit;
+
+ /* Convert tag number */
+ value[5] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error != '-')
+ return 0;
+
+ split_version_commit:
+ /* Search the last "-" */
+ s = strrchr(p, '-');
+ if (s) {
+ s++;
+ if (*s == '\0')
+ return 0;
+ value[6] = strtol(s, &error, 10);
+ if (*error != '\0')
+ value[6] = 0;
+ return 1;
+ }
+
+	/* convert the trailing commit number */
+ value[6] = strtol(p, &error, 10);
+ if (*error != '\0')
+ value[6] = 0;
+
+ return 1;
+}
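+
+/* Illustrative use of split_version(), matching the result table above:
+ *
+ *   unsigned int v[VERSION_MAX_ELTS];
+ *
+ *   if (split_version("1.5-dev18-43", v))
+ *       ... v[] now holds { 1, 5, 0, 0, 1, 18, 43 } ...
+ */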
+
+/* This function compares the current haproxy version with an arbitrary version
+ * string. It returns:
+ * -1 : the version in argument is older than the current haproxy version
+ * 0 : the version in argument is the same as the current haproxy version
+ * 1 : the version in argument is newer than the current haproxy version
+ *
+ * Or some errors:
+ * -2 : the current haproxy version is not parsable
+ * -3 : the version in argument is not parsable
+ */
+int compare_current_version(const char *version)
+{
+ unsigned int loc[VERSION_MAX_ELTS];
+ unsigned int mod[VERSION_MAX_ELTS];
+ int i;
+
+ /* split versions */
+ if (!split_version(haproxy_version, loc))
+ return -2;
+ if (!split_version(version, mod))
+ return -3;
+
+ /* compare versions */
+ for (i = 0; i < VERSION_MAX_ELTS; i++) {
+ if (mod[i] < loc[i])
+ return -1;
+ else if (mod[i] > loc[i])
+ return 1;
+ }
+ return 0;
+}
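+
+/* Illustrative use of compare_current_version(); on a hypothetical 2.6
+ * binary, the check below would match since "2.7" is newer than the
+ * running version:
+ *
+ *   if (compare_current_version("2.7") == 1)
+ *       ... refuse a feature requiring at least version 2.7 ...
+ */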
+
+static void display_version()
+{
+ struct utsname utsname;
+
+ printf("HAProxy version %s %s - https://haproxy.org/\n"
+ PRODUCT_STATUS "\n", haproxy_version, haproxy_date);
+
+ if (strlen(PRODUCT_URL_BUGS) > 0) {
+ char base_version[20];
+ int dots = 0;
+ char *del;
+
+ /* only retrieve the base version without distro-specific extensions */
+ for (del = haproxy_version; *del; del++) {
+ if (*del == '.')
+ dots++;
+ else if (*del < '0' || *del > '9')
+ break;
+ }
+
+ strlcpy2(base_version, haproxy_version, del - haproxy_version + 1);
+ if (dots < 2)
+ printf("Known bugs: https://github.com/haproxy/haproxy/issues?q=is:issue+is:open\n");
+ else
+ printf("Known bugs: " PRODUCT_URL_BUGS "\n", base_version);
+ }
+
+ if (uname(&utsname) == 0) {
+ printf("Running on: %s %s %s %s\n", utsname.sysname, utsname.release, utsname.version, utsname.machine);
+ }
+}
+
+static void display_build_opts()
+{
+ struct build_opts_str *item;
+
+ printf("Build options :"
+#ifdef BUILD_TARGET
+ "\n TARGET = " BUILD_TARGET
+#endif
+#ifdef BUILD_CPU
+ "\n CPU = " BUILD_CPU
+#endif
+#ifdef BUILD_CC
+ "\n CC = " BUILD_CC
+#endif
+#ifdef BUILD_CFLAGS
+ "\n CFLAGS = " BUILD_CFLAGS
+#endif
+#ifdef BUILD_OPTIONS
+ "\n OPTIONS = " BUILD_OPTIONS
+#endif
+#ifdef BUILD_DEBUG
+ "\n DEBUG = " BUILD_DEBUG
+#endif
+#ifdef BUILD_FEATURES
+ "\n\nFeature list : " BUILD_FEATURES
+#endif
+ "\n\nDefault settings :"
+ "\n bufsize = %d, maxrewrite = %d, maxpollevents = %d"
+ "\n\n",
+ BUFSIZE, MAXREWRITE, MAX_POLL_EVENTS);
+
+ list_for_each_entry(item, &build_opts_list, list) {
+ puts(item->str);
+ }
+
+ putchar('\n');
+
+ list_pollers(stdout);
+ putchar('\n');
+ list_mux_proto(stdout);
+ putchar('\n');
+ list_services(stdout);
+ putchar('\n');
+ list_filters(stdout);
+ putchar('\n');
+}
+
+/*
+ * This function prints the command line usage and exits
+ */
+static void usage(char *name)
+{
+ display_version();
+ fprintf(stderr,
+ "Usage : %s [-f <cfgfile|cfgdir>]* [ -vdV"
+ "D ] [ -n <maxconn> ] [ -N <maxpconn> ]\n"
+ " [ -p <pidfile> ] [ -m <max megs> ] [ -C <dir> ] [-- <cfgfile>*]\n"
+ " -v displays version ; -vv shows known build options.\n"
+ " -d enters debug mode ; -db only disables background mode.\n"
+ " -dM[<byte>,help,...] debug memory (default: poison with <byte>/0x50)\n"
+ " -V enters verbose mode (disables quiet mode)\n"
+ " -D goes daemon ; -C changes to <dir> before loading files.\n"
+ " -W master-worker mode.\n"
+#if defined(USE_SYSTEMD)
+ " -Ws master-worker mode with systemd notify support.\n"
+#endif
+ " -q quiet mode : don't display messages\n"
+ " -c check mode : only check config files and exit\n"
+ " -cc check condition : evaluate a condition and exit\n"
+ " -n sets the maximum total # of connections (uses ulimit -n)\n"
+ " -m limits the usable amount of memory (in MB)\n"
+ " -N sets the default, per-proxy maximum # of connections (%d)\n"
+ " -L set local peer name (default to hostname)\n"
+ " -p writes pids of all children to this file\n"
+#if defined(USE_EPOLL)
+ " -de disables epoll() usage even when available\n"
+#endif
+#if defined(USE_KQUEUE)
+ " -dk disables kqueue() usage even when available\n"
+#endif
+#if defined(USE_EVPORTS)
+ " -dv disables event ports usage even when available\n"
+#endif
+#if defined(USE_POLL)
+ " -dp disables poll() usage even when available\n"
+#endif
+#if defined(USE_LINUX_SPLICE)
+ " -dS disables splice usage (broken on old kernels)\n"
+#endif
+#if defined(USE_GETADDRINFO)
+ " -dG disables getaddrinfo() usage\n"
+#endif
+#if defined(SO_REUSEPORT)
+ " -dR disables SO_REUSEPORT usage\n"
+#endif
+#if defined(HA_HAVE_DUMP_LIBS)
+ " -dL dumps loaded object files after config checks\n"
+#endif
+ " -dK{class[,...]} dump registered keywords (use 'help' for list)\n"
+ " -dr ignores server address resolution failures\n"
+ " -dV disables SSL verify on servers side\n"
+ " -dW fails if any warning is emitted\n"
+ " -dD diagnostic mode : warn about suspicious configuration statements\n"
+ " -sf/-st [pid ]* finishes/terminates old pids.\n"
+ " -x <unix_socket> get listening sockets from a unix socket\n"
+ " -S <bind>[,<bind options>...] new master CLI\n"
+ "\n",
+ name, cfg_maxpconn);
+ exit(1);
+}
+
+
+
+/*********************************************************************/
+/* more specific functions ***************************************/
+/*********************************************************************/
+
+/* sends the signal <sig> to all pids found in <oldpids>. Returns the number of
+ * pids the signal was correctly delivered to.
+ */
+int tell_old_pids(int sig)
+{
+ int p;
+ int ret = 0;
+ for (p = 0; p < nb_oldpids; p++)
+ if (kill(oldpids[p], sig) == 0)
+ ret++;
+ return ret;
+}
+
+/*
+ * remove a pid from the oldpids array and decrease nb_oldpids
+ * return 1 if the pid was found, otherwise return 0
+ */
+
+int delete_oldpid(int pid)
+{
+ int i;
+
+ for (i = 0; i < nb_oldpids; i++) {
+ if (oldpids[i] == pid) {
+ oldpids[i] = oldpids[nb_oldpids - 1];
+ oldpids[nb_oldpids - 1] = 0;
+ nb_oldpids--;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * When called, this function re-executes haproxy with -sf followed by the
+ * current children's PIDs, and possibly the old children's PIDs if they
+ * didn't leave yet.
+ */
+static void mworker_reexec()
+{
+ char **next_argv = NULL;
+	int old_argc = 0; /* previous number of arguments */
+ int next_argc = 0;
+ int i = 0;
+ char *msg = NULL;
+ struct rlimit limit;
+ struct mworker_proc *current_child = NULL;
+
+ mworker_block_signals();
+#if defined(USE_SYSTEMD)
+ if (global.tune.options & GTUNE_USE_SYSTEMD)
+ sd_notify(0, "RELOADING=1");
+#endif
+ setenv("HAPROXY_MWORKER_REEXEC", "1", 1);
+
+ mworker_cleanup_proc();
+ mworker_proc_list_to_env(); /* put the children description in the env */
+
+ /* ensure that we close correctly every listeners before reexecuting */
+ mworker_cleanlisteners();
+
+	/* during the reload we must ensure that every FD that can't be
+	 * reused (i.e. those that are not referenced in the proc_list)
+	 * is closed or it will leak. */
+
+ /* close the listeners FD */
+ mworker_cli_proxy_stop();
+
+ if (fdtab)
+ deinit_pollers();
+
+#ifdef HAVE_SSL_RAND_KEEP_RANDOM_DEVICES_OPEN
+ /* close random device FDs */
+ RAND_keep_random_devices_open(0);
+#endif
+
+ /* restore the initial FD limits */
+ limit.rlim_cur = rlim_fd_cur_at_boot;
+ limit.rlim_max = rlim_fd_max_at_boot;
+ if (raise_rlim_nofile(&limit, &limit) != 0) {
+ ha_warning("Failed to restore initial FD limits (cur=%u max=%u), using cur=%u max=%u\n",
+ rlim_fd_cur_at_boot, rlim_fd_max_at_boot,
+ (unsigned int)limit.rlim_cur, (unsigned int)limit.rlim_max);
+ }
+
+	/* compute the number of arguments in the previous command line */
+ while (old_argv[old_argc])
+ old_argc++;
+
+	/* old args + 1 for "-sf" + 2 for "-x <socket>" + children PIDs + trailing NULL */
+ next_argv = calloc(old_argc + 1 + 2 + mworker_child_nb() + 1,
+ sizeof(*next_argv));
+ if (next_argv == NULL)
+ goto alloc_error;
+
+ /* copy the program name */
+ next_argv[next_argc++] = old_argv[0];
+
+ /* insert the new options just after argv[0] in case we have a -- */
+
+ if (getenv("HAPROXY_MWORKER_WAIT_ONLY") == NULL) {
+ /* add -sf <PID>* to argv */
+ if (mworker_child_nb() > 0) {
+ struct mworker_proc *child;
+
+ next_argv[next_argc++] = "-sf";
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (!(child->options & PROC_O_LEAVING) && (child->options & PROC_O_TYPE_WORKER))
+ current_child = child;
+
+ if (!(child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)) || child->pid <= -1)
+ continue;
+ if ((next_argv[next_argc++] = memprintf(&msg, "%d", child->pid)) == NULL)
+ goto alloc_error;
+ msg = NULL;
+ }
+ }
+
+ if (current_child) {
+ /* add the -x option with the socketpair of the current worker */
+ next_argv[next_argc++] = "-x";
+ if ((next_argv[next_argc++] = memprintf(&msg, "sockpair@%d", current_child->ipc_fd[0])) == NULL)
+ goto alloc_error;
+ msg = NULL;
+ }
+ }
+
+ /* copy the previous options */
+ for (i = 1; i < old_argc; i++)
+ next_argv[next_argc++] = old_argv[i];
+
+ signal(SIGPROF, SIG_IGN);
+ execvp(next_argv[0], next_argv);
+ ha_warning("Failed to reexecute the master process [%d]: %s\n", pid, strerror(errno));
+ ha_free(&next_argv);
+ return;
+
+alloc_error:
+ ha_free(&next_argv);
+ ha_warning("Failed to reexecute the master process [%d]: Cannot allocate memory\n", pid);
+ return;
+}
+
+/* re-executes haproxy in wait mode */
+static void mworker_reexec_waitmode()
+{
+ setenv("HAPROXY_MWORKER_WAIT_ONLY", "1", 1);
+ mworker_reexec();
+}
+
+/* reload haproxy and emit a warning */
+void mworker_reload()
+{
+ struct mworker_proc *child;
+ struct per_thread_deinit_fct *ptdf;
+
+ ha_notice("Reloading HAProxy\n");
+
+ /* close the poller FD and the thread waker pipe FD */
+ list_for_each_entry(ptdf, &per_thread_deinit_list, list)
+ ptdf->fct();
+
+ /* increment the number of reloads */
+ list_for_each_entry(child, &proc_list, list) {
+ child->reloads++;
+ }
+
+ mworker_reexec();
+}
+
+static void mworker_loop()
+{
+
+#if defined(USE_SYSTEMD)
+ if (global.tune.options & GTUNE_USE_SYSTEMD)
+ sd_notifyf(0, "READY=1\nMAINPID=%lu", (unsigned long)getpid());
+#endif
+ /* Busy polling makes no sense in the master :-) */
+ global.tune.options &= ~GTUNE_BUSY_POLLING;
+
+
+ signal_unregister(SIGTTIN);
+ signal_unregister(SIGTTOU);
+ signal_unregister(SIGUSR1);
+ signal_unregister(SIGHUP);
+ signal_unregister(SIGQUIT);
+
+ signal_register_fct(SIGTERM, mworker_catch_sigterm, SIGTERM);
+ signal_register_fct(SIGUSR1, mworker_catch_sigterm, SIGUSR1);
+ signal_register_fct(SIGTTIN, mworker_broadcast_signal, SIGTTIN);
+ signal_register_fct(SIGTTOU, mworker_broadcast_signal, SIGTTOU);
+ signal_register_fct(SIGINT, mworker_catch_sigterm, SIGINT);
+ signal_register_fct(SIGHUP, mworker_catch_sighup, SIGHUP);
+ signal_register_fct(SIGUSR2, mworker_catch_sighup, SIGUSR2);
+ signal_register_fct(SIGCHLD, mworker_catch_sigchld, SIGCHLD);
+
+ mworker_unblock_signals();
+ mworker_cleantasks();
+
+ mworker_catch_sigchld(NULL); /* ensure we clean the children in case
+ some SIGCHLD were lost */
+
+	jobs++; /* this is the "master" job: we want to take care of the
+	           signals even if there is no listener, so that the poll
+	           loop doesn't leave */
+
+ fork_poller();
+ run_thread_poll_loop(NULL);
+}
+
+/*
+ * Reexec the process in failure mode, instead of exiting
+ */
+void reexec_on_failure()
+{
+ struct mworker_proc *child;
+
+ if (!atexit_flag)
+ return;
+
+ /* get the info of the children in the env */
+ if (mworker_env_to_proc_list() < 0) {
+ exit(EXIT_FAILURE);
+ }
+
+ /* increment the number of failed reloads */
+ list_for_each_entry(child, &proc_list, list) {
+ child->failedreloads++;
+ }
+
+ /* do not keep unused FDs retrieved from the previous process */
+ sock_drop_unused_old_sockets();
+
+ usermsgs_clr(NULL);
+ ha_warning("Loading failure!\n");
+ mworker_reexec_waitmode();
+}
+
+/*
+ * Exit with an error message upon a wait-mode failure.
+ */
+void exit_on_waitmode_failure()
+{
+ if (!atexit_flag)
+ return;
+
+ ha_alert("Non-recoverable mworker wait-mode error, exiting.\n");
+}
+
+
+/*
+ * upon SIGUSR1, let's have a soft stop. Note that soft_stop() broadcasts
+ * a signal zero to all subscribers. This means that it's as easy as
+ * subscribing to signal 0 to get informed about an imminent shutdown.
+ */
+static void sig_soft_stop(struct sig_handler *sh)
+{
+ soft_stop();
+ signal_unregister_handler(sh);
+ pool_gc(NULL);
+}
+
+/*
+ * upon SIGTTOU, we pause everything
+ */
+static void sig_pause(struct sig_handler *sh)
+{
+ if (protocol_pause_all() & ERR_FATAL) {
+ const char *msg = "Some proxies refused to pause, performing soft stop now.\n";
+ ha_warning("%s", msg);
+ send_log(NULL, LOG_WARNING, "%s", msg);
+ soft_stop();
+ }
+ pool_gc(NULL);
+}
+
+/*
+ * upon SIGTTIN, let's try to resume all listeners.
+ */
+static void sig_listen(struct sig_handler *sh)
+{
+ if (protocol_resume_all() & ERR_FATAL) {
+ const char *msg = "Some proxies refused to resume, probably due to a conflict on a listening port. You may want to try again after the conflicting application is stopped, otherwise a restart might be needed to resume safe operations.\n";
+ ha_warning("%s", msg);
+ send_log(NULL, LOG_WARNING, "%s", msg);
+ }
+}
+
+/*
+ * this function dumps every server's state when the process receives SIGHUP.
+ */
+static void sig_dump_state(struct sig_handler *sh)
+{
+ struct proxy *p = proxies_list;
+
+ ha_warning("SIGHUP received, dumping servers states.\n");
+ while (p) {
+ struct server *s = p->srv;
+
+ send_log(p, LOG_NOTICE, "SIGHUP received, dumping servers states for proxy %s.\n", p->id);
+ while (s) {
+ chunk_printf(&trash,
+ "SIGHUP: Server %s/%s is %s. Conn: %d act, %d pend, %lld tot.",
+ p->id, s->id,
+ (s->cur_state != SRV_ST_STOPPED) ? "UP" : "DOWN",
+ s->cur_sess, s->queue.length, s->counters.cum_sess);
+ ha_warning("%s\n", trash.area);
+ send_log(p, LOG_NOTICE, "%s\n", trash.area);
+ s = s->next;
+ }
+
+		/* FIXME: this info is a bit outdated. We should be able to distinguish between FE and BE. */
+ if (!p->srv) {
+ chunk_printf(&trash,
+ "SIGHUP: Proxy %s has no servers. Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
+ p->id,
+ p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+ } else if (p->srv_act == 0) {
+ chunk_printf(&trash,
+ "SIGHUP: Proxy %s %s ! Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
+ p->id,
+ (p->srv_bck) ? "is running on backup servers" : "has no server available",
+ p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+ } else {
+ chunk_printf(&trash,
+ "SIGHUP: Proxy %s has %d active servers and %d backup servers available."
+ " Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
+ p->id, p->srv_act, p->srv_bck,
+ p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+ }
+ ha_warning("%s\n", trash.area);
+ send_log(p, LOG_NOTICE, "%s\n", trash.area);
+
+ p = p->next;
+ }
+}
+
+static void dump(struct sig_handler *sh)
+{
+ /* dump memory usage then free everything possible */
+ dump_pools();
+ pool_gc(NULL);
+}
+
+/*
+ * This function dup2()s <fd> onto the stdio FDs (0,1,2), then closes <fd>.
+ * If <fd> < 0, it opens /dev/null and uses that instead.
+ *
+ * In the case of chrooting, you have to open /dev/null before the chroot and
+ * pass the resulting <fd> to this function.
+ */
+static void stdio_quiet(int fd)
+{
+ if (fd < 0)
+ fd = open("/dev/null", O_RDWR, 0);
+
+ if (fd > -1) {
+ fclose(stdin);
+ fclose(stdout);
+ fclose(stderr);
+
+ dup2(fd, 0);
+ dup2(fd, 1);
+ dup2(fd, 2);
+ if (fd > 2)
+ close(fd);
+ return;
+ }
+
+ ha_alert("Cannot open /dev/null\n");
+ exit(EXIT_FAILURE);
+}
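+
+/* Illustrative use of stdio_quiet() around a chroot, as described in the
+ * comment above (hypothetical sketch):
+ *
+ *   int devnullfd = open("/dev/null", O_RDWR);  // before the chroot
+ *   ... chroot()/chdir() ...
+ *   stdio_quiet(devnullfd);                     // dup2s onto 0,1,2 and closes it
+ */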
+
+
+/* This function checks if cfg_cfgfiles contains directories.
+ * If it finds one, it adds all the files (and only files) it contains
+ * in cfg_cfgfiles in place of the directory (and removes the directory).
+ * It adds the files in lexical order.
+ * It only adds files with a .cfg extension.
+ * It doesn't add files whose name starts with '.'
+ */
+static void cfgfiles_expand_directories(void)
+{
+ struct wordlist *wl, *wlb;
+ char *err = NULL;
+
+ list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
+ struct stat file_stat;
+ struct dirent **dir_entries = NULL;
+ int dir_entries_nb;
+ int dir_entries_it;
+
+ if (stat(wl->s, &file_stat)) {
+ ha_alert("Cannot open configuration file/directory %s : %s\n",
+ wl->s,
+ strerror(errno));
+ exit(1);
+ }
+
+ if (!S_ISDIR(file_stat.st_mode))
+ continue;
+
+ /* from this point wl->s is a directory */
+
+ dir_entries_nb = scandir(wl->s, &dir_entries, NULL, alphasort);
+ if (dir_entries_nb < 0) {
+ ha_alert("Cannot open configuration directory %s : %s\n",
+ wl->s,
+ strerror(errno));
+ exit(1);
+ }
+
+ /* for each element in the directory wl->s */
+ for (dir_entries_it = 0; dir_entries_it < dir_entries_nb; dir_entries_it++) {
+ struct dirent *dir_entry = dir_entries[dir_entries_it];
+ char *filename = NULL;
+ char *d_name_cfgext = strstr(dir_entry->d_name, ".cfg");
+
+			/* skip file names beginning with '.';
+			 * only add file names with a .cfg extension
+			 */
+ if (dir_entry->d_name[0] == '.' ||
+ !(d_name_cfgext && d_name_cfgext[4] == '\0'))
+ goto next_dir_entry;
+
+ if (!memprintf(&filename, "%s/%s", wl->s, dir_entry->d_name)) {
+ ha_alert("Cannot load configuration files %s : out of memory.\n",
+ filename);
+ exit(1);
+ }
+
+ if (stat(filename, &file_stat)) {
+ ha_alert("Cannot open configuration file %s : %s\n",
+ wl->s,
+ strerror(errno));
+ exit(1);
+ }
+
+			/* don't add anything other than regular files to
+			 * cfg_cfgfiles; this way we avoid loops
+			 */
+ if (!S_ISREG(file_stat.st_mode))
+ goto next_dir_entry;
+
+ if (!list_append_word(&wl->list, filename, &err)) {
+ ha_alert("Cannot load configuration files %s : %s\n",
+ filename,
+ err);
+ exit(1);
+ }
+
+next_dir_entry:
+ free(filename);
+ free(dir_entry);
+ }
+
+ free(dir_entries);
+
+ /* remove the current directory (wl) from cfg_cfgfiles */
+ free(wl->s);
+ LIST_DELETE(&wl->list);
+ free(wl);
+ }
+
+ free(err);
+}
+
+/*
+ * copies and cleans up the current argv
+ * removes the -sf/-st/-x parameters and their arguments
+ * returns an allocated copy of argv
+ */
+
+static char **copy_argv(int argc, char **argv)
+{
+ char **newargv, **retargv;
+
+ newargv = calloc(argc + 2, sizeof(*newargv));
+ if (newargv == NULL) {
+ ha_warning("Cannot allocate memory\n");
+ return NULL;
+ }
+ retargv = newargv;
+
+ /* first copy argv[0] */
+ *newargv++ = *argv++;
+ argc--;
+
+ while (argc > 0) {
+ if (**argv != '-') {
+			/* non-options are copied but will fail in the argument parser */
+ *newargv++ = *argv++;
+ argc--;
+
+ } else {
+ char *flag;
+
+ flag = *argv + 1;
+
+ if (flag[0] == '-' && flag[1] == 0) {
+				/* "--\0": copy every argument till the end of argv */
+ *newargv++ = *argv++;
+ argc--;
+
+ while (argc > 0) {
+ *newargv++ = *argv++;
+ argc--;
+ }
+ } else {
+ switch (*flag) {
+ case 's':
+ /* -sf / -st and their parameters are ignored */
+ if (flag[1] == 'f' || flag[1] == 't') {
+ argc--;
+ argv++;
+ /* The list can't contain a negative value since the only
+ way to know the end of this list is by looking for the
+ next option or the end of the options */
+ while (argc > 0 && argv[0][0] != '-') {
+ argc--;
+ argv++;
+ }
+ } else {
+ argc--;
+ argv++;
+
+ }
+ break;
+
+ case 'x':
+ /* this option and its parameter are ignored */
+ argc--;
+ argv++;
+ if (argc > 0) {
+ argc--;
+ argv++;
+ }
+ break;
+
+ case 'C':
+ case 'n':
+ case 'm':
+ case 'N':
+ case 'L':
+ case 'f':
+ case 'p':
+ case 'S':
+ /* these options have only 1 parameter which must be copied and can start with a '-' */
+ *newargv++ = *argv++;
+ argc--;
+ if (argc == 0)
+ goto error;
+ *newargv++ = *argv++;
+ argc--;
+ break;
+ default:
+ /* for other options just copy them without parameters, this is also done
+ * for options like "--foo", but this will fail in the argument parser.
+ * */
+ *newargv++ = *argv++;
+ argc--;
+ break;
+ }
+ }
+ }
+ }
+
+ return retargv;
+
+error:
+ free(retargv);
+ return NULL;
+}
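+
+/* Illustrative effect of copy_argv() on a reloaded command line (the paths,
+ * PIDs and socket names below are hypothetical):
+ *
+ *   in : haproxy -W -f /etc/haproxy.cfg -sf 1234 5678 -x /run/admin.sock
+ *   out: haproxy -W -f /etc/haproxy.cfg
+ *
+ * mworker_reexec() can then append fresh -sf/-x arguments to the copy.
+ */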
+
+
+/* Performs basic random seed initialization. The main issue with this is that
+ * srandom_r() only takes 32 bits and purposely provides a reproducible sequence,
+ * which means that there will only be 4 billion possible random sequences once
+ * srandom() is called, regardless of the internal state. Not calling it is
+ * even worse as we'll always produce the same random sequences. What we do
+ * here is to create an initial sequence from various entropy sources, hash it
+ * using SHA1 and keep the resulting 160 bits available globally.
+ *
+ * We initialize the current process with the first 32 bits before starting the
+ * polling loop, where all this will be changed to have process specific and
+ * thread specific sequences.
+ *
+ * Before starting threads, it's still possible to call random() as srandom()
+ * is initialized from this, but after threads and/or processes are started,
+ * only ha_random() is expected to be used to guarantee distinct sequences.
+ */
+static void ha_random_boot(char *const *argv)
+{
+ unsigned char message[256];
+ unsigned char *m = message;
+ struct timeval tv;
+ blk_SHA_CTX ctx;
+ unsigned long l;
+ int fd;
+ int i;
+
+ /* start with current time as pseudo-random seed */
+ gettimeofday(&tv, NULL);
+ write_u32(m, tv.tv_sec); m += 4;
+ write_u32(m, tv.tv_usec); m += 4;
+
+ /* PID and PPID add some OS-based randomness */
+ write_u16(m, getpid()); m += 2;
+ write_u16(m, getppid()); m += 2;
+
+	/* take up to 160 bits (20 bytes) from /dev/urandom if available (non-blocking) */
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd >= 0) {
+ i = read(fd, m, 20);
+ if (i > 0)
+ m += i;
+ close(fd);
+ }
+
+	/* take up to 160 bits (20 bytes) from openssl (non-blocking) */
+#ifdef USE_OPENSSL
+ if (RAND_bytes(m, 20) == 1)
+ m += 20;
+#endif
+
+ /* take 160 bits from existing random in case it was already initialized */
+ for (i = 0; i < 5; i++) {
+ write_u32(m, random());
+ m += 4;
+ }
+
+	/* stack address (benefits from the operating system's ASLR) */
+ l = (unsigned long)&m;
+ memcpy(m, &l, sizeof(l)); m += sizeof(l);
+
+	/* argv address (benefits from the operating system's ASLR) */
+ l = (unsigned long)&argv;
+ memcpy(m, &l, sizeof(l)); m += sizeof(l);
+
+ /* use tv_usec again after all the operations above */
+ gettimeofday(&tv, NULL);
+ write_u32(m, tv.tv_usec); m += 4;
+
+ /*
+ * At this point, ~84-92 bytes have been used
+ */
+
+ /* finish with the hostname */
+ strncpy((char *)m, hostname, message + sizeof(message) - m);
+ m += strlen(hostname);
+
+ /* total message length */
+ l = m - message;
+
+ memset(&ctx, 0, sizeof(ctx));
+ blk_SHA1_Init(&ctx);
+ blk_SHA1_Update(&ctx, message, l);
+ blk_SHA1_Final(boot_seed, &ctx);
+
+ srandom(read_u32(boot_seed));
+ ha_random_seed(boot_seed, sizeof(boot_seed));
+}
+
+/* considers splicing proxies' maxconn, computes the ideal global.maxpipes
+ * setting, and returns it. It may return -1 meaning "unlimited" if some
+ * unlimited proxies have been found and the global.maxconn value is not yet
+ * set. It may also return a value greater than maxconn if it's not yet set.
+ * Note that a value of zero means there is no need for pipes. -1 is never
+ * returned if global.maxconn is valid.
+ */
+static int compute_ideal_maxpipes()
+{
+ struct proxy *cur;
+ int nbfe = 0, nbbe = 0;
+ int unlimited = 0;
+ int pipes;
+ int max;
+
+ for (cur = proxies_list; cur; cur = cur->next) {
+ if (cur->options2 & (PR_O2_SPLIC_ANY)) {
+ if (cur->cap & PR_CAP_FE) {
+ max = cur->maxconn;
+ nbfe += max;
+ if (!max) {
+ unlimited = 1;
+ break;
+ }
+ }
+ if (cur->cap & PR_CAP_BE) {
+ max = cur->fullconn ? cur->fullconn : global.maxconn;
+ nbbe += max;
+ if (!max) {
+ unlimited = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ pipes = MAX(nbfe, nbbe);
+ if (global.maxconn) {
+ if (pipes > global.maxconn || unlimited)
+ pipes = global.maxconn;
+ } else if (unlimited) {
+ pipes = -1;
+ }
+
+ return pipes >= 4 ? pipes / 4 : pipes;
+}
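+
+/* Worked example with hypothetical values: a single splicing frontend with
+ * maxconn 4000, no splicing backend and global.maxconn unset gives nbfe=4000
+ * and nbbe=0, so pipes=4000 and the function returns 4000/4 = 1000 pipes.
+ */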
+
+/* considers global.maxsocks, global.maxpipes, async engines, SSL frontends and
+ * rlimits and computes an ideal maxconn. It's meant to be called only when
+ * maxsock contains the sum of listening FDs, before it is updated based on
+ * maxconn and pipes. If there are not enough FDs left, DEFAULT_MAXCONN (by
+ * default 100) is returned as it is expected that it will even run on tight
+ * environments, and will maintain compatibility with previous packages that
+ * used to rely on this value as the default one. The system will emit a
+ * warning indicating how many FDs are missing anyway if needed.
+ */
+static int compute_ideal_maxconn()
+{
+ int ssl_sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int engine_fds = global.ssl_used_async_engines * ssl_sides;
+ int pipes = compute_ideal_maxpipes();
+ int remain = MAX(rlim_fd_cur_at_boot, rlim_fd_max_at_boot);
+ int maxconn;
+
+ /* we have to take into account these elements :
+	 * - number of engine_fds, which inflates the number of FDs needed per
+	 *   connection by this number.
+ * - number of pipes per connection on average : for the unlimited
+ * case, this is 0.5 pipe FDs per connection, otherwise it's a
+ * fixed value of 2*pipes.
+ * - two FDs per connection
+ */
+
+ if (global.fd_hard_limit && remain > global.fd_hard_limit)
+ remain = global.fd_hard_limit;
+
+ /* subtract listeners and checks */
+ remain -= global.maxsock;
+
+ /* one epoll_fd/kqueue_fd per thread */
+ remain -= global.nbthread;
+
+ /* one wake-up pipe (2 fd) per thread */
+ remain -= 2 * global.nbthread;
+
+ /* Fixed pipes values : we only subtract them if they're not larger
+ * than the remaining FDs because pipes are optional.
+ */
+ if (pipes >= 0 && pipes * 2 < remain)
+ remain -= pipes * 2;
+
+ if (pipes < 0) {
+ /* maxsock = maxconn * 2 + maxconn/4 * 2 + maxconn * engine_fds.
+ * = maxconn * (2 + 0.5 + engine_fds)
+ * = maxconn * (4 + 1 + 2*engine_fds) / 2
+ */
+ maxconn = 2 * remain / (5 + 2 * engine_fds);
+ } else {
+ /* maxsock = maxconn * 2 + maxconn * engine_fds.
+ * = maxconn * (2 + engine_fds)
+ */
+ maxconn = remain / (2 + engine_fds);
+ }
+
+ return MAX(maxconn, DEFAULT_MAXCONN);
+}
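+
+/* Worked example with hypothetical values: with a boot FD limit of 1048576,
+ * no fd_hard_limit, maxsock=576 for listeners/checks, 4 threads (4 poller
+ * FDs plus 2*4 wake-up pipe FDs), no pipes and no async engines:
+ *   remain  = 1048576 - 576 - 4 - 8 = 1047988
+ *   maxconn = 1047988 / (2 + 0)     = 523994
+ */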
+
+/* computes the estimated maxsock value for the given maxconn based on the
+ * possibly set global.maxpipes and existing partial global.maxsock. It may
+ * temporarily change global.maxconn for the time needed to propagate the
+ * computations, and will reset it.
+ */
+static int compute_ideal_maxsock(int maxconn)
+{
+ int maxpipes = global.maxpipes;
+ int maxsock = global.maxsock;
+
+
+ if (!maxpipes) {
+ int old_maxconn = global.maxconn;
+
+ global.maxconn = maxconn;
+ maxpipes = compute_ideal_maxpipes();
+ global.maxconn = old_maxconn;
+ }
+
+ maxsock += maxconn * 2; /* each connection needs two sockets */
+ maxsock += maxpipes * 2; /* each pipe needs two FDs */
+ maxsock += global.nbthread; /* one epoll_fd/kqueue_fd per thread */
+ maxsock += 2 * global.nbthread; /* one wake-up pipe (2 fd) per thread */
+
+ /* compute fd used by async engines */
+ if (global.ssl_used_async_engines) {
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+
+ maxsock += maxconn * sides * global.ssl_used_async_engines;
+ }
+ return maxsock;
+}
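+
+/* Worked example with hypothetical values: maxconn=1000, maxpipes=250,
+ * nbthread=4 and a starting global.maxsock of 10 give
+ * 10 + 1000*2 + 250*2 + 4 + 2*4 = 2522, before any async engine FDs.
+ */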
+
+/* Tests if it is possible to set the current process's RLIMIT_NOFILE to
+ * <maxsock>, then sets it back to the previous value. Returns non-zero if the
+ * value is accepted, zero otherwise. This is used to determine if an
+ * automatic limit may be applied or not. When it is not, the caller knows that
+ * the highest we can do is the rlim_max at boot. In case of error, we return
+ * that the setting is possible, so that we defer the error processing to the
+ * final stage in charge of enforcing this.
+ */
+static int check_if_maxsock_permitted(int maxsock)
+{
+ struct rlimit orig_limit, test_limit;
+ int ret;
+
+ if (global.fd_hard_limit && maxsock > global.fd_hard_limit)
+ return 0;
+
+ if (getrlimit(RLIMIT_NOFILE, &orig_limit) != 0)
+ return 1;
+
+ /* don't go further if we can't even set to what we have */
+ if (raise_rlim_nofile(NULL, &orig_limit) != 0)
+ return 1;
+
+ test_limit.rlim_max = MAX(maxsock, orig_limit.rlim_max);
+ test_limit.rlim_cur = test_limit.rlim_max;
+ ret = raise_rlim_nofile(NULL, &test_limit);
+
+ if (raise_rlim_nofile(NULL, &orig_limit) != 0)
+ return 1;
+
+ return ret == 0;
+}
+
+/* This performs the very basic early initialization at the end of the PREPARE
+ * init stage. It may only assume that list heads are initialized, but not that
+ * anything else is correct. It will initialize a number of variables that
+ * depend on command line and will pre-parse the command line. If it fails, it
+ * directly exits.
+ */
+static void init_early(int argc, char **argv)
+{
+ char *progname;
+ char *tmp;
+ int len;
+
+ setenv("HAPROXY_STARTUP_VERSION", HAPROXY_VERSION, 0);
+
+ /* First, let's initialize most global variables */
+ totalconn = actconn = listeners = stopping = 0;
+ killed = pid = 0;
+
+ global.maxsock = 10; /* reserve 10 fds ; will be incremented by socket eaters */
+ global.rlimit_memmax_all = HAPROXY_MEMMAX;
+ global.mode = MODE_STARTING;
+
+ /* if we were in mworker mode, we should restart in mworker mode */
+ if (getenv("HAPROXY_MWORKER_REEXEC") != NULL)
+ global.mode |= MODE_MWORKER;
+
+ /* initialize date, time, and pid */
+ tzset();
+ clock_init_process_date();
+ start_date = now;
+ pid = getpid();
+
+ /* Set local host name and adjust some environment variables.
+ * NB: POSIX does not make it mandatory for gethostname() to
+ * NULL-terminate the string in case of truncation, and at least
+ * FreeBSD appears not to do it.
+ */
+ memset(hostname, 0, sizeof(hostname));
+ gethostname(hostname, sizeof(hostname) - 1);
+
+ /* preset some environment variables */
+ localpeer = strdup(hostname);
+ if (!localpeer || setenv("HAPROXY_LOCALPEER", localpeer, 1) < 0) {
+ ha_alert("Cannot allocate memory for local peer.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Some CPU affinity stuff may have to be initialized */
+#ifdef USE_CPU_AFFINITY
+ {
+ int i;
+ ha_cpuset_zero(&cpu_map.proc);
+ ha_cpuset_zero(&cpu_map.proc_t1);
+ for (i = 0; i < MAX_THREADS; ++i) {
+ ha_cpuset_zero(&cpu_map.thread[i]);
+ }
+ }
+#endif
+
+ /* extract the program name from argv[0], it will be used for the logs
+ * and error messages.
+ */
+ progname = *argv;
+ while ((tmp = strchr(progname, '/')) != NULL)
+ progname = tmp + 1;
+
+ len = strlen(progname);
+ progname = strdup(progname);
+ if (!progname) {
+ ha_alert("Cannot allocate memory for log_tag.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ chunk_initlen(&global.log_tag, progname, len, len);
+}
+
+/* handles program arguments. Very minimal parsing is performed, variables are
+ * fed with some values, and lists are completed with other ones. In case of
+ * error, it will exit.
+ */
+static void init_args(int argc, char **argv)
+{
+ char *progname = global.log_tag.area;
+ char *err_msg = NULL;
+
+ /* pre-fill in the global tuning options before we let the cmdline
+ * change them.
+ */
+ global.tune.options |= GTUNE_USE_SELECT; /* select() is always available */
+#if defined(USE_POLL)
+ global.tune.options |= GTUNE_USE_POLL;
+#endif
+#if defined(USE_EPOLL)
+ global.tune.options |= GTUNE_USE_EPOLL;
+#endif
+#if defined(USE_KQUEUE)
+ global.tune.options |= GTUNE_USE_KQUEUE;
+#endif
+#if defined(USE_EVPORTS)
+ global.tune.options |= GTUNE_USE_EVPORTS;
+#endif
+#if defined(USE_LINUX_SPLICE)
+ global.tune.options |= GTUNE_USE_SPLICE;
+#endif
+#if defined(USE_GETADDRINFO)
+ global.tune.options |= GTUNE_USE_GAI;
+#endif
+#if defined(SO_REUSEPORT)
+ global.tune.options |= GTUNE_USE_REUSEPORT;
+#endif
+#ifdef USE_THREAD
+ global.tune.options |= GTUNE_IDLE_POOL_SHARED;
+#endif
+ global.tune.options |= GTUNE_STRICT_LIMITS;
+
+ /* keep a copy of original arguments for the master process */
+ old_argv = copy_argv(argc, argv);
+ if (!old_argv) {
+ ha_alert("failed to copy argv.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* skip program name and start */
+ argc--; argv++;
+ while (argc > 0) {
+ char *flag;
+
+ if (**argv == '-') {
+ flag = *argv+1;
+
+ /* 1 arg */
+ if (*flag == 'v') {
+ display_version();
+ if (flag[1] == 'v') /* -vv */
+ display_build_opts();
+ deinit_and_exit(0);
+ }
+#if defined(USE_EPOLL)
+ else if (*flag == 'd' && flag[1] == 'e')
+ global.tune.options &= ~GTUNE_USE_EPOLL;
+#endif
+#if defined(USE_POLL)
+ else if (*flag == 'd' && flag[1] == 'p')
+ global.tune.options &= ~GTUNE_USE_POLL;
+#endif
+#if defined(USE_KQUEUE)
+ else if (*flag == 'd' && flag[1] == 'k')
+ global.tune.options &= ~GTUNE_USE_KQUEUE;
+#endif
+#if defined(USE_EVPORTS)
+ else if (*flag == 'd' && flag[1] == 'v')
+ global.tune.options &= ~GTUNE_USE_EVPORTS;
+#endif
+#if defined(USE_LINUX_SPLICE)
+ else if (*flag == 'd' && flag[1] == 'S')
+ global.tune.options &= ~GTUNE_USE_SPLICE;
+#endif
+#if defined(USE_GETADDRINFO)
+ else if (*flag == 'd' && flag[1] == 'G')
+ global.tune.options &= ~GTUNE_USE_GAI;
+#endif
+#if defined(SO_REUSEPORT)
+ else if (*flag == 'd' && flag[1] == 'R')
+ global.tune.options &= ~GTUNE_USE_REUSEPORT;
+#endif
+ else if (*flag == 'd' && flag[1] == 'V')
+ global.ssl_server_verify = SSL_SERVER_VERIFY_NONE;
+ else if (*flag == 'V')
+ arg_mode |= MODE_VERBOSE;
+ else if (*flag == 'd' && flag[1] == 'b')
+ arg_mode |= MODE_FOREGROUND;
+ else if (*flag == 'd' && flag[1] == 'D')
+ arg_mode |= MODE_DIAG;
+ else if (*flag == 'd' && flag[1] == 'W')
+ arg_mode |= MODE_ZERO_WARNING;
+ else if (*flag == 'd' && flag[1] == 'M') {
+ int ret = pool_parse_debugging(flag + 2, &err_msg);
+
+ if (ret <= -1) {
+ if (ret < -1)
+ ha_alert("-dM: %s\n", err_msg);
+ else
+ printf("%s\n", err_msg);
+ ha_free(&err_msg);
+ exit(ret < -1 ? EXIT_FAILURE : 0);
+ } else if (ret == 0) {
+ ha_warning("-dM: %s\n", err_msg);
+ ha_free(&err_msg);
+ }
+ }
+ else if (*flag == 'd' && flag[1] == 'r')
+ global.tune.options |= GTUNE_RESOLVE_DONTFAIL;
+#if defined(HA_HAVE_DUMP_LIBS)
+ else if (*flag == 'd' && flag[1] == 'L')
+ arg_mode |= MODE_DUMP_LIBS;
+#endif
+ else if (*flag == 'd' && flag[1] == 'K') {
+ arg_mode |= MODE_DUMP_KWD;
+ kwd_dump = flag + 2;
+ }
+ else if (*flag == 'd')
+ arg_mode |= MODE_DEBUG;
+ else if (*flag == 'c' && flag[1] == 'c') {
+ arg_mode |= MODE_CHECK_CONDITION;
+ argv++;
+ argc--;
+ check_condition = *argv;
+ }
+ else if (*flag == 'c')
+ arg_mode |= MODE_CHECK;
+ else if (*flag == 'D')
+ arg_mode |= MODE_DAEMON;
+ else if (*flag == 'W' && flag[1] == 's') {
+ arg_mode |= MODE_MWORKER | MODE_FOREGROUND;
+#if defined(USE_SYSTEMD)
+ global.tune.options |= GTUNE_USE_SYSTEMD;
+#else
+ ha_alert("master-worker mode with systemd support (-Ws) requested, but not compiled. Use master-worker mode (-W) if you are not using Type=notify in your unit file or recompile with USE_SYSTEMD=1.\n\n");
+ usage(progname);
+#endif
+ }
+ else if (*flag == 'W')
+ arg_mode |= MODE_MWORKER;
+ else if (*flag == 'q')
+ arg_mode |= MODE_QUIET;
+ else if (*flag == 'x') {
+ if (argc <= 1) {
+ ha_alert("Unix socket path expected with the -x flag\n\n");
+ usage(progname);
+ }
+ if (old_unixsocket)
+ ha_warning("-x option already set, overwriting the value\n");
+ old_unixsocket = argv[1];
+
+ argv++;
+ argc--;
+ }
+ else if (*flag == 'S') {
+ struct wordlist *c;
+
+ if (argc <= 1) {
+ ha_alert("Socket and optional bind parameters expected with the -S flag\n");
+ usage(progname);
+ }
+ if ((c = malloc(sizeof(*c))) == NULL || (c->s = strdup(argv[1])) == NULL) {
+ ha_alert("Cannot allocate memory\n");
+ exit(EXIT_FAILURE);
+ }
+ LIST_INSERT(&mworker_cli_conf, &c->list);
+
+ argv++;
+ argc--;
+ }
+ else if (*flag == 's' && (flag[1] == 'f' || flag[1] == 't')) {
+ /* list of pids to finish ('f') or terminate ('t') */
+
+ if (flag[1] == 'f')
+ oldpids_sig = SIGUSR1; /* finish then exit */
+ else
+ oldpids_sig = SIGTERM; /* terminate immediately */
+ while (argc > 1 && argv[1][0] != '-') {
+ char * endptr = NULL;
+ oldpids = realloc(oldpids, (nb_oldpids + 1) * sizeof(int));
+ if (!oldpids) {
+ ha_alert("Cannot allocate old pid : out of memory.\n");
+ exit(1);
+ }
+ argc--; argv++;
+ errno = 0;
+ oldpids[nb_oldpids] = strtol(*argv, &endptr, 10);
+ if (errno) {
+ ha_alert("-%2s option: failed to parse {%s}: %s\n",
+ flag,
+ *argv, strerror(errno));
+ exit(1);
+ } else if (endptr && strlen(endptr)) {
+ while (isspace((unsigned char)*endptr)) endptr++;
+ if (*endptr != 0) {
+ ha_alert("-%2s option: some bytes unconsumed in PID list {%s}\n",
+ flag, endptr);
+ exit(1);
+ }
+ }
+ if (oldpids[nb_oldpids] <= 0)
+ usage(progname);
+ nb_oldpids++;
+ }
+ }
+ else if (flag[0] == '-' && flag[1] == 0) { /* "--" */
+ /* now that's a cfgfile list */
+ argv++; argc--;
+ while (argc > 0) {
+ if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
+ ha_alert("Cannot load configuration file/directory %s : %s\n",
+ *argv,
+ err_msg);
+ exit(1);
+ }
+ argv++; argc--;
+ }
+ break;
+ }
+ else { /* >=2 args */
+ argv++; argc--;
+ if (argc == 0)
+ usage(progname);
+
+ switch (*flag) {
+ case 'C' : change_dir = *argv; break;
+ case 'n' : cfg_maxconn = atol(*argv); break;
+ case 'm' : global.rlimit_memmax_all = atol(*argv); break;
+ case 'N' : cfg_maxpconn = atol(*argv); break;
+ case 'L' :
+ free(localpeer);
+ if ((localpeer = strdup(*argv)) == NULL) {
+ ha_alert("Cannot allocate memory for local peer.\n");
+ exit(EXIT_FAILURE);
+ }
+ setenv("HAPROXY_LOCALPEER", localpeer, 1);
+ global.localpeer_cmdline = 1;
+ break;
+ case 'f' :
+ if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
+ ha_alert("Cannot load configuration file/directory %s : %s\n",
+ *argv,
+ err_msg);
+ exit(1);
+ }
+ break;
+ case 'p' :
+ free(global.pidfile);
+ if ((global.pidfile = strdup(*argv)) == NULL) {
+ ha_alert("Cannot allocate memory for pidfile.\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
+ default: usage(progname);
+ }
+ }
+ }
+ else
+ usage(progname);
+ argv++; argc--;
+ }
+ free(err_msg);
+}
+
+/* call the various keyword dump functions based on the comma-delimited list of
+ * classes in kwd_dump.
+ */
+static void dump_registered_keywords(void)
+{
+ char *end;
+ int all __maybe_unused = 0;
+
+ for (; kwd_dump && *kwd_dump; kwd_dump = end) {
+ end = strchr(kwd_dump, ',');
+ if (end)
+ *(end++) = 0;
+
+ if (strcmp(kwd_dump, "help") == 0) {
+ printf("# List of supported keyword classes:\n");
+ printf("all: list all keywords\n");
+ printf("acl: ACL keywords\n");
+ printf("cfg: configuration keywords\n");
+ printf("cli: CLI keywords\n");
+ printf("cnv: sample converter keywords\n");
+ printf("flt: filter names\n");
+ printf("smp: sample fetch functions\n");
+ printf("svc: service names\n");
+ continue;
+ }
+ else if (strcmp(kwd_dump, "all") == 0) {
+ all = 1;
+ }
+
+ if (all || strcmp(kwd_dump, "acl") == 0) {
+ printf("# List of registered ACL keywords:\n");
+ acl_dump_kwd();
+ }
+
+ if (all || strcmp(kwd_dump, "cfg") == 0) {
+ printf("# List of registered configuration keywords:\n");
+ cfg_dump_registered_keywords();
+ }
+
+ if (all || strcmp(kwd_dump, "cli") == 0) {
+ printf("# List of registered CLI keywords:\n");
+ cli_list_keywords();
+ }
+
+ if (all || strcmp(kwd_dump, "cnv") == 0) {
+ printf("# List of registered sample converter functions:\n");
+ smp_dump_conv_kw();
+ }
+
+ if (all || strcmp(kwd_dump, "flt") == 0) {
+ printf("# List of registered filter names:\n");
+ flt_dump_kws(NULL);
+ }
+
+ if (all || strcmp(kwd_dump, "smp") == 0) {
+ printf("# List of registered sample fetch functions:\n");
+ smp_dump_fetch_kw();
+ }
+
+ if (all || strcmp(kwd_dump, "svc") == 0) {
+ printf("# List of registered service names:\n");
+ list_services(NULL);
+ }
+ }
+}
+
+/* Generate a random cluster-secret in case the setting is not provided in the
+ * configuration. This allows the use of features which rely on it, albeit
+ * with some limitations.
+ */
+static void generate_random_cluster_secret()
+{
+ /* used as a default random cluster-secret if none defined. */
+ uint64_t rand = ha_random64();
+
+ /* The caller must not overwrite an already defined secret. */
+ BUG_ON(global.cluster_secret);
+
+ global.cluster_secret = malloc(8);
+ if (!global.cluster_secret)
+ return;
+
+ memcpy(global.cluster_secret, &rand, sizeof(rand));
+ global.cluster_secret[7] = '\0';
+}
+
+/*
+ * This function initializes all the necessary variables. It only returns
+ * if everything is OK. If something fails, it exits.
+ */
+static void init(int argc, char **argv)
+{
+ char *progname = global.log_tag.area;
+ int err_code = 0;
+ struct wordlist *wl;
+ struct proxy *px;
+ struct post_check_fct *pcf;
+ struct pre_check_fct *prcf;
+ int ideal_maxconn;
+
+ if (!init_trash_buffers(1)) {
+ ha_alert("failed to initialize trash buffers.\n");
+ exit(1);
+ }
+
+ if (init_acl() != 0)
+ exit(1);
+
+ /* Initialise lua. */
+ hlua_init();
+
+ global.mode |= (arg_mode & (MODE_DAEMON | MODE_MWORKER | MODE_FOREGROUND | MODE_VERBOSE
+ | MODE_QUIET | MODE_CHECK | MODE_DEBUG | MODE_ZERO_WARNING
+ | MODE_DIAG | MODE_CHECK_CONDITION | MODE_DUMP_LIBS | MODE_DUMP_KWD));
+
+ if (getenv("HAPROXY_MWORKER_WAIT_ONLY")) {
+ unsetenv("HAPROXY_MWORKER_WAIT_ONLY");
+ global.mode |= MODE_MWORKER_WAIT;
+ global.mode &= ~MODE_MWORKER;
+ }
+
+ /* set the atexit functions when not doing configuration check */
+ if (!(global.mode & (MODE_CHECK | MODE_CHECK_CONDITION))
+ && (getenv("HAPROXY_MWORKER_REEXEC") != NULL)) {
+
+ if (global.mode & MODE_MWORKER) {
+ atexit_flag = 1;
+ atexit(reexec_on_failure);
+ } else if (global.mode & MODE_MWORKER_WAIT) {
+ atexit_flag = 1;
+ atexit(exit_on_waitmode_failure);
+ }
+ }
+
+ if (change_dir && chdir(change_dir) < 0) {
+ ha_alert("Could not change to directory %s : %s\n", change_dir, strerror(errno));
+ exit(1);
+ }
+
+ usermsgs_clr("config");
+
+ if (global.mode & MODE_CHECK_CONDITION) {
+ int result;
+
+ uint32_t err;
+ const char *errptr;
+ char *errmsg = NULL;
+
+ char *args[MAX_LINE_ARGS+1];
+ int arg = sizeof(args) / sizeof(*args);
+ size_t outlen;
+ char *w;
+
+ if (!check_condition)
+ usage(progname);
+
+ outlen = strlen(check_condition) + 1;
+ err = parse_line(check_condition, check_condition, &outlen, args, &arg,
+ PARSE_OPT_ENV | PARSE_OPT_WORD_EXPAND | PARSE_OPT_DQUOTE | PARSE_OPT_SQUOTE | PARSE_OPT_BKSLASH,
+ &errptr);
+
+ if (err & PARSE_ERR_QUOTE) {
+ ha_alert("Syntax Error in condition: Unmatched quote.\n");
+ exit(2);
+ }
+
+ if (err & PARSE_ERR_HEX) {
+ ha_alert("Syntax Error in condition: Truncated or invalid hexadecimal sequence.\n");
+ exit(2);
+ }
+
+ if (err & (PARSE_ERR_TOOLARGE|PARSE_ERR_OVERLAP)) {
+ ha_alert("Error in condition: Line too long.\n");
+ exit(2);
+ }
+
+ if (err & PARSE_ERR_TOOMANY) {
+ ha_alert("Error in condition: Too many words.\n");
+ exit(2);
+ }
+
+ if (err) {
+ ha_alert("Unhandled error in condition, please report this to the developers.\n");
+ exit(2);
+ }
+
+ /* remerge all words into a single expression */
+ for (w = *args; (w += strlen(w)) < check_condition + outlen - 1; *w = ' ')
+ ;
+
+ result = cfg_eval_condition(args, &errmsg, &errptr);
+
+ if (result < 0) {
+ if (errmsg)
+ ha_alert("Failed to evaluate condition: %s\n", errmsg);
+
+ exit(2);
+ }
+
+ exit(result ? 0 : 1);
+ }
+
+ /* in wait mode, we don't try to read the configuration files */
+ if (!(global.mode & MODE_MWORKER_WAIT)) {
+ char *env_cfgfiles = NULL;
+ int env_err = 0;
+
+ /* handle cfgfiles that are actually directories */
+ cfgfiles_expand_directories();
+
+ if (LIST_ISEMPTY(&cfg_cfgfiles))
+ usage(progname);
+
+
+ list_for_each_entry(wl, &cfg_cfgfiles, list) {
+ int ret;
+
+ if (env_err == 0) {
+ if (!memprintf(&env_cfgfiles, "%s%s%s",
+ (env_cfgfiles ? env_cfgfiles : ""),
+ (env_cfgfiles ? ";" : ""), wl->s))
+ env_err = 1;
+ }
+
+ ret = readcfgfile(wl->s);
+ if (ret == -1) {
+ ha_alert("Could not open configuration file %s : %s\n",
+ wl->s, strerror(errno));
+ free(env_cfgfiles);
+ exit(1);
+ }
+ if (ret & (ERR_ABORT|ERR_FATAL))
+ ha_alert("Error(s) found in configuration file : %s\n", wl->s);
+ err_code |= ret;
+ if (err_code & ERR_ABORT) {
+ free(env_cfgfiles);
+ exit(1);
+ }
+ }
+
+ /* do not try to resolve arguments nor to spot inconsistencies when
+ * the configuration contains fatal errors caused by files not found
+ * or failed memory allocations.
+ */
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Fatal errors found in configuration.\n");
+ free(env_cfgfiles);
+ exit(1);
+ }
+ if (env_err) {
+ ha_alert("Could not allocate memory for HAPROXY_CFGFILES env variable\n");
+ exit(1);
+ }
+ setenv("HAPROXY_CFGFILES", env_cfgfiles, 1);
+ free(env_cfgfiles);
+
+ }
+ if (global.mode & MODE_MWORKER) {
+ struct mworker_proc *tmproc;
+
+ setenv("HAPROXY_MWORKER", "1", 1);
+
+ if (getenv("HAPROXY_MWORKER_REEXEC") == NULL) {
+
+ tmproc = mworker_proc_new();
+ if (!tmproc) {
+ ha_alert("Cannot allocate process structures.\n");
+ exit(EXIT_FAILURE);
+ }
+ tmproc->options |= PROC_O_TYPE_MASTER; /* master */
+ tmproc->pid = pid;
+ tmproc->timestamp = start_date.tv_sec;
+ proc_self = tmproc;
+
+ LIST_APPEND(&proc_list, &tmproc->list);
+ }
+
+ tmproc = mworker_proc_new();
+ if (!tmproc) {
+ ha_alert("Cannot allocate process structures.\n");
+ exit(EXIT_FAILURE);
+ }
+ tmproc->options |= PROC_O_TYPE_WORKER; /* worker */
+
+ if (mworker_cli_sockpair_new(tmproc, 0) < 0) {
+ exit(EXIT_FAILURE);
+ }
+
+ LIST_APPEND(&proc_list, &tmproc->list);
+ }
+
+ if (global.mode & MODE_MWORKER_WAIT) {
+ /* in exec mode, there's always exactly one thread. Failure to
+ * set these ones now will result in nbthread being detected
+ * automatically.
+ */
+ global.nbthread = 1;
+#ifdef USE_THREAD
+ tid_bit = 1;
+ all_threads_mask = 1;
+#endif
+ }
+
+ if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
+ struct wordlist *it, *c;
+
+ /* get the info of the children in the env */
+ if (mworker_env_to_proc_list() < 0) {
+ exit(EXIT_FAILURE);
+ }
+
+ if (!LIST_ISEMPTY(&mworker_cli_conf)) {
+
+ if (mworker_cli_proxy_create() < 0) {
+ ha_alert("Can't create the master's CLI.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ list_for_each_entry_safe(c, it, &mworker_cli_conf, list) {
+
+ if (mworker_cli_proxy_new_listener(c->s) < 0) {
+ ha_alert("Can't create the master's CLI.\n");
+ exit(EXIT_FAILURE);
+ }
+ LIST_DELETE(&c->list);
+ free(c->s);
+ free(c);
+ }
+ }
+ }
+
+ if (!LIST_ISEMPTY(&mworker_cli_conf) && !(arg_mode & MODE_MWORKER)) {
+ ha_warning("a master CLI socket was defined, but master-worker mode (-W) is not enabled.\n");
+ }
+
+ /* destroy unreferenced defaults proxies */
+ proxy_destroy_all_unref_defaults();
+
+ list_for_each_entry(prcf, &pre_check_list, list)
+ err_code |= prcf->fct();
+
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Fatal errors found in configuration.\n");
+ exit(1);
+ }
+
+ err_code |= check_config_validity();
+ for (px = proxies_list; px; px = px->next) {
+ struct server *srv;
+ struct post_proxy_check_fct *ppcf;
+ struct post_server_check_fct *pscf;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ list_for_each_entry(pscf, &post_server_check_list, list) {
+ for (srv = px->srv; srv; srv = srv->next)
+ err_code |= pscf->fct(srv);
+ }
+ list_for_each_entry(ppcf, &post_proxy_check_list, list)
+ err_code |= ppcf->fct(px);
+ }
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Fatal errors found in configuration.\n");
+ exit(1);
+ }
+
+ err_code |= pattern_finalize_config();
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to finalize pattern config.\n");
+ exit(1);
+ }
+
+ if (global.rlimit_memmax_all)
+ global.rlimit_memmax = global.rlimit_memmax_all;
+
+#ifdef USE_NS
+ err_code |= netns_init();
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize namespace support.\n");
+ exit(1);
+ }
+#endif
+
+ /* Apply server states */
+ apply_server_state();
+
+ for (px = proxies_list; px; px = px->next)
+ srv_compute_all_admin_states(px);
+
+ /* Apply servers' configured address */
+ err_code |= srv_init_addr();
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize server(s) addr.\n");
+ exit(1);
+ }
+
+ if (warned & WARN_ANY && global.mode & MODE_ZERO_WARNING) {
+ ha_alert("Some warnings were found and 'zero-warning' is set. Aborting.\n");
+ exit(1);
+ }
+
+#if defined(HA_HAVE_DUMP_LIBS)
+ if (global.mode & MODE_DUMP_LIBS) {
+ qfprintf(stdout, "List of loaded object files:\n");
+ chunk_reset(&trash);
+ if (dump_libs(&trash, 0))
+ printf("%s", trash.area);
+ }
+#endif
+
+ if (global.mode & MODE_DUMP_KWD)
+ dump_registered_keywords();
+
+ if (global.mode & MODE_CHECK) {
+ struct peers *pr;
+ struct proxy *px;
+
+ if (warned & WARN_ANY)
+ qfprintf(stdout, "Warnings were found.\n");
+
+ for (pr = cfg_peers; pr; pr = pr->next)
+ if (pr->peers_fe)
+ break;
+
+ for (px = proxies_list; px; px = px->next)
+ if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && px->li_all)
+ break;
+
+ if (!px) {
+			/* we may only have log-forward sections */
+ for (px = cfg_log_forward; px; px = px->next)
+ if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && px->li_all)
+ break;
+ }
+
+ if (pr || px) {
+ /* At least one peer or one listener has been found */
+ qfprintf(stdout, "Configuration file is valid\n");
+ deinit_and_exit(0);
+ }
+ qfprintf(stdout, "Configuration file has no error but will not start (no listener) => exit(2).\n");
+ exit(2);
+ }
+
+ if (global.mode & MODE_DIAG) {
+ cfg_run_diagnostics();
+ }
+
+ /* Initialize the random generators */
+#ifdef USE_OPENSSL
+ /* Initialize SSL random generator. Must be called before chroot for
+ * access to /dev/urandom, and before ha_random_boot() which may use
+ * RAND_bytes().
+ */
+ if (!ssl_initialize_random()) {
+ ha_alert("OpenSSL random data generator initialization failed.\n");
+ exit(EXIT_FAILURE);
+ }
+#endif
+ ha_random_boot(argv); // the argv pointer brings some kernel-fed entropy
+
+ /* now we know the buffer size, we can initialize the channels and buffers */
+ init_buffer();
+
+ list_for_each_entry(pcf, &post_check_list, list) {
+ err_code |= pcf->fct();
+ if (err_code & (ERR_ABORT|ERR_FATAL))
+ exit(1);
+ }
+
+ /* set the default maxconn in the master, but let it be rewritable with -n */
+ if (global.mode & MODE_MWORKER_WAIT)
+ global.maxconn = MASTER_MAXCONN;
+
+ if (cfg_maxconn > 0)
+ global.maxconn = cfg_maxconn;
+
+ if (global.cli_fe)
+ global.maxsock += global.cli_fe->maxconn;
+
+ if (cfg_peers) {
+ /* peers also need to bypass global maxconn */
+ struct peers *p = cfg_peers;
+
+ for (p = cfg_peers; p; p = p->next)
+ if (p->peers_fe)
+ global.maxsock += p->peers_fe->maxconn;
+ }
+
+ /* Now we want to compute the maxconn and possibly maxsslconn values.
+ * It's a bit tricky. Maxconn defaults to the pre-computed value based
+ * on rlim_fd_cur and the number of FDs in use due to the configuration,
+ * and maxsslconn defaults to DEFAULT_MAXSSLCONN. On top of that we can
+ * enforce a lower limit based on memmax.
+ *
+ * If memmax is set, then it depends on which values are set. If
+ * maxsslconn is set, we use memmax to determine how many cleartext
+ * connections may be added, and set maxconn to the sum of the two.
+ * If maxconn is set and not maxsslconn, maxsslconn is computed from
+ * the remaining amount of memory between memmax and the cleartext
+ * connections. If neither are set, then it is considered that all
+ * connections are SSL-capable, and maxconn is computed based on this,
+ * then maxsslconn accordingly. We need to know if SSL is used on the
+ * frontends, backends, or both, because when it's used on both sides,
+ * we need twice the value for maxsslconn, but we only count the
+ * handshake once since it is not performed on the two sides at the
+ * same time (frontend-side is terminated before backend-side begins).
+ * The SSL stack is supposed to have filled ssl_session_cost and
+ * ssl_handshake_cost during its initialization. In any case, if
+ * SYSTEM_MAXCONN is set, we still enforce it as an upper limit for
+ * maxconn in order to protect the system.
+ */
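+	/* As a rough illustration of the fully automatic case below
+	 * (hypothetical values, SSL used on a single side, i.e. sides=1),
+	 * maxconn ends up close to:
+	 *
+	 *   mem * MEM_USABLE_RATIO /
+	 *       (STREAM_MAX_COST + 2 * global.tune.bufsize
+	 *        + global.ssl_session_max_cost + global.ssl_handshake_max_cost)
+	 *
+	 * where mem is global.rlimit_memmax * 1048576 minus the SSL cache and
+	 * zlib reservations, before round_2dig() rounding and the optional
+	 * SYSTEM_MAXCONN cap.
+	 */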
+ ideal_maxconn = compute_ideal_maxconn();
+
+ if (!global.rlimit_memmax) {
+ if (global.maxconn == 0) {
+ global.maxconn = ideal_maxconn;
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
+ fprintf(stderr, "Note: setting global.maxconn to %d.\n", global.maxconn);
+ }
+ }
+#ifdef USE_OPENSSL
+ else if (!global.maxconn && !global.maxsslconn &&
+ (global.ssl_used_frontend || global.ssl_used_backend)) {
+ /* memmax is set, compute everything automatically. Here we want
+ * to ensure that all SSL connections will be served. We take
+ * care of the number of sides where SSL is used, and consider
+ * the worst case : SSL used on both sides and doing a handshake
+ * simultaneously. Note that we can't have more than maxconn
+ * handshakes at a time by definition, so for the worst case of
+ * two SSL conns per connection, we count a single handshake.
+ */
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int64_t mem = global.rlimit_memmax * 1048576ULL;
+ int retried = 0;
+
+ mem -= global.tune.sslcachesize * 200ULL; // about 200 bytes per SSL cache entry
+ mem -= global.maxzlibmem;
+ mem = mem * MEM_USABLE_RATIO;
+
+ /* Principle: we test once to set maxconn according to the free
+ * memory. If it results in values the system rejects, we try a
+ * second time by respecting rlim_fd_max. If it fails again, we
+ * go back to the initial value and will let the final code
+ * dealing with rlimit report the error. That's up to 3 attempts.
+ */
+ do {
+ global.maxconn = mem /
+ ((STREAM_MAX_COST + 2 * global.tune.bufsize) + // stream + 2 buffers per stream
+ sides * global.ssl_session_max_cost + // SSL buffers, one per side
+ global.ssl_handshake_max_cost); // 1 handshake per connection max
+
+ if (retried == 1)
+ global.maxconn = MIN(global.maxconn, ideal_maxconn);
+ global.maxconn = round_2dig(global.maxconn);
+#ifdef SYSTEM_MAXCONN
+ if (global.maxconn > SYSTEM_MAXCONN)
+ global.maxconn = SYSTEM_MAXCONN;
+#endif /* SYSTEM_MAXCONN */
+ global.maxsslconn = sides * global.maxconn;
+
+ if (check_if_maxsock_permitted(compute_ideal_maxsock(global.maxconn)))
+ break;
+ } while (retried++ < 2);
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
+ fprintf(stderr, "Note: setting global.maxconn to %d and global.maxsslconn to %d.\n",
+ global.maxconn, global.maxsslconn);
+ }
+ else if (!global.maxsslconn &&
+ (global.ssl_used_frontend || global.ssl_used_backend)) {
+ /* memmax and maxconn are known, compute maxsslconn automatically.
+ * maxsslconn being forced, we don't know how many of it will be
+ * on each side if both sides are being used. The worst case is
+ * when all connections use only one SSL instance because
+ * handshakes may be on two sides at the same time.
+ */
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int64_t mem = global.rlimit_memmax * 1048576ULL;
+ int64_t sslmem;
+
+ mem -= global.tune.sslcachesize * 200ULL; // about 200 bytes per SSL cache entry
+ mem -= global.maxzlibmem;
+ mem = mem * MEM_USABLE_RATIO;
+
+ sslmem = mem - global.maxconn * (int64_t)(STREAM_MAX_COST + 2 * global.tune.bufsize);
+ global.maxsslconn = sslmem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost);
+ global.maxsslconn = round_2dig(global.maxsslconn);
+
+ if (sslmem <= 0 || global.maxsslconn < sides) {
+ ha_alert("Cannot compute the automatic maxsslconn because global.maxconn is already too "
+ "high for the global.memmax value (%d MB). The absolute maximum possible value "
+ "without SSL is %d, but %d was found and SSL is in use.\n",
+ global.rlimit_memmax,
+ (int)(mem / (STREAM_MAX_COST + 2 * global.tune.bufsize)),
+ global.maxconn);
+ exit(1);
+ }
+
+ if (global.maxsslconn > sides * global.maxconn)
+ global.maxsslconn = sides * global.maxconn;
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
+ fprintf(stderr, "Note: setting global.maxsslconn to %d\n", global.maxsslconn);
+ }
+#endif
+ else if (!global.maxconn) {
+ /* memmax and maxsslconn are known/unused, compute maxconn automatically */
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int64_t mem = global.rlimit_memmax * 1048576ULL;
+ int64_t clearmem;
+ int retried = 0;
+
+ if (global.ssl_used_frontend || global.ssl_used_backend)
+ mem -= global.tune.sslcachesize * 200ULL; // about 200 bytes per SSL cache entry
+
+ mem -= global.maxzlibmem;
+ mem = mem * MEM_USABLE_RATIO;
+
+ clearmem = mem;
+ if (sides)
+ clearmem -= (global.ssl_session_max_cost + global.ssl_handshake_max_cost) * (int64_t)global.maxsslconn;
+
+ /* Principle: we test once to set maxconn according to the free
+ * memory. If it results in values the system rejects, we try a
+ * second time by respecting rlim_fd_max. If it fails again, we
+ * go back to the initial value and will let the final code
+ * dealing with rlimit report the error. That's up to 3 attempts.
+ */
+ do {
+ global.maxconn = clearmem / (STREAM_MAX_COST + 2 * global.tune.bufsize);
+ if (retried == 1)
+ global.maxconn = MIN(global.maxconn, ideal_maxconn);
+ global.maxconn = round_2dig(global.maxconn);
+#ifdef SYSTEM_MAXCONN
+ if (global.maxconn > SYSTEM_MAXCONN)
+ global.maxconn = SYSTEM_MAXCONN;
+#endif /* SYSTEM_MAXCONN */
+
+ if (clearmem <= 0 || !global.maxconn) {
+ ha_alert("Cannot compute the automatic maxconn because global.maxsslconn is already too "
+ "high for the global.memmax value (%d MB). The absolute maximum possible value "
+ "is %d, but %d was found.\n",
+ global.rlimit_memmax,
+ (int)(mem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost)),
+ global.maxsslconn);
+ exit(1);
+ }
+
+ if (check_if_maxsock_permitted(compute_ideal_maxsock(global.maxconn)))
+ break;
+ } while (retried++ < 2);
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
+ if (sides && global.maxsslconn > sides * global.maxconn) {
+ fprintf(stderr, "Note: global.maxsslconn is forced to %d which causes global.maxconn "
+ "to be limited to %d. Better reduce global.maxsslconn to get more "
+ "room for extra connections.\n", global.maxsslconn, global.maxconn);
+ }
+ fprintf(stderr, "Note: setting global.maxconn to %d\n", global.maxconn);
+ }
+ }
+
+ global.maxsock = compute_ideal_maxsock(global.maxconn);
+ global.hardmaxconn = global.maxconn;
+ if (!global.maxpipes)
+ global.maxpipes = compute_ideal_maxpipes();
+
+ /* update connection pool thresholds */
+ global.tune.pool_low_count = ((long long)global.maxsock * global.tune.pool_low_ratio + 99) / 100;
+ global.tune.pool_high_count = ((long long)global.maxsock * global.tune.pool_high_ratio + 99) / 100;
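+ /* i.e. ceil(maxsock * ratio / 100): e.g. with maxsock = 10000 and a
+ * low ratio of 20%, pool_low_count becomes 2000 (illustration only).
+ */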
+
+ proxy_adjust_all_maxconn();
+
+ if (global.tune.maxpollevents <= 0)
+ global.tune.maxpollevents = MAX_POLL_EVENTS;
+
+ if (global.tune.runqueue_depth <= 0) {
+ /* tests on various thread counts from 1 to 64 have shown an
+ * optimal queue depth following roughly 1/sqrt(threads).
+ */
+ int s = my_flsl(global.nbthread);
+ s += (global.nbthread / s); // roughly twice the sqrt.
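+ /* e.g. nbthread = 16 gives s = 5 + 16/5 = 8, i.e. about
+ * 2*sqrt(16), so the depth becomes RUNQUEUE_DEPTH / 4.
+ */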
+ global.tune.runqueue_depth = RUNQUEUE_DEPTH * 2 / s;
+ }
+
+ if (global.tune.recv_enough == 0)
+ global.tune.recv_enough = MIN_RECV_AT_ONCE_ENOUGH;
+
+ if (global.tune.maxrewrite >= global.tune.bufsize / 2)
+ global.tune.maxrewrite = global.tune.bufsize / 2;
+
+ usermsgs_clr(NULL);
+
+ if (arg_mode & (MODE_DEBUG | MODE_FOREGROUND)) {
+ /* command line debug/foreground mode overrides the configured daemon and quiet modes */
+ global.mode &= ~(MODE_DAEMON | MODE_QUIET);
+ global.mode |= (arg_mode & (MODE_DEBUG | MODE_FOREGROUND));
+ }
+
+ if (arg_mode & MODE_DAEMON) {
+ /* command line daemon mode inhibits foreground and debug modes */
+ global.mode &= ~(MODE_DEBUG | MODE_FOREGROUND);
+ global.mode |= arg_mode & MODE_DAEMON;
+ }
+
+ global.mode |= (arg_mode & (MODE_QUIET | MODE_VERBOSE));
+
+ if ((global.mode & MODE_DEBUG) && (global.mode & (MODE_DAEMON | MODE_QUIET))) {
+ ha_warning("<debug> mode incompatible with <quiet> and <daemon>. Keeping <debug> only.\n");
+ global.mode &= ~(MODE_DAEMON | MODE_QUIET);
+ }
+
+ if (global.nbthread < 1)
+ global.nbthread = 1;
+
+ /* Realloc trash buffers because global.tune.bufsize may have changed */
+ if (!init_trash_buffers(0)) {
+ ha_alert("failed to initialize trash buffers.\n");
+ exit(1);
+ }
+
+ if (!init_log_buffers()) {
+ ha_alert("failed to initialize log buffers.\n");
+ exit(1);
+ }
+
+ if (!global.cluster_secret)
+ generate_random_cluster_secret();
+
+ /*
+ * Note: we could register external pollers here.
+ * Built-in pollers have been registered before main().
+ */
+
+ if (!(global.tune.options & GTUNE_USE_KQUEUE))
+ disable_poller("kqueue");
+
+ if (!(global.tune.options & GTUNE_USE_EVPORTS))
+ disable_poller("evports");
+
+ if (!(global.tune.options & GTUNE_USE_EPOLL))
+ disable_poller("epoll");
+
+ if (!(global.tune.options & GTUNE_USE_POLL))
+ disable_poller("poll");
+
+ if (!(global.tune.options & GTUNE_USE_SELECT))
+ disable_poller("select");
+
+ /* Note: we could disable any poller by name here */
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
+ list_pollers(stderr);
+ fprintf(stderr, "\n");
+ list_filters(stderr);
+ }
+
+ if (!init_pollers()) {
+ ha_alert("No polling mechanism available.\n"
+ " It is likely that haproxy was built with TARGET=generic and that FD_SETSIZE\n"
+ " is too low on this platform to support maxconn and the number of listeners\n"
+ " and servers. You should rebuild haproxy specifying your system using TARGET=\n"
+ " in order to support other polling systems (poll, epoll, kqueue) or reduce the\n"
+ " global maxconn setting to accommodate the system's limitation. For reference,\n"
+ " FD_SETSIZE=%d on this system, global.maxconn=%d resulting in a maximum of\n"
+ " %d file descriptors. You should thus reduce global.maxconn by %d. Also,\n"
+ " check build settings using 'haproxy -vv'.\n\n",
+ FD_SETSIZE, global.maxconn, global.maxsock, (global.maxsock + 1 - FD_SETSIZE) / 2);
+ exit(1);
+ }
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
+ printf("Using %s() as the polling mechanism.\n", cur_poller.name);
+ }
+
+ if (!global.node)
+ global.node = strdup(hostname);
+
+ /* stop disabled proxies */
+ for (px = proxies_list; px; px = px->next) {
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ stop_proxy(px);
+ }
+
+ if (!hlua_post_init())
+ exit(1);
+}
+
+void deinit(void)
+{
+ struct proxy *p = proxies_list, *p0;
+ struct wordlist *wl, *wlb;
+ struct uri_auth *uap, *ua = NULL;
+ struct logsrv *log, *logb;
+ struct build_opts_str *bol, *bolb;
+ struct post_deinit_fct *pdf, *pdfb;
+ struct proxy_deinit_fct *pxdf, *pxdfb;
+ struct server_deinit_fct *srvdf, *srvdfb;
+ struct per_thread_init_fct *tif, *tifb;
+ struct per_thread_deinit_fct *tdf, *tdfb;
+ struct per_thread_alloc_fct *taf, *tafb;
+ struct per_thread_free_fct *tff, *tffb;
+ struct post_server_check_fct *pscf, *pscfb;
+ struct post_check_fct *pcf, *pcfb;
+ struct post_proxy_check_fct *ppcf, *ppcfb;
+ struct pre_check_fct *prcf, *prcfb;
+ struct cfg_postparser *pprs, *pprsb;
+ int cur_fd;
+
+ /* At this point the listeners state is weird:
+ * - most listeners are still bound and referenced in their protocol
+ * - some might be zombies that are not in their proto anymore, but
+ * still appear in their proxy's listeners with a valid FD.
+ * - some might be stopped and still appear in their proxy as FD #-1
+ * - among all of them, some might be inherited hence shared, and we're
+ * not allowed to pause or otherwise alter them; we must just close them.
+ * - finally some are not listeners (pipes, logs, stdout, etc) and
+ * must be left intact.
+ *
+ * The safe way to proceed is to unbind (and close) whatever is not yet
+ * unbound so that no more receiver/listener remains alive. Then close
+ * remaining listener FDs, which correspond to zombie listeners (those
+ * belonging to disabled proxies that were in another process).
+ * objt_listener() would be cleaner here but not converted yet.
+ */
+ protocol_unbind_all();
+
+ for (cur_fd = 0; cur_fd < global.maxsock; cur_fd++) {
+ if (!fdtab || !fdtab[cur_fd].owner)
+ continue;
+
+ if (fdtab[cur_fd].iocb == &sock_accept_iocb) {
+ struct listener *l = fdtab[cur_fd].owner;
+
+ BUG_ON(l->state != LI_INIT);
+ unbind_listener(l);
+ }
+ }
+
+ deinit_signals();
+ while (p) {
+ /* build a list of unique uri_auths */
+ if (!ua)
+ ua = p->uri_auth;
+ else {
+ /* check if p->uri_auth is unique */
+ for (uap = ua; uap; uap=uap->next)
+ if (uap == p->uri_auth)
+ break;
+
+ if (!uap && p->uri_auth) {
+ /* add it if it's not already in the list */
+ p->uri_auth->next = ua;
+ ua = p->uri_auth;
+ }
+ }
+
+ p0 = p;
+ p = p->next;
+ free_proxy(p0);
+ }/* end while(p) */
+
+ /* destroy all referenced defaults proxies */
+ proxy_destroy_all_unref_defaults();
+
+ while (ua) {
+ struct stat_scope *scope, *scopep;
+
+ uap = ua;
+ ua = ua->next;
+
+ free(uap->uri_prefix);
+ free(uap->auth_realm);
+ free(uap->node);
+ free(uap->desc);
+
+ userlist_free(uap->userlist);
+ free_act_rules(&uap->http_req_rules);
+
+ scope = uap->scope;
+ while (scope) {
+ scopep = scope;
+ scope = scope->next;
+
+ free(scopep->px_id);
+ free(scopep);
+ }
+
+ free(uap);
+ }
+
+ userlist_free(userlist);
+
+ cfg_unregister_sections();
+
+ deinit_log_buffers();
+
+ list_for_each_entry(pdf, &post_deinit_list, list)
+ pdf->fct();
+
+ ha_free(&global.log_send_hostname);
+ chunk_destroy(&global.log_tag);
+ ha_free(&global.chroot);
+ ha_free(&global.cluster_secret);
+ ha_free(&global.pidfile);
+ ha_free(&global.node);
+ ha_free(&global.desc);
+ ha_free(&oldpids);
+ ha_free(&old_argv);
+ ha_free(&localpeer);
+ ha_free(&global.server_state_base);
+ ha_free(&global.server_state_file);
+ task_destroy(idle_conn_task);
+ idle_conn_task = NULL;
+
+ list_for_each_entry_safe(log, logb, &global.logsrvs, list) {
+ LIST_DEL_INIT(&log->list);
+ free_logsrv(log);
+ }
+
+ list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
+ free(wl->s);
+ LIST_DELETE(&wl->list);
+ free(wl);
+ }
+
+ list_for_each_entry_safe(bol, bolb, &build_opts_list, list) {
+ if (bol->must_free)
+ free((void *)bol->str);
+ LIST_DELETE(&bol->list);
+ free(bol);
+ }
+
+ list_for_each_entry_safe(pxdf, pxdfb, &proxy_deinit_list, list) {
+ LIST_DELETE(&pxdf->list);
+ free(pxdf);
+ }
+
+ list_for_each_entry_safe(pdf, pdfb, &post_deinit_list, list) {
+ LIST_DELETE(&pdf->list);
+ free(pdf);
+ }
+
+ list_for_each_entry_safe(srvdf, srvdfb, &server_deinit_list, list) {
+ LIST_DELETE(&srvdf->list);
+ free(srvdf);
+ }
+
+ list_for_each_entry_safe(pcf, pcfb, &post_check_list, list) {
+ LIST_DELETE(&pcf->list);
+ free(pcf);
+ }
+
+ list_for_each_entry_safe(pscf, pscfb, &post_server_check_list, list) {
+ LIST_DELETE(&pscf->list);
+ free(pscf);
+ }
+
+ list_for_each_entry_safe(ppcf, ppcfb, &post_proxy_check_list, list) {
+ LIST_DELETE(&ppcf->list);
+ free(ppcf);
+ }
+
+ list_for_each_entry_safe(prcf, prcfb, &pre_check_list, list) {
+ LIST_DELETE(&prcf->list);
+ free(prcf);
+ }
+
+ list_for_each_entry_safe(tif, tifb, &per_thread_init_list, list) {
+ LIST_DELETE(&tif->list);
+ free(tif);
+ }
+
+ list_for_each_entry_safe(tdf, tdfb, &per_thread_deinit_list, list) {
+ LIST_DELETE(&tdf->list);
+ free(tdf);
+ }
+
+ list_for_each_entry_safe(taf, tafb, &per_thread_alloc_list, list) {
+ LIST_DELETE(&taf->list);
+ free(taf);
+ }
+
+ list_for_each_entry_safe(tff, tffb, &per_thread_free_list, list) {
+ LIST_DELETE(&tff->list);
+ free(tff);
+ }
+
+ list_for_each_entry_safe(pprs, pprsb, &postparsers, list) {
+ LIST_DELETE(&pprs->list);
+ free(pprs);
+ }
+
+ vars_prune(&proc_vars, NULL, NULL);
+ pool_destroy_all();
+ deinit_pollers();
+} /* end deinit() */
+
+__attribute__((noreturn)) void deinit_and_exit(int status)
+{
+ global.mode |= MODE_STOPPING;
+ deinit();
+ exit(status);
+}
+
+/* Runs the polling loop */
+void run_poll_loop()
+{
+ int next, wake;
+
+ clock_update_date(0,1);
+ while (1) {
+ wake_expired_tasks();
+
+ /* check if we caught some signals and process them in the
+ * first thread */
+ if (signal_queue_len && tid == 0) {
+ activity[tid].wake_signal++;
+ signal_process_queue();
+ }
+
+ /* Process a few tasks */
+ process_runnable_tasks();
+
+ /* also stop if we failed to cleanly stop all tasks */
+ if (killed > 1)
+ break;
+
+ /* expire immediately if events or signals are pending */
+ wake = 1;
+ if (thread_has_tasks())
+ activity[tid].wake_tasks++;
+ else {
+ _HA_ATOMIC_OR(&sleeping_thread_mask, tid_bit);
+ __ha_barrier_atomic_store();
+ if (thread_has_tasks()) {
+ activity[tid].wake_tasks++;
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
+ } else if (signal_queue_len) {
+ /* this check is required to avoid
+ * a race with wakeup on signals using wake_threads() */
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
+ } else
+ wake = 0;
+ }
+
+ if (!wake) {
+ int i;
+
+ if (stopping) {
+ /* stop muxes before acknowledging stopping */
+ if (!(stopping_thread_mask & tid_bit)) {
+ task_wakeup(mux_stopping_data[tid].task, TASK_WOKEN_OTHER);
+ wake = 1;
+ }
+
+ if (_HA_ATOMIC_OR_FETCH(&stopping_thread_mask, tid_bit) == tid_bit) {
+ /* notify all threads that stopping was just set */
+ for (i = 0; i < global.nbthread; i++)
+ if (((all_threads_mask & ~stopping_thread_mask) >> i) & 1)
+ wake_thread(i);
+ }
+ }
+
+ /* stop when there's nothing left to do */
+ if ((jobs - unstoppable_jobs) == 0 &&
+ (stopping_thread_mask & all_threads_mask) == all_threads_mask) {
+ /* wake all threads waiting on jobs==0 */
+ for (i = 0; i < global.nbthread; i++)
+ if (((all_threads_mask & ~tid_bit) >> i) & 1)
+ wake_thread(i);
+ break;
+ }
+ }
+
+ /* If we have to sleep, measure how long */
+ next = wake ? TICK_ETERNITY : next_timer_expiry();
+
+ /* The poller will ensure it returns around <next> */
+ cur_poller.poll(&cur_poller, next, wake);
+
+ activity[tid].loops++;
+ }
+}
+
+static void *run_thread_poll_loop(void *data)
+{
+ struct per_thread_alloc_fct *ptaf;
+ struct per_thread_init_fct *ptif;
+ struct per_thread_deinit_fct *ptdf;
+ struct per_thread_free_fct *ptff;
+ static int init_left = 0;
+ __decl_thread(static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER);
+ __decl_thread(static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER);
+
+ ha_set_thread(data);
+ set_thread_cpu_affinity();
+ clock_set_local_source();
+
+ /* Now, initialize one thread at a time. This is better since
+ * some init code is a bit tricky and may release global resources
+ * after reallocating them locally. This will also ensure there is
+ * no race on file descriptor allocation.
+ */
+#ifdef USE_THREAD
+ pthread_mutex_lock(&init_mutex);
+#endif
+ /* The first thread must set the number of threads left */
+ if (!init_left)
+ init_left = global.nbthread;
+ init_left--;
+
+ clock_init_thread_date();
+
+ /* per-thread alloc calls performed here are not allowed to snoop on
+ * other threads, so they are free to initialize at their own rhythm
+ * as long as they act as if they were alone. None of them may rely
+ * on resources initialized by the other ones.
+ */
+ list_for_each_entry(ptaf, &per_thread_alloc_list, list) {
+ if (!ptaf->fct()) {
+ ha_alert("failed to allocate resources for thread %u.\n", tid);
+#ifdef USE_THREAD
+ pthread_mutex_unlock(&init_mutex);
+#endif
+ exit(1);
+ }
+ }
+
+ /* per-thread init calls performed here are not allowed to snoop on
+ * other threads, so they are free to initialize at their own rhythm
+ * as long as they act as if they were alone.
+ */
+ list_for_each_entry(ptif, &per_thread_init_list, list) {
+ if (!ptif->fct()) {
+ ha_alert("failed to initialize thread %u.\n", tid);
+#ifdef USE_THREAD
+ pthread_mutex_unlock(&init_mutex);
+#endif
+ exit(1);
+ }
+ }
+
+ /* enabling protocols will result in fd_insert() calls to be performed,
+ * we want all threads to have already allocated their local fd tables
+ * before doing so, thus only the last thread does it.
+ */
+ if (init_left == 0)
+ protocol_enable_all();
+
+#ifdef USE_THREAD
+ pthread_cond_broadcast(&init_cond);
+ pthread_mutex_unlock(&init_mutex);
+
+ /* now wait for other threads to finish starting */
+ pthread_mutex_lock(&init_mutex);
+ while (init_left)
+ pthread_cond_wait(&init_cond, &init_mutex);
+ pthread_mutex_unlock(&init_mutex);
+#endif
+
+#if defined(PR_SET_NO_NEW_PRIVS) && defined(USE_PRCTL)
+ /* Let's refrain from using setuid executables. This way the impact of
+ * an eventual vulnerability in a library remains limited. It may
+ * impact external checks but who cares about them anyway? In the
+ * worst case it's possible to disable the option. Obviously we do this
+ * in workers only. We can't hard-fail on this one as it really is
+ * implementation dependent though we're interested in feedback, hence
+ * the warning.
+ */
+ if (!(global.tune.options & GTUNE_INSECURE_SETUID) && !master) {
+ static int warn_fail;
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1 && !_HA_ATOMIC_FETCH_ADD(&warn_fail, 1)) {
+ ha_warning("Failed to disable setuid, please report to developers with detailed "
+ "information about your operating system. You can silence this warning "
+ "by adding 'insecure-setuid-wanted' in the 'global' section.\n");
+ }
+ }
+#endif
+
+#if defined(RLIMIT_NPROC)
+ /* all threads have started, it's now time to prevent any new thread
+ * or process from starting. Obviously we do this in workers only. We
+ * can't hard-fail on this one as it really is implementation dependent
+ * though we're interested in feedback, hence the warning.
+ */
+ if (!(global.tune.options & GTUNE_INSECURE_FORK) && !master) {
+ struct rlimit limit = { .rlim_cur = 0, .rlim_max = 0 };
+ static int warn_fail;
+
+ if (setrlimit(RLIMIT_NPROC, &limit) == -1 && !_HA_ATOMIC_FETCH_ADD(&warn_fail, 1)) {
+ ha_warning("Failed to disable forks, please report to developers with detailed "
+ "information about your operating system. You can silence this warning "
+ "by adding 'insecure-fork-wanted' in the 'global' section.\n");
+ }
+ }
+#endif
+ run_poll_loop();
+
+ list_for_each_entry(ptdf, &per_thread_deinit_list, list)
+ ptdf->fct();
+
+ list_for_each_entry(ptff, &per_thread_free_list, list)
+ ptff->fct();
+
+#ifdef USE_THREAD
+ _HA_ATOMIC_AND(&all_threads_mask, ~tid_bit);
+ if (tid > 0)
+ pthread_exit(NULL);
+#endif
+ return NULL;
+}
+
+/* set uid/gid depending on global settings */
+static void set_identity(const char *program_name)
+{
+ if (global.gid) {
+ if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
+ ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
+ " without 'uid'/'user' is generally useless.\n", program_name);
+
+ if (setgid(global.gid) == -1) {
+ ha_alert("[%s.main()] Cannot set gid %d.\n", program_name, global.gid);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+
+ if (global.uid && setuid(global.uid) == -1) {
+ ha_alert("[%s.main()] Cannot set uid %d.\n", program_name, global.uid);
+ protocol_unbind_all();
+ exit(1);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int err, retry;
+ struct rlimit limit;
+ int pidfd = -1;
+ int intovf = (unsigned char)argc + 1; /* let the compiler know it's strictly positive */
+
+ /* Catch broken toolchains */
+ if (sizeof(long) != sizeof(void *) || (intovf + 0x7FFFFFFF >= intovf)) {
+ const char *msg;
+
+ if (sizeof(long) != sizeof(void *))
+ /* Apparently MingW64 was not made for us and can also break openssl */
+ msg = "The compiler this program was built with uses unsupported integral type sizes.\n"
+ "Most likely it follows the unsupported LLP64 model. Never try to link HAProxy\n"
+ "against libraries built with that compiler either! Please only use a compiler\n"
+ "producing ILP32 or LP64 programs for both programs and libraries.\n";
+ else if (intovf + 0x7FFFFFFF >= intovf)
+ /* Catch forced CFLAGS that miss 2-complement integer overflow */
+ msg = "The source code was miscompiled by the compiler, which usually indicates that\n"
+ "some of the CFLAGS needed to work around overzealous compiler optimizations\n"
+ "were overwritten at build time. Please do not force CFLAGS, and read Makefile\n"
+ "and INSTALL files to decide on the best way to pass your local build options.\n";
+ else
+ msg = "Bug in the compiler bug detection code, please report it to developers!\n";
+
+ fprintf(stderr,
+ "FATAL ERROR: invalid code detected -- cannot go further, please recompile!\n"
+ "%s"
+ "\nBuild options :"
+#ifdef BUILD_TARGET
+ "\n TARGET = " BUILD_TARGET
+#endif
+#ifdef BUILD_CPU
+ "\n CPU = " BUILD_CPU
+#endif
+#ifdef BUILD_CC
+ "\n CC = " BUILD_CC
+#endif
+#ifdef BUILD_CFLAGS
+ "\n CFLAGS = " BUILD_CFLAGS
+#endif
+#ifdef BUILD_OPTIONS
+ "\n OPTIONS = " BUILD_OPTIONS
+#endif
+#ifdef BUILD_DEBUG
+ "\n DEBUG = " BUILD_DEBUG
+#endif
+ "\n\n", msg);
+
+ return 1;
+ }
+
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /* take a copy of initial limits before we possibly change them */
+ getrlimit(RLIMIT_NOFILE, &limit);
+
+ if (limit.rlim_max == RLIM_INFINITY)
+ limit.rlim_max = limit.rlim_cur;
+ rlim_fd_cur_at_boot = limit.rlim_cur;
+ rlim_fd_max_at_boot = limit.rlim_max;
+
+ /* process all initcalls in order of potential dependency */
+ RUN_INITCALLS(STG_PREPARE);
+ RUN_INITCALLS(STG_LOCK);
+ RUN_INITCALLS(STG_REGISTER);
+
+ /* now is the time to initialize early boot variables */
+ init_early(argc, argv);
+
+ /* handles argument parsing */
+ init_args(argc, argv);
+
+ RUN_INITCALLS(STG_ALLOC);
+ RUN_INITCALLS(STG_POOL);
+ RUN_INITCALLS(STG_INIT);
+
+ /* this is the late init where the config is parsed */
+ init(argc, argv);
+
+ signal_register_fct(SIGQUIT, dump, SIGQUIT);
+ signal_register_fct(SIGUSR1, sig_soft_stop, SIGUSR1);
+ signal_register_fct(SIGHUP, sig_dump_state, SIGHUP);
+ signal_register_fct(SIGUSR2, NULL, 0);
+
+ /* Always catch SIGPIPE even on platforms which define MSG_NOSIGNAL.
+ * Some recent FreeBSD setups report broken pipes, and MSG_NOSIGNAL
+ * was defined there, so let's stay on the safe side.
+ */
+ signal_register_fct(SIGPIPE, NULL, 0);
+
+ /* ulimits */
+ if (!global.rlimit_nofile)
+ global.rlimit_nofile = global.maxsock;
+
+ if (global.rlimit_nofile) {
+ limit.rlim_cur = global.rlimit_nofile;
+ limit.rlim_max = MAX(rlim_fd_max_at_boot, limit.rlim_cur);
+
+ if ((global.fd_hard_limit && limit.rlim_cur > global.fd_hard_limit) ||
+ raise_rlim_nofile(NULL, &limit) != 0) {
+ getrlimit(RLIMIT_NOFILE, &limit);
+ if (global.fd_hard_limit && limit.rlim_cur > global.fd_hard_limit)
+ limit.rlim_cur = global.fd_hard_limit;
+
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Cannot raise FD limit to %d, limit is %d.\n",
+ argv[0], global.rlimit_nofile, (int)limit.rlim_cur);
+ exit(1);
+ }
+ else {
+ /* try to set it to the max possible at least */
+ limit.rlim_cur = limit.rlim_max;
+ if (global.fd_hard_limit && limit.rlim_cur > global.fd_hard_limit)
+ limit.rlim_cur = global.fd_hard_limit;
+
+ if (raise_rlim_nofile(&limit, &limit) == 0)
+ getrlimit(RLIMIT_NOFILE, &limit);
+
+ ha_warning("[%s.main()] Cannot raise FD limit to %d, limit is %d.\n",
+ argv[0], global.rlimit_nofile, (int)limit.rlim_cur);
+ global.rlimit_nofile = limit.rlim_cur;
+ }
+ }
+ }
+
+ if (global.rlimit_memmax) {
+ limit.rlim_cur = limit.rlim_max =
+ global.rlimit_memmax * 1048576ULL;
+#ifdef RLIMIT_AS
+ if (setrlimit(RLIMIT_AS, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ }
+#else
+ if (setrlimit(RLIMIT_DATA, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ }
+#endif
+ }
+
+ /* Try to get the listeners' FDs from the previous process using
+ * _getsocks on the stat socket; this must never be done in wait
+ * mode nor in check mode.
+ */
+ if (old_unixsocket &&
+ !(global.mode & (MODE_MWORKER_WAIT|MODE_CHECK|MODE_CHECK_CONDITION))) {
+ if (strcmp("/dev/null", old_unixsocket) != 0) {
+ if (sock_get_old_sockets(old_unixsocket) != 0) {
+ ha_alert("Failed to get the sockets from the old process!\n");
+ if (!(global.mode & MODE_MWORKER))
+ exit(1);
+ }
+ }
+ }
+
+ /* We will loop at most 100 times with 10 ms delay each time.
+ * That's at most 1 second. We only send a signal to old pids
+ * if we cannot grab at least one port.
+ */
+ retry = MAX_START_RETRIES;
+ err = ERR_NONE;
+ while (retry >= 0) {
+ struct timeval w;
+ err = protocol_bind_all(retry == 0 || nb_oldpids == 0);
+ /* exit the loop on no error or fatal error */
+ if ((err & (ERR_RETRYABLE|ERR_FATAL)) != ERR_RETRYABLE)
+ break;
+ if (nb_oldpids == 0 || retry == 0)
+ break;
+
+ /* FIXME-20060514: Solaris and OpenBSD do not support shutdown() on
+ * listening sockets. So on those platforms, it would be wiser to
+ * simply send SIGUSR1, which cannot be undone.
+ */
+ if (tell_old_pids(SIGTTOU) == 0) {
+ /* no need to wait if we can't contact old pids */
+ retry = 0;
+ continue;
+ }
+ /* give some time to old processes to stop listening */
+ w.tv_sec = 0;
+ w.tv_usec = 10*1000;
+ select(0, NULL, NULL, NULL, &w);
+ retry--;
+ }
+
+ /* Note: protocol_bind_all() sends an alert when it fails. */
+ if ((err & ~ERR_WARN) != ERR_NONE) {
+ ha_alert("[%s.main()] Some protocols failed to start their listeners! Exiting.\n", argv[0]);
+ if (retry != MAX_START_RETRIES && nb_oldpids) {
+ protocol_unbind_all(); /* cleanup everything we can */
+ tell_old_pids(SIGTTIN);
+ }
+ exit(1);
+ }
+
+ if (!(global.mode & MODE_MWORKER_WAIT) && listeners == 0) {
+ ha_alert("[%s.main()] No enabled listener found (check for 'bind' directives) ! Exiting.\n", argv[0]);
+ /* Note: we don't have to send anything to the old pids because we
+ * never stopped them. */
+ exit(1);
+ }
+
+ /* Ok, all listeners should now be bound, close any leftover sockets
+ * the previous process gave us, we don't need them anymore
+ */
+ sock_drop_unused_old_sockets();
+
+ /* prepare pause/play signals */
+ signal_register_fct(SIGTTOU, sig_pause, SIGTTOU);
+ signal_register_fct(SIGTTIN, sig_listen, SIGTTIN);
+
+ /* MODE_QUIET can inhibit alerts and warnings below this line */
+
+ if (getenv("HAPROXY_MWORKER_REEXEC") != NULL) {
+ /* either stdin/out/err are already closed or should stay as they are. */
+ if ((global.mode & MODE_DAEMON)) {
+ /* daemon mode re-executing, stdin/stdout/stderr are already closed so keep quiet */
+ global.mode &= ~MODE_VERBOSE;
+ global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
+ }
+ } else {
+ if ((global.mode & MODE_QUIET) && !(global.mode & MODE_VERBOSE)) {
+ /* detach from the tty */
+ stdio_quiet(-1);
+ }
+ }
+
+ /* open log & pid files before the chroot */
+ if ((global.mode & MODE_DAEMON || global.mode & MODE_MWORKER) &&
+ !(global.mode & MODE_MWORKER_WAIT) && global.pidfile != NULL) {
+ unlink(global.pidfile);
+ pidfd = open(global.pidfile, O_CREAT | O_WRONLY | O_TRUNC, 0644);
+ if (pidfd < 0) {
+ ha_alert("[%s.main()] Cannot create pidfile %s\n", argv[0], global.pidfile);
+ if (nb_oldpids)
+ tell_old_pids(SIGTTIN);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+
+ if ((global.last_checks & LSTCHK_NETADM) && global.uid) {
+ ha_alert("[%s.main()] Some configuration options require full privileges, so global.uid cannot be changed.\n"
+ "", argv[0]);
+ protocol_unbind_all();
+ exit(1);
+ }
+
+ /* If the user is not root, we'll still let them try the configuration
+ * but we inform them that unexpected behaviour may occur.
+ */
+ if ((global.last_checks & LSTCHK_NETADM) && getuid())
+ ha_warning("[%s.main()] Some options which require full privileges"
+ " might not work well.\n"
+ "", argv[0]);
+
+ if ((global.mode & (MODE_MWORKER|MODE_DAEMON)) == 0) {
+
+ /* chroot if needed */
+ if (global.chroot != NULL) {
+ if (chroot(global.chroot) == -1 || chdir("/") == -1) {
+ ha_alert("[%s.main()] Cannot chroot(%s).\n", argv[0], global.chroot);
+ if (nb_oldpids)
+ tell_old_pids(SIGTTIN);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+ }
+
+ if (nb_oldpids && !(global.mode & MODE_MWORKER_WAIT))
+ nb_oldpids = tell_old_pids(oldpids_sig);
+
+ /* send a SIGTERM to workers whose reload count is too high */
+ if ((global.mode & MODE_MWORKER) && !(global.mode & MODE_MWORKER_WAIT))
+ mworker_kill_max_reloads(SIGTERM);
+
+ /* Note that any error at this stage will be fatal because we will not
+ * be able to restart the old pids.
+ */
+
+ if ((global.mode & (MODE_MWORKER | MODE_DAEMON)) == 0)
+ set_identity(argv[0]);
+
+ /* check ulimits */
+ limit.rlim_cur = limit.rlim_max = 0;
+ getrlimit(RLIMIT_NOFILE, &limit);
+ if (limit.rlim_cur < global.maxsock) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] FD limit (%d) too low for maxconn=%d/maxsock=%d. "
+ "Please raise 'ulimit-n' to %d or more to avoid any trouble.\n",
+ argv[0], (int)limit.rlim_cur, global.maxconn, global.maxsock,
+ global.maxsock);
+ exit(1);
+ }
+ else
+ ha_alert("[%s.main()] FD limit (%d) too low for maxconn=%d/maxsock=%d. "
+ "Please raise 'ulimit-n' to %d or more to avoid any trouble.\n",
+ argv[0], (int)limit.rlim_cur, global.maxconn, global.maxsock,
+ global.maxsock);
+ }
+
+ if (global.mode & (MODE_DAEMON | MODE_MWORKER | MODE_MWORKER_WAIT)) {
+ int ret = 0;
+ int in_parent = 0;
+ int devnullfd = -1;
+
+ /*
+ * if daemon + mworker: must fork here to let a master
+ * process live in background before forking children
+ */
+
+ if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL)
+ && (global.mode & MODE_MWORKER)
+ && (global.mode & MODE_DAEMON)) {
+ ret = fork();
+ if (ret < 0) {
+ ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
+ protocol_unbind_all();
+ exit(1); /* there has been an error */
+ } else if (ret > 0) { /* parent leaves to daemonize */
+ exit(0);
+ } else /* change the process group ID in the child (master process) */
+ setsid();
+ }
+
+
+ /* if in master-worker mode, write the PID of the father */
+ if (global.mode & MODE_MWORKER) {
+ char pidstr[100];
+ snprintf(pidstr, sizeof(pidstr), "%d\n", (int)getpid());
+ if (pidfd >= 0)
+ DISGUISE(write(pidfd, pidstr, strlen(pidstr)));
+ }
+
+ /* the father launches the required number of processes */
+ if (!(global.mode & MODE_MWORKER_WAIT)) {
+ if (global.mode & MODE_MWORKER)
+ mworker_ext_launch_all();
+
+ ret = fork();
+ if (ret < 0) {
+ ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
+ protocol_unbind_all();
+ exit(1); /* there has been an error */
+ }
+ else if (ret == 0) { /* child breaks here */
+ /* This one must not be exported, it's internal! */
+ unsetenv("HAPROXY_MWORKER_REEXEC");
+ ha_random_jump96(1);
+ }
+ else { /* parent here */
+ in_parent = 1;
+
+ if (pidfd >= 0 && !(global.mode & MODE_MWORKER)) {
+ char pidstr[100];
+ snprintf(pidstr, sizeof(pidstr), "%d\n", ret);
+ DISGUISE(write(pidfd, pidstr, strlen(pidstr)));
+ }
+ if (global.mode & MODE_MWORKER) {
+ struct mworker_proc *child;
+
+ ha_notice("New worker (%d) forked\n", ret);
+ /* find the right mworker_proc */
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads == 0 &&
+ child->options & PROC_O_TYPE_WORKER &&
+ child->pid == -1) {
+ child->timestamp = date.tv_sec;
+ child->pid = ret;
+ child->version = strdup(haproxy_version);
+ break;
+ }
+ }
+ }
+ }
+
+ } else {
+ /* wait mode */
+ in_parent = 1;
+ }
+
+#ifdef USE_CPU_AFFINITY
+ if (!in_parent && ha_cpuset_count(&cpu_map.proc)) { /* only do this if the process has a CPU map */
+
+#if defined(CPUSET_USE_CPUSET) || defined(__DragonFly__)
+ struct hap_cpuset *set = &cpu_map.proc;
+ sched_setaffinity(0, sizeof(set->cpuset), &set->cpuset);
+#elif defined(__FreeBSD__)
+ struct hap_cpuset *set = &cpu_map.proc;
+ ret = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(set->cpuset), &set->cpuset);
+#endif
+ }
+#endif
+ /* close the pidfile both in children and father */
+ if (pidfd >= 0) {
+ //lseek(pidfd, 0, SEEK_SET); /* debug: emulate eglibc bug */
+ close(pidfd);
+ }
+
+ /* We won't ever use this anymore */
+ ha_free(&global.pidfile);
+
+ if (in_parent) {
+ if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
+ master = 1;
+
+ if ((!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) &&
+ (global.mode & MODE_DAEMON)) {
+ /* detach from the tty, this is required to properly daemonize. */
+ if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL))
+ stdio_quiet(-1);
+
+ global.mode &= ~MODE_VERBOSE;
+ global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
+ }
+
+ if (global.mode & MODE_MWORKER_WAIT) {
+ /* only the wait mode handles the master CLI */
+ mworker_loop();
+ } else {
+
+ /* if not in wait mode, reload in wait mode to free the memory */
+ ha_notice("Loading success.\n");
+ proc_self->failedreloads = 0; /* reset the number of failure */
+ mworker_reexec_waitmode();
+ }
+ /* should never get there */
+ exit(EXIT_FAILURE);
+ }
+#if defined(USE_OPENSSL) && !defined(OPENSSL_NO_DH)
+ ssl_free_dh();
+#endif
+ exit(0); /* parent must leave */
+ }
+
+ /* child must never use the atexit function */
+ atexit_flag = 0;
+
+ /* close useless master sockets */
+ if (global.mode & MODE_MWORKER) {
+ struct mworker_proc *child, *it;
+ master = 0;
+
+ mworker_cli_proxy_stop();
+
+ /* free proc struct of other processes */
+ list_for_each_entry_safe(child, it, &proc_list, list) {
+ /* close the FD of the master side for all
+ * workers, we don't need to close the worker
+ * side of other workers since it's done with
+ * the bind_proc */
+ if (child->ipc_fd[0] >= 0) {
+ close(child->ipc_fd[0]);
+ child->ipc_fd[0] = -1;
+ }
+ if (child->options & PROC_O_TYPE_WORKER &&
+ child->reloads == 0 &&
+ child->pid == -1) {
+ /* keep this struct if this is our pid */
+ proc_self = child;
+ continue;
+ }
+ LIST_DELETE(&child->list);
+ mworker_free_child(child);
+ child = NULL;
+ }
+ }
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
+ devnullfd = open("/dev/null", O_RDWR, 0);
+ if (devnullfd < 0) {
+ ha_alert("Cannot open /dev/null\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ /* Must chroot and setgid/setuid in the children */
+ /* chroot if needed */
+ if (global.chroot != NULL) {
+ if (chroot(global.chroot) == -1 || chdir("/") == -1) {
+ ha_alert("[%s.main()] Cannot chroot(%s).\n", argv[0], global.chroot);
+ if (nb_oldpids)
+ tell_old_pids(SIGTTIN);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+
+ ha_free(&global.chroot);
+ set_identity(argv[0]);
+
+ /* pass through every cli socket, and check if it's bound to
+ * the current process and if it exposes listener sockets.
+ * Caution: the GTUNE_SOCKET_TRANSFER is now set after the fork.
+ */
+
+ if (global.cli_fe) {
+ struct bind_conf *bind_conf;
+
+ list_for_each_entry(bind_conf, &global.cli_fe->conf.bind, by_fe) {
+ if (bind_conf->level & ACCESS_FD_LISTENERS) {
+ global.tune.options |= GTUNE_SOCKET_TRANSFER;
+ break;
+ }
+ }
+ }
+
+ /*
+ * This is only done in daemon mode because we might want the
+ * logs on stdout in mworker mode. If we're NOT in QUIET mode,
+ * we should now close the first 3 FDs to ensure that we can
+ * detach from the TTY. We MUST NOT do it in other cases since
+ * it would already have been done, and FDs 0-2 could have been
+ * assigned to listening sockets.
+ */
+ if ((global.mode & MODE_DAEMON) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
+ /* detach from the tty */
+ stdio_quiet(devnullfd);
+ global.mode &= ~MODE_VERBOSE;
+ global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
+ }
+ pid = getpid(); /* update child's pid */
+ if (!(global.mode & MODE_MWORKER)) /* in mworker mode we don't want a new pgid for the children */
+ setsid();
+ fork_poller();
+ }
+
+ /* try our best to re-enable core dumps depending on system capabilities.
+ * What is addressed here :
+ * - remove file size limits
+ * - remove core size limits
+ * - mark the process dumpable again if it lost it due to user/group
+ */
+ if (global.tune.options & GTUNE_SET_DUMPABLE) {
+ limit.rlim_cur = limit.rlim_max = RLIM_INFINITY;
+
+#if defined(RLIMIT_FSIZE)
+ if (setrlimit(RLIMIT_FSIZE, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Failed to set the raise the maximum "
+ "file size.\n", argv[0]);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Failed to set the raise the maximum "
+ "file size.\n", argv[0]);
+ }
+#endif
+
+#if defined(RLIMIT_CORE)
+ if (setrlimit(RLIMIT_CORE, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Failed to set the raise the core "
+ "dump size.\n", argv[0]);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Failed to set the raise the core "
+ "dump size.\n", argv[0]);
+ }
+#endif
+
+#if defined(USE_PRCTL)
+ if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1)
+ ha_warning("[%s.main()] Failed to set the dumpable flag, "
+ "no core will be dumped.\n", argv[0]);
+#elif defined(USE_PROCCTL)
+ {
+ int traceable = PROC_TRACE_CTL_ENABLE;
+ if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &traceable) == -1)
+ ha_warning("[%s.main()] Failed to set the traceable flag, "
+ "no core will be dumped.\n", argv[0]);
+ }
+#endif
+ }
+
+ global.mode &= ~MODE_STARTING;
+ reset_usermsgs_ctx();
+
+ /* start threads 2 and above */
+ setup_extra_threads(&run_thread_poll_loop);
+
+ /* when multithreading we need to let only thread 0 handle the signals */
+ haproxy_unblock_signals();
+
+ /* Finally, start the poll loop for the first thread */
+ run_thread_poll_loop(&ha_thread_info[0]);
+
+ /* wait for all threads to terminate */
+ wait_for_threads_completion();
+
+ deinit_and_exit(0);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 0000000..a20b90c
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,189 @@
+/*
+ * Hash function implementation
+ *
+ * See mailing list thread on "Consistent hashing alternative to sdbm"
+ * http://marc.info/?l=haproxy&m=138213693909219
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+
+#include <haproxy/hash.h>
+
+
+unsigned int hash_wt6(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned h0 = 0xa53c965aUL;
+ unsigned h1 = 0x5ca6953aUL;
+ unsigned step0 = 6;
+ unsigned step1 = 18;
+
+ for (; len > 0; len--) {
+ unsigned int t;
+
+ t = *key;
+ key++;
+
+ h0 = ~(h0 ^ t);
+ h1 = ~(h1 + t);
+
+ t = (h1 << step0) | (h1 >> (32-step0));
+ h1 = (h0 << step1) | (h0 >> (32-step1));
+ h0 = t;
+
+ t = ((h0 >> 16) ^ h1) & 0xffff;
+ step0 = t & 0x1F;
+ step1 = t >> 11;
+ }
+ return h0 ^ h1;
+}
+
+unsigned int hash_djb2(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned int hash = 5381;
+
+ /* the hash unrolled eight times */
+ for (; len >= 8; len -= 8) {
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ }
+ switch (len) {
+ case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 1: hash = ((hash << 5) + hash) + *key++; break;
+ default: /* case 0: */ break;
+ }
+ return hash;
+}
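+
+/* The unrolled loop above computes hash = hash * 33 + c for each byte c,
+ * starting from 5381; e.g. the single-byte key "a" (0x61) would yield
+ * 5381 * 33 + 97 = 177670.
+ */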
+
+unsigned int hash_sdbm(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned int hash = 0;
+ int c;
+
+ while (len--) {
+ c = *key++;
+ hash = c + (hash << 6) + (hash << 16) - hash;
+ }
+
+ return hash;
+}
+
+/* Small yet efficient CRC32 calculation loosely inspired from crc32b found
+ * here: http://www.hackersdelight.org/hdcodetxt/crc.c.txt
+ * The magic value represents the polynomial with one bit per exponent. Much
+ * faster table-based versions exist but are pointless for our usage here:
+ * this hash already sustains gigabit speed, which is far faster than what
+ * we'd ever need. Better preserve the CPU's cache instead.
+ */
+unsigned int hash_crc32(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned int hash;
+ int bit;
+
+ hash = ~0;
+ while (len--) {
+ hash ^= *key++;
+ for (bit = 0; bit < 8; bit++)
+ hash = (hash >> 1) ^ ((hash & 1) ? 0xedb88320 : 0);
+ }
+ return ~hash;
+}
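+
+/* Note: with the reflected polynomial 0xedb88320, an all-ones initial value
+ * and a final inversion, the function above should produce the same output
+ * as the standard CRC-32 used by zlib and Ethernet.
+ */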
+
+/* CRC32c poly 0x11EDC6F41 (RFC4960, Appendix B [8].) */
+static const uint32_t crctable[256] = {
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+ 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+ 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+ 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+ 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+ 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+ 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+ 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+ 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+ 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+ 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+ 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+ 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+ 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+ 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+ 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+ 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+ 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+ 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+ 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+ 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+ 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+ 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+ 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+ 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+ 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+ 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+ 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+ 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+ 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+ 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+ 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+ 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+ 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+ 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+ 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+ 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+ 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+ 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+ 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+ 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+ 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+ 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+ 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+ 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+ 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+ 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+ 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+ 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+ 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+ 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+ 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+ 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+ 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+ 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+ 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+ 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+ 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+ 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+ 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+ 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+ 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+ 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
+};
+
+uint32_t hash_crc32c(const void *input, int len)
+{
+ const unsigned char *buf = input;
+ uint32_t crc = 0xffffffff;
+ while (len-- > 0) {
+ crc = (crc >> 8) ^ crctable[(crc ^ (*buf++)) & 0xff];
+ }
+ return (crc ^ 0xffffffff);
+}
diff --git a/src/hlua.c b/src/hlua.c
new file mode 100644
index 0000000..1781477
--- /dev/null
+++ b/src/hlua.c
@@ -0,0 +1,12681 @@
+/*
+ * Lua unsafe core engine
+ *
+ * Copyright 2015-2016 Thierry Fournier <tfournier@arpalert.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <ctype.h>
+#include <setjmp.h>
+
+#include <lauxlib.h>
+#include <lua.h>
+#include <lualib.h>
+
+#if !defined(LUA_VERSION_NUM) || LUA_VERSION_NUM < 503
+#error "Requires Lua 5.3 or later."
+#endif
+
+#include <import/ebpttree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/connection.h>
+#include <haproxy/filters.h>
+#include <haproxy/h1.h>
+#include <haproxy/hlua.h>
+#include <haproxy/hlua_fcn.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_client.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/map.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/pattern.h>
+#include <haproxy/payload.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xref.h>
+
+
+/* Lua uses longjmp to perform yields and to throw errors. These
+ * macros are used only to identify the functions that cannot
+ * return because a longjmp is executed.
+ * __LJMP marks a prototype in this hlua file that can use longjmp.
+ * WILL_LJMP() marks a Lua function that will use longjmp.
+ * MAY_LJMP() marks a Lua function that may use longjmp.
+ */
+#define __LJMP
+#define WILL_LJMP(func) do { func; my_unreachable(); } while(0)
+#define MAY_LJMP(func) func
+
+/* This pair of functions securely executes some Lua calls outside of
+ * the Lua runtime environment. Each Lua call can trigger a longjmp
+ * if it encounters a memory error.
+ *
+ * Lua documentation extract:
+ *
+ * If an error happens outside any protected environment, Lua calls
+ * a panic function (see lua_atpanic) and then calls abort, thus
+ * exiting the host application. Your panic function can avoid this
+ * exit by never returning (e.g., doing a long jump to your own
+ * recovery point outside Lua).
+ *
+ * The panic function runs as if it were a message handler (see
+ * #2.3); in particular, the error message is at the top of the
+ * stack. However, there is no guarantee about stack space. To push
+ * anything on the stack, the panic function must first check the
+ * available space (see #4.2).
+ *
+ * We must check all the Lua entry points. This includes:
+ * - the include/proto/hlua.h exported functions
+ * - the task wrapper function
+ * - the action wrapper function
+ * - the converters wrapper function
+ * - the sample-fetch wrapper functions
+ *
+ * It is tolerated that the initialisation function aborts. Before
+ * each Lua abort, an error message is written on stderr.
+ *
+ * The macro SET_SAFE_LJMP initialises the longjmp. The macro
+ * RESET_SAFE_LJMP resets the longjmp. These must be macros because
+ * they must exist on the program stack when the longjmp is called.
+ *
+ * Note that Lua processing is not really thread safe. Making it so
+ * would require adding our own lock function into the Lua code and
+ * recompiling the library, which would probably not be accepted by
+ * the maintainers of the various distros.
+ *
+ * Our main Lua execution point is the function lua_resume(). A
+ * quick look at the Lua sources shows a lua_lock() at the start of
+ * that function and a lua_unlock() at its end, so the Lua thread-safe
+ * mode just wraps all execution in a mutex. I therefore prefer to do
+ * this in the HAProxy code; it will be easier for distro maintainers.
+ *
+ * Note that the HAProxy Lua functions surrounded by the macros
+ * SET_SAFE_LJMP and RESET_SAFE_LJMP manipulate the Lua stack, so care
+ * must be taken to hold the mutex around these functions.
+ */
+__decl_spinlock(hlua_global_lock);
+THREAD_LOCAL jmp_buf safe_ljmp_env;
+static int hlua_panic_safe(lua_State *L) { return 0; }
+static int hlua_panic_ljmp(lua_State *L) { WILL_LJMP(longjmp(safe_ljmp_env, 1)); return 0; }
+
+/* This is the chained list of struct hlua_function referenced
+ * for haproxy actions, sample-fetches, converters, cli and
+ * applet bindings. It is used for post-initialisation control.
+ */
+static struct list referenced_functions = LIST_HEAD_INIT(referenced_functions);
+
+/* This variable is used only during initialization to identify the Lua state
+ * currently being initialized. 0 is the common lua state, 1 to n are the Lua
+ * states dedicated to each thread (in this case hlua_state_id==tid+1).
+ */
+static int hlua_state_id;
+
+/* This is a NULL-terminated list of Lua files which are to be loaded per thread */
+static char **per_thread_load = NULL;
+
+lua_State *hlua_init_state(int thread_id);
+
+/* This function takes the Lua global lock. Keep this function's visibility
+ * global so that it can appear in stack dumps and performance profiles!
+ */
+void lua_take_global_lock()
+{
+ HA_SPIN_LOCK(LUA_LOCK, &hlua_global_lock);
+}
+
+static inline void lua_drop_global_lock()
+{
+ HA_SPIN_UNLOCK(LUA_LOCK, &hlua_global_lock);
+}
+
+#define SET_SAFE_LJMP_L(__L, __HLUA) \
+ ({ \
+ int ret; \
+ if ((__HLUA)->state_id == 0) \
+ lua_take_global_lock(); \
+ if (setjmp(safe_ljmp_env) != 0) { \
+ lua_atpanic(__L, hlua_panic_safe); \
+ ret = 0; \
+ if ((__HLUA)->state_id == 0) \
+ lua_drop_global_lock(); \
+ } else { \
+ lua_atpanic(__L, hlua_panic_ljmp); \
+ ret = 1; \
+ } \
+ ret; \
+ })
+
+/* If we are the last function catching Lua errors, we
+ * must reset the panic function.
+ */
+#define RESET_SAFE_LJMP_L(__L, __HLUA) \
+ do { \
+ lua_atpanic(__L, hlua_panic_safe); \
+ if ((__HLUA)->state_id == 0) \
+ lua_drop_global_lock(); \
+ } while(0)
+
+#define SET_SAFE_LJMP(__HLUA) \
+ SET_SAFE_LJMP_L((__HLUA)->T, __HLUA)
+
+#define RESET_SAFE_LJMP(__HLUA) \
+ RESET_SAFE_LJMP_L((__HLUA)->T, __HLUA)
+
+#define SET_SAFE_LJMP_PARENT(__HLUA) \
+ SET_SAFE_LJMP_L(hlua_states[(__HLUA)->state_id], __HLUA)
+
+#define RESET_SAFE_LJMP_PARENT(__HLUA) \
+ RESET_SAFE_LJMP_L(hlua_states[(__HLUA)->state_id], __HLUA)
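+
+/* Typical usage pattern of the macros above (illustrative sketch only):
+ *
+ *    if (!SET_SAFE_LJMP(hlua)) {
+ *            ... a Lua panic longjmp'ed back here: handle the error ...
+ *            return 0;
+ *    }
+ *    lua_getglobal(hlua->T, "some_function");   (any stack manipulation)
+ *    RESET_SAFE_LJMP(hlua);
+ */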
+
+/* Applet status flags */
+#define APPLET_DONE 0x01 /* applet processing is done. */
+/* unused: 0x02 */
+#define APPLET_HDR_SENT 0x04 /* Response header sent. */
+/* unused: 0x08, 0x10 */
+#define APPLET_HTTP11 0x20 /* Last chunk sent. */
+#define APPLET_RSP_SENT 0x40 /* The response was fully sent */
+
+/* The main Lua execution context. The 0 index is the
+ * common state shared by all threads.
+ */
+static lua_State *hlua_states[MAX_THREADS + 1];
+
+#define HLUA_FLT_CB_FINAL 0x00000001
+#define HLUA_FLT_CB_RETVAL 0x00000002
+#define HLUA_FLT_CB_ARG_CHN 0x00000004
+#define HLUA_FLT_CB_ARG_HTTP_MSG 0x00000008
+
+#define HLUA_FLT_CTX_FL_PAYLOAD 0x00000001
+
+struct hlua_reg_filter {
+ char *name;
+ int flt_ref[MAX_THREADS + 1];
+ int fun_ref[MAX_THREADS + 1];
+ struct list l;
+};
+
+struct hlua_flt_config {
+ struct hlua_reg_filter *reg;
+ int ref[MAX_THREADS + 1];
+ char **args;
+};
+
+struct hlua_flt_ctx {
+ int ref; /* ref to the filter lua object */
+ struct hlua *hlua[2]; /* lua runtime context (0: request, 1: response) */
+ unsigned int cur_off[2]; /* current offset (0: request, 1: response) */
+ unsigned int cur_len[2]; /* current forwardable length (0: request, 1: response) */
+ unsigned int flags; /* HLUA_FLT_CTX_FL_* */
+};
+
+/* appctx context used by the cosockets */
+struct hlua_csk_ctx {
+ int connected;
+ struct xref xref; /* cross reference with the Lua object owner. */
+ struct list wake_on_read;
+ struct list wake_on_write;
+ struct appctx *appctx;
+ int die;
+};
+
+/* appctx context used by TCP services */
+struct hlua_tcp_ctx {
+ struct hlua *hlua;
+ int flags;
+ struct task *task;
+};
+
+/* appctx context used by HTTP services */
+struct hlua_http_ctx {
+ struct hlua *hlua;
+ int left_bytes; /* The max amount of bytes that we can read. */
+ int flags;
+ int status;
+ const char *reason;
+ struct task *task;
+};
+
+/* used by registered CLI keywords */
+struct hlua_cli_ctx {
+ struct hlua *hlua;
+ struct task *task;
+ struct hlua_function *fcn;
+};
+
+DECLARE_STATIC_POOL(pool_head_hlua_flt_ctx, "hlua_flt_ctx", sizeof(struct hlua_flt_ctx));
+
+static int hlua_filter_from_payload(struct filter *filter);
+
+/* This is the chained list of struct hlua_flt referenced
+ * for haproxy filters. It is used for post-initialisation control.
+ */
+static struct list referenced_filters = LIST_HEAD_INIT(referenced_filters);
+
+
+/* This is the memory pool containing struct lua for applets
+ * (including cli).
+ */
+DECLARE_STATIC_POOL(pool_head_hlua, "hlua", sizeof(struct hlua));
+
+/* Used for Socket connection. */
+static struct proxy *socket_proxy;
+static struct server *socket_tcp;
+#ifdef USE_OPENSSL
+static struct server *socket_ssl;
+#endif
+
+/* List head of the functions called at initialisation time. */
+struct list hlua_init_functions[MAX_THREADS + 1];
+
+/* The following variables contain the references of the different
+ * Lua classes. These references are useful to identify metadata
+ * associated with an object.
+ */
+static int class_txn_ref;
+static int class_socket_ref;
+static int class_channel_ref;
+static int class_fetches_ref;
+static int class_converters_ref;
+static int class_http_ref;
+static int class_http_msg_ref;
+static int class_httpclient_ref;
+static int class_map_ref;
+static int class_applet_tcp_ref;
+static int class_applet_http_ref;
+static int class_txn_reply_ref;
+
+/* Global Lua execution timeout. By default, Lua execution linked
+ * with a stream (actions, sample-fetches and converters) has a
+ * short timeout. Lua linked with tasks doesn't have a timeout
+ * because a task may remain alive during the whole haproxy execution.
+ */
+static unsigned int hlua_timeout_session = 4000; /* session timeout. */
+static unsigned int hlua_timeout_task = TICK_ETERNITY; /* task timeout. */
+static unsigned int hlua_timeout_applet = 4000; /* applet timeout. */
+
+/* Interrupts the Lua processing each "hlua_nb_instruction" instructions.
+ * It is used to prevent infinite loops.
+ *
+ * I tested the ceiling with an infinite loop containing one increment
+ * and one test, run for 10 seconds. The throughput reaches a ceiling of
+ * 710M loops at one interrupt every 9000 instructions, so I fixed the
+ * value to one interrupt every 10,000 instructions.
+ *
+ * configured     | Number of
+ * instructions   | loops executed
+ * between two    | in millions
+ * forced yields  |
+ * ---------------+---------------
+ * 10 | 160
+ * 500 | 670
+ * 1000 | 680
+ * 5000 | 700
+ * 7000 | 700
+ * 8000 | 700
+ * 9000 | 710 <- ceil
+ * 10000 | 710
+ * 100000 | 710
+ * 1000000 | 710
+ *
+ */
+static unsigned int hlua_nb_instruction = 10000;
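+
+/* This threshold is expected to be armed on each Lua state with something
+ * like (sketch only): lua_sethook(T, hook, LUA_MASKCOUNT, hlua_nb_instruction),
+ * so the count hook can yield the coroutine back to the HAProxy scheduler.
+ */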
+
+/* Descriptor for the memory allocation state. The limit is pre-initialised to
+ * 0 until it is replaced by "tune.lua.maxmem" during the config parsing, or it
+ * is replaced with ~0 during post_init after everything was loaded. This way
+ * it is guaranteed that if limit is ~0 the boot is complete and that if it's
+ * zero it's not yet limited and proper accounting is required.
+ */
+struct hlua_mem_allocator {
+ size_t allocated;
+ size_t limit;
+};
+
+static struct hlua_mem_allocator hlua_global_allocator THREAD_ALIGNED(64);
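+
+/* The accounting rule implied by the comment above boils down to this
+ * sketch (illustrative only):
+ *
+ *    if (limit != 0 && allocated + requested > limit)
+ *            refuse the allocation;
+ *
+ * so a limit of ~0 practically always passes once boot is complete, while
+ * a limit of 0 only tracks the amount allocated during boot.
+ */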
+
+/* These functions convert types between HAProxy internal args or
+ * samples and Lua types. Another function checks whether the Lua
+ * stack contains arguments matching a required ARG_T format.
+ */
+static int hlua_arg2lua(lua_State *L, const struct arg *arg);
+static int hlua_lua2arg(lua_State *L, int ud, struct arg *arg);
+__LJMP static int hlua_lua2arg_check(lua_State *L, int first, struct arg *argp,
+ uint64_t mask, struct proxy *p);
+static int hlua_smp2lua(lua_State *L, struct sample *smp);
+static int hlua_smp2lua_str(lua_State *L, struct sample *smp);
+static int hlua_lua2smp(lua_State *L, int ud, struct sample *smp);
+
+__LJMP static int hlua_http_get_headers(lua_State *L, struct http_msg *msg);
+
+struct prepend_path {
+ struct list l;
+ char *type;
+ char *path;
+};
+
+static struct list prepend_path_list = LIST_HEAD_INIT(prepend_path_list);
+
+#define SEND_ERR(__be, __fmt, __args...) \
+ do { \
+ send_log(__be, LOG_ERR, __fmt, ## __args); \
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) \
+ ha_alert(__fmt, ## __args); \
+ } while (0)
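+
+/* Illustrative use of the macro above (the variables are placeholders):
+ *
+ *   SEND_ERR(strm->be, "Lua converter '%s': %s.\n", conv_name, err_msg);
+ *
+ * It logs the error on the given proxy and echoes it to stderr unless
+ * quiet mode is set without verbose mode.
+ */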
+
+static inline struct hlua_function *new_hlua_function()
+{
+ struct hlua_function *fcn;
+ int i;
+
+ fcn = calloc(1, sizeof(*fcn));
+ if (!fcn)
+ return NULL;
+ LIST_APPEND(&referenced_functions, &fcn->l);
+ for (i = 0; i < MAX_THREADS + 1; i++)
+ fcn->function_ref[i] = -1;
+ return fcn;
+}
+
+static inline void release_hlua_function(struct hlua_function *fcn)
+{
+ if (!fcn)
+ return;
+ if (fcn->name)
+ ha_free(&fcn->name);
+ LIST_DELETE(&fcn->l);
+ ha_free(&fcn);
+}
+
+/* If the common state is set, the stack id is 0, otherwise it is the tid + 1 */
+static inline int fcn_ref_to_stack_id(struct hlua_function *fcn)
+{
+ if (fcn->function_ref[0] == -1)
+ return tid + 1;
+ return 0;
+}
+
+/* Create a new registered filter. Only its name is filled */
+static inline struct hlua_reg_filter *new_hlua_reg_filter(const char *name)
+{
+ struct hlua_reg_filter *reg_flt;
+ int i;
+
+ reg_flt = calloc(1, sizeof(*reg_flt));
+ if (!reg_flt)
+ return NULL;
+ reg_flt->name = strdup(name);
+ if (!reg_flt->name) {
+ free(reg_flt);
+ return NULL;
+ }
+ LIST_APPEND(&referenced_filters, &reg_flt->l);
+ for (i = 0; i < MAX_THREADS + 1; i++) {
+ reg_flt->flt_ref[i] = -1;
+ reg_flt->fun_ref[i] = -1;
+ }
+ return reg_flt;
+}
+
+/* Release a registered filter */
+static inline void release_hlua_reg_filter(struct hlua_reg_filter *reg_flt)
+{
+ if (!reg_flt)
+ return;
+ if (reg_flt->name)
+ ha_free(&reg_flt->name);
+ LIST_DELETE(&reg_flt->l);
+ ha_free(&reg_flt);
+}
+
+/* If the common state is set, the stack id is 0, otherwise it is the tid + 1 */
+static inline int reg_flt_to_stack_id(struct hlua_reg_filter *reg_flt)
+{
+ if (reg_flt->fun_ref[0] == -1)
+ return tid + 1;
+ return 0;
+}
+
+/* Used to check a Lua function type in the stack. It creates and
+ * returns a reference to the function. This function throws an
+ * error if the argument is not a "function".
+ */
+__LJMP unsigned int hlua_checkfunction(lua_State *L, int argno)
+{
+ if (!lua_isfunction(L, argno)) {
+ const char *msg = lua_pushfstring(L, "function expected, got %s", luaL_typename(L, argno));
+ WILL_LJMP(luaL_argerror(L, argno, msg));
+ }
+ lua_pushvalue(L, argno);
+ return luaL_ref(L, LUA_REGISTRYINDEX);
+}
+
+/* Used to check a Lua table type in the stack. It creates and
+ * returns a reference to the table. This function throws an
+ * error if the argument is not a "table".
+ */
+__LJMP unsigned int hlua_checktable(lua_State *L, int argno)
+{
+ if (!lua_istable(L, argno)) {
+ const char *msg = lua_pushfstring(L, "table expected, got %s", luaL_typename(L, argno));
+ WILL_LJMP(luaL_argerror(L, argno, msg));
+ }
+ lua_pushvalue(L, argno);
+ return luaL_ref(L, LUA_REGISTRYINDEX);
+}
+
+__LJMP const char *hlua_traceback(lua_State *L, const char* sep)
+{
+ lua_Debug ar;
+ int level = 0;
+ struct buffer *msg = get_trash_chunk();
+
+ while (lua_getstack(L, level++, &ar)) {
+
+ /* Add separator */
+ if (b_data(msg))
+ chunk_appendf(msg, "%s", sep);
+
+ /* Fill fields:
+ * 'S': fills in the fields source, short_src, linedefined, lastlinedefined, and what;
+ * 'l': fills in the field currentline;
+ * 'n': fills in the field name and namewhat;
+ * 't': fills in the field istailcall;
+ */
+ lua_getinfo(L, "Slnt", &ar);
+
+ /* Append code localisation */
+ if (ar.currentline > 0)
+ chunk_appendf(msg, "%s:%d: ", ar.short_src, ar.currentline);
+ else
+ chunk_appendf(msg, "%s: ", ar.short_src);
+
+ /*
+ * Get function name
+ *
+ * if namewhat is not empty, name is defined.
+ * what contains "Lua" for Lua function, "C" for C function,
+ * or "main" for main code.
+ */
+ if (*ar.namewhat != '\0' && ar.name != NULL) /* is there a name from code? */
+ chunk_appendf(msg, "in %s '%s'", ar.namewhat, ar.name); /* use it */
+
+ else if (*ar.what == 'm') /* "main", the code is not executed in a function */
+ chunk_appendf(msg, "in main chunk");
+
+ else if (*ar.what != 'C') /* for Lua functions, use <file:line> */
+ chunk_appendf(msg, "in function line %d", ar.linedefined);
+
+ else /* nothing left... */
+ chunk_appendf(msg, "?");
+
+
+ /* Display tail call */
+ if (ar.istailcall)
+ chunk_appendf(msg, " ...");
+ }
+
+ return msg->area;
+}
+
+
+/* This function checks the number of arguments available in the
+ * stack. If the number of available arguments is not equal
+ * to <nb>, an error is thrown.
+ */
+__LJMP static inline void check_args(lua_State *L, int nb, char *fcn)
+{
+ if (lua_gettop(L) == nb)
+ return;
+ WILL_LJMP(luaL_error(L, "'%s' needs %d arguments", fcn, nb));
+}
+
+/* This function pushes an error string prefixed by the file name
+ * and the line number where the error is encountered.
+ */
+static int hlua_pusherror(lua_State *L, const char *fmt, ...)
+{
+ va_list argp;
+ va_start(argp, fmt);
+ luaL_where(L, 1);
+ lua_pushvfstring(L, fmt, argp);
+ va_end(argp);
+ lua_concat(L, 2);
+ return 1;
+}
+
+/* This function is used with sample fetches and converters. It
+ * converts the HAProxy configuration arguments into Lua stack
+ * values.
+ *
+ * It takes an array of "arg", and each entry of the array is
+ * converted and pushed onto the Lua stack.
+ */
+static int hlua_arg2lua(lua_State *L, const struct arg *arg)
+{
+ switch (arg->type) {
+ case ARGT_SINT:
+ case ARGT_TIME:
+ case ARGT_SIZE:
+ lua_pushinteger(L, arg->data.sint);
+ break;
+
+ case ARGT_STR:
+ lua_pushlstring(L, arg->data.str.area, arg->data.str.data);
+ break;
+
+ case ARGT_IPV4:
+ case ARGT_IPV6:
+ case ARGT_MSK4:
+ case ARGT_MSK6:
+ case ARGT_FE:
+ case ARGT_BE:
+ case ARGT_TAB:
+ case ARGT_SRV:
+ case ARGT_USR:
+ case ARGT_MAP:
+ default:
+ lua_pushnil(L);
+ break;
+ }
+ return 1;
+}
+
+/* This function takes one entry from the Lua stack at the index "ud",
+ * and tries to convert it into an HAProxy argument entry. This is useful
+ * with sample fetch wrappers. The input arguments are given to the
+ * lua wrapper and converted as an arg list by the function.
+ */
+static int hlua_lua2arg(lua_State *L, int ud, struct arg *arg)
+{
+ switch (lua_type(L, ud)) {
+
+ case LUA_TNUMBER:
+ case LUA_TBOOLEAN:
+ arg->type = ARGT_SINT;
+ arg->data.sint = lua_tointeger(L, ud);
+ break;
+
+ case LUA_TSTRING:
+ arg->type = ARGT_STR;
+ arg->data.str.area = (char *)lua_tolstring(L, ud, &arg->data.str.data);
+ /* We don't know the actual size of the underlying allocation, so be conservative. */
+ arg->data.str.size = arg->data.str.data+1; /* count the terminating null byte */
+ arg->data.str.head = 0;
+ break;
+
+ case LUA_TUSERDATA:
+ case LUA_TNIL:
+ case LUA_TTABLE:
+ case LUA_TFUNCTION:
+ case LUA_TTHREAD:
+ case LUA_TLIGHTUSERDATA:
+ arg->type = ARGT_SINT;
+ arg->data.sint = 0;
+ break;
+ }
+ return 1;
+}
+
+/* The following function is used to convert a struct sample
+ * into a Lua type. This is useful to convert the return of the
+ * fetches or converters.
+ */
+static int hlua_smp2lua(lua_State *L, struct sample *smp)
+{
+ switch (smp->data.type) {
+ case SMP_T_SINT:
+ case SMP_T_BOOL:
+ lua_pushinteger(L, smp->data.u.sint);
+ break;
+
+ case SMP_T_BIN:
+ case SMP_T_STR:
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ break;
+
+ case SMP_T_METH:
+ switch (smp->data.u.meth.meth) {
+ case HTTP_METH_OPTIONS: lua_pushstring(L, "OPTIONS"); break;
+ case HTTP_METH_GET: lua_pushstring(L, "GET"); break;
+ case HTTP_METH_HEAD: lua_pushstring(L, "HEAD"); break;
+ case HTTP_METH_POST: lua_pushstring(L, "POST"); break;
+ case HTTP_METH_PUT: lua_pushstring(L, "PUT"); break;
+ case HTTP_METH_DELETE: lua_pushstring(L, "DELETE"); break;
+ case HTTP_METH_TRACE: lua_pushstring(L, "TRACE"); break;
+ case HTTP_METH_CONNECT: lua_pushstring(L, "CONNECT"); break;
+ case HTTP_METH_OTHER:
+ lua_pushlstring(L, smp->data.u.meth.str.area, smp->data.u.meth.str.data);
+ break;
+ default:
+ lua_pushnil(L);
+ break;
+ }
+ break;
+
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ case SMP_T_ADDR: /* This type is never used to qualify a sample. */
+ if (sample_casts[smp->data.type][SMP_T_STR] &&
+ sample_casts[smp->data.type][SMP_T_STR](smp))
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ else
+ lua_pushnil(L);
+ break;
+ default:
+ lua_pushnil(L);
+ break;
+ }
+ return 1;
+}
+
+/* The following function is used to convert a struct sample
+ * into a Lua string. This is useful to convert the return of the
+ * fetches or converters.
+ */
+static int hlua_smp2lua_str(lua_State *L, struct sample *smp)
+{
+ switch (smp->data.type) {
+
+ case SMP_T_BIN:
+ case SMP_T_STR:
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ break;
+
+ case SMP_T_METH:
+ switch (smp->data.u.meth.meth) {
+ case HTTP_METH_OPTIONS: lua_pushstring(L, "OPTIONS"); break;
+ case HTTP_METH_GET: lua_pushstring(L, "GET"); break;
+ case HTTP_METH_HEAD: lua_pushstring(L, "HEAD"); break;
+ case HTTP_METH_POST: lua_pushstring(L, "POST"); break;
+ case HTTP_METH_PUT: lua_pushstring(L, "PUT"); break;
+ case HTTP_METH_DELETE: lua_pushstring(L, "DELETE"); break;
+ case HTTP_METH_TRACE: lua_pushstring(L, "TRACE"); break;
+ case HTTP_METH_CONNECT: lua_pushstring(L, "CONNECT"); break;
+ case HTTP_METH_OTHER:
+ lua_pushlstring(L, smp->data.u.meth.str.area, smp->data.u.meth.str.data);
+ break;
+ default:
+ lua_pushstring(L, "");
+ break;
+ }
+ break;
+
+ case SMP_T_SINT:
+ case SMP_T_BOOL:
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ case SMP_T_ADDR: /* This type is never used to qualify a sample. */
+ if (sample_casts[smp->data.type][SMP_T_STR] &&
+ sample_casts[smp->data.type][SMP_T_STR](smp))
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ else
+ lua_pushstring(L, "");
+ break;
+ default:
+ lua_pushstring(L, "");
+ break;
+ }
+ return 1;
+}
+
+/* The following function is used to convert a Lua type into a
+ * struct sample. This is useful to return data from the Lua code
+ * to HAProxy, typically a converter's result.
+ */
+static int hlua_lua2smp(lua_State *L, int ud, struct sample *smp)
+{
+ switch (lua_type(L, ud)) {
+
+ case LUA_TNUMBER:
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = lua_tointeger(L, ud);
+ break;
+
+
+ case LUA_TBOOLEAN:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = lua_toboolean(L, ud);
+ break;
+
+ case LUA_TSTRING:
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = (char *)lua_tolstring(L, ud, &smp->data.u.str.data);
+ /* We don't know the actual size of the underlying allocation, so be conservative. */
+ smp->data.u.str.size = smp->data.u.str.data+1; /* count the terminating null byte */
+ smp->data.u.str.head = 0;
+ break;
+
+ case LUA_TUSERDATA:
+ case LUA_TNIL:
+ case LUA_TTABLE:
+ case LUA_TFUNCTION:
+ case LUA_TTHREAD:
+ case LUA_TLIGHTUSERDATA:
+ case LUA_TNONE:
+ default:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ break;
+ }
+ return 1;
+}
+
+/* This function check the "argp" built by another conversion function
+ * is in accord with the expected argp defined by the "mask". The function
+ * returns true or false. It can be adjust the types if there compatibles.
+ *
+ * This function assumes that the argp argument contains ARGM_NBARGS + 1
+ * entries and that there is at least one stop at the last position.
+ */
+__LJMP int hlua_lua2arg_check(lua_State *L, int first, struct arg *argp,
+ uint64_t mask, struct proxy *p)
+{
+ int min_arg;
+ int idx;
+ struct proxy *px;
+ struct userlist *ul;
+ struct my_regex *reg;
+ const char *msg = NULL;
+ char *sname, *pname, *err = NULL;
+
+ idx = 0;
+ min_arg = ARGM(mask);
+ mask >>= ARGM_BITS;
+
+ while (1) {
+ struct buffer tmp = BUF_NULL;
+
+ /* Check for mandatory arguments. */
+ if (argp[idx].type == ARGT_STOP) {
+ if (idx < min_arg) {
+
+ /* If an argument other than the first one is missing, we return an error. */
+ if (idx > 0) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+
+ /* If the first argument has a certain type, some default values
+ * may be used. See the function smp_resolve_args().
+ */
+ switch (mask & ARGT_MASK) {
+
+ case ARGT_FE:
+ if (!(p->cap & PR_CAP_FE)) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+ argp[idx].data.prx = p;
+ argp[idx].type = ARGT_FE;
+ argp[idx+1].type = ARGT_STOP;
+ break;
+
+ case ARGT_BE:
+ if (!(p->cap & PR_CAP_BE)) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+ argp[idx].data.prx = p;
+ argp[idx].type = ARGT_BE;
+ argp[idx+1].type = ARGT_STOP;
+ break;
+
+ case ARGT_TAB:
+ if (!p->table) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+ argp[idx].data.t = p->table;
+ argp[idx].type = ARGT_TAB;
+ argp[idx+1].type = ARGT_STOP;
+ break;
+
+ default:
+ msg = "Mandatory argument expected";
+ goto error;
+ break;
+ }
+ }
+ break;
+ }
+
+ /* Check that the number of required arguments is not exceeded. */
+ if ((mask & ARGT_MASK) == ARGT_STOP &&
+ argp[idx].type != ARGT_STOP) {
+ msg = "Last argument expected";
+ goto error;
+ }
+
+ if ((mask & ARGT_MASK) == ARGT_STOP &&
+ argp[idx].type == ARGT_STOP) {
+ break;
+ }
+
+ /* Convert some argument types. The strings in argp[] are not
+ * duplicated yet.
+ */
+ switch (mask & ARGT_MASK) {
+ case ARGT_SINT:
+ if (argp[idx].type != ARGT_SINT) {
+ msg = "integer expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_SINT;
+ break;
+
+ case ARGT_TIME:
+ if (argp[idx].type != ARGT_SINT) {
+ msg = "integer expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_TIME;
+ break;
+
+ case ARGT_SIZE:
+ if (argp[idx].type != ARGT_SINT) {
+ msg = "integer expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_SIZE;
+ break;
+
+ case ARGT_FE:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ argp[idx].data.prx = proxy_fe_by_name(argp[idx].data.str.area);
+ if (!argp[idx].data.prx) {
+ msg = "frontend doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_FE;
+ break;
+
+ case ARGT_BE:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ argp[idx].data.prx = proxy_be_by_name(argp[idx].data.str.area);
+ if (!argp[idx].data.prx) {
+ msg = "backend doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_BE;
+ break;
+
+ case ARGT_TAB:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ argp[idx].data.t = stktable_find_by_name(argp[idx].data.str.area);
+ if (!argp[idx].data.t) {
+ msg = "table doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_TAB;
+ break;
+
+ case ARGT_SRV:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ sname = strrchr(argp[idx].data.str.area, '/');
+ if (sname) {
+ *sname++ = '\0';
+ pname = argp[idx].data.str.area;
+ px = proxy_be_by_name(pname);
+ if (!px) {
+ msg = "backend doesn't exist";
+ goto error;
+ }
+ }
+ else {
+ sname = argp[idx].data.str.area;
+ px = p;
+ }
+ argp[idx].data.srv = findserver(px, sname);
+ if (!argp[idx].data.srv) {
+ msg = "server doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_SRV;
+ break;
+
+ case ARGT_IPV4:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ if (inet_pton(AF_INET, argp[idx].data.str.area, &argp[idx].data.ipv4) != 1) {
+ msg = "invalid IPv4 address";
+ goto error;
+ }
+ argp[idx].type = ARGT_IPV4;
+ break;
+
+ case ARGT_MSK4:
+ if (argp[idx].type == ARGT_SINT)
+ len2mask4(argp[idx].data.sint, &argp[idx].data.ipv4);
+ else if (argp[idx].type == ARGT_STR) {
+ if (!str2mask(argp[idx].data.str.area, &argp[idx].data.ipv4)) {
+ msg = "invalid IPv4 mask";
+ goto error;
+ }
+ }
+ else {
+ msg = "integer or string expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_MSK4;
+ break;
+
+ case ARGT_IPV6:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ if (inet_pton(AF_INET6, argp[idx].data.str.area, &argp[idx].data.ipv6) != 1) {
+ msg = "invalid IPv6 address";
+ goto error;
+ }
+ argp[idx].type = ARGT_IPV6;
+ break;
+
+ case ARGT_MSK6:
+ if (argp[idx].type == ARGT_SINT)
+ len2mask6(argp[idx].data.sint, &argp[idx].data.ipv6);
+ else if (argp[idx].type == ARGT_STR) {
+ if (!str2mask6(argp[idx].data.str.area, &argp[idx].data.ipv6)) {
+ msg = "invalid IPv6 mask";
+ goto error;
+ }
+ }
+ else {
+ msg = "integer or string expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_MSK6;
+ break;
+
+ case ARGT_REG:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ reg = regex_comp(argp[idx].data.str.area, !(argp[idx].type_flags & ARGF_REG_ICASE), 1, &err);
+ if (!reg) {
+ msg = lua_pushfstring(L, "error compiling regex '%s' : '%s'",
+ argp[idx].data.str.area, err);
+ free(err);
+ goto error;
+ }
+ argp[idx].type = ARGT_REG;
+ argp[idx].data.reg = reg;
+ break;
+
+ case ARGT_USR:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ if (p->uri_auth && p->uri_auth->userlist &&
+ strcmp(p->uri_auth->userlist->name, argp[idx].data.str.area) == 0)
+ ul = p->uri_auth->userlist;
+ else
+ ul = auth_find_userlist(argp[idx].data.str.area);
+
+ if (!ul) {
+ msg = lua_pushfstring(L, "unable to find userlist '%s'", argp[idx].data.str.area);
+ goto error;
+ }
+ argp[idx].type = ARGT_USR;
+ argp[idx].data.usr = ul;
+ break;
+
+ case ARGT_STR:
+ if (!chunk_dup(&tmp, &argp[idx].data.str)) {
+ msg = "unable to duplicate string arg";
+ goto error;
+ }
+ argp[idx].data.str = tmp;
+ break;
+
+ case ARGT_MAP:
+ msg = "type not yet supported";
+ goto error;
+ break;
+
+ }
+
+ /* Check for type of argument. */
+ if ((mask & ARGT_MASK) != argp[idx].type) {
+ msg = lua_pushfstring(L, "'%s' expected, got '%s'",
+ arg_type_names[(mask & ARGT_MASK)],
+ arg_type_names[argp[idx].type & ARGT_MASK]);
+ goto error;
+ }
+
+ /* Next argument. */
+ mask >>= ARGT_BITS;
+ idx++;
+ }
+ return 0;
+
+ error:
+ argp[idx].type = ARGT_STOP;
+ free_args(argp);
+ WILL_LJMP(luaL_argerror(L, first + idx, msg));
+ return 0; /* Never reached */
+}
+
+/*
+ * The following functions are used to make the correspondence between
+ * the executing lua_State pointer and the "struct hlua *" that contains
+ * the context.
+ *
+ * - hlua_gethlua : returns the hlua context associated with an lua_State.
+ * - hlua_sethlua : creates the association between hlua context and lua_State.
+ */
+static inline struct hlua *hlua_gethlua(lua_State *L)
+{
+ struct hlua **hlua = lua_getextraspace(L);
+ return *hlua;
+}
+static inline void hlua_sethlua(struct hlua *hlua)
+{
+ struct hlua **hlua_store = lua_getextraspace(hlua->T);
+ *hlua_store = hlua;
+}
+
+/* This function is used to send logs. It tries to write to stderr
+ * and to the default syslog server.
+ */
+static inline void hlua_sendlog(struct proxy *px, int level, const char *msg)
+{
+ struct tm tm;
+ char *p;
+
+ /* Cleanup the log message. */
+ p = trash.area;
+ for (; *msg != '\0'; msg++, p++) {
+ if (p >= trash.area + trash.size - 1) {
+ /* Truncate the message if it exceeds the buffer size. */
+ *(p-4) = ' ';
+ *(p-3) = '.';
+ *(p-2) = '.';
+ *(p-1) = '.';
+ break;
+ }
+ if (isprint((unsigned char)*msg))
+ *p = *msg;
+ else
+ *p = '.';
+ }
+ *p = '\0';
+
+ send_log(px, level, "%s\n", trash.area);
+ if (!(global.mode & MODE_QUIET) || (global.mode & (MODE_VERBOSE | MODE_STARTING))) {
+ if (level == LOG_DEBUG && !(global.mode & MODE_DEBUG))
+ return;
+
+ get_localtime(date.tv_sec, &tm);
+ fprintf(stderr, "[%s] %03d/%02d%02d%02d (%d) : %s\n",
+ log_levels[level], tm.tm_yday, tm.tm_hour, tm.tm_min, tm.tm_sec,
+ (int)getpid(), trash.area);
+ fflush(stderr);
+ }
+}
+
+/* This function just ensures that the yield is always returned
+ * with a timeout and permits setting some flags.
+ */
+__LJMP void hlua_yieldk(lua_State *L, int nresults, int ctx,
+ lua_KFunction k, int timeout, unsigned int flags)
+{
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ return;
+ }
+
+ /* Set the wake timeout. If timeout is required, we set
+ * the expiration time.
+ */
+ hlua->wake_time = timeout;
+
+ hlua->flags |= flags;
+
+ /* Process the yield. */
+ MAY_LJMP(lua_yieldk(L, nresults, ctx, k));
+}
+
+/* This function initialises the Lua environment stored in the stream.
+ * It must be called at the start of the stream. This function creates
+ * a Lua coroutine. It cannot be used to create the main Lua context.
+ *
+ * This function is particular. It initialises a new Lua thread. If the
+ * initialisation fails (example: out of memory error), the lua function
+ * throws an error (longjmp).
+ *
+ * In some cases (at least one), this function can be called from a safe
+ * environment, so we must not initialise it again. Since the support of
+ * threads appeared, the safe environment sets a lock to ensure only one
+ * Lua execution at a time. If we initialise a safe environment in another
+ * safe environment, we get a deadlock.
+ *
+ * Set "already_safe" to true if the context is initialised from a safe
+ * Lua function.
+ *
+ * This function manipulates two Lua stacks: the main and the thread. Only
+ * the main stack can fail. The thread is not manipulated. This function
+ * MUST NOT manipulate the created thread stack state, because it is not
+ * protected against errors thrown by the thread stack.
+ */
+int hlua_ctx_init(struct hlua *lua, int state_id, struct task *task, int already_safe)
+{
+ lua->Mref = LUA_REFNIL;
+ lua->flags = 0;
+ lua->gc_count = 0;
+ lua->wake_time = TICK_ETERNITY;
+ lua->state_id = state_id;
+ LIST_INIT(&lua->com);
+ MT_LIST_INIT(&lua->hc_list);
+ if (!already_safe) {
+ if (!SET_SAFE_LJMP_PARENT(lua)) {
+ lua->Tref = LUA_REFNIL;
+ return 0;
+ }
+ }
+ lua->T = lua_newthread(hlua_states[state_id]);
+ if (!lua->T) {
+ lua->Tref = LUA_REFNIL;
+ if (!already_safe)
+ RESET_SAFE_LJMP_PARENT(lua);
+ return 0;
+ }
+ hlua_sethlua(lua);
+ lua->Tref = luaL_ref(hlua_states[state_id], LUA_REGISTRYINDEX);
+ lua->task = task;
+ if (!already_safe)
+ RESET_SAFE_LJMP_PARENT(lua);
+ return 1;
+}
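+
+/* A minimal sketch of the expected call pattern (error handling elided;
+ * "task" and "fcn" stand for the caller's objects):
+ *
+ *   struct hlua *hlua = pool_alloc(pool_head_hlua);
+ *   if (!hlua || !hlua_ctx_init(hlua, fcn_ref_to_stack_id(fcn), task, 0))
+ *       ... report the failure and release hlua ...
+ */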
+
+/* Kill all httpclients associated with this hlua task.
+ * We must take extra precautions as we're manipulating lua-exposed
+ * objects without the main lua lock.
+ */
+static void hlua_httpclient_destroy_all(struct hlua *hlua)
+{
+ struct hlua_httpclient *hlua_hc;
+
+ /* use thread-safe accessors for hc_list since GC cycle initiated by
+ * another thread sharing the same main lua stack (lua coroutine)
+ * could execute hlua_httpclient_gc() on the hlua->hc_list items
+ * in parallel: Lua GC applies on the main stack, it is not limited to
+ * a single coroutine stack, see Github issue #2037 for reference.
+ * Remember, coroutines created using lua_newthread() are not meant to
+ * be thread safe in Lua. (From lua co-author:
+ * http://lua-users.org/lists/lua-l/2011-07/msg00072.html)
+ *
+ * This security measure is superfluous when 'lua-load-per-thread' is used
+ * since in this case coroutines exclusively run on the same thread
+ * (main stack is not shared between OS threads).
+ */
+ while ((hlua_hc = MT_LIST_POP(&hlua->hc_list, typeof(hlua_hc), by_hlua))) {
+ httpclient_stop_and_destroy(hlua_hc->hc);
+ hlua_hc->hc = NULL;
+ }
+}
+
+
+/* Used to destroy the Lua coroutine when the attached stream or task
+ * is destroyed. It also destroys the memory context, and finally
+ * releases the struct "lua" itself back to its pool.
+ */
+void hlua_ctx_destroy(struct hlua *lua)
+{
+ if (!lua)
+ return;
+
+ if (!lua->T)
+ goto end;
+
+ /* clean all running httpclient */
+ hlua_httpclient_destroy_all(lua);
+
+ /* Purge all the pending signals. */
+ notification_purge(&lua->com);
+
+ if (!SET_SAFE_LJMP(lua))
+ return;
+ luaL_unref(lua->T, LUA_REGISTRYINDEX, lua->Mref);
+ RESET_SAFE_LJMP(lua);
+
+ if (!SET_SAFE_LJMP_PARENT(lua))
+ return;
+ luaL_unref(hlua_states[lua->state_id], LUA_REGISTRYINDEX, lua->Tref);
+ RESET_SAFE_LJMP_PARENT(lua);
+ /* Force a garbage collecting pass. If the Lua program finished
+ * without error, we run the GC on the thread pointer, which frees
+ * all the unused memory.
+ * If the thread finished with an error or is currently yielded,
+ * it seems that the GC applied on the thread doesn't clean anything,
+ * so we run the GC on the main state.
+ * NOTE: this action may lock all the Lua threads until the end of
+ * the garbage collection.
+ */
+ if (lua->gc_count) {
+ if (!SET_SAFE_LJMP_PARENT(lua))
+ return;
+ lua_gc(hlua_states[lua->state_id], LUA_GCCOLLECT, 0);
+ RESET_SAFE_LJMP_PARENT(lua);
+ }
+
+ lua->T = NULL;
+
+end:
+ pool_free(pool_head_hlua, lua);
+}
+
+/* This function is used to restore the Lua context when a coroutine
+ * fails. It copies the common memory between the old coroutine and
+ * the new one. The old coroutine is destroyed and replaced by the
+ * new one.
+ * If the flag "keep_msg" is set, the last entry of the old stack is
+ * assumed to be the error message string and is copied to the new
+ * stack.
+ */
+static int hlua_ctx_renew(struct hlua *lua, int keep_msg)
+{
+ lua_State *T;
+ int new_ref;
+
+ /* New Lua coroutine. */
+ T = lua_newthread(hlua_states[lua->state_id]);
+ if (!T)
+ return 0;
+
+ /* Copy last error message. */
+ if (keep_msg)
+ lua_xmove(lua->T, T, 1);
+
+ /* Copy data between the coroutines. */
+ lua_rawgeti(lua->T, LUA_REGISTRYINDEX, lua->Mref);
+ lua_xmove(lua->T, T, 1);
+ new_ref = luaL_ref(T, LUA_REGISTRYINDEX); /* Value popped. */
+
+ /* Destroy old data. */
+ luaL_unref(lua->T, LUA_REGISTRYINDEX, lua->Mref);
+
+ /* The thread is garbage collected by Lua. */
+ luaL_unref(hlua_states[lua->state_id], LUA_REGISTRYINDEX, lua->Tref);
+
+ /* Fill the struct with the new coroutine values. */
+ lua->Mref = new_ref;
+ lua->T = T;
+ lua->Tref = luaL_ref(hlua_states[lua->state_id], LUA_REGISTRYINDEX);
+
+ /* Set context. */
+ hlua_sethlua(lua);
+
+ return 1;
+}
+
+void hlua_hook(lua_State *L, lua_Debug *ar)
+{
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return;
+
+ /* Lua cannot yield when it is returning from a function,
+ * so we can set the interrupt hook to 1 instruction,
+ * expecting that the function finishes soon.
+ */
+ if (lua_gethookmask(L) & LUA_MASKRET) {
+ lua_sethook(hlua->T, hlua_hook, LUA_MASKCOUNT, 1);
+ return;
+ }
+
+ /* restore the interrupt condition. */
+ lua_sethook(hlua->T, hlua_hook, LUA_MASKCOUNT, hlua_nb_instruction);
+
+ /* If we interrupt the Lua processing in a yieldable state, we yield.
+ * If the state is not yieldable, trying to yield causes an error.
+ */
+ if (lua_isyieldable(L))
+ MAY_LJMP(hlua_yieldk(L, 0, 0, NULL, TICK_ETERNITY, HLUA_CTRLYIELD));
+
+ /* If we cannot yield, update the clock and check the timeout. */
+ clock_update_date(0, 1);
+ hlua->run_time += now_ms - hlua->start_time;
+ if (hlua->max_time && hlua->run_time >= hlua->max_time) {
+ lua_pushfstring(L, "execution timeout");
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Update the start time. */
+ hlua->start_time = now_ms;
+
+ /* Try to interrupt the process at the end of the current
+ * unyieldable function.
+ */
+ lua_sethook(hlua->T, hlua_hook, LUA_MASKRET|LUA_MASKCOUNT, hlua_nb_instruction);
+}
+
+/* This function starts or resumes the Lua stack execution. If the flag
+ * "yield_allowed" is not set and the Lua stack execution returns a yield,
+ * the function returns an error.
+ *
+ * The function can return 4 values:
+ * - HLUA_E_OK : The execution is terminated without any errors.
+ * - HLUA_E_AGAIN : The execution must continue at the next associated
+ * task wakeup.
+ * - HLUA_E_ERRMSG : An error has occurred, an error message is set in
+ * the top of the stack.
+ * - HLUA_E_ERR : An error has occurred without error message.
+ *
+ * If an error occurred, the stack is renewed and it is ready to run new
+ * LUA code.
+ */
+static enum hlua_exec hlua_ctx_resume(struct hlua *lua, int yield_allowed)
+{
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ int nres;
+#endif
+ int ret;
+ const char *msg;
+ const char *trace;
+
+ /* Initialise run time counter. */
+ if (!HLUA_IS_RUNNING(lua))
+ lua->run_time = 0;
+
+ /* Lock the whole Lua execution. This lock must be before the
+ * label "resume_execution".
+ */
+ if (lua->state_id == 0)
+ lua_take_global_lock();
+
+resume_execution:
+
+ /* This hook interrupts the Lua processing every 'hlua_nb_instruction'
+ * instructions. It is used to prevent infinite loops.
+ */
+ lua_sethook(lua->T, hlua_hook, LUA_MASKCOUNT, hlua_nb_instruction);
+
+ /* Remove all flags except the running flags. */
+ HLUA_SET_RUN(lua);
+ HLUA_CLR_CTRLYIELD(lua);
+ HLUA_CLR_WAKERESWR(lua);
+ HLUA_CLR_WAKEREQWR(lua);
+ HLUA_CLR_NOYIELD(lua);
+ if (!yield_allowed)
+ HLUA_SET_NOYIELD(lua);
+
+ /* Update the start time and reset wake_time. */
+ lua->start_time = now_ms;
+ lua->wake_time = TICK_ETERNITY;
+
+ /* Call the function. */
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs, &nres);
+#else
+ ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs);
+#endif
+ switch (ret) {
+
+ case LUA_OK:
+ ret = HLUA_E_OK;
+ break;
+
+ case LUA_YIELD:
+ /* Check if the execution timeout is expired. If it is the case, we
+ * break the Lua execution.
+ */
+ clock_update_date(0, 1);
+ lua->run_time += now_ms - lua->start_time;
+ if (lua->max_time && lua->run_time > lua->max_time) {
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_ETMOUT;
+ break;
+ }
+ /* Process the forced yield. If the general yield is not allowed or
+ * if no task was associated with the current Lua execution
+ * coroutine, we resume the execution. Otherwise we want to return to
+ * the scheduler and to be woken up again in order to continue the
+ * current Lua execution. So we schedule our own task.
+ */
+ if (HLUA_IS_CTRLYIELDING(lua)) {
+ if (!yield_allowed || !lua->task)
+ goto resume_execution;
+ task_wakeup(lua->task, TASK_WOKEN_MSG);
+ }
+ if (!yield_allowed) {
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_YIELD;
+ break;
+ }
+ ret = HLUA_E_AGAIN;
+ break;
+
+ case LUA_ERRRUN:
+
+ /* Special exit case. The traditional exit is returned as an error
+ * because errors are the only way to return immediately from
+ * a Lua execution.
+ */
+ if (lua->flags & HLUA_EXIT) {
+ ret = HLUA_E_OK;
+ hlua_ctx_renew(lua, 1);
+ break;
+ }
+
+ lua->wake_time = TICK_ETERNITY;
+ if (!lua_checkstack(lua->T, 1)) {
+ ret = HLUA_E_ERR;
+ break;
+ }
+ msg = lua_tostring(lua->T, -1);
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ lua_pop(lua->T, 1);
+ trace = hlua_traceback(lua->T, ", ");
+ if (msg)
+ lua_pushfstring(lua->T, "[state-id %d] runtime error: %s from %s", lua->state_id, msg, trace);
+ else
+ lua_pushfstring(lua->T, "[state-id %d] unknown runtime error from %s", lua->state_id, trace);
+ ret = HLUA_E_ERRMSG;
+ break;
+
+ case LUA_ERRMEM:
+ lua->wake_time = TICK_ETERNITY;
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_NOMEM;
+ break;
+
+ case LUA_ERRERR:
+ lua->wake_time = TICK_ETERNITY;
+ if (!lua_checkstack(lua->T, 1)) {
+ ret = HLUA_E_ERR;
+ break;
+ }
+ msg = lua_tostring(lua->T, -1);
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ lua_pop(lua->T, 1);
+ if (msg)
+ lua_pushfstring(lua->T, "[state-id %d] message handler error: %s", lua->state_id, msg);
+ else
+ lua_pushfstring(lua->T, "[state-id %d] message handler error", lua->state_id);
+ ret = HLUA_E_ERRMSG;
+ break;
+
+ default:
+ lua->wake_time = TICK_ETERNITY;
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_ERR;
+ break;
+ }
+
+ switch (ret) {
+ case HLUA_E_AGAIN:
+ break;
+
+ case HLUA_E_ERRMSG:
+ notification_purge(&lua->com);
+ hlua_ctx_renew(lua, 1);
+ HLUA_CLR_RUN(lua);
+ break;
+
+ case HLUA_E_ETMOUT:
+ case HLUA_E_NOMEM:
+ case HLUA_E_YIELD:
+ case HLUA_E_ERR:
+ HLUA_CLR_RUN(lua);
+ notification_purge(&lua->com);
+ hlua_ctx_renew(lua, 0);
+ break;
+
+ case HLUA_E_OK:
+ HLUA_CLR_RUN(lua);
+ notification_purge(&lua->com);
+ break;
+ }
+
+ /* This is the main exit point, remove the Lua lock. */
+ if (lua->state_id == 0)
+ lua_drop_global_lock();
+
+ return ret;
+}
+
+/* This function exits the current Lua code. */
+__LJMP static int hlua_done(lua_State *L)
+{
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ hlua->flags |= HLUA_EXIT;
+ WILL_LJMP(lua_error(L));
+
+ return 0;
+}
+
+/* This function is a Lua binding. It provides a function
+ * for deleting an ACL entry from a referenced ACL file.
+ */
+__LJMP static int hlua_del_acl(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 2, "del_acl"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'del_acl': unknown acl file '%s'", name));
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ref->lock);
+ pat_ref_delete(ref, key);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
+
+/* This function is a Lua binding. It provides a function
+ * for deleting a map entry from a referenced map file.
+ */
+static int hlua_del_map(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 2, "del_map"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'del_map': unknown acl file '%s'", name));
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ref->lock);
+ pat_ref_delete(ref, key);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
+
+/* This function is a Lua binding. It provides a function
+ * for adding an ACL pattern to a referenced ACL file.
+ */
+static int hlua_add_acl(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 2, "add_acl"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'add_acl': unknown acl file '%s'", name));
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ref->lock);
+ if (pat_ref_find_elt(ref, key) == NULL)
+ pat_ref_add(ref, key, NULL, NULL);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
+
+/* This function is a Lua binding. It provides a function
+ * for setting a map pattern and sample in a referenced map
+ * file.
+ */
+static int hlua_set_map(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ const char *value;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 3, "set_map"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+ value = MAY_LJMP(luaL_checkstring(L, 3));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'set_map': unknown map file '%s'", name));
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ref->lock);
+ if (pat_ref_find_elt(ref, key) != NULL)
+ pat_ref_set(ref, key, value, NULL);
+ else
+ pat_ref_add(ref, key, value, NULL);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
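+
+/* Lua-side usage of the four bindings above (illustrative; the file names
+ * are placeholders and must reference files already loaded by the config):
+ *
+ *   core.add_acl("blocklist.acl", "10.0.0.1")
+ *   core.del_acl("blocklist.acl", "10.0.0.1")
+ *   core.set_map("hosts.map", "example.com", "bk_web")
+ *   core.del_map("hosts.map", "example.com")
+ */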
+
+/* A class is a chunk of memory that contains data. This data can be a table,
+ * an integer or userdata. This data is associated with a metatable. This
+ * metatable has an original version registered in the global context with
+ * the name of the object (_G[<name>] = <metatable>).
+ *
+ * A metatable is a table that modifies the standard behaviour of standard
+ * accesses to the associated data. The entries of this new metatable are
+ * defined as follows:
+ *
+ * http://lua-users.org/wiki/MetatableEvents
+ *
+ * __index
+ *
+ * When we access an absent field in a table, the result is nil. This is
+ * true, but it is not the whole truth. Actually, such access triggers
+ * the interpreter to look for an __index metamethod: If there is no
+ * such method, as usually happens, then the access results in nil;
+ * otherwise, the metamethod will provide the result.
+ *
+ * Control 'prototype' inheritance. When accessing "myTable[key]" and
+ * the key does not appear in the table, but the metatable has an __index
+ * property:
+ *
+ * - if the value is a function, the function is called, passing in the
+ * table and the key; the return value of that function is returned as
+ * the result.
+ *
+ * - if the value is another table, the value of the key in that table is
+ * asked for and returned (and if it doesn't exist in that table, but that
+ * table's metatable has an __index property, then it continues on up)
+ *
+ * - Use "rawget(myTable,key)" to skip this metamethod.
+ *
+ * http://www.lua.org/pil/13.4.1.html
+ *
+ * __newindex
+ *
+ * Like __index, but control property assignment.
+ *
+ * __mode - Control weak references. A string value with one or both
+ * of the characters 'k' and 'v' which specifies that the
+ * keys and/or values in the table are weak references.
+ *
+ * __call - Treat a table like a function. When a table is followed by
+ * parenthesis such as "myTable( 'foo' )" and the metatable has
+ * a __call key pointing to a function, that function is invoked
+ * (passing any specified arguments) and the return value is
+ * returned.
+ *
+ * __metatable - Hide the metatable. When "getmetatable( myTable )" is
+ * called, if the metatable for myTable has a __metatable
+ * key, the value of that key is returned instead of the
+ * actual metatable.
+ *
+ * __tostring - Control string representation. When the builtin
+ * "tostring( myTable )" function is called, if the metatable
+ * for myTable has a __tostring property set to a function,
+ * that function is invoked (passing myTable to it) and the
+ * return value is used as the string representation.
+ *
+ * __len - Control table length. When the table length is requested using
+ * the length operator ( '#' ), if the metatable for myTable has
+ * a __len key pointing to a function, that function is invoked
+ * (passing myTable to it) and the return value used as the value
+ * of "#myTable".
+ *
+ * __gc - Userdata finalizer code. When userdata is set to be garbage
+ * collected, if the metatable has a __gc field pointing to a
+ * function, that function is first invoked, passing the userdata
+ * to it. The __gc metamethod is not called for tables.
+ * (See http://lua-users.org/lists/lua-l/2006-11/msg00508.html)
+ *
+ * Special metamethods for redefining standard operators:
+ * http://www.lua.org/pil/13.1.html
+ *
+ * __add "+"
+ * __sub "-"
+ * __mul "*"
+ * __div "/"
+ * __unm "!"
+ * __pow "^"
+ * __concat ".."
+ *
+ * Special methods for redefining standard relations
+ * http://www.lua.org/pil/13.2.html
+ *
+ * __eq "=="
+ * __lt "<"
+ * __le "<="
+ */
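+
+/* A tiny Lua illustration of the two most common events above:
+ *
+ *   local mt = {
+ *       __index    = function(t, k) return "default" end,
+ *       __tostring = function(t) return "my table" end,
+ *   }
+ *   local t = setmetatable({}, mt)
+ *   print(t.missing)    --> "default"
+ *   print(tostring(t))  --> "my table"
+ */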
+
+/*
+ *
+ *
+ * Class Map
+ *
+ *
+ */
+
+/* Returns a struct map_descriptor if the stack entry "ud" is
+ * a class Map, otherwise it throws an error.
+ */
+__LJMP static struct map_descriptor *hlua_checkmap(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_map_ref));
+}
+
+/* This function is the map constructor. It doesn't need
+ * the class Map object. It creates and returns a new Map
+ * object. It must be called only during the "body" or "init"
+ * context because it performs some filesystem accesses.
+ */
+__LJMP static int hlua_map_new(struct lua_State *L)
+{
+ const char *fn;
+ int match = PAT_MATCH_STR;
+ struct sample_conv conv;
+ const char *file = "";
+ int line = 0;
+ lua_Debug ar;
+ char *err = NULL;
+ struct arg args[2];
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 2)
+ WILL_LJMP(luaL_error(L, "'new' needs at least 1 argument."));
+
+ fn = MAY_LJMP(luaL_checkstring(L, 1));
+
+ if (lua_gettop(L) >= 2) {
+ match = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (match < 0 || match >= PAT_MATCH_NUM)
+ WILL_LJMP(luaL_error(L, "'new' needs a valid match method."));
+ }
+
+ /* Get Lua filename and line number. */
+ if (lua_getstack(L, 1, &ar)) { /* check function at level */
+ lua_getinfo(L, "Sl", &ar); /* get info about it */
+ if (ar.currentline > 0) { /* is there info? */
+ file = ar.short_src;
+ line = ar.currentline;
+ }
+ }
+
+ /* fill fake sample_conv struct. */
+ conv.kw = ""; /* unused. */
+ conv.process = NULL; /* unused. */
+ conv.arg_mask = 0; /* unused. */
+ conv.val_args = NULL; /* unused. */
+ conv.out_type = SMP_T_STR;
+ conv.private = (void *)(long)match;
+ switch (match) {
+ case PAT_MATCH_STR: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_BEG: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_SUB: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_DIR: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_DOM: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_END: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_REG: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_INT: conv.in_type = SMP_T_SINT; break;
+ case PAT_MATCH_IP: conv.in_type = SMP_T_ADDR; break;
+ default:
+ WILL_LJMP(luaL_error(L, "'new' doesn't support this match mode."));
+ }
+
+ /* fill fake args. */
+ args[0].type = ARGT_STR;
+ args[0].data.str.area = strdup(fn);
+ args[0].data.str.data = strlen(fn);
+ args[0].data.str.size = args[0].data.str.data+1;
+ args[1].type = ARGT_STOP;
+
+ /* load the map. */
+ if (!sample_load_map(args, &conv, file, line, &err)) {
+ /* error case: we can't use luaL_error because we must
+ * free the err variable.
+ */
+ luaL_where(L, 1);
+ lua_pushfstring(L, "'new': %s.", err);
+ lua_concat(L, 2);
+ free(err);
+ chunk_destroy(&args[0].data.str);
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* create the lua object. */
+ lua_newtable(L);
+ lua_pushlightuserdata(L, args[0].data.map);
+ lua_rawseti(L, -2, 0);
+
+ /* Push the class Map metatable and assign it to the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_map_ref);
+ lua_setmetatable(L, -2);
+
+
+ return 1;
+}
+
+__LJMP static inline int _hlua_map_lookup(struct lua_State *L, int str)
+{
+ struct map_descriptor *desc;
+ struct pattern *pat;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "lookup"));
+ desc = MAY_LJMP(hlua_checkmap(L, 1));
+ if (desc->pat.expect_type == SMP_T_SINT) {
+ smp.data.type = SMP_T_SINT;
+ smp.data.u.sint = MAY_LJMP(luaL_checkinteger(L, 2));
+ }
+ else {
+ smp.data.type = SMP_T_STR;
+ smp.flags = SMP_F_CONST;
+ smp.data.u.str.area = (char *)MAY_LJMP(luaL_checklstring(L, 2, (size_t *)&smp.data.u.str.data));
+ smp.data.u.str.size = smp.data.u.str.data + 1;
+ }
+
+ pat = pattern_exec_match(&desc->pat, &smp, 1);
+ if (!pat || !pat->data) {
+ if (str)
+ lua_pushstring(L, "");
+ else
+ lua_pushnil(L);
+ return 1;
+ }
+
+ /* The Lua pattern must return a string, so we can't check the returned type */
+ lua_pushlstring(L, pat->data->u.str.area, pat->data->u.str.data);
+ return 1;
+}
+
+__LJMP static int hlua_map_lookup(struct lua_State *L)
+{
+ return _hlua_map_lookup(L, 0);
+}
+
+__LJMP static int hlua_map_slookup(struct lua_State *L)
+{
+ return _hlua_map_lookup(L, 1);
+}
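+
+/* Lua-side usage of the Map class (a sketch; the match method constants
+ * are registered on the Map class, e.g. Map._str for PAT_MATCH_STR):
+ *
+ *   local m = Map.new("hosts.map", Map._str)
+ *   local v = m:lookup("example.com")     -- nil when not found
+ *   local s = m:slookup("example.com")    -- "" when not found
+ */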
+
+/*
+ *
+ *
+ * Class Socket
+ *
+ *
+ */
+
+__LJMP static struct hlua_socket *hlua_checksocket(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_socket_ref));
+}
+
+/* This function is the handler called for each I/O on the established
+ * connection. It is used to notify about space available for sending
+ * or about received data.
+ */
+static void hlua_socket_handler(struct appctx *appctx)
+{
+ struct hlua_csk_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+
+ if (ctx->die) {
+ sc_shutw(sc);
+ sc_shutr(sc);
+ sc_ic(sc)->flags |= CF_READ_NULL;
+ notification_wake(&ctx->wake_on_read);
+ notification_wake(&ctx->wake_on_write);
+ stream_shutdown(__sc_strm(sc), SF_ERR_KILLED);
+ }
+
+ /* If we can't write, wakeup the pending write signals. */
+ if (channel_output_closed(sc_ic(sc)))
+ notification_wake(&ctx->wake_on_write);
+
+ /* If we can't read, wakeup the pending read signals. */
+ if (channel_input_closed(sc_oc(sc)))
+ notification_wake(&ctx->wake_on_read);
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ return;
+ }
+
+ /* This function is called after the connect. */
+ ctx->connected = 1;
+
+ /* Wake the tasks which want to write if the buffer has available space. */
+ if (channel_may_recv(sc_ic(sc)))
+ notification_wake(&ctx->wake_on_write);
+
+ /* Wake the tasks which want to read if the buffer contains data. */
+ if (!channel_is_empty(sc_oc(sc)))
+ notification_wake(&ctx->wake_on_read);
+
+ /* Some data were injected in the buffer, notify the stream
+ * interface.
+ */
+ if (!channel_is_empty(sc_ic(sc)))
+ sc_update(sc);
+
+ /* If write notifications are registered, we consider we want
+ * to write, so we clear the blocking flag.
+ */
+ if (notification_registered(&ctx->wake_on_write))
+ applet_have_more_data(appctx);
+}
+
+static int hlua_socket_init(struct appctx *appctx)
+{
+ struct hlua_csk_ctx *ctx = appctx->svcctx;
+ struct stream *s;
+
+ if (appctx_finalize_startup(appctx, socket_proxy, &BUF_NULL) == -1)
+ goto error;
+
+ s = appctx_strm(appctx);
+
+ /* Configure "right" stream connector. This stconn is used to connect
+ * and retrieve data from the server. The connection is initialized
+ * with the "struct server".
+ */
+ sc_set_state(s->scb, SC_ST_ASS);
+
+ /* Force destination server. */
+ s->flags |= SF_DIRECT | SF_ASSIGNED | SF_BE_ASSIGNED;
+ s->target = &socket_tcp->obj_type;
+
+ ctx->appctx = appctx;
+ return 0;
+
+ error:
+ return -1;
+}
+
+/* This function is called when the "struct stream" is destroyed.
+ * Remove the link from the object to this stream.
+ * Wake all the pending signals.
+ */
+static void hlua_socket_release(struct appctx *appctx)
+{
+ struct hlua_csk_ctx *ctx = appctx->svcctx;
+ struct xref *peer;
+
+ /* Remove my link in the original objects. */
+ peer = xref_get_peer_and_lock(&ctx->xref);
+ if (peer)
+ xref_disconnect(&ctx->xref, peer);
+
+ /* Wake all the tasks waiting for me. */
+ notification_wake(&ctx->wake_on_read);
+ notification_wake(&ctx->wake_on_write);
+}
+
+/* If the garbage collection of the object is launched, nobody
+ * uses this object anymore. If the stream does not exist, just
+ * quit. Otherwise send the shutdown signal to the stream. In some
+ * cases, pending signals may remain in the read and write lists;
+ * destroy them.
+ */
+__LJMP static int hlua_socket_gc(lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct hlua_csk_ctx *ctx;
+ struct xref *peer;
+
+ MAY_LJMP(check_args(L, 1, "__gc"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer)
+ return 0;
+
+ ctx = container_of(peer, struct hlua_csk_ctx, xref);
+
+ /* Set the flag which destroy the session. */
+ ctx->die = 1;
+ appctx_wakeup(ctx->appctx);
+
+ /* Remove all reference between the Lua stack and the coroutine stream. */
+ xref_disconnect(&socket->xref, peer);
+ return 0;
+}
+
+/* The close function sends a shutdown signal and breaks the
+ * links between the stream and the object.
+ */
+__LJMP static int hlua_socket_close_helper(lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct hlua_csk_ctx *ctx;
+ struct xref *peer;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer)
+ return 0;
+
+ hlua->gc_count--;
+ ctx = container_of(peer, struct hlua_csk_ctx, xref);
+
+ /* Set the flag which destroy the session. */
+ ctx->die = 1;
+ appctx_wakeup(ctx->appctx);
+
+ /* Remove all reference between the Lua stack and the coroutine stream. */
+ xref_disconnect(&socket->xref, peer);
+ return 0;
+}
+
+/* The close function calls close_helper.
+ */
+__LJMP static int hlua_socket_close(lua_State *L)
+{
+ MAY_LJMP(check_args(L, 1, "close"));
+ return hlua_socket_close_helper(L);
+}
+
+/* This Lua function assumes that the stack contains two parameters.
+ * 1 - USERDATA containing a struct socket
+ * 2 - INTEGER with the values of the macros defined below
+ * If the integer is -1, we must read at most one line.
+ * If the integer is -2, we must read all the data until the
+ * end of the stream.
+ * If the integer is a positive value, we must read a number of
+ * bytes corresponding to this value.
+ */
+#define HLSR_READ_LINE (-1)
+#define HLSR_READ_ALL (-2)
+__LJMP static int hlua_socket_receive_yield(struct lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_socket *socket = MAY_LJMP(hlua_checksocket(L, 1));
+ int wanted = lua_tointeger(L, 2);
+ struct hlua *hlua;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ size_t len;
+ int nblk;
+ const char *blk1;
+ size_t len1;
+ const char *blk2;
+ size_t len2;
+ int skip_at_end = 0;
+ struct channel *oc;
+ struct stream *s;
+ struct xref *peer;
+ int missing_bytes;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+
+ /* Check if this lua stack is schedulable. */
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'receive' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* Check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer)
+ goto no_peer;
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ appctx = csk_ctx->appctx;
+ s = appctx_strm(appctx);
+
+ oc = &s->res;
+ if (wanted == HLSR_READ_LINE) {
+ /* Read line. */
+ nblk = co_getline_nc(oc, &blk1, &len1, &blk2, &len2);
+ if (nblk < 0) /* Connection close. */
+ goto connection_closed;
+ if (nblk == 0) /* No data available. */
+ goto connection_empty;
+
+ /* remove final \r\n. */
+ if (nblk == 1) {
+ if (blk1[len1-1] == '\n') {
+ len1--;
+ skip_at_end++;
+ if (blk1[len1-1] == '\r') {
+ len1--;
+ skip_at_end++;
+ }
+ }
+ }
+ else {
+ if (blk2[len2-1] == '\n') {
+ len2--;
+ skip_at_end++;
+ if (blk2[len2-1] == '\r') {
+ len2--;
+ skip_at_end++;
+ }
+ }
+ }
+ }
+
+ else if (wanted == HLSR_READ_ALL) {
+ /* Read all the available data. */
+ nblk = co_getblk_nc(oc, &blk1, &len1, &blk2, &len2);
+ if (nblk < 0) /* Connection close. */
+ goto connection_closed;
+ if (nblk == 0) /* No data available. */
+ goto connection_empty;
+ }
+
+ else {
+ /* Read a block of data. */
+ nblk = co_getblk_nc(oc, &blk1, &len1, &blk2, &len2);
+ if (nblk < 0) /* Connection close. */
+ goto connection_closed;
+ if (nblk == 0) /* No data available. */
+ goto connection_empty;
+
+ missing_bytes = wanted - socket->b.n;
+ if (len1 > missing_bytes) {
+ nblk = 1;
+ len1 = missing_bytes;
+ }
+ if (nblk == 2 && len1 + len2 > missing_bytes)
+ len2 = missing_bytes - len1;
+ }
+
+ len = len1;
+
+ luaL_addlstring(&socket->b, blk1, len1);
+ if (nblk == 2) {
+ len += len2;
+ luaL_addlstring(&socket->b, blk2, len2);
+ }
+
+ /* Consume data. */
+ co_skip(oc, len + skip_at_end);
+
+ /* Don't wait for anything. */
+ appctx_wakeup(appctx);
+
+ /* If the pattern requires reading all the data
+ * from the connection, go out.
+ */
+ if (wanted == HLSR_READ_ALL)
+ goto connection_empty;
+ else if (wanted >= 0 && socket->b.n < wanted)
+ goto connection_empty;
+
+ /* Return result. */
+ luaL_pushresult(&socket->b);
+ xref_unlock(&socket->xref, peer);
+ return 1;
+
+connection_closed:
+
+ xref_unlock(&socket->xref, peer);
+
+no_peer:
+
+ /* If the buffer contains data. */
+ if (socket->b.n > 0) {
+ luaL_pushresult(&socket->b);
+ return 1;
+ }
+ lua_pushnil(L);
+ lua_pushstring(L, "connection closed.");
+ return 2;
+
+connection_empty:
+
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_read, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory"));
+ }
+ xref_unlock(&socket->xref, peer);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_receive_yield, TICK_ETERNITY, 0));
+ return 0;
+}
+
+/* This Lua function gets two parameters. The first one can be a string
+ * or a number. If the string is "*l", the user requires one line. If
+ * the string is "*a", the user requires all the contents of the stream.
+ * If the value is a number, the user requires a number of bytes equal
+ * to the value. The default value is "*l" (a line).
+ *
+ * This parameter with a variable type is converted into an integer
+ * which takes these values:
+ * -1 : read a line
+ * -2 : read all the stream
+ * >0 : amount of bytes.
+ *
+ * The second parameter is optional. It contains a string that must be
+ * concatenated with the read data.
+ */
+__LJMP static int hlua_socket_receive(struct lua_State *L)
+{
+ int wanted = HLSR_READ_LINE;
+ const char *pattern;
+ int lastarg, type;
+ char *error;
+ size_t len;
+ struct hlua_socket *socket;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "The 'receive' function requires between 1 and 3 arguments."));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for pattern. */
+ if (lua_gettop(L) >= 2) {
+ type = lua_type(L, 2);
+ if (type == LUA_TSTRING) {
+ pattern = lua_tostring(L, 2);
+ if (strcmp(pattern, "*a") == 0)
+ wanted = HLSR_READ_ALL;
+ else if (strcmp(pattern, "*l") == 0)
+ wanted = HLSR_READ_LINE;
+ else {
+ wanted = strtoll(pattern, &error, 10);
+ if (*error != '\0')
+ WILL_LJMP(luaL_error(L, "Unsupported pattern."));
+ }
+ }
+ else if (type == LUA_TNUMBER) {
+ wanted = lua_tointeger(L, 2);
+ if (wanted < 0)
+ WILL_LJMP(luaL_error(L, "Unsupported size."));
+ }
+ }
+
+ /* Set pattern. */
+ lua_pushinteger(L, wanted);
+
+ /* Check if we would replace the top by itself. */
+ if (lua_gettop(L) != 2)
+ lua_replace(L, 2);
+
+ /* Save the index of the top of the stack because it may
+ * change once buffers are used.
+ */
+ lastarg = lua_gettop(L);
+
+ /* init buffer, and fill it with prefix. */
+ luaL_buffinit(L, &socket->b);
+
+ /* Check prefix. */
+ if (lastarg >= 3) {
+ if (lua_type(L, 3) != LUA_TSTRING)
+ WILL_LJMP(luaL_error(L, "Expect a 'string' for the prefix"));
+ pattern = lua_tolstring(L, 3, &len);
+ luaL_addlstring(&socket->b, pattern, len);
+ }
+
+ return __LJMP(hlua_socket_receive_yield(L, 0, 0));
+}
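+
+/* Lua-side usage (illustrative), mirroring the LuaSocket receive API:
+ *
+ *   local line = sock:receive("*l")        -- one line (the default)
+ *   local all  = sock:receive("*a")        -- everything until close
+ *   local blk  = sock:receive(16)          -- 16 bytes
+ *   local res  = sock:receive("*l", "hdr") -- "hdr" prepended to the data
+ */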
+
+/* Write the Lua input string into the output buffer.
+ * This function yields if no space is available.
+ */
+static int hlua_socket_write_yield(struct lua_State *L,int status, lua_KContext ctx)
+{
+ struct hlua_socket *socket;
+ struct hlua *hlua;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ size_t buf_len;
+ const char *buf;
+ int len;
+ int send_len;
+ int sent;
+ struct xref *peer;
+ struct stream *s;
+ struct stconn *sc;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+
+ /* Check if this lua stack is schedulable. */
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'write' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ /* Get object */
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+ buf = MAY_LJMP(luaL_checklstring(L, 2, &buf_len));
+ sent = MAY_LJMP(luaL_checkinteger(L, 3));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* Check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ appctx = csk_ctx->appctx;
+ sc = appctx_sc(appctx);
+ s = __sc_strm(sc);
+
+ /* Check for connection close. */
+ if (channel_output_closed(&s->req)) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ /* Update the input buffer data. */
+ buf += sent;
+ send_len = buf_len - sent;
+
+ /* All the data are sent. */
+ if (sent >= buf_len) {
+ xref_unlock(&socket->xref, peer);
+ return 1; /* Implicitly return the length sent. */
+ }
+
+ /* Check if the buffer is available because HAProxy doesn't allocate
+ * the request buffer if it's not required.
+ */
+ if (s->req.buf.size == 0) {
+ if (!sc_alloc_ibuf(sc, &appctx->buffer_wait))
+ goto hlua_socket_write_yield_return;
+ }
+
+ /* Check for available space. */
+ len = b_room(&s->req.buf);
+ if (len <= 0) {
+ goto hlua_socket_write_yield_return;
+ }
+
+ /* send data */
+ if (len < send_len)
+ send_len = len;
+ len = ci_putblk(&s->req, buf, send_len);
+
+ /* "Not enough space" (-1), "Buffer too little to contain
+ * the data" (-2) are not expected because the available length
+ * is tested.
+ * Other unknown error are also not expected.
+ */
+ if (len <= 0) {
+ if (len == -1)
+ s->req.flags |= CF_WAKE_WRITE;
+
+ MAY_LJMP(hlua_socket_close_helper(L));
+ lua_pop(L, 1);
+ lua_pushinteger(L, -1);
+ xref_unlock(&socket->xref, peer);
+ return 1;
+ }
+
+ /* Data were copied: wake the applet up to process them. */
+ appctx_wakeup(appctx);
+
+ s->req.rex = TICK_ETERNITY;
+ s->res.wex = TICK_ETERNITY;
+
+ /* Update length sent. */
+ lua_pop(L, 1);
+ lua_pushinteger(L, sent + len);
+
+ /* Has the whole data buffer been sent? */
+ if (sent + len >= buf_len) {
+ xref_unlock(&socket->xref, peer);
+ return 1;
+ }
+
+hlua_socket_write_yield_return:
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_write, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory"));
+ }
+ xref_unlock(&socket->xref, peer);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_write_yield, TICK_ETERNITY, 0));
+ return 0;
+}
+
+/* This function initiates the sending of data. It just checks the input
+ * parameters and pushes onto the Lua stack an integer containing the
+ * amount of data written to the buffer. This is used by the function
+ * "hlua_socket_write_yield" that can yield.
+ *
+ * The Lua function gets between 2 and 4 parameters. The first one is
+ * the associated object. The second is a string buffer. The third is
+ * an optional integer giving the position, in the buffer, of the first
+ * byte to send. The first byte is at position "1", which is also the
+ * default value. The fourth argument is an optional integer giving the
+ * position of the last byte to send. The default is the last byte.
+ */
+static int hlua_socket_send(struct lua_State *L)
+{
+ int i;
+ int j;
+ const char *buf;
+ size_t buf_len;
+
+ /* Check number of arguments. */
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'send' needs between 2 and 4 arguments"));
+
+ /* Get the string. */
+ buf = MAY_LJMP(luaL_checklstring(L, 2, &buf_len));
+
+ /* Get and check j. */
+ if (lua_gettop(L) == 4) {
+ j = MAY_LJMP(luaL_checkinteger(L, 4));
+ if (j < 0)
+ j = buf_len + j + 1;
+ if (j > buf_len)
+ j = buf_len + 1;
+ lua_pop(L, 1);
+ }
+ else
+ j = buf_len;
+
+ /* Get and check i. */
+ if (lua_gettop(L) == 3) {
+ i = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (i < 0)
+ i = buf_len + i + 1;
+ if (i > buf_len)
+ i = buf_len + 1;
+ lua_pop(L, 1);
+ } else
+ i = 1;
+
+ /* Check both i and j. */
+ if (i > j) {
+ lua_pushinteger(L, 0);
+ return 1;
+ }
+ if (i == 0 && j == 0) {
+ lua_pushinteger(L, 0);
+ return 1;
+ }
+ if (i == 0)
+ i = 1;
+ if (j == 0)
+ j = 1;
+
+ /* Pop the string. */
+ lua_pop(L, 1);
+
+ /* Update the buffer length. */
+ buf += i - 1;
+ buf_len = j - i + 1;
+ lua_pushlstring(L, buf, buf_len);
+
+ /* This integer is used to remember the amount of data already sent. */
+ lua_pushinteger(L, 0);
+
+ return MAY_LJMP(hlua_socket_write_yield(L, 0, 0));
+}
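+
+/* Illustrative Lua-side usage of send(); a sketch only, assuming a
+ * connected socket object <s>. <i> and <j> are 1-based and may be
+ * negative to count from the end, as in string.sub():
+ *
+ *   s:send("hello world")        -- send the whole string
+ *   s:send("hello world", 7)     -- send from byte 7 to the end: "world"
+ *   s:send("hello world", 1, 5)  -- send bytes 1 to 5: "hello"
+ */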
+
+#define SOCKET_INFO_MAX_LEN sizeof("[0000:0000:0000:0000:0000:0000:0000:0000]:12345")
+__LJMP static inline int hlua_socket_info(struct lua_State *L, const struct sockaddr_storage *addr)
+{
+ static char buffer[SOCKET_INFO_MAX_LEN];
+ int ret;
+ int len;
+ char *p;
+
+ ret = addr_to_str(addr, buffer+1, SOCKET_INFO_MAX_LEN-1);
+ if (ret <= 0) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (ret == AF_UNIX) {
+ lua_pushstring(L, buffer+1);
+ return 1;
+ }
+ else if (ret == AF_INET6) {
+ buffer[0] = '[';
+ len = strlen(buffer);
+ buffer[len] = ']';
+ len++;
+ buffer[len] = ':';
+ len++;
+ p = buffer;
+ }
+ else if (ret == AF_INET) {
+ p = buffer + 1;
+ len = strlen(p);
+ p[len] = ':';
+ len++;
+ }
+ else {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (port_to_str(addr, p + len, SOCKET_INFO_MAX_LEN-1 - len) <= 0) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushstring(L, p);
+ return 1;
+}
+
+/* Returns information about the peer of the connection. */
+__LJMP static int hlua_socket_getpeername(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct xref *peer;
+ struct appctx *appctx;
+ struct stconn *sc;
+ const struct sockaddr_storage *dst;
+ int ret;
+
+ MAY_LJMP(check_args(L, 1, "getpeername"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket from a different thread.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "getpeername: cannot use socket on other thread"));
+
+ /* Check for connection break. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ appctx = container_of(peer, struct hlua_csk_ctx, xref)->appctx;
+ sc = appctx_sc(appctx);
+ dst = sc_dst(sc_opposite(sc));
+ if (!dst) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ret = MAY_LJMP(hlua_socket_info(L, dst));
+ xref_unlock(&socket->xref, peer);
+ return ret;
+}
+
+/* Returns information about my connection side. */
+static int hlua_socket_getsockname(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct connection *conn;
+ struct appctx *appctx;
+ struct xref *peer;
+ struct stream *s;
+ int ret;
+
+ MAY_LJMP(check_args(L, 1, "getsockname"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket from a different thread.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "getsockname: cannot use socket on other thread"));
+
+ /* Check for connection break. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ appctx = container_of(peer, struct hlua_csk_ctx, xref)->appctx;
+ s = appctx_strm(appctx);
+
+ conn = sc_conn(s->scb);
+ if (!conn || !conn_get_src(conn)) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ret = hlua_socket_info(L, conn->src);
+ xref_unlock(&socket->xref, peer);
+ return ret;
+}
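+
+/* Illustrative Lua-side usage of getpeername()/getsockname(); a sketch
+ * only. As built by hlua_socket_info() above, the returned string looks
+ * like "127.0.0.1:8000" for IPv4, "[::1]:8000" for IPv6, or a bare path
+ * for a UNIX socket; nil is returned when no address is available:
+ *
+ *   local peer_addr  = s:getpeername()
+ *   local local_addr = s:getsockname()
+ */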
+
+/* This struct defines the applet. */
+static struct applet update_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<LUA_TCP>",
+ .fct = hlua_socket_handler,
+ .init = hlua_socket_init,
+ .release = hlua_socket_release,
+};
+
+__LJMP static int hlua_socket_connect_yield(struct lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_socket *socket = MAY_LJMP(hlua_checksocket(L, 1));
+ struct hlua *hlua;
+ struct xref *peer;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ struct stream *s;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket from a different thread.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* Check for connection break. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ lua_pushstring(L, "Can't connect");
+ return 2;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ appctx = csk_ctx->appctx;
+ s = appctx_strm(appctx);
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket from a different thread.
+ */
+ if (socket->tid != tid) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+ }
+
+ /* Check for connection close. */
+ if (!hlua || channel_output_closed(&s->req)) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ lua_pushstring(L, "Can't connect");
+ return 2;
+ }
+
+ appctx = __sc_appctx(s->scf);
+
+ /* Check for connection established. */
+ if (csk_ctx->connected) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushinteger(L, 1);
+ return 1;
+ }
+
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_write, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory error"));
+ }
+ xref_unlock(&socket->xref, peer);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_connect_yield, TICK_ETERNITY, 0));
+ return 0;
+}
+
+/* This function fails or initiates the connection. */
+__LJMP static int hlua_socket_connect(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ int port = -1;
+ const char *ip;
+ struct hlua *hlua;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ int low, high;
+ struct sockaddr_storage *addr;
+ struct xref *peer;
+ struct stconn *sc;
+ struct stream *s;
+
+ if (lua_gettop(L) < 2)
+ WILL_LJMP(luaL_error(L, "connect: need at least 2 arguments"));
+
+ /* Get args. */
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket from a different thread.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ ip = MAY_LJMP(luaL_checkstring(L, 2));
+ if (lua_gettop(L) >= 3) {
+ luaL_Buffer b;
+ port = MAY_LJMP(luaL_checkinteger(L, 3));
+
+ /* Force the ip to end with a colon, to support IPv6 addresses
+ * that are not enclosed within square brackets.
+ */
+ if (port > 0) {
+ luaL_buffinit(L, &b);
+ luaL_addstring(&b, ip);
+ luaL_addchar(&b, ':');
+ luaL_pushresult(&b);
+ ip = lua_tolstring(L, lua_gettop(L), NULL);
+ }
+ }
+
+ /* Check for connection break. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ /* Parse ip address. */
+ addr = str2sa_range(ip, NULL, &low, &high, NULL, NULL, NULL, NULL, NULL, PA_O_PORT_OK | PA_O_STREAM);
+ if (!addr) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: cannot parse destination address '%s'", ip));
+ }
+
+ /* Set port. */
+ if (low == 0) {
+ if (addr->ss_family == AF_INET) {
+ if (port == -1) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: port missing"));
+ }
+ ((struct sockaddr_in *)addr)->sin_port = htons(port);
+ } else if (addr->ss_family == AF_INET6) {
+ if (port == -1) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: port missing"));
+ }
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+ }
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ appctx = csk_ctx->appctx;
+ sc = appctx_sc(appctx);
+ s = __sc_strm(sc);
+
+ if (!sockaddr_alloc(&sc_opposite(sc)->dst, addr, sizeof(*addr))) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: internal error"));
+ }
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ /* inform the stream that we want to be notified whenever the
+ * connection completes.
+ */
+ applet_need_more_data(appctx);
+ applet_have_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ hlua->gc_count++;
+
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_write, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory"));
+ }
+ xref_unlock(&socket->xref, peer);
+
+ task_wakeup(s->task, TASK_WOKEN_INIT);
+
+ /* Yield, waiting for the connection to complete. */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_connect_yield, TICK_ETERNITY, 0));
+
+ return 0;
+}
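+
+/* Illustrative Lua-side usage of connect(); a sketch only, to be run from
+ * a context that is allowed to yield (e.g. a task or an action):
+ *
+ *   local s = core.tcp()
+ *   local ok, err = s:connect("127.0.0.1", 8000)
+ *   -- a bare IPv6 address also works since a ':' is appended internally:
+ *   --   s:connect("::1", 8000)
+ *   -- the port may also be embedded in the address string:
+ *   --   s:connect("127.0.0.1:8000")
+ */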
+
+#ifdef USE_OPENSSL
+__LJMP static int hlua_socket_connect_ssl(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct xref *peer;
+ struct stream *s;
+
+ MAY_LJMP(check_args(L, 3, "connect_ssl"));
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check for connection break. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ s = appctx_strm(container_of(peer, struct hlua_csk_ctx, xref)->appctx);
+
+ s->target = &socket_ssl->obj_type;
+ xref_unlock(&socket->xref, peer);
+ return MAY_LJMP(hlua_socket_connect(L));
+}
+#endif
+
+/* This is a stub: setting socket options is not implemented. The call
+ * does nothing and returns no value.
+ */
+__LJMP static int hlua_socket_setoption(struct lua_State *L)
+{
+ return 0;
+}
+
+__LJMP static int hlua_socket_settimeout(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ int tmout;
+ double dtmout;
+ struct xref *peer;
+ struct stream *s;
+
+ MAY_LJMP(check_args(L, 2, "settimeout"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* convert the timeout to millis */
+ dtmout = MAY_LJMP(luaL_checknumber(L, 2)) * 1000;
+
+ /* Check for negative values */
+ if (dtmout < 0)
+ WILL_LJMP(luaL_error(L, "settimeout: cannot set negative values"));
+
+ if (dtmout > INT_MAX) /* overflow check */
+ WILL_LJMP(luaL_error(L, "settimeout: cannot set values larger than %d ms", INT_MAX));
+
+ tmout = MS_TO_TICKS((int)dtmout);
+ if (tmout == 0)
+ tmout++; /* very small timeouts are adjusted to a minimum of 1ms */
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket from a different thread.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "settimeout: cannot use socket on other thread"));
+
+ /* Check for connection break. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ hlua_pusherror(L, "socket: not yet initialised, you can't set timeouts.");
+ WILL_LJMP(lua_error(L));
+ return 0;
+ }
+
+ s = appctx_strm(container_of(peer, struct hlua_csk_ctx, xref)->appctx);
+
+ s->sess->fe->timeout.connect = tmout;
+ s->req.rto = tmout;
+ s->req.wto = tmout;
+ s->res.rto = tmout;
+ s->res.wto = tmout;
+ s->req.rex = tick_add_ifset(now_ms, tmout);
+ s->req.wex = tick_add_ifset(now_ms, tmout);
+ s->res.rex = tick_add_ifset(now_ms, tmout);
+ s->res.wex = tick_add_ifset(now_ms, tmout);
+
+ s->task->expire = tick_add_ifset(now_ms, tmout);
+ task_queue(s->task);
+
+ xref_unlock(&socket->xref, peer);
+
+ lua_pushinteger(L, 1);
+ return 1;
+}
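+
+/* Illustrative Lua-side usage of settimeout(); a sketch only. The value
+ * is expressed in seconds (fractions are allowed) and, as implemented
+ * above, applies at once to the connect, read and write timeouts:
+ *
+ *   s:settimeout(5)     -- 5 seconds
+ *   s:settimeout(0.25)  -- 250ms; anything below 1ms is raised to 1ms
+ */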
+
+__LJMP static int hlua_socket_new(lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct hlua_csk_ctx *ctx;
+ struct appctx *appctx;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3)) {
+ hlua_pusherror(L, "socket: full stack");
+ goto out_fail_conf;
+ }
+
+ /* Create the object: obj[0] = userdata. */
+ lua_newtable(L);
+ socket = MAY_LJMP(lua_newuserdata(L, sizeof(*socket)));
+ lua_rawseti(L, -2, 0);
+ memset(socket, 0, sizeof(*socket));
+ socket->tid = tid;
+
+ /* Check if the various memory pools are initialized. */
+ if (!pool_head_stream || !pool_head_buffer) {
+ hlua_pusherror(L, "socket: uninitialized pools.");
+ goto out_fail_conf;
+ }
+
+ /* Pop a class socket metatable and assign it to the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_socket_ref);
+ lua_setmetatable(L, -2);
+
+ /* Create the applet context */
+ appctx = appctx_new_here(&update_applet, NULL);
+ if (!appctx) {
+ hlua_pusherror(L, "socket: out of memory");
+ goto out_fail_conf;
+ }
+ ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ ctx->connected = 0;
+ ctx->die = 0;
+ LIST_INIT(&ctx->wake_on_write);
+ LIST_INIT(&ctx->wake_on_read);
+
+ if (appctx_init(appctx) == -1) {
+ hlua_pusherror(L, "socket: fail to init applet.");
+ goto out_fail_appctx;
+ }
+
+ /* Initialise cross reference between stream and Lua socket object. */
+ xref_create(&socket->xref, &ctx->xref);
+ return 1;
+
+ out_fail_appctx:
+ appctx_free_on_early_error(appctx);
+ out_fail_conf:
+ WILL_LJMP(lua_error(L));
+ return 0;
+}
+
+/*
+ *
+ *
+ * Class Channel
+ *
+ *
+ */
+
+/* Returns the struct channel attached to the class channel at the
+ * stack entry "ud", or throws an argument error.
+ */
+__LJMP static struct channel *hlua_checkchannel(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_channel_ref));
+}
+
+/* Pushes the channel onto the top of the stack. If the stack does not
+ * have free slots, the function fails and returns 0.
+ */
+static int hlua_channel_new(lua_State *L, struct channel *channel)
+{
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ lua_newtable(L);
+ lua_pushlightuserdata(L, channel);
+ lua_rawseti(L, -2, 0);
+
+ /* Pop a class channel metatable and assign it to the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_channel_ref);
+ lua_setmetatable(L, -2);
+ return 1;
+}
+
+/* Helper function returning a filter attached to a channel at the position <ud>
+ * in the stack, filling the current offset and length of the filter. If no
+ * filter is attached, NULL is returned and <offset> and <len> are not
+ * initialized.
+ */
+static struct filter *hlua_channel_filter(lua_State *L, int ud, struct channel *chn, size_t *offset, size_t *len)
+{
+ struct filter *filter = NULL;
+
+ if (lua_getfield(L, ud, "__filter") == LUA_TLIGHTUSERDATA) {
+ struct hlua_flt_ctx *flt_ctx;
+
+ filter = lua_touserdata(L, -1);
+ flt_ctx = filter->ctx;
+ if (hlua_filter_from_payload(filter)) {
+ *offset = flt_ctx->cur_off[CHN_IDX(chn)];
+ *len = flt_ctx->cur_len[CHN_IDX(chn)];
+ }
+ }
+
+ lua_pop(L, 1);
+ return filter;
+}
+
+/* Copies <len> bytes of data present in the channel's buffer, starting at the
+* offset <offset>, and put it in a LUA string variable. It is the caller
+* responsibility to ensure <len> and <offset> are valid. It always return the
+* length of the built string. <len> may be 0, in this case, an empty string is
+* created and 0 is returned.
+*/
+static inline int _hlua_channel_dup(struct channel *chn, lua_State *L, size_t offset, size_t len)
+{
+ size_t block1, block2;
+ luaL_Buffer b;
+
+ block1 = len;
+ if (block1 > b_contig_data(&chn->buf, b_peek_ofs(&chn->buf, offset)))
+ block1 = b_contig_data(&chn->buf, b_peek_ofs(&chn->buf, offset));
+ block2 = len - block1;
+
+ luaL_buffinit(L, &b);
+ luaL_addlstring(&b, b_peek(&chn->buf, offset), block1);
+ if (block2)
+ luaL_addlstring(&b, b_orig(&chn->buf), block2);
+ luaL_pushresult(&b);
+ return len;
+}
+
+/* Inserts the string <str> into the channel's buffer at the offset
+ * <offset>. This function returns -1 if data cannot be copied. Otherwise,
+ * it returns the number of bytes copied.
+ */
+static int _hlua_channel_insert(struct channel *chn, lua_State *L, struct ist str, size_t offset)
+{
+ int ret = 0;
+
+ /* Nothing to do, just return */
+ if (unlikely(istlen(str) == 0))
+ goto end;
+
+ if (istlen(str) > c_room(chn)) {
+ ret = -1;
+ goto end;
+ }
+ ret = b_insert_blk(&chn->buf, offset, istptr(str), istlen(str));
+
+ end:
+ return ret;
+}
+
+/* Removes <len> bytes of data at the absolute position <offset>.
+ */
+static void _hlua_channel_delete(struct channel *chn, size_t offset, size_t len)
+{
+ size_t end = offset + len;
+
+ if (b_peek(&chn->buf, end) != b_tail(&chn->buf))
+ b_move(&chn->buf, b_peek_ofs(&chn->buf, end),
+ b_data(&chn->buf) - end, -len);
+ b_sub(&chn->buf, len);
+}
+
+/* Copies input data from the channel's buffer into a Lua string. It is
+ * possible to set a specific offset (0 by default) and a length (all
+ * remaining input data starting from the offset by default). If there is
+ * not enough input data and more data can be received, this function
+ * yields.
+ *
+ * From an action, all input data are considered. For a filter, the offset
+ * and the length of input data to consider are retrieved from the filter
+ * context.
+ */
+__LJMP static int hlua_channel_get_data_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t input, output;
+ int offset, len;
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto dup;
+ if (len == -1)
+ len = global.tune.bufsize;
+ if (len < 0) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ /* Wait for more data when possible, if no length was specified or if
+ * not enough data was received yet.
+ */
+ if (!len || offset + len > output + input) {
+ if (!HLUA_CANT_YIELD(hlua_gethlua(L)) && !channel_input_closed(chn) && channel_may_recv(chn)) {
+ /* Yield waiting for more data, as requested */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_get_data_yield, TICK_ETERNITY, 0));
+ }
+
+ /* Return 'nil' if there is no data and the channel can't receive more data */
+ if (!len) {
+ lua_pushnil(L);
+ return -1;
+ }
+
+ /* Otherwise, return all data */
+ len = output + input - offset;
+ }
+
+ dup:
+ _hlua_channel_dup(chn, L, offset, len);
+ return 1;
+}
+
+/* Copies the first line (including the trailing LF) of input data from the
+ * channel's buffer. It is possible to set a specific offset (0 by default)
+ * and a length (all remaining input data starting from the offset by
+ * default). If there is not enough input data and more data can be
+ * received, the function yields. If a length is explicitly specified, no
+ * more data are copied. Otherwise, if no LF is found and more data can be
+ * received, this function yields.
+ *
+ * From an action, all input data are considered. For a filter, the offset
+ * and the length of input data to consider are retrieved from the filter
+ * context.
+ */
+__LJMP static int hlua_channel_get_line_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t l, input, output;
+ int offset, len;
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto dup;
+ if (len == -1)
+ len = global.tune.bufsize;
+ if (len < 0) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ for (l = 0; l < len; l++) {
+ if (l + offset >= output + input)
+ break;
+ if (*(b_peek(&chn->buf, offset + l)) == '\n') {
+ len = l+1;
+ goto dup;
+ }
+ }
+
+ /* Wait for more data when possible, if no line was found and either no
+ * length was specified or not enough data was received yet.
+ */
+ if (lua_gettop(L) != 3 || offset + len > output + input) {
+ if (!HLUA_CANT_YIELD(hlua_gethlua(L)) && !channel_input_closed(chn) && channel_may_recv(chn)) {
+ /* Yield waiting for more data */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_get_line_yield, TICK_ETERNITY, 0));
+ }
+
+ /* Return 'nil' if there is no data and the channel can't receive more data */
+ if (!len) {
+ lua_pushnil(L);
+ return -1;
+ }
+
+ /* Otherwise, return all data */
+ len = output + input - offset;
+ }
+
+ dup:
+ _hlua_channel_dup(chn, L, offset, len);
+ return 1;
+}
+
+/* [ DEPRECATED ]
+ *
+ * Duplicates all input data found in the channel's buffer. The data are not
+ * removed from the buffer. This function relies on _hlua_channel_dup().
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_dup(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t offset, len;
+
+ MAY_LJMP(check_args(L, 1, "dup"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(chn) && channel_input_closed(chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ _hlua_channel_dup(chn, L, offset, len);
+ return 1;
+}
+
+/* [ DEPRECATED ]
+ *
+ * Gets all input data found in the channel's buffer. The data are removed from
+ * the buffer after the copy. This function relies on _hlua_channel_dup() and
+ * _hlua_channel_delete().
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_get(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t offset, len;
+ int ret;
+
+ MAY_LJMP(check_args(L, 1, "get"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(chn) && channel_input_closed(chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ret = _hlua_channel_dup(chn, L, offset, len);
+ _hlua_channel_delete(chn, offset, ret);
+ return 1;
+}
+
+/* This function consumes and returns one line. If the channel is closed
+ * and the last data does not contain a final '\n', the data are returned
+ * without the final '\n'. When no more data are available, it returns a
+ * nil value.
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_getline_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t l, offset, len;
+ int ret;
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(chn) && channel_input_closed(chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ for (l = 0; l < len; l++) {
+ if (*(b_peek(&chn->buf, offset+l)) == '\n') {
+ len = l+1;
+ goto dup;
+ }
+ }
+
+ if (!HLUA_CANT_YIELD(hlua_gethlua(L)) && !channel_input_closed(chn) && channel_may_recv(chn)) {
+ /* Yield waiting for more data */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_getline_yield, TICK_ETERNITY, 0));
+ }
+
+ dup:
+ ret = _hlua_channel_dup(chn, L, offset, len);
+ _hlua_channel_delete(chn, offset, ret);
+ return 1;
+}
+
+/* [ DEPRECATED ]
+ *
+ * Check arguments for the function "hlua_channel_getline_yield".
+ */
+__LJMP static int hlua_channel_getline(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "getline"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ return MAY_LJMP(hlua_channel_getline_yield(L, 0, 0));
+}
+
+/* Retrieves a given amount of input data at the given offset. By default all
+ * available input data are returned. The offset may be negative to start from
+ * the end of input data. The length may be -1 to set it to the maximum buffer
+ * size.
+ */
+__LJMP static int hlua_channel_get_data(lua_State *L)
+{
+ struct channel *chn;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'data' expects at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ return MAY_LJMP(hlua_channel_get_data_yield(L, 0, 0));
+}
+
+/* Retrieves one line of input data starting at the given offset, up to an
+ * optional maximum length. By default all available input data are
+ * considered. The offset may be negative to start from the end of input
+ * data. The length may be -1 to set it to the maximum buffer size.
+ */
+__LJMP static int hlua_channel_get_line(lua_State *L)
+{
+ struct channel *chn;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'line' expects at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ return MAY_LJMP(hlua_channel_get_line_yield(L, 0, 0));
+}
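+
+/* Illustrative Lua-side usage of data()/line(); a sketch only, assuming a
+ * TCP channel object <chn> (these methods refuse HTTP streams):
+ *
+ *   local all   = chn:data()        -- all input data, nothing is consumed
+ *   local tail  = chn:data(-10)     -- the last 10 bytes of input data
+ *   local first = chn:line(0, 128)  -- the first line, at most 128 bytes
+ */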
+
+/* Appends a string to the input side of the channel. It returns the length
+ * of the written string, or -1 if the channel is closed or if the buffer
+ * size is too small for the data. 0 may be returned if nothing is copied.
+ * This function does not yield.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_append(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, offset, len;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "append"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ /* insert at the end of the input data, as "append" implies */
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset + len);
+ if (ret > 0 && filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, ret);
+ flt_ctx->cur_len[CHN_IDX(chn)] += ret;
+ }
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Prepends a string to the input side of the channel. It returns the length
+ * of the written string, or -1 if the channel is closed or if the buffer
+ * size is too small for the data. 0 may be returned if nothing is copied.
+ * This function does not yield.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_prepend(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, offset, len;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "prepend"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset);
+ if (ret > 0 && filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, ret);
+ flt_ctx->cur_len[CHN_IDX(chn)] += ret;
+ }
+
+ lua_pushinteger(L, ret);
+ return 1;
+}
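+
+/* Illustrative Lua-side usage of append()/prepend(); a sketch only:
+ *
+ *   local ret = chn:append("-->")  -- inserted after existing input data
+ *   ret = chn:prepend("<--")       -- inserted before existing input data
+ *   -- ret is the number of bytes copied, or -1 on failure (channel
+ *   -- closed or not enough room in the buffer)
+ */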
+
+/* Inserts a string at a given offset in the channel's input data. By
+ * default the string is inserted in front of input data. It returns the
+ * length of the written string, or -1 if the channel is closed or if the
+ * buffer size is too small for the data.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_insert_data(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, input, output;
+ int ret, offset;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'insert' expects at least 1 argument and at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset > output + input) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset);
+ if (ret > 0 && filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, ret);
+ flt_ctx->cur_len[CHN_IDX(chn)] += ret;
+ }
+
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Replaces a given amount of input data at the given offset by a string
+ * content. By default all remaining data are removed (offset = 0 and len =
+ * -1). It returns the length of the written string, or -1 if the channel is
+ * closed or if the buffer size is too small for the data.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_set_data(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, input, output;
+ int ret, offset, len;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set' expects at least 1 argument and at most 3 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 4) {
+ len = MAY_LJMP(luaL_checkinteger(L, 4));
+ if (!len)
+ goto set;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ set:
+ /* Be sure we can copy the string once the input data are removed. */
+ if (sz > c_room(chn) + len)
+ lua_pushinteger(L, -1);
+ else {
+ _hlua_channel_delete(chn, offset, len);
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset);
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ len -= (ret > 0 ? ret : 0);
+ flt_update_offsets(filter, chn, -len);
+ flt_ctx->cur_len[CHN_IDX(chn)] -= len;
+ }
+
+ lua_pushinteger(L, ret);
+ }
+ return 1;
+}
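+
+/* Illustrative Lua-side usage of insert()/set(); a sketch only:
+ *
+ *   chn:insert("x")     -- insert "x" in front of input data
+ *   chn:insert("x", 5)  -- insert "x" after the first 5 input bytes
+ *   chn:set("x")        -- replace all input data with "x"
+ *   chn:set("x", 0, 3)  -- replace the first 3 input bytes with "x"
+ */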
+
+/* Removes a given amount of input data at the given offset. By default all
+ * input data are removed (offset = 0 and len = -1). It returns the amount
+ * of removed data.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_del_data(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t input, output;
+ int offset, len;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'remove' expects at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto end;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ _hlua_channel_delete(chn, offset, len);
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, -len);
+ flt_ctx->cur_len[CHN_IDX(chn)] -= len;
+ }
+
+ end:
+ lua_pushinteger(L, len);
+ return 1;
+}
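+
+/* Illustrative Lua-side usage of remove(); a sketch only. The returned
+ * value is the number of bytes removed:
+ *
+ *   chn:remove()      -- drop all input data
+ *   chn:remove(0, 4)  -- drop the first 4 input bytes
+ *   chn:remove(-4)    -- drop the last 4 input bytes
+ */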
+
+/* Appends data to the output side of the buffer, so the data are
+ * immediately scheduled to be sent. The function returns the amount of
+ * data written. If the buffer cannot contain all the data, the function
+ * yields. It returns -1 if the channel is closed.
+ */
+__LJMP static int hlua_channel_send_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t offset, len, sz;
+ int l, ret;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ l = MAY_LJMP(luaL_checkinteger(L, 3));
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+
+ if (unlikely(channel_output_closed(chn))) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ len = c_room(chn);
+ if (len > sz - l) {
+ if (filter) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+ len = sz - l;
+ }
+
+ /* insert the part of the string not sent yet (<l> bytes already were) */
+ ret = _hlua_channel_insert(chn, L, ist2(str + l, len), offset);
+ if (ret == -1) {
+ lua_pop(L, 1);
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+ if (ret) {
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+
+ flt_update_offsets(filter, chn, ret);
+ FLT_OFF(filter, chn) += ret;
+ flt_ctx->cur_off[CHN_IDX(chn)] += ret;
+ }
+ else
+ c_adv(chn, ret);
+
+ l += ret;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+ }
+
+ if (l < sz) {
+ /* Yield only if the channel's output is not empty.
+ * Otherwise it means we cannot add more data. */
+ if (co_data(chn) == 0 || HLUA_CANT_YIELD(hlua_gethlua(L)))
+ return 1;
+
+ /* If we are waiting for space in the response buffer, we
+ * must set the WAKERESWR flag. This flag requires the task to
+ * be woken up if any activity is detected on the response buffer.
+ */
+ if (chn->flags & CF_ISRESP)
+ HLUA_SET_WAKERESWR(hlua);
+ else
+ HLUA_SET_WAKEREQWR(hlua);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_send_yield, TICK_ETERNITY, 0));
+ }
+
+ return 1;
+}
+
+/* Just a wrapper around "hlua_channel_send_yield". This wrapper lets the
+ * Lua process yield and resume without re-running the initial argument
+ * checks.
+ *
+ * This function cannot be called from a filter.
+ */
+__LJMP static int hlua_channel_send(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "send"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ lua_pushinteger(L, 0);
+ return MAY_LJMP(hlua_channel_send_yield(L, 0, 0));
+}
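+
+/* Illustrative Lua-side usage of send(); a sketch only. Unlike append(),
+ * the data are pushed to the output side and scheduled to be sent
+ * immediately, the call yielding as long as the buffer cannot hold
+ * everything:
+ *
+ *   local sent = chn:send("hello\r\n")
+ */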
+
+/* This function forwards an amount of bytes. The data pass from the
+ * input side of the buffer to the output side, from where they can be
+ * forwarded. This function never fails.
+ *
+ * The Lua function takes the amount of bytes to be forwarded as input
+ * argument. It returns the number of bytes forwarded.
+ */
+__LJMP static int hlua_channel_forward_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t offset, len, fwd;
+ int l, max;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ fwd = MAY_LJMP(luaL_checkinteger(L, 2));
+ l = MAY_LJMP(luaL_checkinteger(L, -1));
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ max = fwd - l;
+ if (max > len)
+ max = len;
+
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ FLT_OFF(filter, chn) += max;
+ flt_ctx->cur_off[CHN_IDX(chn)] += max;
+ flt_ctx->cur_len[CHN_IDX(chn)] -= max;
+ }
+ else
+ channel_forward(chn, max);
+
+ l += max;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+
+ /* Check if some bytes remain to be forwarded. */
+ if (l < fwd) {
+ /* If the input channel or the output channel is closed, we
+ * must return the amount of data forwarded.
+ */
+ if (channel_input_closed(chn) || channel_output_closed(chn) || HLUA_CANT_YIELD(hlua_gethlua(L)))
+ return 1;
+
+ /* If we are waiting for space in the response buffer, we
+ * must set the WAKERESWR flag. This flag requires the task to
+ * be woken up if any activity is detected on the response buffer.
+ */
+ if (chn->flags & CF_ISRESP)
+ HLUA_SET_WAKERESWR(hlua);
+ else
+ HLUA_SET_WAKEREQWR(hlua);
+
+ /* Otherwise, we can yield waiting for new data on the input side. */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_forward_yield, TICK_ETERNITY, 0));
+ }
+
+ return 1;
+}
+
+/* Just checks the input and prepares the stack for the previous
+ * function "hlua_channel_forward_yield".
+ *
+ * This function cannot be called from a filter.
+ */
+__LJMP static int hlua_channel_forward(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "forward"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ lua_pushinteger(L, 0);
+ return MAY_LJMP(hlua_channel_forward_yield(L, 0, 0));
+}
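+
+/* Illustrative Lua-side usage of forward(); a sketch only. The call yields
+ * until the requested amount of bytes has moved to the output side, unless
+ * one side of the channel is closed first:
+ *
+ *   local fwd = chn:forward(128)  -- forward up to 128 input bytes
+ */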
+
+/* Just returns the number of bytes available in the input
+ * side of the buffer. This function never fails.
+ */
+__LJMP static int hlua_channel_get_in_len(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "input"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter || !IS_HTX_STRM(chn_strm(chn)))
+ lua_pushinteger(L, input);
+ else {
+ struct htx *htx = htxbuf(&chn->buf);
+
+ lua_pushinteger(L, htx->data - co_data(chn));
+ }
+ return 1;
+}
+
+/* Returns true if the channel is full. */
+__LJMP static int hlua_channel_is_full(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "is_full"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ /* Ignore the reserve: we are not on the producer side here (i.e.
+ * not in an applet).
+ */
+ lua_pushboolean(L, channel_full(chn, 0));
+ return 1;
+}
+
+/* Returns true if the channel may still receive data. */
+__LJMP static int hlua_channel_may_recv(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "may_recv"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ lua_pushboolean(L, (!channel_input_closed(chn) && channel_may_recv(chn)));
+ return 1;
+}
+
+/* Returns true if the channel is the response channel. */
+__LJMP static int hlua_channel_is_resp(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "is_resp"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ lua_pushboolean(L, !!(chn->flags & CF_ISRESP));
+ return 1;
+}
+
+/* Just returns the number of bytes available in the output
+ * side of the buffer. This function never fails.
+ */
+__LJMP static int hlua_channel_get_out_len(lua_State *L)
+{
+ struct channel *chn;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "output"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+ hlua_channel_filter(L, 1, chn, &output, &input);
+
+ lua_pushinteger(L, output);
+ return 1;
+}
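+
+/* Illustrative Lua-side usage of the introspection methods above; a
+ * sketch only:
+ *
+ *   local pending = chn:input()     -- input bytes not yet forwarded
+ *   local queued  = chn:output()    -- bytes already scheduled for sending
+ *   if chn:is_resp() then ... end   -- true on the response channel
+ *   if chn:may_recv() then ... end  -- true while more data may arrive
+ */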
+
+/*
+ *
+ *
+ * Class Fetches
+ *
+ *
+ */
+
+/* Returns a struct hlua_smp if the stack entry "ud" is
+ * a class Fetches object, otherwise it throws an error.
+ */
+__LJMP static struct hlua_smp *hlua_checkfetches(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_fetches_ref));
+}
+
+/* This function creates and pushes onto the stack a Fetches object
+ * associated with the current TXN.
+ */
+static int hlua_fetches_new(lua_State *L, struct hlua_txn *txn, unsigned int flags)
+{
+ struct hlua_smp *hsmp;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the Fetches object is the
+ * transaction object.
+ */
+ lua_newtable(L);
+ hsmp = lua_newuserdata(L, sizeof(*hsmp));
+ lua_rawseti(L, -2, 0);
+
+ hsmp->s = txn->s;
+ hsmp->p = txn->p;
+ hsmp->dir = txn->dir;
+ hsmp->flags = flags;
+
+ /* Pop a class Fetches metatable and assign it to the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_fetches_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* This function is a Lua binding. It is called for each sample-fetch.
+ * It uses a closure argument to store the associated sample-fetch. It
+ * returns only one value or throws an error. An error is thrown
+ * only if an error is encountered during the argument parsing. If
+ * the "sample-fetch" function fails, nil is returned.
+ */
+__LJMP static int hlua_run_sample_fetch(lua_State *L)
+{
+ struct hlua_smp *hsmp;
+ struct sample_fetch *f;
+ struct arg args[ARGM_NBARGS + 1] = {{0}};
+ int i;
+ struct sample smp;
+
+ /* Get closure arguments. */
+ f = lua_touserdata(L, lua_upvalueindex(1));
+
+ /* Get traditional arguments. */
+ hsmp = MAY_LJMP(hlua_checkfetches(L, 1));
+
+ /* Check execution authorization. */
+ if (f->use & SMP_USE_HTTP_ANY &&
+ !(hsmp->flags & HLUA_F_MAY_USE_HTTP)) {
+ lua_pushfstring(L, "the sample-fetch '%s' needs an HTTP parser which "
+ "is not available in Lua services", f->kw);
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Get extra arguments. */
+ for (i = 0; i < lua_gettop(L) - 1; i++) {
+ if (i >= ARGM_NBARGS)
+ break;
+ hlua_lua2arg(L, i + 2, &args[i]);
+ }
+ args[i].type = ARGT_STOP;
+ args[i].data.str.area = NULL;
+
+ /* Check arguments. */
+ MAY_LJMP(hlua_lua2arg_check(L, 2, args, f->arg_mask, hsmp->p));
+
+ /* Run the special args checker. */
+ if (f->val_args && !f->val_args(args, NULL)) {
+ lua_pushfstring(L, "error in arguments");
+ goto error;
+ }
+
+ /* Initialise the sample. */
+ memset(&smp, 0, sizeof(smp));
+
+ /* Run the sample fetch process. */
+ smp_set_owner(&smp, hsmp->p, hsmp->s->sess, hsmp->s, hsmp->dir & SMP_OPT_DIR);
+ if (!f->process(args, &smp, f->kw, f->private)) {
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ lua_pushstring(L, "");
+ else
+ lua_pushnil(L);
+ goto end;
+ }
+
+ /* Convert the returned sample into a Lua value. */
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ hlua_smp2lua_str(L, &smp);
+ else
+ hlua_smp2lua(L, &smp);
+
+ end:
+ free_args(args);
+ return 1;
+
+ error:
+ free_args(args);
+ WILL_LJMP(lua_error(L));
+ return 0; /* Never reached */
+}
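+
+/* Illustrative Lua-side usage of the Fetches class; a sketch only. Sample
+ * fetches are exposed as methods whose names have dots replaced by
+ * underscores (e.g. "req.len" becomes req_len):
+ *
+ *   local ip   = txn.f:src()        -- raw value
+ *   local port = txn.sf:dst_port()  -- string-safe variant
+ */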
+
+/*
+ *
+ *
+ * Class Converters
+ *
+ *
+ */
+
+/* Returns a struct hlua_smp if the stack entry "ud" is
+ * a class Converters object, otherwise it throws an error.
+ */
+__LJMP static struct hlua_smp *hlua_checkconverters(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_converters_ref));
+}
+
+/* This function creates and pushes onto the stack a Converters object
+ * associated with the current TXN.
+ */
+static int hlua_converters_new(lua_State *L, struct hlua_txn *txn, unsigned int flags)
+{
+ struct hlua_smp *hsmp;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the Converters object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ hsmp = lua_newuserdata(L, sizeof(*hsmp));
+ lua_rawseti(L, -2, 0);
+
+ hsmp->s = txn->s;
+ hsmp->p = txn->p;
+ hsmp->dir = txn->dir;
+ hsmp->flags = flags;
+
+ /* Pop a class Converters metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_converters_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* This function is a Lua binding. It is called for each converter.
+ * It uses a closure argument to store the associated converter. It
+ * returns only one value or throws an error. An error is thrown
+ * only if an error is encountered during the argument parsing. If
+ * the converter function fails, nil is returned.
+ */
+__LJMP static int hlua_run_sample_conv(lua_State *L)
+{
+ struct hlua_smp *hsmp;
+ struct sample_conv *conv;
+ struct arg args[ARGM_NBARGS + 1] = {{0}};
+ int i;
+ struct sample smp;
+
+ /* Get closure arguments. */
+ conv = lua_touserdata(L, lua_upvalueindex(1));
+
+ /* Get traditional arguments. */
+ hsmp = MAY_LJMP(hlua_checkconverters(L, 1));
+
+ /* Get extra arguments. */
+ for (i = 0; i < lua_gettop(L) - 2; i++) {
+ if (i >= ARGM_NBARGS)
+ break;
+ hlua_lua2arg(L, i + 3, &args[i]);
+ }
+ args[i].type = ARGT_STOP;
+ args[i].data.str.area = NULL;
+
+ /* Check arguments. */
+ MAY_LJMP(hlua_lua2arg_check(L, 3, args, conv->arg_mask, hsmp->p));
+
+ /* Run the special args checker. */
+ if (conv->val_args && !conv->val_args(args, conv, "", 0, NULL)) {
+ hlua_pusherror(L, "error in arguments");
+ goto error;
+ }
+
+ /* Initialise the sample. */
+ memset(&smp, 0, sizeof(smp));
+ if (!hlua_lua2smp(L, 2, &smp)) {
+ hlua_pusherror(L, "error in the input argument");
+ goto error;
+ }
+
+ smp_set_owner(&smp, hsmp->p, hsmp->s->sess, hsmp->s, hsmp->dir & SMP_OPT_DIR);
+
+ /* Apply expected cast. */
+ if (!sample_casts[smp.data.type][conv->in_type]) {
+ hlua_pusherror(L, "invalid input argument: cannot cast '%s' to '%s'",
+ smp_to_type[smp.data.type], smp_to_type[conv->in_type]);
+ goto error;
+ }
+ if (sample_casts[smp.data.type][conv->in_type] != c_none &&
+ !sample_casts[smp.data.type][conv->in_type](&smp)) {
+ hlua_pusherror(L, "error during the input argument casting");
+ goto error;
+ }
+
+ /* Run the sample conversion process. */
+ if (!conv->process(args, &smp, conv->private)) {
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ lua_pushstring(L, "");
+ else
+ lua_pushnil(L);
+ goto end;
+ }
+
+ /* Convert the returned sample into a Lua value. */
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ hlua_smp2lua_str(L, &smp);
+ else
+ hlua_smp2lua(L, &smp);
+ end:
+ free_args(args);
+ return 1;
+
+ error:
+ free_args(args);
+ WILL_LJMP(lua_error(L));
+ return 0; /* Never reached */
+}
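+
+/* Illustrative Lua-side usage of the Converters class; a sketch only. The
+ * first argument is the input sample, followed by the converter's own
+ * arguments:
+ *
+ *   local net   = txn.c:ipmask(txn.f:src(), "255.255.255.0")
+ *   local upper = txn.sc:upper("hello")
+ */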
+
+/*
+ *
+ *
+ * Class AppletTCP
+ *
+ *
+ */
+
+/* Returns a struct hlua_appctx if the stack entry "ud" is
+ * a class AppletTCP object, otherwise it throws an error.
+ */
+__LJMP static struct hlua_appctx *hlua_checkapplet_tcp(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_applet_tcp_ref));
+}
+
+/* This function creates and pushes onto the stack an AppletTCP object
+ * bound to the applet context <ctx>.
+ */
+static int hlua_applet_tcp_new(lua_State *L, struct appctx *ctx)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s = appctx_strm(ctx);
+ struct proxy *p;
+
+ ALREADY_CHECKED(s);
+ p = s->be;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the AppletTCP object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ luactx = lua_newuserdata(L, sizeof(*luactx));
+ lua_rawseti(L, -2, 0);
+ luactx->appctx = ctx;
+ luactx->htxn.s = s;
+ luactx->htxn.p = p;
+
+ /* Create the "f" field that contains a list of fetches. */
+ lua_pushstring(L, "f");
+ if (!hlua_fetches_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sf" field that contains a list of stringsafe fetches. */
+ lua_pushstring(L, "sf");
+ if (!hlua_fetches_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "c" field that contains a list of converters. */
+ lua_pushstring(L, "c");
+ if (!hlua_converters_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sc" field that contains a list of stringsafe converters. */
+ lua_pushstring(L, "sc");
+ if (!hlua_converters_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Pop a class AppletTCP metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_applet_tcp_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+__LJMP static int hlua_applet_tcp_set_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set_var' needs between 3 and 4 arguments"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Convert the third argument into a sample. */
+ memset(&smp, 0, sizeof(smp));
+ hlua_lua2smp(L, 3, &smp);
+
+ /* Store the sample in a variable. */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+
+ if (lua_gettop(L) == 4 && lua_toboolean(L, 4))
+ lua_pushboolean(L, vars_set_by_name_ifexist(name, len, &smp) != 0);
+ else
+ lua_pushboolean(L, vars_set_by_name(name, len, &smp) != 0);
+
+ return 1;
+}
+
+__LJMP static int hlua_applet_tcp_unset_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "unset_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Unset the variable. */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ lua_pushboolean(L, vars_unset_by_name_ifexist(name, len, &smp) != 0);
+ return 1;
+}
+
+__LJMP static int hlua_applet_tcp_get_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "get_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ if (!vars_get_by_name(name, len, &smp, NULL)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ return hlua_smp2lua(L, &smp);
+}
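+
+/* Illustrative Lua-side usage of the variable accessors above; a sketch
+ * only, from a TCP service handler:
+ *
+ *   applet:set_var("txn.my_var", "foo")
+ *   applet:set_var("txn.my_var", "bar", true)  -- only if it already exists
+ *   local v = applet:get_var("txn.my_var")
+ *   applet:unset_var("txn.my_var")
+ */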
+
+__LJMP static int hlua_applet_tcp_set_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua)
+ return 0;
+ hlua = s->hlua;
+
+ MAY_LJMP(check_args(L, 2, "set_priv"));
+
+ /* Remove previous value. */
+ luaL_unref(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ /* Get and store new value. */
+ lua_pushvalue(L, 2); /* Copy the element 2 at the top of the stack. */
+ hlua->Mref = luaL_ref(L, LUA_REGISTRYINDEX); /* pop the previously pushed value. */
+
+ return 0;
+}
+
+__LJMP static int hlua_applet_tcp_get_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+ hlua = s->hlua;
+
+ /* Push the stored private value onto the stack. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ return 1;
+}
+
+/* If the expected data are not yet available, this function yields. It
+ * consumes the data in the buffer and returns a string containing the
+ * data. This string can be empty.
+ */
+__LJMP static int hlua_applet_tcp_getline_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ int ret;
+ const char *blk1;
+ size_t len1;
+ const char *blk2;
+ size_t len2;
+
+ /* Read the maximum amount of data available. */
+ ret = co_getline_nc(sc_oc(sc), &blk1, &len1, &blk2, &len2);
+
+ /* Data not yet available: return a yield. */
+ if (ret == 0) {
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_getline_yield, TICK_ETERNITY, 0));
+ }
+
+ /* End of data: commit the buffered string and return it. */
+ if (ret < 0) {
+ luaL_pushresult(&luactx->b);
+ return 1;
+ }
+
+ /* Ensure that the block 2 length is usable. */
+ if (ret == 1)
+ len2 = 0;
+
+ /* getline has no maximum length: append both blocks entirely. */
+ luaL_addlstring(&luactx->b, blk1, len1);
+ luaL_addlstring(&luactx->b, blk2, len2);
+
+ /* Consume input channel output buffer data. */
+ co_skip(sc_oc(sc), len1 + len2);
+ luaL_pushresult(&luactx->b);
+ return 1;
+}
+
+/* Check arguments for the function "hlua_applet_tcp_getline_yield". */
+__LJMP static int hlua_applet_tcp_getline(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_tcp_getline_yield(L, 0, 0));
+}
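+
+/* Lua-side sketch, assuming the binding above is exposed as getline(): it
+ * returns one line of input, including the trailing '\n' when present, and
+ * the returned string may be empty:
+ *
+ *   local line = applet:getline()
+ */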
+
+/* If the expected data is not yet available, this function returns a yield.
+ * It consumes the data in the buffer and returns a string containing the
+ * data. This string can be empty.
+ */
+__LJMP static int hlua_applet_tcp_recv_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ size_t len = MAY_LJMP(luaL_checkinteger(L, 2));
+ int ret;
+ const char *blk1;
+ size_t len1;
+ const char *blk2;
+ size_t len2;
+
+ /* Read the maximum amount of data available. */
+ ret = co_getblk_nc(sc_oc(sc), &blk1, &len1, &blk2, &len2);
+
+ /* Data not yet available: return a yield. */
+ if (ret == 0) {
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_recv_yield, TICK_ETERNITY, 0));
+ }
+
+ /* End of data: commit the buffered string and return it. */
+ if (ret < 0) {
+ luaL_pushresult(&luactx->b);
+ return 1;
+ }
+
+ /* Ensure that the block 2 length is usable. */
+ if (ret == 1)
+ len2 = 0;
+
+ if (len == -1) {
+
+ /* If len == -1, catenate all the available data and
+ * yield because we want to get all the data until
+ * the end of the data stream.
+ */
+ luaL_addlstring(&luactx->b, blk1, len1);
+ luaL_addlstring(&luactx->b, blk2, len2);
+ co_skip(sc_oc(sc), len1 + len2);
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_recv_yield, TICK_ETERNITY, 0));
+
+ } else {
+
+ /* Copy the first block, capping it to the required length. */
+ if (len1 > len)
+ len1 = len;
+ luaL_addlstring(&luactx->b, blk1, len1);
+ len -= len1;
+
+ /* Copy the second block. */
+ if (len2 > len)
+ len2 = len;
+ luaL_addlstring(&luactx->b, blk2, len2);
+ len -= len2;
+
+ /* Consume input channel output buffer data. */
+ co_skip(sc_oc(sc), len1 + len2);
+
+ /* If the requested length is not reached yet, yield waiting for new data. */
+ if (len > 0) {
+ lua_pushinteger(L, len);
+ lua_replace(L, 2);
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_recv_yield, TICK_ETERNITY, 0));
+ }
+
+ /* return the result. */
+ luaL_pushresult(&luactx->b);
+ return 1;
+ }
+
+ /* we never execute this */
+ hlua_pusherror(L, "Lua: internal error");
+ WILL_LJMP(lua_error(L));
+ return 0;
+}
+
+/* Check arguments for the function "hlua_applet_tcp_recv_yield". */
+__LJMP static int hlua_applet_tcp_recv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ int len = -1;
+
+ if (lua_gettop(L) > 2)
+ WILL_LJMP(luaL_error(L, "The 'recv' function requires between 1 and 2 arguments."));
+ if (lua_gettop(L) >= 2) {
+ len = MAY_LJMP(luaL_checkinteger(L, 2));
+ lua_pop(L, 1);
+ }
+
+ /* Confirm or set the required length */
+ lua_pushinteger(L, len);
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_tcp_recv_yield(L, 0, 0));
+}
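+
+/* Lua-side sketch, assuming the function above is exposed as receive():
+ * without a length it yields until the end of the data stream; with a
+ * length it returns exactly that many bytes once available:
+ *
+ *   local all   = applet:receive()     -- everything until the end of stream
+ *   local chunk = applet:receive(16)   -- exactly 16 bytes
+ */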
+
+/* Append data in the output side of the buffer. This data is immediately
+ * sent. The function returns the amount of data written. If the buffer
+ * cannot contain the data, the function yields. The function returns -1
+ * if the channel is closed.
+ */
+__LJMP static int hlua_applet_tcp_send_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ size_t len;
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ const char *str = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ int l = MAY_LJMP(luaL_checkinteger(L, 3));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *chn = sc_ic(sc);
+ int max;
+
+ /* Get the max amount of data which can be written as input in the channel. */
+ max = channel_recv_max(chn);
+ if (max > (len - l))
+ max = len - l;
+
+ /* Copy data. */
+ ci_putblk(chn, str + l, max);
+
+ /* update counters. */
+ l += max;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+
+ /* If some data was not sent, declare the situation to the
+ * applet and return a yield.
+ */
+ if (l < len) {
+ sc_need_room(sc);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_send_yield, TICK_ETERNITY, 0));
+ }
+
+ return 1;
+}
+
+/* Just a wrapper of "hlua_applet_tcp_send_yield". This wrapper permits
+ * yield the LUA process, and resume it without checking the
+ * input arguments.
+ */
+__LJMP static int hlua_applet_tcp_send(lua_State *L)
+{
+ MAY_LJMP(check_args(L, 2, "send"));
+ lua_pushinteger(L, 0);
+
+ return MAY_LJMP(hlua_applet_tcp_send_yield(L, 0, 0));
+}
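+
+/* Lua-side sketch: since the wrapper pushes the "already sent" counter
+ * before entering the yield loop above, the caller simply writes:
+ *
+ *   applet:send("hello\n")
+ */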
+
+/*
+ *
+ *
+ * Class AppletHTTP
+ *
+ *
+ */
+
+/* Returns a struct hlua_appctx if the stack entry "ud" is
+ * a class AppletHTTP, otherwise it throws an error.
+ */
+__LJMP static struct hlua_appctx *hlua_checkapplet_http(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_applet_http_ref));
+}
+
+/* This function creates and pushes on the stack an Applet object
+ * according to the current TXN.
+ * It relies on the caller to have already reserved the room in ctx->svcctx
+ * for the local storage of hlua_http_ctx.
+ */
+static int hlua_applet_http_new(lua_State *L, struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = ctx->svcctx;
+ struct hlua_appctx *luactx;
+ struct hlua_txn htxn;
+ struct stream *s = appctx_strm(ctx);
+ struct proxy *px = s->be;
+ struct htx *htx;
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ struct ist path;
+ unsigned long long len = 0;
+ int32_t pos;
+ struct http_uri_parser parser;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the Converters object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ luactx = lua_newuserdata(L, sizeof(*luactx));
+ lua_rawseti(L, -2, 0);
+ luactx->appctx = ctx;
+ http_ctx->status = 200; /* Default status code returned. */
+ http_ctx->reason = NULL; /* Use default reason based on status */
+ luactx->htxn.s = s;
+ luactx->htxn.p = px;
+
+ /* Create the "f" field that contains a list of fetches. */
+ lua_pushstring(L, "f");
+ if (!hlua_fetches_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sf" field that contains a list of stringsafe fetches. */
+ lua_pushstring(L, "sf");
+ if (!hlua_fetches_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "c" field that contains a list of converters. */
+ lua_pushstring(L, "c");
+ if (!hlua_converters_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sc" field that contains a list of stringsafe converters. */
+ lua_pushstring(L, "sc");
+ if (!hlua_converters_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ htx = htxbuf(&s->req.buf);
+ blk = htx_get_first_blk(htx);
+ BUG_ON(!blk || htx_get_blk_type(blk) != HTX_BLK_REQ_SL);
+ sl = htx_get_blk_ptr(htx, blk);
+
+ /* Stores the request method. */
+ lua_pushstring(L, "method");
+ lua_pushlstring(L, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl));
+ lua_settable(L, -3);
+
+ /* Stores the http version. */
+ lua_pushstring(L, "version");
+ lua_pushlstring(L, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl));
+ lua_settable(L, -3);
+
+ /* Creates an array of headers. hlua_http_get_headers() creates and pushes
+ * the array on the top of the stack.
+ */
+ lua_pushstring(L, "headers");
+ htxn.s = s;
+ htxn.p = px;
+ htxn.dir = SMP_OPT_DIR_REQ;
+ if (!hlua_http_get_headers(L, &htxn.s->txn->req))
+ return 0;
+ lua_settable(L, -3);
+
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (isttest(path)) {
+ char *p, *q, *end;
+
+ p = path.ptr;
+ end = istend(path);
+ q = p;
+ while (q < end && *q != '?')
+ q++;
+
+ /* Stores the request path. */
+ lua_pushstring(L, "path");
+ lua_pushlstring(L, p, q - p);
+ lua_settable(L, -3);
+
+ /* Stores the query string. */
+ lua_pushstring(L, "qs");
+ if (*q == '?')
+ q++;
+ lua_pushlstring(L, q, end - q);
+ lua_settable(L, -3);
+ }
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ if (htx->extra != ULLONG_MAX)
+ len += htx->extra;
+
+ /* Stores the request body length. */
+ lua_pushstring(L, "length");
+ lua_pushinteger(L, len);
+ lua_settable(L, -3);
+
+ /* Create an empty array for the HTTP response headers. */
+ lua_pushstring(L, "response");
+ lua_newtable(L);
+ lua_settable(L, -3);
+
+ /* Retrieve the class AppletHTTP metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_applet_http_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
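+
+/* From the Lua side, the object built above exposes "method", "version",
+ * "path", "qs", "length", "headers" and "response" fields, plus the "f",
+ * "sf", "c" and "sc" fetch/converter lists. A hypothetical HTTP service
+ * could start with:
+ *
+ *   core.register_service("example", "http", function(applet)
+ *       if applet.method == "GET" then
+ *           -- ...
+ *       end
+ *   end)
+ */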
+
+__LJMP static int hlua_applet_http_set_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set_var' needs between 3 and 4 arguments"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Converts the third argument in a sample. */
+ memset(&smp, 0, sizeof(smp));
+ hlua_lua2smp(L, 3, &smp);
+
+ /* Store the sample in a variable. */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+
+ if (lua_gettop(L) == 4 && lua_toboolean(L, 4))
+ lua_pushboolean(L, vars_set_by_name_ifexist(name, len, &smp) != 0);
+ else
+ lua_pushboolean(L, vars_set_by_name(name, len, &smp) != 0);
+
+ return 1;
+}
+
+__LJMP static int hlua_applet_http_unset_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "unset_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Unset the variable. */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ lua_pushboolean(L, vars_unset_by_name_ifexist(name, len, &smp) != 0);
+ return 1;
+}
+
+__LJMP static int hlua_applet_http_get_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "get_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ if (!vars_get_by_name(name, len, &smp, NULL)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ return hlua_smp2lua(L, &smp);
+}
+
+__LJMP static int hlua_applet_http_set_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua)
+ return 0;
+ hlua = s->hlua;
+
+ MAY_LJMP(check_args(L, 2, "set_priv"));
+
+ /* Remove previous value. */
+ luaL_unref(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ /* Get and store new value. */
+ lua_pushvalue(L, 2); /* Copy the element 2 at the top of the stack. */
+ hlua->Mref = luaL_ref(L, LUA_REGISTRYINDEX); /* pop the previously pushed value. */
+
+ return 0;
+}
+
+__LJMP static int hlua_applet_http_get_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+ hlua = s->hlua;
+
+ /* Push configuration index in the stack. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ return 1;
+}
+
+/* If the expected data is not yet available, this function returns a yield.
+ * It consumes the data in the buffer and returns a string containing the
+ * data. This string can be empty.
+ */
+__LJMP static int hlua_applet_http_getline_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *req = sc_oc(sc);
+ struct htx *htx;
+ struct htx_blk *blk;
+ size_t count;
+ int stop = 0;
+
+ htx = htx_from_buf(&req->buf);
+ count = co_data(req);
+ blk = htx_get_first_blk(htx);
+
+ while (count && !stop && blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+ uint32_t vlen;
+ char *nl;
+
+ vlen = sz;
+ if (vlen > count) {
+ if (type != HTX_BLK_DATA)
+ break;
+ vlen = count;
+ }
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ v.len = vlen;
+ nl = istchr(v, '\n');
+ if (nl != NULL) {
+ stop = 1;
+ vlen = nl - v.ptr + 1;
+ }
+ luaL_addlstring(&luactx->b, v.ptr, vlen);
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ stop = 1;
+ break;
+
+ default:
+ break;
+ }
+
+ c_rew(req, vlen);
+ count -= vlen;
+ if (sz == vlen)
+ blk = htx_remove_blk(htx, blk);
+ else {
+ htx_cut_data_blk(htx, blk, vlen);
+ break;
+ }
+ }
+
+ /* The message was fully consumed and no more data are expected
+ * (EOM flag set).
+ */
+ if (htx_is_empty(htx) && (req->flags & CF_EOI))
+ stop = 1;
+
+ htx_to_buf(htx, &req->buf);
+ if (!stop) {
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_getline_yield, TICK_ETERNITY, 0));
+ }
+
+ /* return the result. */
+ luaL_pushresult(&luactx->b);
+ return 1;
+}
+
+
+/* Check arguments for the function "hlua_applet_http_getline_yield". */
+__LJMP static int hlua_applet_http_getline(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_http_getline_yield(L, 0, 0));
+}
+
+/* If the expected data is not yet available, this function returns a yield.
+ * It consumes the data in the buffer and returns a string containing the
+ * data. This string can be empty.
+ */
+__LJMP static int hlua_applet_http_recv_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *req = sc_oc(sc);
+ struct htx *htx;
+ struct htx_blk *blk;
+ size_t count;
+ int len;
+
+ htx = htx_from_buf(&req->buf);
+ len = MAY_LJMP(luaL_checkinteger(L, 2));
+ count = co_data(req);
+ blk = htx_get_head_blk(htx);
+ while (count && len && blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+ uint32_t vlen;
+
+ vlen = sz;
+ if (len > 0 && vlen > len)
+ vlen = len;
+ if (vlen > count) {
+ if (type != HTX_BLK_DATA)
+ break;
+ vlen = count;
+ }
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ luaL_addlstring(&luactx->b, v.ptr, vlen);
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ len = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ c_rew(req, vlen);
+ count -= vlen;
+ if (len > 0)
+ len -= vlen;
+ if (sz == vlen)
+ blk = htx_remove_blk(htx, blk);
+ else {
+ htx_cut_data_blk(htx, blk, vlen);
+ break;
+ }
+ }
+
+ /* The message was fully consumed and no more data are expected
+ * (EOM flag set).
+ */
+ if (htx_is_empty(htx) && (req->flags & CF_EOI))
+ len = 0;
+
+ htx_to_buf(htx, &req->buf);
+
+ /* If more data is still expected, yield waiting for new data. */
+ if (len) {
+ if (len > 0) {
+ lua_pushinteger(L, len);
+ lua_replace(L, 2);
+ }
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_recv_yield, TICK_ETERNITY, 0));
+ }
+
+ /* return the result. */
+ luaL_pushresult(&luactx->b);
+ return 1;
+}
+
+/* Check arguments for the function "hlua_applet_http_recv_yield". */
+__LJMP static int hlua_applet_http_recv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ int len = -1;
+
+ /* Check arguments. */
+ if (lua_gettop(L) > 2)
+ WILL_LJMP(luaL_error(L, "The 'recv' function requires between 1 and 2 arguments."));
+ if (lua_gettop(L) >= 2) {
+ len = MAY_LJMP(luaL_checkinteger(L, 2));
+ lua_pop(L, 1);
+ }
+
+ lua_pushinteger(L, len);
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_http_recv_yield(L, 0, 0));
+}
+
+/* Append data in the output side of the buffer. This data is immediately
+ * sent. The function returns the amount of data written. If the buffer
+ * cannot contain the data, the function yields. The function returns -1
+ * if the channel is closed.
+ */
+__LJMP static int hlua_applet_http_send_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *res = sc_ic(sc);
+ struct htx *htx = htx_from_buf(&res->buf);
+ const char *data;
+ size_t len;
+ int l = MAY_LJMP(luaL_checkinteger(L, 3));
+ int max;
+
+ max = htx_get_max_blksz(htx, channel_htx_recv_max(res, htx));
+ if (!max)
+ goto snd_yield;
+
+ data = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ /* Get the max amount of data which can be written as input in the channel. */
+ if (max > (len - l))
+ max = len - l;
+
+ /* Copy data. */
+ max = htx_add_data(htx, ist2(data + l, max));
+ channel_add_input(res, max);
+
+ /* update counters. */
+ l += max;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+
+ /* If some data was not sent, declare the situation to the
+ * applet and return a yield.
+ */
+ if (l < len) {
+ snd_yield:
+ htx_to_buf(htx, &res->buf);
+ sc_need_room(sc);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_send_yield, TICK_ETERNITY, 0));
+ }
+
+ htx_to_buf(htx, &res->buf);
+ return 1;
+}
+
+/* Just a wrapper of "hlua_applet_send_yield". This wrapper permits
+ * yield the LUA process, and resume it without checking the
+ * input arguments.
+ */
+__LJMP static int hlua_applet_http_send(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
+
+ /* We want to send some data. Headers must be sent. */
+ if (!(http_ctx->flags & APPLET_HDR_SENT)) {
+ hlua_pusherror(L, "Lua: 'send' you must call start_response() before sending data.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* This integer is used for following the amount of data sent. */
+ lua_pushinteger(L, 0);
+
+ return MAY_LJMP(hlua_applet_http_send_yield(L, 0, 0));
+}
+
+__LJMP static int hlua_applet_http_addheader(lua_State *L)
+{
+ const char *name;
+ int ret;
+
+ MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checkstring(L, 2));
+ MAY_LJMP(luaL_checkstring(L, 3));
+
+ /* Push in the stack the "response" entry. */
+ ret = lua_getfield(L, 1, "response");
+ if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Lua: 'add_header' internal error: AppletHTTP['response'] "
+ "is expected as an array. %s found", lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Check if the header is already registered. If it is not
+ * the case, register it.
+ */
+ ret = lua_getfield(L, -1, name);
+ if (ret == LUA_TNIL) {
+
+ /* Entry not found. */
+ lua_pop(L, 1); /* remove the nil. The "response" table is the top of the stack. */
+
+ /* Insert the new header name in the table on the top of the stack.
+ * It leaves the new array on the top of the stack.
+ */
+ lua_newtable(L);
+ lua_pushvalue(L, 2);
+ lua_pushvalue(L, -2);
+ lua_settable(L, -4);
+
+ } else if (ret != LUA_TTABLE) {
+
+ /* corruption error. */
+ hlua_pusherror(L, "Lua: 'add_header' internal error: AppletHTTP['response']['%s'] "
+ "is expected as an array. %s found", name, lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Now the top of the stack is an array of values. We push
+ * the header value as a new entry.
+ */
+ lua_pushvalue(L, 3);
+ ret = lua_rawlen(L, -2);
+ lua_rawseti(L, -2, ret + 1);
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+__LJMP static int hlua_applet_http_status(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ int status = MAY_LJMP(luaL_checkinteger(L, 2));
+ const char *reason = MAY_LJMP(luaL_optlstring(L, 3, NULL, NULL));
+ struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
+
+ if (status < 100 || status > 599) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ http_ctx->status = status;
+ http_ctx->reason = reason;
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+
+__LJMP static int hlua_applet_http_send_response(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *res = sc_ic(sc);
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct h1m h1m;
+ const char *status, *reason;
+ const char *name, *value;
+ size_t nlen, vlen;
+ unsigned int flags;
+
+ /* Send the message at once. */
+ htx = htx_from_buf(&res->buf);
+ h1m_init_res(&h1m);
+
+ /* Use the same HTTP version as the request. */
+ status = ultoa_r(http_ctx->status, trash.area, trash.size);
+ reason = http_ctx->reason;
+ if (reason == NULL)
+ reason = http_get_reason(http_ctx->status);
+ if (http_ctx->flags & APPLET_HTTP11) {
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist(status), ist(reason));
+ }
+ else {
+ flags = HTX_SL_F_IS_RESP;
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.0"), ist(status), ist(reason));
+ }
+ if (!sl) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed to create response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+ sl->info.res.status = http_ctx->status;
+
+ /* Get the array associated to the field "response" in the object AppletHTTP. */
+ lua_pushvalue(L, 0);
+ if (lua_getfield(L, 1, "response") != LUA_TTABLE) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response'] missing.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Browse the list of headers. */
+ lua_pushnil(L);
+ while(lua_next(L, -2) != 0) {
+ /* We expect a string as -2. */
+ if (lua_type(L, -2) != LUA_TSTRING) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response'][] element must be a string. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ lua_typename(L, lua_type(L, -2)));
+ WILL_LJMP(lua_error(L));
+ }
+ name = lua_tolstring(L, -2, &nlen);
+
+ /* We expect an array as -1. */
+ if (lua_type(L, -1) != LUA_TTABLE) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response']['%s'] element must be an table. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name,
+ lua_typename(L, lua_type(L, -1)));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Browse the table which is on the top of the stack. */
+ lua_pushnil(L);
+ while(lua_next(L, -2) != 0) {
+ int id;
+
+ /* We expect a number as -2. */
+ if (lua_type(L, -2) != LUA_TNUMBER) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response']['%s'][] element must be a number. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name,
+ lua_typename(L, lua_type(L, -2)));
+ WILL_LJMP(lua_error(L));
+ }
+ id = lua_tointeger(L, -2);
+
+ /* We expect a string as -1. */
+ if (lua_type(L, -1) != LUA_TSTRING) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response']['%s'][%d] element must be a string. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name, id,
+ lua_typename(L, lua_type(L, -1)));
+ WILL_LJMP(lua_error(L));
+ }
+ value = lua_tolstring(L, -1, &vlen);
+
+ /* Simple Protocol checks. */
+ if (isteqi(ist2(name, nlen), ist("transfer-encoding"))) {
+ int ret;
+
+ ret = h1_parse_xfer_enc_header(&h1m, ist2(value, vlen));
+ if (ret < 0) {
+ hlua_pusherror(L, "Lua applet http '%s': Invalid '%s' header.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name);
+ WILL_LJMP(lua_error(L));
+ }
+ else if (ret == 0)
+ goto next; /* Skip it */
+ }
+ else if (isteqi(ist2(name, nlen), ist("content-length"))) {
+ struct ist v = ist2(value, vlen);
+ int ret;
+
+ ret = h1_parse_cont_len_header(&h1m, &v);
+ if (ret < 0) {
+ hlua_pusherror(L, "Lua applet http '%s': Invalid '%s' header.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name);
+ WILL_LJMP(lua_error(L));
+ }
+ else if (ret == 0)
+ goto next; /* Skip it */
+ }
+
+ /* Add a new header */
+ if (!htx_add_header(htx, ist2(name, nlen), ist2(value, vlen))) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed to add header '%s' in the response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name);
+ WILL_LJMP(lua_error(L));
+ }
+ next:
+ /* Remove the array from the stack, and get next element with a remaining string. */
+ lua_pop(L, 1);
+ }
+
+ /* Remove the array from the stack, and get next element with a remaining string. */
+ lua_pop(L, 1);
+ }
+
+ if (h1m.flags & H1_MF_CHNK)
+ h1m.flags &= ~H1_MF_CLEN;
+ if (h1m.flags & (H1_MF_CLEN|H1_MF_CHNK))
+ h1m.flags |= H1_MF_XFER_LEN;
+
+ /* Set HTX start-line flags */
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m.flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m.flags & H1_MF_CLEN)
+ flags |= HTX_SL_F_CLEN;
+ if (h1m.body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ sl->flags |= flags;
+
+ /* If we don't have a content-length set, and the HTTP version is 1.1
+ * and the status code implies the presence of a message body, we must
+ * announce a chunked transfer-encoding. This is required by HAProxy
+ * for keep-alive compliance. If the applet announces a chunked
+ * transfer-encoding itself, don't do anything.
+ */
+ if ((flags & (HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN)) == HTX_SL_F_VER_11 &&
+ http_ctx->status >= 200 && http_ctx->status != 204 && http_ctx->status != 304) {
+ /* Add a new header */
+ sl->flags |= (HTX_SL_F_XFER_ENC|H1_MF_CHNK|H1_MF_XFER_LEN);
+ if (!htx_add_header(htx, ist("transfer-encoding"), ist("chunked"))) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed to add header 'transfer-encoding' in the response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ /* Finalize headers. */
+ if (!htx_add_endof(htx, HTX_BLK_EOH)) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed create the response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+
+ if (htx_used_space(htx) > b_size(&res->buf) - global.tune.maxrewrite) {
+ b_reset(&res->buf);
+ hlua_pusherror(L, "Lua: 'start_response': response header block too big");
+ WILL_LJMP(lua_error(L));
+ }
+
+ htx_to_buf(htx, &res->buf);
+ channel_add_input(res, htx->data);
+
+ /* Headers sent, set the flag. */
+ http_ctx->flags |= APPLET_HDR_SENT;
+ return 0;
+
+}
+/* We will build the status line and the headers of the HTTP response.
+ * We will try to send them at once; if it's not possible, we give back
+ * the hand, waiting for more room.
+ */
+__LJMP static int hlua_applet_http_start_response_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *res = sc_ic(sc);
+
+ if (co_data(res)) {
+ sc_need_room(sc);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_start_response_yield, TICK_ETERNITY, 0));
+ }
+ return MAY_LJMP(hlua_applet_http_send_response(L));
+}
+
+
+__LJMP static int hlua_applet_http_start_response(lua_State *L)
+{
+ return MAY_LJMP(hlua_applet_http_start_response_yield(L, 0, 0));
+}
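+
+/* A minimal end-to-end Lua sketch of the response path implemented above:
+ * the status and headers are buffered in the "response" table, emitted by
+ * start_response(), after which send() may stream the body (the service
+ * name is illustrative):
+ *
+ *   core.register_service("hello", "http", function(applet)
+ *       local body = "hello\n"
+ *       applet:set_status(200)
+ *       applet:add_header("content-type", "text/plain")
+ *       applet:add_header("content-length", tostring(#body))
+ *       applet:start_response()
+ *       applet:send(body)
+ *   end)
+ */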
+
+/*
+ *
+ *
+ * Class HTTP
+ *
+ *
+ */
+
+/* Returns a struct hlua_txn if the stack entry "ud" is
+ * a class HTTP, otherwise it throws an error.
+ */
+__LJMP static struct hlua_txn *hlua_checkhttp(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_http_ref));
+}
+
+/* This function creates and pushes on the stack an HTTP object
+ * according to the current TXN.
+ */
+static int hlua_http_new(lua_State *L, struct hlua_txn *txn)
+{
+ struct hlua_txn *htxn;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the Converters object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ htxn = lua_newuserdata(L, sizeof(*htxn));
+ lua_rawseti(L, -2, 0);
+
+ htxn->s = txn->s;
+ htxn->p = txn->p;
+ htxn->dir = txn->dir;
+ htxn->flags = txn->flags;
+
+ /* Retrieve the class HTTP metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_http_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* This function creates and returns an array containing the status-line
+ * elements. This function does not fail.
+ */
+__LJMP static int hlua_http_get_stline(lua_State *L, struct htx_sl *sl)
+{
+ /* Create the table. */
+ lua_newtable(L);
+
+ if (sl->flags & HTX_SL_F_IS_RESP) {
+ lua_pushstring(L, "version");
+ lua_pushlstring(L, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "code");
+ lua_pushlstring(L, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "reason");
+ lua_pushlstring(L, HTX_SL_RES_RPTR(sl), HTX_SL_RES_RLEN(sl));
+ lua_settable(L, -3);
+ }
+ else {
+ lua_pushstring(L, "method");
+ lua_pushlstring(L, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "uri");
+ lua_pushlstring(L, HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "version");
+ lua_pushlstring(L, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl));
+ lua_settable(L, -3);
+ }
+ return 1;
+}
+
+/* This function creates and returns an array of HTTP headers.
+ * This function does not fail. It is used as a wrapper by the
+ * 2 following functions.
+ */
+__LJMP static int hlua_http_get_headers(lua_State *L, struct http_msg *msg)
+{
+ struct htx *htx;
+ int32_t pos;
+
+ /* Create the table. */
+ lua_newtable(L);
+
+
+ htx = htxbuf(&msg->chn->buf);
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+ int len;
+
+ if (type == HTX_BLK_HDR) {
+ n = htx_get_blk_name(htx,blk);
+ v = htx_get_blk_value(htx, blk);
+ }
+ else if (type == HTX_BLK_EOH)
+ break;
+ else
+ continue;
+
+ /* Check for existing entry:
+ * assume that the table is on the top of the stack, and
+ * push the key in the stack, the function lua_gettable()
+ * perform the lookup.
+ */
+ lua_pushlstring(L, n.ptr, n.len);
+ lua_gettable(L, -2);
+
+ switch (lua_type(L, -1)) {
+ case LUA_TNIL:
+ /* Table not found, create it. */
+ lua_pop(L, 1); /* remove the nil value. */
+ lua_pushlstring(L, n.ptr, n.len); /* push the header name as key. */
+ lua_newtable(L); /* create and push empty table. */
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, 0); /* index header value (pop it). */
+ lua_rawset(L, -3); /* index new table with header name (pop the values). */
+ break;
+
+ case LUA_TTABLE:
+ /* Entry found: push the value in the table. */
+ len = lua_rawlen(L, -1);
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, len+1); /* index header value (pop it). */
+ lua_pop(L, 1); /* remove the table (it is stored in the main table). */
+ break;
+
+ default:
+ /* Other cases are errors. */
+ hlua_pusherror(L, "internal error during the parsing of headers.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ return 1;
+}
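+
+/* The resulting Lua table is indexed by header name, each entry being a
+ * zero-based array of values. A sketch of the expected shape:
+ *
+ *   local hdrs = txn.http:req_get_headers()
+ *   local host = hdrs["host"][0]   -- first "host" value, if any
+ */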
+
+__LJMP static int hlua_http_req_get_headers(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 1, "req_get_headers"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_get_headers(L, &htxn->s->txn->req);
+}
+
+__LJMP static int hlua_http_res_get_headers(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 1, "res_get_headers"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_get_headers(L, &htxn->s->txn->rsp);
+}
+
+/* This function replaces a full header, or just a value, in
+ * the request or in the response. It is a wrapper for the
+ * 4 following functions.
+ */
+__LJMP static inline int hlua_http_rep_hdr(lua_State *L, struct http_msg *msg, int full)
+{
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+ const char *reg = MAY_LJMP(luaL_checkstring(L, 3));
+ const char *value = MAY_LJMP(luaL_checkstring(L, 4));
+ struct htx *htx;
+ struct my_regex *re;
+
+ if (!(re = regex_comp(reg, 1, 1, NULL)))
+ WILL_LJMP(luaL_argerror(L, 3, "invalid regex"));
+
+ htx = htxbuf(&msg->chn->buf);
+ http_replace_hdrs(chn_strm(msg->chn), htx, ist2(name, name_len), value, re, full);
+ regex_free(re);
+ return 0;
+}
+
+__LJMP static int hlua_http_req_rep_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "req_rep_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->req, 1));
+}
+
+__LJMP static int hlua_http_res_rep_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "res_rep_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->rsp, 1));
+}
+
+__LJMP static int hlua_http_req_rep_val(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "req_rep_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->req, 0));
+}
+
+__LJMP static int hlua_http_res_rep_val(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "res_rep_val"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->rsp, 0));
+}
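+
+/* Lua-side sketch of the four wrappers above: the third argument is a regex
+ * matched against the full header line (rep_hdr) or against each
+ * comma-separated value (rep_val), the fourth the replacement:
+ *
+ *   txn.http:req_rep_hdr("user-agent", ".*", "redacted")
+ */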
+
+/* This function deletes all the occurrences of a header.
+ * It is a wrapper for the 2 following functions.
+ */
+__LJMP static inline int hlua_http_del_hdr(lua_State *L, struct http_msg *msg)
+{
+ size_t len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct http_hdr_ctx ctx;
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist2(name, len), &ctx, 1))
+ http_remove_header(htx, &ctx);
+ return 0;
+}
+
+__LJMP static int hlua_http_req_del_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "req_del_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_del_hdr(L, &htxn->s->txn->req);
+}
+
+__LJMP static int hlua_http_res_del_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "res_del_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_del_hdr(L, &htxn->s->txn->rsp);
+}
+
+/* This function adds a header. It is a wrapper used by
+ * the 2 following functions.
+ */
+__LJMP static inline int hlua_http_add_hdr(lua_State *L, struct http_msg *msg)
+{
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+ size_t value_len;
+ const char *value = MAY_LJMP(luaL_checklstring(L, 3, &value_len));
+ struct htx *htx = htxbuf(&msg->chn->buf);
+
+ lua_pushboolean(L, http_add_header(htx, ist2(name, name_len),
+ ist2(value, value_len)));
+ return 0;
+}
+
+__LJMP static int hlua_http_req_add_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "req_add_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_add_hdr(L, &htxn->s->txn->req);
+}
+
+__LJMP static int hlua_http_res_add_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "res_add_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_add_hdr(L, &htxn->s->txn->rsp);
+}
+
+static int hlua_http_req_set_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "req_set_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ hlua_http_del_hdr(L, &htxn->s->txn->req);
+ return hlua_http_add_hdr(L, &htxn->s->txn->req);
+}
+
+static int hlua_http_res_set_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "res_set_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ hlua_http_del_hdr(L, &htxn->s->txn->rsp);
+ return hlua_http_add_hdr(L, &htxn->s->txn->rsp);
+}
+
+/* This function sets the method. */
+static int hlua_http_req_set_meth(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_req_replace_stline(0, name, name_len, htxn->p, htxn->s) != -1);
+ return 1;
+}
+
+/* This function sets the path. */
+static int hlua_http_req_set_path(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_req_replace_stline(1, name, name_len, htxn->p, htxn->s) != -1);
+ return 1;
+}
+
+/* This function sets the query-string. */
+static int hlua_http_req_set_query(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ /* Check length. */
+ if (name_len > trash.size - 1) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ /* Add the question mark as a prefix. */
+ chunk_reset(&trash);
+ trash.area[trash.data++] = '?';
+ memcpy(trash.area + trash.data, name, name_len);
+ trash.data += name_len;
+
+ lua_pushboolean(L,
+ http_req_replace_stline(2, trash.area, trash.data, htxn->p, htxn->s) != -1);
+ return 1;
+}
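+
+/* Lua-side sketch: the leading '?' is added by the code above, so the
+ * caller passes the bare query-string:
+ *
+ *   txn.http:req_set_query("a=1&b=2")   -- rewrites the query to "?a=1&b=2"
+ */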
+
+/* This function sets the URI. */
+static int hlua_http_req_set_uri(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_req_replace_stline(3, name, name_len, htxn->p, htxn->s) != -1);
+ return 1;
+}
+
+/* This function sets the response code and, optionally, the reason. */
+static int hlua_http_res_set_status(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ unsigned int code = MAY_LJMP(luaL_checkinteger(L, 2));
+ const char *str = MAY_LJMP(luaL_optlstring(L, 3, NULL, NULL));
+ const struct ist reason = ist2(str, (str ? strlen(str) : 0));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ http_res_set_status(code, reason, htxn->s);
+ return 0;
+}
+
+/*
+ *
+ *
+ * Class HTTPMessage
+ *
+ *
+ */
+
+/* Returns a struct http_msg if the stack entry "ud" is a class HTTPMessage,
+ * otherwise it throws an error.
+ */
+__LJMP static struct http_msg *hlua_checkhttpmsg(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_http_msg_ref));
+}
+
+/* Creates and pushes on the stack an HTTPMessage object wrapping the given HTTP message.
+ */
+static int hlua_http_msg_new(lua_State *L, struct http_msg *msg)
+{
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ lua_newtable(L);
+ lua_pushlightuserdata(L, msg);
+ lua_rawseti(L, -2, 0);
+
+ /* Create the "channel" field that contains the request channel object. */
+ lua_pushstring(L, "channel");
+ if (!hlua_channel_new(L, msg->chn))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Retrieve the class HTTPMessage metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_http_msg_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* Helper function returning a filter attached to the HTTP message at the
+ * position <ud> in the stack, filling the current offset and length of the
+ * filter. If no filter is attached, NULL is returned and <offset> and <len> are
+ * filled with output and input length respectively.
+ */
+static struct filter *hlua_http_msg_filter(lua_State *L, int ud, struct http_msg *msg, size_t *offset, size_t *len)
+{
+ struct channel *chn = msg->chn;
+ struct htx *htx = htxbuf(&chn->buf);
+ struct filter *filter = NULL;
+
+ *offset = co_data(msg->chn);
+ *len = htx->data - co_data(msg->chn);
+
+ if (lua_getfield(L, ud, "__filter") == LUA_TLIGHTUSERDATA) {
+ filter = lua_touserdata (L, -1);
+ if (msg->msg_state >= HTTP_MSG_DATA) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ *offset = flt_ctx->cur_off[CHN_IDX(chn)];
+ *len = flt_ctx->cur_len[CHN_IDX(chn)];
+ }
+ }
+
+ lua_pop(L, 1);
+ return filter;
+}
+
+/* Returns true if the channel attached to the HTTP message is the response
+ * channel.
+ */
+__LJMP static int hlua_http_msg_is_resp(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 1, "is_resp"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ lua_pushboolean(L, !!(msg->chn->flags & CF_ISRESP));
+ return 1;
+}
+
+/* Returns an array containing the status-line elements of the HTTP message. It relies
+ * on hlua_http_get_stline().
+ */
+__LJMP static int hlua_http_msg_get_stline(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+ struct htx_sl *sl;
+
+ MAY_LJMP(check_args(L, 1, "get_stline"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ htx = htxbuf(&msg->chn->buf);
+ sl = http_get_stline(htx);
+ if (!sl)
+ return 0;
+ return hlua_http_get_stline(L, sl);
+}
+
+/* Returns an array containing all headers of the HTTP message. It relies on
+ * hlua_http_get_headers().
+ */
+__LJMP static int hlua_http_msg_get_headers(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 1, "get_headers"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_get_headers(L, msg);
+}
+
+/* Deletes all occurrences of a header in the HTTP message, matching on its
+ * name. It relies on hlua_http_del_hdr().
+ */
+__LJMP static int hlua_http_msg_del_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 2, "del_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_del_hdr(L, msg);
+}
+
+/* Matches the full value line of all occurrences of a header in the HTTP
+ * message given its name against a regex and replaces it if it matches. It
+ * relies on hlua_http_rep_hdr().
+ */
+__LJMP static int hlua_http_msg_rep_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 4, "rep_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_rep_hdr(L, msg, 1);
+}
+
+/* Matches all comma-separated values of all occurrences of a header in the HTTP
+ * message given its name against a regex and replaces it if it matches. It
+ * relies on hlua_http_rep_hdr().
+ */
+__LJMP static int hlua_http_msg_rep_val(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 4, "rep_value"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_rep_hdr(L, msg, 0);
+}
+
+/* Adds a header in the HTTP message. It relies on hlua_http_add_hdr(). */
+__LJMP static int hlua_http_msg_add_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 3, "add_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_add_hdr(L, msg);
+}
+
+/* Adds a header in the HTTP message, removing existing headers with the same
+ * name. It relies on hlua_http_del_hdr() and hlua_http_add_hdr().
+ */
+__LJMP static int hlua_http_msg_set_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 3, "set_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ hlua_http_del_hdr(L, msg);
+ return hlua_http_add_hdr(L, msg);
+}
+
+/* Rewrites the request method. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_meth(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_method"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(0, name, name_len, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the request path. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_path(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_path"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(1, name, name_len, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the request query-string. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_query(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_query"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ /* Check length. */
+ if (name_len > trash.size - 1) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ /* Add the mark question as prefix. */
+ chunk_reset(&trash);
+ trash.area[trash.data++] = '?';
+ memcpy(trash.area + trash.data, name, name_len);
+ trash.data += name_len;
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(2, trash.area, trash.data, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the request URI. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_uri(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_uri"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(3, name, name_len, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the response status code. It relies on http_res_set_status(). */
+__LJMP static int hlua_http_msg_set_status(lua_State *L)
+{
+ struct http_msg *msg;
+ unsigned int code;
+ const char *reason;
+ size_t reason_len;
+
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ code = MAY_LJMP(luaL_checkinteger(L, 2));
+ reason = MAY_LJMP(luaL_optlstring(L, 3, NULL, &reason_len));
+
+ if (!(msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_res_set_status(code, ist2(reason, reason_len), chn_strm(msg->chn)) != -1);
+ return 1;
+}
+
+/* Returns true if the HTTP message is full. */
+__LJMP static int hlua_http_msg_is_full(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 1, "is_full"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ lua_pushboolean(L, channel_full(msg->chn, 0));
+ return 1;
+}
+
+/* Returns true if the HTTP message may still receive data. */
+__LJMP static int hlua_http_msg_may_recv(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "may_recv"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ lua_pushboolean(L, (htx_expect_more(htx) && !channel_input_closed(msg->chn) && channel_may_recv(msg->chn)));
+ return 1;
+}
+
+/* Returns true if the HTTP message EOM was received */
+__LJMP static int hlua_http_msg_is_eom(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "may_recv"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ lua_pushboolean(L, !htx_expect_more(htx));
+ return 1;
+}
+
+/* Returns the number of bytes available in the input side of the HTTP
+ * message. This function never fails.
+ */
+__LJMP static int hlua_http_msg_get_in_len(lua_State *L)
+{
+ struct http_msg *msg;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "input"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ hlua_http_msg_filter(L, 1, msg, &output, &input);
+ lua_pushinteger(L, input);
+ return 1;
+}
+
+/* Returns the number of bytes available in the output side of the HTTP
+ * message. This function never fails.
+ */
+__LJMP static int hlua_http_msg_get_out_len(lua_State *L)
+{
+ struct http_msg *msg;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "output"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ hlua_http_msg_filter(L, 1, msg, &output, &input);
+ lua_pushinteger(L, output);
+ return 1;
+}
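+
+/* Lua-side sketch, from a filter's payload callback, assuming the two
+ * bindings above are exposed as input() and output():
+ *
+ *   local pending = msg:input()    -- bytes on the input side
+ *   local done    = msg:output()   -- bytes on the output side
+ */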
+
+/* Copies at most <len> bytes of DATA blocks from the HTTP message <msg>
+ * starting at the offset <offset> and puts them in a Lua string variable. It
+ * returns the built string length. It stops on the first non-DATA HTX
+ * block. This function is called during the payload filtering, so the headers
+ * are already scheduled for output (from the filter point of view).
+ */
+static int _hlua_http_msg_dup(struct http_msg *msg, lua_State *L, size_t offset, size_t len)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_blk *blk;
+ struct htx_ret htxret;
+ luaL_Buffer b;
+ int ret = 0;
+
+ luaL_buffinit(L, &b);
+ htxret = htx_find_offset(htx, offset);
+ for (blk = htxret.blk, offset = htxret.ret; blk && len; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist v;
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ v = isttrim(v, len);
+
+ luaL_addlstring(&b, v.ptr, v.len);
+ ret += v.len;
+ break;
+
+ default:
+ if (!ret)
+ goto no_data;
+ goto end;
+ }
+ offset = 0;
+ }
+
+end:
+ if (!ret && (htx->flags & HTX_FL_EOM))
+ goto no_data;
+ luaL_pushresult(&b);
+ return ret;
+
+ no_data:
+ /* Remove the empty string and push nil on the stack */
+ lua_pop(L, 1);
+ lua_pushnil(L);
+ return 0;
+}
+
+/* Copies the string <str> to the HTTP message <msg> at the offset
+ * <offset>. This function returns -1 if data cannot be copied. Otherwise, it
+ * returns the amount of data written. This function is responsible for
+ * updating the filter context.
+ */
+static int _hlua_http_msg_insert(struct http_msg *msg, struct filter *filter, struct ist str, size_t offset)
+{
+ struct htx *htx = htx_from_buf(&msg->chn->buf);
+ struct htx_ret htxret;
+ int ret = 0;
+
+ /* Nothing to do, just return */
+ if (unlikely(istlen(str) == 0))
+ goto end;
+
+ if (istlen(str) > htx_free_data_space(htx)) {
+ ret = -1;
+ goto end;
+ }
+
+ htxret = htx_find_offset(htx, offset);
+ if (!htxret.blk || htx_get_blk_type(htxret.blk) != HTX_BLK_DATA) {
+ if (!htx_add_last_data(htx, str))
+ goto end;
+ }
+ else {
+ struct ist v = htx_get_blk_value(htx, htxret.blk);
+ v.ptr += htxret.ret;
+ v.len = 0;
+ if (!htx_replace_blk_value(htx, htxret.blk, v, str))
+ goto end;
+ }
+ ret = str.len;
+ if (ret) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ flt_update_offsets(filter, msg->chn, ret);
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] += ret;
+ }
+
+ end:
+ htx_to_buf(htx, &msg->chn->buf);
+ return ret;
+}
+
+/* Helper function removing at most <len> bytes of DATA blocks at the absolute
+ * position <offset>. It stops on the first non-DATA HTX block. This function is
+ * called during the payload filtering, so the headers are already scheduled for
+ * output (from the filter point of view). This function is responsible for
+ * updating the filter context.
+ */
+static void _hlua_http_msg_delete(struct http_msg *msg, struct filter *filter, size_t offset, size_t len)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ struct htx *htx = htx_from_buf(&msg->chn->buf);
+ struct htx_blk *blk;
+ struct htx_ret htxret;
+ size_t ret = 0;
+
+ /* Be sure <len> is always the amount of DATA to remove */
+ if (htx->data == offset+len && htx_get_tail_type(htx) == HTX_BLK_DATA) {
+ /* When htx tail type == HTX_BLK_DATA, no need to take care
+ * of special blocks like HTX_BLK_EOT.
+ * We simply truncate after offset
+ * (truncate targeted blk and discard the following ones)
+ */
+ htx_truncate(htx, offset);
+ ret = len;
+ goto end;
+ }
+
+ htxret = htx_find_offset(htx, offset);
+ blk = htxret.blk;
+ if (htxret.ret) {
+ /* dealing with offset: we need to trim targeted blk */
+ struct ist v;
+
+ if (htx_get_blk_type(blk) != HTX_BLK_DATA)
+ goto end;
+
+ v = htx_get_blk_value(htx, blk);
+ v.ptr += htxret.ret;
+ v.len -= htxret.ret;
+
+ v = isttrim(v, len);
+ /* trimming data in blk: discard everything after the offset
+ * (replace 'v' with 'IST_NULL')
+ */
+ blk = htx_replace_blk_value(htx, blk, v, IST_NULL);
+ if (blk && v.len < len) {
+ /* In this case, caller wants to keep removing data,
+ * but we need to spare current blk
+ * because it was already trimmed
+ */
+ blk = htx_get_next_blk(htx, blk);
+ }
+ len -= v.len;
+ ret += v.len;
+ }
+
+ while (blk && len) {
+ /* there is more data that needs to be discarded */
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ if (len < sz) {
+ /* don't discard whole blk, only part of it
+ * (from the beginning)
+ */
+ htx_cut_data_blk(htx, blk, len);
+ ret += len;
+ goto end;
+ }
+ break;
+
+ default:
+ /* HTX_BLK_EOT blk won't be removed */
+ goto end;
+ }
+
+ /* Remove all the data block */
+ len -= sz;
+ ret += sz;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+end:
+ flt_update_offsets(filter, msg->chn, -ret);
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] -= ret;
+ /* WARNING: we don't call htx_to_buf() on purpose, because we don't want
+ * to lose the EOM flag if the message is empty.
+ */
+}
+
+/* Copies input data found in an HTTP message. Unlike the channel function used
+ * to duplicate raw data, this one can only be called inside a filter, from
+ * the http_payload callback. So it cannot yield. An exception is raised if it
+ * is called from another callback. If nothing was copied, a nil value is pushed on
+ * the stack.
+ */
+__LJMP static int hlua_http_msg_get_body(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ size_t output, input;
+ int offset, len;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'data' expects at most 2 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(msg->chn) && channel_input_closed(msg->chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto dup;
+ if (len == -1)
+ len = global.tune.bufsize;
+ if (len < 0) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ dup:
+ _hlua_http_msg_dup(msg, L, offset, len);
+ return 1;
+}
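+
+/* Illustrative Lua sketch, assuming this binding is exposed as the "body"
+ * method of the HTTPMessage class (the registration is outside this hunk):
+ *
+ *   function MyFilter:http_payload(msg)
+ *       local body = msg:body()      -- copy all buffered input data
+ *       local tail = msg:body(-16)   -- copy at most the last 16 bytes
+ *   end
+ */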
+
+/* Appends a string to the HTTP message, after all existing DATA blocks but
+ * before the trailers, if any. It returns the amount of data written or -1 if
+ * nothing was copied. Unlike the channel function used to append data, this one
+ * can only be called inside a filter, from the http_payload callback. So it
+ * cannot yield. An exception is raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_append(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ const char *str;
+ size_t offset, len, sz;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "append"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset+len);
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Prepends a string to the HTTP message, before all existing DATA blocks. It
+ * returns the amount of data written or -1 if nothing was copied. Unlike the
+ * channel function used to prepend data, this one can only be called inside a
+ * filter, from the http_payload callback. So it cannot yield. An exception is
+ * raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_prepend(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ const char *str;
+ size_t offset, len, sz;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "prepend"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Inserts a string into the HTTP message at a given offset. By default the
+ * string is appended at the end of DATA blocks. It returns the amount of data
+ * written or -1 if nothing was copied. Unlike the channel function used to
+ * insert data, this one can only be called inside a filter, from the
+ * http_payload callback. So it cannot yield. An exception is raised if it is
+ * called from another callback.
+ */
+__LJMP static int hlua_http_msg_insert_data(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ const char *str;
+ size_t input, output, sz;
+ int offset;
+ int ret;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'insert' expects at least 1 argument and at most 2 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset > output + input) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ lua_pushinteger(L, ret);
+ return 1;
+}
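+
+/* Illustrative Lua sketch, assuming the three bindings above are exposed as
+ * the "append", "prepend" and "insert" methods of the HTTPMessage class, as
+ * their check_args() strings suggest (registration is outside this hunk):
+ *
+ *   function MyFilter:http_payload(msg)
+ *       msg:prepend("head")    -- before all existing DATA blocks
+ *       msg:append("tail")     -- after all DATA blocks, before trailers
+ *       msg:insert("mid", 4)   -- at a relative offset of 4 bytes
+ *   end
+ */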
+
+/* Removes a given amount of data from the HTTP message at a given offset. By
+ * default all DATA blocks are removed. It returns the amount of data
+ * removed. Unlike the channel function used to remove data, this one can only
+ * be called inside a filter, from the http_payload callback. So it cannot
+ * yield. An exception is raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_del_data(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ size_t input, output;
+ int offset, len;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'remove' expects at most 2 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset > output + input) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto end;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ _hlua_http_msg_delete(msg, filter, offset, len);
+
+ end:
+ lua_pushinteger(L, len);
+ return 1;
+}
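+
+/* Illustrative Lua sketch, assuming this binding is exposed as the "remove"
+ * method of the HTTPMessage class, as its error message suggests:
+ *
+ *   function MyFilter:http_payload(msg)
+ *       msg:remove(0, 10)   -- drop the first 10 payload bytes
+ *       msg:remove()        -- or drop all remaining DATA blocks
+ *   end
+ */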
+
+/* Replaces a given amount of data at the given offset by a string. By default,
+ * all remaining data are removed, according to the filter context. It returns
+ * the amount of data written or -1 if nothing was copied. Unlike the channel
+ * function used to replace data, this one can only be called inside a filter,
+ * from the http_payload callback. So it cannot yield. An exception is raised
+ * if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_set_data(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ struct htx *htx;
+ const char *str;
+ size_t input, output, sz;
+ int offset, len;
+ int ret;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set' expects at least 1 argument and at most 3 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 4) {
+ len = MAY_LJMP(luaL_checkinteger(L, 4));
+ if (!len)
+ goto set;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ set:
+ /* Be sure we can copy the string once input data has been removed. */
+ htx = htx_from_buf(&msg->chn->buf);
+ if (sz > htx_free_data_space(htx) + len)
+ lua_pushinteger(L, -1);
+ else {
+ _hlua_http_msg_delete(msg, filter, offset, len);
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ lua_pushinteger(L, ret);
+ }
+ return 1;
+}
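+
+/* Illustrative Lua sketch, assuming this binding is exposed as the "set"
+ * method of the HTTPMessage class; it chains the delete and insert helpers
+ * above, so it rewrites a byte range in place:
+ *
+ *   function MyFilter:http_payload(msg)
+ *       -- replace the first 5 payload bytes with "hello"
+ *       local written = msg:set("hello", 0, 5)
+ *   end
+ */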
+
+/* Prepends data to an HTTP message and forwards it, from the filter point of
+ * view. It returns the amount of data written or -1 if nothing was sent. Unlike
+ * the channel function used to send data, this one can only be called inside a
+ * filter, from the http_payload callback. So it cannot yield. An exception is
+ * raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_send(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ struct htx *htx;
+ const char *str;
+ size_t offset, len, sz;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "send"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ /* Return an error if the channel's output is closed */
+ if (unlikely(channel_output_closed(msg->chn))) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ htx = htx_from_buf(&msg->chn->buf);
+ if (sz > htx_free_data_space(htx)) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ if (ret > 0) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ FLT_OFF(filter, msg->chn) += ret;
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] -= ret;
+ flt_ctx->cur_off[CHN_IDX(msg->chn)] += ret;
+ }
+
+ lua_pushinteger(L, ret);
+ return 1;
+}
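+
+/* Illustrative Lua sketch, assuming this binding is exposed as the "send"
+ * method of the HTTPMessage class; the inserted bytes are immediately
+ * accounted as forwarded from the filter's point of view:
+ *
+ *   function MyFilter:http_payload(msg)
+ *       if msg:send("early data") == -1 then
+ *           -- output closed or not enough room in the buffer
+ *       end
+ *   end
+ */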
+
+/* Forwards a given amount of bytes. It returns -1 if the channel's output is
+ * closed. Otherwise, it returns the number of bytes forwarded. Unlike the
+ * channel function used to forward data, this one can only be called inside a
+ * filter, from the http_payload callback. So it cannot yield. An exception is
+ * raised if it is called from another callback. All the other functions deal
+ * only with DATA blocks; this one does not.
+ */
+__LJMP static int hlua_http_msg_forward(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ size_t offset, len;
+ int fwd, ret = 0;
+
+ MAY_LJMP(check_args(L, 2, "forward"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ fwd = MAY_LJMP(luaL_checkinteger(L, 2));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ /* Nothing to do, just return */
+ if (!fwd)
+ goto end;
+
+ /* Return an error if the channel's output is closed */
+ if (unlikely(channel_output_closed(msg->chn))) {
+ ret = -1;
+ goto end;
+ }
+
+ ret = fwd;
+ if (ret > len)
+ ret = len;
+
+ if (ret) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ FLT_OFF(filter, msg->chn) += ret;
+ flt_ctx->cur_off[CHN_IDX(msg->chn)] += ret;
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] -= ret;
+ }
+
+ end:
+ lua_pushinteger(L, ret);
+ return 1;
+}
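+
+/* Illustrative Lua sketch, assuming this binding is exposed as the "forward"
+ * method of the HTTPMessage class; a pass-through filter would typically
+ * forward everything it has seen:
+ *
+ *   function MyFilter:http_payload(msg)
+ *       -- msg:input() is assumed here to return the buffered input length
+ *       msg:forward(msg:input())
+ *   end
+ */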
+
+/* Set EOM flag on the HTX message.
+ *
+ * NOTE: Not sure it is a good idea to manipulate this flag but for now I don't
+ * really know how to do without this feature.
+ */
+__LJMP static int hlua_http_msg_set_eom(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "set_eom"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ htx->flags |= HTX_FL_EOM;
+ return 0;
+}
+
+/* Unset EOM flag on the HTX message.
+ *
+ * NOTE: Not sure it is a good idea to manipulate this flag but for now I don't
+ * really know how to do without this feature.
+ */
+__LJMP static int hlua_http_msg_unset_eom(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "unset_eom"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ htx->flags &= ~HTX_FL_EOM;
+ return 0;
+}
+
+/*
+ *
+ *
+ * Class HTTPClient
+ *
+ *
+ */
+__LJMP static struct hlua_httpclient *hlua_checkhttpclient(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_httpclient_ref));
+}
+
+
+/* Stops the httpclient and asks it to kill itself */
+__LJMP static int hlua_httpclient_gc(lua_State *L)
+{
+ struct hlua_httpclient *hlua_hc;
+
+ MAY_LJMP(check_args(L, 1, "__gc"));
+
+ hlua_hc = MAY_LJMP(hlua_checkhttpclient(L, 1));
+
+ if (MT_LIST_DELETE(&hlua_hc->by_hlua)) {
+ /* we won the race against hlua_httpclient_destroy_all() */
+ httpclient_stop_and_destroy(hlua_hc->hc);
+ hlua_hc->hc = NULL;
+ }
+
+ return 0;
+}
+
+
+__LJMP static int hlua_httpclient_new(lua_State *L)
+{
+ struct hlua_httpclient *hlua_hc;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3)) {
+ hlua_pusherror(L, "httpclient: full stack");
+ goto err;
+ }
+ /* Create the object: obj[0] = userdata. */
+ lua_newtable(L);
+ hlua_hc = MAY_LJMP(lua_newuserdata(L, sizeof(*hlua_hc)));
+ lua_rawseti(L, -2, 0);
+ memset(hlua_hc, 0, sizeof(*hlua_hc));
+
+ hlua_hc->hc = httpclient_new(hlua, 0, IST_NULL);
+ if (!hlua_hc->hc)
+ goto err;
+
+ MT_LIST_APPEND(&hlua->hc_list, &hlua_hc->by_hlua);
+
+ /* Retrieve the class HTTPClient metatable and set it on the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_httpclient_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+
+ err:
+ WILL_LJMP(lua_error(L));
+ return 0;
+}
+
+
+/*
+ * Callback of the httpclient: it wakes the Lua task up once the
+ * httpclient receives some data.
+ *
+ */
+
+static void hlua_httpclient_cb(struct httpclient *hc)
+{
+ struct hlua *hlua = hc->caller;
+
+ if (!hlua || !hlua->task)
+ return;
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+}
+
+/*
+ * Fill the lua stack with headers from the httpclient response
+ * This works the same way as the hlua_http_get_headers() function
+ */
+__LJMP static int hlua_httpclient_get_headers(lua_State *L, struct hlua_httpclient *hlua_hc)
+{
+ struct http_hdr *hdr;
+
+ lua_newtable(L);
+
+ for (hdr = hlua_hc->hc->res.hdrs; hdr && isttest(hdr->n); hdr++) {
+ struct ist n, v;
+ int len;
+
+ n = hdr->n;
+ v = hdr->v;
+
+ /* Check for existing entry:
+ * assume that the table is on the top of the stack, and
+ * push the key onto the stack; the function lua_gettable()
+ * performs the lookup.
+ */
+
+ lua_pushlstring(L, n.ptr, n.len);
+ lua_gettable(L, -2);
+
+ switch (lua_type(L, -1)) {
+ case LUA_TNIL:
+ /* Table not found, create it. */
+ lua_pop(L, 1); /* remove the nil value. */
+ lua_pushlstring(L, n.ptr, n.len); /* push the header name as key. */
+ lua_newtable(L); /* create and push empty table. */
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, 0); /* index header value (pop it). */
+ lua_rawset(L, -3); /* index new table with header name (pop the values). */
+ break;
+
+ case LUA_TTABLE:
+ /* Entry found: push the value in the table. */
+ len = lua_rawlen(L, -1);
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, len+1); /* index header value (pop it). */
+ lua_pop(L, 1); /* remove the table (it is stored in the main table). */
+ break;
+
+ default:
+ /* Other cases are errors. */
+ hlua_pusherror(L, "internal error during the parsing of headers.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ return 1;
+}
+
+/*
+ * Allocates and returns an array of http_hdr entries extracted from the
+ * <headers> Lua table.
+ *
+ * The caller must free the result.
+ */
+struct http_hdr *hlua_httpclient_table_to_hdrs(lua_State *L)
+{
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ struct http_hdr *result = NULL;
+ uint32_t hdr_num = 0;
+
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ struct ist name, value;
+ const char *n, *v;
+ size_t nlen, vlen;
+
+ if (!lua_isstring(L, -2) || !lua_istable(L, -1)) {
+ /* Skip element if the key is not a string or if the value is not a table */
+ goto next_hdr;
+ }
+
+ n = lua_tolstring(L, -2, &nlen);
+ name = ist2(n, nlen);
+
+ /* Loop on header's values */
+ lua_pushnil(L);
+ while (lua_next(L, -2)) {
+ if (!lua_isstring(L, -1)) {
+ /* Skip the value if it is not a string */
+ goto next_value;
+ }
+
+ v = lua_tolstring(L, -1, &vlen);
+ value = ist2(v, vlen);
+ name = ist2(n, nlen);
+
+ hdrs[hdr_num].n = istdup(name);
+ hdrs[hdr_num].v = istdup(value);
+
+ hdr_num++;
+
+ next_value:
+ lua_pop(L, 1);
+ }
+
+ next_hdr:
+ lua_pop(L, 1);
+
+ }
+
+ if (hdr_num) {
+ /* alloc and copy the headers in the httpclient struct */
+ result = calloc((hdr_num + 1), sizeof(*result));
+ if (!result)
+ goto skip_headers;
+ memcpy(result, hdrs, sizeof(struct http_hdr) * hdr_num);
+
+ result[hdr_num].n = IST_NULL;
+ result[hdr_num].v = IST_NULL;
+ }
+
+skip_headers:
+
+ return result;
+}
+
+
+/*
+ * On each yield, checks if there is some data in the httpclient and pushes it
+ * into the Lua buffer; once the httpclient has finished its job, pushes the
+ * result onto the stack.
+ */
+__LJMP static int hlua_httpclient_rcv_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct buffer *tr;
+ int res;
+ struct hlua *hlua = hlua_gethlua(L);
+ struct hlua_httpclient *hlua_hc = hlua_checkhttpclient(L, 1);
+
+
+ tr = get_trash_chunk();
+
+ res = httpclient_res_xfer(hlua_hc->hc, tr);
+ luaL_addlstring(&hlua_hc->b, b_orig(tr), res);
+
+ if (!httpclient_data(hlua_hc->hc) && httpclient_ended(hlua_hc->hc)) {
+
+ luaL_pushresult(&hlua_hc->b);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "status");
+ lua_pushinteger(L, hlua_hc->hc->res.status);
+ lua_settable(L, -3);
+
+
+ lua_pushstring(L, "reason");
+ lua_pushlstring(L, hlua_hc->hc->res.reason.ptr, hlua_hc->hc->res.reason.len);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "headers");
+ hlua_httpclient_get_headers(L, hlua_hc);
+ lua_settable(L, -3);
+
+ return 1;
+ }
+
+ if (httpclient_data(hlua_hc->hc))
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_rcv_yield, TICK_ETERNITY, 0));
+
+ return 0;
+}
+
+/*
+ * Call this when trying to stream a body during a request
+ */
+__LJMP static int hlua_httpclient_snd_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua *hlua;
+ struct hlua_httpclient *hlua_hc = hlua_checkhttpclient(L, 1);
+ const char *body_str = NULL;
+ int ret;
+ int end = 0;
+ size_t buf_len;
+ size_t to_send = 0;
+
+ hlua = hlua_gethlua(L);
+
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'get' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ ret = lua_getfield(L, -1, "body");
+ if (ret != LUA_TSTRING)
+ goto rcv;
+
+ body_str = lua_tolstring(L, -1, &buf_len);
+ lua_pop(L, 1);
+
+ to_send = buf_len - hlua_hc->sent;
+
+ if ((hlua_hc->sent + to_send) >= buf_len)
+ end = 1;
+
+ /* the end flag is always set since we are using the whole remaining size */
+ hlua_hc->sent += httpclient_req_xfer(hlua_hc->hc, ist2(body_str + hlua_hc->sent, to_send), end);
+
+ if (buf_len > hlua_hc->sent) {
+ /* still need to process the buffer */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_snd_yield, TICK_ETERNITY, 0));
+ } else {
+ /* we sent the whole request buffer, we can recv */
+ goto rcv;
+ }
+ return 0;
+
+rcv:
+
+ /* we return a "res" object */
+ lua_newtable(L);
+
+ lua_pushstring(L, "body");
+ luaL_buffinit(L, &hlua_hc->b);
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_rcv_yield, TICK_ETERNITY, 0));
+
+ return 1;
+}
+
+/*
+ * Send an HTTP request and wait for a response
+ */
+
+__LJMP static int hlua_httpclient_send(lua_State *L, enum http_meth_t meth)
+{
+ struct hlua_httpclient *hlua_hc;
+ struct http_hdr *hdrs = NULL;
+ struct http_hdr *hdrs_i = NULL;
+ struct hlua *hlua;
+ const char *url_str = NULL;
+ const char *body_str = NULL;
+ size_t buf_len = 0;
+ int ret;
+
+ hlua = hlua_gethlua(L);
+
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'get' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ if (lua_gettop(L) != 2 || lua_type(L, -1) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "'get' needs a table as argument"));
+
+ hlua_hc = hlua_checkhttpclient(L, 1);
+
+ lua_pushnil(L); /* first key */
+ while (lua_next(L, 2)) {
+ if (strcmp(lua_tostring(L, -2), "dst") == 0) {
+ if (httpclient_set_dst(hlua_hc->hc, lua_tostring(L, -1)) < 0)
+ WILL_LJMP(luaL_error(L, "Can't use the 'dst' argument"));
+
+ } else if (strcmp(lua_tostring(L, -2), "url") == 0) {
+ if (lua_type(L, -1) != LUA_TSTRING)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'url', must be a string"));
+ url_str = lua_tostring(L, -1);
+
+ } else if (strcmp(lua_tostring(L, -2), "timeout") == 0) {
+ if (lua_type(L, -1) != LUA_TNUMBER)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'timeout', must be a number"));
+ httpclient_set_timeout(hlua_hc->hc, lua_tointeger(L, -1));
+
+ } else if (strcmp(lua_tostring(L, -2), "headers") == 0) {
+ if (lua_type(L, -1) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'headers', must be a table"));
+ hdrs = hlua_httpclient_table_to_hdrs(L);
+
+ } else if (strcmp(lua_tostring(L, -2), "body") == 0) {
+ if (lua_type(L, -1) != LUA_TSTRING)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'body', must be a string"));
+ body_str = lua_tolstring(L, -1, &buf_len);
+
+ } else {
+ WILL_LJMP(luaL_error(L, "'%s' invalid parameter name", lua_tostring(L, -2)));
+ }
+ /* removes 'value'; keeps 'key' for next iteration */
+ lua_pop(L, 1);
+ }
+
+ if (!url_str) {
+ WILL_LJMP(luaL_error(L, "'get' need a 'url' argument"));
+ return 0;
+ }
+
+ hlua_hc->sent = 0;
+
+ istfree(&hlua_hc->hc->req.url);
+ hlua_hc->hc->req.url = istdup(ist(url_str));
+ hlua_hc->hc->req.meth = meth;
+
+ /* update the httpclient callbacks */
+ hlua_hc->hc->ops.res_stline = hlua_httpclient_cb;
+ hlua_hc->hc->ops.res_headers = hlua_httpclient_cb;
+ hlua_hc->hc->ops.res_payload = hlua_httpclient_cb;
+ hlua_hc->hc->ops.res_end = hlua_httpclient_cb;
+
+ /* a body is available, it will use the request callback */
+ if (body_str && buf_len) {
+ hlua_hc->hc->ops.req_payload = hlua_httpclient_cb;
+ }
+
+ ret = httpclient_req_gen(hlua_hc->hc, hlua_hc->hc->req.url, meth, hdrs, IST_NULL);
+
+ /* free the temporary headers array */
+ hdrs_i = hdrs;
+ while (hdrs_i && isttest(hdrs_i->n)) {
+ istfree(&hdrs_i->n);
+ istfree(&hdrs_i->v);
+ hdrs_i++;
+ }
+ ha_free(&hdrs);
+
+
+ if (ret != ERR_NONE) {
+ WILL_LJMP(luaL_error(L, "Can't generate the HTTP request"));
+ return 0;
+ }
+
+ if (!httpclient_start(hlua_hc->hc))
+ WILL_LJMP(luaL_error(L, "couldn't start the httpclient"));
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_snd_yield, TICK_ETERNITY, 0));
+
+ return 0;
+}
+
+/*
+ * Sends an HTTP HEAD request and waits for a response
+ *
+ * httpclient:head{url=<string>, [dst=<string>,] [headers=<table>,] [timeout=<int>,] [body=<string>]}
+ */
+__LJMP static int hlua_httpclient_head(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_HEAD);
+}
+
+/*
+ * Sends an HTTP GET request and waits for a response
+ *
+ * httpclient:get{url=<string>, [dst=<string>,] [headers=<table>,] [timeout=<int>,] [body=<string>]}
+ */
+__LJMP static int hlua_httpclient_get(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_GET);
+}
+
+/*
+ * Sends an HTTP PUT request and waits for a response
+ *
+ * httpclient:put{url=<string>, [dst=<string>,] [headers=<table>,] [timeout=<int>,] [body=<string>]}
+ */
+__LJMP static int hlua_httpclient_put(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_PUT);
+}
+
+/*
+ * Sends an HTTP POST request and waits for a response
+ *
+ * httpclient:post{url=<string>, [dst=<string>,] [headers=<table>,] [timeout=<int>,] [body=<string>]}
+ */
+__LJMP static int hlua_httpclient_post(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_POST);
+}
+
+
+/*
+ * Sends an HTTP DELETE request and waits for a response
+ *
+ * httpclient:delete{url=<string>, [dst=<string>,] [headers=<table>,] [timeout=<int>,] [body=<string>]}
+ */
+__LJMP static int hlua_httpclient_delete(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_DELETE);
+}
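+
+/* Illustrative Lua sketch putting the HTTPClient class together, assuming the
+ * constructor is exposed as core.httpclient() (registration is outside this
+ * hunk). The argument keys (url, dst, timeout, headers, body) are the ones
+ * parsed by hlua_httpclient_send() and the response fields (status, reason,
+ * headers, body) are the ones filled by hlua_httpclient_rcv_yield():
+ *
+ *   core.register_task(function()
+ *       local hc  = core.httpclient()
+ *       local res = hc:get{
+ *           url     = "http://127.0.0.1:8000/",
+ *           headers = { ["host"] = { "example.com" } },
+ *           timeout = 5000,
+ *       }
+ *       core.Info("status=" .. res.status .. " body_len=" .. #res.body)
+ *   end)
+ */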
+
+/*
+ *
+ *
+ * Class TXN
+ *
+ *
+ */
+
+/* Returns a struct hlua_txn if the stack entry "ud" is
+ * a class TXN, otherwise it throws an error.
+ */
+__LJMP static struct hlua_txn *hlua_checktxn(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_txn_ref));
+}
+
+__LJMP static int hlua_set_var(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set_var' needs between 3 and 4 arguments"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ /* Converts the third argument in a sample. */
+ memset(&smp, 0, sizeof(smp));
+ hlua_lua2smp(L, 3, &smp);
+
+ /* Store the sample in a variable. */
+ smp_set_owner(&smp, htxn->p, htxn->s->sess, htxn->s, htxn->dir & SMP_OPT_DIR);
+
+ if (lua_gettop(L) == 4 && lua_toboolean(L, 4))
+ lua_pushboolean(L, vars_set_by_name_ifexist(name, len, &smp) != 0);
+ else
+ lua_pushboolean(L, vars_set_by_name(name, len, &smp) != 0);
+
+ return 1;
+}
+
+__LJMP static int hlua_unset_var(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "unset_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ /* Unset the variable. */
+ smp_set_owner(&smp, htxn->p, htxn->s->sess, htxn->s, htxn->dir & SMP_OPT_DIR);
+ lua_pushboolean(L, vars_unset_by_name_ifexist(name, len, &smp) != 0);
+ return 1;
+}
+
+__LJMP static int hlua_get_var(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "get_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ smp_set_owner(&smp, htxn->p, htxn->s->sess, htxn->s, htxn->dir & SMP_OPT_DIR);
+ if (!vars_get_by_name(name, len, &smp, NULL)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ return hlua_smp2lua(L, &smp);
+}
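+
+/* Illustrative Lua sketch, assuming the three bindings above are exposed as
+ * the set_var(), unset_var() and get_var() methods of the TXN class. The
+ * optional boolean third argument of set_var() maps to
+ * vars_set_by_name_ifexist():
+ *
+ *   function handle(txn)
+ *       txn:set_var("req.my_flag", true)
+ *       txn:set_var("req.maybe", 42, true)   -- only if it already exists
+ *       local v = txn:get_var("req.my_flag")
+ *       txn:unset_var("req.my_flag")
+ *   end
+ */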
+
+__LJMP static int hlua_set_priv(lua_State *L)
+{
+ struct hlua *hlua;
+
+ MAY_LJMP(check_args(L, 2, "set_priv"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ MAY_LJMP(hlua_checktxn(L, 1));
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ /* Remove previous value. */
+ luaL_unref(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ /* Get and store new value. */
+ lua_pushvalue(L, 2); /* Copy the element 2 at the top of the stack. */
+ hlua->Mref = luaL_ref(L, LUA_REGISTRYINDEX); /* pop the previously pushed value. */
+
+ return 0;
+}
+
+__LJMP static int hlua_get_priv(lua_State *L)
+{
+ struct hlua *hlua;
+
+ MAY_LJMP(check_args(L, 1, "get_priv"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ MAY_LJMP(hlua_checktxn(L, 1));
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ /* Push configuration index in the stack. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ return 1;
+}
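+
+/* Illustrative Lua sketch: set_priv()/get_priv() store a single Lua value in
+ * the registry slot (Mref) of the current hlua context, so the value survives
+ * across calls within the same stream:
+ *
+ *   function handle(txn)
+ *       txn:set_priv({ started = core.now() })
+ *       -- ... later, in the same stream ...
+ *       local ctx = txn:get_priv()
+ *   end
+ */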
+
+/* Creates a stack entry containing a class TXN. This function
+ * returns 0 if the stack does not contain free slots,
+ * otherwise it returns 1.
+ */
+static int hlua_txn_new(lua_State *L, struct stream *s, struct proxy *p, int dir, int flags)
+{
+ struct hlua_txn *htxn;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* NOTE: The allocation never fails. On failure it
+ * throws an error, and the function never returns.
+ * If the throw is not available, the process is aborted.
+ */
+ /* Create the object: obj[0] = userdata. */
+ lua_newtable(L);
+ htxn = lua_newuserdata(L, sizeof(*htxn));
+ lua_rawseti(L, -2, 0);
+
+ htxn->s = s;
+ htxn->p = p;
+ htxn->dir = dir;
+ htxn->flags = flags;
+
+ /* Create the "f" field that contains a list of fetches. */
+ lua_pushstring(L, "f");
+ if (!hlua_fetches_new(L, htxn, HLUA_F_MAY_USE_HTTP))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "sf" field that contains a list of stringsafe fetches. */
+ lua_pushstring(L, "sf");
+ if (!hlua_fetches_new(L, htxn, HLUA_F_MAY_USE_HTTP | HLUA_F_AS_STRING))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "c" field that contains a list of converters. */
+ lua_pushstring(L, "c");
+ if (!hlua_converters_new(L, htxn, 0))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "sc" field that contains a list of stringsafe converters. */
+ lua_pushstring(L, "sc");
+ if (!hlua_converters_new(L, htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "req" field that contains the request channel object. */
+ lua_pushstring(L, "req");
+ if (!hlua_channel_new(L, &s->req))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "res" field that contains the response channel object. */
+ lua_pushstring(L, "res");
+ if (!hlua_channel_new(L, &s->res))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Creates the HTTP object if the current proxy allows http. */
+ lua_pushstring(L, "http");
+ if (IS_HTX_STRM(s)) {
+ if (!hlua_http_new(L, htxn))
+ return 0;
+ }
+ else
+ lua_pushnil(L);
+ lua_rawset(L, -3);
+
+ if ((htxn->flags & HLUA_TXN_CTX_MASK) == HLUA_TXN_FLT_CTX) {
+ /* HTTPMessage objects are only created when a Lua TXN is
+ * created from a filter context
+ */
+
+ /* Creates the HTTP-Request object if the current proxy allows http. */
+ lua_pushstring(L, "http_req");
+ if (p->mode == PR_MODE_HTTP) {
+ if (!hlua_http_msg_new(L, &s->txn->req))
+ return 0;
+ }
+ else
+ lua_pushnil(L);
+ lua_rawset(L, -3);
+
+ /* Creates the HTTP-Response object if the current proxy allows http. */
+ lua_pushstring(L, "http_res");
+ if (p->mode == PR_MODE_HTTP) {
+ if (!hlua_http_msg_new(L, &s->txn->rsp))
+ return 0;
+ }
+ else
+ lua_pushnil(L);
+ lua_rawset(L, -3);
+ }
+
+ /* Retrieve the class TXN metatable and set it on the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_txn_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+__LJMP static int hlua_txn_deflog(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "deflog"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+
+ hlua_sendlog(htxn->s->be, htxn->s->logs.level, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log(lua_State *L)
+{
+ int level;
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "log"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ level = MAY_LJMP(luaL_checkinteger(L, 2));
+ msg = MAY_LJMP(luaL_checkstring(L, 3));
+
+ if (level < 0 || level >= NB_LOG_LEVELS)
+ WILL_LJMP(luaL_argerror(L, 1, "Invalid loglevel."));
+
+ hlua_sendlog(htxn->s->be, level, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_debug(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Debug"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_DEBUG, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_info(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Info"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_INFO, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_warning(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Warning"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_WARNING, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_alert(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Alert"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_ALERT, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_loglevel(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ int ll;
+
+ MAY_LJMP(check_args(L, 2, "set_loglevel"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ ll = MAY_LJMP(luaL_checkinteger(L, 2));
+
+ if (ll < 0 || ll > 7)
+ WILL_LJMP(luaL_argerror(L, 2, "Bad log level. It must be between 0 and 7"));
+
+ htxn->s->logs.level = ll;
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_tos(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ int tos;
+
+ MAY_LJMP(check_args(L, 2, "set_tos"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ tos = MAY_LJMP(luaL_checkinteger(L, 2));
+
+ conn_set_tos(objt_conn(htxn->s->sess->origin), tos);
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_mark(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ int mark;
+
+ MAY_LJMP(check_args(L, 2, "set_mark"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ mark = MAY_LJMP(luaL_checkinteger(L, 2));
+
+ conn_set_mark(objt_conn(htxn->s->sess->origin), mark);
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_priority_class(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "set_priority_class"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ htxn->s->priority_class = queue_limit_class(MAY_LJMP(luaL_checkinteger(L, 2)));
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_priority_offset(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "set_priority_offset"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ htxn->s->priority_offset = queue_limit_offset(MAY_LJMP(luaL_checkinteger(L, 2)));
+ return 0;
+}
+
+/* Forwards the Reply object to the client. This function converts the reply
+ * into HTX and pushes it into the response channel. It is responsible for
+ * forwarding the message and terminating the transaction. It returns 1 on
+ * success and 0 on error. The Reply must be on top of the stack.
+ */
+__LJMP static int hlua_txn_forward_reply(lua_State *L, struct stream *s)
+{
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct h1m h1m;
+ const char *status, *reason, *body;
+ size_t status_len, reason_len, body_len;
+ int ret, code, flags;
+
+ code = 200;
+ status = "200";
+ status_len = 3;
+ ret = lua_getfield(L, -1, "status");
+ if (ret == LUA_TNUMBER) {
+ code = lua_tointeger(L, -1);
+ status = lua_tolstring(L, -1, &status_len);
+ }
+ lua_pop(L, 1);
+
+ reason = http_get_reason(code);
+ reason_len = strlen(reason);
+ ret = lua_getfield(L, -1, "reason");
+ if (ret == LUA_TSTRING)
+ reason = lua_tolstring(L, -1, &reason_len);
+ lua_pop(L, 1);
+
+ body = NULL;
+ body_len = 0;
+ ret = lua_getfield(L, -1, "body");
+ if (ret == LUA_TSTRING)
+ body = lua_tolstring(L, -1, &body_len);
+ lua_pop(L, 1);
+
+ /* Prepare the response before inserting the headers */
+ h1m_init_res(&h1m);
+ htx = htx_from_buf(&s->res.buf);
+ channel_htx_truncate(&s->res, htx);
+ if (s->txn->req.flags & HTTP_MSGF_VER_11) {
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"),
+ ist2(status, status_len), ist2(reason, reason_len));
+ }
+ else {
+ flags = HTX_SL_F_IS_RESP;
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.0"),
+ ist2(status, status_len), ist2(reason, reason_len));
+ }
+ if (!sl)
+ goto fail;
+ sl->info.res.status = code;
+
+ /* Push in the stack the "headers" entry. */
+ ret = lua_getfield(L, -1, "headers");
+ if (ret != LUA_TTABLE)
+ goto skip_headers;
+
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ struct ist name, value;
+ const char *n, *v;
+ size_t nlen, vlen;
+
+ if (!lua_isstring(L, -2) || !lua_istable(L, -1)) {
+ /* Skip element if the key is not a string or if the value is not a table */
+ goto next_hdr;
+ }
+
+ n = lua_tolstring(L, -2, &nlen);
+ name = ist2(n, nlen);
+ if (isteqi(name, ist("content-length"))) {
+ /* Always skip content-length header. It will be added
+ * later with the correct len
+ */
+ goto next_hdr;
+ }
+
+ /* Loop on header's values */
+ lua_pushnil(L);
+ while (lua_next(L, -2)) {
+ if (!lua_isstring(L, -1)) {
+ /* Skip the value if it is not a string */
+ goto next_value;
+ }
+
+ v = lua_tolstring(L, -1, &vlen);
+ value = ist2(v, vlen);
+
+ if (isteqi(name, ist("transfer-encoding")))
+ h1_parse_xfer_enc_header(&h1m, value);
+ if (!htx_add_header(htx, ist2(n, nlen), ist2(v, vlen)))
+ goto fail;
+
+ next_value:
+ lua_pop(L, 1);
+ }
+
+ next_hdr:
+ lua_pop(L, 1);
+ }
+ skip_headers:
+ lua_pop(L, 1);
+
+ /* Update h1m flags: CLEN is set if CHNK is not present */
+ if (!(h1m.flags & H1_MF_CHNK)) {
+ const char *clen = ultoa(body_len);
+
+ h1m.flags |= H1_MF_CLEN;
+ if (!htx_add_header(htx, ist("content-length"), ist(clen)))
+ goto fail;
+ }
+ if (h1m.flags & (H1_MF_CLEN|H1_MF_CHNK))
+ h1m.flags |= H1_MF_XFER_LEN;
+
+ /* Update HTX start-line flags */
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m.flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m.flags & H1_MF_CLEN)
+ flags |= HTX_SL_F_CLEN;
+ if (h1m.body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ sl->flags |= flags;
+
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH) ||
+ (body_len && !htx_add_data_atonce(htx, ist2(body, body_len))))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+
+ /* Now, forward the response and terminate the transaction */
+ s->txn->status = code;
+ htx_to_buf(htx, &s->res.buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ return 1;
+
+ fail:
+ channel_htx_truncate(&s->res, htx);
+ return 0;
+}
+
+/* Terminates a transaction if called from a lua action. For TCP streams,
+ * processing is just aborted. Nothing is returned to the client and all
+ * arguments are ignored. For HTTP streams, if a reply is passed as argument, it
+ * is forwarded to the client before terminating the transaction. On success,
+ * the function exits with the ACT_RET_ABRT code (or -1 from a filter context).
+ * If an error occurs, it exits with the ACT_RET_ERR code. If this function is
+ * not called from a lua action, it just exits without any processing.
+ */
+__LJMP static int hlua_txn_done(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ struct stream *s;
+ int finst;
+
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+
+ /* If the HLUA_TXN_NOTERM flag is set, we cannot terminate the session, so we
+ * just end the execution of the current lua code. */
+ if (htxn->flags & HLUA_TXN_NOTERM)
+ WILL_LJMP(hlua_done(L));
+
+ s = htxn->s;
+ if (!IS_HTX_STRM(htxn->s)) {
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+
+ channel_auto_read(req);
+ channel_abort(req);
+ channel_auto_close(req);
+ channel_erase(req);
+
+ res->wex = tick_add_ifset(now_ms, res->wto);
+ channel_auto_read(res);
+ channel_auto_close(res);
+ channel_shutr_now(res);
+
+ finst = ((htxn->dir == SMP_OPT_DIR_REQ) ? SF_FINST_R : SF_FINST_D);
+ goto done;
+ }
+
+ if (lua_gettop(L) == 1 || !lua_istable(L, 2)) {
+ /* No reply or invalid reply */
+ s->txn->status = 0;
+ http_reply_and_close(s, 0, NULL);
+ }
+ else {
+ /* Remove extra args to have the reply on top of the stack */
+ if (lua_gettop(L) > 2)
+ lua_pop(L, lua_gettop(L) - 2);
+
+ if (!hlua_txn_forward_reply(L, s)) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ lua_pushinteger(L, ACT_RET_ERR);
+ WILL_LJMP(hlua_done(L));
+ return 0; /* Never reached */
+ }
+ }
+
+ finst = ((htxn->dir == SMP_OPT_DIR_REQ) ? SF_FINST_R : SF_FINST_H);
+ if (htxn->dir == SMP_OPT_DIR_REQ) {
+ /* let's log the request time */
+ s->logs.tv_request = now;
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+ }
+
+ done:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= finst;
+
+ if ((htxn->flags & HLUA_TXN_CTX_MASK) == HLUA_TXN_FLT_CTX)
+ lua_pushinteger(L, -1);
+ else
+ lua_pushinteger(L, ACT_RET_ABRT);
+ WILL_LJMP(hlua_done(L));
+ return 0;
+}
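+
+/* Illustrative Lua sketch: from a Lua action, terminating an HTTP transaction
+ * with a reply table whose fields (status, reason, body, headers) match what
+ * hlua_txn_forward_reply() reads from the top of the stack:
+ *
+ *   core.register_action("deny_teapot", { "http-req" }, function(txn)
+ *       txn:done({
+ *           status  = 418,
+ *           body    = "short and stout\n",
+ *           headers = { ["content-type"] = { "text/plain" } },
+ *       })
+ *   end)
+ */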
+
+/*
+ *
+ *
+ * Class REPLY
+ *
+ *
+ */
+
+/* Pushes the TXN reply onto the top of the stack. If the stack does not have a
+ * free slot, the function fails and returns 0.
+ */
+static int hlua_txn_reply_new(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *reason, *body = NULL;
+ int ret, status;
+
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ if (!IS_HTX_STRM(htxn->s)) {
+ hlua_pusherror(L, "txn object is not an HTTP transaction.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Default value. The default reason is computed later, once the final
+ * status is known, unless a custom one is given in the argument table.
+ */
+ status = 200;
+ reason = NULL;
+
+ if (lua_istable(L, 2)) {
+ /* load status and reason from the table argument at index 2 */
+ ret = lua_getfield(L, 2, "status");
+ if (ret == LUA_TNIL)
+ goto reason;
+ else if (ret != LUA_TNUMBER) {
+ /* invalid status: ignore the reason */
+ goto body;
+ }
+ status = lua_tointeger(L, -1);
+
+ reason:
+ lua_pop(L, 1); /* restore the stack: remove status */
+ ret = lua_getfield(L, 2, "reason");
+ if (ret == LUA_TSTRING)
+ reason = lua_tostring(L, -1);
+
+ body:
+ lua_pop(L, 1); /* restore the stack: remove invalid status or reason */
+ ret = lua_getfield(L, 2, "body");
+ if (ret == LUA_TSTRING)
+ body = lua_tostring(L, -1);
+ lua_pop(L, 1); /* restore the stack: remove body */
+ }
+
+ /* Create the Reply table */
+ lua_newtable(L);
+
+ /* Add status element */
+ lua_pushstring(L, "status");
+ lua_pushinteger(L, status);
+ lua_settable(L, -3);
+
+ /* Add reason element, defaulting to the standard reason of the status
+ * code when no custom one was provided
+ */
+ if (!reason)
+ reason = http_get_reason(status);
+ lua_pushstring(L, "reason");
+ lua_pushstring(L, reason);
+ lua_settable(L, -3);
+
+ /* Add body element, nil if undefined */
+ lua_pushstring(L, "body");
+ if (body)
+ lua_pushstring(L, body);
+ else
+ lua_pushnil(L);
+ lua_settable(L, -3);
+
+ /* Add headers element */
+ lua_pushstring(L, "headers");
+ lua_newtable(L);
+
+ /* stack: [ txn, <Arg:table>, <Reply:table>, "headers", <headers:table> ] */
+ if (lua_istable(L, 2)) {
+ /* load headers from the table argument at index 2. If it is a table, copy it. */
+ ret = lua_getfield(L, 2, "headers");
+ if (ret == LUA_TTABLE) {
+ /* stack: [ ... <headers:table>, <table> ] */
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ /* stack: [ ... <headers:table>, <table>, k, v] */
+ if (!lua_isstring(L, -1) && !lua_istable(L, -1)) {
+ /* invalid value type, skip it */
+ lua_pop(L, 1);
+ continue;
+ }
+
+
+ /* Duplicate the key and swap it with the value. */
+ lua_pushvalue(L, -2);
+ lua_insert(L, -2);
+ /* stack: [ ... <headers:table>, <table>, k, k, v ] */
+
+ lua_newtable(L);
+ lua_insert(L, -2);
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table>, v ] */
+
+ if (lua_isstring(L, -1)) {
+ /* push the value in the inner table */
+ lua_rawseti(L, -2, 1);
+ }
+ else { /* table */
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table>, <v:table>, k2, v2 ] */
+ if (!lua_isstring(L, -1)) {
+ /* invalid value type, skip it*/
+ lua_pop(L, 1);
+ continue;
+ }
+ /* push the value in the inner table */
+ lua_rawseti(L, -4, lua_rawlen(L, -4) + 1);
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table>, <v:table>, k2 ] */
+ }
+ lua_pop(L, 1);
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table> ] */
+ }
+
+ /* push (k,v) on the stack in the headers table:
+ * stack: [ ... <headers:table>, <table>, k, k, v ]
+ */
+ lua_settable(L, -5);
+ /* stack: [ ... <headers:table>, <table>, k ] */
+ }
+ }
+ lua_pop(L, 1);
+ }
+ /* stack: [ txn, <Arg:table>, <Reply:table>, "headers", <headers:table> ] */
+ lua_settable(L, -3);
+ /* stack: [ txn, <Arg:table>, <Reply:table> ] */
+
+ /* Retrieve the class Reply metatable and set it on the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_txn_reply_ref);
+ lua_setmetatable(L, -2);
+ return 1;
+}
+
+/* Set the reply status code, and optionally the reason. If no reason is
+ * provided, the default one corresponding to the status code is used.
+ */
+__LJMP static int hlua_txn_reply_set_status(lua_State *L)
+{
+ int status = MAY_LJMP(luaL_checkinteger(L, 2));
+ const char *reason = MAY_LJMP(luaL_optlstring(L, 3, NULL, NULL));
+
+ /* First argument (self) must be a table */
+ luaL_checktype(L, 1, LUA_TTABLE);
+
+ if (status < 100 || status > 599) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+ if (!reason)
+ reason = http_get_reason(status);
+
+ lua_pushinteger(L, status);
+ lua_setfield(L, 1, "status");
+
+ lua_pushstring(L, reason);
+ lua_setfield(L, 1, "reason");
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+/* Adds a header into the reply object. Each header name is associated with an
+ * array of values in the "headers" table. If the header name is not found, a
+ * new entry is created.
+ */
+__LJMP static int hlua_txn_reply_add_header(lua_State *L)
+{
+ const char *name = MAY_LJMP(luaL_checkstring(L, 2));
+ const char *value = MAY_LJMP(luaL_checkstring(L, 3));
+ int ret;
+
+ /* First argument (self) must be a table */
+ luaL_checktype(L, 1, LUA_TTABLE);
+
+ /* Push in the stack the "headers" entry. */
+ ret = lua_getfield(L, 1, "headers");
+ if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Reply['headers'] is expected to a an array. %s found", lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* check if the header is already registered. If not, register it. */
+ ret = lua_getfield(L, -1, name);
+ if (ret == LUA_TNIL) {
+ /* Entry not found. */
+ lua_pop(L, 1); /* remove the nil. The "headers" table is the top of the stack. */
+
+ /* Insert a new array for this header name in the "headers" table,
+ * and leave the new array on the top of the stack.
+ */
+ lua_newtable(L);
+ lua_pushstring(L, name);
+ lua_pushvalue(L, -2);
+ lua_settable(L, -4);
+ }
+ else if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Reply['headers']['%s'] is expected to be an array. %s found", name, lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Now the top of the stack is an array of values. We push
+ * the header value as a new entry.
+ */
+ lua_pushstring(L, value);
+ ret = lua_rawlen(L, -2);
+ lua_rawseti(L, -2, ret + 1);
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+/* Remove all occurrences of a given header name. */
+__LJMP static int hlua_txn_reply_del_header(lua_State *L)
+{
+ const char *name = MAY_LJMP(luaL_checkstring(L, 2));
+ int ret;
+
+ /* First argument (self) must be a table */
+ luaL_checktype(L, 1, LUA_TTABLE);
+
+ /* Push in the stack the "headers" entry. */
+ ret = lua_getfield(L, 1, "headers");
+ if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Reply['headers'] is expected to be an array. %s found", lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ lua_pushstring(L, name);
+ lua_pushnil(L);
+ lua_settable(L, -3);
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+/* Set the reply's body. Overwrite any existing entry. */
+__LJMP static int hlua_txn_reply_set_body(lua_State *L)
+{
+ const char *payload = MAY_LJMP(luaL_checkstring(L, 2));
+
+ /* First argument (self) must be a table */
+ luaL_checktype(L, 1, LUA_TTABLE);
+
+ lua_pushstring(L, payload);
+ lua_setfield(L, 1, "body");
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
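+
+/* Illustrative Lua sketch putting the Reply class together, assuming the
+ * bindings above are exposed as the set_status(), add_header(), del_header()
+ * and set_body() methods and that txn:reply() maps to hlua_txn_reply_new():
+ *
+ *   local reply = txn:reply({ status = 403 })
+ *   reply:set_status(403, "Forbidden")
+ *   reply:add_header("content-type", "text/plain")
+ *   reply:set_body("access denied\n")
+ *   txn:done(reply)
+ */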
+
+__LJMP static int hlua_log(lua_State *L)
+{
+ int level;
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 2, "log"));
+ level = MAY_LJMP(luaL_checkinteger(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+
+ if (level < 0 || level >= NB_LOG_LEVELS)
+ WILL_LJMP(luaL_argerror(L, 1, "Invalid loglevel."));
+
+ hlua_sendlog(NULL, level, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_debug(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "debug"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_DEBUG, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_info(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "info"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_INFO, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_warning(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "warning"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_WARNING, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_alert(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "alert"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_ALERT, msg);
+ return 0;
+}
+
+__LJMP static int hlua_sleep_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ int wakeup_ms = lua_tointeger(L, -1);
+ if (!tick_is_expired(wakeup_ms, now_ms))
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_sleep_yield, wakeup_ms, 0));
+ return 0;
+}
+
+__LJMP static int hlua_sleep(lua_State *L)
+{
+ unsigned int delay;
+ unsigned int wakeup_ms;
+
+ MAY_LJMP(check_args(L, 1, "sleep"));
+
+ delay = MAY_LJMP(luaL_checkinteger(L, 1)) * 1000;
+ wakeup_ms = tick_add(now_ms, delay);
+ lua_pushinteger(L, wakeup_ms);
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_sleep_yield, wakeup_ms, 0));
+ return 0;
+}
+
+__LJMP static int hlua_msleep(lua_State *L)
+{
+ unsigned int delay;
+ unsigned int wakeup_ms;
+
+ MAY_LJMP(check_args(L, 1, "msleep"));
+
+ delay = MAY_LJMP(luaL_checkinteger(L, 1));
+ wakeup_ms = tick_add(now_ms, delay);
+ lua_pushinteger(L, wakeup_ms);
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_sleep_yield, wakeup_ms, 0));
+ return 0;
+}
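+
+/* Illustrative Lua sketch: both bindings compute a wakeup tick and yield
+ * until it expires, so core.sleep(1) and core.msleep(1000) are equivalent:
+ *
+ *   core.register_task(function()
+ *       core.sleep(1)       -- seconds
+ *       core.msleep(1000)   -- milliseconds
+ *   end)
+ */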
+
+/* This function is a Lua binding. It gives the hand back
+ * to the HAProxy scheduler. It is used when the
+ * Lua processing consumes a lot of time.
+ */
+__LJMP static int hlua_yield_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ return 0;
+}
+
+__LJMP static int hlua_yield(lua_State *L)
+{
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_yield_yield, TICK_ETERNITY, HLUA_CTRLYIELD));
+ return 0;
+}
+
+/* This function changes the nice value of the currently executing
+ * task. It is used to set a low or high priority for the current
+ * task.
+ */
+__LJMP static int hlua_set_nice(lua_State *L)
+{
+ struct hlua *hlua;
+ int nice;
+
+ MAY_LJMP(check_args(L, 1, "set_nice"));
+ nice = MAY_LJMP(luaL_checkinteger(L, 1));
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+
+ /* If the task is not set, I'm in a start mode. */
+ if (!hlua || !hlua->task)
+ return 0;
+
+ if (nice < -1024)
+ nice = -1024;
+ else if (nice > 1024)
+ nice = 1024;
+
+ hlua->task->nice = nice;
+ return 0;
+}
+
+/* This function is used as a callback of a task. It is called by the
+ * HAProxy task subsystem when the task is woken up. The Lua runtime can
+ * return an E_AGAIN signal; the emitter of this signal must then set a
+ * signal to wake the task up again.
+ *
+ * The task wrapper is longjmp-safe because the only Lua code
+ * executed is the safe hlua_ctx_resume();
+ */
+struct task *hlua_process_task(struct task *task, void *context, unsigned int state)
+{
+ struct hlua *hlua = context;
+ enum hlua_exec status;
+
+ if (task->thread_mask == MAX_THREADS_MASK)
+ task_set_affinity(task, tid_bit);
+
+ /* If it is the first call to the task, we must initialize the
+ * execution timeouts.
+ */
+ if (!HLUA_IS_RUNNING(hlua))
+ hlua->max_time = hlua_timeout_task;
+
+ /* Execute the Lua code. */
+ status = hlua_ctx_resume(hlua, 1);
+
+ switch (status) {
+ /* finished or yield */
+ case HLUA_E_OK:
+ hlua_ctx_destroy(hlua);
+ task_destroy(task);
+ task = NULL;
+ break;
+
+ case HLUA_E_AGAIN: /* co process or timeout wake me later. */
+ notification_gc(&hlua->com);
+ task->expire = hlua->wake_time;
+ break;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ SEND_ERR(NULL, "Lua task: %s.\n", lua_tostring(hlua->T, -1));
+ hlua_ctx_destroy(hlua);
+ task_destroy(task);
+ task = NULL;
+ break;
+
+ case HLUA_E_ERR:
+ default:
+ SEND_ERR(NULL, "Lua task: unknown error.\n");
+ hlua_ctx_destroy(hlua);
+ task_destroy(task);
+ task = NULL;
+ break;
+ }
+ return task;
+}
+
+/* This function is a Lua binding that registers a Lua function to be
+ * executed after the HAProxy configuration parsing and before the
+ * HAProxy scheduler starts. This function expects only one Lua
+ * argument, which is a function. This function returns nothing, but
+ * throws if an error is encountered.
+ */
+__LJMP static int hlua_register_init(lua_State *L)
+{
+ struct hlua_init_function *init;
+ int ref;
+
+ MAY_LJMP(check_args(L, 1, "register_init"));
+
+ ref = MAY_LJMP(hlua_checkfunction(L, 1));
+
+ init = calloc(1, sizeof(*init));
+ if (!init)
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+
+ init->function_ref = ref;
+ LIST_APPEND(&hlua_init_functions[hlua_state_id], &init->l);
+ return 0;
+}
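+
+/* Illustrative Lua sketch: the registered function runs once, after the
+ * configuration parsing and before the scheduler starts:
+ *
+ *   core.register_init(function()
+ *       core.Info("HAProxy configuration parsed")
+ *   end)
+ */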
+
+/* This function is a Lua binding. It registers a task to be
+ * executed in parallel with the main HAProxy activity. The task is
+ * created and scheduled in the HAProxy scheduler. It can be called
+ * from the "init" section, "post init" or during the runtime.
+ *
+ * Lua prototype:
+ *
+ * <none> core.register_task(<function>)
+ */
+static int hlua_register_task(lua_State *L)
+{
+ struct hlua *hlua = NULL;
+ struct task *task = NULL;
+ int ref;
+ int state_id;
+
+ MAY_LJMP(check_args(L, 1, "register_task"));
+
+ ref = MAY_LJMP(hlua_checkfunction(L, 1));
+
+ /* Get the reference state. If the reference is NULL, L is the master
+ * state, otherwise hlua->T is.
+ */
+ hlua = hlua_gethlua(L);
+ if (hlua)
+ /* we are in runtime processing */
+ state_id = hlua->state_id;
+ else
+ /* we are in initialization mode */
+ state_id = hlua_state_id;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua)
+ goto alloc_error;
+ HLUA_INIT(hlua);
+
+ /* We are in the common lua state, execute the task anywhere,
+ * otherwise, inherit the current thread identifier
+ */
+ if (state_id == 0)
+ task = task_new_anywhere();
+ else
+ task = task_new_here();
+ if (!task)
+ goto alloc_error;
+
+ task->context = hlua;
+ task->process = hlua_process_task;
+
+ if (!hlua_ctx_init(hlua, state_id, task, 1))
+ goto alloc_error;
+
+ /* Restore the function in the stack. */
+ lua_rawgeti(hlua->T, LUA_REGISTRYINDEX, ref);
+ hlua->nargs = 0;
+
+ /* Schedule task. */
+ task_wakeup(task, TASK_WOKEN_INIT);
+
+ return 0;
+
+ alloc_error:
+ task_destroy(task);
+ hlua_ctx_destroy(hlua);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
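+
+/* Illustrative Lua sketch: a background task scheduled at startup; the
+ * function reference restored above is invoked with no argument
+ * (hlua->nargs = 0):
+ *
+ *   core.register_task(function()
+ *       while true do
+ *           core.Info("still alive")
+ *           core.sleep(60)
+ *       end
+ *   end)
+ */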
+
+/* Wrapper called by HAProxy to execute a Lua converter. This wrapper
+ * doesn't allow "yield" functions because the HAProxy engine cannot
+ * resume converters.
+ */
+static int hlua_sample_conv_wrapper(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct hlua_function *fcn = private;
+ struct stream *stream = smp->strm;
+ const char *error;
+
+ if (!stream)
+ return 0;
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. This behavior
+ * saves performance because a systematic
+ * Lua initialization causes a 5% performance loss.
+ */
+ if (!stream->hlua) {
+ struct hlua *hlua;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(stream->be, "Lua converter '%s': can't initialize Lua context.\n", fcn->name);
+ return 0;
+ }
+ HLUA_INIT(hlua);
+ stream->hlua = hlua;
+ if (!hlua_ctx_init(stream->hlua, fcn_ref_to_stack_id(fcn), stream->task, 0)) {
+ SEND_ERR(stream->be, "Lua converter '%s': can't initialize Lua context.\n", fcn->name);
+ return 0;
+ }
+ }
+
+ /* If it is the first run, initialize the data for the call. */
+ if (!HLUA_IS_RUNNING(stream->hlua)) {
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(stream->hlua)) {
+ if (lua_type(stream->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(stream->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(stream->be, "Lua converter '%s': %s.\n", fcn->name, error);
+ return 0;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(stream->be, "Lua converter '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+
+ /* Restore the function in the stack. */
+ lua_rawgeti(stream->hlua->T, LUA_REGISTRYINDEX, fcn->function_ref[stream->hlua->state_id]);
+
+ /* convert the input sample and push it onto the stack. */
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(stream->be, "Lua converter '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ hlua_smp2lua(stream->hlua->T, smp);
+ stream->hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ if (arg_p) {
+ for (; arg_p->type != ARGT_STOP; arg_p++) {
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(stream->be, "Lua converter '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ hlua_arg2lua(stream->hlua->T, arg_p);
+ stream->hlua->nargs++;
+ }
+ }
+
+ /* We must initialize the execution timeouts. */
+ stream->hlua->max_time = hlua_timeout_session;
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(stream->hlua);
+ }
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(stream->hlua, 0)) {
+ /* finished. */
+ case HLUA_E_OK:
+ /* If the stack is empty, the function fails. */
+ if (lua_gettop(stream->hlua->T) <= 0)
+ return 0;
+
+ /* Convert the returned value in sample. */
+ hlua_lua2smp(stream->hlua->T, -1, smp);
+ lua_pop(stream->hlua->T, 1);
+ return 1;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ SEND_ERR(stream->be, "Lua converter '%s': cannot use yielded functions.\n", fcn->name);
+ return 0;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(stream->be, "Lua converter '%s': %s.\n",
+ fcn->name, lua_tostring(stream->hlua->T, -1));
+ lua_pop(stream->hlua->T, 1);
+ return 0;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(stream->be, "Lua converter '%s': execution timeout.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(stream->be, "Lua converter '%s': out of memory error.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_YIELD:
+ SEND_ERR(stream->be, "Lua converter '%s': yield functions like core.tcp() or core.sleep() are not allowed.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(stream->be, "Lua converter '%s' returns an unknown error.\n", fcn->name);
+ /* fall through */
+
+ default:
+ return 0;
+ }
+}
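+
+/* Usage sketch (illustrative, not part of this patch): a converter
+ * registered from Lua runs synchronously and must not yield:
+ *
+ *   core.register_converters("upper", function(str)
+ *       return str:upper()
+ *   end)
+ *
+ * It is then referenced in the configuration under the "lua." prefix:
+ *
+ *   http-request set-header X-Host-Upper %[req.hdr(host),lua.upper]
+ */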
+
+/* Wrapper called by HAProxy to execute a sample-fetch. This wrapper
+ * doesn't allow "yield" functions because the HAProxy engine cannot
+ * resume sample-fetches. This function will be called by the sample
+ * fetch engine to call lua-based fetch operations.
+ */
+static int hlua_sample_fetch_wrapper(const struct arg *arg_p, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct hlua_function *fcn = private;
+ struct stream *stream = smp->strm;
+ const char *error;
+ unsigned int hflags = HLUA_TXN_NOTERM | HLUA_TXN_SMP_CTX;
+
+ if (!stream)
+ return 0;
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. Initializing it
+ * lazily saves performance, because a systematic Lua
+ * initialization causes a 5% performance loss.
+ */
+ if (!stream->hlua) {
+ struct hlua *hlua;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(stream->be, "Lua sample-fetch '%s': can't initialize Lua context.\n", fcn->name);
+ return 0;
+ }
+ hlua->T = NULL;
+ stream->hlua = hlua;
+ if (!hlua_ctx_init(stream->hlua, fcn_ref_to_stack_id(fcn), stream->task, 0)) {
+ SEND_ERR(stream->be, "Lua sample-fetch '%s': can't initialize Lua context.\n", fcn->name);
+ return 0;
+ }
+ }
+
+ /* If it is the first run, initialize the data for the call. */
+ if (!HLUA_IS_RUNNING(stream->hlua)) {
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(stream->hlua)) {
+ if (lua_type(stream->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(stream->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': %s.\n", fcn->name, error);
+ return 0;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(stream->hlua->T, 2)) {
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+
+ /* Restore the function in the stack. */
+ lua_rawgeti(stream->hlua->T, LUA_REGISTRYINDEX, fcn->function_ref[stream->hlua->state_id]);
+
+ /* push arguments in the stack. */
+ if (!hlua_txn_new(stream->hlua->T, stream, smp->px, smp->opt & SMP_OPT_DIR, hflags)) {
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ stream->hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ for (; arg_p && arg_p->type != ARGT_STOP; arg_p++) {
+ /* Check stack available size. */
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ hlua_arg2lua(stream->hlua->T, arg_p);
+ stream->hlua->nargs++;
+ }
+
+ /* We must initialize the execution timeouts. */
+ stream->hlua->max_time = hlua_timeout_session;
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(stream->hlua);
+ }
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(stream->hlua, 0)) {
+ /* finished. */
+ case HLUA_E_OK:
+ /* If the stack is empty, the function fails. */
+ if (lua_gettop(stream->hlua->T) <= 0)
+ return 0;
+
+ /* Convert the returned value into a sample. */
+ hlua_lua2smp(stream->hlua->T, -1, smp);
+ lua_pop(stream->hlua->T, 1);
+
+ /* Set the end of execution flag. */
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ return 1;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': cannot use yielded functions.\n", fcn->name);
+ return 0;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': %s.\n",
+ fcn->name, lua_tostring(stream->hlua->T, -1));
+ lua_pop(stream->hlua->T, 1);
+ return 0;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': execution timeout.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': out of memory error.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_YIELD:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': yield not allowed.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(smp->px, "Lua sample-fetch '%s' returns an unknown error.\n", fcn->name);
+ /* fall through */
+
+ default:
+ return 0;
+ }
+}
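+
+/* Usage sketch (illustrative): a sample-fetch registered from Lua
+ * receives the TXN object first, then the configuration arguments:
+ *
+ *   core.register_fetches("path_id", function(txn, prefix)
+ *       return (prefix or "") .. txn.sf:path()
+ *   end)
+ *
+ * It can then be used as %[lua.path_id] or %[lua.path_id(api-)] in
+ * the configuration.
+ */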
+
+/* This function is a Lua binding used for registering
+ * "sample-conv" functions. It expects a converter name used
+ * in the haproxy configuration file, and a Lua function.
+ */
+__LJMP static int hlua_register_converters(lua_State *L)
+{
+ struct sample_conv_kw_list *sck;
+ const char *name;
+ int ref;
+ int len;
+ struct hlua_function *fcn = NULL;
+ struct sample_conv *sc;
+ struct buffer *trash;
+
+ MAY_LJMP(check_args(L, 2, "register_converters"));
+
+ /* First argument : converter name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 2));
+
+ /* Check if the converter is already registered */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ sc = find_sample_conv(trash->area, trash->data);
+ if (sc != NULL) {
+ fcn = sc->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register converter 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+ return 0;
+ }
+
+ /* Allocate and fill the sample converter keyword struct. */
+ sck = calloc(1, sizeof(*sck) + sizeof(struct sample_conv) * 2);
+ if (!sck)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ fcn->name = strdup(name);
+ if (!fcn->name)
+ goto alloc_error;
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* List head */
+ sck->list.n = sck->list.p = NULL;
+
+ /* converter keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ sck->kw[0].kw = calloc(1, len);
+ if (!sck->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)sck->kw[0].kw, len, "lua.%s", name);
+ sck->kw[0].process = hlua_sample_conv_wrapper;
+ sck->kw[0].arg_mask = ARG12(0,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR);
+ sck->kw[0].val_args = NULL;
+ sck->kw[0].in_type = SMP_T_STR;
+ sck->kw[0].out_type = SMP_T_STR;
+ sck->kw[0].private = fcn;
+
+ /* Register this new converter */
+ sample_register_convs(sck);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_function(fcn);
+ ha_free(&sck);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
+
+/* This function is a Lua binding used for registering
+ * "sample-fetch" functions. It expects a sample-fetch name used
+ * in the haproxy configuration file, and a Lua function.
+ */
+__LJMP static int hlua_register_fetches(lua_State *L)
+{
+ const char *name;
+ int ref;
+ int len;
+ struct sample_fetch_kw_list *sfk;
+ struct hlua_function *fcn = NULL;
+ struct sample_fetch *sf;
+ struct buffer *trash;
+
+ MAY_LJMP(check_args(L, 2, "register_fetches"));
+
+ /* First argument : sample-fetch name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 2));
+
+ /* Check if the sample-fetch is already registered */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ sf = find_sample_fetch(trash->area, trash->data);
+ if (sf != NULL) {
+ fcn = sf->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register sample-fetch 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+ return 0;
+ }
+
+ /* Allocate and fill the sample fetch keyword struct. */
+ sfk = calloc(1, sizeof(*sfk) + sizeof(struct sample_fetch) * 2);
+ if (!sfk)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ fcn->name = strdup(name);
+ if (!fcn->name)
+ goto alloc_error;
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* List head */
+ sfk->list.n = sfk->list.p = NULL;
+
+ /* sample-fetch keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ sfk->kw[0].kw = calloc(1, len);
+ if (!sfk->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)sfk->kw[0].kw, len, "lua.%s", name);
+ sfk->kw[0].process = hlua_sample_fetch_wrapper;
+ sfk->kw[0].arg_mask = ARG12(0,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR);
+ sfk->kw[0].val_args = NULL;
+ sfk->kw[0].out_type = SMP_T_STR;
+ sfk->kw[0].use = SMP_USE_HTTP_ANY;
+ sfk->kw[0].val = 0;
+ sfk->kw[0].private = fcn;
+
+ /* Register this new fetch. */
+ sample_register_fetches(sfk);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_function(fcn);
+ ha_free(&sfk);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
+
+/* This function is a Lua binding to set the wake_time. */
+__LJMP static int hlua_set_wake_time(lua_State *L)
+{
+ struct hlua *hlua;
+ unsigned int delay;
+ unsigned int wakeup_ms;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ return 0;
+ }
+
+ MAY_LJMP(check_args(L, 1, "wake_time"));
+
+ delay = MAY_LJMP(luaL_checkinteger(L, 1));
+ wakeup_ms = tick_add(now_ms, delay);
+ hlua->wake_time = wakeup_ms;
+ return 0;
+}
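+
+/* Note: the delay above is relative, in milliseconds; tick_add()
+ * converts it into an absolute tick (now_ms + delay), which is later
+ * used to reschedule the yielded Lua context.
+ */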
+
+/* This function is a wrapper executing each Lua function declared as an
+ * action during the initialisation period. It may return any ACT_RET_*
+ * value. On error, ACT_RET_CONT is returned and the action is ignored.
+ * If the Lua action yields, ACT_RET_YIELD is returned. On success, the
+ * return value is the first element on the stack.
+ */
+static enum act_return hlua_action(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ char **arg;
+ unsigned int hflags = HLUA_TXN_ACT_CTX;
+ int dir, act_ret = ACT_RET_CONT;
+ const char *error;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CNT: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: dir = SMP_OPT_DIR_RES; break;
+ default:
+ SEND_ERR(px, "Lua: internal error while execute action.\n");
+ goto end;
+ }
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. Initializing it
+ * lazily saves performance, because a systematic Lua
+ * initialization causes a 5% performance loss.
+ */
+ if (!s->hlua) {
+ struct hlua *hlua;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(px, "Lua action '%s': can't initialize Lua context.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto end;
+ }
+ HLUA_INIT(hlua);
+ s->hlua = hlua;
+ if (!hlua_ctx_init(s->hlua, fcn_ref_to_stack_id(rule->arg.hlua_rule->fcn), s->task, 0)) {
+ SEND_ERR(px, "Lua action '%s': can't initialize Lua context.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto end;
+ }
+ }
+
+ /* If it is the first run, initialize the data for the call. */
+ if (!HLUA_IS_RUNNING(s->hlua)) {
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(s->hlua)) {
+ if (lua_type(s->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(s->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(px, "Lua function '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, error);
+ goto end;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(s->hlua->T, 1)) {
+ SEND_ERR(px, "Lua function '%s': full stack.\n",
+ rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(s->hlua);
+ goto end;
+ }
+
+ /* Restore the function in the stack. */
+ lua_rawgeti(s->hlua->T, LUA_REGISTRYINDEX, rule->arg.hlua_rule->fcn->function_ref[s->hlua->state_id]);
+
+ /* Create and push the TXN object on the stack. */
+ if (!hlua_txn_new(s->hlua->T, s, px, dir, hflags)) {
+ SEND_ERR(px, "Lua function '%s': full stack.\n",
+ rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(s->hlua);
+ goto end;
+ }
+ s->hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ for (arg = rule->arg.hlua_rule->args; arg && *arg; arg++) {
+ if (!lua_checkstack(s->hlua->T, 1)) {
+ SEND_ERR(px, "Lua function '%s': full stack.\n",
+ rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(s->hlua);
+ goto end;
+ }
+ lua_pushstring(s->hlua->T, *arg);
+ s->hlua->nargs++;
+ }
+
+ /* Now the execution is safe. */
+ RESET_SAFE_LJMP(s->hlua);
+
+ /* We must initialize the execution timeouts. */
+ s->hlua->max_time = hlua_timeout_session;
+ }
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(s->hlua, !(flags & ACT_OPT_FINAL))) {
+ /* finished. */
+ case HLUA_E_OK:
+ /* Catch the return value */
+ if (lua_gettop(s->hlua->T) > 0)
+ act_ret = lua_tointeger(s->hlua->T, -1);
+
+ /* Set timeout in the required channel. */
+ if (act_ret == ACT_RET_YIELD) {
+ if (flags & ACT_OPT_FINAL)
+ goto err_yield;
+
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = tick_first((tick_is_expired(s->req.analyse_exp, now_ms) ? 0 : s->req.analyse_exp),
+ s->hlua->wake_time);
+ else
+ s->res.analyse_exp = tick_first((tick_is_expired(s->res.analyse_exp, now_ms) ? 0 : s->res.analyse_exp),
+ s->hlua->wake_time);
+ }
+ goto end;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ /* Set timeout in the required channel. */
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = tick_first((tick_is_expired(s->req.analyse_exp, now_ms) ? 0 : s->req.analyse_exp),
+ s->hlua->wake_time);
+ else
+ s->res.analyse_exp = tick_first((tick_is_expired(s->res.analyse_exp, now_ms) ? 0 : s->res.analyse_exp),
+ s->hlua->wake_time);
+
+ /* Some actions can be woken up when a "write" event
+ * is detected on the response channel. This is useful
+ * only for actions targeting the request channel.
+ */
+ if (HLUA_IS_WAKERESWR(s->hlua))
+ s->res.flags |= CF_WAKE_WRITE;
+ if (HLUA_IS_WAKEREQWR(s->hlua))
+ s->req.flags |= CF_WAKE_WRITE;
+ act_ret = ACT_RET_YIELD;
+ goto end;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(px, "Lua function '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, lua_tostring(s->hlua->T, -1));
+ lua_pop(s->hlua->T, 1);
+ goto end;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(px, "Lua function '%s': execution timeout.\n", rule->arg.hlua_rule->fcn->name);
+ goto end;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(px, "Lua function '%s': out of memory error.\n", rule->arg.hlua_rule->fcn->name);
+ goto end;
+
+ case HLUA_E_YIELD:
+ err_yield:
+ act_ret = ACT_RET_CONT;
+ SEND_ERR(px, "Lua function '%s': aborting Lua processing on expired timeout.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto end;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(px, "Lua function '%s' return an unknown error.\n",
+ rule->arg.hlua_rule->fcn->name);
+
+ default:
+ goto end;
+ }
+
+ end:
+ if (act_ret != ACT_RET_YIELD && s->hlua)
+ s->hlua->wake_time = TICK_ETERNITY;
+ return act_ret;
+}
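+
+/* Usage sketch (illustrative): unlike converters and sample-fetches,
+ * an action may yield; the engine reschedules it via HLUA_E_AGAIN:
+ *
+ *   core.register_action("slow_down", { "http-req" }, function(txn)
+ *       core.msleep(100)  -- yields, the stream is woken up later
+ *   end)
+ *
+ * and in the configuration:
+ *
+ *   http-request lua.slow_down
+ */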
+
+struct task *hlua_applet_wakeup(struct task *t, void *context, unsigned int state)
+{
+ struct appctx *ctx = context;
+
+ appctx_wakeup(ctx);
+ t->expire = TICK_ETERNITY;
+ return t;
+}
+
+static int hlua_applet_tcp_init(struct appctx *ctx)
+{
+ struct hlua_tcp_ctx *tcp_ctx = applet_reserve_svcctx(ctx, sizeof(*tcp_ctx));
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct hlua *hlua;
+ struct task *task;
+ char **arg;
+ const char *error;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ HLUA_INIT(hlua);
+ tcp_ctx->hlua = hlua;
+ tcp_ctx->flags = 0;
+
+ /* Create the task used by signals to wake up the applet. */
+ task = task_new_here();
+ if (!task) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ task->nice = 0;
+ task->context = ctx;
+ task->process = hlua_applet_wakeup;
+ tcp_ctx->task = task;
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. Initializing it
+ * lazily saves performance, because a systematic Lua
+ * initialization causes a 5% performance loss.
+ */
+ if (!hlua_ctx_init(hlua, fcn_ref_to_stack_id(ctx->rule->arg.hlua_rule->fcn), task, 0)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': can't initialize Lua context.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+
+ /* Set the timeout according to the applet configuration. */
+ hlua->max_time = ctx->applet->timeout;
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(hlua)) {
+ if (lua_type(hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(strm->be, "Lua applet tcp '%s': %s.\n",
+ ctx->rule->arg.hlua_rule->fcn->name, error);
+ return -1;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+
+ /* Restore the function in the stack. */
+ lua_rawgeti(hlua->T, LUA_REGISTRYINDEX, ctx->rule->arg.hlua_rule->fcn->function_ref[hlua->state_id]);
+
+ /* Create and push the AppletTCP object on the stack. */
+ if (!hlua_applet_tcp_new(hlua->T, ctx)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ for (arg = ctx->rule->arg.hlua_rule->args; arg && *arg; arg++) {
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ lua_pushstring(hlua->T, *arg);
+ hlua->nargs++;
+ }
+
+ RESET_SAFE_LJMP(hlua);
+
+ /* Wakeup the applet ASAP. */
+ applet_need_more_data(ctx);
+ applet_have_more_data(ctx);
+
+ return 0;
+}
+
+void hlua_applet_tcp_fct(struct appctx *ctx)
+{
+ struct hlua_tcp_ctx *tcp_ctx = ctx->svcctx;
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct channel *res = sc_ic(sc);
+ struct act_rule *rule = ctx->rule;
+ struct proxy *px = strm->be;
+ struct hlua *hlua = tcp_ctx->hlua;
+
+ /* The applet execution is already done. */
+ if (tcp_ctx->flags & APPLET_DONE) {
+ /* eat the whole request */
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ return;
+ }
+
+ /* If the stream is disconnected or closed, do nothing. */
+ if (unlikely(sc->state == SC_ST_DIS || sc->state == SC_ST_CLO))
+ return;
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(hlua, 1)) {
+ /* finished. */
+ case HLUA_E_OK:
+ tcp_ctx->flags |= APPLET_DONE;
+
+ /* eat the whole request */
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ res->flags |= CF_READ_NULL;
+ sc_shutr(sc);
+ return;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ if (hlua->wake_time != TICK_ETERNITY)
+ task_schedule(tcp_ctx->task, hlua->wake_time);
+ return;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet tcp '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, lua_tostring(hlua->T, -1));
+ lua_pop(hlua->T, 1);
+ goto error;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(px, "Lua applet tcp '%s': execution timeout.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(px, "Lua applet tcp '%s': out of memory error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_YIELD: /* unexpected */
+ SEND_ERR(px, "Lua applet tcp '%s': yield not allowed.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet tcp '%s' return an unknown error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ default:
+ goto error;
+ }
+
+error:
+
+ /* For all other cases, just close the stream. */
+ sc_shutw(sc);
+ sc_shutr(sc);
+ tcp_ctx->flags |= APPLET_DONE;
+}
+
+static void hlua_applet_tcp_release(struct appctx *ctx)
+{
+ struct hlua_tcp_ctx *tcp_ctx = ctx->svcctx;
+
+ task_destroy(tcp_ctx->task);
+ tcp_ctx->task = NULL;
+ hlua_ctx_destroy(tcp_ctx->hlua);
+ tcp_ctx->hlua = NULL;
+}
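+
+/* Usage sketch (illustrative): a TCP service implemented on top of the
+ * applet above:
+ *
+ *   core.register_service("echo", "tcp", function(applet)
+ *       local line = applet:getline()
+ *       applet:send(line or "")
+ *   end)
+ *
+ * attached with "tcp-request content use-service lua.echo".
+ */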
+
+/* The function returns 0 if the initialisation is complete, or -1 if
+ * an error occurs. It also reserves the appctx for an hlua_http_ctx.
+ */
+static int hlua_applet_http_init(struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = applet_reserve_svcctx(ctx, sizeof(*http_ctx));
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct http_txn *txn;
+ struct hlua *hlua;
+ char **arg;
+ struct task *task;
+ const char *error;
+
+ txn = strm->txn;
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(strm->be, "Lua applet http '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ HLUA_INIT(hlua);
+ http_ctx->hlua = hlua;
+ http_ctx->left_bytes = -1;
+ http_ctx->flags = 0;
+
+ if (txn->req.flags & HTTP_MSGF_VER_11)
+ http_ctx->flags |= APPLET_HTTP11;
+
+ /* Create the task used by signals to wake up the applet. */
+ task = task_new_here();
+ if (!task) {
+ SEND_ERR(strm->be, "Lua applet http '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ task->nice = 0;
+ task->context = ctx;
+ task->process = hlua_applet_wakeup;
+ http_ctx->task = task;
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. Initializing it
+ * lazily saves performance, because a systematic Lua
+ * initialization causes a 5% performance loss.
+ */
+ if (!hlua_ctx_init(hlua, fcn_ref_to_stack_id(ctx->rule->arg.hlua_rule->fcn), task, 0)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': can't initialize Lua context.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+
+ /* Set the timeout according to the applet configuration. */
+ hlua->max_time = ctx->applet->timeout;
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(hlua)) {
+ if (lua_type(hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(strm->be, "Lua applet http '%s': %s.\n",
+ ctx->rule->arg.hlua_rule->fcn->name, error);
+ return -1;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+
+ /* Restore the function in the stack. */
+ lua_rawgeti(hlua->T, LUA_REGISTRYINDEX, ctx->rule->arg.hlua_rule->fcn->function_ref[hlua->state_id]);
+
+ /* Create and push the AppletHTTP object on the stack. */
+ if (!hlua_applet_http_new(hlua->T, ctx)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ for (arg = ctx->rule->arg.hlua_rule->args; arg && *arg; arg++) {
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ lua_pushstring(hlua->T, *arg);
+ hlua->nargs++;
+ }
+
+ RESET_SAFE_LJMP(hlua);
+
+ /* Wakeup the applet when data is ready for read. */
+ applet_need_more_data(ctx);
+
+ return 0;
+}
+
+void hlua_applet_http_fct(struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = ctx->svcctx;
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct act_rule *rule = ctx->rule;
+ struct proxy *px = strm->be;
+ struct hlua *hlua = http_ctx->hlua;
+ struct htx *req_htx, *res_htx;
+
+ res_htx = htx_from_buf(&res->buf);
+
+ /* If the stream is disconnected or closed, do nothing. */
+ if (unlikely(sc->state == SC_ST_DIS || sc->state == SC_ST_CLO))
+ goto out;
+
+ /* Check if the input buffer is available. */
+ if (!b_size(&res->buf)) {
+ sc_need_room(sc);
+ goto out;
+ }
+ /* check that the output is not closed */
+ if (res->flags & (CF_SHUTW|CF_SHUTW_NOW|CF_SHUTR))
+ http_ctx->flags |= APPLET_DONE;
+
+ /* Set the currently running flag. */
+ if (!HLUA_IS_RUNNING(hlua) &&
+ !(http_ctx->flags & APPLET_DONE)) {
+ if (!co_data(req)) {
+ applet_need_more_data(ctx);
+ goto out;
+ }
+ }
+
+ /* Execute the applet if it is not done. */
+ if (!(http_ctx->flags & APPLET_DONE)) {
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(hlua, 1)) {
+ /* finished. */
+ case HLUA_E_OK:
+ http_ctx->flags |= APPLET_DONE;
+ break;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ if (hlua->wake_time != TICK_ETERNITY)
+ task_schedule(http_ctx->task, hlua->wake_time);
+ goto out;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet http '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, lua_tostring(hlua->T, -1));
+ lua_pop(hlua->T, 1);
+ goto error;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(px, "Lua applet http '%s': execution timeout.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(px, "Lua applet http '%s': out of memory error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_YIELD: /* unexpected */
+ SEND_ERR(px, "Lua applet http '%s': yield not allowed.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet http '%s' return an unknown error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ default:
+ goto error;
+ }
+ }
+
+ if (http_ctx->flags & APPLET_DONE) {
+ if (http_ctx->flags & APPLET_RSP_SENT)
+ goto done;
+
+ if (!(http_ctx->flags & APPLET_HDR_SENT))
+ goto error;
+
+ /* No more data is expected. If the response buffer is empty
+ * for a chunked message, be sure to add something (an EOT
+ * block in this case) so there is something to send. It is
+ * important to make sure the EOM flag will be handled by the
+ * endpoint.
+ */
+ if (htx_is_empty(res_htx) && (strm->txn->rsp.flags & (HTTP_MSGF_XFER_LEN|HTTP_MSGF_CNT_LEN)) == HTTP_MSGF_XFER_LEN) {
+ if (!htx_add_endof(res_htx, HTX_BLK_EOT)) {
+ sc_need_room(sc);
+ goto out;
+ }
+ channel_add_input(res, 1);
+ }
+
+ res_htx->flags |= HTX_FL_EOM;
+ res->flags |= CF_EOI;
+ se_fl_set(ctx->sedesc, SE_FL_EOI);
+ strm->txn->status = http_ctx->status;
+ http_ctx->flags |= APPLET_RSP_SENT;
+ }
+
+ done:
+ if (http_ctx->flags & APPLET_DONE) {
+ if (!(res->flags & CF_SHUTR)) {
+ res->flags |= CF_READ_NULL;
+ sc_shutr(sc);
+ }
+
+ /* eat the whole request */
+ if (co_data(req)) {
+ req_htx = htx_from_buf(&req->buf);
+ co_htx_skip(req, req_htx, co_data(req));
+ htx_to_buf(req_htx, &req->buf);
+ }
+ }
+
+ out:
+ htx_to_buf(res_htx, &res->buf);
+ return;
+
+ error:
+
+ /* If we are in HTTP mode and have not sent any data yet,
+ * return a 500 server error on a best-effort basis: if there
+ * is no room available in the buffer, just close the
+ * connection.
+ */
+ if (!(http_ctx->flags & APPLET_HDR_SENT)) {
+ struct buffer *err = &http_err_chunks[HTTP_ERR_500];
+
+ channel_erase(res);
+ res->buf.data = b_data(err);
+ memcpy(res->buf.area, b_head(err), b_data(err));
+ res_htx = htx_from_buf(&res->buf);
+ channel_add_input(res, res_htx->data);
+ }
+ if (!(strm->flags & SF_ERR_MASK))
+ strm->flags |= SF_ERR_RESOURCE;
+ http_ctx->flags |= APPLET_DONE;
+ goto done;
+}
+
+static void hlua_applet_http_release(struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = ctx->svcctx;
+
+ task_destroy(http_ctx->task);
+ http_ctx->task = NULL;
+ hlua_ctx_destroy(http_ctx->hlua);
+ http_ctx->hlua = NULL;
+}
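+
+/* Usage sketch (illustrative): an HTTP service must send a complete
+ * response before returning, otherwise the 500 fallback above applies:
+ *
+ *   core.register_service("hello", "http", function(applet)
+ *       local body = "Hello from Lua\n"
+ *       applet:set_status(200)
+ *       applet:add_header("content-type", "text/plain")
+ *       applet:add_header("content-length", tostring(#body))
+ *       applet:start_response()
+ *       applet:send(body)
+ *   end)
+ *
+ * attached with "http-request use-service lua.hello".
+ */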
+
+/* Generic {tcp|http}-request parser. Returns ACT_RET_PRS_OK on
+ * success, ACT_RET_PRS_ERR otherwise.
+ *
+ * This function can fail with an abort() due to a Lua critical error.
+ * Since we are in HAProxy's configuration parsing phase, this abort()
+ * is tolerated.
+ */
+static enum act_parse_ret action_register_lua(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct hlua_function *fcn = rule->kw->private;
+ int i;
+
+ /* Memory for the rule. */
+ rule->arg.hlua_rule = calloc(1, sizeof(*rule->arg.hlua_rule));
+ if (!rule->arg.hlua_rule) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+
+ /* Memory for arguments. */
+ rule->arg.hlua_rule->args = calloc(fcn->nargs + 1,
+ sizeof(*rule->arg.hlua_rule->args));
+ if (!rule->arg.hlua_rule->args) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+
+ /* Reference the Lua function and store the reference. */
+ rule->arg.hlua_rule->fcn = fcn;
+
+ /* Expect some arguments */
+ for (i = 0; i < fcn->nargs; i++) {
+ if (*args[*cur_arg] == '\0') {
+ memprintf(err, "expect %d arguments", fcn->nargs);
+ goto error;
+ }
+ rule->arg.hlua_rule->args[i] = strdup(args[*cur_arg]);
+ if (!rule->arg.hlua_rule->args[i]) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+ (*cur_arg)++;
+ }
+ rule->arg.hlua_rule->args[i] = NULL;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = hlua_action;
+ return ACT_RET_PRS_OK;
+
+ error:
+ if (rule->arg.hlua_rule) {
+ if (rule->arg.hlua_rule->args) {
+ for (i = 0; i < fcn->nargs; i++)
+ ha_free(&rule->arg.hlua_rule->args[i]);
+ ha_free(&rule->arg.hlua_rule->args);
+ }
+ ha_free(&rule->arg.hlua_rule);
+ }
+ return ACT_RET_PRS_ERR;
+}
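+
+/* Configuration sketch (illustrative, the action name is hypothetical):
+ * with nargs set to 2 at registration time, the rule must provide the
+ * two expected arguments, e.g. "http-request lua.check_hdr X-Key secret",
+ * which are passed to the Lua function after the TXN object.
+ */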
+
+static enum act_parse_ret action_register_service_http(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct hlua_function *fcn = rule->kw->private;
+
+ /* HTTP applets are forbidden in tcp-request rules.
+ * An HTTP applet requires everything initialized by
+ * "http_process_request" (analyzer flag AN_REQ_HTTP_INNER),
+ * but the applet would be initialized immediately, before
+ * that analyzer gets called.
+ */
+ if (rule->from != ACT_F_HTTP_REQ) {
+ memprintf(err, "HTTP applets are forbidden from 'tcp-request' rulesets");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Memory for the rule. */
+ rule->arg.hlua_rule = calloc(1, sizeof(*rule->arg.hlua_rule));
+ if (!rule->arg.hlua_rule) {
+ memprintf(err, "out of memory error");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Reference the Lua function and store the reference. */
+ rule->arg.hlua_rule->fcn = fcn;
+
+ /* TODO: later accept arguments. */
+ rule->arg.hlua_rule->args = NULL;
+
+ /* Add applet pointer in the rule. */
+ rule->applet.obj_type = OBJ_TYPE_APPLET;
+ rule->applet.name = fcn->name;
+ rule->applet.init = hlua_applet_http_init;
+ rule->applet.fct = hlua_applet_http_fct;
+ rule->applet.release = hlua_applet_http_release;
+ rule->applet.timeout = hlua_timeout_applet;
+
+ return ACT_RET_PRS_OK;
+}
+
+/* This function is a Lua binding used for registering actions. It
+ * expects an action name used in the haproxy configuration file, a
+ * table of rulesets in which the action may be used, a Lua function,
+ * and optionally the number of expected configuration arguments.
+ */
+__LJMP static int hlua_register_action(lua_State *L)
+{
+ struct action_kw_list *akl = NULL;
+ const char *name;
+ int ref;
+ int len;
+ struct hlua_function *fcn = NULL;
+ int nargs;
+ struct buffer *trash;
+ struct action_kw *akw;
+
+ /* Initialise the number of expected arguments to 0. */
+ nargs = 0;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'register_action' needs between 3 and 4 arguments"));
+
+ /* First argument : action name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : environment. */
+ if (lua_type(L, 2) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "register_action: second argument must be a table of strings"));
+
+ /* Third argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ /* Fourth argument : number of mandatory arguments expected on the configuration line. */
+ if (lua_gettop(L) >= 4)
+ nargs = MAY_LJMP(luaL_checkinteger(L, 4));
+
+ /* browse the second argument as an array. */
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ if (lua_type(L, -1) != LUA_TSTRING)
+ WILL_LJMP(luaL_error(L, "register_action: second argument must be a table of strings"));
+
+ /* Check if action exists */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ if (strcmp(lua_tostring(L, -1), "tcp-req") == 0) {
+ akw = tcp_req_cont_action(trash->area);
+ } else if (strcmp(lua_tostring(L, -1), "tcp-res") == 0) {
+ akw = tcp_res_cont_action(trash->area);
+ } else if (strcmp(lua_tostring(L, -1), "http-req") == 0) {
+ akw = action_http_req_custom(trash->area);
+ } else if (strcmp(lua_tostring(L, -1), "http-res") == 0) {
+ akw = action_http_res_custom(trash->area);
+ } else {
+ akw = NULL;
+ }
+ if (akw != NULL) {
+ fcn = akw->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register action 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* pop the environment string. */
+ lua_pop(L, 1);
+ continue;
+ }
+
+ /* Allocate and fill the action keyword struct. */
+ akl = calloc(1, sizeof(*akl) + sizeof(struct action_kw) * 2);
+ if (!akl)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ fcn->name = strdup(name);
+ if (!fcn->name)
+ goto alloc_error;
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* Set the expected number of arguments. */
+ fcn->nargs = nargs;
+
+ /* List head */
+ akl->list.n = akl->list.p = NULL;
+
+ /* action keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ akl->kw[0].kw = calloc(1, len);
+ if (!akl->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)akl->kw[0].kw, len, "lua.%s", name);
+
+ akl->kw[0].flags = 0;
+ akl->kw[0].private = fcn;
+ akl->kw[0].parse = action_register_lua;
+
+ /* select the action registering point. */
+ if (strcmp(lua_tostring(L, -1), "tcp-req") == 0)
+ tcp_req_cont_keywords_register(akl);
+ else if (strcmp(lua_tostring(L, -1), "tcp-res") == 0)
+ tcp_res_cont_keywords_register(akl);
+ else if (strcmp(lua_tostring(L, -1), "http-req") == 0)
+ http_req_keywords_register(akl);
+ else if (strcmp(lua_tostring(L, -1), "http-res") == 0)
+ http_res_keywords_register(akl);
+ else {
+ release_hlua_function(fcn);
+ if (akl)
+ ha_free((char **)&(akl->kw[0].kw));
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua action environment '%s' is unknown. "
+ "'tcp-req', 'tcp-res', 'http-req' or 'http-res' "
+ "are expected.", lua_tostring(L, -1)));
+ }
+
+ /* pop the environment string. */
+ lua_pop(L, 1);
+
+ /* reset for next loop */
+ akl = NULL;
+ fcn = NULL;
+ }
+ return 0;
+
+ alloc_error:
+ release_hlua_function(fcn);
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
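+
+/* Registration sketch (illustrative): a single call may target several
+ * environments, creating one action keyword per ruleset:
+ *
+ *   core.register_action("log_name", { "tcp-req", "http-req" },
+ *                        function(txn, name)
+ *       core.Debug(name)
+ *   end, 1)
+ */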
+
+static enum act_parse_ret action_register_service_tcp(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct hlua_function *fcn = rule->kw->private;
+
+ if (px->mode == PR_MODE_HTTP) {
+ memprintf(err, "Lua TCP services cannot be used on HTTP proxies");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Memory for the rule. */
+ rule->arg.hlua_rule = calloc(1, sizeof(*rule->arg.hlua_rule));
+ if (!rule->arg.hlua_rule) {
+ memprintf(err, "out of memory error");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Reference the Lua function and store the reference. */
+ rule->arg.hlua_rule->fcn = fcn;
+
+ /* TODO: later accept arguments. */
+ rule->arg.hlua_rule->args = NULL;
+
+ /* Add applet pointer in the rule. */
+ rule->applet.obj_type = OBJ_TYPE_APPLET;
+ rule->applet.name = fcn->name;
+ rule->applet.init = hlua_applet_tcp_init;
+ rule->applet.fct = hlua_applet_tcp_fct;
+ rule->applet.release = hlua_applet_tcp_release;
+ rule->applet.timeout = hlua_timeout_applet;
+
+ return ACT_RET_PRS_OK;
+}
+
+/* This function is a Lua binding used for registering services. It
+ * expects a service name used in the haproxy configuration file, an
+ * environment ("tcp" or "http"), and a Lua function.
+ */
+__LJMP static int hlua_register_service(lua_State *L)
+{
+ struct action_kw_list *akl;
+ const char *name;
+ const char *env;
+ int ref;
+ int len;
+ struct hlua_function *fcn = NULL;
+ struct buffer *trash;
+ struct action_kw *akw;
+
+ MAY_LJMP(check_args(L, 3, "register_service"));
+
+ /* First argument : service name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : environment. */
+ env = MAY_LJMP(luaL_checkstring(L, 2));
+
+ /* Third argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ /* Check for service already registered */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ akw = service_find(trash->area);
+ if (akw != NULL) {
+ fcn = akw->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register service 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+ return 0;
+ }
+
+ /* Allocate and fill the action keyword struct. */
+ akl = calloc(1, sizeof(*akl) + sizeof(struct action_kw) * 2);
+ if (!akl)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ len = strlen("<lua.>") + strlen(name) + 1;
+ fcn->name = calloc(1, len);
+ if (!fcn->name)
+ goto alloc_error;
+ snprintf((char *)fcn->name, len, "<lua.%s>", name);
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* List head */
+ akl->list.n = akl->list.p = NULL;
+
+ /* service keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ akl->kw[0].kw = calloc(1, len);
+ if (!akl->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)akl->kw[0].kw, len, "lua.%s", name);
+
+ /* Check required environment. Only accepted "http" or "tcp". */
+ if (strcmp(env, "tcp") == 0)
+ akl->kw[0].parse = action_register_service_tcp;
+ else if (strcmp(env, "http") == 0)
+ akl->kw[0].parse = action_register_service_http;
+ else {
+ release_hlua_function(fcn);
+ if (akl)
+ ha_free((char **)&(akl->kw[0].kw));
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua service environment '%s' is unknown. "
+ "'tcp' or 'http' are expected.", env));
+ }
+
+ akl->kw[0].flags = 0;
+ akl->kw[0].private = fcn;
+
+ /* End of array. */
+ memset(&akl->kw[1], 0, sizeof(*akl->kw));
+
+ /* Register this new service. */
+ service_keywords_register(akl);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_function(fcn);
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
+
+/* This function initialises the Lua CLI handler. It copies the
+ * arguments onto the Lua stack and creates the AppletTCP object.
+ */
+static int hlua_cli_parse_fct(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct hlua_cli_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct hlua *hlua;
+ struct hlua_function *fcn;
+ int i;
+ const char *error;
+
+ fcn = private;
+ ctx->fcn = private;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(NULL, "Lua cli '%s': out of memory.\n", fcn->name);
+ return 1;
+ }
+ HLUA_INIT(hlua);
+ ctx->hlua = hlua;
+
+ /* Create the task used by signals to wake up the applet.
+ * We use the same wakeup function as the Lua applet_tcp and
+ * applet_http. It is fully compatible.
+ */
+ ctx->task = task_new_here();
+ if (!ctx->task) {
+ SEND_ERR(NULL, "Lua cli '%s': out of memory.\n", fcn->name);
+ goto error;
+ }
+ ctx->task->nice = 0;
+ ctx->task->context = appctx;
+ ctx->task->process = hlua_applet_wakeup;
+
+ /* Initialises the Lua context */
+ if (!hlua_ctx_init(hlua, fcn_ref_to_stack_id(fcn), ctx->task, 0)) {
+ SEND_ERR(NULL, "Lua cli '%s': can't initialize Lua context.\n", fcn->name);
+ goto error;
+ }
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(hlua)) {
+ if (lua_type(hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(NULL, "Lua cli '%s': %s.\n", fcn->name, error);
+ goto error;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 2)) {
+ SEND_ERR(NULL, "Lua cli '%s': full stack.\n", fcn->name);
+ goto error;
+ }
+
+ /* Restore the function in the stack. */
+ lua_rawgeti(hlua->T, LUA_REGISTRYINDEX, fcn->function_ref[hlua->state_id]);
+
+ /* Once the arguments parsed, the CLI is like an AppletTCP,
+ * so push AppletTCP in the stack.
+ */
+ if (!hlua_applet_tcp_new(hlua->T, appctx)) {
+ SEND_ERR(NULL, "Lua cli '%s': full stack.\n", fcn->name);
+ goto error;
+ }
+ hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ for (i = 0; *args[i]; i++) {
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(NULL, "Lua cli '%s': full stack.\n", fcn->name);
+ goto error;
+ }
+ lua_pushstring(hlua->T, args[i]);
+ hlua->nargs++;
+ }
+
+ /* We must initialize the execution timeouts. */
+ hlua->max_time = hlua_timeout_session;
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(hlua);
+
+ /* It's ok */
+ return 0;
+
+ /* It's not ok. */
+error:
+ RESET_SAFE_LJMP(hlua);
+ hlua_ctx_destroy(hlua);
+ ctx->hlua = NULL;
+ return 1;
+}
+
+static int hlua_cli_io_handler_fct(struct appctx *appctx)
+{
+ struct hlua_cli_ctx *ctx = appctx->svcctx;
+ struct hlua *hlua;
+ struct stconn *sc;
+ struct hlua_function *fcn;
+
+ hlua = ctx->hlua;
+ sc = appctx_sc(appctx);
+ fcn = ctx->fcn;
+
+ /* If the stream is disconnected or closed, do nothing. */
+ if (unlikely(sc->state == SC_ST_DIS || sc->state == SC_ST_CLO))
+ return 1;
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(hlua, 1)) {
+
+ /* finished. */
+ case HLUA_E_OK:
+ return 1;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ /* We want write. */
+ if (HLUA_IS_WAKERESWR(hlua))
+ sc_need_room(sc);
+ /* Set the timeout. */
+ if (hlua->wake_time != TICK_ETERNITY)
+ task_schedule(hlua->task, hlua->wake_time);
+ return 0;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(NULL, "Lua cli '%s': %s.\n",
+ fcn->name, lua_tostring(hlua->T, -1));
+ lua_pop(hlua->T, 1);
+ return 1;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(NULL, "Lua converter '%s': execution timeout.\n",
+ fcn->name);
+ return 1;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(NULL, "Lua converter '%s': out of memory error.\n",
+ fcn->name);
+ return 1;
+
+ case HLUA_E_YIELD: /* unexpected */
+ SEND_ERR(NULL, "Lua converter '%s': yield not allowed.\n",
+ fcn->name);
+ return 1;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(NULL, "Lua cli '%s' return an unknown error.\n",
+ fcn->name);
+ return 1;
+
+ default:
+ return 1;
+ }
+
+ return 1;
+}
+
+static void hlua_cli_io_release_fct(struct appctx *appctx)
+{
+ struct hlua_cli_ctx *ctx = appctx->svcctx;
+
+ hlua_ctx_destroy(ctx->hlua);
+ ctx->hlua = NULL;
+}
+
+/* This function is a Lua binding used for registering new keywords
+ * in the CLI. It expects a list of up to 5 keywords forming the
+ * command "path", a string describing the command, and a Lua
+ * function used as the IO handler.
+ */
+__LJMP static int hlua_register_cli(lua_State *L)
+{
+ struct cli_kw_list *cli_kws;
+ const char *message;
+ int ref_io;
+ int len;
+ struct hlua_function *fcn = NULL;
+ int index;
+ int i;
+ struct buffer *trash;
+ const char *kw[5];
+ struct cli_kw *cli_kw;
+ const char *errmsg;
+
+ MAY_LJMP(check_args(L, 3, "register_cli"));
+
+ /* First argument : an array of maximum 5 keywords. */
+ if (!lua_istable(L, 1))
+ WILL_LJMP(luaL_argerror(L, 1, "1st argument must be a table"));
+
+ /* Second argument : string with contextual message. */
+ message = MAY_LJMP(luaL_checkstring(L, 2));
+
+ /* Third argument : lua function. */
+ ref_io = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ /* Check for CLI service already registered */
+ trash = get_trash_chunk();
+ index = 0;
+ lua_pushnil(L);
+ memset(kw, 0, sizeof(kw));
+ while (lua_next(L, 1) != 0) {
+ if (index >= CLI_PREFIX_KW_NB)
+ WILL_LJMP(luaL_argerror(L, 1, "1st argument must be a table with a maximum of 5 entries"));
+ if (lua_type(L, -1) != LUA_TSTRING)
+ WILL_LJMP(luaL_argerror(L, 1, "1st argument must be a table filled with strings"));
+ kw[index] = lua_tostring(L, -1);
+ if (index == 0)
+ chunk_printf(trash, "%s", kw[index]);
+ else
+ chunk_appendf(trash, " %s", kw[index]);
+ index++;
+ lua_pop(L, 1);
+ }
+ cli_kw = cli_find_kw_exact((char **)kw);
+ if (cli_kw != NULL) {
+ fcn = cli_kw->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register CLI keyword 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", trash->area);
+ }
+ fcn->function_ref[hlua_state_id] = ref_io;
+ return 0;
+ }
+
+ /* Allocate and fill the CLI keyword struct. */
+ cli_kws = calloc(1, sizeof(*cli_kws) + sizeof(struct cli_kw) * 2);
+ if (!cli_kws) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+ fcn = new_hlua_function();
+ if (!fcn) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+
+ /* Fill path. */
+ index = 0;
+ lua_pushnil(L);
+ while(lua_next(L, 1) != 0) {
+ if (index >= CLI_PREFIX_KW_NB) {
+ errmsg = "1st argument must be a table with a maximum of 5 entries";
+ goto error;
+ }
+ if (lua_type(L, -1) != LUA_TSTRING) {
+ errmsg = "1st argument must be a table filled with strings";
+ goto error;
+ }
+ cli_kws->kw[0].str_kw[index] = strdup(lua_tostring(L, -1));
+ if (!cli_kws->kw[0].str_kw[index]) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+ index++;
+ lua_pop(L, 1);
+ }
+
+ /* Copy help message. */
+ cli_kws->kw[0].usage = strdup(message);
+ if (!cli_kws->kw[0].usage) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+
+ /* Fill fcn io handler. */
+ len = strlen("<lua.cli>") + 1;
+ for (i = 0; i < index; i++)
+ len += strlen(cli_kws->kw[0].str_kw[i]) + 1;
+ fcn->name = calloc(1, len);
+ if (!fcn->name) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+ strncat((char *)fcn->name, "<lua.cli", len);
+ for (i = 0; i < index; i++) {
+ strncat((char *)fcn->name, ".", len);
+ strncat((char *)fcn->name, cli_kws->kw[0].str_kw[i], len);
+ }
+ strncat((char *)fcn->name, ">", len);
+ fcn->function_ref[hlua_state_id] = ref_io;
+
+ /* Fill last entries. */
+ cli_kws->kw[0].private = fcn;
+ cli_kws->kw[0].parse = hlua_cli_parse_fct;
+ cli_kws->kw[0].io_handler = hlua_cli_io_handler_fct;
+ cli_kws->kw[0].io_release = hlua_cli_io_release_fct;
+
+ /* Register this new CLI keyword. */
+ cli_register_kw(cli_kws);
+
+ return 0;
+
+ error:
+ release_hlua_function(fcn);
+ if (cli_kws) {
+ for (i = 0; i < index; i++)
+ ha_free((char **)&(cli_kws->kw[0].str_kw[i]));
+ ha_free((char **)&(cli_kws->kw[0].usage));
+ }
+ ha_free(&cli_kws);
+ WILL_LJMP(luaL_error(L, "%s", errmsg));
+ return 0; /* Never reached */
+}
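+
+/* Usage sketch (illustrative): the handler behaves like an AppletTCP,
+ * as noted in hlua_cli_parse_fct() above:
+ *
+ *   core.register_cli({ "show", "lua-hello" }, "Show a greeting",
+ *                     function(applet)
+ *       applet:send("hello\n")
+ *   end)
+ *
+ * making "show lua-hello" available on the stats socket.
+ */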
+
+static int hlua_filter_init_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct hlua_flt_config *conf = fconf->conf;
+ lua_State *L;
+ int error, pos, state_id, flt_ref;
+
+ state_id = reg_flt_to_stack_id(conf->reg);
+ L = hlua_states[state_id];
+ pos = lua_gettop(L);
+
+ /* The filter parsing function */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, conf->reg->fun_ref[state_id]);
+
+ /* Push the filter class on the stack and resolve all callbacks */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, conf->reg->flt_ref[state_id]);
+
+ /* Duplicate the filter class so each filter will have its own copy */
+ lua_newtable(L);
+ lua_pushnil(L);
+
+ while (lua_next(L, pos+2)) {
+ lua_pushvalue(L, -2);
+ lua_insert(L, -2);
+ lua_settable(L, -4);
+ }
+ flt_ref = luaL_ref(L, LUA_REGISTRYINDEX);
+
+ /* Remove the original lua filter class from the stack */
+ lua_pop(L, 1);
+
+ /* Push the copy on the stack */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, flt_ref);
+
+ /* extra args are pushed in a table */
+ lua_newtable(L);
+ for (pos = 0; conf->args[pos]; pos++) {
+ /* Check stack available size. */
+ if (!lua_checkstack(L, 1)) {
+ ha_alert("Lua filter '%s' : Lua error : full stack.", conf->reg->name);
+ goto error;
+ }
+ lua_pushstring(L, conf->args[pos]);
+ lua_rawseti(L, -2, lua_rawlen(L, -2) + 1);
+ }
+
+ error = lua_pcall(L, 2, LUA_MULTRET, 0);
+ switch (error) {
+ case LUA_OK:
+ /* replace the filter ref */
+ conf->ref[state_id] = flt_ref;
+ break;
+ case LUA_ERRRUN:
+ ha_alert("Lua filter '%s' : runtime error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+ case LUA_ERRMEM:
+ ha_alert("Lua filter '%s' : out of memory error", conf->reg->name);
+ goto error;
+ case LUA_ERRERR:
+ ha_alert("Lua filter '%s' : message handler error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM <= 503
+ case LUA_ERRGCMM:
+ ha_alert("Lua filter '%s' : garbage collector error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+#endif
+ default:
+ ha_alert("Lua filter '%s' : unknown error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+ }
+
+ lua_settop(L, 0);
+ return 0;
+
+ error:
+ lua_settop(L, 0);
+ return -1;
+}
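+
+/* The duplication loop above is roughly equivalent to this Lua sketch
+ * (illustrative): each filter declaration gets its own shallow copy of
+ * the registered class table:
+ *
+ *   local copy = {}
+ *   for k, v in pairs(FilterClass) do copy[k] = v end
+ */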
+
+static void hlua_filter_deinit_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct hlua_flt_config *conf = fconf->conf;
+ lua_State *L;
+ int state_id;
+
+ if (!conf)
+ return;
+
+ state_id = reg_flt_to_stack_id(conf->reg);
+ L = hlua_states[state_id];
+ luaL_unref(L, LUA_REGISTRYINDEX, conf->ref[state_id]);
+}
+
+static int hlua_filter_init(struct proxy *px, struct flt_conf *fconf)
+{
+ struct hlua_flt_config *conf = fconf->conf;
+ int state_id = reg_flt_to_stack_id(conf->reg);
+
+ /* Rely on per-thread init for global scripts */
+ if (!state_id)
+ return hlua_filter_init_per_thread(px, fconf);
+ return 0;
+}
+
+static void hlua_filter_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ if (fconf->conf) {
+ struct hlua_flt_config *conf = fconf->conf;
+ int state_id = reg_flt_to_stack_id(conf->reg);
+ int pos;
+
+ /* Rely on per-thread deinit for global scripts */
+ if (!state_id)
+ hlua_filter_deinit_per_thread(px, fconf);
+
+ for (pos = 0; conf->args[pos]; pos++)
+ free(conf->args[pos]);
+ free(conf->args);
+ }
+ ha_free(&fconf->conf);
+ ha_free((char **)&fconf->id);
+ ha_free(&fconf->ops);
+}
+
+static int hlua_filter_new(struct stream *s, struct filter *filter)
+{
+ struct hlua_flt_config *conf = FLT_CONF(filter);
+ struct hlua_flt_ctx *flt_ctx = NULL;
+ int ret = 1;
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. Initializing it
+ * lazily saves performance, because a systematic Lua
+ * initialization causes a 5% performance loss.
+ */
+ if (!s->hlua) {
+ struct hlua *hlua;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+ HLUA_INIT(hlua);
+ s->hlua = hlua;
+ if (!hlua_ctx_init(s->hlua, reg_flt_to_stack_id(conf->reg), s->task, 0)) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+ }
+
+ flt_ctx = pool_zalloc(pool_head_hlua_flt_ctx);
+ if (!flt_ctx) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize filter Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+ flt_ctx->hlua[0] = pool_alloc(pool_head_hlua);
+ flt_ctx->hlua[1] = pool_alloc(pool_head_hlua);
+ if (!flt_ctx->hlua[0] || !flt_ctx->hlua[1]) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize filter Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+ HLUA_INIT(flt_ctx->hlua[0]);
+ HLUA_INIT(flt_ctx->hlua[1]);
+ if (!hlua_ctx_init(flt_ctx->hlua[0], reg_flt_to_stack_id(conf->reg), s->task, 0) ||
+ !hlua_ctx_init(flt_ctx->hlua[1], reg_flt_to_stack_id(conf->reg), s->task, 0)) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize filter Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+
+ if (!HLUA_IS_RUNNING(s->hlua)) {
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(s->hlua)) {
+ const char *error;
+
+ if (lua_type(s->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(s->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(s->be, "Lua filter '%s': %s.\n", conf->reg->name, error);
+ ret = 0;
+ goto end;
+ }
+
+ /* Check stack size. */
+ if (!lua_checkstack(s->hlua->T, 1)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(s->hlua);
+ ret = 0;
+ goto end;
+ }
+
+ lua_rawgeti(s->hlua->T, LUA_REGISTRYINDEX, conf->ref[s->hlua->state_id]);
+ if (lua_getfield(s->hlua->T, -1, "new") != LUA_TFUNCTION) {
+ SEND_ERR(s->be, "Lua filter '%s': 'new' field is not a function.\n",
+ conf->reg->name);
+ RESET_SAFE_LJMP(s->hlua);
+ ret = 0;
+ goto end;
+ }
+ lua_insert(s->hlua->T, -2);
+
+ /* Push the copy on the stack */
+ s->hlua->nargs = 1;
+
+ /* We must initialize the execution timeouts. */
+ s->hlua->max_time = hlua_timeout_session;
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(s->hlua);
+ }
+
+ switch (hlua_ctx_resume(s->hlua, 0)) {
+ case HLUA_E_OK:
+ /* Nothing returned or not a table: ignore the filter for the current stream. */
+ if (!lua_gettop(s->hlua->T) || !lua_istable(s->hlua->T, 1)) {
+ ret = 0;
+ goto end;
+ }
+
+ /* Attach the filter pointer to the ctx. */
+ lua_pushstring(s->hlua->T, "__filter");
+ lua_pushlightuserdata(s->hlua->T, filter);
+ lua_settable(s->hlua->T, -3);
+
+ /* Save a ref on the filter ctx */
+ lua_pushvalue(s->hlua->T, 1);
+ flt_ctx->ref = luaL_ref(s->hlua->T, LUA_REGISTRYINDEX);
+ filter->ctx = flt_ctx;
+ break;
+ case HLUA_E_ERRMSG:
+ SEND_ERR(s->be, "Lua filter '%s' : %s.\n", conf->reg->name, lua_tostring(s->hlua->T, -1));
+ ret = -1;
+ goto end;
+ case HLUA_E_ETMOUT:
+ SEND_ERR(s->be, "Lua filter '%s' : 'new' execution timeout.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ case HLUA_E_NOMEM:
+ SEND_ERR(s->be, "Lua filter '%s' : out of memory error.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ case HLUA_E_AGAIN:
+ case HLUA_E_YIELD:
+ SEND_ERR(s->be, "Lua filter '%s': yield functions like core.tcp() or core.sleep()"
+ " are not allowed from 'new' function.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ case HLUA_E_ERR:
+ SEND_ERR(s->be, "Lua filter '%s': 'new' returns an unknown error.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ default:
+ ret = 0;
+ goto end;
+ }
+
+ end:
+ if (s->hlua)
+ lua_settop(s->hlua->T, 0);
+ if (ret <= 0) {
+ if (flt_ctx) {
+ hlua_ctx_destroy(flt_ctx->hlua[0]);
+ hlua_ctx_destroy(flt_ctx->hlua[1]);
+ pool_free(pool_head_hlua_flt_ctx, flt_ctx);
+ }
+ }
+ return ret;
+}
+
+static void hlua_filter_delete(struct stream *s, struct filter *filter)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ luaL_unref(s->hlua->T, LUA_REGISTRYINDEX, flt_ctx->ref);
+ hlua_ctx_destroy(flt_ctx->hlua[0]);
+ hlua_ctx_destroy(flt_ctx->hlua[1]);
+ pool_free(pool_head_hlua_flt_ctx, flt_ctx);
+ filter->ctx = NULL;
+}
+
+static int hlua_filter_from_payload(struct filter *filter)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ return (flt_ctx && !!(flt_ctx->flags & HLUA_FLT_CTX_FL_PAYLOAD));
+}
+
+static int hlua_filter_callback(struct stream *s, struct filter *filter, const char *fun,
+ int dir, unsigned int flags)
+{
+ struct hlua *flt_hlua;
+ struct hlua_flt_config *conf = FLT_CONF(filter);
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ unsigned int hflags = HLUA_TXN_FLT_CTX;
+ int ret = 1;
+
+ flt_hlua = flt_ctx->hlua[(dir == SMP_OPT_DIR_REQ ? 0 : 1)];
+ if (!flt_hlua)
+ goto end;
+
+ if (!HLUA_IS_RUNNING(flt_hlua)) {
+ int extra_idx = lua_gettop(flt_hlua->T);
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(flt_hlua)) {
+ const char *error;
+
+ if (lua_type(flt_hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(flt_hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(s->be, "Lua filter '%s': %s.\n", conf->reg->name, error);
+ goto end;
+ }
+
+ /* Check stack size. */
+ if (!lua_checkstack(flt_hlua->T, 3)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+
+ lua_rawgeti(flt_hlua->T, LUA_REGISTRYINDEX, flt_ctx->ref);
+ if (lua_getfield(flt_hlua->T, -1, fun) != LUA_TFUNCTION) {
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+ lua_insert(flt_hlua->T, -2);
+
+ if (!hlua_txn_new(flt_hlua->T, s, s->be, dir, hflags)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+ flt_hlua->nargs = 2;
+
+ if (flags & HLUA_FLT_CB_ARG_CHN) {
+ if (dir == SMP_OPT_DIR_REQ)
+ lua_getfield(flt_hlua->T, -1, "req");
+ else
+ lua_getfield(flt_hlua->T, -1, "res");
+ if (lua_type(flt_hlua->T, -1) == LUA_TTABLE) {
+ lua_pushstring(flt_hlua->T, "__filter");
+ lua_pushlightuserdata(flt_hlua->T, filter);
+ lua_settable(flt_hlua->T, -3);
+ }
+ flt_hlua->nargs++;
+ }
+ else if (flags & HLUA_FLT_CB_ARG_HTTP_MSG) {
+ if (dir == SMP_OPT_DIR_REQ)
+ lua_getfield(flt_hlua->T, -1, "http_req");
+ else
+ lua_getfield(flt_hlua->T, -1, "http_res");
+ if (lua_type(flt_hlua->T, -1) == LUA_TTABLE) {
+ lua_pushstring(flt_hlua->T, "__filter");
+ lua_pushlightuserdata(flt_hlua->T, filter);
+ lua_settable(flt_hlua->T, -3);
+ }
+ flt_hlua->nargs++;
+ }
+
+ /* Check stack size. */
+ if (!lua_checkstack(flt_hlua->T, 1)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+
+ while (extra_idx--) {
+ lua_pushvalue(flt_hlua->T, 1);
+ lua_remove(flt_hlua->T, 1);
+ flt_hlua->nargs++;
+ }
+
+ /* We must initialize the execution timeouts. */
+ flt_hlua->max_time = hlua_timeout_session;
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(flt_hlua);
+ }
+
+ switch (hlua_ctx_resume(flt_hlua, !(flags & HLUA_FLT_CB_FINAL))) {
+ case HLUA_E_OK:
+		/* Catch the return value if required */
+ if ((flags & HLUA_FLT_CB_RETVAL) && lua_gettop(flt_hlua->T) > 0) {
+ ret = lua_tointeger(flt_hlua->T, -1);
+ lua_settop(flt_hlua->T, 0); /* Empty the stack. */
+ }
+
+ /* Set timeout in the required channel. */
+ if (flt_hlua->wake_time != TICK_ETERNITY) {
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = flt_hlua->wake_time;
+ else
+ s->res.analyse_exp = flt_hlua->wake_time;
+ }
+ break;
+ case HLUA_E_AGAIN:
+ /* Set timeout in the required channel. */
+ if (flt_hlua->wake_time != TICK_ETERNITY) {
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = flt_hlua->wake_time;
+ else
+ s->res.analyse_exp = flt_hlua->wake_time;
+ }
+		/* Some actions can be woken up when a "write" event
+		 * is detected on the response channel. This is useful
+		 * only for actions targeting the request.
+		 */
+ if (HLUA_IS_WAKERESWR(flt_hlua))
+ s->res.flags |= CF_WAKE_WRITE;
+ if (HLUA_IS_WAKEREQWR(flt_hlua))
+ s->req.flags |= CF_WAKE_WRITE;
+ ret = 0;
+ goto end;
+ case HLUA_E_ERRMSG:
+ SEND_ERR(s->be, "Lua filter '%s' : %s.\n", conf->reg->name, lua_tostring(flt_hlua->T, -1));
+ ret = -1;
+ goto end;
+ case HLUA_E_ETMOUT:
+ SEND_ERR(s->be, "Lua filter '%s' : '%s' callback execution timeout.\n", conf->reg->name, fun);
+ goto end;
+ case HLUA_E_NOMEM:
+ SEND_ERR(s->be, "Lua filter '%s' : out of memory error.\n", conf->reg->name);
+ goto end;
+ case HLUA_E_YIELD:
+		SEND_ERR(s->be, "Lua filter '%s': yield functions like core.tcp() or core.sleep()"
+			 " are not allowed from the '%s' callback.\n", conf->reg->name, fun);
+ goto end;
+ case HLUA_E_ERR:
+ SEND_ERR(s->be, "Lua filter '%s': '%s' returns an unknown error.\n", conf->reg->name, fun);
+ goto end;
+ default:
+ goto end;
+ }
+
+
+ end:
+ return ret;
+}
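+
+/* Illustrative sketch (not part of the original source): on the Lua side, the
+ * callback resolved above is a plain method on the filter instance, e.g. in a
+ * script loaded with "lua-load" (the class and message are hypothetical):
+ *
+ *   function MyFilter:start_analyze(txn, chn)
+ *       txn:Info("start analyzing, input=" .. chn:input())
+ *       return filter.CONTINUE   -- 1 = continue, 0 = wait, -1 = error
+ *   end
+ */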
+
+static int hlua_filter_start_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags = 0;
+ return hlua_filter_callback(s, filter, "start_analyze",
+ (!(chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_CHN));
+}
+
+static int hlua_filter_end_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags &= ~HLUA_FLT_CTX_FL_PAYLOAD;
+ return hlua_filter_callback(s, filter, "end_analyze",
+ (!(chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_CHN));
+}
+
+static int hlua_filter_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags &= ~HLUA_FLT_CTX_FL_PAYLOAD;
+ return hlua_filter_callback(s, filter, "http_headers",
+ (!(msg->chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_HTTP_MSG));
+}
+
+static int hlua_filter_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ struct hlua *flt_hlua;
+ int dir = (!(msg->chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+ int idx = (dir == SMP_OPT_DIR_REQ ? 0 : 1);
+ int ret;
+
+ flt_hlua = flt_ctx->hlua[idx];
+ flt_ctx->cur_off[idx] = offset;
+ flt_ctx->cur_len[idx] = len;
+ flt_ctx->flags |= HLUA_FLT_CTX_FL_PAYLOAD;
+ ret = hlua_filter_callback(s, filter, "http_payload", dir, (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_ARG_HTTP_MSG));
+ if (ret != -1) {
+ ret = flt_ctx->cur_len[idx];
+ if (lua_gettop(flt_hlua->T) > 0) {
+ ret = lua_tointeger(flt_hlua->T, -1);
+ if (ret > flt_ctx->cur_len[idx])
+ ret = flt_ctx->cur_len[idx];
+ lua_settop(flt_hlua->T, 0); /* Empty the stack. */
+ }
+ }
+ return ret;
+}
+
+static int hlua_filter_http_end(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags &= ~HLUA_FLT_CTX_FL_PAYLOAD;
+ return hlua_filter_callback(s, filter, "http_end",
+ (!(msg->chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_HTTP_MSG));
+}
+
+static int hlua_filter_tcp_payload(struct stream *s, struct filter *filter, struct channel *chn,
+ unsigned int offset, unsigned int len)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ struct hlua *flt_hlua;
+ int dir = (!(chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+ int idx = (dir == SMP_OPT_DIR_REQ ? 0 : 1);
+ int ret;
+
+ flt_hlua = flt_ctx->hlua[idx];
+ flt_ctx->cur_off[idx] = offset;
+ flt_ctx->cur_len[idx] = len;
+ flt_ctx->flags |= HLUA_FLT_CTX_FL_PAYLOAD;
+ ret = hlua_filter_callback(s, filter, "tcp_payload", dir, (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_ARG_CHN));
+ if (ret != -1) {
+ ret = flt_ctx->cur_len[idx];
+ if (lua_gettop(flt_hlua->T) > 0) {
+ ret = lua_tointeger(flt_hlua->T, -1);
+ if (ret > flt_ctx->cur_len[idx])
+ ret = flt_ctx->cur_len[idx];
+ lua_settop(flt_hlua->T, 0); /* Empty the stack. */
+ }
+ }
+ return ret;
+}
+
+static int hlua_filter_parse_fct(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct hlua_reg_filter *reg_flt = private;
+ lua_State *L;
+ struct hlua_flt_config *conf = NULL;
+ const char *flt_id = NULL;
+ int state_id, pos, flt_flags = 0;
+ struct flt_ops *hlua_flt_ops = NULL;
+
+ state_id = reg_flt_to_stack_id(reg_flt);
+ L = hlua_states[state_id];
+
+ /* Initialize the filter ops with default callbacks */
+ hlua_flt_ops = calloc(1, sizeof(*hlua_flt_ops));
+ if (!hlua_flt_ops)
+ goto error;
+ hlua_flt_ops->init = hlua_filter_init;
+ hlua_flt_ops->deinit = hlua_filter_deinit;
+ if (state_id) {
+ /* Set per-thread callback if script is loaded per-thread */
+ hlua_flt_ops->init_per_thread = hlua_filter_init_per_thread;
+ hlua_flt_ops->deinit_per_thread = hlua_filter_deinit_per_thread;
+ }
+ hlua_flt_ops->attach = hlua_filter_new;
+ hlua_flt_ops->detach = hlua_filter_delete;
+
+ /* Push the filter class on the stack and resolve all callbacks */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, reg_flt->flt_ref[state_id]);
+
+ if (lua_getfield(L, -1, "start_analyze") == LUA_TFUNCTION)
+ hlua_flt_ops->channel_start_analyze = hlua_filter_start_analyze;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "end_analyze") == LUA_TFUNCTION)
+ hlua_flt_ops->channel_end_analyze = hlua_filter_end_analyze;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "http_headers") == LUA_TFUNCTION)
+ hlua_flt_ops->http_headers = hlua_filter_http_headers;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "http_payload") == LUA_TFUNCTION)
+ hlua_flt_ops->http_payload = hlua_filter_http_payload;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "http_end") == LUA_TFUNCTION)
+ hlua_flt_ops->http_end = hlua_filter_http_end;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "tcp_payload") == LUA_TFUNCTION)
+ hlua_flt_ops->tcp_payload = hlua_filter_tcp_payload;
+ lua_pop(L, 1);
+
+ /* Get id and flags of the filter class */
+ if (lua_getfield(L, -1, "id") == LUA_TSTRING)
+ flt_id = lua_tostring(L, -1);
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "flags") == LUA_TNUMBER)
+ flt_flags = lua_tointeger(L, -1);
+ lua_pop(L, 1);
+
+ /* Create the filter config */
+ conf = calloc(1, sizeof(*conf));
+ if (!conf)
+ goto error;
+ conf->reg = reg_flt;
+
+ /* duplicate args */
+ for (pos = 0; *args[*cur_arg + 1 + pos]; pos++);
+ conf->args = calloc(pos + 1, sizeof(*conf->args));
+ if (!conf->args)
+ goto error;
+ for (pos = 0; *args[*cur_arg + 1 + pos]; pos++) {
+ conf->args[pos] = strdup(args[*cur_arg + 1 + pos]);
+ if (!conf->args[pos])
+ goto error;
+ }
+ conf->args[pos] = NULL;
+ *cur_arg += pos + 1;
+
+ if (flt_id) {
+ fconf->id = strdup(flt_id);
+ if (!fconf->id)
+ goto error;
+ }
+ fconf->flags = flt_flags;
+ fconf->conf = conf;
+ fconf->ops = hlua_flt_ops;
+
+ lua_settop(L, 0);
+ return 0;
+
+ error:
+ memprintf(err, "Lua filter '%s' : Lua out of memory error", reg_flt->name);
+ free(hlua_flt_ops);
+ if (conf && conf->args) {
+ for (pos = 0; conf->args[pos]; pos++)
+ free(conf->args[pos]);
+ free(conf->args);
+ }
+ free(conf);
+ free((char *)fconf->id);
+ lua_settop(L, 0);
+ return -1;
+}
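+
+/* Illustrative sketch (not part of the original source): once parsed by the
+ * function above, the filter is attached in a proxy section with the
+ * "lua.<name>" keyword (the name and arguments below are hypothetical):
+ *
+ *   frontend fe
+ *       filter lua.myflt arg1 arg2
+ *
+ * The trailing words are duplicated into conf->args and made available to the
+ * Lua side when the filter is instantiated.
+ */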
+
+__LJMP static int hlua_register_data_filter(lua_State *L)
+{
+ struct filter *filter;
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "register_data_filter"));
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+ chn = MAY_LJMP(hlua_checkchannel(L, 2));
+
+ lua_getfield(L, 1, "__filter");
+ MAY_LJMP(luaL_checktype(L, -1, LUA_TLIGHTUSERDATA));
+ filter = lua_touserdata (L, -1);
+ lua_pop(L, 1);
+
+ register_data_filter(chn_strm(chn), chn, filter);
+ return 1;
+}
+
+__LJMP static int hlua_unregister_data_filter(lua_State *L)
+{
+ struct filter *filter;
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "unregister_data_filter"));
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+ chn = MAY_LJMP(hlua_checkchannel(L, 2));
+
+ lua_getfield(L, 1, "__filter");
+ MAY_LJMP(luaL_checktype(L, -1, LUA_TLIGHTUSERDATA));
+ filter = lua_touserdata (L, -1);
+ lua_pop(L, 1);
+
+ unregister_data_filter(chn_strm(chn), chn, filter);
+ return 1;
+}
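+
+/* Illustrative sketch (not part of the original source): from a Lua filter
+ * callback, data filtering is toggled per channel through the global "filter"
+ * table; the "__filter" light userdata set on the instance is how the two
+ * bindings above retrieve the struct filter pointer (hypothetical class):
+ *
+ *   function MyFilter:start_analyze(txn, chn)
+ *       filter.register_data_filter(self, chn)
+ *       return filter.CONTINUE
+ *   end
+ */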
+
+/* This function is a Lua binding used to register a filter. It expects a
+ * filter name to be used in the HAProxy configuration file, a filter class
+ * (a table) and a Lua function used to parse the configuration arguments.
+ */
+__LJMP static int hlua_register_filter(lua_State *L)
+{
+ struct buffer *trash;
+ struct flt_kw_list *fkl;
+ struct flt_kw *fkw;
+ const char *name;
+ struct hlua_reg_filter *reg_flt= NULL;
+ int flt_ref, fun_ref;
+ int len;
+
+ MAY_LJMP(check_args(L, 3, "register_filter"));
+
+ /* First argument : filter name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : The filter class */
+ flt_ref = MAY_LJMP(hlua_checktable(L, 2));
+
+ /* Third argument : lua function. */
+ fun_ref = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ fkw = flt_find_kw(trash->area);
+ if (fkw != NULL) {
+ reg_flt = fkw->private;
+ if (reg_flt->flt_ref[hlua_state_id] != -1 || reg_flt->fun_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register filter 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ }
+ reg_flt->flt_ref[hlua_state_id] = flt_ref;
+ reg_flt->fun_ref[hlua_state_id] = fun_ref;
+ return 0;
+ }
+
+ fkl = calloc(1, sizeof(*fkl) + sizeof(struct flt_kw) * 2);
+ if (!fkl)
+ goto alloc_error;
+ fkl->scope = "HLUA";
+
+ reg_flt = new_hlua_reg_filter(name);
+ if (!reg_flt)
+ goto alloc_error;
+
+ reg_flt->flt_ref[hlua_state_id] = flt_ref;
+ reg_flt->fun_ref[hlua_state_id] = fun_ref;
+
+ /* The filter keyword */
+ len = strlen("lua.") + strlen(name) + 1;
+ fkl->kw[0].kw = calloc(1, len);
+ if (!fkl->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)fkl->kw[0].kw, len, "lua.%s", name);
+
+ fkl->kw[0].parse = hlua_filter_parse_fct;
+ fkl->kw[0].private = reg_flt;
+ memset(&fkl->kw[1], 0, sizeof(*fkl->kw));
+
+ /* Register this new filter */
+ flt_register_keywords(fkl);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_reg_filter(reg_flt);
+ ha_free(&fkl);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
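+
+/* Illustrative sketch (not part of the original source): the expected Lua-side
+ * registration, with a hypothetical class, looks like:
+ *
+ *   local MyFilter = {}
+ *   MyFilter.id = "my-filter"               -- optional, copied to fconf->id
+ *   MyFilter.flags = filter.FLT_CFG_FL_HTX  -- optional, copied to fconf->flags
+ *   function MyFilter:start_analyze(txn, chn) return filter.CONTINUE end
+ *   core.register_filter("myflt", MyFilter, function(flt, args)
+ *       return flt
+ *   end)
+ */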
+
+static int hlua_read_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err, unsigned int *timeout)
+{
+ const char *error;
+
+ error = parse_time_err(args[1], timeout, TIME_UNIT_MS);
+ if (error == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument <%s> to <%s> (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (error == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument <%s> to <%s> (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (error) {
+ memprintf(err, "%s: invalid timeout", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int hlua_session_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return hlua_read_timeout(args, section_type, curpx, defpx,
+ file, line, err, &hlua_timeout_session);
+}
+
+static int hlua_task_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return hlua_read_timeout(args, section_type, curpx, defpx,
+ file, line, err, &hlua_timeout_task);
+}
+
+static int hlua_applet_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return hlua_read_timeout(args, section_type, curpx, defpx,
+ file, line, err, &hlua_timeout_applet);
+}
+
+static int hlua_forced_yield(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *error;
+
+ hlua_nb_instruction = strtoll(args[1], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "%s: invalid number", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int hlua_parse_maxmem(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *error;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument (Lua memory size in MB).", args[0]);
+ return -1;
+ }
+ hlua_global_allocator.limit = strtoll(args[1], &error, 10) * 1024L * 1024L;
+ if (*error != '\0') {
+ memprintf(err, "%s: invalid number %s (error at '%c')", args[0], args[1], *error);
+ return -1;
+ }
+ return 0;
+}
+
+
+/* This function is called by the main configuration key "lua-load". It loads and
+ * executes a Lua file during the parsing of the HAProxy configuration file. It is
+ * the main Lua entry point.
+ *
+ * This function runs with the HAProxy keywords API. It returns -1 if an error
+ * occurs, otherwise it returns 0.
+ *
+ * In some error cases, Lua sets an error message on top of the stack. This
+ * function reports this error message in the HAProxy logs and pops it from
+ * the stack.
+ *
+ * This function can fail with an abort() due to a Lua critical error.
+ * We are in the configuration parsing process of HAProxy, so this abort() is
+ * tolerated.
+ */
+static int hlua_load_state(char *filename, lua_State *L, char **err)
+{
+ int error;
+
+ /* Just load and compile the file. */
+ error = luaL_loadfile(L, filename);
+ if (error) {
+ memprintf(err, "error in Lua file '%s': %s", filename, lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+ }
+
+	/* If no syntax error was detected, execute the code. */
+ error = lua_pcall(L, 0, LUA_MULTRET, 0);
+ switch (error) {
+ case LUA_OK:
+ break;
+ case LUA_ERRRUN:
+ memprintf(err, "Lua runtime error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+ case LUA_ERRMEM:
+ memprintf(err, "Lua out of memory error");
+ return -1;
+ case LUA_ERRERR:
+ memprintf(err, "Lua message handler error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM <= 503
+ case LUA_ERRGCMM:
+ memprintf(err, "Lua garbage collector error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+#endif
+ default:
+ memprintf(err, "Lua unknown error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int hlua_load(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a file name as parameter.", args[0]);
+ return -1;
+ }
+
+ /* loading for global state */
+ hlua_state_id = 0;
+ ha_set_thread(NULL);
+ return hlua_load_state(args[1], hlua_states[0], err);
+}
+
+static int hlua_load_per_thread(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int len;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a file as parameter.", args[0]);
+ return -1;
+ }
+
+ if (per_thread_load == NULL) {
+ /* allocate the first entry large enough to store the final NULL */
+ per_thread_load = calloc(1, sizeof(*per_thread_load));
+ if (per_thread_load == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+ }
+
+ /* count used entries */
+ for (len = 0; per_thread_load[len] != NULL; len++)
+ ;
+
+ per_thread_load = realloc(per_thread_load, (len + 2) * sizeof(*per_thread_load));
+ if (per_thread_load == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+
+ per_thread_load[len] = strdup(args[1]);
+ per_thread_load[len + 1] = NULL;
+
+ if (per_thread_load[len] == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+
+ /* loading for thread 1 only */
+ hlua_state_id = 1;
+ ha_set_thread(NULL);
+ return hlua_load_state(args[1], hlua_states[1], err);
+}
+
+/* Prepend the given <path> followed by a semicolon to the `package.<type>`
+ * variable in the given Lua state <L>.
+ */
+static int hlua_prepend_path(lua_State *L, char *type, char *path)
+{
+ lua_getglobal(L, "package"); /* push package variable */
+ lua_pushstring(L, path); /* push given path */
+ lua_pushstring(L, ";"); /* push semicolon */
+ lua_getfield(L, -3, type); /* push old path */
+ lua_concat(L, 3); /* concatenate to new path */
+ lua_setfield(L, -2, type); /* store new path */
+ lua_pop(L, 1); /* pop package variable */
+
+ return 0;
+}
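+
+/* Illustrative sketch (not part of the original source): with type "path" and
+ * a hypothetical path of "/opt/haproxy/lua/?.lua", this turns a default
+ * package.path of "./?.lua;/usr/share/lua/5.3/?.lua;..." into
+ * "/opt/haproxy/lua/?.lua;./?.lua;/usr/share/lua/5.3/?.lua;...".
+ */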
+
+static int hlua_config_prepend_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *path;
+ char *type = "path";
+ struct prepend_path *p = NULL;
+ size_t i;
+
+ if (too_many_args(2, args, err, NULL)) {
+ goto err;
+ }
+
+ if (!(*args[1])) {
+ memprintf(err, "'%s' expects to receive a <path> as argument", args[0]);
+ goto err;
+ }
+ path = args[1];
+
+ if (*args[2]) {
+ if (strcmp(args[2], "path") != 0 && strcmp(args[2], "cpath") != 0) {
+ memprintf(err, "'%s' expects <type> to either be 'path' or 'cpath'", args[0]);
+ goto err;
+ }
+ type = args[2];
+ }
+
+ p = calloc(1, sizeof(*p));
+ if (p == NULL) {
+ memprintf(err, "memory allocation failed");
+ goto err;
+ }
+ p->path = strdup(path);
+ if (p->path == NULL) {
+ memprintf(err, "memory allocation failed");
+ goto err2;
+ }
+ p->type = strdup(type);
+ if (p->type == NULL) {
+ memprintf(err, "memory allocation failed");
+ goto err2;
+ }
+ LIST_APPEND(&prepend_path_list, &p->l);
+
+ /* Handle the global state and the per-thread state for the first
+ * thread. The remaining threads will be initialized based on
+ * prepend_path_list.
+ */
+ for (i = 0; i < 2; i++) {
+ lua_State *L = hlua_states[i];
+ const char *error;
+
+ if (setjmp(safe_ljmp_env) != 0) {
+ lua_atpanic(L, hlua_panic_safe);
+ if (lua_type(L, -1) == LUA_TSTRING)
+ error = lua_tostring(L, -1);
+ else
+ error = "critical error";
+ fprintf(stderr, "lua-prepend-path: %s.\n", error);
+ exit(1);
+ } else {
+ lua_atpanic(L, hlua_panic_ljmp);
+ }
+
+ hlua_prepend_path(L, type, path);
+
+ lua_atpanic(L, hlua_panic_safe);
+ }
+
+ return 0;
+
+err2:
+ free(p->type);
+ free(p->path);
+err:
+ free(p);
+ return -1;
+}
+
+/* configuration keywords declaration */
+static struct cfg_kw_list cfg_kws = {{ },{
+ { CFG_GLOBAL, "lua-prepend-path", hlua_config_prepend_path },
+ { CFG_GLOBAL, "lua-load", hlua_load },
+ { CFG_GLOBAL, "lua-load-per-thread", hlua_load_per_thread },
+ { CFG_GLOBAL, "tune.lua.session-timeout", hlua_session_timeout },
+ { CFG_GLOBAL, "tune.lua.task-timeout", hlua_task_timeout },
+ { CFG_GLOBAL, "tune.lua.service-timeout", hlua_applet_timeout },
+ { CFG_GLOBAL, "tune.lua.forced-yield", hlua_forced_yield },
+ { CFG_GLOBAL, "tune.lua.maxmem", hlua_parse_maxmem },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
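+
+/* Illustrative sketch (not part of the original source): a global section
+ * combining the keywords registered above (paths and values are hypothetical):
+ *
+ *   global
+ *       lua-prepend-path /opt/haproxy/lua/?.lua
+ *       lua-load /opt/haproxy/lua/shared.lua
+ *       lua-load-per-thread /opt/haproxy/lua/worker.lua
+ *       tune.lua.session-timeout 5s
+ *       tune.lua.forced-yield 10000
+ *       tune.lua.maxmem 50
+ */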
+
+#ifdef USE_OPENSSL
+
+/*
+ * This function replaces a ckch_store with another one, and rebuilds the
+ * ckch_inst and all its dependencies. It does the same as
+ * "cli_io_handler_commit_cert" but for Lua; the major difference is that
+ * yielding is not handled the same way in Lua and for the CLI.
+ */
+__LJMP static int hlua_ckch_commit_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct ckch_inst **lua_ckchi = lua_touserdata(L, -1);
+ struct ckch_store **lua_ckchs = lua_touserdata(L, -2);
+ struct ckch_inst *ckchi = *lua_ckchi;
+ struct ckch_store *old_ckchs = lua_ckchs[0];
+ struct ckch_store *new_ckchs = lua_ckchs[1];
+ struct hlua *hlua;
+ char *err = NULL;
+ int y = 1;
+
+ hlua = hlua_gethlua(L);
+
+ /* get the first ckchi to copy */
+ if (ckchi == NULL)
+ ckchi = LIST_ELEM(old_ckchs->ckch_inst.n, typeof(ckchi), by_ckchs);
+
+	/* walk through the old ckch_inst list and create new ckch_inst entries using the updated ckchs */
+ list_for_each_entry_from(ckchi, &old_ckchs->ckch_inst, by_ckchs) {
+ struct ckch_inst *new_inst;
+
+		/* it takes a lot of CPU to create SSL_CTXs, so we yield every 10 ckch instances */
+ if (y % 10 == 0) {
+
+ *lua_ckchi = ckchi;
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_ckch_commit_yield, TICK_ETERNITY, 0));
+ }
+
+ if (ckch_inst_rebuild(new_ckchs, ckchi, &new_inst, &err))
+ goto error;
+
+ /* link the new ckch_inst to the duplicate */
+ LIST_APPEND(&new_ckchs->ckch_inst, &new_inst->by_ckchs);
+ y++;
+ }
+
+ /* The generation is finished, we can insert everything */
+ ckch_store_replace(old_ckchs, new_ckchs);
+
+ lua_pop(L, 2); /* pop the lua_ckchs and ckchi */
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return 0;
+
+error:
+ ckch_store_free(new_ckchs);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+	/* luaL_error() never returns, so <err> must be pushed to Lua and freed first */
+	lua_pushstring(L, err ? err : "unknown error");
+	free(err);
+	WILL_LJMP(lua_error(L));
+
+ return 0;
+}
+
+/*
+ * Replace a ckch_store <filename> in the ckchs_tree with a ckch_store created
+ * from the table in parameter.
+ *
+ * This is equivalent to "set ssl cert" + "commit ssl cert" over the CLI, which
+ * means it does not need to have a transaction since everything is done in the
+ * same function.
+ *
+ * CertCache.set{filename="", crt="", key="", sctl="", ocsp="", issuer=""}
+ *
+ */
+__LJMP static int hlua_ckch_set(lua_State *L)
+{
+ struct hlua *hlua;
+ struct ckch_inst **lua_ckchi;
+ struct ckch_store **lua_ckchs;
+ struct ckch_store *old_ckchs = NULL;
+ struct ckch_store *new_ckchs = NULL;
+ int errcode = 0;
+ char *err = NULL;
+ struct cert_exts *cert_ext = NULL;
+ char *filename;
+ struct cert_key_and_chain *ckch;
+ int ret;
+
+ if (lua_type(L, -1) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "'CertCache.set' needs a table as argument"));
+
+ hlua = hlua_gethlua(L);
+
+ /* FIXME: this should not return an error but should come back later */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ WILL_LJMP(luaL_error(L, "CertCache already under lock"));
+
+ ret = lua_getfield(L, -1, "filename");
+ if (ret != LUA_TSTRING) {
+ memprintf(&err, "%sNo filename specified!", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ filename = (char *)lua_tostring(L, -1);
+
+
+ /* look for the filename in the tree */
+ old_ckchs = ckchs_lookup(filename);
+ if (!old_ckchs) {
+ memprintf(&err, "%sCan't replace a certificate which is not referenced by the configuration!", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ /* TODO: handle extra_files_noext */
+
+ new_ckchs = ckchs_dup(old_ckchs);
+ if (!new_ckchs) {
+ memprintf(&err, "%sCannot allocate memory!", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ ckch = new_ckchs->ckch;
+
+	/* loop on the fields in the table, which have the same names as the
+	 * possible file extensions */
+ lua_pushnil(L);
+ while (lua_next(L, 1)) {
+ int i;
+ const char *field = lua_tostring(L, -2);
+ char *payload = (char *)lua_tostring(L, -1);
+
+ if (!field || strcmp(field, "filename") == 0) {
+ lua_pop(L, 1);
+ continue;
+ }
+
+		/* reset any previous match before looking up this field */
+		cert_ext = NULL;
+		for (i = 0; field && cert_exts[i].ext != NULL; i++) {
+ if (strcmp(field, cert_exts[i].ext) == 0) {
+ cert_ext = &cert_exts[i];
+ break;
+ }
+ }
+
+		/* the field does not match any supported file extension */
+ if (cert_ext == NULL) {
+ memprintf(&err, "%sUnsupported field '%s'", err ? err : "", field);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+		/* apply the change to the duplicate */
+ if (cert_ext->load(filename, payload, ckch, &err) != 0) {
+ memprintf(&err, "%sCan't load the payload for '%s'", err ? err : "", cert_ext->ext);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ lua_pop(L, 1);
+ }
+
+ /* store the pointers on the lua stack */
+ lua_ckchs = lua_newuserdata(L, sizeof(struct ckch_store *) * 2);
+ lua_ckchs[0] = old_ckchs;
+ lua_ckchs[1] = new_ckchs;
+ lua_ckchi = lua_newuserdata(L, sizeof(struct ckch_inst *));
+ *lua_ckchi = NULL;
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_ckch_commit_yield, TICK_ETERNITY, 0));
+
+end:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ if (errcode & ERR_CODE) {
+ ckch_store_free(new_ckchs);
+		/* luaL_error() never returns, so <err> must be pushed to Lua and freed first */
+		lua_pushstring(L, err ? err : "unknown error");
+		free(err);
+		WILL_LJMP(lua_error(L));
+ }
+ free(err);
+
+ return 0;
+}
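+
+/* Illustrative sketch (not part of the original source): updating a
+ * certificate already known to the configuration, from a Lua task (the
+ * filename and PEM payloads are hypothetical):
+ *
+ *   core.register_task(function()
+ *       CertCache.set{filename="/etc/haproxy/site.pem",
+ *                     crt=new_crt_pem, key=new_key_pem}
+ *   end)
+ */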
+
+#else
+
+__LJMP static int hlua_ckch_set(lua_State *L)
+{
+ WILL_LJMP(luaL_error(L, "'CertCache.set' needs an HAProxy built with OpenSSL"));
+
+ return 0;
+}
+#endif /* ! USE_OPENSSL */
+
+
+
+/* This function can fail with an abort() due to a Lua critical error.
+ * We are in the initialisation process of HAProxy, so this abort() is
+ * tolerated.
+ */
+int hlua_post_init_state(lua_State *L)
+{
+ struct hlua_init_function *init;
+ const char *msg;
+ enum hlua_exec ret;
+ const char *error;
+ const char *kind;
+ const char *trace;
+ int return_status = 1;
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ int nres;
+#endif
+
+ /* disable memory limit checks if limit is not set */
+ if (!hlua_global_allocator.limit)
+ hlua_global_allocator.limit = ~hlua_global_allocator.limit;
+
+ /* Call post initialisation function in safe environment. */
+ if (setjmp(safe_ljmp_env) != 0) {
+ lua_atpanic(L, hlua_panic_safe);
+ if (lua_type(L, -1) == LUA_TSTRING)
+ error = lua_tostring(L, -1);
+ else
+ error = "critical error";
+ fprintf(stderr, "Lua post-init: %s.\n", error);
+ exit(1);
+ } else {
+ lua_atpanic(L, hlua_panic_ljmp);
+ }
+
+ hlua_fcn_post_init(L);
+
+ list_for_each_entry(init, &hlua_init_functions[hlua_state_id], l) {
+ lua_rawgeti(L, LUA_REGISTRYINDEX, init->function_ref);
+
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ ret = lua_resume(L, L, 0, &nres);
+#else
+ ret = lua_resume(L, L, 0);
+#endif
+ kind = NULL;
+ switch (ret) {
+
+ case LUA_OK:
+			lua_settop(L, 0); /* Empty the stack. */
+ break;
+
+ case LUA_ERRERR:
+ kind = "message handler error";
+ /* Fall through */
+ case LUA_ERRRUN:
+ if (!kind)
+ kind = "runtime error";
+ msg = lua_tostring(L, -1);
+ lua_settop(L, 0); /* Empty the stack. */
+ trace = hlua_traceback(L, ", ");
+ if (msg)
+ ha_alert("Lua init: %s: '%s' from %s\n", kind, msg, trace);
+ else
+ ha_alert("Lua init: unknown %s from %s\n", kind, trace);
+ return_status = 0;
+ break;
+
+ default:
+ /* Unknown error */
+ kind = "Unknown error";
+ /* Fall through */
+ case LUA_YIELD:
+			/* yield is not allowed at this step, so this state must not happen */
+ if (!kind)
+ kind = "yield not allowed";
+ /* Fall through */
+ case LUA_ERRMEM:
+ if (!kind)
+ kind = "out of memory error";
+			lua_settop(L, 0); /* Empty the stack. */
+ trace = hlua_traceback(L, ", ");
+ ha_alert("Lua init: %s: %s\n", kind, trace);
+ return_status = 0;
+ break;
+ }
+ if (!return_status)
+ break;
+ }
+
+ lua_atpanic(L, hlua_panic_safe);
+ return return_status;
+}
+
+int hlua_post_init()
+{
+ int ret;
+ int i;
+ int errors;
+ char *err = NULL;
+ struct hlua_function *fcn;
+ struct hlua_reg_filter *reg_flt;
+
+#if defined(USE_OPENSSL)
+ /* Initialize SSL server. */
+ if (socket_ssl->xprt->prepare_srv) {
+		int saved_used_backend = global.ssl_used_backend;
+ // don't affect maxconn automatic computation
+ socket_ssl->xprt->prepare_srv(socket_ssl);
+		global.ssl_used_backend = saved_used_backend;
+ }
+#endif
+
+ /* Perform post init of common thread */
+ hlua_state_id = 0;
+ ha_set_thread(&ha_thread_info[0]);
+ ret = hlua_post_init_state(hlua_states[hlua_state_id]);
+ if (ret == 0)
+ return 0;
+
+ /* init remaining lua states and load files */
+ for (hlua_state_id = 2; hlua_state_id < global.nbthread + 1; hlua_state_id++) {
+
+ /* set thread context */
+ ha_set_thread(&ha_thread_info[hlua_state_id - 1]);
+
+ /* Init lua state */
+ hlua_states[hlua_state_id] = hlua_init_state(hlua_state_id);
+
+ /* Load lua files */
+ for (i = 0; per_thread_load && per_thread_load[i]; i++) {
+ ret = hlua_load_state(per_thread_load[i], hlua_states[hlua_state_id], &err);
+ if (ret != 0) {
+ ha_alert("Lua init: %s\n", err);
+ return 0;
+ }
+ }
+ }
+
+ /* Reset thread context */
+ ha_set_thread(NULL);
+
+ /* Execute post init for all states */
+ for (hlua_state_id = 1; hlua_state_id < global.nbthread + 1; hlua_state_id++) {
+
+ /* set thread context */
+ ha_set_thread(&ha_thread_info[hlua_state_id - 1]);
+
+ /* run post init */
+ ret = hlua_post_init_state(hlua_states[hlua_state_id]);
+ if (ret == 0)
+ return 0;
+ }
+
+ /* Reset thread context */
+ ha_set_thread(NULL);
+
+	/* Control the registered functions. Each function must have either:
+	 *  - only function_ref[0] set to a positive value and all others set to -1, or
+	 *  - only function_ref[0] set to -1 and all others set to positive values.
+	 * This ensures the same reference is not used both in the shared
+	 * Lua state and in a thread-dedicated Lua state. Note: if this case is
+	 * reached, the shared state takes priority, but the bug will be
+	 * complicated to track down for the end user.
+	 */
+ errors = 0;
+ list_for_each_entry(fcn, &referenced_functions, l) {
+ ret = 0;
+ for (i = 1; i < global.nbthread + 1; i++) {
+ if (fcn->function_ref[i] == -1)
+ ret--;
+ else
+ ret++;
+ }
+ if (abs(ret) != global.nbthread) {
+			ha_alert("Lua function '%s' is not referenced in all threads. "
+			         "A function must be registered in either all threads or none.\n", fcn->name);
+ errors++;
+ continue;
+ }
+
+ if ((fcn->function_ref[0] == -1) == (ret < 0)) {
+			ha_alert("Lua function '%s' is referenced both in the shared Lua context (through lua-load) "
+			         "and in a per-thread Lua context (through lua-load-per-thread). These two contexts "
+			         "are exclusive.\n", fcn->name);
+ errors++;
+ }
+ }
+
+ /* Do the same with registered filters */
+ list_for_each_entry(reg_flt, &referenced_filters, l) {
+ ret = 0;
+ for (i = 1; i < global.nbthread + 1; i++) {
+ if (reg_flt->flt_ref[i] == -1)
+ ret--;
+ else
+ ret++;
+ }
+ if (abs(ret) != global.nbthread) {
+			ha_alert("Lua filter '%s' is not referenced in all threads. "
+			         "A filter must be registered in either all threads or none.\n", reg_flt->name);
+ errors++;
+ continue;
+ }
+
+ if ((reg_flt->flt_ref[0] == -1) == (ret < 0)) {
+			ha_alert("Lua filter '%s' is referenced both in the shared Lua context (through lua-load) "
+			         "and in a per-thread Lua context (through lua-load-per-thread). These two contexts "
+			         "are exclusive.\n", reg_flt->name);
+ errors++;
+ }
+ }
+
+
+ if (errors > 0)
+ return 0;
+
+	/* after this point, this global will no longer be used, so set it to
+	 * -1 in order to likely get a segfault if someone uses it anyway
+	 */
+ hlua_state_id = -1;
+
+ return 1;
+}
+
+/* The memory allocator used by the Lua stack. <ud> is a pointer to the
+ * allocator's context. <ptr> is the pointer to alloc/free/realloc. <osize>
+ * is the previously allocated size or the kind of object in case of a new
+ * allocation. <nsize> is the requested new size. A new allocation is
+ * indicated by <ptr> being NULL. A free is indicated by <nsize> being
+ * zero. This one verifies that the limits are respected but is optimized
+ * for the fast case where limits are not used, hence stats are not updated.
+ *
+ * Warning: while this API resembles glibc's realloc() a lot, glibc surpasses
+ * POSIX by making realloc(ptr,0) an effective free(), but others do not do
+ * that and will simply allocate zero as if it were the result of malloc(0),
+ * so mapping this onto realloc() will lead to memory leaks on non-glibc
+ * systems.
+ */
+static void *hlua_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
+{
+ struct hlua_mem_allocator *zone = ud;
+ size_t limit, old, new;
+
+ /* a limit of ~0 means unlimited and boot complete, so there's no need
+ * for accounting anymore.
+ */
+ if (likely(~zone->limit == 0)) {
+ if (!nsize)
+ ha_free(&ptr);
+ else
+ ptr = realloc(ptr, nsize);
+ return ptr;
+ }
+
+ if (!ptr)
+ osize = 0;
+
+ /* enforce strict limits across all threads */
+ limit = zone->limit;
+ old = _HA_ATOMIC_LOAD(&zone->allocated);
+ do {
+ new = old + nsize - osize;
+ if (unlikely(nsize && limit && new > limit))
+ return NULL;
+ } while (!_HA_ATOMIC_CAS(&zone->allocated, &old, new));
+
+ if (!nsize)
+ ha_free(&ptr);
+ else
+ ptr = realloc(ptr, nsize);
+
+ if (unlikely(!ptr && nsize)) // failed
+ _HA_ATOMIC_SUB(&zone->allocated, nsize - osize);
+
+ __ha_barrier_atomic_store();
+ return ptr;
+}
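+
+/* Illustrative sketch (not part of the original source): with "tune.lua.maxmem
+ * 10", the limit above is 10*1024*1024 bytes. Growing a block from osize=512
+ * to nsize=1024 computes new = allocated + 512 and is refused if it exceeds
+ * the limit; the CAS loop keeps this invariant across all threads sharing the
+ * allocator.
+ */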
+
+/* This function can fail with an abort() due to a Lua critical error.
+ * We are in the initialisation process of HAProxy, so this abort() is
+ * tolerated.
+ */
+lua_State *hlua_init_state(int thread_num)
+{
+ int i;
+ int idx;
+ struct sample_fetch *sf;
+ struct sample_conv *sc;
+ char *p;
+ const char *error_msg;
+ void **context;
+ lua_State *L;
+ struct prepend_path *pp;
+
+ /* Init main lua stack. */
+ L = lua_newstate(hlua_alloc, &hlua_global_allocator);
+
+ /* Initialise Lua context to NULL */
+ context = lua_getextraspace(L);
+ *context = NULL;
+
+ /* From this point, until the end of the initialisation function,
+	 * any Lua function can fail with an abort(). We are in the initialisation
+	 * process of HAProxy, so this abort() is tolerated.
+ */
+
+ /* Call post initialisation function in safe environment. */
+ if (setjmp(safe_ljmp_env) != 0) {
+ lua_atpanic(L, hlua_panic_safe);
+ if (lua_type(L, -1) == LUA_TSTRING)
+ error_msg = lua_tostring(L, -1);
+ else
+ error_msg = "critical error";
+ fprintf(stderr, "Lua init: %s.\n", error_msg);
+ exit(1);
+ } else {
+ lua_atpanic(L, hlua_panic_ljmp);
+ }
+
+ /* Initialise lua. */
+ luaL_openlibs(L);
+#define HLUA_PREPEND_PATH_TOSTRING1(x) #x
+#define HLUA_PREPEND_PATH_TOSTRING(x) HLUA_PREPEND_PATH_TOSTRING1(x)
+#ifdef HLUA_PREPEND_PATH
+ hlua_prepend_path(L, "path", HLUA_PREPEND_PATH_TOSTRING(HLUA_PREPEND_PATH));
+#endif
+#ifdef HLUA_PREPEND_CPATH
+ hlua_prepend_path(L, "cpath", HLUA_PREPEND_PATH_TOSTRING(HLUA_PREPEND_CPATH));
+#endif
+#undef HLUA_PREPEND_PATH_TOSTRING
+#undef HLUA_PREPEND_PATH_TOSTRING1
+
+ /* Apply configured prepend path */
+ list_for_each_entry(pp, &prepend_path_list, l)
+ hlua_prepend_path(L, pp->type, pp->path);
+
+ /*
+ *
+ * Create "core" object.
+ *
+ */
+
+ /* This table entry is the object "core" base. */
+ lua_newtable(L);
+
+ /* set the thread id */
+ hlua_class_const_int(L, "thread", thread_num);
+
+ /* Push the loglevel constants. */
+ for (i = 0; i < NB_LOG_LEVELS; i++)
+ hlua_class_const_int(L, log_levels[i], i);
+
+ /* Register special functions. */
+ hlua_class_function(L, "register_init", hlua_register_init);
+ hlua_class_function(L, "register_task", hlua_register_task);
+ hlua_class_function(L, "register_fetches", hlua_register_fetches);
+ hlua_class_function(L, "register_converters", hlua_register_converters);
+ hlua_class_function(L, "register_action", hlua_register_action);
+ hlua_class_function(L, "register_service", hlua_register_service);
+ hlua_class_function(L, "register_cli", hlua_register_cli);
+ hlua_class_function(L, "register_filter", hlua_register_filter);
+ hlua_class_function(L, "yield", hlua_yield);
+ hlua_class_function(L, "set_nice", hlua_set_nice);
+ hlua_class_function(L, "sleep", hlua_sleep);
+ hlua_class_function(L, "msleep", hlua_msleep);
+ hlua_class_function(L, "add_acl", hlua_add_acl);
+ hlua_class_function(L, "del_acl", hlua_del_acl);
+ hlua_class_function(L, "set_map", hlua_set_map);
+ hlua_class_function(L, "del_map", hlua_del_map);
+ hlua_class_function(L, "tcp", hlua_socket_new);
+ hlua_class_function(L, "httpclient", hlua_httpclient_new);
+ hlua_class_function(L, "log", hlua_log);
+ hlua_class_function(L, "Debug", hlua_log_debug);
+ hlua_class_function(L, "Info", hlua_log_info);
+ hlua_class_function(L, "Warning", hlua_log_warning);
+ hlua_class_function(L, "Alert", hlua_log_alert);
+ hlua_class_function(L, "done", hlua_done);
+ hlua_fcn_reg_core_fcn(L);
+
+ lua_setglobal(L, "core");
+
+ /*
+ *
+ * Create "act" object.
+ *
+ */
+
+ /* This table entry is the object "act" base. */
+ lua_newtable(L);
+
+ /* push action return constants */
+ hlua_class_const_int(L, "CONTINUE", ACT_RET_CONT);
+ hlua_class_const_int(L, "STOP", ACT_RET_STOP);
+ hlua_class_const_int(L, "YIELD", ACT_RET_YIELD);
+ hlua_class_const_int(L, "ERROR", ACT_RET_ERR);
+ hlua_class_const_int(L, "DONE", ACT_RET_DONE);
+ hlua_class_const_int(L, "DENY", ACT_RET_DENY);
+ hlua_class_const_int(L, "ABORT", ACT_RET_ABRT);
+ hlua_class_const_int(L, "INVALID", ACT_RET_INV);
+
+ hlua_class_function(L, "wake_time", hlua_set_wake_time);
+
+ lua_setglobal(L, "act");
+
+ /*
+ *
+ * Create "Filter" object.
+ *
+ */
+
+ /* This table entry is the object "filter" base. */
+ lua_newtable(L);
+
+ /* push flags and constants */
+ hlua_class_const_int(L, "CONTINUE", 1);
+ hlua_class_const_int(L, "WAIT", 0);
+ hlua_class_const_int(L, "ERROR", -1);
+
+ hlua_class_const_int(L, "FLT_CFG_FL_HTX", FLT_CFG_FL_HTX);
+
+ hlua_class_function(L, "wake_time", hlua_set_wake_time);
+ hlua_class_function(L, "register_data_filter", hlua_register_data_filter);
+ hlua_class_function(L, "unregister_data_filter", hlua_unregister_data_filter);
+
+ lua_setglobal(L, "filter");
+
+ /*
+ *
+ * Register class Map
+ *
+ */
+
+ /* This table entry is the object "Map" base. */
+ lua_newtable(L);
+
+ /* register pattern types. */
+ for (i=0; i<PAT_MATCH_NUM; i++)
+ hlua_class_const_int(L, pat_match_names[i], i);
+ for (i=0; i<PAT_MATCH_NUM; i++) {
+ snprintf(trash.area, trash.size, "_%s", pat_match_names[i]);
+ hlua_class_const_int(L, trash.area, i);
+ }
+
+ /* register constructor. */
+ hlua_class_function(L, "new", hlua_map_new);
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+	/* Register methods. */
+ hlua_class_function(L, "lookup", hlua_map_lookup);
+ hlua_class_function(L, "slookup", hlua_map_slookup);
+
+ lua_rawset(L, -3);
+
+	/* Register previous table in the registry with reference and named entry.
+	 * The function hlua_register_metatable() pops the stack, so we
+	 * first create a copy of the table.
+	 */
+ lua_pushvalue(L, -1); /* Copy the -1 entry and push it on the stack. */
+ class_map_ref = hlua_register_metatable(L, CLASS_MAP);
+
+	/* Assign the metatable to the main Map object. */
+ lua_setmetatable(L, -2);
+
+ /* Set a name to the table. */
+ lua_setglobal(L, "Map");
+
+ /*
+ *
+ * Register "CertCache" class
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+ /* Register */
+ hlua_class_function(L, "set", hlua_ckch_set);
+	lua_setglobal(L, CLASS_CERTCACHE); /* Create the global object called CertCache */
+
+ /*
+ *
+ * Register class Channel
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+	/* Register methods. */
+ hlua_class_function(L, "data", hlua_channel_get_data);
+ hlua_class_function(L, "line", hlua_channel_get_line);
+ hlua_class_function(L, "set", hlua_channel_set_data);
+ hlua_class_function(L, "remove", hlua_channel_del_data);
+ hlua_class_function(L, "append", hlua_channel_append);
+ hlua_class_function(L, "prepend", hlua_channel_prepend);
+ hlua_class_function(L, "insert", hlua_channel_insert_data);
+ hlua_class_function(L, "send", hlua_channel_send);
+ hlua_class_function(L, "forward", hlua_channel_forward);
+ hlua_class_function(L, "input", hlua_channel_get_in_len);
+ hlua_class_function(L, "output", hlua_channel_get_out_len);
+ hlua_class_function(L, "may_recv", hlua_channel_may_recv);
+ hlua_class_function(L, "is_full", hlua_channel_is_full);
+ hlua_class_function(L, "is_resp", hlua_channel_is_resp);
+
+ /* Deprecated API */
+ hlua_class_function(L, "get", hlua_channel_get);
+ hlua_class_function(L, "dup", hlua_channel_dup);
+ hlua_class_function(L, "getline", hlua_channel_getline);
+ hlua_class_function(L, "get_in_len", hlua_channel_get_in_len);
+ hlua_class_function(L, "get_out_len", hlua_channel_get_out_len);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_channel_ref = hlua_register_metatable(L, CLASS_CHANNEL);
+
+ /*
+ *
+ * Register class Fetches
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Browse existing fetches and create the associated
+ * object method.
+ */
+ sf = NULL;
+ while ((sf = sample_fetch_getnext(sf, &idx)) != NULL) {
+		/* Lua doesn't support '.' and '-' in function names, so we
+		 * replace them with an underscore.
+ */
+ strlcpy2(trash.area, sf->kw, trash.size);
+ for (p = trash.area; *p; p++)
+ if (*p == '.' || *p == '-' || *p == '+')
+ *p = '_';
+
+ /* Register the function. */
+ lua_pushstring(L, trash.area);
+ lua_pushlightuserdata(L, sf);
+ lua_pushcclosure(L, hlua_run_sample_fetch, 1);
+ lua_rawset(L, -3);
+ }
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_fetches_ref = hlua_register_metatable(L, CLASS_FETCHES);
+
+ /*
+ *
+ * Register class Converters
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Browse existing converters and create the associated
+ * object method.
+ */
+ sc = NULL;
+ while ((sc = sample_conv_getnext(sc, &idx)) != NULL) {
+		/* Lua doesn't support '.' and '-' in function names, so we
+		 * replace them with an underscore.
+ */
+ strlcpy2(trash.area, sc->kw, trash.size);
+ for (p = trash.area; *p; p++)
+ if (*p == '.' || *p == '-' || *p == '+')
+ *p = '_';
+
+ /* Register the function. */
+ lua_pushstring(L, trash.area);
+ lua_pushlightuserdata(L, sc);
+ lua_pushcclosure(L, hlua_run_sample_conv, 1);
+ lua_rawset(L, -3);
+ }
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_converters_ref = hlua_register_metatable(L, CLASS_CONVERTERS);
+
+ /*
+ *
+ * Register class HTTP
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "req_get_headers",hlua_http_req_get_headers);
+ hlua_class_function(L, "req_del_header", hlua_http_req_del_hdr);
+ hlua_class_function(L, "req_rep_header", hlua_http_req_rep_hdr);
+ hlua_class_function(L, "req_rep_value", hlua_http_req_rep_val);
+ hlua_class_function(L, "req_add_header", hlua_http_req_add_hdr);
+ hlua_class_function(L, "req_set_header", hlua_http_req_set_hdr);
+ hlua_class_function(L, "req_set_method", hlua_http_req_set_meth);
+ hlua_class_function(L, "req_set_path", hlua_http_req_set_path);
+ hlua_class_function(L, "req_set_query", hlua_http_req_set_query);
+ hlua_class_function(L, "req_set_uri", hlua_http_req_set_uri);
+
+ hlua_class_function(L, "res_get_headers",hlua_http_res_get_headers);
+ hlua_class_function(L, "res_del_header", hlua_http_res_del_hdr);
+ hlua_class_function(L, "res_rep_header", hlua_http_res_rep_hdr);
+ hlua_class_function(L, "res_rep_value", hlua_http_res_rep_val);
+ hlua_class_function(L, "res_add_header", hlua_http_res_add_hdr);
+ hlua_class_function(L, "res_set_header", hlua_http_res_set_hdr);
+ hlua_class_function(L, "res_set_status", hlua_http_res_set_status);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_http_ref = hlua_register_metatable(L, CLASS_HTTP);
+
+ /*
+ *
+ * Register class HTTPMessage
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "is_resp", hlua_http_msg_is_resp);
+ hlua_class_function(L, "get_stline", hlua_http_msg_get_stline);
+ hlua_class_function(L, "get_headers", hlua_http_msg_get_headers);
+ hlua_class_function(L, "del_header", hlua_http_msg_del_hdr);
+ hlua_class_function(L, "rep_header", hlua_http_msg_rep_hdr);
+ hlua_class_function(L, "rep_value", hlua_http_msg_rep_val);
+ hlua_class_function(L, "add_header", hlua_http_msg_add_hdr);
+ hlua_class_function(L, "set_header", hlua_http_msg_set_hdr);
+ hlua_class_function(L, "set_method", hlua_http_msg_set_meth);
+ hlua_class_function(L, "set_path", hlua_http_msg_set_path);
+ hlua_class_function(L, "set_query", hlua_http_msg_set_query);
+ hlua_class_function(L, "set_uri", hlua_http_msg_set_uri);
+ hlua_class_function(L, "set_status", hlua_http_msg_set_status);
+ hlua_class_function(L, "is_full", hlua_http_msg_is_full);
+ hlua_class_function(L, "may_recv", hlua_http_msg_may_recv);
+ hlua_class_function(L, "eom", hlua_http_msg_is_eom);
+ hlua_class_function(L, "input", hlua_http_msg_get_in_len);
+ hlua_class_function(L, "output", hlua_http_msg_get_out_len);
+
+ hlua_class_function(L, "body", hlua_http_msg_get_body);
+ hlua_class_function(L, "set", hlua_http_msg_set_data);
+ hlua_class_function(L, "remove", hlua_http_msg_del_data);
+ hlua_class_function(L, "append", hlua_http_msg_append);
+ hlua_class_function(L, "prepend", hlua_http_msg_prepend);
+ hlua_class_function(L, "insert", hlua_http_msg_insert_data);
+ hlua_class_function(L, "set_eom", hlua_http_msg_set_eom);
+ hlua_class_function(L, "unset_eom", hlua_http_msg_unset_eom);
+
+ hlua_class_function(L, "send", hlua_http_msg_send);
+ hlua_class_function(L, "forward", hlua_http_msg_forward);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_http_msg_ref = hlua_register_metatable(L, CLASS_HTTP_MSG);
+
+ /*
+ *
+ * Register class HTTPClient
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "get", hlua_httpclient_get);
+ hlua_class_function(L, "head", hlua_httpclient_head);
+ hlua_class_function(L, "put", hlua_httpclient_put);
+ hlua_class_function(L, "post", hlua_httpclient_post);
+ hlua_class_function(L, "delete", hlua_httpclient_delete);
+ lua_settable(L, -3); /* Sets the __index entry. */
+ /* Register the garbage collector entry. */
+ lua_pushstring(L, "__gc");
+ lua_pushcclosure(L, hlua_httpclient_gc, 0);
+ lua_settable(L, -3); /* Push the last 2 entries in the table at index -3 */
+
+
+
+ class_httpclient_ref = hlua_register_metatable(L, CLASS_HTTPCLIENT);
+ /*
+ *
+ * Register class AppletTCP
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "getline", hlua_applet_tcp_getline);
+ hlua_class_function(L, "receive", hlua_applet_tcp_recv);
+ hlua_class_function(L, "send", hlua_applet_tcp_send);
+ hlua_class_function(L, "set_priv", hlua_applet_tcp_set_priv);
+ hlua_class_function(L, "get_priv", hlua_applet_tcp_get_priv);
+ hlua_class_function(L, "set_var", hlua_applet_tcp_set_var);
+ hlua_class_function(L, "unset_var", hlua_applet_tcp_unset_var);
+ hlua_class_function(L, "get_var", hlua_applet_tcp_get_var);
+
+ lua_settable(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_applet_tcp_ref = hlua_register_metatable(L, CLASS_APPLET_TCP);
+
+ /*
+ *
+ * Register class AppletHTTP
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "set_priv", hlua_applet_http_set_priv);
+ hlua_class_function(L, "get_priv", hlua_applet_http_get_priv);
+ hlua_class_function(L, "set_var", hlua_applet_http_set_var);
+ hlua_class_function(L, "unset_var", hlua_applet_http_unset_var);
+ hlua_class_function(L, "get_var", hlua_applet_http_get_var);
+ hlua_class_function(L, "getline", hlua_applet_http_getline);
+ hlua_class_function(L, "receive", hlua_applet_http_recv);
+ hlua_class_function(L, "send", hlua_applet_http_send);
+ hlua_class_function(L, "add_header", hlua_applet_http_addheader);
+ hlua_class_function(L, "set_status", hlua_applet_http_status);
+ hlua_class_function(L, "start_response", hlua_applet_http_start_response);
+
+ lua_settable(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_applet_http_ref = hlua_register_metatable(L, CLASS_APPLET_HTTP);
+
+ /*
+ *
+ * Register class TXN
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "set_priv", hlua_set_priv);
+ hlua_class_function(L, "get_priv", hlua_get_priv);
+ hlua_class_function(L, "set_var", hlua_set_var);
+ hlua_class_function(L, "unset_var", hlua_unset_var);
+ hlua_class_function(L, "get_var", hlua_get_var);
+ hlua_class_function(L, "done", hlua_txn_done);
+ hlua_class_function(L, "reply", hlua_txn_reply_new);
+ hlua_class_function(L, "set_loglevel", hlua_txn_set_loglevel);
+ hlua_class_function(L, "set_tos", hlua_txn_set_tos);
+ hlua_class_function(L, "set_mark", hlua_txn_set_mark);
+ hlua_class_function(L, "set_priority_class", hlua_txn_set_priority_class);
+ hlua_class_function(L, "set_priority_offset", hlua_txn_set_priority_offset);
+ hlua_class_function(L, "deflog", hlua_txn_deflog);
+ hlua_class_function(L, "log", hlua_txn_log);
+ hlua_class_function(L, "Debug", hlua_txn_log_debug);
+ hlua_class_function(L, "Info", hlua_txn_log_info);
+ hlua_class_function(L, "Warning", hlua_txn_log_warning);
+ hlua_class_function(L, "Alert", hlua_txn_log_alert);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_txn_ref = hlua_register_metatable(L, CLASS_TXN);
+
+ /*
+ *
+ * Register class reply
+ *
+ */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "set_status", hlua_txn_reply_set_status);
+ hlua_class_function(L, "add_header", hlua_txn_reply_add_header);
+ hlua_class_function(L, "del_header", hlua_txn_reply_del_header);
+ hlua_class_function(L, "set_body", hlua_txn_reply_set_body);
+ lua_settable(L, -3); /* Sets the __index entry. */
+ class_txn_reply_ref = luaL_ref(L, LUA_REGISTRYINDEX);
+
+
+ /*
+ *
+ * Register class Socket
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+#ifdef USE_OPENSSL
+ hlua_class_function(L, "connect_ssl", hlua_socket_connect_ssl);
+#endif
+ hlua_class_function(L, "connect", hlua_socket_connect);
+ hlua_class_function(L, "send", hlua_socket_send);
+ hlua_class_function(L, "receive", hlua_socket_receive);
+ hlua_class_function(L, "close", hlua_socket_close);
+ hlua_class_function(L, "getpeername", hlua_socket_getpeername);
+ hlua_class_function(L, "getsockname", hlua_socket_getsockname);
+ hlua_class_function(L, "setoption", hlua_socket_setoption);
+ hlua_class_function(L, "settimeout", hlua_socket_settimeout);
+
+ lua_rawset(L, -3); /* Push the last 2 entries in the table at index -3 */
+
+ /* Register the garbage collector entry. */
+ lua_pushstring(L, "__gc");
+ lua_pushcclosure(L, hlua_socket_gc, 0);
+ lua_rawset(L, -3); /* Push the last 2 entries in the table at index -3 */
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_socket_ref = hlua_register_metatable(L, CLASS_SOCKET);
+
+ lua_atpanic(L, hlua_panic_safe);
+
+ return L;
+}
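+
+/* Illustrative sketch (not part of the original source): a few of the bindings
+ * registered above, as seen from a Lua script (the map file is hypothetical):
+ *
+ *   local m = Map.new("/etc/haproxy/geo.map", Map._ip)
+ *   core.register_task(function()
+ *       core.Info("geo for 10.0.0.1: " .. (m:lookup("10.0.0.1") or "none"))
+ *   end)
+ */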
+
+void hlua_init(void) {
+ int i;
+ char *errmsg;
+#ifdef USE_OPENSSL
+ struct srv_kw *kw;
+ int tmp_error;
+ char *error;
+ char *args[] = { /* SSL client configuration. */
+ "ssl",
+ "verify",
+ "none",
+ NULL
+ };
+#endif
+
+ /* Init post init function list head */
+ for (i = 0; i < MAX_THREADS + 1; i++)
+ LIST_INIT(&hlua_init_functions[i]);
+
+ /* Init state for common/shared lua parts */
+ hlua_state_id = 0;
+ ha_set_thread(NULL);
+ hlua_states[0] = hlua_init_state(0);
+
+ /* Init state 1 for thread 0. We have at least one thread. */
+ hlua_state_id = 1;
+ ha_set_thread(NULL);
+ hlua_states[1] = hlua_init_state(1);
+
+ /* Proxy and server configuration initialisation. */
+ socket_proxy = alloc_new_proxy("LUA-SOCKET", PR_CAP_FE|PR_CAP_BE|PR_CAP_INT, &errmsg);
+ if (!socket_proxy) {
+ fprintf(stderr, "Lua init: %s\n", errmsg);
+ exit(1);
+ }
+ proxy_preset_defaults(socket_proxy);
+
+ /* Init TCP server: unchanged parameters */
+ socket_tcp = new_server(socket_proxy);
+ if (!socket_tcp) {
+ fprintf(stderr, "Lua init: failed to allocate tcp server socket\n");
+ exit(1);
+ }
+
+#ifdef USE_OPENSSL
+ /* Init TCP server: unchanged parameters */
+ socket_ssl = new_server(socket_proxy);
+ if (!socket_ssl) {
+ fprintf(stderr, "Lua init: failed to allocate ssl server socket\n");
+ exit(1);
+ }
+
+ socket_ssl->use_ssl = 1;
+ socket_ssl->xprt = xprt_get(XPRT_SSL);
+
+ for (i = 0; args[i] != NULL; i++) {
+		if ((kw = srv_find_kw(args[i])) != NULL) { /* Maybe it's a registered server keyword */
+			/* These keywords are resolved through the registered
+			 * server keywords. This is useful to configure special
+			 * SSL features like client certificates and ssl_verify.
+			 */
+ tmp_error = kw->parse(args, &i, socket_proxy, socket_ssl, &error);
+ if (tmp_error != 0) {
+ fprintf(stderr, "INTERNAL ERROR: %s\n", error);
+				abort(); /* This must never happen because the command
+				            line is not editable by the user. */
+ }
+ i += kw->skip;
+ }
+ }
+#endif
+
+}
+
+static void hlua_deinit()
+{
+ int thr;
+ struct hlua_reg_filter *reg_flt, *reg_flt_bck;
+
+ list_for_each_entry_safe(reg_flt, reg_flt_bck, &referenced_filters, l)
+ release_hlua_reg_filter(reg_flt);
+
+ for (thr = 0; thr < MAX_THREADS+1; thr++) {
+ if (hlua_states[thr])
+ lua_close(hlua_states[thr]);
+ }
+
+ srv_drop(socket_tcp);
+
+#ifdef USE_OPENSSL
+ srv_drop(socket_ssl);
+#endif
+
+ free_proxy(socket_proxy);
+}
+
+REGISTER_POST_DEINIT(hlua_deinit);
+
+static void hlua_register_build_options(void)
+{
+ char *ptr = NULL;
+
+ memprintf(&ptr, "Built with Lua version : %s", LUA_RELEASE);
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, hlua_register_build_options);
diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c
new file mode 100644
index 0000000..0046e9e
--- /dev/null
+++ b/src/hlua_fcn.c
@@ -0,0 +1,1780 @@
+/*
+ * Lua safe functions
+ *
+ * Copyright 2015-2016 Thierry Fournier <tfournier@arpalert.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * All the functions in this file run with a Lua stack, and can
+ * return with a longjmp. All of these functions must be launched
+ * in an environment able to catch a longjmp, otherwise a
+ * critical error can be raised.
+ */
+
+#define _GNU_SOURCE
+
+#include <lauxlib.h>
+#include <lua.h>
+#include <lualib.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/cli-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/hlua-t.h>
+#include <haproxy/hlua_fcn.h>
+#include <haproxy/http.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/server.h>
+#include <haproxy/stats.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+/* Contains the class reference of the concat object. */
+static int class_concat_ref;
+static int class_proxy_ref;
+static int class_server_ref;
+static int class_listener_ref;
+static int class_regex_ref;
+static int class_stktable_ref;
+
+#define STATS_LEN (MAX((int)ST_F_TOTAL_FIELDS, (int)INF_TOTAL_FIELDS))
+
+static THREAD_LOCAL struct field stats[STATS_LEN];
+
+int hlua_checkboolean(lua_State *L, int index)
+{
+ if (!lua_isboolean(L, index))
+ luaL_argerror(L, index, "boolean expected");
+ return lua_toboolean(L, index);
+}
+
+/* Helper to push unsigned integers to Lua stack, respecting Lua limitations */
+static int hlua_fcn_pushunsigned(lua_State *L, unsigned int val)
+{
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ lua_pushinteger(L, val);
+#else
+ if (val > INT_MAX)
+ lua_pushnumber(L, (lua_Number)val);
+ else
+ lua_pushinteger(L, (int)val);
+#endif
+ return 1;
+}
+
+/* Helper to push unsigned long long to Lua stack, respecting Lua limitations */
+static int hlua_fcn_pushunsigned_ll(lua_State *L, unsigned long long val) {
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, U64 is supported until LLONG_MAX */
+ if (val > LLONG_MAX)
+ lua_pushnumber(L, (lua_Number)val);
+ else
+ lua_pushinteger(L, val);
+#else
+ /* 32 bits case, U64 is supported until INT_MAX */
+ if (val > INT_MAX)
+ lua_pushnumber(L, (lua_Number)val);
+ else
+ lua_pushinteger(L, (int)val);
+#endif
+ return 1;
+}
+
+/* This function gets a struct field and converts it into a Lua
+ * variable. The variable is pushed onto the top of the stack.
+ */
+int hlua_fcn_pushfield(lua_State *L, struct field *field)
+{
+ /* The lua_Integer is always signed. Its length depends on
+ * compilation options, so the following code is conditioned
+	 * by some macros. Windows macros are not supported.
+	 * If the number cannot be represented as an integer, we try to
+	 * convert it to a float.
+ */
+ switch (field_format(field, 0)) {
+
+ case FF_EMPTY:
+ lua_pushnil(L);
+ return 1;
+
+ case FF_S32:
+ /* S32 is always supported. */
+ lua_pushinteger(L, field->u.s32);
+ return 1;
+
+ case FF_U32:
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, U32 is always supported */
+ lua_pushinteger(L, field->u.u32);
+#else
+ /* 32 bits case, U32 is supported until INT_MAX. */
+ if (field->u.u32 > INT_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.u32);
+ else
+ lua_pushinteger(L, field->u.u32);
+#endif
+ return 1;
+
+ case FF_S64:
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, S64 is always supported */
+ lua_pushinteger(L, field->u.s64);
+#else
+	/* 32 bits case, S64 is supported between INT_MIN and INT_MAX */
+ if (field->u.s64 < INT_MIN || field->u.s64 > INT_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.s64);
+ else
+ lua_pushinteger(L, (int)field->u.s64);
+#endif
+ return 1;
+
+ case FF_U64:
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, U64 is supported until LLONG_MAX */
+ if (field->u.u64 > LLONG_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.u64);
+ else
+ lua_pushinteger(L, field->u.u64);
+#else
+ /* 64 bits case, U64 is supported until INT_MAX */
+ if (field->u.u64 > INT_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.u64);
+ else
+ lua_pushinteger(L, (int)field->u.u64);
+#endif
+ return 1;
+
+ case FF_STR:
+ lua_pushstring(L, field->u.str);
+ return 1;
+
+ default:
+ break;
+ }
+
+ /* Default case, never reached. */
+ lua_pushnil(L);
+ return 1;
+}
+
+/* Some strings start or end with blank chars. This function
+ * removes the spaces, tabs, \r and \n at the beginning and at
+ * the end of the string "str", and pushes the result onto the
+ * Lua stack.
+ * Returns a pointer to the Lua internal copy of the string.
+ */
+const char *hlua_pushstrippedstring(lua_State *L, const char *str)
+{
+ const char *p;
+ int l;
+
+ for (p = str; HTTP_IS_LWS(*p); p++);
+
+ for (l = strlen(p); l && HTTP_IS_LWS(p[l-1]); l--);
+
+ return lua_pushlstring(L, p, l);
+}
+
+/* The three following functions are useful for adding entries
+ * in a table. These functions take a string and respectively an
+ * integer, a string or a function, and add it to the table at the
+ * top of the stack.
+ *
+ * These functions throw an error if no more stack space is
+ * available.
+ */
+void hlua_class_const_int(lua_State *L, const char *name, int value)
+{
+ lua_pushstring(L, name);
+ lua_pushinteger(L, value);
+ lua_rawset(L, -3);
+}
+void hlua_class_const_str(lua_State *L, const char *name, const char *value)
+{
+ lua_pushstring(L, name);
+ lua_pushstring(L, value);
+ lua_rawset(L, -3);
+}
+void hlua_class_function(lua_State *L, const char *name, int (*function)(lua_State *L))
+{
+ lua_pushstring(L, name);
+ lua_pushcclosure(L, function, 0);
+ lua_rawset(L, -3);
+}
+
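+/* Minimal usage sketch (illustrative only; "my_method" is a hypothetical
+ * C function of type lua_CFunction):
+ *
+ *   lua_newtable(L);                            // the class table
+ *   hlua_class_const_int(L, "VERSION", 1);      // t.VERSION = 1
+ *   hlua_class_const_str(L, "NAME", "demo");    // t.NAME = "demo"
+ *   hlua_class_function(L, "dump", my_method);  // t.dump = my_method
+ */
+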
+/* This function returns a string containing the HAProxy object name. */
+int hlua_dump_object(struct lua_State *L)
+{
+ const char *name = (const char *)lua_tostring(L, lua_upvalueindex(1));
+ lua_pushfstring(L, "HAProxy class %s", name);
+ return 1;
+}
+
+/* This function registers a table as a metatable, names it,
+ * and returns the associated reference.
+ * The original table is popped from the top of the stack.
+ * "name" is the referenced class name.
+ */
+int hlua_register_metatable(struct lua_State *L, char *name)
+{
+	/* Check the type of the top element. It must be
+	 * a table.
+ */
+ if (lua_type(L, -1) != LUA_TTABLE)
+ luaL_error(L, "hlua_register_metatable() requires a type Table "
+ "in the top of the stack");
+
+	/* Add the __tostring function which identifies the
+	 * created object.
+ */
+ lua_pushstring(L, "__tostring");
+ lua_pushstring(L, name);
+ lua_pushcclosure(L, hlua_dump_object, 1);
+ lua_rawset(L, -3);
+
+ /* Register a named entry for the table. The table
+ * reference is copied first because the function
+	 * lua_setfield() pops the entry.
+ */
+ lua_pushvalue(L, -1);
+ lua_setfield(L, LUA_REGISTRYINDEX, name);
+
+	/* Create the reference for the object. The
+	 * function luaL_ref() pops the top of the stack.
+ */
+ return luaL_ref(L, LUA_REGISTRYINDEX);
+}
+
+/* Returns an object of the expected type, or throws an error. */
+void *hlua_checkudata(lua_State *L, int ud, int class_ref)
+{
+ void *p;
+ int ret;
+
+	/* Check if the stack entry is a table. */
+ if (!lua_istable(L, ud))
+ luaL_argerror(L, ud, NULL);
+
+	/* Push the metatable of the referenced object. */
+ if (!lua_getmetatable(L, ud))
+ luaL_argerror(L, ud, NULL);
+
+	/* Push the expected metatable. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_ref);
+
+	/* Check if the metatable matches the expected one. */
+ ret = lua_rawequal(L, -1, -2);
+ lua_pop(L, 2);
+ if (!ret)
+ luaL_argerror(L, ud, NULL);
+
+	/* Push the entry [0] of the table onto the stack. */
+ lua_rawgeti(L, ud, 0);
+
+ /* Check if this entry is userdata. */
+ p = lua_touserdata(L, -1);
+ if (!p)
+ luaL_argerror(L, ud, NULL);
+
+ /* Remove the entry returned by lua_rawgeti(). */
+ lua_pop(L, 1);
+
+ /* Return the associated struct. */
+ return p;
+}
+
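+/* For reference, the layout validated by hlua_checkudata() is the one built
+ * by the hlua_fcn_new_* constructors below: a Lua table whose entry [0] is a
+ * light userdata pointing to the C struct, with the class metatable from
+ * LUA_REGISTRYINDEX attached to it.
+ */
+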
+/* This function returns the current date in epoch format, split into "sec" and "usec" fields. */
+int hlua_now(lua_State *L)
+{
+ lua_newtable(L);
+ lua_pushstring(L, "sec");
+ lua_pushinteger(L, now.tv_sec);
+ lua_rawset(L, -3);
+ lua_pushstring(L, "usec");
+ lua_pushinteger(L, now.tv_usec);
+ lua_rawset(L, -3);
+ return 1;
+}
+
+/* This function expects a Lua string containing an HTTP date,
+ * parses it and returns an integer containing the epoch format of
+ * the date, or nil if the parsing fails.
+ */
+static int hlua_parse_date(lua_State *L, int (*fcn)(const char *, int, struct tm*))
+{
+ const char *str;
+ size_t len;
+ struct tm tm;
+ time_t time;
+
+ str = luaL_checklstring(L, 1, &len);
+
+ if (!fcn(str, len, &tm)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+	/* This function considers that the content of the broken-down time
+	 * is expressed in the UTC timezone. timegm doesn't care about
+	 * the GNU variable tm_gmtoff. If gmtoff is set, or if you know
+ * the timezone from the broken-down time, it must be fixed
+ * after the conversion.
+ */
+ time = my_timegm(&tm);
+ if (time == -1) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushinteger(L, (int)time);
+ return 1;
+}
+static int hlua_http_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_http_date);
+}
+static int hlua_imf_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_imf_date);
+}
+static int hlua_rfc850_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_rfc850_date);
+}
+static int hlua_asctime_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_asctime_date);
+}
+
+static int hlua_get_info(lua_State *L)
+{
+ int i;
+
+ stats_fill_info(stats, STATS_LEN, 0);
+
+ lua_newtable(L);
+ for (i=0; i<INF_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, info_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+}
+
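+/* From Lua (a minimal sketch): core.get_info() returns a table keyed by the
+ * info field names, e.g. core.get_info()["Uptime_sec"].
+ */
+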
+static struct hlua_concat *hlua_check_concat(lua_State *L, int ud)
+{
+ return (hlua_checkudata(L, ud, class_concat_ref));
+}
+
+static int hlua_concat_add(lua_State *L)
+{
+ struct hlua_concat *b;
+ char *buffer;
+ char *new;
+ const char *str;
+ size_t l;
+
+ /* First arg must be a concat object. */
+ b = hlua_check_concat(L, 1);
+
+ /* Second arg must be a string. */
+ str = luaL_checklstring(L, 2, &l);
+
+ /* Get the buffer. */
+ lua_rawgeti(L, 1, 1);
+ buffer = lua_touserdata(L, -1);
+ lua_pop(L, 1);
+
+	/* Update the buffer size if required. The old buffer
+	 * is replaced by the new one in the object array, so it will
+ * be deleted by the GC.
+ * Note that in the first loop, the "new" variable is only
+ * used as a flag.
+ */
+ new = NULL;
+ while (b->size - b->len < l) {
+ b->size += HLUA_CONCAT_BLOCSZ;
+ new = buffer;
+ }
+ if (new) {
+ new = lua_newuserdata(L, b->size);
+ memcpy(new, buffer, b->len);
+ lua_rawseti(L, 1, 1);
+ buffer = new;
+ }
+
+ /* Copy string, and update metadata. */
+ memcpy(buffer + b->len, str, l);
+ b->len += l;
+ return 0;
+}
+
+static int hlua_concat_dump(lua_State *L)
+{
+ struct hlua_concat *b;
+ char *buffer;
+
+ /* First arg must be a concat object. */
+ b = hlua_check_concat(L, 1);
+
+ /* Get the buffer. */
+ lua_rawgeti(L, 1, 1);
+ buffer = lua_touserdata(L, -1);
+ lua_pop(L, 1);
+
+	/* Push the concatenated string onto the stack. */
+ lua_pushlstring(L, buffer, b->len);
+ return 1;
+}
+
+int hlua_concat_new(lua_State *L)
+{
+ struct hlua_concat *b;
+
+ lua_newtable(L);
+ b = lua_newuserdata(L, sizeof(*b));
+ b->size = HLUA_CONCAT_BLOCSZ;
+ b->len = 0;
+ lua_rawseti(L, -2, 0);
+ lua_newuserdata(L, HLUA_CONCAT_BLOCSZ);
+ lua_rawseti(L, -2, 1);
+
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_concat_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+static int concat_tostring(lua_State *L)
+{
+ const void *ptr = lua_topointer(L, 1);
+ lua_pushfstring(L, "Concat object: %p", ptr);
+ return 1;
+}
+
+static int hlua_concat_init(lua_State *L)
+{
+	/* Create the buffered Concat class. */
+ lua_newtable(L);
+
+ lua_pushstring(L, "__tostring");
+ lua_pushcclosure(L, concat_tostring, 0);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "__index"); /* Creates the index entry. */
+ lua_newtable(L); /* The "__index" content. */
+
+ lua_pushstring(L, "add");
+ lua_pushcclosure(L, hlua_concat_add, 0);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "dump");
+ lua_pushcclosure(L, hlua_concat_dump, 0);
+ lua_settable(L, -3);
+
+ lua_settable(L, -3); /* Sets the __index entry. */
+ class_concat_ref = luaL_ref(L, LUA_REGISTRYINDEX);
+
+ return 1;
+}
+
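+/* Usage sketch from the Lua side (a minimal example, assuming the "core"
+ * global registered by hlua.c):
+ *
+ *   local c = core.concat()
+ *   c:add("Hello, ")
+ *   c:add("world")
+ *   local s = c:dump()   -- "Hello, world"
+ */
+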
+int hlua_fcn_new_stktable(lua_State *L, struct stktable *tbl)
+{
+ lua_newtable(L);
+
+	/* Push the class stktable metatable and set it on the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_stktable_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, tbl);
+ lua_rawseti(L, -2, 0);
+ return 1;
+}
+
+static struct stktable *hlua_check_stktable(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_stktable_ref);
+}
+
+/* Extract stick table attributes into Lua table */
+int hlua_stktable_info(lua_State *L)
+{
+ struct stktable *tbl;
+ int dt;
+
+ tbl = hlua_check_stktable(L, 1);
+
+ if (!tbl->id) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_newtable(L);
+
+ lua_pushstring(L, "type");
+ lua_pushstring(L, stktable_types[tbl->type].kw);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "length");
+ lua_pushinteger(L, tbl->key_size);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "size");
+ hlua_fcn_pushunsigned(L, tbl->size);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "used");
+ hlua_fcn_pushunsigned(L, tbl->current);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "nopurge");
+ lua_pushboolean(L, tbl->nopurge > 0);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "expire");
+ lua_pushinteger(L, tbl->expire);
+ lua_settable(L, -3);
+
+ /* Save data types periods (if applicable) in 'data' table */
+ lua_pushstring(L, "data");
+ lua_newtable(L);
+
+ for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) {
+ if (tbl->data_ofs[dt] == 0)
+ continue;
+
+ lua_pushstring(L, stktable_data_types[dt].name);
+
+ if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
+ lua_pushinteger(L, tbl->data_arg[dt].u);
+ else
+ lua_pushinteger(L, -1);
+
+ lua_settable(L, -3);
+ }
+
+ lua_settable(L, -3);
+
+ return 1;
+}
+
+/* Helper to extract a stick table entry into a Lua table */
+static void hlua_stktable_entry(lua_State *L, struct stktable *t, struct stksess *ts)
+{
+ int dt;
+ void *ptr;
+
+ for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) {
+
+ if (t->data_ofs[dt] == 0)
+ continue;
+
+ lua_pushstring(L, stktable_data_types[dt].name);
+
+ ptr = stktable_data_ptr(t, ts, dt);
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ lua_pushinteger(L, stktable_data_cast(ptr, std_t_sint));
+ break;
+ case STD_T_UINT:
+ hlua_fcn_pushunsigned(L, stktable_data_cast(ptr, std_t_uint));
+ break;
+ case STD_T_ULL:
+ hlua_fcn_pushunsigned_ll(L, stktable_data_cast(ptr, std_t_ull));
+ break;
+ case STD_T_FRQP:
+ lua_pushinteger(L, read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[dt].u));
+ break;
+ case STD_T_DICT: {
+ struct dict_entry *de;
+ de = stktable_data_cast(ptr, std_t_dict);
+ lua_pushstring(L, de ? (char *)de->value.key : "-");
+ break;
+ }
+ }
+
+ lua_settable(L, -3);
+ }
+}
+
+/* Looks in table <t> for a sticky session matching key <key>.
+ * Returns a table with the session data, or nil if no entry matches.
+ *
+ * The returned table always contains 'use' and 'expire' (integer) fields.
+ * Frequency/rate counters are returned as the rate measured over the
+ * configured period.
+ */
+int hlua_stktable_lookup(lua_State *L)
+{
+ struct stktable *t;
+ struct sample smp;
+ struct stktable_key *skey;
+ struct stksess *ts;
+
+ t = hlua_check_stktable(L, 1);
+ smp.data.type = SMP_T_STR;
+ smp.flags = SMP_F_CONST;
+ smp.data.u.str.area = (char *)lua_tolstring(L, 2, &smp.data.u.str.data);
+
+ skey = smp_to_stkey(&smp, t);
+ if (!skey) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ts = stktable_lookup_key(t, skey);
+ if (!ts) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_newtable(L);
+ lua_pushstring(L, "use");
+ lua_pushinteger(L, ts->ref_cnt - 1);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "expire");
+ lua_pushinteger(L, tick_remain(now_ms, ts->expire));
+ lua_settable(L, -3);
+
+ hlua_stktable_entry(L, t, ts);
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ ts->ref_cnt--;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return 1;
+}
+
+struct stk_filter {
+ long long val;
+ int type;
+ int op;
+};
+
+
+/* Helper for returning errors to callers using Lua convention (nil, err) */
+static int hlua_error(lua_State *L, const char *fmt, ...) {
+ char buf[256];
+ int len;
+ va_list args;
+ va_start(args, fmt);
+ len = vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ if (len < 0) {
+ ha_alert("hlua_error(): Could not write error message.\n");
+ lua_pushnil(L);
+ return 1;
+ } else if (len >= sizeof(buf))
+ ha_alert("hlua_error(): Error message was truncated.\n");
+
+ lua_pushnil(L);
+ lua_pushstring(L, buf);
+
+ return 2;
+}
+
+/* Dump the contents of stick table <t>. An optional filter table may be given as second argument. */
+int hlua_stktable_dump(lua_State *L)
+{
+ struct stktable *t;
+ struct ebmb_node *eb;
+ struct ebmb_node *n;
+ struct stksess *ts;
+ int type;
+ int op;
+ int dt;
+ long long val;
+ struct stk_filter filter[STKTABLE_FILTER_LEN];
+ int filter_count = 0;
+ int i;
+ int skip_entry;
+ void *ptr;
+
+ t = hlua_check_stktable(L, 1);
+ type = lua_type(L, 2);
+
+ switch (type) {
+ case LUA_TNONE:
+ case LUA_TNIL:
+ break;
+ case LUA_TTABLE:
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ int entry_idx = 0;
+
+ if (filter_count >= STKTABLE_FILTER_LEN)
+ return hlua_error(L, "Filter table too large (len > %d)", STKTABLE_FILTER_LEN);
+
+ if (lua_type(L, -1) != LUA_TTABLE || lua_rawlen(L, -1) != 3)
+ return hlua_error(L, "Filter table entry must be a triplet: {\"data_col\", \"op\", val} (entry #%d)", filter_count + 1);
+
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ switch (entry_idx) {
+ case 0:
+ if (lua_type(L, -1) != LUA_TSTRING)
+ return hlua_error(L, "Filter table data column must be string (entry #%d)", filter_count + 1);
+
+ dt = stktable_get_data_type((char *)lua_tostring(L, -1));
+ if (dt < 0 || t->data_ofs[dt] == 0)
+ return hlua_error(L, "Filter table data column not present in stick table (entry #%d)", filter_count + 1);
+ filter[filter_count].type = dt;
+ break;
+ case 1:
+ if (lua_type(L, -1) != LUA_TSTRING)
+ return hlua_error(L, "Filter table operator must be string (entry #%d)", filter_count + 1);
+
+ op = get_std_op(lua_tostring(L, -1));
+ if (op < 0)
+ return hlua_error(L, "Unknown operator in filter table (entry #%d)", filter_count + 1);
+ filter[filter_count].op = op;
+ break;
+ case 2:
+ val = lua_tointeger(L, -1);
+ filter[filter_count].val = val;
+ filter_count++;
+ break;
+ default:
+ break;
+ }
+ entry_idx++;
+ lua_pop(L, 1);
+ }
+ lua_pop(L, 1);
+ }
+ break;
+ default:
+ return hlua_error(L, "filter table expected");
+ }
+
+ lua_newtable(L);
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ eb = ebmb_first(&t->keys);
+ for (n = eb; n; n = ebmb_next(n)) {
+ ts = ebmb_entry(n, struct stksess, key);
+ if (!ts) {
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+ return 1;
+ }
+ ts->ref_cnt++;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ /* multi condition/value filter */
+ skip_entry = 0;
+ for (i = 0; i < filter_count; i++) {
+ if (t->data_ofs[filter[i].type] == 0)
+ continue;
+
+ ptr = stktable_data_ptr(t, ts, filter[i].type);
+
+ switch (stktable_data_types[filter[i].type].std_type) {
+ case STD_T_SINT:
+ val = stktable_data_cast(ptr, std_t_sint);
+ break;
+ case STD_T_UINT:
+ val = stktable_data_cast(ptr, std_t_uint);
+ break;
+ case STD_T_ULL:
+ val = stktable_data_cast(ptr, std_t_ull);
+ break;
+ case STD_T_FRQP:
+ val = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[filter[i].type].u);
+ break;
+ default:
+ continue;
+ break;
+ }
+
+ op = filter[i].op;
+
+ if ((val < filter[i].val && (op == STD_OP_EQ || op == STD_OP_GT || op == STD_OP_GE)) ||
+ (val == filter[i].val && (op == STD_OP_NE || op == STD_OP_GT || op == STD_OP_LT)) ||
+ (val > filter[i].val && (op == STD_OP_EQ || op == STD_OP_LT || op == STD_OP_LE))) {
+ skip_entry = 1;
+ break;
+ }
+ }
+
+ if (skip_entry) {
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ ts->ref_cnt--;
+ continue;
+ }
+
+ if (t->type == SMP_T_IPV4) {
+ char addr[INET_ADDRSTRLEN];
+ inet_ntop(AF_INET, (const void *)&ts->key.key, addr, sizeof(addr));
+ lua_pushstring(L, addr);
+ } else if (t->type == SMP_T_IPV6) {
+ char addr[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, (const void *)&ts->key.key, addr, sizeof(addr));
+ lua_pushstring(L, addr);
+ } else if (t->type == SMP_T_SINT) {
+ lua_pushinteger(L, *ts->key.key);
+ } else if (t->type == SMP_T_STR) {
+ lua_pushstring(L, (const char *)ts->key.key);
+ } else {
+ return hlua_error(L, "Unsupported stick table key type");
+ }
+
+ lua_newtable(L);
+ hlua_stktable_entry(L, t, ts);
+ lua_settable(L, -3);
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ ts->ref_cnt--;
+ }
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return 1;
+}
+
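+/* Usage sketch from the Lua side (a minimal example; the backend name "be1"
+ * and the "conn_cnt" data column are hypothetical):
+ *
+ *   local t   = core.backends["be1"].stktable
+ *   local one = t:lookup("127.0.0.1")              -- one entry or nil
+ *   local all = t:dump({{"conn_cnt", "gt", 100}})  -- filtered dump
+ */
+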
+int hlua_fcn_new_listener(lua_State *L, struct listener *lst)
+{
+ lua_newtable(L);
+
+	/* Push the class listener metatable and set it on the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_listener_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, lst);
+ lua_rawseti(L, -2, 0);
+ return 1;
+}
+
+static struct listener *hlua_check_listener(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_listener_ref);
+}
+
+int hlua_listener_get_stats(lua_State *L)
+{
+ struct listener *li;
+ int i;
+
+ li = hlua_check_listener(L, 1);
+
+ if (!li->bind_conf->frontend) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ stats_fill_li_stats(li->bind_conf->frontend, li, STAT_SHLGNDS, stats,
+ STATS_LEN, NULL);
+
+ lua_newtable(L);
+ for (i=0; i<ST_F_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, stat_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+
+}
+
+int hlua_fcn_new_server(lua_State *L, struct server *srv)
+{
+ char buffer[12];
+
+ lua_newtable(L);
+
+	/* Push the class server metatable and set it on the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_server_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, srv);
+ lua_rawseti(L, -2, 0);
+
+ /* Add server name. */
+ lua_pushstring(L, "name");
+ lua_pushstring(L, srv->id);
+ lua_settable(L, -3);
+
+ /* Add server puid. */
+ lua_pushstring(L, "puid");
+ snprintf(buffer, sizeof(buffer), "%d", srv->puid);
+ lua_pushstring(L, buffer);
+ lua_settable(L, -3);
+
+ return 1;
+}
+
+static struct server *hlua_check_server(lua_State *L, int ud)
+{
+ struct server *srv = hlua_checkudata(L, ud, class_server_ref);
+ srv->flags |= SRV_F_NON_PURGEABLE;
+ return srv;
+}
+
+int hlua_server_get_stats(lua_State *L)
+{
+ struct server *srv;
+ int i;
+
+ srv = hlua_check_server(L, 1);
+
+ if (!srv->proxy) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ stats_fill_sv_stats(srv->proxy, srv, STAT_SHLGNDS, stats,
+ STATS_LEN, NULL);
+
+ lua_newtable(L);
+ for (i=0; i<ST_F_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, stat_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+
+}
+
+int hlua_server_get_addr(lua_State *L)
+{
+ struct server *srv;
+ char addr[INET6_ADDRSTRLEN];
+ luaL_Buffer b;
+
+ srv = hlua_check_server(L, 1);
+
+ luaL_buffinit(L, &b);
+
+ switch (srv->addr.ss_family) {
+ case AF_INET:
+ inet_ntop(AF_INET, &((struct sockaddr_in *)&srv->addr)->sin_addr,
+ addr, INET_ADDRSTRLEN);
+ luaL_addstring(&b, addr);
+ luaL_addstring(&b, ":");
+ snprintf(addr, INET_ADDRSTRLEN, "%d", srv->svc_port);
+ luaL_addstring(&b, addr);
+ break;
+ case AF_INET6:
+ inet_ntop(AF_INET6, &((struct sockaddr_in6 *)&srv->addr)->sin6_addr,
+ addr, INET6_ADDRSTRLEN);
+ luaL_addstring(&b, addr);
+ luaL_addstring(&b, ":");
+ snprintf(addr, INET_ADDRSTRLEN, "%d", srv->svc_port);
+ luaL_addstring(&b, addr);
+ break;
+ case AF_UNIX:
+ luaL_addstring(&b, (char *)((struct sockaddr_un *)&srv->addr)->sun_path);
+ break;
+ default:
+ luaL_addstring(&b, "<unknown>");
+ break;
+ }
+
+ luaL_pushresult(&b);
+ return 1;
+}
+
+int hlua_server_is_draining(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ lua_pushinteger(L, server_is_draining(srv));
+ return 1;
+}
+
+int hlua_server_set_maxconn(lua_State *L)
+{
+ struct server *srv;
+ const char *maxconn;
+ const char *err;
+
+ srv = hlua_check_server(L, 1);
+ maxconn = luaL_checkstring(L, 2);
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ err = server_parse_maxconn_change_request(srv, maxconn);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (!err)
+ lua_pushnil(L);
+ else
+ hlua_pushstrippedstring(L, err);
+ return 1;
+}
+
+int hlua_server_get_maxconn(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ lua_pushinteger(L, srv->maxconn);
+ return 1;
+}
+
+int hlua_server_set_weight(lua_State *L)
+{
+ struct server *srv;
+ const char *weight;
+ const char *err;
+
+ srv = hlua_check_server(L, 1);
+ weight = luaL_checkstring(L, 2);
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ err = server_parse_weight_change_request(srv, weight);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (!err)
+ lua_pushnil(L);
+ else
+ hlua_pushstrippedstring(L, err);
+ return 1;
+}
+
+int hlua_server_get_weight(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ lua_pushinteger(L, srv->uweight);
+ return 1;
+}
+
+int hlua_server_set_addr(lua_State *L)
+{
+ struct server *srv;
+ const char *addr;
+ const char *port;
+ const char *err;
+
+ srv = hlua_check_server(L, 1);
+ addr = luaL_checkstring(L, 2);
+ if (lua_gettop(L) >= 3)
+ port = luaL_checkstring(L, 3);
+ else
+ port = NULL;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ err = srv_update_addr_port(srv, addr, port, "Lua script");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (!err)
+ lua_pushnil(L);
+ else
+ hlua_pushstrippedstring(L, err);
+ return 1;
+}
+
+int hlua_server_shut_sess(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_shutdown_streams(srv, SF_ERR_KILLED);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_set_drain(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_adm_set_drain(srv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_set_maint(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_adm_set_maint(srv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_set_ready(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_adm_set_ready(srv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_check_enable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state |= CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_disable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state &= ~CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_force_up(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_running(sv, "changed from Lua script", NULL);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_force_nolb(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_stopping(sv, "changed from Lua script", NULL);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_force_down(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->track)) {
+ sv->check.health = 0;
+ srv_set_stopped(sv, "changed from Lua script", NULL);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_enable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state |= CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_disable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state &= ~CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_force_up(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
+ srv_set_running(sv, "changed from Lua script", NULL);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_force_down(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = 0;
+ srv_set_stopped(sv, "changed from Lua script", NULL);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_fcn_new_proxy(lua_State *L, struct proxy *px)
+{
+ struct server *srv;
+ struct listener *lst;
+ int lid;
+ char buffer[17];
+
+ lua_newtable(L);
+
+	/* Push the class proxy metatable and set it on the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_proxy_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, px);
+ lua_rawseti(L, -2, 0);
+
+ /* Add proxy name. */
+ lua_pushstring(L, "name");
+ lua_pushstring(L, px->id);
+ lua_settable(L, -3);
+
+ /* Add proxy uuid. */
+ lua_pushstring(L, "uuid");
+ snprintf(buffer, sizeof(buffer), "%d", px->uuid);
+ lua_pushstring(L, buffer);
+ lua_settable(L, -3);
+
+ /* Browse and register servers. */
+ lua_pushstring(L, "servers");
+ lua_newtable(L);
+ for (srv = px->srv; srv; srv = srv->next) {
+ lua_pushstring(L, srv->id);
+ hlua_fcn_new_server(L, srv);
+ lua_settable(L, -3);
+ }
+ lua_settable(L, -3);
+
+ /* Browse and register listeners. */
+ lua_pushstring(L, "listeners");
+ lua_newtable(L);
+ lid = 1;
+ list_for_each_entry(lst, &px->conf.listeners, by_fe) {
+ if (lst->name)
+ lua_pushstring(L, lst->name);
+ else {
+ snprintf(buffer, sizeof(buffer), "sock-%d", lid);
+ lid++;
+ lua_pushstring(L, buffer);
+ }
+ hlua_fcn_new_listener(L, lst);
+ lua_settable(L, -3);
+ }
+ lua_settable(L, -3);
+
+ if (px->table && px->table->id) {
+ lua_pushstring(L, "stktable");
+ hlua_fcn_new_stktable(L, px->table);
+ lua_settable(L, -3);
+ }
+
+ return 1;
+}
+
+static struct proxy *hlua_check_proxy(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_proxy_ref);
+}
+
+int hlua_proxy_pause(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ /* safe to call without PROXY_LOCK - pause_proxy takes it */
+ pause_proxy(px);
+ return 0;
+}
+
+int hlua_proxy_resume(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ /* safe to call without PROXY_LOCK - resume_proxy takes it */
+ resume_proxy(px);
+ return 0;
+}
+
+int hlua_proxy_stop(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ /* safe to call without PROXY_LOCK - stop_proxy takes it */
+ stop_proxy(px);
+ return 0;
+}
+
+int hlua_proxy_get_cap(lua_State *L)
+{
+ struct proxy *px;
+ const char *str;
+
+ px = hlua_check_proxy(L, 1);
+ str = proxy_cap_str(px->cap);
+ lua_pushstring(L, str);
+ return 1;
+}
+
+int hlua_proxy_get_stats(lua_State *L)
+{
+ struct proxy *px;
+ int i;
+
+ px = hlua_check_proxy(L, 1);
+ if (px->cap & PR_CAP_BE)
+ stats_fill_be_stats(px, STAT_SHLGNDS, stats, STATS_LEN, NULL);
+ else
+ stats_fill_fe_stats(px, stats, STATS_LEN, NULL);
+ lua_newtable(L);
+ for (i=0; i<ST_F_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, stat_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+}
+
+int hlua_proxy_get_mode(lua_State *L)
+{
+ struct proxy *px;
+ const char *str;
+
+ px = hlua_check_proxy(L, 1);
+ str = proxy_mode_str(px->mode);
+ lua_pushstring(L, str);
+ return 1;
+}
+
+int hlua_proxy_shut_bcksess(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ srv_shutdown_backup_streams(px, SF_ERR_KILLED);
+ return 0;
+}
+
+int hlua_fcn_post_init(lua_State *L)
+{
+ struct proxy *px;
+
+	/* Get the "core" table. */
+ if (lua_getglobal(L, "core") != LUA_TTABLE)
+ lua_error(L);
+
+ /* Create proxies entry. */
+ lua_pushstring(L, "proxies");
+ lua_newtable(L);
+
+ /* List all proxies. */
+ for (px = proxies_list; px; px = px->next) {
+ if (px->cap & PR_CAP_INT)
+ continue;
+ lua_pushstring(L, px->id);
+ hlua_fcn_new_proxy(L, px);
+ lua_settable(L, -3);
+ }
+
+ /* push "proxies" in "core" */
+ lua_settable(L, -3);
+
+	/* Create frontends entry. */
+ lua_pushstring(L, "frontends");
+ lua_newtable(L);
+
+ /* List all proxies. */
+ for (px = proxies_list; px; px = px->next) {
+ if (!(px->cap & PR_CAP_FE) || (px->cap & PR_CAP_INT))
+ continue;
+ lua_pushstring(L, px->id);
+ hlua_fcn_new_proxy(L, px);
+ lua_settable(L, -3);
+ }
+
+ /* push "frontends" in "core" */
+ lua_settable(L, -3);
+
+	/* Create backends entry. */
+ lua_pushstring(L, "backends");
+ lua_newtable(L);
+
+ /* List all proxies. */
+ for (px = proxies_list; px; px = px->next) {
+ if (!(px->cap & PR_CAP_BE) || (px->cap & PR_CAP_INT))
+ continue;
+ lua_pushstring(L, px->id);
+ hlua_fcn_new_proxy(L, px);
+ lua_settable(L, -3);
+ }
+
+	/* push "backends" in "core" */
+ lua_settable(L, -3);
+
+ return 1;
+}
+
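+/* From Lua (a minimal sketch): once post-init has run, proxies are reachable
+ * as core.proxies["name"], core.frontends["name"] and core.backends["name"],
+ * each value being a Proxy object built by hlua_fcn_new_proxy() above.
+ */
+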
+/* This Lua function takes a string and a list of separators.
+ * It tokenizes the input string using the characters of the
+ * list as separators.
+ *
+ * The function returns a table filled with tokens.
+ */
+int hlua_tokenize(lua_State *L)
+{
+ const char *str;
+ const char *sep;
+ int index;
+ const char *token;
+ const char *p;
+ const char *c;
+ int ignore_empty;
+
+ ignore_empty = 0;
+
+ str = luaL_checkstring(L, 1);
+ sep = luaL_checkstring(L, 2);
+ if (lua_gettop(L) == 3)
+ ignore_empty = hlua_checkboolean(L, 3);
+
+ lua_newtable(L);
+ index = 1;
+ token = str;
+ p = str;
+ while(1) {
+ for (c = sep; *c != '\0'; c++)
+ if (*p == *c)
+ break;
+ if (*p == *c) {
+ if ((!ignore_empty) || (p - token > 0)) {
+ lua_pushlstring(L, token, p - token);
+ lua_rawseti(L, -2, index);
+ index++;
+ }
+ token = p + 1;
+ }
+ if (*p == '\0')
+ break;
+ p++;
+ }
+
+ return 1;
+}
+
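+/* Usage sketch from the Lua side (a minimal example):
+ *
+ *   core.tokenize("a,b;;c", ",;")        -- { "a", "b", "", "c" }
+ *   core.tokenize("a,b;;c", ",;", true)  -- { "a", "b", "c" }
+ */
+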
+int hlua_parse_addr(lua_State *L)
+{
+ struct net_addr *addr;
+ const char *str = luaL_checkstring(L, 1);
+ unsigned char mask;
+
+ addr = lua_newuserdata(L, sizeof(struct net_addr));
+ if (!addr) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (str2net(str, PAT_MF_NO_DNS, &addr->addr.v4.ip, &addr->addr.v4.mask)) {
+ addr->family = AF_INET;
+ return 1;
+ }
+
+ if (str62net(str, &addr->addr.v6.ip, &mask)) {
+ len2mask6(mask, &addr->addr.v6.mask);
+ addr->family = AF_INET6;
+ return 1;
+ }
+
+ lua_pop(L, 1);
+ lua_pushnil(L);
+ return 1;
+}
+
+int hlua_match_addr(lua_State *L)
+{
+ struct net_addr *addr1;
+ struct net_addr *addr2;
+
+ if (!lua_isuserdata(L, 1) ||
+ !lua_isuserdata(L, 2)) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ addr1 = lua_touserdata(L, 1);
+ addr2 = lua_touserdata(L, 2);
+
+ if (addr1->family != addr2->family) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ if (addr1->family == AF_INET) {
+ if ((addr1->addr.v4.ip.s_addr & addr2->addr.v4.mask.s_addr) ==
+ (addr2->addr.v4.ip.s_addr & addr1->addr.v4.mask.s_addr)) {
+ lua_pushboolean(L, 1);
+ return 1;
+ }
+ } else {
+ int i;
+
+ for (i = 0; i < 16; i += 4) {
+ if ((read_u32(&addr1->addr.v6.ip.s6_addr[i]) &
+ read_u32(&addr2->addr.v6.mask.s6_addr[i])) !=
+ (read_u32(&addr2->addr.v6.ip.s6_addr[i]) &
+ read_u32(&addr1->addr.v6.mask.s6_addr[i])))
+ break;
+ }
+ if (i == 16) {
+ lua_pushboolean(L, 1);
+ return 1;
+ }
+ }
+
+ lua_pushboolean(L, 0);
+ return 1;
+}
+
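+/* Usage sketch from the Lua side (a minimal example):
+ *
+ *   local net  = core.parse_addr("10.0.0.0/8")
+ *   local host = core.parse_addr("10.1.2.3")
+ *   core.match_addr(net, host)   -- true
+ */
+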
+static struct my_regex **hlua_check_regex(lua_State *L, int ud)
+{
+ return (hlua_checkudata(L, ud, class_regex_ref));
+}
+
+static int hlua_regex_comp(struct lua_State *L)
+{
+ struct my_regex **regex;
+ const char *str;
+ int cs;
+ char *err;
+
+ str = luaL_checkstring(L, 1);
+ luaL_argcheck(L, lua_isboolean(L, 2), 2, NULL);
+ cs = lua_toboolean(L, 2);
+
+ regex = lua_newuserdata(L, sizeof(*regex));
+
+ err = NULL;
+ if (!(*regex = regex_comp(str, cs, 1, &err))) {
+ lua_pushboolean(L, 0); /* status error */
+ lua_pushstring(L, err); /* Reason */
+ free(err);
+ return 2;
+ }
+
+ lua_pushboolean(L, 1); /* Status ok */
+
+ /* Create object */
+ lua_newtable(L);
+ lua_pushvalue(L, -3); /* Get the userdata pointer. */
+ lua_rawseti(L, -2, 0);
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_regex_ref);
+ lua_setmetatable(L, -2);
+ return 2;
+}
+
+static int hlua_regex_exec(struct lua_State *L)
+{
+ struct my_regex **regex;
+ const char *str;
+ size_t len;
+ struct buffer *tmp;
+
+ regex = hlua_check_regex(L, 1);
+ str = luaL_checklstring(L, 2, &len);
+
+ if (!*regex) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+	/* Copy the string because regex_exec2 requires a 'char *'
+	 * and not a 'const char *'.
+ */
+ tmp = get_trash_chunk();
+ if (len >= tmp->size) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+ memcpy(tmp->area, str, len);
+
+ lua_pushboolean(L, regex_exec2(*regex, tmp->area, len));
+
+ return 1;
+}
+
+static int hlua_regex_match(struct lua_State *L)
+{
+ struct my_regex **regex;
+ const char *str;
+ size_t len;
+ regmatch_t pmatch[20];
+ int ret;
+ int i;
+ struct buffer *tmp;
+
+ regex = hlua_check_regex(L, 1);
+ str = luaL_checklstring(L, 2, &len);
+
+ if (!*regex) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+	/* Copy the string because regex_exec2 requires a 'char *'
+	 * and not a 'const char *'.
+ */
+ tmp = get_trash_chunk();
+ if (len >= tmp->size) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+ memcpy(tmp->area, str, len);
+
+ ret = regex_exec_match2(*regex, tmp->area, len, 20, pmatch, 0);
+ lua_pushboolean(L, ret);
+ lua_newtable(L);
+ if (ret) {
+ for (i = 0; i < 20 && pmatch[i].rm_so != -1; i++) {
+ lua_pushlstring(L, str + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
+ lua_rawseti(L, -2, i + 1);
+ }
+ }
+ return 2;
+}
+
+static int hlua_regex_free(struct lua_State *L)
+{
+ struct my_regex **regex;
+
+ regex = hlua_check_regex(L, 1);
+ regex_free(*regex);
+ *regex = NULL;
+ return 0;
+}
+
+int hlua_fcn_reg_core_fcn(lua_State *L)
+{
+ if (!hlua_concat_init(L))
+ return 0;
+
+ hlua_class_function(L, "now", hlua_now);
+ hlua_class_function(L, "http_date", hlua_http_date);
+ hlua_class_function(L, "imf_date", hlua_imf_date);
+ hlua_class_function(L, "rfc850_date", hlua_rfc850_date);
+ hlua_class_function(L, "asctime_date", hlua_asctime_date);
+ hlua_class_function(L, "concat", hlua_concat_new);
+ hlua_class_function(L, "get_info", hlua_get_info);
+ hlua_class_function(L, "parse_addr", hlua_parse_addr);
+ hlua_class_function(L, "match_addr", hlua_match_addr);
+ hlua_class_function(L, "tokenize", hlua_tokenize);
+
+ /* Create regex object. */
+ lua_newtable(L);
+ hlua_class_function(L, "new", hlua_regex_comp);
+
+ lua_newtable(L); /* The metatable. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "exec", hlua_regex_exec);
+ hlua_class_function(L, "match", hlua_regex_match);
+ lua_rawset(L, -3); /* -> META["__index"] = TABLE */
+ hlua_class_function(L, "__gc", hlua_regex_free);
+
+ lua_pushvalue(L, -1); /* Duplicate the metatable reference. */
+ class_regex_ref = hlua_register_metatable(L, CLASS_REGEX);
+
+ lua_setmetatable(L, -2);
+ lua_setglobal(L, CLASS_REGEX); /* Create global object called Regex */
+
+ /* Create stktable object. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "info", hlua_stktable_info);
+ hlua_class_function(L, "lookup", hlua_stktable_lookup);
+ hlua_class_function(L, "dump", hlua_stktable_dump);
+ lua_settable(L, -3); /* -> META["__index"] = TABLE */
+ class_stktable_ref = hlua_register_metatable(L, CLASS_STKTABLE);
+
+ /* Create listener object. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "get_stats", hlua_listener_get_stats);
+ lua_settable(L, -3); /* -> META["__index"] = TABLE */
+ class_listener_ref = hlua_register_metatable(L, CLASS_LISTENER);
+
+ /* Create server object. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "is_draining", hlua_server_is_draining);
+ hlua_class_function(L, "set_maxconn", hlua_server_set_maxconn);
+ hlua_class_function(L, "get_maxconn", hlua_server_get_maxconn);
+ hlua_class_function(L, "set_weight", hlua_server_set_weight);
+ hlua_class_function(L, "get_weight", hlua_server_get_weight);
+ hlua_class_function(L, "set_addr", hlua_server_set_addr);
+ hlua_class_function(L, "get_addr", hlua_server_get_addr);
+ hlua_class_function(L, "get_stats", hlua_server_get_stats);
+ hlua_class_function(L, "shut_sess", hlua_server_shut_sess);
+ hlua_class_function(L, "set_drain", hlua_server_set_drain);
+ hlua_class_function(L, "set_maint", hlua_server_set_maint);
+ hlua_class_function(L, "set_ready", hlua_server_set_ready);
+ hlua_class_function(L, "check_enable", hlua_server_check_enable);
+ hlua_class_function(L, "check_disable", hlua_server_check_disable);
+ hlua_class_function(L, "check_force_up", hlua_server_check_force_up);
+ hlua_class_function(L, "check_force_nolb", hlua_server_check_force_nolb);
+ hlua_class_function(L, "check_force_down", hlua_server_check_force_down);
+ hlua_class_function(L, "agent_enable", hlua_server_agent_enable);
+ hlua_class_function(L, "agent_disable", hlua_server_agent_disable);
+ hlua_class_function(L, "agent_force_up", hlua_server_agent_force_up);
+ hlua_class_function(L, "agent_force_down", hlua_server_agent_force_down);
+ lua_settable(L, -3); /* -> META["__index"] = TABLE */
+ class_server_ref = hlua_register_metatable(L, CLASS_SERVER);
+
+ /* Create proxy object. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "pause", hlua_proxy_pause);
+ hlua_class_function(L, "resume", hlua_proxy_resume);
+ hlua_class_function(L, "stop", hlua_proxy_stop);
+ hlua_class_function(L, "shut_bcksess", hlua_proxy_shut_bcksess);
+ hlua_class_function(L, "get_cap", hlua_proxy_get_cap);
+ hlua_class_function(L, "get_mode", hlua_proxy_get_mode);
+ hlua_class_function(L, "get_stats", hlua_proxy_get_stats);
+ lua_settable(L, -3); /* -> META["__index"] = TABLE */
+ class_proxy_ref = hlua_register_metatable(L, CLASS_PROXY);
+
+ return 5;
+}
diff --git a/src/hpack-dec.c b/src/hpack-dec.c
new file mode 100644
index 0000000..052a7c3
--- /dev/null
+++ b/src/hpack-dec.c
@@ -0,0 +1,475 @@
+/*
+ * HPACK decompressor (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/h2.h>
+#include <haproxy/hpack-dec.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/hpack-tbl.h>
+#include <haproxy/tools.h>
+
+
+#if defined(DEBUG_HPACK)
+#define hpack_debug_printf printf
+#define hpack_debug_hexdump debug_hexdump
+#else
+#define hpack_debug_printf(...) do { } while (0)
+#define hpack_debug_hexdump(...) do { } while (0)
+#endif
+
+/* reads a varint from <raw>'s lowest <b> bits and <len> bytes max (raw included).
+ * returns the 32-bit value on success after updating raw_in and len_in. Forces
+ * len_in to (uint32_t)-1 on truncated input.
+ */
+static uint32_t get_var_int(const uint8_t **raw_in, uint32_t *len_in, int b)
+{
+ uint32_t ret = 0;
+ int len = *len_in;
+ const uint8_t *raw = *raw_in;
+ uint8_t shift = 0;
+
+ len--;
+ ret = *(raw++) & ((1 << b) - 1);
+ if (ret != (uint32_t)((1 << b) - 1))
+ goto end;
+
+ while (len && (*raw & 128)) {
+ ret += ((uint32_t)(*raw++) & 127) << shift;
+ shift += 7;
+ len--;
+ }
+
+ /* last 7 bits */
+ if (!len)
+ goto too_short;
+ len--;
+ ret += ((uint32_t)(*raw++) & 127) << shift;
+
+ end:
+ *raw_in = raw;
+ *len_in = len;
+ return ret;
+
+ too_short:
+ *len_in = (uint32_t)-1;
+ return 0;
+}
+
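+/* Worked example (RFC 7541, appendix C.1.2): the value 1337 encoded with a
+ * 5-bit prefix gives the bytes 0x1f 0x9a 0x0a. Decoding: the prefix holds 31
+ * (all ones, so continuation bytes follow), then 0x9a adds (0x9a & 127) << 0
+ * = 26 and the final byte 0x0a adds 10 << 7 = 1280, i.e. 31 + 26 + 1280 =
+ * 1337.
+ */
+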
+/* returns the pseudo-header <idx> corresponds to among the following values :
+ * - 0 = unknown, the header's string needs to be used instead
+ * - 1 = ":authority"
+ * - 2 = ":method"
+ * - 3 = ":path"
+ * - 4 = ":scheme"
+ * - 5 = ":status"
+ */
+static inline int hpack_idx_to_phdr(uint32_t idx)
+{
+ if (idx > 14)
+ return 0;
+
+ idx >>= 1;
+ idx <<= 2;
+ return (0x55554321U >> idx) & 0xF;
+}
+
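+/* The lookup above packs one 4-bit code per pair of static-table indexes
+ * into the constant 0x55554321: idx 1 -> 1 (:authority), idx 2-3 -> 2
+ * (:method), idx 4-5 -> 3 (:path), idx 6-7 -> 4 (:scheme), idx 8-14 -> 5
+ * (:status). For example idx=5 gives 5>>1 = 2, 2<<2 = 8, and
+ * (0x55554321 >> 8) & 0xF = 3, i.e. ":path".
+ */
+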
+/* If <idx> designates a static header, returns <in>. Otherwise allocates some
+ * room from chunk <store> to duplicate <in> into it and returns the string
+ * allocated there. In case of allocation failure, returns a string whose
+ * pointer is NULL.
+ */
+static inline struct ist hpack_alloc_string(struct buffer *store, uint32_t idx,
+ struct ist in)
+{
+ struct ist out;
+
+ if (idx < HPACK_SHT_SIZE)
+ return in;
+
+ out.len = in.len;
+ out.ptr = chunk_newstr(store);
+ if (unlikely(!isttest(out)))
+ return out;
+
+ if (unlikely(store->data + out.len > store->size)) {
+ out.ptr = NULL;
+ return out;
+ }
+
+ store->data += out.len;
+ memcpy(out.ptr, in.ptr, out.len);
+ return out;
+}
+
+/* decode an HPACK frame starting at <raw> for <len> bytes, using the dynamic
+ * headers table <dht>, produces the output into list <list> of <list_size>
+ * entries max, and uses pre-allocated buffer <tmp> for temporary storage (some
+ * list elements will point to it). Some <list> name entries may be made of a
+ * NULL pointer and a len, in which case they will designate a pseudo header
+ * index according to the values returned by hpack_idx_to_phdr() above. The
+ * number of <list> entries used is returned on success, or <0 on failure, with
+ * the opposite of one of the HPACK_ERR_* codes. A last element is always zeroed
+ * and is not counted in the number of returned entries. This way the caller
+ * can use list[].n.len == 0 as a marker for the end of list.
+ */
+int hpack_decode_frame(struct hpack_dht *dht, const uint8_t *raw, uint32_t len,
+ struct http_hdr *list, int list_size,
+ struct buffer *tmp)
+{
+ uint32_t idx;
+ uint32_t nlen;
+ uint32_t vlen;
+ uint8_t huff;
+ struct ist name;
+ struct ist value;
+ int must_index;
+ int ret;
+
+ hpack_debug_hexdump(stderr, "[HPACK-DEC] ", (const char *)raw, 0, len);
+
+ chunk_reset(tmp);
+ ret = 0;
+ while (len) {
+ int __maybe_unused code = *raw; /* first byte, only for debugging */
+
+ must_index = 0;
+ if (*raw >= 0x80) {
+ /* indexed header field */
+ if (*raw == 0x80) {
+ hpack_debug_printf("unhandled code 0x%02x (raw=%p, len=%u)\n", *raw, raw, len);
+ ret = -HPACK_ERR_UNKNOWN_OPCODE;
+ goto leave;
+ }
+
+ hpack_debug_printf("%02x: p14: indexed header field : ", code);
+
+ idx = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ hpack_debug_printf(" idx=%u ", idx);
+
+ if (!hpack_valid_idx(dht, idx)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ value = hpack_alloc_string(tmp, idx, hpack_idx_to_value(dht, idx));
+ if (!isttest(value)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ /* here we don't index so we can always keep the pseudo header number */
+ name = ist2(NULL, hpack_idx_to_phdr(idx));
+
+ if (!name.len) {
+ name = hpack_alloc_string(tmp, idx, hpack_idx_to_name(dht, idx));
+ if (!isttest(name)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+ }
+ /* <name> and <value> are now set and point to stable values */
+ }
+ else if (*raw >= 0x20 && *raw <= 0x3f) {
+ /* max dyn table size change */
+ hpack_debug_printf("%02x: p18: dynamic table size update : ", code);
+
+ if (ret) {
+ /* 7541#4.2.1 : DHT size update must only be at the beginning */
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ idx = get_var_int(&raw, &len, 5);
+ if (len == (uint32_t)-1) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+ hpack_debug_printf(" new len=%u\n", idx);
+
+ if (idx > dht->size) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_INVALID_ARGUMENT;
+ goto leave;
+ }
+ continue;
+ }
+ else if (!(*raw & (*raw - 0x10))) {
+ /* 0x00, 0x10, and 0x40 (0x20 and 0x80 were already handled above) */
+
+ /* literal header field without/never/with incremental indexing -- literal name */
+ if (*raw == 0x00)
+ hpack_debug_printf("%02x: p17: literal without indexing : ", code);
+ else if (*raw == 0x10)
+ hpack_debug_printf("%02x: p18: literal never indexed : ", code);
+ else if (*raw == 0x40)
+ hpack_debug_printf("%02x: p16: literal with indexing : ", code);
+
+ if (*raw == 0x40)
+ must_index = 1;
+
+ raw++; len--;
+
+ /* retrieve name */
+ if (!len) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ huff = *raw & 0x80;
+ nlen = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1 || len < nlen) { // truncated
+ hpack_debug_printf("##ERR@%d## (truncated): nlen=%d len=%d\n",
+ __LINE__, (int)nlen, (int)len);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ name = ist2(raw, nlen);
+
+ raw += nlen;
+ len -= nlen;
+
+ if (huff) {
+ char *ntrash = chunk_newstr(tmp);
+ if (!ntrash) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ nlen = huff_dec((const uint8_t *)name.ptr, name.len, ntrash,
+ tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ hpack_debug_printf("2: can't decode huffman.\n");
+ ret = -HPACK_ERR_HUFFMAN;
+ goto leave;
+ }
+ hpack_debug_printf(" [name huff %d->%d] ", (int)name.len, (int)nlen);
+
+ tmp->data += nlen; // make room for the value
+ name = ist2(ntrash, nlen);
+ }
+
+ /* retrieve value */
+ if (!len) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ huff = *raw & 0x80;
+ vlen = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1 || len < vlen) { // truncated
+ hpack_debug_printf("##ERR@%d## : vlen=%d len=%d\n",
+ __LINE__, (int)vlen, (int)len);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ value = ist2(raw, vlen);
+ raw += vlen;
+ len -= vlen;
+
+ if (huff) {
+ char *vtrash = chunk_newstr(tmp);
+ if (!vtrash) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ vlen = huff_dec((const uint8_t *)value.ptr, value.len, vtrash,
+ tmp->size - tmp->data);
+ if (vlen == (uint32_t)-1) {
+ hpack_debug_printf("3: can't decode huffman.\n");
+ ret = -HPACK_ERR_HUFFMAN;
+ goto leave;
+ }
+ hpack_debug_printf(" [value huff %d->%d] ", (int)value.len, (int)vlen);
+
+ tmp->data += vlen; // make room for the value
+ value = ist2(vtrash, vlen);
+ }
+
+ /* <name> and <value> are correctly filled here */
+ }
+ else {
+ /* 0x01..0x0f : literal header field without indexing -- indexed name */
+ /* 0x11..0x1f : literal header field never indexed -- indexed name */
+ /* 0x41..0x7f : literal header field with incremental indexing -- indexed name */
+
+ if (*raw <= 0x0f)
+ hpack_debug_printf("%02x: p16: literal without indexing -- indexed name : ", code);
+ else if (*raw >= 0x41)
+ hpack_debug_printf("%02x: p15: literal with indexing -- indexed name : ", code);
+ else
+ hpack_debug_printf("%02x: p16: literal never indexed -- indexed name : ", code);
+
+ /* retrieve name index */
+ if (*raw >= 0x41) {
+ must_index = 1;
+ idx = get_var_int(&raw, &len, 6);
+ }
+ else
+ idx = get_var_int(&raw, &len, 4);
+
+ hpack_debug_printf(" idx=%u ", idx);
+
+ if (len == (uint32_t)-1 || !len) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ if (!hpack_valid_idx(dht, idx)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ /* retrieve value */
+ huff = *raw & 0x80;
+ vlen = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1 || len < vlen) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ value = ist2(raw, vlen);
+ raw += vlen;
+ len -= vlen;
+
+ if (huff) {
+ char *vtrash = chunk_newstr(tmp);
+ if (!vtrash) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ vlen = huff_dec((const uint8_t *)value.ptr, value.len, vtrash,
+ tmp->size - tmp->data);
+ if (vlen == (uint32_t)-1) {
+ hpack_debug_printf("##ERR@%d## can't decode huffman : ilen=%d osize=%d\n",
+ __LINE__, (int)value.len,
+ (int)(tmp->size - tmp->data));
+ hpack_debug_hexdump(stderr, "[HUFFMAN] ", value.ptr, 0, value.len);
+ ret = -HPACK_ERR_HUFFMAN;
+ goto leave;
+ }
+ tmp->data += vlen; // make room for the value
+ value = ist2(vtrash, vlen);
+ }
+
+ name = IST_NULL;
+ if (!must_index)
+ name.len = hpack_idx_to_phdr(idx);
+
+ if (!name.len) {
+ name = hpack_alloc_string(tmp, idx, hpack_idx_to_name(dht, idx));
+ if (!isttest(name)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+ }
+ /* <name> and <value> are correctly filled here */
+ }
+
+ /* We must not accept empty header names (forbidden by the spec and used
+ * as a list termination).
+ */
+ if (!name.len) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_INVALID_ARGUMENT;
+ goto leave;
+ }
+
+ /* here's what we have here :
+ * - name.len > 0
+ * - value is filled with either const data or data allocated from tmp
+ * - name.ptr == NULL && !must_index : known pseudo-header #name.len
+ * - name.ptr != NULL || must_index : general header, unknown pseudo-header or index needed
+ */
+ if (ret >= list_size) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ list[ret].n = name;
+ list[ret].v = value;
+ ret++;
+
+ if (must_index && hpack_dht_insert(dht, name, value) < 0) {
+ hpack_debug_printf("failed to find some room in the dynamic table\n");
+ ret = -HPACK_ERR_DHT_INSERT_FAIL;
+ goto leave;
+ }
+
+ hpack_debug_printf("\e[1;34m%s\e[0m: ",
+ isttest(name) ? istpad(trash.area, name).ptr : h2_phdr_to_str(name.len));
+
+ hpack_debug_printf("\e[1;35m%s\e[0m [mustidx=%d, used=%d] [n=(%p,%d) v=(%p,%d)]\n",
+ istpad(trash.area, value).ptr, must_index,
+ dht->used,
+ name.ptr, (int)name.len, value.ptr, (int)value.len);
+ }
+
+ if (ret >= list_size) {
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ /* put an end marker */
+ list[ret].n = list[ret].v = IST_NULL;
+ ret++;
+
+ leave:
+ hpack_debug_printf("-- done: ret=%d list_size=%d --\n", (int)ret, (int)list_size);
+ return ret;
+}
diff --git a/src/hpack-enc.c b/src/hpack-enc.c
new file mode 100644
index 0000000..3ab21bc
--- /dev/null
+++ b/src/hpack-enc.c
@@ -0,0 +1,210 @@
+/*
+ * HPACK compressor (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/hpack-enc.h>
+#include <haproxy/http-hdr-t.h>
+
+/*
+ * HPACK encoding: these tables were generated using gen-enc.c
+ */
+
+/* Encoding stream of the known header names. It is composed of a series of
+ * records <len:8b> <index:8b> <name:<len>*8b>, sorted by increasing name
+ * length and terminated by a zero length.
+ */
+const char hpack_enc_stream[666] = {
+ /* 0: */ 0x03, 0x15, 0x61, 0x67, 0x65, 0x03, 0x3c, 0x76,
+ /* 8: */ 0x69, 0x61, 0x04, 0x21, 0x64, 0x61, 0x74, 0x65,
+ /* 16: */ 0x04, 0x26, 0x68, 0x6f, 0x73, 0x74, 0x04, 0x22,
+ /* 24: */ 0x65, 0x74, 0x61, 0x67, 0x04, 0x25, 0x66, 0x72,
+ /* 32: */ 0x6f, 0x6d, 0x04, 0x2d, 0x6c, 0x69, 0x6e, 0x6b,
+ /* 40: */ 0x04, 0x3b, 0x76, 0x61, 0x72, 0x79, 0x05, 0x04,
+ /* 48: */ 0x3a, 0x70, 0x61, 0x74, 0x68, 0x05, 0x16, 0x61,
+ /* 56: */ 0x6c, 0x6c, 0x6f, 0x77, 0x05, 0x32, 0x72, 0x61,
+ /* 64: */ 0x6e, 0x67, 0x65, 0x06, 0x13, 0x61, 0x63, 0x63,
+ /* 72: */ 0x65, 0x70, 0x74, 0x06, 0x36, 0x73, 0x65, 0x72,
+ /* 80: */ 0x76, 0x65, 0x72, 0x06, 0x20, 0x63, 0x6f, 0x6f,
+ /* 88: */ 0x6b, 0x69, 0x65, 0x06, 0x23, 0x65, 0x78, 0x70,
+ /* 96: */ 0x65, 0x63, 0x74, 0x07, 0x33, 0x72, 0x65, 0x66,
+ /* 104: */ 0x65, 0x72, 0x65, 0x72, 0x07, 0x24, 0x65, 0x78,
+ /* 112: */ 0x70, 0x69, 0x72, 0x65, 0x73, 0x07, 0x02, 0x3a,
+ /* 120: */ 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x07, 0x06,
+ /* 128: */ 0x3a, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x65, 0x07,
+ /* 136: */ 0x08, 0x3a, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73,
+ /* 144: */ 0x07, 0x34, 0x72, 0x65, 0x66, 0x72, 0x65, 0x73,
+ /* 152: */ 0x68, 0x08, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x74,
+ /* 160: */ 0x69, 0x6f, 0x6e, 0x08, 0x27, 0x69, 0x66, 0x2d,
+ /* 168: */ 0x6d, 0x61, 0x74, 0x63, 0x68, 0x08, 0x2a, 0x69,
+ /* 176: */ 0x66, 0x2d, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x0a,
+ /* 184: */ 0x3a, 0x75, 0x73, 0x65, 0x72, 0x2d, 0x61, 0x67,
+ /* 192: */ 0x65, 0x6e, 0x74, 0x0a, 0x37, 0x73, 0x65, 0x74,
+ /* 200: */ 0x2d, 0x63, 0x6f, 0x6f, 0x6b, 0x69, 0x65, 0x0a,
+ /* 208: */ 0x01, 0x3a, 0x61, 0x75, 0x74, 0x68, 0x6f, 0x72,
+ /* 216: */ 0x69, 0x74, 0x79, 0x0b, 0x35, 0x72, 0x65, 0x74,
+ /* 224: */ 0x72, 0x79, 0x2d, 0x61, 0x66, 0x74, 0x65, 0x72,
+ /* 232: */ 0x0c, 0x1f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
+ /* 240: */ 0x74, 0x2d, 0x74, 0x79, 0x70, 0x65, 0x0c, 0x2f,
+ /* 248: */ 0x6d, 0x61, 0x78, 0x2d, 0x66, 0x6f, 0x72, 0x77,
+ /* 256: */ 0x61, 0x72, 0x64, 0x73, 0x0d, 0x18, 0x63, 0x61,
+ /* 264: */ 0x63, 0x68, 0x65, 0x2d, 0x63, 0x6f, 0x6e, 0x74,
+ /* 272: */ 0x72, 0x6f, 0x6c, 0x0d, 0x2c, 0x6c, 0x61, 0x73,
+ /* 280: */ 0x74, 0x2d, 0x6d, 0x6f, 0x64, 0x69, 0x66, 0x69,
+ /* 288: */ 0x65, 0x64, 0x0d, 0x12, 0x61, 0x63, 0x63, 0x65,
+ /* 296: */ 0x70, 0x74, 0x2d, 0x72, 0x61, 0x6e, 0x67, 0x65,
+ /* 304: */ 0x73, 0x0d, 0x29, 0x69, 0x66, 0x2d, 0x6e, 0x6f,
+ /* 312: */ 0x6e, 0x65, 0x2d, 0x6d, 0x61, 0x74, 0x63, 0x68,
+ /* 320: */ 0x0d, 0x17, 0x61, 0x75, 0x74, 0x68, 0x6f, 0x72,
+ /* 328: */ 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x0d,
+ /* 336: */ 0x1e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
+ /* 344: */ 0x2d, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x0e, 0x1c,
+ /* 352: */ 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d,
+ /* 360: */ 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x0e, 0x0f,
+ /* 368: */ 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d, 0x63,
+ /* 376: */ 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x0f, 0x10,
+ /* 384: */ 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d, 0x65,
+ /* 392: */ 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x0f,
+ /* 400: */ 0x11, 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d,
+ /* 408: */ 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65,
+ /* 416: */ 0x10, 0x1a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
+ /* 424: */ 0x74, 0x2d, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69,
+ /* 432: */ 0x6e, 0x67, 0x10, 0x1b, 0x63, 0x6f, 0x6e, 0x74,
+ /* 440: */ 0x65, 0x6e, 0x74, 0x2d, 0x6c, 0x61, 0x6e, 0x67,
+ /* 448: */ 0x75, 0x61, 0x67, 0x65, 0x10, 0x1d, 0x63, 0x6f,
+ /* 456: */ 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x6c, 0x6f,
+ /* 464: */ 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x10, 0x3d,
+ /* 472: */ 0x77, 0x77, 0x77, 0x2d, 0x61, 0x75, 0x74, 0x68,
+ /* 480: */ 0x65, 0x6e, 0x74, 0x69, 0x63, 0x61, 0x74, 0x65,
+ /* 488: */ 0x11, 0x39, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x66,
+ /* 496: */ 0x65, 0x72, 0x2d, 0x65, 0x6e, 0x63, 0x6f, 0x64,
+ /* 504: */ 0x69, 0x6e, 0x67, 0x11, 0x28, 0x69, 0x66, 0x2d,
+ /* 512: */ 0x6d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x64,
+ /* 520: */ 0x2d, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x12, 0x30,
+ /* 528: */ 0x70, 0x72, 0x6f, 0x78, 0x79, 0x2d, 0x61, 0x75,
+ /* 536: */ 0x74, 0x68, 0x65, 0x6e, 0x74, 0x69, 0x63, 0x61,
+ /* 544: */ 0x74, 0x65, 0x13, 0x19, 0x63, 0x6f, 0x6e, 0x74,
+ /* 552: */ 0x65, 0x6e, 0x74, 0x2d, 0x64, 0x69, 0x73, 0x70,
+ /* 560: */ 0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x13,
+ /* 568: */ 0x2b, 0x69, 0x66, 0x2d, 0x75, 0x6e, 0x6d, 0x6f,
+ /* 576: */ 0x64, 0x69, 0x66, 0x69, 0x65, 0x64, 0x2d, 0x73,
+ /* 584: */ 0x69, 0x6e, 0x63, 0x65, 0x13, 0x31, 0x70, 0x72,
+ /* 592: */ 0x6f, 0x78, 0x79, 0x2d, 0x61, 0x75, 0x74, 0x68,
+ /* 600: */ 0x6f, 0x72, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f,
+ /* 608: */ 0x6e, 0x19, 0x38, 0x73, 0x74, 0x72, 0x69, 0x63,
+ /* 616: */ 0x74, 0x2d, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70,
+ /* 624: */ 0x6f, 0x72, 0x74, 0x2d, 0x73, 0x65, 0x63, 0x75,
+ /* 632: */ 0x72, 0x69, 0x74, 0x79, 0x1b, 0x14, 0x61, 0x63,
+ /* 640: */ 0x63, 0x65, 0x73, 0x73, 0x2d, 0x63, 0x6f, 0x6e,
+ /* 648: */ 0x74, 0x72, 0x6f, 0x6c, 0x2d, 0x61, 0x6c, 0x6c,
+ /* 656: */ 0x6f, 0x77, 0x2d, 0x6f, 0x72, 0x69, 0x67, 0x69,
+ /* 664: */ 0x6e, 0x00,
+};
+
+/* This points to the first position in table hpack_enc_stream[] of a header
+ * of the same length.
+ */
+const signed short hpack_pos_len[32] = {
+ /* 0: */ -1, -1, -1, 0, 10, 46, 67, 99,
+ /* 8: */ 153, -1, 183, 219, 232, 260, 350, 382,
+ /* 16: */ 416, 488, 526, 546, -1, -1, -1, -1,
+ /* 24: */ -1, 609, -1, 636, -1, -1, -1, -1,
+};
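+
+/* Worked example (illustration only) : for a 3-letter name such as "age",
+ * hpack_pos_len[3] == 0, so the scan starts at hpack_enc_stream[0], which
+ * holds the record {0x03, 0x15, 'a','g','e'} : length 3, static index 21
+ * ("age" in the RFC7541 appendix A table), immediately followed by the
+ * record for "via" (index 60). The scan stops as soon as a record's length
+ * byte differs from the searched length. A -1 entry means no known name of
+ * that length exists.
+ */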
+
+/* Tries to encode header whose name is <n> and value <v> into the chunk <out>.
+ * Returns non-zero on success, 0 on failure (buffer full).
+ */
+int hpack_encode_header(struct buffer *out, const struct ist n,
+ const struct ist v)
+{
+ int len = out->data;
+ int size = out->size;
+ int pos;
+
+ if (len >= size)
+ return 0;
+
+ /* look for the header field <n> in the static table */
+ if (n.len >= sizeof(hpack_pos_len) / sizeof(hpack_pos_len[0]))
+ goto make_literal;
+
+ pos = hpack_pos_len[n.len];
+ if (pos >= 0) {
+		/* At least one header field of this length exists */
+ do {
+ char idx;
+
+ pos++;
+ idx = hpack_enc_stream[pos++];
+ pos += n.len;
+ if (isteq(ist2(&hpack_enc_stream[pos - n.len], n.len), n)) {
+				/* emit literal with incremental indexing (RFC7541 #6.2.1) :
+ * [ 0 | 1 | Index (6+) ]
+ */
+ out->area[len++] = idx | 0x40;
+ goto emit_value;
+ }
+ } while ((unsigned char)hpack_enc_stream[pos] == n.len);
+ }
+
+ make_literal:
+ if (likely(n.len < 127 && len + 2 + n.len <= size)) {
+ out->area[len++] = 0x00; /* literal without indexing -- new name */
+ out->area[len++] = n.len; /* single-byte length encoding */
+ ist2bin(out->area + len, n);
+ len += n.len;
+ }
+ else if (hpack_len_to_bytes(n.len) &&
+ len + 1 + hpack_len_to_bytes(n.len) + n.len <= size) {
+ out->area[len++] = 0x00; /* literal without indexing -- new name */
+ len = hpack_encode_len(out->area, len, n.len);
+ ist2bin(out->area + len, n);
+ len += n.len;
+ }
+ else {
+ /* header field name too large for the buffer */
+ return 0;
+ }
+
+ emit_value:
+ /* copy literal header field value */
+ if (!hpack_len_to_bytes(v.len) ||
+ len + hpack_len_to_bytes(v.len) + v.len > size) {
+ /* header value too large for the buffer */
+ return 0;
+ }
+
+ len = hpack_encode_len(out->area, len, v.len);
+ memcpy(out->area + len, v.ptr, v.len);
+ len += v.len;
+
+ out->data = len;
+ return 1;
+}
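+
+/* Usage sketch (illustration only, assuming a caller-provided chunk) :
+ *
+ *     struct buffer *out = get_trash_chunk();
+ *
+ *     if (!hpack_encode_header(out, ist("content-type"), ist("text/html")))
+ *         return 0; // not enough room left in <out>
+ *
+ * Here "content-type" is found in the static table (index 31), so a single
+ * byte (0x40 | 31) is emitted for the name and only the value is sent as a
+ * literal.
+ */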
diff --git a/src/hpack-huff.c b/src/hpack-huff.c
new file mode 100644
index 0000000..35fa52d
--- /dev/null
+++ b/src/hpack-huff.c
@@ -0,0 +1,1532 @@
+/*
+ * Huffman decoding and encoding for HPACK (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/net_helper.h>
+
+struct huff {
+ uint32_t c; /* code point */
+ int b; /* bits */
+};
+
+struct rht {
+	uint8_t c; // decoded symbol (zero-filled entries are unused)
+	uint8_t l; // code length in bits (0 means no code with this prefix)
+};
+
+/* huffman table as per RFC7541 appendix B */
+static const struct huff ht[257] = {
+ [ 0] = { .c = 0x00001ff8, .b = 13 },
+ [ 1] = { .c = 0x007fffd8, .b = 23 },
+ [ 2] = { .c = 0x0fffffe2, .b = 28 },
+ [ 3] = { .c = 0x0fffffe3, .b = 28 },
+ [ 4] = { .c = 0x0fffffe4, .b = 28 },
+ [ 5] = { .c = 0x0fffffe5, .b = 28 },
+ [ 6] = { .c = 0x0fffffe6, .b = 28 },
+ [ 7] = { .c = 0x0fffffe7, .b = 28 },
+ [ 8] = { .c = 0x0fffffe8, .b = 28 },
+ [ 9] = { .c = 0x00ffffea, .b = 24 },
+ [ 10] = { .c = 0x3ffffffc, .b = 30 },
+ [ 11] = { .c = 0x0fffffe9, .b = 28 },
+ [ 12] = { .c = 0x0fffffea, .b = 28 },
+ [ 13] = { .c = 0x3ffffffd, .b = 30 },
+ [ 14] = { .c = 0x0fffffeb, .b = 28 },
+ [ 15] = { .c = 0x0fffffec, .b = 28 },
+ [ 16] = { .c = 0x0fffffed, .b = 28 },
+ [ 17] = { .c = 0x0fffffee, .b = 28 },
+ [ 18] = { .c = 0x0fffffef, .b = 28 },
+ [ 19] = { .c = 0x0ffffff0, .b = 28 },
+ [ 20] = { .c = 0x0ffffff1, .b = 28 },
+ [ 21] = { .c = 0x0ffffff2, .b = 28 },
+ [ 22] = { .c = 0x3ffffffe, .b = 30 },
+ [ 23] = { .c = 0x0ffffff3, .b = 28 },
+ [ 24] = { .c = 0x0ffffff4, .b = 28 },
+ [ 25] = { .c = 0x0ffffff5, .b = 28 },
+ [ 26] = { .c = 0x0ffffff6, .b = 28 },
+ [ 27] = { .c = 0x0ffffff7, .b = 28 },
+ [ 28] = { .c = 0x0ffffff8, .b = 28 },
+ [ 29] = { .c = 0x0ffffff9, .b = 28 },
+ [ 30] = { .c = 0x0ffffffa, .b = 28 },
+ [ 31] = { .c = 0x0ffffffb, .b = 28 },
+ [ 32] = { .c = 0x00000014, .b = 6 },
+ [ 33] = { .c = 0x000003f8, .b = 10 },
+ [ 34] = { .c = 0x000003f9, .b = 10 },
+ [ 35] = { .c = 0x00000ffa, .b = 12 },
+ [ 36] = { .c = 0x00001ff9, .b = 13 },
+ [ 37] = { .c = 0x00000015, .b = 6 },
+ [ 38] = { .c = 0x000000f8, .b = 8 },
+ [ 39] = { .c = 0x000007fa, .b = 11 },
+ [ 40] = { .c = 0x000003fa, .b = 10 },
+ [ 41] = { .c = 0x000003fb, .b = 10 },
+ [ 42] = { .c = 0x000000f9, .b = 8 },
+ [ 43] = { .c = 0x000007fb, .b = 11 },
+ [ 44] = { .c = 0x000000fa, .b = 8 },
+ [ 45] = { .c = 0x00000016, .b = 6 },
+ [ 46] = { .c = 0x00000017, .b = 6 },
+ [ 47] = { .c = 0x00000018, .b = 6 },
+ [ 48] = { .c = 0x00000000, .b = 5 },
+ [ 49] = { .c = 0x00000001, .b = 5 },
+ [ 50] = { .c = 0x00000002, .b = 5 },
+ [ 51] = { .c = 0x00000019, .b = 6 },
+ [ 52] = { .c = 0x0000001a, .b = 6 },
+ [ 53] = { .c = 0x0000001b, .b = 6 },
+ [ 54] = { .c = 0x0000001c, .b = 6 },
+ [ 55] = { .c = 0x0000001d, .b = 6 },
+ [ 56] = { .c = 0x0000001e, .b = 6 },
+ [ 57] = { .c = 0x0000001f, .b = 6 },
+ [ 58] = { .c = 0x0000005c, .b = 7 },
+ [ 59] = { .c = 0x000000fb, .b = 8 },
+ [ 60] = { .c = 0x00007ffc, .b = 15 },
+ [ 61] = { .c = 0x00000020, .b = 6 },
+ [ 62] = { .c = 0x00000ffb, .b = 12 },
+ [ 63] = { .c = 0x000003fc, .b = 10 },
+ [ 64] = { .c = 0x00001ffa, .b = 13 },
+ [ 65] = { .c = 0x00000021, .b = 6 },
+ [ 66] = { .c = 0x0000005d, .b = 7 },
+ [ 67] = { .c = 0x0000005e, .b = 7 },
+ [ 68] = { .c = 0x0000005f, .b = 7 },
+ [ 69] = { .c = 0x00000060, .b = 7 },
+ [ 70] = { .c = 0x00000061, .b = 7 },
+ [ 71] = { .c = 0x00000062, .b = 7 },
+ [ 72] = { .c = 0x00000063, .b = 7 },
+ [ 73] = { .c = 0x00000064, .b = 7 },
+ [ 74] = { .c = 0x00000065, .b = 7 },
+ [ 75] = { .c = 0x00000066, .b = 7 },
+ [ 76] = { .c = 0x00000067, .b = 7 },
+ [ 77] = { .c = 0x00000068, .b = 7 },
+ [ 78] = { .c = 0x00000069, .b = 7 },
+ [ 79] = { .c = 0x0000006a, .b = 7 },
+ [ 80] = { .c = 0x0000006b, .b = 7 },
+ [ 81] = { .c = 0x0000006c, .b = 7 },
+ [ 82] = { .c = 0x0000006d, .b = 7 },
+ [ 83] = { .c = 0x0000006e, .b = 7 },
+ [ 84] = { .c = 0x0000006f, .b = 7 },
+ [ 85] = { .c = 0x00000070, .b = 7 },
+ [ 86] = { .c = 0x00000071, .b = 7 },
+ [ 87] = { .c = 0x00000072, .b = 7 },
+ [ 88] = { .c = 0x000000fc, .b = 8 },
+ [ 89] = { .c = 0x00000073, .b = 7 },
+ [ 90] = { .c = 0x000000fd, .b = 8 },
+ [ 91] = { .c = 0x00001ffb, .b = 13 },
+ [ 92] = { .c = 0x0007fff0, .b = 19 },
+ [ 93] = { .c = 0x00001ffc, .b = 13 },
+ [ 94] = { .c = 0x00003ffc, .b = 14 },
+ [ 95] = { .c = 0x00000022, .b = 6 },
+ [ 96] = { .c = 0x00007ffd, .b = 15 },
+ [ 97] = { .c = 0x00000003, .b = 5 },
+ [ 98] = { .c = 0x00000023, .b = 6 },
+ [ 99] = { .c = 0x00000004, .b = 5 },
+ [100] = { .c = 0x00000024, .b = 6 },
+ [101] = { .c = 0x00000005, .b = 5 },
+ [102] = { .c = 0x00000025, .b = 6 },
+ [103] = { .c = 0x00000026, .b = 6 },
+ [104] = { .c = 0x00000027, .b = 6 },
+ [105] = { .c = 0x00000006, .b = 5 },
+ [106] = { .c = 0x00000074, .b = 7 },
+ [107] = { .c = 0x00000075, .b = 7 },
+ [108] = { .c = 0x00000028, .b = 6 },
+ [109] = { .c = 0x00000029, .b = 6 },
+ [110] = { .c = 0x0000002a, .b = 6 },
+ [111] = { .c = 0x00000007, .b = 5 },
+ [112] = { .c = 0x0000002b, .b = 6 },
+ [113] = { .c = 0x00000076, .b = 7 },
+ [114] = { .c = 0x0000002c, .b = 6 },
+ [115] = { .c = 0x00000008, .b = 5 },
+ [116] = { .c = 0x00000009, .b = 5 },
+ [117] = { .c = 0x0000002d, .b = 6 },
+ [118] = { .c = 0x00000077, .b = 7 },
+ [119] = { .c = 0x00000078, .b = 7 },
+ [120] = { .c = 0x00000079, .b = 7 },
+ [121] = { .c = 0x0000007a, .b = 7 },
+ [122] = { .c = 0x0000007b, .b = 7 },
+ [123] = { .c = 0x00007ffe, .b = 15 },
+ [124] = { .c = 0x000007fc, .b = 11 },
+ [125] = { .c = 0x00003ffd, .b = 14 },
+ [126] = { .c = 0x00001ffd, .b = 13 },
+ [127] = { .c = 0x0ffffffc, .b = 28 },
+ [128] = { .c = 0x000fffe6, .b = 20 },
+ [129] = { .c = 0x003fffd2, .b = 22 },
+ [130] = { .c = 0x000fffe7, .b = 20 },
+ [131] = { .c = 0x000fffe8, .b = 20 },
+ [132] = { .c = 0x003fffd3, .b = 22 },
+ [133] = { .c = 0x003fffd4, .b = 22 },
+ [134] = { .c = 0x003fffd5, .b = 22 },
+ [135] = { .c = 0x007fffd9, .b = 23 },
+ [136] = { .c = 0x003fffd6, .b = 22 },
+ [137] = { .c = 0x007fffda, .b = 23 },
+ [138] = { .c = 0x007fffdb, .b = 23 },
+ [139] = { .c = 0x007fffdc, .b = 23 },
+ [140] = { .c = 0x007fffdd, .b = 23 },
+ [141] = { .c = 0x007fffde, .b = 23 },
+ [142] = { .c = 0x00ffffeb, .b = 24 },
+ [143] = { .c = 0x007fffdf, .b = 23 },
+ [144] = { .c = 0x00ffffec, .b = 24 },
+ [145] = { .c = 0x00ffffed, .b = 24 },
+ [146] = { .c = 0x003fffd7, .b = 22 },
+ [147] = { .c = 0x007fffe0, .b = 23 },
+ [148] = { .c = 0x00ffffee, .b = 24 },
+ [149] = { .c = 0x007fffe1, .b = 23 },
+ [150] = { .c = 0x007fffe2, .b = 23 },
+ [151] = { .c = 0x007fffe3, .b = 23 },
+ [152] = { .c = 0x007fffe4, .b = 23 },
+ [153] = { .c = 0x001fffdc, .b = 21 },
+ [154] = { .c = 0x003fffd8, .b = 22 },
+ [155] = { .c = 0x007fffe5, .b = 23 },
+ [156] = { .c = 0x003fffd9, .b = 22 },
+ [157] = { .c = 0x007fffe6, .b = 23 },
+ [158] = { .c = 0x007fffe7, .b = 23 },
+ [159] = { .c = 0x00ffffef, .b = 24 },
+ [160] = { .c = 0x003fffda, .b = 22 },
+ [161] = { .c = 0x001fffdd, .b = 21 },
+ [162] = { .c = 0x000fffe9, .b = 20 },
+ [163] = { .c = 0x003fffdb, .b = 22 },
+ [164] = { .c = 0x003fffdc, .b = 22 },
+ [165] = { .c = 0x007fffe8, .b = 23 },
+ [166] = { .c = 0x007fffe9, .b = 23 },
+ [167] = { .c = 0x001fffde, .b = 21 },
+ [168] = { .c = 0x007fffea, .b = 23 },
+ [169] = { .c = 0x003fffdd, .b = 22 },
+ [170] = { .c = 0x003fffde, .b = 22 },
+ [171] = { .c = 0x00fffff0, .b = 24 },
+ [172] = { .c = 0x001fffdf, .b = 21 },
+ [173] = { .c = 0x003fffdf, .b = 22 },
+ [174] = { .c = 0x007fffeb, .b = 23 },
+ [175] = { .c = 0x007fffec, .b = 23 },
+ [176] = { .c = 0x001fffe0, .b = 21 },
+ [177] = { .c = 0x001fffe1, .b = 21 },
+ [178] = { .c = 0x003fffe0, .b = 22 },
+ [179] = { .c = 0x001fffe2, .b = 21 },
+ [180] = { .c = 0x007fffed, .b = 23 },
+ [181] = { .c = 0x003fffe1, .b = 22 },
+ [182] = { .c = 0x007fffee, .b = 23 },
+ [183] = { .c = 0x007fffef, .b = 23 },
+ [184] = { .c = 0x000fffea, .b = 20 },
+ [185] = { .c = 0x003fffe2, .b = 22 },
+ [186] = { .c = 0x003fffe3, .b = 22 },
+ [187] = { .c = 0x003fffe4, .b = 22 },
+ [188] = { .c = 0x007ffff0, .b = 23 },
+ [189] = { .c = 0x003fffe5, .b = 22 },
+ [190] = { .c = 0x003fffe6, .b = 22 },
+ [191] = { .c = 0x007ffff1, .b = 23 },
+ [192] = { .c = 0x03ffffe0, .b = 26 },
+ [193] = { .c = 0x03ffffe1, .b = 26 },
+ [194] = { .c = 0x000fffeb, .b = 20 },
+ [195] = { .c = 0x0007fff1, .b = 19 },
+ [196] = { .c = 0x003fffe7, .b = 22 },
+ [197] = { .c = 0x007ffff2, .b = 23 },
+ [198] = { .c = 0x003fffe8, .b = 22 },
+ [199] = { .c = 0x01ffffec, .b = 25 },
+ [200] = { .c = 0x03ffffe2, .b = 26 },
+ [201] = { .c = 0x03ffffe3, .b = 26 },
+ [202] = { .c = 0x03ffffe4, .b = 26 },
+ [203] = { .c = 0x07ffffde, .b = 27 },
+ [204] = { .c = 0x07ffffdf, .b = 27 },
+ [205] = { .c = 0x03ffffe5, .b = 26 },
+ [206] = { .c = 0x00fffff1, .b = 24 },
+ [207] = { .c = 0x01ffffed, .b = 25 },
+ [208] = { .c = 0x0007fff2, .b = 19 },
+ [209] = { .c = 0x001fffe3, .b = 21 },
+ [210] = { .c = 0x03ffffe6, .b = 26 },
+ [211] = { .c = 0x07ffffe0, .b = 27 },
+ [212] = { .c = 0x07ffffe1, .b = 27 },
+ [213] = { .c = 0x03ffffe7, .b = 26 },
+ [214] = { .c = 0x07ffffe2, .b = 27 },
+ [215] = { .c = 0x00fffff2, .b = 24 },
+ [216] = { .c = 0x001fffe4, .b = 21 },
+ [217] = { .c = 0x001fffe5, .b = 21 },
+ [218] = { .c = 0x03ffffe8, .b = 26 },
+ [219] = { .c = 0x03ffffe9, .b = 26 },
+ [220] = { .c = 0x0ffffffd, .b = 28 },
+ [221] = { .c = 0x07ffffe3, .b = 27 },
+ [222] = { .c = 0x07ffffe4, .b = 27 },
+ [223] = { .c = 0x07ffffe5, .b = 27 },
+ [224] = { .c = 0x000fffec, .b = 20 },
+ [225] = { .c = 0x00fffff3, .b = 24 },
+ [226] = { .c = 0x000fffed, .b = 20 },
+ [227] = { .c = 0x001fffe6, .b = 21 },
+ [228] = { .c = 0x003fffe9, .b = 22 },
+ [229] = { .c = 0x001fffe7, .b = 21 },
+ [230] = { .c = 0x001fffe8, .b = 21 },
+ [231] = { .c = 0x007ffff3, .b = 23 },
+ [232] = { .c = 0x003fffea, .b = 22 },
+ [233] = { .c = 0x003fffeb, .b = 22 },
+ [234] = { .c = 0x01ffffee, .b = 25 },
+ [235] = { .c = 0x01ffffef, .b = 25 },
+ [236] = { .c = 0x00fffff4, .b = 24 },
+ [237] = { .c = 0x00fffff5, .b = 24 },
+ [238] = { .c = 0x03ffffea, .b = 26 },
+ [239] = { .c = 0x007ffff4, .b = 23 },
+ [240] = { .c = 0x03ffffeb, .b = 26 },
+ [241] = { .c = 0x07ffffe6, .b = 27 },
+ [242] = { .c = 0x03ffffec, .b = 26 },
+ [243] = { .c = 0x03ffffed, .b = 26 },
+ [244] = { .c = 0x07ffffe7, .b = 27 },
+ [245] = { .c = 0x07ffffe8, .b = 27 },
+ [246] = { .c = 0x07ffffe9, .b = 27 },
+ [247] = { .c = 0x07ffffea, .b = 27 },
+ [248] = { .c = 0x07ffffeb, .b = 27 },
+ [249] = { .c = 0x0ffffffe, .b = 28 },
+ [250] = { .c = 0x07ffffec, .b = 27 },
+ [251] = { .c = 0x07ffffed, .b = 27 },
+ [252] = { .c = 0x07ffffee, .b = 27 },
+ [253] = { .c = 0x07ffffef, .b = 27 },
+ [254] = { .c = 0x07fffff0, .b = 27 },
+ [255] = { .c = 0x03ffffee, .b = 26 },
+ [256] = { .c = 0x3fffffff, .b = 30 }, /* EOS */
+};
+
+
+/* Reversed huffman codes, generated by dev/hpack/gen-rht.c from the table
+ * above.
+ *
+ * The codes are aligned on the MSB since that's how they appear in the stream.
+ *
+ * Quick summary below of the way the tables work. They're based on how the
+ * prefixes are organized, starting from the MSB.
+ *
+ * These codes fit in a single octet (5 to 8 bits) :
+ * 00/5 08/5 10/5 18/5 20/5 28/5 30/5 38/5
+ * 40/5 48/5
+ *
+ * 50/6 54/6 58/6 5c/6 60/6 64/6 68/6 6c/6
+ * 70/6 74/6 78/6 7c/6 80/6 84/6 88/6 8c/6
+ * 90/6 94/6 98/6 9c/6 a0/6 a4/6 a8/6 ac/6
+ * b0/6 b4/6
+ *
+ * b8/7 ba/7 bc/7 be/7 c0/7 c2/7 c4/7 c6/7
+ * c8/7 ca/7 cc/7 ce/7 d0/7 d2/7 d4/7 d6/7
+ * d8/7 da/7 dc/7 de/7 e0/7 e2/7 e4/7 e6/7
+ * e8/7 ea/7 ec/7 ee/7 f0/7 f2/7 f4/7 f6/7
+ *
+ * f8/8 f9/8 fa/8 fb/8 fc/8 fd/8
+ *
+ * ==> a single 256-symbol table based on the full byte provides direct
+ *     access to the symbol and the bit count
+ *
+ * These codes fit in two octets (10 to 15 bits; no code uses 9 or 16 bits) :
+ *
+ * fe + 2 bits:
+ * 00/2 40/2 80/2 c0/2
+ *
+ * ff + 2..7 bits :
+ * 00/2
+ * 40/3 60/3 80/3
+ * a0/4 b0/4
+ * c0/5 c8/5 d0/5 d8/5 e0/5 e8/5
+ * f0/6 f4/6
+ * f8/7 fa/7 fc/7
+ *
+ * ==> a single 256-symbol table indexed by b0.0 and b1.7-1 provides direct
+ *     access and the bit count after a miss on the first table above.
+ *
+ * These fit in three octets :
+ * ff fe + 3..5 bits :
+ * 00/3 20/3 40/3 60/4 70/4 80/4 90/4 a0/4
+ * b0/4 c0/4 d0/4
+ * e0/5 e8/5 f0/5 f8/5
+ *
+ * ff ff + 5..8 bits :
+ * 00/5 08/5 10/5 18/5 20/5 28/5 30/5 38/5
+ * 40/5
+ * 48/6 4c/6 50/6 54/6 58/6 5c/6 60/6 64/6
+ * 68/6 6c/6 70/6 74/6 78/6 7c/6 80/6 84/6
+ * 88/6 8c/6 90/6 94/6 98/6 9c/6 a0/6 a4/6
+ * a8/6 ac/6
+ * b0/7 b2/7 b4/7 b6/7 b8/7 ba/7 bc/7 be/7
+ * c0/7 c2/7 c4/7 c6/7 c8/7 ca/7 cc/7 ce/7
+ * d0/7 d2/7 d4/7 d6/7 d8/7 da/7 dc/7 de/7
+ * e0/7 e2/7 e4/7 e6/7 e8/7
+ * ea/8 eb/8 ec/8 ed/8 ee/8 ef/8 f0/8 f1/8
+ * f2/8 f3/8 f4/8 f5/8
+ *
+ * ==> a 32-symbol table has to be applied to 0xfffe
+ * ==> a 256-symbol table has to be applied to 0xffff
+ *
+ * The remaining ones fit in four octets, with 1 to 6 bits in the last one :
+ * ff ff f6 : 00/1 80/1
+ * ff ff f7 : 00/1 80/1
+ * ff ff f8 : 00/2 40/2 80/2 c0/2
+ * ff ff f9 : 00/2 40/2 80/2 c0/2
+ * ff ff fa : 00/2 40/2 80/2 c0/2
+ * ff ff fb : 00/2 40/2 80/2
+ * ff ff fb : c0/3 e0/3
+ * ff ff fc : 00/3 20/3 40/3 60/3 80/3 a0/3 c0/3 e0/3
+ * ff ff fd : 00/3 20/3 40/3 60/3 80/3 a0/3 c0/3 e0/3
+ * ff ff fe : 00/3
+ * ff ff fe : 20/4 30/4 40/4 50/4 60/4 70/4 80/4 90/4 a0/4 b0/4 c0/4 d0/4 e0/4 f0/4
+ * ff ff ff : 00/4 10/4 20/4 30/4 40/4 50/4 60/4 70/4 80/4 90/4 a0/4 b0/4 c0/4 d0/4 e0/4
+ * ff ff ff : f0/6 f4/6 f8/6 fc/6
+ *
+ * ==> a 256-symbol table with b2.0-3,b3.7-4 gives all of them except the
+ * distinction between ffffff{f0,f4,f8,fc} which is rare enough
+ * and can be done by hand when bit count == 30.
+ *
+ *
+ * Code lengths :
+ * 5..8 : 0x00..0xfe
+ * 10..15 : 0xfe
+ * 0xff 0x00..0xfe
+ * 19..20 : 0xff 0xfe 0x00..0xdf
+ * 21 : 0xff 0xfe 0xe0..0xff
+ * 21 : 0xff 0xff 0x00..0x40
+ * 22..24 : 0xff 0xff 0x00..0xf5
+ * 24..28 : 0xff 0xff 0xf5..0xff
+ * 30 : 0xff 0xff 0xff 0xf0..0xff
+ *
+ *
+ * if b0 < 0xfe ==> 5..8 bits (74 codes)
+ * if b0 == 0xfe or 0xff : 10..15
+ * => if b0 == 0xfe || b1 < 0xfe : lookup (b0:0|b1:7..1) (21 codes)
+ *
+ * -- b0 = 0xff --
+ * if b1 == 0xfe : 19..21 bits
+ * => lookup b2:7..3 (15 codes)
+ *
+ * -- b0 = 0xff, b1 = 0xff : 147 codes --
+ * if b2 < 0xf6 : 21..24 bits (76 codes)
+ * if b2 >= 0xf6 : 25..30 bits (71 codes)
+ *
+ * Algorithm:
+ * - if more than 24 but fewer than 32 bits are available, read the missing bits.
+ * - if fewer than 24 bits are available, read 1 more byte. Past the end of
+ *   the input, 0x00 is inserted instead (see huff_dec() for why not 0xff).
+ * - if b0 < 0xfe lookup b0 in table0[0..255]
+ * - else if b0 == 0xfe, manual lookup
+ * - else if b0 == 0xff, lookup b1 in table1[0..255]
+ * ...
+ */
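+
+/* Decoding illustration (not part of the tables) : an input starting with
+ * byte 0x30 (binary 00110...) is below 0xfe, so rht_bit31_24[0x30] applies
+ * and yields { .c = 0x69, .l = 5 } : the symbol 'i' is emitted and only 5
+ * bits are consumed; the trailing 3 bits of the byte belong to the next code.
+ */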
+
+struct rht rht_bit31_24[256] = {
+ [0x00] = { .c = 0x30, .l = 5 },
+ [0x01] = { .c = 0x30, .l = 5 },
+ [0x02] = { .c = 0x30, .l = 5 },
+ [0x03] = { .c = 0x30, .l = 5 },
+ [0x04] = { .c = 0x30, .l = 5 },
+ [0x05] = { .c = 0x30, .l = 5 },
+ [0x06] = { .c = 0x30, .l = 5 },
+ [0x07] = { .c = 0x30, .l = 5 },
+ [0x08] = { .c = 0x31, .l = 5 },
+ [0x09] = { .c = 0x31, .l = 5 },
+ [0x0a] = { .c = 0x31, .l = 5 },
+ [0x0b] = { .c = 0x31, .l = 5 },
+ [0x0c] = { .c = 0x31, .l = 5 },
+ [0x0d] = { .c = 0x31, .l = 5 },
+ [0x0e] = { .c = 0x31, .l = 5 },
+ [0x0f] = { .c = 0x31, .l = 5 },
+ [0x10] = { .c = 0x32, .l = 5 },
+ [0x11] = { .c = 0x32, .l = 5 },
+ [0x12] = { .c = 0x32, .l = 5 },
+ [0x13] = { .c = 0x32, .l = 5 },
+ [0x14] = { .c = 0x32, .l = 5 },
+ [0x15] = { .c = 0x32, .l = 5 },
+ [0x16] = { .c = 0x32, .l = 5 },
+ [0x17] = { .c = 0x32, .l = 5 },
+ [0x18] = { .c = 0x61, .l = 5 },
+ [0x19] = { .c = 0x61, .l = 5 },
+ [0x1a] = { .c = 0x61, .l = 5 },
+ [0x1b] = { .c = 0x61, .l = 5 },
+ [0x1c] = { .c = 0x61, .l = 5 },
+ [0x1d] = { .c = 0x61, .l = 5 },
+ [0x1e] = { .c = 0x61, .l = 5 },
+ [0x1f] = { .c = 0x61, .l = 5 },
+ [0x20] = { .c = 0x63, .l = 5 },
+ [0x21] = { .c = 0x63, .l = 5 },
+ [0x22] = { .c = 0x63, .l = 5 },
+ [0x23] = { .c = 0x63, .l = 5 },
+ [0x24] = { .c = 0x63, .l = 5 },
+ [0x25] = { .c = 0x63, .l = 5 },
+ [0x26] = { .c = 0x63, .l = 5 },
+ [0x27] = { .c = 0x63, .l = 5 },
+ [0x28] = { .c = 0x65, .l = 5 },
+ [0x29] = { .c = 0x65, .l = 5 },
+ [0x2a] = { .c = 0x65, .l = 5 },
+ [0x2b] = { .c = 0x65, .l = 5 },
+ [0x2c] = { .c = 0x65, .l = 5 },
+ [0x2d] = { .c = 0x65, .l = 5 },
+ [0x2e] = { .c = 0x65, .l = 5 },
+ [0x2f] = { .c = 0x65, .l = 5 },
+ [0x30] = { .c = 0x69, .l = 5 },
+ [0x31] = { .c = 0x69, .l = 5 },
+ [0x32] = { .c = 0x69, .l = 5 },
+ [0x33] = { .c = 0x69, .l = 5 },
+ [0x34] = { .c = 0x69, .l = 5 },
+ [0x35] = { .c = 0x69, .l = 5 },
+ [0x36] = { .c = 0x69, .l = 5 },
+ [0x37] = { .c = 0x69, .l = 5 },
+ [0x38] = { .c = 0x6f, .l = 5 },
+ [0x39] = { .c = 0x6f, .l = 5 },
+ [0x3a] = { .c = 0x6f, .l = 5 },
+ [0x3b] = { .c = 0x6f, .l = 5 },
+ [0x3c] = { .c = 0x6f, .l = 5 },
+ [0x3d] = { .c = 0x6f, .l = 5 },
+ [0x3e] = { .c = 0x6f, .l = 5 },
+ [0x3f] = { .c = 0x6f, .l = 5 },
+ [0x40] = { .c = 0x73, .l = 5 },
+ [0x41] = { .c = 0x73, .l = 5 },
+ [0x42] = { .c = 0x73, .l = 5 },
+ [0x43] = { .c = 0x73, .l = 5 },
+ [0x44] = { .c = 0x73, .l = 5 },
+ [0x45] = { .c = 0x73, .l = 5 },
+ [0x46] = { .c = 0x73, .l = 5 },
+ [0x47] = { .c = 0x73, .l = 5 },
+ [0x48] = { .c = 0x74, .l = 5 },
+ [0x49] = { .c = 0x74, .l = 5 },
+ [0x4a] = { .c = 0x74, .l = 5 },
+ [0x4b] = { .c = 0x74, .l = 5 },
+ [0x4c] = { .c = 0x74, .l = 5 },
+ [0x4d] = { .c = 0x74, .l = 5 },
+ [0x4e] = { .c = 0x74, .l = 5 },
+ [0x4f] = { .c = 0x74, .l = 5 },
+ [0x50] = { .c = 0x20, .l = 6 },
+ [0x51] = { .c = 0x20, .l = 6 },
+ [0x52] = { .c = 0x20, .l = 6 },
+ [0x53] = { .c = 0x20, .l = 6 },
+ [0x54] = { .c = 0x25, .l = 6 },
+ [0x55] = { .c = 0x25, .l = 6 },
+ [0x56] = { .c = 0x25, .l = 6 },
+ [0x57] = { .c = 0x25, .l = 6 },
+ [0x58] = { .c = 0x2d, .l = 6 },
+ [0x59] = { .c = 0x2d, .l = 6 },
+ [0x5a] = { .c = 0x2d, .l = 6 },
+ [0x5b] = { .c = 0x2d, .l = 6 },
+ [0x5c] = { .c = 0x2e, .l = 6 },
+ [0x5d] = { .c = 0x2e, .l = 6 },
+ [0x5e] = { .c = 0x2e, .l = 6 },
+ [0x5f] = { .c = 0x2e, .l = 6 },
+ [0x60] = { .c = 0x2f, .l = 6 },
+ [0x61] = { .c = 0x2f, .l = 6 },
+ [0x62] = { .c = 0x2f, .l = 6 },
+ [0x63] = { .c = 0x2f, .l = 6 },
+ [0x64] = { .c = 0x33, .l = 6 },
+ [0x65] = { .c = 0x33, .l = 6 },
+ [0x66] = { .c = 0x33, .l = 6 },
+ [0x67] = { .c = 0x33, .l = 6 },
+ [0x68] = { .c = 0x34, .l = 6 },
+ [0x69] = { .c = 0x34, .l = 6 },
+ [0x6a] = { .c = 0x34, .l = 6 },
+ [0x6b] = { .c = 0x34, .l = 6 },
+ [0x6c] = { .c = 0x35, .l = 6 },
+ [0x6d] = { .c = 0x35, .l = 6 },
+ [0x6e] = { .c = 0x35, .l = 6 },
+ [0x6f] = { .c = 0x35, .l = 6 },
+ [0x70] = { .c = 0x36, .l = 6 },
+ [0x71] = { .c = 0x36, .l = 6 },
+ [0x72] = { .c = 0x36, .l = 6 },
+ [0x73] = { .c = 0x36, .l = 6 },
+ [0x74] = { .c = 0x37, .l = 6 },
+ [0x75] = { .c = 0x37, .l = 6 },
+ [0x76] = { .c = 0x37, .l = 6 },
+ [0x77] = { .c = 0x37, .l = 6 },
+ [0x78] = { .c = 0x38, .l = 6 },
+ [0x79] = { .c = 0x38, .l = 6 },
+ [0x7a] = { .c = 0x38, .l = 6 },
+ [0x7b] = { .c = 0x38, .l = 6 },
+ [0x7c] = { .c = 0x39, .l = 6 },
+ [0x7d] = { .c = 0x39, .l = 6 },
+ [0x7e] = { .c = 0x39, .l = 6 },
+ [0x7f] = { .c = 0x39, .l = 6 },
+ [0x80] = { .c = 0x3d, .l = 6 },
+ [0x81] = { .c = 0x3d, .l = 6 },
+ [0x82] = { .c = 0x3d, .l = 6 },
+ [0x83] = { .c = 0x3d, .l = 6 },
+ [0x84] = { .c = 0x41, .l = 6 },
+ [0x85] = { .c = 0x41, .l = 6 },
+ [0x86] = { .c = 0x41, .l = 6 },
+ [0x87] = { .c = 0x41, .l = 6 },
+ [0x88] = { .c = 0x5f, .l = 6 },
+ [0x89] = { .c = 0x5f, .l = 6 },
+ [0x8a] = { .c = 0x5f, .l = 6 },
+ [0x8b] = { .c = 0x5f, .l = 6 },
+ [0x8c] = { .c = 0x62, .l = 6 },
+ [0x8d] = { .c = 0x62, .l = 6 },
+ [0x8e] = { .c = 0x62, .l = 6 },
+ [0x8f] = { .c = 0x62, .l = 6 },
+ [0x90] = { .c = 0x64, .l = 6 },
+ [0x91] = { .c = 0x64, .l = 6 },
+ [0x92] = { .c = 0x64, .l = 6 },
+ [0x93] = { .c = 0x64, .l = 6 },
+ [0x94] = { .c = 0x66, .l = 6 },
+ [0x95] = { .c = 0x66, .l = 6 },
+ [0x96] = { .c = 0x66, .l = 6 },
+ [0x97] = { .c = 0x66, .l = 6 },
+ [0x98] = { .c = 0x67, .l = 6 },
+ [0x99] = { .c = 0x67, .l = 6 },
+ [0x9a] = { .c = 0x67, .l = 6 },
+ [0x9b] = { .c = 0x67, .l = 6 },
+ [0x9c] = { .c = 0x68, .l = 6 },
+ [0x9d] = { .c = 0x68, .l = 6 },
+ [0x9e] = { .c = 0x68, .l = 6 },
+ [0x9f] = { .c = 0x68, .l = 6 },
+ [0xa0] = { .c = 0x6c, .l = 6 },
+ [0xa1] = { .c = 0x6c, .l = 6 },
+ [0xa2] = { .c = 0x6c, .l = 6 },
+ [0xa3] = { .c = 0x6c, .l = 6 },
+ [0xa4] = { .c = 0x6d, .l = 6 },
+ [0xa5] = { .c = 0x6d, .l = 6 },
+ [0xa6] = { .c = 0x6d, .l = 6 },
+ [0xa7] = { .c = 0x6d, .l = 6 },
+ [0xa8] = { .c = 0x6e, .l = 6 },
+ [0xa9] = { .c = 0x6e, .l = 6 },
+ [0xaa] = { .c = 0x6e, .l = 6 },
+ [0xab] = { .c = 0x6e, .l = 6 },
+ [0xac] = { .c = 0x70, .l = 6 },
+ [0xad] = { .c = 0x70, .l = 6 },
+ [0xae] = { .c = 0x70, .l = 6 },
+ [0xaf] = { .c = 0x70, .l = 6 },
+ [0xb0] = { .c = 0x72, .l = 6 },
+ [0xb1] = { .c = 0x72, .l = 6 },
+ [0xb2] = { .c = 0x72, .l = 6 },
+ [0xb3] = { .c = 0x72, .l = 6 },
+ [0xb4] = { .c = 0x75, .l = 6 },
+ [0xb5] = { .c = 0x75, .l = 6 },
+ [0xb6] = { .c = 0x75, .l = 6 },
+ [0xb7] = { .c = 0x75, .l = 6 },
+ [0xb8] = { .c = 0x3a, .l = 7 },
+ [0xb9] = { .c = 0x3a, .l = 7 },
+ [0xba] = { .c = 0x42, .l = 7 },
+ [0xbb] = { .c = 0x42, .l = 7 },
+ [0xbc] = { .c = 0x43, .l = 7 },
+ [0xbd] = { .c = 0x43, .l = 7 },
+ [0xbe] = { .c = 0x44, .l = 7 },
+ [0xbf] = { .c = 0x44, .l = 7 },
+ [0xc0] = { .c = 0x45, .l = 7 },
+ [0xc1] = { .c = 0x45, .l = 7 },
+ [0xc2] = { .c = 0x46, .l = 7 },
+ [0xc3] = { .c = 0x46, .l = 7 },
+ [0xc4] = { .c = 0x47, .l = 7 },
+ [0xc5] = { .c = 0x47, .l = 7 },
+ [0xc6] = { .c = 0x48, .l = 7 },
+ [0xc7] = { .c = 0x48, .l = 7 },
+ [0xc8] = { .c = 0x49, .l = 7 },
+ [0xc9] = { .c = 0x49, .l = 7 },
+ [0xca] = { .c = 0x4a, .l = 7 },
+ [0xcb] = { .c = 0x4a, .l = 7 },
+ [0xcc] = { .c = 0x4b, .l = 7 },
+ [0xcd] = { .c = 0x4b, .l = 7 },
+ [0xce] = { .c = 0x4c, .l = 7 },
+ [0xcf] = { .c = 0x4c, .l = 7 },
+ [0xd0] = { .c = 0x4d, .l = 7 },
+ [0xd1] = { .c = 0x4d, .l = 7 },
+ [0xd2] = { .c = 0x4e, .l = 7 },
+ [0xd3] = { .c = 0x4e, .l = 7 },
+ [0xd4] = { .c = 0x4f, .l = 7 },
+ [0xd5] = { .c = 0x4f, .l = 7 },
+ [0xd6] = { .c = 0x50, .l = 7 },
+ [0xd7] = { .c = 0x50, .l = 7 },
+ [0xd8] = { .c = 0x51, .l = 7 },
+ [0xd9] = { .c = 0x51, .l = 7 },
+ [0xda] = { .c = 0x52, .l = 7 },
+ [0xdb] = { .c = 0x52, .l = 7 },
+ [0xdc] = { .c = 0x53, .l = 7 },
+ [0xdd] = { .c = 0x53, .l = 7 },
+ [0xde] = { .c = 0x54, .l = 7 },
+ [0xdf] = { .c = 0x54, .l = 7 },
+ [0xe0] = { .c = 0x55, .l = 7 },
+ [0xe1] = { .c = 0x55, .l = 7 },
+ [0xe2] = { .c = 0x56, .l = 7 },
+ [0xe3] = { .c = 0x56, .l = 7 },
+ [0xe4] = { .c = 0x57, .l = 7 },
+ [0xe5] = { .c = 0x57, .l = 7 },
+ [0xe6] = { .c = 0x59, .l = 7 },
+ [0xe7] = { .c = 0x59, .l = 7 },
+ [0xe8] = { .c = 0x6a, .l = 7 },
+ [0xe9] = { .c = 0x6a, .l = 7 },
+ [0xea] = { .c = 0x6b, .l = 7 },
+ [0xeb] = { .c = 0x6b, .l = 7 },
+ [0xec] = { .c = 0x71, .l = 7 },
+ [0xed] = { .c = 0x71, .l = 7 },
+ [0xee] = { .c = 0x76, .l = 7 },
+ [0xef] = { .c = 0x76, .l = 7 },
+ [0xf0] = { .c = 0x77, .l = 7 },
+ [0xf1] = { .c = 0x77, .l = 7 },
+ [0xf2] = { .c = 0x78, .l = 7 },
+ [0xf3] = { .c = 0x78, .l = 7 },
+ [0xf4] = { .c = 0x79, .l = 7 },
+ [0xf5] = { .c = 0x79, .l = 7 },
+ [0xf6] = { .c = 0x7a, .l = 7 },
+ [0xf7] = { .c = 0x7a, .l = 7 },
+ [0xf8] = { .c = 0x26, .l = 8 },
+ [0xf9] = { .c = 0x2a, .l = 8 },
+ [0xfa] = { .c = 0x2c, .l = 8 },
+ [0xfb] = { .c = 0x3b, .l = 8 },
+ [0xfc] = { .c = 0x58, .l = 8 },
+ [0xfd] = { .c = 0x5a, .l = 8 },
+};
+
+struct rht rht_bit24_17[256] = {
+ [0x00] = { .c = 0x21, .l = 10 },
+ [0x01] = { .c = 0x21, .l = 10 },
+ [0x02] = { .c = 0x21, .l = 10 },
+ [0x03] = { .c = 0x21, .l = 10 },
+ [0x04] = { .c = 0x21, .l = 10 },
+ [0x05] = { .c = 0x21, .l = 10 },
+ [0x06] = { .c = 0x21, .l = 10 },
+ [0x07] = { .c = 0x21, .l = 10 },
+ [0x08] = { .c = 0x21, .l = 10 },
+ [0x09] = { .c = 0x21, .l = 10 },
+ [0x0a] = { .c = 0x21, .l = 10 },
+ [0x0b] = { .c = 0x21, .l = 10 },
+ [0x0c] = { .c = 0x21, .l = 10 },
+ [0x0d] = { .c = 0x21, .l = 10 },
+ [0x0e] = { .c = 0x21, .l = 10 },
+ [0x0f] = { .c = 0x21, .l = 10 },
+ [0x10] = { .c = 0x21, .l = 10 },
+ [0x11] = { .c = 0x21, .l = 10 },
+ [0x12] = { .c = 0x21, .l = 10 },
+ [0x13] = { .c = 0x21, .l = 10 },
+ [0x14] = { .c = 0x21, .l = 10 },
+ [0x15] = { .c = 0x21, .l = 10 },
+ [0x16] = { .c = 0x21, .l = 10 },
+ [0x17] = { .c = 0x21, .l = 10 },
+ [0x18] = { .c = 0x21, .l = 10 },
+ [0x19] = { .c = 0x21, .l = 10 },
+ [0x1a] = { .c = 0x21, .l = 10 },
+ [0x1b] = { .c = 0x21, .l = 10 },
+ [0x1c] = { .c = 0x21, .l = 10 },
+ [0x1d] = { .c = 0x21, .l = 10 },
+ [0x1e] = { .c = 0x21, .l = 10 },
+ [0x1f] = { .c = 0x21, .l = 10 },
+ [0x20] = { .c = 0x22, .l = 10 },
+ [0x21] = { .c = 0x22, .l = 10 },
+ [0x22] = { .c = 0x22, .l = 10 },
+ [0x23] = { .c = 0x22, .l = 10 },
+ [0x24] = { .c = 0x22, .l = 10 },
+ [0x25] = { .c = 0x22, .l = 10 },
+ [0x26] = { .c = 0x22, .l = 10 },
+ [0x27] = { .c = 0x22, .l = 10 },
+ [0x28] = { .c = 0x22, .l = 10 },
+ [0x29] = { .c = 0x22, .l = 10 },
+ [0x2a] = { .c = 0x22, .l = 10 },
+ [0x2b] = { .c = 0x22, .l = 10 },
+ [0x2c] = { .c = 0x22, .l = 10 },
+ [0x2d] = { .c = 0x22, .l = 10 },
+ [0x2e] = { .c = 0x22, .l = 10 },
+ [0x2f] = { .c = 0x22, .l = 10 },
+ [0x30] = { .c = 0x22, .l = 10 },
+ [0x31] = { .c = 0x22, .l = 10 },
+ [0x32] = { .c = 0x22, .l = 10 },
+ [0x33] = { .c = 0x22, .l = 10 },
+ [0x34] = { .c = 0x22, .l = 10 },
+ [0x35] = { .c = 0x22, .l = 10 },
+ [0x36] = { .c = 0x22, .l = 10 },
+ [0x37] = { .c = 0x22, .l = 10 },
+ [0x38] = { .c = 0x22, .l = 10 },
+ [0x39] = { .c = 0x22, .l = 10 },
+ [0x3a] = { .c = 0x22, .l = 10 },
+ [0x3b] = { .c = 0x22, .l = 10 },
+ [0x3c] = { .c = 0x22, .l = 10 },
+ [0x3d] = { .c = 0x22, .l = 10 },
+ [0x3e] = { .c = 0x22, .l = 10 },
+ [0x3f] = { .c = 0x22, .l = 10 },
+ [0x40] = { .c = 0x28, .l = 10 },
+ [0x41] = { .c = 0x28, .l = 10 },
+ [0x42] = { .c = 0x28, .l = 10 },
+ [0x43] = { .c = 0x28, .l = 10 },
+ [0x44] = { .c = 0x28, .l = 10 },
+ [0x45] = { .c = 0x28, .l = 10 },
+ [0x46] = { .c = 0x28, .l = 10 },
+ [0x47] = { .c = 0x28, .l = 10 },
+ [0x48] = { .c = 0x28, .l = 10 },
+ [0x49] = { .c = 0x28, .l = 10 },
+ [0x4a] = { .c = 0x28, .l = 10 },
+ [0x4b] = { .c = 0x28, .l = 10 },
+ [0x4c] = { .c = 0x28, .l = 10 },
+ [0x4d] = { .c = 0x28, .l = 10 },
+ [0x4e] = { .c = 0x28, .l = 10 },
+ [0x4f] = { .c = 0x28, .l = 10 },
+ [0x50] = { .c = 0x28, .l = 10 },
+ [0x51] = { .c = 0x28, .l = 10 },
+ [0x52] = { .c = 0x28, .l = 10 },
+ [0x53] = { .c = 0x28, .l = 10 },
+ [0x54] = { .c = 0x28, .l = 10 },
+ [0x55] = { .c = 0x28, .l = 10 },
+ [0x56] = { .c = 0x28, .l = 10 },
+ [0x57] = { .c = 0x28, .l = 10 },
+ [0x58] = { .c = 0x28, .l = 10 },
+ [0x59] = { .c = 0x28, .l = 10 },
+ [0x5a] = { .c = 0x28, .l = 10 },
+ [0x5b] = { .c = 0x28, .l = 10 },
+ [0x5c] = { .c = 0x28, .l = 10 },
+ [0x5d] = { .c = 0x28, .l = 10 },
+ [0x5e] = { .c = 0x28, .l = 10 },
+ [0x5f] = { .c = 0x28, .l = 10 },
+ [0x60] = { .c = 0x29, .l = 10 },
+ [0x61] = { .c = 0x29, .l = 10 },
+ [0x62] = { .c = 0x29, .l = 10 },
+ [0x63] = { .c = 0x29, .l = 10 },
+ [0x64] = { .c = 0x29, .l = 10 },
+ [0x65] = { .c = 0x29, .l = 10 },
+ [0x66] = { .c = 0x29, .l = 10 },
+ [0x67] = { .c = 0x29, .l = 10 },
+ [0x68] = { .c = 0x29, .l = 10 },
+ [0x69] = { .c = 0x29, .l = 10 },
+ [0x6a] = { .c = 0x29, .l = 10 },
+ [0x6b] = { .c = 0x29, .l = 10 },
+ [0x6c] = { .c = 0x29, .l = 10 },
+ [0x6d] = { .c = 0x29, .l = 10 },
+ [0x6e] = { .c = 0x29, .l = 10 },
+ [0x6f] = { .c = 0x29, .l = 10 },
+ [0x70] = { .c = 0x29, .l = 10 },
+ [0x71] = { .c = 0x29, .l = 10 },
+ [0x72] = { .c = 0x29, .l = 10 },
+ [0x73] = { .c = 0x29, .l = 10 },
+ [0x74] = { .c = 0x29, .l = 10 },
+ [0x75] = { .c = 0x29, .l = 10 },
+ [0x76] = { .c = 0x29, .l = 10 },
+ [0x77] = { .c = 0x29, .l = 10 },
+ [0x78] = { .c = 0x29, .l = 10 },
+ [0x79] = { .c = 0x29, .l = 10 },
+ [0x7a] = { .c = 0x29, .l = 10 },
+ [0x7b] = { .c = 0x29, .l = 10 },
+ [0x7c] = { .c = 0x29, .l = 10 },
+ [0x7d] = { .c = 0x29, .l = 10 },
+ [0x7e] = { .c = 0x29, .l = 10 },
+ [0x7f] = { .c = 0x29, .l = 10 },
+ [0x80] = { .c = 0x3f, .l = 10 },
+ [0x81] = { .c = 0x3f, .l = 10 },
+ [0x82] = { .c = 0x3f, .l = 10 },
+ [0x83] = { .c = 0x3f, .l = 10 },
+ [0x84] = { .c = 0x3f, .l = 10 },
+ [0x85] = { .c = 0x3f, .l = 10 },
+ [0x86] = { .c = 0x3f, .l = 10 },
+ [0x87] = { .c = 0x3f, .l = 10 },
+ [0x88] = { .c = 0x3f, .l = 10 },
+ [0x89] = { .c = 0x3f, .l = 10 },
+ [0x8a] = { .c = 0x3f, .l = 10 },
+ [0x8b] = { .c = 0x3f, .l = 10 },
+ [0x8c] = { .c = 0x3f, .l = 10 },
+ [0x8d] = { .c = 0x3f, .l = 10 },
+ [0x8e] = { .c = 0x3f, .l = 10 },
+ [0x8f] = { .c = 0x3f, .l = 10 },
+ [0x90] = { .c = 0x3f, .l = 10 },
+ [0x91] = { .c = 0x3f, .l = 10 },
+ [0x92] = { .c = 0x3f, .l = 10 },
+ [0x93] = { .c = 0x3f, .l = 10 },
+ [0x94] = { .c = 0x3f, .l = 10 },
+ [0x95] = { .c = 0x3f, .l = 10 },
+ [0x96] = { .c = 0x3f, .l = 10 },
+ [0x97] = { .c = 0x3f, .l = 10 },
+ [0x98] = { .c = 0x3f, .l = 10 },
+ [0x99] = { .c = 0x3f, .l = 10 },
+ [0x9a] = { .c = 0x3f, .l = 10 },
+ [0x9b] = { .c = 0x3f, .l = 10 },
+ [0x9c] = { .c = 0x3f, .l = 10 },
+ [0x9d] = { .c = 0x3f, .l = 10 },
+ [0x9e] = { .c = 0x3f, .l = 10 },
+ [0x9f] = { .c = 0x3f, .l = 10 },
+ [0xa0] = { .c = 0x27, .l = 11 },
+ [0xa1] = { .c = 0x27, .l = 11 },
+ [0xa2] = { .c = 0x27, .l = 11 },
+ [0xa3] = { .c = 0x27, .l = 11 },
+ [0xa4] = { .c = 0x27, .l = 11 },
+ [0xa5] = { .c = 0x27, .l = 11 },
+ [0xa6] = { .c = 0x27, .l = 11 },
+ [0xa7] = { .c = 0x27, .l = 11 },
+ [0xa8] = { .c = 0x27, .l = 11 },
+ [0xa9] = { .c = 0x27, .l = 11 },
+ [0xaa] = { .c = 0x27, .l = 11 },
+ [0xab] = { .c = 0x27, .l = 11 },
+ [0xac] = { .c = 0x27, .l = 11 },
+ [0xad] = { .c = 0x27, .l = 11 },
+ [0xae] = { .c = 0x27, .l = 11 },
+ [0xaf] = { .c = 0x27, .l = 11 },
+ [0xb0] = { .c = 0x2b, .l = 11 },
+ [0xb1] = { .c = 0x2b, .l = 11 },
+ [0xb2] = { .c = 0x2b, .l = 11 },
+ [0xb3] = { .c = 0x2b, .l = 11 },
+ [0xb4] = { .c = 0x2b, .l = 11 },
+ [0xb5] = { .c = 0x2b, .l = 11 },
+ [0xb6] = { .c = 0x2b, .l = 11 },
+ [0xb7] = { .c = 0x2b, .l = 11 },
+ [0xb8] = { .c = 0x2b, .l = 11 },
+ [0xb9] = { .c = 0x2b, .l = 11 },
+ [0xba] = { .c = 0x2b, .l = 11 },
+ [0xbb] = { .c = 0x2b, .l = 11 },
+ [0xbc] = { .c = 0x2b, .l = 11 },
+ [0xbd] = { .c = 0x2b, .l = 11 },
+ [0xbe] = { .c = 0x2b, .l = 11 },
+ [0xbf] = { .c = 0x2b, .l = 11 },
+ [0xc0] = { .c = 0x7c, .l = 11 },
+ [0xc1] = { .c = 0x7c, .l = 11 },
+ [0xc2] = { .c = 0x7c, .l = 11 },
+ [0xc3] = { .c = 0x7c, .l = 11 },
+ [0xc4] = { .c = 0x7c, .l = 11 },
+ [0xc5] = { .c = 0x7c, .l = 11 },
+ [0xc6] = { .c = 0x7c, .l = 11 },
+ [0xc7] = { .c = 0x7c, .l = 11 },
+ [0xc8] = { .c = 0x7c, .l = 11 },
+ [0xc9] = { .c = 0x7c, .l = 11 },
+ [0xca] = { .c = 0x7c, .l = 11 },
+ [0xcb] = { .c = 0x7c, .l = 11 },
+ [0xcc] = { .c = 0x7c, .l = 11 },
+ [0xcd] = { .c = 0x7c, .l = 11 },
+ [0xce] = { .c = 0x7c, .l = 11 },
+ [0xcf] = { .c = 0x7c, .l = 11 },
+ [0xd0] = { .c = 0x23, .l = 12 },
+ [0xd1] = { .c = 0x23, .l = 12 },
+ [0xd2] = { .c = 0x23, .l = 12 },
+ [0xd3] = { .c = 0x23, .l = 12 },
+ [0xd4] = { .c = 0x23, .l = 12 },
+ [0xd5] = { .c = 0x23, .l = 12 },
+ [0xd6] = { .c = 0x23, .l = 12 },
+ [0xd7] = { .c = 0x23, .l = 12 },
+ [0xd8] = { .c = 0x3e, .l = 12 },
+ [0xd9] = { .c = 0x3e, .l = 12 },
+ [0xda] = { .c = 0x3e, .l = 12 },
+ [0xdb] = { .c = 0x3e, .l = 12 },
+ [0xdc] = { .c = 0x3e, .l = 12 },
+ [0xdd] = { .c = 0x3e, .l = 12 },
+ [0xde] = { .c = 0x3e, .l = 12 },
+ [0xdf] = { .c = 0x3e, .l = 12 },
+ [0xe0] = { .c = 0x00, .l = 13 },
+ [0xe1] = { .c = 0x00, .l = 13 },
+ [0xe2] = { .c = 0x00, .l = 13 },
+ [0xe3] = { .c = 0x00, .l = 13 },
+ [0xe4] = { .c = 0x24, .l = 13 },
+ [0xe5] = { .c = 0x24, .l = 13 },
+ [0xe6] = { .c = 0x24, .l = 13 },
+ [0xe7] = { .c = 0x24, .l = 13 },
+ [0xe8] = { .c = 0x40, .l = 13 },
+ [0xe9] = { .c = 0x40, .l = 13 },
+ [0xea] = { .c = 0x40, .l = 13 },
+ [0xeb] = { .c = 0x40, .l = 13 },
+ [0xec] = { .c = 0x5b, .l = 13 },
+ [0xed] = { .c = 0x5b, .l = 13 },
+ [0xee] = { .c = 0x5b, .l = 13 },
+ [0xef] = { .c = 0x5b, .l = 13 },
+ [0xf0] = { .c = 0x5d, .l = 13 },
+ [0xf1] = { .c = 0x5d, .l = 13 },
+ [0xf2] = { .c = 0x5d, .l = 13 },
+ [0xf3] = { .c = 0x5d, .l = 13 },
+ [0xf4] = { .c = 0x7e, .l = 13 },
+ [0xf5] = { .c = 0x7e, .l = 13 },
+ [0xf6] = { .c = 0x7e, .l = 13 },
+ [0xf7] = { .c = 0x7e, .l = 13 },
+ [0xf8] = { .c = 0x5e, .l = 14 },
+ [0xf9] = { .c = 0x5e, .l = 14 },
+ [0xfa] = { .c = 0x7d, .l = 14 },
+ [0xfb] = { .c = 0x7d, .l = 14 },
+ [0xfc] = { .c = 0x3c, .l = 15 },
+ [0xfd] = { .c = 0x60, .l = 15 },
+ [0xfe] = { .c = 0x7b, .l = 15 },
+};
+
+struct rht rht_bit15_11_fe[32] = {
+ [0x00] = { .c = 0x5c, .l = 19 },
+ [0x01] = { .c = 0x5c, .l = 19 },
+ [0x02] = { .c = 0x5c, .l = 19 },
+ [0x03] = { .c = 0x5c, .l = 19 },
+ [0x04] = { .c = 0xc3, .l = 19 },
+ [0x05] = { .c = 0xc3, .l = 19 },
+ [0x06] = { .c = 0xc3, .l = 19 },
+ [0x07] = { .c = 0xc3, .l = 19 },
+ [0x08] = { .c = 0xd0, .l = 19 },
+ [0x09] = { .c = 0xd0, .l = 19 },
+ [0x0a] = { .c = 0xd0, .l = 19 },
+ [0x0b] = { .c = 0xd0, .l = 19 },
+ [0x0c] = { .c = 0x80, .l = 20 },
+ [0x0d] = { .c = 0x80, .l = 20 },
+ [0x0e] = { .c = 0x82, .l = 20 },
+ [0x0f] = { .c = 0x82, .l = 20 },
+ [0x10] = { .c = 0x83, .l = 20 },
+ [0x11] = { .c = 0x83, .l = 20 },
+ [0x12] = { .c = 0xa2, .l = 20 },
+ [0x13] = { .c = 0xa2, .l = 20 },
+ [0x14] = { .c = 0xb8, .l = 20 },
+ [0x15] = { .c = 0xb8, .l = 20 },
+ [0x16] = { .c = 0xc2, .l = 20 },
+ [0x17] = { .c = 0xc2, .l = 20 },
+ [0x18] = { .c = 0xe0, .l = 20 },
+ [0x19] = { .c = 0xe0, .l = 20 },
+ [0x1a] = { .c = 0xe2, .l = 20 },
+ [0x1b] = { .c = 0xe2, .l = 20 },
+ [0x1c] = { .c = 0x99, .l = 21 },
+ [0x1d] = { .c = 0xa1, .l = 21 },
+ [0x1e] = { .c = 0xa7, .l = 21 },
+ [0x1f] = { .c = 0xac, .l = 21 },
+};
+
+struct rht rht_bit15_8[256] = {
+ [0x00] = { .c = 0xb0, .l = 21 },
+ [0x01] = { .c = 0xb0, .l = 21 },
+ [0x02] = { .c = 0xb0, .l = 21 },
+ [0x03] = { .c = 0xb0, .l = 21 },
+ [0x04] = { .c = 0xb0, .l = 21 },
+ [0x05] = { .c = 0xb0, .l = 21 },
+ [0x06] = { .c = 0xb0, .l = 21 },
+ [0x07] = { .c = 0xb0, .l = 21 },
+ [0x08] = { .c = 0xb1, .l = 21 },
+ [0x09] = { .c = 0xb1, .l = 21 },
+ [0x0a] = { .c = 0xb1, .l = 21 },
+ [0x0b] = { .c = 0xb1, .l = 21 },
+ [0x0c] = { .c = 0xb1, .l = 21 },
+ [0x0d] = { .c = 0xb1, .l = 21 },
+ [0x0e] = { .c = 0xb1, .l = 21 },
+ [0x0f] = { .c = 0xb1, .l = 21 },
+ [0x10] = { .c = 0xb3, .l = 21 },
+ [0x11] = { .c = 0xb3, .l = 21 },
+ [0x12] = { .c = 0xb3, .l = 21 },
+ [0x13] = { .c = 0xb3, .l = 21 },
+ [0x14] = { .c = 0xb3, .l = 21 },
+ [0x15] = { .c = 0xb3, .l = 21 },
+ [0x16] = { .c = 0xb3, .l = 21 },
+ [0x17] = { .c = 0xb3, .l = 21 },
+ [0x18] = { .c = 0xd1, .l = 21 },
+ [0x19] = { .c = 0xd1, .l = 21 },
+ [0x1a] = { .c = 0xd1, .l = 21 },
+ [0x1b] = { .c = 0xd1, .l = 21 },
+ [0x1c] = { .c = 0xd1, .l = 21 },
+ [0x1d] = { .c = 0xd1, .l = 21 },
+ [0x1e] = { .c = 0xd1, .l = 21 },
+ [0x1f] = { .c = 0xd1, .l = 21 },
+ [0x20] = { .c = 0xd8, .l = 21 },
+ [0x21] = { .c = 0xd8, .l = 21 },
+ [0x22] = { .c = 0xd8, .l = 21 },
+ [0x23] = { .c = 0xd8, .l = 21 },
+ [0x24] = { .c = 0xd8, .l = 21 },
+ [0x25] = { .c = 0xd8, .l = 21 },
+ [0x26] = { .c = 0xd8, .l = 21 },
+ [0x27] = { .c = 0xd8, .l = 21 },
+ [0x28] = { .c = 0xd9, .l = 21 },
+ [0x29] = { .c = 0xd9, .l = 21 },
+ [0x2a] = { .c = 0xd9, .l = 21 },
+ [0x2b] = { .c = 0xd9, .l = 21 },
+ [0x2c] = { .c = 0xd9, .l = 21 },
+ [0x2d] = { .c = 0xd9, .l = 21 },
+ [0x2e] = { .c = 0xd9, .l = 21 },
+ [0x2f] = { .c = 0xd9, .l = 21 },
+ [0x30] = { .c = 0xe3, .l = 21 },
+ [0x31] = { .c = 0xe3, .l = 21 },
+ [0x32] = { .c = 0xe3, .l = 21 },
+ [0x33] = { .c = 0xe3, .l = 21 },
+ [0x34] = { .c = 0xe3, .l = 21 },
+ [0x35] = { .c = 0xe3, .l = 21 },
+ [0x36] = { .c = 0xe3, .l = 21 },
+ [0x37] = { .c = 0xe3, .l = 21 },
+ [0x38] = { .c = 0xe5, .l = 21 },
+ [0x39] = { .c = 0xe5, .l = 21 },
+ [0x3a] = { .c = 0xe5, .l = 21 },
+ [0x3b] = { .c = 0xe5, .l = 21 },
+ [0x3c] = { .c = 0xe5, .l = 21 },
+ [0x3d] = { .c = 0xe5, .l = 21 },
+ [0x3e] = { .c = 0xe5, .l = 21 },
+ [0x3f] = { .c = 0xe5, .l = 21 },
+ [0x40] = { .c = 0xe6, .l = 21 },
+ [0x41] = { .c = 0xe6, .l = 21 },
+ [0x42] = { .c = 0xe6, .l = 21 },
+ [0x43] = { .c = 0xe6, .l = 21 },
+ [0x44] = { .c = 0xe6, .l = 21 },
+ [0x45] = { .c = 0xe6, .l = 21 },
+ [0x46] = { .c = 0xe6, .l = 21 },
+ [0x47] = { .c = 0xe6, .l = 21 },
+ [0x48] = { .c = 0x81, .l = 22 },
+ [0x49] = { .c = 0x81, .l = 22 },
+ [0x4a] = { .c = 0x81, .l = 22 },
+ [0x4b] = { .c = 0x81, .l = 22 },
+ [0x4c] = { .c = 0x84, .l = 22 },
+ [0x4d] = { .c = 0x84, .l = 22 },
+ [0x4e] = { .c = 0x84, .l = 22 },
+ [0x4f] = { .c = 0x84, .l = 22 },
+ [0x50] = { .c = 0x85, .l = 22 },
+ [0x51] = { .c = 0x85, .l = 22 },
+ [0x52] = { .c = 0x85, .l = 22 },
+ [0x53] = { .c = 0x85, .l = 22 },
+ [0x54] = { .c = 0x86, .l = 22 },
+ [0x55] = { .c = 0x86, .l = 22 },
+ [0x56] = { .c = 0x86, .l = 22 },
+ [0x57] = { .c = 0x86, .l = 22 },
+ [0x58] = { .c = 0x88, .l = 22 },
+ [0x59] = { .c = 0x88, .l = 22 },
+ [0x5a] = { .c = 0x88, .l = 22 },
+ [0x5b] = { .c = 0x88, .l = 22 },
+ [0x5c] = { .c = 0x92, .l = 22 },
+ [0x5d] = { .c = 0x92, .l = 22 },
+ [0x5e] = { .c = 0x92, .l = 22 },
+ [0x5f] = { .c = 0x92, .l = 22 },
+ [0x60] = { .c = 0x9a, .l = 22 },
+ [0x61] = { .c = 0x9a, .l = 22 },
+ [0x62] = { .c = 0x9a, .l = 22 },
+ [0x63] = { .c = 0x9a, .l = 22 },
+ [0x64] = { .c = 0x9c, .l = 22 },
+ [0x65] = { .c = 0x9c, .l = 22 },
+ [0x66] = { .c = 0x9c, .l = 22 },
+ [0x67] = { .c = 0x9c, .l = 22 },
+ [0x68] = { .c = 0xa0, .l = 22 },
+ [0x69] = { .c = 0xa0, .l = 22 },
+ [0x6a] = { .c = 0xa0, .l = 22 },
+ [0x6b] = { .c = 0xa0, .l = 22 },
+ [0x6c] = { .c = 0xa3, .l = 22 },
+ [0x6d] = { .c = 0xa3, .l = 22 },
+ [0x6e] = { .c = 0xa3, .l = 22 },
+ [0x6f] = { .c = 0xa3, .l = 22 },
+ [0x70] = { .c = 0xa4, .l = 22 },
+ [0x71] = { .c = 0xa4, .l = 22 },
+ [0x72] = { .c = 0xa4, .l = 22 },
+ [0x73] = { .c = 0xa4, .l = 22 },
+ [0x74] = { .c = 0xa9, .l = 22 },
+ [0x75] = { .c = 0xa9, .l = 22 },
+ [0x76] = { .c = 0xa9, .l = 22 },
+ [0x77] = { .c = 0xa9, .l = 22 },
+ [0x78] = { .c = 0xaa, .l = 22 },
+ [0x79] = { .c = 0xaa, .l = 22 },
+ [0x7a] = { .c = 0xaa, .l = 22 },
+ [0x7b] = { .c = 0xaa, .l = 22 },
+ [0x7c] = { .c = 0xad, .l = 22 },
+ [0x7d] = { .c = 0xad, .l = 22 },
+ [0x7e] = { .c = 0xad, .l = 22 },
+ [0x7f] = { .c = 0xad, .l = 22 },
+ [0x80] = { .c = 0xb2, .l = 22 },
+ [0x81] = { .c = 0xb2, .l = 22 },
+ [0x82] = { .c = 0xb2, .l = 22 },
+ [0x83] = { .c = 0xb2, .l = 22 },
+ [0x84] = { .c = 0xb5, .l = 22 },
+ [0x85] = { .c = 0xb5, .l = 22 },
+ [0x86] = { .c = 0xb5, .l = 22 },
+ [0x87] = { .c = 0xb5, .l = 22 },
+ [0x88] = { .c = 0xb9, .l = 22 },
+ [0x89] = { .c = 0xb9, .l = 22 },
+ [0x8a] = { .c = 0xb9, .l = 22 },
+ [0x8b] = { .c = 0xb9, .l = 22 },
+ [0x8c] = { .c = 0xba, .l = 22 },
+ [0x8d] = { .c = 0xba, .l = 22 },
+ [0x8e] = { .c = 0xba, .l = 22 },
+ [0x8f] = { .c = 0xba, .l = 22 },
+ [0x90] = { .c = 0xbb, .l = 22 },
+ [0x91] = { .c = 0xbb, .l = 22 },
+ [0x92] = { .c = 0xbb, .l = 22 },
+ [0x93] = { .c = 0xbb, .l = 22 },
+ [0x94] = { .c = 0xbd, .l = 22 },
+ [0x95] = { .c = 0xbd, .l = 22 },
+ [0x96] = { .c = 0xbd, .l = 22 },
+ [0x97] = { .c = 0xbd, .l = 22 },
+ [0x98] = { .c = 0xbe, .l = 22 },
+ [0x99] = { .c = 0xbe, .l = 22 },
+ [0x9a] = { .c = 0xbe, .l = 22 },
+ [0x9b] = { .c = 0xbe, .l = 22 },
+ [0x9c] = { .c = 0xc4, .l = 22 },
+ [0x9d] = { .c = 0xc4, .l = 22 },
+ [0x9e] = { .c = 0xc4, .l = 22 },
+ [0x9f] = { .c = 0xc4, .l = 22 },
+ [0xa0] = { .c = 0xc6, .l = 22 },
+ [0xa1] = { .c = 0xc6, .l = 22 },
+ [0xa2] = { .c = 0xc6, .l = 22 },
+ [0xa3] = { .c = 0xc6, .l = 22 },
+ [0xa4] = { .c = 0xe4, .l = 22 },
+ [0xa5] = { .c = 0xe4, .l = 22 },
+ [0xa6] = { .c = 0xe4, .l = 22 },
+ [0xa7] = { .c = 0xe4, .l = 22 },
+ [0xa8] = { .c = 0xe8, .l = 22 },
+ [0xa9] = { .c = 0xe8, .l = 22 },
+ [0xaa] = { .c = 0xe8, .l = 22 },
+ [0xab] = { .c = 0xe8, .l = 22 },
+ [0xac] = { .c = 0xe9, .l = 22 },
+ [0xad] = { .c = 0xe9, .l = 22 },
+ [0xae] = { .c = 0xe9, .l = 22 },
+ [0xaf] = { .c = 0xe9, .l = 22 },
+ [0xb0] = { .c = 0x01, .l = 23 },
+ [0xb1] = { .c = 0x01, .l = 23 },
+ [0xb2] = { .c = 0x87, .l = 23 },
+ [0xb3] = { .c = 0x87, .l = 23 },
+ [0xb4] = { .c = 0x89, .l = 23 },
+ [0xb5] = { .c = 0x89, .l = 23 },
+ [0xb6] = { .c = 0x8a, .l = 23 },
+ [0xb7] = { .c = 0x8a, .l = 23 },
+ [0xb8] = { .c = 0x8b, .l = 23 },
+ [0xb9] = { .c = 0x8b, .l = 23 },
+ [0xba] = { .c = 0x8c, .l = 23 },
+ [0xbb] = { .c = 0x8c, .l = 23 },
+ [0xbc] = { .c = 0x8d, .l = 23 },
+ [0xbd] = { .c = 0x8d, .l = 23 },
+ [0xbe] = { .c = 0x8f, .l = 23 },
+ [0xbf] = { .c = 0x8f, .l = 23 },
+ [0xc0] = { .c = 0x93, .l = 23 },
+ [0xc1] = { .c = 0x93, .l = 23 },
+ [0xc2] = { .c = 0x95, .l = 23 },
+ [0xc3] = { .c = 0x95, .l = 23 },
+ [0xc4] = { .c = 0x96, .l = 23 },
+ [0xc5] = { .c = 0x96, .l = 23 },
+ [0xc6] = { .c = 0x97, .l = 23 },
+ [0xc7] = { .c = 0x97, .l = 23 },
+ [0xc8] = { .c = 0x98, .l = 23 },
+ [0xc9] = { .c = 0x98, .l = 23 },
+ [0xca] = { .c = 0x9b, .l = 23 },
+ [0xcb] = { .c = 0x9b, .l = 23 },
+ [0xcc] = { .c = 0x9d, .l = 23 },
+ [0xcd] = { .c = 0x9d, .l = 23 },
+ [0xce] = { .c = 0x9e, .l = 23 },
+ [0xcf] = { .c = 0x9e, .l = 23 },
+ [0xd0] = { .c = 0xa5, .l = 23 },
+ [0xd1] = { .c = 0xa5, .l = 23 },
+ [0xd2] = { .c = 0xa6, .l = 23 },
+ [0xd3] = { .c = 0xa6, .l = 23 },
+ [0xd4] = { .c = 0xa8, .l = 23 },
+ [0xd5] = { .c = 0xa8, .l = 23 },
+ [0xd6] = { .c = 0xae, .l = 23 },
+ [0xd7] = { .c = 0xae, .l = 23 },
+ [0xd8] = { .c = 0xaf, .l = 23 },
+ [0xd9] = { .c = 0xaf, .l = 23 },
+ [0xda] = { .c = 0xb4, .l = 23 },
+ [0xdb] = { .c = 0xb4, .l = 23 },
+ [0xdc] = { .c = 0xb6, .l = 23 },
+ [0xdd] = { .c = 0xb6, .l = 23 },
+ [0xde] = { .c = 0xb7, .l = 23 },
+ [0xdf] = { .c = 0xb7, .l = 23 },
+ [0xe0] = { .c = 0xbc, .l = 23 },
+ [0xe1] = { .c = 0xbc, .l = 23 },
+ [0xe2] = { .c = 0xbf, .l = 23 },
+ [0xe3] = { .c = 0xbf, .l = 23 },
+ [0xe4] = { .c = 0xc5, .l = 23 },
+ [0xe5] = { .c = 0xc5, .l = 23 },
+ [0xe6] = { .c = 0xe7, .l = 23 },
+ [0xe7] = { .c = 0xe7, .l = 23 },
+ [0xe8] = { .c = 0xef, .l = 23 },
+ [0xe9] = { .c = 0xef, .l = 23 },
+ [0xea] = { .c = 0x09, .l = 24 },
+ [0xeb] = { .c = 0x8e, .l = 24 },
+ [0xec] = { .c = 0x90, .l = 24 },
+ [0xed] = { .c = 0x91, .l = 24 },
+ [0xee] = { .c = 0x94, .l = 24 },
+ [0xef] = { .c = 0x9f, .l = 24 },
+ [0xf0] = { .c = 0xab, .l = 24 },
+ [0xf1] = { .c = 0xce, .l = 24 },
+ [0xf2] = { .c = 0xd7, .l = 24 },
+ [0xf3] = { .c = 0xe1, .l = 24 },
+ [0xf4] = { .c = 0xec, .l = 24 },
+ [0xf5] = { .c = 0xed, .l = 24 },
+};
+
+struct rht rht_bit11_4[256] = {
+ [0x60] = { .c = 0xc7, .l = 25 },
+ [0x61] = { .c = 0xc7, .l = 25 },
+ [0x62] = { .c = 0xc7, .l = 25 },
+ [0x63] = { .c = 0xc7, .l = 25 },
+ [0x64] = { .c = 0xc7, .l = 25 },
+ [0x65] = { .c = 0xc7, .l = 25 },
+ [0x66] = { .c = 0xc7, .l = 25 },
+ [0x67] = { .c = 0xc7, .l = 25 },
+ [0x68] = { .c = 0xcf, .l = 25 },
+ [0x69] = { .c = 0xcf, .l = 25 },
+ [0x6a] = { .c = 0xcf, .l = 25 },
+ [0x6b] = { .c = 0xcf, .l = 25 },
+ [0x6c] = { .c = 0xcf, .l = 25 },
+ [0x6d] = { .c = 0xcf, .l = 25 },
+ [0x6e] = { .c = 0xcf, .l = 25 },
+ [0x6f] = { .c = 0xcf, .l = 25 },
+ [0x70] = { .c = 0xea, .l = 25 },
+ [0x71] = { .c = 0xea, .l = 25 },
+ [0x72] = { .c = 0xea, .l = 25 },
+ [0x73] = { .c = 0xea, .l = 25 },
+ [0x74] = { .c = 0xea, .l = 25 },
+ [0x75] = { .c = 0xea, .l = 25 },
+ [0x76] = { .c = 0xea, .l = 25 },
+ [0x77] = { .c = 0xea, .l = 25 },
+ [0x78] = { .c = 0xeb, .l = 25 },
+ [0x79] = { .c = 0xeb, .l = 25 },
+ [0x7a] = { .c = 0xeb, .l = 25 },
+ [0x7b] = { .c = 0xeb, .l = 25 },
+ [0x7c] = { .c = 0xeb, .l = 25 },
+ [0x7d] = { .c = 0xeb, .l = 25 },
+ [0x7e] = { .c = 0xeb, .l = 25 },
+ [0x7f] = { .c = 0xeb, .l = 25 },
+ [0x80] = { .c = 0xc0, .l = 26 },
+ [0x81] = { .c = 0xc0, .l = 26 },
+ [0x82] = { .c = 0xc0, .l = 26 },
+ [0x83] = { .c = 0xc0, .l = 26 },
+ [0x84] = { .c = 0xc1, .l = 26 },
+ [0x85] = { .c = 0xc1, .l = 26 },
+ [0x86] = { .c = 0xc1, .l = 26 },
+ [0x87] = { .c = 0xc1, .l = 26 },
+ [0x88] = { .c = 0xc8, .l = 26 },
+ [0x89] = { .c = 0xc8, .l = 26 },
+ [0x8a] = { .c = 0xc8, .l = 26 },
+ [0x8b] = { .c = 0xc8, .l = 26 },
+ [0x8c] = { .c = 0xc9, .l = 26 },
+ [0x8d] = { .c = 0xc9, .l = 26 },
+ [0x8e] = { .c = 0xc9, .l = 26 },
+ [0x8f] = { .c = 0xc9, .l = 26 },
+ [0x90] = { .c = 0xca, .l = 26 },
+ [0x91] = { .c = 0xca, .l = 26 },
+ [0x92] = { .c = 0xca, .l = 26 },
+ [0x93] = { .c = 0xca, .l = 26 },
+ [0x94] = { .c = 0xcd, .l = 26 },
+ [0x95] = { .c = 0xcd, .l = 26 },
+ [0x96] = { .c = 0xcd, .l = 26 },
+ [0x97] = { .c = 0xcd, .l = 26 },
+ [0x98] = { .c = 0xd2, .l = 26 },
+ [0x99] = { .c = 0xd2, .l = 26 },
+ [0x9a] = { .c = 0xd2, .l = 26 },
+ [0x9b] = { .c = 0xd2, .l = 26 },
+ [0x9c] = { .c = 0xd5, .l = 26 },
+ [0x9d] = { .c = 0xd5, .l = 26 },
+ [0x9e] = { .c = 0xd5, .l = 26 },
+ [0x9f] = { .c = 0xd5, .l = 26 },
+ [0xa0] = { .c = 0xda, .l = 26 },
+ [0xa1] = { .c = 0xda, .l = 26 },
+ [0xa2] = { .c = 0xda, .l = 26 },
+ [0xa3] = { .c = 0xda, .l = 26 },
+ [0xa4] = { .c = 0xdb, .l = 26 },
+ [0xa5] = { .c = 0xdb, .l = 26 },
+ [0xa6] = { .c = 0xdb, .l = 26 },
+ [0xa7] = { .c = 0xdb, .l = 26 },
+ [0xa8] = { .c = 0xee, .l = 26 },
+ [0xa9] = { .c = 0xee, .l = 26 },
+ [0xaa] = { .c = 0xee, .l = 26 },
+ [0xab] = { .c = 0xee, .l = 26 },
+ [0xac] = { .c = 0xf0, .l = 26 },
+ [0xad] = { .c = 0xf0, .l = 26 },
+ [0xae] = { .c = 0xf0, .l = 26 },
+ [0xaf] = { .c = 0xf0, .l = 26 },
+ [0xb0] = { .c = 0xf2, .l = 26 },
+ [0xb1] = { .c = 0xf2, .l = 26 },
+ [0xb2] = { .c = 0xf2, .l = 26 },
+ [0xb3] = { .c = 0xf2, .l = 26 },
+ [0xb4] = { .c = 0xf3, .l = 26 },
+ [0xb5] = { .c = 0xf3, .l = 26 },
+ [0xb6] = { .c = 0xf3, .l = 26 },
+ [0xb7] = { .c = 0xf3, .l = 26 },
+ [0xb8] = { .c = 0xff, .l = 26 },
+ [0xb9] = { .c = 0xff, .l = 26 },
+ [0xba] = { .c = 0xff, .l = 26 },
+ [0xbb] = { .c = 0xff, .l = 26 },
+ [0xbc] = { .c = 0xcb, .l = 27 },
+ [0xbd] = { .c = 0xcb, .l = 27 },
+ [0xbe] = { .c = 0xcc, .l = 27 },
+ [0xbf] = { .c = 0xcc, .l = 27 },
+ [0xc0] = { .c = 0xd3, .l = 27 },
+ [0xc1] = { .c = 0xd3, .l = 27 },
+ [0xc2] = { .c = 0xd4, .l = 27 },
+ [0xc3] = { .c = 0xd4, .l = 27 },
+ [0xc4] = { .c = 0xd6, .l = 27 },
+ [0xc5] = { .c = 0xd6, .l = 27 },
+ [0xc6] = { .c = 0xdd, .l = 27 },
+ [0xc7] = { .c = 0xdd, .l = 27 },
+ [0xc8] = { .c = 0xde, .l = 27 },
+ [0xc9] = { .c = 0xde, .l = 27 },
+ [0xca] = { .c = 0xdf, .l = 27 },
+ [0xcb] = { .c = 0xdf, .l = 27 },
+ [0xcc] = { .c = 0xf1, .l = 27 },
+ [0xcd] = { .c = 0xf1, .l = 27 },
+ [0xce] = { .c = 0xf4, .l = 27 },
+ [0xcf] = { .c = 0xf4, .l = 27 },
+ [0xd0] = { .c = 0xf5, .l = 27 },
+ [0xd1] = { .c = 0xf5, .l = 27 },
+ [0xd2] = { .c = 0xf6, .l = 27 },
+ [0xd3] = { .c = 0xf6, .l = 27 },
+ [0xd4] = { .c = 0xf7, .l = 27 },
+ [0xd5] = { .c = 0xf7, .l = 27 },
+ [0xd6] = { .c = 0xf8, .l = 27 },
+ [0xd7] = { .c = 0xf8, .l = 27 },
+ [0xd8] = { .c = 0xfa, .l = 27 },
+ [0xd9] = { .c = 0xfa, .l = 27 },
+ [0xda] = { .c = 0xfb, .l = 27 },
+ [0xdb] = { .c = 0xfb, .l = 27 },
+ [0xdc] = { .c = 0xfc, .l = 27 },
+ [0xdd] = { .c = 0xfc, .l = 27 },
+ [0xde] = { .c = 0xfd, .l = 27 },
+ [0xdf] = { .c = 0xfd, .l = 27 },
+ [0xe0] = { .c = 0xfe, .l = 27 },
+ [0xe1] = { .c = 0xfe, .l = 27 },
+ [0xe2] = { .c = 0x02, .l = 28 },
+ [0xe3] = { .c = 0x03, .l = 28 },
+ [0xe4] = { .c = 0x04, .l = 28 },
+ [0xe5] = { .c = 0x05, .l = 28 },
+ [0xe6] = { .c = 0x06, .l = 28 },
+ [0xe7] = { .c = 0x07, .l = 28 },
+ [0xe8] = { .c = 0x08, .l = 28 },
+ [0xe9] = { .c = 0x0b, .l = 28 },
+ [0xea] = { .c = 0x0c, .l = 28 },
+ [0xeb] = { .c = 0x0e, .l = 28 },
+ [0xec] = { .c = 0x0f, .l = 28 },
+ [0xed] = { .c = 0x10, .l = 28 },
+ [0xee] = { .c = 0x11, .l = 28 },
+ [0xef] = { .c = 0x12, .l = 28 },
+ [0xf0] = { .c = 0x13, .l = 28 },
+ [0xf1] = { .c = 0x14, .l = 28 },
+ [0xf2] = { .c = 0x15, .l = 28 },
+ [0xf3] = { .c = 0x17, .l = 28 },
+ [0xf4] = { .c = 0x18, .l = 28 },
+ [0xf5] = { .c = 0x19, .l = 28 },
+ [0xf6] = { .c = 0x1a, .l = 28 },
+ [0xf7] = { .c = 0x1b, .l = 28 },
+ [0xf8] = { .c = 0x1c, .l = 28 },
+ [0xf9] = { .c = 0x1d, .l = 28 },
+ [0xfa] = { .c = 0x1e, .l = 28 },
+ [0xfb] = { .c = 0x1f, .l = 28 },
+ [0xfc] = { .c = 0x7f, .l = 28 },
+ [0xfd] = { .c = 0xdc, .l = 28 },
+ [0xfe] = { .c = 0xf9, .l = 28 },
+ [0xff] = { .c = 0x0a, .l = 30 },
+ /* Note, when l==30, bits 2..3 give 00:0x0a, 01:0x0d, 10:0x16, 11:EOS */
+};
+
+/* huffman-encodes string <s> into buffer <out> and returns the number of
+ * output bytes. The caller must ensure that <out> is large enough (ie at
+ * least 4 times as long as <s>).
+ *
+ * FIXME: bits are only counted for now, no code is emitted!
+ */
+int huff_enc(const char *s, char *out)
+{
+ int bits = 0;
+
+ while (*s) {
+ bits += ht[(uint8_t)*s].b;
+ s++;
+ }
+	bits += 7; /* round the bit count up to a whole output byte */
+
+ /* FIXME: huffman code is not emitted yet. */
+ //memset(out, 'H', bits / 8);
+ return bits / 8;
+}
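+/* Example (illustrative only): per the RFC7541 code table, 'a' encodes on
+ * 5 bits, so huff_enc("aaa", out) counts 15 bits and returns (15 + 7) / 8,
+ * i.e. 2 output bytes.
+ */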
+
+/* decodes the huffman string <huff> of <hlen> bytes into <out> (at most
+ * <olen> bytes) and returns the output size, or -1 in case of error.
+ *
+ * The principle of the decoder is to look up full bytes in reverse-huffman
+ * tables. Since we may need up to 30 bits and the word positions are not
+ * always multiples of 8, we build the code word by shifting the "current"
+ * 32-bit word and the "next" one by the appropriate number of bits. Once
+ * the shift goes beyond 32, words are swapped and the "next" one is refilled
+ * with new bytes. Shift operations are cheap when done a single time like this.
+ * On 64-bit platforms it is possible to further improve this by storing both
+ * of them in a single word.
+ */
+int huff_dec(const uint8_t *huff, int hlen, char *out, int olen)
+{
+ char *out_start = out;
+ char *out_end = out + olen;
+ const uint8_t *huff_end = huff + hlen;
+ uint32_t curr = 0;
+ uint32_t next = 0;
+ uint32_t shift;
+ uint32_t code; /* The 30-bit code being looked up, MSB-aligned */
+ uint8_t sym;
+ int bleft; /* bits left */
+ int l;
+
+ code = 0;
+ shift = 64; // start with an empty buffer
+ bleft = hlen << 3;
+ while (bleft > 0 && out != out_end) {
+ while (shift >= 32) {
+ curr = next;
+
+ /* read up to 4 bytes into next */
+ next = 0;
+
+ if (huff + 4 <= huff_end) {
+ next = read_n32(huff);
+ huff += 4;
+ }
+ else {
+ /* note: we append 0 and not 0xff so that we can
+ * distinguish shifted bits from a really inserted
+ * EOS.
+ */
+ next = (((huff + 0 < huff_end) ? (uint32_t)huff[0] : 0x00) << 24) +
+ (((huff + 1 < huff_end) ? (uint32_t)huff[1] : 0x00) << 16) +
+ (((huff + 2 < huff_end) ? (uint32_t)huff[2] : 0x00) << 8) +
+ ((huff + 3 < huff_end) ? (uint32_t)huff[3] : 0x00);
+ huff = huff_end;
+ }
+
+ shift -= 32;
+ }
+
+ /* curr:next contain 64 bit of huffman code */
+ code = curr;
+ if (shift)
+ code = (code << shift) + (next >> (32 - shift));
+
+ /* now we necessarily have 32 bits available */
+ if ((code >> 24) < 0xfe) {
+ /* single byte */
+ l = rht_bit31_24[code >> 24].l;
+ sym = rht_bit31_24[code >> 24].c;
+ }
+ else if (((code >> 17) & 0xff) < 0xff) {
+ /* two bytes, 0xfe + 2 bits or 0xff + 2..7 bits */
+ l = rht_bit24_17[(code >> 17) & 0xff].l;
+ sym = rht_bit24_17[(code >> 17) & 0xff].c;
+ }
+ else if (((code >> 16) & 0xff) < 0xff) { /* 3..5 bits */
+ /* 0xff + 0xfe + 3..5 bits or
+ * 0xff + 0xff + 5..8 bits for values till 0xf5
+ */
+ l = rht_bit15_11_fe[(code >> 11) & 0x1f].l;
+ sym = rht_bit15_11_fe[(code >> 11) & 0x1f].c;
+ }
+ else if (((code >> 8) & 0xff) < 0xf6) { /* 5..8 bits */
+ /* that's 0xff + 0xff */
+ l = rht_bit15_8[(code >> 8) & 0xff].l;
+ sym = rht_bit15_8[(code >> 8) & 0xff].c;
+ }
+ else {
+ /* 0xff 0xff 0xf6..0xff */
+ l = rht_bit11_4[(code >> 4) & 0xff].l;
+ if (l < 30)
+ sym = rht_bit11_4[(code >> 4) & 0xff].c;
+ else if ((code & 0xff) == 0xf0)
+ sym = 10;
+ else if ((code & 0xff) == 0xf4)
+ sym = 13;
+ else if ((code & 0xff) == 0xf8)
+ sym = 22;
+ else { // 0xfc : EOS
+ break;
+ }
+ }
+
+ if (!l || bleft - l < 0)
+ break;
+
+ bleft -= l;
+ shift += l;
+ *out++ = sym;
+ }
+
+ if (bleft > 0) {
+ /* some bits were not consumed after the last code, they must
+ * match EOS (ie: all ones) and there must be 7 bits or less.
+		 * (RFC7541#5.2).
+ */
+ if (bleft > 7)
+ return -1;
+
+ if ((code & -(1 << (32 - bleft))) != (uint32_t)-(1 << (32 - bleft)))
+ return -1;
+ }
+
+ if (out < out_end)
+ *out = 0; // end of string whenever possible
+ return out - out_start;
+}
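+/* Usage sketch (illustrative only, kept out of the build with #if 0). The
+ * input bytes are the huffman coding of "www.example.com" taken from
+ * RFC7541 appendix C.4.1.
+ */
+#if 0
+static void huff_dec_example(void)
+{
+	static const uint8_t in[] = {
+		0xf1, 0xe3, 0xc2, 0xe5, 0xf2, 0x3a, 0x6b, 0xa0,
+		0xab, 0x90, 0xf4, 0xff,
+	};
+	char out[64];
+	int len = huff_dec(in, sizeof(in), out, sizeof(out));
+
+	/* on success len is 15 and out contains "www.example.com",
+	 * NUL-terminated since there is room left in the buffer.
+	 */
+	if (len < 0)
+		return; /* invalid huffman sequence */
+}
+#endif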
diff --git a/src/hpack-tbl.c b/src/hpack-tbl.c
new file mode 100644
index 0000000..990d2f7
--- /dev/null
+++ b/src/hpack-tbl.c
@@ -0,0 +1,372 @@
+/*
+ * HPACK header table management (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/hpack-tbl.h>
+
+/* static header table as in RFC7541 Appendix A. [0] unused. */
+const struct http_hdr hpack_sht[HPACK_SHT_SIZE] = {
+ [ 1] = { .n = IST(":authority"), .v = IST("") },
+ [ 2] = { .n = IST(":method"), .v = IST("GET") },
+ [ 3] = { .n = IST(":method"), .v = IST("POST") },
+ [ 4] = { .n = IST(":path"), .v = IST("/") },
+ [ 5] = { .n = IST(":path"), .v = IST("/index.html") },
+ [ 6] = { .n = IST(":scheme"), .v = IST("http") },
+ [ 7] = { .n = IST(":scheme"), .v = IST("https") },
+ [ 8] = { .n = IST(":status"), .v = IST("200") },
+ [ 9] = { .n = IST(":status"), .v = IST("204") },
+ [10] = { .n = IST(":status"), .v = IST("206") },
+ [11] = { .n = IST(":status"), .v = IST("304") },
+ [12] = { .n = IST(":status"), .v = IST("400") },
+ [13] = { .n = IST(":status"), .v = IST("404") },
+ [14] = { .n = IST(":status"), .v = IST("500") },
+ [15] = { .n = IST("accept-charset"), .v = IST("") },
+ [16] = { .n = IST("accept-encoding"), .v = IST("gzip, deflate") },
+ [17] = { .n = IST("accept-language"), .v = IST("") },
+ [18] = { .n = IST("accept-ranges"), .v = IST("") },
+ [19] = { .n = IST("accept"), .v = IST("") },
+ [20] = { .n = IST("access-control-allow-origin"), .v = IST("") },
+ [21] = { .n = IST("age"), .v = IST("") },
+ [22] = { .n = IST("allow"), .v = IST("") },
+ [23] = { .n = IST("authorization"), .v = IST("") },
+ [24] = { .n = IST("cache-control"), .v = IST("") },
+ [25] = { .n = IST("content-disposition"), .v = IST("") },
+ [26] = { .n = IST("content-encoding"), .v = IST("") },
+ [27] = { .n = IST("content-language"), .v = IST("") },
+ [28] = { .n = IST("content-length"), .v = IST("") },
+ [29] = { .n = IST("content-location"), .v = IST("") },
+ [30] = { .n = IST("content-range"), .v = IST("") },
+ [31] = { .n = IST("content-type") , .v = IST("") },
+ [32] = { .n = IST("cookie"), .v = IST("") },
+ [33] = { .n = IST("date"), .v = IST("") },
+ [34] = { .n = IST("etag"), .v = IST("") },
+ [35] = { .n = IST("expect"), .v = IST("") },
+ [36] = { .n = IST("expires"), .v = IST("") },
+ [37] = { .n = IST("from"), .v = IST("") },
+ [38] = { .n = IST("host"), .v = IST("") },
+ [39] = { .n = IST("if-match"), .v = IST("") },
+ [40] = { .n = IST("if-modified-since"), .v = IST("") },
+ [41] = { .n = IST("if-none-match"), .v = IST("") },
+ [42] = { .n = IST("if-range"), .v = IST("") },
+ [43] = { .n = IST("if-unmodified-since"), .v = IST("") },
+ [44] = { .n = IST("last-modified"), .v = IST("") },
+ [45] = { .n = IST("link"), .v = IST("") },
+ [46] = { .n = IST("location"), .v = IST("") },
+ [47] = { .n = IST("max-forwards"), .v = IST("") },
+ [48] = { .n = IST("proxy-authenticate"), .v = IST("") },
+ [49] = { .n = IST("proxy-authorization"), .v = IST("") },
+ [50] = { .n = IST("range"), .v = IST("") },
+ [51] = { .n = IST("referer"), .v = IST("") },
+ [52] = { .n = IST("refresh"), .v = IST("") },
+ [53] = { .n = IST("retry-after"), .v = IST("") },
+ [54] = { .n = IST("server"), .v = IST("") },
+ [55] = { .n = IST("set-cookie"), .v = IST("") },
+ [56] = { .n = IST("strict-transport-security"), .v = IST("") },
+ [57] = { .n = IST("transfer-encoding"), .v = IST("") },
+ [58] = { .n = IST("user-agent"), .v = IST("") },
+ [59] = { .n = IST("vary"), .v = IST("") },
+ [60] = { .n = IST("via"), .v = IST("") },
+ [61] = { .n = IST("www-authenticate"), .v = IST("") },
+};
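+/* For example, a fully-indexed field referencing static index 2 decodes to
+ * ":method: GET" straight from hpack_sht[2] above, without any literal
+ * bytes being transferred.
+ */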
+
+struct pool_head *pool_head_hpack_tbl __read_mostly = NULL;
+
+#ifdef DEBUG_HPACK
+/* dump the whole dynamic header table */
+void hpack_dht_dump(FILE *out, const struct hpack_dht *dht)
+{
+ unsigned int i;
+ unsigned int slot;
+ char name[4096], value[4096];
+
+ for (i = HPACK_SHT_SIZE; i < HPACK_SHT_SIZE + dht->used; i++) {
+ slot = (hpack_get_dte(dht, i - HPACK_SHT_SIZE + 1) - dht->dte);
+ fprintf(out, "idx=%u slot=%u name=<%s> value=<%s> addr=%u-%u\n",
+ i, slot,
+ istpad(name, hpack_idx_to_name(dht, i)).ptr,
+ istpad(value, hpack_idx_to_value(dht, i)).ptr,
+ dht->dte[slot].addr, dht->dte[slot].addr+dht->dte[slot].nlen+dht->dte[slot].vlen-1);
+ }
+}
+
+/* check for the whole dynamic header table consistency, abort on failures */
+void hpack_dht_check_consistency(const struct hpack_dht *dht)
+{
+ unsigned slot = hpack_dht_get_tail(dht);
+ unsigned used2 = dht->used;
+ unsigned total = 0;
+
+ if (!dht->used)
+ return;
+
+ if (dht->front >= dht->wrap)
+ abort();
+
+ if (dht->used > dht->wrap)
+ abort();
+
+ if (dht->head >= dht->wrap)
+ abort();
+
+ while (used2--) {
+ total += dht->dte[slot].nlen + dht->dte[slot].vlen;
+ slot++;
+ if (slot >= dht->wrap)
+ slot = 0;
+ }
+
+ if (total != dht->total) {
+ fprintf(stderr, "%d: total=%u dht=%u\n", __LINE__, total, dht->total);
+ abort();
+ }
+}
+#endif // DEBUG_HPACK
+
+/* rebuild a new dynamic header table from <dht> with an unwrapped index and
+ * contents at the end. The new table is returned, the caller must not use the
+ * previous one anymore. NULL may be returned if no table could be allocated.
+ */
+static struct hpack_dht *hpack_dht_defrag(struct hpack_dht *dht)
+{
+ struct hpack_dht *alt_dht;
+ uint16_t old, new;
+ uint32_t addr;
+
+ /* Note: for small tables we could use alloca() instead but
+ * portability especially for large tables can be problematic.
+ */
+ alt_dht = hpack_dht_alloc();
+ if (!alt_dht)
+ return NULL;
+
+ alt_dht->total = dht->total;
+ alt_dht->used = dht->used;
+ alt_dht->wrap = dht->used;
+
+ new = 0;
+ addr = alt_dht->size;
+
+ if (dht->used) {
+ /* start from the tail */
+ old = hpack_dht_get_tail(dht);
+ do {
+ alt_dht->dte[new].nlen = dht->dte[old].nlen;
+ alt_dht->dte[new].vlen = dht->dte[old].vlen;
+ addr -= dht->dte[old].nlen + dht->dte[old].vlen;
+ alt_dht->dte[new].addr = addr;
+
+ memcpy((void *)alt_dht + alt_dht->dte[new].addr,
+ (void *)dht + dht->dte[old].addr,
+ dht->dte[old].nlen + dht->dte[old].vlen);
+
+ old++;
+ if (old >= dht->wrap)
+ old = 0;
+ new++;
+ } while (new < dht->used);
+ }
+
+ alt_dht->front = alt_dht->head = new - 1;
+
+ memcpy(dht, alt_dht, dht->size);
+ hpack_dht_free(alt_dht);
+
+ return dht;
+}
+
+/* Purges table dht until a header field of <needed> bytes fits according to
+ * the protocol (adding 32 bytes overhead). Returns non-zero on success, zero
+ * on failure (ie: table empty but still not sufficient). It must only be
+ * called when the table is not large enough to suit the new entry and there
+ * are some entries left. In case of doubt, use hpack_dht_make_room() instead.
+ */
+int __hpack_dht_make_room(struct hpack_dht *dht, unsigned int needed)
+{
+ unsigned int used = dht->used;
+ unsigned int wrap = dht->wrap;
+ unsigned int tail;
+
+ do {
+ tail = ((dht->head + 1U < used) ? wrap : 0) + dht->head + 1U - used;
+ dht->total -= dht->dte[tail].nlen + dht->dte[tail].vlen;
+ if (tail == dht->front)
+ dht->front = dht->head;
+ used--;
+ } while (used && used * 32 + dht->total + needed + 32 > dht->size);
+
+ dht->used = used;
+
+ /* realign if empty */
+ if (!used)
+ dht->front = dht->head = 0;
+
+ /* pack the table if it doesn't wrap anymore */
+ if (dht->head + 1U >= used)
+ dht->wrap = dht->head + 1;
+
+ /* no need to check for 'used' here as if it doesn't fit, used==0 */
+ return needed + 32 <= dht->size;
+}
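+/* Worked example of the accounting above (illustrative only). RFC7541#4.1
+ * defines an entry's size as nlen + vlen + 32, hence the loop condition
+ * used*32 + total + needed + 32 > size. With size=4096, total=4000, used=10
+ * and needed=60: 10*32 + 4000 + 60 + 32 = 4412 > 4096, so the tail entry is
+ * evicted, and so on until the new entry (needed + 32 bytes) fits or the
+ * table is empty.
+ */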
+
+/* tries to insert a new header <name>:<value> in front of the current head. A
+ * negative value is returned on error. Zero is returned on success, including
+ * when the entry is too large to ever fit and the table was simply purged,
+ * which is valid per the protocol.
+ */
+int hpack_dht_insert(struct hpack_dht *dht, struct ist name, struct ist value)
+{
+ unsigned int used;
+ unsigned int head;
+ unsigned int prev;
+ unsigned int wrap;
+ unsigned int tail;
+ uint32_t headroom, tailroom;
+
+ if (!hpack_dht_make_room(dht, name.len + value.len))
+ return 0;
+
+ /* Now there is enough room in the table, that's guaranteed by the
+ * protocol, but not necessarily where we need it.
+ */
+
+ used = dht->used;
+ if (!used) {
+ /* easy, the table was empty */
+ dht->front = dht->head = 0;
+ dht->wrap = dht->used = 1;
+ dht->total = 0;
+ head = 0;
+ dht->dte[head].addr = dht->size - (name.len + value.len);
+ goto copy;
+ }
+
+ /* compute the new head, used and wrap position */
+ prev = head = dht->head;
+ wrap = dht->wrap;
+ tail = hpack_dht_get_tail(dht);
+
+ used++;
+ head++;
+
+ if (head >= wrap) {
+ /* head is leading the entries, we either need to push the
+ * table further or to loop back to released entries. We could
+ * force to loop back when at least half of the allocatable
+ * entries are free but in practice it never happens.
+ */
+ if ((sizeof(*dht) + (wrap + 1) * sizeof(dht->dte[0]) <= dht->dte[dht->front].addr))
+ wrap++;
+ else if (head >= used) /* there's a hole at the beginning */
+ head = 0;
+ else {
+ /* no more room, head hits tail and the index cannot be
+ * extended, we have to realign the whole table.
+ */
+ if (!hpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+ }
+ else if (used >= wrap) {
+ /* we've hit the tail, we need to reorganize the index so that
+ * the head is at the end (but not necessarily move the data).
+ */
+ if (!hpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+
+ /* Now we have updated head, used and wrap, we know that there is some
+ * available room at least from the protocol's perspective. This space
+ * is split in two areas :
+ *
+ * 1: if the previous head was the front cell, the space between the
+ * end of the index table and the front cell's address.
+ * 2: if the previous head was the front cell, the space between the
+ * end of the tail and the end of the table ; or if the previous
+ * head was not the front cell, the space between the end of the
+ * tail and the head's address.
+ */
+ if (prev == dht->front) {
+ /* the area was contiguous */
+ headroom = dht->dte[dht->front].addr - (sizeof(*dht) + wrap * sizeof(dht->dte[0]));
+ tailroom = dht->size - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+ else {
+ /* it's already wrapped so we can't store anything in the headroom */
+ headroom = 0;
+ tailroom = dht->dte[prev].addr - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+
+ /* We can decide to stop filling the headroom as soon as there's enough
+ * room left in the tail to suit the protocol, but tests show that in
+ * practice it almost never happens in other situations so the extra
+ * test is useless and we simply fill the headroom as long as it's
+ * available and we don't wrap.
+ */
+ if (prev == dht->front && headroom >= name.len + value.len) {
+ /* install upfront and update ->front */
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+ else if (tailroom >= name.len + value.len) {
+ dht->dte[head].addr = dht->dte[tail].addr + dht->dte[tail].nlen + dht->dte[tail].vlen + tailroom - (name.len + value.len);
+ }
+ else {
+ /* need to defragment the table before inserting upfront */
+ dht = hpack_dht_defrag(dht);
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+
+ dht->wrap = wrap;
+ dht->head = head;
+ dht->used = used;
+
+ copy:
+ dht->total += name.len + value.len;
+ dht->dte[head].nlen = name.len;
+ dht->dte[head].vlen = value.len;
+
+ memcpy((void *)dht + dht->dte[head].addr, name.ptr, name.len);
+ memcpy((void *)dht + dht->dte[head].addr + name.len, value.ptr, value.len);
+ return 0;
+}
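+/* Usage sketch (illustrative only, kept out of the build with #if 0). It
+ * assumes pool_head_hpack_tbl was set up so that hpack_dht_alloc() can
+ * succeed.
+ */
+#if 0
+static void dht_insert_example(void)
+{
+	struct hpack_dht *dht = hpack_dht_alloc();
+
+	if (!dht)
+		return;
+
+	if (hpack_dht_insert(dht, ist("x-custom"), ist("value")) >= 0) {
+		/* the most recent entry is dynamic index 1, i.e. absolute
+		 * index HPACK_SHT_SIZE for hpack_idx_to_name()/_value().
+		 */
+	}
+	hpack_dht_free(dht);
+}
+#endif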
diff --git a/src/hq_interop.c b/src/hq_interop.c
new file mode 100644
index 0000000..175b92d
--- /dev/null
+++ b/src/hq_interop.c
@@ -0,0 +1,172 @@
+#include <haproxy/hq_interop.h>
+
+#include <import/ist.h>
+#include <haproxy/buf.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/htx.h>
+#include <haproxy/http.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/qmux_http.h>
+
+static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin)
+{
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct buffer htx_buf = BUF_NULL;
+ struct ist path;
+ char *ptr = b_head(b);
+ char *end = b_wrap(b);
+ size_t size = b_size(b);
+ size_t data = b_data(b);
+
+ if (!data && fin) {
+ /* FIN is notified with an empty STREAM frame. */
+ BUG_ON(!qcs->sd); /* sd must already be attached here */
+ qcs_http_handle_standalone_fin(qcs);
+ return 0;
+ }
+
+ b_alloc(&htx_buf);
+ htx = htx_from_buf(&htx_buf);
+
+ /* skip method */
+ while (data && HTTP_IS_TOKEN(*ptr)) {
+ if (++ptr == end)
+ ptr -= size;
+ data--;
+ }
+
+ if (!data || !HTTP_IS_SPHT(*ptr)) {
+ fprintf(stderr, "truncated stream\n");
+ return 0;
+ }
+
+ if (++ptr == end)
+ ptr -= size;
+
+ if (!--data) {
+ fprintf(stderr, "truncated stream\n");
+ return 0;
+ }
+
+ /* extract path */
+ BUG_ON(HTTP_IS_LWS(*ptr));
+ path.ptr = ptr;
+ while (data && !HTTP_IS_LWS(*ptr)) {
+ if (++ptr == end)
+ ptr -= size;
+ data--;
+ }
+
+ if (!data) {
+ fprintf(stderr, "truncated stream\n");
+ return 0;
+ }
+
+ BUG_ON(!HTTP_IS_LWS(*ptr));
+ path.len = ptr - path.ptr;
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, 0, ist("GET"), path, ist("HTTP/1.0"));
+ if (!sl)
+ return -1;
+
+ sl->flags |= HTX_SL_F_BODYLESS;
+ sl->info.req.meth = find_http_meth("GET", 3);
+
+ htx_add_endof(htx, HTX_BLK_EOH);
+ htx_to_buf(htx, &htx_buf);
+
+ if (!qc_attach_sc(qcs, &htx_buf))
+ return -1;
+
+ b_free(&htx_buf);
+
+ if (fin)
+ htx->flags |= HTX_FL_EOM;
+
+ return b_data(b);
+}
+
+static struct buffer *mux_get_buf(struct qcs *qcs)
+{
+ if (!b_size(&qcs->tx.buf))
+ b_alloc(&qcs->tx.buf);
+
+ return &qcs->tx.buf;
+}
+
+static size_t hq_interop_snd_buf(struct qcs *qcs, struct htx *htx,
+ size_t count)
+{
+ enum htx_blk_type btype;
+ struct htx_blk *blk;
+ int32_t idx;
+ uint32_t bsize, fsize;
+ struct buffer *res, outbuf;
+ size_t total = 0;
+
+ res = mux_get_buf(qcs);
+ outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0);
+
+ while (count && !htx_is_empty(htx) && !(qcs->flags & QC_SF_BLK_MROOM)) {
+ /* Not implemented : QUIC on backend side */
+ idx = htx_get_head(htx);
+ blk = htx_get_blk(htx, idx);
+ btype = htx_get_blk_type(blk);
+ fsize = bsize = htx_get_blksz(blk);
+
+ BUG_ON(btype == HTX_BLK_REQ_SL);
+
+ switch (btype) {
+ case HTX_BLK_DATA:
+ if (fsize > count)
+ fsize = count;
+
+ if (b_room(&outbuf) < fsize)
+ fsize = b_room(&outbuf);
+
+ if (!fsize) {
+ qcs->flags |= QC_SF_BLK_MROOM;
+ goto end;
+ }
+
+ b_putblk(&outbuf, htx_get_blk_ptr(htx, blk), fsize);
+ total += fsize;
+ count -= fsize;
+
+ if (fsize == bsize)
+ htx_remove_blk(htx, blk);
+ else
+ htx_cut_data_blk(htx, blk, fsize);
+ break;
+
+ /* only body is transferred on HTTP/0.9 */
+ case HTX_BLK_RES_SL:
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ default:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+ }
+
+ end:
+ b_add(res, b_data(&outbuf));
+
+ return total;
+}
+
+static int hq_interop_attach(struct qcs *qcs, void *conn_ctx)
+{
+ qcs_wait_http_req(qcs);
+ return 0;
+}
+
+const struct qcc_app_ops hq_interop_ops = {
+ .decode_qcs = hq_interop_decode_qcs,
+ .snd_buf = hq_interop_snd_buf,
+ .attach = hq_interop_attach,
+};
diff --git a/src/http.c b/src/http.c
new file mode 100644
index 0000000..900c0ae
--- /dev/null
+++ b/src/http.c
@@ -0,0 +1,1309 @@
+/*
+ * HTTP semantics
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <haproxy/api.h>
+#include <haproxy/http.h>
+#include <haproxy/tools.h>
+
+/* It is about twice as fast on recent architectures to look up a byte in a
+ * table as to perform a boolean AND or OR between two tests. Refer to
+ * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
+ * neither a separator nor a CTL char. An http ver_token is any ASCII which can
+ * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
+ * digit. Note: please do not overwrite values in assignment since gcc-2.95
+ * will not handle them correctly. It's worth noting that chars 128..255 are
+ * nothing, not even control chars.
+ */
+const unsigned char http_char_classes[256] = {
+ [ 0] = HTTP_FLG_CTL,
+ [ 1] = HTTP_FLG_CTL,
+ [ 2] = HTTP_FLG_CTL,
+ [ 3] = HTTP_FLG_CTL,
+ [ 4] = HTTP_FLG_CTL,
+ [ 5] = HTTP_FLG_CTL,
+ [ 6] = HTTP_FLG_CTL,
+ [ 7] = HTTP_FLG_CTL,
+ [ 8] = HTTP_FLG_CTL,
+ [ 9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
+ [ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
+ [ 11] = HTTP_FLG_CTL,
+ [ 12] = HTTP_FLG_CTL,
+ [ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
+ [ 14] = HTTP_FLG_CTL,
+ [ 15] = HTTP_FLG_CTL,
+ [ 16] = HTTP_FLG_CTL,
+ [ 17] = HTTP_FLG_CTL,
+ [ 18] = HTTP_FLG_CTL,
+ [ 19] = HTTP_FLG_CTL,
+ [ 20] = HTTP_FLG_CTL,
+ [ 21] = HTTP_FLG_CTL,
+ [ 22] = HTTP_FLG_CTL,
+ [ 23] = HTTP_FLG_CTL,
+ [ 24] = HTTP_FLG_CTL,
+ [ 25] = HTTP_FLG_CTL,
+ [ 26] = HTTP_FLG_CTL,
+ [ 27] = HTTP_FLG_CTL,
+ [ 28] = HTTP_FLG_CTL,
+ [ 29] = HTTP_FLG_CTL,
+ [ 30] = HTTP_FLG_CTL,
+ [ 31] = HTTP_FLG_CTL,
+ [' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
+ ['!'] = HTTP_FLG_TOK,
+ ['"'] = HTTP_FLG_SEP,
+ ['#'] = HTTP_FLG_TOK,
+ ['$'] = HTTP_FLG_TOK,
+ ['%'] = HTTP_FLG_TOK,
+ ['&'] = HTTP_FLG_TOK,
+ [ 39] = HTTP_FLG_TOK,
+ ['('] = HTTP_FLG_SEP,
+ [')'] = HTTP_FLG_SEP,
+ ['*'] = HTTP_FLG_TOK,
+ ['+'] = HTTP_FLG_TOK,
+ [','] = HTTP_FLG_SEP,
+ ['-'] = HTTP_FLG_TOK,
+ ['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
+ ['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ [':'] = HTTP_FLG_SEP,
+ [';'] = HTTP_FLG_SEP,
+ ['<'] = HTTP_FLG_SEP,
+ ['='] = HTTP_FLG_SEP,
+ ['>'] = HTTP_FLG_SEP,
+ ['?'] = HTTP_FLG_SEP,
+ ['@'] = HTTP_FLG_SEP,
+ ['A'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['B'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['C'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['D'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['E'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['F'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['G'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['I'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['J'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['K'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['L'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['M'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['N'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['O'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['Q'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['U'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['V'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['W'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['X'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['Y'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['Z'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['['] = HTTP_FLG_SEP,
+ [ 92] = HTTP_FLG_SEP,
+ [']'] = HTTP_FLG_SEP,
+ ['^'] = HTTP_FLG_TOK,
+ ['_'] = HTTP_FLG_TOK,
+ ['`'] = HTTP_FLG_TOK,
+ ['a'] = HTTP_FLG_TOK,
+ ['b'] = HTTP_FLG_TOK,
+ ['c'] = HTTP_FLG_TOK,
+ ['d'] = HTTP_FLG_TOK,
+ ['e'] = HTTP_FLG_TOK,
+ ['f'] = HTTP_FLG_TOK,
+ ['g'] = HTTP_FLG_TOK,
+ ['h'] = HTTP_FLG_TOK,
+ ['i'] = HTTP_FLG_TOK,
+ ['j'] = HTTP_FLG_TOK,
+ ['k'] = HTTP_FLG_TOK,
+ ['l'] = HTTP_FLG_TOK,
+ ['m'] = HTTP_FLG_TOK,
+ ['n'] = HTTP_FLG_TOK,
+ ['o'] = HTTP_FLG_TOK,
+ ['p'] = HTTP_FLG_TOK,
+ ['q'] = HTTP_FLG_TOK,
+ ['r'] = HTTP_FLG_TOK,
+ ['s'] = HTTP_FLG_TOK,
+ ['t'] = HTTP_FLG_TOK,
+ ['u'] = HTTP_FLG_TOK,
+ ['v'] = HTTP_FLG_TOK,
+ ['w'] = HTTP_FLG_TOK,
+ ['x'] = HTTP_FLG_TOK,
+ ['y'] = HTTP_FLG_TOK,
+ ['z'] = HTTP_FLG_TOK,
+ ['{'] = HTTP_FLG_SEP,
+ ['|'] = HTTP_FLG_TOK,
+ ['}'] = HTTP_FLG_SEP,
+ ['~'] = HTTP_FLG_TOK,
+ [127] = HTTP_FLG_CTL,
+};
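+/* Illustration: the HTTP_IS_* macros in http.h are plain flag tests on this
+ * table, e.g. (assuming the usual definition)
+ *   HTTP_IS_TOKEN(x) == (http_char_classes[(uint8_t)(x)] & HTTP_FLG_TOK)
+ * so HTTP_IS_TOKEN('a') is non-zero while HTTP_IS_TOKEN(':') is 0, ':' being
+ * flagged as a separator above.
+ */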
+
+const int http_err_codes[HTTP_ERR_SIZE] = {
+ [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
+ [HTTP_ERR_400] = 400,
+ [HTTP_ERR_401] = 401,
+ [HTTP_ERR_403] = 403,
+ [HTTP_ERR_404] = 404,
+ [HTTP_ERR_405] = 405,
+ [HTTP_ERR_407] = 407,
+ [HTTP_ERR_408] = 408,
+ [HTTP_ERR_410] = 410,
+ [HTTP_ERR_413] = 413,
+ [HTTP_ERR_421] = 421,
+ [HTTP_ERR_422] = 422,
+ [HTTP_ERR_425] = 425,
+ [HTTP_ERR_429] = 429,
+ [HTTP_ERR_500] = 500,
+ [HTTP_ERR_501] = 501,
+ [HTTP_ERR_502] = 502,
+ [HTTP_ERR_503] = 503,
+ [HTTP_ERR_504] = 504,
+};
+
+const char *http_err_msgs[HTTP_ERR_SIZE] = {
+ [HTTP_ERR_200] =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-length: 58\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
+
+ [HTTP_ERR_400] =
+ "HTTP/1.1 400 Bad request\r\n"
+ "Content-length: 90\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Connection: close\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
+
+ [HTTP_ERR_401] =
+ "HTTP/1.1 401 Unauthorized\r\n"
+ "Content-length: 112\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
+
+ [HTTP_ERR_403] =
+ "HTTP/1.1 403 Forbidden\r\n"
+ "Content-length: 93\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
+
+ [HTTP_ERR_404] =
+ "HTTP/1.1 404 Not Found\r\n"
+ "Content-length: 83\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>404 Not Found</h1>\nThe resource could not be found.\n</body></html>\n",
+
+ [HTTP_ERR_405] =
+ "HTTP/1.1 405 Method Not Allowed\r\n"
+ "Content-length: 146\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
+
+ [HTTP_ERR_407] =
+ "HTTP/1.1 407 Unauthorized\r\n"
+ "Content-length: 112\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
+
+ [HTTP_ERR_408] =
+ "HTTP/1.1 408 Request Time-out\r\n"
+ "Content-length: 110\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Connection: close\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
+
+ [HTTP_ERR_410] =
+ "HTTP/1.1 410 Gone\r\n"
+ "Content-length: 114\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>410 Gone</h1>\nThe resource is no longer available and will not be available again.\n</body></html>\n",
+
+ [HTTP_ERR_413] =
+ "HTTP/1.1 413 Payload Too Large\r\n"
+ "Content-length: 106\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>413 Payload Too Large</h1>\nThe request entity exceeds the maximum allowed.\n</body></html>\n",
+
+ [HTTP_ERR_421] =
+ "HTTP/1.1 421 Misdirected Request\r\n"
+ "Content-length: 104\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
+
+ [HTTP_ERR_422] =
+ "HTTP/1.1 422 Unprocessable Content\r\n"
+ "Content-length: 116\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>422 Unprocessable Content</h1>\nThe server cannot process the contained instructions.\n</body></html>\n",
+
+ [HTTP_ERR_425] =
+ "HTTP/1.1 425 Too Early\r\n"
+ "Content-length: 80\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
+
+ [HTTP_ERR_429] =
+ "HTTP/1.1 429 Too Many Requests\r\n"
+ "Content-length: 117\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
+
+ [HTTP_ERR_500] =
+ "HTTP/1.1 500 Internal Server Error\r\n"
+ "Content-length: 97\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occurred.\n</body></html>\n",
+
+ [HTTP_ERR_501] =
+ "HTTP/1.1 501 Not Implemented\r\n"
+ "Content-length: 136\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>501 Not Implemented</h1>\n.The server does not support the functionality required to fulfill the request.\n</body></html>\n",
+
+ [HTTP_ERR_502] =
+ "HTTP/1.1 502 Bad Gateway\r\n"
+ "Content-length: 107\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
+
+ [HTTP_ERR_503] =
+ "HTTP/1.1 503 Service Unavailable\r\n"
+ "Content-length: 107\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
+
+ [HTTP_ERR_504] =
+ "HTTP/1.1 504 Gateway Time-out\r\n"
+ "Content-length: 92\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
+};
+
+const struct ist http_known_methods[HTTP_METH_OTHER] = {
+ [HTTP_METH_OPTIONS] = IST("OPTIONS"),
+ [HTTP_METH_GET] = IST("GET"),
+ [HTTP_METH_HEAD] = IST("HEAD"),
+ [HTTP_METH_POST] = IST("POST"),
+ [HTTP_METH_PUT] = IST("PUT"),
+ [HTTP_METH_DELETE] = IST("DELETE"),
+ [HTTP_METH_TRACE] = IST("TRACE"),
+ [HTTP_METH_CONNECT] = IST("CONNECT"),
+};
+
+/*
+ * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
+ * ones.
+ */
+enum http_meth_t find_http_meth(const char *str, const int len)
+{
+ const struct ist m = ist2(str, len);
+
+ if (isteq(m, ist("GET"))) return HTTP_METH_GET;
+ else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
+ else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
+ else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
+ else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
+ else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
+ else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
+ else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
+ else return HTTP_METH_OTHER;
+}
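+/* Examples (illustrative only):
+ *   find_http_meth("GET", 3)   -> HTTP_METH_GET
+ *   find_http_meth("PATCH", 5) -> HTTP_METH_OTHER (not in the known list)
+ */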
+
+/* This function returns HTTP_ERR_<num> (enum) matching http status code.
+ * Returned value should match codes from http_err_codes.
+ */
+int http_get_status_idx(unsigned int status)
+{
+ switch (status) {
+ case 200: return HTTP_ERR_200;
+ case 400: return HTTP_ERR_400;
+ case 401: return HTTP_ERR_401;
+ case 403: return HTTP_ERR_403;
+ case 404: return HTTP_ERR_404;
+ case 405: return HTTP_ERR_405;
+ case 407: return HTTP_ERR_407;
+ case 408: return HTTP_ERR_408;
+ case 410: return HTTP_ERR_410;
+ case 413: return HTTP_ERR_413;
+ case 421: return HTTP_ERR_421;
+ case 422: return HTTP_ERR_422;
+ case 425: return HTTP_ERR_425;
+ case 429: return HTTP_ERR_429;
+ case 500: return HTTP_ERR_500;
+ case 501: return HTTP_ERR_501;
+ case 502: return HTTP_ERR_502;
+ case 503: return HTTP_ERR_503;
+ case 504: return HTTP_ERR_504;
+ default: return HTTP_ERR_500;
+ }
+}
+
+/* This function returns a reason associated with the HTTP status.
+ * This function never fails; a reason string is always returned.
+ */
+const char *http_get_reason(unsigned int status)
+{
+ switch (status) {
+ case 100: return "Continue";
+ case 101: return "Switching Protocols";
+ case 102: return "Processing";
+ case 200: return "OK";
+ case 201: return "Created";
+ case 202: return "Accepted";
+ case 203: return "Non-Authoritative Information";
+ case 204: return "No Content";
+ case 205: return "Reset Content";
+ case 206: return "Partial Content";
+ case 207: return "Multi-Status";
+ case 210: return "Content Different";
+ case 226: return "IM Used";
+ case 300: return "Multiple Choices";
+ case 301: return "Moved Permanently";
+ case 302: return "Moved Temporarily";
+ case 303: return "See Other";
+ case 304: return "Not Modified";
+ case 305: return "Use Proxy";
+ case 307: return "Temporary Redirect";
+ case 308: return "Permanent Redirect";
+ case 310: return "Too many Redirects";
+ case 400: return "Bad Request";
+ case 401: return "Unauthorized";
+ case 402: return "Payment Required";
+ case 403: return "Forbidden";
+ case 404: return "Not Found";
+ case 405: return "Method Not Allowed";
+ case 406: return "Not Acceptable";
+ case 407: return "Proxy Authentication Required";
+ case 408: return "Request Time-out";
+ case 409: return "Conflict";
+ case 410: return "Gone";
+ case 411: return "Length Required";
+ case 412: return "Precondition Failed";
+ case 413: return "Request Entity Too Large";
+ case 414: return "Request-URI Too Long";
+ case 415: return "Unsupported Media Type";
+ case 416: return "Requested range unsatisfiable";
+ case 417: return "Expectation failed";
+ case 418: return "I'm a teapot";
+ case 421: return "Misdirected Request";
+ case 422: return "Unprocessable Content";
+ case 423: return "Locked";
+ case 424: return "Method failure";
+ case 425: return "Too Early";
+ case 426: return "Upgrade Required";
+ case 428: return "Precondition Required";
+ case 429: return "Too Many Requests";
+ case 431: return "Request Header Fields Too Large";
+ case 449: return "Retry With";
+ case 450: return "Blocked by Windows Parental Controls";
+ case 451: return "Unavailable For Legal Reasons";
+ case 456: return "Unrecoverable Error";
+ case 499: return "client has closed connection";
+ case 500: return "Internal Server Error";
+ case 501: return "Not Implemented";
+ case 502: return "Bad Gateway or Proxy Error";
+ case 503: return "Service Unavailable";
+ case 504: return "Gateway Time-out";
+ case 505: return "HTTP Version not supported";
+ case 506: return "Variant also negotiate";
+ case 507: return "Insufficient storage";
+ case 508: return "Loop detected";
+ case 509: return "Bandwidth Limit Exceeded";
+ case 510: return "Not extended";
+ case 511: return "Network authentication required";
+ case 520: return "Web server is returning an unknown error";
+ default:
+ switch (status) {
+ case 100 ... 199: return "Informational";
+ case 200 ... 299: return "Success";
+ case 300 ... 399: return "Redirection";
+ case 400 ... 499: return "Client Error";
+ case 500 ... 599: return "Server Error";
+ default: return "Other";
+ }
+ }
+}
+
+/* Returns the ist string corresponding to the port part (without ':') in the
+ * host <host>, IST_NULL if no ':' is found, or an empty IST if there is no
+ * digit. In the last case, the result is the original ist trimmed to 0, so be
+ * sure to test the result length before doing any pointer arithmetic.
+ */
+struct ist http_get_host_port(const struct ist host)
+{
+ char *start, *end, *ptr;
+
+ start = istptr(host);
+ end = istend(host);
+ for (ptr = end; ptr > start && isdigit((unsigned char)*--ptr););
+
+ /* no port found */
+ if (likely(*ptr != ':'))
+ return IST_NULL;
+ if (ptr+1 == end)
+ return isttrim(host, 0);
+
+ return istnext(ist2(ptr, end - ptr));
+}
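+/* Worked examples (illustrative only):
+ *   http_get_host_port(ist("example.com:8080")) -> "8080"
+ *   http_get_host_port(ist("example.com:"))     -> empty ist (len == 0)
+ *   http_get_host_port(ist("example.com"))      -> IST_NULL (no ':')
+ */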
+
+
+/* Returns non-zero if the port <port> is a default port. If the scheme <schm>
+ * is set, it is used to detect default ports (http => 80 and https => 443).
+ * Otherwise, both are considered default ports.
+ */
+int http_is_default_port(const struct ist schm, const struct ist port)
+{
+ if (!istlen(port))
+ return 1;
+
+ if (!isttest(schm))
+ return (isteq(port, ist("443")) || isteq(port, ist("80")));
+ else
+ return (isteq(port, ist("443")) && isteqi(schm, ist("https://"))) ||
+ (isteq(port, ist("80")) && isteqi(schm, ist("http://")));
+}
+
+/* Returns non-zero if the scheme <schm> is syntactically correct according to
+ * RFC3986#3.1, otherwise zero. It expects only the scheme and nothing else
+ * (particularly not the following "://").
+ * Scheme = alpha *(alpha|digit|'+'|'-'|'.')
+ */
+int http_validate_scheme(const struct ist schm)
+{
+ size_t i;
+
+ for (i = 0; i < schm.len; i++) {
+ if (likely((schm.ptr[i] >= 'a' && schm.ptr[i] <= 'z') ||
+ (schm.ptr[i] >= 'A' && schm.ptr[i] <= 'Z')))
+ continue;
+ if (unlikely(!i)) // first char must be alpha
+ return 0;
+ if ((schm.ptr[i] >= '0' && schm.ptr[i] <= '9') ||
+ schm.ptr[i] == '+' || schm.ptr[i] == '-' || schm.ptr[i] == '.')
+ continue;
+ return 0;
+ }
+ return !!i;
+}
+
+/* Parses the uri and looks for the scheme. If not found, an empty ist is
+ * returned. Otherwise, the ist pointing to the scheme is returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
+ */
+struct ist http_parse_scheme(struct http_uri_parser *parser)
+{
+ const char *ptr, *start, *end;
+
+ if (parser->state >= URI_PARSER_STATE_SCHEME_DONE)
+ goto not_found;
+
+ if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
+ goto not_found;
+
+ ptr = start = istptr(parser->uri);
+ end = istend(parser->uri);
+
+ if (isalpha((unsigned char)*ptr)) {
+ /* this is a scheme as described by RFC3986, par. 3.1, or only
+ * an authority (in case of a CONNECT method).
+ */
+ ptr++;
+ /* retrieve the scheme up to the suffix '://'. If the suffix is
+ * not found, this means there is no scheme and it is an
+ * authority-only uri.
+ */
+ while (ptr < end &&
+ (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
+ ptr++;
+ if (ptr == end || *ptr++ != ':')
+ goto not_found;
+ if (ptr == end || *ptr++ != '/')
+ goto not_found;
+ if (ptr == end || *ptr++ != '/')
+ goto not_found;
+ }
+ else {
+ goto not_found;
+ }
+
+ parser->uri = ist2(ptr, end - ptr);
+ parser->state = URI_PARSER_STATE_SCHEME_DONE;
+ return ist2(start, ptr - start);
+
+ not_found:
+ parser->state = URI_PARSER_STATE_SCHEME_DONE;
+ return IST_NULL;
+}
+
+/* Parses the uri and looks for the authority, between the scheme and the
+ * path. If no_userinfo is not zero, the part before the '@' (including it) is
+ * skipped. If not found, an empty ist is returned. Otherwise, the ist pointing
+ * to the authority is returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
+ */
+struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo)
+{
+ const char *ptr, *start, *end;
+
+ if (parser->state >= URI_PARSER_STATE_AUTHORITY_DONE)
+ goto not_found;
+
+ if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
+ goto not_found;
+
+ if (parser->state < URI_PARSER_STATE_SCHEME_DONE)
+ http_parse_scheme(parser);
+
+ ptr = start = istptr(parser->uri);
+ end = istend(parser->uri);
+
+ while (ptr < end && *ptr != '/') {
+ if (*ptr++ == '@' && no_userinfo)
+ start = ptr;
+ }
+
+	/* OK, ptr points to the '/' or the end */
+
+ authority:
+ parser->uri = ist2(ptr, end - ptr);
+ parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
+ return ist2(start, ptr - start);
+
+ not_found:
+ parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
+ return IST_NULL;
+}
+
+/* Parses the URI from the given transaction (which is assumed to be in request
+ * phase) and looks for the "/" beginning the PATH. If not found, IST_NULL is
+ * returned. Otherwise the pointer and length are returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
+ */
+struct ist http_parse_path(struct http_uri_parser *parser)
+{
+ const char *ptr, *end;
+
+ if (parser->state >= URI_PARSER_STATE_PATH_DONE)
+ goto not_found;
+
+ if (parser->format == URI_PARSER_FORMAT_EMPTY ||
+ parser->format == URI_PARSER_FORMAT_ASTERISK) {
+ goto not_found;
+ }
+
+ ptr = istptr(parser->uri);
+ end = istend(parser->uri);
+
+ /* If the uri is in absolute-path format, first skip the scheme and
+ * authority parts. No scheme will be found if the uri is in authority
+ * format, which indicates that the path won't be present.
+ */
+ if (parser->format == URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) {
+ if (parser->state < URI_PARSER_STATE_SCHEME_DONE) {
+ /* If no scheme found, uri is in authority format. No
+ * path is present.
+ */
+ if (!isttest(http_parse_scheme(parser)))
+ goto not_found;
+ }
+
+ if (parser->state < URI_PARSER_STATE_AUTHORITY_DONE)
+ http_parse_authority(parser, 1);
+
+ ptr = istptr(parser->uri);
+
+ if (ptr == end)
+ goto not_found;
+ }
+
+ parser->state = URI_PARSER_STATE_PATH_DONE;
+ return ist2(ptr, end - ptr);
+
+ not_found:
+ parser->state = URI_PARSER_STATE_PATH_DONE;
+ return IST_NULL;
+}
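+/* Usage sketch chaining the three parsers above (illustrative only, kept out
+ * of the build with #if 0). It assumes http_uri_parser_init() from http.h,
+ * which records the uri and detects its format.
+ */
+#if 0
+static void uri_parser_example(void)
+{
+	struct http_uri_parser parser =
+		http_uri_parser_init(ist("https://user@example.org/index.html"));
+	struct ist scheme    = http_parse_scheme(&parser);        /* "https://" */
+	struct ist authority = http_parse_authority(&parser, 1);  /* "example.org" */
+	struct ist path      = http_parse_path(&parser);          /* "/index.html" */
+}
+#endif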
+
+/* Parses the <value> of a Content-Length header field of an HTTP request. The
+ * function checks all comma-delimited occurrences of the value and verifies
+ * that they all match. <value> is sanitized on return to contain a single
+ * value if several identical values were found.
+ *
+ * <body_len> must be a valid pointer and is used to return the parsed length
+ * unless values differ. Also if <not_first> is true, <body_len> is assumed to
+ * point to the previously parsed value, which must be equal to the new length.
+ * This is useful if an HTTP message contains several Content-Length headers.
+ *
+ * Returns <0 if a value differs, 0 if the whole header can be dropped (i.e.
+ * already known), or >0 if the value can be indexed (first one). In the last
+ * case, the value might be adjusted and the caller must only add the updated
+ * value.
+ */
+int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len,
+ int not_first)
+{
+ char *e, *n;
+ unsigned long long cl;
+ struct ist word;
+ int check_prev = not_first;
+
+ word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
+ e = value->ptr + value->len;
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (unlikely(HTTP_IS_LWS(*word.ptr)))
+ continue;
+
+ /* digits only now */
+ for (cl = 0, n = word.ptr; n < e; n++) {
+ unsigned int c = *n - '0';
+ if (unlikely(c > 9)) {
+ /* non-digit */
+ if (unlikely(n == word.ptr)) // spaces only
+ goto fail;
+ break;
+ }
+ if (unlikely(cl > ULLONG_MAX / 10ULL))
+ goto fail; /* multiply overflow */
+ cl = cl * 10ULL;
+ if (unlikely(cl + c < cl))
+ goto fail; /* addition overflow */
+ cl = cl + c;
+ }
+
+ /* keep a copy of the exact cleaned value */
+ word.len = n - word.ptr;
+
+ /* skip trailing LWS till next comma or EOL */
+ for (; n < e; n++) {
+ if (!HTTP_IS_LWS(*n)) {
+ if (unlikely(*n != ','))
+ goto fail;
+ break;
+ }
+ }
+
+ /* if duplicate, must be equal */
+ if (check_prev && cl != *body_len)
+ goto fail;
+
+ /* OK, store this result as the one to be indexed */
+ *body_len = cl;
+ *value = word;
+ word.ptr = n;
+ check_prev = 1;
+ }
+
+ /* here we've reached the end with a single value or a series of
+ * identical values, all matching previous series if any. The last
+ * parsed value was sent back into <value>. We just have to decide
+ * if this occurrence has to be indexed (it's the first one) or
+ * silently skipped (it's not the first one)
+ */
+ return !not_first;
+ fail:
+ return -1;
+}
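+/* Worked example (illustrative only). With the header value " 42, 42" and
+ * not_first == 0:
+ *   - both comma-delimited occurrences parse to 42 and match;
+ *   - *body_len is set to 42 and <value> is rewritten to the single cleaned
+ *     token "42";
+ *   - the function returns 1 (index this first occurrence).
+ * A second "Content-Length: 42" header (not_first == 1) returns 0 (drop it),
+ * while "Content-Length: 43" would return -1 (mismatch).
+ */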
+
+/*
+ * Checks if <hdr> is exactly <name> for <len> chars, and ends with a colon.
+ * If so, returns the position of the first non-space character relative to
+ * <hdr>, or <end>-<hdr> if not found before. If no value is found, it tries
+ * to return a pointer to the place after the first space. Returns 0 if the
+ * header name does not match. Checks are case-insensitive.
+ */
+int http_header_match2(const char *hdr, const char *end,
+ const char *name, int len)
+{
+ const char *val;
+
+ if (hdr + len >= end)
+ return 0;
+ if (hdr[len] != ':')
+ return 0;
+ if (strncasecmp(hdr, name, len) != 0)
+ return 0;
+ val = hdr + len + 1;
+ while (val < end && HTTP_IS_SPHT(*val))
+ val++;
+ if ((val >= end) && (len + 2 <= end - hdr))
+ return len + 2; /* we may replace starting from second space */
+ return val - hdr;
+}
+
+/* Find the end of the header value contained between <s> and <e>. See RFC7230,
+ * par 3.2 for more information. Note that it requires a valid header to return
+ * a valid result. This works for headers defined as comma-separated lists.
+ */
+char *http_find_hdr_value_end(char *s, const char *e)
+{
+ int quoted, qdpair;
+
+ quoted = qdpair = 0;
+
+#ifdef HA_UNALIGNED_LE
+ /* speedup: skip everything not a comma nor a double quote */
+ for (; s <= e - sizeof(int); s += sizeof(int)) {
+ unsigned int c = *(int *)s; // comma
+ unsigned int q = c; // quote
+
+ c ^= 0x2c2c2c2c; // contains one zero on a comma
+ q ^= 0x22222222; // contains one zero on a quote
+
+ c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
+ q = (q - 0x01010101) & ~q; // contains 0x80 below a quote
+
+ if ((c | q) & 0x80808080)
+ break; // found a comma or a quote
+ }
+#endif
+ for (; s < e; s++) {
+ if (qdpair) qdpair = 0;
+ else if (quoted) {
+ if (*s == '\\') qdpair = 1;
+ else if (*s == '"') quoted = 0;
+ }
+ else if (*s == '"') quoted = 1;
+ else if (*s == ',') return s;
+ }
+ return s;
+}
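+/* Worked example of the SWAR trick above (illustrative only). XORing with
+ * 0x2c2c2c2c turns each ',' byte into 0x00, and (v - 0x01010101) & ~v leaves
+ * 0x80 in exactly the bytes of v that were zero. For the input "ab,c" read
+ * little-endian:
+ *   v = 0x632c6261
+ *   v ^ 0x2c2c2c2c = 0x4f004e4d            (zero byte where the comma was)
+ *   (v - 0x01010101) & ~v & 0x80808080 = 0x00800000  -> comma detected
+ */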
+
+/* Find the end of a cookie value contained between <s> and <e>. It works the
+ * same way as with headers above except that the semi-colon also ends a token.
+ * See RFC2965 for more information. Note that it requires a valid header to
+ * return a valid result.
+ */
+char *http_find_cookie_value_end(char *s, const char *e)
+{
+ int quoted, qdpair;
+
+ quoted = qdpair = 0;
+ for (; s < e; s++) {
+ if (qdpair) qdpair = 0;
+ else if (quoted) {
+ if (*s == '\\') qdpair = 1;
+ else if (*s == '"') quoted = 0;
+ }
+ else if (*s == '"') quoted = 1;
+ else if (*s == ',' || *s == ';') return s;
+ }
+ return s;
+}
+
+/* Try to find the next occurrence of a cookie name in a cookie header value.
+ * To match on any cookie name, <cookie_name_l> must be set to 0.
+ * The lookup begins at <hdr>. The pointer and size of the next occurrence of
+ * the cookie value are returned into *value and *value_l, and the function
+ * returns a pointer to the next position to search from if the value was found.
+ * Otherwise if the cookie was not found, NULL is returned and neither value
+ * nor value_l are touched. The input <hdr> string should first point to the
+ * header's value, and the <hdr_end> pointer must point to the first character
+ * not part of the value. <list> must be non-zero if value may represent a list
+ * of values (cookie headers). This makes it faster to abort parsing when no
+ * list is expected.
+ */
+char *http_extract_cookie_value(char *hdr, const char *hdr_end,
+ char *cookie_name, size_t cookie_name_l,
+ int list, char **value, size_t *value_l)
+{
+ char *equal, *att_end, *att_beg, *val_beg, *val_end;
+ char *next;
+
+ /* we search at least a cookie name followed by an equal, and more
+ * generally something like this :
+ * Cookie: NAME1 = VALUE 1 ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
+ */
+ for (att_beg = hdr; att_beg + cookie_name_l + 1 < hdr_end; att_beg = next + 1) {
+ /* Iterate through all cookies on this line */
+
+ while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
+ att_beg++;
+
+ /* find att_end : this is the first character after the last non
+ * space before the equal. It may be equal to hdr_end.
+ */
+ equal = att_end = att_beg;
+
+ while (equal < hdr_end) {
+ if (*equal == '=' || *equal == ';' || (list && *equal == ','))
+ break;
+ if (HTTP_IS_SPHT(*equal++))
+ continue;
+ att_end = equal;
+ }
+
+ /* here, <equal> points to '=', a delimiter or the end. <att_end>
+ * is between <att_beg> and <equal>, both may be identical.
+ */
+
+ /* look for end of cookie if there is an equal sign */
+ if (equal < hdr_end && *equal == '=') {
+ /* look for the beginning of the value */
+ val_beg = equal + 1;
+ while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
+ val_beg++;
+
+ /* find the end of the value, respecting quotes */
+ next = http_find_cookie_value_end(val_beg, hdr_end);
+
+ /* make val_end point to the first white space or delimiter after the value */
+ val_end = next;
+ while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
+ val_end--;
+ } else {
+ val_beg = val_end = next = equal;
+ }
+
+ /* We have nothing to do with attributes beginning with '$'. However,
+ * they will automatically be removed if a header before them is removed,
+ * since they're supposed to be linked together.
+ */
+ if (*att_beg == '$')
+ continue;
+
+ /* Ignore cookies with no equal sign */
+ if (equal == next)
+ continue;
+
+ /* Now we have the cookie name between att_beg and att_end, and
+ * its value between val_beg and val_end.
+ */
+
+ if (cookie_name_l == 0 || (att_end - att_beg == cookie_name_l &&
+ memcmp(att_beg, cookie_name, cookie_name_l) == 0)) {
+ /* let's return this value and indicate where to go on from */
+ *value = val_beg;
+ *value_l = val_end - val_beg;
+ return next + 1;
+ }
+
+ /* Set-Cookie headers only have the name in the first attr=value part */
+ if (!list)
+ break;
+ }
+
+ return NULL;
+}
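+/* Usage sketch (illustrative only, kept out of the build with #if 0):
+ * extracting NAME2 from a Cookie header value.
+ */
+#if 0
+static void cookie_example(void)
+{
+	char hdr[] = "NAME1=VALUE1; NAME2=VALUE2; NAME3=VALUE3";
+	char *val;
+	size_t val_l;
+
+	if (http_extract_cookie_value(hdr, hdr + strlen(hdr),
+	                              "NAME2", 5, 1, &val, &val_l)) {
+		/* val points to "VALUE2" and val_l == 6; the returned pointer
+		 * may be fed back as <hdr> to find further occurrences.
+		 */
+	}
+}
+#endif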
+
+/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
+ * value is larger than 1000, it is capped at 1000. The parser consumes up to
+ * 1 digit, one dot and 3 digits and stops on the first invalid character.
+ * Unparsable qvalues return 1000 as "q=1.000".
+ */
+int http_parse_qvalue(const char *qvalue, const char **end)
+{
+ int q = 1000;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q = (*qvalue++ - '0') * 1000;
+
+ if (*qvalue++ != '.')
+ goto out;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 100;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 10;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 1;
+ out:
+ if (q > 1000)
+ q = 1000;
+ if (end)
+ *end = qvalue;
+ return q;
+}
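+/* Worked examples (illustrative only):
+ *   http_parse_qvalue("0.8",  NULL) -> 800
+ *   http_parse_qvalue("0.45", NULL) -> 450
+ *   http_parse_qvalue("1",    NULL) -> 1000
+ *   http_parse_qvalue("junk", NULL) -> 1000 (unparsable, i.e. "q=1.000")
+ */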
+
+/*
+ * Given a url parameter, find the starting position of the first occurrence,
+ * or NULL if the parameter is not found.
+ *
+ * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
+ * the function will return query_string+8.
+ *
+ * Warning: this function returns a pointer that can point to the first chunk
+ * or the second chunk. The caller must check the position before using the
+ * result.
+ */
+const char *http_find_url_param_pos(const char **chunks,
+ const char* url_param_name, size_t url_param_name_l,
+ char delim)
+{
+ const char *pos, *last, *equal;
+ const char **bufs = chunks;
+ int l1, l2;
+
+
+ pos = bufs[0];
+ last = bufs[1];
+ while (pos < last) {
+		/* Check for the '=' sign after the candidate name. */
+ equal = pos + url_param_name_l;
+ if (fix_pointer_if_wrap(chunks, &equal)) {
+ if (equal >= chunks[3])
+ return NULL;
+ } else {
+ if (equal >= chunks[1])
+ return NULL;
+ }
+ if (*equal == '=') {
+ if (pos + url_param_name_l > last) {
+ /* process wrap case, we detect a wrap. In this case, the
+ * comparison is performed in two parts.
+ */
+
+ /* This is the end, we don't have any other chunk. */
+ if (bufs != chunks || !bufs[2])
+ return NULL;
+
+ /* Compute the length of each part of the comparison. */
+ l1 = last - pos;
+ l2 = url_param_name_l - l1;
+
+ /* The second buffer is too short to contain the compared string. */
+ if (bufs[2] + l2 > bufs[3])
+ return NULL;
+
+ if (memcmp(pos, url_param_name, l1) == 0 &&
+ memcmp(bufs[2], url_param_name+l1, l2) == 0)
+ return pos;
+
+				/* Perform wrapping and skip the string which failed the comparison. */
+ bufs += 2;
+ pos = bufs[0] + l2;
+ last = bufs[1];
+
+ } else {
+ /* process a simple comparison. */
+ if (memcmp(pos, url_param_name, url_param_name_l) == 0)
+ return pos;
+ pos += url_param_name_l + 1;
+ if (fix_pointer_if_wrap(chunks, &pos))
+ last = bufs[2];
+ }
+ }
+
+ while (1) {
+ /* Look for the next delimiter. */
+ while (pos < last && !http_is_param_delimiter(*pos, delim))
+ pos++;
+ if (pos < last)
+ break;
+ /* process buffer wrapping. */
+ if (bufs != chunks || !bufs[2])
+ return NULL;
+ bufs += 2;
+ pos = bufs[0];
+ last = bufs[1];
+ }
+ pos++;
+ }
+ return NULL;
+}
+
+/*
+ * Given a url parameter name and a query string, find the next value.
+ * An empty url_param_name matches the first available parameter.
+ * If the parameter is found, 1 is returned and *vstart / *vend are updated to
+ * respectively provide a pointer to the value and its end.
+ * Otherwise, 0 is returned and vstart/vend are not modified.
+ */
+int http_find_next_url_param(const char **chunks,
+ const char* url_param_name, size_t url_param_name_l,
+ const char **vstart, const char **vend, char delim)
+{
+ const char *arg_start, *qs_end;
+ const char *value_start, *value_end;
+
+ arg_start = chunks[0];
+ qs_end = chunks[1];
+ if (url_param_name_l) {
+ /* Looks for an argument name. */
+ arg_start = http_find_url_param_pos(chunks,
+ url_param_name, url_param_name_l,
+ delim);
+ /* Check for wrapping. */
+ if (arg_start >= qs_end)
+ qs_end = chunks[3];
+ }
+ if (!arg_start)
+ return 0;
+
+ if (!url_param_name_l) {
+ while (1) {
+ /* looks for the first argument. */
+ value_start = memchr(arg_start, '=', qs_end - arg_start);
+ if (!value_start) {
+ /* Check for wrapping. */
+ if (arg_start >= chunks[0] &&
+ arg_start < chunks[1] &&
+ chunks[2]) {
+ arg_start = chunks[2];
+ qs_end = chunks[3];
+ continue;
+ }
+ return 0;
+ }
+ break;
+ }
+ value_start++;
+ }
+ else {
+		/* Skip over the argument name and the '='. */
+ value_start = arg_start + url_param_name_l + 1;
+
+ /* Check for pointer wrapping. */
+ if (fix_pointer_if_wrap(chunks, &value_start)) {
+ /* Update the end pointer. */
+ qs_end = chunks[3];
+
+ /* Check for overflow. */
+ if (value_start >= qs_end)
+ return 0;
+ }
+ }
+
+ value_end = value_start;
+
+ while (1) {
+ while ((value_end < qs_end) && !http_is_param_delimiter(*value_end, delim))
+ value_end++;
+ if (value_end < qs_end)
+ break;
+ /* process buffer wrapping. */
+ if (value_end >= chunks[0] &&
+ value_end < chunks[1] &&
+ chunks[2]) {
+ value_end = chunks[2];
+ qs_end = chunks[3];
+ continue;
+ }
+ break;
+ }
+
+ *vstart = value_start;
+ *vend = value_end;
+ return 1;
+}
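+/* Usage sketch for the contiguous (non-wrapping) case (illustrative only,
+ * kept out of the build with #if 0). <chunks> describes up to two memory
+ * areas as {start1, end1, start2, end2}; the second pair stays NULL when the
+ * query string does not wrap.
+ */
+#if 0
+static void url_param_example(void)
+{
+	const char qs[] = "yo=mama&ye=daddy";
+	const char *chunks[4] = { qs, qs + sizeof(qs) - 1, NULL, NULL };
+	const char *vstart, *vend;
+
+	if (http_find_next_url_param(chunks, "ye", 2, &vstart, &vend, '&')) {
+		/* vstart points to "daddy" and vend - vstart == 5 */
+	}
+}
+#endif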
+
+/* Parses a single header line (without the CRLF) and splits it into its name
+ * and its value. The parsing is pretty naive and just skips spaces.
+ */
+int http_parse_header(const struct ist hdr, struct ist *name, struct ist *value)
+{
+ char *p = hdr.ptr;
+ char *end = p + hdr.len;
+
+ name->len = value->len = 0;
+
+ /* Skip leading spaces */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the header name */
+ name->ptr = p;
+ for (; p < end && HTTP_IS_TOKEN(*p); p++);
+ name->len = p - name->ptr;
+
+ /* Skip the ':' and spaces before and after it */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+ if (p < end && *p == ':') p++;
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the header value */
+ value->ptr = p;
+ value->len = end - p;
+
+ return 1;
+}
+
+/* Parses a single start line (without the CRLF) and splits it into 3 parts. The
+ * parsing is pretty naive and just skips spaces.
+ */
+int http_parse_stline(const struct ist line, struct ist *p1, struct ist *p2, struct ist *p3)
+{
+ char *p = line.ptr;
+ char *end = p + line.len;
+
+ p1->len = p2->len = p3->len = 0;
+
+ /* Skip leading spaces */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the first part */
+ p1->ptr = p;
+ for (; p < end && HTTP_IS_TOKEN(*p); p++);
+ p1->len = p - p1->ptr;
+
+ /* Skip spaces between p1 and p2 */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the second part */
+ p2->ptr = p;
+ for (; p < end && !HTTP_IS_SPHT(*p); p++);
+ p2->len = p - p2->ptr;
+
+ /* Skip spaces between p2 and p3 */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* The remaining is the third value */
+ p3->ptr = p;
+ p3->len = end - p;
+
+ return 1;
+}
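+
+/* Example (illustrative sketch, not from the original sources): for the
+ * request line "GET /index.html HTTP/1.1", the three parts come out as the
+ * method, the URI and the version:
+ *
+ *   struct ist p1, p2, p3;
+ *
+ *   http_parse_stline(ist("GET /index.html HTTP/1.1"), &p1, &p2, &p3);
+ *   // p1 = "GET", p2 = "/index.html", p3 = "HTTP/1.1"
+ */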
+
+/* Parses the value of a Status header with the following format: "Status: Code[
+ * Reason]". The parsing is pretty naive and just skips spaces. It returns the
+ * numeric value of the status code.
+ */
+int http_parse_status_val(const struct ist value, struct ist *status, struct ist *reason)
+{
+ char *p = value.ptr;
+ char *end = p + value.len;
+ uint16_t code;
+
+ status->len = reason->len = 0;
+
+ /* Skip leading spaces */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the status part */
+ status->ptr = p;
+ for (; p < end && HTTP_IS_TOKEN(*p); p++);
+ status->len = p - status->ptr;
+
+ /* Skip spaces between status and reason */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* the remaining is the reason */
+ reason->ptr = p;
+ reason->len = end - p;
+
+ code = strl2ui(status->ptr, status->len);
+ return code;
+}
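+
+/* Example (illustrative sketch, not from the original sources): a value such
+ * as "404 Not Found" splits into the code and the reason:
+ *
+ *   struct ist status, reason;
+ *   int code = http_parse_status_val(ist("404 Not Found"), &status, &reason);
+ *   // code == 404, status = "404", reason = "Not Found"
+ */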
+
+
+/* Returns non-zero if the two ETags are equivalent (see RFC 7232#2.3.2).
+ * If either of them is a weak ETag, its weakness prefix is discarded and a
+ * strict string comparison is performed.
+ * Returns 0 otherwise, including when either ETag is invalid.
+ */
+int http_compare_etags(struct ist etag1, struct ist etag2)
+{
+ enum http_etag_type etag_type1;
+ enum http_etag_type etag_type2;
+
+ etag_type1 = http_get_etag_type(etag1);
+ etag_type2 = http_get_etag_type(etag2);
+
+ if (etag_type1 == ETAG_INVALID || etag_type2 == ETAG_INVALID)
+ return 0;
+
+ /* Discard the 'W/' prefix when an ETag is a weak one. */
+ if (etag_type1 == ETAG_WEAK)
+ etag1 = istadv(etag1, 2);
+ if (etag_type2 == ETAG_WEAK)
+ etag2 = istadv(etag2, 2);
+
+ return isteq(etag1, etag2);
+}
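+
+/* Example (illustrative sketch, not from the original sources): since the
+ * weakness prefix is discarded before the comparison, a weak and a strong
+ * ETag carrying the same opaque value are considered equal here:
+ *
+ *   http_compare_etags(ist("W/\"xyz\""), ist("\"xyz\""));  // returns 1
+ *   http_compare_etags(ist("\"abc\""), ist("\"xyz\""));    // returns 0
+ */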
+
+
+/*
+ * Trim leading space or horizontal tab characters from <value> string.
+ * Returns the trimmed string.
+ */
+struct ist http_trim_leading_spht(struct ist value)
+{
+ struct ist ret = value;
+
+ while (ret.len && HTTP_IS_SPHT(ret.ptr[0])) {
+ ++ret.ptr;
+ --ret.len;
+ }
+
+ return ret;
+}
+
+/*
+ * Trim trailing space or horizontal tab characters from <value> string.
+ * Returns the trimmed string.
+ */
+struct ist http_trim_trailing_spht(struct ist value)
+{
+ struct ist ret = value;
+
+ while (ret.len && HTTP_IS_SPHT(ret.ptr[-1]))
+ --ret.len;
+
+ return ret;
+}
diff --git a/src/http_acl.c b/src/http_acl.c
new file mode 100644
index 0000000..bf29fc3
--- /dev/null
+++ b/src/http_acl.c
@@ -0,0 +1,185 @@
+/*
+ * HTTP ACLs declaration
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/chunk.h>
+#include <haproxy/http.h>
+#include <haproxy/pattern.h>
+#include <haproxy/pool.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+/* We use the pre-parsed method if it is known, and store its number as an
+ * integer. If it is unknown, we use the pointer and the length.
+ */
+static int pat_parse_meth(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ int len, meth;
+
+ len = strlen(text);
+ meth = find_http_meth(text, len);
+
+ pattern->val.i = meth;
+ if (meth == HTTP_METH_OTHER) {
+ pattern->ptr.str = (char *)text;
+ pattern->len = len;
+ }
+ else {
+ pattern->ptr.str = NULL;
+ pattern->len = 0;
+ }
+ return 1;
+}
+
+/* See above how the method is stored in the global pattern */
+static struct pattern *pat_match_meth(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ /* well-known method */
+ if (pattern->val.i != HTTP_METH_OTHER) {
+ if (smp->data.u.meth.meth == pattern->val.i)
+ return pattern;
+ else
+ continue;
+ }
+
+ /* Other method, we must compare the strings */
+ if (pattern->len != smp->data.u.meth.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.meth.str.area, smp->data.u.meth.str.data) == 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.meth.str.area, smp->data.u.meth.str.data) == 0))
+ return pattern;
+ }
+ return NULL;
+}
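+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * with the ACLs below, the well-known method GET is matched on its integer
+ * value while "PURGE" falls back to the string comparison above:
+ *
+ *   acl is_get   method GET
+ *   acl is_purge method PURGE
+ *   http-request deny if is_purge
+ */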
+
+/************************************************************************/
+/* All supported ACL keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { "base", "base", PAT_MATCH_STR },
+ { "base_beg", "base", PAT_MATCH_BEG },
+ { "base_dir", "base", PAT_MATCH_DIR },
+ { "base_dom", "base", PAT_MATCH_DOM },
+ { "base_end", "base", PAT_MATCH_END },
+ { "base_len", "base", PAT_MATCH_LEN },
+ { "base_reg", "base", PAT_MATCH_REG },
+ { "base_sub", "base", PAT_MATCH_SUB },
+
+ { "cook", "req.cook", PAT_MATCH_STR },
+ { "cook_beg", "req.cook", PAT_MATCH_BEG },
+ { "cook_dir", "req.cook", PAT_MATCH_DIR },
+ { "cook_dom", "req.cook", PAT_MATCH_DOM },
+ { "cook_end", "req.cook", PAT_MATCH_END },
+ { "cook_len", "req.cook", PAT_MATCH_LEN },
+ { "cook_reg", "req.cook", PAT_MATCH_REG },
+ { "cook_sub", "req.cook", PAT_MATCH_SUB },
+
+ { "hdr", "req.hdr", PAT_MATCH_STR },
+ { "hdr_beg", "req.hdr", PAT_MATCH_BEG },
+ { "hdr_dir", "req.hdr", PAT_MATCH_DIR },
+ { "hdr_dom", "req.hdr", PAT_MATCH_DOM },
+ { "hdr_end", "req.hdr", PAT_MATCH_END },
+ { "hdr_len", "req.hdr", PAT_MATCH_LEN },
+ { "hdr_reg", "req.hdr", PAT_MATCH_REG },
+ { "hdr_sub", "req.hdr", PAT_MATCH_SUB },
+
+ /* these two declarations use strings with list storage (instead of
+ * tree storage). The basic match is PAT_MATCH_STR, but the indexing
+ * and delete functions are tied to the list management. The parse
+ * and match methods are related to the corresponding fetch methods.
+ * This is a very particular ACL declaration mode.
+ */
+ { "http_auth_group", NULL, PAT_MATCH_STR, NULL, pat_idx_list_str, NULL, NULL, pat_match_auth },
+ { "method", NULL, PAT_MATCH_STR, pat_parse_meth, pat_idx_list_str, NULL, NULL, pat_match_meth },
+
+ { "path", "path", PAT_MATCH_STR },
+ { "path_beg", "path", PAT_MATCH_BEG },
+ { "path_dir", "path", PAT_MATCH_DIR },
+ { "path_dom", "path", PAT_MATCH_DOM },
+ { "path_end", "path", PAT_MATCH_END },
+ { "path_len", "path", PAT_MATCH_LEN },
+ { "path_reg", "path", PAT_MATCH_REG },
+ { "path_sub", "path", PAT_MATCH_SUB },
+
+ { "req_ver", "req.ver", PAT_MATCH_STR },
+ { "resp_ver", "res.ver", PAT_MATCH_STR },
+
+ { "scook", "res.cook", PAT_MATCH_STR },
+ { "scook_beg", "res.cook", PAT_MATCH_BEG },
+ { "scook_dir", "res.cook", PAT_MATCH_DIR },
+ { "scook_dom", "res.cook", PAT_MATCH_DOM },
+ { "scook_end", "res.cook", PAT_MATCH_END },
+ { "scook_len", "res.cook", PAT_MATCH_LEN },
+ { "scook_reg", "res.cook", PAT_MATCH_REG },
+ { "scook_sub", "res.cook", PAT_MATCH_SUB },
+
+ { "shdr", "res.hdr", PAT_MATCH_STR },
+ { "shdr_beg", "res.hdr", PAT_MATCH_BEG },
+ { "shdr_dir", "res.hdr", PAT_MATCH_DIR },
+ { "shdr_dom", "res.hdr", PAT_MATCH_DOM },
+ { "shdr_end", "res.hdr", PAT_MATCH_END },
+ { "shdr_len", "res.hdr", PAT_MATCH_LEN },
+ { "shdr_reg", "res.hdr", PAT_MATCH_REG },
+ { "shdr_sub", "res.hdr", PAT_MATCH_SUB },
+
+ { "url", "url", PAT_MATCH_STR },
+ { "url_beg", "url", PAT_MATCH_BEG },
+ { "url_dir", "url", PAT_MATCH_DIR },
+ { "url_dom", "url", PAT_MATCH_DOM },
+ { "url_end", "url", PAT_MATCH_END },
+ { "url_len", "url", PAT_MATCH_LEN },
+ { "url_reg", "url", PAT_MATCH_REG },
+ { "url_sub", "url", PAT_MATCH_SUB },
+
+ { "urlp", "urlp", PAT_MATCH_STR },
+ { "urlp_beg", "urlp", PAT_MATCH_BEG },
+ { "urlp_dir", "urlp", PAT_MATCH_DIR },
+ { "urlp_dom", "urlp", PAT_MATCH_DOM },
+ { "urlp_end", "urlp", PAT_MATCH_END },
+ { "urlp_len", "urlp", PAT_MATCH_LEN },
+ { "urlp_reg", "urlp", PAT_MATCH_REG },
+ { "urlp_sub", "urlp", PAT_MATCH_SUB },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
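+
+/* Example (illustrative configuration sketch, not from the original sources;
+ * the "static" backend is hypothetical): these keywords allow shortcuts such
+ * as the following, where "path_beg" implicitly uses the "path" fetch with a
+ * prefix match:
+ *
+ *   acl is_static path_beg /static/ /img/
+ *   use_backend static if is_static
+ */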
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_act.c b/src/http_act.c
new file mode 100644
index 0000000..64802a8
--- /dev/null
+++ b/src/http_act.c
@@ -0,0 +1,2498 @@
+/*
+ * HTTP actions
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/pattern.h>
+#include <haproxy/pool.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/uri_normalizer.h>
+#include <haproxy/version.h>
+
+
+/* Release memory allocated by most HTTP actions. Concretely, it releases
+ * <arg.http>.
+ */
+static void release_http_action(struct act_rule *rule)
+{
+ struct logformat_node *lf, *lfb;
+
+ istfree(&rule->arg.http.str);
+ if (rule->arg.http.re)
+ regex_free(rule->arg.http.re);
+ list_for_each_entry_safe(lf, lfb, &rule->arg.http.fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+}
+
+/* Release memory allocated by HTTP actions relying on an http reply. Concretely,
+ * it releases <.arg.http_reply>.
+ */
+static void release_act_http_reply(struct act_rule *rule)
+{
+ release_http_reply(rule->arg.http_reply);
+ rule->arg.http_reply = NULL;
+}
+
+
+/* Check function for HTTP actions relying on an http reply. The function
+ * returns 1 on success; otherwise it returns 0 and <err> is filled.
+ */
+static int check_act_http_reply(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct http_reply *reply = rule->arg.http_reply;
+
+ if (!http_check_http_reply(reply, px, err)) {
+ release_act_http_reply(rule);
+ return 0;
+ }
+ return 1;
+}
+
+
+/* This function executes one of the set-{method,path,query,uri} actions. It
+ * builds a string in the trash from the specified format string. It finds
+ * the action to be performed in <.action>, previously filled by the function
+ * parse_set_req_line(). On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_set_req_line(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct buffer *replace;
+ enum act_return ret = ACT_RET_CONT;
+
+ replace = alloc_trash_chunk();
+ if (!replace)
+ goto fail_alloc;
+
+ /* If we have to create a query string, prepare a '?'. */
+ if (rule->action == 2) // set-query
+ replace->area[replace->data++] = '?';
+ replace->data += build_logline(s, replace->area + replace->data,
+ replace->size - replace->data,
+ &rule->arg.http.fmt);
+
+ if (http_req_replace_stline(rule->action, replace->area, replace->data, px, s) == -1)
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* parse an http-request action among :
+ * set-method
+ * set-path
+ * set-pathq
+ * set-query
+ * set-uri
+ *
+ * All of them accept a single argument of type string representing a log-format.
+ * The resulting rule makes use of <http.fmt> to store the log-format list head,
+ * and <.action> to store the action type as an int (0=method, 1=path, 2=query,
+ * 3=uri, 4=pathq). It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_set_req_line(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg = *orig_arg;
+ int cap = 0;
+
+ switch (args[0][4]) {
+ case 'm' :
+ rule->action = 0; // set-method
+ break;
+ case 'p' :
+ if (args[0][8] == 'q')
+ rule->action = 4; // set-pathq
+ else
+ rule->action = 1; // set-path
+ break;
+ case 'q' :
+ rule->action = 2; // set-query
+ break;
+ case 'u' :
+ rule->action = 3; // set-uri
+ break;
+ default:
+ memprintf(err, "internal error: unhandled action '%s'", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action_ptr = http_action_set_req_line;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ if (!*args[cur_arg] ||
+ (*args[cur_arg + 1] && strcmp(args[cur_arg + 1], "if") != 0 && strcmp(args[cur_arg + 1], "unless") != 0)) {
+ memprintf(err, "expects exactly 1 argument <format>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*orig_arg)++;
+ return ACT_RET_PRS_OK;
+}
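+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * typical lines handled by the parser above, each taking a single log-format
+ * argument:
+ *
+ *   http-request set-method POST
+ *   http-request set-path /%[hdr(host)]%[path]
+ *   http-request set-query foo=bar
+ *   http-request set-uri https://%[hdr(host)]%[path]
+ */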
+
+/* This function executes the http-request normalize-uri action.
+ * `rule->action` is expected to be a value from `enum act_normalize_uri`.
+ *
+ * On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_normalize_uri(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ enum act_return ret = ACT_RET_CONT;
+ struct htx *htx = htxbuf(&s->req.buf);
+ const struct ist uri = htx_sl_req_uri(http_get_stline(htx));
+ struct buffer *replace = alloc_trash_chunk();
+ enum uri_normalizer_err err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
+
+ if (!replace)
+ goto fail_alloc;
+
+ switch ((enum act_normalize_uri) rule->action) {
+ case ACT_NORMALIZE_URI_PATH_MERGE_SLASHES: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_path_merge_slashes(iststop(path, '?'), &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 0))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PATH_STRIP_DOT: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_path_dot(iststop(path, '?'), &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 0))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT:
+ case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_path_dotdot(iststop(path, '?'), rule->action == ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 0))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newquery = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_query_sort(istfind(path, '?'), '&', &newquery);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_query(htx, newquery))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE:
+ case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_percent_upper(path, rule->action == ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED:
+ case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_percent_decode_unreserved(path, rule->action == ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_FRAGMENT_STRIP: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_fragment_strip(path, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_FRAGMENT_ENCODE: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_fragment_encode(path, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ }
+
+ switch (err) {
+ case URI_NORMALIZER_ERR_NONE:
+ break;
+ case URI_NORMALIZER_ERR_INTERNAL_ERROR:
+ ret = ACT_RET_ERR;
+ break;
+ case URI_NORMALIZER_ERR_INVALID_INPUT:
+ ret = ACT_RET_INV;
+ break;
+ case URI_NORMALIZER_ERR_ALLOC:
+ goto fail_alloc;
+ }
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_ADD(&sess->fe->fe_counters.failed_rewrites, 1);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_ADD(&s->be->be_counters.failed_rewrites, 1);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_ADD(&sess->listener->counters->failed_rewrites, 1);
+ if (objt_server(s->target))
+ _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.failed_rewrites, 1);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* Parses the http-request normalize-uri action. It expects a single <normalizer>
+ * argument, corresponding to a value in `enum act_normalize_uri`.
+ *
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg = *orig_arg;
+
+ rule->action_ptr = http_action_normalize_uri;
+ rule->release_ptr = NULL;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "missing argument <normalizer>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "path-merge-slashes") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_PATH_MERGE_SLASHES;
+ }
+ else if (strcmp(args[cur_arg], "path-strip-dot") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_PATH_STRIP_DOT;
+ }
+ else if (strcmp(args[cur_arg], "path-strip-dotdot") == 0) {
+ cur_arg++;
+
+ if (strcmp(args[cur_arg], "full") == 0) {
+ cur_arg++;
+ rule->action = ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL;
+ }
+ else if (!*args[cur_arg]) {
+ rule->action = ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT;
+ }
+ else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
+ memprintf(err, "unknown argument '%s' for 'path-strip-dotdot' normalizer", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ else if (strcmp(args[cur_arg], "query-sort-by-name") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME;
+ }
+ else if (strcmp(args[cur_arg], "percent-to-uppercase") == 0) {
+ cur_arg++;
+
+ if (strcmp(args[cur_arg], "strict") == 0) {
+ cur_arg++;
+ rule->action = ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT;
+ }
+ else if (!*args[cur_arg]) {
+ rule->action = ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE;
+ }
+ else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
+ memprintf(err, "unknown argument '%s' for 'percent-to-uppercase' normalizer", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ else if (strcmp(args[cur_arg], "percent-decode-unreserved") == 0) {
+ cur_arg++;
+
+ if (strcmp(args[cur_arg], "strict") == 0) {
+ cur_arg++;
+ rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT;
+ }
+ else if (!*args[cur_arg]) {
+ rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED;
+ }
+ else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
+ memprintf(err, "unknown argument '%s' for 'percent-decode-unreserved' normalizer", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ else if (strcmp(args[cur_arg], "fragment-strip") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_FRAGMENT_STRIP;
+ }
+ else if (strcmp(args[cur_arg], "fragment-encode") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_FRAGMENT_ENCODE;
+ }
+ else {
+ memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
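+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * the normalizers parsed above are used as follows, "full" and "strict" being
+ * the optional variants handled in the parser:
+ *
+ *   http-request normalize-uri path-merge-slashes
+ *   http-request normalize-uri path-strip-dotdot full
+ *   http-request normalize-uri percent-to-uppercase strict
+ */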
+
+/* This function executes a replace-uri action. It finds its arguments in
+ * <rule>.arg.http. It builds a string in the trash from the format string
+ * previously filled by function parse_replace_uri() and will execute the regex
+ * in <http.re> to replace the URI. It uses the format string present in
+ * <http.fmt>. The component to act on (path/pathq/uri) is taken from <.action>
+ * which contains 1 for the path, 4 for the path+query-string or 3 for the URI
+ * (values used by http_req_replace_stline()). On success, it returns
+ * ACT_RET_CONT. If an error occurs while soft rewrites are enabled, the action
+ * is canceled, but the rule processing continues. Otherwise ACT_RET_ERR is
+ * returned.
+ */
+static enum act_return http_action_replace_uri(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ enum act_return ret = ACT_RET_CONT;
+ struct buffer *replace, *output;
+ struct ist uri;
+ int len;
+
+ replace = alloc_trash_chunk();
+ output = alloc_trash_chunk();
+ if (!replace || !output)
+ goto fail_alloc;
+ uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
+
+ if (rule->action == 1) { // replace-path
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = iststop(http_parse_path(&parser), '?');
+ }
+ else if (rule->action == 4) { // replace-pathq
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = http_parse_path(&parser);
+ }
+
+ if (!istlen(uri))
+ goto leave;
+
+ if (!regex_exec_match2(rule->arg.http.re, uri.ptr, uri.len, MAX_MATCH, pmatch, 0))
+ goto leave;
+
+ replace->data = build_logline(s, replace->area, replace->size, &rule->arg.http.fmt);
+
+ /* note: uri.ptr doesn't need to be zero-terminated because it will
+ * only be used to pick pmatch references.
+ */
+ len = exp_replace(output->area, output->size, uri.ptr, replace->area, pmatch);
+ if (len == -1)
+ goto fail_rewrite;
+
+ if (http_req_replace_stline(rule->action, output->area, len, px, s) == -1)
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(output);
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* parse a "replace-uri", "replace-path" or "replace-pathq"
+ * http-request action.
+ * This action takes 2 arguments (a regex and a replacement format string).
+ * The resulting rule makes use of <.action> to store the action (1, 3 or 4),
+ * <http.re> to store the compiled regex, and <http.fmt> to store the log-format
+ * list head. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_replace_uri(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg = *orig_arg;
+ int cap = 0;
+ char *error = NULL;
+
+ switch (args[0][8]) {
+ case 'p':
+ if (args[0][12] == 'q')
+ rule->action = 4; // replace-pathq, same as set-pathq
+ else
+ rule->action = 1; // replace-path, same as set-path
+ break;
+ case 'u':
+ rule->action = 3; // replace-uri, same as set-uri
+ break;
+ default:
+ memprintf(err, "internal error: unhandled action '%s'", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->action_ptr = http_action_replace_uri;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ if (!*args[cur_arg] || !*args[cur_arg+1] ||
+ (*args[cur_arg+2] && strcmp(args[cur_arg+2], "if") != 0 && strcmp(args[cur_arg+2], "unless") != 0)) {
+ memprintf(err, "expects exactly 2 arguments <match-regex> and <replace-format>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!(rule->arg.http.re = regex_comp(args[cur_arg], 1, 1, &error))) {
+ memprintf(err, "failed to parse the regex : %s", error);
+ free(error);
+ return ACT_RET_PRS_ERR;
+ }
+
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ if (!parse_logformat_string(args[cur_arg + 1], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ regex_free(rule->arg.http.re);
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*orig_arg) += 2;
+ return ACT_RET_PRS_OK;
+}
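+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * prefix rewrites with the matched remainder reinjected through the \1
+ * back-reference:
+ *
+ *   http-request replace-path ^/old/(.*) /new/\1
+ *   http-request replace-uri ^http://(.*) https://\1
+ */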
+
+/* This function is just a compliant action wrapper for "set-status". */
+static enum act_return action_http_set_status(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ if (http_res_set_status(rule->arg.http.i, rule->arg.http.str, s) == -1) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ return ACT_RET_ERR;
+ }
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* parse set-status action:
+ * This action accepts a single argument of type int representing an HTTP
+ * status code, optionally followed by the "reason" keyword and a custom
+ * reason string. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR
+ * on error.
+ */
+static enum act_parse_ret parse_http_set_status(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ char *error;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_http_set_status;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ /* Check if an argument is available */
+ if (!*args[*orig_arg]) {
+ memprintf(err, "expects 1 argument: <status>; or 3 arguments: <status> reason <fmt>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* convert status code as integer */
+ rule->arg.http.i = strtol(args[*orig_arg], &error, 10);
+ if (*error != '\0' || rule->arg.http.i < 100 || rule->arg.http.i > 999) {
+ memprintf(err, "expects an integer status code between 100 and 999");
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*orig_arg)++;
+
+ /* set custom reason string */
+ rule->arg.http.str = ist(NULL); // If null, we use the default reason for the status code.
+ if (*args[*orig_arg] && strcmp(args[*orig_arg], "reason") == 0 &&
+ (*args[*orig_arg + 1] && strcmp(args[*orig_arg + 1], "if") != 0 && strcmp(args[*orig_arg + 1], "unless") != 0)) {
+ (*orig_arg)++;
+ rule->arg.http.str = ist(strdup(args[*orig_arg]));
+ (*orig_arg)++;
+ }
+
+ return ACT_RET_PRS_OK;
+}
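+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * both forms accepted by the parser above:
+ *
+ *   http-response set-status 503
+ *   http-response set-status 503 reason "Maintenance"
+ */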
+
+/* This function executes the "reject" HTTP action. It clears the request and
+ * response buffer without sending any response. It can be useful as an HTTP
+ * alternative to the silent-drop action to defend against DoS attacks, and may
+ * also be used with HTTP/2 to close a connection instead of just a stream.
+ * The txn status is unchanged, indicating no response was sent. The termination
+ * flags will indicate "PR". It always returns ACT_RET_ABRT.
+ */
+static enum act_return http_action_reject(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ sc_must_kill_conn(chn_prod(&s->req));
+ channel_abort(&s->req);
+ channel_abort(&s->res);
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->res.analysers &= AN_RES_FLT_END;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_req);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ return ACT_RET_ABRT;
+}
+
+/* parse the "reject" action:
+ * This action takes no argument and returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_action_reject(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_reject;
+ return ACT_RET_PRS_OK;
+}
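+
+/* Example (illustrative configuration sketch, not from the original sources;
+ * the file name is hypothetical): closing the connection of presumed abusers
+ * without sending any response:
+ *
+ *   http-request reject if { src -f /etc/haproxy/abusers.lst }
+ */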
+
+/* This function executes the "disable-l7-retry" HTTP action.
+ * It disables L7 retries (all retries except those caused by a connection
+ * failure). This can be useful for example to avoid retrying on POST requests.
+ * It just removes the L7 retry flag on the HTTP transaction, and always
+ * returns ACT_RET_CONT.
+ */
+static enum act_return http_req_disable_l7_retry(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ /* In theory, the TX_L7_RETRY flag isn't set at this point, but
+ * let's be future-proof and remove it anyway.
+ */
+ s->txn->flags &= ~TX_L7_RETRY;
+ s->txn->flags |= TX_D_L7_RETRY;
+ return ACT_RET_CONT;
+}
+
+/* parse the "disable-l7-retry" action:
+ * This action takes no argument and returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_req_disable_l7_retry(const char **args,
+ int *orig_args, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_req_disable_l7_retry;
+ return ACT_RET_PRS_OK;
+}
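+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * avoiding L7 retries on non-idempotent requests:
+ *
+ *   http-request disable-l7-retry if METH_POST
+ */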
+
+/* This function executes the "capture" action. It executes a fetch expression,
+ * turns the result into a string and puts it in a capture slot. It always
+ * returns ACT_RET_CONT. If an error occurs the action is cancelled, but the
+ * rule processing continues.
+ */
+static enum act_return http_action_req_capture(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h = rule->arg.cap.hdr;
+ char **cap = s->req_cap;
+ int len;
+
+ key = sample_fetch_as_type(s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.cap.expr, SMP_T_STR);
+ if (!key)
+ return ACT_RET_CONT;
+
+ if (cap[h->index] == NULL)
+ cap[h->index] = pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) /* no more capture memory */
+ return ACT_RET_CONT;
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+ return ACT_RET_CONT;
+}
+
+/* This function executes the "capture" action and stores the result in a
+ * capture slot if one exists. It executes a fetch expression, turns the result
+ * into a string and puts it in a capture slot. It always returns ACT_RET_CONT.
+ * If an error occurs the action is cancelled, but the rule processing
+ * continues.
+ */
+static enum act_return http_action_req_capture_by_id(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h;
+ char **cap = s->req_cap;
+ struct proxy *fe = strm_fe(s);
+ int len;
+ int i;
+
+ /* Look for the original configuration. */
+ for (h = fe->req_cap, i = fe->nb_req_cap - 1;
+ h != NULL && i != rule->arg.capid.idx ;
+ i--, h = h->next);
+ if (!h)
+ return ACT_RET_CONT;
+
+ key = sample_fetch_as_type(s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.capid.expr, SMP_T_STR);
+ if (!key)
+ return ACT_RET_CONT;
+
+ if (cap[h->index] == NULL)
+ cap[h->index] = pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) /* no more capture memory */
+ return ACT_RET_CONT;
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+ return ACT_RET_CONT;
+}
+
+/* Check an "http-request capture" action.
+ *
+ * The function returns 1 on success; otherwise it returns 0 and <err> is
+ * filled.
+ */
+static int check_http_req_capture(struct act_rule *rule, struct proxy *px, char **err)
+{
+ if (rule->action_ptr != http_action_req_capture_by_id)
+ return 1;
+
+ /* capture slots can only be declared in frontends, so we can't check their
+ * existence in backends at the configuration parsing step
+ */
+ if (px->cap & PR_CAP_FE && rule->arg.capid.idx >= px->nb_req_cap) {
+ memprintf(err, "unable to find capture id '%d' referenced by http-request capture rule",
+ rule->arg.capid.idx);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Release memory allocated by an http capture action */
+static void release_http_capture(struct act_rule *rule)
+{
+ if (rule->action_ptr == http_action_req_capture)
+ release_sample_expr(rule->arg.cap.expr);
+ else
+ release_sample_expr(rule->arg.capid.expr);
+}
+
+/* parse an "http-request capture" action. It takes a single argument which is
+ * a sample fetch expression. It stores the expression into arg->act.p[0] and
+ * the allocated hdr_cap struct or the preallocated "id" into arg->act.p[1].
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_req_capture(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct sample_expr *expr;
+ struct cap_hdr *hdr;
+ int cur_arg;
+ int len = 0;
+
+ for (cur_arg = *orig_arg; cur_arg < *orig_arg + 3 && *args[cur_arg]; cur_arg++)
+ if (strcmp(args[cur_arg], "if") == 0 ||
+ strcmp(args[cur_arg], "unless") == 0)
+ break;
+
+ if (cur_arg < *orig_arg + 3) {
+ memprintf(err, "expects <expression> [ 'len' <length> | id <idx> ]");
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ if (!(expr->fetch->val & SMP_VAL_FE_HRQ_HDR)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!args[cur_arg] || !*args[cur_arg]) {
+ memprintf(err, "expects 'len or 'id'");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "len") == 0) {
+ cur_arg++;
+
+ if (!(px->cap & PR_CAP_FE)) {
+ memprintf(err, "proxy '%s' has no frontend capability", px->id);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ px->conf.args.ctx = ARGC_CAP;
+
+ if (!args[cur_arg]) {
+ memprintf(err, "missing length value");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ /* parse and check the capture length */
+ len = atoi(args[cur_arg]);
+ if (len <= 0) {
+ memprintf(err, "length must be > 0");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(err, "out of memory");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ hdr->next = px->req_cap;
+ hdr->name = NULL; /* not a header capture */
+ hdr->namelen = 0;
+ hdr->len = len;
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ hdr->index = px->nb_req_cap++;
+
+ px->req_cap = hdr;
+ px->to_log |= LW_REQHDR;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_req_capture;
+ rule->release_ptr = release_http_capture;
+ rule->arg.cap.expr = expr;
+ rule->arg.cap.hdr = hdr;
+ }
+
+ else if (strcmp(args[cur_arg], "id") == 0) {
+ int id;
+ char *error;
+
+ cur_arg++;
+
+ if (!args[cur_arg]) {
+ memprintf(err, "missing id value");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ id = strtol(args[cur_arg], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "cannot parse id '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+
+ px->conf.args.ctx = ARGC_CAP;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_req_capture_by_id;
+ rule->check_ptr = check_http_req_capture;
+ rule->release_ptr = release_http_capture;
+ rule->arg.capid.expr = expr;
+ rule->arg.capid.idx = id;
+ }
+
+ else {
+ memprintf(err, "expects 'len' or 'id', found '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
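+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * the two forms parsed above, one allocating a new slot with "len", the other
+ * reusing a slot previously declared in the frontend:
+ *
+ *   http-request capture req.cook(JSESSIONID) len 32
+ *
+ *   declare capture request len 64
+ *   http-request capture req.hdr(user-agent) id 0
+ */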
+
+/* This function executes the "capture" action and stores the result in a
+ * capture slot if one exists. It executes a fetch expression, turns the result
+ * into a string and puts it in a capture slot. It always returns ACT_RET_CONT.
+ * If an error occurs the action is cancelled, but the rule processing
+ * continues.
+ */
+static enum act_return http_action_res_capture_by_id(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h;
+ char **cap = s->res_cap;
+ struct proxy *fe = strm_fe(s);
+ int len;
+ int i;
+
+ /* Look for the original configuration. */
+ for (h = fe->rsp_cap, i = fe->nb_rsp_cap - 1;
+ h != NULL && i != rule->arg.capid.idx ;
+ i--, h = h->next);
+ if (!h)
+ return ACT_RET_CONT;
+
+ key = sample_fetch_as_type(s->be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, rule->arg.capid.expr, SMP_T_STR);
+ if (!key)
+ return ACT_RET_CONT;
+
+ if (cap[h->index] == NULL)
+ cap[h->index] = pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) /* no more capture memory */
+ return ACT_RET_CONT;
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+ return ACT_RET_CONT;
+}
+
+/* Check an "http-response capture" action.
+ *
+ * The function returns 1 on success; otherwise it returns 0 and <err> is
+ * filled.
+ */
+static int check_http_res_capture(struct act_rule *rule, struct proxy *px, char **err)
+{
+ if (rule->action_ptr != http_action_res_capture_by_id)
+ return 1;
+
+ /* capture slots can only be declared in frontends, so we can't check their
+ * existence in backends at the configuration parsing step
+ */
+ if (px->cap & PR_CAP_FE && rule->arg.capid.idx >= px->nb_rsp_cap) {
+ memprintf(err, "unable to find capture id '%d' referenced by http-response capture rule",
+ rule->arg.capid.idx);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* parse an "http-response capture" action. It takes a single argument which is
+ * a sample fetch expression. It stores the expression into arg->act.p[0] and
+ * the allocated hdr_cap struct of the preallocated id into arg->act.p[1].
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_res_capture(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct sample_expr *expr;
+ int cur_arg;
+ int id;
+ char *error;
+
+ for (cur_arg = *orig_arg; cur_arg < *orig_arg + 3 && *args[cur_arg]; cur_arg++)
+ if (strcmp(args[cur_arg], "if") == 0 ||
+ strcmp(args[cur_arg], "unless") == 0)
+ break;
+
+ if (cur_arg < *orig_arg + 3) {
+ memprintf(err, "expects <expression> id <idx>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ if (!(expr->fetch->val & SMP_VAL_FE_HRS_HDR)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!args[cur_arg] || !*args[cur_arg]) {
+ memprintf(err, "expects 'id'");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "id") != 0) {
+ memprintf(err, "expects 'id', found '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg++;
+
+ if (!args[cur_arg]) {
+ memprintf(err, "missing id value");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ id = strtol(args[cur_arg], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "cannot parse id '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+
+ px->conf.args.ctx = ARGC_CAP;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_res_capture_by_id;
+ rule->check_ptr = check_http_res_capture;
+ rule->release_ptr = release_http_capture;
+ rule->arg.capid.expr = expr;
+ rule->arg.capid.idx = id;
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
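+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * response captures can only reference a slot previously declared in the
+ * frontend:
+ *
+ *   declare capture response len 16
+ *   http-response capture res.hdr(server) id 0
+ */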
+
+/* Parse a "allow" action for a request or a response rule. It takes no argument. It
+ * returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_allow(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_ACTION_ALLOW;
+ rule->flags |= ACT_FLAG_FINAL;
+ return ACT_RET_PRS_OK;
+}
+
+/* Parse "deny" or "tarpit" actions for a request rule or "deny" action for a
+ * response rule. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on
+ * error. It relies on http_parse_http_reply() to set
+ * <.arg.http_reply>.
+ */
+static enum act_parse_ret parse_http_deny(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int default_status;
+ int cur_arg, arg = 0;
+
+ cur_arg = *orig_arg;
+ if (rule->from == ACT_F_HTTP_REQ) {
+ if (strcmp(args[cur_arg - 1], "tarpit") == 0) {
+ rule->action = ACT_HTTP_REQ_TARPIT;
+ default_status = 500;
+ }
+ else {
+ rule->action = ACT_ACTION_DENY;
+ default_status = 403;
+ }
+ }
+ else {
+ rule->action = ACT_ACTION_DENY;
+ default_status = 502;
+ }
+
+ /* If no argument is given, or only a deny_status, fall back on the
+ * legacy mode and use the default error files even though
+ * "default-errorfiles" is not specified. Otherwise, parse an http reply.
+ */
+
+ /* Prepare parsing of log-format strings */
+ px->conf.args.ctx = ((rule->from == ACT_F_HTTP_REQ) ? ARGC_HRQ : ARGC_HRS);
+
+ if (!*(args[cur_arg]) || strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ rule->arg.http_reply = http_parse_http_reply((const char *[]){"default-errorfiles", ""}, &arg, px, default_status, err);
+ goto end;
+ }
+
+ if (strcmp(args[cur_arg], "deny_status") == 0) {
+ if (!*(args[cur_arg+2]) || strcmp(args[cur_arg+2], "if") == 0 || strcmp(args[cur_arg+2], "unless") == 0) {
+ rule->arg.http_reply = http_parse_http_reply((const char *[]){"status", args[cur_arg+1], "default-errorfiles", ""},
+ &arg, px, default_status, err);
+ *orig_arg += 2;
+ goto end;
+ }
+ args[cur_arg] += 5; /* skip "deny_" for the parsing */
+ }
+
+ rule->arg.http_reply = http_parse_http_reply(args, orig_arg, px, default_status, err);
+
+ end:
+ if (!rule->arg.http_reply)
+ return ACT_RET_PRS_ERR;
+
+ rule->flags |= ACT_FLAG_FINAL;
+ rule->check_ptr = check_act_http_reply;
+ rule->release_ptr = release_act_http_reply;
+ return ACT_RET_PRS_OK;
+}
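+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * the legacy short form with a status only, and the full http-reply form with
+ * a custom payload:
+ *
+ *   http-request deny deny_status 429
+ *   http-request deny status 403 content-type text/plain string "forbidden"
+ */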
+
+
+/* This function executes an auth action. It builds a 401/407 HTX message using
+ * the corresponding proxy's error message. On success, it returns
+ * ACT_RET_ABRT. If an error occurs ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_auth(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+ struct http_reply *reply;
+ const char *auth_realm;
+ struct http_hdr_ctx ctx;
+ struct ist hdr;
+
+ /* Auth might be performed on regular http-req rules as well as on stats */
+ auth_realm = rule->arg.http.str.ptr;
+ if (!auth_realm) {
+ if (px->uri_auth && s->current_rule_list == &px->uri_auth->http_req_rules)
+ auth_realm = STATS_DEFAULT_REALM;
+ else
+ auth_realm = px->id;
+ }
+
+ if (!(s->txn->flags & TX_USE_PX_CONN)) {
+ s->txn->status = 401;
+ hdr = ist("WWW-Authenticate");
+ }
+ else {
+ s->txn->status = 407;
+ hdr = ist("Proxy-Authenticate");
+ }
+ reply = http_error_message(s);
+ channel_htx_truncate(res, htx);
+
+ if (chunk_printf(&trash, "Basic realm=\"%s\"", auth_realm) == -1)
+ goto fail;
+
+ /* Write the generic 40x message */
+ if (http_reply_to_htx(s, htx, reply) == -1)
+ goto fail;
+
+ /* Remove all existing occurrences of the XXX-Authenticate header */
+ ctx.blk = NULL;
+ while (http_find_header(htx, hdr, &ctx, 1))
+ http_remove_header(htx, &ctx);
+
+ /* Now add the right XXX-Authenticate header */
+ if (!http_add_header(htx, hdr, ist2(b_orig(&trash), b_data(&trash))))
+ goto fail;
+
+ /* Finally forward the reply */
+ htx_to_buf(htx, &res->buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ /* Note: Only eval on the request */
+ s->logs.tv_request = now;
+ req->analysers &= AN_REQ_FLT_END;
+
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ stream_inc_http_err_ctr(s);
+ return ACT_RET_ABRT;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htx);
+ return ACT_RET_ERR;
+}
+
+/* Parse a "auth" action. It may take 2 optional arguments to define a "realm"
+ * parameter. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_auth(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+
+ rule->action = ACT_CUSTOM;
+ rule->flags |= ACT_FLAG_FINAL;
+ rule->action_ptr = http_action_auth;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (strcmp(args[cur_arg], "realm") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(err, "missing realm value.\n");
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+ cur_arg++;
+ }
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
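+
+/* Example (illustrative configuration sketch, not from the original sources;
+ * the "admins" userlist is hypothetical): challenging users who did not
+ * authenticate:
+ *
+ *   http-request auth realm "Admin area" if !{ http_auth(admins) }
+ */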
+
+/* This function executes an early-hint action. It adds an HTTP 103 Early Hints
+ * response header with <.arg.http.str> name and with a value built
+ * according to <.arg.http.fmt> log line format. If it is the first early-hint
+ * rule of a series, the 103 response start-line is added first. At the end, if
+ * the next rule is not an early-hint rule or if it is the last rule, the EOH
+ * block is added to terminate the response. On success, it returns
+ * ACT_RET_CONT. If an error occurs while soft rewrites are enabled, the action
+ * is canceled, but the rule processing continues. Otherwise ACT_RET_ERR is
+ * returned.
+ */
+static enum act_return http_action_early_hint(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct act_rule *next_rule;
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+ struct buffer *value = alloc_trash_chunk();
+ enum act_return ret = ACT_RET_CONT;
+
+ if (!(s->txn->req.flags & HTTP_MSGF_VER_11))
+ goto leave;
+
+ if (!value) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto error;
+ }
+
+ /* if there is no pending 103 response, start a new one. Otherwise,
+ * continue adding headers to the previously started response.
+ */
+ if (s->txn->status != 103) {
+ struct htx_sl *sl;
+ unsigned int flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|
+ HTX_SL_F_XFER_LEN|HTX_SL_F_BODYLESS);
+
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags,
+ ist("HTTP/1.1"), ist("103"), ist("Early Hints"));
+ if (!sl)
+ goto error;
+ sl->info.res.status = 103;
+ s->txn->status = 103;
+ }
+
+ /* Add the HTTP 103 Early Hints response header */
+ value->data = build_logline(s, b_tail(value), b_room(value), &rule->arg.http.fmt);
+ if (!htx_add_header(htx, rule->arg.http.str, ist2(b_head(value), b_data(value))))
+ goto error;
+
+ /* if it is the last rule or the next one is not an early-hint or is a
+ * conditional early-hint, terminate the current response.
+ */
+ next_rule = LIST_NEXT(&rule->list, typeof(rule), list);
+ if (&next_rule->list == s->current_rule_list || next_rule->action_ptr != http_action_early_hint || next_rule->cond) {
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto error;
+ if (!http_forward_proxy_resp(s, 0))
+ goto error;
+ s->txn->status = 0;
+ }
+
+ leave:
+ free_trash_chunk(value);
+ return ret;
+
+ error:
+ /* If an error occurred during an Early-hint rule, remove the incomplete
+ * HTTP 103 response from the buffer */
+ channel_htx_truncate(res, htx);
+ ret = ACT_RET_ERR;
+ s->txn->status = 0;
+ goto leave;
+}
+
+/* This function executes a set-header or add-header action. It builds a string
+ * in the trash from the specified format string. It finds the action to be
+ * performed in <.action>, previously filled by the function parse_set_header().
+ * On success, it returns ACT_RET_CONT. If an error occurs while soft rewrites
+ * are enabled, the action is canceled, but the rule processing continues.
+ * Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_set_header(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ enum act_return ret = ACT_RET_CONT;
+ struct buffer *replace;
+ struct http_hdr_ctx ctx;
+ struct ist n, v;
+
+ replace = alloc_trash_chunk();
+ if (!replace)
+ goto fail_alloc;
+
+ replace->data = build_logline(s, replace->area, replace->size, &rule->arg.http.fmt);
+ n = rule->arg.http.str;
+ v = ist2(replace->area, replace->data);
+
+ if (rule->action == 0) { // set-header
+ /* remove all occurrences of the header */
+ ctx.blk = NULL;
+ while (http_find_header(htx, n, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ }
+
+ /* Now add header */
+ if (!http_add_header(htx, n, v))
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(msg->flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* Parse a "set-header", "add-header" or "early-hint" actions. It takes an
+ * header name and a log-format string as arguments. It returns ACT_RET_PRS_OK
+ * on success, ACT_RET_PRS_ERR on error.
+ *
+ * Note: same function is used for the request and the response. However
+ * "early-hint" rules are only supported for request rules.
+ */
+static enum act_parse_ret parse_http_set_header(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cap = 0, cur_arg;
+
+ if (args[*orig_arg-1][0] == 'e') {
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_early_hint;
+ }
+ else {
+ if (args[*orig_arg-1][0] == 's')
+ rule->action = 0; // set-header
+ else
+ rule->action = 1; // add-header
+ rule->action_ptr = http_action_set_header;
+ }
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg] || !*args[cur_arg+1]) {
+ memprintf(err, "expects exactly 2 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ }
+ else{
+ px->conf.args.ctx = ARGC_HRS;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRS_HDR;
+ }
+
+ cur_arg++;
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ istfree(&rule->arg.http.str);
+ return ACT_RET_PRS_ERR;
+ }
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
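+
+/* Example (illustrative configuration sketch, not from the original sources):
+ * the three keywords sharing this parser, each taking a header name and a
+ * log-format value:
+ *
+ *   http-request set-header X-Forwarded-Proto https
+ *   http-response add-header X-Node %[env(HOSTNAME)]
+ *   http-request early-hint Link "</style.css>; rel=preload; as=style"
+ */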
+
+/* This function executes a replace-header or replace-value action. It
+ * builds a string in the trash from the specified format string. It finds
+ * the action to be performed in <.action>, previously filled by the function
+ * parse_replace_header(). On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_replace_header(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ enum act_return ret = ACT_RET_CONT;
+ struct buffer *replace;
+ int r;
+
+ replace = alloc_trash_chunk();
+ if (!replace)
+ goto fail_alloc;
+
+ replace->data = build_logline(s, replace->area, replace->size, &rule->arg.http.fmt);
+
+ r = http_replace_hdrs(s, htx, rule->arg.http.str, replace->area, rule->arg.http.re, (rule->action == 0));
+ if (r == -1)
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(msg->flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* Parse a "replace-header" or "replace-value" actions. It takes an header name,
+ * a regex and replacement string as arguments. It returns ACT_RET_PRS_OK on
+ * success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_replace_header(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cap = 0, cur_arg;
+
+ if (args[*orig_arg-1][8] == 'h')
+ rule->action = 0; // replace-header
+ else
+ rule->action = 1; // replace-value
+ rule->action_ptr = http_action_replace_header;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg] || !*args[cur_arg+1] || !*args[cur_arg+2]) {
+ memprintf(err, "expects exactly 3 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+
+ cur_arg++;
+ if (!(rule->arg.http.re = regex_comp(args[cur_arg], 1, 1, err))) {
+ istfree(&rule->arg.http.str);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ }
+ else {
+ px->conf.args.ctx = ARGC_HRS;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRS_HDR;
+ }
+
+ cur_arg++;
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ istfree(&rule->arg.http.str);
+ regex_free(rule->arg.http.re);
+ return ACT_RET_PRS_ERR;
+ }
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
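+
+/* Illustrative configuration (the regexes and header names are examples):
+ *
+ *   http-request replace-header Cookie foo=([^;]*);(.*) foo=\1;ip=%[src];\2
+ *   http-request replace-value X-Forwarded-For ^192\.168\.(.*)$ 172.16.\1
+ */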
+
+/* This function executes a del-header action with the selected matching mode
+ * for the header name. It finds the matching method to apply in <.action>,
+ * previously filled by the function parse_http_del_header(). On success, it
+ * returns ACT_RET_CONT. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_del_header(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_hdr_ctx ctx;
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ enum act_return ret = ACT_RET_CONT;
+
+ /* remove all occurrences of the header */
+ ctx.blk = NULL;
+ switch (rule->action) {
+ case PAT_MATCH_STR:
+ while (http_find_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_BEG:
+ while (http_find_pfx_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_END:
+ while (http_find_sfx_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_SUB:
+ while (http_find_sub_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_REG:
+ while (http_match_header(htx, rule->arg.http.re, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ default:
+ return ACT_RET_ERR;
+ }
+ return ret;
+}
+
+/* Parse a "del-header" action. It takes string as a required argument,
+ * optional flag (currently only -m) and optional matching method of input string
+ * with header name to be deleted. Default matching method is exact match (-m str).
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_del_header(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ int pat_idx;
+
+ /* set exact matching (-m str) as default */
+ rule->action = PAT_MATCH_STR;
+ rule->action_ptr = http_action_del_header;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg]) {
+ memprintf(err, "expects at least 1 argument");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+ px->conf.args.ctx = (rule->from == ACT_F_HTTP_REQ ? ARGC_HRQ : ARGC_HRS);
+
+ if (strcmp(args[cur_arg+1], "-m") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg+1]) {
+ memprintf(err, "-m flag expects exactly 1 argument");
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg++;
+ pat_idx = pat_find_match_name(args[cur_arg]);
+ switch (pat_idx) {
+ case PAT_MATCH_REG:
+ if (!(rule->arg.http.re = regex_comp(rule->arg.http.str.ptr, 1, 1, err)))
+ return ACT_RET_PRS_ERR;
+ /* fall through */
+ case PAT_MATCH_STR:
+ case PAT_MATCH_BEG:
+ case PAT_MATCH_END:
+ case PAT_MATCH_SUB:
+ rule->action = pat_idx;
+ break;
+ default:
+ memprintf(err, "-m with unsupported matching method '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
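+
+/* Illustrative configuration (header names are placeholders):
+ *
+ *   http-request del-header X-Debug
+ *   http-response del-header X-Private- -m beg
+ */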
+
+/* Release memory allocated by an http redirect action. */
+static void release_http_redir(struct act_rule *rule)
+{
+ struct logformat_node *lf, *lfb;
+ struct redirect_rule *redir;
+
+ redir = rule->arg.redir;
+ if (!redir)
+ return;
+
+ LIST_DELETE(&redir->list);
+ if (redir->cond) {
+ prune_acl_cond(redir->cond);
+ free(redir->cond);
+ }
+ free(redir->rdr_str);
+ free(redir->cookie_str);
+ list_for_each_entry_safe(lf, lfb, &redir->rdr_fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ free(redir);
+}
+
+/* Parse a "redirect" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_redirect(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct redirect_rule *redir;
+ int dir, cur_arg;
+
+ rule->action = ACT_HTTP_REDIR;
+ rule->release_ptr = release_http_redir;
+
+ cur_arg = *orig_arg;
+
+ dir = (rule->from == ACT_F_HTTP_REQ ? 0 : 1);
+ if ((redir = http_parse_redirect_rule(px->conf.args.file, px->conf.args.line, px, &args[cur_arg], err, 1, dir)) == NULL)
+ return ACT_RET_PRS_ERR;
+
+ if (!(redir->flags & REDIRECT_FLAG_IGNORE_EMPTY))
+ rule->flags |= ACT_FLAG_FINAL;
+
+ rule->arg.redir = redir;
+ rule->cond = redir->cond;
+ redir->cond = NULL;
+
+ /* skip all arguments */
+ while (*args[cur_arg])
+ cur_arg++;
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
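+
+/* Illustrative configuration ("maintenance" is a hypothetical ACL):
+ *
+ *   http-request redirect scheme https code 301 if !{ ssl_fc }
+ *   http-request redirect location /maintenance.html if maintenance
+ */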
+
+/* This function executes an add-acl, del-acl, set-map or del-map action. On
+ * success, it returns ACT_RET_CONT. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_set_map(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct pat_ref *ref;
+ struct buffer *key = NULL, *value = NULL;
+ enum act_return ret = ACT_RET_CONT;
+
+ /* collect reference */
+ ref = pat_ref_lookup(rule->arg.map.ref);
+ if (!ref)
+ goto leave;
+
+ /* allocate key */
+ key = alloc_trash_chunk();
+ if (!key)
+ goto fail_alloc;
+
+ /* collect key */
+ key->data = build_logline(s, key->area, key->size, &rule->arg.map.key);
+ key->area[key->data] = '\0';
+
+ switch (rule->action) {
+ case 0: // add-acl
+ /* add entry only if it does not already exist */
+ HA_SPIN_LOCK(PATREF_LOCK, &ref->lock);
+ if (pat_ref_find_elt(ref, key->area) == NULL)
+ pat_ref_add(ref, key->area, NULL, NULL);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ref->lock);
+ break;
+
+ case 1: // set-map
+ /* allocate value */
+ value = alloc_trash_chunk();
+ if (!value)
+ goto fail_alloc;
+
+ /* collect value */
+ value->data = build_logline(s, value->area, value->size, &rule->arg.map.value);
+ value->area[value->data] = '\0';
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ref->lock);
+ if (pat_ref_find_elt(ref, key->area) != NULL) {
+ /* update entry if it exists */
+ pat_ref_set(ref, key->area, value->area, NULL);
+ }
+ else {
+ /* insert a new entry */
+ pat_ref_add(ref, key->area, value->area, NULL);
+ }
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ref->lock);
+ break;
+
+ case 2: // del-acl
+ case 3: // del-map
+ /* returned code: 1=ok, 0=ko */
+ HA_SPIN_LOCK(PATREF_LOCK, &ref->lock);
+ pat_ref_delete(ref, key->area);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ref->lock);
+ break;
+
+ default:
+ ret = ACT_RET_ERR;
+ }
+
+ leave:
+ free_trash_chunk(key);
+ free_trash_chunk(value);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+}
+
+/* Release memory allocated by an http map/acl action. */
+static void release_http_map(struct act_rule *rule)
+{
+ struct logformat_node *lf, *lfb;
+
+ free(rule->arg.map.ref);
+ list_for_each_entry_safe(lf, lfb, &rule->arg.map.key, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ if (rule->action == 1) {
+ list_for_each_entry_safe(lf, lfb, &rule->arg.map.value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ }
+}
+
+/* Parse a "add-acl", "del-acl", "set-map" or "del-map" actions. It takes one or
+ * two log-format string as argument depending on the action. The action is
+ * stored in <.action> as an int (0=add-acl, 1=set-map, 2=del-acl,
+ * 3=del-map). It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_set_map(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cap = 0, cur_arg;
+
+ if (args[*orig_arg-1][0] == 'a') // add-acl
+ rule->action = 0;
+ else if (args[*orig_arg-1][0] == 's') // set-map
+ rule->action = 1;
+ else if (args[*orig_arg-1][4] == 'a') // del-acl
+ rule->action = 2;
+ else if (args[*orig_arg-1][4] == 'm') // del-map
+ rule->action = 3;
+ else {
+ memprintf(err, "internal error: unhandled action '%s'", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action_ptr = http_action_set_map;
+ rule->release_ptr = release_http_map;
+
+ cur_arg = *orig_arg;
+ if (rule->action == 1 && (!*args[cur_arg] || !*args[cur_arg+1])) {
+ /* 2 args for set-map */
+ memprintf(err, "expects exactly 2 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+ else if (!*args[cur_arg]) {
+ /* only one arg for other actions */
+ memprintf(err, "expects exactly 1 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /*
+ * '+ 8' for 'set-map(' (same for del-map)
+ * '- 9' for 'set-map(' + trailing ')' (same for del-map)
+ */
+ rule->arg.map.ref = my_strndup(args[cur_arg-1] + 8, strlen(args[cur_arg-1]) - 9);
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ }
+ else {
+ px->conf.args.ctx = ARGC_HRS;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRS_HDR;
+ }
+
+ /* key pattern */
+ LIST_INIT(&rule->arg.map.key);
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.key, LOG_OPT_HTTP, cap, err)) {
+ free(rule->arg.map.ref);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->action == 1) {
+ /* value pattern for set-map only */
+ cur_arg++;
+ LIST_INIT(&rule->arg.map.value);
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.value, LOG_OPT_HTTP, cap, err)) {
+ free(rule->arg.map.ref);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
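+
+/* Illustrative configuration (the file paths and the X-Rate header are
+ * placeholders):
+ *
+ *   http-request add-acl(/etc/haproxy/block.acl) %[src]
+ *   http-request del-acl(/etc/haproxy/block.acl) %[src]
+ *   http-request set-map(/etc/haproxy/rate.map) %[src] %[req.hdr(X-Rate)]
+ *   http-request del-map(/etc/haproxy/rate.map) %[src]
+ */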
+
+/* This function executes a track-sc* action. On success, it returns
+ * ACT_RET_CONT. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_track_sc(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stktable *t;
+ struct stksess *ts;
+ struct stktable_key *key;
+ void *ptr1, *ptr2, *ptr3, *ptr4, *ptr5, *ptr6;
+ int opt;
+
+ ptr1 = ptr2 = ptr3 = ptr4 = ptr5 = ptr6 = NULL;
+ opt = ((rule->from == ACT_F_HTTP_REQ) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES) | SMP_OPT_FINAL;
+
+ t = rule->arg.trk_ctr.table.t;
+
+ if (stkctr_entry(&s->stkctr[rule->action]))
+ goto end;
+
+ key = stktable_fetch_key(t, s->be, sess, s, opt, rule->arg.trk_ctr.expr, NULL);
+
+ if (!key)
+ goto end;
+ ts = stktable_get_entry(t, key);
+ if (!ts)
+ goto end;
+
+ stream_track_stkctr(&s->stkctr[rule->action], t, ts);
+
+ /* let's count a new HTTP request as it's the first time we do it */
+ ptr1 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_CNT);
+ ptr2 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_RATE);
+
+ /* When the client triggers a 4xx from the server, it's most often due
+ * to a missing object or permission. These events should be tracked
+ * because if they happen often, it may indicate a brute-force attempt
+ * or a vulnerability scan. Normally this is done when receiving the
+ * response, but here we're tracking after the point where this ought to
+ * have been done, so we have to do it on purpose.
+ */
+ if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 400) < 100) {
+ ptr3 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_CNT);
+ ptr4 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_RATE);
+ }
+
+ if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 500) < 100 &&
+ s->txn->status != 501 && s->txn->status != 505) {
+ ptr5 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_CNT);
+ ptr6 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_RATE);
+ }
+
+ if (ptr1 || ptr2 || ptr3 || ptr4 || ptr5 || ptr6) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ stktable_data_cast(ptr1, std_t_uint)++;
+ if (ptr2)
+ update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_REQ_RATE].u, 1);
+ if (ptr3)
+ stktable_data_cast(ptr3, std_t_uint)++;
+ if (ptr4)
+ update_freq_ctr_period(&stktable_data_cast(ptr4, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u, 1);
+ if (ptr5)
+ stktable_data_cast(ptr5, std_t_uint)++;
+ if (ptr6)
+ update_freq_ctr_period(&stktable_data_cast(ptr6, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u, 1);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch it to re-schedule the sync */
+ stktable_touch_local(t, ts, 0);
+ }
+
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_CONTENT);
+ if (sess->fe != s->be)
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_BACKEND);
+
+ end:
+ return ACT_RET_CONT;
+}
+
+static void release_http_track_sc(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.trk_ctr.expr);
+}
+
+/* Parse a "track-sc*" actions. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_track_sc(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct sample_expr *expr;
+ unsigned int where;
+ unsigned int tsc_num;
+ const char *tsc_num_str;
+ int cur_arg;
+
+ tsc_num_str = &args[*orig_arg-1][8];
+ if (cfg_parse_track_sc_num(&tsc_num, tsc_num_str, tsc_num_str + strlen(tsc_num_str), err) == -1)
+ return ACT_RET_PRS_ERR;
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line,
+ err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ where = 0;
+ if (px->cap & PR_CAP_FE)
+ where |= (rule->from == ACT_F_HTTP_REQ ? SMP_VAL_FE_HRQ_HDR : SMP_VAL_FE_HRS_HDR);
+ if (px->cap & PR_CAP_BE)
+ where |= (rule->from == ACT_F_HTTP_REQ ? SMP_VAL_BE_HRQ_HDR : SMP_VAL_BE_HRS_HDR);
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err, "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "table") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(err, "missing table name");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* we copy the table name for now, it will be resolved later */
+ rule->arg.trk_ctr.table.n = strdup(args[cur_arg]);
+ cur_arg++;
+ }
+
+ rule->action = tsc_num;
+ rule->arg.trk_ctr.expr = expr;
+ rule->action_ptr = http_action_track_sc;
+ rule->release_ptr = release_http_track_sc;
+ rule->check_ptr = check_trk_action;
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
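+
+/* Illustrative configuration (the table name, sizes and thresholds are
+ * placeholders):
+ *
+ *   backend per_ip
+ *       stick-table type ip size 100k expire 30s store http_req_rate(10s)
+ *
+ *   frontend fe
+ *       http-request track-sc0 src table per_ip
+ *       http-request deny deny_status 429 if { sc_http_req_rate(0) gt 20 }
+ */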
+
+static enum act_return action_timeout_set_stream_timeout(struct act_rule *rule,
+ struct proxy *px,
+ struct session *sess,
+ struct stream *s,
+ int flags)
+{
+ struct sample *key;
+
+ if (rule->arg.timeout.expr) {
+ key = sample_fetch_as_type(px, sess, s, SMP_OPT_FINAL, rule->arg.timeout.expr, SMP_T_SINT);
+ if (!key)
+ return ACT_RET_CONT;
+
+ stream_set_timeout(s, rule->arg.timeout.type, MS_TO_TICKS(key->data.u.sint));
+ }
+ else {
+ stream_set_timeout(s, rule->arg.timeout.type, MS_TO_TICKS(rule->arg.timeout.value));
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* Parse a "set-timeout" action. Returns ACT_RET_PRS_ERR if parsing error.
+ */
+static enum act_parse_ret parse_http_set_timeout(const char **args,
+ int *orig_arg,
+ struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_timeout_set_stream_timeout;
+ rule->release_ptr = release_timeout_action;
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg] || !*args[cur_arg + 1]) {
+ memprintf(err, "expects exactly 2 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!(px->cap & PR_CAP_BE)) {
+ memprintf(err, "proxy '%s' has no backend capability", px->id);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (cfg_parse_rule_set_timeout(args, cur_arg,
+ &rule->arg.timeout.value,
+ &rule->arg.timeout.type,
+ &rule->arg.timeout.expr,
+ err,
+ px->conf.args.file,
+ px->conf.args.line, &px->conf.args) == -1) {
+ return ACT_RET_PRS_ERR;
+ }
+
+ *orig_arg = cur_arg + 2;
+
+ return ACT_RET_PRS_OK;
+}
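+
+/* Illustrative configuration (values are arbitrary; as checked above, the
+ * proxy needs backend capability for this action):
+ *
+ *   http-request set-timeout server 10s
+ *   http-request set-timeout tunnel 1h if { path_beg /ws }
+ */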
+
+/* This function executes a strict-mode action. It always returns
+ * ACT_RET_CONT.
+ */
+static enum act_return http_action_strict_mode(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+
+ if (rule->action == 0) // strict-mode on
+ msg->flags &= ~HTTP_MSGF_SOFT_RW;
+ else // strict-mode off
+ msg->flags |= HTTP_MSGF_SOFT_RW;
+ return ACT_RET_CONT;
+}
+
+/* Parse a "strict-mode" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_strict_mode(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg]) {
+ memprintf(err, "expects exactly 1 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcasecmp(args[cur_arg], "on") == 0)
+ rule->action = 0; // strict-mode on
+ else if (strcasecmp(args[cur_arg], "off") == 0)
+ rule->action = 1; // strict-mode off
+ else {
+ memprintf(err, "Unexpected value '%s'. Only 'on' and 'off' are supported", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action_ptr = http_action_strict_mode;
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
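+
+/* Illustrative configuration:
+ *
+ *   http-request strict-mode on
+ *   http-after-response strict-mode off
+ */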
+
+/* This function executes a return action. It builds an HTX message from an
+ * errorfile, a raw file or a log-format string, depending on the <.action>
+ * value. On success, it returns ACT_RET_ABRT. If an error occurs, ACT_RET_ERR
+ * is returned.
+ */
+static enum act_return http_action_return(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct channel *req = &s->req;
+
+ s->txn->status = rule->arg.http_reply->status;
+ if (http_reply_message(s, rule->arg.http_reply) == -1)
+ return ACT_RET_ERR;
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ /* let's log the request time */
+ s->logs.tv_request = now;
+ req->analysers &= AN_REQ_FLT_END;
+
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+ }
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= ((rule->from == ACT_F_HTTP_REQ) ? SF_FINST_R : SF_FINST_H);
+
+ return ACT_RET_ABRT;
+}
+
+/* Parse a "return" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error. It relies on http_parse_http_reply() to set
+ * <.arg.http_reply>.
+ */
+static enum act_parse_ret parse_http_return(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ /* Prepare parsing of log-format strings */
+ px->conf.args.ctx = ((rule->from == ACT_F_HTTP_REQ) ? ARGC_HRQ : ARGC_HRS);
+ rule->arg.http_reply = http_parse_http_reply(args, orig_arg, px, 200, err);
+ if (!rule->arg.http_reply)
+ return ACT_RET_PRS_ERR;
+
+ rule->flags |= ACT_FLAG_FINAL;
+ rule->action = ACT_CUSTOM;
+ rule->check_ptr = check_act_http_reply;
+ rule->action_ptr = http_action_return;
+ rule->release_ptr = release_act_http_reply;
+ return ACT_RET_PRS_OK;
+}
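+
+/* Illustrative configuration (status, content type and the ACL are examples):
+ *
+ *   http-request return status 200 content-type "text/plain" string "pong" if { path /ping }
+ */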
+
+/* This function executes a wait-for-body action. It waits for the message
+ * payload for a configured maximum time (.arg.p[0]) and optionally only for
+ * the first <arg.p[1]> bytes (0 means no limit). It relies on the
+ * http_wait_for_msg_body() function. It returns ACT_RET_CONT when the
+ * conditions are met to stop waiting. Otherwise ACT_RET_YIELD is returned to
+ * wait for more data. ACT_RET_INV is returned if a parsing error is raised by
+ * a lower layer, and ACT_RET_ERR if an internal error occurred. Finally,
+ * ACT_RET_ABRT is returned when a timeout occurred.
+ */
+static enum act_return http_action_wait_for_body(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct channel *chn = ((rule->from == ACT_F_HTTP_REQ) ? &s->req : &s->res);
+ unsigned int time = (uintptr_t)rule->arg.act.p[0];
+ unsigned int bytes = (uintptr_t)rule->arg.act.p[1];
+
+ switch (http_wait_for_msg_body(s, chn, time, bytes)) {
+ case HTTP_RULE_RES_CONT:
+ return ACT_RET_CONT;
+ case HTTP_RULE_RES_YIELD:
+ return ACT_RET_YIELD;
+ case HTTP_RULE_RES_BADREQ:
+ return ACT_RET_INV;
+ case HTTP_RULE_RES_ERROR:
+ return ACT_RET_ERR;
+ case HTTP_RULE_RES_ABRT:
+ return ACT_RET_ABRT;
+ default:
+ return ACT_RET_ERR;
+ }
+}
+
+/* Parse a "wait-for-body" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_wait_for_body(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ unsigned int time, bytes;
+ const char *res;
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg]) {
+ memprintf(err, "expects time <time> [ at-least <bytes> ]");
+ return ACT_RET_PRS_ERR;
+ }
+
+ time = UINT_MAX; /* To be sure it is set */
+ bytes = 0; /* default value: wait for the whole body */
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "time") == 0) {
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "missing argument for '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ res = parse_time_err(args[cur_arg+1], &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "time overflow (maximum value is 2147483647 ms or ~24.8 days)");
+ return ACT_RET_PRS_ERR;
+ }
+ if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "time underflow (minimum non-null value is 1 ms)");
+ return ACT_RET_PRS_ERR;
+ }
+ if (res) {
+ memprintf(err, "unexpected character '%c'", *res);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "at-least") == 0) {
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "missing argument for '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ res = parse_size_err(args[cur_arg+1], &bytes);
+ if (res) {
+ memprintf(err, "unexpected character '%c'", *res);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+ }
+ else
+ break;
+ cur_arg++;
+ }
+
+ if (time == UINT_MAX) {
+ memprintf(err, "expects time <time> [ at-least <bytes> ]");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->arg.act.p[0] = (void *)(uintptr_t)time;
+ rule->arg.act.p[1] = (void *)(uintptr_t)bytes;
+
+ *orig_arg = cur_arg;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_wait_for_body;
+ return ACT_RET_PRS_OK;
+}
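+
+/* Illustrative configuration (time and size values are arbitrary):
+ *
+ *   http-request wait-for-body time 1s at-least 1k if { method POST }
+ */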
+
+/************************************************************************/
+/* All supported http-request action keywords must be declared here. */
+/************************************************************************/
+
+static struct action_kw_list http_req_actions = {
+ .kw = {
+ { "add-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "add-header", parse_http_set_header, 0 },
+ { "allow", parse_http_allow, 0 },
+ { "auth", parse_http_auth, 0 },
+ { "capture", parse_http_req_capture, 0 },
+ { "del-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "del-header", parse_http_del_header, 0 },
+ { "del-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "deny", parse_http_deny, 0 },
+ { "disable-l7-retry", parse_http_req_disable_l7_retry, 0 },
+ { "early-hint", parse_http_set_header, 0 },
+ { "normalize-uri", parse_http_normalize_uri, KWF_EXPERIMENTAL },
+ { "redirect", parse_http_redirect, 0 },
+ { "reject", parse_http_action_reject, 0 },
+ { "replace-header", parse_http_replace_header, 0 },
+ { "replace-path", parse_replace_uri, 0 },
+ { "replace-pathq", parse_replace_uri, 0 },
+ { "replace-uri", parse_replace_uri, 0 },
+ { "replace-value", parse_http_replace_header, 0 },
+ { "return", parse_http_return, 0 },
+ { "set-header", parse_http_set_header, 0 },
+ { "set-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "set-method", parse_set_req_line, 0 },
+ { "set-path", parse_set_req_line, 0 },
+ { "set-pathq", parse_set_req_line, 0 },
+ { "set-query", parse_set_req_line, 0 },
+ { "set-uri", parse_set_req_line, 0 },
+ { "strict-mode", parse_http_strict_mode, 0 },
+ { "tarpit", parse_http_deny, 0 },
+ { "track-sc", parse_http_track_sc, KWF_MATCH_PREFIX },
+ { "set-timeout", parse_http_set_timeout, 0 },
+ { "wait-for-body", parse_http_wait_for_body, 0 },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+static struct action_kw_list http_res_actions = {
+ .kw = {
+ { "add-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "add-header", parse_http_set_header, 0 },
+ { "allow", parse_http_allow, 0 },
+ { "capture", parse_http_res_capture, 0 },
+ { "del-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "del-header", parse_http_del_header, 0 },
+ { "del-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "deny", parse_http_deny, 0 },
+ { "redirect", parse_http_redirect, 0 },
+ { "replace-header", parse_http_replace_header, 0 },
+ { "replace-value", parse_http_replace_header, 0 },
+ { "return", parse_http_return, 0 },
+ { "set-header", parse_http_set_header, 0 },
+ { "set-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "set-status", parse_http_set_status, 0 },
+ { "strict-mode", parse_http_strict_mode, 0 },
+ { "track-sc", parse_http_track_sc, KWF_MATCH_PREFIX },
+ { "wait-for-body", parse_http_wait_for_body, 0 },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
+
+static struct action_kw_list http_after_res_actions = {
+ .kw = {
+ { "add-header", parse_http_set_header, 0 },
+ { "allow", parse_http_allow, 0 },
+ { "capture", parse_http_res_capture, 0 },
+ { "del-header", parse_http_del_header, 0 },
+ { "replace-header", parse_http_replace_header, 0 },
+ { "replace-value", parse_http_replace_header, 0 },
+ { "set-header", parse_http_set_header, 0 },
+ { "set-status", parse_http_set_status, 0 },
+ { "strict-mode", parse_http_strict_mode, 0 },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_after_res_keywords_register, &http_after_res_actions);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_ana.c b/src/http_ana.c
new file mode 100644
index 0000000..273fe16
--- /dev/null
+++ b/src/http_ana.c
@@ -0,0 +1,5277 @@
+/*
+ * HTTP protocol analyzer
+ *
+ * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/acl.h>
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/backend.h>
+#include <haproxy/base64.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server-t.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/vars.h>
+
+
+#define TRACE_SOURCE &trace_strm
+
+extern const char *stat_status_codes[];
+
+struct pool_head *pool_head_requri __read_mostly = NULL;
+struct pool_head *pool_head_capture __read_mostly = NULL;
+
+
+static void http_end_request(struct stream *s);
+static void http_end_response(struct stream *s);
+
+static void http_capture_headers(struct htx *htx, char **cap, struct cap_hdr *cap_hdr);
+static int http_del_hdr_value(char *start, char *end, char **from, char *next);
+static size_t http_fmt_req_line(const struct htx_sl *sl, char *str, size_t len);
+static void http_debug_stline(const char *dir, struct stream *s, const struct htx_sl *sl);
+static void http_debug_hdr(const char *dir, struct stream *s, const struct ist n, const struct ist v);
+
+static enum rule_result http_req_get_intercept_rule(struct proxy *px, struct list *def_rules, struct list *rules, struct stream *s);
+static enum rule_result http_res_get_intercept_rule(struct proxy *px, struct list *def_rules, struct list *rules, struct stream *s);
+static enum rule_result http_req_restrict_header_names(struct stream *s, struct htx *htx, struct proxy *px);
+
+static void http_manage_client_side_cookies(struct stream *s, struct channel *req);
+static void http_manage_server_side_cookies(struct stream *s, struct channel *res);
+
+static int http_stats_check_uri(struct stream *s, struct http_txn *txn, struct proxy *backend);
+static int http_handle_stats(struct stream *s, struct channel *req);
+
+static int http_handle_expect_hdr(struct stream *s, struct htx *htx, struct http_msg *msg);
+static int http_reply_100_continue(struct stream *s);
+
+/* This stream analyser waits for a complete HTTP request. It returns 1 if the
+ * processing can continue on next analysers, or zero if it either needs more
+ * data or wants to immediately abort the request (eg: timeout, error, ...). It
+ * is tied to AN_REQ_WAIT_HTTP and may remove itself from s->req.analysers
+ * when it has nothing left to do, and may remove any analyser when it wants to
+ * abort.
+ */
+int http_wait_for_request(struct stream *s, struct channel *req, int an_bit)
+{
+
+ /*
+ * We will analyze a complete HTTP request to check its syntax.
+ *
+ * Once the start line and all headers are received, we may perform a
+ * capture of the error (if any), and we will set a few fields. We also
+ * check for monitor-uri, logging and finally header captures.
+ */
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct htx *htx;
+ struct htx_sl *sl;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ if (unlikely(!IS_HTX_STRM(s))) {
+ /* This is only possible when a TCP stream is upgraded to HTTP.
+ * There is a transition period during which there is no
+ * data. The stream is still in raw mode and the SF_IGNORE flag is
+ * still set. When this happens, the new mux is responsible for
+ * handling all errors. Thus we may leave immediately.
+ */
+ BUG_ON(!(s->flags & SF_IGNORE) || !c_empty(&s->req));
+
+ /* Don't connect for now */
+ channel_dont_connect(req);
+
+ /* A SHUTR at this stage means we are performing a "destructive"
+ * HTTP upgrade (TCP>H2). In this case, we can leave.
+ */
+ if (req->flags & CF_SHUTR) {
+ s->logs.logwait = 0;
+ s->logs.level = 0;
+ channel_abort(&s->req);
+ channel_abort(&s->res);
+ req->analysers &= AN_REQ_FLT_END;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 0;
+ }
+
+ htx = htxbuf(&req->buf);
+
+ /* Parsing errors are caught here */
+ if (htx->flags & (HTX_FL_PARSING_ERROR|HTX_FL_PROCESSING_ERROR)) {
+ stream_inc_http_req_ctr(s);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe);
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ stream_inc_http_err_ctr(s);
+ goto return_bad_req;
+ }
+ else
+ goto return_int_err;
+ }
+
+ /* we're speaking HTTP here, so let's speak HTTP to the client */
+ s->srv_error = http_return_srv_error;
+
+ msg->msg_state = HTTP_MSG_BODY;
+ stream_inc_http_req_ctr(s);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe); /* one more valid request for this FE */
+
+ /* kill the pending keep-alive timeout */
+ req->analyse_exp = TICK_ETERNITY;
+
+ BUG_ON(htx_get_first_type(htx) != HTX_BLK_REQ_SL);
+ sl = http_get_stline(htx);
+
+ /* 0: we might have to print this header in debug mode */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ int32_t pos;
+
+ http_debug_stline("clireq", s, sl);
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ http_debug_hdr("clihdr", s,
+ htx_get_blk_name(htx, blk),
+ htx_get_blk_value(htx, blk));
+ }
+ }
+
+ /*
+ * 1: identify the method and the version. Also set HTTP flags
+ */
+ txn->meth = sl->info.req.meth;
+ if (sl->flags & HTX_SL_F_VER_11)
+ msg->flags |= HTTP_MSGF_VER_11;
+ msg->flags |= HTTP_MSGF_XFER_LEN;
+ if (sl->flags & HTX_SL_F_CLEN)
+ msg->flags |= HTTP_MSGF_CNT_LEN;
+ else if (sl->flags & HTX_SL_F_CHNK)
+ msg->flags |= HTTP_MSGF_TE_CHNK;
+ if (sl->flags & HTX_SL_F_BODYLESS)
+ msg->flags |= HTTP_MSGF_BODYLESS;
+ if (sl->flags & HTX_SL_F_CONN_UPG)
+ msg->flags |= HTTP_MSGF_CONN_UPG;
+
+ /* we can make use of server redirect on GET and HEAD */
+ if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD)
+ s->flags |= SF_REDIRECTABLE;
+ else if (txn->meth == HTTP_METH_OTHER && isteqi(htx_sl_req_meth(sl), ist("PRI"))) {
+ /* PRI is reserved for the HTTP/2 preface */
+ goto return_bad_req;
+ }
+
+ /*
+ * 2: check if the URI matches the monitor_uri. We have to do this for
+ * every request which gets in, because the monitor-uri is defined by
+ * the frontend. If the monitor-uri starts with a '/', the matching is
+ * done against the request's path. Otherwise, the request's uri is
+ * used. It is a workaround to let HTTP/2 health-checks work as
+ * expected.
+ */
+ if (unlikely(isttest(sess->fe->monitor_uri))) {
+ const struct ist monitor_uri = sess->fe->monitor_uri;
+ struct http_uri_parser parser = http_uri_parser_init(htx_sl_req_uri(sl));
+
+ if ((istptr(monitor_uri)[0] == '/' &&
+ isteq(http_parse_path(&parser), monitor_uri)) ||
+ isteq(htx_sl_req_uri(sl), monitor_uri)) {
+ /*
+ * We have found the monitor URI
+ */
+ struct acl_cond *cond;
+
+ s->flags |= SF_MONITOR;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.intercepted_req);
+
+ /* Check if we want to fail this monitor request or not */
+ list_for_each_entry(cond, &sess->fe->mon_fail_cond, list) {
+ int ret = acl_exec_cond(cond, sess->fe, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+
+ ret = acl_pass(ret);
+ if (cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (ret) {
+ /* we fail this request, let's return 503 service unavail */
+ txn->status = 503;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL; /* we don't want a real error here */
+ goto return_prx_cond;
+ }
+ }
+
+ /* nothing to fail, let's reply normally */
+ txn->status = 200;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL; /* we don't want a real error here */
+ goto return_prx_cond;
+ }
+ }
+
+ /*
+ * 3: Maybe we have to copy the original REQURI for the logs?
+ * Note: we cannot log anymore if the request has been
+ * classified as invalid.
+ */
+ if (unlikely(s->logs.logwait & LW_REQ)) {
+ /* we have a complete HTTP request that we must log */
+ if ((txn->uri = pool_alloc(pool_head_requri)) != NULL) {
+ size_t len;
+
+ len = http_fmt_req_line(sl, txn->uri, global.tune.requri_len - 1);
+ txn->uri[len] = 0;
+
+ if (!(s->logs.logwait &= ~(LW_REQ|LW_INIT)))
+ s->do_log(s);
+ } else {
+ ha_alert("HTTP logging : out of memory.\n");
+ }
+ }
+
+ /* if the frontend has "option http-use-proxy-header", we'll check if
+ * we have what looks like a proxied connection instead of a connection,
+ * and in this case set the TX_USE_PX_CONN flag to use Proxy-connection.
+ * Note that this is *not* RFC-compliant, however browsers and proxies
+ * happen to do that despite being non-standard :-(
+ * We consider that a request not beginning with either '/' or '*' is
+ * a proxied connection, which covers both "scheme://location" and
+ * CONNECT ip:port.
+ */
+ if ((sess->fe->options2 & PR_O2_USE_PXHDR) &&
+ *HTX_SL_REQ_UPTR(sl) != '/' && *HTX_SL_REQ_UPTR(sl) != '*')
+ txn->flags |= TX_USE_PX_CONN;
+
+ /* 5: we may need to capture headers */
+ if (unlikely((s->logs.logwait & LW_REQHDR) && s->req_cap))
+ http_capture_headers(htx, s->req_cap, sess->fe->req_cap);
+
+ /* we may have to wait for the request's body */
+ if (s->be->options & PR_O_WREQ_BODY)
+ req->analysers |= AN_REQ_HTTP_BODY;
+
+ /*
+ * RFC7234#4:
+ * A cache MUST write through requests with methods
+ * that are unsafe (Section 4.2.1 of [RFC7231]) to
+ * the origin server; i.e., a cache is not allowed
+ * to generate a reply to such a request before
+ * having forwarded the request and having received
+ * a corresponding response.
+ *
+ * RFC7231#4.2.1:
+ * Of the request methods defined by this
+ * specification, the GET, HEAD, OPTIONS, and TRACE
+ * methods are defined to be safe.
+ */
+ if (likely(txn->meth == HTTP_METH_GET ||
+ txn->meth == HTTP_METH_HEAD ||
+ txn->meth == HTTP_METH_OPTIONS ||
+ txn->meth == HTTP_METH_TRACE))
+ txn->flags |= TX_CACHEABLE | TX_CACHE_COOK;
+
+ /* end of job, return OK */
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ return_int_err:
+ txn->status = 500;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ goto return_prx_cond;
+
+ return_bad_req:
+ txn->status = 400;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ /* fall through */
+
+ return_prx_cond:
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
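+
+/* Illustrative monitor-uri configuration (the URI and condition are examples):
+ *
+ *   monitor-uri /healthz
+ *   monitor fail if { nbsrv(be_app) lt 1 }
+ */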
+
+/* This stream analyser runs all HTTP request processing which is common to
+ * frontends and backends, which means blocking ACLs, filters, connection-close,
+ * reqadd, stats and redirects. This is performed for the designated proxy.
+ * It returns 1 if the processing can continue on next analysers, or zero if it
+ * either needs more data or wants to immediately abort the request (eg: deny,
+ * error, ...).
+ */
+int http_process_req_common(struct stream *s, struct channel *req, int an_bit, struct proxy *px)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct htx *htx;
+ struct redirect_rule *rule;
+ enum rule_result verdict;
+ struct connection *conn = objt_conn(sess->origin);
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&req->buf);
+
+ /* just in case we have some per-backend tracking. Only done on the
+ * first execution of the analyser. */
+ if (!s->current_rule && !s->current_rule_list)
+ stream_inc_be_http_req_ctr(s);
+
+ def_rules = ((px->defpx && (an_bit == AN_REQ_HTTP_PROCESS_FE || px != sess->fe)) ? &px->defpx->http_req_rules : NULL);
+ rules = &px->http_req_rules;
+
+ /* evaluate http-request rules */
+ if ((def_rules && !LIST_ISEMPTY(def_rules)) || !LIST_ISEMPTY(rules)) {
+ verdict = http_req_get_intercept_rule(px, def_rules, rules, s);
+
+ switch (verdict) {
+ case HTTP_RULE_RES_YIELD: /* some data miss, call the function later. */
+ goto return_prx_yield;
+
+ case HTTP_RULE_RES_CONT:
+ case HTTP_RULE_RES_STOP: /* nothing to do */
+ break;
+
+ case HTTP_RULE_RES_DENY: /* deny or tarpit */
+ if (txn->flags & TX_CLTARPIT)
+ goto tarpit;
+ goto deny;
+
+ case HTTP_RULE_RES_ABRT: /* abort request, response already sent. Eg: auth */
+ goto return_prx_cond;
+
+ case HTTP_RULE_RES_DONE: /* OK, but terminate request processing (eg: redirect) */
+ goto done;
+
+ case HTTP_RULE_RES_BADREQ: /* failed with a bad request */
+ goto return_bad_req;
+
+ case HTTP_RULE_RES_ERROR: /* failed with an internal error */
+ goto return_int_err;
+ }
+ }
+
+ if (px->options2 & (PR_O2_RSTRICT_REQ_HDR_NAMES_BLK|PR_O2_RSTRICT_REQ_HDR_NAMES_DEL)) {
+ verdict = http_req_restrict_header_names(s, htx, px);
+ if (verdict == HTTP_RULE_RES_DENY)
+ goto deny;
+ }
+
+ if (conn && (conn->flags & CO_FL_EARLY_DATA) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_SSL_WAIT_HS))) {
+ struct http_hdr_ctx ctx;
+
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("Early-Data"), &ctx, 0)) {
+ if (unlikely(!http_add_header(htx, ist("Early-Data"), ist("1"))))
+ goto return_fail_rewrite;
+ }
+ }
+
+ /* OK at this stage, we know that the request was accepted according to
+ * the http-request rules, we can check for the stats. Note that the
+ * URI is detected *before* the req* rules in order not to be affected
+ * by a possible reqrep, while they are processed *after* so that a
+ * reqdeny can still block them. This clearly needs to change in 1.6!
+ */
+ if (!s->target && http_stats_check_uri(s, txn, px)) {
+ s->target = &http_stats_applet.obj_type;
+ if (unlikely(!sc_applet_create(s->scb, objt_applet(s->target)))) {
+ s->logs.tv_request = now;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto return_int_err;
+ }
+
+ /* parse the whole stats request and extract the relevant information */
+ http_handle_stats(s, req);
+ verdict = http_req_get_intercept_rule(px, NULL, &px->uri_auth->http_req_rules, s);
+ /* not all actions implemented: deny, allow, auth */
+
+ if (verdict == HTTP_RULE_RES_DENY) /* stats http-request deny */
+ goto deny;
+
+ if (verdict == HTTP_RULE_RES_ABRT) /* stats auth / stats http-request auth */
+ goto return_prx_cond;
+
+ if (verdict == HTTP_RULE_RES_BADREQ) /* failed with a bad request */
+ goto return_bad_req;
+
+ if (verdict == HTTP_RULE_RES_ERROR) /* failed with an internal error */
+ goto return_int_err;
+ }
+
+ /* Proceed with the applets now. */
+ if (unlikely(objt_applet(s->target))) {
+ if (sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.intercepted_req);
+
+ if (http_handle_expect_hdr(s, htx, msg) == -1)
+ goto return_int_err;
+
+ if (!(s->flags & SF_ERR_MASK)) // this is not really an error but it is
+ s->flags |= SF_ERR_LOCAL; // to mark that it comes from the proxy
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ if (HAS_FILTERS(s))
+ req->analysers |= AN_REQ_FLT_HTTP_HDRS;
+
+ /* enable the minimally required analyzers to handle keep-alive and compression on the HTTP response */
+ req->analysers &= (AN_REQ_HTTP_BODY | AN_REQ_FLT_HTTP_HDRS | AN_REQ_FLT_END);
+ req->analysers &= ~AN_REQ_FLT_XFER_DATA;
+ req->analysers |= AN_REQ_HTTP_XFER_BODY;
+
+ req->flags |= CF_SEND_DONTWAIT;
+ s->flags |= SF_ASSIGNED;
+ goto done;
+ }
+
+ /* check whether we have some ACLs set to redirect this request */
+ list_for_each_entry(rule, &px->redirect_rules, list) {
+ if (rule->cond) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ if (!ret)
+ continue;
+ }
+ if (!http_apply_redirect_rule(rule, s, txn))
+ goto return_int_err;
+ goto done;
+ }
+
+ /* POST requests may be accompanied by an "Expect: 100-Continue" header.
+ * If this happens, then the data will not come immediately, so we must
+ * send everything we have without waiting. Note that, given the small
+ * gain from waiting for the request body, it's easier to simply set the
+ * CF_SEND_DONTWAIT flag every time. It's a one-shot flag so it will
+ * remove itself once used.
+ */
+ req->flags |= CF_SEND_DONTWAIT;
+
+ done: /* done with this analyser, continue with next ones that the calling
+ * points will have set, if any.
+ */
+ req->analyse_exp = TICK_ETERNITY;
+ done_without_exp: /* done with this analyser, but don't reset the analyse_exp. */
+ req->analysers &= ~an_bit;
+ s->current_rule = s->current_rule_list = NULL;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ tarpit:
+ /* Allow cookie logging */
+ if (s->be->cookie_name || sess->fe->capture_name)
+ http_manage_client_side_cookies(s, req);
+
+ /* When a connection is tarpitted, we use the tarpit timeout,
+ * which may be the same as the connect timeout if unspecified.
+ * If unset, then set it to zero because we really want it to
+ * eventually expire. We build the tarpit as an analyser.
+ */
+ channel_htx_erase(&s->req, htx);
+
+ /* wipe the request out so that we can drop the connection early
+ * if the client closes first.
+ */
+ channel_dont_connect(req);
+
+ req->analysers &= AN_REQ_FLT_END; /* remove switching rules etc... */
+ req->analysers |= AN_REQ_HTTP_TARPIT;
+ req->analyse_exp = tick_add_ifset(now_ms, s->be->timeout.tarpit);
+ if (!req->analyse_exp)
+ req->analyse_exp = tick_add(now_ms, 0);
+ stream_inc_http_err_ctr(s);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+ goto done_without_exp;
+
+ deny: /* this request was blocked (denied) */
+
+ /* Allow cookie logging */
+ if (s->be->cookie_name || sess->fe->capture_name)
+ http_manage_client_side_cookies(s, req);
+
+ s->logs.tv_request = now;
+ stream_inc_http_err_ctr(s);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+ goto return_prx_err;
+
+ return_fail_rewrite:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ /* fall through */
+
+ return_int_err:
+ txn->status = 500;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ goto return_prx_err;
+
+ return_bad_req:
+ txn->status = 400;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ /* fall through */
+
+ return_prx_err:
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ /* fall through */
+
+ return_prx_cond:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ req->analysers &= AN_REQ_FLT_END;
+ req->analyse_exp = TICK_ETERNITY;
+ s->current_rule = s->current_rule_list = NULL;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+
+ return_prx_yield:
+ channel_dont_connect(req);
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
+
+/* This function performs all the processing enabled for the current request.
+ * It returns 1 if the processing can continue on next analysers, or zero if it
+ * needs more data, encounters an error, or wants to immediately abort the
+ * request. It relies on buffers flags, and updates s->req.analysers.
+ */
+int http_process_request(struct stream *s, struct channel *req, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct connection *cli_conn = objt_conn(strm_sess(s)->origin);
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+
+ /*
+ * Right now, we know that we have processed the entire headers
+ * and that unwanted requests have been filtered out. We can do
+ * whatever we want with the remaining request. Also, now we
+ * may have separate values for ->fe, ->be.
+ */
+ htx = htxbuf(&req->buf);
+
+ /*
+ * 7: Now we can work with the cookies.
+ * Note that doing so might move headers in the request, but
+ * the fields will stay coherent and the URI will not move.
+ * This should only be performed in the backend.
+ */
+ if (s->be->cookie_name || sess->fe->capture_name)
+ http_manage_client_side_cookies(s, req);
+
+ /* 8: Generate unique ID if a "unique-id-format" is defined.
+ *
+ * A unique ID is generated even when it is not sent to ensure that the ID can make use of
+ * fetches only available in the HTTP request processing stage.
+ */
+ if (!LIST_ISEMPTY(&sess->fe->format_unique_id)) {
+ struct ist unique_id = stream_generate_unique_id(s, &sess->fe->format_unique_id);
+
+ if (!isttest(unique_id)) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto return_int_err;
+ }
+
+ /* send unique ID if a "unique-id-header" is defined */
+ if (isttest(sess->fe->header_unique_id) &&
+ unlikely(!http_add_header(htx, sess->fe->header_unique_id, unique_id)))
+ goto return_fail_rewrite;
+ }
+
+ /*
+ * 9: add X-Forwarded-For if either the frontend or the backend
+ * asks for it.
+ */
+ if ((sess->fe->options | s->be->options) & PR_O_FWDFOR) {
+ const struct sockaddr_storage *src = sc_src(s->scf);
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct ist hdr = isttest(s->be->fwdfor_hdr_name) ? s->be->fwdfor_hdr_name : sess->fe->fwdfor_hdr_name;
+
+ if (!((sess->fe->options | s->be->options) & PR_O_FF_ALWAYS) &&
+ http_find_header(htx, hdr, &ctx, 0)) {
+ /* The header is to be added only if none is present,
+ * and we found one, so don't do anything.
+ */
+ }
+ else if (src && src->ss_family == AF_INET) {
+ /* Add an X-Forwarded-For header unless the source IP is
+ * in the 'except' network range.
+ */
+ if (ipcmp2net(src, &sess->fe->except_xff_net) &&
+ ipcmp2net(src, &s->be->except_xff_net)) {
+ unsigned char *pn = (unsigned char *)&((struct sockaddr_in *)src)->sin_addr;
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-forwarded-for, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ chunk_printf(&trash, "%d.%d.%d.%d", pn[0], pn[1], pn[2], pn[3]);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ goto return_fail_rewrite;
+ }
+ }
+ else if (src && src->ss_family == AF_INET6) {
+ /* Add an X-Forwarded-For header unless the source IP is
+ * in the 'except' network range.
+ */
+ if (ipcmp2net(src, &sess->fe->except_xff_net) &&
+ ipcmp2net(src, &s->be->except_xff_net)) {
+ char pn[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6,
+ (const void *)&((struct sockaddr_in6 *)(src))->sin6_addr,
+ pn, sizeof(pn));
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-forwarded-for, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ chunk_printf(&trash, "%s", pn);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ goto return_fail_rewrite;
+ }
+ }
+ }
+
+ /*
+ * 10: add X-Original-To if either the frontend or the backend
+ * asks for it.
+ */
+ if ((sess->fe->options | s->be->options) & PR_O_ORGTO) {
+ const struct sockaddr_storage *dst = sc_dst(s->scf);
+ struct ist hdr = isttest(s->be->orgto_hdr_name) ? s->be->orgto_hdr_name : sess->fe->orgto_hdr_name;
+
+ if (dst && dst->ss_family == AF_INET) {
+ /* Add an X-Original-To header unless the destination IP is
+ * in the 'except' network range.
+ */
+ if (ipcmp2net(dst, &sess->fe->except_xot_net) &&
+ ipcmp2net(dst, &s->be->except_xot_net)) {
+ unsigned char *pn = (unsigned char *)&((struct sockaddr_in *)dst)->sin_addr;
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-original-to, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ chunk_printf(&trash, "%d.%d.%d.%d", pn[0], pn[1], pn[2], pn[3]);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ goto return_fail_rewrite;
+ }
+ }
+ else if (dst && dst->ss_family == AF_INET6) {
+ /* Add an X-Original-To header unless the destination IP is
+ * in the 'except' network range.
+ */
+ if (ipcmp2net(dst, &sess->fe->except_xot_net) &&
+ ipcmp2net(dst, &s->be->except_xot_net)) {
+ char pn[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6,
+ (const void *)&((struct sockaddr_in6 *)dst)->sin6_addr,
+ pn, sizeof(pn));
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-original-to, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ */
+ chunk_printf(&trash, "%s", pn);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ goto return_fail_rewrite;
+ }
+ }
+ }
+
+ /* Filter the request headers if there are filters attached to the
+ * stream.
+ */
+ if (HAS_FILTERS(s))
+ req->analysers |= AN_REQ_FLT_HTTP_HDRS;
+
+ /* If we have no server assigned yet and we're balancing on url_param
+ * with a POST request, we may be interested in checking the body for
+ * that parameter. This will be done in another analyser.
+ */
+ if (!(s->flags & (SF_ASSIGNED|SF_DIRECT)) &&
+ s->txn->meth == HTTP_METH_POST &&
+ (s->be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_PH) {
+ channel_dont_connect(req);
+ req->analysers |= AN_REQ_HTTP_BODY;
+ }
+
+ req->analysers &= ~AN_REQ_FLT_XFER_DATA;
+ req->analysers |= AN_REQ_HTTP_XFER_BODY;
+
+ /* We expect some data from the client. Unless we know for sure
+ * we already have a full request, we have to re-enable quick-ack
+ * in case we previously disabled it, otherwise we might cause
+ * the client to delay further data.
+ */
+ if ((sess->listener && (sess->listener->options & LI_O_NOQUICKACK)) && !(htx->flags & HTX_FL_EOM))
+ conn_set_quickack(cli_conn, 1);
+
+ /*************************************************************
+ * OK, that's finished for the headers. We have done what we *
+ * could. Let's switch to the DATA state. *
+ ************************************************************/
+ req->analyse_exp = TICK_ETERNITY;
+ req->analysers &= ~an_bit;
+
+ s->logs.tv_request = now;
+ /* OK let's go on with the BODY now */
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ return_fail_rewrite:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ /* fall through */
+
+ return_int_err:
+ txn->status = 500;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
+
+/* This function is an analyser which processes the HTTP tarpit. It always
+ * returns zero, at the beginning because it prevents any other processing
+ * from occurring, and at the end because it terminates the request.
+ */
+int http_process_tarpit(struct stream *s, struct channel *req, int an_bit)
+{
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, &txn->req);
+ /* This connection is being tarpitted. The CLIENT side has
+ * already set the connect expiration date to the right
+ * timeout. We just have to check that the client is still
+ * there and that the timeout has not expired.
+ */
+ channel_dont_connect(req);
+ if ((req->flags & (CF_SHUTR|CF_READ_ERROR)) == 0 &&
+ !tick_is_expired(req->analyse_exp, now_ms)) {
+ /* Be sure to drain all data from the request channel */
+ channel_htx_erase(req, htxbuf(&req->buf));
+ DBG_TRACE_DEVEL("waiting for tarpit timeout expiry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+ /* We will set the queue timer to the time spent, just for
+ * logging purposes. We fake a 500 server error, so that the
+ * attacker will not suspect his connection has been tarpitted.
+ * It will not cause trouble to the logs because we can exclude
+ * the tarpitted connections by filtering on the 'PT' status flags.
+ */
+ s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
+
+ http_reply_and_close(s, txn->status, (!(req->flags & CF_READ_ERROR) ? http_error_message(s) : NULL));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_T;
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
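+
+/* Illustrative configuration sketch (editorial addition, not part of this
+ * file): tarpitting is typically driven from the proxy section, e.g.:
+ *
+ *   timeout tarpit  15s
+ *   http-request tarpit if { src -f abusers.lst }
+ *
+ * ("abusers.lst" is a made-up file name.)
+ */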
+
+/* This function is an analyser which waits for the HTTP request body. It waits
+ * for either the buffer to be full, or the full advertised contents to have
+ * reached the buffer. It must only be called after the standard HTTP request
+ * processing has occurred, because it expects the request to be parsed and will
+ * look for the Expect header. It may send a 100-Continue interim response. It
+ * accepts as input any state starting from HTTP_MSG_BODY and leaves with one of
+ * HTTP_MSG_CHK_SIZE, HTTP_MSG_DATA or HTTP_MSG_TRAILERS. It returns zero if it
+ * needs to read more data, or 1 once it has completed its analysis.
+ */
+int http_wait_for_request_body(struct stream *s, struct channel *req, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, &s->txn->req);
+
+ switch (http_wait_for_msg_body(s, req, s->be->timeout.httpreq, 0)) {
+ case HTTP_RULE_RES_CONT:
+ goto http_end;
+ case HTTP_RULE_RES_YIELD:
+ goto missing_data_or_waiting;
+ case HTTP_RULE_RES_BADREQ:
+ goto return_bad_req;
+ case HTTP_RULE_RES_ERROR:
+ goto return_int_err;
+ case HTTP_RULE_RES_ABRT:
+ goto return_prx_cond;
+ default:
+ goto return_int_err;
+ }
+
+ http_end:
+ /* The situation will not evolve, so let's give up on the analysis. */
+ s->logs.tv_request = now; /* update the request timer to reflect full request */
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ missing_data_or_waiting:
+ channel_dont_connect(req);
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ return_int_err:
+ txn->status = 500;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ goto return_prx_err;
+
+ return_bad_req: /* let's centralize all bad requests */
+ txn->status = 400;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ /* fall through */
+
+ return_prx_err:
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ /* fall through */
+
+ return_prx_cond:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ req->analysers &= AN_REQ_FLT_END;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
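+
+/* Editorial note (assumption): this analyser is typically armed by
+ * "option http-buffer-request" or by an "http-request wait-for-body"
+ * rule in the proxy configuration.
+ */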
+
+/* This function is an analyser which forwards request body (including chunk
+ * sizes if any). It is called as soon as we must forward, even if we forward
+ * zero bytes. The only situation where it must not be called is when we're in
+ * tunnel mode and we want to forward till the close. It's used both to forward
+ * remaining data and to resync after end of body. It expects the msg_state to
+ * be between MSG_BODY and MSG_DONE (inclusive). It returns zero if it needs to
+ * read more data, or 1 once we can go on with next request or end the stream.
+ * When in MSG_DATA or MSG_TRAILERS, it will automatically forward chunk_len
+ * bytes of pending data + the headers if not already done.
+ */
+int http_request_forward_body(struct stream *s, struct channel *req, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct htx *htx;
+ short status = 0;
+ int ret;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&req->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ goto return_bad_req;
+ if (htx->flags & HTX_FL_PROCESSING_ERROR)
+ goto return_int_err;
+
+ if ((req->flags & (CF_READ_ERROR|CF_READ_TIMEOUT|CF_WRITE_ERROR|CF_WRITE_TIMEOUT)) ||
+ ((req->flags & CF_SHUTW) && (req->to_forward || co_data(req)))) {
+ /* Output closed while we were sending data. We must abort and
+ * wake the other side up.
+ *
+ * If we have finished sending the request and the response is
+ * still in progress, don't catch write error on the request
+ * side if it is in fact a read error on the server side.
+ */
+ if (msg->msg_state == HTTP_MSG_DONE && (s->res.flags & CF_READ_ERROR) && s->res.analysers)
+ return 0;
+
+ /* Don't abort yet if we had L7 retries activated and it
+ * was a write error; we may recover.
+ */
+ if (!(req->flags & (CF_READ_ERROR | CF_READ_TIMEOUT)) &&
+ (txn->flags & TX_L7_RETRY)) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+ msg->msg_state = HTTP_MSG_ERROR;
+ http_end_request(s);
+ http_end_response(s);
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 1;
+ }
+
+ /* Note that we don't have to send 100-continue back because we don't
+ * need the data to complete our job, and it's up to the server to
+ * decide whether to return 100, 417 or anything else in response to
+ * an "Expect: 100-continue" header.
+ */
+ if (msg->msg_state == HTTP_MSG_BODY)
+ msg->msg_state = HTTP_MSG_DATA;
+
+ /* in most states, we should abort in case of early close */
+ channel_auto_close(req);
+
+ if (req->to_forward) {
+ if (req->to_forward == CHN_INFINITE_FORWARD) {
+ if (req->flags & CF_EOI)
+ msg->msg_state = HTTP_MSG_ENDING;
+ }
+ else {
+ /* We can't process the buffer's contents yet */
+ req->flags |= CF_WAKE_WRITE;
+ goto missing_data_or_waiting;
+ }
+ }
+
+ if (msg->msg_state >= HTTP_MSG_ENDING)
+ goto ending;
+
+ if (txn->meth == HTTP_METH_CONNECT) {
+ msg->msg_state = HTTP_MSG_ENDING;
+ goto ending;
+ }
+
+ /* Forward input data. We get it by removing all outgoing data not
+ * forwarded yet from HTX data size. If there are some data filters, we
+ * let them decide the amount of data to forward.
+ */
+ if (HAS_REQ_DATA_FILTERS(s)) {
+ ret = flt_http_payload(s, msg, htx->data);
+ if (ret < 0)
+ goto return_bad_req;
+ c_adv(req, ret);
+ }
+ else {
+ c_adv(req, htx->data - co_data(req));
+ if (msg->flags & HTTP_MSGF_XFER_LEN)
+ channel_htx_forward_forever(req, htx);
+ }
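+ /* Descriptive note (editorial addition): c_adv() schedules bytes for
+ * output without copying them, and channel_htx_forward_forever() lets
+ * all subsequent data be forwarded automatically without waking this
+ * analyser up for each new block.
+ */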
+
+ if (htx->data != co_data(req))
+ goto missing_data_or_waiting;
+
+ /* Check if the end-of-message is reached and if so, switch the message
+ * in HTTP_MSG_ENDING state. Then if all data was marked to be
+ * forwarded, set the state to HTTP_MSG_DONE.
+ */
+ if (!(htx->flags & HTX_FL_EOM))
+ goto missing_data_or_waiting;
+
+ msg->msg_state = HTTP_MSG_ENDING;
+
+ ending:
+ req->flags &= ~CF_EXPECT_MORE; /* no more data are expected */
+
+ /* other states, ENDING...TUNNEL */
+ if (msg->msg_state >= HTTP_MSG_DONE)
+ goto done;
+
+ if (HAS_REQ_DATA_FILTERS(s)) {
+ ret = flt_http_end(s, msg);
+ if (ret <= 0) {
+ if (!ret)
+ goto missing_data_or_waiting;
+ goto return_bad_req;
+ }
+ }
+
+ if (txn->meth == HTTP_METH_CONNECT)
+ msg->msg_state = HTTP_MSG_TUNNEL;
+ else {
+ msg->msg_state = HTTP_MSG_DONE;
+ req->to_forward = 0;
+ }
+
+ done:
+ /* we don't want to forward closes on DONE except in tunnel mode. */
+ if (!(txn->flags & TX_CON_WANT_TUN))
+ channel_dont_close(req);
+
+ http_end_request(s);
+ if (!(req->analysers & an_bit)) {
+ http_end_response(s);
+ if (unlikely(msg->msg_state == HTTP_MSG_ERROR)) {
+ if (req->flags & CF_SHUTW) {
+ /* request errors are most likely due to the
+ * server aborting the transfer. */
+ goto return_srv_abort;
+ }
+ goto return_bad_req;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+ }
+
+ /* If "option abortonclose" is set on the backend, we want to monitor
+ * the client's connection and forward any shutdown notification to the
+ * server, which will decide whether to close or to go on processing the
+ * request. We only do that in tunnel mode, and not in other modes since
+ * it can be abused to exhaust source ports. */
+ if (s->be->options & PR_O_ABRT_CLOSE) {
+ channel_auto_read(req);
+ if ((req->flags & (CF_SHUTR|CF_READ_NULL)) && !(txn->flags & TX_CON_WANT_TUN))
+ s->scb->flags |= SC_FL_NOLINGER;
+ channel_auto_close(req);
+ }
+ else if (s->txn->meth == HTTP_METH_POST) {
+ /* POST requests may require us to read an extra CRLF sent by broken
+ * browsers, which could cause an RST to be sent upon close
+ * on some systems (eg: Linux). */
+ channel_auto_read(req);
+ }
+ DBG_TRACE_DEVEL("waiting for the end of the HTTP txn",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ missing_data_or_waiting:
+ /* stop waiting for data if the input is closed before the end */
+ if (msg->msg_state < HTTP_MSG_ENDING && req->flags & CF_SHUTR)
+ goto return_cli_abort;
+
+ waiting:
+ /* waiting for the last bits to leave the buffer */
+ if (req->flags & CF_SHUTW)
+ goto return_srv_abort;
+
+ /* When TE: chunked is used, we need to get there again to parse remaining
+ * chunks even if the client has closed, so we don't want to set CF_DONTCLOSE.
+ * And when content-length is used, we never want to let the possible
+ * shutdown be forwarded to the other side, as the state machine will
+ * take care of it once the client responds. It's also important to
+ * prevent TIME_WAITs from accumulating on the backend side, and for
+ * HTTP/2 where the last frame comes with a shutdown.
+ */
+ if (msg->flags & HTTP_MSGF_XFER_LEN)
+ channel_dont_close(req);
+
+ /* We know that more data are expected, but we couldn't send more than
+ * what we did. So we always set the CF_EXPECT_MORE flag so that the
+ * system knows it must not set a PUSH on this first part. Interactive
+ * modes are already handled by the stream sock layer. We must not do
+ * this in content-length mode because it could present the MSG_MORE
+ * flag with the last block of forwarded data, which would cause an
+ * additional delay to be observed by the receiver.
+ */
+ if (HAS_REQ_DATA_FILTERS(s))
+ req->flags |= CF_EXPECT_MORE;
+
+ DBG_TRACE_DEVEL("waiting for more data to forward",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ return_cli_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.cli_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ status = 400;
+ goto return_prx_cond;
+
+ return_srv_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.srv_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ status = 502;
+ goto return_prx_cond;
+
+ return_int_err:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ status = 500;
+ goto return_prx_cond;
+
+ return_bad_req:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ status = 400;
+ /* fall through */
+
+ return_prx_cond:
+ if (txn->status > 0) {
+ /* Note: we don't send any error if some data were already sent */
+ http_reply_and_close(s, txn->status, NULL);
+ } else {
+ txn->status = status;
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ }
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= ((txn->rsp.msg_state < HTTP_MSG_ERROR) ? SF_FINST_H : SF_FINST_D);
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
+
+/* Reset the stream and the backend stream connector to a situation suitable for attempting a new connection */
+/* Returns 0 if we can attempt to retry, -1 otherwise */
+static __inline int do_l7_retry(struct stream *s, struct stconn *sc)
+{
+ struct channel *req, *res;
+ int co_data;
+
+ if (s->conn_retries >= s->be->conn_retries)
+ return -1;
+ s->conn_retries++;
+ if (objt_server(s->target)) {
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.retries);
+ }
+ _HA_ATOMIC_INC(&s->be->be_counters.retries);
+
+ req = &s->req;
+ res = &s->res;
+ /* Remove any write error from the request, and read error from the response */
+ req->flags &= ~(CF_WRITE_ERROR | CF_WRITE_TIMEOUT | CF_SHUTW | CF_SHUTW_NOW);
+ res->flags &= ~(CF_READ_ERROR | CF_READ_TIMEOUT | CF_SHUTR | CF_EOI | CF_READ_NULL | CF_SHUTR_NOW);
+ res->analysers &= AN_RES_FLT_END;
+ s->conn_err_type = STRM_ET_NONE;
+ s->flags &= ~(SF_CONN_EXP | SF_ERR_MASK | SF_FINST_MASK);
+ s->conn_exp = TICK_ETERNITY;
+ stream_choose_redispatch(s);
+ res->rex = TICK_ETERNITY;
+ res->to_forward = 0;
+ res->analyse_exp = TICK_ETERNITY;
+ res->total = 0;
+
+ if (sc_reset_endp(s->scb) < 0) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ return -1;
+ }
+
+ b_free(&req->buf);
+ /* Swap the L7 buffer with the channel buffer */
+ /* We know we stored the co_data as b_data, so get it there */
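+ /* Editorial note: b_xfer() moves the saved request back into the
+ * channel buffer, and co_set_data() re-marks the part that had already
+ * been scheduled for sending as output data again.
+ */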
+ co_data = b_data(&s->txn->l7_buffer);
+ b_set_data(&s->txn->l7_buffer, b_size(&s->txn->l7_buffer));
+ b_xfer(&req->buf, &s->txn->l7_buffer, b_data(&s->txn->l7_buffer));
+ co_set_data(req, co_data);
+
+ DBG_TRACE_DEVEL("perform a L7 retry", STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, s->txn);
+
+ b_reset(&res->buf);
+ co_set_data(res, 0);
+ return 0;
+}
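+
+/* Illustrative configuration sketch (editorial addition, not part of this
+ * file): L7 retries are driven by the backend configuration, e.g.:
+ *
+ *   retries  3
+ *   retry-on 502 503 504
+ *
+ * "retries" sets the conn_retries limit checked above, and "retry-on"
+ * sets the PR_RE_* mask tested by the callers of do_l7_retry().
+ */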
+
+/* This stream analyser waits for a complete HTTP response. It returns 1 if the
+ * processing can continue on next analysers, or zero if it either needs more
+ * data or wants to immediately abort the response (eg: timeout, error, ...). It
+ * is tied to AN_RES_WAIT_HTTP and may remove itself from s->res.analysers
+ * when it has nothing left to do, and may remove any analyser when it wants to
+ * abort.
+ */
+int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit)
+{
+ /*
+ * We will analyze a complete HTTP response to check its syntax.
+ *
+ * Once the start line and all headers are received, we may perform a
+ * capture of the error (if any), and we will set a few fields. We also
+ * handle logging and finally the headers capture.
+ */
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct htx *htx;
+ struct connection *srv_conn;
+ struct htx_sl *sl;
+ int n;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&rep->buf);
+
+ /* Parsing errors are caught here */
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ goto return_bad_res;
+ if (htx->flags & HTX_FL_PROCESSING_ERROR)
+ goto return_int_err;
+
+ /*
+ * Now we quickly check if we have found a full valid response.
+ * If not, we check the FD and buffer states before leaving.
+ * A full response is indicated by the fact that we have seen
+ * the double LF/CRLF, so the state is >= HTTP_MSG_BODY. Invalid
+ * responses are checked first.
+ *
+ * Depending on whether the client is still there or not, we
+ * may send an error response back or not. Note that normally
+ * we should only check for HTTP status there, and check I/O
+ * errors somewhere else.
+ */
+ next_one:
+ if (unlikely(htx_is_empty(htx) || htx->first == -1)) {
+ /* 1: have we encountered a read error ? */
+ if (rep->flags & CF_READ_ERROR) {
+ struct connection *conn = sc_conn(s->scb);
+
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_DISCONNECTED) &&
+ (!conn || conn->err_code != CO_ER_SSL_EARLY_FAILED)) {
+ if (co_data(rep) || do_l7_retry(s, s->scb) == 0)
+ return 0;
+ }
+
+ /* Perform an L7 retry on an empty response or when the server refuses the early data. */
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_EARLY_ERROR) &&
+ conn && conn->err_code == CO_ER_SSL_EARLY_FAILED &&
+ do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+ if (txn->flags & TX_NOT_FIRST)
+ goto abort_keep_alive;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_READ_ERROR);
+ }
+
+ /* if the server refused the early data, just send a 425 */
+ if (conn && conn->err_code == CO_ER_SSL_EARLY_FAILED)
+ txn->status = 425;
+ else {
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ }
+
+ s->scb->flags |= SC_FL_NOLINGER;
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_H;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ /* 2: read timeout : return a 504 to the client. */
+ else if (rep->flags & CF_READ_TIMEOUT) {
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_TIMEOUT)) {
+ if (co_data(rep) || do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+ }
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_READ_TIMEOUT);
+ }
+
+ txn->status = 504;
+ stream_inc_http_fail_ctr(s);
+ s->scb->flags |= SC_FL_NOLINGER;
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVTO;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_H;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ /* 3: client abort with an abortonclose */
+ else if ((rep->flags & CF_SHUTR) && ((s->req.flags & (CF_SHUTR|CF_SHUTW)) == (CF_SHUTR|CF_SHUTW))) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.cli_aborts);
+
+ txn->status = 400;
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_H;
+
+ /* process_stream() will take care of the error */
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ /* 4: close from server, capture the response if the server has started to respond */
+ else if (rep->flags & CF_SHUTR) {
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_DISCONNECTED)) {
+ if (co_data(rep) || do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+ }
+
+ if (txn->flags & TX_NOT_FIRST)
+ goto abort_keep_alive;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_BROKEN_PIPE);
+ }
+
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ s->scb->flags |= SC_FL_NOLINGER;
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_H;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ /* 5: write error to client (we don't send any message then) */
+ else if (rep->flags & CF_WRITE_ERROR) {
+ if (txn->flags & TX_NOT_FIRST)
+ goto abort_keep_alive;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ rep->analysers &= AN_RES_FLT_END;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_H;
+
+ /* process_stream() will take care of the error */
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ channel_dont_close(rep);
+ rep->flags |= CF_READ_DONTWAIT; /* try to get back here ASAP */
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+ /* The more interesting part now: we know that we have a complete
+ * response which at least looks like HTTP. We have an indicator
+ * of each header's length, so we can parse them quickly.
+ */
+ BUG_ON(htx_get_first_type(htx) != HTX_BLK_RES_SL);
+ sl = http_get_stline(htx);
+
+ /* Perform a L7 retry because of the status code */
+ if ((txn->flags & TX_L7_RETRY) &&
+ l7_status_match(s->be, sl->info.res.status) &&
+ do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry", STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+ /* Now, L7 buffer is useless, it can be released */
+ b_free(&txn->l7_buffer);
+
+ msg->msg_state = HTTP_MSG_BODY;
+
+ /* 0: we might have to print this header in debug mode */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ int32_t pos;
+
+ http_debug_stline("srvrep", s, sl);
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ http_debug_hdr("srvhdr", s,
+ htx_get_blk_name(htx, blk),
+ htx_get_blk_value(htx, blk));
+ }
+ }
+
+ /* 1: get the status code and the version. Also set HTTP flags */
+ txn->status = sl->info.res.status;
+ if (sl->flags & HTX_SL_F_VER_11)
+ msg->flags |= HTTP_MSGF_VER_11;
+ if (sl->flags & HTX_SL_F_XFER_LEN) {
+ msg->flags |= HTTP_MSGF_XFER_LEN;
+ if (sl->flags & HTX_SL_F_CLEN)
+ msg->flags |= HTTP_MSGF_CNT_LEN;
+ else if (sl->flags & HTX_SL_F_CHNK)
+ msg->flags |= HTTP_MSGF_TE_CHNK;
+ }
+ if (sl->flags & HTX_SL_F_BODYLESS)
+ msg->flags |= HTTP_MSGF_BODYLESS;
+ if (sl->flags & HTX_SL_F_CONN_UPG)
+ msg->flags |= HTTP_MSGF_CONN_UPG;
+
+ n = txn->status / 100;
+ if (n < 1 || n > 5)
+ n = 0;
+
+ /* when the client triggers a 4xx from the server, it's most often due
+ * to a missing object or permission. These events should be tracked
+ * because if they happen often, it may indicate a brute force or a
+ * vulnerability scan.
+ */
+ if (n == 4)
+ stream_inc_http_err_ctr(s);
+
+ if (n == 5 && txn->status != 501 && txn->status != 505)
+ stream_inc_http_fail_ctr(s);
+
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.rsp[n]);
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.cum_req);
+ }
+
+ /* Adjust server's health based on status code. Note: status codes 501
+ * and 505 are triggered on demand by client request, so we must not
+ * count them as server failures.
+ */
+ if (objt_server(s->target)) {
+ if (txn->status >= 100 && (txn->status < 500 || txn->status == 501 || txn->status == 505))
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_OK);
+ else
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_STS);
+ }
+
+ /*
+ * We may be facing a 100-continue response, or any other informational
+ * 1xx response which is non-final, in which case this is not the right
+ * response, and we're waiting for the next one. Let's allow this response
+ * to go to the client and wait for the next one. There's an exception for
+ * 101 which is used later in the code to switch protocols.
+ */
+ if (txn->status < 200 &&
+ (txn->status == 100 || txn->status >= 102)) {
+ FLT_STRM_CB(s, flt_http_reset(s, msg));
+ htx->first = channel_htx_fwd_headers(rep, htx);
+ msg->msg_state = HTTP_MSG_RPBEFORE;
+ msg->flags = 0;
+ txn->status = 0;
+ s->logs.t_data = -1; /* was not a response yet */
+ rep->flags |= CF_SEND_DONTWAIT; /* Send ASAP informational messages */
+ goto next_one;
+ }
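+ /* Illustrative example (editorial addition): a "103 Early Hints"
+ * interim response falls in this range, so it is forwarded to the
+ * client and we loop back to next_one to wait for the final response.
+ */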
+
+ /* A 101-switching-protocols response must contain a Connection header
+ * with the "upgrade" option, and the request too. It means both sides
+ * agree to upgrade. The check is not very strict because there is no
+ * test on the Upgrade header content, but it is probably strong enough
+ * for now.
+ */
+ if (txn->status == 101 &&
+ (!(txn->req.flags & HTTP_MSGF_CONN_UPG) || !(txn->rsp.flags & HTTP_MSGF_CONN_UPG)))
+ goto return_bad_res;
+
+ /*
+ * 2: check for cacheability.
+ */
+
+ switch (txn->status) {
+ case 200:
+ case 203:
+ case 204:
+ case 206:
+ case 300:
+ case 301:
+ case 404:
+ case 405:
+ case 410:
+ case 414:
+ case 501:
+ break;
+ default:
+ /* RFC7231#6.1:
+ * Responses with status codes that are defined as
+ * cacheable by default (e.g., 200, 203, 204, 206,
+ * 300, 301, 404, 405, 410, 414, and 501 in this
+ * specification) can be reused by a cache with
+ * heuristic expiration unless otherwise indicated
+ * by the method definition or explicit cache
+ * controls [RFC7234]; all other status codes are
+ * not cacheable by default.
+ */
+ txn->flags &= ~(TX_CACHEABLE | TX_CACHE_COOK);
+ break;
+ }
+
+ /*
+ * 3: we may need to capture headers
+ */
+ s->logs.logwait &= ~LW_RESP;
+ if (unlikely((s->logs.logwait & LW_RSPHDR) && s->res_cap))
+ http_capture_headers(htx, s->res_cap, sess->fe->rsp_cap);
+
+ /* Skip parsing if no content length is possible. */
+ if (unlikely((txn->meth == HTTP_METH_CONNECT && txn->status >= 200 && txn->status < 300) ||
+ txn->status == 101)) {
+ /* Either we've established an explicit tunnel, or we're
+ * switching the protocol. In both cases, we're very unlikely
+ * to understand the next protocols. We have to switch to tunnel
+ * mode, so that we transfer the request and responses then let
+ * this protocol pass unmodified. When we later implement specific
+ * parsers for such protocols, we'll want to check the Upgrade
+ * header which contains information about that protocol for
+ * responses with status 101 (eg: see RFC2817 about TLS).
+ */
+ txn->flags |= TX_CON_WANT_TUN;
+ }
+
+ /* check for NTLM authentication headers in 401 (WWW-Authenticate) and
+ * 407 (Proxy-Authenticate) responses and set the connection to private
+ */
+ srv_conn = sc_conn(s->scb);
+ if (srv_conn) {
+ struct ist hdr;
+ struct http_hdr_ctx ctx;
+
+ if (txn->status == 401)
+ hdr = ist("WWW-Authenticate");
+ else if (txn->status == 407)
+ hdr = ist("Proxy-Authenticate");
+ else
+ goto end;
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, hdr, &ctx, 0)) {
+ /* If www-authenticate contains "Negotiate", "Nego2", or "NTLM",
+ * possibly followed by blanks and a base64 string, the connection
+ * is private. Since it's a mess to deal with, we only check for
+ * values starting with "NTLM" or "Nego". Note that often multiple
+ * headers are sent by the server there.
+ */
+ if ((ctx.value.len >= 4 && strncasecmp(ctx.value.ptr, "Nego", 4) == 0) ||
+ (ctx.value.len >= 4 && strncasecmp(ctx.value.ptr, "NTLM", 4) == 0)) {
+ sess->flags |= SESS_FL_PREFER_LAST;
+ conn_set_owner(srv_conn, sess, NULL);
+ conn_set_private(srv_conn);
+ /* If it fails now, the same will be done in the mux->detach() callback */
+ session_add_conn(srv_conn->owner, srv_conn, srv_conn->target);
+ break;
+ }
+ }
+ }
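+ /* Illustrative example (editorial addition): a response carrying
+ * "WWW-Authenticate: NTLM TlRMTVNTUAAB..." marks the server connection
+ * as private so that the whole NTLM handshake stays on the same
+ * connection for this session.
+ */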
+
+ end:
+ /* we want to have the response time before we start processing it */
+ s->logs.t_data = tv_ms_elapsed(&s->logs.tv_accept, &now);
+
+ /* end of job, return OK */
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+ channel_auto_close(rep);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ return_int_err:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ txn->status = 500;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ goto return_prx_cond;
+
+ return_bad_res:
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_HDRRSP);
+ }
+ if ((s->be->retry_type & PR_RE_JUNK_REQUEST) &&
+ (txn->flags & TX_L7_RETRY) &&
+ do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ /* fall through */
+
+ return_prx_cond:
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_H;
+
+ s->scb->flags |= SC_FL_NOLINGER;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+
+ abort_keep_alive:
+ /* A keep-alive request to the server failed on a network error.
+ * The client is required to retry. We need to close without returning
+ * any other information so that the client retries.
+ */
+ txn->status = 0;
+ s->logs.logwait = 0;
+ s->logs.level = 0;
+ s->res.flags &= ~CF_EXPECT_MORE; /* speed up sending a previous response */
+ http_reply_and_close(s, txn->status, NULL);
+ DBG_TRACE_DEVEL("leaving by closing K/A connection",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
+
+/* This function performs all the processing enabled for the current response.
+ * It normally returns 1 unless it wants to break. It relies on buffers flags,
+ * and updates s->res.analysers. It might make sense to explode it into several
+ * other functions. It works like process_request (see indications above).
+ */
+int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, struct proxy *px)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct htx *htx;
+ struct proxy *cur_proxy;
+ enum rule_result ret = HTTP_RULE_RES_CONT;
+
+ if (unlikely(msg->msg_state < HTTP_MSG_BODY)) /* we need more data */
+ return 0;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&rep->buf);
+
+ /* The stats applet needs to adjust the Connection header but we don't
+ * apply any filter there.
+ */
+ if (unlikely(objt_applet(s->target) == &http_stats_applet)) {
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+ goto end;
+ }
+
+ /*
+ * We will have to evaluate the filters.
+ * As opposed to version 1.2, now they will be evaluated in the
+ * filters order and not in the header order. This means that
+ * each filter has to be validated among all headers.
+ *
+ * Filters are tried with ->be first, then with ->fe if it is
+ * different from ->be.
+ *
+ * Maybe we are in a resume condition. In this case I choose the
+ * "struct proxy" which contains the rule list matching the resume
+ * pointer. If none of these "struct proxy" match, I initialise
+ * the process with the first one.
+ *
+ * In fact, I check only correspondence between the current list
+ * pointer and the ->fe rule list. If it doesn't match, I initialize
+ * the loop with the ->be.
+ */
+ if (s->current_rule_list == &sess->fe->http_res_rules ||
+ (sess->fe->defpx && s->current_rule_list == &sess->fe->defpx->http_res_rules))
+ cur_proxy = sess->fe;
+ else
+ cur_proxy = s->be;
+
+ while (1) {
+ /* evaluate http-response rules */
+ if (ret == HTTP_RULE_RES_CONT || ret == HTTP_RULE_RES_STOP) {
+ struct list *def_rules, *rules;
+
+ def_rules = ((cur_proxy->defpx && (cur_proxy == s->be || cur_proxy->defpx != s->be->defpx)) ? &cur_proxy->defpx->http_res_rules : NULL);
+ rules = &cur_proxy->http_res_rules;
+
+ ret = http_res_get_intercept_rule(cur_proxy, def_rules, rules, s);
+
+ switch (ret) {
+ case HTTP_RULE_RES_YIELD: /* some data miss, call the function later. */
+ goto return_prx_yield;
+
+ case HTTP_RULE_RES_CONT:
+ case HTTP_RULE_RES_STOP: /* nothing to do */
+ break;
+
+ case HTTP_RULE_RES_DENY: /* deny or tarpit */
+ goto deny;
+
+ case HTTP_RULE_RES_ABRT: /* abort request, response already sent */
+ goto return_prx_cond;
+
+ case HTTP_RULE_RES_DONE: /* OK, but terminate request processing (eg: redirect) */
+ goto done;
+
+ case HTTP_RULE_RES_BADREQ: /* failed with a bad request */
+ goto return_bad_res;
+
+ case HTTP_RULE_RES_ERROR: /* failed with an internal error */
+ goto return_int_err;
+ }
+
+ }
+
+ /* check whether we're already working on the frontend */
+ if (cur_proxy == sess->fe)
+ break;
+ cur_proxy = sess->fe;
+ }
+
+ /* OK that's all we can do for 1xx responses */
+ if (unlikely(txn->status < 200 && txn->status != 101))
+ goto end;
+
+ /*
+ * Now check for a server cookie.
+ */
+ if (s->be->cookie_name || sess->fe->capture_name || (s->be->options & PR_O_CHK_CACHE))
+ http_manage_server_side_cookies(s, rep);
+
+ /*
+ * Check for cache-control or pragma headers if required.
+ */
+ if ((s->be->options & PR_O_CHK_CACHE) || (s->be->ck_opts & PR_CK_NOC))
+ http_check_response_for_cacheability(s, rep);
+
+ /*
+ * Add server cookie in the response if needed
+ */
+ if (objt_server(s->target) && (s->be->ck_opts & PR_CK_INS) &&
+ !((txn->flags & TX_SCK_FOUND) && (s->be->ck_opts & PR_CK_PSV)) &&
+ (!(s->flags & SF_DIRECT) ||
+ ((s->be->cookie_maxidle || txn->cookie_last_date) &&
+ (!txn->cookie_last_date || (txn->cookie_last_date - date.tv_sec) < 0)) ||
+ (s->be->cookie_maxlife && !txn->cookie_first_date) || // set the first_date
+ (!s->be->cookie_maxlife && txn->cookie_first_date)) && // remove the first_date
+ (!(s->be->ck_opts & PR_CK_POST) || (txn->meth == HTTP_METH_POST)) &&
+ !(s->flags & SF_IGNORE_PRST)) {
+ /* the server is known, it's not the one the client requested, or the
+ * cookie's last seen date needs to be refreshed. We have to
+ * insert a set-cookie here, except if we want to insert only on POST
+ * requests and this one isn't. Note that servers which don't have cookies
+ * (eg: some backup servers) will return a full cookie removal request.
+ */
+ if (!__objt_server(s->target)->cookie) {
+ chunk_printf(&trash,
+ "%s=; Expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/",
+ s->be->cookie_name);
+ }
+ else {
+ chunk_printf(&trash, "%s=%s", s->be->cookie_name, __objt_server(s->target)->cookie);
+
+ if (s->be->cookie_maxidle || s->be->cookie_maxlife) {
+ /* emit last_date, which is mandatory */
+ trash.area[trash.data++] = COOKIE_DELIM_DATE;
+ s30tob64((date.tv_sec+3) >> 2,
+ trash.area + trash.data);
+ trash.data += 5;
+
+ if (s->be->cookie_maxlife) {
+ /* emit first_date, which is either the original one or
+ * the current date.
+ */
+ trash.area[trash.data++] = COOKIE_DELIM_DATE;
+ s30tob64(txn->cookie_first_date ?
+ txn->cookie_first_date >> 2 :
+ (date.tv_sec+3) >> 2,
+ trash.area + trash.data);
+ trash.data += 5;
+ }
+ }
+ chunk_appendf(&trash, "; path=/");
+ }
+
+ if (s->be->cookie_domain)
+ chunk_appendf(&trash, "; domain=%s", s->be->cookie_domain);
+
+ if (s->be->ck_opts & PR_CK_HTTPONLY)
+ chunk_appendf(&trash, "; HttpOnly");
+
+ if (s->be->ck_opts & PR_CK_SECURE)
+ chunk_appendf(&trash, "; Secure");
+
+ if (s->be->cookie_attrs)
+ chunk_appendf(&trash, "; %s", s->be->cookie_attrs);
+
+ if (unlikely(!http_add_header(htx, ist("Set-Cookie"), ist2(trash.area, trash.data))))
+ goto return_fail_rewrite;
+
+ txn->flags &= ~TX_SCK_MASK;
+ if (__objt_server(s->target)->cookie && (s->flags & SF_DIRECT))
+ /* the server did not change, only the date was updated */
+ txn->flags |= TX_SCK_UPDATED;
+ else
+ txn->flags |= TX_SCK_INSERTED;
+
+ /* Here, we will tell any cache on the client side that we don't
+ * want it to cache this reply because HTTP/1.0 caches also cache cookies!
+ * Some caches understand the correct form: 'no-cache="set-cookie"', but
+ * others don't (eg: apache <= 1.3.26). So we use 'private' instead.
+ */
+ if ((s->be->ck_opts & PR_CK_NOC) && (txn->flags & TX_CACHEABLE)) {
+
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+
+ if (unlikely(!http_add_header(htx, ist("Cache-control"), ist("private"))))
+ goto return_fail_rewrite;
+ }
+ }
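+ /* Illustrative example (editorial addition, values made up): with
+ * "cookie SRV insert indirect maxidle 30m" in the backend and a server
+ * configured with "cookie s1", the block above typically emits:
+ *
+ *   Set-Cookie: SRV=s1|<last_date>|<first_date>; path=/
+ *
+ * where each date is a 5-char base64 encoding of (time >> 2).
+ */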
+
+ /*
+ * Check if result will be cacheable with a cookie.
+ * We'll block the response if security checks have caught
+ * nasty things such as a cacheable cookie.
+ */
+ if (((txn->flags & (TX_CACHEABLE | TX_CACHE_COOK | TX_SCK_PRESENT)) ==
+ (TX_CACHEABLE | TX_CACHE_COOK | TX_SCK_PRESENT)) &&
+ (s->be->options & PR_O_CHK_CACHE)) {
+ /* we're in presence of a cacheable response containing
+ * a set-cookie header. We'll block it as requested by
+ * the 'checkcache' option, and send an alert.
+ */
+ ha_alert("Blocking cacheable cookie in response from instance %s, server %s.\n",
+ s->be->id, objt_server(s->target) ? __objt_server(s->target)->id : "<dispatch>");
+ send_log(s->be, LOG_ALERT,
+ "Blocking cacheable cookie in response from instance %s, server %s.\n",
+ s->be->id, objt_server(s->target) ? __objt_server(s->target)->id : "<dispatch>");
+ goto deny;
+ }
+
+ end:
+ /*
+ * Evaluate after-response rules before forwarding the response. Rules
+ * from the backend are evaluated first, then those from the frontend if
+ * it differs.
+ */
+ if (!http_eval_after_res_rules(s))
+ goto return_int_err;
+
+ /* Filter the response headers if there are filters attached to the
+ * stream.
+ */
+ if (HAS_FILTERS(s))
+ rep->analysers |= AN_RES_FLT_HTTP_HDRS;
+
+ /* Always enter in the body analyzer */
+ rep->analysers &= ~AN_RES_FLT_XFER_DATA;
+ rep->analysers |= AN_RES_HTTP_XFER_BODY;
+
+ /* if the user wants to log as soon as possible, without counting
+ * bytes from the server, then this is the right moment. We have
+ * to temporarily assign bytes_out to log what we currently have.
+ */
+ if (!LIST_ISEMPTY(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)) {
+ s->logs.t_close = s->logs.t_data; /* to get a valid end date */
+ s->logs.bytes_out = htx->data;
+ s->do_log(s);
+ s->logs.bytes_out = 0;
+ }
+
+ done:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+ s->current_rule = s->current_rule_list = NULL;
+ return 1;
+
+ deny:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_resp);
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_resp);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.denied_resp);
+ goto return_prx_err;
+
+ return_fail_rewrite:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ /* fall through */
+
+ return_int_err:
+ txn->status = 500;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ goto return_prx_err;
+
+ return_bad_res:
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_RSP);
+ }
+ /* fall through */
+
+ return_prx_err:
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ /* fall through */
+
+ return_prx_cond:
+ s->logs.t_data = -1; /* was not a valid response */
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_H;
+
+ rep->analysers &= AN_RES_FLT_END;
+ s->req.analysers &= AN_REQ_FLT_END;
+ rep->analyse_exp = TICK_ETERNITY;
+ s->current_rule = s->current_rule_list = NULL;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+
+ return_prx_yield:
+ channel_dont_close(rep);
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
+
+/* This function is an analyser which forwards response body (including chunk
+ * sizes if any). It is called as soon as we must forward, even if we forward
+ * zero bytes. The only situation where it must not be called is when we're in
+ * tunnel mode and we want to forward till the close. It's used both to forward
+ * remaining data and to resync after end of body. It expects the msg_state to
+ * be between MSG_BODY and MSG_DONE (inclusive). It returns zero if it needs to
+ * read more data, or 1 once we can go on with next request or end the stream.
+ *
+ * It is capable of compressing response data both in content-length mode and
+ * in chunked mode. The state machine follows different flows depending on
+ * whether content-length or chunked mode is used, since there are no
+ * trailers in content-length mode:
+ *
+ * chk-mode cl-mode
+ * ,----- BODY -----.
+ * / \
+ * V size > 0 V chk-mode
+ * .--> SIZE -------------> DATA -------------> CRLF
+ * | | size == 0 | last byte |
+ * | v final crlf v inspected |
+ * | TRAILERS -----------> DONE |
+ * | |
+ * `----------------------------------------------'
+ *
+ * Compression only happens in the DATA state, and must be flushed in final
+ * states (TRAILERS/DONE) or when leaving on missing data. Normal forwarding
+ * is performed at once on final states for all bytes parsed, or when leaving
+ * on missing data.
+ */
+int http_response_forward_body(struct stream *s, struct channel *res, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &s->txn->rsp;
+ struct htx *htx;
+ int ret;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&res->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ goto return_bad_res;
+ if (htx->flags & HTX_FL_PROCESSING_ERROR)
+ goto return_int_err;
+
+ if ((res->flags & (CF_READ_ERROR|CF_READ_TIMEOUT|CF_WRITE_ERROR|CF_WRITE_TIMEOUT)) ||
+ ((res->flags & CF_SHUTW) && (res->to_forward || co_data(res)))) {
+ /* Output closed while we were sending data. We must abort and
+ * wake the other side up.
+ */
+ msg->msg_state = HTTP_MSG_ERROR;
+ http_end_response(s);
+ http_end_request(s);
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 1;
+ }
+
+ if (msg->msg_state == HTTP_MSG_BODY)
+ msg->msg_state = HTTP_MSG_DATA;
+
+ /* in most states, we should abort in case of early close */
+ channel_auto_close(res);
+
+ if (res->to_forward) {
+ if (res->to_forward == CHN_INFINITE_FORWARD) {
+ if (res->flags & CF_EOI)
+ msg->msg_state = HTTP_MSG_ENDING;
+ }
+ else {
+ /* We can't process the buffer's contents yet */
+ res->flags |= CF_WAKE_WRITE;
+ goto missing_data_or_waiting;
+ }
+ }
+
+ if (msg->msg_state >= HTTP_MSG_ENDING)
+ goto ending;
+
+ if ((txn->meth == HTTP_METH_CONNECT && txn->status >= 200 && txn->status < 300) || txn->status == 101 ||
+ (!(msg->flags & HTTP_MSGF_XFER_LEN) && !HAS_RSP_DATA_FILTERS(s))) {
+ msg->msg_state = HTTP_MSG_ENDING;
+ goto ending;
+ }
+
+ /* Forward input data. We get it by removing all outgoing data not
+ * forwarded yet from HTX data size. If there are some data filters, we
+ * let them decide the amount of data to forward.
+ */
+ if (HAS_RSP_DATA_FILTERS(s)) {
+ ret = flt_http_payload(s, msg, htx->data);
+ if (ret < 0)
+ goto return_bad_res;
+ c_adv(res, ret);
+ }
+ else {
+ c_adv(res, htx->data - co_data(res));
+ if (msg->flags & HTTP_MSGF_XFER_LEN)
+ channel_htx_forward_forever(res, htx);
+ }
+
+ if (htx->data != co_data(res))
+ goto missing_data_or_waiting;
+
+ if (!(msg->flags & HTTP_MSGF_XFER_LEN) && res->flags & CF_SHUTR) {
+ msg->msg_state = HTTP_MSG_ENDING;
+ goto ending;
+ }
+
+ /* Check if the end-of-message is reached and if so, switch the message
+ * in HTTP_MSG_ENDING state. Then if all data was marked to be
+ * forwarded, set the state to HTTP_MSG_DONE.
+ */
+ if (!(htx->flags & HTX_FL_EOM))
+ goto missing_data_or_waiting;
+
+ msg->msg_state = HTTP_MSG_ENDING;
+
+ ending:
+ res->flags &= ~CF_EXPECT_MORE; /* no more data are expected */
+
+ /* other states, ENDING...TUNNEL */
+ if (msg->msg_state >= HTTP_MSG_DONE)
+ goto done;
+
+ if (HAS_RSP_DATA_FILTERS(s)) {
+ ret = flt_http_end(s, msg);
+ if (ret <= 0) {
+ if (!ret)
+ goto missing_data_or_waiting;
+ goto return_bad_res;
+ }
+ }
+
+ if ((txn->meth == HTTP_METH_CONNECT && txn->status >= 200 && txn->status < 300) || txn->status == 101 ||
+ !(msg->flags & HTTP_MSGF_XFER_LEN)) {
+ msg->msg_state = HTTP_MSG_TUNNEL;
+ goto ending;
+ }
+ else {
+ msg->msg_state = HTTP_MSG_DONE;
+ res->to_forward = 0;
+ }
+
+ done:
+
+ channel_dont_close(res);
+
+ http_end_response(s);
+ if (!(res->analysers & an_bit)) {
+ http_end_request(s);
+ if (unlikely(msg->msg_state == HTTP_MSG_ERROR)) {
+ if (res->flags & CF_SHUTW) {
+ /* response errors are most likely due to the
+ * client aborting the transfer. */
+ goto return_cli_abort;
+ }
+ goto return_bad_res;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+ }
+ DBG_TRACE_DEVEL("waiting for the end of the HTTP txn",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ missing_data_or_waiting:
+ if (res->flags & CF_SHUTW)
+ goto return_cli_abort;
+
+ /* stop waiting for data if the input is closed before the end. If the
+ * client side was already closed, it means that the client has aborted,
+ * so we don't want to count this as a server abort. Otherwise it's a
+ * server abort.
+ */
+ if (msg->msg_state < HTTP_MSG_ENDING && res->flags & CF_SHUTR) {
+ if ((s->req.flags & (CF_SHUTR|CF_SHUTW)) == (CF_SHUTR|CF_SHUTW))
+ goto return_cli_abort;
+ /* If we have some pending data, we continue the processing */
+ if (htx_is_empty(htx))
+ goto return_srv_abort;
+ }
+
+ /* When TE: chunked is used, we need to get there again to parse
+ * remaining chunks even if the server has closed, so we don't want to
+ * set CF_DONTCLOSE. Similarly when there is a content-length or if there
+ * are filters registered on the stream, we don't want to forward a
+ * close.
+ */
+ if ((msg->flags & HTTP_MSGF_XFER_LEN) || HAS_RSP_DATA_FILTERS(s))
+ channel_dont_close(res);
+
+ /* We know that more data are expected, but we couldn't send more than
+ * what we did. So we always set the CF_EXPECT_MORE flag so that the
+ * system knows it must not set a PUSH on this first part. Interactive
+ * modes are already handled by the stream sock layer. We must not do
+ * this in content-length mode because it could present the MSG_MORE
+ * flag with the last block of forwarded data, which would cause an
+ * additional delay to be observed by the receiver.
+ */
+ if (HAS_RSP_DATA_FILTERS(s))
+ res->flags |= CF_EXPECT_MORE;
+
+ /* the stream handler will take care of timeouts and errors */
+ DBG_TRACE_DEVEL("waiting for more data to forward",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ return_srv_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.srv_aborts);
+ stream_inc_http_fail_ctr(s);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ goto return_error;
+
+ return_cli_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.cli_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ goto return_error;
+
+ return_int_err:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ goto return_error;
+
+ return_bad_res:
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_RSP);
+ }
+ stream_inc_http_fail_ctr(s);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ /* fall through */
+
+ return_error:
+ /* don't send any error message as we're in the body */
+ http_reply_and_close(s, txn->status, NULL);
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_D;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
+
+/* Perform an HTTP redirect based on the information in <rule>. The function
+ * returns zero in case of an irrecoverable error such as too large a request
+ * to build a valid response, 1 in case of successful redirect (hence the rule
+ * is final), or 2 if the rule has to be silently skipped.
+ */
+int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struct http_txn *txn)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct buffer *chunk;
+ struct ist status, reason, location;
+ unsigned int flags;
+ int ret = 1, close = 0; /* Try to keep the connection alive by default */
+
+ chunk = alloc_trash_chunk();
+ if (!chunk) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto fail;
+ }
+
+ /*
+ * Create the location
+ */
+ htx = htxbuf(&req->buf);
+ switch(rule->type) {
+ case REDIRECT_TYPE_SCHEME: {
+ struct http_hdr_ctx ctx;
+ struct ist path, host;
+ struct http_uri_parser parser;
+
+ host = ist("");
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Host"), &ctx, 0))
+ host = ctx.value;
+
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ /* build message using path */
+ if (isttest(path)) {
+ if (rule->flags & REDIRECT_FLAG_DROP_QS) {
+ int qs = 0;
+ while (qs < path.len) {
+ if (*(path.ptr + qs) == '?') {
+ path.len = qs;
+ break;
+ }
+ qs++;
+ }
+ }
+ }
+ else
+ path = ist("/");
+
+ if (rule->rdr_str) { /* this is an old "redirect" rule */
+ /* add scheme */
+ if (!chunk_memcat(chunk, rule->rdr_str, rule->rdr_len))
+ goto fail;
+ }
+ else {
+ /* add the scheme by evaluating the log format */
+ chunk->data += build_logline(s, chunk->area + chunk->data,
+ chunk->size - chunk->data,
+ &rule->rdr_fmt);
+ }
+ /* add "://" + host + path */
+ if (!chunk_memcat(chunk, "://", 3) ||
+ !chunk_memcat(chunk, host.ptr, host.len) ||
+ !chunk_memcat(chunk, path.ptr, path.len))
+ goto fail;
+
+ /* append a slash at the end of the location if needed and missing */
+ if (chunk->data && chunk->area[chunk->data - 1] != '/' &&
+ (rule->flags & REDIRECT_FLAG_APPEND_SLASH)) {
+ if (chunk->data + 1 >= chunk->size)
+ goto fail;
+ chunk->area[chunk->data++] = '/';
+ }
+ break;
+ }
+
+ case REDIRECT_TYPE_PREFIX: {
+ struct ist path;
+ struct http_uri_parser parser;
+
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ /* build message using path */
+ if (isttest(path)) {
+ if (rule->flags & REDIRECT_FLAG_DROP_QS) {
+ int qs = 0;
+ while (qs < path.len) {
+ if (*(path.ptr + qs) == '?') {
+ path.len = qs;
+ break;
+ }
+ qs++;
+ }
+ }
+ }
+ else
+ path = ist("/");
+
+ if (rule->rdr_str) { /* this is an old "redirect" rule */
+ /* add prefix. Note that if prefix == "/", we don't want to
+ * add anything, otherwise it makes it hard for the user to
+ * configure a self-redirection.
+ */
+ if (rule->rdr_len != 1 || *rule->rdr_str != '/') {
+ if (!chunk_memcat(chunk, rule->rdr_str, rule->rdr_len))
+ goto fail;
+ }
+ }
+ else {
+ /* add the prefix by evaluating the log format */
+ chunk->data += build_logline(s, chunk->area + chunk->data,
+ chunk->size - chunk->data,
+ &rule->rdr_fmt);
+ }
+
+ /* add path */
+ if (!chunk_memcat(chunk, path.ptr, path.len))
+ goto fail;
+
+ /* append a slash at the end of the location if needed and missing */
+ if (chunk->data && chunk->area[chunk->data - 1] != '/' &&
+ (rule->flags & REDIRECT_FLAG_APPEND_SLASH)) {
+ if (chunk->data + 1 >= chunk->size)
+ goto fail;
+ chunk->area[chunk->data++] = '/';
+ }
+ break;
+ }
+ case REDIRECT_TYPE_LOCATION:
+ default:
+ if (rule->rdr_str) { /* this is an old "redirect" rule */
+ /* add location */
+ if (!chunk_memcat(chunk, rule->rdr_str, rule->rdr_len))
+ goto fail;
+ }
+ else {
+ /* add the location by evaluating the log format */
+ int len = build_logline(s, chunk->area + chunk->data,
+ chunk->size - chunk->data,
+ &rule->rdr_fmt);
+ if (!len && (rule->flags & REDIRECT_FLAG_IGNORE_EMPTY)) {
+ ret = 2;
+ goto out;
+ }
+
+ chunk->data += len;
+ }
+ break;
+ }
+ location = ist2(chunk->area, chunk->data);
+
+ /*
+ * Create the 30x response
+ */
+ switch (rule->code) {
+ case 308:
+ status = ist("308");
+ reason = ist("Permanent Redirect");
+ break;
+ case 307:
+ status = ist("307");
+ reason = ist("Temporary Redirect");
+ break;
+ case 303:
+ status = ist("303");
+ reason = ist("See Other");
+ break;
+ case 301:
+ status = ist("301");
+ reason = ist("Moved Permanently");
+ break;
+ case 302:
+ default:
+ status = ist("302");
+ reason = ist("Found");
+ break;
+ }
+
+ if (!(txn->req.flags & HTTP_MSGF_BODYLESS) && txn->req.msg_state != HTTP_MSG_DONE)
+ close = 1;
+
+ htx = htx_from_buf(&res->buf);
+ /* Trim any possible response */
+ channel_htx_truncate(&s->res, htx);
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_BODYLESS);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), status, reason);
+ if (!sl)
+ goto fail;
+ sl->info.res.status = rule->code;
+ s->txn->status = rule->code;
+
+ if (close && !htx_add_header(htx, ist("Connection"), ist("close")))
+ goto fail;
+
+ if (!htx_add_header(htx, ist("Content-length"), ist("0")) ||
+ !htx_add_header(htx, ist("Location"), location))
+ goto fail;
+
+ if (rule->code == 302 || rule->code == 303 || rule->code == 307) {
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")))
+ goto fail;
+ }
+
+ if (rule->cookie_len) {
+ if (!htx_add_header(htx, ist("Set-Cookie"), ist2(rule->cookie_str, rule->cookie_len)))
+ goto fail;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, &res->buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ if (rule->flags & REDIRECT_FLAG_FROM_REQ) {
+ /* let's log the request time */
+ s->logs.tv_request = now;
+ req->analysers &= AN_REQ_FLT_END;
+
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+ }
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= ((rule->flags & REDIRECT_FLAG_FROM_REQ) ? SF_FINST_R : SF_FINST_H);
+
+ out:
+ free_trash_chunk(chunk);
+ return ret;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htxbuf(&res->buf));
+ ret = 0;
+ goto out;
+}
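+
+/* Usage sketch (illustrative, not part of the sources): the three redirect
+ * types handled above are typically reached through configuration lines
+ * such as these, assuming the documented "redirect" / "http-request
+ * redirect" syntax:
+ *
+ *     redirect scheme https code 301 if !{ ssl_fc }
+ *     http-request redirect prefix /new-site code 302
+ *     http-request redirect location https://example.com/ if { path /old }
+ */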
+
+/* This function filters the request header names to only allow [0-9a-zA-Z-]
+ * characters. Depending on the proxy configuration, headers whose name does
+ * not match this charset are either removed, or the request is rejected with
+ * a 403-Forbidden response when such names are found. It returns
+ * HTTP_RULE_RES_CONT to continue the request processing or HTTP_RULE_RES_DENY
+ * if the request is rejected.
+ */
+static enum rule_result http_req_restrict_header_names(struct stream *s, struct htx *htx, struct proxy *px)
+{
+ struct htx_blk *blk;
+ enum rule_result rule_ret = HTTP_RULE_RES_CONT;
+
+ blk = htx_get_first_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_HDR) {
+ struct ist n = htx_get_blk_name(htx, blk);
+ int i, end = istlen(n);
+
+ for (i = 0; i < end; i++) {
+ if (!isalnum((unsigned char)n.ptr[i]) && n.ptr[i] != '-') {
+ break;
+ }
+ }
+
+ if (i < end) {
+ /* Disallowed character found - block the request or remove the header */
+ if (px->options2 & PR_O2_RSTRICT_REQ_HDR_NAMES_BLK)
+ goto block;
+ blk = htx_remove_blk(htx, blk);
+ continue;
+ }
+ }
+ if (type == HTX_BLK_EOH)
+ break;
+
+ blk = htx_get_next_blk(htx, blk);
+ }
+ out:
+ return rule_ret;
+ block:
+ /* Block the request by returning a 403-Forbidden response */
+ s->txn->status = 403;
+ rule_ret = HTTP_RULE_RES_DENY;
+ goto out;
+}
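+
+/* Usage sketch (illustrative): this filter is assumed to back the documented
+ * "option http-restrict-req-hdr-names" proxy option, whose arguments map to
+ * the two behaviours above:
+ *
+ *     option http-restrict-req-hdr-names reject   # 403 on invalid names
+ *     option http-restrict-req-hdr-names delete   # silently drop them
+ */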
+
+/* Replace all headers matching the name <name>. The header value is replaced if
+ * it matches the regex <re>. <str> is used for the replacement. If <full> is
+ * set to 1, the full line is matched and replaced. Otherwise, comma-separated
+ * values are evaluated one by one. It returns 0 on success and -1 on error.
+ */
+int http_replace_hdrs(struct stream* s, struct htx *htx, struct ist name,
+ const char *str, struct my_regex *re, int full)
+{
+ struct http_hdr_ctx ctx;
+ struct buffer *output = get_trash_chunk();
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, name, &ctx, full)) {
+ if (!regex_exec_match2(re, ctx.value.ptr, ctx.value.len, MAX_MATCH, pmatch, 0))
+ continue;
+
+ output->data = exp_replace(output->area, output->size, ctx.value.ptr, str, pmatch);
+ if (output->data == -1)
+ return -1;
+ if (!http_replace_header_value(htx, &ctx, ist2(output->area, output->data)))
+ return -1;
+ }
+ return 0;
+}
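+
+/* Usage sketch (illustrative): with <full> set to 1 this matches the whole
+ * header value, as the documented "http-request replace-header" action is
+ * assumed to do, while <full> == 0 evaluates comma-separated values one by
+ * one as "http-request replace-value" is assumed to do, e.g.:
+ *
+ *     http-request replace-header Host (.*) \1.example.net
+ *     http-request replace-value X-Forwarded-For ^192\.168\.(.*)$ 172.16.\1
+ */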
+
+/* This function executes one of the set-{method,path,query,uri} actions. It
+ * takes the string from the variable 'replace' with length 'len', then modifies
+ * the relevant part of the request line accordingly. Then it updates various
+ * pointers to the next elements which were moved, and the total buffer length.
+ * It finds the action to be performed in p[2], previously filled by function
+ * parse_set_req_line(). It returns 0 in case of success, -1 in case of internal
+ * error, though this may be revisited once this code is actually used.
+ *
+ * <action> can be 0 to replace the method, 1 the path, 2 the query string,
+ * 3 the URI, or 4 the path+query.
+ *
+ * In the query string case, the question mark '?' must be placed at the start
+ * of the string by the caller, even if the replacement query string is empty.
+ */
+int http_req_replace_stline(int action, const char *replace, int len,
+ struct proxy *px, struct stream *s)
+{
+ struct htx *htx = htxbuf(&s->req.buf);
+
+ switch (action) {
+ case 0: // method
+ if (!http_replace_req_meth(htx, ist2(replace, len)))
+ return -1;
+ break;
+
+ case 1: // path
+ if (!http_replace_req_path(htx, ist2(replace, len), 0))
+ return -1;
+ break;
+
+ case 2: // query
+ if (!http_replace_req_query(htx, ist2(replace, len)))
+ return -1;
+ break;
+
+ case 3: // uri
+ if (!http_replace_req_uri(htx, ist2(replace, len)))
+ return -1;
+ break;
+
+ case 4: // path + query
+ if (!http_replace_req_path(htx, ist2(replace, len), 1))
+ return -1;
+ break;
+
+ default:
+ return -1;
+ }
+ return 0;
+}
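+
+/* Usage sketch (illustrative): the action codes above are assumed to be the
+ * ones produced for the documented request-line rewriting actions:
+ *
+ *     http-request set-method <fmt>   # action 0
+ *     http-request set-path <fmt>     # action 1
+ *     http-request set-query <fmt>    # action 2
+ *     http-request set-uri <fmt>      # action 3
+ *     http-request set-pathq <fmt>    # action 4
+ */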
+
+/* This function replaces the HTTP status code and the associated message. The
+ * variable <status> contains the new status code. It returns 0 in case of
+ * success and -1 in case of internal error.
+ */
+int http_res_set_status(unsigned int status, struct ist reason, struct stream *s)
+{
+ struct htx *htx = htxbuf(&s->res.buf);
+ char *res;
+
+ chunk_reset(&trash);
+ res = ultoa_o(status, trash.area, trash.size);
+ trash.data = res - trash.area;
+
+ /* If no custom reason is provided, fall back to the default one for this status */
+ if (!isttest(reason)) {
+ const char *str = http_get_reason(status);
+ reason = ist(str);
+ }
+
+ if (!http_replace_res_status(htx, ist2(trash.area, trash.data), reason))
+ return -1;
+ s->txn->status = status;
+ return 0;
+}
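+
+/* Usage sketch (illustrative): a caller is expected to invoke it as
+ *
+ *     if (http_res_set_status(431, ist("Request Header Fields Too Large"), s) == -1)
+ *         goto fail;
+ *
+ * and the documented "http-response set-status <status> [reason <str>]"
+ * action is assumed to rely on this helper.
+ */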
+
+/* Executes the http-request rules <rules> for stream <s>, proxy <px> and
+ * transaction <txn>. Returns the verdict of the first rule that prevents
+ * further processing of the request (auth, deny, ...), HTTP_RULE_RES_STOP if
+ * it stopped on an allow, or HTTP_RULE_RES_CONT if the end of the rule lists
+ * was reached without a verdict. It may set the TX_CLTARPIT flag on
+ * txn->flags if it encounters a tarpit rule. When a deny or tarpit rule
+ * matches, txn->status and txn->http_reply are set from that rule.
+ */
+static enum rule_result http_req_get_intercept_rule(struct proxy *px, struct list *def_rules,
+ struct list *rules, struct stream *s)
+{
+ struct session *sess = strm_sess(s);
+ struct http_txn *txn = s->txn;
+ struct act_rule *rule;
+ enum rule_result rule_ret = HTTP_RULE_RES_CONT;
+ int act_opts = 0;
+
+ /* If "the current_rule_list" match the executed rule list, we are in
+ * resume condition. If a resume is needed it is always in the action
+ * and never in the ACL or converters. In this case, we initialise the
+ * current rule, and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if (s->current_rule_list == rules || (def_rules && s->current_rule_list == def_rules))
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+ /* start the ruleset evaluation in strict mode */
+ txn->req.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ /* check optional condition */
+ if (rule->cond) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (!ret) /* condition not matched */
+ continue;
+ }
+
+ act_opts |= ACT_OPT_FIRST;
+ resume_execution:
+ if (rule->kw->flags & KWF_EXPERIMENTAL)
+ mark_tainted(TAINTED_ACTION_EXP_EXECUTED);
+
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ if ((s->req.flags & CF_READ_ERROR) ||
+ ((s->req.flags & (CF_SHUTR|CF_READ_NULL)) &&
+ (px->options & PR_O_ABRT_CLOSE)))
+ act_opts |= ACT_OPT_FINAL;
+
+ switch (rule->action_ptr(rule, px, sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ rule_ret = HTTP_RULE_RES_STOP;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ rule_ret = HTTP_RULE_RES_YIELD;
+ goto end;
+ case ACT_RET_ERR:
+ rule_ret = HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DONE:
+ rule_ret = HTTP_RULE_RES_DONE;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DENY:
+ if (txn->status == -1)
+ txn->status = 403;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_ABRT:
+ rule_ret = HTTP_RULE_RES_ABRT;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_INV:
+ rule_ret = HTTP_RULE_RES_BADREQ;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+ /* If no action function is defined, check for known actions */
+ switch (rule->action) {
+ case ACT_ACTION_ALLOW:
+ rule_ret = HTTP_RULE_RES_STOP;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_ACTION_DENY:
+ txn->status = rule->arg.http_reply->status;
+ txn->http_reply = rule->arg.http_reply;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_HTTP_REQ_TARPIT:
+ txn->flags |= TX_CLTARPIT;
+ txn->status = rule->arg.http_reply->status;
+ txn->http_reply = rule->arg.http_reply;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_HTTP_REDIR: {
+ int ret = http_apply_redirect_rule(rule->arg.redir, s, txn);
+
+ if (ret == 2) // 2 == skip
+ break;
+
+ rule_ret = ret ? HTTP_RULE_RES_ABRT : HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+
+ /* other action codes exist, but normally they are never matched here. */
+ default:
+ break;
+ }
+ }
+
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if the ruleset evaluation is finished, reset the strict mode */
+ if (rule_ret != HTTP_RULE_RES_YIELD)
+ txn->req.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ /* we reached the end of the rules, nothing to report */
+ return rule_ret;
+}
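+
+/* Usage sketch (illustrative): the terminal actions handled above typically
+ * come from configuration lines such as these, assuming the documented
+ * "http-request" syntax:
+ *
+ *     http-request allow if { src 10.0.0.0/8 }
+ *     http-request deny deny_status 403 if { path_beg /admin }
+ *     http-request tarpit if { src -f abusers.lst }
+ */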
+
+/* Executes the http-response rules <rules> for stream <s> and proxy <px>. It
+ * returns one of 5 possible statuses: HTTP_RULE_RES_CONT, HTTP_RULE_RES_STOP,
+ * HTTP_RULE_RES_DONE, HTTP_RULE_RES_YIELD, or HTTP_RULE_RES_BADREQ. If *CONT
+ * is returned, the process can continue with the evaluation of the next rule
+ * list. If *STOP or *DONE is returned, the process must stop the evaluation.
+ * If *BADREQ is returned, it means the operation could not be processed and a
+ * server error must be returned. If *YIELD is returned, the caller must call
+ * the function again with the same context.
+ */
+static enum rule_result http_res_get_intercept_rule(struct proxy *px, struct list *def_rules,
+ struct list *rules, struct stream *s)
+{
+ struct session *sess = strm_sess(s);
+ struct http_txn *txn = s->txn;
+ struct act_rule *rule;
+ enum rule_result rule_ret = HTTP_RULE_RES_CONT;
+ int act_opts = 0;
+
+ /* If "the current_rule_list" match the executed rule list, we are in
+ * resume condition. If a resume is needed it is always in the action
+ * and never in the ACL or converters. In this case, we initialise the
+ * current rule, and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if (s->current_rule_list == rules || (def_rules && s->current_rule_list == def_rules))
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+
+ /* start the ruleset evaluation in strict mode */
+ txn->rsp.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ /* check optional condition */
+ if (rule->cond) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (!ret) /* condition not matched */
+ continue;
+ }
+
+ act_opts |= ACT_OPT_FIRST;
+resume_execution:
+ if (rule->kw->flags & KWF_EXPERIMENTAL)
+ mark_tainted(TAINTED_ACTION_EXP_EXECUTED);
+
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ if ((s->req.flags & CF_READ_ERROR) ||
+ ((s->req.flags & (CF_SHUTR|CF_READ_NULL)) &&
+ (px->options & PR_O_ABRT_CLOSE)))
+ act_opts |= ACT_OPT_FINAL;
+
+ switch (rule->action_ptr(rule, px, sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ rule_ret = HTTP_RULE_RES_STOP;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ rule_ret = HTTP_RULE_RES_YIELD;
+ goto end;
+ case ACT_RET_ERR:
+ rule_ret = HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DONE:
+ rule_ret = HTTP_RULE_RES_DONE;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DENY:
+ if (txn->status == -1)
+ txn->status = 502;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_ABRT:
+ rule_ret = HTTP_RULE_RES_ABRT;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_INV:
+ rule_ret = HTTP_RULE_RES_BADREQ;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+ /* If no action function is defined, check for known actions */
+ switch (rule->action) {
+ case ACT_ACTION_ALLOW:
+ rule_ret = HTTP_RULE_RES_STOP; /* "allow" rules are OK */
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_ACTION_DENY:
+ txn->status = rule->arg.http_reply->status;
+ txn->http_reply = rule->arg.http_reply;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_HTTP_REDIR: {
+ int ret = http_apply_redirect_rule(rule->arg.redir, s, txn);
+
+ if (ret == 2) // 2 == skip
+ break;
+
+ rule_ret = ret ? HTTP_RULE_RES_ABRT : HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ /* other action codes exist, but normally they are never matched here. */
+ default:
+ break;
+ }
+ }
+
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if the ruleset evaluation is finished, reset the strict mode */
+ if (rule_ret != HTTP_RULE_RES_YIELD)
+ txn->rsp.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ /* we reached the end of the rules, nothing to report */
+ return rule_ret;
+}
+
+/* Executes backend and frontend http-after-response rules for the stream <s>,
+ * in that order. It returns 1 on success and 0 on error. It is the caller's
+ * responsibility to catch the error or ignore it. If it is caught, this
+ * function may be called a second time, for the internal error.
+ */
+int http_eval_after_res_rules(struct stream *s)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ enum rule_result ret = HTTP_RULE_RES_CONT;
+
+ /* Eval after-response ruleset only if the reply is not const */
+ if (s->txn->flags & TX_CONST_REPLY)
+ goto end;
+
+ /* prune the request variables if not already done and swap to the response variables. */
+ if (s->vars_reqres.scope != SCOPE_RES) {
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+ vars_init_head(&s->vars_reqres, SCOPE_RES);
+ }
+
+ def_rules = (s->be->defpx ? &s->be->defpx->http_after_res_rules : NULL);
+ rules = &s->be->http_after_res_rules;
+
+ ret = http_res_get_intercept_rule(s->be, def_rules, rules, s);
+ if ((ret == HTTP_RULE_RES_CONT || ret == HTTP_RULE_RES_STOP) && sess->fe != s->be) {
+ def_rules = ((sess->fe->defpx && sess->fe->defpx != s->be->defpx) ? &sess->fe->defpx->http_after_res_rules : NULL);
+ rules = &sess->fe->http_after_res_rules;
+ ret = http_res_get_intercept_rule(sess->fe, def_rules, rules, s);
+ }
+
+ end:
+ /* All other codes than CONTINUE, STOP or DONE are forbidden */
+ return (ret == HTTP_RULE_RES_CONT || ret == HTTP_RULE_RES_STOP || ret == HTTP_RULE_RES_DONE);
+}
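+
+/* Usage sketch (illustrative): these rulesets are assumed to be fed by the
+ * documented "http-after-response" directives, which also apply to
+ * internally generated responses, e.g.:
+ *
+ *     http-after-response set-header Strict-Transport-Security "max-age=16000000"
+ */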
+
+/*
+ * Manage client-side cookies. It can impact performance by about 2% so it is
+ * desirable to call it only when needed. This code is quite complex because
+ * of the multiple very crappy and ambiguous syntaxes we have to support. It
+ * is highly recommended not to touch this part without a good reason!
+ */
+static void http_manage_client_side_cookies(struct stream *s, struct channel *req)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ char *hdr_beg, *hdr_end, *del_from;
+ char *prev, *att_beg, *att_end, *equal, *val_beg, *val_end, *next;
+ int preserve_hdr;
+
+ htx = htxbuf(&req->buf);
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Cookie"), &ctx, 1)) {
+ int is_first = 1;
+ del_from = NULL; /* nothing to be deleted */
+ preserve_hdr = 0; /* assume we may kill the whole header */
+
+ /* Now look for cookies. Conforming to RFC2109, we have to support
+ * attributes whose name begin with a '$', and associate them with
+ * the right cookie, if we want to delete this cookie.
+ * So there are 3 cases for each cookie read :
+ * 1) it's a special attribute, beginning with a '$' : ignore it.
+ * 2) it's a server id cookie that we *MAY* want to delete : save
+ * some pointers on it (last semi-colon, beginning of cookie...)
+ * 3) it's an application cookie : we *MAY* have to delete a previous
+ * "special" cookie.
+ * At the end of the loop, if a "special" cookie remains, we may have to
+ * remove it. If no application cookie persists in the header, we
+ * *MUST* delete it.
+ *
+ * Note: RFC2965 is unclear about the processing of spaces around
+ * the equal sign in the ATTR=VALUE form. A careful inspection of
+ * the RFC explicitly allows spaces before it, and not within the
+ * tokens (attrs or values). An inspection of RFC2109 allows that
+ * too but section 10.1.3 lets one think that spaces may be allowed
+ * after the equal sign too, resulting in some (rare) buggy
+ * implementations trying to do that. So let's do what servers do.
+ * Latest ietf draft forbids spaces all around. Also, earlier RFCs
+ * allowed quoted strings in values, with any possible character
+ * after a backslash, including control chars and delimiters, which
+ * causes parsing to become ambiguous. Browsers also allow spaces
+ * within values even without quotes.
+ *
+ * We have to keep multiple pointers in order to support cookie
+ * removal at the beginning, middle or end of header without
+ * corrupting the header. All of these headers are valid :
+ *
+ * hdr_beg hdr_end
+ * | |
+ * v |
+ * NAME1=VALUE1;NAME2=VALUE2;NAME3=VALUE3 |
+ * NAME1=VALUE1;NAME2_ONLY ;NAME3=VALUE3 v
+ * NAME1 = VALUE 1 ; NAME2 = VALUE2 ; NAME3 = VALUE3
+ * | | | | | | |
+ * | | | | | | |
+ * | | | | | | +--> next
+ * | | | | | +----> val_end
+ * | | | | +-----------> val_beg
+ * | | | +--------------> equal
+ * | | +----------------> att_end
+ * | +---------------------> att_beg
+ * +--------------------------> prev
+ *
+ */
+ hdr_beg = ctx.value.ptr;
+ hdr_end = hdr_beg + ctx.value.len;
+ for (prev = hdr_beg; prev < hdr_end; prev = next) {
+ /* Iterate through all cookies on this line */
+
+ /* find att_beg */
+ att_beg = prev;
+ if (!is_first)
+ att_beg++;
+ is_first = 0;
+
+ while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
+ att_beg++;
+
+ /* find att_end : this is the first character after the last non
+ * space before the equal. It may be equal to hdr_end.
+ */
+ equal = att_end = att_beg;
+ while (equal < hdr_end) {
+ if (*equal == '=' || *equal == ',' || *equal == ';')
+ break;
+ if (HTTP_IS_SPHT(*equal++))
+ continue;
+ att_end = equal;
+ }
+
+ /* here, <equal> points to '=', a delimiter or the end. <att_end>
+ * is between <att_beg> and <equal>, both may be identical.
+ */
+ /* look for end of cookie if there is an equal sign */
+ if (equal < hdr_end && *equal == '=') {
+ /* look for the beginning of the value */
+ val_beg = equal + 1;
+ while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
+ val_beg++;
+
+ /* find the end of the value, respecting quotes */
+ next = http_find_cookie_value_end(val_beg, hdr_end);
+
+ /* make val_end point to the first white space or delimiter after the value */
+ val_end = next;
+ while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
+ val_end--;
+ }
+ else
+ val_beg = val_end = next = equal;
+
+ /* We have nothing to do with attributes beginning with
+ * '$'. However, they will automatically be removed if a
+ * header before them is removed, since they're supposed
+ * to be linked together.
+ */
+ if (*att_beg == '$')
+ continue;
+
+ /* Ignore cookies with no equal sign */
+ if (equal == next) {
+ /* This is not our cookie, so we must preserve it. But if we already
+ * scheduled another cookie for removal, we cannot remove the
+ * complete header, but we can remove the previous block itself.
+ */
+ preserve_hdr = 1;
+ if (del_from != NULL) {
+ int delta = http_del_hdr_value(hdr_beg, hdr_end, &del_from, prev);
+ val_end += delta;
+ next += delta;
+ hdr_end += delta;
+ prev = del_from;
+ del_from = NULL;
+ }
+ continue;
+ }
+
+ /* if there are spaces around the equal sign, we need to
+ * strip them otherwise we'll get trouble for cookie captures,
+ * or even for rewrites. Since this happens extremely rarely,
+ * it does not hurt performance.
+ */
+ if (unlikely(att_end != equal || val_beg > equal + 1)) {
+ int stripped_before = 0;
+ int stripped_after = 0;
+
+ if (att_end != equal) {
+ memmove(att_end, equal, hdr_end - equal);
+ stripped_before = (att_end - equal);
+ equal += stripped_before;
+ val_beg += stripped_before;
+ }
+
+ if (val_beg > equal + 1) {
+ memmove(equal + 1, val_beg, hdr_end + stripped_before - val_beg);
+ stripped_after = (equal + 1) - val_beg;
+ val_beg += stripped_after;
+ stripped_before += stripped_after;
+ }
+
+ val_end += stripped_before;
+ next += stripped_before;
+ hdr_end += stripped_before;
+ }
+ /* now everything is as on the diagram above */
+
+ /* First, let's see if we want to capture this cookie. We check
+ * that we don't already have a client side cookie, because we
+ * can only capture one. Also as an optimisation, we ignore
+ * cookies shorter than the declared name.
+ */
+ if (sess->fe->capture_name != NULL && txn->cli_cookie == NULL &&
+ (val_end - att_beg >= sess->fe->capture_namelen) &&
+ memcmp(att_beg, sess->fe->capture_name, sess->fe->capture_namelen) == 0) {
+ int log_len = val_end - att_beg;
+
+ if ((txn->cli_cookie = pool_alloc(pool_head_capture)) == NULL) {
+ ha_alert("HTTP logging : out of memory.\n");
+ } else {
+ if (log_len > sess->fe->capture_len)
+ log_len = sess->fe->capture_len;
+ memcpy(txn->cli_cookie, att_beg, log_len);
+ txn->cli_cookie[log_len] = 0;
+ }
+ }
+
+ /* Persistence cookies in passive, rewrite or insert mode have the
+ * following form :
+ *
+ * Cookie: NAME=SRV[|<lastseen>[|<firstseen>]]
+ *
+ * For cookies in prefix mode, the form is :
+ *
+ * Cookie: NAME=SRV~VALUE
+ */
+ if ((att_end - att_beg == s->be->cookie_len) && (s->be->cookie_name != NULL) &&
+ (memcmp(att_beg, s->be->cookie_name, att_end - att_beg) == 0)) {
+ struct server *srv = s->be->srv;
+ char *delim;
+
+ /* if we're in cookie prefix mode, we'll search the delimiter so that we
+ * have the server ID between val_beg and delim, and the original cookie between
+ * delim+1 and val_end. Otherwise, delim==val_end :
+ *
+ * hdr_beg
+ * |
+ * v
+ * NAME=SRV; # in all but prefix modes
+ * NAME=SRV~OPAQUE ; # in prefix mode
+ * || || | |+-> next
+ * || || | +--> val_end
+ * || || +---------> delim
+ * || |+------------> val_beg
+ * || +-------------> att_end = equal
+ * |+-----------------> att_beg
+ * +------------------> prev
+ *
+ */
+ if (s->be->ck_opts & PR_CK_PFX) {
+ for (delim = val_beg; delim < val_end; delim++)
+ if (*delim == COOKIE_DELIM)
+ break;
+ }
+ else {
+ char *vbar1;
+ delim = val_end;
+ /* Now check if the cookie contains a date field, which would
+ * appear after a vertical bar ('|') just after the server name
+ * and before the delimiter.
+ */
+ vbar1 = memchr(val_beg, COOKIE_DELIM_DATE, val_end - val_beg);
+ if (vbar1) {
+ /* OK, so left of the bar is the server's cookie and
+ * right is the last seen date. It is a base64-encoded
+ * 30-bit value representing the UNIX time since the
+ * epoch, in 4-second units.
+ */
+ int val;
+ delim = vbar1++;
+ if (val_end - vbar1 >= 5) {
+ val = b64tos30(vbar1);
+ if (val > 0)
+ txn->cookie_last_date = val << 2;
+ }
+ /* look for a second vertical bar */
+ vbar1 = memchr(vbar1, COOKIE_DELIM_DATE, val_end - vbar1);
+ if (vbar1 && (val_end - vbar1 > 5)) {
+ val = b64tos30(vbar1 + 1);
+ if (val > 0)
+ txn->cookie_first_date = val << 2;
+ }
+ }
+ }
+
+ /* if the cookie has an expiration date and the proxy wants to check
+ * it, then we do that now. We first check if the cookie is too old,
+ * then only if it has expired. We detect strict overflow because the
+ * time resolution here is not great (4 seconds). Cookies with dates
+ * in the future are ignored if their offset is beyond one day. This
+ * allows an admin to fix timezone issues without expiring everyone
+ * and at the same time avoids keeping unwanted side effects for too
+ * long.
+ */
+ if (txn->cookie_first_date && s->be->cookie_maxlife &&
+ (((signed)(date.tv_sec - txn->cookie_first_date) > (signed)s->be->cookie_maxlife) ||
+ ((signed)(txn->cookie_first_date - date.tv_sec) > 86400))) {
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= TX_CK_OLD;
+ delim = val_beg; // let's pretend we have not found the cookie
+ txn->cookie_first_date = 0;
+ txn->cookie_last_date = 0;
+ }
+ else if (txn->cookie_last_date && s->be->cookie_maxidle &&
+ (((signed)(date.tv_sec - txn->cookie_last_date) > (signed)s->be->cookie_maxidle) ||
+ ((signed)(txn->cookie_last_date - date.tv_sec) > 86400))) {
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= TX_CK_EXPIRED;
+ delim = val_beg; // let's pretend we have not found the cookie
+ txn->cookie_first_date = 0;
+ txn->cookie_last_date = 0;
+ }
+
+ /* Here, we'll look for the first running server which supports the cookie.
+ * This allows the same cookie to be shared between several servers, for
+ * example to dedicate backup servers to specific servers only.
+ * However, to prevent clients from sticking to a cookie-less backup server
+ * when they have incidentally learned an empty cookie, we simply ignore
+ * empty cookies and mark them as invalid.
+ * The same behaviour is applied when persistence must be ignored.
+ */
+ if ((delim == val_beg) || (s->flags & (SF_IGNORE_PRST | SF_ASSIGNED)))
+ srv = NULL;
+
+ while (srv) {
+ if (srv->cookie && (srv->cklen == delim - val_beg) &&
+ !memcmp(val_beg, srv->cookie, delim - val_beg)) {
+ if ((srv->cur_state != SRV_ST_STOPPED) ||
+ (s->be->options & PR_O_PERSIST) ||
+ (s->flags & SF_FORCE_PRST)) {
+ /* we found the server and we can use it */
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= (srv->cur_state != SRV_ST_STOPPED) ? TX_CK_VALID : TX_CK_DOWN;
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ break;
+ } else {
+ /* we found a server, but it's down,
+ * mark it as such and go on in case
+ * another one is available.
+ */
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= TX_CK_DOWN;
+ }
+ }
+ srv = srv->next;
+ }
+
+ if (!srv && !(txn->flags & (TX_CK_DOWN|TX_CK_EXPIRED|TX_CK_OLD))) {
+ /* no server matched this cookie or we deliberately skipped it */
+ txn->flags &= ~TX_CK_MASK;
+ if ((s->flags & (SF_IGNORE_PRST | SF_ASSIGNED)))
+ txn->flags |= TX_CK_UNUSED;
+ else
+ txn->flags |= TX_CK_INVALID;
+ }
+
+ /* depending on the cookie mode, we may have to either :
+ * - delete the complete cookie if we're in insert+indirect mode, so that
+ * the server never sees it ;
+ * - remove the server id from the cookie value, and tag the cookie as an
+ * application cookie so that it does not get accidentally removed later,
+ * if we're in cookie prefix mode
+ */
+ if ((s->be->ck_opts & PR_CK_PFX) && (delim != val_end)) {
+ int delta; /* negative */
+
+ memmove(val_beg, delim + 1, hdr_end - (delim + 1));
+ delta = val_beg - (delim + 1);
+ val_end += delta;
+ next += delta;
+ hdr_end += delta;
+ del_from = NULL;
+ preserve_hdr = 1; /* we want to keep this cookie */
+ }
+ else if (del_from == NULL &&
+ (s->be->ck_opts & (PR_CK_INS | PR_CK_IND)) == (PR_CK_INS | PR_CK_IND)) {
+ del_from = prev;
+ }
+ }
+ else {
+ /* This is not our cookie, so we must preserve it. But if we already
+ * scheduled another cookie for removal, we cannot remove the
+ * complete header, but we can remove the previous block itself.
+ */
+ preserve_hdr = 1;
+
+ if (del_from != NULL) {
+ int delta = http_del_hdr_value(hdr_beg, hdr_end, &del_from, prev);
+ if (att_beg >= del_from)
+ att_beg += delta;
+ if (att_end >= del_from)
+ att_end += delta;
+ val_beg += delta;
+ val_end += delta;
+ next += delta;
+ hdr_end += delta;
+ prev = del_from;
+ del_from = NULL;
+ }
+ }
+
+ } /* for each cookie */
+
+
+ /* There are no more cookies on this line.
+ * We may still have one (or several) marked for deletion at the
+ * end of the line. We must handle this now, in one of two ways :
+ * - if some cookies must be preserved, we only delete from the
+ * mark to the end of line ;
+ * - if nothing needs to be preserved, simply delete the whole header
+ */
+ if (del_from) {
+ hdr_end = (preserve_hdr ? del_from : hdr_beg);
+ }
+ if ((hdr_end - hdr_beg) != ctx.value.len) {
+ if (hdr_beg != hdr_end)
+ htx_change_blk_value_len(htx, ctx.blk, hdr_end - hdr_beg);
+ else
+ http_remove_header(htx, &ctx);
+ }
+ } /* for each "Cookie header */
+}
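+
+/* Usage sketch (illustrative): the persistence cookie formats parsed above
+ * are assumed to be produced by the documented "cookie" backend directive,
+ * e.g.:
+ *
+ *     cookie SRVID insert indirect nocache maxidle 30m maxlife 8h
+ *     cookie SRVID prefix
+ *
+ * where maxidle/maxlife enable the "|<lastseen>[|<firstseen>]" date fields
+ * and "prefix" enables the "NAME=SRV~VALUE" form.
+ */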
+
+/*
+ * Manage server-side cookies. It can impact performance by about 2% so it is
+ * desirable to call it only when needed. This function is also used when we
+ * just need to know if there is a cookie (eg: for check-cache).
+ */
+static void http_manage_server_side_cookies(struct stream *s, struct channel *res)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ struct server *srv;
+ char *hdr_beg, *hdr_end;
+ char *prev, *att_beg, *att_end, *equal, *val_beg, *val_end, *next;
+ int is_cookie2 = 0;
+
+ htx = htxbuf(&res->buf);
+
+ ctx.blk = NULL;
+ while (1) {
+ int is_first = 1;
+
+ if (is_cookie2 || !http_find_header(htx, ist("Set-Cookie"), &ctx, 1)) {
+ if (!http_find_header(htx, ist("Set-Cookie2"), &ctx, 1))
+ break;
+ is_cookie2 = 1;
+ }
+
+ /* OK, right now we know we have a Set-Cookie* at hdr_beg, and
+ * <prev> points to the colon.
+ */
+ txn->flags |= TX_SCK_PRESENT;
+
+ /* Maybe we only wanted to see if there was a Set-Cookie (eg:
+ * check-cache is enabled) and we are not interested in checking
+ * them. Warning, the cookie capture is declared in the frontend.
+ */
+ if (s->be->cookie_name == NULL && sess->fe->capture_name == NULL)
+ break;
+
+ /* OK so now we know we have to process this response cookie.
+ * The format of the Set-Cookie header is slightly different
+ * from the format of the Cookie header in that it does not
+ * support the comma as a cookie delimiter (thus the header
+ * cannot be folded) because the Expires attribute described in
+ * the original Netscape spec may contain an unquoted date
+ * with a comma inside. We have to live with this because
+ * many browsers don't support Max-Age and some browsers don't
+ * support quoted strings. However the Set-Cookie2 header is
+ * clean.
+ *
+ * We have to keep multiple pointers in order to support cookie
+ * removal at the beginning, middle or end of header without
+ * corrupting the header (in case of set-cookie2). A special
+ * pointer, <scav> points to the beginning of the set-cookie-av
+ * fields after the first semi-colon. The <next> pointer points
+ * either to the end of line (set-cookie) or next unquoted comma
+ * (set-cookie2). All of these headers are valid :
+ *
+ * hdr_beg hdr_end
+ * | |
+ * v |
+ * NAME1 = VALUE 1 ; Secure; Path="/" |
+ * NAME=VALUE; Secure; Expires=Thu, 01-Jan-1970 00:00:01 GMT v
+ * NAME = VALUE ; Secure; Expires=Thu, 01-Jan-1970 00:00:01 GMT
+ * NAME1 = VALUE 1 ; Max-Age=0, NAME2=VALUE2; Discard
+ * | | | | | | | |
+ * | | | | | | | +-> next
+ * | | | | | | +------------> scav
+ * | | | | | +--------------> val_end
+ * | | | | +--------------------> val_beg
+ * | | | +----------------------> equal
+ * | | +------------------------> att_end
+ * | +----------------------------> att_beg
+ * +------------------------------> prev
+ * -------------------------------> hdr_beg
+ */
+ hdr_beg = ctx.value.ptr;
+ hdr_end = hdr_beg + ctx.value.len;
+ for (prev = hdr_beg; prev < hdr_end; prev = next) {
+
+ /* Iterate through all cookies on this line */
+
+ /* find att_beg */
+ att_beg = prev;
+ if (!is_first)
+ att_beg++;
+ is_first = 0;
+
+ while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
+ att_beg++;
+
+ /* find att_end : this is the first character after the last non
+ * space before the equal. It may be equal to hdr_end.
+ */
+ equal = att_end = att_beg;
+
+ while (equal < hdr_end) {
+ if (*equal == '=' || *equal == ';' || (is_cookie2 && *equal == ','))
+ break;
+ if (HTTP_IS_SPHT(*equal++))
+ continue;
+ att_end = equal;
+ }
+
+ /* here, <equal> points to '=', a delimiter or the end. <att_end>
+ * is between <att_beg> and <equal>, both may be identical.
+ */
+
+ /* look for end of cookie if there is an equal sign */
+ if (equal < hdr_end && *equal == '=') {
+ /* look for the beginning of the value */
+ val_beg = equal + 1;
+ while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
+ val_beg++;
+
+ /* find the end of the value, respecting quotes */
+ next = http_find_cookie_value_end(val_beg, hdr_end);
+
+ /* make val_end point to the first white space or delimiter after the value */
+ val_end = next;
+ while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
+ val_end--;
+ }
+ else {
+ /* <equal> points to next comma, semi-colon or EOL */
+ val_beg = val_end = next = equal;
+ }
+
+ if (next < hdr_end) {
+ /* Set-Cookie2 supports multiple cookies, and <next> points to
+ * a comma or semi-colon before the end. So skip all attr-value
+ * pairs and look for the next comma. For Set-Cookie, since
+ * commas are permitted in values, skip to the end.
+ */
+ if (is_cookie2)
+ next = http_find_hdr_value_end(next, hdr_end);
+ else
+ next = hdr_end;
+ }
+
+ /* Now everything is as on the diagram above */
+
+ /* Ignore cookies with no equal sign */
+ if (equal == val_end)
+ continue;
+
+ /* If there are spaces around the equal sign, we need to
+ * strip them otherwise we'll get trouble for cookie captures,
+ * or even for rewrites. Since this happens extremely rarely,
+ * it does not hurt performance.
+ */
+ if (unlikely(att_end != equal || val_beg > equal + 1)) {
+ int stripped_before = 0;
+ int stripped_after = 0;
+
+ if (att_end != equal) {
+ memmove(att_end, equal, hdr_end - equal);
+ stripped_before = (att_end - equal);
+ equal += stripped_before;
+ val_beg += stripped_before;
+ }
+
+ if (val_beg > equal + 1) {
+ memmove(equal + 1, val_beg, hdr_end + stripped_before - val_beg);
+ stripped_after = (equal + 1) - val_beg;
+ val_beg += stripped_after;
+ stripped_before += stripped_after;
+ }
+
+ val_end += stripped_before;
+ next += stripped_before;
+ hdr_end += stripped_before;
+
+ htx_change_blk_value_len(htx, ctx.blk, hdr_end - hdr_beg);
+ ctx.value.len = hdr_end - hdr_beg;
+ }
+
+ /* First, let's see if we want to capture this cookie. We check
+ * that we don't already have a server side cookie, because we
+ * can only capture one. Also as an optimisation, we ignore
+ * cookies shorter than the declared name.
+ */
+ if (sess->fe->capture_name != NULL &&
+ txn->srv_cookie == NULL &&
+ (val_end - att_beg >= sess->fe->capture_namelen) &&
+ memcmp(att_beg, sess->fe->capture_name, sess->fe->capture_namelen) == 0) {
+ int log_len = val_end - att_beg;
+ if ((txn->srv_cookie = pool_alloc(pool_head_capture)) == NULL) {
+ ha_alert("HTTP logging : out of memory.\n");
+ }
+ else {
+ if (log_len > sess->fe->capture_len)
+ log_len = sess->fe->capture_len;
+ memcpy(txn->srv_cookie, att_beg, log_len);
+ txn->srv_cookie[log_len] = 0;
+ }
+ }
+
+ srv = objt_server(s->target);
+ /* now check if we need to process it for persistence */
+ if (!(s->flags & SF_IGNORE_PRST) &&
+ (att_end - att_beg == s->be->cookie_len) && (s->be->cookie_name != NULL) &&
+ (memcmp(att_beg, s->be->cookie_name, att_end - att_beg) == 0)) {
+ /* assume passive cookie by default */
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_FOUND;
+
+ /* If the cookie is in insert mode on a known server, we'll delete
+ * this occurrence because we'll insert another one later.
+ * We'll delete it too if the "indirect" option is set and we're in
+ * a direct access.
+ */
+ if (s->be->ck_opts & PR_CK_PSV) {
+ /* The "preserve" flag was set, we don't want to touch the
+ * server's cookie.
+ */
+ }
+ else if ((srv && (s->be->ck_opts & PR_CK_INS)) ||
+ ((s->flags & SF_DIRECT) && (s->be->ck_opts & PR_CK_IND))) {
+ /* this cookie must be deleted */
+ if (prev == hdr_beg && next == hdr_end) {
+ /* whole header */
+ http_remove_header(htx, &ctx);
+ /* note: while both invalid now, <next> and <hdr_end>
+ * are still equal, so the for() will stop as expected.
+ */
+ } else {
+ /* just remove the value */
+ int delta = http_del_hdr_value(hdr_beg, hdr_end, &prev, next);
+ next = prev;
+ hdr_end += delta;
+ }
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_DELETED;
+ /* and go on with next cookie */
+ }
+ else if (srv && srv->cookie && (s->be->ck_opts & PR_CK_RW)) {
+ /* replace bytes val_beg->val_end with the cookie name associated
+ * with this server since we know it.
+ */
+ int sliding, delta;
+
+ ctx.value = ist2(val_beg, val_end - val_beg);
+ ctx.lws_before = ctx.lws_after = 0;
+ http_replace_header_value(htx, &ctx, ist2(srv->cookie, srv->cklen));
+ delta = srv->cklen - (val_end - val_beg);
+ sliding = (ctx.value.ptr - val_beg);
+ hdr_beg += sliding;
+ val_beg += sliding;
+ next += sliding + delta;
+ hdr_end += sliding + delta;
+
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_REPLACED;
+ }
+ else if (srv && srv->cookie && (s->be->ck_opts & PR_CK_PFX)) {
+ /* insert the cookie name associated with this server
+ * before the existing cookie, and insert a delimiter between them.
+ */
+ int sliding, delta;
+ ctx.value = ist2(val_beg, 0);
+ ctx.lws_before = ctx.lws_after = 0;
+ http_replace_header_value(htx, &ctx, ist2(srv->cookie, srv->cklen + 1));
+ delta = srv->cklen + 1;
+ sliding = (ctx.value.ptr - val_beg);
+ hdr_beg += sliding;
+ val_beg += sliding;
+ next += sliding + delta;
+ hdr_end += sliding + delta;
+
+ val_beg[srv->cklen] = COOKIE_DELIM;
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_REPLACED;
+ }
+ }
+ /* that's done for this cookie, check the next one on the same
+ * line when next != hdr_end (only if is_cookie2).
+ */
+ }
+ }
+}
+
+/*
+ * Parses the Cache-Control and Pragma request header fields to determine if
+ * the request may be served from the cache and/or if it is cacheable. Updates
+ * s->txn->flags.
+ */
+void http_check_request_for_cacheability(struct stream *s, struct channel *req)
+{
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ int pragma_found, cc_found;
+
+ if ((txn->flags & (TX_CACHEABLE|TX_CACHE_IGNORE)) == TX_CACHE_IGNORE)
+ return; /* nothing more to do here */
+
+ htx = htxbuf(&req->buf);
+ pragma_found = cc_found = 0;
+
+ /* Check "pragma" header for HTTP/1.0 compatibility. */
+ if (http_find_header(htx, ist("pragma"), &ctx, 1)) {
+ if (isteqi(ctx.value, ist("no-cache"))) {
+ pragma_found = 1;
+ }
+ }
+
+ ctx.blk = NULL;
+ /* Don't use the cache and don't try to store if we found the
+ * Authorization header */
+ if (http_find_header(htx, ist("authorization"), &ctx, 1)) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ txn->flags |= TX_CACHE_IGNORE;
+ }
+
+
+ /* Look for "cache-control" header and iterate over all the values
+ * until we find one that specifies that caching is possible or not. */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
+ cc_found = 1;
+ /* We don't check the values after max-age, max-stale nor min-fresh,
+ * we simply don't use the cache when they're specified. */
+ if (istmatchi(ctx.value, ist("max-age")) ||
+ istmatchi(ctx.value, ist("no-cache")) ||
+ istmatchi(ctx.value, ist("max-stale")) ||
+ istmatchi(ctx.value, ist("min-fresh"))) {
+ txn->flags |= TX_CACHE_IGNORE;
+ continue;
+ }
+ if (istmatchi(ctx.value, ist("no-store"))) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ continue;
+ }
+ }
+
+ /* RFC7234#5.4:
+ * When the Cache-Control header field is also present and
+ * understood in a request, Pragma is ignored.
+ * When the Cache-Control header field is not present in a
+ * request, caches MUST consider the no-cache request
+ * pragma-directive as having the same effect as if
+ * "Cache-Control: no-cache" were present.
+ */
+ if (!cc_found && pragma_found)
+ txn->flags |= TX_CACHE_IGNORE;
+}
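+
+/* Usage note (illustrative): per the checks above, a request carrying
+ *
+ *     Cache-Control: no-cache
+ *
+ * or an Authorization header gets TX_CACHE_IGNORE set, while a lone
+ * "Pragma: no-cache" only has that effect when no Cache-Control header is
+ * present (RFC7234#5.4).
+ */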
+
+/*
+ * Check if response is cacheable or not. Updates s->txn->flags.
+ */
+void http_check_response_for_cacheability(struct stream *s, struct channel *res)
+{
+ struct http_txn *txn = s->txn;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct htx *htx;
+ int has_freshness_info = 0;
+ int has_validator = 0;
+
+ if (txn->status < 200) {
+ /* do not try to cache interim responses! */
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ return;
+ }
+
+ htx = htxbuf(&res->buf);
+ /* Check "pragma" header for HTTP/1.0 compatibility. */
+ if (http_find_header(htx, ist("pragma"), &ctx, 1)) {
+ if (isteqi(ctx.value, ist("no-cache"))) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ return;
+ }
+ }
+
+ /* Look for "cache-control" header and iterate over all the values
+ * until we find one that specifies that caching is possible or not. */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
+ if (isteqi(ctx.value, ist("public"))) {
+ txn->flags |= TX_CACHEABLE | TX_CACHE_COOK;
+ continue;
+ }
+ if (isteqi(ctx.value, ist("private")) ||
+ isteqi(ctx.value, ist("no-cache")) ||
+ isteqi(ctx.value, ist("no-store")) ||
+ isteqi(ctx.value, ist("max-age=0")) ||
+ isteqi(ctx.value, ist("s-maxage=0"))) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ continue;
+ }
+ /* We might have a no-cache="set-cookie" form. */
+ if (istmatchi(ctx.value, ist("no-cache=\"set-cookie"))) {
+ txn->flags &= ~TX_CACHE_COOK;
+ continue;
+ }
+
+ if (istmatchi(ctx.value, ist("s-maxage")) ||
+ istmatchi(ctx.value, ist("max-age"))) {
+ has_freshness_info = 1;
+ continue;
+ }
+ }
+
+ /* If no freshness information could be found in Cache-Control values,
+ * look for an Expires header. */
+ if (!has_freshness_info) {
+ ctx.blk = NULL;
+ has_freshness_info = http_find_header(htx, ist("expires"), &ctx, 0);
+ }
+
+ /* If no freshness information could be found in Cache-Control or Expires
+ * values, look for an explicit validator. */
+ if (!has_freshness_info) {
+ ctx.blk = NULL;
+ has_validator = 1;
+ if (!http_find_header(htx, ist("etag"), &ctx, 0)) {
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("last-modified"), &ctx, 0))
+ has_validator = 0;
+ }
+ }
+
+ /* We won't store an entry that has neither a cache validator nor an
+ * explicit expiration time, as suggested in RFC 7234#3. */
+ if (!has_freshness_info && !has_validator)
+ txn->flags &= ~TX_CACHEABLE;
+}
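+
+/* Usage note (illustrative): per the checks above, a response such as
+ *
+ *     Cache-Control: public, max-age=3600
+ *
+ * remains cacheable, while "private", "no-cache", "no-store", "max-age=0"
+ * or "s-maxage=0" clears TX_CACHEABLE, and a response with neither
+ * freshness information (max-age, s-maxage, Expires) nor a validator
+ * (ETag, Last-Modified) is not stored, as suggested in RFC 7234#3.
+ */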
+
+/*
+ * In a GET, HEAD or POST request, check if the requested URI matches the
+ * stats URI for the current backend.
+ *
+ * The method is checked to be GET, HEAD or POST, and the backend's uri_auth
+ * field is checked for validity; anything else makes the function return 0.
+ *
+ * Returns 1 if stats should be provided, otherwise 0.
+ */
+static int http_stats_check_uri(struct stream *s, struct http_txn *txn, struct proxy *backend)
+{
+ struct uri_auth *uri_auth = backend->uri_auth;
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct ist uri;
+
+ if (!uri_auth)
+ return 0;
+
+ if (txn->meth != HTTP_METH_GET && txn->meth != HTTP_METH_HEAD && txn->meth != HTTP_METH_POST)
+ return 0;
+
+ htx = htxbuf(&s->req.buf);
+ sl = http_get_stline(htx);
+ uri = htx_sl_req_uri(sl);
+ if (*uri_auth->uri_prefix == '/') {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = http_parse_path(&parser);
+ }
+
+ /* check URI size */
+ if (uri_auth->uri_len > uri.len)
+ return 0;
+
+ if (memcmp(uri.ptr, uri_auth->uri_prefix, uri_auth->uri_len) != 0)
+ return 0;
+
+ return 1;
+}
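+
+/* Usage sketch (illustrative): the prefix compared above comes from the
+ * documented "stats uri" directive, e.g.:
+ *
+ *     stats uri /haproxy?stats
+ *
+ * so that any GET/HEAD/POST whose URI begins with this prefix is routed to
+ * the stats applet.
+ */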
+
+/* This function prepares an applet to handle the stats. It can deal with the
+ * "100-continue" expectation, check that admin rules are met for POST requests,
+ * and program a response message if something was unexpected. It cannot fail
+ * and always relies on the stats applet to complete the job. It does not touch
+ * analysers nor counters, which are left to the caller. It does not touch
+ * s->target which is supposed to already point to the stats applet. The caller
+ * is expected to have already assigned an appctx to the stream.
+ */
+static int http_handle_stats(struct stream *s, struct channel *req)
+{
+ struct stats_admin_rule *stats_admin_rule;
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct uri_auth *uri_auth = s->be->uri_auth;
+ const char *h, *lookup, *end;
+ struct appctx *appctx = __sc_appctx(s->scb);
+ struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct htx *htx;
+ struct htx_sl *sl;
+
+ appctx->st1 = 0;
+ ctx->state = STAT_STATE_INIT;
+ ctx->st_code = STAT_STATUS_INIT;
+ ctx->flags |= uri_auth->flags;
+ ctx->flags |= STAT_FMT_HTML; /* assume HTML mode by default */
+ if ((msg->flags & HTTP_MSGF_VER_11) && (txn->meth != HTTP_METH_HEAD))
+ ctx->flags |= STAT_CHUNKED;
+
+ htx = htxbuf(&req->buf);
+ sl = http_get_stline(htx);
+ lookup = HTX_SL_REQ_UPTR(sl) + uri_auth->uri_len;
+ end = HTX_SL_REQ_UPTR(sl) + HTX_SL_REQ_ULEN(sl);
+
+ for (h = lookup; h <= end - 3; h++) {
+ if (memcmp(h, ";up", 3) == 0) {
+ ctx->flags |= STAT_HIDE_DOWN;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 9; h++) {
+ if (memcmp(h, ";no-maint", 9) == 0) {
+ ctx->flags |= STAT_HIDE_MAINT;
+ break;
+ }
+ }
+
+ if (uri_auth->refresh) {
+ for (h = lookup; h <= end - 10; h++) {
+ if (memcmp(h, ";norefresh", 10) == 0) {
+ ctx->flags |= STAT_NO_REFRESH;
+ break;
+ }
+ }
+ }
+
+ for (h = lookup; h <= end - 4; h++) {
+ if (memcmp(h, ";csv", 4) == 0) {
+ ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM);
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 6; h++) {
+ if (memcmp(h, ";typed", 6) == 0) {
+ ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM);
+ ctx->flags |= STAT_FMT_TYPED;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 5; h++) {
+ if (memcmp(h, ";json", 5) == 0) {
+ ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM);
+ ctx->flags |= STAT_FMT_JSON;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 12; h++) {
+ if (memcmp(h, ";json-schema", 12) == 0) {
+ ctx->flags &= ~STAT_FMT_MASK;
+ ctx->flags |= STAT_JSON_SCHM;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 8; h++) {
+ if (memcmp(h, ";st=", 4) == 0) {
+ int i;
+ h += 4;
+ ctx->st_code = STAT_STATUS_UNKN;
+ for (i = STAT_STATUS_INIT + 1; i < STAT_STATUS_SIZE; i++) {
+ if (strncmp(stat_status_codes[i], h, 4) == 0) {
+ ctx->st_code = i;
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ for (h = lookup; h <= end - 8; h++) {
+ if (memcmp(h, STAT_SCOPE_INPUT_NAME "=", strlen(STAT_SCOPE_INPUT_NAME) + 1) == 0) {
+ int itx = 0;
+ const char *h2;
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + 1];
+ const char *err;
+
+ h += strlen(STAT_SCOPE_INPUT_NAME) + 1;
+ h2 = h;
+ ctx->scope_str = h2 - HTX_SL_REQ_UPTR(sl);
+ while (h < end) {
+ if (*h == ';' || *h == '&' || *h == ' ')
+ break;
+ itx++;
+ h++;
+ }
+
+ if (itx > STAT_SCOPE_TXT_MAXLEN)
+ itx = STAT_SCOPE_TXT_MAXLEN;
+ ctx->scope_len = itx;
+
+ /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ memcpy(scope_txt, h2, itx);
+ scope_txt[itx] = '\0';
+ err = invalid_char(scope_txt);
+ if (err) {
+ /* bad char in search text => clear scope */
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ }
+ break;
+ }
+ }
+
+ /* now check whether we have some admin rules for this request */
+ list_for_each_entry(stats_admin_rule, &uri_auth->admin_rules, list) {
+ int ret = 1;
+
+ if (stats_admin_rule->cond) {
+ ret = acl_exec_cond(stats_admin_rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (stats_admin_rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* no rule, or the rule matches */
+ ctx->flags |= STAT_ADMIN;
+ break;
+ }
+ }
+
+ if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD)
+ appctx->st0 = STAT_HTTP_HEAD;
+ else if (txn->meth == HTTP_METH_POST) {
+ if (ctx->flags & STAT_ADMIN) {
+ appctx->st0 = STAT_HTTP_POST;
+ if (msg->msg_state < HTTP_MSG_DATA)
+ req->analysers |= AN_REQ_HTTP_BODY;
+ }
+ else {
+ /* POST without admin level */
+ ctx->flags &= ~STAT_CHUNKED;
+ ctx->st_code = STAT_STATUS_DENY;
+ appctx->st0 = STAT_HTTP_LAST;
+ }
+ }
+ else {
+ /* Unsupported method */
+ ctx->flags &= ~STAT_CHUNKED;
+ ctx->st_code = STAT_STATUS_IVAL;
+ appctx->st0 = STAT_HTTP_LAST;
+ }
+
+ s->task->nice = -32; /* small boost for HTTP statistics */
+ return 1;
+}
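+
+/* Usage note (illustrative): the parsing above means the stats URI accepts
+ * semicolon-separated modifiers appended to the configured prefix, e.g.:
+ *
+ *     /haproxy?stats;csv;norefresh
+ *     /haproxy?stats;json
+ *     /haproxy?stats;up;no-maint
+ *
+ * while ";st=" carries the status code of a previous POST and "scope="
+ * restricts the dump to matching proxy names.
+ */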
+
+/* This function waits for the message payload for at most <time> milliseconds
+ * (which may be set to TICK_ETERNITY). It stops waiting once at least <bytes>
+ * bytes of the payload have been received (0 means no limit). It returns
+ * HTTP_RULE_* depending on the result:
+ *
+ * - HTTP_RULE_RES_CONT when conditions are met to stop waiting
+ * - HTTP_RULE_RES_YIELD to wait for more data
+ * - HTTP_RULE_RES_ABRT when a timeout occurs
+ * - HTTP_RULE_RES_BADREQ if a parsing error is raised by lower level
+ * - HTTP_RULE_RES_ERROR if an internal error occurred
+ *
+ * If a timeout occurs, this function is responsible for emitting the right
+ * response to the client, depending on the channel (408 on the request side,
+ * 504 on the response side). All other errors must be handled by the caller.
+ */
+enum rule_result http_wait_for_msg_body(struct stream *s, struct channel *chn,
+ unsigned int time, unsigned int bytes)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = ((chn->flags & CF_ISRESP) ? &txn->rsp : &txn->req);
+ struct htx *htx;
+ enum rule_result ret = HTTP_RULE_RES_CONT;
+
+ htx = htxbuf(&chn->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ ret = HTTP_RULE_RES_BADREQ;
+ goto end;
+ }
+ if (htx->flags & HTX_FL_PROCESSING_ERROR) {
+ ret = HTTP_RULE_RES_ERROR;
+ goto end;
+ }
+
+ /* Do nothing for bodyless and CONNECT requests */
+ if (txn->meth == HTTP_METH_CONNECT || (msg->flags & HTTP_MSGF_BODYLESS))
+ goto end;
+
+ if (!(chn->flags & CF_ISRESP) && msg->msg_state < HTTP_MSG_DATA) {
+ if (http_handle_expect_hdr(s, htx, msg) == -1) {
+ ret = HTTP_RULE_RES_ERROR;
+ goto end;
+ }
+ }
+
+ msg->msg_state = HTTP_MSG_DATA;
+
+ /* Now we're in HTTP_MSG_DATA. We just need to know if all data have
+ * been received or if the buffer is full.
+ */
+ if ((htx->flags & HTX_FL_EOM) ||
+ htx_get_tail_type(htx) > HTX_BLK_DATA ||
+ channel_htx_full(chn, htx, global.tune.maxrewrite) ||
+ sc_waiting_room(chn_prod(chn)))
+ goto end;
+
+ if (bytes) {
+ struct htx_blk *blk;
+ unsigned int len = 0;
+
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ if (htx_get_blk_type(blk) != HTX_BLK_DATA)
+ continue;
+ len += htx_get_blksz(blk);
+ if (len >= bytes)
+ goto end;
+ }
+ }
+
+ if ((chn->flags & CF_READ_TIMEOUT) || tick_is_expired(chn->analyse_exp, now_ms)) {
+ if (!(chn->flags & CF_ISRESP))
+ goto abort_req;
+ goto abort_res;
+ }
+
+ /* we get here if we need to wait for more data */
+ if (!(chn->flags & (CF_SHUTR | CF_READ_ERROR))) {
+ if (!tick_isset(chn->analyse_exp))
+ chn->analyse_exp = tick_add_ifset(now_ms, time);
+ ret = HTTP_RULE_RES_YIELD;
+ }
+
+ end:
+ return ret;
+
+ abort_req:
+ txn->status = 408;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLITO;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ ret = HTTP_RULE_RES_ABRT;
+ goto end;
+
+ abort_res:
+ txn->status = 504;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVTO;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+ stream_inc_http_fail_ctr(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ ret = HTTP_RULE_RES_ABRT;
+ goto end;
+}
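+
+/* Usage sketch (illustrative): this function is assumed to back the
+ * documented "wait-for-body" action, e.g.:
+ *
+ *     http-request wait-for-body time 1s at-least 1k
+ */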
+
+void http_perform_server_redirect(struct stream *s, struct stconn *sc)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct server *srv;
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct ist path, location;
+ unsigned int flags;
+ struct http_uri_parser parser;
+
+ /*
+ * Create the location
+ */
+ chunk_reset(&trash);
+
+ /* 1: add the server's prefix */
+ /* special prefix "/" means don't change URL */
+ srv = __objt_server(s->target);
+ if (srv->rdr_len != 1 || *srv->rdr_pfx != '/') {
+ if (!chunk_memcat(&trash, srv->rdr_pfx, srv->rdr_len))
+ return;
+ }
+
+ /* 2: add the request Path */
+ htx = htxbuf(&req->buf);
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (!isttest(path))
+ return;
+
+ if (!chunk_memcat(&trash, path.ptr, path.len))
+ return;
+ location = ist2(trash.area, trash.data);
+
+ /*
+ * Create the 302 response
+ */
+ htx = htx_from_buf(&res->buf);
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_BODYLESS);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags,
+ ist("HTTP/1.1"), ist("302"), ist("Found"));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = 302;
+ s->txn->status = 302;
+
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) ||
+ !htx_add_header(htx, ist("Connection"), ist("close")) ||
+ !htx_add_header(htx, ist("Content-length"), ist("0")) ||
+ !htx_add_header(htx, ist("Location"), location))
+ goto fail;
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, &res->buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ /* return without error. */
+ sc_shutr(sc);
+ sc_shutw(sc);
+ s->conn_err_type = STRM_ET_NONE;
+ sc->state = SC_ST_CLO;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_C;
+
+ /* FIXME: we should increase a counter of redirects per server and per backend. */
+ srv_inc_sess_ctr(srv);
+ srv_set_sess_last(srv);
+ return;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htx);
+}
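+
+/* Illustration (hypothetical configuration, not taken from this file): with
+ * "server s1 192.168.0.1:80 redir http://www.example.com" and a request for
+ * "/img/logo.png", the function above emits:
+ *
+ *   HTTP/1.1 302 Found
+ *   Cache-Control: no-cache
+ *   Connection: close
+ *   Content-length: 0
+ *   Location: http://www.example.com/img/logo.png
+ */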
+
+/* This function terminates the request because it was completely analyzed or
+ * because an error was triggered during the body forwarding.
+ */
+static void http_end_request(struct stream *s)
+{
+ struct channel *chn = &s->req;
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_HTTP_ANA, s, txn);
+
+ if (unlikely(txn->req.msg_state == HTTP_MSG_ERROR ||
+ txn->rsp.msg_state == HTTP_MSG_ERROR)) {
+ channel_abort(chn);
+ channel_htx_truncate(chn, htxbuf(&chn->buf));
+ goto end;
+ }
+
+ if (unlikely(txn->req.msg_state < HTTP_MSG_DONE)) {
+ DBG_TRACE_DEVEL("waiting end of the request", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->req.msg_state == HTTP_MSG_DONE) {
+ /* No need to read anymore, the request was completely parsed.
+ * We can shut the read side unless we want to abort_on_close,
+ * or we have a POST request. The issue with POST requests is
+ * that some browsers still send a CRLF after the request, and
+ * this CRLF must be read so that it does not remain in the kernel
+ * buffers, otherwise a close could cause an RST on some systems
+ * (eg: Linux).
+ */
+ if (!(s->be->options & PR_O_ABRT_CLOSE) && txn->meth != HTTP_METH_POST)
+ channel_dont_read(chn);
+
+ /* if the server closes the connection, we want to immediately react
+ * and close the socket to save packets and syscalls.
+ */
+ s->scb->flags |= SC_FL_NOHALF;
+
+ /* In any case we've finished parsing the request so we must
+ * disable Nagle when sending data because 1) we're not going
+ * to shut this side, and 2) the server is waiting for us to
+ * send pending data.
+ */
+ chn->flags |= CF_NEVER_WAIT;
+
+ if (txn->rsp.msg_state < HTTP_MSG_DONE) {
+			/* The server has not finished responding, so we
+ * don't want to move in order not to upset it.
+ */
+ DBG_TRACE_DEVEL("waiting end of the response", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ /* When we get here, it means that both the request and the
+ * response have finished receiving. Depending on the connection
+ * mode, we'll have to wait for the last bytes to leave in either
+ * direction, and sometimes for a close to be effective.
+ */
+ if (txn->flags & TX_CON_WANT_TUN) {
+ /* Tunnel mode will not have any analyser so it needs to
+ * poll for reads.
+ */
+ channel_auto_read(chn);
+ txn->req.msg_state = HTTP_MSG_TUNNEL;
+ }
+ else {
+ /* we're not expecting any new data to come for this
+ * transaction, so we can close it.
+ *
+ * However, there is an exception if the response
+			 * length is undefined. In this case, we need to wait
+			 * for the close from the server. The response will be
+			 * switched to TUNNEL mode until the end.
+ */
+ if (!(txn->rsp.flags & HTTP_MSGF_XFER_LEN) &&
+ txn->rsp.msg_state != HTTP_MSG_CLOSED)
+ goto check_channel_flags;
+
+ if (!(chn->flags & (CF_SHUTW|CF_SHUTW_NOW))) {
+ channel_shutr_now(chn);
+ channel_shutw_now(chn);
+ }
+ }
+ goto check_channel_flags;
+ }
+
+ if (txn->req.msg_state == HTTP_MSG_CLOSING) {
+ http_msg_closing:
+ /* nothing else to forward, just waiting for the output buffer
+ * to be empty and for the shutw_now to take effect.
+ */
+ if (channel_is_empty(chn)) {
+ txn->req.msg_state = HTTP_MSG_CLOSED;
+ goto http_msg_closed;
+ }
+ else if (chn->flags & CF_SHUTW) {
+ txn->req.msg_state = HTTP_MSG_ERROR;
+ goto end;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->req.msg_state == HTTP_MSG_CLOSED) {
+ http_msg_closed:
+ /* if we don't know whether the server will close, we need to hard close */
+ if (txn->rsp.flags & HTTP_MSGF_XFER_LEN)
+ s->scb->flags |= SC_FL_NOLINGER; /* we want to close ASAP */
+ /* see above in MSG_DONE why we only do this in these states */
+ if (!(s->be->options & PR_O_ABRT_CLOSE))
+ channel_dont_read(chn);
+ goto end;
+ }
+
+ check_channel_flags:
+ /* Here, we are in HTTP_MSG_DONE or HTTP_MSG_TUNNEL */
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW)) {
+ /* if we've just closed an output, let's switch */
+ txn->req.msg_state = HTTP_MSG_CLOSING;
+ goto http_msg_closing;
+ }
+
+ end:
+ chn->analysers &= AN_REQ_FLT_END;
+ if (txn->req.msg_state == HTTP_MSG_TUNNEL) {
+ chn->flags |= CF_NEVER_WAIT;
+ if (HAS_REQ_DATA_FILTERS(s))
+ chn->analysers |= AN_REQ_FLT_XFER_DATA;
+ }
+ channel_auto_close(chn);
+ channel_auto_read(chn);
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+}
+
+
+/* This function terminates the response because it was completely analyzed or
+ * because an error was triggered during the body forwarding.
+ */
+static void http_end_response(struct stream *s)
+{
+ struct channel *chn = &s->res;
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_HTTP_ANA, s, txn);
+
+ if (unlikely(txn->req.msg_state == HTTP_MSG_ERROR ||
+ txn->rsp.msg_state == HTTP_MSG_ERROR)) {
+ channel_htx_truncate(&s->req, htxbuf(&s->req.buf));
+ channel_abort(&s->req);
+ goto end;
+ }
+
+ if (unlikely(txn->rsp.msg_state < HTTP_MSG_DONE)) {
+ DBG_TRACE_DEVEL("waiting end of the response", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->rsp.msg_state == HTTP_MSG_DONE) {
+ /* In theory, we don't need to read anymore, but we must
+ * still monitor the server connection for a possible close
+ * while the request is being uploaded, so we don't disable
+ * reading.
+ */
+ /* channel_dont_read(chn); */
+
+ if (txn->req.msg_state < HTTP_MSG_DONE) {
+ /* The client seems to still be sending data, probably
+ * because we got an error response during an upload.
+ * We have the choice of either breaking the connection
+			 * or letting it pass through. Let's do the latter.
+ */
+ DBG_TRACE_DEVEL("waiting end of the request", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ /* When we get here, it means that both the request and the
+ * response have finished receiving. Depending on the connection
+ * mode, we'll have to wait for the last bytes to leave in either
+ * direction, and sometimes for a close to be effective.
+ */
+ if (txn->flags & TX_CON_WANT_TUN) {
+ channel_auto_read(chn);
+ txn->rsp.msg_state = HTTP_MSG_TUNNEL;
+ }
+ else {
+ /* we're not expecting any new data to come for this
+ * transaction, so we can close it.
+ */
+ if (!(chn->flags & (CF_SHUTW|CF_SHUTW_NOW))) {
+ channel_shutr_now(chn);
+ channel_shutw_now(chn);
+ }
+ }
+ goto check_channel_flags;
+ }
+
+ if (txn->rsp.msg_state == HTTP_MSG_CLOSING) {
+ http_msg_closing:
+ /* nothing else to forward, just waiting for the output buffer
+ * to be empty and for the shutw_now to take effect.
+ */
+ if (channel_is_empty(chn)) {
+ txn->rsp.msg_state = HTTP_MSG_CLOSED;
+ goto http_msg_closed;
+ }
+ else if (chn->flags & CF_SHUTW) {
+ txn->rsp.msg_state = HTTP_MSG_ERROR;
+ _HA_ATOMIC_INC(&strm_sess(s)->fe->fe_counters.cli_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ if (strm_sess(s)->listener && strm_sess(s)->listener->counters)
+ _HA_ATOMIC_INC(&strm_sess(s)->listener->counters->cli_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.cli_aborts);
+ goto end;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->rsp.msg_state == HTTP_MSG_CLOSED) {
+ http_msg_closed:
+ /* drop any pending data */
+ channel_htx_truncate(&s->req, htxbuf(&s->req.buf));
+ channel_abort(&s->req);
+ goto end;
+ }
+
+ check_channel_flags:
+ /* Here, we are in HTTP_MSG_DONE or HTTP_MSG_TUNNEL */
+ if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW)) {
+ /* if we've just closed an output, let's switch */
+ txn->rsp.msg_state = HTTP_MSG_CLOSING;
+ goto http_msg_closing;
+ }
+
+ end:
+ chn->analysers &= AN_RES_FLT_END;
+ if (txn->rsp.msg_state == HTTP_MSG_TUNNEL) {
+ chn->flags |= CF_NEVER_WAIT;
+ if (HAS_RSP_DATA_FILTERS(s))
+ chn->analysers |= AN_RES_FLT_XFER_DATA;
+ }
+ channel_auto_close(chn);
+ channel_auto_read(chn);
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+}
+
+/* Forward a response generated by HAProxy (error/redirect/return). This
+ * function forwards all pending incoming data. If <final> is set to 0, nothing
+ * more is performed. It is used for 1xx informational messages. Otherwise, the
+ * transaction is terminated and the request is emptied. On success 1 is
+ * returned. If an error occurred, 0 is returned. On failure, no cleanup is
+ * performed; it is the caller's responsibility to do it.
+ */
+int http_forward_proxy_resp(struct stream *s, int final)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx *htx = htxbuf(&res->buf);
+ size_t data;
+
+ if (final) {
+ htx->flags |= HTX_FL_PROXY_RESP;
+
+ if (!htx_is_empty(htx) && !http_eval_after_res_rules(s))
+ return 0;
+
+ if (s->txn->meth == HTTP_METH_HEAD)
+ htx_skip_msg_payload(htx);
+
+ channel_auto_read(req);
+ channel_abort(req);
+ channel_auto_close(req);
+ channel_htx_erase(req, htxbuf(&req->buf));
+
+ res->wex = tick_add_ifset(now_ms, res->wto);
+ channel_auto_read(res);
+ channel_auto_close(res);
+ channel_shutr_now(res);
+ res->flags |= CF_EOI; /* The response is terminated, add EOI */
+ htxbuf(&res->buf)->flags |= HTX_FL_EOM; /* no more data are expected */
+ }
+ else {
+ /* Send ASAP informational messages. Rely on CF_EOI for final
+ * response.
+ */
+ res->flags |= CF_SEND_DONTWAIT;
+ }
+
+ data = htx->data - co_data(res);
+ c_adv(res, data);
+ htx->first = -1;
+ res->total += data;
+ return 1;
+}
+
+void http_server_error(struct stream *s, struct stconn *sc, int err,
+ int finst, struct http_reply *msg)
+{
+ http_reply_and_close(s, s->txn->status, msg);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= err;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= finst;
+}
+
+void http_reply_and_close(struct stream *s, short status, struct http_reply *msg)
+{
+ if (!msg) {
+ channel_htx_truncate(&s->res, htxbuf(&s->res.buf));
+ goto end;
+ }
+
+ if (http_reply_message(s, msg) == -1) {
+ /* On error, return a 500 error message, but don't rewrite it if
+ * it is already an internal error. If it was already a "const"
+ * 500 error, just fail.
+ */
+ if (s->txn->status == 500) {
+ if (s->txn->flags & TX_CONST_REPLY)
+ goto end;
+ s->txn->flags |= TX_CONST_REPLY;
+ }
+ s->txn->status = 500;
+ s->txn->http_reply = NULL;
+ return http_reply_and_close(s, s->txn->status, http_error_message(s));
+ }
+
+end:
+ s->res.wex = tick_add_ifset(now_ms, s->res.wto);
+
+	/* At this stage, HTTP analysis is finished */
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->req.analyse_exp = TICK_ETERNITY;
+
+ s->res.analysers &= AN_RES_FLT_END;
+ s->res.analyse_exp = TICK_ETERNITY;
+
+ channel_auto_read(&s->req);
+ channel_abort(&s->req);
+ channel_auto_close(&s->req);
+ channel_htx_erase(&s->req, htxbuf(&s->req.buf));
+ channel_auto_read(&s->res);
+ channel_auto_close(&s->res);
+ channel_shutr_now(&s->res);
+}
+
+struct http_reply *http_error_message(struct stream *s)
+{
+ const int msgnum = http_get_status_idx(s->txn->status);
+
+ if (s->txn->http_reply)
+ return s->txn->http_reply;
+ else if (s->be->replies[msgnum])
+ return s->be->replies[msgnum];
+ else if (strm_fe(s)->replies[msgnum])
+ return strm_fe(s)->replies[msgnum];
+ else
+ return &http_err_replies[msgnum];
+}
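+
+/* Lookup order illustrated: a reply attached to the transaction
+ * (txn->http_reply) wins; otherwise a backend-level reply for this status
+ * (e.g. set by a hypothetical "errorfile 503 /etc/haproxy/503.http" line in
+ * the backend) is used before a frontend-level one, and the built-in
+ * http_err_replies[] entry is the final fallback.
+ */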
+
+/* Produces an HTX message from an http reply. Depending on the http reply type,
+ * an errorfile, a raw file or a log-format string is used. On success, it
+ * returns 0. If an error occurs -1 is returned. On failure, no cleanup is
+ * performed; it is the caller's responsibility to do it.
+ */
+int http_reply_to_htx(struct stream *s, struct htx *htx, struct http_reply *reply)
+{
+ struct buffer *errmsg;
+ struct htx_sl *sl;
+ struct buffer *body = NULL;
+ const char *status, *reason, *clen, *ctype;
+ unsigned int slflags;
+ int ret = 0;
+
+ /*
+	 * - HTTP_REPLY_ERRFILES is not expected here; if it shows up, it is
+	 *   handled as if there were no payload.
+	 *
+	 * - HTTP_REPLY_INDIRECT: switch to another reply if defined, or handle
+	 *   as no payload if NULL. The TXN status code is set with the status
+	 *   of the original reply.
+ */
+
+ if (reply->type == HTTP_REPLY_INDIRECT) {
+ if (reply->body.reply)
+ reply = reply->body.reply;
+ }
+ if (reply->type == HTTP_REPLY_ERRMSG && !reply->body.errmsg) {
+ /* get default error message */
+ if (reply == s->txn->http_reply)
+ s->txn->http_reply = NULL;
+ reply = http_error_message(s);
+ if (reply->type == HTTP_REPLY_INDIRECT) {
+ if (reply->body.reply)
+ reply = reply->body.reply;
+ }
+ }
+
+ if (reply->type == HTTP_REPLY_ERRMSG) {
+		/* implicit or explicit error message */
+ errmsg = reply->body.errmsg;
+ if (errmsg && !b_is_null(errmsg)) {
+ if (!htx_copy_msg(htx, errmsg))
+ goto fail;
+ }
+ }
+ else {
+ /* no payload, file or log-format string */
+ if (reply->type == HTTP_REPLY_RAW) {
+ /* file */
+ body = &reply->body.obj;
+ }
+ else if (reply->type == HTTP_REPLY_LOGFMT) {
+ /* log-format string */
+ body = alloc_trash_chunk();
+ if (!body)
+ goto fail_alloc;
+ body->data = build_logline(s, body->area, body->size, &reply->body.fmt);
+ }
+ /* else no payload */
+
+ status = ultoa(reply->status);
+ reason = http_get_reason(reply->status);
+ slflags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN);
+ if (!body || !b_data(body))
+ slflags |= HTX_SL_F_BODYLESS;
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, slflags, ist("HTTP/1.1"), ist(status), ist(reason));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = reply->status;
+
+ clen = (body ? ultoa(b_data(body)) : "0");
+ ctype = reply->ctype;
+
+ if (!LIST_ISEMPTY(&reply->hdrs)) {
+ struct http_reply_hdr *hdr;
+ struct buffer *value = alloc_trash_chunk();
+
+ if (!value)
+ goto fail;
+
+ list_for_each_entry(hdr, &reply->hdrs, list) {
+ chunk_reset(value);
+ value->data = build_logline(s, value->area, value->size, &hdr->value);
+ if (b_data(value) && !htx_add_header(htx, hdr->name, ist2(b_head(value), b_data(value)))) {
+ free_trash_chunk(value);
+ goto fail;
+ }
+ chunk_reset(value);
+ }
+ free_trash_chunk(value);
+ }
+
+ if (!htx_add_header(htx, ist("content-length"), ist(clen)) ||
+ (body && b_data(body) && ctype && !htx_add_header(htx, ist("content-type"), ist(ctype))) ||
+ !htx_add_endof(htx, HTX_BLK_EOH) ||
+ (body && b_data(body) && !htx_add_data_atonce(htx, ist2(b_head(body), b_data(body)))))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+ }
+
+ leave:
+ if (reply->type == HTTP_REPLY_LOGFMT)
+ free_trash_chunk(body);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ /* fall through */
+ fail:
+ ret = -1;
+ goto leave;
+}
+
+/* Send an http reply to the client. On success, it returns 0. If an error
+ * occurs -1 is returned and the response channel is truncated, thereby
+ * removing the faulty reply. This function may fail when the reply is
+ * formatted (http_reply_to_htx) or when the reply is forwarded
+ * (http_forward_proxy_resp). In the latter case, it is because an
+ * http-after-response rule failed.
+ */
+int http_reply_message(struct stream *s, struct http_reply *reply)
+{
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+
+ if (s->txn->status == -1)
+ s->txn->status = reply->status;
+ channel_htx_truncate(res, htx);
+
+ if (http_reply_to_htx(s, htx, reply) == -1)
+ goto fail;
+
+ htx_to_buf(htx, &s->res.buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+ return 0;
+
+ fail:
+ channel_htx_truncate(res, htx);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ return -1;
+}
+
+/* Return the error message corresponding to s->conn_err_type. It is assumed
+ * that the server side is closed. Note that err_type is actually a
+ * bitmask in which, in practice, only aborts may be cumulated with other
+ * values. We consider aborted operations more important than timeouts or
+ * errors because nothing else in the logs would explain the incomplete
+ * retries. All other values should avoid
+ * being cumulated. It should normally not be possible to have multiple
+ * aborts at once, but just in case, the first one in sequence is reported.
+ * Note that connection errors appearing on the second request of a keep-alive
+ * connection are not reported since this allows the client to retry.
+ */
+void http_return_srv_error(struct stream *s, struct stconn *sc)
+{
+ int err_type = s->conn_err_type;
+
+ /* set s->txn->status for http_error_message(s) */
+ if (err_type & STRM_ET_QUEUE_ABRT) {
+ s->txn->status = -1;
+ http_server_error(s, sc, SF_ERR_CLICL, SF_FINST_Q, NULL);
+ }
+ else if (err_type & STRM_ET_CONN_ABRT) {
+ s->txn->status = -1;
+ http_server_error(s, sc, SF_ERR_CLICL, SF_FINST_C, NULL);
+ }
+ else if (err_type & STRM_ET_QUEUE_TO) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVTO, SF_FINST_Q,
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_QUEUE_ERR) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVCL, SF_FINST_Q,
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_CONN_TO) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVTO, SF_FINST_C,
+ (s->txn->flags & TX_NOT_FIRST) ? NULL :
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_CONN_ERR) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVCL, SF_FINST_C,
+ (s->flags & SF_SRV_REUSED) ? NULL :
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_CONN_RES) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_RESOURCE, SF_FINST_C,
+ (s->txn->flags & TX_NOT_FIRST) ? NULL :
+ http_error_message(s));
+ }
+ else { /* STRM_ET_CONN_OTHER and others */
+ s->txn->status = 500;
+ http_server_error(s, sc, SF_ERR_INTERNAL, SF_FINST_C,
+ http_error_message(s));
+ }
+}
+
+
+/* Handle Expect: 100-continue for HTTP/1.1 messages if necessary. It returns 0
+ * on success and -1 on error.
+ */
+static int http_handle_expect_hdr(struct stream *s, struct htx *htx, struct http_msg *msg)
+{
+ /* If we have HTTP/1.1 message with a body and Expect: 100-continue,
+ * then we must send an HTTP/1.1 100 Continue intermediate response.
+ */
+ if (msg->msg_state == HTTP_MSG_BODY && (msg->flags & HTTP_MSGF_VER_11) &&
+ (msg->flags & (HTTP_MSGF_CNT_LEN|HTTP_MSGF_TE_CHNK))) {
+ struct ist hdr = { .ptr = "Expect", .len = 6 };
+ struct http_hdr_ctx ctx;
+
+ ctx.blk = NULL;
+ /* Expect is allowed in 1.1, look for it */
+ if (http_find_header(htx, hdr, &ctx, 0) &&
+ unlikely(isteqi(ctx.value, ist2("100-continue", 12)))) {
+ if (http_reply_100_continue(s) == -1)
+ return -1;
+ http_remove_header(htx, &ctx);
+ }
+ }
+ return 0;
+}
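+
+/* Example of the exchange handled above: an HTTP/1.1 request carrying both a
+ * body and "Expect: 100-continue" receives an interim "HTTP/1.1 100 Continue"
+ * response, and the Expect header is removed before the request is forwarded.
+ */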
+
+/* Send a 100-Continue response to the client. It returns 0 on success and -1
+ * on error. The response channel is updated accordingly.
+ */
+static int http_reply_100_continue(struct stream *s)
+{
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+ struct htx_sl *sl;
+ unsigned int flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|
+ HTX_SL_F_XFER_LEN|HTX_SL_F_BODYLESS);
+
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags,
+ ist("HTTP/1.1"), ist("100"), ist("Continue"));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = 100;
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ if (!http_forward_proxy_resp(s, 0))
+ goto fail;
+ return 0;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htx);
+ return -1;
+}
+
+
+/*
+ * Capture headers from message <htx> according to header list <cap_hdr>, and
+ * fill the <cap> pointers appropriately.
+ */
+static void http_capture_headers(struct htx *htx, char **cap, struct cap_hdr *cap_hdr)
+{
+ struct cap_hdr *h;
+ int32_t pos;
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ n = htx_get_blk_name(htx, blk);
+
+ for (h = cap_hdr; h; h = h->next) {
+ if (h->namelen && (h->namelen == n.len) &&
+ (strncasecmp(n.ptr, h->name, h->namelen) == 0)) {
+ if (cap[h->index] == NULL)
+ cap[h->index] =
+ pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) {
+ ha_alert("HTTP capture : out of memory.\n");
+ break;
+ }
+
+ v = htx_get_blk_value(htx, blk);
+ v = isttrim(v, h->len);
+
+ memcpy(cap[h->index], v.ptr, v.len);
+ cap[h->index][v.len]=0;
+ }
+ }
+ }
+}
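+
+/* Example (hypothetical configuration): with "capture request header Host
+ * len 10", a request carrying "Host: www.example.com" stores the truncated
+ * value "www.exampl" in cap[h->index], since the value is trimmed to h->len
+ * above.
+ */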
+
+/* Delete a value in a header between delimiters <from> and <next>. The header
+ * itself is delimited by <start> and <end> pointers. The number of characters
+ * displaced is returned, and the pointer to the first delimiter is updated if
+ * required. The function tries as much as possible to respect the following
+ * principles :
+ * - replace <from> delimiter by the <next> one unless <from> points to <start>,
+ * in which case <next> is simply removed
+ * - set exactly one space character after the new first delimiter, unless there
+ * are not enough characters in the block being moved to do so.
+ * - remove unneeded spaces before the previous delimiter and after the new
+ * one.
+ *
+ * It is the caller's responsibility to ensure that :
+ * - <from> points to a valid delimiter or <start> ;
+ * - <next> points to a valid delimiter or <end> ;
+ * - there are non-space chars before <from>.
+ */
+static int http_del_hdr_value(char *start, char *end, char **from, char *next)
+{
+ char *prev = *from;
+
+ if (prev == start) {
+		/* We're removing the first value. Eat the semicolon if <next>
+		 * is lower than <end>. */
+ if (next < end)
+ next++;
+
+ while (next < end && HTTP_IS_SPHT(*next))
+ next++;
+ }
+ else {
+ /* Remove useless spaces before the old delimiter. */
+ while (HTTP_IS_SPHT(*(prev-1)))
+ prev--;
+ *from = prev;
+
+ /* copy the delimiter and if possible a space if we're
+ * not at the end of the line.
+ */
+ if (next < end) {
+ *prev++ = *next++;
+ if (prev + 1 < next)
+ *prev++ = ' ';
+ while (next < end && HTTP_IS_SPHT(*next))
+ next++;
+ }
+ }
+ memmove(prev, next, end - next);
+ return (prev - next);
+}
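+
+/* Worked example (hypothetical values): in the header value "a=1; b=2; c=3",
+ * deleting "b=2" means <from> points to the ';' after "a=1" and <next> to the
+ * ';' before " c=3". The block is rewritten as "a=1; c=3" and the function
+ * returns prev - next, i.e. the (negative) displacement of the moved block.
+ */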
+
+
+/* Formats the start line of the request (without CRLF), puts it in <str> and
+ * returns the written length. The line can be truncated if it exceeds <len>.
+ */
+static size_t http_fmt_req_line(const struct htx_sl *sl, char *str, size_t len)
+{
+ struct ist dst = ist2(str, 0);
+
+ if (istcat(&dst, htx_sl_req_meth(sl), len) == -1)
+ goto end;
+ if (dst.len + 1 > len)
+ goto end;
+ dst.ptr[dst.len++] = ' ';
+
+ if (istcat(&dst, htx_sl_req_uri(sl), len) == -1)
+ goto end;
+ if (dst.len + 1 > len)
+ goto end;
+ dst.ptr[dst.len++] = ' ';
+
+ istcat(&dst, htx_sl_req_vsn(sl), len);
+ end:
+ return dst.len;
+}
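+
+/* Example: for a GET of "/index.html" in HTTP/1.1, the function writes
+ * "GET /index.html HTTP/1.1" (no CRLF) into <str> and returns 24, stopping
+ * early if an element would not fit in <len>.
+ */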
+
+/*
+ * Print a debug line with a start line.
+ */
+static void http_debug_stline(const char *dir, struct stream *s, const struct htx_sl *sl)
+{
+ struct session *sess = strm_sess(s);
+ int max;
+
+ chunk_printf(&trash, "%08x:%s.%s[%04x:%04x]: ", s->uniq_id, s->be->id,
+ dir,
+ objt_conn(sess->origin) ? (unsigned short)__objt_conn(sess->origin)->handle.fd : -1,
+ sc_conn(s->scb) ? (unsigned short)(__sc_conn(s->scb))->handle.fd : -1);
+
+ max = HTX_SL_P1_LEN(sl);
+ UBOUND(max, trash.size - trash.data - 3);
+ chunk_memcat(&trash, HTX_SL_P1_PTR(sl), max);
+ trash.area[trash.data++] = ' ';
+
+ max = HTX_SL_P2_LEN(sl);
+ UBOUND(max, trash.size - trash.data - 2);
+ chunk_memcat(&trash, HTX_SL_P2_PTR(sl), max);
+ trash.area[trash.data++] = ' ';
+
+ max = HTX_SL_P3_LEN(sl);
+ UBOUND(max, trash.size - trash.data - 1);
+ chunk_memcat(&trash, HTX_SL_P3_PTR(sl), max);
+ trash.area[trash.data++] = '\n';
+
+ DISGUISE(write(1, trash.area, trash.data));
+}
+
+/*
+ * Print a debug line with a header.
+ */
+static void http_debug_hdr(const char *dir, struct stream *s, const struct ist n, const struct ist v)
+{
+ struct session *sess = strm_sess(s);
+ int max;
+
+ chunk_printf(&trash, "%08x:%s.%s[%04x:%04x]: ", s->uniq_id, s->be->id,
+ dir,
+ objt_conn(sess->origin) ? (unsigned short)__objt_conn(sess->origin)->handle.fd : -1,
+ sc_conn(s->scb) ? (unsigned short)(__sc_conn(s->scb))->handle.fd : -1);
+
+ max = n.len;
+ UBOUND(max, trash.size - trash.data - 3);
+ chunk_memcat(&trash, n.ptr, max);
+ trash.area[trash.data++] = ':';
+ trash.area[trash.data++] = ' ';
+
+ max = v.len;
+ UBOUND(max, trash.size - trash.data - 1);
+ chunk_memcat(&trash, v.ptr, max);
+ trash.area[trash.data++] = '\n';
+
+ DISGUISE(write(1, trash.area, trash.data));
+}
+
+void http_txn_reset_req(struct http_txn *txn)
+{
+ txn->req.flags = 0;
+ txn->req.msg_state = HTTP_MSG_RQBEFORE; /* at the very beginning of the request */
+}
+
+void http_txn_reset_res(struct http_txn *txn)
+{
+ txn->rsp.flags = 0;
+ txn->rsp.msg_state = HTTP_MSG_RPBEFORE; /* at the very beginning of the response */
+}
+
+/*
+ * Create and initialize a new HTTP transaction for stream <s>. This should be
+ * used before processing any new request. It returns the transaction or NULL
+ * on error.
+ */
+struct http_txn *http_create_txn(struct stream *s)
+{
+ struct http_txn *txn;
+ struct stconn *sc = s->scf;
+
+ txn = pool_alloc(pool_head_http_txn);
+ if (!txn)
+ return NULL;
+ s->txn = txn;
+
+ txn->meth = HTTP_METH_OTHER;
+ txn->flags = ((sc && sc_ep_test(sc, SE_FL_NOT_FIRST)) ? TX_NOT_FIRST : 0);
+ txn->status = -1;
+ txn->http_reply = NULL;
+ txn->l7_buffer = BUF_NULL;
+ write_u32(txn->cache_hash, 0);
+
+ txn->cookie_first_date = 0;
+ txn->cookie_last_date = 0;
+
+ txn->srv_cookie = NULL;
+ txn->cli_cookie = NULL;
+ txn->uri = NULL;
+
+ http_txn_reset_req(txn);
+ http_txn_reset_res(txn);
+
+ txn->req.chn = &s->req;
+ txn->rsp.chn = &s->res;
+
+ txn->auth.method = HTTP_AUTH_UNKNOWN;
+
+ /* here we don't want to re-initialize s->vars_txn and s->vars_reqres
+ * variable lists, because they were already initialized upon stream
+ * creation in stream_new(), and thus may already contain some variables
+ */
+
+ return txn;
+}
+
+/* to be used at the end of a transaction */
+void http_destroy_txn(struct stream *s)
+{
+ struct http_txn *txn = s->txn;
+
+ /* these ones will have been dynamically allocated */
+ pool_free(pool_head_requri, txn->uri);
+ pool_free(pool_head_capture, txn->cli_cookie);
+ pool_free(pool_head_capture, txn->srv_cookie);
+ pool_free(pool_head_uniqueid, s->unique_id.ptr);
+
+ s->unique_id = IST_NULL;
+ txn->uri = NULL;
+ txn->srv_cookie = NULL;
+ txn->cli_cookie = NULL;
+
+ if (!LIST_ISEMPTY(&s->vars_txn.head))
+ vars_prune(&s->vars_txn, s->sess, s);
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+
+ b_free(&txn->l7_buffer);
+
+ pool_free(pool_head_http_txn, txn);
+ s->txn = NULL;
+}
+
+
+DECLARE_POOL(pool_head_http_txn, "http_txn", sizeof(struct http_txn));
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_client.c b/src/http_client.c
new file mode 100644
index 0000000..d18dee2
--- /dev/null
+++ b/src/http_client.c
@@ -0,0 +1,1429 @@
+/*
+ * HTTP Client
+ *
+ * Copyright (C) 2021 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file implements an HTTP Client API.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cli.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/global.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_client.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/htx.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/ssl_sock-t.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+#include <string.h>
+
+
+static struct proxy *httpclient_proxy;
+static struct server *httpclient_srv_raw;
+
+#ifdef USE_OPENSSL
+/* if the httpclient is not configured, errors are ignored and features are limited */
+static int hard_error_ssl = 0;
+static struct server *httpclient_srv_ssl;
+static int httpclient_ssl_verify = SSL_SOCK_VERIFY_REQUIRED;
+static char *httpclient_ssl_ca_file = NULL;
+#endif
+static struct applet httpclient_applet;
+
+/* if the httpclient is not configured, errors are ignored and features are limited */
+static int hard_error_resolvers = 0;
+static char *resolvers_id = NULL;
+static char *resolvers_prefer = NULL;
+
+/* --- This part of the file implement an HTTP client over the CLI ---
+ * The functions will be starting by "hc_cli" for "httpclient cli"
+ */
+
+/* What kind of data we need to read */
+#define HC_CLI_F_RES_STLINE 0x01
+#define HC_CLI_F_RES_HDR 0x02
+#define HC_CLI_F_RES_BODY 0x04
+#define HC_CLI_F_RES_END 0x08
+
+/* the CLI context for the httpclient command */
+struct hcli_svc_ctx {
+ struct httpclient *hc; /* the httpclient instance */
+ uint flags; /* flags from HC_CLI_F_* above */
+};
+
+/* These are the callbacks used by the HTTP client when it needs to notify
+ * about new data; they only set a flag in the IO handler via the svcctx.
+ */
+void hc_cli_res_stline_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_CLI_F_RES_STLINE;
+ appctx_wakeup(appctx);
+}
+
+void hc_cli_res_headers_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_CLI_F_RES_HDR;
+ appctx_wakeup(appctx);
+}
+
+void hc_cli_res_body_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_CLI_F_RES_BODY;
+ appctx_wakeup(appctx);
+}
+
+void hc_cli_res_end_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_CLI_F_RES_END;
+ appctx_wakeup(appctx);
+}
+
+/*
+ * Parse an httpclient keyword on the cli:
+ * httpclient <method> <URI>
+ */
+static int hc_cli_parse(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct hcli_svc_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct httpclient *hc;
+ char *err = NULL;
+ enum http_meth_t meth;
+ char *meth_str;
+ struct ist uri;
+ struct ist body = IST_NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[1] || !*args[2]) {
+ memprintf(&err, ": not enough parameters");
+ goto err;
+ }
+
+ meth_str = args[1];
+ uri = ist(args[2]);
+
+ if (payload)
+ body = ist(payload);
+
+ meth = find_http_meth(meth_str, strlen(meth_str));
+
+ hc = httpclient_new(appctx, meth, uri);
+ if (!hc) {
+ goto err;
+ }
+
+ /* update the httpclient callbacks */
+ hc->ops.res_stline = hc_cli_res_stline_cb;
+ hc->ops.res_headers = hc_cli_res_headers_cb;
+ hc->ops.res_payload = hc_cli_res_body_cb;
+ hc->ops.res_end = hc_cli_res_end_cb;
+
+ ctx->hc = hc; /* store the httpclient ptr in the applet */
+ ctx->flags = 0;
+
+ if (httpclient_req_gen(hc, hc->req.url, hc->req.meth, NULL, body) != ERR_NONE)
+ goto err;
+
+
+ if (!httpclient_start(hc))
+ goto err;
+
+ return 0;
+
+err:
+ memprintf(&err, "Can't start the HTTP client%s.\n", err ? err : "");
+ return cli_err(appctx, err);
+}
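+
+/* Usage sketch from the stats socket (hypothetical socket path; the command
+ * is registered at the expert level below, so it must be enabled first):
+ *
+ *   $ echo "expert-mode on; httpclient GET http://www.example.com/" | \
+ *         socat stdio /var/run/haproxy.sock
+ */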
+
+/* This function dumps the content of the httpclient receive buffer
+ * on the CLI output.
+ *
+ * Returns 1 when the processing is finished,
+ * or 0 if it needs to be called again.
+ */
+static int hc_cli_io_handler(struct appctx *appctx)
+{
+ struct hcli_svc_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct httpclient *hc = ctx->hc;
+ struct http_hdr *hdrs, *hdr;
+
+ if (ctx->flags & HC_CLI_F_RES_STLINE) {
+ chunk_printf(&trash, "%.*s %d %.*s\n", (unsigned int)istlen(hc->res.vsn), istptr(hc->res.vsn),
+ hc->res.status, (unsigned int)istlen(hc->res.reason), istptr(hc->res.reason));
+ if (applet_putchk(appctx, &trash) == -1)
+ goto more;
+ ctx->flags &= ~HC_CLI_F_RES_STLINE;
+ }
+
+ if (ctx->flags & HC_CLI_F_RES_HDR) {
+ chunk_reset(&trash);
+ hdrs = hc->res.hdrs;
+ for (hdr = hdrs; isttest(hdr->v); hdr++) {
+ if (!h1_format_htx_hdr(hdr->n, hdr->v, &trash))
+ goto too_many_hdrs;
+ }
+ if (!chunk_memcat(&trash, "\r\n", 2))
+ goto too_many_hdrs;
+ if (applet_putchk(appctx, &trash) == -1)
+ goto more;
+ ctx->flags &= ~HC_CLI_F_RES_HDR;
+ }
+
+ if (ctx->flags & HC_CLI_F_RES_BODY) {
+ int ret;
+
+ ret = httpclient_res_xfer(hc, sc_ib(sc));
+ channel_add_input(sc_ic(sc), ret); /* forward what we put in the buffer channel */
+
+ /* remove the flag if the buffer was emptied */
+ if (httpclient_data(hc))
+ goto more;
+ ctx->flags &= ~HC_CLI_F_RES_BODY;
+ }
+
+ /* we must close only if F_END is the last flag */
+ if (ctx->flags == HC_CLI_F_RES_END) {
+ ctx->flags &= ~HC_CLI_F_RES_END;
+ goto end;
+ }
+
+more:
+ if (!ctx->flags)
+ applet_have_no_more_data(appctx);
+ return 0;
+end:
+ return 1;
+
+too_many_hdrs:
+ return cli_err(appctx, "Too many headers.\n");
+}
+
+static void hc_cli_release(struct appctx *appctx)
+{
+ struct hcli_svc_ctx *ctx = appctx->svcctx;
+ struct httpclient *hc = ctx->hc;
+
+ /* Everything possible was printed on the CLI, we can destroy the client */
+ httpclient_stop_and_destroy(hc);
+
+ return;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "httpclient", NULL }, "httpclient <method> <URI> : launch an HTTP request", hc_cli_parse, hc_cli_io_handler, hc_cli_release, NULL, ACCESS_EXPERT},
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+
+/* --- This part of the file implements the actual HTTP client API --- */
+
+/*
+ * Generate a simple request and fill the httpclient request buffer with it.
+ * The request contains a request line generated from the absolute <url> and
+ * <meth>, as well as the list of headers <hdrs>.
+ *
+ * If the buffer was filled correctly the function returns 0; if not, it
+ * returns an error code, and there is no guarantee that the buffer was left
+ * unmodified.
+ */
+int httpclient_req_gen(struct httpclient *hc, const struct ist url, enum http_meth_t meth, const struct http_hdr *hdrs, const struct ist payload)
+{
+ struct htx_sl *sl;
+ struct htx *htx;
+ int err_code = 0;
+ struct ist meth_ist, vsn;
+ unsigned int flags = HTX_SL_F_VER_11 | HTX_SL_F_NORMALIZED_URI | HTX_SL_F_HAS_SCHM;
+ int i;
+ int foundhost = 0, foundaccept = 0, foundua = 0;
+
+ if (!b_alloc(&hc->req.buf))
+ goto error;
+
+ if (meth >= HTTP_METH_OTHER)
+ goto error;
+
+ meth_ist = http_known_methods[meth];
+
+ vsn = ist("HTTP/1.1");
+
+ htx = htx_from_buf(&hc->req.buf);
+ if (!htx)
+ goto error;
+
+ if (!hc->ops.req_payload && !isttest(payload))
+ flags |= HTX_SL_F_BODYLESS;
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth_ist, url, vsn);
+ if (!sl) {
+ goto error;
+ }
+ sl->info.req.meth = meth;
+
+ for (i = 0; hdrs && hdrs[i].n.len; i++) {
+ /* Don't check the value length because a header value may be empty */
+ if (isttest(hdrs[i].v) == 0)
+ continue;
+
+ if (isteqi(hdrs[i].n, ist("host")))
+ foundhost = 1;
+ else if (isteqi(hdrs[i].n, ist("accept")))
+ foundaccept = 1;
+ else if (isteqi(hdrs[i].n, ist("user-agent")))
+ foundua = 1;
+
+ if (!htx_add_header(htx, hdrs[i].n, hdrs[i].v))
+ goto error;
+ }
+
+ if (!foundhost) {
+ /* Add Host Header from URL */
+ if (!htx_add_header(htx, ist("Host"), ist("h")))
+ goto error;
+ if (!http_update_host(htx, sl, url))
+ goto error;
+ }
+
+ if (!foundaccept) {
+ if (!htx_add_header(htx, ist("Accept"), ist("*/*")))
+ goto error;
+ }
+
+ if (!foundua) {
+ if (!htx_add_header(htx, ist("User-Agent"), ist(HTTPCLIENT_USERAGENT)))
+ goto error;
+ }
+
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto error;
+
+ if (isttest(payload)) {
+		/* add the payload if it can fit in the buffer; no need to set
+		 * the Content-Length, the data will be sent chunked */
+ if (!htx_add_data_atonce(htx, payload))
+ goto error;
+ }
+
+	/* If a request payload callback was set, do not set the end of message
+	 * here: it *MUST* be set in the callback */
+ if (!hc->ops.req_payload)
+ htx->flags |= HTX_FL_EOM;
+
+ htx_to_buf(htx, &hc->req.buf);
+
+ return 0;
+error:
+ err_code |= ERR_ALERT | ERR_ABORT;
+ return err_code;
+}
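+
+/* Minimal API usage sketch, mirroring the CLI glue earlier in this patch
+ * (error handling trimmed; <caller> and my_end_cb are hypothetical):
+ *
+ *	struct httpclient *hc;
+ *
+ *	hc = httpclient_new(caller, HTTP_METH_GET, ist("http://www.example.com/"));
+ *	if (!hc)
+ *		return;
+ *	hc->ops.res_end = my_end_cb;
+ *	if (httpclient_req_gen(hc, hc->req.url, hc->req.meth, NULL, IST_NULL) != ERR_NONE ||
+ *	    !httpclient_start(hc))
+ *		httpclient_stop_and_destroy(hc);
+ */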
+
+/*
+ * Transfer the response to the destination buffer and wake up the HTTP client
+ * applet so it can fill its buffer again.
+ *
+ * Return the number of bytes transferred.
+ */
+int httpclient_res_xfer(struct httpclient *hc, struct buffer *dst)
+{
+ size_t room = b_room(dst);
+ int ret;
+
+ ret = b_force_xfer(dst, &hc->res.buf, MIN(room, b_data(&hc->res.buf)));
+ /* call the client once we consumed all data */
+ if (!b_data(&hc->res.buf)) {
+ b_free(&hc->res.buf);
+ if (hc->appctx)
+ appctx_wakeup(hc->appctx);
+ }
+ return ret;
+}
+
+/*
+ * Transfer raw HTTP payload from <src> and insert it in HTX format into the
+ * httpclient request buffer.
+ *
+ * Must be used to transfer the request body. It then wakes up the httpclient
+ * applet so it can forward the data.
+ *
+ * <end> adds the end-of-message flag if all the data could be copied.
+ *
+ * Return the number of bytes copied from src.
+ */
+int httpclient_req_xfer(struct httpclient *hc, struct ist src, int end)
+{
+ int ret = 0;
+ struct htx *htx;
+
+ if (!b_alloc(&hc->req.buf))
+ goto error;
+
+ htx = htx_from_buf(&hc->req.buf);
+ if (!htx)
+ goto error;
+
+ if (hc->appctx)
+ appctx_wakeup(hc->appctx);
+
+ ret += htx_add_data(htx, src);
+
+
+ /* if we copied all the data and the end flag is set */
+ if ((istlen(src) == ret) && end) {
+ /* no more data are expected. If the HTX buffer is empty, be
+ * sure to add something (EOT block in this case) to have
+		 * something to send. It is important to be sure the EOM flag
+ * will be handled by the endpoint. Because the message is
+ * empty, this should not fail. Otherwise it is an error
+ */
+ if (htx_is_empty(htx)) {
+ if (!htx_add_endof(htx, HTX_BLK_EOT))
+ goto error;
+ }
+ htx->flags |= HTX_FL_EOM;
+ }
+ htx_to_buf(htx, &hc->req.buf);
+
+error:
+
+ return ret;
+}
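+
+/* Sketch of a request payload callback using the function above (names and
+ * data source are hypothetical): the callback registered in
+ * hc->ops.req_payload pushes the next body chunk and sets <end> on the last
+ * one:
+ *
+ *	static void my_req_payload(struct httpclient *hc)
+ *	{
+ *		struct ist chunk = next_chunk();	// hypothetical source
+ *
+ *		httpclient_req_xfer(hc, chunk, is_last_chunk());
+ *	}
+ */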
+
+/* Set the 'timeout server' in ms for the next httpclient request */
+void httpclient_set_timeout(struct httpclient *hc, int timeout)
+{
+ hc->timeout_server = timeout;
+}
+
+/*
+ * Sets a destination for the httpclient from an HAProxy address format.
+ * This prevents the destination from being derived from the URL.
+ * Return 0 in case of success or -1 otherwise.
+ */
+int httpclient_set_dst(struct httpclient *hc, const char *dst)
+{
+ struct sockaddr_storage *sk;
+ char *errmsg = NULL;
+
+ sockaddr_free(&hc->dst);
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(dst, NULL, NULL, NULL, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_PORT_OK | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("httpclient: Failed to parse destination address in %s\n", errmsg);
+ free(errmsg);
+ return -1;
+ }
+
+ if (!sockaddr_alloc(&hc->dst, sk, sizeof(*sk))) {
+ ha_alert("httpclient: Failed to allocate sockaddr in %s:%d.\n", __FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ return 0;
+}
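+
+/* Example (hypothetical address): httpclient_set_dst(hc, "192.168.1.10:8080")
+ * forces the connection to that address, whatever host the URL carries.
+ */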
+
+/*
+ * Split <url> in <scheme>, <host>, <port>
+ */
+static int httpclient_spliturl(struct ist url, enum http_scheme *scheme,
+ struct ist *host, int *port)
+{
+ enum http_scheme scheme_tmp = SCH_HTTP;
+ int port_tmp = 0;
+ struct ist scheme_ist, authority_ist, host_ist, port_ist;
+ char *p, *end;
+ struct http_uri_parser parser;
+
+ parser = http_uri_parser_init(url);
+ scheme_ist = http_parse_scheme(&parser);
+ if (!isttest(scheme_ist)) {
+ return 0;
+ }
+
+ if (isteqi(scheme_ist, ist("http://"))){
+ scheme_tmp = SCH_HTTP;
+ port_tmp = 80;
+ } else if (isteqi(scheme_ist, ist("https://"))) {
+ scheme_tmp = SCH_HTTPS;
+ port_tmp = 443;
+ }
+
+ authority_ist = http_parse_authority(&parser, 1);
+ if (!isttest(authority_ist)) {
+ return 0;
+ }
+ p = end = istend(authority_ist);
+
+ /* look for a port at the end of the authority */
+ while (p > istptr(authority_ist) && isdigit((unsigned char)*--p))
+ ;
+
+ if (*p == ':') {
+ host_ist = ist2(istptr(authority_ist), p - istptr(authority_ist));
+ port_ist = istnext(ist2(p, end - p));
+ ist2str(trash.area, port_ist);
+ port_tmp = atoi(trash.area);
+ } else {
+ host_ist = authority_ist;
+ }
+
+ if (scheme)
+ *scheme = scheme_tmp;
+ if (host)
+ *host = host_ist;
+ if (port)
+ *port = port_tmp;
+
+ return 1;
+}
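+
+/* Examples: "https://www.example.com:8443/index.html" yields SCH_HTTPS,
+ * host "www.example.com" and port 8443, while "http://www.example.com/"
+ * yields SCH_HTTP, host "www.example.com" and the scheme's default port 80.
+ */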
+
+/*
+ * Start the HTTP client
+ * Create the appctx, session and stream, and wake up the applet
+ *
+ * Return the <appctx> or NULL if it failed
+ */
+struct appctx *httpclient_start(struct httpclient *hc)
+{
+ struct applet *applet = &httpclient_applet;
+ struct appctx *appctx;
+
+ /* if the client was started and not ended, an applet is already
+	 * running; we shouldn't try anything */
+ if (httpclient_started(hc) && !httpclient_ended(hc))
+ return NULL;
+
+ /* The HTTP client will be created in the same thread as the caller,
+ * avoiding threading issues */
+ appctx = appctx_new_here(applet, NULL);
+ if (!appctx)
+ goto out;
+ appctx->svcctx = hc;
+ hc->flags = 0;
+
+ if (appctx_init(appctx) == -1) {
+ ha_alert("httpclient: Failed to initialize appctx %s:%d.\n", __FUNCTION__, __LINE__);
+ goto out_free_appctx;
+ }
+
+ return appctx;
+
+out_free_appctx:
+ appctx_free_on_early_error(appctx);
+out:
+
+ return NULL;
+}
+
+/*
+ * This function tries to destroy the httpclient if it isn't running.
+ * If it is running, stop the client and ask it to destroy itself.
+ *
+ * Once this function has been used, all pointers to the client must be removed.
+ *
+ */
+void httpclient_stop_and_destroy(struct httpclient *hc)
+{
+
+	/* The httpclient was already stopped or never started; we can safely destroy it */
+ if (hc->flags & HTTPCLIENT_FS_ENDED || !(hc->flags & HTTPCLIENT_FS_STARTED)) {
+ httpclient_destroy(hc);
+ } else {
+ /* if the client wasn't stopped, ask for a stop and destroy */
+ hc->flags |= (HTTPCLIENT_FA_AUTOKILL | HTTPCLIENT_FA_STOP);
+ /* the calling applet doesn't exist anymore */
+ hc->caller = NULL;
+ if (hc->appctx)
+ appctx_wakeup(hc->appctx);
+ }
+}
+
+/* Free the httpclient */
+void httpclient_destroy(struct httpclient *hc)
+{
+ struct http_hdr *hdrs;
+
+
+ if (!hc)
+ return;
+
+ /* we should never destroy a client which was started but not stopped */
+ BUG_ON(httpclient_started(hc) && !httpclient_ended(hc));
+
+ /* request */
+ istfree(&hc->req.url);
+ b_free(&hc->req.buf);
+ /* response */
+ istfree(&hc->res.vsn);
+ istfree(&hc->res.reason);
+ hdrs = hc->res.hdrs;
+ while (hdrs && isttest(hdrs->n)) {
+ istfree(&hdrs->n);
+ istfree(&hdrs->v);
+ hdrs++;
+ }
+ ha_free(&hc->res.hdrs);
+ b_free(&hc->res.buf);
+ sockaddr_free(&hc->dst);
+
+ free(hc);
+
+ return;
+}
+
+/* Allocate an httpclient and its buffers
+ * Return NULL on failure */
+struct httpclient *httpclient_new(void *caller, enum http_meth_t meth, struct ist url)
+{
+ struct httpclient *hc;
+
+ hc = calloc(1, sizeof(*hc));
+ if (!hc)
+ goto err;
+
+ hc->req.buf = BUF_NULL;
+ hc->res.buf = BUF_NULL;
+ hc->caller = caller;
+ hc->req.url = istdup(url);
+ hc->req.meth = meth;
+
+ return hc;
+
+err:
+ httpclient_destroy(hc);
+ return NULL;
+}
+
+static void httpclient_applet_io_handler(struct appctx *appctx)
+{
+ struct httpclient *hc = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx_blk *blk = NULL;
+ struct htx *htx;
+ struct htx_sl *sl = NULL;
+ uint32_t hdr_num;
+ uint32_t sz;
+ int ret;
+
+ /* The IO handler could be called after the release, so we need to
+ * check if hc is still there to run the IO handler */
+ if (!hc)
+ return;
+
+ while (1) {
+
+ /* required to stop */
+ if (hc->flags & HTTPCLIENT_FA_STOP)
+ goto end;
+
+ switch(appctx->st0) {
+
+ case HTTPCLIENT_S_REQ:
+			/* we know that the buffer is empty here since it's
+			 * the first call, so we can freely copy the
+			 * request from the httpclient buffer */
+ ret = b_xfer(&req->buf, &hc->req.buf, b_data(&hc->req.buf));
+ if (!ret)
+ goto more;
+
+ if (!b_data(&hc->req.buf))
+ b_free(&hc->req.buf);
+
+ htx = htx_from_buf(&req->buf);
+ if (!htx)
+ goto more;
+
+ channel_add_input(req, htx->data);
+
+			if (htx->flags & HTX_FL_EOM) /* check if a body needs to be added */
+ appctx->st0 = HTTPCLIENT_S_RES_STLINE;
+ else
+ appctx->st0 = HTTPCLIENT_S_REQ_BODY;
+
+ goto more; /* we need to leave the IO handler once we wrote the request */
+ break;
+ case HTTPCLIENT_S_REQ_BODY:
+ /* call the payload callback */
+ {
+ if (hc->ops.req_payload) {
+ struct htx *hc_htx;
+
+ /* call the request callback */
+ hc->ops.req_payload(hc);
+
+ hc_htx = htx_from_buf(&hc->req.buf);
+ htx = htx_from_buf(&req->buf);
+
+ if (htx_is_empty(hc_htx))
+ goto more;
+
+ if (htx_is_empty(htx)) {
+ size_t data = hc_htx->data;
+
+ /* Here htx_to_buf() will set buffer data to 0 because
+ * the HTX is empty, and allow us to do an xfer.
+ */
+ htx_to_buf(hc_htx, &hc->req.buf);
+ htx_to_buf(htx, &req->buf);
+ b_xfer(&req->buf, &hc->req.buf, b_data(&hc->req.buf));
+ channel_add_input(req, data);
+ } else {
+ struct htx_ret ret;
+
+ ret = htx_xfer_blks(htx, hc_htx, htx_used_space(hc_htx), HTX_BLK_UNUSED);
+ channel_add_input(req, ret.ret);
+
+ /* we must copy the EOM if we empty the buffer */
+ if (htx_is_empty(hc_htx)) {
+ htx->flags |= (hc_htx->flags & HTX_FL_EOM);
+ }
+ htx_to_buf(htx, &req->buf);
+ htx_to_buf(hc_htx, &hc->req.buf);
+ }
+
+
+ if (!b_data(&hc->req.buf))
+ b_free(&hc->req.buf);
+ }
+
+ htx = htx_from_buf(&req->buf);
+ if (!htx)
+ goto more;
+
+ /* if the request contains the HTX_FL_EOM, we finished the request part. */
+ if (htx->flags & HTX_FL_EOM) {
+ req->flags |= CF_EOI;
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ appctx->st0 = HTTPCLIENT_S_RES_STLINE;
+ }
+
+ goto process_data; /* we need to leave the IO handler once we wrote the request */
+ }
+ break;
+
+ case HTTPCLIENT_S_RES_STLINE:
+			/* copy the start line into the hc structure, then remove the htx block */
+ if (!co_data(res))
+ goto more;
+ htx = htxbuf(&res->buf);
+ if (!htx)
+ goto more;
+ blk = htx_get_head_blk(htx);
+ if (blk && (htx_get_blk_type(blk) == HTX_BLK_RES_SL))
+ sl = htx_get_blk_ptr(htx, blk);
+ if (!sl || (!(sl->flags & HTX_SL_F_IS_RESP)))
+ goto more;
+
+ /* copy the status line in the httpclient */
+ hc->res.status = sl->info.res.status;
+ hc->res.vsn = istdup(htx_sl_res_vsn(sl));
+ hc->res.reason = istdup(htx_sl_res_reason(sl));
+ sz = htx_get_blksz(blk);
+ c_rew(res, sz);
+ htx_remove_blk(htx, blk);
+ /* caller callback */
+ if (hc->ops.res_stline)
+ hc->ops.res_stline(hc);
+
+ /* if there is no HTX data anymore and the EOM flag is
+ * set, leave (no body) */
+ if (htx_is_empty(htx) && htx->flags & HTX_FL_EOM)
+ appctx->st0 = HTTPCLIENT_S_RES_END;
+ else
+ appctx->st0 = HTTPCLIENT_S_RES_HDR;
+ break;
+
+ case HTTPCLIENT_S_RES_HDR:
+			/* first copy the headers into a local hdrs
+			 * structure; once we know the total number of
+			 * headers we allocate the right size and copy
+			 * them. The HTX blocks of the headers are
+			 * removed as each one is read */
+ {
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+
+ if (!co_data(res))
+ goto more;
+ htx = htxbuf(&res->buf);
+ if (!htx)
+ goto more;
+
+ hdr_num = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+
+ c_rew(res, sz);
+
+ if (type == HTX_BLK_HDR) {
+ hdrs[hdr_num].n = istdup(htx_get_blk_name(htx, blk));
+ hdrs[hdr_num].v = istdup(htx_get_blk_value(htx, blk));
+ hdr_num++;
+ }
+ else if (type == HTX_BLK_EOH) {
+ /* create a NULL end of array and leave the loop */
+ hdrs[hdr_num].n = IST_NULL;
+ hdrs[hdr_num].v = IST_NULL;
+ htx_remove_blk(htx, blk);
+ break;
+ }
+ blk = htx_remove_blk(htx, blk);
+ }
+
+ if (hdr_num) {
+ /* alloc and copy the headers in the httpclient struct */
+ hc->res.hdrs = calloc((hdr_num + 1), sizeof(*hc->res.hdrs));
+ if (!hc->res.hdrs)
+ goto end;
+ memcpy(hc->res.hdrs, hdrs, sizeof(struct http_hdr) * (hdr_num + 1));
+
+ /* caller callback */
+ if (hc->ops.res_headers)
+ hc->ops.res_headers(hc);
+ }
+
+ /* if there is no HTX data anymore and the EOM flag is
+ * set, leave (no body) */
+ if (htx_is_empty(htx) && htx->flags & HTX_FL_EOM) {
+ appctx->st0 = HTTPCLIENT_S_RES_END;
+ } else {
+ appctx->st0 = HTTPCLIENT_S_RES_BODY;
+ }
+ }
+ break;
+
+ case HTTPCLIENT_S_RES_BODY:
+ /*
+ * The IO handler removes the htx blocks in the response buffer and
+ * push them in the hc->res.buf buffer in a raw format.
+ */
+ if (!co_data(res))
+ goto more;
+
+ htx = htxbuf(&res->buf);
+ if (!htx || htx_is_empty(htx))
+ goto more;
+
+ if (!b_alloc(&hc->res.buf))
+ goto more;
+
+ if (b_full(&hc->res.buf))
+ goto process_data;
+
+			/* de-encapsulate the HTX data into raw data */
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ size_t count = co_data(res);
+ uint32_t blksz = htx_get_blksz(blk);
+ uint32_t room = b_room(&hc->res.buf);
+ uint32_t vlen;
+
+				/* try to copy as much of the block's output data
+				 * as fits in the destination buffer */
+ vlen = MIN(count, blksz);
+ vlen = MIN(vlen, room);
+
+ if (vlen == 0)
+ goto process_data;
+
+ if (type == HTX_BLK_DATA) {
+ struct ist v = htx_get_blk_value(htx, blk);
+
+ __b_putblk(&hc->res.buf, v.ptr, vlen);
+ c_rew(res, vlen);
+
+ if (vlen == blksz)
+ blk = htx_remove_blk(htx, blk);
+ else
+ htx_cut_data_blk(htx, blk, vlen);
+
+ /* the data must be processed by the caller in the receive phase */
+ if (hc->ops.res_payload)
+ hc->ops.res_payload(hc);
+
+ /* cannot copy everything, need to process */
+ if (vlen != blksz)
+ goto process_data;
+ } else {
+ if (vlen != blksz)
+ goto process_data;
+
+ /* remove any block which is not a data block */
+ c_rew(res, blksz);
+ blk = htx_remove_blk(htx, blk);
+ }
+ }
+
+ /* if not finished, should be called again */
+ if (!(htx_is_empty(htx) && (htx->flags & HTX_FL_EOM)))
+ goto more;
+
+
+ /* end of message, we should quit */
+ appctx->st0 = HTTPCLIENT_S_RES_END;
+ break;
+
+ case HTTPCLIENT_S_RES_END:
+ goto end;
+ break;
+ }
+ }
+
+process_data:
+
+ sc_will_read(sc);
+
+ return;
+more:
+ /* There was not enough data in the response channel */
+
+ sc_need_room(sc);
+
+ if (appctx->st0 == HTTPCLIENT_S_RES_END)
+ goto end;
+
+	/* The state machine tries to handle as much data as possible. If there
+ * isn't any data to handle and a shutdown is detected, let's stop
+ * everything */
+ if ((req->flags & (CF_SHUTR|CF_SHUTR_NOW)) ||
+ (res->flags & CF_SHUTW) ||
+ ((res->flags & CF_SHUTW_NOW) && channel_is_empty(res))) {
+ goto end;
+ }
+ return;
+
+end:
+ sc_shutw(sc);
+ sc_shutr(sc);
+ return;
+}
+
+static int httpclient_applet_init(struct appctx *appctx)
+{
+ struct httpclient *hc = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+ struct sockaddr_storage ss_url = {};
+ struct sockaddr_storage *ss_dst;
+ enum obj_type *target = NULL;
+ struct ist host = IST_NULL;
+ enum http_scheme scheme;
+ int port;
+ int doresolve = 0;
+
+
+	/* parse the URL */
+ if (!httpclient_spliturl(hc->req.url, &scheme, &host, &port))
+ goto out_error;
+
+ if (hc->dst) {
+		/* if httpclient_set_dst() was used, use the alternative address */
+ ss_dst = hc->dst;
+ } else {
+ /* set the dst using the host, or 0.0.0.0 to resolve */
+ ist2str(trash.area, host);
+ ss_dst = str2ip2(trash.area, &ss_url, 0);
+ if (!ss_dst) { /* couldn't get an IP from that, try to resolve */
+ doresolve = 1;
+ ss_dst = str2ip2("0.0.0.0", &ss_url, 0);
+ }
+ sock_inet_set_port(ss_dst, port);
+ }
+
+ if (!sockaddr_alloc(&addr, ss_dst, sizeof(*ss_dst)))
+ goto out_error;
+
+ /* choose the SSL server or not */
+ switch (scheme) {
+ case SCH_HTTP:
+ target = &httpclient_srv_raw->obj_type;
+ break;
+ case SCH_HTTPS:
+#ifdef USE_OPENSSL
+ if (httpclient_srv_ssl) {
+ target = &httpclient_srv_ssl->obj_type;
+ } else {
+ ha_alert("httpclient: SSL was disabled (wrong verify/ca-file)!\n");
+ goto out_free_addr;
+ }
+#else
+ ha_alert("httpclient: OpenSSL is not available %s:%d.\n", __FUNCTION__, __LINE__);
+ goto out_free_addr;
+#endif
+ break;
+ }
+
+ if (appctx_finalize_startup(appctx, httpclient_proxy, &hc->req.buf) == -1) {
+ ha_alert("httpclient: Failed to initialize appctx %s:%d.\n", __FUNCTION__, __LINE__);
+ goto out_free_addr;
+ }
+
+ s = appctx_strm(appctx);
+ s->target = target;
+ /* set the "timeout server" */
+ s->req.wto = hc->timeout_server;
+ s->res.rto = hc->timeout_server;
+
+ if (doresolve) {
+ /* in order to do the set-dst we need to put the address on the front */
+ s->scf->dst = addr;
+ } else {
+		/* when we don't resolve, we already have the address and must
+		 * put it on the backend side; some of these addresses are not
+		 * meant to be used on the frontend (sockpair, unix socket etc.) */
+ s->scb->dst = addr;
+ }
+
+ s->scb->flags |= SC_FL_NOLINGER;
+ s->flags |= SF_ASSIGNED;
+ s->res.flags |= CF_READ_DONTWAIT;
+
+ /* applet is waiting for data */
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ hc->appctx = appctx;
+ hc->flags |= HTTPCLIENT_FS_STARTED;
+
+ /* The request was transferred when the stream was created. So switch
+ * directly to REQ_BODY or RES_STLINE state
+ */
+ appctx->st0 = (hc->ops.req_payload ? HTTPCLIENT_S_REQ_BODY : HTTPCLIENT_S_RES_STLINE);
+ return 0;
+
+ out_free_addr:
+ sockaddr_free(&addr);
+ out_error:
+ return -1;
+}
+
+static void httpclient_applet_release(struct appctx *appctx)
+{
+ struct httpclient *hc = appctx->svcctx;
+
+ /* mark the httpclient as ended */
+ hc->flags |= HTTPCLIENT_FS_ENDED;
+ /* the applet is leaving, remove the ptr so we don't try to call it
+ * again from the caller */
+ hc->appctx = NULL;
+
+ if (hc->ops.res_end)
+ hc->ops.res_end(hc);
+
+	/* destroy the httpclient when set to autokill */
+ if (hc->flags & HTTPCLIENT_FA_AUTOKILL) {
+ httpclient_destroy(hc);
+ }
+
+ /* be sure not to use this ptr anymore if the IO handler is called a
+ * last time */
+ appctx->svcctx = NULL;
+
+ return;
+}
+
+/* HTTP client applet */
+static struct applet httpclient_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<HTTPCLIENT>",
+ .fct = httpclient_applet_io_handler,
+ .init = httpclient_applet_init,
+ .release = httpclient_applet_release,
+};
+
+
+static int httpclient_resolve_init()
+{
+ struct act_rule *rule;
+ int i;
+ char *do_resolve = NULL;
+ char *http_rules[][11] = {
+ { "set-var(txn.hc_ip)", "dst", "" },
+ { do_resolve, "hdr(Host),host_only", "if", "{", "var(txn.hc_ip)", "-m", "ip", "0.0.0.0", "}", "" },
+ { "return", "status", "503", "if", "{", "var(txn.hc_ip)", "-m", "ip", "0.0.0.0", "}", "" },
+ { "capture", "var(txn.hc_ip)", "len", "40", "" },
+ { "set-dst", "var(txn.hc_ip)", "" },
+ { "" }
+ };
+
+ if (!resolvers_id)
+ resolvers_id = strdup("default");
+
+ memprintf(&do_resolve, "do-resolve(txn.hc_ip,%s%s%s)", resolvers_id, resolvers_prefer ? "," : "", resolvers_prefer ? resolvers_prefer : "");
+ http_rules[1][0] = do_resolve;
+
+ /* Try to create the default resolvers section */
+ resolvers_create_default();
+
+ /* if the resolver does not exist and no hard_error was set, simply ignore resolving */
+ if (!find_resolvers_by_id(resolvers_id) && !hard_error_resolvers) {
+ free(do_resolve);
+ return 0;
+ }
+
+
+ for (i = 0; *http_rules[i][0] != '\0'; i++) {
+ rule = parse_http_req_cond((const char **)http_rules[i], "httpclient", 0, httpclient_proxy);
+ if (!rule) {
+ free(do_resolve);
+ ha_alert("Couldn't setup the httpclient resolver.\n");
+ return 1;
+ }
+ LIST_APPEND(&httpclient_proxy->http_req_rules, &rule->list);
+ }
+
+ free(do_resolve);
+ return 0;
+}
+
+
+
+/*
+ * Initialize the proxy for the HTTP client with 2 servers, one for raw HTTP,
+ * the other for HTTPS.
+ */
+static int httpclient_precheck()
+{
+ int err_code = ERR_NONE;
+ char *errmsg = NULL;
+
+ if (global.mode & MODE_MWORKER_WAIT)
+ return ERR_NONE;
+
+ httpclient_proxy = alloc_new_proxy("<HTTPCLIENT>", PR_CAP_LISTEN|PR_CAP_INT, &errmsg);
+ if (!httpclient_proxy) {
+ memprintf(&errmsg, "couldn't allocate proxy.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ proxy_preset_defaults(httpclient_proxy);
+
+ httpclient_proxy->options |= PR_O_WREQ_BODY;
+ httpclient_proxy->retry_type |= PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT;
+ httpclient_proxy->options2 |= PR_O2_INDEPSTR;
+ httpclient_proxy->mode = PR_MODE_HTTP;
+ httpclient_proxy->maxconn = 0;
+ httpclient_proxy->accept = NULL;
+ httpclient_proxy->conn_retries = CONN_RETRIES;
+ httpclient_proxy->timeout.client = TICK_ETERNITY;
+	/* The HTTP client uses the "option httplog" with the global log server */
+ httpclient_proxy->conf.logformat_string = default_http_log_format;
+ httpclient_proxy->http_needed = 1;
+
+ /* clear HTTP server */
+ httpclient_srv_raw = new_server(httpclient_proxy);
+ if (!httpclient_srv_raw) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ srv_settings_cpy(httpclient_srv_raw, &httpclient_proxy->defsrv, 0);
+ httpclient_srv_raw->iweight = 0;
+ httpclient_srv_raw->uweight = 0;
+ httpclient_srv_raw->xprt = xprt_get(XPRT_RAW);
+ httpclient_srv_raw->flags |= SRV_F_MAPPORTS; /* needed to apply the port change with resolving */
+ httpclient_srv_raw->id = strdup("<HTTPCLIENT>");
+ if (!httpclient_srv_raw->id) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+#ifdef USE_OPENSSL
+ /* SSL HTTP server */
+ httpclient_srv_ssl = new_server(httpclient_proxy);
+ if (!httpclient_srv_ssl) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ srv_settings_cpy(httpclient_srv_ssl, &httpclient_proxy->defsrv, 0);
+ httpclient_srv_ssl->iweight = 0;
+ httpclient_srv_ssl->uweight = 0;
+ httpclient_srv_ssl->xprt = xprt_get(XPRT_SSL);
+ httpclient_srv_ssl->use_ssl = 1;
+ httpclient_srv_ssl->flags |= SRV_F_MAPPORTS; /* needed to apply the port change with resolving */
+ httpclient_srv_ssl->id = strdup("<HTTPSCLIENT>");
+ if (!httpclient_srv_ssl->id) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ httpclient_srv_ssl->ssl_ctx.verify = httpclient_ssl_verify;
+ /* if verification is required, try to load the system CA */
+ if (httpclient_ssl_verify == SSL_SOCK_VERIFY_REQUIRED) {
+
+ httpclient_srv_ssl->ssl_ctx.ca_file = strdup(httpclient_ssl_ca_file ? httpclient_ssl_ca_file : "@system-ca");
+ if (!__ssl_store_load_locations_file(httpclient_srv_ssl->ssl_ctx.ca_file, 1, CAFILE_CERT, !hard_error_ssl)) {
+ /* if we failed to load the ca-file, only exit in
+ * error when hard_error is set, otherwise just
+ * disable the feature. */
+ if (hard_error_ssl) {
+ memprintf(&errmsg, "cannot initialize SSL verify with 'ca-file \"%s\"'.", httpclient_srv_ssl->ssl_ctx.ca_file);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ } else {
+ ha_free(&httpclient_srv_ssl->ssl_ctx.ca_file);
+ srv_drop(httpclient_srv_ssl);
+ httpclient_srv_ssl = NULL;
+ }
+ }
+ }
+
+#endif
+
+ /* add the proxy to the proxy list only if everything is successful */
+ httpclient_proxy->next = proxies_list;
+ proxies_list = httpclient_proxy;
+
+ if (httpclient_resolve_init() != 0) {
+ memprintf(&errmsg, "cannot initialize resolvers.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* link the 2 servers in the proxy */
+ httpclient_srv_raw->next = httpclient_proxy->srv;
+ httpclient_proxy->srv = httpclient_srv_raw;
+
+#ifdef USE_OPENSSL
+ if (httpclient_srv_ssl) {
+ httpclient_srv_ssl->next = httpclient_proxy->srv;
+ httpclient_proxy->srv = httpclient_srv_ssl;
+ }
+#endif
+
+err:
+ if (err_code & ERR_CODE) {
+ ha_alert("httpclient: cannot initialize: %s\n", errmsg);
+ free(errmsg);
+ srv_drop(httpclient_srv_raw);
+#ifdef USE_OPENSSL
+ srv_drop(httpclient_srv_ssl);
+#endif
+ free_proxy(httpclient_proxy);
+ }
+ return err_code;
+}
+
+static int httpclient_postcheck()
+{
+ int err_code = ERR_NONE;
+ struct logsrv *logsrv;
+ struct proxy *curproxy = httpclient_proxy;
+ char *errmsg = NULL;
+
+ if (global.mode & MODE_MWORKER_WAIT)
+ return ERR_NONE;
+
+ /* copy logs from "global" log list */
+ list_for_each_entry(logsrv, &global.logsrvs, list) {
+ struct logsrv *node = malloc(sizeof(*node));
+
+ if (!node) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ memcpy(node, logsrv, sizeof(*node));
+ LIST_INIT(&node->list);
+ LIST_APPEND(&curproxy->logsrvs, &node->list);
+ node->ring_name = logsrv->ring_name ? strdup(logsrv->ring_name) : NULL;
+ node->conf.file = logsrv->conf.file ? strdup(logsrv->conf.file) : NULL;
+ }
+ if (curproxy->conf.logformat_string) {
+ curproxy->conf.args.ctx = ARGC_LOG;
+ if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &errmsg)) {
+ memprintf(&errmsg, "failed to parse log-format : %s.", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+#ifdef USE_OPENSSL
+ if (httpclient_srv_ssl) {
+ /* init the SNI expression */
+ /* always use the host header as SNI, without the port */
+ httpclient_srv_ssl->sni_expr = strdup("req.hdr(host),field(1,:)");
+ err_code |= server_parse_sni_expr(httpclient_srv_ssl, httpclient_proxy, &errmsg);
+ if (err_code & ERR_CODE) {
+ memprintf(&errmsg, "failed to configure sni: %s.", errmsg);
+ goto err;
+ }
+ }
+#endif
+
+err:
+ if (err_code & ERR_CODE) {
+ ha_alert("httpclient: failed to initialize: %s\n", errmsg);
+ free(errmsg);
+
+ }
+ return err_code;
+}
+
+/* initialize the proxy and servers for the HTTP client */
+
+REGISTER_PRE_CHECK(httpclient_precheck);
+REGISTER_POST_CHECK(httpclient_postcheck);
+
+static int httpclient_parse_global_resolvers(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_resolvers = 1;
+
+ free(resolvers_id);
+ resolvers_id = strdup(args[1]);
+
+ return 0;
+}
+
+static int httpclient_parse_global_prefer(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_resolvers = 1;
+
+ if (strcmp(args[1],"ipv4") == 0)
+ resolvers_prefer = "ipv4";
+ else if (strcmp(args[1],"ipv6") == 0)
+ resolvers_prefer = "ipv6";
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'ipv4' or 'ipv6' as argument.\n", file, line, args[0]);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+#ifdef USE_OPENSSL
+static int httpclient_parse_global_ca_file(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_ssl = 1;
+
+ free(httpclient_ssl_ca_file);
+ httpclient_ssl_ca_file = strdup(args[1]);
+
+ return 0;
+}
+
+static int httpclient_parse_global_verify(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_ssl = 1;
+
+ if (strcmp(args[1],"none") == 0)
+ httpclient_ssl_verify = SSL_SOCK_VERIFY_NONE;
+ else if (strcmp(args[1],"required") == 0)
+ httpclient_ssl_verify = SSL_SOCK_VERIFY_REQUIRED;
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'none' or 'required' as argument.\n", file, line, args[0]);
+ return -1;
+ }
+
+ return 0;
+}
+#endif /* USE_OPENSSL */
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "httpclient.resolvers.id", httpclient_parse_global_resolvers },
+ { CFG_GLOBAL, "httpclient.resolvers.prefer", httpclient_parse_global_prefer },
+#ifdef USE_OPENSSL
+ { CFG_GLOBAL, "httpclient.ssl.verify", httpclient_parse_global_verify },
+ { CFG_GLOBAL, "httpclient.ssl.ca-file", httpclient_parse_global_ca_file },
+#endif
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
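+
+/* Illustrative usage of the keywords registered above in the global
+ * section (values are examples, not built-in defaults):
+ *
+ *   global
+ *       httpclient.resolvers.id      default
+ *       httpclient.resolvers.prefer  ipv6
+ *       httpclient.ssl.verify        required
+ *       httpclient.ssl.ca-file       /etc/ssl/certs/ca-certificates.crt
+ */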
diff --git a/src/http_conv.c b/src/http_conv.c
new file mode 100644
index 0000000..f33336a
--- /dev/null
+++ b/src/http_conv.c
@@ -0,0 +1,453 @@
+/*
+ * HTTP sample conversion
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/chunk.h>
+#include <haproxy/http.h>
+#include <haproxy/pool.h>
+#include <haproxy/sample.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+static int smp_check_http_date_unit(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ return smp_check_date_unit(args, err);
+}
+
+/* Takes a UINT value on input, supposed to represent the time since the
+ * epoch, adds an optional offset found in args[0] and emits a string
+ * representing the date in RFC-1123/5322 format. If the optional unit
+ * parameter in args[1] is provided, the timestamp is decoded in
+ * milliseconds ("ms") or microseconds ("us"), and the relevant output
+ * date format is used.
+ */
+static int sample_conv_http_date(const struct arg *args, struct sample *smp, void *private)
+{
+ const char day[7][4] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+ const char mon[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+ struct buffer *temp;
+ struct tm tm;
+ int sec_frac = 0;
+ time_t curr_date;
+
+ /* add offset */
+ if (args[0].type == ARGT_SINT)
+ smp->data.u.sint += args[0].data.sint;
+
+ /* report in milliseconds */
+ if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) {
+ sec_frac = smp->data.u.sint % 1000;
+ smp->data.u.sint /= 1000;
+ }
+ /* report in microseconds */
+ else if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) {
+ sec_frac = smp->data.u.sint % 1000000;
+ smp->data.u.sint /= 1000000;
+ }
+
+ /* With high numbers, the date returned can be negative, the 55-bit mask prevents this. */
+ curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
+
+ get_gmtime(curr_date, &tm);
+
+ temp = get_trash_chunk();
+ if (args[1].type == ARGT_SINT && args[1].data.sint != TIME_UNIT_S) {
+ temp->data = snprintf(temp->area, temp->size - temp->data,
+ "%s, %02d %s %04d %02d:%02d:%02d.%d GMT",
+ day[tm.tm_wday], tm.tm_mday, mon[tm.tm_mon],
+ 1900+tm.tm_year,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, sec_frac);
+ } else {
+ temp->data = snprintf(temp->area, temp->size - temp->data,
+ "%s, %02d %s %04d %02d:%02d:%02d GMT",
+ day[tm.tm_wday], tm.tm_mday, mon[tm.tm_mon],
+ 1900+tm.tm_year,
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+ }
+
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
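+
+/* Example usage (illustrative): emit an Expires header one hour in the
+ * future by combining the date() fetch with this converter:
+ *
+ *   http-response set-header Expires %[date(3600),http_date]
+ */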
+
+/* Arguments: the list of accepted languages separated by ';' (args[0]) and an optional default value (args[1]) */
+static int sample_conv_q_preferred(const struct arg *args, struct sample *smp, void *private)
+{
+ const char *al = smp->data.u.str.area;
+ const char *end = al + smp->data.u.str.data;
+ const char *token;
+ int toklen;
+ int qvalue;
+ const char *str;
+ const char *w;
+ int best_q = 0;
+
+ /* Set the CONST flag on the sample, because the output of the
+ * function will point into the constant configuration string.
+ */
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+
+ /* Parse the accept language */
+ while (1) {
+
+ /* Skip spaces, quit if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ break;
+
+ /* Start of the first word. */
+ token = al;
+
+ /* Look for a separator: isspace(), ',' or ';'. Move to the next value on a zero-length word. */
+ while (al < end && *al != ';' && *al != ',' && !isspace((unsigned char)*al))
+ al++;
+ if (al == token)
+ goto expect_comma;
+
+ /* Length of the token. */
+ toklen = al - token;
+ qvalue = 1000;
+
+ /* Check if the token exists in the list. If the token does not
+ * exist, jump to the next token.
+ */
+ str = args[0].data.str.area;
+ w = str;
+ while (1) {
+ if (*str == ';' || *str == '\0') {
+ if (http_language_range_match(token, toklen, w, str - w))
+ goto look_for_q;
+ if (*str == '\0')
+ goto expect_comma;
+ w = str + 1;
+ }
+ str++;
+ }
+ goto expect_comma;
+
+look_for_q:
+
+ /* Skip spaces, quit if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* If ',' is found, process the result */
+ if (*al == ',')
+ goto process_value;
+
+ /* If the character is different from ';', look
+ * for the end of the header part in best-effort mode.
+ */
+ if (*al != ';')
+ goto expect_comma;
+
+ /* Assumes that the char is ';', now expect "q=". */
+ al++;
+
+ /* Skip spaces, process the value if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* Expect 'q'. If no 'q', continue in best effort */
+ if (*al != 'q')
+ goto process_value;
+ al++;
+
+ /* Skip spaces, process the value if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* Expect '='. If no '=', continue in best effort */
+ if (*al != '=')
+ goto process_value;
+ al++;
+
+ /* Skip spaces, process the value if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* Parse the q value. */
+ qvalue = http_parse_qvalue(al, &al);
+
+process_value:
+
+ /* If the new q value is the best q value, then store the associated
+ * language in the response. If qvalue is the biggest value (1000),
+ * break the process.
+ */
+ if (qvalue > best_q) {
+ smp->data.u.str.area = (char *)w;
+ smp->data.u.str.data = str - w;
+ if (qvalue >= 1000)
+ break;
+ best_q = qvalue;
+ }
+
+expect_comma:
+
+ /* Expect comma or end. If the end is detected, quit the loop. */
+ while (al < end && *al != ',')
+ al++;
+ if (al >= end)
+ break;
+
+ /* Comma is found, skip it and restart the analyzer. */
+ al++;
+ }
+
+ /* Set default value if required. */
+ if (smp->data.u.str.data == 0 && args[1].type == ARGT_STR) {
+ smp->data.u.str.area = args[1].data.str.area;
+ smp->data.u.str.data = args[1].data.str.data;
+ }
+
+ /* Return true only if a matching language was found. */
+ return smp->data.u.str.data != 0;
+}
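+
+/* Example usage (illustrative): pick a backend from the client's preferred
+ * language among the supported ones:
+ *
+ *   acl es req.fhdr(accept-language),language(es;fr;en) -m str es
+ *   use_backend spanish if es
+ */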
+
+/* This converter url-decodes its input string. */
+static int sample_conv_url_dec(const struct arg *args, struct sample *smp, void *private)
+{
+ int in_form = 0;
+ int len;
+
+ /* If the constant flag is set, or if no room is available at
+ * the end of the buffer, copy the string into another buffer
+ * before decoding.
+ */
+ if (smp->flags & SMP_F_CONST || smp->data.u.str.size <= smp->data.u.str.data) {
+ struct buffer *str = get_trash_chunk();
+ memcpy(str->area, smp->data.u.str.area, smp->data.u.str.data);
+ smp->data.u.str.area = str->area;
+ smp->data.u.str.size = str->size;
+ smp->flags &= ~SMP_F_CONST;
+ }
+
+ /* Add final \0 required by url_decode(), and convert the input string. */
+ smp->data.u.str.area[smp->data.u.str.data] = '\0';
+
+ if (args[0].type == ARGT_SINT)
+ in_form = !!args[0].data.sint;
+
+ len = url_decode(smp->data.u.str.area, in_form);
+ if (len < 0)
+ return 0;
+ smp->data.u.str.data = len;
+ return 1;
+}
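+
+/* Example usage (illustrative): store the url-decoded query string in a
+ * transaction variable:
+ *
+ *   http-request set-var(txn.query) query,url_dec
+ */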
+
+/* url-encode types and encode maps */
+enum encode_type {
+ ENC_QUERY = 0,
+};
+long query_encode_map[(256 / 8) / sizeof(long)];
+
+/* Check url-encode type */
+static int sample_conv_url_enc_check(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ enum encode_type enc_type;
+
+ if (strcmp(arg->data.str.area, "") == 0)
+ enc_type = ENC_QUERY;
+ else if (strcmp(arg->data.str.area, "query") == 0)
+ enc_type = ENC_QUERY;
+ else {
+ memprintf(err, "Unexpected encode type. "
+ "Allowed value is 'query'");
+ return 0;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->type = ARGT_SINT;
+ arg->data.sint = enc_type;
+ return 1;
+}
+
+/* Initializes some url encode data at boot */
+static void sample_conf_url_enc_init()
+{
+ int i;
+
+ memset(query_encode_map, 0, sizeof(query_encode_map));
+ /* use RFC 3986 unreserved characters as the list of characters to
+ * keep unchanged in query strings */
+ for (i = 0; i < 256; i++) {
+ if (!((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')
+ || (i >= '0' && i <= '9') ||
+ i == '-' || i == '.' || i == '_' || i == '~'))
+ ha_bit_set(i, query_encode_map);
+ }
+}
+
+INITCALL0(STG_PREPARE, sample_conf_url_enc_init);
+
+/* This converter url-encodes its input string. Only the query-string format is supported for now. */
+static int sample_conv_url_enc(const struct arg *args, struct sample *smp, void *private)
+{
+ enum encode_type enc_type;
+ struct buffer *trash = get_trash_chunk();
+ long *encode_map;
+ char *ret;
+
+ enc_type = args->data.sint;
+
+ if (enc_type == ENC_QUERY)
+ encode_map = query_encode_map;
+ else
+ return 0;
+
+ ret = encode_chunk(trash->area, trash->area + trash->size, '%',
+ encode_map, &smp->data.u.str);
+ if (ret == NULL || *ret != '\0')
+ return 0;
+ trash->data = ret - trash->area;
+ smp->data.u.str = *trash;
+ return 1;
+}
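+
+/* Example usage (illustrative): re-encode a previously decoded value before
+ * exporting it in a header:
+ *
+ *   http-request set-header X-Query %[var(txn.query),url_enc(query)]
+ */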
+
+static int smp_conv_req_capture(const struct arg *args, struct sample *smp, void *private)
+{
+ struct proxy *fe;
+ int idx, i;
+ struct cap_hdr *hdr;
+ int len;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ /* Check the availability of the capture id. */
+ if (idx > fe->nb_req_cap - 1)
+ return 0;
+
+ /* Look for the original configuration. */
+ for (hdr = fe->req_cap, i = fe->nb_req_cap - 1;
+ hdr != NULL && i != idx ;
+ i--, hdr = hdr->next);
+ if (!hdr)
+ return 0;
+
+ /* check for the memory allocation */
+ if (smp->strm->req_cap[hdr->index] == NULL)
+ smp->strm->req_cap[hdr->index] = pool_alloc(hdr->pool);
+ if (smp->strm->req_cap[hdr->index] == NULL)
+ return 0;
+
+ /* Check length. */
+ len = smp->data.u.str.data;
+ if (len > hdr->len)
+ len = hdr->len;
+
+ /* Capture input data. */
+ memcpy(smp->strm->req_cap[idx], smp->data.u.str.area, len);
+ smp->strm->req_cap[idx][len] = '\0';
+
+ return 1;
+}
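+
+/* Example usage (illustrative): assuming capture slot 0 was created in the
+ * frontend with "declare capture request len 40", a rule may fill it from
+ * any string sample:
+ *
+ *   http-request set-var(txn.ua) req.hdr(user-agent),capture-req(0)
+ */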
+
+static int smp_conv_res_capture(const struct arg *args, struct sample *smp, void *private)
+{
+ struct proxy *fe;
+ int idx, i;
+ struct cap_hdr *hdr;
+ int len;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ /* Check the availability of the capture id. */
+ if (idx > fe->nb_rsp_cap - 1)
+ return 0;
+
+ /* Look for the original configuration. */
+ for (hdr = fe->rsp_cap, i = fe->nb_rsp_cap - 1;
+ hdr != NULL && i != idx ;
+ i--, hdr = hdr->next);
+ if (!hdr)
+ return 0;
+
+ /* check for the memory allocation */
+ if (smp->strm->res_cap[hdr->index] == NULL)
+ smp->strm->res_cap[hdr->index] = pool_alloc(hdr->pool);
+ if (smp->strm->res_cap[hdr->index] == NULL)
+ return 0;
+
+ /* Check length. */
+ len = smp->data.u.str.data;
+ if (len > hdr->len)
+ len = hdr->len;
+
+ /* Capture input data. */
+ memcpy(smp->strm->res_cap[idx], smp->data.u.str.area, len);
+ smp->strm->res_cap[idx][len] = '\0';
+
+ return 1;
+}
+
+/************************************************************************/
+/* All supported converter keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "http_date", sample_conv_http_date, ARG2(0,SINT,STR), smp_check_http_date_unit, SMP_T_SINT, SMP_T_STR},
+ { "language", sample_conv_q_preferred, ARG2(1,STR,STR), NULL, SMP_T_STR, SMP_T_STR},
+ { "capture-req", smp_conv_req_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
+ { "capture-res", smp_conv_res_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
+ { "url_dec", sample_conv_url_dec, ARG1(0,SINT), NULL, SMP_T_STR, SMP_T_STR},
+ { "url_enc", sample_conv_url_enc, ARG1(1,STR), sample_conv_url_enc_check, SMP_T_STR, SMP_T_STR},
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_fetch.c b/src/http_fetch.c
new file mode 100644
index 0000000..1028599
--- /dev/null
+++ b/src/http_fetch.c
@@ -0,0 +1,2248 @@
+/*
+ * HTTP samples fetching
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/base64.h>
+#include <haproxy/channel.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/pool.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+/* this struct is used between calls to smp_fetch_hdr() or smp_fetch_cookie() */
+static THREAD_LOCAL struct http_hdr_ctx static_http_hdr_ctx;
+/* this is used to convert raw connection buffers to htx */
+static THREAD_LOCAL struct buffer static_raw_htx_chunk;
+static THREAD_LOCAL char *static_raw_htx_buf;
+
+#define SMP_REQ_CHN(smp) (smp->strm ? &smp->strm->req : NULL)
+#define SMP_RES_CHN(smp) (smp->strm ? &smp->strm->res : NULL)
+
+/* This function returns the static htx chunk, where raw connections get
+ * converted to HTX as needed for sampling.
+ */
+struct buffer *get_raw_htx_chunk(void)
+{
+ chunk_reset(&static_raw_htx_chunk);
+ return &static_raw_htx_chunk;
+}
+
+static int alloc_raw_htx_chunk_per_thread()
+{
+ static_raw_htx_buf = malloc(global.tune.bufsize);
+ if (!static_raw_htx_buf)
+ return 0;
+ chunk_init(&static_raw_htx_chunk, static_raw_htx_buf, global.tune.bufsize);
+ return 1;
+}
+
+static void free_raw_htx_chunk_per_thread()
+{
+ ha_free(&static_raw_htx_buf);
+}
+
+REGISTER_PER_THREAD_ALLOC(alloc_raw_htx_chunk_per_thread);
+REGISTER_PER_THREAD_FREE(free_raw_htx_chunk_per_thread);
+
+/*
+ * Returns the data from the Authorization header. The function may be called
+ * more than once, so data is stored in txn->auth_data. When no header is
+ * found or the auth method is unknown, the method is set to HTTP_AUTH_WRONG
+ * to avoid searching again for something we are unable to find anyway.
+ * However, when the result is valid, the cache is not reused because we
+ * would risk having the credentials overwritten by another stream running
+ * in parallel. The caller is responsible for passing a sample with a valid
+ * stream/txn, and a valid htx.
+ */
+
+static int get_http_auth(struct sample *smp, struct htx *htx)
+{
+ struct stream *s = smp->strm;
+ struct http_txn *txn = s->txn;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct ist hdr;
+ struct buffer auth_method;
+ char *p;
+ int len;
+
+#ifdef DEBUG_AUTH
+ printf("Auth for stream %p: %d\n", s, txn->auth.method);
+#endif
+ if (txn->auth.method == HTTP_AUTH_WRONG)
+ return 0;
+
+ txn->auth.method = HTTP_AUTH_WRONG;
+
+ if (txn->flags & TX_USE_PX_CONN)
+ hdr = ist("Proxy-Authorization");
+ else
+ hdr = ist("Authorization");
+
+ ctx.blk = NULL;
+ if (!http_find_header(htx, hdr, &ctx, 0))
+ return 0;
+
+ p = memchr(ctx.value.ptr, ' ', ctx.value.len);
+ if (!p || p == ctx.value.ptr) /* if no space was found or if the space is the first character */
+ return 0;
+ len = p - ctx.value.ptr;
+
+ if (chunk_initlen(&auth_method, ctx.value.ptr, 0, len) != 1)
+ return 0;
+
+ /* According to RFC7235, there could be multiple spaces between the
+ * scheme and its value, we must skip all of them.
+ */
+ while (p < istend(ctx.value) && *p == ' ')
+ ++p;
+
+ chunk_initlen(&txn->auth.method_data, p, 0, istend(ctx.value) - p);
+
+ if (!strncasecmp("Basic", auth_method.area, auth_method.data)) {
+ struct buffer *http_auth = get_trash_chunk();
+
+ len = base64dec(txn->auth.method_data.area,
+ txn->auth.method_data.data,
+ http_auth->area, global.tune.bufsize - 1);
+
+ if (len < 0)
+ return 0;
+
+ http_auth->area[len] = '\0';
+
+ p = strchr(http_auth->area, ':');
+
+ if (!p)
+ return 0;
+
+ txn->auth.user = http_auth->area;
+ *p = '\0';
+ txn->auth.pass = p+1;
+
+ txn->auth.method = HTTP_AUTH_BASIC;
+ return 1;
+ } else if (!strncasecmp("Bearer", auth_method.area, auth_method.data)) {
+ txn->auth.method = HTTP_AUTH_BEARER;
+ return 1;
+ }
+
+ return 0;
+}
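+
+/* For example (illustrative), the header "Authorization: Basic dXNlcjpwYXNz"
+ * base64-decodes to "user:pass", which yields txn->auth.user = "user",
+ * txn->auth.pass = "pass" and txn->auth.method = HTTP_AUTH_BASIC.
+ */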
+
+/* This function ensures that the prerequisites for an L7 fetch are ready,
+ * which means that a request or response is ready. If some data is missing,
+ * a parsing attempt is made. This is useful in TCP-based ACLs which are able
+ * to extract data from L7. If <vol> is non-null during a prefetch, another
+ * test is made to ensure the required information is not gone.
+ *
+ * The function returns :
+ * NULL with SMP_F_MAY_CHANGE in the sample flags if some data is missing to
+ * decide whether or not an HTTP message is present ;
+ * NULL if the requested data cannot be fetched or if it is certain that
+ * we'll never have any HTTP message there; this includes null strm or chn.
+ * NULL if the sample's direction does not match the channel's (i.e. the
+ * function was asked to work on the wrong channel)
+ * The HTX message if ready
+ */
+struct htx *smp_prefetch_htx(struct sample *smp, struct channel *chn, struct check *check, int vol)
+{
+ struct stream *s = smp->strm;
+ struct http_txn *txn = NULL;
+ struct htx *htx = NULL;
+ struct http_msg *msg;
+ struct htx_sl *sl;
+
+ if (chn &&
+ (((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ && (chn->flags & CF_ISRESP)) ||
+ ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES && !(chn->flags & CF_ISRESP))))
+ return NULL;
+
+ /* Note: it is possible that <s> is NULL when called before stream
+ * initialization (eg: tcp-request connection), so this function is the
+ * one responsible for guarding against this case for all HTTP users.
+ *
+ * In the health check context, the stream and the channel must be NULL
+ * and <check> must be set. In this case, only the input buffer,
+ * corresponding to the response, is considered. It is the caller
+ * responsibility to provide <check>.
+ */
+ BUG_ON(check && (s || chn));
+ if (!s || !chn) {
+ if (check) {
+ htx = htxbuf(&check->bi);
+
+ /* Analysis not yet started */
+ if (htx_is_empty(htx) || htx->first == -1)
+ return NULL;
+
+ sl = http_get_stline(htx);
+ if (vol && !sl) {
+ /* The start-line was already forwarded, it is too late to fetch anything */
+ return NULL;
+ }
+ goto end;
+ }
+
+ return NULL;
+ }
+
+ if (!s->txn && !http_create_txn(s))
+ return NULL;
+ txn = s->txn;
+ msg = (!(chn->flags & CF_ISRESP) ? &txn->req : &txn->rsp);
+
+ if (IS_HTX_STRM(s)) {
+ htx = htxbuf(&chn->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ return NULL;
+
+ if (msg->msg_state < HTTP_MSG_BODY) {
+ /* Analysis not yet started */
+ if (htx_is_empty(htx) || htx->first == -1) {
+ /* Parsing is done by the mux, just wait */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return NULL;
+ }
+ }
+ sl = http_get_stline(htx);
+ if (vol && !sl) {
+ /* The start-line was already forwarded, it is too late to fetch anything */
+ return NULL;
+ }
+ }
+ else { /* RAW mode */
+ struct buffer *buf;
+ struct h1m h1m;
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ union h1_sl h1sl;
+ unsigned int flags = HTX_FL_NONE;
+ int ret;
+
+ /* no HTTP fetch on the response in TCP mode */
+ if (chn->flags & CF_ISRESP)
+ return NULL;
+
+ /* Now we are working on the request only */
+ buf = &chn->buf;
+ if (b_head(buf) + b_data(buf) > b_wrap(buf))
+ b_slow_realign(buf, trash.area, 0);
+
+ h1m_init_req(&h1m);
+ ret = h1_headers_to_hdr_list(b_head(buf), b_stop(buf),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), &h1m, &h1sl);
+ if (ret <= 0) {
+ /* Invalid or too big */
+ if (ret < 0 || channel_full(&s->req, global.tune.maxrewrite))
+ return NULL;
+
+ /* wait for a full request */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return NULL;
+ }
+
+ /* OK we just got a valid HTTP message. We have to convert it
+ * into an HTX message.
+ */
+ if (unlikely(h1sl.rq.v.len == 0)) {
+ /* try to convert HTTP/0.9 requests to HTTP/1.0 */
+ if (h1sl.rq.meth != HTTP_METH_GET || !h1sl.rq.u.len)
+ return NULL;
+ h1sl.rq.v = ist("HTTP/1.0");
+ }
+
+ /* Set HTX start-line flags */
+ if (h1m.flags & H1_MF_VER_11)
+ flags |= HTX_SL_F_VER_11;
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m.flags & H1_MF_CLEN)
+ flags |= HTX_SL_F_CLEN;
+
+ htx = htx_from_buf(get_raw_htx_chunk());
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, h1sl.rq.m, h1sl.rq.u, h1sl.rq.v);
+ if (!sl || !htx_add_all_headers(htx, hdrs))
+ return NULL;
+ sl->info.req.meth = h1sl.rq.meth;
+ }
+
+ /* OK we just got a valid HTTP message. If not already done by
+ * HTTP analyzers, we have some minor preparation to perform so
+ * that further checks can rely on HTTP tests.
+ */
+ if (sl && msg->msg_state < HTTP_MSG_BODY) {
+ if (!(chn->flags & CF_ISRESP)) {
+ txn->meth = sl->info.req.meth;
+ if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD)
+ s->flags |= SF_REDIRECTABLE;
+ }
+ else if (txn->status == -1)
+ txn->status = sl->info.res.status;
+ if (sl->flags & HTX_SL_F_VER_11)
+ msg->flags |= HTTP_MSGF_VER_11;
+ }
+
+ /* everything's OK */
+ end:
+ return htx;
+}
+
+/* This function fetches the method of current HTTP request and stores
+ * it in the global pattern struct as a chunk. There are two possibilities :
+ * - if the method is known (not HTTP_METH_OTHER), its identifier is stored
+ * in <len> and <ptr> is NULL ;
+ * - if the method is unknown (HTTP_METH_OTHER), <ptr> points to the text and
+ * <len> to its length.
+ * This is intended to be used with pat_match_meth() only.
+ */
+static int smp_fetch_meth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct http_txn *txn;
+ struct htx *htx = NULL;
+ int meth;
+
+ txn = (smp->strm ? smp->strm->txn : NULL);
+ if (!txn)
+ return 0;
+
+ meth = txn->meth;
+ if (meth == HTTP_METH_OTHER) {
+ htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ if (!htx)
+ return 0;
+ meth = txn->meth;
+ }
+
+ smp->data.type = SMP_T_METH;
+ smp->data.u.meth.meth = meth;
+ if (meth == HTTP_METH_OTHER) {
+ struct htx_sl *sl;
+
+ sl = http_get_stline(htx);
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.meth.str.area = HTX_SL_REQ_MPTR(sl);
+ smp->data.u.meth.str.data = HTX_SL_REQ_MLEN(sl);
+ }
+ smp->flags |= SMP_F_VOL_1ST;
+ return 1;
+}
+
+static int smp_fetch_rqver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ char *ptr;
+ int len;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ len = HTX_SL_REQ_VLEN(sl);
+ ptr = HTX_SL_REQ_VPTR(sl);
+
+ while ((len-- > 0) && (*ptr++ != '/'));
+ if (len <= 0)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = ptr;
+ smp->data.u.str.data = len;
+
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_stver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_RES_CHN(smp);
+ struct check *check = objt_check(smp->sess->origin);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct htx_sl *sl;
+ char *ptr;
+ int len;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ len = HTX_SL_RES_VLEN(sl);
+ ptr = HTX_SL_RES_VPTR(sl);
+
+ while ((len-- > 0) && (*ptr++ != '/'));
+ if (len <= 0)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = ptr;
+ smp->data.u.str.data = len;
+
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+/* 3. Check on Status Code. We manipulate integers here. */
+static int smp_fetch_stcode(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_RES_CHN(smp);
+ struct check *check = objt_check(smp->sess->origin);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct htx_sl *sl;
+ char *ptr;
+ int len;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ len = HTX_SL_RES_CLEN(sl);
+ ptr = HTX_SL_RES_CPTR(sl);
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = __strl2ui(ptr, len);
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
+
+static int smp_fetch_uniqueid(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct ist unique_id;
+
+ if (LIST_ISEMPTY(&smp->sess->fe->format_unique_id))
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ unique_id = stream_generate_unique_id(smp->strm, &smp->sess->fe->format_unique_id);
+ if (!isttest(unique_id))
+ return 0;
+
+ smp->data.u.str.area = smp->strm->unique_id.ptr;
+ smp->data.u.str.data = smp->strm->unique_id.len;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Returns a string block containing all headers including the
+ * empty line which separates headers from the body. This is useful
+ * for some header analysis.
+ */
+static int smp_fetch_hdrs(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdrs, res.hdrs */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ int32_t pos;
+
+ if (!htx)
+ return 0;
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_HDR) {
+ struct ist n = htx_get_blk_name(htx, blk);
+ struct ist v = htx_get_blk_value(htx, blk);
+
+ if (!h1_format_htx_hdr(n, v, temp))
+ return 0;
+ }
+ else if (type == HTX_BLK_EOH) {
+ if (!chunk_memcat(temp, "\r\n", 2))
+ return 0;
+ break;
+ }
+ }
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *temp;
+ return 1;
+}
+
+/* Returns the request headers in a length/value encoded format.
+ * This is useful for exchanges with the SPOE.
+ *
+ * A "length value" is a multibyte code encoding numbers. It uses the
+ * SPOE format. The encoding is the following:
+ *
+ * Each couple "header name" / "header value" is composed
+ * like this:
+ * "length value" "header name bytes"
+ * "length value" "header value bytes"
+ * When the last header is reached, the header name and the header
+ * value are empty. Their lengths are 0.
+ */
+static int smp_fetch_hdrs_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdrs_bin, res.hdrs_bin */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ char *p, *end;
+ int32_t pos;
+ int ret;
+
+ if (!htx)
+ return 0;
+ temp = get_trash_chunk();
+ p = temp->area;
+ end = temp->area + temp->size;
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+
+ if (type == HTX_BLK_HDR) {
+ n = htx_get_blk_name(htx,blk);
+ v = htx_get_blk_value(htx, blk);
+
+ /* encode the header name. */
+ ret = encode_varint(n.len, &p, end);
+ if (ret == -1)
+ return 0;
+ if (p + n.len > end)
+ return 0;
+ memcpy(p, n.ptr, n.len);
+ p += n.len;
+
+ /* encode the header value. */
+ ret = encode_varint(v.len, &p, end);
+ if (ret == -1)
+ return 0;
+ if (p + v.len > end)
+ return 0;
+ memcpy(p, v.ptr, v.len);
+ p += v.len;
+
+ }
+ else if (type == HTX_BLK_EOH) {
+ /* encode the end of the header list with empty
+ * header name and header value.
+ */
+ ret = encode_varint(0, &p, end);
+ if (ret == -1)
+ return 0;
+ ret = encode_varint(0, &p, end);
+ if (ret == -1)
+ return 0;
+ break;
+ }
+ }
+
+ /* Fill in the resulting sample data. */
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str.area = temp->area;
+ smp->data.u.str.data = p - temp->area;
+ smp->data.u.str.size = temp->size;
+ return 1;
+}
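+
+/* For example (illustrative), a message whose only header is "Host: foo"
+ * would be encoded as:
+ *
+ *   0x04 'H' 'o' 's' 't' 0x03 'f' 'o' 'o' 0x00 0x00
+ *
+ * since varint lengths below 240 fit in a single byte, and the trailing two
+ * zero lengths mark the end of the list.
+ */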
+
+/* returns the longest available part of the body. This requires that the body
+ * has been waited for using http-buffer-request.
+ */
+static int smp_fetch_body(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.body, res.body */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ int32_t pos;
+ int finished = 0;
+
+ if (!htx)
+ return 0;
+
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT) {
+ finished = 1;
+ break;
+ }
+ if (type == HTX_BLK_DATA) {
+ if (!h1_format_htx_data(htx_get_blk_value(htx, blk), temp, 0))
+ return 0;
+ }
+ }
+
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *temp;
+ smp->flags = SMP_F_VOL_TEST;
+
+ if (!finished && (check || (chn && !channel_full(chn, global.tune.maxrewrite) &&
+ !(chn->flags & (CF_EOI|CF_SHUTR|CF_READ_ERROR)))))
+ smp->flags |= SMP_F_MAY_CHANGE;
+
+ return 1;
+}
+
+
+/* returns the available length of the body. This requires that the body
+ * has been waited for using http-buffer-request.
+ */
+static int smp_fetch_body_len(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.body_len, res.body_len */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ int32_t pos;
+ unsigned long long len = 0;
+
+ if (!htx)
+ return 0;
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = len;
+ smp->flags = SMP_F_VOL_TEST;
+ return 1;
+}
+
+
+/* returns the advertised length of the body, or the advertised size of the
+ * chunks available in the buffer. This requires that the body has been waited
+ * for using http-buffer-request.
+ */
+static int smp_fetch_body_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.body_size, res.body_size */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ int32_t pos;
+ unsigned long long len = 0;
+
+ if (!htx)
+ return 0;
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ if (htx->extra != ULLONG_MAX)
+ len += htx->extra;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = len;
+ smp->flags = SMP_F_VOL_TEST;
+ return 1;
+}
+
+
+/* 4. Check on URL/URI. A pointer to the URI is stored. */
+static int smp_fetch_url(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+
+ if (!htx)
+ return 0;
+ sl = http_get_stline(htx);
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = HTX_SL_REQ_UPTR(sl);
+ smp->data.u.str.data = HTX_SL_REQ_ULEN(sl);
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_url_ip(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct sockaddr_storage addr;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (!htx)
+ return 0;
+ sl = http_get_stline(htx);
+ if (url2sa(HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl), &addr, NULL) < 0)
+ return 0;
+
+ if (addr.ss_family != AF_INET)
+ return 0;
+
+ smp->data.type = SMP_T_IPV4;
+ smp->data.u.ipv4 = ((struct sockaddr_in *)&addr)->sin_addr;
+ smp->flags = 0;
+ return 1;
+}
+
+static int smp_fetch_url_port(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct sockaddr_storage addr;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (!htx)
+ return 0;
+ sl = http_get_stline(htx);
+ if (url2sa(HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl), &addr, NULL) < 0)
+ return 0;
+
+ if (addr.ss_family != AF_INET)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = get_host_port(&addr);
+ smp->flags = 0;
+ return 1;
+}
+
+/* Fetch an HTTP header. A pointer to the beginning of the value is returned.
+ * Accepts an optional argument of type string containing the header field name,
+ * and an optional argument of type signed or unsigned integer to request an
+ * explicit occurrence of the header. Note that in the event of a missing name,
+ * headers are considered from the first one. It does not stop on commas and
+ * returns full lines instead (useful for User-Agent or Date for example).
+ */
+static int smp_fetch_fhdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.fhdr, res.fhdr */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx *ctx = smp->ctx.a[0];
+ struct ist name;
+ int occ = 0;
+
+ if (!ctx) {
+ /* first call */
+ ctx = &static_http_hdr_ctx;
+ ctx->blk = NULL;
+ smp->ctx.a[0] = ctx;
+ }
+
+ if (args[0].type != ARGT_STR)
+ return 0;
+ name = ist2(args[0].data.str.area, args[0].data.str.data);
+
+ if (args[1].type == ARGT_SINT)
+ occ = args[1].data.sint;
+
+ if (!htx)
+ return 0;
+
+ if (ctx && !(smp->flags & SMP_F_NOT_LAST))
+ /* search for header from the beginning */
+ ctx->blk = NULL;
+
+ if (!occ && !(smp->opt & SMP_OPT_ITERATE))
+ /* no explicit occurrence and single fetch => last header by default */
+ occ = -1;
+
+ if (!occ)
+ /* prepare to report multiple occurrences for ACL fetches */
+ smp->flags |= SMP_F_NOT_LAST;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_HDR | SMP_F_CONST;
+ if (http_get_htx_fhdr(htx, name, occ, ctx, &smp->data.u.str.area, &smp->data.u.str.data))
+ return 1;
+ smp->flags &= ~SMP_F_NOT_LAST;
+ return 0;
+}
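+
+/* Example usage (illustrative): grab the full User-Agent line, which may
+ * legitimately contain commas that req.hdr() would split on:
+ *
+ *   http-request set-var(txn.ua) req.fhdr(user-agent)
+ */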
+
+/* 6. Check on HTTP header count. The number of occurrences is returned.
+ * Accepts exactly 1 argument of type string. It does not stop on commas and
+ * returns full lines instead (useful for User-Agent or Date for example).
+ */
+static int smp_fetch_fhdr_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.fhdr_cnt, res.fhdr_cnt */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx ctx;
+ struct ist name;
+ int cnt;
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR) {
+ name = ist2(args->data.str.area, args->data.str.data);
+ } else {
+ name = IST_NULL;
+ }
+
+ ctx.blk = NULL;
+ cnt = 0;
+ while (http_find_header(htx, name, &ctx, 1))
+ cnt++;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = cnt;
+ smp->flags = SMP_F_VOL_HDR;
+ return 1;
+}
+
+static int smp_fetch_hdr_names(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdr_names, res.hdr_names */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ char del = ',';
+
+ int32_t pos;
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR)
+ del = *args[0].data.str.area;
+
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n;
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+ n = htx_get_blk_name(htx, blk);
+
+ if (temp->data)
+ temp->area[temp->data++] = del;
+ chunk_istcat(temp, n);
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *temp;
+ smp->flags = SMP_F_VOL_HDR;
+ return 1;
+}
+
+/* Fetch an HTTP header. A pointer to the beginning of the value is returned.
+ * Accepts an optional argument of type string containing the header field name,
+ * and an optional argument of type signed or unsigned integer to request an
+ * explicit occurrence of the header. Note that in the event of a missing name,
+ * headers are considered from the first one.
+ */
+static int smp_fetch_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdr / hdr, res.hdr / shdr */
+ struct channel *chn = ((kw[0] == 'h' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx *ctx = smp->ctx.a[0];
+ struct ist name;
+ int occ = 0;
+
+ if (!ctx) {
+ /* first call */
+ ctx = &static_http_hdr_ctx;
+ ctx->blk = NULL;
+ smp->ctx.a[0] = ctx;
+ }
+
+ if (args[0].type != ARGT_STR)
+ return 0;
+ name = ist2(args[0].data.str.area, args[0].data.str.data);
+
+ if (args[1].type == ARGT_SINT)
+ occ = args[1].data.sint;
+
+ if (!htx)
+ return 0;
+
+ if (ctx && !(smp->flags & SMP_F_NOT_LAST))
+ /* search for header from the beginning */
+ ctx->blk = NULL;
+
+ if (!occ && !(smp->opt & SMP_OPT_ITERATE))
+ /* no explicit occurrence and single fetch => last header by default */
+ occ = -1;
+
+ if (!occ)
+ /* prepare to report multiple occurrences for ACL fetches */
+ smp->flags |= SMP_F_NOT_LAST;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_HDR | SMP_F_CONST;
+ if (http_get_htx_hdr(htx, name, occ, ctx, &smp->data.u.str.area, &smp->data.u.str.data))
+ return 1;
+
+ smp->flags &= ~SMP_F_NOT_LAST;
+ return 0;
+}
+
+/* Same as smp_fetch_hdr() but relies only on the sample direction to choose
+ * the right channel. So instead of duplicating the code, we just change the
+ * keyword and then fall back on smp_fetch_hdr().
+ */
+static int smp_fetch_chn_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ kw = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ ? "req.hdr" : "res.hdr");
+ return smp_fetch_hdr(args, smp, kw, private);
+}
+
+/* 6. Check on HTTP header count. The number of occurrences is returned.
+ * Accepts exactly 1 argument of type string.
+ */
+static int smp_fetch_hdr_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdr_cnt / hdr_cnt, res.hdr_cnt / shdr_cnt */
+ struct channel *chn = ((kw[0] == 'h' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx ctx;
+ struct ist name;
+ int cnt;
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR) {
+ name = ist2(args->data.str.area, args->data.str.data);
+ } else {
+ name = IST_NULL;
+ }
+
+ ctx.blk = NULL;
+ cnt = 0;
+ while (http_find_header(htx, name, &ctx, 0))
+ cnt++;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = cnt;
+ smp->flags = SMP_F_VOL_HDR;
+ return 1;
+}
+
+/* Fetch an HTTP header's integer value. The integer value is returned. It
+ * takes a mandatory argument of type string and an optional one of type int
+ * to designate a specific occurrence. It returns an unsigned integer, which
+ * may or may not be appropriate for everything.
+ */
+static int smp_fetch_hdr_val(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret = smp_fetch_hdr(args, smp, kw, private);
+
+ if (ret > 0) {
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = strl2ic(smp->data.u.str.area,
+ smp->data.u.str.data);
+ }
+
+ return ret;
+}
+
+/* Fetch an HTTP header's IP value. takes a mandatory argument of type string
+ * and an optional one of type int to designate a specific occurrence.
+ * It returns an IPv4 or IPv6 address. Addresses surrounded by invalid chars
+ * are rejected. However IPv4 addresses may be followed with a colon and a
+ * valid port number.
+ */
+static int smp_fetch_hdr_ip(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *temp = get_trash_chunk();
+ int ret, len;
+ int port;
+
+ while ((ret = smp_fetch_hdr(args, smp, kw, private)) > 0) {
+ if (smp->data.u.str.data < temp->size - 1) {
+ memcpy(temp->area, smp->data.u.str.area,
+ smp->data.u.str.data);
+ temp->area[smp->data.u.str.data] = '\0';
+ len = url2ipv4((char *) temp->area, &smp->data.u.ipv4);
+ if (len > 0 && len == smp->data.u.str.data) {
+ /* plain IPv4 address */
+ smp->data.type = SMP_T_IPV4;
+ break;
+ } else if (len > 0 && temp->area[len] == ':' &&
+ strl2irc(temp->area + len + 1, smp->data.u.str.data - len - 1, &port) == 0 &&
+ port >= 0 && port <= 65535) {
+ /* IPv4 address suffixed with ':' followed by a valid port number */
+ smp->data.type = SMP_T_IPV4;
+ break;
+ } else if (inet_pton(AF_INET6, temp->area, &smp->data.u.ipv6)) {
+ smp->data.type = SMP_T_IPV6;
+ break;
+ }
+ }
+
+ /* if the header doesn't match an IP address, fetch next one */
+ if (!(smp->flags & SMP_F_NOT_LAST))
+ return 0;
+ }
+ return ret;
+}
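+
+/* Example usage (illustrative): trust the client address advertised by an
+ * upstream proxy in X-Forwarded-For:
+ *
+ *   http-request set-src req.hdr_ip(X-Forwarded-For)
+ */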
+
+/* 8. Check on URI PATH. A pointer to the PATH is stored. The path starts at the
+ * first '/' after the possible hostname. It ends before the possible '?' except
+ * for the 'pathq' keyword.
+ */
+static int smp_fetch_path(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct ist path;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+
+ if (kw[4] == 'q' && (kw[0] == 'p' || kw[0] == 'b')) // pathq or baseq
+ path = http_parse_path(&parser);
+ else
+ path = iststop(http_parse_path(&parser), '?');
+
+ if (!isttest(path))
+ return 0;
+
+ /* OK, we got the '/' ! */
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = path.ptr;
+ smp->data.u.str.data = path.len;
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+/* This produces a concatenation of the first occurrence of the Host header
+ * followed by the path component if it begins with a slash ('/'). This means
+ * that '*' will not be added, resulting in exactly the first Host entry.
+ * If no Host header is found, then the path is returned as-is. The returned
+ * value is stored in the trash so it does not need to be marked constant.
+ * The returned sample is of type string.
+ */
+static int smp_fetch_base(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct buffer *temp;
+ struct http_hdr_ctx ctx;
+ struct ist path;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("Host"), &ctx, 0) || !ctx.value.len)
+ return smp_fetch_path(args, smp, kw, private);
+
+ /* OK we have the header value in ctx.value */
+ temp = get_trash_chunk();
+ chunk_istcat(temp, ctx.value);
+
+ /* now retrieve the path */
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (isttest(path)) {
+ size_t len;
+
+ if (kw[4] == 'q' && kw[0] == 'b') { // baseq
+ len = path.len;
+ } else {
+ for (len = 0; len < path.len && *(path.ptr + len) != '?'; len++)
+ ;
+ }
+
+ if (len && *(path.ptr) == '/')
+ chunk_memcat(temp, path.ptr, len);
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *temp;
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
+
+/* This produces a 32-bit hash of the concatenation of the first occurrence of
+ * the Host header followed by the path component if it begins with a slash ('/').
+ * This means that '*' will not be added, resulting in exactly the first Host
+ * entry. If no Host header is found, then the path is used. The resulting value
+ * is hashed using the path hash followed by a full avalanche hash and provides a
+ * 32-bit integer value. This fetch is useful for tracking per-path activity on
+ * high-traffic sites without having to store whole paths.
+ */
+static int smp_fetch_base32(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct http_hdr_ctx ctx;
+ struct ist path;
+ unsigned int hash = 0;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Host"), &ctx, 0)) {
+ /* OK we have the header value in ctx.value */
+ while (ctx.value.len--)
+ hash = *(ctx.value.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+
+ /* now retrieve the path */
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (isttest(path)) {
+ size_t len;
+
+ for (len = 0; len < path.len && *(path.ptr + len) != '?'; len++)
+ ;
+
+ if (len && *(path.ptr) == '/') {
+ while (len--)
+ hash = *(path.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+ }
+
+ hash = full_hash(hash);
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = hash;
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
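+
+/* Example usage (illustrative): track per-Host+path request rates in a
+ * stick-table keyed on this hash:
+ *
+ *   stick-table type integer size 1m expire 1h store http_req_rate(10s)
+ *   http-request track-sc0 base32
+ */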
+
+/* This concatenates the source address with the 32-bit hash of the Host and
+ * path as returned by smp_fetch_base32(). The idea is to have per-source and
+ * per-path counters. The result is a binary block from 8 to 20 bytes depending
+ * on the source address length. The path hash is stored before the address so
+ * that in environments where IPv6 is insignificant, truncating the output to
+ * 8 bytes would still work.
+ */
+static int smp_fetch_base32_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = (smp->strm ? sc_src(smp->strm->scf) : NULL);
+ struct buffer *temp;
+
+ if (!src)
+ return 0;
+
+ if (!smp_fetch_base32(args, smp, kw, private))
+ return 0;
+
+ temp = get_trash_chunk();
+ *(unsigned int *) temp->area = htonl(smp->data.u.sint);
+ temp->data += sizeof(unsigned int);
+
+ switch (src->ss_family) {
+ case AF_INET:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in *)src)->sin_addr,
+ 4);
+ temp->data += 4;
+ break;
+ case AF_INET6:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in6 *)src)->sin6_addr,
+ 16);
+ temp->data += 16;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* Extracts the query string, which comes after the question mark '?'. If no
+ * question mark is found, nothing is returned. Otherwise it returns a sample
+ * of type string carrying the whole query string.
+ */
+static int smp_fetch_query(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ char *ptr, *end;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ ptr = HTX_SL_REQ_UPTR(sl);
+ end = HTX_SL_REQ_UPTR(sl) + HTX_SL_REQ_ULEN(sl);
+
+ /* look up the '?' */
+ do {
+ if (ptr == end)
+ return 0;
+ } while (*ptr++ != '?');
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = ptr;
+ smp->data.u.str.data = end - ptr;
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_proto_http(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 0);
+
+ if (!htx)
+ return 0;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* Returns a boolean indicating whether the current request is the first one on the connection */
+static int smp_fetch_http_first_req(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = !(smp->strm->txn->flags & TX_NOT_FIRST);
+ return 1;
+}
+
+/* Fetch the authentication method if there is an Authorization header. It
+ * relies on get_http_auth()
+ */
+static int smp_fetch_http_auth_type(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+
+ if (!htx)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx))
+ return 0;
+
+ switch (txn->auth.method) {
+ case HTTP_AUTH_BASIC:
+ smp->data.u.str.area = "Basic";
+ smp->data.u.str.data = 5;
+ break;
+ case HTTP_AUTH_DIGEST:
+ /* Unexpected because not supported */
+ smp->data.u.str.area = "Digest";
+ smp->data.u.str.data = 6;
+ break;
+ case HTTP_AUTH_BEARER:
+ smp->data.u.str.area = "Bearer";
+ smp->data.u.str.data = 6;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Fetch the user name supplied if there is an Authorization header. It relies
+ * on get_http_auth()
+ */
+static int smp_fetch_http_auth_user(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+
+ if (!htx)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx) || txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = txn->auth.user;
+ smp->data.u.str.data = strlen(txn->auth.user);
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Fetch the password supplied if there is an Authorization header. It relies on
+ * get_http_auth()
+ */
+static int smp_fetch_http_auth_pass(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+
+ if (!htx)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx) || txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = txn->auth.pass;
+ smp->data.u.str.data = strlen(txn->auth.pass);
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_http_auth_bearer(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+ struct buffer bearer_val = {};
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR) {
+ struct http_hdr_ctx ctx;
+ struct ist hdr_name = ist2(args->data.str.area, args->data.str.data);
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, hdr_name, &ctx, 0)) {
+ struct ist type = istsplit(&ctx.value, ' ');
+
+ /* There must be at least one space character between
+ * the scheme and the following value, so ctx.value might
+ * still have leading spaces here (see RFC7235).
+ */
+ ctx.value = istskip(ctx.value, ' ');
+
+ if (isteqi(type, ist("Bearer")) && istlen(ctx.value))
+ chunk_initlen(&bearer_val, istptr(ctx.value), 0, istlen(ctx.value));
+ }
+ }
+ else {
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx) || txn->auth.method != HTTP_AUTH_BEARER)
+ return 0;
+
+ bearer_val = txn->auth.method_data;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = bearer_val;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
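+
+/* Illustrative uses (hypothetical variable and header names): without an
+ * argument the token is taken from the Authorization header; with a string
+ * argument it is taken from that header instead. The extracted token is
+ * typically fed to a JWT validation converter afterwards:
+ *
+ *   http-request set-var(txn.bearer) http_auth_bearer
+ *   http-request set-var(txn.svc_token) http_auth_bearer(X-Service-Token)
+ */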
+
+/* Accepts exactly 1 argument of type userlist */
+static int smp_fetch_http_auth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+
+ if (args->type != ARGT_USR)
+ return 0;
+
+ if (!htx)
+ return 0;
+ if (!get_http_auth(smp, htx) || smp->strm->txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = check_user(args->data.usr, smp->strm->txn->auth.user,
+ smp->strm->txn->auth.pass);
+ return 1;
+}
+
+/* Accepts exactly 1 argument of type userlist */
+static int smp_fetch_http_auth_grp(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+
+ if (args->type != ARGT_USR)
+ return 0;
+
+ if (!htx)
+ return 0;
+ if (!get_http_auth(smp, htx) || smp->strm->txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ /* if the user does not belong to the userlist or has a wrong password,
+ * report that it unconditionally does not match. Otherwise we return
+ * a string containing the username.
+ */
+ if (!check_user(args->data.usr, smp->strm->txn->auth.user,
+ smp->strm->txn->auth.pass))
+ return 0;
+
+ /* pat_match_auth() will need the user list */
+ smp->ctx.a[0] = args->data.usr;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = smp->strm->txn->auth.user;
+ smp->data.u.str.data = strlen(smp->strm->txn->auth.user);
+
+ return 1;
+}
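+
+/* Illustrative userlist-based setup (hypothetical names and credentials):
+ *
+ *   userlist admins
+ *       group ops users alice
+ *       user alice insecure-password secret
+ *
+ *   frontend fe
+ *       acl auth_ok http_auth(admins)
+ *       acl is_ops  http_auth_group(admins) ops
+ *       http-request auth realm Restricted if !auth_ok
+ */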
+
+/* Fetch a captured HTTP request header. The index is the position of
+ * the "capture" option in the configuration file
+ */
+static int smp_fetch_capture_req_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *fe;
+ int idx;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ if (idx > (fe->nb_req_cap - 1) || smp->strm->req_cap == NULL || smp->strm->req_cap[idx] == NULL)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = smp->strm->req_cap[idx];
+ smp->data.u.str.data = strlen(smp->strm->req_cap[idx]);
+
+ return 1;
+}
+
+/* Fetch a captured HTTP response header. The index is the position of
+ * the "capture" option in the configuration file
+ */
+static int smp_fetch_capture_res_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *fe;
+ int idx;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ if (idx > (fe->nb_rsp_cap - 1) || smp->strm->res_cap == NULL || smp->strm->res_cap[idx] == NULL)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = smp->strm->res_cap[idx];
+ smp->data.u.str.data = strlen(smp->strm->res_cap[idx]);
+
+ return 1;
+}
+
+/* Extracts the METHOD from the HTTP request; txn->uri must be filled before the call */
+static int smp_fetch_capture_req_method(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *temp;
+ struct http_txn *txn;
+ char *ptr;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || !txn->uri)
+ return 0;
+
+ ptr = txn->uri;
+
+ while (*ptr != ' ' && *ptr != '\0') /* find first space */
+ ptr++;
+
+ temp = get_trash_chunk();
+ temp->area = txn->uri;
+ temp->data = ptr - txn->uri;
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+
+ return 1;
+}
+
+/* Extracts the path from the HTTP request; txn->uri must be filled before the call */
+static int smp_fetch_capture_req_uri(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct http_txn *txn;
+ struct ist path;
+ const char *ptr;
+ struct http_uri_parser parser;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || !txn->uri)
+ return 0;
+
+ ptr = txn->uri;
+
+ while (*ptr != ' ' && *ptr != '\0') /* find first space */
+ ptr++;
+
+ if (!*ptr)
+ return 0;
+
+ /* skip the first space and find space after URI */
+ path = ist2(++ptr, 0);
+ while (*ptr != ' ' && *ptr != '\0')
+ ptr++;
+ path.len = ptr - path.ptr;
+
+ parser = http_uri_parser_init(path);
+ path = http_parse_path(&parser);
+ if (!isttest(path))
+ return 0;
+
+ smp->data.u.str.area = path.ptr;
+ smp->data.u.str.data = path.len;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+
+ return 1;
+}
+
+/* Retrieves the HTTP version from the request (either 1.0 or 1.1) and emits it
+ * as a string (either "HTTP/1.0" or "HTTP/1.1").
+ */
+static int smp_fetch_capture_req_ver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct http_txn *txn;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || txn->req.msg_state < HTTP_MSG_BODY)
+ return 0;
+
+ if (txn->req.flags & HTTP_MSGF_VER_11)
+ smp->data.u.str.area = "HTTP/1.1";
+ else
+ smp->data.u.str.area = "HTTP/1.0";
+
+ smp->data.u.str.data = 8;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Retrieves the HTTP version from the response (either 1.0 or 1.1) and emits it
+ * as a string (either "HTTP/1.0" or "HTTP/1.1").
+ */
+static int smp_fetch_capture_res_ver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct http_txn *txn;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || txn->rsp.msg_state < HTTP_MSG_BODY)
+ return 0;
+
+ if (txn->rsp.flags & HTTP_MSGF_VER_11)
+ smp->data.u.str.area = "HTTP/1.1";
+ else
+ smp->data.u.str.area = "HTTP/1.0";
+
+ smp->data.u.str.data = 8;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Iterate over all cookies present in a message. The context is stored in
+ * smp->ctx.a[0] for the in-header position, smp->ctx.a[1] for the
+ * end-of-header-value, and smp->ctx.a[2] for the hdr_ctx. Depending on
+ * the direction, multiple cookies may be parsed on the same line or not.
+ * If provided, the searched cookie name is in args, in args->data.str. If
+ * the input options indicate that no iterating is desired, then only the
+ * last value is fetched, if any. If no cookie name is provided, the first
+ * cookie value found is fetched. The returned sample is of type CSTR. The
+ * parsing logic can also be reused to extract cookies from other sources.
+ */
+static int smp_fetch_cookie(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.cookie / cookie / cook, res.cookie / scook / set-cookie */
+ struct channel *chn = ((kw[0] == 'c' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx *ctx = smp->ctx.a[2];
+ struct ist hdr;
+ char *cook = NULL;
+ size_t cook_l = 0;
+ int found = 0;
+
+ if (args->type == ARGT_STR) {
+ cook = args->data.str.area;
+ cook_l = args->data.str.data;
+ }
+
+ if (!ctx) {
+ /* first call */
+ ctx = &static_http_hdr_ctx;
+ ctx->blk = NULL;
+ smp->ctx.a[2] = ctx;
+ }
+
+ if (!htx)
+ return 0;
+
+ hdr = (!(check || (chn && chn->flags & CF_ISRESP)) ? ist("Cookie") : ist("Set-Cookie"));
+
+ /* Either we want only one value, or we want to iterate over all of
+ * them and fetch the next one. In the latter case the SMP_OPT_ITERATE
+ * option is set.
+ */
+
+ if (!(smp->flags & SMP_F_NOT_LAST)) {
+ /* search for the header from the beginning, we must first initialize
+ * the search parameters.
+ */
+ smp->ctx.a[0] = NULL;
+ ctx->blk = NULL;
+ }
+
+ smp->flags |= SMP_F_VOL_HDR;
+ while (1) {
+ /* Note: smp->ctx.a[0] == NULL every time we need to fetch a new header */
+ if (!smp->ctx.a[0]) {
+ if (!http_find_header(htx, hdr, ctx, 0))
+ goto out;
+
+ if (ctx->value.len < cook_l + 1)
+ continue;
+
+ smp->ctx.a[0] = ctx->value.ptr;
+ smp->ctx.a[1] = smp->ctx.a[0] + ctx->value.len;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->ctx.a[0] = http_extract_cookie_value(smp->ctx.a[0], smp->ctx.a[1],
+ cook, cook_l,
+ (smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ,
+ &smp->data.u.str.area,
+ &smp->data.u.str.data);
+ if (smp->ctx.a[0]) {
+ found = 1;
+ if (smp->opt & SMP_OPT_ITERATE) {
+ /* iterate on cookie value */
+ smp->flags |= SMP_F_NOT_LAST;
+ return 1;
+ }
+ if (args->data.str.data == 0) {
+ /* No cookie name, first occurrence returned */
+ break;
+ }
+ }
+ /* if we're looking for last occurrence, let's loop */
+ }
+
+ /* all cookie headers and values were scanned. If we're looking for the
+ * last occurrence, we may return it now.
+ */
+ out:
+ smp->flags &= ~SMP_F_NOT_LAST;
+ return found;
+}
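+
+/* Illustrative uses (hypothetical cookie names): the request side parses the
+ * Cookie header while the response side parses Set-Cookie:
+ *
+ *   acl has_sess req.cook(JSESSIONID) -m found
+ *   http-response set-var(txn.sticky) res.cook(SRVID)
+ */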
+
+/* Same as smp_fetch_cookie() but relies only on the sample direction to
+ * choose the right channel. So instead of duplicating the code, we just change
+ * the keyword and then fall back on smp_fetch_cookie().
+ */
+static int smp_fetch_chn_cookie(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ kw = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ ? "req.cook" : "res.cook");
+ return smp_fetch_cookie(args, smp, kw, private);
+}
+
+/* Iterate over all cookies present in a request to count how many occurrences
+ * match the name passed in args->data.str. On the request side, multiple
+ * cookies may be parsed on the same line. The returned sample is of type
+ * SINT. Accepts exactly 1 argument of type string.
+ */
+static int smp_fetch_cookie_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.cook_cnt / cook_cnt, res.cook_cnt / scook_cnt */
+ struct channel *chn = ((kw[0] == 'c' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx ctx;
+ struct ist hdr;
+ char *val_beg, *val_end;
+ char *cook = NULL;
+ size_t cook_l = 0;
+ int cnt;
+
+ if (args->type == ARGT_STR){
+ cook = args->data.str.area;
+ cook_l = args->data.str.data;
+ }
+
+ if (!htx)
+ return 0;
+
+ hdr = (!(check || (chn && chn->flags & CF_ISRESP)) ? ist("Cookie") : ist("Set-Cookie"));
+
+ val_end = val_beg = NULL;
+ ctx.blk = NULL;
+ cnt = 0;
+ while (1) {
+ /* Note: val_beg == NULL every time we need to fetch a new header */
+ if (!val_beg) {
+ if (!http_find_header(htx, hdr, &ctx, 0))
+ break;
+
+ if (ctx.value.len < cook_l + 1)
+ continue;
+
+ val_beg = ctx.value.ptr;
+ val_end = val_beg + ctx.value.len;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ while ((val_beg = http_extract_cookie_value(val_beg, val_end,
+ cook, cook_l,
+ (smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ,
+ &smp->data.u.str.area,
+ &smp->data.u.str.data))) {
+ cnt++;
+ }
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = cnt;
+ smp->flags |= SMP_F_VOL_HDR;
+ return 1;
+}
+
+/* Fetch a cookie's value and return it as a signed integer. It takes a
+ * mandatory argument of type string. It relies on smp_fetch_cookie().
+ */
+static int smp_fetch_cookie_val(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret = smp_fetch_cookie(args, smp, kw, private);
+
+ if (ret > 0) {
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = strl2ic(smp->data.u.str.area,
+ smp->data.u.str.data);
+ }
+
+ return ret;
+}
+
+/************************************************************************/
+/* The code below is dedicated to sample fetches */
+/************************************************************************/
+
+/* This scans a URL-encoded query string. It takes an optionally wrapping
+ * string whose first contiguous chunk has its beginning in ctx->a[0] and end
+ * in ctx->a[1], and the optional second part in (ctx->a[2]..ctx->a[3]). The
+ * pointers are updated for next iteration before leaving.
+ */
+static int smp_fetch_param(char delim, const char *name, int name_len, const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const char *vstart, *vend;
+ struct buffer *temp;
+ const char **chunks = (const char **)smp->ctx.a;
+
+ if (!http_find_next_url_param(chunks, name, name_len,
+ &vstart, &vend, delim))
+ return 0;
+
+ /* Create the sample. If the value is contiguous, return the pointer as
+ * CONST; if the value is wrapped, copy it into a buffer.
+ */
+ smp->data.type = SMP_T_STR;
+ if (chunks[2] &&
+ vstart >= chunks[0] && vstart <= chunks[1] &&
+ vend >= chunks[2] && vend <= chunks[3]) {
+ /* Wrapped case. */
+ temp = get_trash_chunk();
+ memcpy(temp->area, vstart, chunks[1] - vstart);
+ memcpy(temp->area + ( chunks[1] - vstart ), chunks[2],
+ vend - chunks[2]);
+ smp->data.u.str.area = temp->area;
+ smp->data.u.str.data = ( chunks[1] - vstart ) + ( vend - chunks[2] );
+ } else {
+ /* Contiguous case. */
+ smp->data.u.str.area = (char *)vstart;
+ smp->data.u.str.data = vend - vstart;
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ }
+
+ /* Update context, check wrapping. */
+ chunks[0] = vend;
+ if (chunks[2] && vend >= chunks[2] && vend <= chunks[3]) {
+ chunks[1] = chunks[3];
+ chunks[2] = NULL;
+ }
+
+ if (chunks[0] < chunks[1])
+ smp->flags |= SMP_F_NOT_LAST;
+
+ return 1;
+}
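+
+/* Illustrative view of the context layout (not from the original source):
+ * the scanned string may be split in two contiguous chunks when the
+ * underlying data wraps:
+ *
+ *   first chunk : ctx->a[0] .. ctx->a[1]
+ *   second chunk: ctx->a[2] .. ctx->a[3]   (ctx->a[2] is NULL if not wrapped)
+ *
+ * A value fully inside one chunk is returned as a CONST pointer; a value
+ * straddling both chunks is first rebuilt in the trash buffer.
+ */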
+
+/* This function iterates over each parameter of the query string. It uses
+ * ctx->a[0] and ctx->a[1] to store the beginning and end of the current
+ * parameter. Since it uses smp_fetch_param(), ctx->a[2..3] are both NULL.
+ * An optional parameter name is passed in args[0], otherwise any parameter is
+ * considered. It supports an optional delimiter argument for the beginning of
+ * the string in args[1], which defaults to "?".
+ */
+static int smp_fetch_url_param(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ char delim = '?';
+ const char *name;
+ int name_len;
+
+ if ((args[0].type && args[0].type != ARGT_STR) ||
+ (args[1].type && args[1].type != ARGT_STR))
+ return 0;
+
+ name = "";
+ name_len = 0;
+ if (args->type == ARGT_STR) {
+ name = args->data.str.area;
+ name_len = args->data.str.data;
+ }
+
+ if (args[1].type)
+ delim = *args[1].data.str.area;
+
+ if (!smp->ctx.a[0]) { // first call, find the query string
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ smp->ctx.a[0] = http_find_param_list(HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl), delim);
+ if (!smp->ctx.a[0])
+ return 0;
+
+ smp->ctx.a[1] = HTX_SL_REQ_UPTR(sl) + HTX_SL_REQ_ULEN(sl);
+
+ /* Assume that the context is filled with NULL pointer
+ * before the first call.
+ * smp->ctx.a[2] = NULL;
+ * smp->ctx.a[3] = NULL;
+ */
+ }
+
+ return smp_fetch_param(delim, name, name_len, args, smp, kw, private);
+}
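+
+/* Illustrative uses (hypothetical parameter names): the second argument
+ * changes the delimiter starting the parameter list, e.g. ';' for
+ * path-style parameters:
+ *
+ *   acl has_id url_param(id) -m found
+ *   acl big_id urlp_val(id) gt 1000
+ *   acl matrix url_param(sessionid,;) -m found
+ */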
+
+/* This function iterates over each parameter of the body. This requires
+ * that the body has been waited for using http-buffer-request. It uses
+ * ctx->a[0] and ctx->a[1] to store the beginning and end of the first
+ * contiguous part of the body, and optionally ctx->a[2..3] to reference the
+ * optional second part if the body wraps at the end of the buffer. An optional
+ * parameter name is passed in args[0], otherwise any parameter is considered.
+ */
+static int smp_fetch_body_param(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ const char *name;
+ int name_len;
+
+ if (args[0].type && args[0].type != ARGT_STR)
+ return 0;
+
+ name = "";
+ name_len = 0;
+ if (args[0].type == ARGT_STR) {
+ name = args[0].data.str.area;
+ name_len = args[0].data.str.data;
+ }
+
+ if (!smp->ctx.a[0]) { // first call, gather the body into the trash buffer
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct buffer *temp;
+ int32_t pos;
+
+ if (!htx)
+ return 0;
+
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA) {
+ if (!h1_format_htx_data(htx_get_blk_value(htx, blk), temp, 0))
+ return 0;
+ }
+ }
+
+ smp->ctx.a[0] = temp->area;
+ smp->ctx.a[1] = temp->area + temp->data;
+
+ /* Assume that the context is filled with NULL pointer
+ * before the first call.
+ * smp->ctx.a[2] = NULL;
+ * smp->ctx.a[3] = NULL;
+ */
+
+ }
+
+ return smp_fetch_param('&', name, name_len, args, smp, kw, private);
+}
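+
+/* Illustrative use (hypothetical names): the body must have been buffered
+ * first, typically with "option http-buffer-request", otherwise this fetch
+ * only sees what has already arrived:
+ *
+ *   option http-buffer-request
+ *   http-request set-var(txn.user) req.body_param(user)
+ */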
+
+/* Return the signed integer value for the specified url parameter (see url_param
+ * above).
+ */
+static int smp_fetch_url_param_val(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret = smp_fetch_url_param(args, smp, kw, private);
+
+ if (ret > 0) {
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = strl2ic(smp->data.u.str.area,
+ smp->data.u.str.data);
+ }
+
+ return ret;
+}
+
+/* This produces a 32-bit hash of the concatenation of the first occurrence of
+ * the Host header followed by the path component if it begins with a slash ('/').
+ * This means that '*' will not be added, resulting in exactly the first Host
+ * entry. If no Host header is found, then the path is used. The resulting value
+ * is hashed using the URL hash followed by a full avalanche hash and provides a
+ * 32-bit integer value. This fetch is useful for tracking per-URL activity on
+ * high-traffic sites without having to store whole paths.
+ * This differs from the base32 fetch in that it includes the URL parameters
+ * as well as the path.
+ */
+static int smp_fetch_url32(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_hdr_ctx ctx;
+ struct htx_sl *sl;
+ struct ist path;
+ unsigned int hash = 0;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Host"), &ctx, 1)) {
+ /* OK we have the header value in ctx.value */
+ while (ctx.value.len--)
+ hash = *(ctx.value.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+
+ /* now retrieve the path */
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (path.len && *(path.ptr) == '/') {
+ while (path.len--)
+ hash = *(path.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+
+ hash = full_hash(hash);
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = hash;
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
+
+/* This concatenates the source address with the 32-bit hash of the Host and
+ * URL as returned by smp_fetch_url32(). The idea is to have per-source and
+ * per-url counters. The result is a binary block from 8 to 20 bytes depending
+ * on the source address length. The URL hash is stored before the address so
+ * that in environments where IPv6 is insignificant, truncating the output to
+ * 8 bytes would still work.
+ */
+static int smp_fetch_url32_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = (smp->strm ? sc_src(smp->strm->scf) : NULL);
+ struct buffer *temp;
+
+ if (!src)
+ return 0;
+
+ if (!smp_fetch_url32(args, smp, kw, private))
+ return 0;
+
+ temp = get_trash_chunk();
+ *(unsigned int *) temp->area = htonl(smp->data.u.sint);
+ temp->data += sizeof(unsigned int);
+
+ switch (src->ss_family) {
+ case AF_INET:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in *)src)->sin_addr,
+ 4);
+ temp->data += 4;
+ break;
+ case AF_INET6:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in6 *)src)->sin6_addr,
+ 16);
+ temp->data += 16;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/************************************************************************/
+/* Other utility functions */
+/************************************************************************/
+
+/* This function is used to validate the arguments passed to any "hdr" fetch
+ * keyword. These keywords support an optional positive or negative occurrence
+ * number. We must ensure that the number is greater than -MAX_HDR_HISTORY. It
+ * is assumed that the types are already the correct ones. Returns 0 on error,
+ * non-zero if OK. If <err_msg> is not NULL, it will be filled with a pointer
+ * to an error message in case of error, which the caller is responsible for
+ * freeing. The initial location must either be freeable or NULL.
+ * Note: this function's pointer is checked from Lua.
+ */
+int val_hdr(struct arg *arg, char **err_msg)
+{
+ if (arg && arg[1].type == ARGT_SINT && arg[1].data.sint < -MAX_HDR_HISTORY) {
+ memprintf(err_msg, "header occurrence must be >= %d", -MAX_HDR_HISTORY);
+ return 0;
+ }
+ return 1;
+}
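+
+/* Illustrative occurrence semantics (hypothetical header values): with
+ * "X-Forwarded-For: a, b" followed by "X-Forwarded-For: c",
+ * req.hdr(x-forwarded-for,1) returns "a", occurrence 3 returns "c", and
+ * occurrence -1 also returns "c" (counted from the end, bounded by
+ * -MAX_HDR_HISTORY as validated above).
+ */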
+
+/************************************************************************/
+/* All supported sample fetch keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "base", smp_fetch_base, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "base32", smp_fetch_base32, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "base32+src", smp_fetch_base32_src, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "baseq", smp_fetch_base, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+
+ /* captures are allocated and permanent in the stream */
+ { "capture.req.hdr", smp_fetch_capture_req_hdr, ARG1(1,SINT), NULL, SMP_T_STR, SMP_USE_HRQHP },
+
+ /* retrieve these captures from the HTTP logs */
+ { "capture.req.method", smp_fetch_capture_req_method, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+ { "capture.req.uri", smp_fetch_capture_req_uri, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+ { "capture.req.ver", smp_fetch_capture_req_ver, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+
+ { "capture.res.hdr", smp_fetch_capture_res_hdr, ARG1(1,SINT), NULL, SMP_T_STR, SMP_USE_HRSHP },
+ { "capture.res.ver", smp_fetch_capture_res_ver, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+
+ /* cookie is valid in both directions (eg: for "stick ...") but cook*
+ * are only here to match the ACL's name, are request-only and are used
+ * for ACL compatibility only.
+ */
+ { "cook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "cookie", smp_fetch_chn_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV },
+ { "cook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "cook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+
+ /* hdr is valid in both directions (eg: for "stick ...") but hdr_* are
+ * only here to match the ACL's name, are request-only and are used for
+ * ACL compatibility only.
+ */
+ { "hdr", smp_fetch_chn_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV },
+ { "hdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "hdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_IPV4, SMP_USE_HRQHV },
+ { "hdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRQHV },
+
+ { "http_auth_type", smp_fetch_http_auth_type, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth_user", smp_fetch_http_auth_user, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth_pass", smp_fetch_http_auth_pass, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth_bearer", smp_fetch_http_auth_bearer, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth", smp_fetch_http_auth, ARG1(1,USR), NULL, SMP_T_BOOL, SMP_USE_HRQHV },
+ { "http_auth_group", smp_fetch_http_auth_grp, ARG1(1,USR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_first_req", smp_fetch_http_first_req, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHP },
+ { "method", smp_fetch_meth, 0, NULL, SMP_T_METH, SMP_USE_HRQHP },
+ { "path", smp_fetch_path, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "pathq", smp_fetch_path, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "query", smp_fetch_query, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+
+ /* HTTP protocol on the request path */
+ { "req.proto_http", smp_fetch_proto_http, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHP },
+ { "req_proto_http", smp_fetch_proto_http, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHP },
+
+ /* HTTP version on the request path */
+ { "req.ver", smp_fetch_rqver, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "req_ver", smp_fetch_rqver, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+
+ { "req.body", smp_fetch_body, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "req.body_len", smp_fetch_body_len, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.body_size", smp_fetch_body_size, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.body_param", smp_fetch_body_param, ARG1(0,STR), NULL, SMP_T_BIN, SMP_USE_HRQHV },
+
+ { "req.hdrs", smp_fetch_hdrs, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "req.hdrs_bin", smp_fetch_hdrs_bin, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+
+ /* HTTP version on the response path */
+ { "res.ver", smp_fetch_stver, 0, NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "resp_ver", smp_fetch_stver, 0, NULL, SMP_T_STR, SMP_USE_HRSHV },
+
+ { "res.body", smp_fetch_body, 0, NULL, SMP_T_BIN, SMP_USE_HRSHV },
+ { "res.body_len", smp_fetch_body_len, 0, NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.body_size", smp_fetch_body_size, 0, NULL, SMP_T_SINT, SMP_USE_HRSHV },
+
+ { "res.hdrs", smp_fetch_hdrs, 0, NULL, SMP_T_BIN, SMP_USE_HRSHV },
+ { "res.hdrs_bin", smp_fetch_hdrs_bin, 0, NULL, SMP_T_BIN, SMP_USE_HRSHV },
+
+ /* explicit req.{cook,hdr} are used to force the fetch direction to be request-only */
+ { "req.cook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.cook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.cook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+
+ { "req.fhdr", smp_fetch_fhdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.fhdr_cnt", smp_fetch_fhdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.hdr", smp_fetch_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.hdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.hdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_IPV4, SMP_USE_HRQHV },
+ { "req.hdr_names", smp_fetch_hdr_names, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.hdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRQHV },
+
+ /* explicit res.{cook,hdr} are used to force the fetch direction to be response-only */
+ { "res.cook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.cook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.cook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+
+ { "res.fhdr", smp_fetch_fhdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.fhdr_cnt", smp_fetch_fhdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.hdr", smp_fetch_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.hdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.hdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_IPV4, SMP_USE_HRSHV },
+ { "res.hdr_names", smp_fetch_hdr_names, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.hdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRSHV },
+
+ /* scook is valid only on the response and is used for ACL compatibility */
+ { "scook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "scook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "scook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+
+ /* shdr is valid only on the response and is used for ACL compatibility */
+ { "shdr", smp_fetch_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRSHV },
+ { "shdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "shdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_IPV4, SMP_USE_HRSHV },
+ { "shdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRSHV },
+
+ { "status", smp_fetch_stcode, 0, NULL, SMP_T_SINT, SMP_USE_HRSHP },
+ { "unique-id", smp_fetch_uniqueid, 0, NULL, SMP_T_STR, SMP_SRC_L4SRV },
+ { "url", smp_fetch_url, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "url32", smp_fetch_url32, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "url32+src", smp_fetch_url32_src, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "url_ip", smp_fetch_url_ip, 0, NULL, SMP_T_IPV4, SMP_USE_HRQHV },
+ { "url_port", smp_fetch_url_port, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "url_param", smp_fetch_url_param, ARG2(0,STR,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "urlp" , smp_fetch_url_param, ARG2(0,STR,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "urlp_val", smp_fetch_url_param_val, ARG2(0,STR,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_htx.c b/src/http_htx.c
new file mode 100644
index 0000000..6aa81c1
--- /dev/null
+++ b/src/http_htx.c
@@ -0,0 +1,2915 @@
+/*
+ * Functions to manipulate HTTP messages using the internal representation.
+ *
+ * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/http.h>
+#include <haproxy/http-hdr.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/log.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/tools.h>
+
+
+struct buffer http_err_chunks[HTTP_ERR_SIZE];
+struct http_reply http_err_replies[HTTP_ERR_SIZE];
+
+struct eb_root http_error_messages = EB_ROOT;
+struct list http_errors_list = LIST_HEAD_INIT(http_errors_list);
+struct list http_replies_list = LIST_HEAD_INIT(http_replies_list);
+
+/* The declaration of errorfiles/errorfile directives. Used during config
+ * parsing only. */
+struct conf_errors {
+ char type; /* directive type (0: errorfiles, 1: errorfile) */
+ union {
+ struct {
+ int status; /* the status code associated to this error */
+ struct http_reply *reply; /* the http reply for the errorfile */
+ } errorfile; /* describe an "errorfile" directive */
+ struct {
+ char *name; /* the http-errors section name */
+ char status[HTTP_ERR_SIZE]; /* list of status to import (0: ignore, 1: implicit import, 2: explicit import) */
+ } errorfiles; /* describe an "errorfiles" directive */
+ } info;
+
+ char *file; /* file where the directive appears */
+ int line; /* line where the directive appears */
+
+ struct list list; /* next conf_errors */
+};
+
+/* Returns the next unprocessed start line in the HTX message. It returns NULL
+ * if the start-line is undefined (first == -1). Otherwise, it returns a
+ * pointer to the htx_sl structure.
+ */
+struct htx_sl *http_get_stline(const struct htx *htx)
+{
+ struct htx_blk *blk;
+
+ blk = htx_get_first_blk(htx);
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_REQ_SL && htx_get_blk_type(blk) != HTX_BLK_RES_SL))
+ return NULL;
+ return htx_get_blk_ptr(htx, blk);
+}
+
+/* Returns the headers size in the HTX message */
+size_t http_get_hdrs_size(struct htx *htx)
+{
+ struct htx_blk *blk;
+ size_t sz = 0;
+
+ blk = htx_get_first_blk(htx);
+ if (!blk || htx_get_blk_type(blk) > HTX_BLK_EOH)
+ return sz;
+
+ for (; blk; blk = htx_get_next_blk(htx, blk)) {
+ sz += htx_get_blksz(blk);
+ if (htx_get_blk_type(blk) == HTX_BLK_EOH)
+ break;
+ }
+ return sz;
+}
+
+/* Finds the first or next occurrence of header matching <pattern> in the HTX
+ * message <htx> using the context <ctx>. This structure holds everything
+ * necessary to use the header and find next occurrence. If its <blk> member is
+ * NULL, the header is searched from the beginning. Otherwise, the next
+ * occurrence is returned. The function returns 1 when it finds a value, and 0
+ * when there is no more. It is designed to work with headers defined as
+ * comma-separated lists. If the HTTP_FIND_FL_FULL flag is set, it works on
+ * full-line headers, for which a comma is not a delimiter but part of the
+ * syntax. As a special case, if ctx->value is NULL when searching for a new
+ * value of a header, the current header is rescanned. This allows rescanning
+ * after a header deletion.
+ *
+ * The matching method is chosen by checking the flags :
+ *
+ * * HTTP_FIND_FL_MATCH_REG : <pattern> is a regex. header names matching
+ * the regex are evaluated.
+ * * HTTP_FIND_FL_MATCH_STR : <pattern> is a string. The header names equal
+ * to the string are evaluated.
+ * * HTTP_FIND_FL_MATCH_PFX : <pattern> is a string. The header names
+ * starting by the string are evaluated.
+ * * HTTP_FIND_FL_MATCH_SFX : <pattern> is a string. The header names
+ * ending by the string are evaluated.
+ * * HTTP_FIND_FL_MATCH_SUB : <pattern> is a string. The header names
+ * containing the string are evaluated.
+ */
+
+#define HTTP_FIND_FL_MATCH_STR 0x0001
+#define HTTP_FIND_FL_MATCH_PFX 0x0002
+#define HTTP_FIND_FL_MATCH_SFX 0x0003
+#define HTTP_FIND_FL_MATCH_SUB 0x0004
+#define HTTP_FIND_FL_MATCH_REG 0x0005
+/* 0x0006..0x000f: for other matching methods */
+#define HTTP_FIND_FL_MATCH_TYPE 0x000F
+#define HTTP_FIND_FL_FULL 0x0010
+
+static int __http_find_header(const struct htx *htx, const void *pattern, struct http_hdr_ctx *ctx, int flags)
+{
+ struct htx_blk *blk = ctx->blk;
+ struct ist n, v;
+ enum htx_blk_type type;
+
+ if (blk) {
+ char *p;
+
+ if (!isttest(ctx->value))
+ goto rescan_hdr;
+ if (flags & HTTP_FIND_FL_FULL)
+ goto next_blk;
+ v = htx_get_blk_value(htx, blk);
+ p = istend(ctx->value) + ctx->lws_after;
+ v.len -= (p - v.ptr);
+ v.ptr = p;
+ if (!v.len)
+ goto next_blk;
+ /* Skip comma */
+ if (*(v.ptr) == ',') {
+ v = istnext(v);
+ }
+
+ goto return_hdr;
+ }
+
+ if (htx_is_empty(htx))
+ return 0;
+
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ rescan_hdr:
+ type = htx_get_blk_type(blk);
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ if ((flags & HTTP_FIND_FL_MATCH_TYPE) == HTTP_FIND_FL_MATCH_REG) {
+ const struct my_regex *re = pattern;
+
+ n = htx_get_blk_name(htx, blk);
+ if (!regex_exec2(re, n.ptr, n.len))
+ goto next_blk;
+ }
+ else {
+ const struct ist name = *(const struct ist *)(pattern);
+
+ /* If no name was passed, we want any header. So skip the comparison */
+ if (!istlen(name))
+ goto match;
+
+ n = htx_get_blk_name(htx, blk);
+ switch (flags & HTTP_FIND_FL_MATCH_TYPE) {
+ case HTTP_FIND_FL_MATCH_STR:
+ if (!isteqi(n, name))
+ goto next_blk;
+ break;
+ case HTTP_FIND_FL_MATCH_PFX:
+ if (istlen(n) < istlen(name))
+ goto next_blk;
+
+ n = ist2(istptr(n), istlen(name));
+ if (!isteqi(n, name))
+ goto next_blk;
+ break;
+ case HTTP_FIND_FL_MATCH_SFX:
+ if (istlen(n) < istlen(name))
+ goto next_blk;
+
+ n = ist2(istend(n) - istlen(name),
+ istlen(name));
+ if (!isteqi(n, name))
+ goto next_blk;
+ break;
+ case HTTP_FIND_FL_MATCH_SUB:
+ if (!strnistr(n.ptr, n.len, name.ptr, name.len))
+ goto next_blk;
+ break;
+ default:
+ goto next_blk;
+ break;
+ }
+ }
+ match:
+ v = htx_get_blk_value(htx, blk);
+
+ return_hdr:
+ ctx->lws_before = 0;
+ ctx->lws_after = 0;
+ while (v.len && HTTP_IS_LWS(*v.ptr)) {
+ v = istnext(v);
+ ctx->lws_before++;
+ }
+ if (!(flags & HTTP_FIND_FL_FULL))
+ v.len = http_find_hdr_value_end(v.ptr, istend(v)) - v.ptr;
+
+ while (v.len && HTTP_IS_LWS(*(istend(v) - 1))) {
+ v.len--;
+ ctx->lws_after++;
+ }
+ ctx->blk = blk;
+ ctx->value = v;
+ return 1;
+
+ next_blk:
+ ;
+ }
+
+ ctx->blk = NULL;
+ ctx->value = ist("");
+ ctx->lws_before = ctx->lws_after = 0;
+ return 0;
+}
+
+
+/* Header names must match <name> */
+int http_find_header(const struct htx *htx, const struct ist name, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &name, ctx, HTTP_FIND_FL_MATCH_STR | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+/* Header names must match <name>. Same as http_find_header() */
+int http_find_str_header(const struct htx *htx, const struct ist name, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &name, ctx, HTTP_FIND_FL_MATCH_STR | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+
+/* Header names must start with <prefix> */
+int http_find_pfx_header(const struct htx *htx, const struct ist prefix, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &prefix, ctx, HTTP_FIND_FL_MATCH_PFX | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+/* Header names must end with <suffix> */
+int http_find_sfx_header(const struct htx *htx, const struct ist suffix, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &suffix, ctx, HTTP_FIND_FL_MATCH_SFX | (full ? HTTP_FIND_FL_FULL : 0));
+}
+/* Header names must contain <sub> */
+int http_find_sub_header(const struct htx *htx, const struct ist sub, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &sub, ctx, HTTP_FIND_FL_MATCH_SUB | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+/* Header names must match <re> regex */
+int http_match_header(const struct htx *htx, const struct my_regex *re, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, re, ctx, HTTP_FIND_FL_MATCH_REG | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+
+/* Adds a header block into the HTX message <htx>, just before the EOH block. It
+ * returns 1 on success, otherwise it returns 0.
+ */
+int http_add_header(struct htx *htx, const struct ist n, const struct ist v)
+{
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ enum htx_blk_type type = htx_get_tail_type(htx);
+ int32_t prev;
+
+ blk = htx_add_header(htx, n, v);
+ if (!blk)
+ goto fail;
+
+ if (unlikely(type < HTX_BLK_EOH))
+ goto end;
+
+ /* <blk> is the last block; swap it iteratively with its predecessor to
+ * place it just before the end-of-header block, so blocks remain ordered. */
+ for (prev = htx_get_prev(htx, htx->tail); prev != htx->first; prev = htx_get_prev(htx, prev)) {
+ struct htx_blk *pblk = htx_get_blk(htx, prev);
+ enum htx_blk_type type = htx_get_blk_type(pblk);
+
+ /* Swap .addr and .info fields */
+ blk->addr ^= pblk->addr; pblk->addr ^= blk->addr; blk->addr ^= pblk->addr;
+ blk->info ^= pblk->info; pblk->info ^= blk->info; blk->info ^= pblk->info;
+
+ if (blk->addr == pblk->addr)
+ blk->addr += htx_get_blksz(pblk);
+
+ /* Stop when end-of-header is reached */
+ if (type == HTX_BLK_EOH)
+ break;
+
+ blk = pblk;
+ }
+
+ end:
+ sl = http_get_stline(htx);
+ if (sl && (sl->flags & HTX_SL_F_HAS_AUTHORITY) && isteqi(n, ist("host"))) {
+ if (!http_update_authority(htx, sl, v))
+ goto fail;
+ }
+ return 1;
+
+ fail:
+ return 0;
+}
+
+/* Replaces parts of the start-line of the HTX message <htx>. It returns 1 on
+ * success, otherwise it returns 0.
+ */
+int http_replace_stline(struct htx *htx, const struct ist p1, const struct ist p2, const struct ist p3)
+{
+ struct htx_blk *blk;
+
+ blk = htx_get_first_blk(htx);
+ if (!blk || !htx_replace_stline(htx, blk, p1, p2, p3))
+ return 0;
+ return 1;
+}
+
+/* Replace the request method in the HTX message <htx> by <meth>. It returns 1
+ * on success, otherwise 0.
+ */
+int http_replace_req_meth(struct htx *htx, const struct ist meth)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist uri, vsn;
+
+ if (!sl)
+ return 0;
+
+ /* Start by copying old uri and version */
+ chunk_memcat(temp, HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl)); /* uri */
+ uri = ist2(temp->area, HTX_SL_REQ_ULEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + uri.len, HTX_SL_REQ_VLEN(sl));
+
+ /* create the new start line */
+ sl->info.req.meth = find_http_meth(meth.ptr, meth.len);
+ return http_replace_stline(htx, meth, uri, vsn);
+}
+
+/* Replace the request uri in the HTX message <htx> by <uri>. It returns 1 on
+ * success, otherwise 0.
+ */
+int http_replace_req_uri(struct htx *htx, const struct ist uri)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist meth, vsn;
+
+ if (!sl)
+ goto fail;
+
+ /* Start by copying old method and version */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ /* create the new start line */
+ if (!http_replace_stline(htx, meth, uri, vsn))
+ goto fail;
+
+ sl = http_get_stline(htx);
+ ALREADY_CHECKED(sl); /* the stline exists because http_replace_stline() succeeded */
+ sl->flags &= ~HTX_SL_F_NORMALIZED_URI;
+
+ if (!http_update_host(htx, sl, uri))
+ goto fail;
+
+ return 1;
+ fail:
+ return 0;
+}
+
+/* Replace the request path in the HTX message <htx> by <path>. The host part
+ * is preserved. If <with_qs> is set, the query string is evaluated as part of
+ * the path and replaced. Otherwise, it is preserved too. It returns 1 on
+ * success, otherwise 0.
+ */
+int http_replace_req_path(struct htx *htx, const struct ist path, int with_qs)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist meth, uri, vsn, p;
+ size_t plen = 0;
+ struct http_uri_parser parser;
+
+ if (!sl)
+ return 0;
+
+ uri = htx_sl_req_uri(sl);
+ parser = http_uri_parser_init(uri);
+ p = http_parse_path(&parser);
+ if (!isttest(p))
+ p = uri;
+ if (with_qs)
+ plen = p.len;
+ else {
+ while (plen < p.len && *(p.ptr + plen) != '?')
+ plen++;
+ }
+
+ /* Start by copying old method and version and create the new uri */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ chunk_memcat(temp, uri.ptr, p.ptr - uri.ptr); /* uri: host part */
+ chunk_istcat(temp, path); /* uri: new path */
+ chunk_memcat(temp, p.ptr + plen, p.len - plen); /* uri: QS part */
+ uri = ist2(temp->area + meth.len + vsn.len, uri.len - plen + path.len);
+
+ /* create the new start line */
+ return http_replace_stline(htx, meth, uri, vsn);
+}
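+
+/* Illustrative behaviour (not from the original source): with the URI
+ * "http://ex.com/old?q=1", replacing the path by "/new" yields
+ * "http://ex.com/new?q=1" when <with_qs> is 0, and "http://ex.com/new"
+ * when <with_qs> is set, since the query string is then part of the
+ * replaced path.
+ */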
+
+/* Replace the request query-string in the HTX message <htx> by <query>. The
+ * host part and the path are preserved. It returns 1 on success, otherwise
+ * 0.
+ */
+int http_replace_req_query(struct htx *htx, const struct ist query)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist meth, uri, vsn, q;
+ int offset = 1;
+
+ if (!sl)
+ return 0;
+
+ uri = htx_sl_req_uri(sl);
+ q = uri;
+ while (q.len > 0 && *(q.ptr) != '?') {
+ q = istnext(q);
+ }
+
+ /* skip the old question mark, or indicate that the one from <query>
+ * must be kept (but only if <query> is not empty, i.e. longer than
+ * just "?").
+ */
+ if (q.len) {
+ q = istnext(q);
+ }
+ else if (query.len > 1)
+ offset = 0;
+
+ /* Start by copying old method and version and create the new uri */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ chunk_memcat(temp, uri.ptr, q.ptr - uri.ptr); /* uri: host + path part */
+ chunk_memcat(temp, query.ptr + offset, query.len - offset); /* uri: new QS */
+ uri = ist2(temp->area + meth.len + vsn.len, uri.len - q.len + query.len - offset);
+
+ /* create the new start line */
+ return http_replace_stline(htx, meth, uri, vsn);
+}
+
+/* Replace the response status in the HTX message <htx> by <status>. It returns
+ * 1 on success, otherwise 0.
+*/
+int http_replace_res_status(struct htx *htx, const struct ist status, const struct ist reason)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist vsn, r;
+
+ if (!sl)
+ return 0;
+
+ /* Start by copying the old version, and the old reason if none is provided */
+ chunk_memcat(temp, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area, HTX_SL_RES_VLEN(sl));
+ r = reason;
+ if (!isttest(r)) {
+ chunk_memcat(temp, HTX_SL_RES_RPTR(sl), HTX_SL_RES_RLEN(sl)); /* reason */
+ r = ist2(temp->area + vsn.len, HTX_SL_RES_RLEN(sl));
+ }
+
+ /* create the new start line */
+ sl->info.res.status = strl2ui(status.ptr, status.len);
+ return http_replace_stline(htx, vsn, status, r);
+}
+
+/* Replace the response reason in the HTX message <htx> by <reason>. It returns
+ * 1 on success, otherwise 0.
+*/
+int http_replace_res_reason(struct htx *htx, const struct ist reason)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist vsn, status;
+
+ if (!sl)
+ return 0;
+
+ /* Start by copying the old version and status code */
+ chunk_memcat(temp, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area, HTX_SL_RES_VLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl)); /* code */
+ status = ist2(temp->area + vsn.len, HTX_SL_RES_CLEN(sl));
+
+ /* create the new start line */
+ return http_replace_stline(htx, vsn, status, reason);
+}
+
+/* Replaces a part of a header value referenced in the context <ctx> by
+ * <data>. It returns 1 on success, otherwise it returns 0. The context is
+ * updated if necessary.
+ */
+int http_replace_header_value(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist data)
+{
+ struct htx_blk *blk = ctx->blk;
+ struct htx_sl *sl;
+ char *start;
+ struct ist v;
+ uint32_t len, off;
+
+ if (!blk)
+ goto fail;
+
+ v = htx_get_blk_value(htx, blk);
+ start = ctx->value.ptr - ctx->lws_before;
+ len = ctx->lws_before + ctx->value.len + ctx->lws_after;
+ off = start - v.ptr;
+
+ blk = htx_replace_blk_value(htx, blk, ist2(start, len), data);
+ if (!blk)
+ goto fail;
+
+ v = htx_get_blk_value(htx, blk);
+
+ sl = http_get_stline(htx);
+ if (sl && (sl->flags & HTX_SL_F_HAS_AUTHORITY)) {
+ struct ist n = htx_get_blk_name(htx, blk);
+
+ if (isteq(n, ist("host"))) {
+ if (!http_update_authority(htx, sl, v))
+ goto fail;
+ ctx->blk = NULL;
+ http_find_header(htx, ist("host"), ctx, 1);
+ blk = ctx->blk;
+ v = htx_get_blk_value(htx, blk);
+ }
+ }
+
+ ctx->blk = blk;
+ ctx->value = ist2(v.ptr + off, data.len);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+ fail:
+ return 0;
+}
+
+/* Fully replaces a header referenced in the context <ctx> by the name <name>
+ * with the value <value>. It returns 1 on success, otherwise it returns 0. The
+ * context is updated if necessary.
+ */
+int http_replace_header(struct htx *htx, struct http_hdr_ctx *ctx,
+ const struct ist name, const struct ist value)
+{
+ struct htx_blk *blk = ctx->blk;
+ struct htx_sl *sl;
+
+ if (!blk)
+ goto fail;
+
+ blk = htx_replace_header(htx, blk, name, value);
+ if (!blk)
+ goto fail;
+
+ sl = http_get_stline(htx);
+ if (sl && (sl->flags & HTX_SL_F_HAS_AUTHORITY) && isteqi(name, ist("host"))) {
+ if (!http_update_authority(htx, sl, value))
+ goto fail;
+ ctx->blk = NULL;
+ http_find_header(htx, ist("host"), ctx, 1);
+ blk = ctx->blk;
+ }
+
+ ctx->blk = blk;
+ ctx->value = ist(NULL);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+ fail:
+ return 0;
+}
+
+/* Remove one value of a header. This only works on a <ctx> returned by the
+ * http_find_header() function. The value is removed, as well as surrounding commas
+ * if any. If the removed value was alone, the whole header is removed. The
+ * <ctx> is always updated accordingly, as well as the HTX message <htx>. It
+ * returns 1 on success. Otherwise, it returns 0. The <ctx> is always left in a
+ * form that can be handled by http_find_header() to find next occurrence.
+ */
+int http_remove_header(struct htx *htx, struct http_hdr_ctx *ctx)
+{
+ struct htx_blk *blk = ctx->blk;
+ char *start;
+ struct ist v;
+ uint32_t len;
+
+ if (!blk)
+ return 0;
+
+ start = ctx->value.ptr - ctx->lws_before;
+ len = ctx->lws_before + ctx->value.len + ctx->lws_after;
+
+ v = htx_get_blk_value(htx, blk);
+ if (len == v.len) {
+ blk = htx_remove_blk(htx, blk);
+ if (blk || htx_is_empty(htx)) {
+ ctx->blk = blk;
+ ctx->value = IST_NULL;
+ ctx->lws_before = ctx->lws_after = 0;
+ }
+ else {
+ ctx->blk = htx_get_blk(htx, htx->tail);
+ ctx->value = htx_get_blk_value(htx, ctx->blk);
+ ctx->lws_before = ctx->lws_after = 0;
+ }
+ return 1;
+ }
+
+ /* This was not the only value of this header. We have to remove the
+ * part pointed by ctx->value. If it is the last entry of the list, we
+ * remove the last separator.
+ */
+ if (start == v.ptr) {
+ /* It's the first header part but not the only one. So remove
+ * the comma after it. */
+ len++;
+ }
+ else {
+ /* There is at least one header part before the removed one. So
+ * remove the comma between them. */
+ start--;
+ len++;
+ }
+ /* Update the block content and its len */
+ memmove(start, start+len, v.len-len);
+ htx_change_blk_value_len(htx, blk, v.len-len);
+
+ /* Finally update the ctx */
+ ctx->value = ist2(start, 0);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+}
+
+/* Updates the authority part of the uri with the value <host>. This happens
+ * when the host header is modified. It returns 0 on failure and 1 on success.
+ * It is the caller's responsibility to provide the start-line and to make sure
+ * the uri contains an authority. Thus, if no authority is found in the uri, an
+ * error is returned.
+ */
+int http_update_authority(struct htx *htx, struct htx_sl *sl, const struct ist host)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct ist meth, vsn, uri, authority;
+ struct http_uri_parser parser;
+
+ uri = htx_sl_req_uri(sl);
+ parser = http_uri_parser_init(uri);
+ authority = http_parse_authority(&parser, 1);
+ if (!authority.len)
+ return 0;
+
+ /* Don't update the uri if there is no change */
+ if (isteq(host, authority))
+ return 1;
+
+ /* Start by copying old method and version */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ chunk_memcat(temp, uri.ptr, authority.ptr - uri.ptr);
+ chunk_istcat(temp, host);
+ chunk_memcat(temp, istend(authority), istend(uri) - istend(authority));
+ uri = ist2(temp->area + meth.len + vsn.len, host.len + uri.len - authority.len); /* uri */
+
+ return http_replace_stline(htx, meth, uri, vsn);
+
+}
+
+/* Update the host header by extracting the authority of the uri <uri>. Flags
+ * of the start-line are also updated accordingly. For origin-form and
+ * asterisk-form uris, the host header is not changed and the flag
+ * HTX_SL_F_HAS_AUTHORITY is removed from the flags of the start-line.
+ * Otherwise, this flag is set and the authority is used to set the value of
+ * the host header. This function returns 0 on failure and 1 on success.
+ */
+int http_update_host(struct htx *htx, struct htx_sl *sl, const struct ist uri)
+{
+ struct ist authority;
+ struct http_hdr_ctx ctx;
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+
+ if (parser.format == URI_PARSER_FORMAT_EMPTY ||
+ parser.format == URI_PARSER_FORMAT_ASTERISK ||
+ parser.format == URI_PARSER_FORMAT_ABSPATH) {
+ sl->flags &= ~HTX_SL_F_HAS_AUTHORITY;
+ }
+ else {
+ sl->flags |= HTX_SL_F_HAS_AUTHORITY;
+ if (sl->info.req.meth != HTTP_METH_CONNECT) {
+ // absolute-form (RFC7230 #5.3.2)
+ sl->flags |= HTX_SL_F_HAS_SCHM;
+ if (uri.len > 4 && (uri.ptr[0] | 0x20) == 'h')
+ sl->flags |= ((uri.ptr[4] == ':') ? HTX_SL_F_SCHM_HTTP : HTX_SL_F_SCHM_HTTPS);
+
+ authority = http_parse_authority(&parser, 1);
+ if (!authority.len)
+ goto fail;
+ }
+ else {
+ // authority-form (RFC7230 #5.3.3)
+ authority = uri;
+ }
+
+ /* Replace header host value */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("host"), &ctx, 1)) {
+ if (!http_replace_header_value(htx, &ctx, authority))
+ goto fail;
+ }
+
+ }
+ return 1;
+ fail:
+ return 0;
+}
+
+/* Return in <vptr> and <vlen> the pointer and length of occurrence <occ> of
+ * the header <hdr>. If <ctx> is null, lookup is
+ * performed over the whole headers. Otherwise it must contain a valid header
+ * context, initialised with ctx->blk=NULL for the first lookup in a series. If
+ * <occ> is positive or null, occurrence #occ from the beginning (or last ctx)
+ * is returned. Occ #0 and #1 are equivalent. If <occ> is negative (and no less
+ * than -MAX_HDR_HISTORY), the occurrence is counted from the last one which is
+ * -1. The value fetch stops at commas, so this function is suited for use with
+ * list headers.
+ * The return value is 0 if nothing was found, or non-zero otherwise.
+ */
+unsigned int http_get_htx_hdr(const struct htx *htx, const struct ist hdr,
+ int occ, struct http_hdr_ctx *ctx, char **vptr, size_t *vlen)
+{
+ struct http_hdr_ctx local_ctx;
+ struct ist val_hist[MAX_HDR_HISTORY];
+ unsigned int hist_idx;
+ int found;
+
+ if (!ctx) {
+ local_ctx.blk = NULL;
+ ctx = &local_ctx;
+ }
+
+ if (occ >= 0) {
+ /* search from the beginning */
+ while (http_find_header(htx, hdr, ctx, 0)) {
+ occ--;
+ if (occ <= 0) {
+ *vptr = ctx->value.ptr;
+ *vlen = ctx->value.len;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ /* negative occurrence, we scan all the list then walk back */
+ if (-occ > MAX_HDR_HISTORY)
+ return 0;
+
+ found = hist_idx = 0;
+ while (http_find_header(htx, hdr, ctx, 0)) {
+ val_hist[hist_idx] = ctx->value;
+ if (++hist_idx >= MAX_HDR_HISTORY)
+ hist_idx = 0;
+ found++;
+ }
+ if (-occ > found)
+ return 0;
+
+ /* OK now we have the last occurrence in [hist_idx-1], and we need to
+ * find occurrence -occ. 0 <= hist_idx < MAX_HDR_HISTORY, and we have
+ * -10 <= occ <= -1. So we have to check [hist_idx%MAX_HDR_HISTORY+occ]
+ * to remain in the 0..9 range.
+ */
+ hist_idx += occ + MAX_HDR_HISTORY;
+ if (hist_idx >= MAX_HDR_HISTORY)
+ hist_idx -= MAX_HDR_HISTORY;
+ *vptr = val_hist[hist_idx].ptr;
+ *vlen = val_hist[hist_idx].len;
+ return 1;
+}
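+
+/* Illustrative use (hypothetical caller, not part of this patch): fetch the
+ * last comma-separated value of X-Forwarded-For:
+ *
+ *     char *vptr;
+ *     size_t vlen;
+ *
+ *     if (http_get_htx_hdr(htx, ist("x-forwarded-for"), -1, NULL, &vptr, &vlen))
+ *         last_addr = ist2(vptr, vlen);
+ */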
+
+/* Return in <vptr> and <vlen> the pointer and length of occurrence <occ> of
+ * the header <hdr>. If <ctx> is null, the lookup is performed over the whole
+ * headers. Otherwise it must contain a valid header context, initialised with
+ * ctx->blk=NULL for the first lookup in a series. If <occ> is positive or
+ * null, occurrence #occ from the beginning (or last ctx) is returned. Occ #0
+ * and #1 are equivalent. If <occ> is negative (and no less than
+ * -MAX_HDR_HISTORY), the occurrence is counted from the last one, which is
+ * -1. This function differs from http_get_htx_hdr() in that it only returns
+ * full-line header values and does not stop at commas.
+ * The return value is 0 if nothing was found, or non-zero otherwise.
+ */
+unsigned int http_get_htx_fhdr(const struct htx *htx, const struct ist hdr,
+ int occ, struct http_hdr_ctx *ctx, char **vptr, size_t *vlen)
+{
+ struct http_hdr_ctx local_ctx;
+ struct ist val_hist[MAX_HDR_HISTORY];
+ unsigned int hist_idx;
+ int found;
+
+ if (!ctx) {
+ local_ctx.blk = NULL;
+ ctx = &local_ctx;
+ }
+
+ if (occ >= 0) {
+ /* search from the beginning */
+ while (http_find_header(htx, hdr, ctx, 1)) {
+ occ--;
+ if (occ <= 0) {
+ *vptr = ctx->value.ptr;
+ *vlen = ctx->value.len;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ /* negative occurrence, we scan all the list then walk back */
+ if (-occ > MAX_HDR_HISTORY)
+ return 0;
+
+ found = hist_idx = 0;
+ while (http_find_header(htx, hdr, ctx, 1)) {
+ val_hist[hist_idx] = ctx->value;
+ if (++hist_idx >= MAX_HDR_HISTORY)
+ hist_idx = 0;
+ found++;
+ }
+ if (-occ > found)
+ return 0;
+
+ /* OK now we have the last occurrence in [hist_idx-1], and we need to
+ * find occurrence -occ. 0 <= hist_idx < MAX_HDR_HISTORY, and we have
+ * -10 <= occ <= -1. So we have to check [hist_idx%MAX_HDR_HISTORY+occ]
+ * to remain in the 0..9 range.
+ */
+ hist_idx += occ + MAX_HDR_HISTORY;
+ if (hist_idx >= MAX_HDR_HISTORY)
+ hist_idx -= MAX_HDR_HISTORY;
+ *vptr = val_hist[hist_idx].ptr;
+ *vlen = val_hist[hist_idx].len;
+ return 1;
+}
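+
+/* Illustrative use (hypothetical caller, not part of this patch): full-line
+ * values matter for headers in which a comma is not a list separator, e.g.
+ * dates:
+ *
+ *     char *vptr;
+ *     size_t vlen;
+ *
+ *     if (http_get_htx_fhdr(htx, ist("last-modified"), 1, NULL, &vptr, &vlen))
+ *         parse_date(vptr, vlen);   // parse_date() is hypothetical
+ */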
+
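+/* Converts the raw HTTP response message <raw> into an HTX message stored in
+ * <buf>, whose storage is allocated here. It returns 1 on success and 0 on
+ * error, in which case an error message may be written into <errmsg>.
+ */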
+int http_str_to_htx(struct buffer *buf, struct ist raw, char **errmsg)
+{
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct h1m h1m;
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ union h1_sl h1sl;
+ unsigned int flags = HTX_SL_F_IS_RESP;
+ int ret = 0;
+
+ b_reset(buf);
+ if (!raw.len) {
+ buf->size = 0;
+ buf->area = NULL;
+ return 1;
+ }
+
+ buf->size = global.tune.bufsize;
+ buf->area = malloc(buf->size);
+ if (!buf->area)
+ goto error;
+
+ h1m_init_res(&h1m);
+ h1m.flags |= H1_MF_NO_PHDR;
+ ret = h1_headers_to_hdr_list(raw.ptr, istend(raw),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), &h1m, &h1sl);
+ if (ret <= 0) {
+ memprintf(errmsg, "unable to parse headers (error offset: %d)", h1m.err_pos);
+ goto error;
+ }
+
+ if (unlikely(h1sl.st.v.len != 8)) {
+ memprintf(errmsg, "invalid http version (%.*s)", (int)h1sl.st.v.len, h1sl.st.v.ptr);
+ goto error;
+ }
+ if ((*(h1sl.st.v.ptr + 5) > '1') ||
+ ((*(h1sl.st.v.ptr + 5) == '1') && (*(h1sl.st.v.ptr + 7) >= '1')))
+ h1m.flags |= H1_MF_VER_11;
+
+ if (h1sl.st.status < 200 && (h1sl.st.status == 100 || h1sl.st.status >= 102)) {
+ memprintf(errmsg, "invalid http status code for an error message (%u)",
+ h1sl.st.status);
+ goto error;
+ }
+
+ if (h1sl.st.status == 204 || h1sl.st.status == 304) {
+ /* Responses known to have no body. */
+ h1m.flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
+ h1m.flags |= H1_MF_XFER_LEN;
+ h1m.curr_len = h1m.body_len = 0;
+ }
+ else if (h1m.flags & (H1_MF_CLEN|H1_MF_CHNK))
+ h1m.flags |= H1_MF_XFER_LEN;
+
+ if (h1m.flags & H1_MF_VER_11)
+ flags |= HTX_SL_F_VER_11;
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m.flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK) {
+ memprintf(errmsg, "chunk-encoded payload not supported");
+ goto error;
+ }
+ else if (h1m.flags & H1_MF_CLEN) {
+ flags |= HTX_SL_F_CLEN;
+ if (h1m.body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ else
+ flags |= HTX_SL_F_BODYLESS;
+ }
+
+ if ((flags & HTX_SL_F_BODYLESS) && raw.len > ret) {
+ memprintf(errmsg, "message payload not expected");
+ goto error;
+ }
+ if ((flags & HTX_SL_F_CLEN) && h1m.body_len != (raw.len - ret)) {
+ memprintf(errmsg, "payload size does not match the announced content-length (%lu != %lu)",
+ (unsigned long)(raw.len - ret), (unsigned long)h1m.body_len);
+ goto error;
+ }
+
+ htx = htx_from_buf(buf);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, h1sl.st.v, h1sl.st.c, h1sl.st.r);
+ if (!sl || !htx_add_all_headers(htx, hdrs)) {
+ memprintf(errmsg, "unable to add headers into the HTX message");
+ goto error;
+ }
+ sl->info.res.status = h1sl.st.status;
+
+ while (raw.len > ret) {
+ int sent = htx_add_data(htx, ist2(raw.ptr + ret, raw.len - ret));
+ if (!sent) {
+ memprintf(errmsg, "unable to add payload into the HTX message");
+ goto error;
+ }
+ ret += sent;
+ }
+
+ htx->flags |= HTX_FL_EOM;
+
+ return 1;
+
+error:
+ if (buf->size)
+ free(buf->area);
+ return 0;
+}
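+
+/* Illustrative use (hypothetical caller, not part of this patch):
+ *
+ *     struct buffer chk;
+ *     char *errmsg = NULL;
+ *     struct ist raw = ist("HTTP/1.0 403 Forbidden\r\n"
+ *                          "Content-Length: 0\r\n\r\n");
+ *
+ *     if (!http_str_to_htx(&chk, raw, &errmsg))
+ *         ha_alert("%s\n", errmsg);
+ */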
+
+void release_http_reply(struct http_reply *http_reply)
+{
+ struct logformat_node *lf, *lfb;
+ struct http_reply_hdr *hdr, *hdrb;
+
+ if (!http_reply)
+ return;
+
+ ha_free(&http_reply->ctype);
+ list_for_each_entry_safe(hdr, hdrb, &http_reply->hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ list_for_each_entry_safe(lf, lfb, &hdr->value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ istfree(&hdr->name);
+ free(hdr);
+ }
+
+ if (http_reply->type == HTTP_REPLY_ERRFILES) {
+ ha_free(&http_reply->body.http_errors);
+ }
+ else if (http_reply->type == HTTP_REPLY_RAW)
+ chunk_destroy(&http_reply->body.obj);
+ else if (http_reply->type == HTTP_REPLY_LOGFMT) {
+ list_for_each_entry_safe(lf, lfb, &http_reply->body.fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ }
+ free(http_reply);
+}
+
+static int http_htx_init(void)
+{
+ struct buffer chk;
+ struct ist raw;
+ char *errmsg = NULL;
+ int rc;
+ int err_code = 0;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (!http_err_msgs[rc]) {
+ ha_alert("Internal error: no default message defined for HTTP return code %d", rc);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ continue;
+ }
+
+ raw = ist(http_err_msgs[rc]);
+ if (!http_str_to_htx(&chk, raw, &errmsg)) {
+ ha_alert("Internal error: invalid default message for HTTP return code %d: %s.\n",
+ http_err_codes[rc], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (errmsg) {
+ ha_warning("invalid default message for HTTP return code %d: %s.\n", http_err_codes[rc], errmsg);
+ err_code |= ERR_WARN;
+ }
+
+ /* Reset errmsg */
+ ha_free(&errmsg);
+
+ http_err_chunks[rc] = chk;
+ http_err_replies[rc].type = HTTP_REPLY_ERRMSG;
+ http_err_replies[rc].status = http_err_codes[rc];
+ http_err_replies[rc].ctype = NULL;
+ LIST_INIT(&http_err_replies[rc].hdrs);
+ http_err_replies[rc].body.errmsg = &http_err_chunks[rc];
+ }
+end:
+ return err_code;
+}
+
+static void http_htx_deinit(void)
+{
+ struct http_errors *http_errs, *http_errsb;
+ struct http_reply *http_rep, *http_repb;
+ struct ebpt_node *node, *next;
+ struct http_error_msg *http_errmsg;
+ int rc;
+
+ node = ebpt_first(&http_error_messages);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ chunk_destroy(&http_errmsg->msg);
+ free(node->key);
+ free(http_errmsg);
+ node = next;
+ }
+
+ list_for_each_entry_safe(http_errs, http_errsb, &http_errors_list, list) {
+ free(http_errs->conf.file);
+ free(http_errs->id);
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++)
+ release_http_reply(http_errs->replies[rc]);
+ LIST_DELETE(&http_errs->list);
+ free(http_errs);
+ }
+
+ list_for_each_entry_safe(http_rep, http_repb, &http_replies_list, list) {
+ LIST_DELETE(&http_rep->list);
+ release_http_reply(http_rep);
+ }
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++)
+ chunk_destroy(&http_err_chunks[rc]);
+}
+
+REGISTER_CONFIG_POSTPARSER("http_htx", http_htx_init);
+REGISTER_POST_DEINIT(http_htx_deinit);
+
+/* Reads the content of the error file <file> and converts it into an HTX
+ * message. On success, the HTX message is returned. On error, NULL is
+ * returned and an error message is written into the <errmsg> buffer.
+ */
+struct buffer *http_load_errorfile(const char *file, char **errmsg)
+{
+ struct buffer *buf = NULL;
+ struct buffer chk;
+ struct ebpt_node *node;
+ struct http_error_msg *http_errmsg;
+ struct stat stat;
+ char *err = NULL;
+ int errnum, errlen;
+ int fd = -1;
+
+ /* already loaded */
+ node = ebis_lookup_len(&http_error_messages, file, strlen(file));
+ if (node) {
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ buf = &http_errmsg->msg;
+ goto out;
+ }
+
+ /* Read the error file content */
+ fd = open(file, O_RDONLY);
+ if ((fd < 0) || (fstat(fd, &stat) < 0)) {
+ memprintf(errmsg, "error opening file '%s'.", file);
+ goto out;
+ }
+
+ if (stat.st_size <= global.tune.bufsize)
+ errlen = stat.st_size;
+ else {
+ ha_warning("custom error message file '%s' larger than %d bytes. Truncating.\n",
+ file, global.tune.bufsize);
+ errlen = global.tune.bufsize;
+ }
+
+ err = malloc(errlen);
+ if (!err) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+
+ errnum = read(fd, err, errlen);
+ if (errnum != errlen) {
+ memprintf(errmsg, "error reading file '%s'.", file);
+ goto out;
+ }
+
+ /* Create the node corresponding to the error file */
+ http_errmsg = calloc(1, sizeof(*http_errmsg));
+ if (!http_errmsg) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ http_errmsg->node.key = strdup(file);
+ if (!http_errmsg->node.key) {
+ memprintf(errmsg, "out of memory.");
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Convert the error file into an HTX message */
+ if (!http_str_to_htx(&chk, ist2(err, errlen), errmsg)) {
+ memprintf(errmsg, "'%s': %s", file, *errmsg);
+ free(http_errmsg->node.key);
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Insert the node in the tree and return the HTX message */
+ http_errmsg->msg = chk;
+ ebis_insert(&http_error_messages, &http_errmsg->node);
+ buf = &http_errmsg->msg;
+
+ out:
+ if (fd >= 0)
+ close(fd);
+ free(err);
+ return buf;
+}
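+
+/* Illustrative use (hypothetical caller, not part of this patch):
+ *
+ *     char *errmsg = NULL;
+ *     struct buffer *msg;
+ *
+ *     msg = http_load_errorfile("/etc/haproxy/errors/503.http", &errmsg);
+ *     if (!msg) {
+ *         ha_alert("%s\n", errmsg);
+ *         free(errmsg);
+ *     }
+ */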
+
+/* Converts the raw HTTP message <msg> into an HTX message, indexed by <key>.
+ * On success, the HTX message is returned. On error, NULL is returned and an
+ * error message is written into the <errmsg> buffer.
+ */
+struct buffer *http_load_errormsg(const char *key, const struct ist msg, char **errmsg)
+{
+ struct buffer *buf = NULL;
+ struct buffer chk;
+ struct ebpt_node *node;
+ struct http_error_msg *http_errmsg;
+
+ /* already loaded */
+ node = ebis_lookup_len(&http_error_messages, key, strlen(key));
+ if (node) {
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ buf = &http_errmsg->msg;
+ goto out;
+ }
+ /* Create the node corresponding to the error file */
+ http_errmsg = calloc(1, sizeof(*http_errmsg));
+ if (!http_errmsg) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ http_errmsg->node.key = strdup(key);
+ if (!http_errmsg->node.key) {
+ memprintf(errmsg, "out of memory.");
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Convert the error file into an HTX message */
+ if (!http_str_to_htx(&chk, msg, errmsg)) {
+ memprintf(errmsg, "invalid error message: %s", *errmsg);
+ free(http_errmsg->node.key);
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Insert the node in the tree and return the HTX message */
+ http_errmsg->msg = chk;
+ ebis_insert(&http_error_messages, &http_errmsg->node);
+ buf = &http_errmsg->msg;
+ out:
+ return buf;
+}
+
+/* This function parses the raw HTTP error file <file> for the status code
+ * <status>. It returns NULL if there is any error, otherwise it returns the
+ * corresponding HTX message.
+ */
+struct buffer *http_parse_errorfile(int status, const char *file, char **errmsg)
+{
+ struct buffer *buf = NULL;
+ int rc;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == status) {
+ buf = http_load_errorfile(file, errmsg);
+ break;
+ }
+ }
+
+ if (rc >= HTTP_ERR_SIZE)
+ memprintf(errmsg, "status code '%d' not handled.", status);
+ return buf;
+}
+
+/* This function creates an HTX error message corresponding to a redirect
+ * message for the status code <status>. <url> is used as the location url for
+ * the redirect. <errloc> is used to know if it is a 302 or a 303 redirect. It
+ * returns NULL if there is any error, otherwise it returns the corresponding
+ * HTX message.
+ */
+struct buffer *http_parse_errorloc(int errloc, int status, const char *url, char **errmsg)
+{
+ static const char *HTTP_302 =
+ "HTTP/1.1 302 Found\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-length: 0\r\n"
+ "Location: "; /* not terminated since it will be concatenated with the URL */
+ static const char *HTTP_303 =
+ "HTTP/1.1 303 See Other\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-length: 0\r\n"
+ "Location: "; /* not terminated since it will be concatenated with the URL */
+
+ struct buffer *buf = NULL;
+ const char *msg;
+ char *key = NULL, *err = NULL;
+ int rc, errlen;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == status) {
+ /* Create the error key */
+ if (!memprintf(&key, "errorloc%d %s", errloc, url)) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ /* Create the error message */
+ msg = (errloc == 302 ? HTTP_302 : HTTP_303);
+ errlen = strlen(msg) + strlen(url) + 5;
+ err = malloc(errlen);
+ if (!err) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ errlen = snprintf(err, errlen, "%s%s\r\n\r\n", msg, url);
+
+ /* Load it */
+ buf = http_load_errormsg(key, ist2(err, errlen), errmsg);
+ break;
+ }
+ }
+
+ if (rc >= HTTP_ERR_SIZE)
+ memprintf(errmsg, "status code '%d' not handled.", status);
+out:
+ free(key);
+ free(err);
+ return buf;
+}
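+
+/* For example, "errorloc302 503 https://www.example.com/maint" leads to the
+ * following raw message being built, then converted to HTX (illustrative):
+ *
+ *     HTTP/1.1 302 Found
+ *     Cache-Control: no-cache
+ *     Content-length: 0
+ *     Location: https://www.example.com/maint
+ */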
+
+/* Check an "http reply" and, for replies referencing an http-errors section,
+ * try to find the right section and the right error message in this section. If
+ * found, the reply is updated. If the http-errors section exists but the error
+ * message is not found, no error message is set to fallback on the default
+ * ones. Otherwise (unknown section) an error is returned.
+ *
+ * The function returns 1 in success case, otherwise, it returns 0 and errmsg is
+ * filled.
+ */
+int http_check_http_reply(struct http_reply *reply, struct proxy *px, char **errmsg)
+{
+ struct http_errors *http_errs;
+ int ret = 1;
+
+ if (reply->type != HTTP_REPLY_ERRFILES)
+ goto end;
+
+ list_for_each_entry(http_errs, &http_errors_list, list) {
+ if (strcmp(http_errs->id, reply->body.http_errors) == 0) {
+ reply->type = HTTP_REPLY_INDIRECT;
+ free(reply->body.http_errors);
+ reply->body.reply = http_errs->replies[http_get_status_idx(reply->status)];
+ if (!reply->body.reply)
+ ha_warning("Proxy '%s': status '%d' referenced by an http reply "
+ "not declared in http-errors section '%s'.\n",
+ px->id, reply->status, http_errs->id);
+ break;
+ }
+ }
+
+ if (&http_errs->list == &http_errors_list) {
+ memprintf(errmsg, "unknown http-errors section '%s' referenced by an http reply ",
+ reply->body.http_errors);
+ ret = 0;
+ }
+
+ end:
+ return ret;
+}
+
+/* Parse an "http reply". It returns the reply on success or NULL on error. This
+ * function creates one of the following http replies :
+ *
+ * - HTTP_REPLY_EMPTY : dummy response, no payload
+ * - HTTP_REPLY_ERRMSG : implicit error message depending on the status code or explicit one
+ * - HTTP_REPLY_ERRFILES : points on an http-errors section (resolved during post-parsing)
+ * - HTTP_REPLY_RAW : explicit file object ('file' argument)
+ * - HTTP_REPLY_LOGFMT : explicit log-format string ('content' argument)
+ *
+ * The content-type must be defined for non-empty payload. It is ignored for
+ * error messages (implicit or explicit). When an http-errors section is
+ * referenced (HTTP_REPLY_ERRFILES), the real error message should be resolved
+ * during the configuration validity check or dynamically. It is the caller
+ * responsibility to choose. If no status code is configured, <default_status>
+ * is set.
+ */
+struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struct proxy *px,
+ int default_status, char **errmsg)
+{
+ struct logformat_node *lf, *lfb;
+ struct http_reply *reply = NULL;
+ struct http_reply_hdr *hdr, *hdrb;
+ struct stat stat;
+ const char *act_arg = NULL;
+ char *obj = NULL;
+ int cur_arg, cap = 0, objlen = 0, fd = -1;
+
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ LIST_INIT(&reply->hdrs);
+ reply->type = HTTP_REPLY_EMPTY;
+ reply->status = default_status;
+
+ if (px->conf.args.ctx == ARGC_HERR)
+ cap = (SMP_VAL_REQUEST | SMP_VAL_RESPONSE);
+ else {
+ if (px->cap & PR_CAP_FE)
+ cap |= ((px->conf.args.ctx == ARGC_HRQ) ? SMP_VAL_FE_HRQ_HDR : SMP_VAL_FE_HRS_HDR);
+ if (px->cap & PR_CAP_BE)
+ cap |= ((px->conf.args.ctx == ARGC_HRQ) ? SMP_VAL_BE_HRQ_HDR : SMP_VAL_BE_HRS_HDR);
+ }
+
+ cur_arg = *orig_arg;
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "status") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <status_code> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ reply->status = atol(args[cur_arg]);
+ if (reply->status < 200 || reply->status > 599) {
+ memprintf(errmsg, "Unexpected status code '%d'", reply->status);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "content-type") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <ctype> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ free(reply->ctype);
+ reply->ctype = strdup(args[cur_arg]);
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "errorfiles") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <name> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ reply->body.http_errors = strdup(args[cur_arg]);
+ if (!reply->body.http_errors) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ reply->type = HTTP_REPLY_ERRFILES;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "default-errorfiles") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ reply->type = HTTP_REPLY_ERRMSG;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "errorfile") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <fmt> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ reply->body.errmsg = http_load_errorfile(args[cur_arg], errmsg);
+ if (!reply->body.errmsg) {
+ goto error;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "file") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <file> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ fd = open(args[cur_arg], O_RDONLY);
+ if ((fd < 0) || (fstat(fd, &stat) < 0)) {
+ memprintf(errmsg, "error opening file '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (stat.st_size > global.tune.bufsize) {
+ memprintf(errmsg, "file '%s' exceeds the buffer size (%lld > %d)",
+ args[cur_arg], (long long)stat.st_size, global.tune.bufsize);
+ goto error;
+ }
+ objlen = stat.st_size;
+ obj = malloc(objlen);
+ if (!obj || read(fd, obj, objlen) != objlen) {
+ memprintf(errmsg, "error reading file '%s'", args[cur_arg]);
+ goto error;
+ }
+ close(fd);
+ fd = -1;
+ reply->type = HTTP_REPLY_RAW;
+ chunk_initlen(&reply->body.obj, obj, global.tune.bufsize, objlen);
+ obj = NULL;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "string") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <str> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ obj = strdup(args[cur_arg]);
+ objlen = strlen(args[cur_arg]);
+ if (!obj) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ reply->type = HTTP_REPLY_RAW;
+ chunk_initlen(&reply->body.obj, obj, global.tune.bufsize, objlen);
+ obj = NULL;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "lf-file") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <file> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ fd = open(args[cur_arg], O_RDONLY);
+ if ((fd < 0) || (fstat(fd, &stat) < 0)) {
+ memprintf(errmsg, "error opening file '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (stat.st_size > global.tune.bufsize) {
+ memprintf(errmsg, "file '%s' exceeds the buffer size (%lld > %d)",
+ args[cur_arg], (long long)stat.st_size, global.tune.bufsize);
+ goto error;
+ }
+ objlen = stat.st_size;
+ obj = malloc(objlen + 1);
+ if (!obj || read(fd, obj, objlen) != objlen) {
+ memprintf(errmsg, "error reading file '%s'", args[cur_arg]);
+ goto error;
+ }
+ close(fd);
+ fd = -1;
+ obj[objlen] = '\0';
+ reply->type = HTTP_REPLY_LOGFMT;
+ LIST_INIT(&reply->body.fmt);
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "lf-string") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <fmt> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ obj = strdup(args[cur_arg]);
+ objlen = strlen(args[cur_arg]);
+ if (!obj) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ reply->type = HTTP_REPLY_LOGFMT;
+ LIST_INIT(&reply->body.fmt);
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "hdr") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg] || !*args[cur_arg+1]) {
+ memprintf(errmsg, "'%s' expects <name> and <value> as arguments", args[cur_arg-1]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg], "content-length") == 0 ||
+ strcasecmp(args[cur_arg], "transfer-encoding") == 0 ||
+ strcasecmp(args[cur_arg], "content-type") == 0) {
+ ha_warning("parsing [%s:%d] : header '%s' always ignored by the http reply.\n",
+ px->conf.args.file, px->conf.args.line, args[cur_arg]);
+ cur_arg += 2;
+ continue;
+ }
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(errmsg, "'%s' : out of memory", args[cur_arg-1]);
+ goto error;
+ }
+ LIST_APPEND(&reply->hdrs, &hdr->list);
+ LIST_INIT(&hdr->value);
+ hdr->name = ist(strdup(args[cur_arg]));
+ if (!isttest(hdr->name)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ if (!parse_logformat_string(args[cur_arg+1], px, &hdr->value, LOG_OPT_HTTP, cap, errmsg))
+ goto error;
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+ cur_arg += 2;
+ }
+ else
+ break;
+ }
+
+ if (reply->type == HTTP_REPLY_EMPTY) { /* no payload */
+ if (reply->ctype) {
+ ha_warning("parsing [%s:%d] : content-type '%s' ignored by the http reply because"
+ " neither errorfile nor payload defined.\n",
+ px->conf.args.file, px->conf.args.line, reply->ctype);
+ ha_free(&reply->ctype);
+ }
+ }
+ else if (reply->type == HTTP_REPLY_ERRFILES || reply->type == HTTP_REPLY_ERRMSG) { /* errorfiles or errorfile */
+
+ if (reply->type != HTTP_REPLY_ERRMSG || !reply->body.errmsg) {
+ /* default errorfile or errorfiles: check the status */
+ int rc;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == reply->status)
+ break;
+ }
+
+ if (rc >= HTTP_ERR_SIZE) {
+ memprintf(errmsg, "status code '%d' not handled by default with '%s' argument.",
+ reply->status, act_arg);
+ goto error;
+ }
+ }
+
+ if (reply->ctype) {
+ ha_warning("parsing [%s:%d] : content-type '%s' ignored by the http reply when used "
+ "with an erorrfile.\n",
+ px->conf.args.file, px->conf.args.line, reply->ctype);
+ ha_free(&reply->ctype);
+ }
+ if (!LIST_ISEMPTY(&reply->hdrs)) {
+ ha_warning("parsing [%s:%d] : hdr parameters ignored by the http reply when used "
+ "with an erorrfile.\n",
+ px->conf.args.file, px->conf.args.line);
+ list_for_each_entry_safe(hdr, hdrb, &reply->hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ list_for_each_entry_safe(lf, lfb, &hdr->value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ istfree(&hdr->name);
+ free(hdr);
+ }
+ }
+ }
+ else if (reply->type == HTTP_REPLY_RAW) { /* raw payload using the 'file' or 'string' parameter */
+ if ((reply->status == 204 || reply->status == 304) && objlen) {
+ memprintf(errmsg, "No body expected for %d responses", reply->status);
+ goto error;
+ }
+ if (!reply->ctype && objlen) {
+ memprintf(errmsg, "a content type must be defined when non-empty payload is configured");
+ goto error;
+ }
+ if (reply->ctype && !b_data(&reply->body.obj)) {
+ ha_warning("parsing [%s:%d] : content-type '%s' ignored by the http reply when used "
+ "with an empty payload.\n",
+ px->conf.args.file, px->conf.args.line, reply->ctype);
+ ha_free(&reply->ctype);
+ }
+ if (b_room(&reply->body.obj) < global.tune.maxrewrite) {
+ ha_warning("parsing [%s:%d] : http reply payload runs over the buffer space reserved to headers rewriting."
+ " It may lead to internal errors if strict rewriting mode is enabled.\n",
+ px->conf.args.file, px->conf.args.line);
+ }
+ }
+ else if (reply->type == HTTP_REPLY_LOGFMT) { /* log-format payload using the 'lf-file' or 'lf-string' parameter */
+ LIST_INIT(&reply->body.fmt);
+ if ((reply->status == 204 || reply->status == 304)) {
+ memprintf(errmsg, "No body expected for %d responses", reply->status);
+ goto error;
+ }
+ if (!reply->ctype) {
+ memprintf(errmsg, "a content type must be defined with a log-format payload");
+ goto error;
+ }
+ if (!parse_logformat_string(obj, px, &reply->body.fmt, LOG_OPT_HTTP, cap, errmsg))
+ goto error;
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+ }
+
+ free(obj);
+ *orig_arg = cur_arg;
+ return reply;
+
+ error:
+ free(obj);
+ if (fd >= 0)
+ close(fd);
+ release_http_reply(reply);
+ return NULL;
+}
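+
+/* Illustrative configuration snippets accepted by this parser, via the
+ * "http-error" directive which relies on it (not exhaustive):
+ *
+ *     http-error status 404 errorfile /etc/haproxy/errors/404.http
+ *     http-error status 429 default-errorfiles
+ *     http-error status 503 content-type text/plain string "out of service"
+ */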
+
+/* Applies scheme-based normalization as described in RFC 3986 section 6.3.2.
+ * Returns 0 if no error has been found, else non-zero.
+ *
+ * The normalization is processed on the target-uri on the condition that it
+ * is in absolute-form. In the case where the target-uri was normalized, every
+ * Host header value found is also replaced by the normalized hostname. This
+ * assumes that the target-uri and Host headers were properly identified as
+ * similar before calling this function.
+ */
+int http_scheme_based_normalize(struct htx *htx)
+{
+ struct http_hdr_ctx ctx;
+ struct htx_sl *sl;
+ struct ist uri, scheme, authority, host, port;
+ struct http_uri_parser parser;
+
+ sl = http_get_stline(htx);
+
+ if (!sl || !(sl->flags & (HTX_SL_F_HAS_SCHM|HTX_SL_F_HAS_AUTHORITY)))
+ return 0;
+
+ uri = htx_sl_req_uri(sl);
+
+ parser = http_uri_parser_init(uri);
+ scheme = http_parse_scheme(&parser);
+ /* if no scheme found, no normalization to proceed */
+ if (!isttest(scheme))
+ return 0;
+
+ /* Extract the port if present in authority */
+ authority = http_parse_authority(&parser, 1);
+ port = http_get_host_port(authority);
+ if (!isttest(port)) {
+ /* if no port found, no normalization to proceed */
+ return 0;
+ }
+ host = isttrim(authority, istlen(authority) - istlen(port) - 1);
+
+ if (http_is_default_port(scheme, port)) {
+ /* reconstruct the uri with removal of the port */
+ struct buffer *temp = get_trash_chunk();
+ struct ist meth, vsn;
+
+ /* meth */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl));
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ /* vsn */
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl));
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ /* reconstruct uri without port */
+ chunk_memcat(temp, uri.ptr, authority.ptr - uri.ptr);
+ chunk_istcat(temp, host);
+ chunk_memcat(temp, istend(authority), istend(uri) - istend(authority));
+ uri = ist2(temp->area + meth.len + vsn.len, host.len + uri.len - authority.len); /* uri */
+
+ http_replace_stline(htx, meth, uri, vsn);
+
+ /* replace every host headers values by the normalized host */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("host"), &ctx, 0)) {
+ if (!http_replace_header_value(htx, &ctx, host))
+ goto fail;
+ }
+ }
+
+ return 0;
+
+ fail:
+ return 1;
+}
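+
+/* Example (illustrative): "GET http://example.com:80/ HTTP/1.1" carrying
+ * "Host: example.com:80" is rewritten to "GET http://example.com/ HTTP/1.1"
+ * with "Host: example.com", since 80 is the default port for the "http"
+ * scheme.
+ */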
+
+/* First step function to merge multiple cookie headers in a single entry.
+ *
+ * Use it for each cookie header at <idx> index over HTTP headers in <list>.
+ * <first> and <last> are state variables used internally and must be
+ * initialized to -1 before the first invocation.
+ */
+void http_cookie_register(struct http_hdr *list, int idx, int *first, int *last)
+{
+ /* Build a linked list of cookie headers. The header name length is
+ * reused to point to the next one. The last entry contains -1.
+ */
+
+ /* Mark the current end of cookie linked list. */
+ list[idx].n.len = -1;
+ if (*first < 0) {
+ /* Save first found cookie for http_cookie_merge call. */
+ *first = idx;
+ }
+ else {
+ /* Update linked list of cookies. */
+ list[*last].n.len = idx;
+ }
+
+ *last = idx;
+}
+
+/* Second step to merge multiple cookie headers in a single entry.
+ *
+ * Use it when looping over HTTP headers is done and <htx> message is built.
+ * This will concatenate each cookie headers present from <list> directly into
+ * <htx> message. <first> is reused from previous http_cookie_register
+ * invocation.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int http_cookie_merge(struct htx *htx, struct http_hdr *list, int first)
+{
+ uint32_t fs; /* free space */
+ uint32_t bs; /* block size */
+ uint32_t vl; /* value len */
+ uint32_t tl; /* total length */
+ struct htx_blk *blk;
+
+ if (first < 0)
+ return 0;
+
+ blk = htx_add_header(htx, ist("cookie"), list[first].v);
+ if (!blk)
+ return 1;
+
+ tl = list[first].v.len;
+ fs = htx_free_data_space(htx);
+ bs = htx_get_blksz(blk);
+
+ /* for each extra cookie, we'll extend the cookie's value and insert
+ * ";" before the new value.
+ */
+ fs += tl; /* first one is already counted */
+
+ /* Loop over cookies linked list built from http_cookie_register. */
+ while ((first = list[first].n.len) >= 0) {
+ vl = list[first].v.len;
+ tl += vl + 2;
+ if (tl > fs)
+ return 1;
+
+ htx_change_blk_value_len(htx, blk, tl);
+ *(char *)(htx_get_blk_ptr(htx, blk) + bs + 0) = ';';
+ *(char *)(htx_get_blk_ptr(htx, blk) + bs + 1) = ' ';
+ memcpy(htx_get_blk_ptr(htx, blk) + bs + 2,
+ list[first].v.ptr, vl);
+ bs += vl + 2;
+ }
+
+ return 0;
+}
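+
+/* Illustrative two-step usage over a parsed header list (hypothetical
+ * variables, not part of this patch):
+ *
+ *     int i, first = -1, last = -1;
+ *
+ *     for (i = 0; i < hdr_count; i++) {
+ *         if (isteqi(list[i].n, ist("cookie")))
+ *             http_cookie_register(list, i, &first, &last);
+ *     }
+ *     ... build <htx> from the non-cookie headers ...
+ *     if (http_cookie_merge(htx, list, first))
+ *         goto fail;
+ */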
+
+/* Parses the "errorloc[302|303]" proxy keyword */
+static int proxy_parse_errorloc(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct conf_errors *conf_err;
+ struct http_reply *reply;
+ struct buffer *msg;
+ int errloc, status;
+ int ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ memprintf(errmsg, "%s : expects <status_code> and <url> as arguments.\n", args[0]);
+ ret = -1;
+ goto out;
+ }
+
+ status = atol(args[1]);
+ errloc = (strcmp(args[0], "errorloc303") == 0 ? 303 : 302);
+ msg = http_parse_errorloc(errloc, status, args[2], errmsg);
+ if (!msg) {
+ memprintf(errmsg, "%s : %s", args[0], *errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ ret = -1;
+ goto out;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ reply->status = status;
+ reply->ctype = NULL;
+ LIST_INIT(&reply->hdrs);
+ reply->body.errmsg = msg;
+ LIST_APPEND(&http_replies_list, &reply->list);
+
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!conf_err) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ free(reply);
+ ret = -1;
+ goto out;
+ }
+ conf_err->type = 1;
+ conf_err->info.errorfile.status = status;
+ conf_err->info.errorfile.reply = reply;
+
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+
+ /* handle warning message */
+ if (*errmsg)
+ ret = 1;
+ out:
+ return ret;
+
+}
+
+/* Parses the "errorfile" proxy keyword */
+static int proxy_parse_errorfile(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct conf_errors *conf_err;
+ struct http_reply *reply;
+ struct buffer *msg;
+ int status;
+ int ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ memprintf(errmsg, "%s : expects <status_code> and <file> as arguments.\n", args[0]);
+ ret = -1;
+ goto out;
+ }
+
+ status = atol(args[1]);
+ msg = http_parse_errorfile(status, args[2], errmsg);
+ if (!msg) {
+ memprintf(errmsg, "%s : %s", args[0], *errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ ret = -1;
+ goto out;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ reply->status = status;
+ reply->ctype = NULL;
+ LIST_INIT(&reply->hdrs);
+ reply->body.errmsg = msg;
+ LIST_APPEND(&http_replies_list, &reply->list);
+
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!conf_err) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ free(reply);
+ ret = -1;
+ goto out;
+ }
+ conf_err->type = 1;
+ conf_err->info.errorfile.status = status;
+ conf_err->info.errorfile.reply = reply;
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+
+ /* handle warning message */
+ if (*errmsg)
+ ret = 1;
+ out:
+ return ret;
+
+}
+
+/* Parses the "errorfiles" proxy keyword */
+static int proxy_parse_errorfiles(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct conf_errors *conf_err = NULL;
+ char *name = NULL;
+ int rc, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (!*(args[1])) {
+ memprintf(err, "%s : expects <name> as argument.", args[0]);
+ ret = -1;
+ goto out;
+ }
+
+ name = strdup(args[1]);
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!name || !conf_err) {
+ memprintf(err, "%s : out of memory.", args[0]);
+ goto error;
+ }
+ conf_err->type = 0;
+
+ conf_err->info.errorfiles.name = name;
+ if (!*(args[2])) {
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++)
+ conf_err->info.errorfiles.status[rc] = 1;
+ }
+ else {
+ int cur_arg, status;
+ for (cur_arg = 2; *(args[cur_arg]); cur_arg++) {
+ status = atol(args[cur_arg]);
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == status) {
+ conf_err->info.errorfiles.status[rc] = 2;
+ break;
+ }
+ }
+ if (rc >= HTTP_ERR_SIZE) {
+ memprintf(err, "%s : status code '%d' not handled.", args[0], status);
+ goto error;
+ }
+ }
+ }
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+ out:
+ return ret;
+
+ error:
+ free(name);
+ free(conf_err);
+ ret = -1;
+ goto out;
+}
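+
+/* Illustrative usage in a proxy section: import every message from an
+ * http-errors section, or only selected statuses:
+ *
+ *     errorfiles my_errors
+ *     errorfiles my_errors 404 503
+ */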
+
+/* Parses the "http-error" proxy keyword */
+static int proxy_parse_http_error(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct conf_errors *conf_err;
+ struct http_reply *reply = NULL;
+ int rc, cur_arg, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ cur_arg = 1;
+ curpx->conf.args.ctx = ARGC_HERR;
+ reply = http_parse_http_reply((const char **)args, &cur_arg, curpx, 0, errmsg);
+ if (!reply) {
+ memprintf(errmsg, "%s : %s", args[0], *errmsg);
+ goto error;
+ }
+ else if (!reply->status) {
+ memprintf(errmsg, "%s : expects at least a <status> as arguments.\n", args[0]);
+ goto error;
+ }
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == reply->status)
+ break;
+ }
+
+ if (rc >= HTTP_ERR_SIZE) {
+ memprintf(errmsg, "%s: status code '%d' not handled.", args[0], reply->status);
+ goto error;
+ }
+ if (*args[cur_arg]) {
+ memprintf(errmsg, "%s : unknown keyword '%s'.", args[0], args[cur_arg]);
+ goto error;
+ }
+
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!conf_err) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ goto error;
+ }
+ if (reply->type == HTTP_REPLY_ERRFILES) {
+ int rc = http_get_status_idx(reply->status);
+
+ conf_err->type = 2;
+ conf_err->info.errorfiles.name = reply->body.http_errors;
+ conf_err->info.errorfiles.status[rc] = 2;
+ reply->body.http_errors = NULL;
+ release_http_reply(reply);
+ }
+ else {
+ conf_err->type = 1;
+ conf_err->info.errorfile.status = reply->status;
+ conf_err->info.errorfile.reply = reply;
+ LIST_APPEND(&http_replies_list, &reply->list);
+ }
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+
+ /* handle warning message */
+ if (*errmsg)
+ ret = 1;
+ out:
+ return ret;
+
+ error:
+ release_http_reply(reply);
+ ret = -1;
+ goto out;
+
+}
+
+/* Check "errorfiles" proxy keyword */
+static int proxy_check_errors(struct proxy *px)
+{
+ struct conf_errors *conf_err, *conf_err_back;
+ struct http_errors *http_errs;
+ int rc, err = ERR_NONE;
+
+ list_for_each_entry_safe(conf_err, conf_err_back, &px->conf.errors, list) {
+ if (conf_err->type == 1) {
+ /* errorfile */
+ rc = http_get_status_idx(conf_err->info.errorfile.status);
+ px->replies[rc] = conf_err->info.errorfile.reply;
+
+ /* For the proxy to rely on the default replies, just don't reference a reply */
+ if (px->replies[rc]->type == HTTP_REPLY_ERRMSG && !px->replies[rc]->body.errmsg)
+ px->replies[rc] = NULL;
+ }
+ else {
+ /* errorfiles */
+ list_for_each_entry(http_errs, &http_errors_list, list) {
+ if (strcmp(http_errs->id, conf_err->info.errorfiles.name) == 0)
+ break;
+ }
+
+ /* unknown http-errors section */
+ if (&http_errs->list == &http_errors_list) {
+ ha_alert("proxy '%s': unknown http-errors section '%s' (at %s:%d).\n",
+ px->id, conf_err->info.errorfiles.name, conf_err->file, conf_err->line);
+ err |= ERR_ALERT | ERR_FATAL;
+ free(conf_err->info.errorfiles.name);
+ goto next;
+ }
+
+ free(conf_err->info.errorfiles.name);
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (conf_err->info.errorfiles.status[rc] > 0) {
+ if (http_errs->replies[rc])
+ px->replies[rc] = http_errs->replies[rc];
+ else if (conf_err->info.errorfiles.status[rc] == 2)
+ ha_warning("config: proxy '%s' : status '%d' not declared in"
+ " http-errors section '%s' (at %s:%d).\n",
+ px->id, http_err_codes[rc], http_errs->id,
+ conf_err->file, conf_err->line);
+ }
+ }
+ }
+ next:
+ LIST_DELETE(&conf_err->list);
+ free(conf_err->file);
+ free(conf_err);
+ }
+
+ out:
+ return err;
+}
+
+static int post_check_errors(void)
+{
+ struct ebpt_node *node;
+ struct http_error_msg *http_errmsg;
+ struct htx *htx;
+ int err_code = ERR_NONE;
+
+ node = ebpt_first(&http_error_messages);
+ while (node) {
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ if (b_is_null(&http_errmsg->msg))
+ goto next;
+ htx = htxbuf(&http_errmsg->msg);
+ if (htx_free_data_space(htx) < global.tune.maxrewrite) {
+ ha_warning("config: errorfile '%s' runs over the buffer space"
+ " reserved to headers rewriting. It may lead to internal errors if "
+ " http-after-response rules are evaluated on this message.\n",
+ (char *)node->key);
+ err_code |= ERR_WARN;
+ }
+ next:
+ node = ebpt_next(node);
+ }
+
+ return err_code;
+}
+
+int proxy_dup_default_conf_errors(struct proxy *curpx, const struct proxy *defpx, char **errmsg)
+{
+ struct conf_errors *conf_err, *new_conf_err = NULL;
+ int ret = 0;
+
+ list_for_each_entry(conf_err, &defpx->conf.errors, list) {
+ new_conf_err = calloc(1, sizeof(*new_conf_err));
+ if (!new_conf_err) {
+ memprintf(errmsg, "unable to duplicate default errors (out of memory).");
+ goto out;
+ }
+ new_conf_err->type = conf_err->type;
+ if (conf_err->type == 1) {
+ new_conf_err->info.errorfile.status = conf_err->info.errorfile.status;
+ new_conf_err->info.errorfile.reply = conf_err->info.errorfile.reply;
+ }
+ else {
+ new_conf_err->info.errorfiles.name = strdup(conf_err->info.errorfiles.name);
+ if (!new_conf_err->info.errorfiles.name) {
+ memprintf(errmsg, "unable to duplicate default errors (out of memory).");
+ goto out;
+ }
+ memcpy(&new_conf_err->info.errorfiles.status, &conf_err->info.errorfiles.status,
+ sizeof(conf_err->info.errorfiles.status));
+ }
+ new_conf_err->file = strdup(conf_err->file);
+ new_conf_err->line = conf_err->line;
+ LIST_APPEND(&curpx->conf.errors, &new_conf_err->list);
+ new_conf_err = NULL;
+ }
+ ret = 1;
+
+ out:
+ free(new_conf_err);
+ return ret;
+}
+
+void proxy_release_conf_errors(struct proxy *px)
+{
+ struct conf_errors *conf_err, *conf_err_back;
+
+ list_for_each_entry_safe(conf_err, conf_err_back, &px->conf.errors, list) {
+ if (conf_err->type == 0)
+ free(conf_err->info.errorfiles.name);
+ LIST_DELETE(&conf_err->list);
+ free(conf_err->file);
+ free(conf_err);
+ }
+}
+
+/*
+ * Parse an <http-errors> section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
+ */
+static int cfg_parse_http_errors(const char *file, int linenum, char **args, int kwm)
+{
+ static struct http_errors *curr_errs = NULL;
+ int err_code = 0;
+ const char *err;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "http-errors") == 0) { /* new errors section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for http-errors section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(curr_errs, &http_errors_list, list) {
+ /* Error if two http-errors sections own the same name */
+ if (strcmp(curr_errs->id, args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: http-errors section '%s' already exists (declared at %s:%d).\n",
+ file, linenum, args[1], curr_errs->conf.file, curr_errs->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ if ((curr_errs = calloc(1, sizeof(*curr_errs))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ LIST_APPEND(&http_errors_list, &curr_errs->list);
+ curr_errs->id = strdup(args[1]);
+ curr_errs->conf.file = strdup(file);
+ curr_errs->conf.line = linenum;
+ }
+ else if (strcmp(args[0], "errorfile") == 0) { /* error message from a file */
+ struct http_reply *reply;
+ struct buffer *msg;
+ int status, rc;
+
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : %s: expects <status_code> and <file> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ status = atol(args[1]);
+ msg = http_parse_errorfile(status, args[2], &errmsg);
+ if (!msg) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s: %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_WARN;
+ }
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ ha_alert("parsing [%s:%d] : %s : out of memory.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ reply->status = status;
+ reply->ctype = NULL;
+ LIST_INIT(&reply->hdrs);
+ reply->body.errmsg = msg;
+
+ rc = http_get_status_idx(status);
+ curr_errs->replies[rc] = reply;
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "errorloc", proxy_parse_errorloc },
+ { CFG_LISTEN, "errorloc302", proxy_parse_errorloc },
+ { CFG_LISTEN, "errorloc303", proxy_parse_errorloc },
+ { CFG_LISTEN, "errorfile", proxy_parse_errorfile },
+ { CFG_LISTEN, "errorfiles", proxy_parse_errorfiles },
+ { CFG_LISTEN, "http-error", proxy_parse_http_error },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+REGISTER_POST_PROXY_CHECK(proxy_check_errors);
+REGISTER_POST_CHECK(post_check_errors);
+
+REGISTER_CONFIG_SECTION("http-errors", cfg_parse_http_errors, NULL);
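+
+/* Example of an "http-errors" section consumed by the parser above
+ * (illustrative):
+ *
+ *     http-errors my_errors
+ *         errorfile 404 /etc/haproxy/errors/404.http
+ *         errorfile 503 /etc/haproxy/errors/503.http
+ */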
+
+/************************************************************************/
+/* HTX sample fetches */
+/************************************************************************/
+
+/* Returns 1 if a stream is an HTX stream. Otherwise, it returns 0. */
+static int
+smp_fetch_is_htx(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.u.sint = !!IS_HTX_STRM(smp->strm);
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
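+
+/* The fetches in this section are debug helpers. Illustrative usage, assuming
+ * the "internal.*" keyword names under which they are registered (see the
+ * sample fetch keyword list):
+ *
+ *     acl is_htx internal.strm.is_htx
+ *     http-request set-header x-htx-blks %[internal.htx.nbblks] if is_htx
+ */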
+
+/* Returns the number of blocks in an HTX message. The channel is chosen
+ * depending on the sample direction. */
+static int
+smp_fetch_htx_nbblks(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_nbblks(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the size of an HTX message. The channel is chosen depending on the
+ * sample direction. */
+static int
+smp_fetch_htx_size(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx->size;
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the data size of an HTX message. The channel is chosen depending on the
+ * sample direction. */
+static int
+smp_fetch_htx_data(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx->data;
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the used space (data+meta) of an HTX message. The channel is chosen
+ * depending on the sample direction. */
+static int
+smp_fetch_htx_used(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_used_space(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the free space (size-used) of an HTX message. The channel is chosen
+ * depending on the sample direction. */
+static int
+smp_fetch_htx_free(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_free_space(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the free space for data (free-sizeof(blk)) of an HTX message. The
+ * channel is chosen depending on the sample direction. */
+static int
+smp_fetch_htx_free_data(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_free_data_space(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns 1 if the HTX message contains EOM flag. Otherwise it returns 0. The
+ * channel is chosen depending on the sample direction.
+ */
+static int
+smp_fetch_htx_has_eom(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = !!(htx->flags & HTX_FL_EOM);
+ smp->data.type = SMP_T_BOOL;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the type of a specific HTX block, if found in the message. Otherwise
+ * HTX_BLK_UNUSED is returned. Any non-negative integer (>= 0) is supported, as
+ * well as "head", "tail" or "first". The channel is chosen depending on the
+ * sample direction. */
+static int
+smp_fetch_htx_blk_type(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ enum htx_blk_type type;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ type = htx_get_head_type(htx);
+ else if (pos == -2)
+ type = htx_get_tail_type(htx);
+ else if (pos == -3)
+ type = htx_get_first_type(htx);
+ else
+ type = ((pos >= htx->head && pos <= htx->tail)
+ ? htx_get_blk_type(htx_get_blk(htx, pos))
+ : HTX_BLK_UNUSED);
+
+ chunk_initstr(&smp->data.u.str, htx_blk_type_str(type));
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the size of a specific HTX block, if found in the message. Otherwise
+ * 0 is returned. Any non-negative integer (>= 0) is supported, as well as
+ * "head", "tail" or "first". The channel is chosen depending on the sample
+ * direction. */
+static int
+smp_fetch_htx_blk_size(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ smp->data.u.sint = (blk ? htx_get_blksz(blk) : 0);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the start-line if the selected HTX block exists and is a
+ * start-line. Otherwise an empty string is returned. Any non-negative integer
+ * (>= 0) is supported, as well as "head", "tail" or "first". The channel is
+ * chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_stline(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *temp;
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_REQ_SL && htx_get_blk_type(blk) != HTX_BLK_RES_SL)) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ sl = htx_get_blk_ptr(htx, blk);
+
+ temp = get_trash_chunk();
+ chunk_istcat(temp, htx_sl_p1(sl));
+ temp->area[temp->data++] = ' ';
+ chunk_istcat(temp, htx_sl_p2(sl));
+ temp->area[temp->data++] = ' ';
+ chunk_istcat(temp, htx_sl_p3(sl));
+
+ smp->data.u.str = *temp;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the header name if the selected HTX block exists and is a header or
+ * a trailer. Otherwise an empty string is returned. Any non-negative integer
+ * (>= 0) is supported, as well as "head", "tail" or "first". The channel is
+ * chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_hdrname(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_HDR && htx_get_blk_type(blk) != HTX_BLK_TLR)) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ struct ist name = htx_get_blk_name(htx, blk);
+
+ chunk_initlen(&smp->data.u.str, name.ptr, name.len, name.len);
+ }
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the header value if the selected HTX block exists and is a header
+ * or a trailer. Otherwise an empty string is returned. Any non-negative
+ * integer (>= 0) is supported, as well as "head", "tail" or "first". The
+ * channel is chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_hdrval(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_HDR && htx_get_blk_type(blk) != HTX_BLK_TLR)) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ struct ist val = htx_get_blk_value(htx, blk);
+
+ chunk_initlen(&smp->data.u.str, val.ptr, val.len, val.len);
+ }
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the value if the selected HTX block exists and is a data
+ * block. Otherwise an empty string is returned. Any non-negative integer
+ * (>= 0) is supported, as well as "head", "tail" or "first". The channel is
+ * chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_data(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || htx_get_blk_type(blk) != HTX_BLK_DATA) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ struct ist val = htx_get_blk_value(htx, blk);
+
+ chunk_initlen(&smp->data.u.str, val.ptr, val.len, val.len);
+ }
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* This function is used to validate the arguments passed to any "htx_blk" fetch
+ * keywords. An argument is expected by these keywords. It must be a non-negative
+ * integer or one of the following strings: "head", "tail" or "first". It returns
+ * 0 on error, and a non-zero value if OK.
+ */
+int val_blk_arg(struct arg *arg, char **err_msg)
+{
+ if (arg[0].type != ARGT_STR || !arg[0].data.str.data) {
+ memprintf(err_msg, "a block position is expected (> 0) or a special block name (head, tail, first)");
+ return 0;
+ }
+ if (arg[0].data.str.data == 4 && !strncmp(arg[0].data.str.area, "head", 4)) {
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = -1;
+ }
+ else if (arg[0].data.str.data == 4 && !strncmp(arg[0].data.str.area, "tail", 4)) {
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = -2;
+ }
+ else if (arg[0].data.str.data == 5 && !strncmp(arg[0].data.str.area, "first", 5)) {
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = -3;
+ }
+ else {
+ int pos;
+
+ for (pos = 0; pos < arg[0].data.str.data; pos++) {
+ if (!isdigit((unsigned char)arg[0].data.str.area[pos])) {
+ memprintf(err_msg, "invalid block position");
+ return 0;
+ }
+ }
+
+ pos = strl2uic(arg[0].data.str.area, arg[0].data.str.data);
+ if (pos < 0) {
+ memprintf(err_msg, "block position must not be negative");
+ return 0;
+ }
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = pos;
+ }
+
+ return 1;
+}
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: htx sample fetches should only be used for development purposes.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "internal.strm.is_htx", smp_fetch_is_htx, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN },
+
+ { "internal.htx.nbblks", smp_fetch_htx_nbblks, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.size", smp_fetch_htx_size, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.data", smp_fetch_htx_data, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.used", smp_fetch_htx_used, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.free", smp_fetch_htx_free, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.free_data", smp_fetch_htx_free_data, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.has_eom", smp_fetch_htx_has_eom, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHV|SMP_USE_HRSHV},
+
+ { "internal.htx_blk.type", smp_fetch_htx_blk_type, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.size", smp_fetch_htx_blk_size, ARG1(1,STR), val_blk_arg, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.start_line", smp_fetch_htx_blk_stline, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.hdrname", smp_fetch_htx_blk_hdrname, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.hdrval", smp_fetch_htx_blk_hdrval, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.data", smp_fetch_htx_blk_data, ARG1(1,STR), val_blk_arg, SMP_T_BIN, SMP_USE_HRQHV|SMP_USE_HRSHV},
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
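+
+/* Illustrative usage sketch (not part of the code above): once these fetches
+ * are registered, a debugging configuration could sample HTX internals per
+ * request; the variable names are arbitrary examples:
+ *
+ *   http-request set-var(txn.htx_nbblks) internal.htx.nbblks
+ *   http-request set-var(txn.blk_type)   internal.htx_blk.type(first)
+ *   http-request set-var(txn.blk_size)   internal.htx_blk.size(head)
+ */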
diff --git a/src/http_rules.c b/src/http_rules.c
new file mode 100644
index 0000000..8e257ea
--- /dev/null
+++ b/src/http_rules.c
@@ -0,0 +1,498 @@
+/*
+ * HTTP rules parsing and registration
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+/* List head of all known action keywords for "http-request" */
+struct action_kw_list http_req_keywords = {
+ .list = LIST_HEAD_INIT(http_req_keywords.list)
+};
+
+/* List head of all known action keywords for "http-response" */
+struct action_kw_list http_res_keywords = {
+ .list = LIST_HEAD_INIT(http_res_keywords.list)
+};
+
+/* List head of all known action keywords for "http-after-response" */
+struct action_kw_list http_after_res_keywords = {
+ .list = LIST_HEAD_INIT(http_after_res_keywords.list)
+};
+
+void http_req_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&http_req_keywords.list, &kw_list->list);
+}
+
+void http_res_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&http_res_keywords.list, &kw_list->list);
+}
+
+void http_after_res_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&http_after_res_keywords.list, &kw_list->list);
+}
+
+/*
+ * Return the struct http_req_action_kw associated to a keyword.
+ */
+struct action_kw *action_http_req_custom(const char *kw)
+{
+ return action_lookup(&http_req_keywords.list, kw);
+}
+
+/*
+ * Return the struct http_res_action_kw associated to a keyword.
+ */
+struct action_kw *action_http_res_custom(const char *kw)
+{
+ return action_lookup(&http_res_keywords.list, kw);
+}
+
+/*
+ * Return the struct http_after_res_action_kw associated to a keyword.
+ */
+struct action_kw *action_http_after_res_custom(const char *kw)
+{
+ return action_lookup(&http_after_res_keywords.list, kw);
+}
+
+/* parse an "http-request" rule */
+struct act_rule *parse_http_req_cond(const char **args, const char *file, int linenum, struct proxy *proxy)
+{
+ struct act_rule *rule;
+ const struct action_kw *custom = NULL;
+ int cur_arg;
+
+ rule = new_act_rule(ACT_F_HTTP_REQ, file, linenum);
+ if (!rule) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ if (((custom = action_http_req_custom(args[0])) != NULL)) {
+ char *errmsg = NULL;
+
+ cur_arg = 1;
+ /* try in the module list */
+ rule->kw = custom;
+
+ if (custom->flags & KWF_EXPERIMENTAL) {
+ if (!experimental_directives_allowed) {
+ ha_alert("parsing [%s:%d] : '%s' action is experimental, must be allowed via a global 'expose-experimental-directives'\n",
+ file, linenum, custom->kw);
+ goto out_err;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+ }
+
+ if (custom->parse(args, &cur_arg, proxy, rule, &errmsg) == ACT_RET_PRS_ERR) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing 'http-request %s' rule : %s.\n",
+ file, linenum, proxy_type_str(proxy), proxy->id, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ else if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ }
+ }
+ else {
+ const char *best = action_suggest(args[0], &http_req_keywords.list, NULL);
+
+ action_build_list(&http_req_keywords.list, &trash);
+ ha_alert("parsing [%s:%d]: 'http-request' expects %s, but got '%s'%s.%s%s%s\n",
+ file, linenum, trash.area,
+ args[0], *args[0] ? "" : " (missing argument)",
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ goto out_err;
+ }
+
+ if (strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ struct acl_cond *cond;
+ char *errmsg = NULL;
+
+ if ((cond = build_acl_cond(file, linenum, &proxy->acl, proxy, args+cur_arg, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing an 'http-request %s' condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ rule->cond = cond;
+ }
+ else if (*args[cur_arg]) {
+ ha_alert("parsing [%s:%d]: 'http-request %s' expects"
+ " either 'if' or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[0], args[cur_arg]);
+ goto out_err;
+ }
+
+ return rule;
+ out_err:
+ free_act_rule(rule);
+ out:
+ return NULL;
+}
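+
+/* For reference, a couple of configuration lines this parser handles
+ * (illustrative only, using standard action and ACL keywords):
+ *
+ *   http-request deny if { src 10.0.0.0/8 }
+ *   http-request set-header X-Forwarded-Proto https if { ssl_fc }
+ */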
+
+/* parse an "http-respose" rule */
+struct act_rule *parse_http_res_cond(const char **args, const char *file, int linenum, struct proxy *proxy)
+{
+ struct act_rule *rule;
+ const struct action_kw *custom = NULL;
+ int cur_arg;
+
+ rule = new_act_rule(ACT_F_HTTP_RES, file, linenum);
+ if (!rule) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ if (((custom = action_http_res_custom(args[0])) != NULL)) {
+ char *errmsg = NULL;
+
+ cur_arg = 1;
+ /* try in the module list */
+ rule->kw = custom;
+
+ if (custom->flags & KWF_EXPERIMENTAL) {
+ if (!experimental_directives_allowed) {
+ ha_alert("parsing [%s:%d] : '%s' action is experimental, must be allowed via a global 'expose-experimental-directives'\n",
+ file, linenum, custom->kw);
+ goto out_err;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+ }
+
+ if (custom->parse(args, &cur_arg, proxy, rule, &errmsg) == ACT_RET_PRS_ERR) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing 'http-response %s' rule : %s.\n",
+ file, linenum, proxy_type_str(proxy), proxy->id, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ else if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ }
+ }
+ else {
+ const char *best = action_suggest(args[0], &http_res_keywords.list, NULL);
+
+ action_build_list(&http_res_keywords.list, &trash);
+ ha_alert("parsing [%s:%d]: 'http-response' expects %s, but got '%s'%s.%s%s%s\n",
+ file, linenum, trash.area,
+ args[0], *args[0] ? "" : " (missing argument)",
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ goto out_err;
+ }
+
+ if (strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ struct acl_cond *cond;
+ char *errmsg = NULL;
+
+ if ((cond = build_acl_cond(file, linenum, &proxy->acl, proxy, args+cur_arg, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing an 'http-response %s' condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ rule->cond = cond;
+ }
+ else if (*args[cur_arg]) {
+ ha_alert("parsing [%s:%d]: 'http-response %s' expects"
+ " either 'if' or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[0], args[cur_arg]);
+ goto out_err;
+ }
+
+ return rule;
+ out_err:
+ free_act_rule(rule);
+ out:
+ return NULL;
+}
+
+
+/* parse an "http-after-response" rule */
+struct act_rule *parse_http_after_res_cond(const char **args, const char *file, int linenum, struct proxy *proxy)
+{
+ struct act_rule *rule;
+ const struct action_kw *custom = NULL;
+ int cur_arg;
+
+ rule = new_act_rule(ACT_F_HTTP_RES, file, linenum);
+ if (!rule) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ if (((custom = action_http_after_res_custom(args[0])) != NULL)) {
+ char *errmsg = NULL;
+
+ cur_arg = 1;
+ /* try in the module list */
+ rule->kw = custom;
+ if (custom->parse(args, &cur_arg, proxy, rule, &errmsg) == ACT_RET_PRS_ERR) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing 'http-after-response %s' rule : %s.\n",
+ file, linenum, proxy_type_str(proxy), proxy->id, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ else if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ }
+ }
+ else {
+ const char *best = action_suggest(args[0], &http_after_res_keywords.list, NULL);
+
+ action_build_list(&http_after_res_keywords.list, &trash);
+ ha_alert("parsing [%s:%d]: 'http-after-response' expects %s, but got '%s'%s.%s%s%s\n",
+ file, linenum, trash.area,
+ args[0], *args[0] ? "" : " (missing argument)",
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ goto out_err;
+ }
+
+ if (strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ struct acl_cond *cond;
+ char *errmsg = NULL;
+
+ if ((cond = build_acl_cond(file, linenum, &proxy->acl, proxy, args+cur_arg, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing an 'http-after-response %s' condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ rule->cond = cond;
+ }
+ else if (*args[cur_arg]) {
+ ha_alert("parsing [%s:%d]: 'http-after-response %s' expects"
+ " either 'if' or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[0], args[cur_arg]);
+ goto out_err;
+ }
+
+ return rule;
+ out_err:
+ free_act_rule(rule);
+ out:
+ return NULL;
+}
+
+/* Parses a redirect rule. Returns the redirect rule on success or NULL on error,
+ * with <errmsg> filled with the error message. If <use_fmt> is not null, builds a
+ * dynamic log-format rule instead of a static string. Parameter <dir> indicates
+ * the direction of the rule, and equals 0 for request, non-zero for responses.
+ */
+struct redirect_rule *http_parse_redirect_rule(const char *file, int linenum, struct proxy *curproxy,
+ const char **args, char **errmsg, int use_fmt, int dir)
+{
+ struct redirect_rule *rule;
+ int cur_arg;
+ int type = REDIRECT_TYPE_NONE;
+ int code = 302;
+ const char *destination = NULL;
+ const char *cookie = NULL;
+ int cookie_set = 0;
+ unsigned int flags = (!dir ? REDIRECT_FLAG_FROM_REQ : REDIRECT_FLAG_NONE);
+ struct acl_cond *cond = NULL;
+
+ cur_arg = 0;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "location") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ type = REDIRECT_TYPE_LOCATION;
+ cur_arg++;
+ destination = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "prefix") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+ type = REDIRECT_TYPE_PREFIX;
+ cur_arg++;
+ destination = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "scheme") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ type = REDIRECT_TYPE_SCHEME;
+ cur_arg++;
+ destination = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "set-cookie") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ cur_arg++;
+ cookie = args[cur_arg];
+ cookie_set = 1;
+ }
+ else if (strcmp(args[cur_arg], "clear-cookie") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ cur_arg++;
+ cookie = args[cur_arg];
+ cookie_set = 0;
+ }
+ else if (strcmp(args[cur_arg], "code") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ cur_arg++;
+ code = atol(args[cur_arg]);
+ if (code < 301 || code > 308 || (code > 303 && code < 307)) {
+ memprintf(errmsg,
+ "'%s': unsupported HTTP code '%s' (must be one of 301, 302, 303, 307 or 308)",
+ args[cur_arg - 1], args[cur_arg]);
+ return NULL;
+ }
+ }
+ else if (strcmp(args[cur_arg], "drop-query") == 0) {
+ flags |= REDIRECT_FLAG_DROP_QS;
+ }
+ else if (strcmp(args[cur_arg], "append-slash") == 0) {
+ flags |= REDIRECT_FLAG_APPEND_SLASH;
+ }
+ else if (strcmp(args[cur_arg], "ignore-empty") == 0) {
+ flags |= REDIRECT_FLAG_IGNORE_EMPTY;
+ }
+ else if (strcmp(args[cur_arg], "if") == 0 ||
+ strcmp(args[cur_arg], "unless") == 0) {
+ cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + cur_arg, errmsg);
+ if (!cond) {
+ memprintf(errmsg, "error in condition: %s", *errmsg);
+ return NULL;
+ }
+ break;
+ }
+ else {
+ memprintf(errmsg,
+ "expects 'code', 'prefix', 'location', 'scheme', 'set-cookie', 'clear-cookie', 'drop-query', 'ignore-empty' or 'append-slash' (was '%s')",
+ args[cur_arg]);
+ return NULL;
+ }
+ cur_arg++;
+ }
+
+ if (type == REDIRECT_TYPE_NONE) {
+ memprintf(errmsg, "redirection type expected ('prefix', 'location', or 'scheme')");
+ return NULL;
+ }
+
+ if (dir && type != REDIRECT_TYPE_LOCATION) {
+ memprintf(errmsg, "response only supports redirect type 'location'");
+ return NULL;
+ }
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ memprintf(errmsg, "parsing [%s:%d]: out of memory.", file, linenum);
+ return NULL;
+ }
+ rule->cond = cond;
+ LIST_INIT(&rule->rdr_fmt);
+
+ if (!use_fmt) {
+ /* old-style static redirect rule */
+ rule->rdr_str = strdup(destination);
+ rule->rdr_len = strlen(destination);
+ }
+ else {
+ /* log-format based redirect rule */
+ int cap = 0;
+
+ /* Parse destination. Note that in the REDIRECT_TYPE_PREFIX case,
+ * if prefix == "/", we don't want to add anything, otherwise it
+ * makes it hard for the user to configure a self-redirection.
+ */
+ curproxy->conf.args.ctx = ARGC_RDR;
+ if (curproxy->cap & PR_CAP_FE)
+ cap |= (dir ? SMP_VAL_FE_HRS_HDR : SMP_VAL_FE_HRQ_HDR);
+ if (curproxy->cap & PR_CAP_BE)
+ cap |= (dir ? SMP_VAL_BE_HRS_HDR : SMP_VAL_BE_HRQ_HDR);
+ if (!(type == REDIRECT_TYPE_PREFIX && destination[0] == '/' && destination[1] == '\0')) {
+ if (!parse_logformat_string(destination, curproxy, &rule->rdr_fmt, LOG_OPT_HTTP, cap, errmsg)) {
+ return NULL;
+ }
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+ }
+ }
+
+ if (cookie) {
+ /* depending on cookie_set, either we want to set the cookie, or to clear it.
+ * a clear consists in appending "; path=/; Max-Age=0;" at the end.
+ */
+ rule->cookie_len = strlen(cookie);
+ if (cookie_set) {
+ rule->cookie_str = malloc(rule->cookie_len + 10);
+ memcpy(rule->cookie_str, cookie, rule->cookie_len);
+ memcpy(rule->cookie_str + rule->cookie_len, "; path=/;", 10);
+ rule->cookie_len += 9;
+ } else {
+ rule->cookie_str = malloc(rule->cookie_len + 21);
+ memcpy(rule->cookie_str, cookie, rule->cookie_len);
+ memcpy(rule->cookie_str + rule->cookie_len, "; path=/; Max-Age=0;", 21);
+ rule->cookie_len += 20;
+ }
+ }
+ rule->type = type;
+ rule->code = code;
+ rule->flags = flags;
+ LIST_INIT(&rule->list);
+ return rule;
+
+ missing_arg:
+ memprintf(errmsg, "missing argument for '%s'", args[cur_arg]);
+ return NULL;
+}
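+
+/* For reference, typical rules accepted by this parser (illustrative only):
+ *
+ *   redirect scheme https code 301 if !{ ssl_fc }
+ *   redirect prefix /404 drop-query if { path_beg /old }
+ */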
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/htx.c b/src/htx.c
new file mode 100644
index 0000000..7e5bd46
--- /dev/null
+++ b/src/htx.c
@@ -0,0 +1,1087 @@
+/*
+ * internal HTTP message
+ *
+ * Copyright 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/chunk.h>
+#include <haproxy/htx.h>
+
+struct htx htx_empty = { .size = 0, .data = 0, .head = -1, .tail = -1, .first = -1 };
+
+/* Defragments an HTX message. It removes unused blocks and unwraps the payloads
+ * part. A temporary buffer is used to do so. This function never fails. Most
+ * of the time, we need to keep a reference on a specific HTX block. Thus, if
+ * <blk> is set, the pointer to its new position, after defrag, is returned. In
+ * addition, if the size of the block must be altered, <blkinfo> info must be
+ * provided (!= 0). But in this case, it remains the caller's responsibility to
+ * update the block content.
+ */
+/* TODO: merge data blocks into one */
+struct htx_blk *htx_defrag(struct htx *htx, struct htx_blk *blk, uint32_t blkinfo)
+{
+ struct buffer *chunk = get_trash_chunk();
+ struct htx *tmp = htxbuf(chunk);
+ struct htx_blk *newblk, *oldblk;
+ uint32_t new, old, blkpos;
+ uint32_t addr, blksz;
+ int32_t first = -1;
+
+ if (htx->head == -1)
+ return NULL;
+
+ blkpos = -1;
+
+ new = 0;
+ addr = 0;
+ tmp->size = htx->size;
+ tmp->data = 0;
+
+ /* start from the head */
+ for (old = htx_get_head(htx); old != -1; old = htx_get_next(htx, old)) {
+ oldblk = htx_get_blk(htx, old);
+ if (htx_get_blk_type(oldblk) == HTX_BLK_UNUSED)
+ continue;
+
+ blksz = htx_get_blksz(oldblk);
+ memcpy((void *)tmp->blocks + addr, htx_get_blk_ptr(htx, oldblk), blksz);
+
+ /* update the start-line position */
+ if (htx->first == old)
+ first = new;
+
+ newblk = htx_get_blk(tmp, new);
+ newblk->addr = addr;
+ newblk->info = oldblk->info;
+
+ /* if <blk> is defined, save its new position */
+ if (blk != NULL && blk == oldblk) {
+ if (blkinfo)
+ newblk->info = blkinfo;
+ blkpos = new;
+ }
+
+ blksz = htx_get_blksz(newblk);
+ addr += blksz;
+ tmp->data += blksz;
+ new++;
+ }
+
+ htx->data = tmp->data;
+ htx->first = first;
+ htx->head = 0;
+ htx->tail = new - 1;
+ htx->head_addr = htx->end_addr = 0;
+ htx->tail_addr = addr;
+ htx->flags &= ~HTX_FL_FRAGMENTED;
+ memcpy((void *)htx->blocks, (void *)tmp->blocks, htx->size);
+
+ return ((blkpos == -1) ? NULL : htx_get_blk(htx, blkpos));
+}
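+
+/* Minimal caller sketch (illustrative): a caller holding a block pointer must
+ * re-fetch it from the return value, since all blocks move during a defrag:
+ *
+ *   blk = htx_defrag(htx, blk, 0);      // 0: keep the block's info unchanged
+ *   if (blk)
+ *           ...                         // <blk> now points into the new area
+ */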
+
+/* Defragments the HTX blocks of an HTX message. The payload part is kept
+ * untouched here. This function will move all blocks back, starting at
+ * position 0, removing unused blocks. It must never be called with an empty
+ * message.
+ */
+static void htx_defrag_blks(struct htx *htx)
+{
+ int32_t pos, new;
+
+ new = 0;
+ for (pos = htx_get_head(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *posblk, *newblk;
+
+ if (pos == new) {
+ new++;
+ continue;
+ }
+
+ posblk = htx_get_blk(htx, pos);
+ if (htx_get_blk_type(posblk) == HTX_BLK_UNUSED)
+ continue;
+
+ if (htx->first == pos)
+ htx->first = new;
+ newblk = htx_get_blk(htx, new++);
+ newblk->info = posblk->info;
+ newblk->addr = posblk->addr;
+ }
+ BUG_ON(!new);
+ htx->head = 0;
+ htx->tail = new - 1;
+}
+
+/* Reserves a new block in the HTX message <htx> with a content of <blksz>
+ * bytes. If there is not enough space, NULL is returned. Otherwise the reserved
+ * block is returned and the HTX message is updated. Space for this new block is
+ * reserved in the HTX message. But it is the caller's responsibility to set
+ * the right info in the block to reflect the stored data.
+ */
+static struct htx_blk *htx_reserve_nxblk(struct htx *htx, uint32_t blksz)
+{
+ struct htx_blk *blk;
+ uint32_t tail, headroom, tailroom;
+
+ if (blksz > htx_free_data_space(htx))
+ return NULL; /* full */
+
+ if (htx->head == -1) {
+ /* Empty message */
+ htx->head = htx->tail = htx->first = 0;
+ blk = htx_get_blk(htx, htx->tail);
+ blk->addr = 0;
+ htx->data = blksz;
+ htx->tail_addr = blksz;
+ return blk;
+ }
+
+ /* Find the block's position. First, we try to get the next position in
+ * the message, increasing the tail by one. If this position is not
+ * available but there are holes (unused blocks), we try to defrag the
+ * blocks without touching their payload. If that is impossible, we fully
+ * defrag the message.
+ */
+ tail = htx->tail + 1;
+ if (htx_pos_to_addr(htx, tail) >= htx->tail_addr)
+ ;
+ else if (htx->head > 0) {
+ htx_defrag_blks(htx);
+ tail = htx->tail + 1;
+ BUG_ON(htx_pos_to_addr(htx, tail) < htx->tail_addr);
+ }
+ else
+ goto defrag;
+
+ /* Now, we have found the block's position. Try to find where to put its
+ * payload. The free space is split in two areas:
+ *
+ * * The free space in front of the blocks table. This one is used if and
+ * only if the other one was not used yet.
+ *
+ * * The free space at the beginning of the message. Once this one is
+ * used, the other one is never used again, until the next defrag.
+ */
+ headroom = (htx->end_addr - htx->head_addr);
+ tailroom = (!htx->head_addr ? htx_pos_to_addr(htx, tail) - htx->tail_addr : 0);
+ BUG_ON((int32_t)headroom < 0);
+ BUG_ON((int32_t)tailroom < 0);
+
+ if (blksz <= tailroom) {
+ blk = htx_get_blk(htx, tail);
+ blk->addr = htx->tail_addr;
+ htx->tail_addr += blksz;
+ }
+ else if (blksz <= headroom) {
+ blk = htx_get_blk(htx, tail);
+ blk->addr = htx->head_addr;
+ htx->head_addr += blksz;
+ }
+ else {
+ defrag:
+ /* need to defragment the message before inserting upfront */
+ htx_defrag(htx, NULL, 0);
+ tail = htx->tail + 1;
+ blk = htx_get_blk(htx, tail);
+ blk->addr = htx->tail_addr;
+ htx->tail_addr += blksz;
+ }
+
+ htx->tail = tail;
+ htx->data += blksz;
+ /* Set first position if not already set */
+ if (htx->first == -1)
+ htx->first = tail;
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+
+ return blk;
+}
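+
+/* Informal layout sketch of the payload areas handled above, derived from the
+ * invariants 0 <= head_addr <= end_addr <= tail_addr; the blocks table sits at
+ * the end of the buffer and grows downward:
+ *
+ *   0           head_addr    end_addr         tail_addr
+ *   +--------------+------------+----------------+------------+--------------+
+ *   |   wrapped    |  headroom  |    payload     |  tailroom  | blocks table |
+ *   |   payload    |   (free)   |                |   (free)   |              |
+ *   +--------------+------------+----------------+------------+--------------+
+ */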
+
+/* Prepares the block for an expansion of its payload. The payload will be
+ * expanded by <delta> bytes and we need to find where this expansion will be
+ * performed. It can be a compression if <delta> is negative. This function only
+ * updates all addresses. The caller has the responsibility to perform the
+ * expansion and update the block and the HTX message accordingly. No error must
+ * occur. It returns the following values:
+ *
+ * 0: The expansion cannot be performed, there is not enough space.
+ *
+ * 1: the expansion must be performed in place, there is enough space after
+ * the block's payload to handle it. This is especially true if it is a
+ * compression and not an expansion.
+ *
+ * 2: the block's payload must be moved at the new block address before doing
+ * the expansion.
+ *
+ * 3: the HTX message must be defragmented
+ */
+static int htx_prepare_blk_expansion(struct htx *htx, struct htx_blk *blk, int32_t delta)
+{
+ uint32_t sz, tailroom, headroom;
+ int ret = 3;
+
+ BUG_ON(htx->head == -1);
+
+ headroom = (htx->end_addr - htx->head_addr);
+ tailroom = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ BUG_ON((int32_t)headroom < 0);
+ BUG_ON((int32_t)tailroom < 0);
+
+ sz = htx_get_blksz(blk);
+ if (delta <= 0) {
+ /* It is a compression, it can be performed in place */
+ if (blk->addr+sz == htx->tail_addr)
+ htx->tail_addr += delta;
+ else if (blk->addr+sz == htx->head_addr)
+ htx->head_addr += delta;
+ ret = 1;
+ }
+ else if (delta > htx_free_space(htx)) {
+ /* There is not enough space to handle the expansion */
+ ret = 0;
+ }
+ else if (blk->addr+sz == htx->tail_addr) {
+ /* The block's payload is just before the tail room */
+ if (delta < tailroom) {
+ /* Expand the block's payload */
+ htx->tail_addr += delta;
+ ret = 1;
+ }
+ else if ((sz + delta) < headroom) {
+ uint32_t oldaddr = blk->addr;
+
+ /* Move the block's payload into the headroom */
+ blk->addr = htx->head_addr;
+ htx->tail_addr -= sz;
+ htx->head_addr += sz + delta;
+ if (oldaddr == htx->end_addr) {
+ if (htx->end_addr == htx->tail_addr) {
+ htx->tail_addr = htx->head_addr;
+ htx->head_addr = htx->end_addr = 0;
+ }
+ else
+ htx->end_addr += sz;
+ }
+ ret = 2;
+ }
+ }
+ else if (blk->addr+sz == htx->head_addr) {
+ /* The block's payload is just before the head room */
+ if (delta < headroom) {
+ /* Expand the block's payload */
+ htx->head_addr += delta;
+ ret = 1;
+ }
+ }
+ else {
+ /* The block's payload is not at the rooms edge */
+ if (!htx->head_addr && sz+delta < tailroom) {
+ /* Move the block's payload into the tailroom */
+ if (blk->addr == htx->end_addr)
+ htx->end_addr += sz;
+ blk->addr = htx->tail_addr;
+ htx->tail_addr += sz + delta;
+ ret = 2;
+ }
+ else if (sz+delta < headroom) {
+ /* Move the block's payload into the headroom */
+ if (blk->addr == htx->end_addr)
+ htx->end_addr += sz;
+ blk->addr = htx->head_addr;
+ htx->head_addr += sz + delta;
+ ret = 2;
+ }
+ }
+ /* Otherwise defrag the HTX message */
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return ret;
+}
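+
+/* Condensed caller pattern (illustrative; see htx_replace_blk_value() below
+ * for a real user):
+ *
+ *   ret = htx_prepare_blk_expansion(htx, blk, delta);
+ *   if (!ret)
+ *           return NULL;                      // not enough space
+ *   if (ret == 1)
+ *           ...                               // expand/shrink in place
+ *   else if (ret == 2)
+ *           ...                               // move the payload to blk->addr first
+ *   else
+ *           blk = htx_defrag(htx, blk, info); // ret == 3, <info> computed by the caller
+ */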
+
+/* Adds a new block of type <type> in the HTX message <htx>. Its content size is
+ * passed but it is the caller's responsibility to do the copy.
+ */
+struct htx_blk *htx_add_blk(struct htx *htx, enum htx_blk_type type, uint32_t blksz)
+{
+ struct htx_blk *blk;
+
+ BUG_ON(blksz >= 256 << 20);
+ blk = htx_reserve_nxblk(htx, blksz);
+ if (!blk)
+ return NULL;
+ BUG_ON(blk->addr > htx->size);
+
+ blk->info = (type << 28);
+ return blk;
+}
+
+/* Removes the block <blk> from the HTX message <htx>. The function returns the
+ * block following <blk> or NULL if <blk> is the last block or the last inserted
+ * one.
+ */
+struct htx_blk *htx_remove_blk(struct htx *htx, struct htx_blk *blk)
+{
+ enum htx_blk_type type;
+ uint32_t pos, addr, sz;
+
+ BUG_ON(!blk || htx->head == -1);
+
+ /* This is the last block in use */
+ if (htx->head == htx->tail) {
+ uint32_t flags = (htx->flags & ~HTX_FL_FRAGMENTED); /* Preserve flags except FRAGMENTED */
+
+ htx_reset(htx);
+ htx->flags = flags; /* restore flags */
+ return NULL;
+ }
+
+ type = htx_get_blk_type(blk);
+ pos = htx_get_blk_pos(htx, blk);
+ sz = htx_get_blksz(blk);
+ addr = blk->addr;
+ if (type != HTX_BLK_UNUSED) {
+ /* Mark the block as unused, decrement allocated size */
+ htx->data -= htx_get_blksz(blk);
+ blk->info = ((uint32_t)HTX_BLK_UNUSED << 28);
+ }
+
+ /* There are at least 2 blocks, so the tail is always > 0 */
+ if (pos == htx->head) {
+ /* move the head forward */
+ htx->head++;
+ }
+ else if (pos == htx->tail) {
+ /* remove the tail. this was the last inserted block so
+ * return NULL. */
+ htx->tail--;
+ blk = NULL;
+ goto end;
+ }
+ else
+ htx->flags |= HTX_FL_FRAGMENTED;
+
+ blk = htx_get_blk(htx, pos+1);
+
+ end:
+ if (pos == htx->first)
+ htx->first = (blk ? htx_get_blk_pos(htx, blk) : -1);
+
+ if (htx->head == htx->tail) {
+ /* If there is just one block in the HTX message, free space can
+ * be adjusted. This operation could save some defrags. */
+ struct htx_blk *lastblk = htx_get_blk(htx, htx->tail);
+
+ htx->head_addr = 0;
+ htx->end_addr = lastblk->addr;
+ htx->tail_addr = lastblk->addr+htx->data;
+ }
+ else {
+ if (addr+sz == htx->tail_addr)
+ htx->tail_addr = addr;
+ else if (addr+sz == htx->head_addr)
+ htx->head_addr = addr;
+ if (addr == htx->end_addr) {
+ if (htx->tail_addr == htx->end_addr) {
+ htx->tail_addr = htx->head_addr;
+ htx->head_addr = htx->end_addr = 0;
+ }
+ else
+ htx->end_addr += sz;
+ }
+ }
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return blk;
+}
+
+/* Looks for the HTX block containing the offset <offset>, starting at the HTX
+ * message's head. The function returns an htx_ret with the found HTX block and
+ * the position inside this block where the offset is. If the offset <offset> is
+ * outside of the HTX message, htx_ret.blk is set to NULL.
+ */
+struct htx_ret htx_find_offset(struct htx *htx, uint32_t offset)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = { .blk = NULL, .ret = 0 };
+
+ if (offset >= htx->data)
+ return htxret;
+
+ for (blk = htx_get_head_blk(htx); blk && offset; blk = htx_get_next_blk(htx, blk)) {
+ uint32_t sz = htx_get_blksz(blk);
+
+ if (offset < sz)
+ break;
+ offset -= sz;
+ }
+ htxret.blk = blk;
+ htxret.ret = offset;
+ return htxret;
+}
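+
+/* Usage sketch (illustrative): locate the byte at <offset> inside a message:
+ *
+ *   struct htx_ret r = htx_find_offset(htx, offset);
+ *   if (r.blk) {
+ *           struct ist v = htx_get_blk_value(htx, r.blk);
+ *           ...                         // the target byte is at v.ptr + r.ret
+ *   }
+ */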
+
+/* Removes all blocks after the one containing the offset <offset>. This last
+ * one may be truncated if it is a DATA block.
+ */
+void htx_truncate(struct htx *htx, uint32_t offset)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = htx_find_offset(htx, offset);
+
+ blk = htxret.blk;
+ if (blk && htxret.ret && htx_get_blk_type(blk) == HTX_BLK_DATA) {
+ htx_change_blk_value_len(htx, blk, htxret.ret);
+ blk = htx_get_next_blk(htx, blk);
+ }
+ while (blk)
+ blk = htx_remove_blk(htx, blk);
+}
+
+/* Drains <count> bytes from the HTX message <htx>. If the last block is a DATA
+ * block, it will be cut if necessary. Other blocks will be removed at once if
+ * <count> is large enough. The function returns an htx_ret with the first block
+ * remaining in the message and the amount of data drained. If everything is
+ * removed, htx_ret.blk is set to NULL.
+ */
+struct htx_ret htx_drain(struct htx *htx, uint32_t count)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = { .blk = NULL, .ret = 0 };
+
+ if (count == htx->data) {
+ uint32_t flags = (htx->flags & ~HTX_FL_FRAGMENTED); /* Preserve flags except FRAGMENTED */
+
+ htx_reset(htx);
+ htx->flags = flags; /* restore flags */
+ htxret.ret = count;
+ return htxret;
+ }
+
+ blk = htx_get_head_blk(htx);
+ while (count && blk) {
+ uint32_t sz = htx_get_blksz(blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ /* Ignore unused block */
+ if (type == HTX_BLK_UNUSED)
+ goto next;
+
+ if (sz > count) {
+ if (type == HTX_BLK_DATA) {
+ htx_cut_data_blk(htx, blk, count);
+ htxret.ret += count;
+ }
+ break;
+ }
+ count -= sz;
+ htxret.ret += sz;
+ next:
+ blk = htx_remove_blk(htx, blk);
+ }
+ htxret.blk = blk;
+
+ return htxret;
+}
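+
+/* Usage sketch (illustrative): drop <count> forwarded bytes, then resume from
+ * the first remaining block:
+ *
+ *   struct htx_ret r = htx_drain(htx, count);
+ *   ...          // r.ret bytes were removed; r.blk is the new head (or NULL)
+ */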
+
+/* Tries to append data to the last inserted block, if the type matches and if
+ * there is enough space to take it all. If the space wraps, the buffer is
+ * defragmented and a new block is inserted. If an error occurred, NULL is
+ * returned. Otherwise, on success, the updated block (or the new one) is
+ * returned. Due to its nature this function can be expensive and should be
+ * avoided whenever possible.
+ */
+struct htx_blk *htx_add_data_atonce(struct htx *htx, struct ist data)
+{
+ struct htx_blk *blk, *tailblk;
+ void *ptr;
+ uint32_t len, sz, tailroom, headroom;
+
+ if (htx->head == -1)
+ goto add_new_block;
+
+ /* Not enough space to store data */
+ if (data.len > htx_free_data_space(htx))
+ return NULL;
+
+ /* get the tail block and its size */
+ tailblk = htx_get_tail_blk(htx);
+ if (tailblk == NULL)
+ goto add_new_block;
+ sz = htx_get_blksz(tailblk);
+
+ /* Don't try to append data if the last inserted block is not of the
+ * same type */
+ if (htx_get_blk_type(tailblk) != HTX_BLK_DATA)
+ goto add_new_block;
+
+ /*
+ * Same type and enough space: append data
+ */
+ headroom = (htx->end_addr - htx->head_addr);
+ tailroom = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ BUG_ON((int32_t)headroom < 0);
+ BUG_ON((int32_t)tailroom < 0);
+
+ len = data.len;
+ if (tailblk->addr+sz == htx->tail_addr) {
+ if (data.len <= tailroom)
+ goto append_data;
+ else if (!htx->head_addr) {
+ len = tailroom;
+ goto append_data;
+ }
+ }
+ else if (tailblk->addr+sz == htx->head_addr && data.len <= headroom)
+ goto append_data;
+
+ goto add_new_block;
+
+ append_data:
+ /* Append data and update the block itself */
+ ptr = htx_get_blk_ptr(htx, tailblk);
+ memcpy(ptr+sz, data.ptr, len);
+ htx_change_blk_value_len(htx, tailblk, sz+len);
+
+ if (data.len == len) {
+ blk = tailblk;
+ goto end;
+ }
+ data = istadv(data, len);
+
+ add_new_block:
+ blk = htx_add_blk(htx, HTX_BLK_DATA, data.len);
+ if (!blk)
+ return NULL;
+
+ blk->info += data.len;
+ memcpy(htx_get_blk_ptr(htx, blk), data.ptr, data.len);
+
+ end:
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return blk;
+}
+
+/* Replaces a value part of a block by a new one. The new part can be smaller or
+ * larger than the old one. This function works for any kind of block with
+ * attached data. It returns the new block on success, otherwise it returns
+ * NULL.
+ */
+struct htx_blk *htx_replace_blk_value(struct htx *htx, struct htx_blk *blk,
+ const struct ist old, const struct ist new)
+{
+ struct ist n, v;
+ int32_t delta;
+ int ret;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+ delta = new.len - old.len;
+ ret = htx_prepare_blk_expansion(htx, blk, delta);
+ if (!ret)
+ return NULL; /* not enough space */
+
+ if (ret == 1) { /* Replace in place */
+ if (delta <= 0) {
+ /* compression: copy new data first then move the end */
+ memcpy(old.ptr, new.ptr, new.len);
+ memmove(old.ptr + new.len, istend(old),
+ istend(v) - istend(old));
+ }
+ else {
+ /* expansion: move the end first then copy new data */
+ memmove(old.ptr + new.len, istend(old),
+ istend(v) - istend(old));
+ memcpy(old.ptr, new.ptr, new.len);
+ }
+
+ /* set the new block size and update HTX message */
+ htx_set_blk_value_len(blk, v.len + delta);
+ htx->data += delta;
+ }
+ else if (ret == 2) { /* New address but no defrag */
+ void *ptr = htx_get_blk_ptr(htx, blk);
+
+ /* Copy the name, if any */
+ memcpy(ptr, n.ptr, n.len);
+ ptr += n.len;
+
+ /* Copy value before old part, if any */
+ memcpy(ptr, v.ptr, old.ptr - v.ptr);
+ ptr += old.ptr - v.ptr;
+
+ /* Copy new value */
+ memcpy(ptr, new.ptr, new.len);
+ ptr += new.len;
+
+ /* Copy value after old part, if any */
+ memcpy(ptr, istend(old), istend(v) - istend(old));
+
+ /* set the new block size and update HTX message */
+ htx_set_blk_value_len(blk, v.len + delta);
+ htx->data += delta;
+ }
+ else { /* Do a defrag first (it is always an expansion) */
+ struct htx_blk tmpblk;
+ int32_t offset;
+
+ /* use tmpblk to set new block size before defrag and to compute
+ * the offset after defrag
+ */
+ tmpblk.addr = blk->addr;
+ tmpblk.info = blk->info;
+ htx_set_blk_value_len(&tmpblk, v.len + delta);
+
+ /* htx_defrag() will take care to update the block size and the htx message */
+ blk = htx_defrag(htx, blk, tmpblk.info);
+
+ /* <blk> is now the new HTX block. Compute the offset to copy/move the payload */
+ offset = blk->addr - tmpblk.addr;
+
+ /* move the end first and copy new data
+ */
+ memmove(old.ptr + offset + new.len, old.ptr + offset + old.len,
+ istend(v) - istend(old));
+ memcpy(old.ptr + offset, new.ptr, new.len);
+ }
+ return blk;
+}
+
+/* Transfer HTX blocks from <src> to <dst>, stopping on the first block of the
+ * type <mark> (typically EOH or EOT) or when <count> bytes were moved
+ * (including payload and meta-data). It returns the number of bytes moved and
+ * the last HTX block inserted in <dst>.
+ */
+struct htx_ret htx_xfer_blks(struct htx *dst, struct htx *src, uint32_t count,
+ enum htx_blk_type mark)
+{
+ struct htx_blk *blk, *dstblk;
+ struct htx_blk *srcref, *dstref;
+ enum htx_blk_type type;
+ uint32_t info, max, sz, ret;
+
+ ret = htx_used_space(dst);
+ srcref = dstref = dstblk = NULL;
+
+ /* blocks are not removed yet from <src> HTX message to be able to
+ * rollback the transfer if all the headers/trailers are not copied.
+ */
+ for (blk = htx_get_head_blk(src); blk && count; blk = htx_get_next_blk(src, blk)) {
+ type = htx_get_blk_type(blk);
+
+ /* Ignore unused block */
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+
+ max = htx_get_max_blksz(dst, count);
+ if (!max)
+ break;
+
+ sz = htx_get_blksz(blk);
+ info = blk->info;
+ if (sz > max) {
+ /* Only DATA blocks can be partially xferred */
+ if (type != HTX_BLK_DATA)
+ break;
+ sz = max;
+ info = (type << 28) + sz;
+ }
+
+ dstblk = htx_reserve_nxblk(dst, sz);
+ if (!dstblk)
+ break;
+ dstblk->info = info;
+ memcpy(htx_get_blk_ptr(dst, dstblk), htx_get_blk_ptr(src, blk), sz);
+
+ count -= sizeof(*dstblk) + sz;
+ if (blk->info != info) {
+ /* Partial xfer: don't remove <blk> from <src> but
+ * resize its content */
+ htx_cut_data_blk(src, blk, sz);
+ break;
+ }
+
+ if (type == mark) {
+ blk = htx_get_next_blk(src, blk);
+ srcref = dstref = NULL;
+ break;
+ }
+
+ /* Save <blk> to <srcref> and <dstblk> to <dstref> when we start
+ * to xfer headers or trailers. When EOH/EOT block is reached,
+ * both are reset. It is mandatory to be able to rollback a
+ * partial transfer.
+ */
+ if (!srcref && !dstref &&
+ (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL || type == HTX_BLK_TLR)) {
+ srcref = blk;
+ dstref = dstblk;
+ }
+ else if (type == HTX_BLK_EOH || type == HTX_BLK_EOT)
+ srcref = dstref = NULL;
+ }
+
+ if (unlikely(dstref)) {
+ /* Headers or trailers part was partially xferred, so rollback
+ * the copy by removing all block between <dstref> and <dstblk>,
+ * both included. <dstblk> may be NULL.
+ */
+ while (dstref && dstref != dstblk)
+ dstref = htx_remove_blk(dst, dstref);
+ if (dstblk)
+ htx_remove_blk(dst, dstblk);
+
+ /* <dst> HTX message is empty, it means the headers or trailers
+ * part is too big to be copied at once.
+ */
+ if (htx_is_empty(dst))
+ src->flags |= HTX_FL_PARSING_ERROR;
+ }
+
+ /* Now, remove xferred blocks from <src> htx message */
+ if (!blk && !srcref) {
+ /* End of src reached, all blocks were consumed, drain all data */
+ htx_drain(src, src->data);
+ }
+ else {
+ /* Remove all blocks from the head to <blk>, or <srcref> if defined, excluded */
+ srcref = (srcref ? srcref : blk);
+ for (blk = htx_get_head_blk(src); blk && blk != srcref; blk = htx_remove_blk(src, blk));
+ }
+
+ ret = htx_used_space(dst) - ret;
+ return (struct htx_ret){.ret = ret, .blk = dstblk};
+}
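+
+/* Usage sketch (illustrative): move a whole headers part from <src> to <dst>,
+ * stopping after the end-of-headers marker:
+ *
+ *   struct htx_ret r = htx_xfer_blks(dst, src, count, HTX_BLK_EOH);
+ *   ...   // r.ret bytes (payload + meta-data) were moved; r.blk is the last
+ *         // block inserted into <dst>, or NULL if nothing could be moved
+ */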
+
+/* Replaces a header with a new one. The new header can be smaller or larger than
+ * the old one. It returns the new block on success, otherwise it returns NULL.
+ * The header name is always lower cased.
+ */
+struct htx_blk *htx_replace_header(struct htx *htx, struct htx_blk *blk,
+ const struct ist name, const struct ist value)
+{
+ enum htx_blk_type type;
+ void *ptr;
+ int32_t delta;
+ int ret;
+
+ type = htx_get_blk_type(blk);
+ if (type != HTX_BLK_HDR)
+ return NULL;
+
+ delta = name.len + value.len - htx_get_blksz(blk);
+ ret = htx_prepare_blk_expansion(htx, blk, delta);
+ if (!ret)
+ return NULL; /* not enough space */
+
+
+ /* Replacing in place or at a new address amounts to the same thing. We
+ * replace the whole header (name+value). Just take care to defrag the
+ * message if necessary. */
+ if (ret == 3)
+ blk = htx_defrag(htx, blk, (type << 28) + (value.len << 8) + name.len);
+ else {
+ /* Set the new block size and update HTX message */
+ blk->info = (type << 28) + (value.len << 8) + name.len;
+ htx->data += delta;
+ }
+
+ /* Finally, copy data. */
+ ptr = htx_get_blk_ptr(htx, blk);
+ ist2bin_lc(ptr, name);
+ memcpy(ptr + name.len, value.ptr, value.len);
+ return blk;
+}
+
+/* Replaces the parts of the start-line. It returns the new start-line on
+ * success, otherwise it returns NULL. It is the caller's responsibility to update
+ * sl->info, if necessary.
+ */
+struct htx_sl *htx_replace_stline(struct htx *htx, struct htx_blk *blk, const struct ist p1,
+ const struct ist p2, const struct ist p3)
+{
+ enum htx_blk_type type;
+ struct htx_sl *sl;
+ struct htx_sl tmp; /* used to save sl->info and sl->flags */
+ uint32_t sz;
+ int32_t delta;
+ int ret;
+
+ type = htx_get_blk_type(blk);
+ if (type != HTX_BLK_REQ_SL && type != HTX_BLK_RES_SL)
+ return NULL;
+
+ /* Save start-line info and flags */
+ sl = htx_get_blk_ptr(htx, blk);
+ tmp.info = sl->info;
+ tmp.flags = sl->flags;
+
+ sz = htx_get_blksz(blk);
+ delta = sizeof(*sl) + p1.len + p2.len + p3.len - sz;
+ ret = htx_prepare_blk_expansion(htx, blk, delta);
+ if (!ret)
+ return NULL; /* not enough space */
+
+ /* Replacing in place or at a new address amounts to the same thing. We
+ * replace the whole start-line. Just take care to defrag the message if
+ * necessary. */
+ if (ret == 3) {
+ blk = htx_defrag(htx, blk, (type << 28) + sz + delta);
+ }
+ else {
+ /* Set the new block size and update HTX message */
+ blk->info = (type << 28) + sz + delta;
+ htx->data += delta;
+ }
+
+ /* Restore start-line info and flags and copy parts of the start-line */
+ sl = htx_get_blk_ptr(htx, blk);
+ sl->info = tmp.info;
+ sl->flags = tmp.flags;
+
+ HTX_SL_P1_LEN(sl) = p1.len;
+ HTX_SL_P2_LEN(sl) = p2.len;
+ HTX_SL_P3_LEN(sl) = p3.len;
+
+ memcpy(HTX_SL_P1_PTR(sl), p1.ptr, p1.len);
+ memcpy(HTX_SL_P2_PTR(sl), p2.ptr, p2.len);
+ memcpy(HTX_SL_P3_PTR(sl), p3.ptr, p3.len);
+
+ return sl;
+}
+
+/* Reserves the maximum possible size for an HTX data block, by extending an
+ * existing one or by creating a new one. It returns a compound result with the
+ * HTX block and the position where new data must be inserted (0 for a new
+ * block). If an error occurs or if there is no space left, NULL is returned
+ * instead of a pointer to an HTX block.
+ */
+struct htx_ret htx_reserve_max_data(struct htx *htx)
+{
+ struct htx_blk *blk, *tailblk;
+ uint32_t sz, room;
+ int32_t len = htx_free_data_space(htx);
+
+ if (htx->head == -1)
+ goto rsv_new_block;
+
+ if (!len)
+ return (struct htx_ret){.ret = 0, .blk = NULL};
+
+ /* get the tail and head block */
+ tailblk = htx_get_tail_blk(htx);
+ if (tailblk == NULL)
+ goto rsv_new_block;
+ sz = htx_get_blksz(tailblk);
+
+ /* Don't try to append data if the last inserted block is not of the
+ * same type */
+ if (htx_get_blk_type(tailblk) != HTX_BLK_DATA)
+ goto rsv_new_block;
+
+ /*
+ * Same type and enough space: append data
+ */
+ if (!htx->head_addr) {
+ if (tailblk->addr+sz != htx->tail_addr)
+ goto rsv_new_block;
+ room = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ }
+ else {
+ if (tailblk->addr+sz != htx->head_addr)
+ goto rsv_new_block;
+ room = (htx->end_addr - htx->head_addr);
+ }
+ BUG_ON((int32_t)room < 0);
+ if (room < len)
+ len = room;
+
+ htx_change_blk_value_len(htx, tailblk, sz+len);
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return (struct htx_ret){.ret = sz, .blk = tailblk};
+
+ rsv_new_block:
+ blk = htx_add_blk(htx, HTX_BLK_DATA, len);
+ if (!blk)
+ return (struct htx_ret){.ret = 0, .blk = NULL};
+ blk->info += len;
+ return (struct htx_ret){.ret = 0, .blk = blk};
+}
+
+/* Adds an HTX block of type DATA in <htx>. It first tries to append data if
+ * possible. It returns the number of bytes consumed from <data>, which may be
+ * zero if nothing could be copied.
+ */
+size_t htx_add_data(struct htx *htx, const struct ist data)
+{
+ struct htx_blk *blk, *tailblk;
+ void *ptr;
+ uint32_t sz, room;
+ int32_t len = data.len;
+
+ /* Not enough space to store data */
+ if (len > htx_free_data_space(htx))
+ len = htx_free_data_space(htx);
+
+ if (!len)
+ return 0;
+
+ if (htx->head == -1)
+ goto add_new_block;
+
+ /* get the tail and head block */
+ tailblk = htx_get_tail_blk(htx);
+ if (tailblk == NULL)
+ goto add_new_block;
+ sz = htx_get_blksz(tailblk);
+
+ /* Don't try to append data if the last inserted block is not of the
+ * same type */
+ if (htx_get_blk_type(tailblk) != HTX_BLK_DATA)
+ goto add_new_block;
+
+ /*
+ * Same type and enough space: append data
+ */
+ if (!htx->head_addr) {
+ if (tailblk->addr+sz != htx->tail_addr)
+ goto add_new_block;
+ room = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ }
+ else {
+ if (tailblk->addr+sz != htx->head_addr)
+ goto add_new_block;
+ room = (htx->end_addr - htx->head_addr);
+ }
+ BUG_ON((int32_t)room < 0);
+ if (room < len)
+ len = room;
+
+ /* Append data and update the block itself */
+ ptr = htx_get_blk_ptr(htx, tailblk);
+ memcpy(ptr + sz, data.ptr, len);
+ htx_change_blk_value_len(htx, tailblk, sz+len);
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return len;
+
+ add_new_block:
+ blk = htx_add_blk(htx, HTX_BLK_DATA, len);
+ if (!blk)
+ return 0;
+
+ blk->info += len;
+ memcpy(htx_get_blk_ptr(htx, blk), data.ptr, len);
+ return len;
+}
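+
+/* Usage sketch (illustrative): htx_add_data() may copy less than requested, so
+ * producers typically advance by the consumed length and retry later:
+ *
+ *   size_t sent = htx_add_data(htx, data);
+ *   data = istadv(data, sent);
+ */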
+
+
+/* Adds an HTX block of type DATA in <htx> just after all other DATA
+ * blocks. Because it relies on htx_add_data_atonce(), the data may be appended
+ * to an existing DATA block if possible. But, if the function succeeds, it
+ * will be the last DATA block in all cases. If an error occurs, NULL is returned. Otherwise,
+ * on success, the updated block (or the new one) is returned.
+ */
+struct htx_blk *htx_add_last_data(struct htx *htx, struct ist data)
+{
+ struct htx_blk *blk, *pblk;
+
+ blk = htx_add_data_atonce(htx, data);
+ if (!blk)
+ return NULL;
+
+ for (pblk = htx_get_prev_blk(htx, blk); pblk; pblk = htx_get_prev_blk(htx, pblk)) {
+ if (htx_get_blk_type(pblk) <= HTX_BLK_DATA)
+ break;
+
+ /* Swap .addr and .info fields */
+ blk->addr ^= pblk->addr; pblk->addr ^= blk->addr; blk->addr ^= pblk->addr;
+ blk->info ^= pblk->info; pblk->info ^= blk->info; blk->info ^= pblk->info;
+
+ if (blk->addr == pblk->addr)
+ blk->addr += htx_get_blksz(pblk);
+ blk = pblk;
+ }
+
+ return blk;
+}
+
+/* Moves the block <blk> just before the block <ref>. Both blocks must be in the
+ * HTX message <htx> and <blk> must be placed after <ref>. Pointers to these
+ * blocks are updated to remain valid after the move. */
+void htx_move_blk_before(struct htx *htx, struct htx_blk **blk, struct htx_blk **ref)
+{
+ struct htx_blk *cblk, *pblk;
+
+ cblk = *blk;
+ for (pblk = htx_get_prev_blk(htx, cblk); pblk; pblk = htx_get_prev_blk(htx, pblk)) {
+ /* Swap .addr and .info fields */
+ cblk->addr ^= pblk->addr; pblk->addr ^= cblk->addr; cblk->addr ^= pblk->addr;
+ cblk->info ^= pblk->info; pblk->info ^= cblk->info; cblk->info ^= pblk->info;
+
+ if (cblk->addr == pblk->addr)
+ cblk->addr += htx_get_blksz(pblk);
+ if (pblk == *ref)
+ break;
+ cblk = pblk;
+ }
+ *blk = cblk;
+ *ref = pblk;
+}
+
+/* Appends the HTX message <src> to the HTX message <dst>. It returns 1 on
+ * success and 0 on error. Either the whole message is copied or nothing. If an
+ * error occurs, <dst> is truncated back to its original length, removing all
+ * blocks already copied from <src>.
+ */
+int htx_append_msg(struct htx *dst, const struct htx *src)
+{
+ struct htx_blk *blk, *newblk;
+ enum htx_blk_type type;
+ uint32_t blksz, offset = dst->data;
+
+ for (blk = htx_get_head_blk(src); blk; blk = htx_get_next_blk(src, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ blksz = htx_get_blksz(blk);
+ newblk = htx_add_blk(dst, type, blksz);
+ if (!newblk)
+ goto error;
+ newblk->info = blk->info;
+ memcpy(htx_get_blk_ptr(dst, newblk), htx_get_blk_ptr(src, blk), blksz);
+ }
+
+ return 1;
+
+ error:
+ htx_truncate(dst, offset);
+ return 0;
+}
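+
+/* Usage sketch (illustrative): all-or-nothing append of a parsed message:
+ *
+ *   if (!htx_append_msg(dst, src))
+ *           ...   // <dst> was restored to its previous state, report the error
+ */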
diff --git a/src/init.c b/src/init.c
new file mode 100644
index 0000000..6367ac5
--- /dev/null
+++ b/src/init.c
@@ -0,0 +1,249 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <haproxy/init.h>
+#include <haproxy/list.h>
+
+/* These functions are called just before a config validity check, which means
+ * they are suited for cases where we need to generate part of the
+ * configuration. It could be used for example to generate a proxy with
+ * multiple servers using the configuration parser itself. At this step the
+ * trash buffers are allocated.
+ * The functions must return 0 on success, or a combination
+ * of ERR_* flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause
+ * an immediate exit, so the function must have emitted any useful error.
+ */
+struct list pre_check_list = LIST_HEAD_INIT(pre_check_list);
+
+/* These functions are called just after the point where the program exits
+ * after a config validity check, so they are generally suited for resource
+ * allocation and slow initializations that should be skipped during basic
+ * config checks. The functions must return 0 on success, or a combination
+ * of ERR_* flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause
+ * an immediate exit, so the function must have emitted any useful error.
+ */
+struct list post_check_list = LIST_HEAD_INIT(post_check_list);
+
+/* These functions are called for each proxy just after the config validity
+ * check. The functions must return 0 on success, or a combination of ERR_*
+ * flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause an immediate
+ * exit, so the function must have emitted any useful error.
+ */
+struct list post_proxy_check_list = LIST_HEAD_INIT(post_proxy_check_list);
+
+/* These functions are called for each server just after the config validity
+ * check. The functions must return 0 on success, or a combination of ERR_*
+ * flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause an immediate
+ * exit, so the function must have emitted any useful error.
+ */
+struct list post_server_check_list = LIST_HEAD_INIT(post_server_check_list);
+
+/* These functions are called for each thread just after the thread creation
+ * and before running the init functions. They should be used to do per-thread
+ * (re-)allocations that are needed by subsequent functions. They must return 0
+ * if an error occurred. */
+struct list per_thread_alloc_list = LIST_HEAD_INIT(per_thread_alloc_list);
+
+/* These functions are called for each thread just after the thread creation
+ * and before running the scheduler. They should be used to do per-thread
+ * initializations. They must return 0 if an error occurred. */
+struct list per_thread_init_list = LIST_HEAD_INIT(per_thread_init_list);
+
+/* These functions are called when freeing the global sections at the end of
+ * deinit, after everything is stopped. They don't return anything. They should
+ * not release shared resources that are possibly used by other deinit
+ * functions, only close/release what is private. Use the per_thread_free_list
+ * to release shared resources.
+ */
+struct list post_deinit_list = LIST_HEAD_INIT(post_deinit_list);
+
+/* These functions are called when freeing a proxy during the deinit, after
+ * everything is stopped. They don't return anything. They should not release
+ * the proxy itself or any shared resources that are possibly used by other
+ * deinit functions, only close/release what is private.
+ */
+struct list proxy_deinit_list = LIST_HEAD_INIT(proxy_deinit_list);
+
+/* These functions are called when freeing a server during the deinit, after
+ * everything is stopped. They don't return anything. They should not release
+ * the server itself or any shared resources that are possibly used by other
+ * deinit functions, only close/release what is private.
+ */
+struct list server_deinit_list = LIST_HEAD_INIT(server_deinit_list);
+
+/* These functions are called when freeing the global sections at the end of
+ * deinit, after the thread deinit functions, to release unneeded memory
+ * allocations. They don't return anything, and they work in best effort mode
+ * as their sole goal is to make valgrind mostly happy.
+ */
+struct list per_thread_free_list = LIST_HEAD_INIT(per_thread_free_list);
+
+/* These functions are called for each thread just after the scheduler loop and
+ * before exiting the thread. They don't return anything and, as for post-deinit
+ * functions, they work in best effort mode as their sole goal is to make
+ * valgrind mostly happy. */
+struct list per_thread_deinit_list = LIST_HEAD_INIT(per_thread_deinit_list);
+
+/* used to register some initialization functions to call before the checks. */
+void hap_register_pre_check(int (*fct)())
+{
+ struct pre_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&pre_check_list, &b->list);
+}
+
+/* used to register some initialization functions to call after the checks. */
+void hap_register_post_check(int (*fct)())
+{
+ struct post_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_check_list, &b->list);
+}
+
+/* used to register some initialization functions to call for each proxy after
+ * the checks.
+ */
+void hap_register_post_proxy_check(int (*fct)(struct proxy *))
+{
+ struct post_proxy_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_proxy_check_list, &b->list);
+}
+
+/* used to register some initialization functions to call for each server after
+ * the checks.
+ */
+void hap_register_post_server_check(int (*fct)(struct server *))
+{
+ struct post_server_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_server_check_list, &b->list);
+}
+
+/* used to register some de-initialization functions to call after everything
+ * has stopped.
+ */
+void hap_register_post_deinit(void (*fct)())
+{
+ struct post_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_deinit_list, &b->list);
+}
+
+/* used to register some per proxy de-initialization functions to call after
+ * everything has stopped.
+ */
+void hap_register_proxy_deinit(void (*fct)(struct proxy *))
+{
+ struct proxy_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&proxy_deinit_list, &b->list);
+}
+
+/* used to register some per server de-initialization functions to call after
+ * everything has stopped.
+ */
+void hap_register_server_deinit(void (*fct)(struct server *))
+{
+ struct server_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&server_deinit_list, &b->list);
+}
+
+/* used to register some allocation functions to call for each thread. */
+void hap_register_per_thread_alloc(int (*fct)())
+{
+ struct per_thread_alloc_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_alloc_list, &b->list);
+}
+
+/* used to register some initialization functions to call for each thread. */
+void hap_register_per_thread_init(int (*fct)())
+{
+ struct per_thread_init_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_init_list, &b->list);
+}
+
+/* used to register some de-initialization functions to call for each thread. */
+void hap_register_per_thread_deinit(void (*fct)())
+{
+ struct per_thread_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_deinit_list, &b->list);
+}
+
+/* used to register some free functions to call for each thread. */
+void hap_register_per_thread_free(void (*fct)())
+{
+ struct per_thread_free_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_free_list, &b->list);
+}
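+
+/* Usage sketch (illustrative, not part of the original patch): a module
+ * typically wires its hooks through the REGISTER_* initcall wrappers, the
+ * same way jwt.c below uses REGISTER_POST_DEINIT(). A hypothetical module
+ * performing a post-config check with a matching cleanup could look like
+ * this, where my_post_check(), my_cleanup() and my_resource_*() are made-up
+ * names:
+ *
+ *	static int my_post_check()
+ *	{
+ *		if (!my_resource_init())
+ *			return ERR_ALERT | ERR_FATAL;
+ *		return ERR_NONE;
+ *	}
+ *
+ *	static void my_cleanup()
+ *	{
+ *		my_resource_release();
+ *	}
+ *
+ *	REGISTER_POST_CHECK(my_post_check);
+ *	REGISTER_POST_DEINIT(my_cleanup);
+ */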
diff --git a/src/jwt.c b/src/jwt.c
new file mode 100644
index 0000000..b901588
--- /dev/null
+++ b/src/jwt.c
@@ -0,0 +1,461 @@
+/*
+ * JSON Web Token (JWT) processing
+ *
+ * Copyright 2021 HAProxy Technologies
+ * Remi Tricot-Le Breton <rlebreton@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <import/ebmbtree.h>
+#include <import/ebsttree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/tools.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/base64.h>
+#include <haproxy/jwt.h>
+#include <haproxy/buf.h>
+
+
+#ifdef USE_OPENSSL
+/* Tree into which the public certificates used to validate JWTs will be stored. */
+static struct eb_root jwt_cert_tree = EB_ROOT_UNIQUE;
+
+/*
+ * The possible algorithm strings that can be found in a JWS's JOSE header are
+ * defined in section 3.1 of RFC7518.
+ */
+enum jwt_alg jwt_parse_alg(const char *alg_str, unsigned int alg_len)
+{
+ enum jwt_alg alg = JWT_ALG_DEFAULT;
+
+ /* Algorithms are all 5 characters long apart from "none". */
+ if (alg_len < sizeof("HS256")-1) {
+ if (alg_len == sizeof("none")-1 && strncmp("none", alg_str, alg_len) == 0)
+ alg = JWS_ALG_NONE;
+ return alg;
+ }
+
+ if (alg == JWT_ALG_DEFAULT) {
+ switch(*alg_str++) {
+ case 'H':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_HS256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_HS384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_HS512;
+ break;
+ case 'R':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_RS256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_RS384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_RS512;
+ break;
+ case 'E':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_ES256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_ES384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_ES512;
+ break;
+ case 'P':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_PS256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_PS384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_PS512;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return alg;
+}
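+
+/* For example (illustrative): jwt_parse_alg("RS384", 5) takes the 'R' branch
+ * above, matches the "S384" suffix and returns JWS_ALG_RS384, while an
+ * unknown string such as "XX123" falls through every branch and returns
+ * JWT_ALG_DEFAULT.
+ */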
+
+/*
+ * Split a JWT into its separate dot-separated parts.
+ * Since only JWS following the Compact Serialization format are managed for
+ * now, we don't need to manage more than three subparts in the tokens.
+ * See section 3.1 of RFC7515 for more information about JWS Compact
+ * Serialization.
+ * Returns 0 in case of success.
+ */
+int jwt_tokenize(const struct buffer *jwt, struct jwt_item *items, unsigned int *item_num)
+{
+ char *ptr = jwt->area;
+ char *jwt_end = jwt->area + jwt->data;
+ unsigned int index = 0;
+ unsigned int length = 0;
+
+ if (index < *item_num) {
+ items[index].start = ptr;
+ items[index].length = 0;
+ }
+
+ while (index < *item_num && ptr < jwt_end) {
+ if (*ptr++ == '.') {
+ items[index++].length = length;
+
+ if (index == *item_num)
+ return -1;
+ items[index].start = ptr;
+ items[index].length = 0;
+ length = 0;
+ } else
+ ++length;
+ }
+
+ if (index < *item_num)
+ items[index].length = length;
+
+ *item_num = (index+1);
+
+ return (ptr != jwt_end);
+}
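+
+/* Worked example (illustrative): with jwt = "xxx.yyyy.zz" and *item_num = 3
+ * on input, the function fills items[0] = {start of "xxx", 3},
+ * items[1] = {start of "yyyy", 4} and items[2] = {start of "zz", 2}, leaves
+ * *item_num at 3 and returns 0. A token containing more than two dots makes
+ * it stop early and return -1.
+ */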
+
+/*
+ * Parse a public certificate and insert it into the jwt_cert_tree.
+ * Returns 0 in case of success.
+ */
+int jwt_tree_load_cert(char *path, int pathlen, char **err)
+{
+ int retval = -1;
+ struct jwt_cert_tree_entry *entry = NULL;
+ EVP_PKEY *pkey = NULL;
+ BIO *bio = NULL;
+
+ entry = calloc(1, sizeof(*entry) + pathlen + 1);
+ if (!entry) {
+ memprintf(err, "%sunable to allocate memory (jwt_cert_tree_entry).\n", err && *err ? *err : "");
+ return -1;
+ }
+ memcpy(entry->path, path, pathlen + 1);
+
+ if (ebst_insert(&jwt_cert_tree, &entry->node) != &entry->node) {
+ free(entry);
+ return 0; /* Entry already in the tree */
+ }
+
+ bio = BIO_new(BIO_s_file());
+ if (!bio) {
+ memprintf(err, "%sunable to allocate memory (BIO).\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (BIO_read_filename(bio, path) == 1) {
+
+ pkey = PEM_read_bio_PUBKEY(bio, NULL, NULL, NULL);
+
+ if (!pkey) {
+ memprintf(err, "%sfile not found (%s)\n", err && *err ? *err : "", path);
+ goto end;
+ }
+
+ entry->pkey = pkey;
+ retval = 0;
+ }
+
+end:
+ if (retval) {
+ /* Some error happened during pkey parsing, remove the already
+ * inserted node from the tree and free it.
+ */
+ ebmb_delete(&entry->node);
+ free(entry);
+ }
+ BIO_free(bio);
+ return retval;
+}
+
+/*
+ * Calculate the HMAC signature of a specific JWT and check that it matches the
+ * one included in the token.
+ * Returns JWT_VRFY_OK in case of success.
+ */
+static enum jwt_vrfy_status
+jwt_jwsverify_hmac(const struct jwt_ctx *ctx, const struct buffer *decoded_signature)
+{
+ const EVP_MD *evp = NULL;
+ unsigned char signature[EVP_MAX_MD_SIZE];
+ unsigned int signature_length = 0;
+ unsigned char *hmac_res = NULL;
+ enum jwt_vrfy_status retval = JWT_VRFY_KO;
+
+ switch(ctx->alg) {
+ case JWS_ALG_HS256:
+ evp = EVP_sha256();
+ break;
+ case JWS_ALG_HS384:
+ evp = EVP_sha384();
+ break;
+ case JWS_ALG_HS512:
+ evp = EVP_sha512();
+ break;
+ default: break;
+ }
+
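+ /* The signed input covers the base64url-encoded JOSE header and the
+ * claims plus the '.' separating them, hence the +1 added to the total
+ * length below (ctx->jose.start points to the beginning of the token).
+ */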
+ hmac_res = HMAC(evp, ctx->key, ctx->key_length, (const unsigned char*)ctx->jose.start,
+ ctx->jose.length + ctx->claims.length + 1, signature, &signature_length);
+
+ if (hmac_res && signature_length == decoded_signature->data &&
+ (CRYPTO_memcmp(decoded_signature->area, signature, signature_length) == 0))
+ retval = JWT_VRFY_OK;
+
+ return retval;
+}
+
+/*
+ * Convert a JWT ECDSA signature (R and S parameters concatenated, see section
+ * 3.4 of RFC7518) into an ECDSA_SIG that can be fed back into OpenSSL's digest
+ * verification functions.
+ * Returns 0 in case of success.
+ */
+static int convert_ecdsa_sig(const struct jwt_ctx *ctx, EVP_PKEY *pkey, struct buffer *signature)
+{
+ int retval = 0;
+ ECDSA_SIG *ecdsa_sig = NULL;
+ BIGNUM *ec_R = NULL, *ec_S = NULL;
+ unsigned int bignum_len;
+ unsigned char *p;
+
+ ecdsa_sig = ECDSA_SIG_new();
+ if (!ecdsa_sig) {
+ retval = JWT_VRFY_OUT_OF_MEMORY;
+ goto end;
+ }
+
+ if (b_data(signature) % 2) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+ bignum_len = b_data(signature) / 2;
+
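+ /* Illustrative sizes: for ES256 the raw JWS signature is 64 bytes, so R
+ * and S are 32 bytes each; the DER-encoded ECDSA_SIG rewritten into the
+ * buffer below is slightly larger (up to roughly 72 bytes).
+ */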
+ ec_R = BN_bin2bn((unsigned char*)b_orig(signature), bignum_len, NULL);
+ ec_S = BN_bin2bn((unsigned char *)(b_orig(signature) + bignum_len), bignum_len, NULL);
+
+ if (!ec_R || !ec_S) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+ /* Build ecdsa out of R and S values. */
+ ECDSA_SIG_set0(ecdsa_sig, ec_R, ec_S);
+
+ p = (unsigned char*)signature->area;
+
+ signature->data = i2d_ECDSA_SIG(ecdsa_sig, &p);
+ if (signature->data == 0) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+end:
+ ECDSA_SIG_free(ecdsa_sig);
+ return retval;
+}
+
+/*
+ * Check that the signature included in a JWT signed via RSA or ECDSA is valid
+ * and can be verified thanks to a given public certificate.
+ * Returns JWT_VRFY_OK in case of success.
+ */
+static enum jwt_vrfy_status
+jwt_jwsverify_rsa_ecdsa(const struct jwt_ctx *ctx, struct buffer *decoded_signature)
+{
+ const EVP_MD *evp = NULL;
+ EVP_MD_CTX *evp_md_ctx;
+ enum jwt_vrfy_status retval = JWT_VRFY_KO;
+ struct ebmb_node *eb;
+ struct jwt_cert_tree_entry *entry = NULL;
+ int is_ecdsa = 0;
+
+ switch(ctx->alg) {
+ case JWS_ALG_RS256:
+ evp = EVP_sha256();
+ break;
+ case JWS_ALG_RS384:
+ evp = EVP_sha384();
+ break;
+ case JWS_ALG_RS512:
+ evp = EVP_sha512();
+ break;
+
+ case JWS_ALG_ES256:
+ evp = EVP_sha256();
+ is_ecdsa = 1;
+ break;
+ case JWS_ALG_ES384:
+ evp = EVP_sha384();
+ is_ecdsa = 1;
+ break;
+ case JWS_ALG_ES512:
+ evp = EVP_sha512();
+ is_ecdsa = 1;
+ break;
+ default: break;
+ }
+
+ evp_md_ctx = EVP_MD_CTX_new();
+ if (!evp_md_ctx)
+ return JWT_VRFY_OUT_OF_MEMORY;
+
+ eb = ebst_lookup(&jwt_cert_tree, ctx->key);
+
+ if (!eb) {
+ retval = JWT_VRFY_UNKNOWN_CERT;
+ goto end;
+ }
+
+ entry = ebmb_entry(eb, struct jwt_cert_tree_entry, node);
+
+ if (!entry->pkey) {
+ retval = JWT_VRFY_UNKNOWN_CERT;
+ goto end;
+ }
+
+ /*
+ * ECXXX signatures are a direct concatenation of the (R, S) pair and
+ * need to be converted back to asn.1 in order for verify operations to
+ * work with OpenSSL.
+ */
+ if (is_ecdsa) {
+ int conv_retval = convert_ecdsa_sig(ctx, entry->pkey, decoded_signature);
+ if (conv_retval != 0) {
+ retval = conv_retval;
+ goto end;
+ }
+ }
+
+ if (EVP_DigestVerifyInit(evp_md_ctx, NULL, evp, NULL, entry->pkey) == 1 &&
+ EVP_DigestVerifyUpdate(evp_md_ctx, (const unsigned char*)ctx->jose.start,
+ ctx->jose.length + ctx->claims.length + 1) == 1 &&
+ EVP_DigestVerifyFinal(evp_md_ctx, (const unsigned char*)decoded_signature->area, decoded_signature->data) == 1) {
+ retval = JWT_VRFY_OK;
+ }
+
+end:
+ EVP_MD_CTX_free(evp_md_ctx);
+ return retval;
+}
+
+/*
+ * Check that the <token> that was signed via algorithm <alg> using the <key>
+ * (either an HMAC secret or the path to a public certificate) has a valid
+ * signature.
+ * Returns JWT_VRFY_OK in case of success.
+ */
+enum jwt_vrfy_status jwt_verify(const struct buffer *token, const struct buffer *alg,
+ const struct buffer *key)
+{
+ struct jwt_item items[JWT_ELT_MAX] = { { 0 } };
+ unsigned int item_num = JWT_ELT_MAX;
+ struct buffer *decoded_sig = NULL;
+ struct jwt_ctx ctx = {};
+ enum jwt_vrfy_status retval = JWT_VRFY_KO;
+ int ret;
+
+ ctx.alg = jwt_parse_alg(alg->area, alg->data);
+
+ if (ctx.alg == JWT_ALG_DEFAULT)
+ return JWT_VRFY_UNKNOWN_ALG;
+
+ if (jwt_tokenize(token, items, &item_num))
+ return JWT_VRFY_INVALID_TOKEN;
+
+ if (item_num != JWT_ELT_MAX)
+ if (ctx.alg != JWS_ALG_NONE || item_num != JWT_ELT_SIG)
+ return JWT_VRFY_INVALID_TOKEN;
+
+ ctx.jose = items[JWT_ELT_JOSE];
+ ctx.claims = items[JWT_ELT_CLAIMS];
+ ctx.signature = items[JWT_ELT_SIG];
+
+ /* "alg" is "none", the signature must be empty for the JWS to be valid. */
+ if (ctx.alg == JWS_ALG_NONE) {
+ return (ctx.signature.length == 0) ? JWT_VRFY_OK : JWT_VRFY_KO;
+ }
+
+ if (ctx.signature.length == 0)
+ return JWT_VRFY_INVALID_TOKEN;
+
+ decoded_sig = alloc_trash_chunk();
+ if (!decoded_sig)
+ return JWT_VRFY_OUT_OF_MEMORY;
+
+ ret = base64urldec(ctx.signature.start, ctx.signature.length,
+ decoded_sig->area, decoded_sig->size);
+ if (ret == -1) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+ decoded_sig->data = ret;
+ ctx.key = key->area;
+ ctx.key_length = key->data;
+
+ /* We have all three sections, signature calculation can begin. */
+
+ switch(ctx.alg) {
+
+ case JWS_ALG_HS256:
+ case JWS_ALG_HS384:
+ case JWS_ALG_HS512:
+ /* HMAC + SHA-XXX */
+ retval = jwt_jwsverify_hmac(&ctx, decoded_sig);
+ break;
+ case JWS_ALG_RS256:
+ case JWS_ALG_RS384:
+ case JWS_ALG_RS512:
+ case JWS_ALG_ES256:
+ case JWS_ALG_ES384:
+ case JWS_ALG_ES512:
+ /* RSASSA-PKCS1-v1_5 + SHA-XXX */
+ /* ECDSA using P-XXX and SHA-XXX */
+ retval = jwt_jwsverify_rsa_ecdsa(&ctx, decoded_sig);
+ break;
+ case JWS_ALG_PS256:
+ case JWS_ALG_PS384:
+ case JWS_ALG_PS512:
+ default:
+ /* RSASSA-PSS using SHA-XXX and MGF1 with SHA-XXX */
+
+ /* Not managed yet */
+ retval = JWT_VRFY_UNMANAGED_ALG;
+ break;
+ }
+
+end:
+ free_trash_chunk(decoded_sig);
+
+ return retval;
+}
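+
+/* Configuration sketch (illustrative; the exact converter and fetch syntax
+ * should be checked against the official documentation): this entry point
+ * backs the "jwt_verify" sample converter, which can be used along these
+ * lines to reject requests carrying an invalid bearer token:
+ *
+ *	http-request set-var(txn.bearer) http_auth_bearer
+ *	http-request deny unless { var(txn.bearer),jwt_verify("RS256","/path/to/pubkey.pem") -m int 1 }
+ */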
+
+static void jwt_deinit(void)
+{
+ struct ebmb_node *node = NULL;
+ struct jwt_cert_tree_entry *entry = NULL;
+
+ node = ebmb_first(&jwt_cert_tree);
+ while (node) {
+ entry = ebmb_entry(node, struct jwt_cert_tree_entry, node);
+ ebmb_delete(node);
+ EVP_PKEY_free(entry->pkey);
+ ha_free(&entry);
+ node = ebmb_first(&jwt_cert_tree);
+ }
+}
+REGISTER_POST_DEINIT(jwt_deinit);
+
+
+#endif /* USE_OPENSSL */
diff --git a/src/lb_chash.c b/src/lb_chash.c
new file mode 100644
index 0000000..023219c
--- /dev/null
+++ b/src/lb_chash.c
@@ -0,0 +1,517 @@
+/*
+ * Consistent Hash implementation
+ * Please consult this very detailed article for more information:
+ * http://www.spiteful.com/2008/03/17/programmers-toolbox-part-3-consistent-hashing/
+ *
+ * Our implementation has to support both weighted hashing and weighted round
+ * robin because we'll use it to replace the previous map-based implementation
+ * which offered both algorithms.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/errors.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+#include <haproxy/tools.h>
+
+/* Return next tree node after <node> which must still be in the tree, or be
+ * NULL. Lookup wraps around the end to the beginning. If the next node is the
+ * same node, return NULL. This is designed to find a valid next node before
+ * deleting one from the tree.
+ */
+static inline struct eb32_node *chash_skip_node(struct eb_root *root, struct eb32_node *node)
+{
+ struct eb32_node *stop = node;
+
+ if (!node)
+ return NULL;
+ node = eb32_next(node);
+ if (!node)
+ node = eb32_first(root);
+ if (node == stop)
+ return NULL;
+ return node;
+}
+
+/* Remove all of a server's entries from its tree. This may be used when
+ * setting a server down.
+ */
+static inline void chash_dequeue_srv(struct server *s)
+{
+ while (s->lb_nodes_now > 0) {
+ if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
+ s->lb_nodes_now = s->lb_nodes_tot;
+ s->lb_nodes_now--;
+ if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
+ s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
+ eb32_delete(&s->lb_nodes[s->lb_nodes_now].node);
+ }
+}
+
+/* Adjust the number of entries of a server in its tree. The server must appear
+ * as many times as its weight indicates it. If it's there too often, we remove
+ * the last occurrences. If it's not there enough, we add more occurrences. To
+ * remove a server from the tree, call this once its next_eweight is set to 0.
+ *
+ * The server's lock and the lbprm's lock must be held.
+ */
+static inline void chash_queue_dequeue_srv(struct server *s)
+{
+ while (s->lb_nodes_now > s->next_eweight) {
+ if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
+ s->lb_nodes_now = s->lb_nodes_tot;
+ s->lb_nodes_now--;
+ if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
+ s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
+ eb32_delete(&s->lb_nodes[s->lb_nodes_now].node);
+ }
+
+ /* Attempt to increase the total number of nodes, if the user
+ * increased the weight beyond the original weight
+ */
+ if (s->lb_nodes_tot < s->next_eweight) {
+ struct tree_occ *new_nodes;
+
+ /* First we need to remove all server's entries from its tree
+ * because the realloc will change all nodes pointers */
+ chash_dequeue_srv(s);
+
+ new_nodes = realloc(s->lb_nodes, s->next_eweight * sizeof(*new_nodes));
+ if (new_nodes) {
+ unsigned int j;
+
+ s->lb_nodes = new_nodes;
+ memset(&s->lb_nodes[s->lb_nodes_tot], 0,
+ (s->next_eweight - s->lb_nodes_tot) * sizeof(*s->lb_nodes));
+ for (j = s->lb_nodes_tot; j < s->next_eweight; j++) {
+ s->lb_nodes[j].server = s;
+ s->lb_nodes[j].node.key = full_hash(s->puid * SRV_EWGHT_RANGE + j);
+ }
+ s->lb_nodes_tot = s->next_eweight;
+ }
+ }
+ while (s->lb_nodes_now < s->next_eweight) {
+ if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
+ break;
+ if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
+ s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
+ eb32_insert(s->lb_tree, &s->lb_nodes[s->lb_nodes_now].node);
+ s->lb_nodes_now++;
+ }
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm lock will be used.
+ */
+static void chash_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck -= srv->cur_eweight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact -= srv->cur_eweight;
+ p->srv_act--;
+ }
+
+ chash_dequeue_srv(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ out_update_state:
+ srv_lb_commit_status(srv);
+
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm lock will be used.
+ */
+static void chash_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck += srv->next_eweight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ p->lbprm.tot_wact += srv->next_eweight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ chash_queue_dequeue_srv(srv);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ out_update_state:
+ srv_lb_commit_status(srv);
+
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm lock may be used.
+ */
+static void chash_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ chash_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ chash_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ /* only adjust the server's presence in the tree */
+ chash_queue_dequeue_srv(srv);
+
+ if (srv->flags & SRV_F_BACKUP)
+ p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
+ else
+ p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
+
+ update_backend_weight(p);
+ srv_lb_commit_status(srv);
+
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/*
+ * This function implements the "Consistent Hashing with Bounded Loads" algorithm
+ * of Mirrokni, Thorup, and Zadimoghaddam (arxiv:1608.01350), adapted for use with
+ * unequal server weights.
+ */
+int chash_server_is_eligible(struct server *s)
+{
+ /* The total number of slots to allocate is the total number of outstanding requests
+ * (including the one we're about to make) times the load-balance-factor, rounded up.
+ */
+ unsigned tot_slots = ((s->proxy->served + 1) * s->proxy->lbprm.hash_balance_factor + 99) / 100;
+ unsigned slots_per_weight = tot_slots / s->proxy->lbprm.tot_weight;
+ unsigned remainder = tot_slots % s->proxy->lbprm.tot_weight;
+
+ /* Allocate a whole number of slots per weight unit... */
+ unsigned slots = s->cur_eweight * slots_per_weight;
+
+ /* And then distribute the rest among servers proportionally to their weight. */
+ slots += ((s->cumulative_weight + s->cur_eweight) * remainder) / s->proxy->lbprm.tot_weight
+ - (s->cumulative_weight * remainder) / s->proxy->lbprm.tot_weight;
+
+ /* But never leave a server with 0. */
+ if (slots == 0)
+ slots = 1;
+
+ return s->served < slots;
+}
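+
+/* Worked example (illustrative): with 9 requests currently served by the
+ * backend, a hash-balance-factor of 150 and tot_weight = 4, tot_slots is
+ * ((9+1)*150+99)/100 = 15, i.e. 3 whole slots per weight unit plus a
+ * remainder of 3 spread by cumulative weight. A server of eweight 2 with
+ * cumulative_weight 0 thus gets 2*3 + 1 = 7 slots and remains eligible as
+ * long as it serves fewer than 7 requests.
+ */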
+
+/*
+ * This function returns the running server from the CHASH tree, which is at
+ * the closest distance from the value of <hash>. Doing so ensures that even
+ * with a heavily imbalanced hash, if some servers are close to each other, they
+ * will still both receive traffic. If any server is found, it will be returned.
+ * It will also skip server <avoid> if the hash result ends on this one.
+ * If no valid server is found, NULL is returned.
+ *
+ * The lbprm's lock will be used in R/O mode. The server's lock is not used.
+ */
+struct server *chash_get_server_hash(struct proxy *p, unsigned int hash, const struct server *avoid)
+{
+ struct eb32_node *next, *prev;
+ struct server *nsrv, *psrv;
+ struct eb_root *root;
+ unsigned int dn, dp;
+ int loop;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (p->srv_act)
+ root = &p->lbprm.chash.act;
+ else if (p->lbprm.fbck) {
+ nsrv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ root = &p->lbprm.chash.bck;
+ else {
+ nsrv = NULL;
+ goto out;
+ }
+
+ /* find the node after and the node before */
+ next = eb32_lookup_ge(root, hash);
+ if (!next)
+ next = eb32_first(root);
+ if (!next) {
+ nsrv = NULL; /* tree is empty */
+ goto out;
+ }
+
+ prev = eb32_prev(next);
+ if (!prev)
+ prev = eb32_last(root);
+
+ nsrv = eb32_entry(next, struct tree_occ, node)->server;
+ psrv = eb32_entry(prev, struct tree_occ, node)->server;
+
+ /* OK we're located between two servers, let's
+ * compare distances between hash and the two servers
+ * and select the closest server.
+ */
+ dp = hash - prev->key;
+ dn = next->key - hash;
+
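+ /* Note (illustrative): the subtractions are performed on unsigned ints,
+ * so when the lookup wrapped around (prev is the last node and its key is
+ * greater than <hash>), dp = hash - prev->key still yields the correct
+ * modulo-2^32 distance on the ring.
+ */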
+ if (dp <= dn) {
+ next = prev;
+ nsrv = psrv;
+ }
+
+ loop = 0;
+ while (nsrv == avoid || (p->lbprm.hash_balance_factor && !chash_server_is_eligible(nsrv))) {
+ next = eb32_next(next);
+ if (!next) {
+ next = eb32_first(root);
+ if (++loop > 1) // protection against accidental loop
+ break;
+ }
+ nsrv = eb32_entry(next, struct tree_occ, node)->server;
+ }
+
+ out:
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return nsrv;
+}
+
+/* Return next server from the CHASH tree in backend <p>. If the tree is empty,
+ * return NULL. Saturated servers are skipped.
+ *
+ * The lbprm's lock will be used in R/W mode. The server's lock is not used.
+ */
+struct server *chash_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *avoided;
+ struct eb32_node *node, *stop, *avoided_node;
+ struct eb_root *root;
+
+ srv = avoided = NULL;
+ avoided_node = NULL;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ root = &p->lbprm.chash.act;
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ root = &p->lbprm.chash.bck;
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ stop = node = p->lbprm.chash.last;
+ do {
+ struct server *s;
+
+ if (node)
+ node = eb32_next(node);
+ if (!node)
+ node = eb32_first(root);
+
+ p->lbprm.chash.last = node;
+ if (!node) {
+ /* no node is available */
+ srv = NULL;
+ goto out;
+ }
+
+ /* Note: if we came here after a down/up cycle with no last
+ * pointer, and after a redispatch (srvtoavoid is set), we
+ * must set stop to non-null otherwise we can loop forever.
+ */
+ if (!stop)
+ stop = node;
+
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now, so we'll simply
+ * skip it. Same if it's the server we try to avoid, in which
+ * case we simply remember it for later use if needed.
+ */
+ s = eb32_entry(node, struct tree_occ, node)->server;
+ if (!s->maxconn || (!s->queue.length && s->served < srv_dynamic_maxconn(s))) {
+ if (s != srvtoavoid) {
+ srv = s;
+ break;
+ }
+ avoided = s;
+ avoided_node = node;
+ }
+ } while (node != stop);
+
+ if (!srv) {
+ srv = avoided;
+ p->lbprm.chash.last = avoided_node;
+ }
+
+ out:
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+/* This function is responsible for building the active and backup trees for
+ * consistent hashing. The servers receive an array of initialized nodes
+ * with their assigned keys. It also sets p->lbprm.wdiv to the eweight to
+ * uweight ratio.
+ * Return 0 in case of success, -1 in case of allocation failure.
+ */
+int chash_init_server_tree(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+ int node;
+
+ p->lbprm.set_server_status_up = chash_set_server_status_up;
+ p->lbprm.set_server_status_down = chash_set_server_status_down;
+ p->lbprm.update_server_eweight = chash_update_server_weight;
+ p->lbprm.server_take_conn = NULL;
+ p->lbprm.server_drop_conn = NULL;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ p->lbprm.chash.act = init_head;
+ p->lbprm.chash.bck = init_head;
+ p->lbprm.chash.last = NULL;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.chash.bck : &p->lbprm.chash.act;
+ srv->lb_nodes_tot = srv->uweight * BE_WEIGHT_SCALE;
+ srv->lb_nodes_now = 0;
+ srv->lb_nodes = calloc(srv->lb_nodes_tot,
+ sizeof(*srv->lb_nodes));
+ if (!srv->lb_nodes) {
+ ha_alert("failed to allocate lb_nodes for server %s.\n", srv->id);
+ return -1;
+ }
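+ /* Each server appears lb_nodes_tot times on the ring, with keys derived
+ * from full_hash() over its puid, so its occurrences are spread
+ * pseudo-randomly and its share of the keyspace is proportional to its
+ * weight.
+ */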
+ for (node = 0; node < srv->lb_nodes_tot; node++) {
+ srv->lb_nodes[node].server = srv;
+ srv->lb_nodes[node].node.key = full_hash(srv->puid * SRV_EWGHT_RANGE + node);
+ }
+
+ if (srv_currently_usable(srv))
+ chash_queue_dequeue_srv(srv);
+ }
+ return 0;
+}
diff --git a/src/lb_fas.c b/src/lb_fas.c
new file mode 100644
index 0000000..d90388b
--- /dev/null
+++ b/src/lb_fas.c
@@ -0,0 +1,348 @@
+/*
+ * First Available Server load balancing algorithm.
+ *
+ * This file implements an algorithm which emerged during a discussion with
+ * Steen Larsen, initially inspired by Anshul Gandhi et al.'s work now
+ * described as "packing" in section 3.5:
+ *
+ * http://reports-archive.adm.cs.cmu.edu/anon/2012/CMU-CS-12-109.pdf
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+
+/* Remove a server from a tree. It must have previously been dequeued. This
+ * function is meant to be called when a server is going down or has its
+ * weight disabled.
+ *
+ * The server's lock and the lbprm's lock must be held.
+ */
+static inline void fas_remove_from_tree(struct server *s)
+{
+ s->lb_tree = NULL;
+}
+
+/* simply removes a server from a tree.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fas_dequeue_srv(struct server *s)
+{
+ eb32_delete(&s->lb_node);
+}
+
+/* Queue a server in its associated tree, assuming the weight is >0.
+ * Servers are sorted by unique ID so that we send all connections to the first
+ * available server in declaration order (or ID order) until its maxconn is
+ * reached. It is important to understand that the server weight is not used
+ * here.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fas_queue_srv(struct server *s)
+{
+ s->lb_node.key = s->puid;
+ eb32_insert(s->lb_tree, &s->lb_node);
+}
+
+/* Re-position the server in the FS tree after it has been assigned one
+ * connection or after it has released one. Note that it is possible that
+ * the server has been moved out of the tree due to failed health-checks.
+ * The lbprm's lock will be used.
+ */
+static void fas_srv_reposition(struct server *s)
+{
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+ if (s->lb_tree) {
+ fas_dequeue_srv(s);
+ fas_queue_srv(s);
+ }
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fas_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck -= srv->cur_eweight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact -= srv->cur_eweight;
+ p->srv_act--;
+ }
+
+ fas_dequeue_srv(srv);
+ fas_remove_from_tree(srv);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fas_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ srv->lb_tree = &p->lbprm.fas.bck;
+ p->lbprm.tot_wbck += srv->next_eweight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ srv->lb_tree = &p->lbprm.fas.act;
+ p->lbprm.tot_wact += srv->next_eweight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ fas_queue_srv(srv);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fas_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ fas_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ fas_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv->lb_tree)
+ fas_dequeue_srv(srv);
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fas.bck;
+ } else {
+ p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fas.act;
+ }
+
+ fas_queue_srv(srv);
+
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ srv_lb_commit_status(srv);
+}
+
+/* This function is responsible for building the trees for the first
+ * available server algorithm. It also sets p->lbprm.wdiv to the eweight to
+ * uweight ratio. Both active and backup groups are initialized.
+ */
+void fas_init_server_tree(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+
+ p->lbprm.set_server_status_up = fas_set_server_status_up;
+ p->lbprm.set_server_status_down = fas_set_server_status_down;
+ p->lbprm.update_server_eweight = fas_update_server_weight;
+ p->lbprm.server_take_conn = fas_srv_reposition;
+ p->lbprm.server_drop_conn = fas_srv_reposition;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ p->lbprm.fas.act = init_head;
+ p->lbprm.fas.bck = init_head;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ if (!srv_currently_usable(srv))
+ continue;
+ srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fas.bck : &p->lbprm.fas.act;
+ fas_queue_srv(srv);
+ }
+}
+
+/* Return next server from the FS tree in backend <p>. If the tree is empty,
+ * return NULL. Saturated servers are skipped.
+ *
+ * The lbprm's lock will be used. The server's lock is not used.
+ */
+struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *avoided;
+ struct eb32_node *node;
+
+ srv = avoided = NULL;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ node = eb32_first(&p->lbprm.fas.act);
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ node = eb32_first(&p->lbprm.fas.bck);
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ while (node) {
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now, so we'll simply
+ * skip it. Same if it's the server we try to avoid, in which
+ * case we simply remember it for later use if needed.
+ */
+ struct server *s;
+
+ s = eb32_entry(node, struct server, lb_node);
+ if (!s->maxconn || (!s->queue.length && s->served < srv_dynamic_maxconn(s))) {
+ if (s != srvtoavoid) {
+ srv = s;
+ break;
+ }
+ avoided = s;
+ }
+ node = eb32_next(node);
+ }
+
+ if (!srv)
+ srv = avoided;
+ out:
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/lb_fwlc.c b/src/lb_fwlc.c
new file mode 100644
index 0000000..8e913d4
--- /dev/null
+++ b/src/lb_fwlc.c
@@ -0,0 +1,375 @@
+/*
+ * Fast Weighted Least Connection load balancing algorithm.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+
+/* Remove a server from a tree. It must have previously been dequeued. This
+ * function is meant to be called when a server is going down or has its
+ * weight disabled.
+ *
+ * The server's lock and the lbprm's lock must be held.
+ */
+static inline void fwlc_remove_from_tree(struct server *s)
+{
+ s->lb_tree = NULL;
+}
+
+/* simply removes a server from a tree.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fwlc_dequeue_srv(struct server *s)
+{
+ eb32_delete(&s->lb_node);
+}
+
+/* Queue a server in its associated tree, assuming the <eweight> is >0.
+ * Servers are sorted by (#conns+1)/weight. To ensure maximum accuracy,
+ * we use (#conns+1)*SRV_EWGHT_MAX/eweight as the sorting key. The reason
+ * for using #conns+1 is to sort by weights in case the server is picked
+ * and not before it is picked. This provides a better load accuracy for
+ * low connection counts when weights differ and makes sure the round-robin
+ * applies between servers of highest weight first. However servers with no
+ * connection are always picked first so that under low loads, it's not
+ * always the single server with the highest weight that gets picked.
+ *
+ * NOTE: Depending on the calling context, we use s->next_eweight or
+ * s->cur_eweight. The next value is used when the server state is updated
+ * (because the weight changed for instance). During this step, the server
+ * state is not yet committed. The current value is used to reposition the
+ * server in the tree. This happens when the server is used.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fwlc_queue_srv(struct server *s, unsigned int eweight)
+{
+ unsigned int inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queue.length);
+
+ s->lb_node.key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / eweight : 0;
+ eb32_insert(s->lb_tree, &s->lb_node);
+}
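+
+/* Worked example (illustrative): take two servers A (eweight 1) and B
+ * (eweight 2), each with 3 connections in flight. A's key is
+ * 4*SRV_EWGHT_MAX/1 and B's is 4*SRV_EWGHT_MAX/2, so B sorts first and
+ * receives the next connection, as expected from its doubled weight. An
+ * idle server always has key 0 and is picked before both.
+ */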
+
+/* Re-position the server in the FWLC tree after it has been assigned one
+ * connection or after it has released one. Note that it is possible that
+ * the server has been moved out of the tree due to failed health-checks.
+ * The lbprm's lock will be used.
+ */
+static void fwlc_srv_reposition(struct server *s)
+{
+ unsigned int inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queue.length);
+ unsigned int eweight = _HA_ATOMIC_LOAD(&s->cur_eweight);
+ unsigned int new_key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / (eweight ? eweight : 1) : 0;
+
+ /* some calls will be made for no change (e.g. connect_server() after
+ * assign_server()). Let's check that first.
+ */
+ if (s->lb_node.node.leaf_p && eweight && s->lb_node.key == new_key)
+ return;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+ if (s->lb_tree) {
+ /* we might have been waiting for a while on the lock above
+ * so it's worth testing again because other threads are very
+ * likely to have released a connection or taken one leading
+ * to our target value (50% of the case in measurements).
+ */
+ inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queue.length);
+ eweight = _HA_ATOMIC_LOAD(&s->cur_eweight);
+ new_key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / (eweight ? eweight : 1) : 0;
+ if (!s->lb_node.node.leaf_p || s->lb_node.key != new_key) {
+ eb32_delete(&s->lb_node);
+ s->lb_node.key = new_key;
+ eb32_insert(s->lb_tree, &s->lb_node);
+ }
+ }
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwlc_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck -= srv->cur_eweight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact -= srv->cur_eweight;
+ p->srv_act--;
+ }
+
+ fwlc_dequeue_srv(srv);
+ fwlc_remove_from_tree(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwlc_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ srv->lb_tree = &p->lbprm.fwlc.bck;
+ p->lbprm.tot_wbck += srv->next_eweight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ srv->lb_tree = &p->lbprm.fwlc.act;
+ p->lbprm.tot_wact += srv->next_eweight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ fwlc_queue_srv(srv, srv->next_eweight);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwlc_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ fwlc_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ fwlc_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv->lb_tree)
+ fwlc_dequeue_srv(srv);
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fwlc.bck;
+ } else {
+ p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fwlc.act;
+ }
+
+ fwlc_queue_srv(srv, srv->next_eweight);
+
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ srv_lb_commit_status(srv);
+}
+
+/* This function is responsible for building the trees in case of fast
+ * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
+ * uweight ratio. Both active and backup groups are initialized.
+ */
+void fwlc_init_server_tree(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+
+ p->lbprm.set_server_status_up = fwlc_set_server_status_up;
+ p->lbprm.set_server_status_down = fwlc_set_server_status_down;
+ p->lbprm.update_server_eweight = fwlc_update_server_weight;
+ p->lbprm.server_take_conn = fwlc_srv_reposition;
+ p->lbprm.server_drop_conn = fwlc_srv_reposition;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ p->lbprm.fwlc.act = init_head;
+ p->lbprm.fwlc.bck = init_head;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ if (!srv_currently_usable(srv))
+ continue;
+ srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
+ fwlc_queue_srv(srv, srv->next_eweight);
+ }
+}
+
+/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
+ * return NULL. Saturated servers are skipped.
+ *
+ * The lbprm's lock will be used in R/O mode. The server's lock is not used.
+ */
+struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *avoided;
+ struct eb32_node *node;
+
+ srv = avoided = NULL;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ node = eb32_first(&p->lbprm.fwlc.act);
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ node = eb32_first(&p->lbprm.fwlc.bck);
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ while (node) {
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now, so we'll simply
+ * skip it. Same if it's the server we try to avoid, in which
+ * case we simply remember it for later use if needed.
+ */
+ struct server *s;
+
+ s = eb32_entry(node, struct server, lb_node);
+ if (!s->maxconn || s->served + s->queue.length < srv_dynamic_maxconn(s) + s->maxqueue) {
+ if (s != srvtoavoid) {
+ srv = s;
+ break;
+ }
+ avoided = s;
+ }
+ node = eb32_next(node);
+ }
+
+ if (!srv)
+ srv = avoided;
+ out:
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/lb_fwrr.c b/src/lb_fwrr.c
new file mode 100644
index 0000000..a762623
--- /dev/null
+++ b/src/lb_fwrr.c
@@ -0,0 +1,623 @@
+/*
+ * Fast Weighted Round Robin load balancing algorithm.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+
+static inline void fwrr_remove_from_tree(struct server *s);
+static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
+static inline void fwrr_dequeue_srv(struct server *s);
+static void fwrr_get_srv(struct server *s);
+static void fwrr_queue_srv(struct server *s);
+
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwrr_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+ struct fwrr_group *grp;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+ grp->next_weight -= srv->cur_eweight;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
+ p->srv_act--;
+ }
+
+ fwrr_dequeue_srv(srv);
+ fwrr_remove_from_tree(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwrr_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+ struct fwrr_group *grp;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+ grp->next_weight += srv->next_eweight;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ fwrr_get_srv(srv);
+ srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
+ fwrr_queue_srv(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwrr_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+ struct fwrr_group *grp;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ fwrr_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ fwrr_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+ grp->next_weight = grp->next_weight - srv->cur_eweight + srv->next_eweight;
+
+ p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
+ p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
+
+ if (srv->lb_tree == grp->init) {
+ fwrr_dequeue_srv(srv);
+ fwrr_queue_by_weight(grp->init, srv);
+ }
+ else if (!srv->lb_tree) {
+ /* FIXME: server was down. This is not possible right now but
+ * may be needed soon for slowstart or graceful shutdown.
+ */
+ fwrr_dequeue_srv(srv);
+ fwrr_get_srv(srv);
+ srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
+ fwrr_queue_srv(srv);
+ } else {
+ /* The server is either active or in the next queue. If it's
+ * still in the active queue and it has not consumed all of its
+ * places, let's adjust its next position.
+ */
+ fwrr_get_srv(srv);
+
+ if (srv->next_eweight > 0) {
+ int prev_next = srv->npos;
+ int step = grp->next_weight / srv->next_eweight;
+
+ srv->npos = srv->lpos + step;
+ srv->rweight = 0;
+
+ if (srv->npos > prev_next)
+ srv->npos = prev_next;
+ if (srv->npos < grp->curr_pos + 2)
+ srv->npos = grp->curr_pos + step;
+ } else {
+ /* push it into the next tree */
+ srv->npos = grp->curr_pos + grp->curr_weight;
+ }
+
+ fwrr_dequeue_srv(srv);
+ fwrr_queue_srv(srv);
+ }
+
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ srv_lb_commit_status(srv);
+}
+
+/* Remove a server from a tree. It must have previously been dequeued. This
+ * function is meant to be called when a server is going down or has its
+ * weight disabled.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_remove_from_tree(struct server *s)
+{
+ s->lb_tree = NULL;
+}
+
+/* Queue a server in the weight tree <root>, assuming the weight is >0.
+ * We want to sort them by inverted weights, because we need to place
+ * heavy servers first in order to get a smooth distribution.
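+ * For example (weights assumed for illustration), usable servers with
+ * eweights 256 and 16 get keys SRV_EWGHT_MAX-256 and SRV_EWGHT_MAX-16
+ * respectively, so eb32_first() visits the heavier one first.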
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
+{
+ s->lb_node.key = SRV_EWGHT_MAX - s->next_eweight;
+ eb32_insert(root, &s->lb_node);
+ s->lb_tree = root;
+}
+
+/* This function is responsible for building the weight trees in case of fast
+ * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
+ * ratio. Both active and backup groups are initialized.
+ */
+void fwrr_init_server_groups(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+
+ p->lbprm.set_server_status_up = fwrr_set_server_status_up;
+ p->lbprm.set_server_status_down = fwrr_set_server_status_down;
+ p->lbprm.update_server_eweight = fwrr_update_server_weight;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ /* prepare the active servers group */
+ p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
+ p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
+ p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
+ p->lbprm.fwrr.act.t1 = init_head;
+ p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
+ p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
+
+ /* prepare the backup servers group */
+ p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
+ p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
+ p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
+ p->lbprm.fwrr.bck.t1 = init_head;
+ p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
+ p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ if (!srv_currently_usable(srv))
+ continue;
+ fwrr_queue_by_weight((srv->flags & SRV_F_BACKUP) ?
+ p->lbprm.fwrr.bck.init :
+ p->lbprm.fwrr.act.init,
+ srv);
+ }
+}
+
+/* simply removes a server from a weight tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_dequeue_srv(struct server *s)
+{
+ eb32_delete(&s->lb_node);
+}
+
+/* queues a server into the appropriate group and tree depending on its
+ * backup status and ->npos. If the server is disabled, simply assign
+ * it to the NULL tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static void fwrr_queue_srv(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ struct fwrr_group *grp;
+
+ grp = (s->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+
+ /* Delay everything which does not fit into the window and everything
+ * which does not fit into the theoretical new window.
+ */
+ if (!srv_willbe_usable(s)) {
+ fwrr_remove_from_tree(s);
+ }
+ else if (s->next_eweight <= 0 ||
+ s->npos >= 2 * grp->curr_weight ||
+ s->npos >= grp->curr_weight + grp->next_weight) {
+ /* put into next tree, and readjust npos in case we could
+ * finally take this back to current. */
+ s->npos -= grp->curr_weight;
+ fwrr_queue_by_weight(grp->next, s);
+ }
+ else {
+ /* The sorting key is stored in units of s->npos * user_weight
+ * in order to avoid overflows. As stated in backend.h, the
+ * lower the scale, the rougher the weights modulation, and the
+ * higher the scale, the lower the number of servers without
+ * overflow. With this formula, the result is always positive,
+ * so we can use eb32_insert().
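+		 * As a sketch with assumed values: two servers at npos=3 with
+		 * equal rweight share the position component 3*SRV_UWGHT_RANGE,
+		 * but the heavier one gets the smaller remainder term and is
+		 * therefore dequeued first.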
+ */
+ s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
+ (unsigned)(SRV_EWGHT_MAX + s->rweight - s->next_eweight) / BE_WEIGHT_SCALE;
+
+ eb32_insert(&grp->curr, &s->lb_node);
+ s->lb_tree = &grp->curr;
+ }
+}
+
+/* prepares a server when extracting it from the "init" tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_get_srv_init(struct server *s)
+{
+ s->npos = s->rweight = 0;
+}
+
+/* prepares a server when extracting it from the "next" tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_get_srv_next(struct server *s)
+{
+ struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
+ &s->proxy->lbprm.fwrr.bck :
+ &s->proxy->lbprm.fwrr.act;
+
+ s->npos += grp->curr_weight;
+}
+
+/* prepares a server when it was marked down.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_get_srv_down(struct server *s)
+{
+ struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
+ &s->proxy->lbprm.fwrr.bck :
+ &s->proxy->lbprm.fwrr.act;
+
+ s->npos = grp->curr_pos;
+}
+
+/* prepares a server when extracting it from its tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static void fwrr_get_srv(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
+ &p->lbprm.fwrr.bck :
+ &p->lbprm.fwrr.act;
+
+ if (s->lb_tree == grp->init) {
+ fwrr_get_srv_init(s);
+ }
+ else if (s->lb_tree == grp->next) {
+ fwrr_get_srv_next(s);
+ }
+ else if (s->lb_tree == NULL) {
+ fwrr_get_srv_down(s);
+ }
+}
+
+/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
+ * when this happens, and "next" filled with servers sorted by weights.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_switch_trees(struct fwrr_group *grp)
+{
+ struct eb_root *swap;
+ swap = grp->init;
+ grp->init = grp->next;
+ grp->next = swap;
+ grp->curr_weight = grp->next_weight;
+ grp->curr_pos = grp->curr_weight;
+}
+
+/* return next server from the current tree in FWRR group <grp>, or a server
+ * from the "init" tree if appropriate. If both trees are empty, return NULL.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
+{
+ struct eb32_node *node1;
+ struct eb32_node *node2;
+ struct server *s1 = NULL;
+ struct server *s2 = NULL;
+
+ node1 = eb32_first(&grp->curr);
+ if (node1) {
+ s1 = eb32_entry(node1, struct server, lb_node);
+ if (s1->cur_eweight && s1->npos <= grp->curr_pos)
+ return s1;
+ }
+
+ /* Either we have no server left, or we have a hole. We'll look in the
+	 * init tree for a better proposal. At this point, if <s1> is non-null,
+ * it is guaranteed to remain available as the tree is locked.
+ */
+ node2 = eb32_first(grp->init);
+ if (node2) {
+ s2 = eb32_entry(node2, struct server, lb_node);
+ if (s2->cur_eweight) {
+ fwrr_get_srv_init(s2);
+ return s2;
+ }
+ }
+ return s1;
+}
+
+/* Computes next position of server <s> in the group. Nothing is done if <s>
+ * has a zero weight.
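+ * As a worked example (numbers assumed): with next_weight=5 and eweight=2,
+ * a server starting at npos=0 is scheduled at positions 0, 2, 5, 7, 10, ...
+ * the division remainder being carried in rweight, so it is picked twice
+ * in every window of 5 positions.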
+ *
+ * The lbprm's lock must be held to protect lpos/npos/rweight.
+ */
+static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
+{
+ unsigned int eweight = *(volatile unsigned int *)&s->cur_eweight;
+
+ if (!eweight)
+ return;
+
+ if (!s->npos) {
+ /* first time ever for this server */
+ s->npos = grp->curr_pos;
+ }
+
+ s->lpos = s->npos;
+ s->npos += grp->next_weight / eweight;
+ s->rweight += grp->next_weight % eweight;
+
+ if (s->rweight >= eweight) {
+ s->rweight -= eweight;
+ s->npos++;
+ }
+}
+
+/* Return next server from the current tree in backend <p>, or a server from
+ * the init tree if appropriate. If both trees are empty, return NULL.
+ * Saturated servers are skipped and requeued.
+ *
+ * The lbprm's lock will be used in R/W mode. The server's lock is not used.
+ */
+struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *full, *avoided;
+ struct fwrr_group *grp;
+ int switched;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ grp = &p->lbprm.fwrr.act;
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ grp = &p->lbprm.fwrr.bck;
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ switched = 0;
+ avoided = NULL;
+ full = NULL; /* NULL-terminated list of saturated servers */
+ while (1) {
+ /* if we see an empty group, let's first try to collect weights
+ * which might have recently changed.
+ */
+ if (!grp->curr_weight)
+ grp->curr_pos = grp->curr_weight = grp->next_weight;
+
+ /* get first server from the "current" tree. When the end of
+ * the tree is reached, we may have to switch, but only once.
+ */
+ while (1) {
+ srv = fwrr_get_server_from_group(grp);
+ if (srv)
+ break;
+ if (switched) {
+ if (avoided) {
+ srv = avoided;
+ goto take_this_one;
+ }
+ goto requeue_servers;
+ }
+ switched = 1;
+ fwrr_switch_trees(grp);
+ }
+
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now. We'll update
+ * its position and dequeue it anyway, so that we can move it
+ * to a better place afterwards.
+ */
+ fwrr_update_position(grp, srv);
+ fwrr_dequeue_srv(srv);
+ grp->curr_pos++;
+ if (!srv->maxconn || (!srv->queue.length && srv->served < srv_dynamic_maxconn(srv))) {
+ /* make sure it is not the server we are trying to exclude... */
+ if (srv != srvtoavoid || avoided)
+ break;
+
+			avoided = srv; /* ...but remember that it was selected yet avoided */
+ }
+
+ /* the server is saturated or avoided, let's chain it for later reinsertion.
+ */
+ srv->next_full = full;
+ full = srv;
+ }
+
+ take_this_one:
+ /* OK, we got the best server, let's update it */
+ fwrr_queue_srv(srv);
+
+ requeue_servers:
+ /* Requeue all extracted servers. If full==srv then it was
+ * avoided (unsuccessfully) and chained, omit it now. The
+ * only way to get there is by having <avoided>==NULL or
+ * <avoided>==<srv>.
+ */
+ if (unlikely(full != NULL)) {
+ if (switched) {
+ /* the tree has switched, requeue all extracted servers
+ * into "init", because their place was lost, and only
+ * their weight matters.
+ */
+ do {
+ if (likely(full != srv))
+ fwrr_queue_by_weight(grp->init, full);
+ full = full->next_full;
+ } while (full);
+ } else {
+ /* requeue all extracted servers just as if they were consumed
+ * so that they regain their expected place.
+ */
+ do {
+ if (likely(full != srv))
+ fwrr_queue_srv(full);
+ full = full->next_full;
+ } while (full);
+ }
+ }
+ out:
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/lb_map.c b/src/lb_map.c
new file mode 100644
index 0000000..592df91
--- /dev/null
+++ b/src/lb_map.c
@@ -0,0 +1,281 @@
+/*
+ * Map-based load-balancing (RR and HASH)
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/lb_map.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+/* this function updates the map according to server <srv>'s new state.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void map_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ /* FIXME: could be optimized since we know what changed */
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ recount_servers(p);
+ update_backend_weight(p);
+ recalc_server_map(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the map according to server <srv>'s new state.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void map_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ /* FIXME: could be optimized since we know what changed */
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ recount_servers(p);
+ update_backend_weight(p);
+ recalc_server_map(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function recomputes the server map for proxy px. It relies on
+ * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
+ * called after recount_servers(). It also expects px->lbprm.map.srv
+ * to be allocated with the largest size needed.
+ *
+ * The lbprm's lock must be held.
+ */
+void recalc_server_map(struct proxy *px)
+{
+ int o, tot, flag;
+ struct server *cur, *best;
+
+ switch (px->lbprm.tot_used) {
+ case 0: /* no server */
+ return;
+ default:
+ tot = px->lbprm.tot_weight;
+ break;
+ }
+
+ /* here we *know* that we have some servers */
+ if (px->srv_act)
+ flag = 0;
+ else
+ flag = SRV_F_BACKUP;
+
+ /* this algorithm gives priority to the first server, which means that
+ * it will respect the declaration order for equivalent weights, and
+ * that whatever the weights, the first server called will always be
+ * the first declared. This is an important assumption for the backup
+ * case, where we want the first server only.
+ */
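+	/* Worked example with assumed weights A=2 and B=1 (tot=3): on each
+	 * pass every eligible server adds its weight to wscore, the highest
+	 * scaled score <v> takes the slot and then pays <tot> back, which
+	 * fills the map as A, A, B.
+	 */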
+ for (cur = px->srv; cur; cur = cur->next)
+ cur->wscore = 0;
+
+ for (o = 0; o < tot; o++) {
+ int max = 0;
+ best = NULL;
+ for (cur = px->srv; cur; cur = cur->next) {
+ if ((cur->flags & SRV_F_BACKUP) == flag &&
+ srv_willbe_usable(cur)) {
+ int v;
+
+ /* If we are forced to return only one server, we don't want to
+ * go further, because we would return the wrong one due to
+ * divide overflow.
+ */
+ if (tot == 1) {
+ best = cur;
+ /* note that best->wscore will be wrong but we don't care */
+ break;
+ }
+
+ _HA_ATOMIC_ADD(&cur->wscore, cur->next_eweight);
+ v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
+ if (best == NULL || v > max) {
+ max = v;
+ best = cur;
+ }
+ }
+ }
+ px->lbprm.map.srv[o] = best;
+ if (best)
+ _HA_ATOMIC_SUB(&best->wscore, tot);
+ }
+}
+
+/* This function is responsible for building the server MAP for map-based LB
+ * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
+ * weights if applicable. It should be called only once per proxy, at config
+ * time.
+ */
+void init_server_map(struct proxy *p)
+{
+ struct server *srv;
+ int pgcd;
+ int act, bck;
+
+ p->lbprm.set_server_status_up = map_set_server_status_up;
+ p->lbprm.set_server_status_down = map_set_server_status_down;
+ p->lbprm.update_server_eweight = NULL;
+
+ if (!p->srv)
+ return;
+
+ /* We will factor the weights to reduce the table,
+	 * using Euclid's greatest common divisor algorithm.
+ * Since we may have zero weights, we have to first
+ * find a non-zero weight server.
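+	 * As an example (weights assumed), servers weighted 20, 30 and 50
+	 * share a GCD of 10, reducing their effective weights to 2, 3 and 5,
+	 * so the map needs 10 entries instead of 100 (assuming a unit
+	 * weight divisor).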
+ */
+ pgcd = 1;
+ srv = p->srv;
+ while (srv && !srv->uweight)
+ srv = srv->next;
+
+ if (srv) {
+ pgcd = srv->uweight; /* note: cannot be zero */
+ while (pgcd > 1 && (srv = srv->next)) {
+ int w = srv->uweight;
+ while (w) {
+ int t = pgcd % w;
+ pgcd = w;
+ w = t;
+ }
+ }
+ }
+
+ /* It is sometimes useful to know what factor to apply
+ * to the backend's effective weight to know its real
+ * weight.
+ */
+ p->lbprm.wmult = pgcd;
+
+ act = bck = 0;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+
+ if (srv->flags & SRV_F_BACKUP)
+ bck += srv->next_eweight;
+ else
+ act += srv->next_eweight;
+ srv_lb_commit_status(srv);
+ }
+
+	/* this is the largest map we will ever need for this server list */
+ if (act < bck)
+ act = bck;
+
+ if (!act)
+ act = 1;
+
+ p->lbprm.map.srv = calloc(act, sizeof(*p->lbprm.map.srv));
+ /* recounts servers and their weights */
+ recount_servers(p);
+ update_backend_weight(p);
+ recalc_server_map(p);
+}
+
+/*
+ * This function tries to find a running server with free connection slots for
+ * the proxy <px> following the round-robin method.
+ * If any server is found, it will be returned and px->lbprm.map.rr_idx will be updated
+ * to point to the next server. If no valid server is found, NULL is returned.
+ *
+ * The lbprm's lock will be used.
+ */
+struct server *map_get_server_rr(struct proxy *px, struct server *srvtoavoid)
+{
+ int newidx, avoididx;
+ struct server *srv, *avoided;
+
+ HA_RWLOCK_SKLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ if (px->lbprm.tot_weight == 0) {
+ avoided = NULL;
+ goto out;
+ }
+
+ if (px->lbprm.map.rr_idx < 0 || px->lbprm.map.rr_idx >= px->lbprm.tot_weight)
+ px->lbprm.map.rr_idx = 0;
+ newidx = px->lbprm.map.rr_idx;
+
+ avoided = NULL;
+ avoididx = 0; /* shut a gcc warning */
+ do {
+ srv = px->lbprm.map.srv[newidx++];
+ if (!srv->maxconn || (!srv->queue.length && srv->served < srv_dynamic_maxconn(srv))) {
+			/* make sure it is not the server we are trying to exclude... */
+			/* ...but remember that it was selected yet avoided */
+ avoided = srv;
+ avoididx = newidx;
+ if (srv != srvtoavoid) {
+ px->lbprm.map.rr_idx = newidx;
+ goto out;
+ }
+ }
+ if (newidx == px->lbprm.tot_weight)
+ newidx = 0;
+ } while (newidx != px->lbprm.map.rr_idx);
+
+ if (avoided)
+ px->lbprm.map.rr_idx = avoididx;
+
+ out:
+ HA_RWLOCK_SKUNLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ /* return NULL or srvtoavoid if found */
+ return avoided;
+}
+
+/*
+ * This function returns the running server from the map at the location
+ * pointed to by the result of a modulo operation on <hash>. If any server is
+ * found, it will be returned. If no valid server is found, NULL is returned.
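+ * For example (figures assumed), with tot_weight=10 a hash value of 1234
+ * selects map slot 1234 % 10 = 4, so equal hashes keep reaching the same
+ * server as long as the map is unchanged.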
+ *
+ * The lbprm's lock will be used.
+ */
+struct server *map_get_server_hash(struct proxy *px, unsigned int hash)
+{
+ struct server *srv = NULL;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ if (px->lbprm.tot_weight)
+ srv = px->lbprm.map.srv[hash % px->lbprm.tot_weight];
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ return srv;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/listener.c b/src/listener.c
new file mode 100644
index 0000000..5dfe7cb
--- /dev/null
+++ b/src/listener.c
@@ -0,0 +1,1956 @@
+/*
+ * Listener management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli-t.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/sample.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+
+
+/* List head of all known bind keywords */
+struct bind_kw_list bind_keywords = {
+ .list = LIST_HEAD_INIT(bind_keywords.list)
+};
+
+/* list of the temporarily limited listeners because of lack of resource */
+static struct mt_list global_listener_queue = MT_LIST_HEAD_INIT(global_listener_queue);
+static struct task *global_listener_queue_task;
+__decl_thread(static HA_RWLOCK_T global_listener_rwlock);
+
+/* listener status for stats */
+const char* li_status_st[LI_STATE_COUNT] = {
+ [LI_STATUS_WAITING] = "WAITING",
+ [LI_STATUS_OPEN] = "OPEN",
+ [LI_STATUS_FULL] = "FULL",
+};
+
+#if defined(USE_THREAD)
+
+struct accept_queue_ring accept_queue_rings[MAX_THREADS] __attribute__((aligned(64))) = { };
+
+/* dequeue a pending connection from the local accept queue (single
+ * consumer); processing it is left to the caller. Returns the accepted
+ * connection or NULL if none was found.
+ */
+struct connection *accept_queue_pop_sc(struct accept_queue_ring *ring)
+{
+ unsigned int pos, next;
+ struct connection *ptr;
+ struct connection **e;
+
+ pos = ring->head;
+
+ if (pos == ring->tail)
+ return NULL;
+
+ next = pos + 1;
+ if (next >= ACCEPT_QUEUE_SIZE)
+ next = 0;
+
+ e = &ring->entry[pos];
+
+ /* wait for the producer to update the listener's pointer */
+ while (1) {
+ ptr = *e;
+ __ha_barrier_load();
+ if (ptr)
+ break;
+ pl_cpu_relax();
+ }
+
+ /* release the entry */
+ *e = NULL;
+
+ __ha_barrier_store();
+ ring->head = next;
+ return ptr;
+}
+
+
+/* tries to push a new accepted connection <conn> into ring <ring>. Returns
+ * non-zero if it succeeds, or zero if the ring is full. Supports multiple
+ * producers.
+ */
+int accept_queue_push_mp(struct accept_queue_ring *ring, struct connection *conn)
+{
+ unsigned int pos, next;
+
+ pos = ring->tail;
+ do {
+ next = pos + 1;
+ if (next >= ACCEPT_QUEUE_SIZE)
+ next = 0;
+ if (next == ring->head)
+ return 0; // ring full
+ } while (unlikely(!_HA_ATOMIC_CAS(&ring->tail, &pos, next)));
+
+ ring->entry[pos] = conn;
+ __ha_barrier_store();
+ return 1;
+}
+
+/* proceed with accepting new connections. Don't mark it static so that it appears
+ * in task dumps.
+ */
+struct task *accept_queue_process(struct task *t, void *context, unsigned int state)
+{
+ struct accept_queue_ring *ring = context;
+ struct connection *conn;
+ struct listener *li;
+ unsigned int max_accept;
+ int ret;
+
+ /* if global.tune.maxaccept is -1, then max_accept is UINT_MAX. It
+	 * is not really unlimited, but it is probably enough.
+ */
+ max_accept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+ for (; max_accept; max_accept--) {
+ conn = accept_queue_pop_sc(ring);
+ if (!conn)
+ break;
+
+ li = __objt_listener(conn->target);
+ _HA_ATOMIC_INC(&li->thr_conn[tid]);
+ ret = li->accept(conn);
+ if (ret <= 0) {
+ /* connection was terminated by the application */
+ continue;
+ }
+
+ /* increase the per-process number of cumulated sessions, this
+ * may only be done once l->accept() has accepted the connection.
+ */
+ if (!(li->options & LI_O_UNLIMITED)) {
+ HA_ATOMIC_UPDATE_MAX(&global.sps_max,
+ update_freq_ctr(&global.sess_per_sec, 1));
+ if (li->bind_conf && li->bind_conf->options & BC_O_USE_SSL) {
+ HA_ATOMIC_UPDATE_MAX(&global.ssl_max,
+ update_freq_ctr(&global.ssl_per_sec, 1));
+ }
+ }
+ }
+
+	/* ran out of budget? Let's come back here ASAP */
+ if (!max_accept)
+ tasklet_wakeup(ring->tasklet);
+
+ return NULL;
+}
+
+/* Initializes the accept-queues. Returns 0 on success, otherwise ERR_* flags */
+static int accept_queue_init()
+{
+ struct tasklet *t;
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ t = tasklet_new();
+ if (!t) {
+ ha_alert("Out of memory while initializing accept queue for thread %d\n", i);
+ return ERR_FATAL|ERR_ABORT;
+ }
+ t->tid = i;
+ t->process = accept_queue_process;
+ t->context = &accept_queue_rings[i];
+ accept_queue_rings[i].tasklet = t;
+ }
+ return 0;
+}
+
+REGISTER_CONFIG_POSTPARSER("multi-threaded accept queue", accept_queue_init);
+
+static void accept_queue_deinit()
+{
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ if (accept_queue_rings[i].tasklet)
+ tasklet_free(accept_queue_rings[i].tasklet);
+ }
+}
+
+REGISTER_POST_DEINIT(accept_queue_deinit);
+
+#endif // USE_THREAD
+
+/* Memory allocation and initialization of the per_thr field.
+ * Returns 0 if the field has been successfully initialized, -1 on failure.
+ */
+int li_init_per_thr(struct listener *li)
+{
+ int i;
+
+ /* allocate per-thread elements for listener */
+ li->per_thr = calloc(global.nbthread, sizeof(*li->per_thr));
+ if (!li->per_thr)
+ return -1;
+
+ for (i = 0; i < global.nbthread; ++i) {
+ MT_LIST_INIT(&li->per_thr[i].quic_accept.list);
+ MT_LIST_INIT(&li->per_thr[i].quic_accept.conns);
+
+ li->per_thr[i].li = li;
+ }
+
+ return 0;
+}
+
+/* helper to get listener status for stats */
+enum li_status get_li_status(struct listener *l)
+{
+ if (!l->maxconn || l->nbconn < l->maxconn) {
+ if (l->state == LI_LIMITED)
+ return LI_STATUS_WAITING;
+ else
+ return LI_STATUS_OPEN;
+ }
+ return LI_STATUS_FULL;
+}
+
+/* adjust the listener's state and its proxy's listener counters if needed.
+ * It must be called under the listener's lock, but uses atomic ops to change
+ * the proxy's counters so that the proxy lock is not needed.
+ */
+void listener_set_state(struct listener *l, enum li_state st)
+{
+ struct proxy *px = l->bind_conf->frontend;
+
+ if (px) {
+ /* from state */
+ switch (l->state) {
+ case LI_NEW: /* first call */
+ _HA_ATOMIC_INC(&px->li_all);
+ break;
+ case LI_INIT:
+ case LI_ASSIGNED:
+ break;
+ case LI_PAUSED:
+ _HA_ATOMIC_DEC(&px->li_paused);
+ break;
+ case LI_LISTEN:
+ _HA_ATOMIC_DEC(&px->li_bound);
+ break;
+ case LI_READY:
+ case LI_FULL:
+ case LI_LIMITED:
+ _HA_ATOMIC_DEC(&px->li_ready);
+ break;
+ }
+
+ /* to state */
+ switch (st) {
+ case LI_NEW:
+ case LI_INIT:
+ case LI_ASSIGNED:
+ break;
+ case LI_PAUSED:
+ BUG_ON(l->rx.fd == -1);
+ _HA_ATOMIC_INC(&px->li_paused);
+ break;
+ case LI_LISTEN:
+ BUG_ON(l->rx.fd == -1);
+ _HA_ATOMIC_INC(&px->li_bound);
+ break;
+ case LI_READY:
+ case LI_FULL:
+ case LI_LIMITED:
+ BUG_ON(l->rx.fd == -1);
+ _HA_ATOMIC_INC(&px->li_ready);
+ break;
+ }
+ }
+ l->state = st;
+}
+
+/* This function adds the specified listener's file descriptor to the polling
+ * lists if it is in the LI_LISTEN state. The listener enters LI_READY or
+ * LI_FULL state depending on its number of connections. In daemon mode, we
+ * also support binding only the relevant processes to their respective
+ * listeners. We don't do that in debug mode however.
+ */
+void enable_listener(struct listener *listener)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &listener->lock);
+
+ /* If this listener is supposed to be only in the master, close it in
+	 * the workers. Conversely, if it's supposed to be only in the workers,
+	 * close it in the master.
+ */
+ if (!!master != !!(listener->rx.flags & RX_F_MWORKER))
+ do_unbind_listener(listener);
+
+ if (listener->state == LI_LISTEN) {
+ BUG_ON(listener->rx.fd == -1);
+ if ((global.mode & (MODE_DAEMON | MODE_MWORKER)) &&
+ (!!master != !!(listener->rx.flags & RX_F_MWORKER))) {
+ /* we don't want to enable this listener and don't
+ * want any fd event to reach it.
+ */
+ do_unbind_listener(listener);
+ }
+ else if (!listener->maxconn || listener->nbconn < listener->maxconn) {
+ listener->rx.proto->enable(listener);
+ listener_set_state(listener, LI_READY);
+ }
+ else {
+ listener_set_state(listener, LI_FULL);
+ }
+ }
+
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &listener->lock);
+}
+
+/*
+ * This function completely stops a listener.
+ * The proxy's listeners count is updated and the proxy is
+ * disabled and woken up after the last one is gone.
+ * It will need to operate under the proxy's lock and the protocol's lock.
+ * The caller is responsible for indicating in lpx, lpr whether the
+ * respective locks are already held (non-zero) or not (zero) so that the
+ * function picks the missing ones, in this order.
+ */
+void stop_listener(struct listener *l, int lpx, int lpr)
+{
+ struct proxy *px = l->bind_conf->frontend;
+
+ if (l->options & LI_O_NOSTOP) {
+ /* master-worker sockpairs are never closed but don't count as a
+ * job.
+ */
+ return;
+ }
+
+ if (!lpx && px)
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ if (!lpr)
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+
+ if (l->state > LI_INIT) {
+ do_unbind_listener(l);
+
+ if (l->state >= LI_ASSIGNED)
+ __delete_listener(l);
+
+ if (px)
+ proxy_cond_disable(px);
+ }
+
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!lpr)
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+
+ if (!lpx && px)
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+}
+
+/* This function adds the specified <listener> to the protocol <proto>. It
+ * does nothing if the protocol was already added. The listener's state is
+ * automatically updated from LI_INIT to LI_ASSIGNED. The number of listeners
+ * for the protocol is updated. This must be called with the proto lock held.
+ */
+void default_add_listener(struct protocol *proto, struct listener *listener)
+{
+ if (listener->state != LI_INIT)
+ return;
+ listener_set_state(listener, LI_ASSIGNED);
+ listener->rx.proto = proto;
+ LIST_APPEND(&proto->receivers, &listener->rx.proto_list);
+ proto->nb_receivers++;
+}
+
+/* default function called to suspend a listener: it simply passes the call to
+ * the underlying receiver. This is fine for most socket-based protocols. This
+ * must be called under the listener's lock. It will return non-zero on success,
+ * 0 on failure. If no receiver-level suspend is provided, the operation is
+ * assumed to succeed.
+ */
+int default_suspend_listener(struct listener *l)
+{
+ int ret = 1;
+
+ if (!l->rx.proto->rx_suspend)
+ return 1;
+
+ ret = l->rx.proto->rx_suspend(&l->rx);
+ return ret > 0 ? ret : 0;
+}
+
+
+/* Tries to resume a suspended listener, and returns non-zero on success or
+ * zero on failure. On certain errors, an alert or a warning might be displayed.
+ * It must be called with the listener's lock held. Depending on the listener's
+ * state and protocol, a listen() call might be used to resume operations, or a
+ * call to the receiver's resume() function might be used as well. This is
+ * suitable as a default function for TCP and UDP.
+ */
+int default_resume_listener(struct listener *l)
+{
+ int ret = 1;
+
+ if (l->state == LI_ASSIGNED) {
+ char msg[100];
+ int err;
+
+ err = l->rx.proto->listen(l, msg, sizeof(msg));
+ if (err & ERR_ALERT)
+ ha_alert("Resuming listener: %s\n", msg);
+ else if (err & ERR_WARN)
+ ha_warning("Resuming listener: %s\n", msg);
+
+ if (err & (ERR_FATAL | ERR_ABORT)) {
+ ret = 0;
+ goto end;
+ }
+ }
+
+ if (l->state < LI_PAUSED) {
+ ret = 0;
+ goto end;
+ }
+
+ if (l->state == LI_PAUSED && l->rx.proto->rx_resume &&
+ l->rx.proto->rx_resume(&l->rx) <= 0)
+ ret = 0;
+ end:
+ return ret;
+}
+
+
+/* This function tries to temporarily disable a listener, depending on the OS
+ * capabilities. Linux unbinds the listen socket after a SHUT_RD, and ignores
+ * SHUT_WR. Solaris refuses either shutdown(). OpenBSD ignores SHUT_RD but
+ * closes upon SHUT_WR and refuses to rebind. So a common validation path
+ * involves SHUT_WR && listen && SHUT_RD. In case of success, the FD's polling
+ * is disabled. It normally returns non-zero, unless an error is reported.
+ * It will need to operate under the proxy's lock. The caller is
+ * responsible for indicating in lpx whether the proxy's lock is
+ * already held (non-zero) or not (zero) so that the function picks it.
+ */
+int pause_listener(struct listener *l, int lpx)
+{
+ struct proxy *px = l->bind_conf->frontend;
+ int ret = 1;
+
+ if (!lpx && px)
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+
+ if (l->state <= LI_PAUSED)
+ goto end;
+
+ if (l->rx.proto->suspend)
+ ret = l->rx.proto->suspend(l);
+
+ MT_LIST_DELETE(&l->wait_queue);
+
+ listener_set_state(l, LI_PAUSED);
+
+ if (px && !px->li_ready) {
+ /* PROXY_LOCK is required */
+ proxy_cond_pause(px);
+ ha_warning("Paused %s %s.\n", proxy_cap_str(px->cap), px->id);
+ send_log(px, LOG_WARNING, "Paused %s %s.\n", proxy_cap_str(px->cap), px->id);
+ }
+ end:
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!lpx && px)
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ return ret;
+}
+
+/* This function tries to resume a temporarily disabled listener. Paused, full,
+ * limited and disabled listeners are handled, which means that this function
+ * may replace enable_listener(). The resulting state will either be LI_READY
+ * or LI_FULL. 0 is returned in case of failure to resume (eg: dead socket).
+ * Listeners bound to a different process are not woken up unless we're in
+ * foreground mode, and are ignored. If the listener was only in the assigned
+ * state, it's totally rebound. This can happen if a pause() has completely
+ * stopped it. If the resume fails, 0 is returned and an error might be
+ * displayed.
+ * It will need to operate under the proxy's lock. The caller is
+ * responsible for indicating in lpx whether the proxy's lock is
+ * already held (non-zero) or not (zero) so that the function picks it.
+ */
+int resume_listener(struct listener *l, int lpx)
+{
+ struct proxy *px = l->bind_conf->frontend;
+ int was_paused = px && px->li_paused;
+ int ret = 1;
+
+ if (!lpx && px)
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+
+	/* check that another thread didn't do the job in parallel (e.g. at the
+	 * end of listener_accept() while we'd come from dequeue_all_listeners()).
+ */
+ if (MT_LIST_INLIST(&l->wait_queue))
+ goto end;
+
+ if (l->state == LI_READY)
+ goto end;
+
+ /* the listener might have been stopped in parallel */
+ if (l->state < LI_PAUSED)
+ goto end;
+
+ if (l->rx.proto->resume)
+ ret = l->rx.proto->resume(l);
+
+ if (l->maxconn && l->nbconn >= l->maxconn) {
+ l->rx.proto->disable(l);
+ listener_set_state(l, LI_FULL);
+ goto done;
+ }
+
+ l->rx.proto->enable(l);
+ listener_set_state(l, LI_READY);
+
+ done:
+ if (was_paused && !px->li_paused) {
+ /* PROXY_LOCK is required */
+ proxy_cond_resume(px);
+ ha_warning("Resumed %s %s.\n", proxy_cap_str(px->cap), px->id);
+ send_log(px, LOG_WARNING, "Resumed %s %s.\n", proxy_cap_str(px->cap), px->id);
+ }
+ end:
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!lpx && px)
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ return ret;
+}
+
+/* Marks a ready listener as full so that the stream code tries to re-enable
+ * it upon next close() using resume_listener().
+ */
+static void listener_full(struct listener *l)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+ if (l->state >= LI_READY) {
+ MT_LIST_DELETE(&l->wait_queue);
+ if (l->state != LI_FULL) {
+ l->rx.proto->disable(l);
+ listener_set_state(l, LI_FULL);
+ }
+ }
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+}
+
+/* Marks a ready listener as limited so that we only try to re-enable it when
+ * resources are free again. It will be queued into the specified queue.
+ */
+static void limit_listener(struct listener *l, struct mt_list *list)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+ if (l->state == LI_READY) {
+ MT_LIST_TRY_APPEND(list, &l->wait_queue);
+ l->rx.proto->disable(l);
+ listener_set_state(l, LI_LIMITED);
+ }
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+}
+
+/* Dequeues all listeners waiting for a resource in the global wait queue */
+void dequeue_all_listeners()
+{
+ struct listener *listener;
+
+ while ((listener = MT_LIST_POP(&global_listener_queue, struct listener *, wait_queue))) {
+ /* This cannot fail because the listeners are by definition in
+ * the LI_LIMITED state.
+ */
+ resume_listener(listener, 0);
+ }
+}
+
+/* Dequeues all listeners waiting for a resource in proxy <px>'s queue */
+void dequeue_proxy_listeners(struct proxy *px)
+{
+ struct listener *listener;
+
+ while ((listener = MT_LIST_POP(&px->listener_queue, struct listener *, wait_queue))) {
+ /* This cannot fail because the listeners are by definition in
+ * the LI_LIMITED state.
+ */
+ resume_listener(listener, 0);
+ }
+}
+
+
+/* default function used to unbind a listener. This is for use by standard
+ * protocols working on top of accepted sockets. The receiver's rx_unbind()
+ * will automatically be used after the listener is disabled if the socket is
+ * still bound. This must be used under the listener's lock.
+ */
+void default_unbind_listener(struct listener *listener)
+{
+ if (listener->state <= LI_ASSIGNED)
+ goto out_close;
+
+ if (listener->rx.fd == -1) {
+ listener_set_state(listener, LI_ASSIGNED);
+ goto out_close;
+ }
+
+ if (listener->state >= LI_READY) {
+ listener->rx.proto->disable(listener);
+ if (listener->rx.flags & RX_F_BOUND)
+ listener_set_state(listener, LI_LISTEN);
+ }
+
+ out_close:
+ if (listener->rx.flags & RX_F_BOUND)
+ listener->rx.proto->rx_unbind(&listener->rx);
+}
+
+/* This function closes the listening socket for the specified listener,
+ * provided that it's already in a listening state. The protocol's unbind()
+ * is called to put the listener into LI_ASSIGNED or LI_LISTEN and handle
+ * the unbinding tasks. The listener enters then the LI_ASSIGNED state if
+ * the receiver is unbound. Must be called with the lock held.
+ */
+void do_unbind_listener(struct listener *listener)
+{
+ MT_LIST_DELETE(&listener->wait_queue);
+
+ if (listener->rx.proto->unbind)
+ listener->rx.proto->unbind(listener);
+
+ /* we may have to downgrade the listener if the rx was closed */
+ if (!(listener->rx.flags & RX_F_BOUND) && listener->state > LI_ASSIGNED)
+ listener_set_state(listener, LI_ASSIGNED);
+}
+
+/* This function closes the listening socket for the specified listener,
+ * provided that it's already in a listening state. The listener enters the
+ * LI_ASSIGNED state, except if the FD is not closed, in which case it may
+ * remain in LI_LISTEN. This function is intended to be used as a generic
+ * function for standard protocols.
+ */
+void unbind_listener(struct listener *listener)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &listener->lock);
+ do_unbind_listener(listener);
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &listener->lock);
+}
+
+/* creates one or multiple listeners for bind_conf <bc> on sockaddr <ss> on port
+ * range <portl> to <porth>, and possibly attached to fd <fd> (or -1 for auto
+ * allocation). The address family is taken from ss->ss_family, and the protocol
+ * passed in <proto> must be usable on this family. The protocol's default iocb
+ * is automatically preset as the receivers' iocb. The number of jobs and
+ * listeners is automatically increased by the number of listeners created. It
+ * returns non-zero on success, zero on error with the error message set in <err>.
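+ * For instance (an illustrative call), a port range of 8080..8085 with
+ * fd=-1 creates six listeners, one per port.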
+ */
+int create_listeners(struct bind_conf *bc, const struct sockaddr_storage *ss,
+ int portl, int porth, int fd, struct protocol *proto, char **err)
+{
+ struct listener *l;
+ int port;
+
+ for (port = portl; port <= porth; port++) {
+ l = calloc(1, sizeof(*l));
+ if (!l) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+ l->obj_type = OBJ_TYPE_LISTENER;
+ LIST_APPEND(&bc->frontend->conf.listeners, &l->by_fe);
+ LIST_APPEND(&bc->listeners, &l->by_bind);
+ l->bind_conf = bc;
+ l->rx.settings = &bc->settings;
+ l->rx.owner = l;
+ l->rx.iocb = proto->default_iocb;
+ l->rx.fd = fd;
+
+ memcpy(&l->rx.addr, ss, sizeof(*ss));
+ if (proto->fam->set_port)
+ proto->fam->set_port(&l->rx.addr, port);
+
+ MT_LIST_INIT(&l->wait_queue);
+ listener_set_state(l, LI_INIT);
+
+ proto->add(proto, l);
+
+ if (fd != -1)
+ l->rx.flags |= RX_F_INHERITED;
+
+ l->extra_counters = NULL;
+
+ HA_RWLOCK_INIT(&l->lock);
+ _HA_ATOMIC_INC(&jobs);
+ _HA_ATOMIC_INC(&listeners);
+ }
+ return 1;
+}
+
+/* clones listener <src> and returns the new one. All dynamically allocated
+ * fields are reallocated (name for now). The new listener is inserted before
+ * the original one in the bind_conf and frontend lists. This allows it to be
+ * duplicated while iterating over the current list. The original listener must
+ * only be in the INIT or ASSIGNED states, and the new listener will only be
+ * placed into the INIT state. The counters are always set to NULL. Maxsock is
+ * updated. Returns NULL on allocation error.
+ */
+struct listener *clone_listener(struct listener *src)
+{
+ struct listener *l;
+
+ l = calloc(1, sizeof(*l));
+ if (!l)
+ goto oom1;
+ memcpy(l, src, sizeof(*l));
+
+ if (l->name) {
+ l->name = strdup(l->name);
+ if (!l->name)
+ goto oom2;
+ }
+
+ l->rx.owner = l;
+ l->state = LI_INIT;
+ l->counters = NULL;
+ l->extra_counters = NULL;
+
+ LIST_APPEND(&src->by_fe, &l->by_fe);
+ LIST_APPEND(&src->by_bind, &l->by_bind);
+
+ MT_LIST_INIT(&l->wait_queue);
+
+ l->rx.proto->add(l->rx.proto, l);
+
+ HA_RWLOCK_INIT(&l->lock);
+ _HA_ATOMIC_INC(&jobs);
+ _HA_ATOMIC_INC(&listeners);
+ global.maxsock++;
+ return l;
+
+ oom2:
+ free(l);
+ oom1:
+ return NULL;
+}
+
+/* Delete a listener from its protocol's list of listeners. The listener's
+ * state is automatically updated from LI_ASSIGNED to LI_INIT. The protocol's
+ * number of listeners is updated, as well as the global number of listeners
+ * and jobs. Note that the listener must have previously been unbound. This
+ * is a low-level function expected to be called with the proto_lock and the
+ * listener's lock held.
+ */
+void __delete_listener(struct listener *listener)
+{
+ if (listener->state == LI_ASSIGNED) {
+ listener_set_state(listener, LI_INIT);
+ LIST_DELETE(&listener->rx.proto_list);
+ listener->rx.proto->nb_receivers--;
+ _HA_ATOMIC_DEC(&jobs);
+ _HA_ATOMIC_DEC(&listeners);
+ }
+}
+
+/* Delete a listener from its protocol's list of listeners (please check
+ * __delete_listener() above). The proto_lock and the listener's lock will
+ * be grabbed in this order.
+ */
+void delete_listener(struct listener *listener)
+{
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &listener->lock);
+ __delete_listener(listener);
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &listener->lock);
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* Returns a suitable value for a listener's backlog. It uses the listener's,
+ * otherwise the frontend's backlog, otherwise the listener's maxconn,
+ * otherwise the frontend's maxconn, otherwise 1024.
+ */
+int listener_backlog(const struct listener *l)
+{
+ if (l->backlog)
+ return l->backlog;
+
+ if (l->bind_conf->frontend->backlog)
+ return l->bind_conf->frontend->backlog;
+
+ if (l->maxconn)
+ return l->maxconn;
+
+ if (l->bind_conf->frontend->maxconn)
+ return l->bind_conf->frontend->maxconn;
+
+ return 1024;
+}
+
+/* This function is called on a read event from a listening socket, corresponding
+ * to an accept. It tries to accept as many connections as possible, and for each
+ * calls the listener's accept handler (generally the frontend's accept handler).
+ */
+void listener_accept(struct listener *l)
+{
+ struct connection *cli_conn;
+ struct proxy *p;
+ unsigned int max_accept;
+ int next_conn = 0;
+ int next_feconn = 0;
+ int next_actconn = 0;
+ int expire;
+ int ret;
+
+ p = l->bind_conf->frontend;
+
+ /* if l->maxaccept is -1, then max_accept is UINT_MAX. It is not really
+	 * unlimited, but it is probably enough.
+ */
+ max_accept = l->maxaccept ? l->maxaccept : 1;
+
+ if (!(l->options & LI_O_UNLIMITED) && global.sps_lim) {
+ int max = freq_ctr_remain(&global.sess_per_sec, global.sps_lim, 0);
+
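+		/* e.g. (assumed figures): with global.sps_lim=100 and 80
+		 * sessions already counted in the current second, max becomes
+		 * 20 and max_accept below is clamped accordingly.
+		 */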
+ if (unlikely(!max)) {
+			/* global session rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&global.sess_per_sec, global.sps_lim, 0));
+ goto limit_global;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+
+ if (!(l->options & LI_O_UNLIMITED) && global.cps_lim) {
+ int max = freq_ctr_remain(&global.conn_per_sec, global.cps_lim, 0);
+
+ if (unlikely(!max)) {
+			/* global connection rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&global.conn_per_sec, global.cps_lim, 0));
+ goto limit_global;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+#ifdef USE_OPENSSL
+ if (!(l->options & LI_O_UNLIMITED) && global.ssl_lim &&
+ l->bind_conf && l->bind_conf->options & BC_O_USE_SSL) {
+ int max = freq_ctr_remain(&global.ssl_per_sec, global.ssl_lim, 0);
+
+ if (unlikely(!max)) {
+			/* global SSL rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&global.ssl_per_sec, global.ssl_lim, 0));
+ goto limit_global;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+#endif
+ if (p && p->fe_sps_lim) {
+ int max = freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0);
+
+ if (unlikely(!max)) {
+ /* frontend accept rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0));
+ goto limit_proxy;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+
+ /* Note: if we fail to allocate a connection because of configured
+	 * limits, we'll schedule a new attempt at worst 1 second later. If we
+	 * fail due to system limits or a temporary resource shortage, we try
+	 * again 100ms later in the worst case.
+ */
+ for (; max_accept; next_conn = next_feconn = next_actconn = 0, max_accept--) {
+ unsigned int count;
+ int status;
+ __decl_thread(unsigned long mask);
+
+ /* pre-increase the number of connections without going too far.
+ * We process the listener, then the proxy, then the process.
+ * We know which ones to unroll based on the next_xxx value.
+ */
+ do {
+ count = l->nbconn;
+ if (unlikely(l->maxconn && count >= l->maxconn)) {
+ /* the listener was marked full or another
+ * thread is going to do it.
+ */
+ next_conn = 0;
+ listener_full(l);
+ goto end;
+ }
+ next_conn = count + 1;
+ } while (!_HA_ATOMIC_CAS(&l->nbconn, (int *)(&count), next_conn));
+
+ if (p) {
+ do {
+ count = p->feconn;
+ if (unlikely(count >= p->maxconn)) {
+ /* the frontend was marked full or another
+ * thread is going to do it.
+ */
+ next_feconn = 0;
+ expire = TICK_ETERNITY;
+ goto limit_proxy;
+ }
+ next_feconn = count + 1;
+ } while (!_HA_ATOMIC_CAS(&p->feconn, &count, next_feconn));
+ }
+
+ if (!(l->options & LI_O_UNLIMITED)) {
+ do {
+ count = actconn;
+ if (unlikely(count >= global.maxconn)) {
+ /* the process was marked full or another
+ * thread is going to do it.
+ */
+ next_actconn = 0;
+ expire = tick_add(now_ms, 1000); /* try again in 1 second */
+ goto limit_global;
+ }
+ next_actconn = count + 1;
+ } while (!_HA_ATOMIC_CAS(&actconn, (int *)(&count), next_actconn));
+ }
+
+ /* be careful below, the listener might be shutting down in
+ * another thread on error and we must not dereference its
+ * FD without a bit of protection.
+ */
+ cli_conn = NULL;
+ status = CO_AC_PERMERR;
+
+ HA_RWLOCK_RDLOCK(LISTENER_LOCK, &l->lock);
+ if (l->rx.flags & RX_F_BOUND)
+ cli_conn = l->rx.proto->accept_conn(l, &status);
+ HA_RWLOCK_RDUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!cli_conn) {
+ switch (status) {
+ case CO_AC_DONE:
+ goto end;
+
+ case CO_AC_RETRY: /* likely a signal */
+ _HA_ATOMIC_DEC(&l->nbconn);
+ if (p)
+ _HA_ATOMIC_DEC(&p->feconn);
+ if (!(l->options & LI_O_UNLIMITED))
+ _HA_ATOMIC_DEC(&actconn);
+ continue;
+
+ case CO_AC_YIELD:
+ max_accept = 0;
+ goto end;
+
+ default:
+ goto transient_error;
+ }
+ }
+
+ /* The connection was accepted, it must be counted as such */
+ if (l->counters)
+ HA_ATOMIC_UPDATE_MAX(&l->counters->conn_max, next_conn);
+
+ if (p) {
+ HA_ATOMIC_UPDATE_MAX(&p->fe_counters.conn_max, next_feconn);
+ proxy_inc_fe_conn_ctr(l, p);
+ }
+
+ if (!(l->options & LI_O_UNLIMITED)) {
+ count = update_freq_ctr(&global.conn_per_sec, 1);
+ HA_ATOMIC_UPDATE_MAX(&global.cps_max, count);
+ }
+
+ _HA_ATOMIC_INC(&activity[tid].accepted);
+
+ /* past this point, l->accept() will automatically decrement
+ * l->nbconn, feconn and actconn once done. Setting next_*conn=0
+ * allows the error path not to rollback on nbconn. It's more
+ * convenient than duplicating all exit labels.
+ */
+ next_conn = 0;
+ next_feconn = 0;
+ next_actconn = 0;
+
+
+#if defined(USE_THREAD)
+ if (l->rx.flags & RX_F_LOCAL_ACCEPT)
+ goto local_accept;
+
+ mask = thread_mask(l->rx.bind_thread) & all_threads_mask;
+ if (atleast2(mask) && (global.tune.options & GTUNE_LISTENER_MQ) && !stopping) {
+ struct accept_queue_ring *ring;
+ unsigned int t, t0, t1, t2;
+
+ /* The principle is that we have two running indexes,
+ * each visiting in turn all threads bound to this
+ * listener. The connection will be assigned to the one
+ * with the least connections, and the other one will
+ * be updated. This provides a good fairness on short
+ * connections (round robin) and on long ones (conn
+ * count), without ever missing any idle thread.
+ */
+
+ /* keep a copy for the final update. thr_idx is composite
+ * and made of (t2<<16) + t1.
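+			 * For example, a stored value of 0x00050002 decodes to
+			 * t2=5 and t1=2 (illustrative).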
+ */
+ t0 = l->thr_idx;
+ do {
+ unsigned long m1, m2;
+ int q1, q2;
+
+ t2 = t1 = t0;
+ t2 >>= 16;
+ t1 &= 0xFFFF;
+
+ /* t1 walks low to high bits ;
+ * t2 walks high to low.
+ */
+ m1 = mask >> t1;
+ m2 = mask & (t2 ? nbits(t2 + 1) : ~0UL);
+
+ if (unlikely(!(m1 & 1))) {
+ m1 &= ~1UL;
+ if (!m1) {
+ m1 = mask;
+ t1 = 0;
+ }
+ t1 += my_ffsl(m1) - 1;
+ }
+
+ if (unlikely(!(m2 & (1UL << t2)) || t1 == t2)) {
+ /* highest bit not set */
+ if (!m2)
+ m2 = mask;
+
+ t2 = my_flsl(m2) - 1;
+ }
+
+ /* now we have two distinct thread IDs belonging to the mask */
+ q1 = accept_queue_rings[t1].tail - accept_queue_rings[t1].head + ACCEPT_QUEUE_SIZE;
+ if (q1 >= ACCEPT_QUEUE_SIZE)
+ q1 -= ACCEPT_QUEUE_SIZE;
+
+ q2 = accept_queue_rings[t2].tail - accept_queue_rings[t2].head + ACCEPT_QUEUE_SIZE;
+ if (q2 >= ACCEPT_QUEUE_SIZE)
+ q2 -= ACCEPT_QUEUE_SIZE;
+
+ /* we have 3 possibilities now :
+ * q1 < q2 : t1 is less loaded than t2, so we pick it
+ * and update t2 (since t1 might still be
+ * lower than another thread)
+ * q1 > q2 : t2 is less loaded than t1, so we pick it
+ * and update t1 (since t2 might still be
+ * lower than another thread)
+ * q1 = q2 : both are equally loaded, thus we pick t1
+ * and update t1 as it will become more loaded
+ * than t2.
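+			 * For instance (queue lengths assumed), q1=3 and q2=5
+			 * select t1 and shift t2 down, so the next pass compares
+			 * t1 against a different candidate.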
+ */
+
+ q1 += l->thr_conn[t1];
+ q2 += l->thr_conn[t2];
+
+ if (q1 - q2 < 0) {
+ t = t1;
+ t2 = t2 ? t2 - 1 : LONGBITS - 1;
+ }
+ else if (q1 - q2 > 0) {
+ t = t2;
+ t1++;
+ if (t1 >= LONGBITS)
+ t1 = 0;
+ }
+ else {
+ t = t1;
+ t1++;
+ if (t1 >= LONGBITS)
+ t1 = 0;
+ }
+
+ /* new value for thr_idx */
+ t1 += (t2 << 16);
+ } while (unlikely(!_HA_ATOMIC_CAS(&l->thr_idx, &t0, t1)));
+
+ /* We successfully selected the best thread "t" for this
+ * connection. We use deferred accepts even if it's the
+ * local thread because tests show that it's the best
+ * performing model, likely due to better cache locality
+ * when processing this loop.
+ */
+ ring = &accept_queue_rings[t];
+ if (accept_queue_push_mp(ring, cli_conn)) {
+ _HA_ATOMIC_INC(&activity[t].accq_pushed);
+ tasklet_wakeup(ring->tasklet);
+ continue;
+ }
+ /* If the ring is full we do a synchronous accept on
+ * the local thread here.
+ */
+ _HA_ATOMIC_INC(&activity[t].accq_full);
+ }
+#endif // USE_THREAD
+
+ local_accept:
+ _HA_ATOMIC_INC(&l->thr_conn[tid]);
+ ret = l->accept(cli_conn);
+ if (unlikely(ret <= 0)) {
+ /* The connection was closed by stream_accept(). Either
+ * we just have to ignore it (ret == 0) or it's a critical
+ * error due to a resource shortage, and we must stop the
+ * listener (ret < 0).
+ */
+ if (ret == 0) /* successful termination */
+ continue;
+
+ goto transient_error;
+ }
+
+ /* increase the per-process number of cumulated sessions, this
+ * may only be done once l->accept() has accepted the connection.
+ */
+ if (!(l->options & LI_O_UNLIMITED)) {
+ count = update_freq_ctr(&global.sess_per_sec, 1);
+ HA_ATOMIC_UPDATE_MAX(&global.sps_max, count);
+ }
+#ifdef USE_OPENSSL
+ if (!(l->options & LI_O_UNLIMITED) &&
+ l->bind_conf && l->bind_conf->options & BC_O_USE_SSL) {
+ count = update_freq_ctr(&global.ssl_per_sec, 1);
+ HA_ATOMIC_UPDATE_MAX(&global.ssl_max, count);
+ }
+#endif
+
+ th_ctx->flags &= ~TH_FL_STUCK; // this thread is still running
+ } /* end of for (max_accept--) */
+
+ end:
+ if (next_conn)
+ _HA_ATOMIC_DEC(&l->nbconn);
+
+ if (p && next_feconn)
+ _HA_ATOMIC_DEC(&p->feconn);
+
+ if (next_actconn)
+ _HA_ATOMIC_DEC(&actconn);
+
+ if ((l->state == LI_FULL && (!l->maxconn || l->nbconn < l->maxconn)) ||
+ (l->state == LI_LIMITED &&
+ ((!p || p->feconn < p->maxconn) && (actconn < global.maxconn) &&
+ (!tick_isset(global_listener_queue_task->expire) ||
+ tick_is_expired(global_listener_queue_task->expire, now_ms))))) {
+		/* at least one thread has to do this when quitting */
+ resume_listener(l, 0);
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ if (p && !MT_LIST_ISEMPTY(&p->listener_queue) &&
+ (!p->fe_sps_lim || freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0) > 0))
+ dequeue_proxy_listeners(p);
+ }
+ return;
+
+ transient_error:
+ /* pause the listener for up to 100 ms */
+ expire = tick_add(now_ms, 100);
+
+ /* This may be a shared socket that was paused by another process.
+	 * Let's pause it in this case as well.
+ */
+ if (l->rx.proto && l->rx.proto->rx_listening(&l->rx) == 0) {
+ pause_listener(l, 0);
+ goto end;
+ }
+
+ limit_global:
+ /* (re-)queue the listener to the global queue and set it to expire no
+ * later than <expire> ahead. The listener turns to LI_LIMITED.
+ */
+ limit_listener(l, &global_listener_queue);
+ HA_RWLOCK_RDLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ task_schedule(global_listener_queue_task, expire);
+ HA_RWLOCK_RDUNLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ goto end;
+
+ limit_proxy:
+ /* (re-)queue the listener to the proxy's queue and set it to expire no
+ * later than <expire> ahead. The listener turns to LI_LIMITED.
+ */
+ limit_listener(l, &p->listener_queue);
+ if (p->task && tick_isset(expire))
+ task_schedule(p->task, expire);
+ goto end;
+}
+
+/* Notify the listener that a connection initiated from it was released. This
+ * is used to keep the connection count consistent and to possibly re-open
+ * listening when it was limited.
+ */
+void listener_release(struct listener *l)
+{
+ struct proxy *fe = l->bind_conf->frontend;
+
+ if (!(l->options & LI_O_UNLIMITED))
+ _HA_ATOMIC_DEC(&actconn);
+ if (fe)
+ _HA_ATOMIC_DEC(&fe->feconn);
+ _HA_ATOMIC_DEC(&l->nbconn);
+ _HA_ATOMIC_DEC(&l->thr_conn[tid]);
+
+ if (l->state == LI_FULL || l->state == LI_LIMITED)
+ resume_listener(l, 0);
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ if (fe && !MT_LIST_ISEMPTY(&fe->listener_queue) &&
+ (!fe->fe_sps_lim || freq_ctr_remain(&fe->fe_sess_per_sec, fe->fe_sps_lim, 0) > 0))
+ dequeue_proxy_listeners(fe);
+}
+
+/* Initializes the listener queues. Returns 0 on success, otherwise ERR_* flags */
+static int listener_queue_init()
+{
+ global_listener_queue_task = task_new_anywhere();
+ if (!global_listener_queue_task) {
+ ha_alert("Out of memory when initializing global listener queue\n");
+ return ERR_FATAL|ERR_ABORT;
+ }
+ /* very simple initialization, users will queue the task if needed */
+ global_listener_queue_task->context = NULL; /* not even a context! */
+ global_listener_queue_task->process = manage_global_listener_queue;
+ HA_RWLOCK_INIT(&global_listener_rwlock);
+
+ return 0;
+}
+
+static void listener_queue_deinit()
+{
+ task_destroy(global_listener_queue_task);
+ global_listener_queue_task = NULL;
+}
+
+REGISTER_CONFIG_POSTPARSER("multi-threaded listener queue", listener_queue_init);
+REGISTER_POST_DEINIT(listener_queue_deinit);
+
+
+/* This is the global management task for listeners. It enables listeners waiting
+ * for global resources when there are enough free resources, or at least once in
+ * a while. It is designed to be called as a task. It's exported so that it's easy
+ * to spot in "show tasks" or "show profiling".
+ */
+struct task *manage_global_listener_queue(struct task *t, void *context, unsigned int state)
+{
+ /* If there are still too many concurrent connections, let's wait for
+ * some of them to go away. We don't need to re-arm the timer because
+ * each of them will scan the queue anyway.
+ */
+ if (unlikely(actconn >= global.maxconn))
+ goto out;
+
+ /* We should periodically try to enable listeners waiting for a global
+ * resource here, because it is possible, though very unlikely, that
+ * they have been blocked by a temporary lack of global resource such
+ * as a file descriptor or memory and that the temporary condition has
+ * disappeared.
+ */
+ dequeue_all_listeners();
+
+ out:
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ t->expire = TICK_ETERNITY;
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ task_queue(t);
+ return t;
+}
+
+/*
+ * Registers the bind keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void bind_register_keywords(struct bind_kw_list *kwl)
+{
+ LIST_APPEND(&bind_keywords.list, &kwl->list);
+}
+
+/* Return a pointer to the bind keyword <kw>, or NULL if not found. If the
+ * keyword is found with a NULL ->parse() function, then an attempt is made to
+ * find one with a valid ->parse() function. This way it is possible to declare
+ * platform-dependent, known keywords as NULL, then only declare them as valid
+ * if some options are met. Note that if the requested keyword contains an
+ * opening parenthesis, everything from this point is ignored.
+ */
+struct bind_kw *bind_find_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct bind_kw_list *kwl;
+ struct bind_kw *ret = NULL;
+
+ kwend = strchr(kw, '(');
+ if (!kwend)
+ kwend = kw + strlen(kw);
+
+ list_for_each_entry(kwl, &bind_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0) {
+ if (kwl->kw[index].parse)
+					return &kwl->kw[index]; /* found it! */
+ else
+ ret = &kwl->kw[index]; /* may be OK */
+ }
+ }
+ }
+ return ret;
+}
+
+/* Dumps all registered "bind" keywords to the <out> string pointer. The
+ * unsupported keywords are only dumped if their supported form was not
+ * found.
+ */
+void bind_dump_kws(char **out)
+{
+ struct bind_kw_list *kwl;
+ int index;
+
+ if (!out)
+ return;
+
+ *out = NULL;
+ list_for_each_entry(kwl, &bind_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].parse ||
+ bind_find_kw(kwl->kw[index].kw) == &kwl->kw[index]) {
+ memprintf(out, "%s[%4s] %s%s%s\n", *out ? *out : "",
+ kwl->scope,
+ kwl->kw[index].kw,
+ kwl->kw[index].skip ? " <arg>" : "",
+ kwl->kw[index].parse ? "" : " (not supported)");
+ }
+ }
+ }
+}
+
+/* Try to find among the registered bind keywords the one that looks closest
+ * to <word> by counting transitions between letters, digits and other
+ * characters. Will return the best matching word if found, otherwise NULL.
+ */
+const char *bind_find_best_kw(const char *word)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const struct bind_kw_list *kwl;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ int index;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, &bind_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = kwl->kw[index].kw;
+ }
+ }
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+
+ return best_ptr;
+}
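+
+/* Illustrative behaviour only (not an exhaustive contract): a misspelled
+ * keyword such as "maxcon" would typically be close enough to be suggested
+ * as "maxconn", while a completely unrelated word exceeds the distance
+ * threshold above and yields NULL.
+ */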
+
+/* Allocates a bind_conf struct for a bind line, and chains it to the frontend <fe>.
+ * If <arg> is not NULL, it is duplicated into ->arg to store useful config
+ * information for error reporting. NULL is returned on error.
+ */
+struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file,
+ int line, const char *arg, struct xprt_ops *xprt)
+{
+ struct bind_conf *bind_conf = calloc(1, sizeof(*bind_conf));
+
+ if (!bind_conf)
+ goto err;
+
+ bind_conf->file = strdup(file);
+ if (!bind_conf->file)
+ goto err;
+ bind_conf->line = line;
+ if (arg) {
+ bind_conf->arg = strdup(arg);
+ if (!bind_conf->arg)
+ goto err;
+ }
+
+ LIST_APPEND(&fe->conf.bind, &bind_conf->by_fe);
+ bind_conf->settings.ux.uid = -1;
+ bind_conf->settings.ux.gid = -1;
+ bind_conf->settings.ux.mode = 0;
+ bind_conf->settings.shards = 1;
+ bind_conf->xprt = xprt;
+ bind_conf->frontend = fe;
+ bind_conf->severity_output = CLI_SEVERITY_NONE;
+#ifdef USE_OPENSSL
+ HA_RWLOCK_INIT(&bind_conf->sni_lock);
+ bind_conf->sni_ctx = EB_ROOT;
+ bind_conf->sni_w_ctx = EB_ROOT;
+#endif
+ LIST_INIT(&bind_conf->listeners);
+ return bind_conf;
+
+ err:
+ if (bind_conf) {
+ ha_free(&bind_conf->file);
+ ha_free(&bind_conf->arg);
+ }
+ ha_free(&bind_conf);
+ return NULL;
+}
+
+const char *listener_state_str(const struct listener *l)
+{
+ static const char *states[8] = {
+ "NEW", "INI", "ASS", "PAU", "LIS", "RDY", "FUL", "LIM",
+ };
+ unsigned int st = l->state;
+
+ if (st >= sizeof(states) / sizeof(*states))
+ return "INVALID";
+ return states[st];
+}
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* set temp integer to the number of connections to the same listening socket */
+static int
+smp_fetch_dconn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->sess->listener->nbconn;
+ return 1;
+}
+
+/* set temp integer to the id of the socket (listener) */
+static int
+smp_fetch_so_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->sess->listener->luid;
+ return 1;
+}
+static int
+smp_fetch_so_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.str.area = smp->sess->listener->name;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+
+/* parse the "accept-proxy" bind keyword */
+static int bind_parse_accept_proxy(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+
+ list_for_each_entry(l, &conf->listeners, by_bind)
+ l->options |= LI_O_ACC_PROXY;
+
+ return 0;
+}
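+
+/* Illustrative usage on a bind line: "bind :80 accept-proxy" makes every
+ * listener created by this line expect the PROXY protocol header on all
+ * incoming connections.
+ */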
+
+/* parse the "accept-netscaler-cip" bind keyword */
+static int bind_parse_accept_netscaler_cip(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+ uint32_t val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+	if (val <= 0) {
+		memprintf(err, "'%s' : invalid value %u, must be > 0", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind) {
+ l->options |= LI_O_ACC_CIP;
+ conf->ns_cip_magic = val;
+ }
+
+ return 0;
+}
+
+/* parse the "backlog" bind keyword */
+static int bind_parse_backlog(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+	if (val < 0) {
+		memprintf(err, "'%s' : invalid value %d, must be >= 0", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind)
+ l->backlog = val;
+
+ return 0;
+}
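+
+/* Illustrative usage: "bind :80 backlog 1024" overrides the default backlog
+ * applied to the listening sockets created by this bind line.
+ */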
+
+/* parse the "id" bind keyword */
+static int bind_parse_id(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct eb32_node *node;
+ struct listener *l, *new;
+ char *error;
+
+ if (conf->listeners.n != conf->listeners.p) {
+ memprintf(err, "'%s' can only be used with a single socket", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : expects an integer argument", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ new = LIST_NEXT(&conf->listeners, struct listener *, by_bind);
+ new->luid = strtol(args[cur_arg + 1], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "'%s' : expects an integer argument, found '%s'", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ new->conf.id.key = new->luid;
+
+ if (new->luid <= 0) {
+ memprintf(err, "'%s' : custom id has to be > 0", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ node = eb32_lookup(&px->conf.used_listener_id, new->luid);
+ if (node) {
+ l = container_of(node, struct listener, conf.id);
+ memprintf(err, "'%s' : custom id %d already used at %s:%d ('bind %s')",
+ args[cur_arg], l->luid, l->bind_conf->file, l->bind_conf->line,
+ l->bind_conf->arg);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ eb32_insert(&px->conf.used_listener_id, &new->conf.id);
+ return 0;
+}
+
+/* Complete a bind_conf by parsing the args after the address. <args> is the
+ * arguments array, <cur_arg> is the first one to be considered. <section> is
+ * the section name to report in error messages, and <file> and <linenum> are
+ * the file name and line number respectively. Note that args[0..1] are used
+ * in error messages to provide some context. The return value is an error
+ * code, zero on success or an OR of ERR_{FATAL,ABORT,ALERT,WARN}.
+ */
+int bind_parse_args_list(struct bind_conf *bind_conf, char **args, int cur_arg, const char *section, const char *file, int linenum)
+{
+ int err_code = 0;
+
+ while (*(args[cur_arg])) {
+ struct bind_kw *kw;
+ const char *best;
+
+ kw = bind_find_kw(args[cur_arg]);
+ if (kw) {
+ char *err = NULL;
+ int code;
+
+ if (!kw->parse) {
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : '%s' option is not implemented in this version (check build options).\n",
+ file, linenum, args[0], args[1], section, args[cur_arg]);
+				cur_arg += 1 + kw->skip;
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ code = kw->parse(args, cur_arg, bind_conf->frontend, bind_conf, &err);
+ err_code |= code;
+
+ if (code) {
+ if (err && *err) {
+ indent_msg(&err, 2);
+ if (((code & (ERR_WARN|ERR_ALERT)) == ERR_WARN))
+ ha_warning("parsing [%s:%d] : '%s %s' in section '%s' : %s\n", file, linenum, args[0], args[1], section, err);
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : %s\n", file, linenum, args[0], args[1], section, err);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : error encountered while processing '%s'.\n",
+ file, linenum, args[0], args[1], section, args[cur_arg]);
+ if (code & ERR_FATAL) {
+ free(err);
+ cur_arg += 1 + kw->skip;
+ goto out;
+ }
+ }
+ free(err);
+ cur_arg += 1 + kw->skip;
+ continue;
+ }
+
+ best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s': unknown keyword '%s'; did you mean '%s' maybe ?\n",
+ file, linenum, args[0], args[1], section, args[cur_arg], best);
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s': unknown keyword '%s'.\n",
+ file, linenum, args[0], args[1], section, args[cur_arg]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((bind_conf->options & (BC_O_USE_SOCK_DGRAM|BC_O_USE_SOCK_STREAM)) == (BC_O_USE_SOCK_DGRAM|BC_O_USE_SOCK_STREAM) ||
+ (bind_conf->options & (BC_O_USE_XPRT_DGRAM|BC_O_USE_XPRT_STREAM)) == (BC_O_USE_XPRT_DGRAM|BC_O_USE_XPRT_STREAM)) {
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : cannot mix datagram and stream protocols.\n",
+ file, linenum, args[0], args[1], section);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* The transport layer automatically switches to QUIC when QUIC is
+ * selected, regardless of bind_conf settings. We then need to
+ * initialize QUIC params.
+ */
+ if ((bind_conf->options & (BC_O_USE_SOCK_DGRAM|BC_O_USE_XPRT_STREAM)) == (BC_O_USE_SOCK_DGRAM|BC_O_USE_XPRT_STREAM)) {
+#ifdef USE_QUIC
+ bind_conf->xprt = xprt_get(XPRT_QUIC);
+ if (!(bind_conf->options & BC_O_USE_SSL)) {
+ bind_conf->options |= BC_O_USE_SSL;
+ ha_warning("parsing [%s:%d] : '%s %s' in section '%s' : QUIC protocol detected, enabling ssl. Use 'ssl' to shut this warning.\n",
+ file, linenum, args[0], args[1], section);
+ }
+ quic_transport_params_init(&bind_conf->quic_params, 1);
+#else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : QUIC protocol selected but support not compiled in (check build options).\n",
+ file, linenum, args[0], args[1], section);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif
+ }
+ else if (bind_conf->options & BC_O_USE_SSL) {
+ bind_conf->xprt = xprt_get(XPRT_SSL);
+ }
+
+ out:
+ return err_code;
+}
+
+/* parse the "maxconn" bind keyword */
+static int bind_parse_maxconn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+ if (val < 0) {
+ memprintf(err, "'%s' : invalid value %d, must be >= 0", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind)
+ l->maxconn = val;
+
+ return 0;
+}
+
+/* parse the "name" bind keyword */
+static int bind_parse_name(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind)
+ l->name = strdup(args[cur_arg + 1]);
+
+ return 0;
+}
+
+/* parse the "nice" bind keyword */
+static int bind_parse_nice(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+ if (val < -1024 || val > 1024) {
+ memprintf(err, "'%s' : invalid value %d, allowed range is -1024..1024", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind)
+ l->nice = val;
+
+ return 0;
+}
+
+/* parse the "process" bind keyword */
+static int bind_parse_process(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char *slash;
+ unsigned long proc = 0, thread = 0;
+
+ if ((slash = strchr(args[cur_arg + 1], '/')) != NULL)
+ *slash = 0;
+
+ if (parse_process_number(args[cur_arg + 1], &proc, 1, NULL, err)) {
+ memprintf(err, "'%s' : %s", args[cur_arg], *err);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (slash) {
+ if (parse_process_number(slash+1, &thread, MAX_THREADS, NULL, err)) {
+ memprintf(err, "'%s' : %s", args[cur_arg], *err);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ *slash = '/';
+ }
+
+ conf->bind_thread |= thread;
+
+ memprintf(err, "'process %s' on 'bind' lines is deprecated and will be removed in 2.7.", args[cur_arg+1]);
+ if (slash)
+ memprintf(err, "%s Please use 'thread %s' instead.", *err, slash + 1);
+ return ERR_WARN;
+}
+
+/* parse the "proto" bind keyword */
+static int bind_parse_proto(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct ist proto;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ proto = ist(args[cur_arg + 1]);
+ conf->mux_proto = get_mux_proto(proto);
+ if (!conf->mux_proto) {
+ memprintf(err, "'%s' : unknown MUX protocol '%s'", args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* parse the "shards" bind keyword. Takes an integer or "by-thread" */
+static int bind_parse_shards(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg + 1], "by-thread") == 0) {
+ val = MAX_THREADS; /* will be trimmed later anyway */
+ } else {
+ val = atol(args[cur_arg + 1]);
+ if (val < 1 || val > MAX_THREADS) {
+ memprintf(err, "'%s' : invalid value %d, allowed range is %d..%d or 'by-thread'", args[cur_arg], val, 1, MAX_THREADS);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ conf->settings.shards = val;
+ return 0;
+}
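+
+/* Illustrative usage (per the parser above):
+ *   bind :8443 shards 4           # split this address over 4 shards
+ *   bind :8443 shards by-thread   # one shard per thread
+ */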
+
+/* parse the "thread" bind keyword */
+static int bind_parse_thread(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char *sep = NULL;
+ ulong thread = 0;
+ long tgroup = 0;
+
+ tgroup = strtol(args[cur_arg + 1], &sep, 10);
+ if (*sep == '/') {
+ /* a thread group was present */
+ if (tgroup < 1 || tgroup > MAX_TGROUPS) {
+ memprintf(err, "'%s' thread-group number must be between 1 and %d (was %ld)", args[cur_arg + 1], MAX_TGROUPS, tgroup);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ sep++;
+ }
+ else {
+ /* no thread group */
+ tgroup = 0;
+ sep = args[cur_arg + 1];
+ }
+
+ if ((conf->bind_tgroup || conf->bind_thread) &&
+ conf->bind_tgroup != tgroup) {
+ memprintf(err, "'%s' multiple thread-groups are not supported", args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (parse_process_number(sep, &thread, MAX_THREADS, NULL, err)) {
+ memprintf(err, "'%s' : %s", sep, *err);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->bind_thread |= thread;
+ conf->bind_tgroup = tgroup;
+ return 0;
+}
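+
+/* Illustrative usage, assuming the usual number/range syntax accepted by
+ * parse_process_number():
+ *   bind :80 thread 1-4    # threads 1 to 4, no explicit thread-group
+ *   bind :80 thread 2/1    # thread 1 of thread-group 2
+ */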
+
+/* config parser for global "tune.listener.multi-queue", accepts "on" or "off" */
+static int cfg_parse_tune_listener_mq(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_LISTENER_MQ;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_LISTENER_MQ;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
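+
+/* Illustrative usage in the global section, toggling the inter-thread accept
+ * queue (GTUNE_LISTENER_MQ):
+ *   global
+ *       tune.listener.multi-queue off
+ */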
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "dst_conn", smp_fetch_dconn, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "so_id", smp_fetch_so_id, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "so_name", smp_fetch_so_name, 0, NULL, SMP_T_STR, SMP_USE_FTEND, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct bind_kw_list bind_kws = { "ALL", { }, {
+ { "accept-netscaler-cip", bind_parse_accept_netscaler_cip, 1 }, /* enable NetScaler Client IP insertion protocol */
+ { "accept-proxy", bind_parse_accept_proxy, 0 }, /* enable PROXY protocol */
+ { "backlog", bind_parse_backlog, 1 }, /* set backlog of listening socket */
+ { "id", bind_parse_id, 1 }, /* set id of listening socket */
+ { "maxconn", bind_parse_maxconn, 1 }, /* set maxconn of listening socket */
+ { "name", bind_parse_name, 1 }, /* set name of listening socket */
+ { "nice", bind_parse_nice, 1 }, /* set nice of listening socket */
+ { "process", bind_parse_process, 1 }, /* set list of allowed process for this socket */
+ { "proto", bind_parse_proto, 1 }, /* set the proto to use for all incoming connections */
+ { "shards", bind_parse_shards, 1 }, /* set number of shards */
+ { "thread", bind_parse_thread, 1 }, /* set list of allowed threads for this socket */
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.listener.multi-queue", cfg_parse_tune_listener_mq },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..fd1c0ea
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,3968 @@
+/*
+ * General logging functions.
+ *
+ * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <sys/time.h>
+#include <sys/uio.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/sink.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+/* global recv logs counter */
+int cum_log_messages;
+
+/* log forward proxy list */
+struct proxy *cfg_log_forward;
+
+struct log_fmt_st {
+ char *name;
+};
+
+static const struct log_fmt_st log_formats[LOG_FORMATS] = {
+ [LOG_FORMAT_LOCAL] = {
+ .name = "local",
+ },
+ [LOG_FORMAT_RFC3164] = {
+ .name = "rfc3164",
+ },
+ [LOG_FORMAT_RFC5424] = {
+ .name = "rfc5424",
+ },
+ [LOG_FORMAT_PRIO] = {
+ .name = "priority",
+ },
+ [LOG_FORMAT_SHORT] = {
+ .name = "short",
+ },
+ [LOG_FORMAT_TIMED] = {
+ .name = "timed",
+ },
+ [LOG_FORMAT_ISO] = {
+ .name = "iso",
+ },
+ [LOG_FORMAT_RAW] = {
+ .name = "raw",
+ },
+};
+
+/*
+ * This map is used with all the ha_bit_* macros to check whether a particular
+ * bit is set or not. Each bit represents an ASCII code. ha_bit_set() sets those
+ * bytes which should be escaped. When ha_bit_test() returns non-zero, it means
+ * that the byte should be escaped. Be careful to always pass bytes in the
+ * 0..255 range to the macros.
+ */
+long rfc5424_escape_map[(256/8) / sizeof(long)];
+long hdr_encode_map[(256/8) / sizeof(long)];
+long url_encode_map[(256/8) / sizeof(long)];
+long http_encode_map[(256/8) / sizeof(long)];
+
+
+const char *log_facilities[NB_LOG_FACILITIES] = {
+ "kern", "user", "mail", "daemon",
+ "auth", "syslog", "lpr", "news",
+ "uucp", "cron", "auth2", "ftp",
+ "ntp", "audit", "alert", "cron2",
+ "local0", "local1", "local2", "local3",
+ "local4", "local5", "local6", "local7"
+};
+
+const char *log_levels[NB_LOG_LEVELS] = {
+ "emerg", "alert", "crit", "err",
+ "warning", "notice", "info", "debug"
+};
+
+const char sess_term_cond[16] = "-LcCsSPRIDKUIIII"; /* normal, Local, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed, Up, -- */
+const char sess_fin_state[8] = "-RCHDLQT"; /* cliRequest, srvConnect, srvHeader, Data, Last, Queue, Tarpit */
+
+
+/* log_format */
+struct logformat_type {
+ char *name;
+ int type;
+ int mode;
+ int lw; /* logwait bitsfield */
+ int (*config_callback)(struct logformat_node *node, struct proxy *curproxy);
+};
+
+int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy);
+
+/* log_format variable names */
+static const struct logformat_type logformat_keywords[] = {
+ { "o", LOG_FMT_GLOBAL, PR_MODE_TCP, 0, NULL }, /* global option */
+
+ /* please keep these lines sorted ! */
+ { "B", LOG_FMT_BYTES, PR_MODE_TCP, LW_BYTES, NULL }, /* bytes from server to client */
+ { "CC", LOG_FMT_CCLIENT, PR_MODE_HTTP, LW_REQHDR, NULL }, /* client cookie */
+ { "CS", LOG_FMT_CSERVER, PR_MODE_HTTP, LW_RSPHDR, NULL }, /* server cookie */
+ { "H", LOG_FMT_HOSTNAME, PR_MODE_TCP, LW_INIT, NULL }, /* Hostname */
+ { "ID", LOG_FMT_UNIQUEID, PR_MODE_TCP, LW_BYTES, NULL }, /* Unique ID */
+ { "ST", LOG_FMT_STATUS, PR_MODE_TCP, LW_RESP, NULL }, /* status code */
+ { "T", LOG_FMT_DATEGMT, PR_MODE_TCP, LW_INIT, NULL }, /* date GMT */
+ { "Ta", LOG_FMT_Ta, PR_MODE_HTTP, LW_BYTES, NULL }, /* Time active (tr to end) */
+ { "Tc", LOG_FMT_TC, PR_MODE_TCP, LW_BYTES, NULL }, /* Tc */
+ { "Th", LOG_FMT_Th, PR_MODE_TCP, LW_BYTES, NULL }, /* Time handshake */
+ { "Ti", LOG_FMT_Ti, PR_MODE_HTTP, LW_BYTES, NULL }, /* Time idle */
+ { "Tl", LOG_FMT_DATELOCAL, PR_MODE_TCP, LW_INIT, NULL }, /* date local timezone */
+ { "Tq", LOG_FMT_TQ, PR_MODE_HTTP, LW_BYTES, NULL }, /* Tq=Th+Ti+TR */
+ { "Tr", LOG_FMT_Tr, PR_MODE_HTTP, LW_BYTES, NULL }, /* Tr */
+ { "TR", LOG_FMT_TR, PR_MODE_HTTP, LW_BYTES, NULL }, /* Time to receive a valid request */
+ { "Td", LOG_FMT_TD, PR_MODE_TCP, LW_BYTES, NULL }, /* Td = Tt - (Tq + Tw + Tc + Tr) */
+ { "Ts", LOG_FMT_TS, PR_MODE_TCP, LW_INIT, NULL }, /* timestamp GMT */
+ { "Tt", LOG_FMT_TT, PR_MODE_TCP, LW_BYTES, NULL }, /* Tt */
+ { "Tu", LOG_FMT_TU, PR_MODE_TCP, LW_BYTES, NULL }, /* Tu = Tt -Ti */
+ { "Tw", LOG_FMT_TW, PR_MODE_TCP, LW_BYTES, NULL }, /* Tw */
+ { "U", LOG_FMT_BYTES_UP, PR_MODE_TCP, LW_BYTES, NULL }, /* bytes from client to server */
+ { "ac", LOG_FMT_ACTCONN, PR_MODE_TCP, LW_BYTES, NULL }, /* actconn */
+ { "b", LOG_FMT_BACKEND, PR_MODE_TCP, LW_INIT, NULL }, /* backend */
+ { "bc", LOG_FMT_BECONN, PR_MODE_TCP, LW_BYTES, NULL }, /* beconn */
+ { "bi", LOG_FMT_BACKENDIP, PR_MODE_TCP, LW_BCKIP, prepare_addrsource }, /* backend source ip */
+ { "bp", LOG_FMT_BACKENDPORT, PR_MODE_TCP, LW_BCKIP, prepare_addrsource }, /* backend source port */
+ { "bq", LOG_FMT_BCKQUEUE, PR_MODE_TCP, LW_BYTES, NULL }, /* backend_queue */
+ { "ci", LOG_FMT_CLIENTIP, PR_MODE_TCP, LW_CLIP | LW_XPRT, NULL }, /* client ip */
+ { "cp", LOG_FMT_CLIENTPORT, PR_MODE_TCP, LW_CLIP | LW_XPRT, NULL }, /* client port */
+ { "f", LOG_FMT_FRONTEND, PR_MODE_TCP, LW_INIT, NULL }, /* frontend */
+ { "fc", LOG_FMT_FECONN, PR_MODE_TCP, LW_BYTES, NULL }, /* feconn */
+ { "fi", LOG_FMT_FRONTENDIP, PR_MODE_TCP, LW_FRTIP | LW_XPRT, NULL }, /* frontend ip */
+ { "fp", LOG_FMT_FRONTENDPORT, PR_MODE_TCP, LW_FRTIP | LW_XPRT, NULL }, /* frontend port */
+ { "ft", LOG_FMT_FRONTEND_XPRT, PR_MODE_TCP, LW_INIT, NULL }, /* frontend with transport mode */
+ { "hr", LOG_FMT_HDRREQUEST, PR_MODE_TCP, LW_REQHDR, NULL }, /* header request */
+ { "hrl", LOG_FMT_HDRREQUESTLIST, PR_MODE_TCP, LW_REQHDR, NULL }, /* header request list */
+ { "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response */
+ { "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response list */
+ { "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP method */
+ { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP relative or absolute path */
+ { "HPO", LOG_FMT_HTTP_PATH_ONLY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP path only (without host nor query string) */
+ { "HQ", LOG_FMT_HTTP_QUERY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP query */
+ { "HU", LOG_FMT_HTTP_URI, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP full URI */
+ { "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP version */
+ { "lc", LOG_FMT_LOGCNT, PR_MODE_TCP, LW_INIT, NULL }, /* log counter */
+ { "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL }, /* accept date millisecond */
+ { "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
+ { "r", LOG_FMT_REQ, PR_MODE_HTTP, LW_REQ, NULL }, /* request */
+ { "rc", LOG_FMT_RETRIES, PR_MODE_TCP, LW_BYTES, NULL }, /* retries */
+ { "rt", LOG_FMT_COUNTER, PR_MODE_TCP, LW_REQ, NULL }, /* request counter (HTTP or TCP session) */
+ { "s", LOG_FMT_SERVER, PR_MODE_TCP, LW_SVID, NULL }, /* server */
+ { "sc", LOG_FMT_SRVCONN, PR_MODE_TCP, LW_BYTES, NULL }, /* srv_conn */
+ { "si", LOG_FMT_SERVERIP, PR_MODE_TCP, LW_SVIP, NULL }, /* server destination ip */
+ { "sp", LOG_FMT_SERVERPORT, PR_MODE_TCP, LW_SVIP, NULL }, /* server destination port */
+ { "sq", LOG_FMT_SRVQUEUE, PR_MODE_TCP, LW_BYTES, NULL }, /* srv_queue */
+ { "sslc", LOG_FMT_SSL_CIPHER, PR_MODE_TCP, LW_XPRT, NULL }, /* client-side SSL ciphers */
+ { "sslv", LOG_FMT_SSL_VERSION, PR_MODE_TCP, LW_XPRT, NULL }, /* client-side SSL protocol version */
+ { "t", LOG_FMT_DATE, PR_MODE_TCP, LW_INIT, NULL }, /* date */
+ { "tr", LOG_FMT_tr, PR_MODE_HTTP, LW_INIT, NULL }, /* date of start of request */
+ { "trg",LOG_FMT_trg, PR_MODE_HTTP, LW_INIT, NULL }, /* date of start of request, GMT */
+ { "trl",LOG_FMT_trl, PR_MODE_HTTP, LW_INIT, NULL }, /* date of start of request, local */
+ { "ts", LOG_FMT_TERMSTATE, PR_MODE_TCP, LW_BYTES, NULL },/* termination state */
+ { "tsc", LOG_FMT_TERMSTATE_CK, PR_MODE_TCP, LW_INIT, NULL },/* termination state */
+ { 0, 0, 0, 0, NULL }
+};
+
+char default_http_log_format[] = "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r"; // default format
+char default_https_log_format[] = "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r %[fc_err]/%[ssl_fc_err,hex]/%[ssl_c_err]/%[ssl_c_ca_err]/%[ssl_fc_is_resumed] %[ssl_fc_sni]/%sslv/%sslc";
+char clf_http_log_format[] = "%{+Q}o %{-Q}ci - - [%trg] %r %ST %B \"\" \"\" %cp %ms %ft %b %s %TR %Tw %Tc %Tr %Ta %tsc %ac %fc %bc %sc %rc %sq %bq %CC %CS %hrl %hsl";
+char default_tcp_log_format[] = "%ci:%cp [%t] %ft %b/%s %Tw/%Tc/%Tt %B %ts %ac/%fc/%bc/%sc/%rc %sq/%bq";
+char *log_format = NULL;
+
+/* Default string used for structured-data part in RFC5424 formatted
+ * syslog messages.
+ */
+char default_rfc5424_sd_log_format[] = "- ";
+
+/* total number of dropped logs */
+unsigned int dropped_logs = 0;
+
+/* This is a global syslog message buffer, common to all outgoing
+ * messages. It contains only the data part.
+ */
+THREAD_LOCAL char *logline = NULL;
+
+/* A global syslog message buffer, common to all RFC5424 syslog messages.
+ * Currently, it is used for generating the structured-data part.
+ */
+THREAD_LOCAL char *logline_rfc5424 = NULL;
+
+struct logformat_var_args {
+ char *name;
+ int mask;
+};
+
+struct logformat_var_args var_args_list[] = {
+// global
+ { "M", LOG_OPT_MANDATORY },
+ { "Q", LOG_OPT_QUOTE },
+ { "X", LOG_OPT_HEXA },
+ { "E", LOG_OPT_ESC },
+ { 0, 0 }
+};
+
+/*
+ * callback used to configure addr source retrieval
+ */
+int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy)
+{
+ curproxy->options2 |= PR_O2_SRC_ADDR;
+
+ return 0;
+}
+
+
+/*
+ * Parse args in a logformat_var. Returns 0 on error,
+ * otherwise returns 1.
+ */
+int parse_logformat_var_args(char *args, struct logformat_node *node, char **err)
+{
+ int i = 0;
+ int end = 0;
+	int flags = 0; // 1 = '+', 2 = '-'
+ char *sp = NULL; // start pointer
+
+ if (args == NULL) {
+ memprintf(err, "internal error: parse_logformat_var_args() expects non null 'args'");
+ return 0;
+ }
+
+ while (1) {
+ if (*args == '\0')
+ end = 1;
+
+ if (*args == '+') {
+ // add flag
+ sp = args + 1;
+ flags = 1;
+ }
+ if (*args == '-') {
+ // delete flag
+ sp = args + 1;
+ flags = 2;
+ }
+
+ if (*args == '\0' || *args == ',') {
+ *args = '\0';
+ for (i = 0; sp && var_args_list[i].name; i++) {
+ if (strcmp(sp, var_args_list[i].name) == 0) {
+ if (flags == 1) {
+ node->options |= var_args_list[i].mask;
+ break;
+ } else if (flags == 2) {
+ node->options &= ~var_args_list[i].mask;
+ break;
+ }
+ }
+ }
+ sp = NULL;
+ if (end)
+ break;
+ }
+ args++;
+ }
+ return 1;
+}
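+
+/* Illustrative argument strings consumed by the function above: "+Q" sets the
+ * quoting option, "+Q,-E" sets quoting and clears escaping. This is the
+ * "%{+Q}r" form used in the default formats defined earlier in this file.
+ */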
+
+/*
+ * Parse a variable '%varname' or '%{args}varname' in log-format. The caller
+ * must pass the args part in the <arg> pointer with its length in <arg_len>,
+ * and varname with its length in <var> and <var_len> respectively. <arg> is
+ * ignored when arg_len is 0. Neither <var> nor <var_len> may be null.
+ * Returns 0 on error and fills <err>, otherwise returns 1.
+ */
+int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions, char **err)
+{
+ int j;
+ struct logformat_node *node = NULL;
+
+ for (j = 0; logformat_keywords[j].name; j++) { // search a log type
+ if (strlen(logformat_keywords[j].name) == var_len &&
+ strncmp(var, logformat_keywords[j].name, var_len) == 0) {
+ if (logformat_keywords[j].mode != PR_MODE_HTTP || curproxy->mode == PR_MODE_HTTP) {
+ node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ goto error_free;
+ }
+ node->type = logformat_keywords[j].type;
+ node->options = *defoptions;
+ if (arg_len) {
+ node->arg = my_strndup(arg, arg_len);
+ if (!parse_logformat_var_args(node->arg, node, err))
+ goto error_free;
+ }
+ if (node->type == LOG_FMT_GLOBAL) {
+ *defoptions = node->options;
+ free(node->arg);
+ free(node);
+ } else {
+ if (logformat_keywords[j].config_callback &&
+ logformat_keywords[j].config_callback(node, curproxy) != 0) {
+ goto error_free;
+ }
+ curproxy->to_log |= logformat_keywords[j].lw;
+ LIST_APPEND(list_format, &node->list);
+ }
+ return 1;
+ } else {
+ memprintf(err, "format variable '%s' is reserved for HTTP mode",
+ logformat_keywords[j].name);
+ goto error_free;
+ }
+ }
+ }
+
+ j = var[var_len];
+ var[var_len] = 0;
+ memprintf(err, "no such format variable '%s'. If you wanted to emit the '%%' character verbatim, you need to use '%%%%'", var);
+ var[var_len] = j;
+
+ error_free:
+ if (node) {
+ free(node->arg);
+ free(node);
+ }
+ return 0;
+}
+
+/*
+ * Push a text or separator node to the logformat linked list.
+ *
+ * start: start pointer
+ * end: end text pointer
+ * type: string type
+ * list_format: destination list
+ *
+ * LF_TEXT: copy chars from start to end excluding end.
+ */
+int add_to_logformat_list(char *start, char *end, int type, struct list *list_format, char **err)
+{
+ char *str;
+
+ if (type == LF_TEXT) { /* type text */
+ struct logformat_node *node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+		str = calloc(1, end - start + 1);
+		if (!str) {
+			free(node);
+			memprintf(err, "out of memory error");
+			return 0;
+		}
+		strncpy(str, start, end - start);
+		str[end - start] = '\0';
+ node->arg = str;
+ node->type = LOG_FMT_TEXT; // type string
+ LIST_APPEND(list_format, &node->list);
+ } else if (type == LF_SEPARATOR) {
+ struct logformat_node *node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+ node->type = LOG_FMT_SEPARATOR;
+ LIST_APPEND(list_format, &node->list);
+ }
+ return 1;
+}
+
+/*
+ * Parse the sample fetch expression <text> and add a node to <list_format> upon
+ * success. At the moment, sample converters are not yet supported but fetch arguments
+ * should work. The curpx->conf.args.ctx must be set by the caller. If an end pointer
+ * is passed in <endptr>, it will be updated with the pointer to the first character
+ * not part of the sample expression.
+ *
+ * On error, the function returns 0, otherwise it returns 1.
+ */
+int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct proxy *curpx, struct list *list_format, int options, int cap, char **err, char **endptr)
+{
+ char *cmd[2];
+ struct sample_expr *expr = NULL;
+ struct logformat_node *node = NULL;
+ int cmd_arg;
+
+ cmd[0] = text;
+ cmd[1] = "";
+ cmd_arg = 0;
+
+ expr = sample_parse_expr(cmd, &cmd_arg, curpx->conf.args.file, curpx->conf.args.line, err,
+ &curpx->conf.args, endptr);
+ if (!expr) {
+ memprintf(err, "failed to parse sample expression <%s> : %s", text, *err);
+ goto error_free;
+ }
+
+ node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ goto error_free;
+ }
+ node->type = LOG_FMT_EXPR;
+ node->expr = expr;
+ node->options = options;
+
+ if (arg_len) {
+ node->arg = my_strndup(arg, arg_len);
+ if (!parse_logformat_var_args(node->arg, node, err))
+ goto error_free;
+ }
+ if (expr->fetch->val & cap & SMP_VAL_REQUEST)
+ node->options |= LOG_OPT_REQ_CAP; /* fetch method is request-compatible */
+
+ if (expr->fetch->val & cap & SMP_VAL_RESPONSE)
+ node->options |= LOG_OPT_RES_CAP; /* fetch method is response-compatible */
+
+ if (!(expr->fetch->val & cap)) {
+ memprintf(err, "sample fetch <%s> may not be reliably used here because it needs '%s' which is not available here",
+ text, sample_src_names(expr->fetch->use));
+ goto error_free;
+ }
+
+ if ((options & LOG_OPT_HTTP) && (expr->fetch->use & (SMP_USE_L6REQ|SMP_USE_L6RES))) {
+ ha_warning("parsing [%s:%d] : L6 sample fetch <%s> ignored in HTTP log-format string.\n",
+ curpx->conf.args.file, curpx->conf.args.line, text);
+ }
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ /* Note, we may also need to set curpx->to_log with certain fetches */
+ curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ /* FIXME: temporary workaround for missing LW_XPRT and LW_REQ flags
+	 * needed with some sample fetches (eg: ssl*). We always set them for
+	 * now, but this should go away once sample capabilities carry this information.
+ */
+ curpx->to_log |= LW_XPRT;
+ if (curpx->http_needed)
+ curpx->to_log |= LW_REQ;
+ LIST_APPEND(list_format, &node->list);
+ return 1;
+
+ error_free:
+ release_sample_expr(expr);
+ if (node) {
+ free(node->arg);
+ free(node);
+ }
+ return 0;
+}
+
+/*
+ * Parse the log_format string and fill a linked list.
+ * Variable names are preceded by '%' and composed of characters [a-zA-Z0-9]* : %varname
+ * You can set arguments using { } : %{many arguments}varname.
+ * The curproxy->conf.args.ctx must be set by the caller.
+ *
+ * fmt: the string to parse
+ * curproxy: the proxy affected
+ * list_format: the destination list
+ * options: LOG_OPT_* to force on every node
+ * cap: all SMP_VAL_* flags supported by the consumer
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <err> is filled.
+ */
+int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list *list_format, int options, int cap, char **err)
+{
+ char *sp, *str, *backfmt; /* start pointer for text parts */
+ char *arg = NULL; /* start pointer for args */
+ char *var = NULL; /* start pointer for vars */
+ int arg_len = 0;
+ int var_len = 0;
+ int cformat; /* current token format */
+ int pformat; /* previous token format */
+ struct logformat_node *tmplf, *back;
+
+ sp = str = backfmt = strdup(fmt);
+ if (!str) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+ curproxy->to_log |= LW_INIT;
+
+ /* flush the list first. */
+ list_for_each_entry_safe(tmplf, back, list_format, list) {
+ LIST_DELETE(&tmplf->list);
+ release_sample_expr(tmplf->expr);
+ free(tmplf->arg);
+ free(tmplf);
+ }
+
+ for (cformat = LF_INIT; cformat != LF_END; str++) {
+ pformat = cformat;
+
+ if (!*str)
+ cformat = LF_END; // preset it to save all states from doing this
+
+ /* The principle of the two-step state machine below is to first detect a change, and
+ * second have all common paths processed at one place. The common paths are the ones
+ * encountered in text areas (LF_INIT, LF_TEXT, LF_SEPARATOR) and at the end (LF_END).
+ * We use the common LF_INIT state to dispatch to the different final states.
+ */
+ switch (pformat) {
+ case LF_STARTVAR: // text immediately following a '%'
+ arg = NULL; var = NULL;
+ arg_len = var_len = 0;
+ if (*str == '{') { // optional argument
+ cformat = LF_STARG;
+ arg = str + 1;
+ }
+ else if (*str == '[') {
+ cformat = LF_STEXPR;
+ var = str + 1; // store expr in variable name
+ }
+ else if (isalpha((unsigned char)*str)) { // variable name
+ cformat = LF_VAR;
+ var = str;
+ }
+ else if (*str == '%')
+ cformat = LF_TEXT; // convert this character to a literal (useful for '%')
+ else if (isdigit((unsigned char)*str) || *str == ' ' || *str == '\t') {
+ /* single '%' followed by blank or digit, send them both */
+ cformat = LF_TEXT;
+ pformat = LF_TEXT; /* finally we include the previous char as well */
+ sp = str - 1; /* send both the '%' and the current char */
+ memprintf(err, "unexpected variable name near '%c' at position %d line : '%s'. Maybe you want to write a single '%%', use the syntax '%%%%'",
+ *str, (int)(str - backfmt), fmt);
+ goto fail;
+
+ }
+ else
+ cformat = LF_INIT; // handle other cases of literals
+ break;
+
+ case LF_STARG: // text immediately following '%{'
+ if (*str == '}') { // end of arg
+ cformat = LF_EDARG;
+ arg_len = str - arg;
+ *str = 0; // used for reporting errors
+ }
+ break;
+
+ case LF_EDARG: // text immediately following '%{arg}'
+ if (*str == '[') {
+ cformat = LF_STEXPR;
+ var = str + 1; // store expr in variable name
+ break;
+ }
+ else if (isalnum((unsigned char)*str)) { // variable name
+ cformat = LF_VAR;
+ var = str;
+ break;
+ }
+ memprintf(err, "parse argument modifier without variable name near '%%{%s}'", arg);
+ goto fail;
+
+ case LF_STEXPR: // text immediately following '%['
+ /* the whole sample expression is parsed at once,
+ * returning the pointer to the first character not
+ * part of the expression, which MUST be the trailing
+ * angle bracket.
+ */
+ if (!add_sample_to_logformat_list(var, arg, arg_len, curproxy, list_format, options, cap, err, &str))
+ goto fail;
+
+ if (*str == ']') {
+ // end of arg, go on with next state
+ cformat = pformat = LF_EDEXPR;
+ sp = str;
+ }
+ else {
+ char c = *str;
+ *str = 0;
+ if (isprint((unsigned char)c))
+ memprintf(err, "expected ']' after '%s', but found '%c'", var, c);
+ else
+ memprintf(err, "missing ']' after '%s'", var);
+ goto fail;
+ }
+ break;
+
+ case LF_VAR: // text part of a variable name
+ var_len = str - var;
+ if (!isalnum((unsigned char)*str))
+ cformat = LF_INIT; // not variable name anymore
+ break;
+
+ default: // LF_INIT, LF_TEXT, LF_SEPARATOR, LF_END, LF_EDEXPR
+ cformat = LF_INIT;
+ }
+
+ if (cformat == LF_INIT) { /* resynchronize state to text/sep/startvar */
+ switch (*str) {
+ case '%': cformat = LF_STARTVAR; break;
+ case 0 : cformat = LF_END; break;
+ case ' ':
+ if (options & LOG_OPT_MERGE_SPACES) {
+ cformat = LF_SEPARATOR;
+ break;
+ }
+ /* fall through */
+ default : cformat = LF_TEXT; break;
+ }
+ }
+
+ if (cformat != pformat || pformat == LF_SEPARATOR) {
+ switch (pformat) {
+ case LF_VAR:
+ if (!parse_logformat_var(arg, arg_len, var, var_len, curproxy, list_format, &options, err))
+ goto fail;
+ break;
+ case LF_TEXT:
+ case LF_SEPARATOR:
+ if (!add_to_logformat_list(sp, str, pformat, list_format, err))
+ goto fail;
+ break;
+ }
+ sp = str; /* new start of text at every state switch and at every separator */
+ }
+ }
+
+ if (pformat == LF_STARTVAR || pformat == LF_STARG || pformat == LF_STEXPR) {
+ memprintf(err, "truncated line after '%s'", var ? var : arg ? arg : "%");
+ goto fail;
+ }
+ free(backfmt);
+
+ return 1;
+ fail:
+ free(backfmt);
+ return 0;
+}
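+
+/* Illustrative input for the parser above, combining literal text, a variable
+ * with arguments and a sample expression (all three forms also appear in the
+ * default formats defined earlier in this file):
+ *
+ *   "%ci:%cp [%tr] %{+Q}r %[ssl_fc_sni]"
+ */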
+
+/*
+ * Parse the first range of indexes from a string made of a list of comma separated
+ * ranges of indexes. Note that a single index is handled as a particular range
+ * whose high limit equals its low limit.
+ */
+int get_logsrv_smp_range(unsigned int *low, unsigned int *high, char **arg, char **err)
+{
+ char *end, *p;
+
+ *low = *high = 0;
+
+ p = *arg;
+ end = strchr(p, ',');
+ if (!end)
+ end = p + strlen(p);
+
+ *high = *low = read_uint((const char **)&p, end);
+ if (!*low || (p != end && *p != '-'))
+ goto err;
+
+ if (p == end)
+ goto done;
+
+ p++;
+ *high = read_uint((const char **)&p, end);
+ if (!*high || *high <= *low || p != end)
+ goto err;
+
+ done:
+ if (*end == ',')
+ end++;
+ *arg = end;
+ return 1;
+
+ err:
+ memprintf(err, "wrong sample range '%s'", *arg);
+ return 0;
+}
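+
+/* Illustrative "sample" argument consumed range by range by the function
+ * above: in "1-3,5:6", the ranges part "1-3,5" yields [1..3] and [5..5],
+ * while the part after the colon is the sample size (6), parsed by the
+ * caller.
+ */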
+
+/*
+ * Returns 1 if the range defined by <low> and <high> overlaps one of the
+ * ranges in the <rgs> array of size <sz>, 0 if not.
+ */
+int smp_log_ranges_overlap(struct smp_log_range *rgs, size_t sz,
+ unsigned int low, unsigned int high, char **err)
+{
+ size_t i;
+
+ for (i = 0; i < sz; i++) {
+ if ((low >= rgs[i].low && low <= rgs[i].high) ||
+ (high >= rgs[i].low && high <= rgs[i].high)) {
+ memprintf(err, "ranges are overlapping");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int smp_log_range_cmp(const void *a, const void *b)
+{
+ const struct smp_log_range *rg_a = a;
+ const struct smp_log_range *rg_b = b;
+
+ if (rg_a->high < rg_b->low)
+ return -1;
+ else if (rg_a->low > rg_b->high)
+ return 1;
+
+ return 0;
+}
+
+/* Frees log server <logsrv> after freeing all of its allocated fields. The
+ * server must not belong to a list anymore. <logsrv> may be NULL, which is
+ * silently ignored.
+ */
+void free_logsrv(struct logsrv *logsrv)
+{
+ if (!logsrv)
+ return;
+
+ BUG_ON(LIST_INLIST(&logsrv->list));
+ ha_free(&logsrv->conf.file);
+ ha_free(&logsrv->ring_name);
+ free(logsrv);
+}
+
+/*
+ * Parse "log" keyword and update <logsrvs> list accordingly.
+ *
+ * When <do_del> is set, it means the "no log" line was parsed, so all log
+ * servers in <logsrvs> are released.
+ *
+ * Otherwise, we try to parse the "log" line. First of all, when the list is not
+ * the global one, we look for the parameter "global". If we find it,
+ * global.logsrvs is copied. Otherwise each argument is parsed.
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <err> is
+ * filled.
+ */
+int parse_logsrv(char **args, struct list *logsrvs, int do_del, const char *file, int linenum, char **err)
+{
+ struct smp_log_range *smp_rgs = NULL;
+ struct sockaddr_storage *sk;
+ struct protocol *proto;
+ struct logsrv *logsrv = NULL;
+ int port1, port2;
+ int cur_arg;
+ int fd;
+
+ /*
+	 * "no log": delete previously inherited or defined syslog
+ * servers.
+ */
+ if (do_del) {
+ struct logsrv *back;
+
+ if (*(args[1]) != 0) {
+ memprintf(err, "'no log' does not expect arguments");
+ goto error;
+ }
+
+ list_for_each_entry_safe(logsrv, back, logsrvs, list) {
+ LIST_DEL_INIT(&logsrv->list);
+ free_logsrv(logsrv);
+ }
+ return 1;
+ }
+
+ /*
+	 * "log global": copy the global.logsrvs linked list to the end of the
+	 * <logsrvs> list. But first, we check that (logsrvs != global.logsrvs).
+ */
+ if (*(args[1]) && *(args[2]) == 0 && strcmp(args[1], "global") == 0) {
+ if (logsrvs == &global.logsrvs) {
+ memprintf(err, "'global' is not supported for a global syslog server");
+ goto error;
+ }
+ list_for_each_entry(logsrv, &global.logsrvs, list) {
+ struct logsrv *node;
+
+ list_for_each_entry(node, logsrvs, list) {
+ if (node->ref == logsrv)
+ goto skip_logsrv;
+ }
+
+			node = malloc(sizeof(*node));
+			if (!node) {
+				memprintf(err, "out of memory error");
+				return 0;
+			}
+			memcpy(node, logsrv, sizeof(struct logsrv));
+ node->ref = logsrv;
+ LIST_INIT(&node->list);
+ LIST_APPEND(logsrvs, &node->list);
+ node->ring_name = logsrv->ring_name ? strdup(logsrv->ring_name) : NULL;
+ node->conf.file = strdup(file);
+ node->conf.line = linenum;
+
+ skip_logsrv:
+ continue;
+ }
+ return 1;
+ }
+
+ /*
+ * "log <address> ...: parse a syslog server line
+ */
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ memprintf(err, "expects <address> and <facility> %s as arguments",
+ ((logsrvs == &global.logsrvs) ? "" : "or global"));
+ goto error;
+ }
+
+ /* take care of "stdout" and "stderr" as regular aliases for fd@1 / fd@2 */
+ if (strcmp(args[1], "stdout") == 0)
+ args[1] = "fd@1";
+ else if (strcmp(args[1], "stderr") == 0)
+ args[1] = "fd@2";
+
+ logsrv = calloc(1, sizeof(*logsrv));
+ if (!logsrv) {
+ memprintf(err, "out of memory");
+ goto error;
+ }
+ LIST_INIT(&logsrv->list);
+ logsrv->conf.file = strdup(file);
+ logsrv->conf.line = linenum;
+
+ /* skip address for now, it will be parsed at the end */
+ cur_arg = 2;
+
+ /* just after the address, a length may be specified */
+ logsrv->maxlen = MAX_SYSLOG_LEN;
+ if (strcmp(args[cur_arg], "len") == 0) {
+ int len = atoi(args[cur_arg+1]);
+ if (len < 80 || len > 65535) {
+ memprintf(err, "invalid log length '%s', must be between 80 and 65535",
+ args[cur_arg+1]);
+ goto error;
+ }
+ logsrv->maxlen = len;
+ cur_arg += 2;
+ }
+ if (logsrv->maxlen > global.max_syslog_len)
+ global.max_syslog_len = logsrv->maxlen;
+
+ /* after the length, a format may be specified */
+ if (strcmp(args[cur_arg], "format") == 0) {
+ logsrv->format = get_log_format(args[cur_arg+1]);
+ if (logsrv->format == LOG_FORMAT_UNSPEC) {
+ memprintf(err, "unknown log format '%s'", args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg += 2;
+ }
+
+ if (strcmp(args[cur_arg], "sample") == 0) {
+ unsigned low, high;
+ char *p, *beg, *end, *smp_sz_str;
+ size_t smp_rgs_sz = 0, smp_sz = 0, new_smp_sz;
+
+ p = args[cur_arg+1];
+ smp_sz_str = strchr(p, ':');
+ if (!smp_sz_str) {
+ memprintf(err, "Missing sample size");
+ goto error;
+ }
+
+ *smp_sz_str++ = '\0';
+
+ end = p + strlen(p);
+
+ while (p != end) {
+ if (!get_logsrv_smp_range(&low, &high, &p, err))
+ goto error;
+
+ if (smp_rgs && smp_log_ranges_overlap(smp_rgs, smp_rgs_sz, low, high, err))
+ goto error;
+
+ smp_rgs = my_realloc2(smp_rgs, (smp_rgs_sz + 1) * sizeof *smp_rgs);
+ if (!smp_rgs) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+
+ smp_rgs[smp_rgs_sz].low = low;
+ smp_rgs[smp_rgs_sz].high = high;
+ smp_rgs[smp_rgs_sz].sz = high - low + 1;
+ smp_rgs[smp_rgs_sz].curr_idx = 0;
+ if (smp_rgs[smp_rgs_sz].high > smp_sz)
+ smp_sz = smp_rgs[smp_rgs_sz].high;
+ smp_rgs_sz++;
+ }
+
+ if (smp_rgs == NULL) {
+ memprintf(err, "no sampling ranges given");
+ goto error;
+ }
+
+ beg = smp_sz_str;
+ end = beg + strlen(beg);
+ new_smp_sz = read_uint((const char **)&beg, end);
+ if (!new_smp_sz || beg != end) {
+ memprintf(err, "wrong sample size '%s' for sample range '%s'",
+ smp_sz_str, args[cur_arg+1]);
+ goto error;
+ }
+
+ if (new_smp_sz < smp_sz) {
+ memprintf(err, "sample size %zu should be greater or equal to "
+ "%zu the maximum of the high ranges limits",
+ new_smp_sz, smp_sz);
+ goto error;
+ }
+ smp_sz = new_smp_sz;
+
+ /* Let's order <smp_rgs> array. */
+ qsort(smp_rgs, smp_rgs_sz, sizeof(struct smp_log_range), smp_log_range_cmp);
+
+ logsrv->lb.smp_rgs = smp_rgs;
+ logsrv->lb.smp_rgs_sz = smp_rgs_sz;
+ logsrv->lb.smp_sz = smp_sz;
+
+ cur_arg += 2;
+ }
+ HA_SPIN_INIT(&logsrv->lock);
+ /* parse the facility */
+ logsrv->facility = get_log_facility(args[cur_arg]);
+ if (logsrv->facility < 0) {
+ memprintf(err, "unknown log facility '%s'", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+
+ /* parse the max syslog level (default: debug) */
+ logsrv->level = 7;
+ if (*(args[cur_arg])) {
+ logsrv->level = get_log_level(args[cur_arg]);
+ if (logsrv->level < 0) {
+ memprintf(err, "unknown optional log level '%s'", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ /* parse the limit syslog level (default: emerg) */
+ logsrv->minlvl = 0;
+ if (*(args[cur_arg])) {
+ logsrv->minlvl = get_log_level(args[cur_arg]);
+ if (logsrv->minlvl < 0) {
+ memprintf(err, "unknown optional minimum log level '%s'", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ /* Too many args */
+ if (*(args[cur_arg])) {
+ memprintf(err, "cannot handle unexpected argument '%s'", args[cur_arg]);
+ goto error;
+ }
+
+ /* now, back to the address */
+ logsrv->type = LOG_TARGET_DGRAM;
+ if (strncmp(args[1], "ring@", 5) == 0) {
+ logsrv->addr.ss_family = AF_UNSPEC;
+ logsrv->type = LOG_TARGET_BUFFER;
+ logsrv->sink = NULL;
+ logsrv->ring_name = strdup(args[1] + 5);
+ goto done;
+ }
+
+ sk = str2sa_range(args[1], NULL, &port1, &port2, &fd, &proto,
+ err, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_RAW_FD | PA_O_DGRAM | PA_O_STREAM | PA_O_DEFAULT_DGRAM);
+ if (!sk)
+ goto error;
+
+ if (fd != -1)
+ logsrv->type = LOG_TARGET_FD;
+ logsrv->addr = *sk;
+
+ if (sk->ss_family == AF_INET || sk->ss_family == AF_INET6) {
+ if (!port1)
+ set_host_port(&logsrv->addr, SYSLOG_PORT);
+ }
+
+ if (proto && proto->xprt_type == PROTO_TYPE_STREAM) {
+ static unsigned long ring_ids;
+
+ /* Implicit sink buffer will be
+ * initialized in post_check
+ */
+ logsrv->type = LOG_TARGET_BUFFER;
+ logsrv->sink = NULL;
+ /* compute uniq name for the ring */
+ memprintf(&logsrv->ring_name, "ring#%lu", ++ring_ids);
+ }
+
+ done:
+ LIST_APPEND(logsrvs, &logsrv->list);
+ return 1;
+
+ error:
+ free(smp_rgs);
+ free_logsrv(logsrv);
+ return 0;
+}
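+
+/* A few illustrative "log" lines accepted by the parser above (where "mybuf"
+ * stands for a hypothetical ring section name):
+ *   log global
+ *   log stdout format raw local0
+ *   log 127.0.0.1:514 len 4096 local0 notice
+ *   log ring@mybuf local1
+ */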
+
+
+/*
+ * Returns the log format, or LOG_FORMAT_UNSPEC if not found.
+ */
+enum log_fmt get_log_format(const char *fmt)
+{
+ enum log_fmt format;
+
+ format = LOG_FORMATS - 1;
+ while (format > 0 && log_formats[format].name
+ && strcmp(log_formats[format].name, fmt) != 0)
+ format--;
+
+ /* Note: 0 is LOG_FORMAT_UNSPEC */
+ return format;
+}
+
+/*
+ * Returns the log level for <lev>, or -1 if not found.
+ */
+int get_log_level(const char *lev)
+{
+ int level;
+
+ level = NB_LOG_LEVELS - 1;
+ while (level >= 0 && strcmp(log_levels[level], lev) != 0)
+ level--;
+
+ return level;
+}
+
+/*
+ * Returns the log facility for <fac>, or -1 if not found.
+ */
+int get_log_facility(const char *fac)
+{
+ int facility;
+
+ facility = NB_LOG_FACILITIES - 1;
+ while (facility >= 0 && strcmp(log_facilities[facility], fac) != 0)
+ facility--;
+
+ return facility;
+}
+
+/*
+ * Encode the string.
+ *
+ * When using the +E log format option, it will try to escape '"\]'
+ * characters with '\' as prefix. The same prefix should not be used as
+ * <escape>.
+ */
+static char *lf_encode_string(char *start, char *stop,
+ const char escape, const long *map,
+ const char *string,
+ struct logformat_node *node)
+{
+ if (node->options & LOG_OPT_ESC) {
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && *string != '\0') {
+ if (!ha_bit_test((unsigned char)(*string), map)) {
+ if (!ha_bit_test((unsigned char)(*string), rfc5424_escape_map))
+ *start++ = *string;
+ else {
+ if (start + 2 >= stop)
+ break;
+ *start++ = '\\';
+ *start++ = *string;
+ }
+ }
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*string >> 4) & 15];
+ *start++ = hextab[*string & 15];
+ }
+ string++;
+ }
+ *start = '\0';
+ }
+ }
+ else {
+ return encode_string(start, stop, escape, map, string);
+ }
+
+ return start;
+}
+
+/*
+ * Encode the chunk.
+ *
+ * When using the +E log format option, it will try to escape '"\]'
+ * characters with '\' as prefix. The same prefix should not be used as
+ * <escape>.
+ */
+static char *lf_encode_chunk(char *start, char *stop,
+ const char escape, const long *map,
+ const struct buffer *chunk,
+ struct logformat_node *node)
+{
+ char *str, *end;
+
+ if (node->options & LOG_OPT_ESC) {
+ if (start < stop) {
+ str = chunk->area;
+ end = chunk->area + chunk->data;
+
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && str < end) {
+ if (!ha_bit_test((unsigned char)(*str), map)) {
+ if (!ha_bit_test((unsigned char)(*str), rfc5424_escape_map))
+ *start++ = *str;
+ else {
+ if (start + 2 >= stop)
+ break;
+ *start++ = '\\';
+ *start++ = *str;
+ }
+ }
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*str >> 4) & 15];
+ *start++ = hextab[*str & 15];
+ }
+ str++;
+ }
+ *start = '\0';
+ }
+ }
+ else {
+ return encode_chunk(start, stop, escape, map, chunk);
+ }
+
+ return start;
+}
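+
+/* Example (sketch, assuming <escape> is '#' and <map> marks control chars):
+ * with LOG_OPT_ESC set, the encoders above turn '"' into '\"' (rfc5424
+ * escaping with a '\' prefix) and a control byte such as 0x1f into '#'
+ * followed by its two hex digits, while all other characters are copied
+ * verbatim.
+ */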
+
+/*
+ * Writes a string into the log buffer.
+ * Takes care of the quote and escape options.
+ *
+ * Return the address of the \0 character, or NULL on error
+ */
+char *lf_text_len(char *dst, const char *src, size_t len, size_t size, const struct logformat_node *node)
+{
+ if (size < 2)
+ return NULL;
+
+ if (node->options & LOG_OPT_QUOTE) {
+ *(dst++) = '"';
+ size--;
+ }
+
+ if (src && len) {
+		/* escape_string and strlcpy2 will both try to add a terminating NUL
+		 * byte to dst, so we need to make sure that this extra byte will
+		 * fit into dst before calling them
+ */
+ if (node->options & LOG_OPT_ESC) {
+ char *ret;
+
+ ret = escape_string(dst, (dst + size - 1), '\\', rfc5424_escape_map, src, src + len);
+ if (ret == NULL || *ret != '\0')
+ return NULL;
+ len = ret - dst;
+ }
+ else {
+ if (++len > size)
+ len = size;
+ len = strlcpy2(dst, src, len);
+ }
+
+ size -= len;
+ dst += len;
+ }
+ else if ((node->options & (LOG_OPT_QUOTE|LOG_OPT_MANDATORY)) == LOG_OPT_MANDATORY) {
+ if (size < 2)
+ return NULL;
+ *(dst++) = '-';
+ size -= 1;
+ }
+
+ if (node->options & LOG_OPT_QUOTE) {
+ if (size < 2)
+ return NULL;
+ *(dst++) = '"';
+ }
+
+ *dst = '\0';
+ return dst;
+}
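+
+/* Usage sketch (illustrative, not from the original code): with src == NULL,
+ * LOG_OPT_QUOTE alone makes lf_text_len() emit '""', LOG_OPT_MANDATORY alone
+ * makes it emit '-', and on success it always returns the address of the
+ * trailing '\0'.
+ */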
+
+static inline char *lf_text(char *dst, const char *src, size_t size, const struct logformat_node *node)
+{
+ return lf_text_len(dst, src, size, size, node);
+}
+
+/*
+ * Writes an IP address to the log buffer.
+ * The +X option writes it in hexadecimal notation, most significant byte on the left.
+ */
+char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node)
+{
+ char *ret = dst;
+ int iret;
+ char pn[INET6_ADDRSTRLEN];
+
+ if (node->options & LOG_OPT_HEXA) {
+ unsigned char *addr = NULL;
+ switch (sockaddr->sa_family) {
+ case AF_INET:
+ addr = (unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_addr.s_addr;
+ iret = snprintf(dst, size, "%02X%02X%02X%02X", addr[0], addr[1], addr[2], addr[3]);
+ break;
+ case AF_INET6:
+ addr = (unsigned char *)&((struct sockaddr_in6 *)sockaddr)->sin6_addr.s6_addr;
+ iret = snprintf(dst, size, "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7],
+ addr[8], addr[9], addr[10], addr[11], addr[12], addr[13], addr[14], addr[15]);
+ break;
+ default:
+ return NULL;
+ }
+ if (iret < 0 || iret > size)
+ return NULL;
+ ret += iret;
+ } else {
+ addr_to_str((struct sockaddr_storage *)sockaddr, pn, sizeof(pn));
+ ret = lf_text(dst, pn, size, node);
+ if (ret == NULL)
+ return NULL;
+ }
+ return ret;
+}
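+
+/* Example (hedged): for the AF_INET address 127.0.0.1, lf_ip() with the +X
+ * (LOG_OPT_HEXA) option would emit "7F000001", while without it the dotted
+ * form "127.0.0.1" is produced through addr_to_str().
+ */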
+
+/*
+ * Writes a port to the log buffer.
+ * The +X option writes it in hexadecimal notation, most significant byte on the left.
+ */
+char *lf_port(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node)
+{
+ char *ret = dst;
+ int iret;
+
+ if (node->options & LOG_OPT_HEXA) {
+ const unsigned char *port = (const unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_port;
+ iret = snprintf(dst, size, "%02X%02X", port[0], port[1]);
+ if (iret < 0 || iret > size)
+ return NULL;
+ ret += iret;
+ } else {
+ ret = ltoa_o(get_host_port((struct sockaddr_storage *)sockaddr), dst, size);
+ if (ret == NULL)
+ return NULL;
+ }
+ return ret;
+}
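+
+/* Example (hedged): for port 443, lf_port() with the +X (LOG_OPT_HEXA)
+ * option would emit "01BB" (the two bytes in network order), otherwise the
+ * decimal form "443" is produced.
+ */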
+
+
+/*
+ * This function sends the syslog message using a printf format string. It
+ * expects an LF-terminated message.
+ */
+void send_log(struct proxy *p, int level, const char *format, ...)
+{
+ va_list argp;
+ int data_len;
+
+ if (level < 0 || format == NULL || logline == NULL)
+ return;
+
+ va_start(argp, format);
+ data_len = vsnprintf(logline, global.max_syslog_len, format, argp);
+ if (data_len < 0 || data_len > global.max_syslog_len)
+ data_len = global.max_syslog_len;
+ va_end(argp);
+
+ __send_log((p ? &p->logsrvs : NULL), (p ? &p->log_tag : NULL), level,
+ logline, data_len, default_rfc5424_sd_log_format, 2);
+}
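+
+/* Usage sketch (illustrative, not from the original code): callers typically
+ * do something like
+ *   send_log(p, LOG_WARNING, "Server %s/%s is DOWN.\n", p->id, srv->id);
+ * with a trailing LF; passing p == NULL makes __send_log() fall back to the
+ * global log servers.
+ */
+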
+/*
+ * This function builds a log header of the given format using the given
+ * metadata. If format is set to LOG_FORMAT_UNSPEC, it tries to determine
+ * the format based on the given metadata. This is useful for log-forwarding
+ * to be able to forward any format without explicit settings.
+ * This function returns a struct ist array containing the elements of the
+ * header; <nbelem> is set to the number of available elements. It currently
+ * returns a maximum of NB_LOG_HDR_MAX_ELEMENTS elements.
+ */
+struct ist *build_log_header(enum log_fmt format, int level, int facility,
+ struct ist *metadata, size_t *nbelem)
+{
+ static THREAD_LOCAL struct {
+ struct ist ist_vector[NB_LOG_HDR_MAX_ELEMENTS];
+ char timestamp_buffer[LOG_LEGACYTIME_LEN+1+1];
+ time_t cur_legacy_time;
+ char priority_buffer[6];
+ } hdr_ctx = { .priority_buffer = "<<<<>" };
+
+ struct tm logtime;
+ int len;
+ int fac_level = 0;
+ time_t time = date.tv_sec;
+
+ *nbelem = 0;
+
+
+ if (format == LOG_FORMAT_UNSPEC) {
+ format = LOG_FORMAT_RAW;
+ if (metadata) {
+ /* If a hostname is set, it appears we want to perform syslog
+			 * because only rfc5424 or rfc3164 support a hostname.
+ */
+ if (metadata[LOG_META_HOST].len) {
+			/* If an rfc5424 compliant timestamp is used we consider
+			 * that the output format is rfc5424, else the legacy
+			 * format is used, which the documentation specifies
+			 * as the default for local logs.
+ */
+ if ((metadata[LOG_META_TIME].len == 1 && metadata[LOG_META_TIME].ptr[0] == '-')
+ || (metadata[LOG_META_TIME].len >= LOG_ISOTIME_MINLEN))
+ format = LOG_FORMAT_RFC5424;
+ else
+ format = LOG_FORMAT_RFC3164;
+ }
+ else if (metadata[LOG_META_TAG].len) {
+				/* A tag is present but no hostname, so we
+				 * should consider that we are trying to emit
+				 * a local log in legacy format (analogous to
+				 * RFC3164 but with the hostname stripped).
+				 */
+ format = LOG_FORMAT_LOCAL;
+ }
+ else if (metadata[LOG_META_PRIO].len) {
+				/* the source seems to be a parsed message
+				 * offering a valid level/prio prefix,
+				 * so we use this format.
+				 */
+ format = LOG_FORMAT_PRIO;
+ }
+ }
+ }
+
+	/* prepare the priority, stored into a single elem */
+ switch (format) {
+ case LOG_FORMAT_LOCAL:
+ case LOG_FORMAT_RFC3164:
+ case LOG_FORMAT_RFC5424:
+ case LOG_FORMAT_PRIO:
+ fac_level = facility << 3;
+		/* further formats ignore the facility */
+ /* fall through */
+ case LOG_FORMAT_TIMED:
+ case LOG_FORMAT_SHORT:
+ fac_level += level;
+ hdr_ctx.ist_vector[*nbelem].ptr = &hdr_ctx.priority_buffer[3]; /* last digit of the log level */
+ do {
+ *hdr_ctx.ist_vector[*nbelem].ptr = '0' + fac_level % 10;
+ fac_level /= 10;
+ hdr_ctx.ist_vector[*nbelem].ptr--;
+ } while (fac_level && hdr_ctx.ist_vector[*nbelem].ptr > &hdr_ctx.priority_buffer[0]);
+ *hdr_ctx.ist_vector[*nbelem].ptr = '<';
+ hdr_ctx.ist_vector[(*nbelem)++].len = &hdr_ctx.priority_buffer[5] - hdr_ctx.ist_vector[0].ptr;
+ break;
+ case LOG_FORMAT_ISO:
+ case LOG_FORMAT_RAW:
+ break;
+ case LOG_FORMAT_UNSPEC:
+ case LOG_FORMATS:
+ ABORT_NOW();
+ }
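+	/* e.g. (illustration, not in the original code): facility 16 (local0)
+	 * and level 6 (info) give fac_level = (16 << 3) + 6 = 134, which the
+	 * loop above encodes as the "<134>" PRI prefix.
+	 */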
+
+
+ /* prepare timestamp, stored into a max of 4 elems */
+ switch (format) {
+ case LOG_FORMAT_LOCAL:
+ case LOG_FORMAT_RFC3164:
+ /* rfc3164 ex: 'Jan 1 00:00:00 ' */
+ if (metadata && metadata[LOG_META_TIME].len == LOG_LEGACYTIME_LEN) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TIME];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ /* time is set, break immediately */
+ break;
+ }
+ else if (metadata && metadata[LOG_META_TIME].len >= LOG_ISOTIME_MINLEN) {
+ int month;
+ char *timestamp = metadata[LOG_META_TIME].ptr;
+
+ /* iso time always begins like this: '1970-01-01T00:00:00' */
+
+ /* compute month */
+ month = 10*(timestamp[5] - '0') + (timestamp[6] - '0');
+ if (month)
+ month--;
+ if (month <= 11) {
+ /* builds log prefix ex: 'Jan 1 ' */
+ len = snprintf(hdr_ctx.timestamp_buffer, sizeof(hdr_ctx.timestamp_buffer),
+ "%s %c%c ", monthname[month],
+ timestamp[8] != '0' ? timestamp[8] : ' ',
+ timestamp[9]);
+ /* we reused the timestamp_buffer, signal that it does not
+ * contain local time anymore
+ */
+ hdr_ctx.cur_legacy_time = 0;
+ if (len == 7) {
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&hdr_ctx.timestamp_buffer[0], len);
+ /* adds 'HH:MM:SS' from iso time */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&timestamp[11], 8);
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+					/* we successfully reused the iso time, we can break */
+ break;
+ }
+ }
+			/* Failed to reuse the iso time, fall back to the local legacy time */
+ }
+
+ if (unlikely(time != hdr_ctx.cur_legacy_time)) {
+ /* re-builds timestamp from the current local time */
+ get_localtime(time, &logtime);
+
+ len = snprintf(hdr_ctx.timestamp_buffer, sizeof(hdr_ctx.timestamp_buffer),
+ "%s %2d %02d:%02d:%02d ",
+ monthname[logtime.tm_mon],
+ logtime.tm_mday, logtime.tm_hour, logtime.tm_min, logtime.tm_sec);
+ if (len != LOG_LEGACYTIME_LEN+1)
+ hdr_ctx.cur_legacy_time = 0;
+ else
+ hdr_ctx.cur_legacy_time = time;
+ }
+ if (likely(hdr_ctx.cur_legacy_time))
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&hdr_ctx.timestamp_buffer[0], LOG_LEGACYTIME_LEN+1);
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("Jan 1 00:00:00 ", LOG_LEGACYTIME_LEN+1);
+ break;
+ case LOG_FORMAT_RFC5424:
+		/* adds the rfc5424 version prefix */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("1 ", 2);
+ if (metadata && metadata[LOG_META_TIME].len == 1 && metadata[LOG_META_TIME].ptr[0] == '-') {
+			/* submitted time is the NILVALUE, which is a valid timestamp for rfc5424 */
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TIME];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ break;
+ }
+		/* let it continue as the 'timed' and 'iso' formats for a usual timestamp */
+ /* fall through */
+ case LOG_FORMAT_TIMED:
+ case LOG_FORMAT_ISO:
+		/* ISO format ex: '1900-01-01T12:00:00.123456Z'
+		 *                '1900-01-01T14:00:00+02:00'
+		 *                '1900-01-01T10:00:00.123456-02:00'
+ */
+ if (metadata && metadata[LOG_META_TIME].len >= LOG_ISOTIME_MINLEN) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TIME];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ /* time is set, break immediately */
+ break;
+ }
+ else if (metadata && metadata[LOG_META_TIME].len == LOG_LEGACYTIME_LEN) {
+ int month;
+ char *timestamp = metadata[LOG_META_TIME].ptr;
+
+ for (month = 0; month < 12; month++)
+ if (!memcmp(monthname[month], timestamp, 3))
+ break;
+
+ if (month < 12) {
+
+ /* get local time to retrieve year */
+ get_localtime(time, &logtime);
+
+				/* the year seems to have changed since the log was emitted */
+ if (logtime.tm_mon < month)
+ logtime.tm_year--;
+
+ /* builds rfc5424 prefix ex: '1900-01-01T' */
+ len = snprintf(hdr_ctx.timestamp_buffer, sizeof(hdr_ctx.timestamp_buffer),
+ "%4d-%02d-%c%cT",
+ logtime.tm_year+1900, month+1,
+ timestamp[4] != ' ' ? timestamp[4] : '0',
+ timestamp[5]);
+
+ /* we reused the timestamp_buffer, signal that it does not
+ * contain local time anymore
+ */
+ hdr_ctx.cur_legacy_time = 0;
+ if (len == 11) {
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&hdr_ctx.timestamp_buffer[0], len);
+ /* adds HH:MM:SS from legacy timestamp */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&timestamp[7], 8);
+					/* skip the secfrac because it is optional */
+ /* according to rfc: -00:00 means we don't know the timezone */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("-00:00 ", 7);
+					/* we successfully reused the legacy time, we can break */
+ break;
+ }
+ }
+			/* Failed to reuse the legacy time, fall back to the local iso time */
+ }
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(timeofday_as_iso_us(1), LOG_ISOTIME_MAXLEN + 1);
+ break;
+ case LOG_FORMAT_PRIO:
+ case LOG_FORMAT_SHORT:
+ case LOG_FORMAT_RAW:
+ break;
+ case LOG_FORMAT_UNSPEC:
+ case LOG_FORMATS:
+ ABORT_NOW();
+ }
+
+	/* prepare the other metadata, stored into a max of 10 elems */
+ switch (format) {
+ case LOG_FORMAT_RFC3164:
+ if (metadata && metadata[LOG_META_HOST].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_HOST];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else /* the caller MUST fill the hostname, this field is mandatory */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("localhost ", 10);
+ /* fall through */
+ case LOG_FORMAT_LOCAL:
+ if (!metadata || !metadata[LOG_META_TAG].len)
+ break;
+
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TAG];
+ if (metadata[LOG_META_PID].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("[", 1);
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_PID];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("]", 1);
+ }
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(": ", 2);
+ break;
+ case LOG_FORMAT_RFC5424:
+ if (metadata && metadata[LOG_META_HOST].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_HOST];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_TAG].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TAG];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_PID].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_PID];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_MSGID].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_MSGID];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_STDATA].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_STDATA];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+ break;
+ case LOG_FORMAT_PRIO:
+ case LOG_FORMAT_SHORT:
+ case LOG_FORMAT_TIMED:
+ case LOG_FORMAT_ISO:
+ case LOG_FORMAT_RAW:
+ break;
+ case LOG_FORMAT_UNSPEC:
+ case LOG_FORMATS:
+ ABORT_NOW();
+ }
+
+ return hdr_ctx.ist_vector;
+}
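+
+/* Illustrative result (hedged, not from the original code): for
+ * LOG_FORMAT_RFC3164 with hostname "myhost", tag "haproxy" and pid "1234"
+ * (here with facility local0 and level info), the returned vector would
+ * concatenate to something like:
+ *   <134>Jan  1 00:00:00 myhost haproxy[1234]:
+ * the message itself being appended by the caller.
+ */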
+
+/*
+ * This function sends a syslog message to <logsrv>.
+ * The argument <metadata> MUST be an array of size
+ * LOG_META_FIELDS*sizeof(struct ist) containing data to build the header.
+ * It overrides the last byte of the message vector with an LF character.
+ * Does not return any error.
+ */
+static inline void __do_send_log(struct logsrv *logsrv, int nblogger, int level, int facility, struct ist *metadata, char *message, size_t size)
+{
+ static THREAD_LOCAL struct iovec iovec[NB_LOG_HDR_MAX_ELEMENTS+1+1] = { }; /* header elements + message + LF */
+ static THREAD_LOCAL struct msghdr msghdr = {
+		/* note: iovec is thread-local, so msg_iov cannot be part of this
+		 * static initializer; it is assigned at runtime below.
+		 */
+		//.msg_iov = iovec,
+ .msg_iovlen = NB_LOG_HDR_MAX_ELEMENTS+2
+ };
+ static THREAD_LOCAL int logfdunix = -1; /* syslog to AF_UNIX socket */
+ static THREAD_LOCAL int logfdinet = -1; /* syslog to AF_INET socket */
+ int *plogfd;
+ int sent;
+ size_t nbelem;
+ struct ist *msg_header = NULL;
+
+ msghdr.msg_iov = iovec;
+
+ /* historically some messages used to already contain the trailing LF
+	 * or a zero byte. Let's remove all trailing LFs and zero bytes.
+ */
+ while (size && (message[size-1] == '\n' || (message[size-1] == 0)))
+ size--;
+
+ if (logsrv->type == LOG_TARGET_BUFFER) {
+ plogfd = NULL;
+ goto send;
+ }
+ else if (logsrv->addr.ss_family == AF_CUST_EXISTING_FD) {
+ /* the socket's address is a file descriptor */
+ plogfd = (int *)&((struct sockaddr_in *)&logsrv->addr)->sin_addr.s_addr;
+ }
+ else if (logsrv->addr.ss_family == AF_UNIX)
+ plogfd = &logfdunix;
+ else
+ plogfd = &logfdinet;
+
+ if (plogfd && unlikely(*plogfd < 0)) {
+ /* socket not successfully initialized yet */
+ if ((*plogfd = socket(logsrv->addr.ss_family, SOCK_DGRAM,
+ (logsrv->addr.ss_family == AF_UNIX) ? 0 : IPPROTO_UDP)) < 0) {
+ static char once;
+
+ if (!once) {
+ once = 1; /* note: no need for atomic ops here */
+ ha_alert("socket() failed in logger #%d: %s (errno=%d)\n",
+ nblogger, strerror(errno), errno);
+ }
+ return;
+ } else {
+ /* we don't want to receive anything on this socket */
+ setsockopt(*plogfd, SOL_SOCKET, SO_RCVBUF, &zero, sizeof(zero));
+ /* does nothing under Linux, maybe needed for others */
+ shutdown(*plogfd, SHUT_RD);
+ fd_set_cloexec(*plogfd);
+ }
+ }
+
+ msg_header = build_log_header(logsrv->format, level, facility, metadata, &nbelem);
+ send:
+ if (logsrv->type == LOG_TARGET_BUFFER) {
+ struct ist msg;
+
+ msg = ist2(message, size);
+ msg = isttrim(msg, logsrv->maxlen);
+
+ sent = sink_write(logsrv->sink, &msg, 1, level, facility, metadata);
+ }
+ else if (logsrv->addr.ss_family == AF_CUST_EXISTING_FD) {
+ struct ist msg;
+
+ msg = ist2(message, size);
+ msg = isttrim(msg, logsrv->maxlen);
+
+ sent = fd_write_frag_line(*plogfd, logsrv->maxlen, msg_header, nbelem, &msg, 1, 1);
+ }
+ else {
+ int i = 0;
+ int totlen = logsrv->maxlen;
+
+ for (i = 0 ; i < nbelem ; i++ ) {
+ iovec[i].iov_base = msg_header[i].ptr;
+ iovec[i].iov_len = msg_header[i].len;
+ if (totlen <= iovec[i].iov_len) {
+ iovec[i].iov_len = totlen;
+ totlen = 0;
+ break;
+ }
+ totlen -= iovec[i].iov_len;
+ }
+ if (totlen) {
+ iovec[i].iov_base = message;
+ iovec[i].iov_len = size;
+ if (totlen <= iovec[i].iov_len)
+ iovec[i].iov_len = totlen;
+ i++;
+ }
+ iovec[i].iov_base = "\n"; /* insert a \n at the end of the message */
+ iovec[i].iov_len = 1;
+ i++;
+
+ msghdr.msg_iovlen = i;
+ msghdr.msg_name = (struct sockaddr *)&logsrv->addr;
+ msghdr.msg_namelen = get_addr_len(&logsrv->addr);
+
+ sent = sendmsg(*plogfd, &msghdr, MSG_DONTWAIT | MSG_NOSIGNAL);
+ }
+
+ if (sent < 0) {
+ static char once;
+
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ _HA_ATOMIC_INC(&dropped_logs);
+ else if (!once) {
+ once = 1; /* note: no need for atomic ops here */
+ ha_alert("sendmsg()/writev() failed in logger #%d: %s (errno=%d)\n",
+ nblogger, strerror(errno), errno);
+ }
+ }
+}
+
+/*
+ * This function sends a syslog message.
+ * It doesn't care about errors nor does it report them.
+ * The argument <metadata> MUST be an array of size
+ * LOG_META_FIELDS*sizeof(struct ist) containing
+ * data to build the header.
+ */
+void process_send_log(struct list *logsrvs, int level, int facility,
+ struct ist *metadata, char *message, size_t size)
+{
+ struct logsrv *logsrv;
+ int nblogger;
+
+ /* Send log messages to syslog server. */
+ nblogger = 0;
+ list_for_each_entry(logsrv, logsrvs, list) {
+ int in_range = 1;
+
+ /* we can filter the level of the messages that are sent to each logger */
+ if (level > logsrv->level)
+ continue;
+
+ if (logsrv->lb.smp_rgs) {
+ struct smp_log_range *curr_rg;
+
+ HA_SPIN_LOCK(LOGSRV_LOCK, &logsrv->lock);
+ curr_rg = &logsrv->lb.smp_rgs[logsrv->lb.curr_rg];
+ in_range = in_smp_log_range(curr_rg, logsrv->lb.curr_idx);
+ if (in_range) {
+ /* Let's consume this range. */
+ curr_rg->curr_idx = (curr_rg->curr_idx + 1) % curr_rg->sz;
+ if (!curr_rg->curr_idx) {
+ /* If consumed, let's select the next range. */
+ logsrv->lb.curr_rg = (logsrv->lb.curr_rg + 1) % logsrv->lb.smp_rgs_sz;
+ }
+ }
+ logsrv->lb.curr_idx = (logsrv->lb.curr_idx + 1) % logsrv->lb.smp_sz;
+ HA_SPIN_UNLOCK(LOGSRV_LOCK, &logsrv->lock);
+ }
+ if (in_range)
+ __do_send_log(logsrv, ++nblogger, MAX(level, logsrv->minlvl),
+ (facility == -1) ? logsrv->facility : facility,
+ metadata, message, size);
+ }
+}
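+
+/* Note (hedged sketch): the smp_rgs block above implements log sampling as
+ * configured with e.g. "log ... sample 1:10": a message is only forwarded
+ * when the current index falls within one of the configured ranges, which
+ * in this example keeps 1 message out of every 10.
+ */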
+
+/*
+ * This function sends a syslog message.
+ * It doesn't care about errors nor does it report them.
+ * The arguments <sd> and <sd_size> are used for the structured-data part
+ * in RFC5424 formatted syslog messages.
+ */
+void __send_log(struct list *logsrvs, struct buffer *tagb, int level,
+ char *message, size_t size, char *sd, size_t sd_size)
+{
+ static THREAD_LOCAL pid_t curr_pid;
+ static THREAD_LOCAL char pidstr[16];
+ static THREAD_LOCAL struct ist metadata[LOG_META_FIELDS];
+
+ if (logsrvs == NULL) {
+ if (!LIST_ISEMPTY(&global.logsrvs)) {
+ logsrvs = &global.logsrvs;
+ }
+ }
+ if (!logsrvs || LIST_ISEMPTY(logsrvs))
+ return;
+
+ if (!metadata[LOG_META_HOST].len) {
+ if (global.log_send_hostname)
+ metadata[LOG_META_HOST] = ist(global.log_send_hostname);
+ }
+
+ if (!tagb || !tagb->area)
+ tagb = &global.log_tag;
+
+ if (tagb)
+ metadata[LOG_META_TAG] = ist2(tagb->area, tagb->data);
+
+ if (unlikely(curr_pid != getpid()))
+ metadata[LOG_META_PID].len = 0;
+
+ if (!metadata[LOG_META_PID].len) {
+ curr_pid = getpid();
+ ltoa_o(curr_pid, pidstr, sizeof(pidstr));
+ metadata[LOG_META_PID] = ist2(pidstr, strlen(pidstr));
+ }
+
+ metadata[LOG_META_STDATA] = ist2(sd, sd_size);
+
+	/* Remove trailing spaces from the structured data */
+ while (metadata[LOG_META_STDATA].len && metadata[LOG_META_STDATA].ptr[metadata[LOG_META_STDATA].len-1] == ' ')
+ metadata[LOG_META_STDATA].len--;
+
+ return process_send_log(logsrvs, level, -1, metadata, message, size);
+}
+
+const char sess_cookie[8] = "NIDVEOU7"; /* No cookie, Invalid cookie, cookie for a Down server, Valid cookie, Expired cookie, Old cookie, Unused, unknown */
+const char sess_set_cookie[8] = "NPDIRU67"; /* No set-cookie, Set-cookie found and left unchanged (passive),
+ Set-cookie Deleted, Set-Cookie Inserted, Set-cookie Rewritten,
+ Set-cookie Updated, unknown, unknown */
+
+/*
+ * try to write a character if there is enough space, or goto out
+ */
+#define LOGCHAR(x) do { \
+ if (tmplog < dst + maxsize - 1) { \
+ *(tmplog++) = (x); \
+ } else { \
+ goto out; \
+ } \
+ } while(0)
+
+
+/* Initializes some log data at boot */
+static void init_log()
+{
+ char *tmp;
+ int i;
+
+	/* Initialize the escape map for the RFC5424 structured-data: '"\]'
+ * inside PARAM-VALUE should be escaped with '\' as prefix.
+ * See https://tools.ietf.org/html/rfc5424#section-6.3.3 for more
+ * details.
+ */
+ memset(rfc5424_escape_map, 0, sizeof(rfc5424_escape_map));
+
+ tmp = "\"\\]";
+ while (*tmp) {
+ ha_bit_set(*tmp, rfc5424_escape_map);
+ tmp++;
+ }
+
+	/* initialize the log header encoding map: '{|}"#' should be encoded with
+ * '#' as prefix, as well as non-printable characters ( <32 or >= 127 ).
+ * URL encoding only requires '"', '#' to be encoded as well as non-
+ * printable characters above.
+ */
+ memset(hdr_encode_map, 0, sizeof(hdr_encode_map));
+ memset(url_encode_map, 0, sizeof(url_encode_map));
+ for (i = 0; i < 32; i++) {
+ ha_bit_set(i, hdr_encode_map);
+ ha_bit_set(i, url_encode_map);
+ }
+ for (i = 127; i < 256; i++) {
+ ha_bit_set(i, hdr_encode_map);
+ ha_bit_set(i, url_encode_map);
+ }
+
+ tmp = "\"#{|}";
+ while (*tmp) {
+ ha_bit_set(*tmp, hdr_encode_map);
+ tmp++;
+ }
+
+ tmp = "\"#";
+ while (*tmp) {
+ ha_bit_set(*tmp, url_encode_map);
+ tmp++;
+ }
+
+	/* initialize the http header encoding map. The httpbis draft defines
+	 * the header content as:
+ *
+ * HTTP-message = start-line
+ * *( header-field CRLF )
+ * CRLF
+ * [ message-body ]
+ * header-field = field-name ":" OWS field-value OWS
+ * field-value = *( field-content / obs-fold )
+ * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+ * obs-fold = CRLF 1*( SP / HTAB )
+ * field-vchar = VCHAR / obs-text
+ * VCHAR = %x21-7E
+ * obs-text = %x80-FF
+ *
+	 * All the chars are encoded except "VCHAR", "obs-text", SP and HTAB.
+	 * The encoded chars are from 0x00 to 0x08, 0x0a to 0x1f and 0x7f. The
+	 * "obs-fold" is voluntarily ignored because haproxy removes it.
+ */
+ memset(http_encode_map, 0, sizeof(http_encode_map));
+ for (i = 0x00; i <= 0x08; i++)
+ ha_bit_set(i, http_encode_map);
+ for (i = 0x0a; i <= 0x1f; i++)
+ ha_bit_set(i, http_encode_map);
+ ha_bit_set(0x7f, http_encode_map);
+}
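+
+/* Illustration (not part of the original code): once init_log() has run,
+ * ha_bit_test('"', rfc5424_escape_map) is non-zero ('"' must be escaped)
+ * while ha_bit_test('a', hdr_encode_map) is zero (plain letters pass
+ * through), which is how the lf_encode_* helpers consume these maps.
+ */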
+
+INITCALL0(STG_PREPARE, init_log);
+
+/* Initialize log buffers used for syslog messages */
+int init_log_buffers()
+{
+ logline = my_realloc2(logline, global.max_syslog_len + 1);
+ logline_rfc5424 = my_realloc2(logline_rfc5424, global.max_syslog_len + 1);
+ if (!logline || !logline_rfc5424)
+ return 0;
+ return 1;
+}
+
+/* Deinitialize log buffers used for syslog messages */
+void deinit_log_buffers()
+{
+ free(logline);
+ free(logline_rfc5424);
+ logline = NULL;
+ logline_rfc5424 = NULL;
+}
+
+/* Builds a log line in <dst> based on <list_format>, and stops before reaching
+ * <maxsize> characters. Returns the size of the output string in characters,
+ * not counting the trailing zero which is always added if the resulting size
+ * is not zero. It requires a valid session and optionally a stream. If the
+ * stream is NULL, default values will be assumed for the stream part.
+ */
+int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct list *list_format)
+{
+ struct proxy *fe = sess->fe;
+ struct proxy *be;
+ struct http_txn *txn;
+ const struct strm_logs *logs;
+ struct connection *fe_conn, *be_conn;
+ unsigned int s_flags;
+ unsigned int uniq_id;
+ struct buffer chunk;
+ char *uri;
+ char *spc;
+ char *qmark;
+ char *end;
+ struct tm tm;
+ int t_request;
+ int hdr;
+ int last_isspace = 1;
+ int nspaces = 0;
+ char *tmplog;
+ char *ret;
+ int iret;
+ int status;
+ struct logformat_node *tmp;
+ struct timeval tv;
+ struct strm_logs tmp_strm_log;
+ struct ist path;
+ struct http_uri_parser parser;
+
+ /* FIXME: let's limit ourselves to frontend logging for now. */
+
+ if (likely(s)) {
+ be = s->be;
+ txn = s->txn;
+ be_conn = sc_conn(s->scb);
+ status = (txn ? txn->status : 0);
+ s_flags = s->flags;
+ uniq_id = s->uniq_id;
+ logs = &s->logs;
+ } else {
+ /* we have no stream so we first need to initialize a few
+ * things that are needed later. We do increment the request
+ * ID so that it's uniquely assigned to this request just as
+ * if the request had reached the point of being processed.
+		 * A request error is reported as it's the only element we
+		 * have here that justifies emitting such a log.
+ */
+ be = ((obj_type(sess->origin) == OBJ_TYPE_CHECK) ? __objt_check(sess->origin)->proxy : fe);
+ txn = NULL;
+ fe_conn = objt_conn(sess->origin);
+ be_conn = ((obj_type(sess->origin) == OBJ_TYPE_CHECK) ? sc_conn(__objt_check(sess->origin)->sc) : NULL);
+ status = 0;
+ s_flags = SF_ERR_PRXCOND | SF_FINST_R;
+ uniq_id = _HA_ATOMIC_FETCH_ADD(&global.req_count, 1);
+
+ /* prepare a valid log structure */
+ tmp_strm_log.tv_accept = sess->tv_accept;
+ tmp_strm_log.accept_date = sess->accept_date;
+ tmp_strm_log.t_handshake = sess->t_handshake;
+ tmp_strm_log.t_idle = (sess->t_idle >= 0 ? sess->t_idle : 0);
+ tv_zero(&tmp_strm_log.tv_request);
+ tmp_strm_log.t_queue = -1;
+ tmp_strm_log.t_connect = -1;
+ tmp_strm_log.t_data = -1;
+ tmp_strm_log.t_close = tv_ms_elapsed(&sess->tv_accept, &now);
+ tmp_strm_log.bytes_in = 0;
+ tmp_strm_log.bytes_out = 0;
+ tmp_strm_log.prx_queue_pos = 0;
+ tmp_strm_log.srv_queue_pos = 0;
+
+ logs = &tmp_strm_log;
+
+ if ((fe->mode == PR_MODE_HTTP) && fe_conn && fe_conn->mux && fe_conn->mux->ctl) {
+ enum mux_exit_status es = fe_conn->mux->ctl(fe_conn, MUX_EXIT_STATUS, &status);
+
+ switch (es) {
+ case MUX_ES_SUCCESS:
+ break;
+ case MUX_ES_INVALID_ERR:
+ status = (status ? status : 400);
+ if ((fe_conn->flags & CO_FL_ERROR) || conn_xprt_read0_pending(fe_conn))
+ s_flags = SF_ERR_CLICL | SF_FINST_R;
+ else
+ s_flags = SF_ERR_PRXCOND | SF_FINST_R;
+ break;
+ case MUX_ES_TOUT_ERR:
+ status = (status ? status : 408);
+ s_flags = SF_ERR_CLITO | SF_FINST_R;
+ break;
+ case MUX_ES_NOTIMPL_ERR:
+ status = (status ? status : 501);
+ s_flags = SF_ERR_PRXCOND | SF_FINST_R;
+ break;
+ case MUX_ES_INTERNAL_ERR:
+ status = (status ? status : 500);
+ s_flags = SF_ERR_INTERNAL | SF_FINST_R;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ t_request = -1;
+ if (tv_isge(&logs->tv_request, &logs->tv_accept))
+ t_request = tv_ms_elapsed(&logs->tv_accept, &logs->tv_request);
+
+ tmplog = dst;
+
+ /* fill logbuffer */
+ if (LIST_ISEMPTY(list_format))
+ return 0;
+
+ list_for_each_entry(tmp, list_format, list) {
+#ifdef USE_OPENSSL
+ struct connection *conn;
+#endif
+ const struct sockaddr_storage *addr;
+ const char *src = NULL;
+ struct sample *key;
+ const struct buffer empty = { };
+
+ switch (tmp->type) {
+ case LOG_FMT_SEPARATOR:
+ if (!last_isspace) {
+ LOGCHAR(' ');
+ last_isspace = 1;
+ }
+ break;
+
+ case LOG_FMT_TEXT: // text
+ src = tmp->arg;
+ iret = strlcpy2(tmplog, src, dst + maxsize - tmplog);
+ if (iret == 0)
+ goto out;
+ tmplog += iret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_EXPR: // sample expression, may be request or response
+ key = NULL;
+ if (tmp->options & LOG_OPT_REQ_CAP)
+ key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, tmp->expr, SMP_T_STR);
+
+ if (!key && (tmp->options & LOG_OPT_RES_CAP))
+ key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, tmp->expr, SMP_T_STR);
+
+ if (!key && !(tmp->options & (LOG_OPT_REQ_CAP|LOG_OPT_RES_CAP))) // cfg, cli
+ key = sample_fetch_as_type(be, sess, s, SMP_OPT_FINAL, tmp->expr, SMP_T_STR);
+
+ if (tmp->options & LOG_OPT_HTTP)
+ ret = lf_encode_chunk(tmplog, dst + maxsize,
+ '%', http_encode_map, key ? &key->data.u.str : &empty, tmp);
+ else
+ ret = lf_text_len(tmplog,
+ key ? key->data.u.str.area : NULL,
+ key ? key->data.u.str.data : 0,
+ dst + maxsize - tmplog,
+ tmp);
+ if (ret == 0)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CLIENTIP: // %ci
+ addr = (s ? sc_src(s->scf) : sess_src(sess));
+ if (addr)
+ ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CLIENTPORT: // %cp
+ addr = (s ? sc_src(s->scf) : sess_src(sess));
+ if (addr) {
+			/* sess->listener is always defined when the session's owner is an inbound connection */
+ if (addr->ss_family == AF_UNIX)
+ ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog);
+ else
+ ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ }
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FRONTENDIP: // %fi
+ addr = (s ? sc_dst(s->scf) : sess_dst(sess));
+ if (addr)
+ ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FRONTENDPORT: // %fp
+ addr = (s ? sc_dst(s->scf) : sess_dst(sess));
+ if (addr) {
+			/* sess->listener is always defined when the session's owner is an inbound connection */
+ if (addr->ss_family == AF_UNIX)
+ ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog);
+ else
+ ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ }
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BACKENDIP: // %bi
+ if (be_conn && conn_get_src(be_conn))
+ ret = lf_ip(tmplog, (const struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BACKENDPORT: // %bp
+ if (be_conn && conn_get_src(be_conn))
+ ret = lf_port(tmplog, (struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SERVERIP: // %si
+ if (be_conn && conn_get_dst(be_conn))
+ ret = lf_ip(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SERVERPORT: // %sp
+ if (be_conn && conn_get_dst(be_conn))
+ ret = lf_port(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_DATE: // %t = accept date
+ get_localtime(logs->accept_date.tv_sec, &tm);
+ ret = date2str_log(tmplog, &tm, &logs->accept_date, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_tr: // %tr = start of request date
+ /* Note that the timers are valid if we get here */
+ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ get_localtime(tv.tv_sec, &tm);
+ ret = date2str_log(tmplog, &tm, &tv, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_DATEGMT: // %T = accept date, GMT
+ get_gmtime(logs->accept_date.tv_sec, &tm);
+ ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_trg: // %trg = start of request date, GMT
+ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ get_gmtime(tv.tv_sec, &tm);
+ ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_DATELOCAL: // %Tl = accept date, local
+ get_localtime(logs->accept_date.tv_sec, &tm);
+ ret = localdate2str_log(tmplog, logs->accept_date.tv_sec, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_trl: // %trl = start of request date, local
+ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ get_localtime(tv.tv_sec, &tm);
+ ret = localdate2str_log(tmplog, tv.tv_sec, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TS: // %Ts
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", (unsigned int)logs->accept_date.tv_sec);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ltoa_o(logs->accept_date.tv_sec, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_MS: // %ms
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%02X",(unsigned int)logs->accept_date.tv_usec/1000);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ if ((dst + maxsize - tmplog) < 4)
+ goto out;
+ ret = utoa_pad((unsigned int)logs->accept_date.tv_usec/1000,
+ tmplog, 4);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_FRONTEND: // %f
+ src = fe->id;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FRONTEND_XPRT: // %ft
+ src = fe->id;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ iret = strlcpy2(tmplog, src, dst + maxsize - tmplog);
+ if (iret == 0)
+ goto out;
+ tmplog += iret;
+
+ /* sess->listener may be undefined if the session's owner is a health-check */
+ if (sess->listener && sess->listener->bind_conf->xprt->get_ssl_sock_ctx)
+ LOGCHAR('~');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+#ifdef USE_OPENSSL
+ case LOG_FMT_SSL_CIPHER: // %sslc
+ src = NULL;
+ conn = objt_conn(sess->origin);
+ if (conn) {
+ src = ssl_sock_get_cipher_name(conn);
+ }
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SSL_VERSION: // %sslv
+ src = NULL;
+ conn = objt_conn(sess->origin);
+ if (conn) {
+ src = ssl_sock_get_proto_version(conn);
+ }
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+#endif
+ case LOG_FMT_BACKEND: // %b
+ src = be->id;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SERVER: // %s
+ switch (obj_type(s ? s->target : sess->origin)) {
+ case OBJ_TYPE_SERVER:
+ src = __objt_server(s->target)->id;
+ break;
+ case OBJ_TYPE_APPLET:
+ src = __objt_applet(s->target)->name;
+ break;
+ case OBJ_TYPE_CHECK:
+ src = (__objt_check(sess->origin)->server
+ ? __objt_check(sess->origin)->server->id
+ : "<NOSRV>");
+ break;
+ default:
+ src = "<NOSRV>";
+ break;
+ }
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Th: // %Th = handshake time
+ ret = ltoa_o(logs->t_handshake, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Ti: // %Ti = HTTP idle time
+ ret = ltoa_o(logs->t_idle, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TR: // %TR = HTTP request time
+ ret = ltoa_o((t_request >= 0) ? t_request - logs->t_idle - logs->t_handshake : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TQ: // %Tq = Th + Ti + TR
+ ret = ltoa_o(t_request, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TW: // %Tw
+ ret = ltoa_o((logs->t_queue >= 0) ? logs->t_queue - t_request : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TC: // %Tc
+ ret = ltoa_o((logs->t_connect >= 0) ? logs->t_connect - logs->t_queue : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Tr: // %Tr
+ ret = ltoa_o((logs->t_data >= 0) ? logs->t_data - logs->t_connect : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TD: // %Td
+ if (be->mode == PR_MODE_HTTP)
+ ret = ltoa_o((logs->t_data >= 0) ? logs->t_close - logs->t_data : -1,
+ tmplog, dst + maxsize - tmplog);
+ else
+ ret = ltoa_o((logs->t_connect >= 0) ? logs->t_close - logs->t_connect : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Ta: // %Ta = active time = Tt - Th - Ti
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0),
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TT: // %Tt = total time
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = ltoa_o(logs->t_close, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TU: // %Tu = total time seen by user = Tt - Ti
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? logs->t_idle : 0),
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_STATUS: // %ST
+ ret = ltoa_o(status, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BYTES: // %B
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = lltoa(logs->bytes_out, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BYTES_UP: // %U
+ ret = lltoa(logs->bytes_in, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CCLIENT: // %CC
+ src = txn ? txn->cli_cookie : NULL;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CSERVER: // %CS
+ src = txn ? txn->srv_cookie : NULL;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TERMSTATE: // %ts
+ LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]);
+ LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]);
+ *tmplog = '\0';
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TERMSTATE_CK: // %tsc, same as TS with cookie state (for mode HTTP)
+ LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]);
+ LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]);
+ LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_cookie[(txn->flags & TX_CK_MASK) >> TX_CK_SHIFT] : '-');
+ LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_set_cookie[(txn->flags & TX_SCK_MASK) >> TX_SCK_SHIFT] : '-');
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_ACTCONN: // %ac
+ ret = ltoa_o(actconn, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FECONN: // %fc
+ ret = ltoa_o(fe->feconn, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BECONN: // %bc
+ ret = ltoa_o(be->beconn, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SRVCONN: // %sc
+ switch (obj_type(s ? s->target : sess->origin)) {
+ case OBJ_TYPE_SERVER:
+ ret = ultoa_o(__objt_server(s->target)->cur_sess,
+ tmplog, dst + maxsize - tmplog);
+ break;
+ case OBJ_TYPE_CHECK:
+ ret = ultoa_o(__objt_check(sess->origin)->server
+ ? __objt_check(sess->origin)->server->cur_sess
+ : 0, tmplog, dst + maxsize - tmplog);
+ break;
+ default:
+ ret = ultoa_o(0, tmplog, dst + maxsize - tmplog);
+ break;
+ }
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_RETRIES: // %rq
+ if (s_flags & SF_REDISP)
+ LOGCHAR('+');
+ ret = ltoa_o((s ? s->conn_retries : 0), tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SRVQUEUE: // %sq
+ ret = ltoa_o(logs->srv_queue_pos, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BCKQUEUE: // %bq
+ ret = ltoa_o(logs->prx_queue_pos, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HDRREQUEST: // %hr
+ /* request header */
+ if (fe->nb_req_cap && s && s->req_cap) {
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ LOGCHAR('{');
+ for (hdr = 0; hdr < fe->nb_req_cap; hdr++) {
+ if (hdr)
+ LOGCHAR('|');
+ if (s->req_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->req_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ }
+ }
+ LOGCHAR('}');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_HDRREQUESTLIST: // %hrl
+ /* request header list */
+ if (fe->nb_req_cap && s && s->req_cap) {
+ for (hdr = 0; hdr < fe->nb_req_cap; hdr++) {
+ if (hdr > 0)
+ LOGCHAR(' ');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ if (s->req_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->req_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ } else if (!(tmp->options & LOG_OPT_QUOTE))
+ LOGCHAR('-');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ }
+ }
+ break;
+
+
+ case LOG_FMT_HDRRESPONS: // %hs
+ /* response header */
+ if (fe->nb_rsp_cap && s && s->res_cap) {
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ LOGCHAR('{');
+ for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) {
+ if (hdr)
+ LOGCHAR('|');
+ if (s->res_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->res_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ }
+ }
+ LOGCHAR('}');
+ last_isspace = 0;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ }
+ break;
+
+ case LOG_FMT_HDRRESPONSLIST: // %hsl
+ /* response header list */
+ if (fe->nb_rsp_cap && s && s->res_cap) {
+ for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) {
+ if (hdr > 0)
+ LOGCHAR(' ');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ if (s->res_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->res_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ } else if (!(tmp->options & LOG_OPT_QUOTE))
+ LOGCHAR('-');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ }
+ }
+ break;
+
+ case LOG_FMT_REQ: // %r
+ /* Request */
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', url_encode_map, uri, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_PATH: // %HP
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for first space or question mark after url
+ spc = uri;
+ while (spc < end && *spc != '?' && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = uri;
+ chunk.data = spc - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_PATH_ONLY: // %HPO
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for first space after url
+ spc = uri;
+ while (spc < end && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ path = ist2(uri, spc - uri);
+
+ // extract relative path without query params from url
+ parser = http_uri_parser_init(path);
+ path = iststop(http_parse_path(&parser), '?');
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = path.ptr;
+ chunk.data = path.len;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_QUERY: // %HQ
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ if (!txn || !txn->uri) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ uri = txn->uri;
+ end = uri + strlen(uri);
+ // look for the first question mark
+ while (uri < end && *uri != '?')
+ uri++;
+
+ qmark = uri;
+ // look for first space or question mark after url
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ chunk.area = qmark;
+ chunk.data = uri - qmark;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_URI: // %HU
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for first space after url
+ spc = uri;
+ while (spc < end && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = uri;
+ chunk.data = spc - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_METHOD: // %HM
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ spc = uri;
+ while (spc < end && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ if (spc == end) { // odd case, we have txn->uri, but we only got a verb
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = uri;
+ chunk.data = spc - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_VERSION: // %HV
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for the next whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri))
+ uri++;
+
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else if (uri == end) {
+ chunk.area = "HTTP/0.9";
+ chunk.data = strlen("HTTP/0.9");
+ } else {
+ chunk.area = uri;
+ chunk.data = end - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_COUNTER: // %rt
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", uniq_id);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ltoa_o(uniq_id, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_LOGCNT: // %lc
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", fe->log_count);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ultoa_o(fe->log_count, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_HOSTNAME: // %H
+ src = hostname;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_PID: // %pid
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", pid);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ltoa_o(pid, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_UNIQUEID: // %ID
+ ret = NULL;
+ if (s)
+ ret = lf_text_len(tmplog, s->unique_id.ptr, s->unique_id.len, maxsize - (tmplog - dst), tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, maxsize - (tmplog - dst), tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ }
+ }
+
+out:
+	/* *tmplog is an unused character */
+ *tmplog = '\0';
+ return tmplog - dst;
+
+}
+
+/*
+ * send a log for the stream when we have enough info about it.
+ * Will not log if the frontend has no log defined.
+ */
+void strm_log(struct stream *s)
+{
+ struct session *sess = s->sess;
+ int size, err, level;
+ int sd_size = 0;
+
+ /* if we don't want to log normal traffic, return now */
+ err = (s->flags & SF_REDISP) ||
+ ((s->flags & SF_ERR_MASK) > SF_ERR_LOCAL) ||
+ (((s->flags & SF_ERR_MASK) == SF_ERR_NONE) && s->conn_retries) ||
+ ((sess->fe->mode == PR_MODE_HTTP) && s->txn && s->txn->status >= 500);
+
+ if (!err && (sess->fe->options2 & PR_O2_NOLOGNORM))
+ return;
+
+ if (LIST_ISEMPTY(&sess->fe->logsrvs))
+ return;
+
+ if (s->logs.level) { /* loglevel was overridden */
+ if (s->logs.level == -1) {
+ s->logs.logwait = 0; /* logs disabled */
+ return;
+ }
+ level = s->logs.level - 1;
+ }
+ else {
+ level = LOG_INFO;
+ if (err && (sess->fe->options2 & PR_O2_LOGERRORS))
+ level = LOG_ERR;
+ }
+
+ /* if unique-id was not generated */
+ if (!isttest(s->unique_id) && !LIST_ISEMPTY(&sess->fe->format_unique_id)) {
+ stream_generate_unique_id(s, &sess->fe->format_unique_id);
+ }
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) {
+ sd_size = build_logline(s, logline_rfc5424, global.max_syslog_len,
+ &sess->fe->logformat_sd);
+ }
+
+ size = build_logline(s, logline, global.max_syslog_len, &sess->fe->logformat);
+ if (size > 0) {
+ _HA_ATOMIC_INC(&sess->fe->log_count);
+ __send_log(&sess->fe->logsrvs, &sess->fe->log_tag, level,
+ logline, size + 1, logline_rfc5424, sd_size);
+ s->logs.logwait = 0;
+ }
+}
+
+/*
+ * send a minimalist log for the session. Will not log if the frontend has no
+ * log defined. It is assumed that this is only used to report anomalies that
+ * cannot lead to the creation of a regular stream. Because of this the log
+ * level is LOG_INFO or LOG_ERR depending on the "log-separate-errors" setting
+ * in the frontend. The caller must simply know that it should not call this
+ * function to report unimportant events. It is safe to call this function with
+ * sess==NULL (will not do anything).
+ */
+void sess_log(struct session *sess)
+{
+ int size, level;
+ int sd_size = 0;
+
+ if (!sess)
+ return;
+
+ if (LIST_ISEMPTY(&sess->fe->logsrvs))
+ return;
+
+ level = LOG_INFO;
+ if (sess->fe->options2 & PR_O2_LOGERRORS)
+ level = LOG_ERR;
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) {
+ sd_size = sess_build_logline(sess, NULL,
+ logline_rfc5424, global.max_syslog_len,
+ &sess->fe->logformat_sd);
+ }
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_error))
+ size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat_error);
+ else
+ size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat);
+ if (size > 0) {
+ _HA_ATOMIC_INC(&sess->fe->log_count);
+ __send_log(&sess->fe->logsrvs, &sess->fe->log_tag, level,
+ logline, size + 1, logline_rfc5424, sd_size);
+ }
+}
+
+void app_log(struct list *logsrvs, struct buffer *tag, int level, const char *format, ...)
+{
+ va_list argp;
+ int data_len;
+
+ if (level < 0 || format == NULL || logline == NULL)
+ return;
+
+ va_start(argp, format);
+ data_len = vsnprintf(logline, global.max_syslog_len, format, argp);
+ if (data_len < 0 || data_len > global.max_syslog_len)
+ data_len = global.max_syslog_len;
+ va_end(argp);
+
+ __send_log(logsrvs, tag, level, logline, data_len, default_rfc5424_sd_log_format, 2);
+}
+/*
+ * This function parses a received log message <buf> of size <buflen>.
+ * It fills <level>, <facility> and <metadata> depending on the detected
+ * header format, and <message> will point to the remaining payload of
+ * size <size>.
+ *
+ * <metadata> must point to a preallocated array of LOG_META_FIELDS*sizeof(struct ist).
+ * A field's ist len will be set to 0 if the field is not found, and
+ * <level> and <facility> will be set to -1 if not found.
+ */
+void parse_log_message(char *buf, size_t buflen, int *level, int *facility,
+ struct ist *metadata, char **message, size_t *size)
+{
+
+ char *p;
+ int fac_level = 0;
+
+ *level = *facility = -1;
+
+ *message = buf;
+ *size = buflen;
+
+ memset(metadata, 0, LOG_META_FIELDS*sizeof(struct ist));
+
+ p = buf;
+ if (*size < 2 || *p != '<')
+ return;
+
+ p++;
+ while (*p != '>') {
+ if (*p > '9' || *p < '0')
+ return;
+ fac_level = 10*fac_level + (*p - '0');
+ p++;
+ if ((p - buf) > buflen)
+ return;
+ }
+
+ *facility = fac_level >> 3;
+ *level = fac_level & 0x7;
+ p++;
+
+ metadata[LOG_META_PRIO] = ist2(buf, p - buf);
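+	/* e.g. (illustration, not in the original code): a buffer starting with
+	 * "<134>" yields facility = 134 >> 3 = 16 (local0) and
+	 * level = 134 & 0x7 = 6 (info), with metadata[LOG_META_PRIO] covering
+	 * the five "<134>" bytes.
+	 */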
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = buf;
+
+ /* for rfc5424, prio is always followed by '1' and ' ' */
+ if ((*size > 2) && (p[0] == '1') && (p[1] == ' ')) {
+ /* format is always '1 TIMESTAMP HOSTNAME TAG PID MSGID STDATA '
+ * followed by message.
+ * Each header field can present NILVALUE: '-'
+ */
+
+ p += 2;
+ *size -= 2;
+ /* timestamp is NILVALUE '-' */
+ if (*size > 2 && (p[0] == '-') && p[1] == ' ') {
+ metadata[LOG_META_TIME] = ist2(p, 1);
+ p++;
+ }
+ else if (*size > LOG_ISOTIME_MINLEN) {
+ metadata[LOG_META_TIME].ptr = p;
+
+ /* check if optional secfrac is present
+ * in timestamp.
+ * possible formats are:
+ * ex: '1970-01-01T00:00:00.000000Z'
+ * '1970-01-01T00:00:00.000000+00:00'
+ * '1970-01-01T00:00:00.000000-00:00'
+ * '1970-01-01T00:00:00Z'
+ * '1970-01-01T00:00:00+00:00'
+ * '1970-01-01T00:00:00-00:00'
+ */
+ p += 19;
+ if (*p == '.') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ while (*p != 'Z' && *p != '+' && *p != '-') {
+ if ((unsigned char)(*p - '0') > 9)
+ goto bad_format;
+
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ }
+
+ if (*p == 'Z')
+ p++;
+ else
+ p += 6; /* case of '+00:00' or '-00:00' */
+
+ if ((p - buf) >= buflen || *p != ' ')
+ goto bad_format;
+ metadata[LOG_META_TIME].len = p - metadata[LOG_META_TIME].ptr;
+ }
+ else
+ goto bad_format;
+
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_HOST].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_HOST].len = p - metadata[LOG_META_HOST].ptr;
+ if (metadata[LOG_META_HOST].len == 1 && metadata[LOG_META_HOST].ptr[0] == '-')
+ metadata[LOG_META_HOST].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_TAG].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_TAG].len = p - metadata[LOG_META_TAG].ptr;
+ if (metadata[LOG_META_TAG].len == 1 && metadata[LOG_META_TAG].ptr[0] == '-')
+ metadata[LOG_META_TAG].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_PID].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_PID].len = p - metadata[LOG_META_PID].ptr;
+ if (metadata[LOG_META_PID].len == 1 && metadata[LOG_META_PID].ptr[0] == '-')
+ metadata[LOG_META_PID].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_MSGID].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_MSGID].len = p - metadata[LOG_META_MSGID].ptr;
+ if (metadata[LOG_META_MSGID].len == 1 && metadata[LOG_META_MSGID].ptr[0] == '-')
+ metadata[LOG_META_MSGID].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ /* structured data format is:
+ * ex:
+ * '[key1=value1 key2=value2][key3=value3]'
+ *
+ * space is invalid outside [] because
+ * considered as the end of structured data field
+ */
+ metadata[LOG_META_STDATA].ptr = p;
+ if (*p == '[') {
+ int elem = 0;
+
+ while (1) {
+ if (elem) {
+ /* according to rfc this char is escaped in param values */
+ if (*p == ']' && *(p-1) != '\\')
+ elem = 0;
+ }
+ else {
+ if (*p == '[')
+ elem = 1;
+ else if (*p == ' ')
+ break;
+ else
+ goto bad_format;
+ }
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ }
+ else if (*p == '-') {
+ /* case of NILVALUE */
+ p++;
+ if ((p - buf) >= buflen || *p != ' ')
+ goto bad_format;
+ }
+ else
+ goto bad_format;
+
+ metadata[LOG_META_STDATA].len = p - metadata[LOG_META_STDATA].ptr;
+ if (metadata[LOG_META_STDATA].len == 1 && metadata[LOG_META_STDATA].ptr[0] == '-')
+ metadata[LOG_META_STDATA].len = 0;
+
+ p++;
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = p;
+ }
+ else if (*size > LOG_LEGACYTIME_LEN) {
+ int m;
+
+ /* supported header format according to rfc3164.
+ * ex:
+ * 'Jan 1 00:00:00 HOSTNAME TAG[PID]: '
+ * or 'Jan 1 00:00:00 HOSTNAME TAG: '
+ * or 'Jan 1 00:00:00 HOSTNAME '
+ * Note: HOSTNAME is mandatory, and day
+ * of month uses a single space prefix if
+ * less than 10 to ensure hour offset is
+ * always the same.
+ */
+
+ /* Check the month to see if it corresponds to an rfc3164
+ * header, e.g. 'Jan 1 00:00:00' */
+ for (m = 0; m < 12; m++)
+ if (!memcmp(monthname[m], p, 3))
+ break;
+ /* Month not found */
+ if (m == 12)
+ goto bad_format;
+
+ metadata[LOG_META_TIME] = ist2(p, LOG_LEGACYTIME_LEN);
+
+ p += LOG_LEGACYTIME_LEN;
+ if ((p - buf) >= buflen || *p != ' ')
+ goto bad_format;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_HOST].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_HOST].len = p - metadata[LOG_META_HOST].ptr;
+
+ /* TAG does not seem to be mandatory */
+ p++;
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = buf;
+
+ if (!buflen)
+ return;
+
+ while (((p - buf) < buflen) && *p != ' ' && *p != ':')
+ p++;
+
+ /* a tag must end with a trailing ':' */
+ if (((p - buf) >= buflen) || *p != ':')
+ return;
+ p++;
+ /* followed by a space */
+ if (((p - buf) >= buflen) || *p != ' ')
+ return;
+
+ /* rewind to parse tag and pid */
+ p = buf;
+ metadata[LOG_META_TAG].ptr = p;
+ /* we have the guarantee that ':' will be reached before the size limit */
+ while (*p != ':') {
+ if (*p == '[') {
+ metadata[LOG_META_TAG].len = p - metadata[LOG_META_TAG].ptr;
+ metadata[LOG_META_PID].ptr = p + 1;
+ }
+ else if (*p == ']' && isttest(metadata[LOG_META_PID])) {
+ if (p[1] != ':')
+ return;
+ metadata[LOG_META_PID].len = p - metadata[LOG_META_PID].ptr;
+ }
+ p++;
+ }
+ if (!metadata[LOG_META_TAG].len)
+ metadata[LOG_META_TAG].len = p - metadata[LOG_META_TAG].ptr;
+
+ /* skip ':' and ' '; we still have the guarantee that the size is large enough */
+ p += 2;
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = buf;
+ }
+
+ return;
+
+bad_format:
+ /* bad syslog format: we reset all parsed syslog fields, but the
+ * priority is kept because we are still able to re-build this
+ * message using LOG_FORMAT_PRIO.
+ */
+ metadata[LOG_META_TIME].len = 0;
+ metadata[LOG_META_HOST].len = 0;
+ metadata[LOG_META_TAG].len = 0;
+ metadata[LOG_META_PID].len = 0;
+ metadata[LOG_META_MSGID].len = 0;
+ metadata[LOG_META_STDATA].len = 0;
+
+ return;
+}
+
+/*
+ * UDP syslog fd handler
+ */
+void syslog_fd_handler(int fd)
+{
+ static THREAD_LOCAL struct ist metadata[LOG_META_FIELDS];
+ ssize_t ret = 0;
+ struct buffer *buf = get_trash_chunk();
+ size_t size;
+ char *message;
+ int level;
+ int facility;
+ struct listener *l = objt_listener(fdtab[fd].owner);
+ int max_accept;
+
+ BUG_ON(!l);
+
+ if (fdtab[fd].state & FD_POLL_IN) {
+
+ if (!fd_recv_ready(fd))
+ return;
+
+ max_accept = l->maxaccept ? l->maxaccept : 1;
+
+ do {
+ /* Source address */
+ struct sockaddr_storage saddr = {0};
+ socklen_t saddrlen;
+
+ saddrlen = sizeof(saddr);
+
+ ret = recvfrom(fd, buf->area, buf->size, 0, (struct sockaddr *)&saddr, &saddrlen);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ fd_cant_recv(fd);
+ goto out;
+ }
+ buf->data = ret;
+
+ /* update counters */
+ _HA_ATOMIC_INC(&cum_log_messages);
+ proxy_inc_fe_req_ctr(l, l->bind_conf->frontend);
+
+ parse_log_message(buf->area, buf->data, &level, &facility, metadata, &message, &size);
+
+ process_send_log(&l->bind_conf->frontend->logsrvs, level, facility, metadata, message, size);
+
+ } while (--max_accept);
+ }
+
+out:
+ return;
+}
+
+/*
+ * IO Handler to handle message exchange with a syslog tcp client
+ */
+static void syslog_io_handler(struct appctx *appctx)
+{
+ static THREAD_LOCAL struct ist metadata[LOG_META_FIELDS];
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct proxy *frontend = strm_fe(s);
+ struct listener *l = strm_li(s);
+ struct buffer *buf = get_trash_chunk();
+ int max_accept;
+ int to_skip;
+ int facility;
+ int level;
+ char *message;
+ size_t size;
+
+ max_accept = l->maxaccept ? l->maxaccept : 1;
+ while (co_data(sc_oc(sc))) {
+ char c;
+
+ if (max_accept <= 0)
+ goto missing_budget;
+ max_accept--;
+
+ to_skip = co_getchar(sc_oc(sc), &c);
+ if (!to_skip)
+ goto missing_data;
+ else if (to_skip < 0)
+ goto cli_abort;
+
+ if (c == '<') {
+ /* rfc-6587, Non-Transparent-Framing: messages separated by
+ * a trailing LF or CR LF
+ */
+ to_skip = co_getline(sc_oc(sc), buf->area, buf->size);
+ if (!to_skip)
+ goto missing_data;
+ else if (to_skip < 0)
+ goto cli_abort;
+
+ if (buf->area[to_skip - 1] != '\n')
+ goto parse_error;
+
+ buf->data = to_skip - 1;
+
+ /* according to rfc-6587, some devices add a CR before the LF */
+ if (buf->data && buf->area[buf->data - 1] == '\r')
+ buf->data--;
+
+ }
+ else if ((unsigned char)(c - '1') <= 8) {
+ /* rfc-6587, Octet-Counting: the message length in ASCII
+ * (the first digit cannot be zero), followed by a space
+ * and the message itself, e.g. '15 <14>hello world'
+ */
+ char *p = NULL;
+ int msglen;
+
+ to_skip = co_getword(sc_oc(sc), buf->area, buf->size, ' ');
+ if (!to_skip)
+ goto missing_data;
+ else if (to_skip < 0)
+ goto cli_abort;
+
+ if (buf->area[to_skip - 1] != ' ')
+ goto parse_error;
+
+ msglen = strtol(buf->area, &p, 10);
+ if (!msglen || p != &buf->area[to_skip - 1])
+ goto parse_error;
+
+ /* message seems too large */
+ if (msglen > buf->size)
+ goto parse_error;
+
+ msglen = co_getblk(sc_oc(sc), buf->area, msglen, to_skip);
+ if (!msglen)
+ goto missing_data;
+ else if (msglen < 0)
+ goto cli_abort;
+
+
+ buf->data = msglen;
+ to_skip += msglen;
+ }
+ else
+ goto parse_error;
+
+ co_skip(sc_oc(sc), to_skip);
+
+ /* update counters */
+ _HA_ATOMIC_INC(&cum_log_messages);
+ proxy_inc_fe_req_ctr(l, frontend);
+
+ parse_log_message(buf->area, buf->data, &level, &facility, metadata, &message, &size);
+
+ process_send_log(&frontend->logsrvs, level, facility, metadata, message, size);
+
+ }
+
+missing_data:
+ /* we need more data to read */
+ sc_oc(sc)->flags |= CF_READ_DONTWAIT;
+
+ return;
+
+missing_budget:
+ /* it may remain some stuff to do, let's retry later */
+ appctx_wakeup(appctx);
+
+ return;
+
+parse_error:
+ if (l->counters)
+ _HA_ATOMIC_INC(&l->counters->failed_req);
+ _HA_ATOMIC_INC(&frontend->fe_counters.failed_req);
+
+ goto close;
+
+cli_abort:
+ if (l->counters)
+ _HA_ATOMIC_INC(&l->counters->cli_aborts);
+ _HA_ATOMIC_INC(&frontend->fe_counters.cli_aborts);
+
+close:
+ sc_shutw(sc);
+ sc_shutr(sc);
+
+ sc_ic(sc)->flags |= CF_READ_NULL;
+
+ return;
+}
+
+static struct applet syslog_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SYSLOG>", /* used for logging */
+ .fct = syslog_io_handler,
+ .release = NULL,
+};
+
+/*
+ * Parse a "log-forward" section and create the corresponding log-forward
+ * proxy.
+ *
+ * The function returns 0 on success, otherwise it returns error flags.
+ */
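+/* Example of a section accepted by this parser (the addresses and the log
+ * target are illustrative only):
+ *
+ *     log-forward syslog-lb
+ *         dgram-bind 127.0.0.1:1514
+ *         bind 127.0.0.1:1514
+ *         log 127.0.0.1:5114 local0
+ *         timeout client 10s
+ */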
+int cfg_parse_log_forward(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = 0;
+ struct proxy *px;
+ char *errmsg = NULL;
+ const char *err = NULL;
+
+ if (strcmp(args[0], "log-forward") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for ip-forward section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ px = log_forward_by_name(args[1]);
+ if (px) {
+ ha_alert("Parsing [%s:%d]: log-forward section '%s' has the same name as another log-forward section declared at %s:%d.\n",
+ file, linenum, args[1], px->conf.file, px->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ px = proxy_find_by_name(args[1], 0, 0);
+ if (px) {
+ ha_alert("Parsing [%s:%d]: log forward section '%s' has the same name as %s '%s' declared at %s:%d.\n",
+ file, linenum, args[1], proxy_type_str(px),
+ px->id, px->conf.file, px->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ px = calloc(1, sizeof *px);
+ if (!px) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ init_new_proxy(px);
+ px->next = cfg_log_forward;
+ cfg_log_forward = px;
+ px->conf.file = strdup(file);
+ px->conf.line = linenum;
+ px->mode = PR_MODE_SYSLOG;
+ px->last_change = now.tv_sec;
+ px->cap = PR_CAP_FE;
+ px->maxconn = 10;
+ px->timeout.client = TICK_ETERNITY;
+ px->accept = frontend_accept;
+ px->default_target = &syslog_applet.obj_type;
+ px->id = strdup(args[1]);
+
+ }
+ else if (strcmp(args[0], "maxconn") == 0) { /* maxconn */
+ if (warnifnotcap(cfg_log_forward, PR_CAP_FE, file, linenum, args[0], " Maybe you want 'fullconn' instead ?"))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cfg_log_forward->maxconn = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "backlog") == 0) { /* backlog */
+ if (warnifnotcap(cfg_log_forward, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cfg_log_forward->backlog = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "bind") == 0) {
+ int cur_arg;
+ struct bind_conf *bind_conf;
+ struct listener *l;
+ int ret;
+
+ cur_arg = 1;
+
+ bind_conf = bind_conf_alloc(cfg_log_forward, file, linenum,
+ NULL, xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : out of memory error.", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!str2listener(args[1], cfg_log_forward, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[2]);
+ }
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ l->maxaccept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+ l->accept = session_accept_fd;
+ l->analysers |= cfg_log_forward->fe_req_ana;
+ l->default_target = cfg_log_forward->default_target;
+ global.maxsock++;
+ }
+ cur_arg++;
+
+ ret = bind_parse_args_list(bind_conf, args, cur_arg, cursection, file, linenum);
+ err_code |= ret;
+ if (ret != 0) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "dgram-bind") == 0) {
+ int cur_arg;
+ struct bind_conf *bind_conf;
+ struct bind_kw *kw;
+ struct listener *l;
+
+ cur_arg = 1;
+
+ bind_conf = bind_conf_alloc(cfg_log_forward, file, linenum,
+ NULL, xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : out of memory error.", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!str2receiver(args[1], cfg_log_forward, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[2]);
+ }
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ /* the fact that the sockets are of type dgram is guaranteed by str2receiver() */
+ l->maxaccept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+ l->rx.iocb = syslog_fd_handler;
+ global.maxsock++;
+ }
+ cur_arg++;
+
+ while (*args[cur_arg] && (kw = bind_find_kw(args[cur_arg]))) {
+ int ret;
+
+ ret = kw->parse(args, cur_arg, cfg_log_forward, bind_conf, &errmsg);
+ err_code |= ret;
+ if (ret) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d]: error encountered while processing '%s'\n",
+ file, linenum, args[cur_arg]);
+ if (ret & ERR_FATAL)
+ goto out;
+ }
+ cur_arg += 1 + kw->skip;
+ }
+ if (*args[cur_arg] != 0) {
+ const char *best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n",
+ file, linenum, args[cur_arg], cursection, best);
+ else
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section.\n",
+ file, linenum, args[cur_arg], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) {
+ if (!parse_logsrv(args, &cfg_log_forward->logsrvs, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ const char *res;
+ unsigned timeout;
+
+ if (strcmp(args[1], "client") != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s %s' in log-forward section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (*args[2] == 0) {
+ ha_alert("parsing [%s:%d] : missing timeout client value.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(&errmsg, "timer overflow in argument '%s' to 'timeout client' (maximum value is 2147483647 ms or ~24.8 days)", args[2]);
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(&errmsg, "timer underflow in argument '%s' to 'timeout client' (minimum non-null value is 1 ms)", args[2]);
+ }
+ else if (res) {
+ memprintf(&errmsg, "unexpected character '%c' in 'timeout client'", *res);
+ }
+
+ if (res) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cfg_log_forward->timeout.client = MS_TO_TICKS(timeout);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in log-forward section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+out:
+ return err_code;
+}
+
+
+/* config parsers for this section */
+REGISTER_CONFIG_SECTION("log-forward", cfg_parse_log_forward, NULL);
+
+REGISTER_PER_THREAD_ALLOC(init_log_buffers);
+REGISTER_PER_THREAD_FREE(deinit_log_buffers);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/lru.c b/src/lru.c
new file mode 100644
index 0000000..07ef50c
--- /dev/null
+++ b/src/lru.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2015 Willy Tarreau <w@1wt.eu>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <import/lru.h>
+
+/* Minimal list manipulation macros for lru64_list */
+#define LIST_INSERT(lh, el) ({ (el)->n = (lh)->n; (el)->n->p = (lh)->n = (el); (el)->p = (lh); })
+#define LIST_DELETE(el) ({ (el)->n->p = (el)->p; (el)->p->n = (el)->n; })
+
+
+/* Lookup key <key> in LRU cache <lru> for use with domain <domain> whose data's
+ * current version is <revision>. It differs from lru64_get as it does not
+ * create missing keys. The function returns NULL if an error or a cache miss
+ * occurs. */
+struct lru64 *lru64_lookup(unsigned long long key, struct lru64_head *lru,
+ void *domain, unsigned long long revision)
+{
+ struct eb64_node *node;
+ struct lru64 *elem;
+
+ node = __eb64_lookup(&lru->keys, key);
+ elem = container_of(node, typeof(*elem), node);
+ if (elem) {
+ /* Existing entry found, check validity then move it to the
+ * head of the LRU list.
+ */
+ if (elem->domain == domain && elem->revision == revision) {
+ LIST_DELETE(&elem->lru);
+ LIST_INSERT(&lru->list, &elem->lru);
+ return elem;
+ }
+ }
+ return NULL;
+}
+
+/* Get key <key> from LRU cache <lru> for use with domain <domain> whose data's
+ * current revision is <revision>. If the key doesn't exist it's first created
+ * with ->domain = NULL. The caller detects this situation by checking ->domain
+ * and must perform the operation to be cached then call lru64_commit() to
+ * complete the operation. A lock (mutex or spinlock) may be added around the
+ * function to permit use in a multi-threaded environment. The function may
+ * return NULL upon memory allocation failure.
+ */
+struct lru64 *lru64_get(unsigned long long key, struct lru64_head *lru,
+ void *domain, unsigned long long revision)
+{
+ struct eb64_node *node;
+ struct lru64 *elem;
+
+ if (!lru->spare) {
+ if (!lru->cache_size)
+ return NULL;
+ lru->spare = malloc(sizeof(*lru->spare));
+ if (!lru->spare)
+ return NULL;
+ lru->spare->domain = NULL;
+ }
+
+ /* Lookup or insert */
+ lru->spare->node.key = key;
+ node = __eb64_insert(&lru->keys, &lru->spare->node);
+ elem = container_of(node, typeof(*elem), node);
+
+ if (elem != lru->spare) {
+ /* Existing entry found, check validity then move it to the
+ * head of the LRU list.
+ */
+ if (elem->domain == domain && elem->revision == revision) {
+ LIST_DELETE(&elem->lru);
+ LIST_INSERT(&lru->list, &elem->lru);
+ return elem;
+ }
+
+ if (!elem->domain)
+ return NULL; // currently locked
+
+ /* recycle this entry */
+ LIST_DELETE(&elem->lru);
+ }
+ else {
+ /* New entry inserted, initialize and move to the head of the
+ * LRU list, and lock it until commit.
+ */
+ lru->cache_usage++;
+ lru->spare = NULL; // used, need a new one next time
+ }
+
+ elem->domain = NULL;
+ LIST_INSERT(&lru->list, &elem->lru);
+
+ if (lru->cache_usage > lru->cache_size) {
+ /* try to kill oldest entry */
+ struct lru64 *old;
+
+ old = container_of(lru->list.p, typeof(*old), lru);
+ if (old->domain) {
+ /* not locked */
+ LIST_DELETE(&old->lru);
+ __eb64_delete(&old->node);
+ if (old->data && old->free)
+ old->free(old->data);
+ if (!lru->spare)
+ lru->spare = old;
+ else {
+ free(old);
+ }
+ lru->cache_usage--;
+ }
+ }
+ return elem;
+}
+
+/* Commit element <elem> with data <data>, domain <domain> and revision
+ * <revision>. <elem> is checked for NULL so that it's possible to call it
+ * with the result from a call to lru64_get(). The caller might lock it using a
+ * spinlock or mutex shared with the one around lru64_get().
+ */
+void lru64_commit(struct lru64 *elem, void *data, void *domain,
+ unsigned long long revision, void (*free)(void *))
+{
+ if (!elem)
+ return;
+
+ elem->data = data;
+ elem->revision = revision;
+ elem->domain = domain;
+ elem->free = free;
+}
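+
+/* A minimal sketch of the lookup/compute/commit pattern described above
+ * (compute_value() and free_value() are hypothetical; locking, if needed,
+ * is left to the caller):
+ *
+ *   struct lru64 *item = lru64_get(key, lru, domain, revision);
+ *   if (item && item->domain)
+ *           return (long)item->data;        // cache hit
+ *   void *data = compute_value(key);        // the expensive operation
+ *   if (item)
+ *           lru64_commit(item, data, domain, revision, free_value);
+ */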
+
+/* Create a new LRU cache of <size> entries. Returns the new cache or NULL in
+ * case of allocation failure.
+ */
+struct lru64_head *lru64_new(int size)
+{
+ struct lru64_head *lru;
+
+ lru = malloc(sizeof(*lru));
+ if (lru) {
+ lru->list.p = lru->list.n = &lru->list;
+ lru->keys = EB_ROOT_UNIQUE;
+ lru->spare = NULL;
+ lru->cache_size = size;
+ lru->cache_usage = 0;
+ }
+ return lru;
+}
+
+/* Tries to destroy the LRU cache <lru>. Returns the number of locked entries
+ * that prevent it from being destroyed, or zero meaning everything was done.
+ */
+int lru64_destroy(struct lru64_head *lru)
+{
+ struct lru64 *elem, *next;
+
+ if (!lru)
+ return 0;
+
+ elem = container_of(lru->list.p, typeof(*elem), lru);
+ while (&elem->lru != &lru->list) {
+ next = container_of(elem->lru.p, typeof(*next), lru);
+ if (elem->domain) {
+ /* not locked */
+ LIST_DELETE(&elem->lru);
+ eb64_delete(&elem->node);
+ if (elem->data && elem->free)
+ elem->free(elem->data);
+ free(elem);
+ lru->cache_usage--;
+ lru->cache_size--;
+ }
+ elem = next;
+ }
+
+ if (lru->cache_usage)
+ return lru->cache_usage;
+
+ free(lru);
+ return 0;
+}
+
+/* kill the <nb> least used entries from the <lru> cache */
+void lru64_kill_oldest(struct lru64_head *lru, unsigned long int nb)
+{
+ struct lru64 *elem, *next;
+
+ for (elem = container_of(lru->list.p, typeof(*elem), lru);
+ nb && (&elem->lru != &lru->list);
+ elem = next) {
+ next = container_of(elem->lru.p, typeof(*next), lru);
+ if (!elem->domain)
+ continue; /* locked entry */
+
+ LIST_DELETE(&elem->lru);
+ eb64_delete(&elem->node);
+ if (elem->data && elem->free)
+ elem->free(elem->data);
+ if (!lru->spare)
+ lru->spare = elem;
+ else
+ free(elem);
+ lru->cache_usage--;
+ nb--;
+ }
+}
+
+/* The code below is just for validation and performance testing. It's an
+ * example of a function taking some time to return results that could be
+ * cached.
+ */
+#ifdef STANDALONE
+
+#include <stdio.h>
+
+static unsigned int misses;
+
+static unsigned long long sum(unsigned long long x)
+{
+#ifndef TEST_LRU_FAST_OPERATION
+ if (x < 1)
+ return 0;
+ return x + sum(x * 99 / 100 - 1);
+#else
+ return (x << 16) - (x << 8) - 1;
+#endif
+}
+
+static long get_value(struct lru64_head *lru, long a)
+{
+ struct lru64 *item = NULL;
+
+ if (lru) {
+ item = lru64_get(a, lru, lru, 0);
+ if (item && item->domain)
+ return (long)item->data;
+ }
+ misses++;
+ /* do the painful work here */
+ a = sum(a);
+ if (item)
+ lru64_commit(item, (void *)a, lru, 0, 0); /* same revision as the lookup above */
+ return a;
+}
+
+static inline unsigned int statistical_prng()
+{
+ static unsigned int statistical_prng_state = 0x12345678;
+ unsigned int x = statistical_prng_state;
+
+ x ^= x << 13;
+ x ^= x >> 17;
+ x ^= x << 5;
+ return statistical_prng_state = x;
+}
+
+/* pass #of loops in argv[1] and set argv[2] to something to use the LRU */
+int main(int argc, char **argv)
+{
+ struct lru64_head *lru = NULL;
+ long long ret;
+ int total, loops;
+
+ if (argc < 2) {
+ printf("Need a number of rounds and optionally an LRU cache size (0..65536)\n");
+ exit(1);
+ }
+
+ total = atoi(argv[1]);
+
+ if (argc > 2) /* cache size */
+ lru = lru64_new(atoi(argv[2]));
+
+ ret = 0;
+ for (loops = 0; loops < total; loops++) {
+ ret += get_value(lru, statistical_prng() & 65535);
+ }
+ /* just for accuracy control */
+ printf("ret=%llx, hits=%u, misses=%u (%d %% hits)\n", ret, (unsigned)(total-misses), misses, (int)((float)(total-misses) * 100.0 / total));
+
+ while (lru64_destroy(lru));
+
+ return 0;
+}
+
+#endif
diff --git a/src/mailers.c b/src/mailers.c
new file mode 100644
index 0000000..05d5313
--- /dev/null
+++ b/src/mailers.c
@@ -0,0 +1,321 @@
+/*
+ * Mailer management.
+ *
+ * Copyright 2015 Horms Solutions Ltd, Simon Horman <horms@verge.net.au>
+ * Copyright 2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/check.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/mailers.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/server-t.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+struct mailers *mailers = NULL;
+
+DECLARE_STATIC_POOL(pool_head_email_alert, "email_alert", sizeof(struct email_alert));
+
+/****************************** Email alerts ******************************/
+/* NOTE: It may be pertinent to use an applet to handle email alerts */
+/* instead of a tcp-check ruleset */
+/**************************************************************************/
+void email_alert_free(struct email_alert *alert)
+{
+ struct tcpcheck_rule *rule, *back;
+
+ if (!alert)
+ return;
+
+ if (alert->rules.list) {
+ list_for_each_entry_safe(rule, back, alert->rules.list, list) {
+ LIST_DELETE(&rule->list);
+ free_tcpcheck(rule, 1);
+ }
+ free_tcpcheck_vars(&alert->rules.preset_vars);
+ ha_free(&alert->rules.list);
+ }
+ pool_free(pool_head_email_alert, alert);
+}
+
+static struct task *process_email_alert(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+ struct email_alertq *q;
+ struct email_alert *alert;
+
+ q = container_of(check, typeof(*q), check);
+
+ HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ while (1) {
+ if (!(check->state & CHK_ST_ENABLED)) {
+ if (LIST_ISEMPTY(&q->email_alerts)) {
+ /* All alerts processed, queue the task */
+ t->expire = TICK_ETERNITY;
+ task_queue(t);
+ goto end;
+ }
+
+ alert = LIST_NEXT(&q->email_alerts, typeof(alert), list);
+ LIST_DELETE(&alert->list);
+ t->expire = now_ms;
+ check->tcpcheck_rules = &alert->rules;
+ check->status = HCHK_STATUS_INI;
+ check->state |= CHK_ST_ENABLED;
+ }
+
+ process_chk(t, context, state);
+ if (check->state & CHK_ST_INPROGRESS)
+ break;
+
+ alert = container_of(check->tcpcheck_rules, typeof(*alert), rules);
+ email_alert_free(alert);
+ check->tcpcheck_rules = NULL;
+ check->server = NULL;
+ check->state &= ~CHK_ST_ENABLED;
+ }
+ end:
+ HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ return t;
+}
+
+/* Initializes mailer alerts for the proxy <p> using <mls> parameters.
+ *
+ * The function returns 0 on success, otherwise it returns 1 and <err> is
+ * filled.
+ */
+int init_email_alert(struct mailers *mls, struct proxy *p, char **err)
+{
+ struct mailer *mailer;
+ struct email_alertq *queues;
+ const char *err_str;
+ int i = 0;
+
+ if ((queues = calloc(mls->count, sizeof(*queues))) == NULL) {
+ memprintf(err, "out of memory while allocating mailer alerts queues");
+ goto fail_no_queue;
+ }
+
+ for (mailer = mls->mailer_list; mailer; i++, mailer = mailer->next) {
+ struct email_alertq *q = &queues[i];
+ struct check *check = &q->check;
+ struct task *t;
+
+ LIST_INIT(&q->email_alerts);
+ HA_SPIN_INIT(&q->lock);
+ check->obj_type = OBJ_TYPE_CHECK;
+ check->inter = mls->timeout.mail;
+ check->rise = DEF_AGENT_RISETIME;
+ check->proxy = p;
+ check->fall = DEF_AGENT_FALLTIME;
+ if ((err_str = init_check(check, PR_O2_TCPCHK_CHK))) {
+ memprintf(err, "%s", err_str);
+ goto error;
+ }
+
+ check->xprt = mailer->xprt;
+ check->addr = mailer->addr;
+ check->port = get_host_port(&mailer->addr);
+
+ if ((t = task_new_anywhere()) == NULL) {
+ memprintf(err, "out of memory while allocating mailer alerts task");
+ goto error;
+ }
+
+ check->task = t;
+ t->process = process_email_alert;
+ t->context = check;
+
+ /* no expiration: the task is only woken up when an alert is enqueued */
+ t->expire = TICK_ETERNITY;
+ check->start = now;
+ task_queue(t);
+ }
+
+ mls->users++;
+ free(p->email_alert.mailers.name);
+ p->email_alert.mailers.m = mls;
+ p->email_alert.queues = queues;
+ return 0;
+
+ error:
+ for (i = 0; i < mls->count; i++) {
+ struct email_alertq *q = &queues[i];
+ struct check *check = &q->check;
+
+ free_check(check);
+ }
+ free(queues);
+ fail_no_queue:
+ return 1;
+}
+
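+/* Builds one email alert for queue <q> and appends it to the queue. The
+ * tcp-check rule list built below encodes a plain SMTP dialogue, roughly
+ * (expected server replies on the left, sent commands on the right):
+ *
+ *   220 -> HELO <myhostname>
+ *   250 -> MAIL FROM:<from>
+ *   250 -> RCPT TO:<to>
+ *   250 -> DATA
+ *   354 -> headers, message body, then '.'
+ *   250 -> QUIT
+ *   221
+ */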
+static int enqueue_one_email_alert(struct proxy *p, struct server *s,
+ struct email_alertq *q, const char *msg)
+{
+ struct email_alert *alert;
+ struct tcpcheck_rule *tcpcheck;
+ struct check *check = &q->check;
+
+ if ((alert = pool_alloc(pool_head_email_alert)) == NULL)
+ goto error;
+ LIST_INIT(&alert->list);
+ alert->rules.flags = TCPCHK_RULES_TCP_CHK;
+ alert->rules.list = calloc(1, sizeof(*alert->rules.list));
+ if (!alert->rules.list)
+ goto error;
+ LIST_INIT(alert->rules.list);
+ LIST_INIT(&alert->rules.preset_vars); /* unused for email alerts */
+ alert->srv = s;
+
+ if ((tcpcheck = pool_zalloc(pool_head_tcpcheck_rule)) == NULL)
+ goto error;
+ tcpcheck->action = TCPCHK_ACT_CONNECT;
+ tcpcheck->comment = NULL;
+
+ LIST_APPEND(alert->rules.list, &tcpcheck->list);
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "220 "))
+ goto error;
+
+ {
+ const char * const strs[4] = { "HELO ", p->email_alert.myhostname, "\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[4] = { "MAIL FROM:<", p->email_alert.from, ">\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[4] = { "RCPT TO:<", p->email_alert.to, ">\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[2] = { "DATA\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "354 "))
+ goto error;
+
+ {
+ struct tm tm;
+ char datestr[48];
+ const char * const strs[18] = {
+ "From: ", p->email_alert.from, "\r\n",
+ "To: ", p->email_alert.to, "\r\n",
+ "Date: ", datestr, "\r\n",
+ "Subject: [HAProxy Alert] ", msg, "\r\n",
+ "\r\n",
+ msg, "\r\n",
+ "\r\n",
+ ".\r\n",
+ NULL
+ };
+
+ get_localtime(date.tv_sec, &tm);
+
+ if (strftime(datestr, sizeof(datestr), "%a, %d %b %Y %T %z (%Z)", &tm) == 0) {
+ goto error;
+ }
+
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[2] = { "QUIT\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "221 "))
+ goto error;
+
+ HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ task_wakeup(check->task, TASK_WOKEN_MSG);
+ LIST_APPEND(&q->email_alerts, &alert->list);
+ HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ return 1;
+
+error:
+ email_alert_free(alert);
+ return 0;
+}
+
+static void enqueue_email_alert(struct proxy *p, struct server *s, const char *msg)
+{
+ int i;
+ struct mailer *mailer;
+
+ for (i = 0, mailer = p->email_alert.mailers.m->mailer_list;
+ i < p->email_alert.mailers.m->count; i++, mailer = mailer->next) {
+ if (!enqueue_one_email_alert(p, s, &p->email_alert.queues[i], msg)) {
+ ha_alert("Email alert [%s] could not be enqueued: out of memory\n", p->id);
+ return;
+ }
+ }
+
+ return;
+}
+
+/*
+ * Send email alert if configured.
+ */
+void send_email_alert(struct server *s, int level, const char *format, ...)
+{
+ va_list argp;
+ char buf[1024];
+ int len;
+ struct proxy *p = s->proxy;
+
+ if (!p->email_alert.mailers.m || level > p->email_alert.level || format == NULL)
+ return;
+
+ va_start(argp, format);
+ len = vsnprintf(buf, sizeof(buf), format, argp);
+ va_end(argp);
+
+ if (len < 0 || len >= sizeof(buf)) {
+ ha_alert("Email alert [%s] could not format message\n", p->id);
+ return;
+ }
+
+ enqueue_email_alert(p, s, buf);
+}
diff --git a/src/map.c b/src/map.c
new file mode 100644
index 0000000..52ddb5c
--- /dev/null
+++ b/src/map.c
@@ -0,0 +1,1229 @@
+/*
+ * MAP management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <syslog.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/cli.h>
+#include <haproxy/map.h>
+#include <haproxy/pattern.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+
+/* Parse an IPv4 or IPv6 address and store it into the sample.
+ * The output type is IPv4 or IPv6.
+ */
+int map_parse_ip(const char *text, struct sample_data *data)
+{
+ int len = strlen(text);
+
+ if (buf2ip(text, len, &data->u.ipv4)) {
+ data->type = SMP_T_IPV4;
+ return 1;
+ }
+ if (buf2ip6(text, len, &data->u.ipv6)) {
+ data->type = SMP_T_IPV6;
+ return 1;
+ }
+ return 0;
+}
+
+/* Parse a string and store a pointer to it into the sample. The original
+ * string must be left in memory because we return a direct memory reference.
+ * The output type is SMP_T_STR. There is no risk that the data will be
+ * overwritten because sample_conv_map() makes a const sample with this
+ * output.
+ */
+int map_parse_str(const char *text, struct sample_data *data)
+{
+ data->u.str.area = (char *)text;
+ data->u.str.data = strlen(text);
+ data->u.str.size = data->u.str.data + 1;
+ data->type = SMP_T_STR;
+ return 1;
+}
+
+/* Parse an integer and convert it to a sample. The output type is always
+ * SMP_T_SINT. The function returns zero (error) if the string is malformed
+ * or the number is too large.
+ */
+int map_parse_int(const char *text, struct sample_data *data)
+{
+ data->type = SMP_T_SINT;
+ data->u.sint = read_int64(&text, text + strlen(text));
+ if (*text != '\0')
+ return 0;
+ return 1;
+}
+
+/* This creates and initializes a map descriptor.
+ * Returns NULL in case of an out-of-memory error.
+ */
+static struct map_descriptor *map_create_descriptor(struct sample_conv *conv)
+{
+ struct map_descriptor *desc;
+
+ desc = calloc(1, sizeof(*desc));
+ if (!desc)
+ return NULL;
+
+ desc->conv = conv;
+
+ return desc;
+}
+
+/* This function loads the map file according to the data type declared in
+ * the "struct sample_conv".
+ *
+ * It chooses the indexation type (ebtree or list) according to the type of
+ * match needed.
+ */
+int sample_load_map(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct map_descriptor *desc;
+
+ if (!(global.mode & MODE_STARTING)) {
+ memprintf(err, "map: cannot load map at runtime");
+ return 0;
+ }
+
+ /* create new map descriptor */
+ desc = map_create_descriptor(conv);
+ if (!desc) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+
+ /* Initialize pattern */
+ pattern_init_head(&desc->pat);
+
+ /* This is the original pattern, it must be freed */
+ desc->do_free = 1;
+
+ /* Set the match method. */
+ desc->pat.match = pat_match_fcts[(long)conv->private];
+ desc->pat.parse = pat_parse_fcts[(long)conv->private];
+ desc->pat.index = pat_index_fcts[(long)conv->private];
+ desc->pat.prune = pat_prune_fcts[(long)conv->private];
+ desc->pat.expect_type = pat_match_types[(long)conv->private];
+
+ /* Set the output parse method. */
+ switch (desc->conv->out_type) {
+ case SMP_T_STR: desc->pat.parse_smp = map_parse_str; break;
+ case SMP_T_SINT: desc->pat.parse_smp = map_parse_int; break;
+ case SMP_T_ADDR: desc->pat.parse_smp = map_parse_ip; break;
+ default:
+ memprintf(err, "map: internal haproxy error: no default parse case for the input type <%d>.",
+ conv->out_type);
+ free(desc);
+ return 0;
+ }
+
+ /* Load map. */
+ if (!pattern_read_from_file(&desc->pat, PAT_REF_MAP, arg[0].data.str.area, PAT_MF_NO_DNS,
+ 1, err, file, line))
+ return 0;
+
+ /* Maps of type IP support a string as the default value. This
+ * string can be an IPv4 or an IPv6 address, so we must convert it.
+ */
+ if (arg[1].type != ARGT_STOP && desc->conv->out_type == SMP_T_ADDR) {
+ struct sample_data data;
+ if (!map_parse_ip(arg[1].data.str.area, &data)) {
+ memprintf(err, "map: cannot parse default ip <%s>.",
+ arg[1].data.str.area);
+ return 0;
+ }
+ chunk_destroy(&arg[1].data.str);
+ if (data.type == SMP_T_IPV4) {
+ arg[1].type = ARGT_IPV4;
+ arg[1].data.ipv4 = data.u.ipv4;
+ } else {
+ arg[1].type = ARGT_IPV6;
+ arg[1].data.ipv6 = data.u.ipv6;
+ }
+ }
+
+ /* replace the first argument by this definition */
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_MAP;
+ arg[0].data.map = desc;
+
+ return 1;
+}
+
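+/* Runtime conversion. As a configuration sketch (the file path, sample
+ * fetch and default value below are illustrative only), this converter is
+ * typically used as:
+ *
+ *   http-request set-header X-Pool %[req.hdr(host),lower,map(/etc/haproxy/pools.map,default-pool)]
+ */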
+static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct map_descriptor *desc;
+ struct pattern *pat;
+ struct buffer *str;
+
+ /* get config */
+ desc = arg_p[0].data.map;
+
+ /* Execute the match function. */
+ pat = pattern_exec_match(&desc->pat, smp, 1);
+
+ /* Match case. */
+ if (pat) {
+ if (pat->data) {
+ /* In the regm case, merge the sample with the input. */
+ if ((long)private == PAT_MATCH_REGM) {
+ struct buffer *tmptrash;
+ int len;
+
+ /* Copy the content of the sample because it could
+ be scratched by the upcoming get_trash_chunk() call */
+ tmptrash = alloc_trash_chunk();
+ if (!tmptrash)
+ return 0;
+
+ tmptrash->data = smp->data.u.str.data;
+ if (tmptrash->data > (tmptrash->size-1))
+ tmptrash->data = tmptrash->size-1;
+
+ memcpy(tmptrash->area, smp->data.u.str.area, tmptrash->data);
+ tmptrash->area[tmptrash->data] = 0;
+
+ str = get_trash_chunk();
+ len = exp_replace(str->area, str->size,
+ tmptrash->area,
+ pat->data->u.str.area,
+ (regmatch_t *)smp->ctx.a[0]);
+ free_trash_chunk(tmptrash);
+
+ if (len == -1)
+ return 0;
+
+ str->data = len;
+ smp->data.u.str = *str;
+ return 1;
+ }
+ /* Copy sample. */
+ smp->data = *pat->data;
+ smp->flags |= SMP_F_CONST;
+ return 1;
+ }
+
+ /* Just return an int sample containing 1. */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 1;
+ return 1;
+ }
+
+ /* If no default value available, the converter fails. */
+ if (arg_p[1].type == ARGT_STOP)
+ return 0;
+
+ /* Return the default value. */
+ switch (desc->conv->out_type) {
+
+ case SMP_T_STR:
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str = arg_p[1].data.str;
+ break;
+
+ case SMP_T_SINT:
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = arg_p[1].data.sint;
+ break;
+
+ case SMP_T_ADDR:
+ if (arg_p[1].type == ARGT_IPV4) {
+ smp->data.type = SMP_T_IPV4;
+ smp->data.u.ipv4 = arg_p[1].data.ipv4;
+ } else {
+ smp->data.type = SMP_T_IPV6;
+ smp->data.u.ipv6 = arg_p[1].data.ipv6;
+ }
+ break;
+ }
+
+ return 1;
+}
+
+/* This function is used with map and acl management. It permits browsing
+ * each reference. The variable <getnext> must contain the current node,
+ * <end> must point to the root node, and <flags> is used to filter the
+ * required nodes.
+ */
+static inline
+struct pat_ref *pat_list_get_next(struct pat_ref *getnext, struct list *end,
+ unsigned int flags)
+{
+ struct pat_ref *ref = getnext;
+
+ while (1) {
+
+ /* Get next list entry. */
+ ref = LIST_NEXT(&ref->list, struct pat_ref *, list);
+
+ /* If the entry is the last of the list, return NULL. */
+ if (&ref->list == end)
+ return NULL;
+
+ /* If the entry matches the flags, return it. */
+ if (ref->flags & flags)
+ return ref;
+ }
+}
+
+static inline
+struct pat_ref *pat_ref_lookup_ref(const char *reference)
+{
+ int id;
+ char *error;
+
+ /* If the reference starts with a '#', it is a numeric id. */
+ if (reference[0] == '#') {
+ /* Try to convert the numeric id. If the conversion fails, the lookup fails. */
+ id = strtol(reference + 1, &error, 10);
+ if (*error != '\0')
+ return NULL;
+
+ /* Perform the unique id lookup. */
+ return pat_ref_lookupid(id);
+ }
+
+ /* Perform the string lookup. */
+ return pat_ref_lookup(reference);
+}
+
+/* This function is used with map and acl management. It permits browsing
+ * each reference.
+ */
+static inline
+struct pattern_expr *pat_expr_get_next(struct pattern_expr *getnext, struct list *end)
+{
+ struct pattern_expr *expr;
+ expr = LIST_NEXT(&getnext->list, struct pattern_expr *, list);
+ if (&expr->list == end)
+ return NULL;
+ return expr;
+}
+
+/* appctx context for the "{show|get|add|del|*} {map|acl}" commands. This is
+ * used even by commands that only have a parser and no I/O handler because
+ * it provides a unified way to manipulate some fields and will make it
+ * easier to expand some of them later if needed.
+ */
+struct show_map_ctx {
+ struct pat_ref *ref;
+ struct bref bref; /* back-reference from the pat_ref_elt being dumped */
+ struct pattern_expr *expr;
+ struct buffer chunk;
+ unsigned int display_flags;
+ unsigned int curr_gen; /* current/latest generation, for show/clear */
+ unsigned int prev_gen; /* prev generation, for clear */
+ enum {
+ STATE_INIT = 0, /* initialize list and backrefs */
+ STATE_LIST, /* list entries */
+ STATE_DONE, /* finished */
+ } state; /* state of the dump */
+};
+
+/* expects the current generation ID in ctx->curr_gen */
+static int cli_io_handler_pat_list(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct pat_ref_elt *elt;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW))) {
+ /* If we're forced to shut down, we might have to remove our
+ * reference to the last ref_elt being dumped.
+ */
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ LIST_DEL_INIT(&ctx->bref.users);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+ return 1;
+ }
+
+ switch (ctx->state) {
+ case STATE_INIT:
+ ctx->state = STATE_LIST;
+ /* fall through */
+
+ case STATE_LIST:
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ } else {
+ ctx->bref.ref = ctx->ref->head.n;
+ }
+
+ while (ctx->bref.ref != &ctx->ref->head) {
+ chunk_reset(&trash);
+
+ elt = LIST_ELEM(ctx->bref.ref, struct pat_ref_elt *, list);
+
+ if (elt->gen_id != ctx->curr_gen)
+ goto skip;
+
+ /* build messages */
+ if (elt->sample)
+ chunk_appendf(&trash, "%p %s %s\n",
+ elt, elt->pattern,
+ elt->sample);
+ else
+ chunk_appendf(&trash, "%p %s\n",
+ elt, elt->pattern);
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ LIST_APPEND(&elt->back_refs, &ctx->bref.users);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ return 0;
+ }
+ skip:
+ /* get next list entry and check the end of the list */
+ ctx->bref.ref = elt->list.n;
+ }
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* fall through */
+
+ default:
+ ctx->state = STATE_DONE;
+ return 1;
+ }
+}
+
+static int cli_io_handler_pats_list(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+
+ switch (ctx->state) {
+ case STATE_INIT:
+ /* Display the column headers. If the message cannot be sent,
+ * quit the function with returning 0. The function is called
+ * later and restarted at the state "STATE_INIT".
+ */
+ chunk_reset(&trash);
+ chunk_appendf(&trash, "# id (file) description\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ /* Now, we start the browsing of the references lists.
+ * Note that the following call to LIST_ELEM returns a bad pointer. The only
+ * available field of this pointer is <list>. It is used with the function
+ * pat_list_get_next() for returning the first available entry
+ */
+ ctx->ref = LIST_ELEM(&pattern_reference, struct pat_ref *, list);
+ ctx->ref = pat_list_get_next(ctx->ref, &pattern_reference,
+ ctx->display_flags);
+ ctx->state = STATE_LIST;
+ /* fall through */
+
+ case STATE_LIST:
+ while (ctx->ref) {
+ chunk_reset(&trash);
+
+ /* Build messages. If the reference is used by another category than
+ * the listed categories, display the information in the message.
+ */
+ chunk_appendf(&trash, "%d (%s) %s. curr_ver=%u next_ver=%u entry_cnt=%llu\n", ctx->ref->unique_id,
+ ctx->ref->reference ? ctx->ref->reference : "",
+ ctx->ref->display, ctx->ref->curr_gen, ctx->ref->next_gen,
+ ctx->ref->entry_cnt);
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream */
+ return 0;
+ }
+
+ /* get next list entry and check the end of the list */
+ ctx->ref = pat_list_get_next(ctx->ref, &pattern_reference,
+ ctx->display_flags);
+ }
+
+ /* fall through */
+
+ default:
+ ctx->state = STATE_DONE;
+ return 1;
+ }
+ return 0;
+}
+
+static int cli_io_handler_map_lookup(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+ struct sample sample;
+ struct pattern *pat;
+ int match_method;
+
+ switch (ctx->state) {
+ case STATE_INIT:
+ /* Init to the first entry. The list cannot change */
+ ctx->expr = LIST_ELEM(&ctx->ref->pat, struct pattern_expr *, list);
+ ctx->expr = pat_expr_get_next(ctx->expr, &ctx->ref->pat);
+ ctx->state = STATE_LIST;
+ /* fall through */
+
+ case STATE_LIST:
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* for each lookup type */
+ while (ctx->expr) {
+ /* initialise chunk to build new message */
+ chunk_reset(&trash);
+
+ /* execute pattern matching */
+ sample.data.type = SMP_T_STR;
+ sample.flags = SMP_F_CONST;
+ sample.data.u.str.data = ctx->chunk.data;
+ sample.data.u.str.area = ctx->chunk.area;
+
+ if (ctx->expr->pat_head->match &&
+ sample_convert(&sample, ctx->expr->pat_head->expect_type))
+ pat = ctx->expr->pat_head->match(&sample, ctx->expr, 1);
+ else
+ pat = NULL;
+
+ /* build return message: set type of match */
+ for (match_method=0; match_method<PAT_MATCH_NUM; match_method++)
+ if (ctx->expr->pat_head->match == pat_match_fcts[match_method])
+ break;
+ if (match_method >= PAT_MATCH_NUM)
+ chunk_appendf(&trash, "type=unknown(%p)", ctx->expr->pat_head->match);
+ else
+ chunk_appendf(&trash, "type=%s", pat_match_names[match_method]);
+
+ /* case sensitive */
+ if (ctx->expr->mflags & PAT_MF_IGNORE_CASE)
+ chunk_appendf(&trash, ", case=insensitive");
+ else
+ chunk_appendf(&trash, ", case=sensitive");
+
+ /* Display no match, and set default value */
+ if (!pat) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ chunk_appendf(&trash, ", found=no");
+ else
+ chunk_appendf(&trash, ", match=no");
+ }
+
+ /* Display match and match info */
+ else {
+ /* display match */
+ if (ctx->display_flags == PAT_REF_MAP)
+ chunk_appendf(&trash, ", found=yes");
+ else
+ chunk_appendf(&trash, ", match=yes");
+
+ /* display index mode */
+ if (pat->sflags & PAT_SF_TREE)
+ chunk_appendf(&trash, ", idx=tree");
+ else
+ chunk_appendf(&trash, ", idx=list");
+
+ /* display pattern */
+ if (ctx->display_flags == PAT_REF_MAP) {
+ if (pat->ref && pat->ref->pattern)
+ chunk_appendf(&trash, ", key=\"%s\"", pat->ref->pattern);
+ else
+ chunk_appendf(&trash, ", key=unknown");
+ }
+ else {
+ if (pat->ref && pat->ref->pattern)
+ chunk_appendf(&trash, ", pattern=\"%s\"", pat->ref->pattern);
+ else
+ chunk_appendf(&trash, ", pattern=unknown");
+ }
+
+ /* display return value */
+ if (ctx->display_flags == PAT_REF_MAP) {
+ if (pat->data && pat->ref && pat->ref->sample)
+ chunk_appendf(&trash, ", value=\"%s\", type=\"%s\"", pat->ref->sample,
+ smp_to_type[pat->data->type]);
+ else
+ chunk_appendf(&trash, ", value=none");
+ }
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ /* display response */
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream */
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ return 0;
+ }
+
+ /* get next entry */
+ ctx->expr = pat_expr_get_next(ctx->expr,
+ &ctx->ref->pat);
+ }
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* fall through */
+
+ default:
+ ctx->state = STATE_DONE;
+ return 1;
+ }
+}
+
+static void cli_release_mlook(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+
+ ha_free(&ctx->chunk.area);
+}
+
+
+static int cli_parse_get_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 || strcmp(args[1], "acl") == 0) {
+ /* Set flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* No parameter. */
+ if (!*args[2] || !*args[3]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Missing map identifier and/or key.\n");
+ else
+ return cli_err(appctx, "Missing ACL identifier and/or key.\n");
+ }
+
+ /* lookup into the maps */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ /* copy input string. The string must be allocated because
+ * it may be used over multiple iterations. It's released
+ * at the end and upon abort anyway.
+ */
+ ctx->chunk.data = strlen(args[3]);
+ ctx->chunk.size = ctx->chunk.data + 1;
+ ctx->chunk.area = strdup(args[3]);
+ if (!ctx->chunk.area)
+ return cli_err(appctx, "Out of memory error.\n");
+
+ return 0;
+ }
+ return 1;
+}
+
+static int cli_parse_prepare_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 ||
+ strcmp(args[1], "acl") == 0) {
+ uint next_gen;
+ char *msg = NULL;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+ next_gen = pat_ref_newgen(ctx->ref);
+ return cli_dynmsg(appctx, LOG_INFO, memprintf(&msg, "New version created: %u\n", next_gen));
+ }
+
+ return 0;
+}
+
+static void cli_release_show_map(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ LIST_DEL_INIT(&ctx->bref.users);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+}
+
+static int cli_parse_show_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 ||
+ strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* no parameter: display all available maps */
+ if (!*args[2]) {
+ appctx->io_handler = cli_io_handler_pats_list;
+ return 0;
+ }
+
+ /* For both "map" and "acl" we may have an optional generation
+ * number specified using a "@" character before the pattern
+ * file name.
+ */
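+ /* e.g. "show map @1 /etc/haproxy/h.map" (the file name is illustrative) */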
+ if (*args[2] == '@') {
+ gen = args[2] + 1;
+ args++;
+ }
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ /* set the desired generation id in curr_gen */
+ if (gen)
+ ctx->curr_gen = str2uic(gen);
+ else
+ ctx->curr_gen = ctx->ref->curr_gen;
+
+ LIST_INIT(&ctx->bref.users);
+ appctx->io_handler = cli_io_handler_pat_list;
+ appctx->io_release = cli_release_show_map;
+ return 0;
+ }
+
+ return 0;
+}
+
+static int cli_parse_set_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0) {
+ char *err;
+
+ /* Set flags. */
+ ctx->display_flags = PAT_REF_MAP;
+
+ /* Expect three parameters: map name, key and new value. */
+ if (!*args[2] || !*args[3] || !*args[4])
+ return cli_err(appctx, "'set map' expects three parameters: map identifier, key and value.\n");
+
+ /* Lookup the reference in the maps. */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+
+ /* If the entry identifier starts with a '#', it is considered as
+ * a pointer id
+ */
+ if (args[3][0] == '#' && args[3][1] == '0' && args[3][2] == 'x') {
+ struct pat_ref_elt *ref;
+ long long int conv;
+ char *error;
+
+ /* Convert argument to integer value. */
+ conv = strtoll(&args[3][1], &error, 16);
+ if (*error != '\0')
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Convert and check integer to pointer. */
+ ref = (struct pat_ref_elt *)(long)conv;
+ if ((long long int)(long)ref != conv)
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Try to modify the entry. */
+ err = NULL;
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_set_by_id(ctx->ref, ref, args[4], &err)) {
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to update an entry.\n");
+ }
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+ else {
+ /* Else, use the entry identifier as pattern
+ * string, and update the value.
+ */
+ err = NULL;
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_set(ctx->ref, args[3], args[4], &err)) {
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to update an entry.\n");
+ }
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+
+ /* The set is done, send message. */
+ appctx->st0 = CLI_ST_PROMPT;
+ return 0;
+ }
+ return 1;
+}
+
+static int cli_parse_add_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 ||
+ strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+ uint genid = 0;
+ int ret;
+ char *err;
+
+ /* Set flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* For both "map" and "acl" we may have an optional generation
+ * number specified using a "@" character before the pattern
+ * file name.
+ */
+ if (*args[2] == '@') {
+ gen = args[2] + 1;
+ args++;
+ }
+
+ /* If the keyword is "map", we expect:
+ * - three parameters if there is no payload
+ * - one parameter if there is a payload
+ * If it is "acl", we expect only two parameters
+ */
+ if (ctx->display_flags == PAT_REF_MAP) {
+ if ((!payload && (!*args[2] || !*args[3] || !*args[4])) ||
+ (payload && !*args[2]))
+ return cli_err(appctx,
+ "'add map' expects three parameters (map identifier, key and value)"
+ " or one parameter (map identifier) and a payload\n");
+ }
+ else if (!*args[2] || !*args[3])
+ return cli_err(appctx, "'add acl' expects two parameters: ACL identifier and pattern.\n");
+
+ /* Lookup for the reference. */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ if (gen) {
+ genid = str2uic(gen);
+ if ((int)(genid - ctx->ref->next_gen) > 0) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Version number in the future, please use 'prepare map' before.\n");
+ else
+ return cli_err(appctx, "Version number in the future, please use 'prepare acl' before.\n");
+ }
+ }
+
+ /* The command "add acl" is prohibited if the reference
+ * uses samples.
+ */
+ if ((ctx->display_flags & PAT_REF_ACL) &&
+ (ctx->ref->flags & PAT_REF_SMP)) {
+ return cli_err(appctx,
+ "This ACL is shared with a map containing samples. "
+ "You must use the command 'add map' to add values.\n");
+ }
+
+ /* Add value(s). If no payload is used, key and value are read
+ * from the command line and only one key is set. If a payload
+ * is passed, one key/value pair is read per line till the end
+ * of the payload is reached.
+ */
+ err = NULL;
+
+ do {
+ char *key = args[3];
+ char *value = args[4];
+ size_t l;
+
+ if (payload) {
+ /* key and value passed as payload, one pair per line */
+ if (!*payload)
+ break;
+
+ key = payload;
+ l = strcspn(key, " \t");
+ payload += l;
+
+ if (!*payload && ctx->display_flags == PAT_REF_MAP)
+ return cli_dynerr(appctx, memprintf(&err, "Missing value for key '%s'.\n", key));
+
+ key[l] = 0;
+ payload++;
+
+ /* value */
+ payload += strspn(payload, " \t");
+ value = payload;
+ l = strcspn(value, "\n");
+ payload += l;
+ if (*payload)
+ payload++;
+ value[l] = 0;
+ }
+
+ if (ctx->display_flags != PAT_REF_MAP)
+ value = NULL;
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ ret = !!pat_ref_load(ctx->ref, gen ? genid : ctx->ref->curr_gen, key, value, -1, &err);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (!ret) {
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to add a key.\n");
+ }
+ } while (payload && *payload);
+
+ /* The add is done, send message. */
+ appctx->st0 = CLI_ST_PROMPT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int cli_parse_del_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* Expect two parameters: map name and key. */
+ if (!*args[2] || !*args[3]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "This command expects two parameters: map identifier and key.\n");
+ else
+ return cli_err(appctx, "This command expects two parameters: ACL identifier and key.\n");
+ }
+
+ /* Lookup the reference in the maps. */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ /* If the entry identifier starts with a '#', it is considered
+ * a pointer id
+ */
+ if (args[3][0] == '#' && args[3][1] == '0' && args[3][2] == 'x') {
+ struct pat_ref_elt *ref;
+ long long int conv;
+ char *error;
+
+ /* Convert argument to integer value. */
+ conv = strtoll(&args[3][1], &error, 16);
+ if (*error != '\0')
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Convert and check integer to pointer. */
+ ref = (struct pat_ref_elt *)(long)conv;
+ if ((long long int)(long)ref != conv)
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Try to delete the entry. */
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_delete_by_id(ctx->ref, ref)) {
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* The entry is not found, send message. */
+ return cli_err(appctx, "Key not found.\n");
+ }
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+ else {
+ /* Else, use the entry identifier as pattern
+ * string and try to delete the entry.
+ */
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_delete(ctx->ref, args[3])) {
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* The entry is not found, send message. */
+ return cli_err(appctx, "Key not found.\n");
+ }
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+
+ /* The deletion is done, send message. */
+ appctx->st0 = CLI_ST_PROMPT;
+ return 1;
+}
+
+/* Continues clearing a map that was started in the parser. The range of
+ * generations this applies to is taken from ctx->curr_gen for the oldest
+ * and ctx->prev_gen for the latest.
+ */
+static int cli_io_handler_clear_map(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+ int finished;
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ finished = pat_ref_purge_range(ctx->ref, ctx->curr_gen, ctx->prev_gen, 100);
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (!finished) {
+ /* let's come back later */
+ applet_have_more_data(appctx);
+ return 0;
+ }
+ return 1;
+}
+
+/* note: sets ctx->curr_gen and ctx->prev_gen to the oldest and
+ * latest generations to clear, respectively, and will call the clear_map
+ * handler.
+ */
+static int cli_parse_clear_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 || strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* For both "map" and "acl" we may have an optional generation
+ * number specified using a "@" character before the pattern
+ * file name.
+ */
+ if (*args[2] == '@') {
+ gen = args[2] + 1;
+ args++;
+ }
+
+ /* no parameter */
+ if (!*args[2]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Missing map identifier.\n");
+ else
+ return cli_err(appctx, "Missing ACL identifier.\n");
+ }
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ /* set the desired generation id in curr_gen/prev_gen */
+ if (gen)
+ ctx->prev_gen = ctx->curr_gen = str2uic(gen);
+ else
+ ctx->prev_gen = ctx->curr_gen = ctx->ref->curr_gen;
+
+ /* delegate the clearing to the I/O handler which can yield */
+ return 0;
+ }
+ return 1;
+}
+
+/* note: validates the version number, commits it if it is in range, and
+ * sets ctx->curr_gen and ctx->prev_gen to the range of older generations
+ * to trash via the clear_map I/O handler.
+ */
+static int cli_parse_commit_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 || strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+ uint genid;
+ uint ret;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ if (*args[2] != '@')
+ return cli_err(appctx, "Missing version number.\n");
+
+ /* The generation number is mandatory for a commit. The range
+ * of generations trashed by a commit covers half of the 32-bit
+ * generation space, ending at the generation just before the
+ * committed one.
+ */
+ gen = args[2] + 1;
+ genid = str2uic(gen);
+ ctx->prev_gen = genid - 1;
+ ctx->curr_gen = ctx->prev_gen - ((~0U) >> 1);
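+ /* For example (illustrative numbers): committing @10 gives
+ * prev_gen=9 and curr_gen=9-0x7fffffff, so the purge covers the
+ * half of the 32-bit generation space that ends just before the
+ * committed version.
+ */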
+
+ /* no parameter */
+ if (!*args[3]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Missing map identifier.\n");
+ else
+ return cli_err(appctx, "Missing ACL identifier.\n");
+ }
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[3]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ HA_SPIN_LOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (genid - (ctx->ref->curr_gen + 1) <
+ ctx->ref->next_gen - ctx->ref->curr_gen)
+ ret = pat_ref_commit(ctx->ref, genid);
+ else
+ ret = 1;
+ HA_SPIN_UNLOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (ret != 0)
+ return cli_err(appctx, "Version number out of range.\n");
+
+ /* delegate the clearing to the I/O handler which can yield */
+ return 0;
+ }
+ return 1;
+}
+
+/* register cli keywords */
+
+static struct cli_kw_list cli_kws = {{ },{
+ { { "add", "acl", NULL }, "add acl [@<ver>] <acl> <pattern> : add an acl entry", cli_parse_add_map, NULL },
+ { { "clear", "acl", NULL }, "clear acl [@<ver>] <acl> : clear the contents of this acl", cli_parse_clear_map, cli_io_handler_clear_map, NULL },
+ { { "commit","acl", NULL }, "commit acl @<ver> <acl> : commit the ACL at this version", cli_parse_commit_map, cli_io_handler_clear_map, NULL },
+ { { "del", "acl", NULL }, "del acl <acl> [<key>|#<ref>] : delete acl entries matching <key>", cli_parse_del_map, NULL },
+ { { "get", "acl", NULL }, "get acl <acl> <value> : report the patterns matching a sample for an ACL", cli_parse_get_map, cli_io_handler_map_lookup, cli_release_mlook },
+ { { "prepare","acl",NULL }, "prepare acl <acl> : prepare a new version for atomic ACL replacement", cli_parse_prepare_map, NULL },
+ { { "show", "acl", NULL }, "show acl [@<ver>] <acl>] : report available acls or dump an acl's contents", cli_parse_show_map, NULL },
+ { { "add", "map", NULL }, "add map [@<ver>] <map> <key> <val> : add a map entry (payload supported instead of key/val)", cli_parse_add_map, NULL },
+ { { "clear", "map", NULL }, "clear map [@<ver>] <map> : clear the contents of this map", cli_parse_clear_map, cli_io_handler_clear_map, NULL },
+ { { "commit","map", NULL }, "commit map @<ver> <map> : commit the map at this version", cli_parse_commit_map, cli_io_handler_clear_map, NULL },
+ { { "del", "map", NULL }, "del map <map> [<key>|#<ref>] : delete map entries matching <key>", cli_parse_del_map, NULL },
+ { { "get", "map", NULL }, "get map <acl> <value> : report the keys and values matching a sample for a map", cli_parse_get_map, cli_io_handler_map_lookup, cli_release_mlook },
+ { { "prepare","map",NULL }, "prepare map <acl> : prepare a new version for atomic map replacement", cli_parse_prepare_map, NULL },
+ { { "set", "map", NULL }, "set map <map> [<key>|#<ref>] <value> : modify a map entry", cli_parse_set_map, NULL },
+ { { "show", "map", NULL }, "show map [@ver] [map] : report available maps or dump a map's contents", cli_parse_show_map, NULL },
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
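+
+/* Example of a resulting runtime session over the stats socket
+ * (hypothetical socket path and identifiers, shown for illustration
+ * only):
+ *
+ *   $ echo "prepare map #0" | socat stdio /var/run/haproxy.sock
+ *   New version created: 3
+ *   $ echo "add map @3 #0 10.0.0.1 bk1" | socat stdio /var/run/haproxy.sock
+ *   $ echo "commit map @3 #0" | socat stdio /var/run/haproxy.sock
+ */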
+
+/* Note: must not be declared <const> as its list will be overwritten
+ *
+ * For the map_*_int keywords, the output is declared as SMP_T_SINT, but the converter function
+ * can provide SMP_T_SINT or SMP_T_BOOL depending on how the patterns found in the
+ * file can be parsed.
+ *
+ * For the map_*_ip keyword, the output is declared as SMP_T_ADDR, but the converter function
+ * can provide SMP_T_IPV4 or SMP_T_IPV6 depending on the patterns found in the file.
+ *
+ * The map_* keywords only emit strings.
+ *
+ * The output type is only used during the configuration parsing. It is used for detecting
+ * compatibility problems.
+ *
+ * The arguments are: <file>[,<default value>]
+ */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "map", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_STR },
+ { "map_str", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_STR },
+ { "map_beg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_BEG },
+ { "map_sub", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_SUB },
+ { "map_dir", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DIR },
+ { "map_dom", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM },
+ { "map_end", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END },
+ { "map_reg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG },
+ { "map_regm", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REGM},
+ { "map_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT },
+ { "map_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP },
+
+ { "map_str_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_STR },
+ { "map_beg_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_BEG },
+ { "map_sub_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_SUB },
+ { "map_dir_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_DIR },
+ { "map_dom_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_DOM },
+ { "map_end_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_END },
+ { "map_reg_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_REG },
+ { "map_int_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_SINT, SMP_T_SINT, (void *)PAT_MATCH_INT },
+ { "map_ip_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_ADDR, SMP_T_SINT, (void *)PAT_MATCH_IP },
+
+ { "map_str_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_STR },
+ { "map_beg_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_BEG },
+ { "map_sub_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_SUB },
+ { "map_dir_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_DIR },
+ { "map_dom_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_DOM },
+ { "map_end_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_END },
+ { "map_reg_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_REG },
+ { "map_int_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_ADDR, (void *)PAT_MATCH_INT },
+ { "map_ip_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_ADDR, (void *)PAT_MATCH_IP },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
diff --git a/src/mjson.c b/src/mjson.c
new file mode 100644
index 0000000..549b0d5
--- /dev/null
+++ b/src/mjson.c
@@ -0,0 +1,1048 @@
+// Copyright (c) 2018-2020 Cesanta Software Limited
+// All rights reserved
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include <float.h>
+#include <math.h>
+
+#include <import/mjson.h>
+
+#if defined(_MSC_VER)
+#define alloca(x) _alloca(x)
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1700
+#define va_copy(x, y) (x) = (y)
+#define isinf(x) !_finite(x)
+#define isnan(x) _isnan(x)
+#endif
+
+static double mystrtod(const char *str, char **end);
+
+static int mjson_esc(int c, int esc) {
+ const char *p, *esc1 = "\b\f\n\r\t\\\"", *esc2 = "bfnrt\\\"";
+ for (p = esc ? esc1 : esc2; *p != '\0'; p++) {
+ if (*p == c) return esc ? esc2[p - esc1] : esc1[p - esc2];
+ }
+ return 0;
+}
+
+static int mjson_escape(int c) {
+ return mjson_esc(c, 1);
+}
+
+static int mjson_pass_string(const char *s, int len) {
+ int i;
+ for (i = 0; i < len; i++) {
+ if (s[i] == '\\' && i + 1 < len && mjson_escape(s[i + 1])) {
+ i++;
+ } else if (s[i] == '\0') {
+ return MJSON_ERROR_INVALID_INPUT;
+ } else if (s[i] == '"') {
+ return i;
+ }
+ }
+ return MJSON_ERROR_INVALID_INPUT;
+}
+
+int mjson(const char *s, int len, mjson_cb_t cb, void *ud) {
+ enum { S_VALUE, S_KEY, S_COLON, S_COMMA_OR_EOO } expecting = S_VALUE;
+ unsigned char nesting[MJSON_MAX_DEPTH];
+ int i, depth = 0;
+#define MJSONCALL(ev) \
+ if (cb != NULL && cb(ev, s, start, i - start + 1, ud)) return i + 1;
+
+// In the ASCII table, the distance between `[` and `]` is 2.
+// Ditto for `{` and `}`. Hence +2 in the code below.
+#define MJSONEOO() \
+ do { \
+ if (c != nesting[depth - 1] + 2) return MJSON_ERROR_INVALID_INPUT; \
+ depth--; \
+ if (depth == 0) { \
+ MJSONCALL(tok); \
+ return i + 1; \
+ } \
+ } while (0)
+
+ for (i = 0; i < len; i++) {
+ int start = i;
+ unsigned char c = ((unsigned char *) s)[i];
+ int tok = c;
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue;
+ // printf("- %c [%.*s] %d %d\n", c, i, s, depth, expecting);
+ switch (expecting) {
+ case S_VALUE:
+ if (c == '{') {
+ if (depth >= (int) sizeof(nesting)) return MJSON_ERROR_TOO_DEEP;
+ nesting[depth++] = c;
+ expecting = S_KEY;
+ break;
+ } else if (c == '[') {
+ if (depth >= (int) sizeof(nesting)) return MJSON_ERROR_TOO_DEEP;
+ nesting[depth++] = c;
+ break;
+ } else if (c == ']' && depth > 0) { // Empty array
+ MJSONEOO();
+ } else if (c == 't' && i + 3 < len && memcmp(&s[i], "true", 4) == 0) {
+ i += 3;
+ tok = MJSON_TOK_TRUE;
+ } else if (c == 'n' && i + 3 < len && memcmp(&s[i], "null", 4) == 0) {
+ i += 3;
+ tok = MJSON_TOK_NULL;
+ } else if (c == 'f' && i + 4 < len && memcmp(&s[i], "false", 5) == 0) {
+ i += 4;
+ tok = MJSON_TOK_FALSE;
+ } else if (c == '-' || ((c >= '0' && c <= '9'))) {
+ char *end = NULL;
+ mystrtod(&s[i], &end);
+ if (end != NULL) i += (int) (end - &s[i] - 1);
+ tok = MJSON_TOK_NUMBER;
+ } else if (c == '"') {
+ int n = mjson_pass_string(&s[i + 1], len - i - 1);
+ if (n < 0) return n;
+ i += n + 1;
+ tok = MJSON_TOK_STRING;
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ if (depth == 0) {
+ MJSONCALL(tok);
+ return i + 1;
+ }
+ expecting = S_COMMA_OR_EOO;
+ break;
+
+ case S_KEY:
+ if (c == '"') {
+ int n = mjson_pass_string(&s[i + 1], len - i - 1);
+ if (n < 0) return n;
+ i += n + 1;
+ tok = MJSON_TOK_KEY;
+ expecting = S_COLON;
+ } else if (c == '}') { // Empty object
+ MJSONEOO();
+ expecting = S_COMMA_OR_EOO;
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ break;
+
+ case S_COLON:
+ if (c == ':') {
+ expecting = S_VALUE;
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ break;
+
+ case S_COMMA_OR_EOO:
+ if (depth <= 0) return MJSON_ERROR_INVALID_INPUT;
+ if (c == ',') {
+ expecting = (nesting[depth - 1] == '{') ? S_KEY : S_VALUE;
+ } else if (c == ']' || c == '}') {
+ MJSONEOO();
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ break;
+ }
+ MJSONCALL(tok);
+ }
+ return MJSON_ERROR_INVALID_INPUT;
+}
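+
+/* A minimal usage sketch of the event-driven parser above (hypothetical
+ * callback, not used by HAProxy itself): the callback receives either a
+ * punctuation character ('{', '}', '[', ']', ',', ':') or an
+ * MJSON_TOK_* value, plus the offset/length of the token in the input:
+ *
+ *   static int dump_cb(int ev, const char *s, int off, int len, void *ud) {
+ *       printf("event %d: '%.*s'\n", ev, len, s + off);
+ *       return 0;            // 0 = keep parsing, non-zero = stop
+ *   }
+ *   ...
+ *   mjson("{\"a\":true}", 10, dump_cb, NULL);
+ */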
+
+struct msjon_get_data {
+ const char *path; // Lookup json path
+ int pos; // Current path index
+ int d1; // Current depth of traversal
+ int d2; // Expected depth of traversal
+ int i1; // Index in an array
+ int i2; // Expected index in an array
+ int obj; // If the value is array/object, offset where it starts
+ const char **tokptr; // Destination
+ int *toklen; // Destination length
+ int tok; // Returned token
+};
+
+#include <stdio.h>
+
+static int plen1(const char *s) {
+ int i = 0, n = 0;
+ while (s[i] != '\0' && s[i] != '.' && s[i] != '[')
+ n++, i += s[i] == '\\' ? 2 : 1;
+ // printf("PLEN: s: [%s], [%.*s] => %d\n", s, i, s, n);
+ return n;
+}
+
+static int plen2(const char *s) {
+ int i = 0, n = 0;
+ while (s[i] != '\0' && s[i] != '.' && s[i] != '[')
+ n++, i += s[i] == '\\' ? 2 : 1;
+ // printf("PLEN: s: [%s], [%.*s] => %d\n", s, i, s, n);
+ return i;
+}
+
+static int kcmp(const char *a, const char *b, int n) {
+ int i = 0, j = 0, r = 0;
+ for (i = 0, j = 0; j < n; i++, j++) {
+ if (b[i] == '\\') i++;
+ if ((r = a[j] - b[i]) != 0) return r;
+ }
+ // printf("KCMP: a: [%.*s], b:[%.*s] ==> %d\n", n, a, i, b, r);
+ return r;
+}
+
+static int mjson_get_cb(int tok, const char *s, int off, int len, void *ud) {
+ struct msjon_get_data *data = (struct msjon_get_data *) ud;
+ // printf("--> %2x %2d %2d %2d %2d\t'%s'\t'%.*s'\t\t'%.*s'\n", tok, data->d1,
+ // data->d2, data->i1, data->i2, data->path + data->pos, off, s, len,
+ // s + off);
+ if (data->tok != MJSON_TOK_INVALID) return 1; // Found
+
+ if (tok == '{') {
+ if (!data->path[data->pos] && data->d1 == data->d2) data->obj = off;
+ data->d1++;
+ } else if (tok == '[') {
+ if (data->d1 == data->d2 && data->path[data->pos] == '[') {
+ data->i1 = 0;
+ data->i2 = (int) mystrtod(&data->path[data->pos + 1], NULL);
+ if (data->i1 == data->i2) {
+ data->d2++;
+ data->pos += 3;
+ }
+ }
+ if (!data->path[data->pos] && data->d1 == data->d2) data->obj = off;
+ data->d1++;
+ } else if (tok == ',') {
+ if (data->d1 == data->d2 + 1) {
+ data->i1++;
+ if (data->i1 == data->i2) {
+ while (data->path[data->pos] != ']') data->pos++;
+ data->pos++;
+ data->d2++;
+ }
+ }
+ } else if (tok == MJSON_TOK_KEY && data->d1 == data->d2 + 1 &&
+ data->path[data->pos] == '.' && s[off] == '"' &&
+ s[off + len - 1] == '"' &&
+ plen1(&data->path[data->pos + 1]) == len - 2 &&
+ kcmp(s + off + 1, &data->path[data->pos + 1], len - 2) == 0) {
+ data->d2++;
+ data->pos += plen2(&data->path[data->pos + 1]) + 1;
+ } else if (tok == MJSON_TOK_KEY && data->d1 == data->d2) {
+ return 1; // Exhausted path, not found
+ } else if (tok == '}' || tok == ']') {
+ data->d1--;
+ // data->d2--;
+ if (!data->path[data->pos] && data->d1 == data->d2 && data->obj != -1) {
+ data->tok = tok - 2;
+ if (data->tokptr) *data->tokptr = s + data->obj;
+ if (data->toklen) *data->toklen = off - data->obj + 1;
+ return 1;
+ }
+ } else if (MJSON_TOK_IS_VALUE(tok)) {
+ // printf("TOK --> %d\n", tok);
+ if (data->d1 == data->d2 && !data->path[data->pos]) {
+ data->tok = tok;
+ if (data->tokptr) *data->tokptr = s + off;
+ if (data->toklen) *data->toklen = len;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+enum mjson_tok mjson_find(const char *s, int len, const char *jp,
+ const char **tokptr, int *toklen) {
+ struct msjon_get_data data = {jp, 1, 0, 0, 0,
+ 0, -1, tokptr, toklen, MJSON_TOK_INVALID};
+ if (jp[0] != '$') return MJSON_TOK_INVALID;
+ if (mjson(s, len, mjson_get_cb, &data) < 0) return MJSON_TOK_INVALID;
+ return (enum mjson_tok) data.tok;
+}
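+
+/* Usage sketch (illustrative values): looking up a JSON path returns the
+ * token type and points <p>/<n> at the raw value in place:
+ *
+ *   const char *json = "{\"user\":{\"ids\":[14,12]}}";
+ *   const char *p; int n;
+ *   if (mjson_find(json, strlen(json), "$.user.ids[1]", &p, &n) == MJSON_TOK_NUMBER)
+ *       printf("%.*s\n", n, p);   // prints "12"
+ */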
+
+int mjson_get_number(const char *s, int len, const char *path, double *v) {
+ const char *p;
+ int tok, n;
+ if ((tok = mjson_find(s, len, path, &p, &n)) == MJSON_TOK_NUMBER) {
+ if (v != NULL) *v = mystrtod(p, NULL);
+ }
+ return tok == MJSON_TOK_NUMBER ? 1 : 0;
+}
+
+int mjson_get_bool(const char *s, int len, const char *path, int *v) {
+ int tok = mjson_find(s, len, path, NULL, NULL);
+ if (tok == MJSON_TOK_TRUE && v != NULL) *v = 1;
+ if (tok == MJSON_TOK_FALSE && v != NULL) *v = 0;
+ return tok == MJSON_TOK_TRUE || tok == MJSON_TOK_FALSE ? 1 : 0;
+}
+
+static unsigned char mjson_unhex_nimble(const char *s) {
+ unsigned char i, v = 0;
+ for (i = 0; i < 2; i++) {
+ int c = s[i];
+ if (i > 0) v <<= 4;
+ v |= (c >= '0' && c <= '9') ? c - '0'
+ : (c >= 'A' && c <= 'F') ? c - '7' : c - 'W';
+ }
+ return v;
+}
+
+static int mjson_unescape(const char *s, int len, char *to, int n) {
+ int i, j;
+ for (i = 0, j = 0; i < len && j < n; i++, j++) {
+ if (s[i] == '\\' && i + 5 < len && s[i + 1] == 'u') {
+ // \uXXXX escape. We only process simple one-byte chars
+ // \u00xx from the ASCII range. More complex chars would require
+ // dragging in a UTF-8 library, which is too much for us
+ if (s[i + 2] != '0' || s[i + 3] != '0') return -1; // Too much, give up
+ to[j] = mjson_unhex_nimble(s + i + 4);
+ i += 5;
+ } else if (s[i] == '\\' && i + 1 < len) {
+ int c = mjson_esc(s[i + 1], 0);
+ if (c == 0) return -1;
+ to[j] = c;
+ i++;
+ } else {
+ to[j] = s[i];
+ }
+ }
+ if (j >= n) return -1;
+ if (n > 0) to[j] = '\0';
+ return j;
+}
+
+int mjson_get_string(const char *s, int len, const char *path, char *to,
+ int n) {
+ const char *p;
+ int sz;
+ if (mjson_find(s, len, path, &p, &sz) != MJSON_TOK_STRING) return -1;
+ return mjson_unescape(p + 1, sz - 2, to, n);
+}
+
+int mjson_get_hex(const char *s, int len, const char *x, char *to, int n) {
+ const char *p;
+ int i, j, sz;
+ if (mjson_find(s, len, x, &p, &sz) != MJSON_TOK_STRING) return -1;
+ for (i = j = 0; i < sz - 3 && j < n; i += 2, j++) {
+ ((unsigned char *) to)[j] = mjson_unhex_nimble(p + i + 1);
+ }
+ if (j < n) to[j] = '\0';
+ return j;
+}
+
+#if MJSON_ENABLE_BASE64
+static int mjson_base64rev(int c) {
+ if (c >= 'A' && c <= 'Z') {
+ return c - 'A';
+ } else if (c >= 'a' && c <= 'z') {
+ return c + 26 - 'a';
+ } else if (c >= '0' && c <= '9') {
+ return c + 52 - '0';
+ } else if (c == '+') {
+ return 62;
+ } else if (c == '/') {
+ return 63;
+ } else {
+ return 64;
+ }
+}
+
+int mjson_base64_dec(const char *src, int n, char *dst, int dlen) {
+ const char *end = src + n;
+ int len = 0;
+ while (src + 3 < end && len < dlen) {
+ int a = mjson_base64rev(src[0]), b = mjson_base64rev(src[1]),
+ c = mjson_base64rev(src[2]), d = mjson_base64rev(src[3]);
+ dst[len++] = (a << 2) | (b >> 4);
+ if (src[2] != '=' && len < dlen) {
+ dst[len++] = (b << 4) | (c >> 2);
+ if (src[3] != '=' && len < dlen) {
+ dst[len++] = (c << 6) | d;
+ }
+ }
+ src += 4;
+ }
+ if (len < dlen) dst[len] = '\0';
+ return len;
+}
+
+int mjson_get_base64(const char *s, int len, const char *path, char *to,
+ int n) {
+ const char *p;
+ int sz;
+ if (mjson_find(s, len, path, &p, &sz) != MJSON_TOK_STRING) return 0;
+ return mjson_base64_dec(p + 1, sz - 2, to, n);
+}
+#endif // MJSON_ENABLE_BASE64
+
+#if MJSON_ENABLE_NEXT
+struct nextdata {
+ int off, len, depth, t, vo, arrayindex;
+ int *koff, *klen, *voff, *vlen, *vtype;
+};
+
+static int next_cb(int tok, const char *s, int off, int len, void *ud) {
+ struct nextdata *d = (struct nextdata *) ud;
+ // int i;
+ switch (tok) {
+ case '{':
+ case '[':
+ if (d->depth == 0 && tok == '[') d->arrayindex = 0;
+ if (d->depth == 1 && off > d->off) {
+ d->vo = off;
+ d->t = tok == '{' ? MJSON_TOK_OBJECT : MJSON_TOK_ARRAY;
+ if (d->voff) *d->voff = off;
+ if (d->vtype) *d->vtype = d->t;
+ }
+ d->depth++;
+ break;
+ case '}':
+ case ']':
+ d->depth--;
+ if (d->depth == 1 && d->vo) {
+ d->len = off + len;
+ if (d->vlen) *d->vlen = d->len - d->vo;
+ if (d->arrayindex >= 0) {
+ if (d->koff) *d->koff = d->arrayindex; // koff holds array index
+ if (d->klen) *d->klen = 0; // klen holds 0
+ }
+ return 1;
+ }
+ if (d->depth == 1 && d->arrayindex >= 0) d->arrayindex++;
+ break;
+ case ',':
+ case ':':
+ break;
+ case MJSON_TOK_KEY:
+ if (d->depth == 1 && d->off < off) {
+ if (d->koff) *d->koff = off; // And report back to the user
+ if (d->klen) *d->klen = len; // If we have to
+ }
+ break;
+ default:
+ if (d->depth != 1) break;
+ // If we're iterating over the array
+ if (off > d->off) {
+ d->len = off + len;
+ if (d->vlen) *d->vlen = len; // value length
+ if (d->voff) *d->voff = off; // value offset
+ if (d->vtype) *d->vtype = tok; // value type
+ if (d->arrayindex >= 0) {
+ if (d->koff) *d->koff = d->arrayindex; // koff holds array index
+ if (d->klen) *d->klen = 0; // klen holds 0
+ }
+ return 1;
+ }
+ if (d->arrayindex >= 0) d->arrayindex++;
+ break;
+ }
+ (void) s;
+ return 0;
+}
+
+int mjson_next(const char *s, int n, int off, int *koff, int *klen, int *voff,
+ int *vlen, int *vtype) {
+ struct nextdata d = {off, 0, 0, 0, 0, -1, koff, klen, voff, vlen, vtype};
+ mjson(s, n, next_cb, &d);
+ return d.len;
+}
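+
+/* Iteration sketch (hypothetical caller code): walk the key/value pairs
+ * of a top-level object by feeding the returned offset back in:
+ *
+ *   int koff, klen, voff, vlen, vtype, off = 0;
+ *   while ((off = mjson_next(s, n, off, &koff, &klen,
+ *                            &voff, &vlen, &vtype)) != 0) {
+ *       printf("key: %.*s, value: %.*s\n",
+ *              klen, s + koff, vlen, s + voff);
+ *   }
+ */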
+#endif
+
+#if MJSON_ENABLE_PRINT
+int mjson_print_fixed_buf(const char *ptr, int len, void *fndata) {
+ struct mjson_fixedbuf *fb = (struct mjson_fixedbuf *) fndata;
+ int i, left = fb->size - 1 - fb->len;
+ if (left < len) len = left;
+ for (i = 0; i < len; i++) fb->ptr[fb->len + i] = ptr[i];
+ fb->len += len;
+ fb->ptr[fb->len] = '\0';
+ return len;
+}
+
+// This function allocates memory in chunks of size MJSON_DYNBUF_CHUNK
+// to decrease memory fragmentation, when many calls are executed to
+// print e.g. a base64 string or a hex string.
+int mjson_print_dynamic_buf(const char *ptr, int len, void *fndata) {
+ char *s, *buf = *(char **) fndata;
+ size_t curlen = buf == NULL ? 0 : strlen(buf);
+ size_t new_size = curlen + len + 1 + MJSON_DYNBUF_CHUNK;
+ new_size -= new_size % MJSON_DYNBUF_CHUNK;
+
+ if ((s = (char *) realloc(buf, new_size)) == NULL) {
+ return 0;
+ } else {
+ memcpy(s + curlen, ptr, len);
+ s[curlen + len] = '\0';
+ *(char **) fndata = s;
+ return len;
+ }
+}
+
+int mjson_print_null(const char *ptr, int len, void *userdata) {
+ (void) ptr;
+ (void) userdata;
+ return len;
+}
+
+int mjson_print_buf(mjson_print_fn_t fn, void *fnd, const char *buf, int len) {
+ return fn(buf, len, fnd);
+}
+
+int mjson_print_long(mjson_print_fn_t fn, void *fnd, long val, int is_signed) {
+ unsigned long v = val, s = 0, n, i;
+ char buf[20], t;
+ if (is_signed && val < 0) {
+ buf[s++] = '-', v = -val;
+ }
+ // This loop emits the digits in reverse order: v % 10 yields the
+ // least significant digit first. The string is reversed just below.
+ for (n = 0; v > 0; v /= 10) buf[s + n++] = "0123456789"[v % 10];
+ // Reverse a string
+ for (i = 0; i < n / 2; i++)
+ t = buf[s + i], buf[s + i] = buf[s + n - i - 1], buf[s + n - i - 1] = t;
+ if (val == 0) buf[n++] = '0'; // Handle special case
+ return fn(buf, s + n, fnd);
+}
+
+int mjson_print_int(mjson_print_fn_t fn, void *fnd, int v, int s) {
+ return mjson_print_long(fn, fnd, s ? (long) v : (unsigned) v, s);
+}
+
+static int addexp(char *buf, int e, int sign) {
+ int n = 0;
+ buf[n++] = 'e';
+ buf[n++] = sign;
+ if (e > 400) return 0;
+ if (e < 10) buf[n++] = '0';
+ if (e >= 100) buf[n++] = (e / 100) + '0', e -= 100 * (e / 100);
+ if (e >= 10) buf[n++] = (e / 10) + '0', e -= 10 * (e / 10);
+ buf[n++] = e + '0';
+ return n;
+}
+
+int mjson_print_dbl(mjson_print_fn_t fn, void *fnd, double d, int width) {
+ char buf[40];
+ int i, s = 0, n = 0, e = 0;
+ double t, mul, saved;
+ if (d == 0.0) return fn("0", 1, fnd);
+ if (isinf(d)) return fn(d > 0 ? "inf" : "-inf", d > 0 ? 3 : 4, fnd);
+ if (isnan(d)) return fn("nan", 3, fnd);
+ if (d < 0.0) d = -d, buf[s++] = '-';
+
+ // Round
+ saved = d;
+ mul = 1.0;
+ while (d >= 10.0 && d / mul >= 10.0) mul *= 10.0;
+ while (d <= 1.0 && d / mul <= 1.0) mul /= 10.0;
+ for (i = 0, t = mul * 5; i < width; i++) t /= 10.0;
+ d += t;
+ // Calculate exponent, and 'mul' for scientific representation
+ mul = 1.0;
+ while (d >= 10.0 && d / mul >= 10.0) mul *= 10.0, e++;
+ while (d < 1.0 && d / mul < 1.0) mul /= 10.0, e--;
+ // printf(" --> %g %d %g %g\n", saved, e, t, mul);
+
+ if (e >= width) {
+ struct mjson_fixedbuf fb = {buf + s, (int) sizeof(buf) - s, 0};
+ n = mjson_print_dbl(mjson_print_fixed_buf, &fb, saved / mul, width);
+ // printf(" --> %.*g %d [%.*s]\n", 10, d / t, e, fb.len, fb.ptr);
+ n += addexp(buf + s + n, e, '+');
+ return fn(buf, s + n, fnd);
+ } else if (e <= -width) {
+ struct mjson_fixedbuf fb = {buf + s, (int) sizeof(buf) - s, 0};
+ n = mjson_print_dbl(mjson_print_fixed_buf, &fb, saved / mul, width);
+ // printf(" --> %.*g %d [%.*s]\n", 10, d / mul, e, fb.len, fb.ptr);
+ n += addexp(buf + s + n, -e, '-');
+ return fn(buf, s + n, fnd);
+ } else {
+ for (i = 0, t = mul; d >= 1.0 && s + n < (int) sizeof(buf); i++) {
+ int ch = (int) (d / t);
+ if (n > 0 || ch > 0) buf[s + n++] = ch + '0';
+ d -= ch * t;
+ t /= 10.0;
+ }
+ // printf(" --> [%g] -> %g %g (%d) [%.*s]\n", saved, d, t, n, s + n, buf);
+ if (n == 0) buf[s++] = '0';
+ while (t >= 1.0 && n + s < (int) sizeof(buf)) buf[n++] = '0', t /= 10.0;
+ if (s + n < (int) sizeof(buf)) buf[n + s++] = '.';
+ // printf(" 1--> [%g] -> [%.*s]\n", saved, s + n, buf);
+ for (i = 0, t = 0.1; s + n < (int) sizeof(buf) && n < width; i++) {
+ int ch = (int) (d / t);
+ buf[s + n++] = ch + '0';
+ d -= ch * t;
+ t /= 10.0;
+ }
+ }
+ while (n > 0 && buf[s + n - 1] == '0') n--; // Trim trailing zeros
+ if (n > 0 && buf[s + n - 1] == '.') n--; // Trim trailing dot
+ return fn(buf, s + n, fnd);
+}
+
+int mjson_print_str(mjson_print_fn_t fn, void *fnd, const char *s, int len) {
+ int i, n = fn("\"", 1, fnd);
+ for (i = 0; i < len; i++) {
+ char c = mjson_escape(s[i]);
+ if (c) {
+ n += fn("\\", 1, fnd);
+ n += fn(&c, 1, fnd);
+ } else {
+ n += fn(&s[i], 1, fnd);
+ }
+ }
+ return n + fn("\"", 1, fnd);
+}
+
+#if MJSON_ENABLE_BASE64
+int mjson_print_b64(mjson_print_fn_t fn, void *fnd, const unsigned char *s,
+ int n) {
+ const char *t =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ int i, len = fn("\"", 1, fnd);
+ for (i = 0; i < n; i += 3) {
+ int a = s[i], b = i + 1 < n ? s[i + 1] : 0, c = i + 2 < n ? s[i + 2] : 0;
+ char buf[4] = {t[a >> 2], t[(a & 3) << 4 | (b >> 4)], '=', '='};
+ if (i + 1 < n) buf[2] = t[(b & 15) << 2 | (c >> 6)];
+ if (i + 2 < n) buf[3] = t[c & 63];
+ len += fn(buf, sizeof(buf), fnd);
+ }
+ return len + fn("\"", 1, fnd);
+}
+#endif /* MJSON_ENABLE_BASE64 */
+
+int mjson_vprintf(mjson_print_fn_t fn, void *fnd, const char *fmt,
+ va_list xap) {
+ int i = 0, n = 0;
+ va_list ap;
+ va_copy(ap, xap);
+ while (fmt[i] != '\0') {
+ if (fmt[i] == '%') {
+ char fc = fmt[++i];
+ int is_long = 0;
+ if (fc == 'l') {
+ is_long = 1;
+ fc = fmt[i + 1];
+ }
+ if (fc == 'Q') {
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_str(fn, fnd, buf ? buf : "",
+ buf ? (int) strlen(buf) : 0);
+ } else if (strncmp(&fmt[i], ".*Q", 3) == 0) {
+ int len = va_arg(ap, int);
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_str(fn, fnd, buf, len);
+ i += 2;
+ } else if (fc == 'd' || fc == 'u') {
+ int is_signed = (fc == 'd');
+ if (is_long) {
+ long val = va_arg(ap, long);
+ n += mjson_print_long(fn, fnd, val, is_signed);
+ i++;
+ } else {
+ int val = va_arg(ap, int);
+ n += mjson_print_int(fn, fnd, val, is_signed);
+ }
+ } else if (fc == 'B') {
+ const char *s = va_arg(ap, int) ? "true" : "false";
+ n += mjson_print_buf(fn, fnd, s, (int) strlen(s));
+ } else if (fc == 's') {
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_buf(fn, fnd, buf, (int) strlen(buf));
+ } else if (strncmp(&fmt[i], ".*s", 3) == 0) {
+ int len = va_arg(ap, int);
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_buf(fn, fnd, buf, len);
+ i += 2;
+ } else if (fc == 'g') {
+ n += mjson_print_dbl(fn, fnd, va_arg(ap, double), 6);
+ } else if (strncmp(&fmt[i], ".*g", 3) == 0) {
+ int width = va_arg(ap, int);
+ n += mjson_print_dbl(fn, fnd, va_arg(ap, double), width);
+ i += 2;
+#if MJSON_ENABLE_BASE64
+ } else if (fc == 'V') {
+ int len = va_arg(ap, int);
+ const char *buf = va_arg(ap, const char *);
+ n += mjson_print_b64(fn, fnd, (unsigned char *) buf, len);
+#endif
+ } else if (fc == 'H') {
+ const char *hex = "0123456789abcdef";
+ int i, len = va_arg(ap, int);
+ const unsigned char *p = va_arg(ap, const unsigned char *);
+ n += fn("\"", 1, fnd);
+ for (i = 0; i < len; i++) {
+ n += fn(&hex[(p[i] >> 4) & 15], 1, fnd);
+ n += fn(&hex[p[i] & 15], 1, fnd);
+ }
+ n += fn("\"", 1, fnd);
+ } else if (fc == 'M') {
+ mjson_vprint_fn_t vfn = va_arg(ap, mjson_vprint_fn_t);
+ n += vfn(fn, fnd, &ap);
+ }
+ i++;
+ } else {
+ n += mjson_print_buf(fn, fnd, &fmt[i++], 1);
+ }
+ }
+ va_end(xap);
+ va_end(ap);
+ return n;
+}
+
+int mjson_printf(mjson_print_fn_t fn, void *fnd, const char *fmt, ...) {
+ va_list ap;
+ int len;
+ va_start(ap, fmt);
+ len = mjson_vprintf(fn, fnd, fmt, ap);
+ va_end(ap);
+ return len;
+}
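+
+/* Formatting sketch (illustrative): the non-standard specifiers emit
+ * valid JSON, e.g. %Q quotes and escapes a string and %B prints a
+ * boolean:
+ *
+ *   char *s = NULL;
+ *   mjson_printf(mjson_print_dynamic_buf, &s,
+ *                "{%Q:%d, %Q:%B}", "a", 1, "b", 1);
+ *   // s == "{\"a\":1, \"b\":true}", realloc()ed, to be free()d by caller
+ */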
+#endif /* MJSON_ENABLE_PRINT */
+
+static int is_digit(int c) {
+ return c >= '0' && c <= '9';
+}
+
+/* NOTE: strtod() implementation by Yasuhiro Matsumoto. */
+static double mystrtod(const char *str, char **end) {
+ double d = 0.0;
+ int sign = 1, n = 0;
+ const char *p = str, *a = str;
+
+ /* decimal part */
+ if (*p == '-') {
+ sign = -1;
+ ++p;
+ } else if (*p == '+') {
+ ++p;
+ }
+ if (is_digit(*p)) {
+ d = (double) (*p++ - '0');
+ while (*p && is_digit(*p)) {
+ d = d * 10.0 + (double) (*p - '0');
+ ++p;
+ ++n;
+ }
+ a = p;
+ } else if (*p != '.') {
+ goto done;
+ }
+ d *= sign;
+
+ /* fraction part */
+ if (*p == '.') {
+ double f = 0.0;
+ double base = 0.1;
+ ++p;
+
+ if (is_digit(*p)) {
+ while (*p && is_digit(*p)) {
+ f += base * (*p - '0');
+ base /= 10.0;
+ ++p;
+ ++n;
+ }
+ }
+ d += f * sign;
+ a = p;
+ }
+
+ /* exponential part */
+ if ((*p == 'E') || (*p == 'e')) {
+ int i, e = 0, neg = 0;
+ p++;
+ if (*p == '-') p++, neg++;
+ if (*p == '+') p++;
+ while (is_digit(*p)) e = e * 10 + *p++ - '0';
+ if (neg) e = -e;
+#if 0
+ if (d == 2.2250738585072011 && e == -308) {
+ d = 0.0;
+ a = p;
+ goto done;
+ }
+ if (d == 2.2250738585072012 && e <= -308) {
+ d *= 1.0e-308;
+ a = p;
+ goto done;
+ }
+#endif
+ for (i = 0; i < e; i++) d *= 10;
+ for (i = 0; i < -e; i++) d /= 10;
+ a = p;
+ } else if (p > str && !is_digit(*(p - 1))) {
+ a = str;
+ goto done;
+ }
+
+done:
+ if (end) *end = (char *) a;
+ return d;
+}
+
+#if MJSON_ENABLE_MERGE
+int mjson_merge(const char *s, int n, const char *s2, int n2,
+ mjson_print_fn_t fn, void *userdata) {
+ int koff, klen, voff, vlen, t, t2, k, off = 0, len = 0, comma = 0;
+ if (n < 2) return len;
+ len += fn("{", 1, userdata);
+ while ((off = mjson_next(s, n, off, &koff, &klen, &voff, &vlen, &t)) != 0) {
+ char *path = (char *) alloca(klen + 1);
+ const char *val;
+ memcpy(path, "$.", 2);
+ memcpy(path + 2, s + koff + 1, klen - 2);
+ path[klen] = '\0';
+ if ((t2 = mjson_find(s2, n2, path, &val, &k)) != MJSON_TOK_INVALID) {
+ if (t2 == MJSON_TOK_NULL) continue; // null deletes the key
+ } else {
+ val = s + voff; // Key is not found in the update. Copy the old value.
+ }
+ if (comma) len += fn(",", 1, userdata);
+ len += fn(s + koff, klen, userdata);
+ len += fn(":", 1, userdata);
+ if (t == MJSON_TOK_OBJECT && t2 == MJSON_TOK_OBJECT) {
+ len += mjson_merge(s + voff, vlen, val, k, fn, userdata);
+ } else {
+ if (t2 != MJSON_TOK_INVALID) vlen = k;
+ len += fn(val, vlen, userdata);
+ }
+ comma = 1;
+ }
+ // Add missing keys
+ off = 0;
+ while ((off = mjson_next(s2, n2, off, &koff, &klen, &voff, &vlen, &t)) != 0) {
+ char *path = (char *) alloca(klen + 1);
+ const char *val;
+ if (t == MJSON_TOK_NULL) continue;
+ memcpy(path, "$.", 2);
+ memcpy(path + 2, s2 + koff + 1, klen - 2);
+ path[klen] = '\0';
+ if (mjson_find(s, n, path, &val, &vlen) != MJSON_TOK_INVALID) continue;
+ if (comma) len += fn(",", 1, userdata);
+ len += fn(s2 + koff, klen, userdata);
+ len += fn(":", 1, userdata);
+ len += fn(s2 + voff, vlen, userdata);
+ comma = 1;
+ }
+ len += fn("}", 1, userdata);
+ return len;
+}
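+
+/* Merge semantics in short (illustrative inputs): values from the update
+ * win, null deletes a key, and unknown keys are appended:
+ *
+ *   s  = {"a":1,"b":2}
+ *   s2 = {"a":3,"b":null,"c":4}
+ *   => {"a":3,"c":4}
+ */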
+#endif // MJSON_ENABLE_MERGE
+
+#if MJSON_ENABLE_PRETTY
+struct prettydata {
+ int level;
+ int len;
+ int prev;
+ const char *pad;
+ int padlen;
+ mjson_print_fn_t fn;
+ void *userdata;
+};
+
+static int pretty_cb(int ev, const char *s, int off, int len, void *ud) {
+ struct prettydata *d = (struct prettydata *) ud;
+ int i;
+ switch (ev) {
+ case '{':
+ case '[':
+ d->level++;
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ case '}':
+ case ']':
+ d->level--;
+ if (d->prev != '[' && d->prev != '{' && d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ case ',':
+ d->len += d->fn(s + off, len, d->userdata);
+ if (d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ break;
+ case ':':
+ d->len += d->fn(s + off, len, d->userdata);
+ if (d->padlen > 0) d->len += d->fn(" ", 1, d->userdata);
+ break;
+ case MJSON_TOK_KEY:
+ if (d->prev == '{' && d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ default:
+ if (d->prev == '[' && d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ }
+ d->prev = ev;
+ return 0;
+}
+
+int mjson_pretty(const char *s, int n, const char *pad, mjson_print_fn_t fn,
+ void *userdata) {
+ struct prettydata d = {0, 0, 0, pad, (int) strlen(pad), fn, userdata};
+ if (mjson(s, n, pretty_cb, &d) < 0) return -1;
+ return d.len;
+}
+#endif // MJSON_ENABLE_PRETTY
+
+#if MJSON_ENABLE_RPC
+struct jsonrpc_ctx jsonrpc_default_context;
+
+int mjson_globmatch(const char *s1, int n1, const char *s2, int n2) {
+ int i = 0, j = 0, ni = 0, nj = 0;
+ while (i < n1 || j < n2) {
+ if (i < n1 && j < n2 && (s1[i] == '?' || s2[j] == s1[i])) {
+ i++, j++;
+ } else if (i < n1 && (s1[i] == '*' || s1[i] == '#')) {
+ ni = i, nj = j + 1, i++;
+ } else if (nj > 0 && nj <= n2 && (s1[i - 1] == '#' || s2[j] != '/')) {
+ i = ni, j = nj;
+ } else {
+ return 0;
+ }
+ }
+ return 1;
+}
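+
+/* Matching sketch (illustrative patterns): '?' matches one character,
+ * '*' matches any run that does not cross a '/', and '#' matches across
+ * '/' as well:
+ *
+ *   mjson_globmatch("rpc.*", 5, "rpc.list", 8)  -> 1
+ *   mjson_globmatch("a/*",   3, "a/b/c",    5)  -> 0
+ *   mjson_globmatch("a/#",   3, "a/b/c",    5)  -> 1
+ */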
+
+void jsonrpc_return_errorv(struct jsonrpc_request *r, int code,
+ const char *message, const char *data_fmt,
+ va_list ap) {
+ if (r->id_len == 0) return;
+ mjson_printf(r->fn, r->fndata,
+ "{\"id\":%.*s,\"error\":{\"code\":%d,\"message\":%Q", r->id_len,
+ r->id, code, message == NULL ? "" : message);
+ if (data_fmt != NULL) {
+ mjson_printf(r->fn, r->fndata, ",\"data\":");
+ mjson_vprintf(r->fn, r->fndata, data_fmt, ap);
+ }
+ mjson_printf(r->fn, r->fndata, "}}\n");
+}
+
+void jsonrpc_return_error(struct jsonrpc_request *r, int code,
+ const char *message, const char *data_fmt, ...) {
+ va_list ap;
+ va_start(ap, data_fmt);
+ jsonrpc_return_errorv(r, code, message, data_fmt, ap);
+ va_end(ap);
+}
+
+void jsonrpc_return_successv(struct jsonrpc_request *r, const char *result_fmt,
+ va_list ap) {
+ if (r->id_len == 0) return;
+ mjson_printf(r->fn, r->fndata, "{\"id\":%.*s,\"result\":", r->id_len, r->id);
+ if (result_fmt != NULL) {
+ mjson_vprintf(r->fn, r->fndata, result_fmt, ap);
+ } else {
+ mjson_printf(r->fn, r->fndata, "%s", "null");
+ }
+ mjson_printf(r->fn, r->fndata, "}\n");
+}
+
+void jsonrpc_return_success(struct jsonrpc_request *r, const char *result_fmt,
+ ...) {
+ va_list ap;
+ va_start(ap, result_fmt);
+ jsonrpc_return_successv(r, result_fmt, ap);
+ va_end(ap);
+}
+
+void jsonrpc_ctx_process(struct jsonrpc_ctx *ctx, const char *buf, int len,
+ mjson_print_fn_t fn, void *fndata, void *ud) {
+ const char *result = NULL, *error = NULL;
+ int result_sz = 0, error_sz = 0;
+ struct jsonrpc_method *m = NULL;
+ struct jsonrpc_request r = {ctx, buf, len, 0, 0, 0, 0, 0, 0, fn, fndata, ud};
+
+ // Is it a response frame?
+ mjson_find(buf, len, "$.result", &result, &result_sz);
+ if (result == NULL) mjson_find(buf, len, "$.error", &error, &error_sz);
+ if (result_sz > 0 || error_sz > 0) {
+ if (ctx->response_cb) ctx->response_cb(buf, len, ctx->response_cb_data);
+ return;
+ }
+
+ // Method must exist and must be a string
+ if (mjson_find(buf, len, "$.method", &r.method, &r.method_len) !=
+ MJSON_TOK_STRING) {
+ mjson_printf(fn, fndata, "{\"error\":{\"code\":-32700,\"message\":%.*Q}}\n",
+ len, buf);
+ return;
+ }
+
+ // id and params are optional
+ mjson_find(buf, len, "$.id", &r.id, &r.id_len);
+ mjson_find(buf, len, "$.params", &r.params, &r.params_len);
+
+ for (m = ctx->methods; m != NULL; m = m->next) {
+ if (mjson_globmatch(m->method, m->method_sz, r.method + 1,
+ r.method_len - 2) > 0) {
+ if (r.params == NULL) r.params = "";
+ m->cb(&r);
+ break;
+ }
+ }
+ if (m == NULL) {
+ jsonrpc_return_error(&r, JSONRPC_ERROR_NOT_FOUND, "method not found", NULL);
+ }
+}
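+
+/* Frame-handling sketch (illustrative, assuming only the built-in
+ * rpc.list method is registered): a request frame is dispatched to the
+ * matching method, while a response frame goes to response_cb:
+ *
+ *   in : {"id": 1, "method": "rpc.list"}
+ *   out: {"id":1,"result":["rpc.list"]}
+ */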
+
+static int jsonrpc_print_methods(mjson_print_fn_t fn, void *fndata,
+ va_list *ap) {
+ struct jsonrpc_ctx *ctx = va_arg(*ap, struct jsonrpc_ctx *);
+ struct jsonrpc_method *m;
+ int len = 0;
+ for (m = ctx->methods; m != NULL; m = m->next) {
+ if (m != ctx->methods) len += mjson_print_buf(fn, fndata, ",", 1);
+ len += mjson_print_str(fn, fndata, m->method, (int) strlen(m->method));
+ }
+ return len;
+}
+
+static void rpclist(struct jsonrpc_request *r) {
+ jsonrpc_return_success(r, "[%M]", jsonrpc_print_methods, r->ctx);
+}
+
+void jsonrpc_ctx_init(struct jsonrpc_ctx *ctx, mjson_print_fn_t response_cb,
+ void *response_cb_data) {
+ ctx->response_cb = response_cb;
+ ctx->response_cb_data = response_cb_data;
+ jsonrpc_ctx_export(ctx, MJSON_RPC_LIST_NAME, rpclist);
+}
+
+void jsonrpc_init(mjson_print_fn_t response_cb, void *userdata) {
+ jsonrpc_ctx_init(&jsonrpc_default_context, response_cb, userdata);
+}
+#endif // MJSON_ENABLE_RPC
diff --git a/src/mqtt.c b/src/mqtt.c
new file mode 100644
index 0000000..5688296
--- /dev/null
+++ b/src/mqtt.c
@@ -0,0 +1,1281 @@
+/*
+ * MQTT Protocol
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/chunk.h>
+#include <haproxy/mqtt.h>
+
+uint8_t mqtt_cpt_flags[MQTT_CPT_ENTRIES] = {
+ [MQTT_CPT_INVALID] = 0x00,
+ [MQTT_CPT_CONNECT] = 0x00,
+ [MQTT_CPT_CONNACK] = 0x00,
+
+ /* MQTT_CPT_PUBLISH flags can have different values (DUP, QoS, RETAIN), so they
+ * must be checked more carefully
+ */
+ [MQTT_CPT_PUBLISH] = 0x0F,
+
+ [MQTT_CPT_PUBACK] = 0x00,
+ [MQTT_CPT_PUBREC] = 0x00,
+ [MQTT_CPT_PUBREL] = 0x02,
+ [MQTT_CPT_PUBCOMP] = 0x00,
+ [MQTT_CPT_SUBSCRIBE] = 0x02,
+ [MQTT_CPT_SUBACK] = 0x00,
+ [MQTT_CPT_UNSUBSCRIBE] = 0x02,
+ [MQTT_CPT_UNSUBACK] = 0x00,
+ [MQTT_CPT_PINGREQ] = 0x00,
+ [MQTT_CPT_PINGRESP] = 0x00,
+ [MQTT_CPT_DISCONNECT] = 0x00,
+ [MQTT_CPT_AUTH] = 0x00,
+};
+
+const struct ist mqtt_fields_string[MQTT_FN_ENTRIES] = {
+ [MQTT_FN_INVALID] = IST(""),
+
+ /* in MQTT 3.1, 3.1.1 and 5.0, these fields have no unique id, so we use strings */
+ [MQTT_FN_FLAGS] = IST("flags"),
+ [MQTT_FN_REASON_CODE] = IST("reason_code"), /* MQTT 3.1 and 3.1.1: return_code */
+ [MQTT_FN_PROTOCOL_NAME] = IST("protocol_name"),
+ [MQTT_FN_PROTOCOL_VERSION] = IST("protocol_version"), /* MQTT 3.1.1: protocol_level */
+ [MQTT_FN_CLIENT_IDENTIFIER] = IST("client_identifier"),
+ [MQTT_FN_WILL_TOPIC] = IST("will_topic"),
+ [MQTT_FN_WILL_PAYLOAD] = IST("will_payload"), /* MQTT 3.1 and 3.1.1: will_message */
+ [MQTT_FN_USERNAME] = IST("username"),
+ [MQTT_FN_PASSWORD] = IST("password"),
+ [MQTT_FN_KEEPALIVE] = IST("keepalive"),
+ /* from here, it's MQTT 5.0 only */
+ [MQTT_FN_PAYLOAD_FORMAT_INDICATOR] = IST("1"),
+ [MQTT_FN_MESSAGE_EXPIRY_INTERVAL] = IST("2"),
+ [MQTT_FN_CONTENT_TYPE] = IST("3"),
+ [MQTT_FN_RESPONSE_TOPIC] = IST("8"),
+ [MQTT_FN_CORRELATION_DATA] = IST("9"),
+ [MQTT_FN_SUBSCRIPTION_IDENTIFIER] = IST("11"),
+ [MQTT_FN_SESSION_EXPIRY_INTERVAL] = IST("17"),
+ [MQTT_FN_ASSIGNED_CLIENT_IDENTIFIER] = IST("18"),
+ [MQTT_FN_SERVER_KEEPALIVE] = IST("19"),
+ [MQTT_FN_AUTHENTICATION_METHOD] = IST("21"),
+ [MQTT_FN_AUTHENTICATION_DATA] = IST("22"),
+ [MQTT_FN_REQUEST_PROBLEM_INFORMATION] = IST("23"),
+ [MQTT_FN_DELAY_INTERVAL] = IST("24"),
+ [MQTT_FN_REQUEST_RESPONSE_INFORMATION] = IST("25"),
+ [MQTT_FN_RESPONSE_INFORMATION] = IST("26"),
+ [MQTT_FN_SERVER_REFERENCE] = IST("28"),
+ [MQTT_FN_REASON_STRING] = IST("31"),
+ [MQTT_FN_RECEIVE_MAXIMUM] = IST("33"),
+ [MQTT_FN_TOPIC_ALIAS_MAXIMUM] = IST("34"),
+ [MQTT_FN_TOPIC_ALIAS] = IST("35"),
+ [MQTT_FN_MAXIMUM_QOS] = IST("36"),
+ [MQTT_FN_RETAIN_AVAILABLE] = IST("37"),
+ [MQTT_FN_USER_PROPERTY] = IST("38"),
+ [MQTT_FN_MAXIMUM_PACKET_SIZE] = IST("39"),
+ [MQTT_FN_WILDCARD_SUBSCRIPTION_AVAILABLE] = IST("40"),
+ [MQTT_FN_SUBSCRIPTION_IDENTIFIERS_AVAILABLE] = IST("41"),
+ [MQTT_FN_SHARED_SUBSCRIPTION_AVAILABLE] = IST("42"),
+};
+
+/* list of supported capturable field names for each MQTT control packet type */
+const uint64_t mqtt_fields_per_packet[MQTT_CPT_ENTRIES] = {
+ [MQTT_CPT_INVALID] = 0,
+
+ [MQTT_CPT_CONNECT] = MQTT_FN_BIT_PROTOCOL_NAME | MQTT_FN_BIT_PROTOCOL_VERSION |
+ MQTT_FN_BIT_FLAGS | MQTT_FN_BIT_KEEPALIVE |
+ MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL | MQTT_FN_BIT_RECEIVE_MAXIMUM |
+ MQTT_FN_BIT_MAXIMUM_PACKET_SIZE | MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM |
+ MQTT_FN_BIT_REQUEST_RESPONSE_INFORMATION | MQTT_FN_BIT_REQUEST_PROBLEM_INFORMATION |
+ MQTT_FN_BIT_USER_PROPERTY | MQTT_FN_BIT_AUTHENTICATION_METHOD |
+ MQTT_FN_BIT_AUTHENTICATION_DATA | MQTT_FN_BIT_CLIENT_IDENTIFIER |
+ MQTT_FN_BIT_DELAY_INTERVAL | MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR |
+ MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL | MQTT_FN_BIT_CONTENT_TYPE |
+ MQTT_FN_BIT_RESPONSE_TOPIC | MQTT_FN_BIT_CORRELATION_DATA |
+ MQTT_FN_BIT_USER_PROPERTY | MQTT_FN_BIT_WILL_TOPIC |
+ MQTT_FN_BIT_WILL_PAYLOAD | MQTT_FN_BIT_USERNAME |
+ MQTT_FN_BIT_PASSWORD,
+
+ [MQTT_CPT_CONNACK] = MQTT_FN_BIT_FLAGS | MQTT_FN_BIT_PROTOCOL_VERSION |
+ MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL |
+ MQTT_FN_BIT_RECEIVE_MAXIMUM | MQTT_FN_BIT_MAXIMUM_QOS |
+ MQTT_FN_BIT_RETAIN_AVAILABLE | MQTT_FN_BIT_MAXIMUM_PACKET_SIZE |
+ MQTT_FN_BIT_ASSIGNED_CLIENT_IDENTIFIER | MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM |
+ MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_WILDCARD_SUBSCRIPTION_AVAILABLE |
+ MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIERS_AVAILABLE| MQTT_FN_BIT_SHARED_SUBSCRIPTION_AVAILABLE |
+ MQTT_FN_BIT_SERVER_KEEPALIVE | MQTT_FN_BIT_RESPONSE_INFORMATION |
+ MQTT_FN_BIT_SERVER_REFERENCE | MQTT_FN_BIT_USER_PROPERTY |
+ MQTT_FN_BIT_AUTHENTICATION_METHOD | MQTT_FN_BIT_AUTHENTICATION_DATA,
+
+ [MQTT_CPT_PUBLISH] = MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR | MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL |
+ MQTT_FN_BIT_CONTENT_TYPE | MQTT_FN_BIT_RESPONSE_TOPIC |
+ MQTT_FN_BIT_CORRELATION_DATA | MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER |
+ MQTT_FN_BIT_TOPIC_ALIAS | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBACK] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBREC] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBREL] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBCOMP] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_SUBSCRIBE] = MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_SUBACK] = MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_UNSUBSCRIBE] = MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_UNSUBACK] = MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PINGREQ] = 0,
+
+ [MQTT_CPT_PINGRESP] = 0,
+
+ [MQTT_CPT_DISCONNECT] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL |
+ MQTT_FN_BIT_SERVER_REFERENCE | MQTT_FN_BIT_REASON_STRING |
+ MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_AUTH] = MQTT_FN_BIT_AUTHENTICATION_METHOD | MQTT_FN_BIT_AUTHENTICATION_DATA |
+ MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+};
+
+/* Checks the first byte of a message to read the fixed header and extract the
+ * packet type and flags. <parser> is supposed to point to the fixed header byte.
+ *
+ * The fixed header looks like:
+ * +-------+-----------+-----------+-----------+---------+----------+----------+---------+------------+
+ * | bit | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * +-------+-----------+-----------+-----------+---------+----------+----------+---------+------------+
+ * | field | MQTT Control Packet Type | Flags specific to each Control Packet type |
+ * +-------+---------------------------------------------+--------------------------------------------+
+ *
+ * On success, <pkt> is updated with the packet type and flags and the new parser
+ * state is returned. On error, IST_NULL is returned.
+ */
+static inline struct ist mqtt_read_fixed_hdr(struct ist parser, struct mqtt_pkt *pkt)
+{
+ uint8_t type = (uint8_t)*istptr(parser);
+ uint8_t ptype = (type & 0xF0) >> 4;
+ uint8_t flags = type & 0x0F;
+
+ if (ptype == MQTT_CPT_INVALID || ptype >= MQTT_CPT_ENTRIES || flags != mqtt_cpt_flags[ptype])
+ return IST_NULL;
+
+ pkt->fixed_hdr.type = ptype;
+ pkt->fixed_hdr.flags = flags;
+ return istnext(parser);
+}
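+
+/* For instance, a first byte of 0x10 (type 1, flags 0) is a valid
+ * CONNECT packet and 0x82 (type 8, flags 2) is a valid SUBSCRIBE packet,
+ * while 0x13 is rejected because CONNECT requires its flags to be 0.
+ */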
+
+/* Reads a one-byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901007
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i>, if provided, and the new parser state is returned. On
+ * error, IST_NULL is returned.
+*/
+static inline struct ist mqtt_read_1byte_int(struct ist parser, uint8_t *i)
+{
+ if (istlen(parser) < 1)
+ return IST_NULL;
+ if (i)
+ *i = (uint8_t)*istptr(parser);
+ parser = istnext(parser);
+ return parser;
+}
+
+/* Reads a two-byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901008
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i>, if provided, and the new parser state is returned. On
+ * error, IST_NULL is returned.
+*/
+static inline struct ist mqtt_read_2byte_int(struct ist parser, uint16_t *i)
+{
+ if (istlen(parser) < 2)
+ return IST_NULL;
+ if (i) {
+ *i = (uint8_t)*istptr(parser) << 8;
+ *i += (uint8_t)*(istptr(parser) + 1);
+ }
+ parser = istadv(parser, 2);
+ return parser;
+}
+
+/* Reads a four-byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901009
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i>, if provided, and the new parser state is returned. On
+ * error, IST_NULL is returned.
+*/
+static inline struct ist mqtt_read_4byte_int(struct ist parser, uint32_t *i)
+{
+ if (istlen(parser) < 4)
+ return IST_NULL;
+ if (i) {
+ *i = (uint8_t)*istptr(parser) << 24;
+ *i += (uint8_t)*(istptr(parser) + 1) << 16;
+ *i += (uint8_t)*(istptr(parser) + 2) << 8;
+ *i += (uint8_t)*(istptr(parser) + 3);
+ }
+ parser = istadv(parser, 4);
+ return parser;
+}
+
+/* Reads a variable byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718023
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901011
+ *
+ * It is encoded using a variable length encoding scheme which uses a single
+ * byte for values up to 127. Larger values are handled as follows. The least
+ * significant seven bits of each byte encode the data, and the most significant
+ * bit is used to indicate that there are following bytes in the representation.
+ * Thus each byte encodes 128 values and a "continuation bit".
+ *
+ * The maximum number of bytes in the Remaining Length field is four
+ * (MQTT_REMAINING_LENGHT_MAX_SIZE).
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i> and the new parser state is returned. On
+ * error, IST_NULL is returned.
+ */
+static inline struct ist mqtt_read_varint(struct ist parser, uint32_t *i)
+{
+ int off, m;
+
+ off = m = 0;
+ if (i)
+ *i = 0;
+ for (off = 0; off < MQTT_REMAINING_LENGHT_MAX_SIZE && istlen(parser); off++) {
+ uint8_t byte = (uint8_t)*istptr(parser);
+
+ if (i) {
+ *i += (byte & 127) << m;
+ m += 7; /* preparing <m> for next byte */
+ }
+ parser = istnext(parser);
+
+ /* we have read the last byte of the remaining length field */
+ if (byte <= 127)
+ break;
+ }
+
+ if (off == MQTT_REMAINING_LENGHT_MAX_SIZE)
+ return IST_NULL;
+ return parser;
+}
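+
+/* Worked example (informative): the bytes 0xC1 0x02 decode to 321: the first
+ * byte contributes 0xC1 & 127 = 65 with its continuation bit set, the second
+ * contributes 2 << 7 = 256, and 65 + 256 = 321. The largest 4-byte encoding,
+ * 0xFF 0xFF 0xFF 0x7F, decodes to 268,435,455.
+ */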
+
+/* Reads an MQTT string. More information here:
+ * http://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718016
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901010
+ *
+ * In MQTT, strings are prefixed by their size, encoded over 2 bytes:
+ * byte 1: length MSB
+ * byte 2: length LSB
+ * byte 3: string
+ * ...
+ *
+ * string size is MSB * 256 + LSB
+ *
+ * <parser> is supposed to point to the first byte of the string. On success the
+ * string is stored in <*str>, if provided, and the new parser state is
+ * returned. On error, IST_NULL is returned.
+ */
+static inline struct ist mqtt_read_string(struct ist parser, struct ist *str)
+{
+ uint16_t len = 0;
+
+ /* read and compute the string length */
+ if (istlen(parser) < 2)
+ goto error;
+
+ parser = mqtt_read_2byte_int(parser, &len);
+ if (!isttest(parser) || istlen(parser) < len)
+ goto error;
+
+ if (str) {
+ str->ptr = istptr(parser);
+ str->len = len;
+ }
+
+ return istadv(parser, len);
+
+ error:
+ return IST_NULL;
+}
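+
+/* Example (informative): the byte sequence 00 04 'M' 'Q' 'T' 'T' decodes to
+ * the 4-character string "MQTT", and the parser is advanced by 6 bytes.
+ */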
+
+/* Helper function to convert an unsigned integer to a string. The result is
+ * written in <buf>. On success, the written size is returned, otherwise, on
+ * error, 0 is returned.
+ */
+static inline size_t mqtt_uint2str(struct buffer *buf, uint32_t i)
+{
+ char *end;
+
+ end = ultoa_o(i, buf->area, buf->size);
+ if (!end)
+ return 0;
+ buf->data = end - buf->area;
+ return buf->data;
+}
+
+/* Extracts the value of field <fieldname_id> from a given MQTT message <msg>
+ * of packet type <type>. IST_NULL is returned if an error occurred while
+ * parsing or if the field could not be found. If more data are required, an
+ * ist pointing to the message with a length set to 0 is returned. If the
+ * field is found, its value is returned as a struct ist.
+ */
+struct ist mqtt_field_value(struct ist msg, int type, int fieldname_id)
+{
+ struct buffer *trash = get_trash_chunk();
+ struct mqtt_pkt mpkt;
+ struct ist res;
+
+ switch (mqtt_validate_message(msg, &mpkt)) {
+ case MQTT_VALID_MESSAGE:
+ if (mpkt.fixed_hdr.type != type)
+ goto not_found_or_invalid;
+ break;
+ case MQTT_NEED_MORE_DATA:
+ goto need_more;
+ case MQTT_INVALID_MESSAGE:
+ goto not_found_or_invalid;
+ }
+
+ switch (type) {
+ case MQTT_CPT_CONNECT:
+ switch (fieldname_id) {
+ case MQTT_FN_FLAGS:
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.flags))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_PROTOCOL_NAME:
+ if (!istlen(mpkt.data.connect.var_hdr.protocol_name))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.var_hdr.protocol_name;
+ goto end;
+
+ case MQTT_FN_PROTOCOL_VERSION:
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.protocol_version))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_CLIENT_IDENTIFIER:
+ if (!istlen(mpkt.data.connect.payload.client_identifier))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.client_identifier;
+ goto end;
+
+ case MQTT_FN_WILL_TOPIC:
+ if (!istlen(mpkt.data.connect.payload.will_topic))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_topic;
+ goto end;
+
+ case MQTT_FN_WILL_PAYLOAD:
+ if (!istlen(mpkt.data.connect.payload.will_payload))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_payload;
+ goto end;
+
+ case MQTT_FN_USERNAME:
+ if (!istlen(mpkt.data.connect.payload.username))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.username;
+ goto end;
+
+ case MQTT_FN_PASSWORD:
+ if (!istlen(mpkt.data.connect.payload.password))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.password;
+ goto end;
+
+ case MQTT_FN_KEEPALIVE:
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.keepalive))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_PAYLOAD_FORMAT_INDICATOR:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.payload.will_props.payload_format_indicator))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MESSAGE_EXPIRY_INTERVAL:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.payload.will_props.message_expiry_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_CONTENT_TYPE:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.payload.will_props.content_type))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_props.content_type;
+ goto end;
+
+ case MQTT_FN_RESPONSE_TOPIC:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.payload.will_props.response_topic))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_props.response_topic;
+ goto end;
+
+ case MQTT_FN_CORRELATION_DATA:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.payload.will_props.correlation_data))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_props.correlation_data;
+ goto end;
+
+ case MQTT_FN_SESSION_EXPIRY_INTERVAL:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.session_expiry_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_METHOD:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.var_hdr.props.authentication_method))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.var_hdr.props.authentication_method;
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_DATA:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.var_hdr.props.authentication_data))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.var_hdr.props.authentication_data;
+ goto end;
+
+ case MQTT_FN_REQUEST_PROBLEM_INFORMATION:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.request_problem_information))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_DELAY_INTERVAL:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.payload.will_props.delay_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_REQUEST_RESPONSE_INFORMATION:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.request_response_information))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_RECEIVE_MAXIMUM:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.receive_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_TOPIC_ALIAS_MAXIMUM:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.topic_alias_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MAXIMUM_PACKET_SIZE:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.maximum_packet_size))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ default:
+ goto not_found_or_invalid;
+ }
+ break;
+
+ case MQTT_CPT_CONNACK:
+ switch (fieldname_id) {
+ case MQTT_FN_FLAGS:
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.flags))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_REASON_CODE:
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.reason_code))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_PROTOCOL_VERSION:
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.protocol_version))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_SESSION_EXPIRY_INTERVAL:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.session_expiry_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_ASSIGNED_CLIENT_IDENTIFIER:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.assigned_client_identifier))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.assigned_client_identifier;
+ goto end;
+
+ case MQTT_FN_SERVER_KEEPALIVE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.server_keepalive))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_METHOD:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.authentication_method))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.authentication_method;
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_DATA:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.authentication_data))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.authentication_data;
+ goto end;
+
+ case MQTT_FN_RESPONSE_INFORMATION:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.response_information))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.response_information;
+ goto end;
+
+ case MQTT_FN_SERVER_REFERENCE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.server_reference))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.server_reference;
+ goto end;
+
+ case MQTT_FN_REASON_STRING:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.reason_string))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.reason_string;
+ goto end;
+
+ case MQTT_FN_RECEIVE_MAXIMUM:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.receive_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_TOPIC_ALIAS_MAXIMUM:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.topic_alias_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MAXIMUM_QOS:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.maximum_qos))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_RETAIN_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.retain_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MAXIMUM_PACKET_SIZE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.maximum_packet_size))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_WILDCARD_SUBSCRIPTION_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.wildcard_subscription_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_SUBSCRIPTION_IDENTIFIERS_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.subscription_identifiers_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_SHARED_SUBSCRIPTION_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.shared_subsription_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ default:
+ goto not_found_or_invalid;
+ }
+ break;
+
+ default:
+ goto not_found_or_invalid;
+ }
+
+ end:
+ return res;
+
+ need_more:
+ return ist2(istptr(msg), 0);
+
+ not_found_or_invalid:
+ return IST_NULL;
+}
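+
+/* Minimal usage sketch (hypothetical caller): extracting the client
+ * identifier from a buffered CONNECT message <msg>:
+ *
+ *     struct ist cid = mqtt_field_value(msg, MQTT_CPT_CONNECT,
+ *                                       MQTT_FN_CLIENT_IDENTIFIER);
+ *
+ *     if (!isttest(cid)) {
+ *             // invalid message or field not found
+ *     } else if (!istlen(cid)) {
+ *             // incomplete message, wait for more data
+ *     } else {
+ *             // <cid> points at the identifier bytes inside <msg>
+ *     }
+ */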
+
+/* Parses a CONNECT packet :
+ * https://public.dhe.ibm.com/software/dw/webservices/ws-mqtt/mqtt-v3r1.html#connect
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718028
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901033
+ *
+ * <parser> should point right after the MQTT fixed header. The remaining length
+ * was already checked, thus missing data is an error. On success, the result of
+ * the parsing is stored in <mpkt>.
+ *
+ * Returns:
+ * MQTT_INVALID_MESSAGE if the CONNECT message is invalid
+ * MQTT_VALID_MESSAGE if the CONNECT message looks valid
+ */
+static int mqtt_parse_connect(struct ist parser, struct mqtt_pkt *mpkt)
+{
+ /* The parser length is stored to make sure we consume exactly the
+ * announced remaining length. */
+ size_t orig_len = istlen(parser);
+ int ret = MQTT_INVALID_MESSAGE;
+
+ /*
+ * parsing variable header
+ */
+ /* read protocol_name */
+ parser = mqtt_read_string(parser, &mpkt->data.connect.var_hdr.protocol_name);
+ if (!isttest(parser) || !(isteqi(mpkt->data.connect.var_hdr.protocol_name, ist("MQTT")) || isteqi(mpkt->data.connect.var_hdr.protocol_name, ist("MQIsdp"))))
+ goto end;
+
+ /* read protocol_version */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connect.var_hdr.protocol_version);
+ if (!isttest(parser))
+ goto end;
+ if (mpkt->data.connect.var_hdr.protocol_version != MQTT_VERSION_3_1 &&
+ mpkt->data.connect.var_hdr.protocol_version != MQTT_VERSION_3_1_1 &&
+ mpkt->data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto end;
+
+ /* read flags */
+ /* bit 1 is 'reserved' and must be set to 0 in CONNECT message flags */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connect.var_hdr.flags);
+ if (!isttest(parser) || (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_RESERVED))
+ goto end;
+
+ /* the WILL flag must be set for the WILL_QOS or WILL_RETAIN flags to be set */
+ if ((mpkt->data.connect.var_hdr.flags & (MQTT_CONNECT_FL_WILL|MQTT_CONNECT_FL_WILL_QOS|MQTT_CONNECT_FL_WILL_RETAIN)) == MQTT_CONNECT_FL_WILL_QOS)
+ goto end;
+
+ /* read keepalive */
+ parser = mqtt_read_2byte_int(parser, &mpkt->data.connect.var_hdr.keepalive);
+ if (!isttest(parser))
+ goto end;
+
+ /* read properties, only available in MQTT_VERSION_5_0 */
+ if (mpkt->data.connect.var_hdr.protocol_version == MQTT_VERSION_5_0) {
+ struct ist props;
+ unsigned int user_prop_idx = 0;
+ uint64_t fields = 0;
+ uint32_t plen = 0;
+
+ parser = mqtt_read_varint(parser, &plen);
+ if (!isttest(parser) || istlen(parser) < plen)
+ goto end;
+ props = ist2(istptr(parser), plen);
+ parser = istadv(parser, props.len);
+
+ while (istlen(props) > 0) {
+ switch (*istptr(props)) {
+ case MQTT_PROP_SESSION_EXPIRY_INTERVAL:
+ if (fields & MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.session_expiry_interval);
+ fields |= MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL;
+ break;
+
+ case MQTT_PROP_RECEIVE_MAXIMUM:
+ if (fields & MQTT_FN_BIT_RECEIVE_MAXIMUM)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.receive_maximum);
+ /* cannot be 0 */
+ if (!mpkt->data.connect.var_hdr.props.receive_maximum)
+ goto end;
+ fields |= MQTT_FN_BIT_RECEIVE_MAXIMUM;
+ break;
+
+ case MQTT_PROP_MAXIMUM_PACKET_SIZE:
+ if (fields & MQTT_FN_BIT_MAXIMUM_PACKET_SIZE)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.maximum_packet_size);
+ /* cannot be 0 */
+ if (!mpkt->data.connect.var_hdr.props.maximum_packet_size)
+ goto end;
+ fields |= MQTT_FN_BIT_MAXIMUM_PACKET_SIZE;
+ break;
+
+ case MQTT_PROP_TOPIC_ALIAS_MAXIMUM:
+ if (fields & MQTT_FN_BIT_TOPIC_ALIAS)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.topic_alias_maximum);
+ fields |= MQTT_FN_BIT_TOPIC_ALIAS;
+ break;
+
+ case MQTT_PROP_REQUEST_RESPONSE_INFORMATION:
+ if (fields & MQTT_FN_BIT_REQUEST_RESPONSE_INFORMATION)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.request_response_information);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connect.var_hdr.props.request_response_information > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_REQUEST_RESPONSE_INFORMATION;
+ break;
+
+ case MQTT_PROP_REQUEST_PROBLEM_INFORMATION:
+ if (fields & MQTT_FN_BIT_REQUEST_PROBLEM_INFORMATION)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.request_problem_information);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connect.var_hdr.props.request_problem_information > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_REQUEST_PROBLEM_INFORMATION;
+ break;
+
+ case MQTT_PROP_USER_PROPERTIES:
+ /* if we reached MQTT_PROP_USER_PROPERTY_ENTRIES already, then
+ * we start writing over the first property */
+ if (user_prop_idx >= MQTT_PROP_USER_PROPERTY_ENTRIES)
+ user_prop_idx = 0;
+
+ /* read user property name and value */
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.var_hdr.props.user_props[user_prop_idx].name);
+ if (!isttest(props))
+ goto end;
+ props = mqtt_read_string(props, &mpkt->data.connect.var_hdr.props.user_props[user_prop_idx].value);
+ ++user_prop_idx;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_METHOD:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_METHOD)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.var_hdr.props.authentication_method);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_METHOD;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_DATA:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_DATA)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.var_hdr.props.authentication_data);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_DATA;
+ break;
+
+ default:
+ goto end;
+ }
+
+ if (!isttest(props))
+ goto end;
+ }
+ }
+
+ /* cannot have auth data without auth method */
+ if (!istlen(mpkt->data.connect.var_hdr.props.authentication_method) &&
+ istlen(mpkt->data.connect.var_hdr.props.authentication_data))
+ goto end;
+
+ /* parsing payload
+ *
+ * Content of payload is related to flags parsed above and the field order is pre-defined:
+ * Client Identifier, Will Topic, Will Message, User Name, Password
+ */
+ /* read client identifier */
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.client_identifier);
+ if (!isttest(parser))
+ goto end;
+
+ /* read Will Properties, for MQTT v5 only
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901060
+ */
+ if ((mpkt->data.connect.var_hdr.protocol_version == MQTT_VERSION_5_0) &&
+ (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL)) {
+ struct ist props;
+ unsigned int user_prop_idx = 0;
+ uint64_t fields = 0;
+ uint32_t plen = 0;
+
+ parser = mqtt_read_varint(parser, &plen);
+ if (!isttest(parser) || istlen(parser) < plen)
+ goto end;
+ props = ist2(istptr(parser), plen);
+ parser = istadv(parser, props.len);
+
+ while (istlen(props) > 0) {
+ switch (*istptr(props)) {
+ case MQTT_PROP_WILL_DELAY_INTERVAL:
+ if (fields & MQTT_FN_BIT_DELAY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.payload.will_props.delay_interval);
+ fields |= MQTT_FN_BIT_DELAY_INTERVAL;
+ break;
+
+ case MQTT_PROP_PAYLOAD_FORMAT_INDICATOR:
+ if (fields & MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connect.payload.will_props.payload_format_indicator);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connect.payload.will_props.payload_format_indicator > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR;
+ break;
+
+ case MQTT_PROP_MESSAGE_EXPIRY_INTERVAL:
+ if (fields & MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.payload.will_props.message_expiry_interval);
+ fields |= MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL;
+ break;
+
+ case MQTT_PROP_CONTENT_TYPE:
+ if (fields & MQTT_FN_BIT_CONTENT_TYPE)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.content_type);
+ fields |= MQTT_FN_BIT_CONTENT_TYPE;
+ break;
+
+ case MQTT_PROP_RESPONSE_TOPIC:
+ if (fields & MQTT_FN_BIT_RESPONSE_TOPIC)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.response_topic);
+ fields |= MQTT_FN_BIT_RESPONSE_TOPIC;
+ break;
+
+ case MQTT_PROP_CORRELATION_DATA:
+ if (fields & MQTT_FN_BIT_CORRELATION_DATA)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.correlation_data);
+ fields |= MQTT_FN_BIT_CORRELATION_DATA;
+ break;
+
+ case MQTT_PROP_USER_PROPERTIES:
+ /* if we reached MQTT_PROP_USER_PROPERTY_ENTRIES already, then
+ * we start writing over the first property */
+ if (user_prop_idx >= MQTT_PROP_USER_PROPERTY_ENTRIES)
+ user_prop_idx = 0;
+
+ /* read user property name and value */
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.user_props[user_prop_idx].name);
+ if (!isttest(props))
+ goto end;
+ props = mqtt_read_string(props, &mpkt->data.connect.payload.will_props.user_props[user_prop_idx].value);
+ ++user_prop_idx;
+ break;
+
+ default:
+ goto end;
+ }
+
+ if (!isttest(props))
+ goto end;
+ }
+ }
+
+ /* read Will Topic and Will Message (MQTT 3.1.1) or Payload (MQTT 5.0) */
+ if (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL) {
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.will_topic);
+ if (!isttest(parser))
+ goto end;
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.will_payload);
+ if (!isttest(parser))
+ goto end;
+ }
+
+ /* read User Name */
+ if (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_USERNAME) {
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.username);
+ if (!isttest(parser))
+ goto end;
+ }
+
+ /* read Password */
+ if (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_PASSWORD) {
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.password);
+ if (!isttest(parser))
+ goto end;
+ }
+
+ if ((orig_len - istlen(parser)) == mpkt->fixed_hdr.remaining_length)
+ ret = MQTT_VALID_MESSAGE;
+
+ end:
+ return ret;
+}
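+
+/* Informative example: a minimal valid MQTT 3.1.1 CONNECT packet with a clean
+ * session and client identifier "a" is the following 15-byte sequence:
+ *
+ *   10 0d               fixed header (CONNECT, remaining length = 13)
+ *   00 04 4d 51 54 54   protocol name "MQTT"
+ *   04                  protocol version (3.1.1)
+ *   02                  connect flags (clean session)
+ *   00 3c               keepalive (60 seconds)
+ *   00 01 61            client identifier "a"
+ */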
+
+/* Parses a CONNACK packet :
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718033
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901074
+ *
+ * <parser> should point right after the MQTT fixed header. The remaining length
+ * was already checked, thus missing data is an error. On success, the result of
+ * the parsing is stored in <mpkt>.
+ *
+ * Returns:
+ * MQTT_INVALID_MESSAGE if the CONNACK message is invalid
+ * MQTT_VALID_MESSAGE if the CONNACK message looks valid
+ */
+static int mqtt_parse_connack(struct ist parser, struct mqtt_pkt *mpkt)
+{
+ /* The parser length is stored to make sure we consume exactly the
+ * announced remaining length. */
+ size_t orig_len = istlen(parser);
+ int ret = MQTT_INVALID_MESSAGE;
+
+ if (istlen(parser) < 2)
+ goto end;
+ else if (istlen(parser) == 2)
+ mpkt->data.connack.var_hdr.protocol_version = MQTT_VERSION_3_1_1;
+ else
+ mpkt->data.connack.var_hdr.protocol_version = MQTT_VERSION_5_0;
+
+ /*
+ * parsing variable header
+ */
+ /* read flags */
+ /* bits 7 to 1 on flags are reserved and must be 0 */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connack.var_hdr.flags);
+ if (!isttest(parser) || (mpkt->data.connack.var_hdr.flags & 0xFE))
+ goto end;
+
+ /* read reason_code */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connack.var_hdr.reason_code);
+ if (!isttest(parser))
+ goto end;
+
+ /* we can leave here for MQTT 3.1.1 */
+ if (mpkt->data.connack.var_hdr.protocol_version == MQTT_VERSION_3_1_1) {
+ if ((orig_len - istlen(parser)) == mpkt->fixed_hdr.remaining_length)
+ ret = MQTT_VALID_MESSAGE;
+ goto end;
+ }
+
+ /* read properties, only available in MQTT_VERSION_5_0 */
+ if (mpkt->data.connack.var_hdr.protocol_version == MQTT_VERSION_5_0) {
+ struct ist props;
+ unsigned int user_prop_idx = 0;
+ uint64_t fields = 0;
+ uint32_t plen = 0;
+
+ parser = mqtt_read_varint(parser, &plen);
+ if (!isttest(parser) || istlen(parser) < plen)
+ goto end;
+ props = ist2(istptr(parser), plen);
+ parser = istadv(parser, props.len);
+
+ while (istlen(props) > 0) {
+ switch (*istptr(props)) {
+ case MQTT_PROP_SESSION_EXPIRY_INTERVAL:
+ if (fields & MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.session_expiry_interval);
+ fields |= MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL;
+ break;
+
+ case MQTT_PROP_RECEIVE_MAXIMUM:
+ if (fields & MQTT_FN_BIT_RECEIVE_MAXIMUM)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.receive_maximum);
+ /* cannot be 0 */
+ if (!mpkt->data.connack.var_hdr.props.receive_maximum)
+ goto end;
+ fields |= MQTT_FN_BIT_RECEIVE_MAXIMUM;
+ break;
+
+ case MQTT_PROP_MAXIMUM_QOS:
+ if (fields & MQTT_FN_BIT_MAXIMUM_QOS)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.maximum_qos);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.maximum_qos > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_MAXIMUM_QOS;
+ break;
+
+ case MQTT_PROP_RETAIN_AVAILABLE:
+ if (fields & MQTT_FN_BIT_RETAIN_AVAILABLE)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.retain_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.retain_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_RETAIN_AVAILABLE;
+ break;
+
+ case MQTT_PROP_MAXIMUM_PACKET_SIZE:
+ if (fields & MQTT_FN_BIT_MAXIMUM_PACKET_SIZE)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.maximum_packet_size);
+ /* cannot be 0 */
+ if (!mpkt->data.connack.var_hdr.props.maximum_packet_size)
+ goto end;
+ fields |= MQTT_FN_BIT_MAXIMUM_PACKET_SIZE;
+ break;
+
+ case MQTT_PROP_ASSIGNED_CLIENT_IDENTIFIER:
+ if (fields & MQTT_FN_BIT_ASSIGNED_CLIENT_IDENTIFIER)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.assigned_client_identifier);
+ if (!istlen(mpkt->data.connack.var_hdr.props.assigned_client_identifier))
+ goto end;
+ fields |= MQTT_FN_BIT_ASSIGNED_CLIENT_IDENTIFIER;
+ break;
+
+ case MQTT_PROP_TOPIC_ALIAS_MAXIMUM:
+ if (fields & MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.topic_alias_maximum);
+ fields |= MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM;
+ break;
+
+ case MQTT_PROP_REASON_STRING:
+ if (fields & MQTT_FN_BIT_REASON_STRING)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.reason_string);
+ fields |= MQTT_FN_BIT_REASON_STRING;
+ break;
+
+ case MQTT_PROP_WILDCARD_SUBSCRIPTION_AVAILABLE:
+ if (fields & MQTT_FN_BIT_WILDCARD_SUBSCRIPTION_AVAILABLE)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.wildcard_subscription_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.wildcard_subscription_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_WILDCARD_SUBSCRIPTION_AVAILABLE;
+ break;
+
+ case MQTT_PROP_SUBSCRIPTION_IDENTIFIERS_AVAILABLE:
+ if (fields & MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.subscription_identifiers_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.subscription_identifiers_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER;
+ break;
+
+ case MQTT_PROP_SHARED_SUBSRIPTION_AVAILABLE:
+ if (fields & MQTT_FN_BIT_SHARED_SUBSCRIPTION_AVAILABLE)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.shared_subsription_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.shared_subsription_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_SHARED_SUBSCRIPTION_AVAILABLE;
+ break;
+
+ case MQTT_PROP_SERVER_KEEPALIVE:
+ if (fields & MQTT_FN_BIT_SERVER_KEEPALIVE)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.server_keepalive);
+ fields |= MQTT_FN_BIT_SERVER_KEEPALIVE;
+ break;
+
+ case MQTT_PROP_RESPONSE_INFORMATION:
+ if (fields & MQTT_FN_BIT_RESPONSE_INFORMATION)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.response_information);
+ fields |= MQTT_FN_BIT_RESPONSE_INFORMATION;
+ break;
+
+ case MQTT_PROP_SERVER_REFERENCE:
+ if (fields & MQTT_FN_BIT_SERVER_REFERENCE)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.server_reference);
+ fields |= MQTT_FN_BIT_SERVER_REFERENCE;
+ break;
+
+ case MQTT_PROP_USER_PROPERTIES:
+ /* if we reached MQTT_PROP_USER_PROPERTY_ENTRIES already, then
+ * we start writing over the first property */
+ if (user_prop_idx >= MQTT_PROP_USER_PROPERTY_ENTRIES)
+ user_prop_idx = 0;
+
+ /* read user property name and value */
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.user_props[user_prop_idx].name);
+ if (!isttest(props))
+ goto end;
+ props = mqtt_read_string(props, &mpkt->data.connack.var_hdr.props.user_props[user_prop_idx].value);
+ ++user_prop_idx;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_METHOD:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_METHOD)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.authentication_method);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_METHOD;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_DATA:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_DATA)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.authentication_data);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_DATA;
+ break;
+
+ default:
+ goto end;
+ }
+
+ if (!isttest(props))
+ goto end;
+ }
+ }
+
+ if ((orig_len - istlen(parser)) == mpkt->fixed_hdr.remaining_length)
+ ret = MQTT_VALID_MESSAGE;
+ end:
+ return ret;
+}
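+
+/* Informative example: the shortest valid MQTT 3.1.1 CONNACK is the 4-byte
+ * sequence 20 02 00 00: fixed header (CONNACK, remaining length = 2), flags 0
+ * (no session present) and reason code 0 (connection accepted).
+ */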
+
+
+/* Parses and validates an MQTT packet
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718028
+ *
+ * For now, due to an HAProxy limitation, only validation of CONNECT and
+ * CONNACK packets is supported.
+ *
+ * - check FIXED_HDR
+ * - check remaining length
+ * - check variable headers and payload
+ *
+ * If <mpkt> is not NULL, then this structure will be filled as well. An
+ * unsupported packet type is considered invalid. This is not a problem for now
+ * because only the first packet on each side can be parsed (CONNECT for the
+ * client and CONNACK for the server).
+ *
+ * Returns:
+ * MQTT_INVALID_MESSAGE if the message is invalid
+ * MQTT_NEED_MORE_DATA if we need more data to fully validate the message
+ * MQTT_VALID_MESSAGE if the message looks valid
+ */
+int mqtt_validate_message(const struct ist msg, struct mqtt_pkt *mpkt)
+{
+ struct ist parser;
+ struct mqtt_pkt tmp_mpkt;
+ int ret = MQTT_INVALID_MESSAGE;
+
+ if (!mpkt)
+ mpkt = &tmp_mpkt;
+ memset(mpkt, 0, sizeof(*mpkt));
+
+ parser = msg;
+ if (istlen(msg) < MQTT_MIN_PKT_SIZE) {
+ ret = MQTT_NEED_MORE_DATA;
+ goto end;
+ }
+
+ /* parse the MQTT fixed header */
+ parser = mqtt_read_fixed_hdr(parser, mpkt);
+ if (!isttest(parser)) {
+ ret = MQTT_INVALID_MESSAGE;
+ goto end;
+ }
+
+ /* Now parsing "remaining length" field */
+ parser = mqtt_read_varint(parser, &mpkt->fixed_hdr.remaining_length);
+ if (!isttest(parser)) {
+ ret = MQTT_INVALID_MESSAGE;
+ goto end;
+ }
+
+ if (istlen(parser) < mpkt->fixed_hdr.remaining_length)
+ return MQTT_NEED_MORE_DATA;
+
+ /* Now parsing the variable header and payload, which is based on the packet type */
+ switch (mpkt->fixed_hdr.type) {
+ case MQTT_CPT_CONNECT:
+ ret = mqtt_parse_connect(parser, mpkt);
+ break;
+ case MQTT_CPT_CONNACK:
+ ret = mqtt_parse_connack(parser, mpkt);
+ break;
+ default:
+ break;
+ }
+
+ end:
+ return ret;
+}
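+
+/* Minimal usage sketch (hypothetical caller): a protocol check could validate
+ * the first packet received from a client like this:
+ *
+ *     switch (mqtt_validate_message(msg, NULL)) {
+ *     case MQTT_VALID_MESSAGE:
+ *             break; // accept and forward
+ *     case MQTT_NEED_MORE_DATA:
+ *             break; // keep buffering and retry later
+ *     case MQTT_INVALID_MESSAGE:
+ *             break; // reject the connection
+ *     }
+ */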
diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c
new file mode 100644
index 0000000..2c417dd
--- /dev/null
+++ b/src/mux_fcgi.c
@@ -0,0 +1,4318 @@
+/*
+ * FastCGI mux-demux for connections
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/ist.h>
+#include <import/eb32tree.h>
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/errors.h>
+#include <haproxy/fcgi-app.h>
+#include <haproxy/fcgi.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+#include <haproxy/version.h>
+
+
+/* FCGI Connection flags (32 bits) */
+#define FCGI_CF_NONE 0x00000000
+
+/* Flags indicating why writing to the mux is blocked */
+#define FCGI_CF_MUX_MALLOC 0x00000001 /* mux is blocked on lack of connection's mux buffer */
+#define FCGI_CF_MUX_MFULL 0x00000002 /* mux is blocked on connection's mux buffer full */
+#define FCGI_CF_MUX_BLOCK_ANY 0x00000003 /* aggregate of the two mux flags above */
+
+/* Flags indicating why writing to the demux is blocked.
+ * The first two ones directly affect the ability for the mux to receive data
+ * from the connection. The other ones affect the mux's ability to demux
+ * received data.
+ */
+#define FCGI_CF_DEM_DALLOC 0x00000004 /* demux blocked on lack of connection's demux buffer */
+#define FCGI_CF_DEM_DFULL 0x00000008 /* demux blocked on connection's demux buffer full */
+#define FCGI_CF_DEM_MROOM 0x00000010 /* demux blocked on lack of room in mux buffer */
+#define FCGI_CF_DEM_SALLOC 0x00000020 /* demux blocked on lack of stream's rx buffer */
+#define FCGI_CF_DEM_SFULL 0x00000040 /* demux blocked on stream request buffer full */
+#define FCGI_CF_DEM_TOOMANY 0x00000080 /* demux blocked waiting for some stream connectors to leave */
+#define FCGI_CF_DEM_BLOCK_ANY 0x000000F0 /* aggregate of the demux flags above except DALLOC/DFULL */
+
+/* Other flags */
+#define FCGI_CF_MPXS_CONNS 0x00000100 /* connection multiplexing is supported */
+#define FCGI_CF_ABRTS_SENT 0x00000200 /* a record ABORT was successfully sent to all active streams */
+#define FCGI_CF_ABRTS_FAILED 0x00000400 /* failed to abort processing of all streams */
+#define FCGI_CF_WAIT_FOR_HS 0x00000800 /* We did check that at least a stream was waiting for handshake */
+#define FCGI_CF_KEEP_CONN 0x00001000 /* HAProxy is responsible for closing the connection */
+#define FCGI_CF_GET_VALUES 0x00002000 /* retrieve settings */
+
+/* FCGI connection state (fcgi_conn->state) */
+enum fcgi_conn_st {
+ FCGI_CS_INIT = 0, /* init done, waiting for sending GET_VALUES record */
+ FCGI_CS_SETTINGS, /* GET_VALUES sent, waiting for the GET_VALUES_RESULT record */
+ FCGI_CS_RECORD_H, /* GET_VALUES_RESULT received, waiting for a record header */
+ FCGI_CS_RECORD_D, /* Record header OK, waiting for a record data */
+ FCGI_CS_RECORD_P, /* Record processed, remains the padding */
+ FCGI_CS_CLOSED, /* abort requests if necessary and close the connection ASAP */
+ FCGI_CS_ENTRIES
+} __attribute__((packed));
+
+/* 32 buffers: one for the ring's root, rest for the mbuf itself */
+#define FCGI_C_MBUF_CNT 32
+
+/* Size for a record header (also size of empty record) */
+#define FCGI_RECORD_HEADER_SZ 8
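+
+/* As a reminder (FastCGI specification, section 8), those 8 bytes are laid
+ * out as follows:
+ *   byte 0   : protocol version (1)
+ *   byte 1   : record type
+ *   bytes 2-3: request id (big endian)
+ *   bytes 4-5: content length (big endian)
+ *   byte 6   : padding length
+ *   byte 7   : reserved
+ */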
+
+/* FCGI connection descriptor */
+struct fcgi_conn {
+ struct connection *conn;
+
+ enum fcgi_conn_st state; /* FCGI connection state */
+ int16_t max_id; /* highest ID known on this connection, <0 before mgmt records */
+ uint32_t streams_limit; /* maximum number of concurrent streams the peer supports */
+ uint32_t flags; /* Connection flags: FCGI_CF_* */
+
+ int16_t dsi; /* demux stream ID (<0 = idle) */
+ uint16_t drl; /* demux record length (if dsi >= 0) */
+ uint8_t drt; /* demux record type (if dsi >= 0) */
+ uint8_t drp; /* demux record padding (if dsi >= 0) */
+
+ struct buffer dbuf; /* demux buffer */
+ struct buffer mbuf[FCGI_C_MBUF_CNT]; /* mux buffers (ring) */
+
+ int timeout; /* idle timeout duration in ticks */
+ int shut_timeout; /* idle timeout duration in ticks after shutdown */
+ unsigned int nb_streams; /* number of streams in the tree */
+ unsigned int nb_sc; /* number of attached stream connectors */
+ unsigned int nb_reserved; /* number of reserved streams */
+ unsigned int stream_cnt; /* total number of streams seen */
+
+ struct proxy *proxy; /* the proxy this connection was created for */
+ struct fcgi_app *app; /* FCGI application used by this mux */
+ struct task *task; /* timeout management task */
+ struct eb_root streams_by_id; /* all active streams by their ID */
+
+ struct list send_list; /* list of blocked streams requesting to send */
+
+ struct buffer_wait buf_wait; /* Wait list for buffer allocation */
+ struct wait_event wait_event; /* To be used if we're waiting for I/Os */
+};
+
+
+/* FCGI stream state, in fcgi_strm->state */
+enum fcgi_strm_st {
+ FCGI_SS_IDLE = 0,
+ FCGI_SS_OPEN,
+ FCGI_SS_HREM, // half-closed(remote)
+ FCGI_SS_HLOC, // half-closed(local)
+ FCGI_SS_ERROR,
+ FCGI_SS_CLOSED,
+ FCGI_SS_ENTRIES
+} __attribute__((packed));
+
+
+/* FCGI stream flags (32 bits) */
+#define FCGI_SF_NONE 0x00000000
+#define FCGI_SF_ES_RCVD 0x00000001 /* end-of-stream received (empty STDOUT or END_REQUEST record) */
+#define FCGI_SF_ES_SENT 0x00000002 /* end-of-stream sent (empty STDIN record) */
+#define FCGI_SF_EP_SENT 0x00000004 /* end-of-param sent (empty PARAMS record) */
+#define FCGI_SF_ABRT_SENT 0x00000008 /* abort sent (ABORT_REQUEST record) */
+
+/* Stream flags indicating the reason the stream is blocked */
+#define FCGI_SF_BLK_MBUSY 0x00000010 /* blocked waiting for mux access (transient) */
+#define FCGI_SF_BLK_MROOM 0x00000020 /* blocked waiting for room in the mux */
+#define FCGI_SF_BLK_ANY 0x00000030 /* any of the reasons above */
+
+#define FCGI_SF_BEGIN_SENT 0x00000100 /* a BEGIN_REQUEST record was sent for this stream */
+#define FCGI_SF_OUTGOING_DATA 0x00000200 /* set whenever we've seen outgoing data */
+#define FCGI_SF_NOTIFIED 0x00000400 /* a paused stream was notified to try to send again */
+
+#define FCGI_SF_WANT_SHUTR 0x00001000 /* a stream couldn't shutr() (mux full/busy) */
+#define FCGI_SF_WANT_SHUTW 0x00002000 /* a stream couldn't shutw() (mux full/busy) */
+
+
+/* FCGI stream descriptor */
+struct fcgi_strm {
+ struct sedesc *sd;
+ struct session *sess;
+ struct fcgi_conn *fconn;
+
+ int32_t id; /* stream ID */
+
+ uint32_t flags; /* Stream flags: FCGI_SF_* */
+ enum fcgi_strm_st state; /* FCGI stream state */
+ int proto_status; /* FCGI_PS_* */
+
+ struct h1m h1m; /* response parser state for H1 */
+
+ struct buffer rxbuf; /* receive buffer, always valid (buf_empty or real buffer) */
+
+ struct eb32_node by_id; /* place in fcgi_conn's streams_by_id */
+ struct wait_event *subs; /* Address of the wait_event the stream connector associated is waiting on */
+ struct list send_list; /* To be used when adding in fcgi_conn->send_list */
+ struct tasklet *shut_tl; /* deferred shutdown tasklet, to retry to close after we failed to by lack of space */
+};
+
+/* Flags representing all default FCGI parameters */
+#define FCGI_SP_CGI_GATEWAY 0x00000001
+#define FCGI_SP_DOC_ROOT 0x00000002
+#define FCGI_SP_SCRIPT_NAME 0x00000004
+#define FCGI_SP_PATH_INFO 0x00000008
+#define FCGI_SP_REQ_URI 0x00000010
+#define FCGI_SP_REQ_METH 0x00000020
+#define FCGI_SP_REQ_QS 0x00000040
+#define FCGI_SP_SRV_PORT 0x00000080
+#define FCGI_SP_SRV_PROTO 0x00000100
+#define FCGI_SP_SRV_NAME 0x00000200
+#define FCGI_SP_REM_ADDR 0x00000400
+#define FCGI_SP_REM_PORT 0x00000800
+#define FCGI_SP_SCRIPT_FILE 0x00001000
+#define FCGI_SP_PATH_TRANS 0x00002000
+#define FCGI_SP_CONT_LEN 0x00004000
+#define FCGI_SP_HTTPS 0x00008000
+#define FCGI_SP_SRV_SOFT 0x00010000
+#define FCGI_SP_MASK 0x0001FFFF
+#define FCGI_SP_URI_MASK (FCGI_SP_SCRIPT_NAME|FCGI_SP_PATH_INFO|FCGI_SP_REQ_QS)
+
+/* FCGI parameters used when PARAMS record is sent */
+struct fcgi_strm_params {
+ uint32_t mask;
+ struct ist docroot;
+ struct ist scriptname;
+ struct ist pathinfo;
+ struct ist meth;
+ struct ist uri;
+ struct ist vsn;
+ struct ist qs;
+ struct ist srv_name;
+ struct ist srv_port;
+ struct ist rem_addr;
+ struct ist rem_port;
+ struct ist cont_len;
+ struct ist srv_soft;
+ int https;
+ struct buffer *p;
+};
+
+/* Maximum amount of data we're OK with re-aligning for buffer optimizations */
+#define MAX_DATA_REALIGN 1024
+
+/* trace source and events */
+static void fcgi_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * fconn - internal FCGI connection
+ * fstrm - internal FCGI stream
+ * strm - application layer
+ * rx - data receipt
+ * tx - data transmission
+ * rsp - response parsing
+ */
+static const struct trace_event fcgi_trace_events[] = {
+#define FCGI_EV_FCONN_NEW (1ULL << 0)
+ { .mask = FCGI_EV_FCONN_NEW, .name = "fconn_new", .desc = "new FCGI connection" },
+#define FCGI_EV_FCONN_RECV (1ULL << 1)
+ { .mask = FCGI_EV_FCONN_RECV, .name = "fconn_recv", .desc = "Rx on FCGI connection" },
+#define FCGI_EV_FCONN_SEND (1ULL << 2)
+ { .mask = FCGI_EV_FCONN_SEND, .name = "fconn_send", .desc = "Tx on FCGI connection" },
+#define FCGI_EV_FCONN_BLK (1ULL << 3)
+ { .mask = FCGI_EV_FCONN_BLK, .name = "fconn_blk", .desc = "FCGI connection blocked" },
+#define FCGI_EV_FCONN_WAKE (1ULL << 4)
+ { .mask = FCGI_EV_FCONN_WAKE, .name = "fconn_wake", .desc = "FCGI connection woken up" },
+#define FCGI_EV_FCONN_END (1ULL << 5)
+ { .mask = FCGI_EV_FCONN_END, .name = "fconn_end", .desc = "FCGI connection terminated" },
+#define FCGI_EV_FCONN_ERR (1ULL << 6)
+ { .mask = FCGI_EV_FCONN_ERR, .name = "fconn_err", .desc = "error on FCGI connection" },
+
+#define FCGI_EV_RX_FHDR (1ULL << 7)
+ { .mask = FCGI_EV_RX_FHDR, .name = "rx_fhdr", .desc = "FCGI record header received" },
+#define FCGI_EV_RX_RECORD (1ULL << 8)
+ { .mask = FCGI_EV_RX_RECORD, .name = "rx_record", .desc = "receipt of any FCGI record" },
+#define FCGI_EV_RX_EOI (1ULL << 9)
+ { .mask = FCGI_EV_RX_EOI, .name = "rx_eoi", .desc = "receipt of end of FCGI input" },
+#define FCGI_EV_RX_GETVAL (1ULL << 10)
+ { .mask = FCGI_EV_RX_GETVAL, .name = "rx_get_values", .desc = "receipt of FCGI GET_VALUES_RESULT record" },
+#define FCGI_EV_RX_STDOUT (1ULL << 11)
+ { .mask = FCGI_EV_RX_STDOUT, .name = "rx_stdout", .desc = "receipt of FCGI STDOUT record" },
+#define FCGI_EV_RX_STDERR (1ULL << 12)
+ { .mask = FCGI_EV_RX_STDERR, .name = "rx_stderr", .desc = "receipt of FCGI STDERR record" },
+#define FCGI_EV_RX_ENDREQ (1ULL << 13)
+ { .mask = FCGI_EV_RX_ENDREQ, .name = "rx_end_req", .desc = "receipt of FCGI END_REQUEST record" },
+
+#define FCGI_EV_TX_RECORD (1ULL << 14)
+ { .mask = FCGI_EV_TX_RECORD, .name = "tx_record", .desc = "transmission of any FCGI record" },
+#define FCGI_EV_TX_EOI (1ULL << 15)
+ { .mask = FCGI_EV_TX_EOI, .name = "tx_eoi", .desc = "transmission of FCGI end of input" },
+#define FCGI_EV_TX_BEGREQ (1ULL << 16)
+ { .mask = FCGI_EV_TX_BEGREQ, .name = "tx_begin_request", .desc = "transmission of FCGI BEGIN_REQUEST record" },
+#define FCGI_EV_TX_GETVAL (1ULL << 17)
+ { .mask = FCGI_EV_TX_GETVAL, .name = "tx_get_values", .desc = "transmission of FCGI GET_VALUES record" },
+#define FCGI_EV_TX_PARAMS (1ULL << 18)
+ { .mask = FCGI_EV_TX_PARAMS, .name = "tx_params", .desc = "transmission of FCGI PARAMS record" },
+#define FCGI_EV_TX_STDIN (1ULL << 19)
+ { .mask = FCGI_EV_TX_STDIN, .name = "tx_stdin", .desc = "transmission of FCGI STDIN record" },
+#define FCGI_EV_TX_ABORT (1ULL << 20)
+ { .mask = FCGI_EV_TX_ABORT, .name = "tx_abort", .desc = "transmission of FCGI ABORT record" },
+
+#define FCGI_EV_RSP_DATA (1ULL << 21)
+ { .mask = FCGI_EV_RSP_DATA, .name = "rsp_data", .desc = "parse any data of H1 response" },
+#define FCGI_EV_RSP_EOM (1ULL << 22)
+ { .mask = FCGI_EV_RSP_EOM, .name = "rsp_eom", .desc = "reach the end of message of H1 response" },
+#define FCGI_EV_RSP_HDRS (1ULL << 23)
+ { .mask = FCGI_EV_RSP_HDRS, .name = "rsp_headers", .desc = "parse headers of H1 response" },
+#define FCGI_EV_RSP_BODY (1ULL << 24)
+ { .mask = FCGI_EV_RSP_BODY, .name = "rsp_body", .desc = "parse body part of H1 response" },
+#define FCGI_EV_RSP_TLRS (1ULL << 25)
+ { .mask = FCGI_EV_RSP_TLRS, .name = "rsp_trailers", .desc = "parse trailers of H1 response" },
+
+#define FCGI_EV_FSTRM_NEW (1ULL << 26)
+ { .mask = FCGI_EV_FSTRM_NEW, .name = "fstrm_new", .desc = "new FCGI stream" },
+#define FCGI_EV_FSTRM_BLK (1ULL << 27)
+ { .mask = FCGI_EV_FSTRM_BLK, .name = "fstrm_blk", .desc = "FCGI stream blocked" },
+#define FCGI_EV_FSTRM_END (1ULL << 28)
+ { .mask = FCGI_EV_FSTRM_END, .name = "fstrm_end", .desc = "FCGI stream terminated" },
+#define FCGI_EV_FSTRM_ERR (1ULL << 29)
+ { .mask = FCGI_EV_FSTRM_ERR, .name = "fstrm_err", .desc = "error on FCGI stream" },
+
+#define FCGI_EV_STRM_NEW (1ULL << 30)
+ { .mask = FCGI_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define FCGI_EV_STRM_RECV (1ULL << 31)
+ { .mask = FCGI_EV_STRM_RECV, .name = "strm_recv", .desc = "receiving data for stream" },
+#define FCGI_EV_STRM_SEND (1ULL << 32)
+ { .mask = FCGI_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" },
+#define FCGI_EV_STRM_FULL (1ULL << 33)
+ { .mask = FCGI_EV_STRM_FULL, .name = "strm_full", .desc = "stream buffer full" },
+#define FCGI_EV_STRM_WAKE (1ULL << 34)
+ { .mask = FCGI_EV_STRM_WAKE, .name = "strm_wake", .desc = "stream woken up" },
+#define FCGI_EV_STRM_SHUT (1ULL << 35)
+ { .mask = FCGI_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define FCGI_EV_STRM_END (1ULL << 36)
+ { .mask = FCGI_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define FCGI_EV_STRM_ERR (1ULL << 37)
+ { .mask = FCGI_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+
+ { }
+};
+
+static const struct name_desc fcgi_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="fstrm", .desc="FCGI stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+
+static const struct name_desc fcgi_trace_decoding[] = {
+#define FCGI_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define FCGI_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only fconn/fstrm state and flags, no real decoding" },
+#define FCGI_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or htx info when available" },
+#define FCGI_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or record decoding when available" },
+#define FCGI_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_fcgi __read_mostly = {
+ .name = IST("fcgi"),
+ .desc = "FastCGI multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = fcgi_trace,
+ .known_events = fcgi_trace_events,
+ .lockon_args = fcgi_trace_lockon_args,
+ .decoding = fcgi_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_fcgi
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* FCGI connection and stream pools */
+DECLARE_STATIC_POOL(pool_head_fcgi_conn, "fcgi_conn", sizeof(struct fcgi_conn));
+DECLARE_STATIC_POOL(pool_head_fcgi_strm, "fcgi_strm", sizeof(struct fcgi_strm));
+
+struct task *fcgi_timeout_task(struct task *t, void *context, unsigned int state);
+static int fcgi_process(struct fcgi_conn *fconn);
+/* fcgi_io_cb is exported to see it resolved in "show fd" */
+struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state);
+static inline struct fcgi_strm *fcgi_conn_st_by_id(struct fcgi_conn *fconn, int id);
+struct task *fcgi_deferred_shut(struct task *t, void *ctx, unsigned int state);
+static struct fcgi_strm *fcgi_stconn_new(struct fcgi_conn *fconn, struct stconn *sc, struct session *sess);
+static void fcgi_strm_notify_recv(struct fcgi_strm *fstrm);
+static void fcgi_strm_notify_send(struct fcgi_strm *fstrm);
+static void fcgi_strm_alert(struct fcgi_strm *fstrm);
+static int fcgi_strm_send_abort(struct fcgi_conn *fconn, struct fcgi_strm *fstrm);
+
+/* a dummy closed endpoint */
+static const struct sedesc closed_ep = {
+ .sc = NULL,
+ .flags = SE_FL_DETACHED,
+};
+
+/* a dummy management stream */
+static const struct fcgi_strm *fcgi_mgmt_stream = &(const struct fcgi_strm){
+ .sd = (struct sedesc*)&closed_ep,
+ .fconn = NULL,
+ .state = FCGI_SS_CLOSED,
+ .flags = FCGI_SF_NONE,
+ .id = 0,
+};
+
+/* and a dummy idle stream for use with any unknown stream */
+static const struct fcgi_strm *fcgi_unknown_stream = &(const struct fcgi_strm){
+ .sd = (struct sedesc*)&closed_ep,
+ .fconn = NULL,
+ .state = FCGI_SS_IDLE,
+ .flags = FCGI_SF_NONE,
+ .id = 0,
+};
+
+/* returns a fconn state as an abbreviated 3-letter string, or "???" if unknown */
+static inline const char *fconn_st_to_str(enum fcgi_conn_st st)
+{
+ switch (st) {
+ case FCGI_CS_INIT : return "INI";
+ case FCGI_CS_SETTINGS : return "STG";
+ case FCGI_CS_RECORD_H : return "RDH";
+ case FCGI_CS_RECORD_D : return "RDD";
+ case FCGI_CS_RECORD_P : return "RDP";
+ case FCGI_CS_CLOSED : return "CLO";
+ default : return "???";
+ }
+}
+
+/* returns a fstrm state as an abbreviated 3-letter string, or "???" if unknown */
+static inline const char *fstrm_st_to_str(enum fcgi_strm_st st)
+{
+ switch (st) {
+ case FCGI_SS_IDLE : return "IDL";
+ case FCGI_SS_OPEN : return "OPN";
+ case FCGI_SS_HREM : return "RCL";
+ case FCGI_SS_HLOC : return "HCL";
+ case FCGI_SS_ERROR : return "ERR";
+ case FCGI_SS_CLOSED : return "CLO";
+ default : return "???";
+ }
+}
+
+/* returns the stconn associated with the FCGI stream */
+static forceinline struct stconn *fcgi_strm_sc(const struct fcgi_strm *fstrm)
+{
+ return fstrm->sd->sc;
+}
+
+
+/* the FCGI traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive fconn), that arg2, if non-null, is of type fstrm,
+ * and that arg3, if non-null, is a htx for rx/tx headers.
+ */
+static void fcgi_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ struct fcgi_conn *fconn = conn ? conn->ctx : NULL;
+ const struct fcgi_strm *fstrm = a2;
+ const struct htx *htx = a3;
+ const size_t *val = a4;
+
+ if (!fconn)
+ fconn = (fstrm ? fstrm->fconn : NULL);
+
+ if (!fconn || src->verbosity < FCGI_VERB_CLEAN)
+ return;
+
+ /* Display the response state if fstrm is defined */
+ if (fstrm)
+ chunk_appendf(&trace_buf, " [rsp:%s]", h1m_state_str(fstrm->h1m.state));
+
+ if (src->verbosity == FCGI_VERB_CLEAN)
+ return;
+
+ /* Display the value to the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+ /* Display status-line if possible (verbosity > MINIMAL) */
+ if (src->verbosity > FCGI_VERB_MINIMAL && htx && htx_nbblks(htx)) {
+ const struct htx_blk *blk = __htx_get_head_blk(htx);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " - \"%.*s %.*s %.*s\"",
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+
+ /* Display fconn info and, if defined, fstrm info */
+ chunk_appendf(&trace_buf, " - fconn=%p(%s,0x%08x)", fconn, fconn_st_to_str(fconn->state), fconn->flags);
+ if (fstrm)
+ chunk_appendf(&trace_buf, " fstrm=%p(%d,%s,0x%08x)", fstrm, fstrm->id, fstrm_st_to_str(fstrm->state), fstrm->flags);
+
+ if (!fstrm || fstrm->id <= 0)
+ chunk_appendf(&trace_buf, " dsi=%d", fconn->dsi);
+ if (fconn->dsi >= 0 && (mask & FCGI_EV_RX_FHDR))
+ chunk_appendf(&trace_buf, " drt=%s", fcgi_rt_str(fconn->drt));
+
+ if (src->verbosity == FCGI_VERB_MINIMAL)
+ return;
+
+ /* Display mbuf and dbuf info (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER) {
+ if (src->verbosity == FCGI_VERB_COMPLETE ||
+ (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_FCONN_RECV|FCGI_EV_RX_RECORD))))
+ chunk_appendf(&trace_buf, " dbuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&fconn->dbuf), b_orig(&fconn->dbuf),
+ (unsigned int)b_head_ofs(&fconn->dbuf), (unsigned int)b_size(&fconn->dbuf));
+ if (src->verbosity == FCGI_VERB_COMPLETE ||
+ (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_FCONN_SEND|FCGI_EV_TX_RECORD)))) {
+ struct buffer *hmbuf = br_head(fconn->mbuf);
+ struct buffer *tmbuf = br_tail(fconn->mbuf);
+
+ chunk_appendf(&trace_buf, " .mbuf=[%u..%u|%u],h=[%u@%p+%u/%u],t=[%u@%p+%u/%u]",
+ br_head_idx(fconn->mbuf), br_tail_idx(fconn->mbuf), br_size(fconn->mbuf),
+ (unsigned int)b_data(hmbuf), b_orig(hmbuf),
+ (unsigned int)b_head_ofs(hmbuf), (unsigned int)b_size(hmbuf),
+ (unsigned int)b_data(tmbuf), b_orig(tmbuf),
+ (unsigned int)b_head_ofs(tmbuf), (unsigned int)b_size(tmbuf));
+ }
+
+ if (fstrm && (src->verbosity == FCGI_VERB_COMPLETE ||
+ (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_STRM_RECV|FCGI_EV_RSP_DATA)))))
+ chunk_appendf(&trace_buf, " rxbuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&fstrm->rxbuf), b_orig(&fstrm->rxbuf),
+ (unsigned int)b_head_ofs(&fstrm->rxbuf), (unsigned int)b_size(&fstrm->rxbuf));
+ }
+
+ /* Display htx info if defined (level > USER) */
+ if (src->level > TRACE_LEVEL_USER && htx) {
+ int full = 0;
+
+ /* Full htx info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == FCGI_VERB_COMPLETE)
+ full = 1;
+ else if (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_RSP_HDRS|FCGI_EV_TX_PARAMS)))
+ full = 1;
+ }
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htx, full);
+ }
+}
+
+/*****************************************************/
+/* functions below are for dynamic buffer management */
+/*****************************************************/
+
+/* Indicates whether or not we may call the fcgi_recv() function to attempt
+ * to receive data into the buffer and/or demux pending data. The condition is
+ * a bit complex due to some API limits for now. The rules are the following :
+ * - if an error or a shutdown was detected on the connection and the buffer
+ * is empty, we must not attempt to receive
+ * - if the demux buf failed to be allocated, we must not try to receive and
+ * we know there is nothing pending
+ * - if no flag indicates a blocking condition, we may attempt to receive,
+ * regardless of whether the demux buffer is full or not, so that only
+ * the demux part decides whether or not to block. This is needed because
+ * the connection API indeed prevents us from re-enabling receipt that is
+ * already enabled in a polled state, so we must always immediately stop
+ * as soon as the demux can't proceed so as never to hit an end of read
+ * with data pending in the buffers.
+ * - otherwise we may not attempt to receive
+ */
+static inline int fcgi_recv_allowed(const struct fcgi_conn *fconn)
+{
+ if (b_data(&fconn->dbuf) == 0 &&
+ (fconn->state == FCGI_CS_CLOSED ||
+ fconn->conn->flags & CO_FL_ERROR ||
+ conn_xprt_read0_pending(fconn->conn)))
+ return 0;
+
+ if (!(fconn->flags & FCGI_CF_DEM_DALLOC) &&
+ !(fconn->flags & FCGI_CF_DEM_BLOCK_ANY))
+ return 1;
+
+ return 0;
+}
+
+/* Restarts reading on the connection if it was not enabled */
+static inline void fcgi_conn_restart_reading(const struct fcgi_conn *fconn, int consider_buffer)
+{
+ if (!fcgi_recv_allowed(fconn))
+ return;
+ if ((!consider_buffer || !b_data(&fconn->dbuf)) &&
+ (fconn->wait_event.events & SUB_RETRY_RECV))
+ return;
+ tasklet_wakeup(fconn->wait_event.tasklet);
+}
+
+
+/* Tries to grab a buffer and to re-enable processing on mux <target>. The
+ * fcgi_conn flags are used to figure what buffer was requested. It returns 1 if
+ * the allocation succeeds, in which case the connection is woken up, or 0 if
+ * it's impossible to wake up and we prefer to be woken up later.
+ */
+static int fcgi_buf_available(void *target)
+{
+ struct fcgi_conn *fconn = target;
+ struct fcgi_strm *fstrm;
+
+ if ((fconn->flags & FCGI_CF_DEM_DALLOC) && b_alloc(&fconn->dbuf)) {
+ TRACE_STATE("unblocking fconn, dbuf allocated", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn);
+ fconn->flags &= ~FCGI_CF_DEM_DALLOC;
+ fcgi_conn_restart_reading(fconn, 1);
+ return 1;
+ }
+
+ if ((fconn->flags & FCGI_CF_MUX_MALLOC) && b_alloc(br_tail(fconn->mbuf))) {
+ TRACE_STATE("unblocking fconn, mbuf allocated", FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn);
+ fconn->flags &= ~FCGI_CF_MUX_MALLOC;
+ if (fconn->flags & FCGI_CF_DEM_MROOM) {
+ fconn->flags &= ~FCGI_CF_DEM_MROOM;
+ fcgi_conn_restart_reading(fconn, 1);
+ }
+ return 1;
+ }
+
+ if ((fconn->flags & FCGI_CF_DEM_SALLOC) &&
+ (fstrm = fcgi_conn_st_by_id(fconn, fconn->dsi)) && fcgi_strm_sc(fstrm) &&
+ b_alloc(&fstrm->rxbuf)) {
+ TRACE_STATE("unblocking fstrm, rxbuf allocated", FCGI_EV_STRM_RECV|FCGI_EV_FSTRM_BLK|FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ fconn->flags &= ~FCGI_CF_DEM_SALLOC;
+ fcgi_conn_restart_reading(fconn, 1);
+ fcgi_strm_notify_recv(fstrm);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline struct buffer *fcgi_get_buf(struct fcgi_conn *fconn, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&fconn->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ fconn->buf_wait.target = fconn;
+ fconn->buf_wait.wakeup_cb = fcgi_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &fconn->buf_wait.list);
+ }
+ return buf;
+}
+
+static inline void fcgi_release_buf(struct fcgi_conn *fconn, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(NULL, 1);
+ }
+}
+
+static inline void fcgi_release_mbuf(struct fcgi_conn *fconn)
+{
+ struct buffer *buf;
+ unsigned int count = 0;
+
+ while (b_size(buf = br_head_pick(fconn->mbuf))) {
+ b_free(buf);
+ count++;
+ }
+ if (count)
+ offer_buffers(NULL, count);
+}
+
+/* Returns the number of allocatable outgoing streams for the connection,
+ * taking the number of reserved streams into account.
+ */
+static inline int fcgi_streams_left(const struct fcgi_conn *fconn)
+{
+ int ret;
+
+ ret = (unsigned int)(0x7FFF - fconn->max_id) - fconn->nb_reserved - 1;
+ if (ret < 0)
+ ret = 0;
+ return ret;
+}
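+
+/* Worked example (for illustration only): on a fresh connection, max_id is -1
+ * and nb_reserved is 0, so the formula above yields (0x7FFF - (-1)) - 0 - 1 =
+ * 32767 allocatable stream IDs. Once max_id reaches 0x7FFE with one stream
+ * still reserved, it yields (0x7FFF - 0x7FFE) - 1 - 1 = -1, clamped to 0.
+ */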
+
+/* Returns the number of streams in use on a connection to figure if it's
+ * idle or not. We check nb_sc and not nb_streams as the caller will want
+ * to know if it was the last one after a detach().
+ */
+static int fcgi_used_streams(struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+
+ return fconn->nb_sc;
+}
+
+/* Returns the number of concurrent streams available on the connection */
+static int fcgi_avail_streams(struct connection *conn)
+{
+ struct server *srv = objt_server(conn->target);
+ struct fcgi_conn *fconn = conn->ctx;
+ int ret1, ret2;
+
+ /* Don't open new stream if the connection is closed */
+ if (fconn->state == FCGI_CS_CLOSED)
+ return 0;
+
+ /* May be negative if this setting has changed */
+ ret1 = (fconn->streams_limit - fconn->nb_streams);
+
+ /* we must also consider the limit imposed by stream IDs */
+ ret2 = fcgi_streams_left(fconn);
+ ret1 = MIN(ret1, ret2);
+ if (ret1 > 0 && srv && srv->max_reuse >= 0) {
+ ret2 = ((fconn->stream_cnt <= srv->max_reuse) ? srv->max_reuse - fconn->stream_cnt + 1: 0);
+ ret1 = MIN(ret1, ret2);
+ }
+ return ret1;
+}
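+
+/* Worked example (for illustration only): with streams_limit=10 and
+ * nb_streams=2, ret1 starts at 8. Assuming the stream ID space leaves more
+ * than that, if the server is configured with max_reuse=3 and stream_cnt=2
+ * streams were already created, ret2 becomes 3 - 2 + 1 = 2, so only 2 more
+ * streams may be opened on this connection.
+ */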
+
+/*****************************************************************/
+/* functions below are dedicated to the mux setup and management */
+/*****************************************************************/
+
+/* Initializes the mux once it's attached. Only outgoing connections are
+ * supported. So the context is already initialized before installing the
+ * mux. <input> is always used as the input buffer and may contain data. It is
+ * the caller's responsibility not to reuse it anymore. Returns < 0 on error.
+ */
+static int fcgi_init(struct connection *conn, struct proxy *px, struct session *sess,
+ struct buffer *input)
+{
+ struct fcgi_conn *fconn;
+ struct fcgi_strm *fstrm;
+ struct fcgi_app *app = get_px_fcgi_app(px);
+ struct task *t = NULL;
+ void *conn_ctx = conn->ctx;
+
+ TRACE_ENTER(FCGI_EV_FCONN_NEW);
+
+ if (!app) {
+ TRACE_ERROR("No FCGI app found, don't create fconn", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ goto fail_conn;
+ }
+
+ fconn = pool_alloc(pool_head_fcgi_conn);
+ if (!fconn) {
+ TRACE_ERROR("fconn allocation failure", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ goto fail_conn;
+ }
+
+ fconn->shut_timeout = fconn->timeout = px->timeout.server;
+ if (tick_isset(px->timeout.serverfin))
+ fconn->shut_timeout = px->timeout.serverfin;
+
+ fconn->flags = FCGI_CF_NONE;
+
+ /* Retrieve useful info from the FCGI app */
+ if (app->flags & FCGI_APP_FL_KEEP_CONN)
+ fconn->flags |= FCGI_CF_KEEP_CONN;
+ if (app->flags & FCGI_APP_FL_GET_VALUES)
+ fconn->flags |= FCGI_CF_GET_VALUES;
+ if (app->flags & FCGI_APP_FL_MPXS_CONNS)
+ fconn->flags |= FCGI_CF_MPXS_CONNS;
+
+ fconn->proxy = px;
+ fconn->app = app;
+ fconn->task = NULL;
+ if (tick_isset(fconn->timeout)) {
+ t = task_new_here();
+ if (!t) {
+ TRACE_ERROR("fconn task allocation failure", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ goto fail;
+ }
+
+ fconn->task = t;
+ t->process = fcgi_timeout_task;
+ t->context = fconn;
+ t->expire = tick_add(now_ms, fconn->timeout);
+ }
+
+ fconn->wait_event.tasklet = tasklet_new();
+ if (!fconn->wait_event.tasklet)
+ goto fail;
+ fconn->wait_event.tasklet->process = fcgi_io_cb;
+ fconn->wait_event.tasklet->context = fconn;
+ fconn->wait_event.events = 0;
+
+ /* Initialise the context. */
+ fconn->state = FCGI_CS_INIT;
+ fconn->conn = conn;
+ fconn->streams_limit = app->maxreqs;
+ fconn->max_id = -1;
+ fconn->nb_streams = 0;
+ fconn->nb_sc = 0;
+ fconn->nb_reserved = 0;
+ fconn->stream_cnt = 0;
+
+ fconn->dbuf = *input;
+ fconn->dsi = -1;
+
+ br_init(fconn->mbuf, sizeof(fconn->mbuf) / sizeof(fconn->mbuf[0]));
+ fconn->streams_by_id = EB_ROOT;
+ LIST_INIT(&fconn->send_list);
+ LIST_INIT(&fconn->buf_wait.list);
+
+ conn->ctx = fconn;
+
+ if (t)
+ task_queue(t);
+
+ /* FIXME: this is temporary, for outgoing connections we need to
+ * immediately allocate a stream until the code is modified so that the
+ * caller calls ->attach(). For now the outgoing sc is stored as
+ * conn->ctx by the caller and saved in conn_ctx.
+ */
+ fstrm = fcgi_stconn_new(fconn, conn_ctx, sess);
+ if (!fstrm)
+ goto fail;
+
+
+ /* Prepare to read something */
+ fcgi_conn_restart_reading(fconn, 1);
+ TRACE_LEAVE(FCGI_EV_FCONN_NEW, conn);
+ return 0;
+
+ fail:
+ task_destroy(t);
+ if (fconn->wait_event.tasklet)
+ tasklet_free(fconn->wait_event.tasklet);
+ pool_free(pool_head_fcgi_conn, fconn);
+ fail_conn:
+ conn->ctx = conn_ctx; // restore saved ctx
+ TRACE_DEVEL("leaving in error", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ return -1;
+}
+
+/* Returns the next allocatable outgoing stream ID for the FCGI connection, or
+ * -1 if no more is allocatable.
+ */
+static inline int32_t fcgi_conn_get_next_sid(const struct fcgi_conn *fconn)
+{
+ int32_t id = (fconn->max_id + 1) | 1;
+
+ if ((id & 0x80000000U))
+ id = -1;
+ return id;
+}
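+
+/* Worked example (for illustration only): the "| 1" keeps IDs odd, so the
+ * sequence goes max_id=-1 -> 1, max_id=1 -> 3, max_id=3 -> 5, and so on. The
+ * sign-bit check returns -1 once bit 31 would be set, while the allocatable
+ * range is further capped to 0x7FFF by fcgi_streams_left().
+ */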
+
+/* Returns the stream associated with id <id> or NULL if not found */
+static inline struct fcgi_strm *fcgi_conn_st_by_id(struct fcgi_conn *fconn, int id)
+{
+ struct eb32_node *node;
+
+ if (id == 0)
+ return (struct fcgi_strm *)fcgi_mgmt_stream;
+
+ if (id > fconn->max_id)
+ return (struct fcgi_strm *)fcgi_unknown_stream;
+
+ node = eb32_lookup(&fconn->streams_by_id, id);
+ if (!node)
+ return (struct fcgi_strm *)fcgi_unknown_stream;
+ return container_of(node, struct fcgi_strm, by_id);
+}
+
+
+/* Release function. This one should be called to free all resources allocated
+ * to the mux.
+ */
+static void fcgi_release(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+
+ TRACE_POINT(FCGI_EV_FCONN_END);
+
+ if (LIST_INLIST(&fconn->buf_wait.list))
+ LIST_DEL_INIT(&fconn->buf_wait.list);
+
+ fcgi_release_buf(fconn, &fconn->dbuf);
+ fcgi_release_mbuf(fconn);
+
+ if (fconn->task) {
+ fconn->task->context = NULL;
+ task_wakeup(fconn->task, TASK_WOKEN_OTHER);
+ fconn->task = NULL;
+ }
+ if (fconn->wait_event.tasklet)
+ tasklet_free(fconn->wait_event.tasklet);
+ if (conn && fconn->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, fconn->wait_event.events,
+ &fconn->wait_event);
+
+ pool_free(pool_head_fcgi_conn, fconn);
+
+ if (conn) {
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", FCGI_EV_FCONN_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+}
+
+/* Detect a pending read0 for a FCGI connection. It happens if a read0 is
+ * pending on the connection AND if there is no more data in the demux
+ * buffer. The function returns 1 to report a read0 or 0 otherwise.
+ */
+static int fcgi_conn_read0_pending(struct fcgi_conn *fconn)
+{
+ if (conn_xprt_read0_pending(fconn->conn) && !b_data(&fconn->dbuf))
+ return 1;
+ return 0;
+}
+
+
+/* Returns true if the FCGI connection must be released */
+static inline int fcgi_conn_is_dead(struct fcgi_conn *fconn)
+{
+ if (eb_is_empty(&fconn->streams_by_id) && /* don't close if streams exist */
+ (!(fconn->flags & FCGI_CF_KEEP_CONN) || /* don't keep the connection alive */
+ (fconn->conn->flags & CO_FL_ERROR) || /* errors close immediately */
+ (fconn->state == FCGI_CS_CLOSED && !fconn->task) ||/* a timeout stroke earlier */
+ (!(fconn->conn->owner)) || /* Nobody's left to take care of the connection, drop it now */
+ (!br_data(fconn->mbuf) && /* mux buffer empty, also process clean events below */
+ conn_xprt_read0_pending(fconn->conn))))
+ return 1;
+ return 0;
+}
+
+
+/********************************************************/
+/* functions below are for the FCGI protocol processing */
+/********************************************************/
+
+/* Marks an error on the stream. */
+static inline void fcgi_strm_error(struct fcgi_strm *fstrm)
+{
+ if (fstrm->id && fstrm->state != FCGI_SS_ERROR) {
+ TRACE_POINT(FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ if (fstrm->state < FCGI_SS_ERROR) {
+ fstrm->state = FCGI_SS_ERROR;
+ TRACE_STATE("switching to ERROR", FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ }
+ se_fl_set_error(fstrm->sd);
+ }
+}
+
+/* Attempts to notify the data layer of recv availability */
+static void fcgi_strm_notify_recv(struct fcgi_strm *fstrm)
+{
+ if (fstrm->subs && (fstrm->subs->events & SUB_RETRY_RECV)) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_RECV;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ }
+}
+
+/* Attempts to notify the data layer of send availability */
+static void fcgi_strm_notify_send(struct fcgi_strm *fstrm)
+{
+ if (fstrm->subs && (fstrm->subs->events & SUB_RETRY_SEND)) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ fstrm->flags |= FCGI_SF_NOTIFIED;
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_SEND;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ }
+ else if (fstrm->flags & (FCGI_SF_WANT_SHUTR | FCGI_SF_WANT_SHUTW)) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->shut_tl);
+ }
+}
+
+/* Alerts the data layer, trying to wake it up by all means, following
+ * this sequence :
+ * - if the fcgi stream's data layer is subscribed to recv, then it's woken up
+ * for recv
+ * - if it's subscribed to send, then it's woken up for send
+ * - if it was subscribed to neither, its ->wake() callback is called
+ * It is safe to call this function with a closed stream which doesn't have a
+ * stream connector anymore.
+ */
+static void fcgi_strm_alert(struct fcgi_strm *fstrm)
+{
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ if (fstrm->subs ||
+ (fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW))) {
+ fcgi_strm_notify_recv(fstrm);
+ fcgi_strm_notify_send(fstrm);
+ }
+ else if (fcgi_strm_sc(fstrm) && fcgi_strm_sc(fstrm)->app_ops->wake != NULL) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ fcgi_strm_sc(fstrm)->app_ops->wake(fcgi_strm_sc(fstrm));
+ }
+}
+
+/* Writes the 16-bit record size <len> at address <record> */
+static inline void fcgi_set_record_size(void *record, uint16_t len)
+{
+ uint8_t *out = (record + 4);
+
+ *out = (len >> 8);
+ *(out + 1) = (len & 0xff);
+}
+
+/* Writes the 16-bit stream id <id> at address <record> */
+static inline void fcgi_set_record_id(void *record, uint16_t id)
+{
+ uint8_t *out = (record + 2);
+
+ *out = (id >> 8);
+ *(out + 1) = (id & 0xff);
+}
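+
+/* For reference, a FastCGI record starts with the fixed 8-byte header defined
+ * by the FastCGI 1.0 spec: version, type, requestId (big endian),
+ * contentLength (big endian), paddingLength and one reserved byte. The two
+ * helpers above patch the id and length fields in place. A minimal sketch of
+ * a combined writer, for illustration only (not used by the mux):
+ *
+ *   static void fcgi_write_record_hdr(uint8_t *out, uint8_t type,
+ *                                     uint16_t id, uint16_t len)
+ *   {
+ *       out[0] = 0x01;      // version: FCGI_VERSION_1
+ *       out[1] = type;      // record type (e.g. 5 = FCGI_STDIN)
+ *       out[2] = id >> 8;   // requestId, network byte order
+ *       out[3] = id & 0xff;
+ *       out[4] = len >> 8;  // contentLength, network byte order
+ *       out[5] = len & 0xff;
+ *       out[6] = 0;         // paddingLength
+ *       out[7] = 0;         // reserved
+ *   }
+ */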
+
+/* Marks a FCGI stream as CLOSED and decrements the number of active streams for
+ * its connection if the stream was not yet closed. Please use this exclusively
+ * before closing a stream to ensure the stream count is well maintained.
+ */
+static inline void fcgi_strm_close(struct fcgi_strm *fstrm)
+{
+ if (fstrm->state != FCGI_SS_CLOSED) {
+ TRACE_ENTER(FCGI_EV_FSTRM_END, fstrm->fconn->conn, fstrm);
+ fstrm->fconn->nb_streams--;
+ if (!fstrm->id)
+ fstrm->fconn->nb_reserved--;
+ if (fcgi_strm_sc(fstrm)) {
+ if (!se_fl_test(fstrm->sd, SE_FL_EOS) && !b_data(&fstrm->rxbuf))
+ fcgi_strm_notify_recv(fstrm);
+ }
+ fstrm->state = FCGI_SS_CLOSED;
+ TRACE_STATE("switching to CLOSED", FCGI_EV_FSTRM_END, fstrm->fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_FSTRM_END, fstrm->fconn->conn, fstrm);
+ }
+}
+
+/* Detaches a FCGI stream from its FCGI connection and releases it to the
+ * fcgi_strm pool.
+ */
+static void fcgi_strm_destroy(struct fcgi_strm *fstrm)
+{
+ struct connection *conn = fstrm->fconn->conn;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_END, conn, fstrm);
+
+ fcgi_strm_close(fstrm);
+ eb32_delete(&fstrm->by_id);
+ if (b_size(&fstrm->rxbuf)) {
+ b_free(&fstrm->rxbuf);
+ offer_buffers(NULL, 1);
+ }
+ if (fstrm->subs)
+ fstrm->subs->events = 0;
+ /* There's no need to explicitly call unsubscribe here, the only
+ * reference left would be in the fconn send_list/fctl_list, and if
+ * we're in it, we're getting out anyway
+ */
+ LIST_DEL_INIT(&fstrm->send_list);
+ tasklet_free(fstrm->shut_tl);
+ BUG_ON(fstrm->sd && !se_fl_test(fstrm->sd, SE_FL_ORPHAN));
+ sedesc_free(fstrm->sd);
+ pool_free(pool_head_fcgi_strm, fstrm);
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_END, conn);
+}
+
+/* Allocates a new stream <id> for connection <fconn> and adds it into fconn's
+ * stream tree. In case of error, nothing is added and NULL is returned. The
+ * causes of errors can be any failed memory allocation. The caller is
+ * responsible for checking if the connection may support an extra stream prior
+ * to calling this function.
+ */
+static struct fcgi_strm *fcgi_strm_new(struct fcgi_conn *fconn, int id)
+{
+ struct fcgi_strm *fstrm;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_NEW, fconn->conn);
+
+ fstrm = pool_alloc(pool_head_fcgi_strm);
+ if (!fstrm) {
+ TRACE_ERROR("fstrm allocation failure", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR|FCGI_EV_FSTRM_END, fconn->conn);
+ goto out;
+ }
+
+ fstrm->shut_tl = tasklet_new();
+ if (!fstrm->shut_tl) {
+ TRACE_ERROR("fstrm shut tasklet allocation failure", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR|FCGI_EV_FSTRM_END, fconn->conn);
+ pool_free(pool_head_fcgi_strm, fstrm);
+ goto out;
+ }
+ fstrm->subs = NULL;
+ fstrm->shut_tl->process = fcgi_deferred_shut;
+ fstrm->shut_tl->context = fstrm;
+ LIST_INIT(&fstrm->send_list);
+ fstrm->fconn = fconn;
+ fstrm->sd = NULL;
+ fstrm->flags = FCGI_SF_NONE;
+ fstrm->proto_status = 0;
+ fstrm->state = FCGI_SS_IDLE;
+ fstrm->rxbuf = BUF_NULL;
+
+ h1m_init_res(&fstrm->h1m);
+ fstrm->h1m.err_pos = -1; // don't care about errors on the request path
+ fstrm->h1m.flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+
+ fstrm->by_id.key = fstrm->id = id;
+ if (id > 0)
+ fconn->max_id = id;
+ else
+ fconn->nb_reserved++;
+
+ eb32_insert(&fconn->streams_by_id, &fstrm->by_id);
+ fconn->nb_streams++;
+ fconn->stream_cnt++;
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_NEW, fconn->conn, fstrm);
+ return fstrm;
+
+ out:
+ TRACE_DEVEL("leaving in error", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR|FCGI_EV_FSTRM_END, fconn->conn);
+ return NULL;
+}
+
+/* Allocates a new stream associated to stream connector <sc> on the FCGI connection
+ * <fconn> and returns it, or NULL in case of memory allocation error or if the
+ * highest possible stream ID was reached.
+ */
+static struct fcgi_strm *fcgi_stconn_new(struct fcgi_conn *fconn, struct stconn *sc,
+ struct session *sess)
+{
+ struct fcgi_strm *fstrm = NULL;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_NEW, fconn->conn);
+ if (fconn->nb_streams >= fconn->streams_limit) {
+ TRACE_ERROR("streams_limit reached", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ goto out;
+ }
+
+ if (fcgi_streams_left(fconn) < 1) {
+ TRACE_ERROR("!streams_left", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ goto out;
+ }
+
+ /* Defer choosing the ID until we send the first message to create the stream */
+ fstrm = fcgi_strm_new(fconn, 0);
+ if (!fstrm) {
+ TRACE_ERROR("fstream allocation failure", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ goto out;
+ }
+ if (sc_attach_mux(sc, fstrm, fconn->conn) < 0)
+ goto out;
+ fstrm->sd = sc->sedesc;
+ fstrm->sess = sess;
+ fconn->nb_sc++;
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_NEW, fconn->conn, fstrm);
+ return fstrm;
+
+ out:
+ TRACE_DEVEL("leaving on error", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ fcgi_strm_destroy(fstrm);
+ return NULL;
+}
+
+/* Wakes a specific stream and assigns its stream connector some SE_FL_* flags among
+ * SE_FL_ERR_PENDING and SE_FL_ERROR if needed. The stream's state is
+ * automatically updated accordingly. If the stream is orphaned, it is
+ * destroyed.
+ */
+static void fcgi_strm_wake_one_stream(struct fcgi_strm *fstrm)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+
+ if (!fcgi_strm_sc(fstrm)) {
+ /* this stream was already orphaned */
+ fcgi_strm_destroy(fstrm);
+ TRACE_DEVEL("leaving with no fstrm", FCGI_EV_STRM_WAKE, fconn->conn);
+ return;
+ }
+
+ if (fcgi_conn_read0_pending(fconn)) {
+ if (fstrm->state == FCGI_SS_OPEN) {
+ fstrm->state = FCGI_SS_HREM;
+ TRACE_STATE("switching to HREM", FCGI_EV_STRM_WAKE|FCGI_EV_FSTRM_END, fconn->conn, fstrm);
+ }
+ else if (fstrm->state == FCGI_SS_HLOC)
+ fcgi_strm_close(fstrm);
+ }
+
+ if ((fconn->state == FCGI_CS_CLOSED || fconn->conn->flags & CO_FL_ERROR)) {
+ se_fl_set(fstrm->sd, SE_FL_ERR_PENDING);
+ if (se_fl_test(fstrm->sd, SE_FL_EOS))
+ se_fl_set(fstrm->sd, SE_FL_ERROR);
+
+ if (fstrm->state < FCGI_SS_ERROR) {
+ fstrm->state = FCGI_SS_ERROR;
+ TRACE_STATE("switching to ERROR", FCGI_EV_STRM_WAKE|FCGI_EV_FSTRM_END, fconn->conn, fstrm);
+ }
+ }
+
+ fcgi_strm_alert(fstrm);
+
+ TRACE_LEAVE(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+}
+
+/* Wakes unassigned streams (ID == 0) attached to the connection. */
+static void fcgi_wake_unassigned_streams(struct fcgi_conn *fconn)
+{
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ node = eb32_lookup(&fconn->streams_by_id, 0);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ if (fstrm->id > 0)
+ break;
+ node = eb32_next(node);
+ fcgi_strm_wake_one_stream(fstrm);
+ }
+}
+
+/* Wakes the streams attached to the connection, whose id is greater than <last>
+ * or unassigned.
+ */
+static void fcgi_wake_some_streams(struct fcgi_conn *fconn, int last)
+{
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ TRACE_ENTER(FCGI_EV_STRM_WAKE, fconn->conn);
+
+ /* Wake all streams with ID > last */
+ node = eb32_lookup_ge(&fconn->streams_by_id, last + 1);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ node = eb32_next(node);
+ fcgi_strm_wake_one_stream(fstrm);
+ }
+ fcgi_wake_unassigned_streams(fconn);
+
+ TRACE_LEAVE(FCGI_EV_STRM_WAKE, fconn->conn);
+}
+
+static int fcgi_set_default_param(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct htx *htx, struct htx_sl *sl,
+ struct fcgi_strm_params *params)
+{
+ struct connection *cli_conn = objt_conn(fstrm->sess->origin);
+ const struct sockaddr_storage *src = (sc_check(fcgi_strm_sc(fstrm)) ? conn_src(fconn->conn) : sc_src(sc_opposite(fcgi_strm_sc(fstrm))));
+ const struct sockaddr_storage *dst = (sc_check(fcgi_strm_sc(fstrm)) ? conn_dst(fconn->conn) : sc_dst(sc_opposite(fcgi_strm_sc(fstrm))));
+ struct ist p;
+
+ if (!sl)
+ goto error;
+
+ if (!(params->mask & FCGI_SP_DOC_ROOT))
+ params->docroot = fconn->app->docroot;
+
+ if (!(params->mask & FCGI_SP_REQ_METH)) {
+ p = htx_sl_req_meth(sl);
+ params->meth = ist2(b_tail(params->p), p.len);
+ chunk_istcat(params->p, p);
+ }
+ if (!(params->mask & FCGI_SP_REQ_URI)) {
+ p = h1_get_uri(sl);
+ params->uri = ist2(b_tail(params->p), p.len);
+ chunk_istcat(params->p, p);
+ }
+ if (!(params->mask & FCGI_SP_SRV_PROTO)) {
+ p = htx_sl_req_vsn(sl);
+ params->vsn = ist2(b_tail(params->p), p.len);
+ chunk_istcat(params->p, p);
+ }
+ if (!(params->mask & FCGI_SP_SRV_PORT)) {
+ char *end;
+ int port = 0;
+ if (dst)
+ port = get_host_port(dst);
+ end = ultoa_o(port, b_tail(params->p), b_room(params->p));
+ if (!end)
+ goto error;
+ params->srv_port = ist2(b_tail(params->p), end - b_tail(params->p));
+ params->p->data += params->srv_port.len;
+ }
+ if (!(params->mask & FCGI_SP_SRV_NAME)) {
+ /* If no Host header found, use the server address to fill
+ * srv_name */
+ if (!istlen(params->srv_name)) {
+ char *ptr = NULL;
+
+ if (dst)
+ if (addr_to_str(dst, b_tail(params->p), b_room(params->p)) != -1)
+ ptr = b_tail(params->p);
+ if (ptr) {
+ params->srv_name = ist(ptr);
+ params->p->data += params->srv_name.len;
+ }
+ }
+ }
+ if (!(params->mask & FCGI_SP_REM_ADDR)) {
+ char *ptr = NULL;
+
+ if (src)
+ if (addr_to_str(src, b_tail(params->p), b_room(params->p)) != -1)
+ ptr = b_tail(params->p);
+ if (ptr) {
+ params->rem_addr = ist(ptr);
+ params->p->data += params->rem_addr.len;
+ }
+ }
+ if (!(params->mask & FCGI_SP_REM_PORT)) {
+ char *end;
+ int port = 0;
+ if (src)
+ port = get_host_port(src);
+ end = ultoa_o(port, b_tail(params->p), b_room(params->p));
+ if (!end)
+ goto error;
+ params->rem_port = ist2(b_tail(params->p), end - b_tail(params->p));
+ params->p->data += params->rem_port.len;
+ }
+ if (!(params->mask & FCGI_SP_CONT_LEN)) {
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ char *end;
+ size_t len = 0;
+
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ end = ultoa_o(len, b_tail(params->p), b_room(params->p));
+ if (!end)
+ goto error;
+ params->cont_len = ist2(b_tail(params->p), end - b_tail(params->p));
+ params->p->data += params->cont_len.len;
+ }
+
+ if (!(params->mask & FCGI_SP_HTTPS)) {
+ if (cli_conn)
+ params->https = conn_is_ssl(cli_conn);
+ }
+
+ if ((params->mask & FCGI_SP_URI_MASK) != FCGI_SP_URI_MASK) {
+ /* one of scriptname, pathinfo or query_string is not set */
+ struct http_uri_parser parser = http_uri_parser_init(params->uri);
+ struct ist path = http_parse_path(&parser);
+ int len;
+
+ /* No script_name set and no valid path ==> error */
+ if (!(params->mask & FCGI_SP_SCRIPT_NAME) && !istlen(path))
+ goto error;
+
+ /* If there is a query-string, set it if not already set */
+ if (!(params->mask & FCGI_SP_REQ_QS)) {
+ struct ist qs = istfind(path, '?');
+
+ /* Update the path length */
+ path.len -= qs.len;
+
+ /* Set the query-string skipping the '?', if any */
+ if (istlen(qs))
+ params->qs = istnext(qs);
+ }
+
+ /* If the script_name is set, don't try to deduce the path_info
+ * too. The opposite is not true.
+ */
+ if (params->mask & FCGI_SP_SCRIPT_NAME) {
+ params->mask |= FCGI_SP_PATH_INFO;
+ goto end;
+ }
+
+ /* Decode the path. It must first be copied to keep the URI
+ * untouched.
+ */
+ chunk_istcat(params->p, path);
+ path.ptr = b_tail(params->p) - path.len;
+ len = url_decode(ist0(path), 0);
+ if (len < 0)
+ goto error;
+ path.len = len;
+
+ /* script_name not set, preset it with the path for now */
+ params->scriptname = path;
+
+ /* If there is no regex to match the pathinfo, just jump to the last
+ * part and see if the index must be used.
+ */
+ if (!fconn->app->pathinfo_re)
+ goto check_index;
+
+ /* If some special characters are found in the decoded path (\n
+ * or \0), the PATH_INFO regex cannot match. Having such characters
+ * is theoretically valid, but probably unexpected. So, to avoid any
+ * surprises, an error is triggered in this case.
+ */
+ if (istchr(path, '\n') || istchr(path, '\0'))
+ goto error;
+
+ /* If the regex does not match, just jump to the last part and see
+ * if the index must be used.
+ */
+ if (!regex_exec_match2(fconn->app->pathinfo_re, path.ptr, len, MAX_MATCH, pmatch, 0))
+ goto check_index;
+
+ /* We must have at least 1 capture for the script name,
+ * otherwise we do nothing and jump to the last part.
+ */
+ if (pmatch[1].rm_so == -1 || pmatch[1].rm_eo == -1)
+ goto check_index;
+
+ /* Finally we can set the script_name and the path_info. The
+ * path_info is set if not already defined, and if it was
+ * captured
+ */
+ params->scriptname = ist2(path.ptr + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
+ if (!(params->mask & FCGI_SP_PATH_INFO) && !(pmatch[2].rm_so == -1 || pmatch[2].rm_eo == -1))
+ params->pathinfo = ist2(path.ptr + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
+
+ check_index:
+ len = params->scriptname.len;
+ /* if the script_name is finished by a '/', we can add the index
+ * part, if any.
+ */
+ if (istlen(fconn->app->index) && params->scriptname.ptr[len-1] == '/') {
+ struct ist sn = params->scriptname;
+
+ params->scriptname = ist2(b_tail(params->p), len+fconn->app->index.len);
+ chunk_istcat(params->p, sn);
+ chunk_istcat(params->p, fconn->app->index);
+ }
+ }
+
+ if (!(params->mask & FCGI_SP_SRV_SOFT)) {
+ params->srv_soft = ist2(b_tail(params->p), 0);
+ chunk_appendf(params->p, "HAProxy %s", haproxy_version);
+ params->srv_soft.len = b_tail(params->p) - params->srv_soft.ptr;
+ }
+
+ end:
+ return 1;
+ error:
+ return 0;
+}
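+
+/* Worked example (for illustration only, assuming a typical "path-info" regex
+ * such as "^(/.+\.php)(/.*)?$" in the fcgi-app section): for the decoded path
+ * "/app/index.php/foo/bar", capture 1 ("/app/index.php") becomes SCRIPT_NAME
+ * and capture 2 ("/foo/bar") becomes PATH_INFO. Without a second capture,
+ * PATH_INFO is left unset.
+ */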
+
+static int fcgi_encode_default_param(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct fcgi_strm_params *params, struct buffer *outbuf, int flag)
+{
+ struct fcgi_param p;
+
+ if (params->mask & flag)
+ return 1;
+
+ chunk_reset(&trash);
+
+ switch (flag) {
+ case FCGI_SP_CGI_GATEWAY:
+ p.n = ist("GATEWAY_INTERFACE");
+ p.v = ist("CGI/1.1");
+ goto encode;
+ case FCGI_SP_DOC_ROOT:
+ p.n = ist("DOCUMENT_ROOT");
+ p.v = params->docroot;
+ goto encode;
+ case FCGI_SP_SCRIPT_NAME:
+ p.n = ist("SCRIPT_NAME");
+ p.v = params->scriptname;
+ goto encode;
+ case FCGI_SP_PATH_INFO:
+ p.n = ist("PATH_INFO");
+ p.v = params->pathinfo;
+ goto encode;
+ case FCGI_SP_REQ_URI:
+ p.n = ist("REQUEST_URI");
+ p.v = params->uri;
+ goto encode;
+ case FCGI_SP_REQ_METH:
+ p.n = ist("REQUEST_METHOD");
+ p.v = params->meth;
+ goto encode;
+ case FCGI_SP_REQ_QS:
+ p.n = ist("QUERY_STRING");
+ p.v = params->qs;
+ goto encode;
+ case FCGI_SP_SRV_NAME:
+ p.n = ist("SERVER_NAME");
+ p.v = params->srv_name;
+ goto encode;
+ case FCGI_SP_SRV_PORT:
+ p.n = ist("SERVER_PORT");
+ p.v = params->srv_port;
+ goto encode;
+ case FCGI_SP_SRV_PROTO:
+ p.n = ist("SERVER_PROTOCOL");
+ p.v = params->vsn;
+ goto encode;
+ case FCGI_SP_REM_ADDR:
+ p.n = ist("REMOTE_ADDR");
+ p.v = params->rem_addr;
+ goto encode;
+ case FCGI_SP_REM_PORT:
+ p.n = ist("REMOTE_PORT");
+ p.v = params->rem_port;
+ goto encode;
+ case FCGI_SP_SCRIPT_FILE:
+ p.n = ist("SCRIPT_FILENAME");
+ chunk_istcat(&trash, params->docroot);
+ chunk_istcat(&trash, params->scriptname);
+ p.v = ist2(b_head(&trash), b_data(&trash));
+ goto encode;
+ case FCGI_SP_PATH_TRANS:
+ if (!istlen(params->pathinfo))
+ goto skip;
+ p.n = ist("PATH_TRANSLATED");
+ chunk_istcat(&trash, params->docroot);
+ chunk_istcat(&trash, params->pathinfo);
+ p.v = ist2(b_head(&trash), b_data(&trash));
+ goto encode;
+ case FCGI_SP_CONT_LEN:
+ p.n = ist("CONTENT_LENGTH");
+ p.v = params->cont_len;
+ goto encode;
+ case FCGI_SP_HTTPS:
+ if (!params->https)
+ goto skip;
+ p.n = ist("HTTPS");
+ p.v = ist("on");
+ goto encode;
+ case FCGI_SP_SRV_SOFT:
+ p.n = ist("SERVER_SOFTWARE");
+ p.v = params->srv_soft;
+ goto encode;
+ default:
+ goto skip;
+ }
+
+ encode:
+ if (!istlen(p.v))
+ goto skip;
+ if (!fcgi_encode_param(outbuf, &p))
+ return 0;
+ skip:
+ params->mask |= flag;
+ return 1;
+}
+
+/* Sends a GET_VALUES record. Returns > 0 on success, 0 if it couldn't do
+ * anything. It is highly unexpected, but if the record is larger than a buffer
+ * and cannot be encoded at once, an error is triggered and the connection is
+ * closed. A GET_VALUES record cannot be split.
+ */
+static int fcgi_conn_send_get_values(struct fcgi_conn *fconn)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct fcgi_param max_reqs = { .n = ist("FCGI_MAX_REQS"), .v = ist("")};
+ struct fcgi_param mpxs_conns = { .n = ist("FCGI_MPXS_CONNS"), .v = ist("")};
+ int ret = 0;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn);
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fconn->flags |= FCGI_CF_DEM_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FCONN_BLK, fconn->conn);
+ ret = 0;
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (9)FCGI_GET_VALUES, id: 0x0000,
+ * len: 0x0000 (fill later), padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x09\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ /* Note: Don't send the param FCGI_MAX_CONNS because its value cannot be
+ * handled by HAProxy.
+ */
+ if (!fcgi_encode_param(&outbuf, &max_reqs) || !fcgi_encode_param(&outbuf, &mpxs_conns))
+ goto full;
+
+ /* update the record's size now */
+ TRACE_PROTO("FCGI GET_VALUES record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn, 0, 0, (size_t[]){outbuf.data-8});
+ fcgi_set_record_size(outbuf.area, outbuf.data - FCGI_RECORD_HEADER_SZ);
+ b_add(mbuf, outbuf.data);
+ ret = 1;
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn);
+ return ret;
+ full:
+ /* Too large to be encoded. For GET_VALUES records, it is an error */
+ if (!b_data(mbuf)) {
+ TRACE_ERROR("GET_VALUES record too large", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fconn->flags |= FCGI_CF_DEM_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FCONN_BLK, fconn->conn);
+ ret = 0;
+ goto end;
+ fail:
+ fconn->state = FCGI_CS_CLOSED;
+ TRACE_STATE("switching to CLOSED", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL|FCGI_EV_FCONN_END, fconn->conn);
+ TRACE_DEVEL("leaving on error", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ return 0;
+}
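+
+/* For reference (illustration only), assuming the one-byte length encoding
+ * used for short names and values, the record built above is 40 bytes on the
+ * wire:
+ *
+ *   01 09 00 00 00 20 00 00        record header (type 9, content len 0x20)
+ *   0d 00 "FCGI_MAX_REQS"          name len 13, empty value
+ *   0f 00 "FCGI_MPXS_CONNS"        name len 15, empty value
+ */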
+
+/* Processes a GET_VALUES_RESULT record. Returns > 0 on success, 0 if it
+ * couldn't do anything. It is highly unexpected, but if the record is larger
+ * than a buffer and cannot be decoded at once, an error is triggered and the
+ * connection is closed. A GET_VALUES_RESULT record cannot be split.
+ */
+static int fcgi_conn_handle_values_result(struct fcgi_conn *fconn)
+{
+ struct buffer inbuf;
+ struct buffer *dbuf;
+ size_t offset;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+
+ dbuf = &fconn->dbuf;
+
+ /* Record too large to be fully decoded */
+ if (b_size(dbuf) < (fconn->drl + fconn->drp))
+ goto fail;
+
+ /* process full record only */
+ if (b_data(dbuf) < (fconn->drl + fconn->drp)) {
+ TRACE_DEVEL("leaving on missing data", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ return 0;
+ }
+
+ if (unlikely(b_contig_data(dbuf, b_head_ofs(dbuf)) < fconn->drl)) {
+ /* Realign the demux buffer if the record wraps. It is unexpected
+ * at this stage because it should be the first record received
+ * from the FCGI application.
+ */
+ b_slow_realign_ofs(dbuf, trash.area, 0);
+ }
+
+ inbuf = b_make(b_head(dbuf), b_data(dbuf), 0, fconn->drl);
+
+ for (offset = 0; offset < b_data(&inbuf); ) {
+ struct fcgi_param p;
+ size_t ret;
+
+ ret = fcgi_aligned_decode_param(&inbuf, offset, &p);
+ if (!ret) {
+ /* name or value too large to be decoded at once */
+ TRACE_ERROR("error decoding GET_VALUES_RESULT param", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+ offset += ret;
+
+ if (isteqi(p.n, ist("FCGI_MPXS_CONNS"))) {
+ if (isteq(p.v, ist("1"))) {
+ TRACE_STATE("set mpxs param", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){1});
+ fconn->flags |= FCGI_CF_MPXS_CONNS;
+ }
+ else {
+ TRACE_STATE("set mpxs param", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){0});
+ fconn->flags &= ~FCGI_CF_MPXS_CONNS;
+ }
+ }
+ else if (isteqi(p.n, ist("FCGI_MAX_REQS"))) {
+ fconn->streams_limit = strl2ui(p.v.ptr, p.v.len);
+ TRACE_STATE("set streams_limit", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){fconn->streams_limit});
+ }
+ /*
+ * Ignore all other params
+ */
+ }
+
+ /* Reset the number of concurrent streams supported if the FCGI
+ * application does not support connection multiplexing
+ */
+ if (!(fconn->flags & FCGI_CF_MPXS_CONNS)) {
+ fconn->streams_limit = 1;
+ TRACE_STATE("no mpxs for streams_limit to 1", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ }
+
+ /* We must be sure to have read exactly the announced record length, no
+ * more, no less
+ */
+ if (offset != fconn->drl) {
+ TRACE_ERROR("invalid GET_VALUES_RESULT record length", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+
+ TRACE_PROTO("FCGI GET_VALUES_RESULT record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){fconn->drl});
+ b_del(&fconn->dbuf, fconn->drl + fconn->drp);
+ fconn->drl = 0;
+ fconn->drp = 0;
+ fconn->state = FCGI_CS_RECORD_H;
+ fcgi_wake_unassigned_streams(fconn);
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ return 1;
+ fail:
+ fconn->state = FCGI_CS_CLOSED;
+ TRACE_STATE("switching to CLOSED", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ TRACE_DEVEL("leaving on error", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ return 0;
+}
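+
+/* For reference (illustration only): a typical GET_VALUES_RESULT payload from
+ * an application supporting multiplexing and 50 concurrent requests would be
+ * decoded above as these two K/V pairs:
+ *
+ *   0f 01 "FCGI_MPXS_CONNS" "1"    name len 15, value len 1
+ *   0d 02 "FCGI_MAX_REQS" "50"     name len 13, value len 2
+ */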
+
+/* Sends an ABORT_REQUEST record for each active stream. Closed streams are
+ * excluded, as are streams which already received the end-of-stream. It
+ * returns > 0 if the record was sent to all streams. Otherwise it returns 0.
+ */
+static int fcgi_conn_send_aborts(struct fcgi_conn *fconn)
+{
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD, fconn->conn);
+
+ node = eb32_lookup_ge(&fconn->streams_by_id, 1);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ node = eb32_next(node);
+ if (fstrm->state != FCGI_SS_CLOSED &&
+ !(fstrm->flags & (FCGI_SF_ES_RCVD|FCGI_SF_ABRT_SENT)) &&
+ !fcgi_strm_send_abort(fconn, fstrm))
+ return 0;
+ }
+ fconn->flags |= FCGI_CF_ABRTS_SENT;
+ TRACE_STATE("aborts sent to all fstrms", FCGI_EV_TX_RECORD, fconn->conn);
+ TRACE_LEAVE(FCGI_EV_TX_RECORD, fconn->conn);
+ return 1;
+}
+
+/* Sends a BEGIN_REQUEST record. It returns > 0 on success, 0 if it couldn't do
+ * anything. A BEGIN_REQUEST record cannot be split, so we wait until there is
+ * enough space to proceed. It is small enough to be encoded in an empty buffer.
+ */
+static int fcgi_strm_send_begin_request(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct fcgi_begin_request rec = { .role = FCGI_RESPONDER, .flags = 0};
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ ret = 0;
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (1)FCGI_BEGIN_REQUEST, id: fstrm->id,
+ * len: 0x0008, padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x01\x00\x00\x00\x08\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ if (fconn->flags & FCGI_CF_KEEP_CONN) {
+ TRACE_STATE("keep connection opened", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ rec.flags |= FCGI_KEEP_CONN;
+ }
+ if (!fcgi_encode_begin_request(&outbuf, &rec))
+ goto full;
+
+ /* commit the record */
+ TRACE_PROTO("FCGI BEGIN_REQUEST record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm, 0, (size_t[]){0});
+ b_add(mbuf, outbuf.data);
+ fstrm->flags |= FCGI_SF_BEGIN_SENT;
+ fstrm->state = FCGI_SS_OPEN;
+ TRACE_STATE("switching to OPEN", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ ret = 1;
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn);
+ ret = 0;
+ goto end;
+}
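+
+/* For reference, the BEGIN_REQUEST record built above carries a fixed 8-byte
+ * body (FastCGI 1.0 spec): a 2-byte role in network order, a flags byte and
+ * 5 reserved bytes. For a responder with connection reuse it looks like this
+ * on the wire:
+ *
+ *   01 01 <id> <id> 00 08 00 00    record header (type 1, content len 8)
+ *   00 01 01 00 00 00 00 00        role=FCGI_RESPONDER, flags=FCGI_KEEP_CONN
+ */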
+
+/* Sends an empty record of type <rtype>. It returns > 0 on success, 0 if it
+ * couldn't do anything. An empty record cannot be split, so we wait until there
+ * is enough space to proceed. It is small enough to be encoded in an empty
+ * buffer.
+ */
+static int fcgi_strm_send_empty_record(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ enum fcgi_record_type rtype)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD, fconn->conn, fstrm);
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ ret = 0;
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: rtype, id: fstrm->id,
+ * len: 0x0000, padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x05\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ outbuf.area[1] = rtype;
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ /* commit the record */
+ b_add(mbuf, outbuf.data);
+ ret = 1;
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD, fconn->conn, fstrm);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ ret = 0;
+ goto end;
+}
+
+
+/* Sends an empty PARAMS record. It relies on fcgi_strm_send_empty_record(). It
+ * marks the end of params.
+ */
+static int fcgi_strm_send_empty_params(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_POINT(FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm);
+ ret = fcgi_strm_send_empty_record(fconn, fstrm, FCGI_PARAMS);
+ if (ret) {
+ fstrm->flags |= FCGI_SF_EP_SENT;
+ TRACE_PROTO("FCGI PARAMS record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, 0, (size_t[]){0});
+ }
+ return ret;
+}
+
+/* Sends an empty STDIN record. It relies on fcgi_strm_send_empty_record(). It
+ * marks the end of input. On success, all the request was successfully sent.
+ */
+static int fcgi_strm_send_empty_stdin(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_POINT(FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN|FCGI_EV_TX_EOI, fconn->conn, fstrm);
+ ret = fcgi_strm_send_empty_record(fconn, fstrm, FCGI_STDIN);
+ if (ret) {
+ fstrm->flags |= FCGI_SF_ES_SENT;
+ TRACE_PROTO("FCGI STDIN record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, 0, (size_t[]){0});
+ TRACE_USER("FCGI request fully xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN|FCGI_EV_TX_EOI, fconn->conn, fstrm);
+ TRACE_STATE("stdin data fully sent", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN|FCGI_EV_TX_EOI, fconn->conn, fstrm);
+ }
+ return ret;
+}
+
+/* Sends an ABORT_REQUEST record. It relies on fcgi_strm_send_empty_record(). It
+ * stops the request processing.
+ */
+static int fcgi_strm_send_abort(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_POINT(FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm);
+ ret = fcgi_strm_send_empty_record(fconn, fstrm, FCGI_ABORT_REQUEST);
+ if (ret) {
+ fstrm->flags |= FCGI_SF_ABRT_SENT;
+ TRACE_PROTO("FCGI ABORT record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm, 0, (size_t[]){0});
+ TRACE_USER("FCGI request aborted", FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm);
+ TRACE_STATE("abort sent", FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm);
+ }
+ return ret;
+}
+
+/* Sends a PARAMS record. Returns > 0 on success, 0 if it couldn't do
+ * anything. If there are too many K/V params to be encoded in a PARAMS record,
+ * several records are sent. However, a K/V param cannot be split between 2
+ * records.
+ */
+static size_t fcgi_strm_send_params(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct htx *htx)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_blk *blk;
+ struct htx_sl *sl = NULL;
+ struct fcgi_strm_params params;
+ size_t total = 0;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx);
+
+ memset(&params, 0, sizeof(params));
+ params.p = get_trash_chunk();
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (4)FCGI_PARAMS, id: fstrm->id,
+ * len: 0x0000 (fill later), padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x04\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type;
+ uint32_t size = htx_get_blksz(blk);
+ struct fcgi_param p;
+
+ type = htx_get_blk_type(blk);
+ switch (type) {
+ case HTX_BLK_REQ_SL:
+ sl = htx_get_blk_ptr(htx, blk);
+ if (sl->info.req.meth == HTTP_METH_HEAD)
+ fstrm->h1m.flags |= H1_MF_METH_HEAD;
+ if (sl->flags & HTX_SL_F_VER_11)
+ fstrm->h1m.flags |= H1_MF_VER_11;
+ break;
+
+ case HTX_BLK_HDR:
+ p.n = htx_get_blk_name(htx, blk);
+ p.v = htx_get_blk_value(htx, blk);
+
+ if (istmatch(p.n, ist(":fcgi-"))) {
+ p.n = istadv(p.n, 6);
+ if (isteq(p.n, ist("gateway_interface")))
+ params.mask |= FCGI_SP_CGI_GATEWAY;
+ else if (isteq(p.n, ist("document_root"))) {
+ params.mask |= FCGI_SP_DOC_ROOT;
+ params.docroot = p.v;
+ }
+ else if (isteq(p.n, ist("script_name"))) {
+ params.mask |= FCGI_SP_SCRIPT_NAME;
+ params.scriptname = p.v;
+ }
+ else if (isteq(p.n, ist("path_info"))) {
+ params.mask |= FCGI_SP_PATH_INFO;
+ params.pathinfo = p.v;
+ }
+ else if (isteq(p.n, ist("request_uri"))) {
+ params.mask |= FCGI_SP_REQ_URI;
+ params.uri = p.v;
+ }
+ else if (isteq(p.n, ist("request_meth")))
+ params.mask |= FCGI_SP_REQ_METH;
+ else if (isteq(p.n, ist("query_string")))
+ params.mask |= FCGI_SP_REQ_QS;
+ else if (isteq(p.n, ist("server_name")))
+ params.mask |= FCGI_SP_SRV_NAME;
+ else if (isteq(p.n, ist("server_port")))
+ params.mask |= FCGI_SP_SRV_PORT;
+ else if (isteq(p.n, ist("server_protocol")))
+ params.mask |= FCGI_SP_SRV_PROTO;
+ else if (isteq(p.n, ist("remote_addr")))
+ params.mask |= FCGI_SP_REM_ADDR;
+ else if (isteq(p.n, ist("remote_port")))
+ params.mask |= FCGI_SP_REM_PORT;
+ else if (isteq(p.n, ist("script_filename")))
+ params.mask |= FCGI_SP_SCRIPT_FILE;
+ else if (isteq(p.n, ist("path_translated")))
+ params.mask |= FCGI_SP_PATH_TRANS;
+ else if (isteq(p.n, ist("https")))
+ params.mask |= FCGI_SP_HTTPS;
+ else if (isteq(p.n, ist("server_software")))
+ params.mask |= FCGI_SP_SRV_SOFT;
+ }
+ else if (isteq(p.n, ist("content-length"))) {
+ p.n = ist("CONTENT_LENGTH");
+ params.mask |= FCGI_SP_CONT_LEN;
+ }
+ else if (isteq(p.n, ist("content-type")))
+ p.n = ist("CONTENT_TYPE");
+ else {
+ struct ist n;
+
+ if (isteq(p.n, ist("host")))
+ params.srv_name = p.v;
+ else if (isteq(p.n, ist("te"))) {
+ /* "te" may only be sent with "trailers" if this value
+ * is present, otherwise it must be deleted.
+ */
+ p.v = istist(p.v, ist("trailers"));
+ if (!isttest(p.v) || (p.v.len > 8 && p.v.ptr[8] != ','))
+ break;
+ p.v = ist("trailers");
+ }
+
+ /* Skip header if same name is used to add the server name */
+ if (isttest(fconn->proxy->server_id_hdr_name) && isteq(p.n, fconn->proxy->server_id_hdr_name))
+ break;
+
+ n = ist2(trash.area, 0);
+ istcat(&n, ist("http_"), trash.size);
+ istcat(&n, p.n, trash.size);
+ p.n = n;
+ }
+
+ if (!fcgi_encode_param(&outbuf, &p)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ if (outbuf.data == FCGI_RECORD_HEADER_SZ)
+ goto full;
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_EOH:
+ if (isttest(fconn->proxy->server_id_hdr_name)) {
+ struct server *srv = objt_server(fconn->conn->target);
+
+ if (!srv)
+ goto done;
+
+ p.n = ist2(trash.area, 0);
+ istcat(&p.n, ist("http_"), trash.size);
+ istcat(&p.n, fconn->proxy->server_id_hdr_name, trash.size);
+ p.v = ist(srv->id);
+
+ if (!fcgi_encode_param(&outbuf, &p)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ if (outbuf.data == FCGI_RECORD_HEADER_SZ)
+ goto full;
+ }
+ TRACE_STATE("add server name header", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm);
+ }
+ goto done;
+
+ default:
+ break;
+ }
+ total += size;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+ done:
+ if (!fcgi_set_default_param(fconn, fstrm, htx, sl, &params)) {
+ TRACE_ERROR("error setting default params", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ goto error;
+ }
+
+ if (!fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_CGI_GATEWAY) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_DOC_ROOT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SCRIPT_NAME) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_PATH_INFO) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REQ_URI) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REQ_METH) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REQ_QS) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_NAME) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_PORT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_PROTO) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REM_ADDR) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REM_PORT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SCRIPT_FILE) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_PATH_TRANS) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_CONT_LEN) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_SOFT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_HTTPS)) {
+ TRACE_ERROR("error encoding default params", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ goto error;
+ }
+
+ /* update the record's size */
+ TRACE_PROTO("FCGI PARAMS record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, 0, (size_t[]){outbuf.data - FCGI_RECORD_HEADER_SZ});
+ fcgi_set_record_size(outbuf.area, outbuf.data - FCGI_RECORD_HEADER_SZ);
+ b_add(mbuf, outbuf.data);
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx, (size_t[]){total});
+ return total;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ if (total)
+ goto error;
+ goto end;
+
+ error:
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ TRACE_ERROR("processing error sending PARAMS record", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ goto end;
+}
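+
+/* Worked example (for illustration only, assuming the param encoder in fcgi.c
+ * maps non-alphanumeric name characters to '_' and uppercases the rest): the
+ * HTX header "accept-language: fr" leaves the loop above with the name
+ * "http_accept-language" and is emitted as the CGI-style param
+ * "HTTP_ACCEPT_LANGUAGE" = "fr", while "content-type: text/html" is renamed
+ * in the loop itself and sent as "CONTENT_TYPE" = "text/html", without the
+ * "http_" prefix.
+ */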
+
+/* Sends a STDIN record. Returns > 0 on success, 0 if it couldn't do
+ * anything. STDIN records contain the request body.
+ */
+static size_t fcgi_strm_send_stdin(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct htx *htx, size_t count, struct buffer *buf)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ uint32_t size, extra_bytes;
+ size_t total = 0;
+
+ extra_bytes = 0;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){count});
+ if (!count)
+ goto end;
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ goto end;
+ }
+
+ /* Perform some optimizations to reduce the number of buffer copies.
+ * First, if the mux's buffer is empty and the htx area contains exactly
+ * one data block of the same size as the requested count, and this
+ * count fits within the record size, then it's possible to simply swap
+ * the caller's buffer with the mux's output buffer and adjust offsets
+ * and length to match the entire DATA HTX block in the middle. In this
+ * case we perform a true zero-copy operation from end-to-end. This is
+ * the situation that happens all the time with large files. Second, if
+ * this is not possible, but the mux's output buffer is empty, we still
+ * have an opportunity to avoid the copy to the intermediary buffer, by
+ * making the intermediary buffer's area point to the output buffer's
+ * area. In this case we want to skip the HTX header to make sure that
+ * copies remain aligned and that this operation remains possible all
+ * the time. This goes for headers, data blocks and any data extracted
+ * from the HTX blocks.
+ */
+ blk = htx_get_head_blk(htx);
+ if (!blk)
+ goto end;
+ type = htx_get_blk_type(blk);
+ size = htx_get_blksz(blk);
+ if (unlikely(size == count && htx_nbblks(htx) == 1 && type == HTX_BLK_DATA)) {
+ void *old_area = mbuf->area;
+ int eom = (htx->flags & HTX_FL_EOM);
+
+ /* Last block of the message: Reserve the size for the empty stdin record */
+ if (eom)
+ extra_bytes = FCGI_RECORD_HEADER_SZ;
+
+ if (b_data(mbuf)) {
+ /* Too bad there are data left there. We're willing to memcpy/memmove
+ * up to 1/4 of the buffer, which means that it's OK to copy a large
+ * record into a buffer containing few data if it needs to be realigned,
+ * and that it's also OK to copy few data without realigning. Otherwise
+ * we'll pretend the mbuf is full and wait for it to become empty.
+ */
+ if (size + FCGI_RECORD_HEADER_SZ + extra_bytes <= b_room(mbuf) &&
+ (b_data(mbuf) <= b_size(mbuf) / 4 ||
+ (size <= b_size(mbuf) / 4 && size + FCGI_RECORD_HEADER_SZ + extra_bytes <= b_contig_space(mbuf))))
+ goto copy;
+ goto full;
+ }
+
+ TRACE_PROTO("sending stding data (zero-copy)", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){size});
+ /* map a FCGI record to the HTX block so that we can put the
+ * record header there.
+ */
+ *mbuf = b_make(buf->area, buf->size, sizeof(struct htx) + blk->addr - FCGI_RECORD_HEADER_SZ, size + FCGI_RECORD_HEADER_SZ);
+ outbuf.area = b_head(mbuf);
+
+ /* prepend a FCGI record header just before the DATA block */
+ memcpy(outbuf.area, "\x01\x05\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ fcgi_set_record_size(outbuf.area, size);
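+ /* e.g. for stream id 1 and a 42-byte payload, the header now reads
+ * "\x01\x05\x00\x01\x00\x2a\x00\x00": version 1, type 5 (STDIN),
+ * request id then content length in network byte order, no padding.
+ */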
+
+ /* and exchange with our old area */
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+ total += size;
+
+ htx = (struct htx *)buf->area;
+ htx_reset(htx);
+ if (eom)
+ goto empty_stdin;
+ goto end;
+ }
+
+ copy:
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ + extra_bytes || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ + extra_bytes)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (5)FCGI_STDIN, id: fstrm->id,
+ * len: 0x0000 (fill later), padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x05\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ blk = htx_get_head_blk(htx);
+ while (blk && count) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t size = htx_get_blksz(blk);
+ struct ist v;
+
+ switch (type) {
+ case HTX_BLK_DATA:
+ TRACE_PROTO("sending stding data", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){size});
+ v = htx_get_blk_value(htx, blk);
+
+ if (htx_is_unique_blk(htx, blk) && (htx->flags & HTX_FL_EOM))
+ extra_bytes = FCGI_RECORD_HEADER_SZ; /* Last block of the message */
+
+ if (v.len > count) {
+ v.len = count;
+ extra_bytes = 0;
+ }
+
+ if (v.len + FCGI_RECORD_HEADER_SZ + extra_bytes > b_room(&outbuf)) {
+ /* It doesn't fit at once. If it fits at least once split and
+ * the amount of data to move is low, let's defragment the
+ * buffer now.
+ */
+ if (b_space_wraps(mbuf) &&
+ b_data(&outbuf) + v.len + extra_bytes <= b_room(mbuf) &&
+ b_data(mbuf) <= MAX_DATA_REALIGN)
+ goto realign_again;
+ v.len = (FCGI_RECORD_HEADER_SZ + extra_bytes > b_room(&outbuf)
+ ? 0
+ : b_room(&outbuf) - FCGI_RECORD_HEADER_SZ - extra_bytes);
+ }
+ if (!v.len || !chunk_memcat(&outbuf, v.ptr, v.len)) {
+ if (outbuf.data == FCGI_RECORD_HEADER_SZ)
+ goto full;
+ goto done;
+ }
+ if (v.len != size) {
+ total += v.len;
+ count -= v.len;
+ htx_cut_data_blk(htx, blk, v.len);
+ goto done;
+ }
+ break;
+
+ default:
+ break;
+ }
+ total += size;
+ count -= size;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+ done:
+ /* update the record's size */
+ TRACE_PROTO("FCGI STDIN record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, 0, (size_t[]){outbuf.data - FCGI_RECORD_HEADER_SZ});
+ fcgi_set_record_size(outbuf.area, outbuf.data - FCGI_RECORD_HEADER_SZ);
+ b_add(mbuf, outbuf.data);
+
+ /* Send the empty STDIN record here to finish the message */
+ if (htx_is_empty(htx) && (htx->flags & HTX_FL_EOM)) {
+ empty_stdin:
+ TRACE_PROTO("sending FCGI STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx);
+ if (!fcgi_strm_send_empty_stdin(fconn, fstrm)) {
+ /* bytes already reserved for this record. It should not fail */
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ TRACE_ERROR("processing error sending empty STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ }
+ }
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){total});
+ return total;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ goto end;
+}
+
+/* Processes a STDOUT record. Returns > 0 on success, 0 if it couldn't do
+ * anything. STDOUT records contain the entire response. All the content is
+ * copied in the stream's rxbuf. The parsing will be handled in fcgi_rcv_buf().
+ */
+static int fcgi_strm_handle_stdout(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer *dbuf;
+ size_t ret;
+ size_t max;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+
+ dbuf = &fconn->dbuf;
+
+ /* Only padding remains */
+ if (fconn->state == FCGI_CS_RECORD_P)
+ goto end_transfer;
+
+ if (b_data(dbuf) < (fconn->drl + fconn->drp) &&
+ b_size(dbuf) > (fconn->drl + fconn->drp) &&
+ buf_room_for_htx_data(dbuf))
+ goto fail; // incomplete record
+
+ if (!fcgi_get_buf(fconn, &fstrm->rxbuf)) {
+ fconn->flags |= FCGI_CF_DEM_SALLOC;
+ TRACE_STATE("waiting for fstrm rxbuf allocation", FCGI_EV_RX_RECORD|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ goto fail;
+ }
+
+ /*max = MIN(b_room(&fstrm->rxbuf), fconn->drl);*/
+ max = buf_room_for_htx_data(&fstrm->rxbuf);
+ if (!b_data(&fstrm->rxbuf))
+ fstrm->rxbuf.head = sizeof(struct htx);
+ if (max > fconn->drl)
+ max = fconn->drl;
+
+ ret = b_xfer(&fstrm->rxbuf, dbuf, max);
+ if (!ret)
+ goto fail;
+ fconn->drl -= ret;
+ TRACE_DATA("move some data to fstrm rxbuf", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm, 0, (size_t[]){ret});
+ TRACE_PROTO("FCGI STDOUT record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm, 0, (size_t[]){ret});
+
+ if (!buf_room_for_htx_data(&fstrm->rxbuf)) {
+ fconn->flags |= FCGI_CF_DEM_SFULL;
+ TRACE_STATE("fstrm rxbuf full", FCGI_EV_RX_RECORD|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ }
+
+ if (fconn->drl)
+ goto fail;
+
+ end_transfer:
+ fconn->state = FCGI_CS_RECORD_P;
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ if (fconn->drl)
+ goto fail;
+
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 1;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 0;
+}
+
+
+/* Processes an empty STDOUT. Returns > 0 on success, 0 if it couldn't do
+ * anything. In fact it only skips the padding, since there is no payload for
+ * such records. It marks the end of the response.
+ */
+static int fcgi_strm_handle_empty_stdout(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+
+ fconn->state = FCGI_CS_RECORD_P;
+ TRACE_STATE("switching to RECORD_P", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ if (fconn->drl) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 0;
+ }
+ fconn->state = FCGI_CS_RECORD_H;
+ fstrm->flags |= FCGI_SF_ES_RCVD;
+ TRACE_PROTO("FCGI STDOUT record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm, 0, (size_t[]){0});
+ TRACE_STATE("stdout data fully send, switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_EOI, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 1;
+}
+
+/* Processes a STDERR record. Returns > 0 on success, 0 if it couldn't do
+ * anything.
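+ * The record payload is forwarded to the fcgi-app's log servers via
+ * app_log(), with a trailing newline appended to the copied data.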
+ */
+static int fcgi_strm_handle_stderr(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer *dbuf;
+ struct buffer tag;
+ size_t ret;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ dbuf = &fconn->dbuf;
+
+ /* Only padding remains */
+ if (fconn->state == FCGI_CS_RECORD_P || !fconn->drl)
+ goto end_transfer;
+
+ if (b_data(dbuf) < (fconn->drl + fconn->drp) &&
+ b_size(dbuf) > (fconn->drl + fconn->drp) &&
+ buf_room_for_htx_data(dbuf))
+ goto fail; // incomplete record
+
+ chunk_reset(&trash);
+ ret = b_xfer(&trash, dbuf, MIN(b_room(&trash), fconn->drl));
+ if (!ret)
+ goto fail;
+ fconn->drl -= ret;
+ TRACE_PROTO("FCGI STDERR record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm, 0, (size_t[]){ret});
+
+ trash.area[ret] = '\n';
+ trash.area[ret+1] = '\0';
+ tag.area = fconn->app->name; tag.data = strlen(fconn->app->name);
+ app_log(&fconn->app->logsrvs, &tag, LOG_ERR, "%s", trash.area);
+
+ if (fconn->drl)
+ goto fail;
+
+ end_transfer:
+ fconn->state = FCGI_CS_RECORD_P;
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ if (fconn->drl)
+ goto fail;
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ return 1;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ return 0;
+}
+
+/* Processes an END_REQUEST record. Returns > 0 on success, 0 if it couldn't do
+ * anything. If the empty STDOUT record is not already received, this one marks
+ * the end of the response. Although highly unexpected, if the record is larger
+ * than a buffer and thus cannot be decoded at once, an error is triggered and
+ * the connection is closed. An END_REQUEST record cannot be split.
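+ * Per the FastCGI specification, its 8-byte body carries the application's
+ * exit status (appStatus) and a protocolStatus byte, decoded below by
+ * fcgi_decode_end_request().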
+ */
+static int fcgi_strm_handle_end_request(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer inbuf;
+ struct buffer *dbuf;
+ struct fcgi_end_request endreq;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm);
+ dbuf = &fconn->dbuf;
+
+ /* Record too large to be fully decoded */
+ if (b_size(dbuf) < (fconn->drl + fconn->drp)) {
+ TRACE_ERROR("END_REQUEST record too large", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ goto fail;
+ }
+
+ /* process full record only */
+ if (b_data(dbuf) < (fconn->drl + fconn->drp)) {
+ TRACE_DEVEL("leaving on missing data", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn);
+ return 0;
+ }
+
+ if (unlikely(b_contig_data(dbuf, b_head_ofs(dbuf)) < fconn->drl)) {
+ /* Realign the dmux buffer if the record wraps. It is unexpected
+ * at this stage because it should be the first record received
+ * from the FCGI application.
+ */
+ b_slow_realign_ofs(dbuf, trash.area, 0);
+ }
+
+ inbuf = b_make(b_head(dbuf), b_data(dbuf), 0, fconn->drl);
+
+ if (!fcgi_decode_end_request(&inbuf, 0, &endreq)) {
+ TRACE_ERROR("END_REQUEST record decoding failure", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ goto fail;
+ }
+
+ fstrm->flags |= FCGI_SF_ES_RCVD;
+ TRACE_STATE("end of script reported", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_RX_EOI, fconn->conn, fstrm);
+ TRACE_PROTO("FCGI END_REQUEST record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm, 0, (size_t[]){fconn->drl});
+ fstrm->proto_status = endreq.errcode;
+ fcgi_strm_close(fstrm);
+
+ b_del(&fconn->dbuf, fconn->drl + fconn->drp);
+ fconn->drl = 0;
+ fconn->drp = 0;
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm);
+ return 1;
+
+ fail:
+ fcgi_strm_error(fstrm);
+ TRACE_DEVEL("leaving on error", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ return 0;
+}
+
+/* process Rx records to be demultiplexed */
+static void fcgi_process_demux(struct fcgi_conn *fconn)
+{
+ struct fcgi_strm *fstrm = NULL, *tmp_fstrm;
+ struct fcgi_header hdr;
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, fconn->conn);
+
+ if (fconn->state == FCGI_CS_CLOSED)
+ return;
+
+ if (unlikely(fconn->state < FCGI_CS_RECORD_H)) {
+ if (fconn->state == FCGI_CS_INIT) {
+ TRACE_STATE("waiting FCGI GET_VALUES to be sent", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_GETVAL, fconn->conn);
+ return;
+ }
+ if (fconn->state == FCGI_CS_SETTINGS) {
+ /* ensure that what is pending is a valid GET_VALUES_RESULT record. */
+ TRACE_STATE("receiving FCGI record header", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ ret = fcgi_decode_record_hdr(&fconn->dbuf, 0, &hdr);
+ if (!ret) {
+ TRACE_ERROR("header record decoding failure", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ goto fail;
+ }
+ b_del(&fconn->dbuf, ret);
+
+ if (hdr.id || (hdr.type != FCGI_GET_VALUES_RESULT && hdr.type != FCGI_UNKNOWN_TYPE)) {
+ fconn->state = FCGI_CS_CLOSED;
+ TRACE_ERROR("unexpected record type or flags", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ TRACE_STATE("switching to CLOSED", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+ goto new_record;
+ }
+ }
+
+ /* process as many incoming records as possible below */
+ while (1) {
+ if (!b_data(&fconn->dbuf)) {
+ TRACE_DEVEL("no more Rx data", FCGI_EV_RX_RECORD, fconn->conn);
+ break;
+ }
+
+ if (fconn->state == FCGI_CS_CLOSED) {
+ TRACE_STATE("end of connection reported", FCGI_EV_RX_RECORD|FCGI_EV_RX_EOI, fconn->conn);
+ break;
+ }
+
+ if (fconn->state == FCGI_CS_RECORD_H) {
+ TRACE_PROTO("receiving FCGI record header", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ ret = fcgi_decode_record_hdr(&fconn->dbuf, 0, &hdr);
+ if (!ret)
+ break;
+ b_del(&fconn->dbuf, ret);
+
+ new_record:
+ fconn->dsi = hdr.id;
+ fconn->drt = hdr.type;
+ fconn->drl = hdr.len;
+ fconn->drp = hdr.padding;
+ fconn->state = FCGI_CS_RECORD_D;
+ TRACE_STATE("FCGI record header rcvd, switching to RECORD_D", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ }
+
+ /* Only FCGI_CS_RECORD_D or FCGI_CS_RECORD_P */
+ tmp_fstrm = fcgi_conn_st_by_id(fconn, fconn->dsi);
+
+ if (tmp_fstrm != fstrm && fstrm && fcgi_strm_sc(fstrm) &&
+ (b_data(&fstrm->rxbuf) ||
+ fcgi_conn_read0_pending(fconn) ||
+ fstrm->state == FCGI_SS_CLOSED ||
+ (fstrm->flags & FCGI_SF_ES_RCVD) ||
+ se_fl_test(fstrm->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", FCGI_EV_RX_RECORD|FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ se_fl_set(fstrm->sd, SE_FL_RCV_MORE);
+ fcgi_strm_notify_recv(fstrm);
+ }
+ fstrm = tmp_fstrm;
+
+ if (fstrm->state == FCGI_SS_CLOSED && fconn->dsi != 0) {
+ /* ignore all records for closed streams */
+ goto ignore_record;
+ }
+ if (fstrm->state == FCGI_SS_IDLE) {
+ /* ignore all records for unknown streams */
+ goto ignore_record;
+ }
+
+ switch (fconn->drt) {
+ case FCGI_GET_VALUES_RESULT:
+ TRACE_PROTO("receiving FCGI GET_VALUES_RESULT record", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ ret = fcgi_conn_handle_values_result(fconn);
+ break;
+
+ case FCGI_STDOUT:
+ if (fstrm->flags & FCGI_SF_ES_RCVD)
+ goto ignore_record;
+
+ TRACE_PROTO("receiving FCGI STDOUT record", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ if (fconn->drl)
+ ret = fcgi_strm_handle_stdout(fconn, fstrm);
+ else
+ ret = fcgi_strm_handle_empty_stdout(fconn, fstrm);
+ break;
+
+ case FCGI_STDERR:
+ TRACE_PROTO("receiving FCGI STDERR record", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ ret = fcgi_strm_handle_stderr(fconn, fstrm);
+ break;
+
+ case FCGI_END_REQUEST:
+ TRACE_PROTO("receiving FCGI END_REQUEST record", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm);
+ ret = fcgi_strm_handle_end_request(fconn, fstrm);
+ break;
+
+ /* implement all extra record types here */
+ default:
+ ignore_record:
+ /* drop records that we ignore. They may be
+ * larger than the buffer so we drain all of
+ * their contents until we reach the end.
+ */
+ fconn->state = FCGI_CS_RECORD_P;
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ TRACE_PROTO("receiving FCGI ignored record", FCGI_EV_RX_RECORD, fconn->conn, fstrm, 0, (size_t[]){ret});
+ TRACE_STATE("switching to RECORD_P", FCGI_EV_RX_RECORD, fconn->conn, fstrm);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ ret = (fconn->drl == 0);
+ }
+
+ /* error or missing data condition met above ? */
+ if (ret <= 0) {
+ TRACE_DEVEL("insufficient data to proceed", FCGI_EV_RX_RECORD, fconn->conn, fstrm);
+ break;
+ }
+
+ if (fconn->state != FCGI_CS_RECORD_H && !(fconn->drl+fconn->drp)) {
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ }
+ }
+
+ fail:
+ /* we can go here on missing data, blocked response or error */
+ if (fstrm && fcgi_strm_sc(fstrm) &&
+ (b_data(&fstrm->rxbuf) ||
+ fcgi_conn_read0_pending(fconn) ||
+ fstrm->state == FCGI_SS_CLOSED ||
+ (fstrm->flags & FCGI_SF_ES_RCVD) ||
+ se_fl_test(fstrm->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", FCGI_EV_RX_RECORD|FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ se_fl_set(fstrm->sd, SE_FL_RCV_MORE);
+ fcgi_strm_notify_recv(fstrm);
+ }
+
+ fcgi_conn_restart_reading(fconn, 0);
+}
+
+/* process Tx records from streams to be multiplexed. Returns > 0 if it reached
+ * the end.
+ */
+static int fcgi_process_mux(struct fcgi_conn *fconn)
+{
+ struct fcgi_strm *fstrm, *fstrm_back;
+
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, fconn->conn);
+
+ if (unlikely(fconn->state < FCGI_CS_RECORD_H)) {
+ if (unlikely(fconn->state == FCGI_CS_INIT)) {
+ if (!(fconn->flags & FCGI_CF_GET_VALUES)) {
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_TX_RECORD|FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ fcgi_wake_unassigned_streams(fconn);
+ goto mux;
+ }
+ TRACE_PROTO("sending FCGI GET_VALUES record", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn);
+ if (unlikely(!fcgi_conn_send_get_values(fconn)))
+ goto fail;
+ fconn->state = FCGI_CS_SETTINGS;
+ TRACE_STATE("switching to SETTINGS", FCGI_EV_TX_RECORD|FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ }
+ /* need to wait for the other side */
+ if (fconn->state < FCGI_CS_RECORD_H)
+ goto done;
+ }
+
+ mux:
+ list_for_each_entry_safe(fstrm, fstrm_back, &fconn->send_list, send_list) {
+ if (fconn->state == FCGI_CS_CLOSED || fconn->flags & FCGI_CF_MUX_BLOCK_ANY)
+ break;
+
+ if (fstrm->flags & FCGI_SF_NOTIFIED)
+ continue;
+
+ /* If the sender changed his mind and unsubscribed, let's just
+ * remove the stream from the send_list.
+ */
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)) &&
+ (!fstrm->subs || !(fstrm->subs->events & SUB_RETRY_SEND))) {
+ LIST_DEL_INIT(&fstrm->send_list);
+ continue;
+ }
+
+ if (fstrm->subs && fstrm->subs->events & SUB_RETRY_SEND) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ fstrm->flags &= ~FCGI_SF_BLK_ANY;
+ fstrm->flags |= FCGI_SF_NOTIFIED;
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_SEND;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ } else {
+ /* it's the shut request that was queued */
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->shut_tl);
+ }
+ }
+
+ fail:
+ if (fconn->state == FCGI_CS_CLOSED) {
+ if (fconn->stream_cnt - fconn->nb_reserved > 0) {
+ fcgi_conn_send_aborts(fconn);
+ if (fconn->flags & FCGI_CF_MUX_BLOCK_ANY) {
+ TRACE_DEVEL("leaving in blocked situation", FCGI_EV_FCONN_WAKE|FCGI_EV_FCONN_BLK, fconn->conn);
+ return 0;
+ }
+ }
+ }
+
+ done:
+ TRACE_LEAVE(FCGI_EV_FCONN_WAKE, fconn->conn);
+ return 1;
+}
+
+
+/* Attempt to read data, and subscribe if none available.
+ * The function returns 1 if data has been received, otherwise zero.
+ */
+static int fcgi_recv(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+ struct buffer *buf;
+ int max;
+ size_t ret;
+
+ TRACE_ENTER(FCGI_EV_FCONN_RECV, conn);
+
+ if (fconn->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("leaving on sub_recv", FCGI_EV_FCONN_RECV, conn);
+ return (b_data(&fconn->dbuf));
+ }
+
+ if (!fcgi_recv_allowed(fconn)) {
+ TRACE_DEVEL("leaving on !recv_allowed", FCGI_EV_FCONN_RECV, conn);
+ return 1;
+ }
+
+ buf = fcgi_get_buf(fconn, &fconn->dbuf);
+ if (!buf) {
+ TRACE_DEVEL("waiting for fconn dbuf allocation", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK, conn);
+ fconn->flags |= FCGI_CF_DEM_DALLOC;
+ return 0;
+ }
+
+ if (!b_data(buf)) {
+ /* try to pre-align the buffer like the
+ * rxbufs will be to optimize memory copies. We'll make
+ * sure that the record header lands at the end of the
+ * HTX block to alias it upon recv. We cannot use the
+ * head because rcv_buf() will realign the buffer if
+ * it's empty. Thus we cheat and pretend we already
+ * have a few bytes there.
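+ * Concretely, in RECORD_H state the head is moved back by the record
+ * header size so that the next record's payload starts exactly where
+ * the HTX payload area begins.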
+ */
+ max = buf_room_for_htx_data(buf) + (fconn->state == FCGI_CS_RECORD_H ? FCGI_RECORD_HEADER_SZ : 0);
+ buf->head = sizeof(struct htx) - (fconn->state == FCGI_CS_RECORD_H ? FCGI_RECORD_HEADER_SZ : 0);
+ }
+ else
+ max = buf_room_for_htx_data(buf);
+
+ ret = max ? conn->xprt->rcv_buf(conn, conn->xprt_ctx, buf, max, 0) : 0;
+
+ if (max && !ret && fcgi_recv_allowed(fconn)) {
+ TRACE_DATA("failed to receive data, subscribing", FCGI_EV_FCONN_RECV, conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &fconn->wait_event);
+ }
+ else
+ TRACE_DATA("recv data", FCGI_EV_FCONN_RECV, conn, 0, 0, (size_t[]){ret});
+
+ if (!b_data(buf)) {
+ fcgi_release_buf(fconn, &fconn->dbuf);
+ TRACE_LEAVE(FCGI_EV_FCONN_RECV, conn);
+ return (conn->flags & CO_FL_ERROR || conn_xprt_read0_pending(conn));
+ }
+
+ if (ret == max) {
+ TRACE_DEVEL("fconn dbuf full", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK, conn);
+ fconn->flags |= FCGI_CF_DEM_DFULL;
+ }
+
+ TRACE_LEAVE(FCGI_EV_FCONN_RECV, conn);
+ return !!ret || (conn->flags & CO_FL_ERROR) || conn_xprt_read0_pending(conn);
+}
+
+
+/* Try to send data if possible.
+ * The function returns 1 if data have been sent, otherwise zero.
+ */
+static int fcgi_send(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+ int done;
+ int sent = 0;
+
+ TRACE_ENTER(FCGI_EV_FCONN_SEND, conn);
+
+ if (conn->flags & CO_FL_ERROR) {
+ TRACE_DEVEL("leaving on connection error", FCGI_EV_FCONN_SEND, conn);
+ return 1;
+ }
+
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ /* a handshake was requested */
+ goto schedule;
+ }
+
+ /* This loop is quite simple : it tries to fill as much as it can from
+ * pending streams into the existing buffer until it's reportedly full
+ * or the end of send requests is reached. Then it tries to send this
+ * buffer's contents out, marks it not full if at least one byte could
+ * be sent, and tries again.
+ *
+ * The snd_buf() function normally takes a "flags" argument which may
+ * be made of a combination of CO_SFL_MSG_MORE to indicate that more
+ * data immediately comes and CO_SFL_STREAMER to indicate that the
+ * connection is streaming lots of data (used to increase TLS record
+ * size at the expense of latency). The former can be sent any time
+ * there's a buffer full flag, as it indicates at least one stream
+ * attempted to send and failed so there are pending data. An
+ * alternative would be to set it as long as there's an active stream
+ * but that would be problematic for ACKs until we have an absolute
+ * guarantee that all waiters have at least one byte to send. The
+ * latter should possibly not be set for now.
+ */
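+ /* In short: fill the mbuf ring via fcgi_process_mux(), flush it with
+ * snd_buf(), clear the MFULL/MROOM flags after each pass, and loop
+ * until nothing more can be produced or sent.
+ */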
+
+ done = 0;
+ while (!done) {
+ unsigned int flags = 0;
+ unsigned int released = 0;
+ struct buffer *buf;
+
+ /* fill as much as we can into the current buffer */
+ while (((fconn->flags & (FCGI_CF_MUX_MFULL|FCGI_CF_MUX_MALLOC)) == 0) && !done)
+ done = fcgi_process_mux(fconn);
+
+ if (fconn->flags & FCGI_CF_MUX_MALLOC)
+ done = 1; // we won't go further without extra buffers
+
+ if (conn->flags & CO_FL_ERROR)
+ break;
+
+ if (fconn->flags & (FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM))
+ flags |= CO_SFL_MSG_MORE;
+
+ for (buf = br_head(fconn->mbuf); b_size(buf); buf = br_del_head(fconn->mbuf)) {
+ if (b_data(buf)) {
+ int ret;
+
+ ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, buf, b_data(buf), flags);
+ if (!ret) {
+ done = 1;
+ break;
+ }
+ sent = 1;
+ TRACE_DATA("send data", FCGI_EV_FCONN_SEND, conn, 0, 0, (size_t[]){ret});
+ b_del(buf, ret);
+ if (b_data(buf)) {
+ done = 1;
+ break;
+ }
+ }
+ b_free(buf);
+ released++;
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+
+ /* wrote at least one byte, the buffer is not full anymore */
+ if (fconn->flags & (FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM))
+ TRACE_STATE("fconn mbuf ring not fill anymore", FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_BLK, conn);
+ fconn->flags &= ~(FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM);
+ }
+
+ if (conn->flags & CO_FL_SOCK_WR_SH) {
+ /* output closed, nothing to send, clear the buffer to release it */
+ b_reset(br_tail(fconn->mbuf));
+ }
+ /* We're not full anymore, so we can wake any tasks that are waiting
+ * for us.
+ */
+ if (!(fconn->flags & (FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM)) && fconn->state >= FCGI_CS_RECORD_H) {
+ struct fcgi_strm *fstrm;
+
+ list_for_each_entry(fstrm, &fconn->send_list, send_list) {
+ if (fconn->state == FCGI_CS_CLOSED || fconn->flags & FCGI_CF_MUX_BLOCK_ANY)
+ break;
+
+ if (fstrm->flags & FCGI_SF_NOTIFIED)
+ continue;
+
+ /* If the sender changed his mind and unsubscribed, let's just
+ * remove the stream from the send_list.
+ */
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)) &&
+ (!fstrm->subs || !(fstrm->subs->events & SUB_RETRY_SEND))) {
+ LIST_DEL_INIT(&fstrm->send_list);
+ continue;
+ }
+
+ if (fstrm->subs && fstrm->subs->events & SUB_RETRY_SEND) {
+ TRACE_DEVEL("waking up pending stream", FCGI_EV_FCONN_SEND|FCGI_EV_STRM_WAKE, conn, fstrm);
+ fstrm->flags &= ~FCGI_SF_BLK_ANY;
+ fstrm->flags |= FCGI_SF_NOTIFIED;
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_SEND;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ } else {
+ /* it's the shut request that was queued */
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->shut_tl);
+ }
+ }
+ }
+ /* We're done, no more to send */
+ if (!br_data(fconn->mbuf)) {
+ TRACE_DEVEL("leaving with everything sent", FCGI_EV_FCONN_SEND, conn);
+ return sent;
+ }
+schedule:
+ if (!(conn->flags & CO_FL_ERROR) && !(fconn->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_STATE("more data to send, subscribing", FCGI_EV_FCONN_SEND, conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_SEND, &fconn->wait_event);
+ }
+
+ TRACE_DEVEL("leaving with some data left to send", FCGI_EV_FCONN_SEND, conn);
+ return sent;
+}
+
+/* this is the tasklet referenced in fconn->wait_event.tasklet */
+struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn;
+ struct fcgi_conn *fconn = ctx;
+ struct tasklet *tl = (struct tasklet *)t;
+ int conn_in_list;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (tl->context == NULL) {
+ /* The connection has been taken over by another thread,
+ * we're no longer responsible for it, so just free the
+ * tasklet, and do nothing.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ return NULL;
+ }
+ conn = fconn->conn;
+ TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
+
+ conn_in_list = conn_get_idle_flag(conn);
+ if (conn_in_list)
+ conn_delete_from_tree(&conn->hash_node->node);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ /* we're certain the connection was not in an idle list */
+ conn = fconn->conn;
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, conn);
+ conn_in_list = 0;
+ }
+
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND))
+ ret = fcgi_send(fconn);
+ if (!(fconn->wait_event.events & SUB_RETRY_RECV))
+ ret |= fcgi_recv(fconn);
+ if (ret || b_data(&fconn->dbuf))
+ ret = fcgi_process(fconn);
+
+ /* If we were in an idle list, we want to add it back into it,
+ * unless fcgi_process() returned -1, which means it has destroyed
+ * the connection (testing !ret is enough: if fcgi_process() wasn't
+ * called then ret will be 0 anyway).
+ */
+ if (ret < 0)
+ t = NULL;
+
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (conn_in_list == CO_FL_SAFE_LIST)
+ eb64_insert(&srv->per_thr[tid].safe_conns, &conn->hash_node->node);
+ else
+ eb64_insert(&srv->per_thr[tid].idle_conns, &conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ return t;
+}
+
+/* callback called on any event by the connection handler.
+ * It applies changes and returns zero, or < 0 if it wants immediate
+ * destruction of the connection (which normally does not happen in FCGI).
+ */
+static int fcgi_process(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+
+ TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
+
+ if (b_data(&fconn->dbuf) && !(fconn->flags & FCGI_CF_DEM_BLOCK_ANY)) {
+ fcgi_process_demux(fconn);
+
+ if (fconn->state == FCGI_CS_CLOSED || conn->flags & CO_FL_ERROR)
+ b_reset(&fconn->dbuf);
+
+ if (buf_room_for_htx_data(&fconn->dbuf))
+ fconn->flags &= ~FCGI_CF_DEM_DFULL;
+ }
+ fcgi_send(fconn);
+
+ if (unlikely(fconn->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ /* frontend is stopping, reload likely in progress, let's try
+ * to announce a graceful shutdown if not yet done. We don't
+ * care if it fails, it will be tried again later.
+ */
+ TRACE_STATE("proxy stopped, sending ABORT to all streams", FCGI_EV_FCONN_WAKE|FCGI_EV_TX_RECORD, conn);
+ if (!(fconn->flags & (FCGI_CF_ABRTS_SENT|FCGI_CF_ABRTS_FAILED))) {
+ if (fconn->stream_cnt - fconn->nb_reserved > 0)
+ fcgi_conn_send_aborts(fconn);
+ }
+ }
+
+ /*
+ * If we received early data, and the handshake is done, wake
+ * any stream that was waiting for it.
+ */
+ if (!(fconn->flags & FCGI_CF_WAIT_FOR_HS) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_WAIT_XPRT | CO_FL_EARLY_DATA)) == CO_FL_EARLY_DATA) {
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ fconn->flags |= FCGI_CF_WAIT_FOR_HS;
+ node = eb32_lookup_ge(&fconn->streams_by_id, 1);
+
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ if (fcgi_strm_sc(fstrm) && se_fl_test(fstrm->sd, SE_FL_WAIT_FOR_HS))
+ fcgi_strm_notify_recv(fstrm);
+ node = eb32_next(node);
+ }
+ }
+
+ if ((conn->flags & CO_FL_ERROR) || fcgi_conn_read0_pending(fconn) ||
+ fconn->state == FCGI_CS_CLOSED || (fconn->flags & FCGI_CF_ABRTS_FAILED) ||
+ eb_is_empty(&fconn->streams_by_id)) {
+ fcgi_wake_some_streams(fconn, 0);
+
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ /* no more stream, kill the connection now */
+ fcgi_release(fconn);
+ TRACE_DEVEL("leaving after releasing the connection", FCGI_EV_FCONN_WAKE);
+ return -1;
+ }
+ }
+
+ if (!b_data(&fconn->dbuf))
+ fcgi_release_buf(fconn, &fconn->dbuf);
+
+ if ((conn->flags & CO_FL_SOCK_WR_SH) ||
+ fconn->state == FCGI_CS_CLOSED || (fconn->flags & FCGI_CF_ABRTS_FAILED) ||
+ (!br_data(fconn->mbuf) && ((fconn->flags & FCGI_CF_MUX_BLOCK_ANY) || LIST_ISEMPTY(&fconn->send_list))))
+ fcgi_release_mbuf(fconn);
+
+ if (fconn->task) {
+ fconn->task->expire = tick_add(now_ms, (fconn->state == FCGI_CS_CLOSED ? fconn->shut_timeout : fconn->timeout));
+ task_queue(fconn->task);
+ }
+
+ fcgi_send(fconn);
+ TRACE_LEAVE(FCGI_EV_FCONN_WAKE, conn);
+ return 0;
+}
+
+
+/* wake-up function called by the connection layer (mux_ops.wake) */
+static int fcgi_wake(struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+
+ TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
+ return (fcgi_process(fconn));
+}
+
+
+static int fcgi_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ int ret = 0;
+ switch (mux_ctl) {
+ case MUX_STATUS:
+ if (!(conn->flags & CO_FL_WAIT_XPRT))
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_EXIT_STATUS:
+ return MUX_ES_UNKNOWN;
+ default:
+ return -1;
+ }
+}
+
+/* Connection timeout management. The principle is that if there's no receipt
+ * nor sending for a certain amount of time, the connection is closed. If the
+ * MUX buffer still has data lying around or is not allocatable, the connection
+ * is immediately killed. If it's allocatable and empty, we attempt to send
+ * ABORT records.
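+ * The task may also run while another thread is taking over the connection,
+ * so the idle conns lock is taken to check that the context was not stolen
+ * before going further (see the !t->context test below).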
+ */
+struct task *fcgi_timeout_task(struct task *t, void *context, unsigned int state)
+{
+ struct fcgi_conn *fconn = context;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, (fconn ? fconn->conn : NULL));
+
+ if (fconn) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* Somebody already stole the connection from us, so we should not
+ * free it, we just have to free the task.
+ */
+ if (!t->context) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ fconn = NULL;
+ goto do_leave;
+ }
+
+ if (!expired) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("leaving (not expired)", FCGI_EV_FCONN_WAKE, fconn->conn);
+ return t;
+ }
+
+ /* We're about to destroy the connection, so make sure nobody attempts
+ * to steal it from us.
+ */
+ if (fconn->conn->flags & CO_FL_LIST_MASK)
+ conn_delete_from_tree(&fconn->conn->hash_node->node);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+do_leave:
+ task_destroy(t);
+
+ if (!fconn) {
+ /* resources were already deleted */
+ TRACE_DEVEL("leaving (not more fconn)", FCGI_EV_FCONN_WAKE);
+ return NULL;
+ }
+
+ fconn->task = NULL;
+ fconn->state = FCGI_CS_CLOSED;
+ fcgi_wake_some_streams(fconn, 0);
+
+ if (br_data(fconn->mbuf)) {
+ /* don't even try to send aborts, the buffer is stuck */
+ fconn->flags |= FCGI_CF_ABRTS_FAILED;
+ goto end;
+ }
+
+ /* try to send but no need to insist */
+ if (!fcgi_conn_send_aborts(fconn))
+ fconn->flags |= FCGI_CF_ABRTS_FAILED;
+
+ if (br_data(fconn->mbuf) && !(fconn->flags & FCGI_CF_ABRTS_FAILED) &&
+ conn_xprt_ready(fconn->conn)) {
+ unsigned int released = 0;
+ struct buffer *buf;
+
+ for (buf = br_head(fconn->mbuf); b_size(buf); buf = br_del_head(fconn->mbuf)) {
+ if (b_data(buf)) {
+ int ret = fconn->conn->xprt->snd_buf(fconn->conn, fconn->conn->xprt_ctx,
+ buf, b_data(buf), 0);
+ if (!ret)
+ break;
+ b_del(buf, ret);
+ if (b_data(buf))
+ break;
+ b_free(buf);
+ released++;
+ }
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+ }
+
+ end:
+ /* either we can release everything now or it will be done later once
+ * the last stream closes.
+ */
+ if (eb_is_empty(&fconn->streams_by_id))
+ fcgi_release(fconn);
+
+ TRACE_LEAVE(FCGI_EV_FCONN_WAKE);
+ return NULL;
+}
+
+
+/*******************************************/
+/* functions below are used by the streams */
+/*******************************************/
+
+/* Append the description of what is present in error snapshot <es> into <out>.
+ * The description must be small enough to always fit in a buffer. The output
+ * buffer may be the trash so the trash must not be used inside this function.
+ */
+static void fcgi_show_error_snapshot(struct buffer *out, const struct error_snapshot *es)
+{
+ chunk_appendf(out,
+ " FCGI connection flags 0x%08x, FCGI stream flags 0x%08x\n"
+ " H1 msg state %s(%d), H1 msg flags 0x%08x\n"
+ " H1 chunk len %lld bytes, H1 body len %lld bytes :\n",
+ es->ctx.h1.c_flags, es->ctx.h1.s_flags,
+ h1m_state_str(es->ctx.h1.state), es->ctx.h1.state,
+ es->ctx.h1.m_flags, es->ctx.h1.m_clen, es->ctx.h1.m_blen);
+}
+/*
+ * Capture a bad response and archive it in the proxy's structure. By default
+ * it tries to report the error position as h1m->err_pos. However if this one is
+ * not set, it will then report h1m->next, which is the last known parsing
+ * point. The function is able to deal with wrapping buffers. It always displays
+ * buffers as a contiguous area starting at buf->p. The direction is determined
+ * thanks to the h1m's flags.
+ */
+static void fcgi_strm_capture_bad_message(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct h1m *h1m, struct buffer *buf)
+{
+ struct session *sess = fstrm->sess;
+ struct proxy *proxy = fconn->proxy;
+ struct proxy *other_end;
+ union error_snapshot_ctx ctx;
+
+ if (fcgi_strm_sc(fstrm) && sc_strm(fcgi_strm_sc(fstrm))) {
+ if (sess == NULL)
+ sess = __sc_strm(fcgi_strm_sc(fstrm))->sess;
+ if (!(h1m->flags & H1_MF_RESP))
+ other_end = __sc_strm(fcgi_strm_sc(fstrm))->be;
+ else
+ other_end = sess->fe;
+ } else
+ other_end = NULL;
+ /* http-specific part now */
+ ctx.h1.state = h1m->state;
+ ctx.h1.c_flags = fconn->flags;
+ ctx.h1.s_flags = fstrm->flags;
+ ctx.h1.m_flags = h1m->flags;
+ ctx.h1.m_clen = h1m->curr_len;
+ ctx.h1.m_blen = h1m->body_len;
+
+ proxy_capture_error(proxy, 1, other_end, fconn->conn->target, sess, buf, 0, 0,
+ (h1m->err_pos >= 0) ? h1m->err_pos : h1m->next,
+ &ctx, fcgi_show_error_snapshot);
+}
+
+static size_t fcgi_strm_parse_headers(struct fcgi_strm *fstrm, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){max});
+ ret = h1_parse_msg_hdrs(h1m, NULL, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fstrm->fconn->conn, fstrm);
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("parsing error, reject H1 response", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ }
+ ret = 0;
+ goto end;
+ }
+
+ /* Reject any message with an unknown transfer-encoding, in fact any
+ * encoding other than "chunked". A 422-Unprocessable-Content is
+ * returned for an invalid request, a 502-Bad-Gateway for an invalid
+ * response.
+ */
+ if (h1m->flags & H1_MF_TE_OTHER) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ TRACE_ERROR("Unknown transfer-encoding", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ ret = 0;
+ goto end;
+ }
+
+ *ofs += ret;
+ end:
+ TRACE_LEAVE(FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){ret});
+ return ret;
+}
+
+static size_t fcgi_strm_parse_data(struct fcgi_strm *fstrm, struct h1m *h1m, struct htx **htx,
+ struct buffer *buf, size_t *ofs, size_t max, struct buffer *htxbuf)
+{
+ size_t ret;
+
+ TRACE_ENTER(FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fstrm->fconn->conn, fstrm, 0, (size_t[]){max});
+ ret = h1_parse_msg_data(h1m, htx, buf, *ofs, max, htxbuf);
+ if (!ret) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fstrm->fconn->conn, fstrm);
+ if ((*htx)->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("parsing error, reject H1 response", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ }
+ goto end;
+ }
+ *ofs += ret;
+ end:
+ TRACE_LEAVE(FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fstrm->fconn->conn, fstrm, 0, (size_t[]){ret});
+ return ret;
+}
+
+static size_t fcgi_strm_parse_trailers(struct fcgi_strm *fstrm, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){max});
+ ret = h1_parse_msg_tlrs(h1m, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fstrm->fconn->conn, fstrm);
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("parsing error, reject H1 response", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ }
+ ret = 0;
+ goto end;
+ }
+ *ofs += ret;
+ end:
+ TRACE_LEAVE(FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){ret});
+ return ret;
+}
+
+static size_t fcgi_strm_parse_response(struct fcgi_strm *fstrm, struct buffer *buf, size_t count)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+ struct htx *htx;
+ struct h1m *h1m = &fstrm->h1m;
+ size_t ret, data, total = 0;
+
+ htx = htx_from_buf(buf);
+ TRACE_ENTER(FCGI_EV_RSP_DATA, fconn->conn, fstrm, htx, (size_t[]){count});
+
+ data = htx->data;
+ if (fstrm->state == FCGI_SS_ERROR)
+ goto end;
+
+ do {
+ size_t used = htx_used_space(htx);
+
+ if (h1m->state <= H1_MSG_LAST_LF) {
+ TRACE_PROTO("parsing response headers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fconn->conn, fstrm);
+ ret = fcgi_strm_parse_headers(fstrm, h1m, htx, &fstrm->rxbuf, &total, count);
+ if (!ret)
+ break;
+
+ TRACE_USER("rcvd H1 response headers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fconn->conn, fstrm, htx);
+
+ if ((h1m->flags & (H1_MF_VER_11|H1_MF_XFER_LEN)) == H1_MF_VER_11) {
+ struct htx_blk *blk = htx_get_head_blk(htx);
+ struct htx_sl *sl;
+
+ if (!blk)
+ break;
+ sl = htx_get_blk_ptr(htx, blk);
+ sl->flags |= HTX_SL_F_XFER_LEN;
+ htx->extra = 0;
+ }
+ }
+ else if (h1m->state < H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing response payload", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fconn->conn, fstrm);
+ fcgi_strm_parse_data(fstrm, h1m, &htx, &fstrm->rxbuf, &total, count, buf);
+
+ if (!(h1m->flags & H1_MF_XFER_LEN) && fstrm->state != FCGI_SS_ERROR &&
+ (fstrm->flags & FCGI_SF_ES_RCVD) && b_data(&fstrm->rxbuf) == total) {
+ TRACE_DEVEL("end of data", FCGI_EV_RSP_DATA, fconn->conn, fstrm);
+ if (htx_is_empty(htx) && !htx_add_endof(htx, HTX_BLK_EOT))
+ break;
+ htx->flags |= HTX_FL_EOM;
+ h1m->state = H1_MSG_DONE;
+ TRACE_USER("H1 response fully rcvd", FCGI_EV_RSP_DATA|FCGI_EV_RSP_EOM, fconn->conn, fstrm, htx);
+ }
+
+ if (h1m->state < H1_MSG_TRAILERS)
+ break;
+
+ TRACE_PROTO("rcvd response payload data", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fconn->conn, fstrm, htx);
+ }
+ else if (h1m->state == H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing response trailers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fconn->conn, fstrm);
+ fcgi_strm_parse_trailers(fstrm, h1m, htx, &fstrm->rxbuf, &total, count);
+ if (h1m->state != H1_MSG_DONE)
+ break;
+
+ TRACE_PROTO("rcvd H1 response trailers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fconn->conn, fstrm, htx);
+ }
+ else if (h1m->state == H1_MSG_DONE) {
+ TRACE_USER("H1 response fully rcvd", FCGI_EV_RSP_DATA|FCGI_EV_RSP_EOM, fconn->conn, fstrm, htx);
+ if (b_data(&fstrm->rxbuf) > total) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ TRACE_PROTO("too much data, parsing error", FCGI_EV_RSP_DATA, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ }
+ break;
+ }
+ else {
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ TRACE_ERROR("unexpected processing error", FCGI_EV_RSP_DATA|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ break;
+ }
+
+ count -= htx_used_space(htx) - used;
+ } while (fstrm->state != FCGI_SS_ERROR);
+
+ if (fstrm->state == FCGI_SS_ERROR) {
+ b_reset(&fstrm->rxbuf);
+ htx_to_buf(htx, buf);
+ TRACE_DEVEL("leaving on error", FCGI_EV_RSP_DATA|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ return 0;
+ }
+
+ b_del(&fstrm->rxbuf, total);
+
+ end:
+ htx_to_buf(htx, buf);
+ ret = htx->data - data;
+ TRACE_LEAVE(FCGI_EV_RSP_DATA, fconn->conn, fstrm, htx, (size_t[]){ret});
+ return ret;
+}
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int fcgi_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct fcgi_strm *fstrm;
+ struct fcgi_conn *fconn = conn->ctx;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_NEW, conn);
+ fstrm = fcgi_stconn_new(fconn, sd->sc, sess);
+ if (!fstrm)
+ goto err;
+
+ /* the connection is not idle anymore, let's mark this */
+ HA_ATOMIC_AND(&fconn->wait_event.tasklet->state, ~TASK_F_USR1);
+ xprt_set_used(conn, conn->xprt, conn->xprt_ctx);
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_NEW, conn, fstrm);
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving on error", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR, conn);
+ return -1;
+}
+
+/* Retrieves the first valid stream connector from this connection, or returns NULL.
+ * We have to scan because we may have some orphan streams. It might be
+ * beneficial to scan backwards from the end to reduce the likeliness to find
+ * orphans.
+ */
+static struct stconn *fcgi_get_first_sc(const struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+ struct fcgi_strm *fstrm;
+ struct eb32_node *node;
+
+ node = eb32_first(&fconn->streams_by_id);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ if (fcgi_strm_sc(fstrm))
+ return fcgi_strm_sc(fstrm);
+ node = eb32_next(node);
+ }
+ return NULL;
+}
+
+/*
+ * Destroy the mux and the associated connection, if it is no longer used
+ */
+static void fcgi_destroy(void *ctx)
+{
+ struct fcgi_conn *fconn = ctx;
+
+ TRACE_POINT(FCGI_EV_FCONN_END, fconn->conn);
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ BUG_ON(fconn->conn->ctx != fconn);
+ fcgi_release(fconn);
+ }
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void fcgi_detach(struct sedesc *sd)
+{
+ struct fcgi_strm *fstrm = sd->se;
+ struct fcgi_conn *fconn;
+ struct session *sess;
+
+ TRACE_ENTER(FCGI_EV_STRM_END, (fstrm ? fstrm->fconn->conn : NULL), fstrm);
+
+ if (!fstrm) {
+ TRACE_LEAVE(FCGI_EV_STRM_END);
+ return;
+ }
+
+ /* there's no txbuf so we're certain not to be able to send anything */
+ fstrm->flags &= ~FCGI_SF_NOTIFIED;
+
+ sess = fstrm->sess;
+ fconn = fstrm->fconn;
+ fconn->nb_sc--;
+
+ if (fstrm->proto_status == FCGI_PS_CANT_MPX_CONN) {
+ fconn->flags &= ~FCGI_CF_MPXS_CONNS;
+ fconn->streams_limit = 1;
+ }
+ else if (fstrm->proto_status == FCGI_PS_OVERLOADED ||
+ fstrm->proto_status == FCGI_PS_UNKNOWN_ROLE) {
+ fconn->flags &= ~FCGI_CF_KEEP_CONN;
+ fconn->state = FCGI_CS_CLOSED;
+ }
+
+ /* this stream may be blocked waiting for some data to leave, so orphan
+ * it in this case.
+ */
+ if (!(fconn->conn->flags & CO_FL_ERROR) &&
+ (fconn->state != FCGI_CS_CLOSED) &&
+ (fstrm->flags & (FCGI_SF_BLK_MBUSY|FCGI_SF_BLK_MROOM)) &&
+ (fstrm->subs || (fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)))) {
+ TRACE_DEVEL("leaving on stream blocked", FCGI_EV_STRM_END|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ return;
+ }
+
+ if ((fconn->flags & FCGI_CF_DEM_BLOCK_ANY && fstrm->id == fconn->dsi)) {
+ /* unblock the connection if it was blocked on this stream. */
+ fconn->flags &= ~FCGI_CF_DEM_BLOCK_ANY;
+ fcgi_conn_restart_reading(fconn, 1);
+ }
+
+ fcgi_strm_destroy(fstrm);
+
+ if (!(fconn->conn->flags & (CO_FL_ERROR|CO_FL_SOCK_RD_SH|CO_FL_SOCK_WR_SH)) &&
+ (fconn->flags & FCGI_CF_KEEP_CONN)) {
+ if (fconn->conn->flags & CO_FL_PRIVATE) {
+ /* Add the connection in the session serverlist, if not already done */
+ if (!session_add_conn(sess, fconn->conn, fconn->conn->target)) {
+ fconn->conn->owner = NULL;
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ /* let's kill the connection right away */
+ fconn->conn->mux->destroy(fconn);
+ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR);
+ return;
+ }
+ }
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ if (session_check_idle_conn(fconn->conn->owner, fconn->conn) != 0) {
+ /* The connection is destroyed, let's leave */
+ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR);
+ return;
+ }
+ }
+ }
+ else {
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ /* If the connection is owned by the session, first remove it
+ * from its list
+ */
+ if (fconn->conn->owner) {
+ session_unown_conn(fconn->conn->owner, fconn->conn);
+ fconn->conn->owner = NULL;
+ }
+
+ /* mark that the tasklet may lose its context to another thread and
+ * that the handler needs to check it under the idle conns lock.
+ */
+ HA_ATOMIC_OR(&fconn->wait_event.tasklet->state, TASK_F_USR1);
+ xprt_set_idle(fconn->conn, fconn->conn->xprt, fconn->conn->xprt_ctx);
+
+ if (!srv_add_to_idle_list(objt_server(fconn->conn->target), fconn->conn, 1)) {
+ /* The server doesn't want it, let's kill the connection right away */
+ fconn->conn->mux->destroy(fconn);
+ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR);
+ return;
+ }
+ /* At this point, the connection has been added to the
+ * server idle list, so another thread may already have
+ * hijacked it, so we can't do anything with it.
+ */
+ TRACE_DEVEL("reusable idle connection", FCGI_EV_STRM_END, fconn->conn);
+ return;
+ }
+ else if (!fconn->conn->hash_node->node.node.leaf_p &&
+ fcgi_avail_streams(fconn->conn) > 0 && objt_server(fconn->conn->target) &&
+ !LIST_INLIST(&fconn->conn->session_list)) {
+ eb64_insert(&__objt_server(fconn->conn->target)->per_thr[tid].avail_conns,
+ &fconn->conn->hash_node->node);
+ }
+ }
+ }
+
+ /* We don't want to close right now unless we're removing the last
+ * stream and the connection is in error.
+ */
+ if (fcgi_conn_is_dead(fconn)) {
+ /* no more stream will come, kill it now */
+ TRACE_DEVEL("leaving, killing dead connection", FCGI_EV_STRM_END, fconn->conn);
+ fcgi_release(fconn);
+ }
+ else if (fconn->task) {
+ fconn->task->expire = tick_add(now_ms, (fconn->state == FCGI_CS_CLOSED ? fconn->shut_timeout : fconn->timeout));
+ task_queue(fconn->task);
+ TRACE_DEVEL("leaving, refreshing connection's timeout", FCGI_EV_STRM_END, fconn->conn);
+ }
+ else
+ TRACE_DEVEL("leaving", FCGI_EV_STRM_END, fconn->conn);
+}
+
+
+/* Performs a synchronous or asynchronous shutr(). */
+static void fcgi_do_shutr(struct fcgi_strm *fstrm)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+
+ if (fstrm->state == FCGI_SS_CLOSED)
+ goto done;
+
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse.
+ */
+ if (se_fl_test(fstrm->sd, SE_FL_KILL_CONN) &&
+ !(fconn->flags & (FCGI_CF_ABRTS_SENT|FCGI_CF_ABRTS_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ fconn->state = FCGI_CS_CLOSED;
+ }
+ else if (fstrm->flags & FCGI_SF_BEGIN_SENT) {
+ TRACE_STATE("no headers sent yet, trying a retryable abort", FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ if (!(fstrm->flags & (FCGI_SF_ES_SENT|FCGI_SF_ABRT_SENT)) &&
+ !fcgi_strm_send_abort(fconn, fstrm))
+ goto add_to_list;
+ }
+
+ fcgi_strm_close(fstrm);
+
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(fconn->wait_event.tasklet);
+ done:
+ fstrm->flags &= ~FCGI_SF_WANT_SHUTR;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+
+ add_to_list:
+ /* Let the handler know we want to shutr, and add ourselves to the
+ * send list if not yet done. fcgi_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ if (!LIST_INLIST(&fstrm->send_list)) {
+ if (fstrm->flags & (FCGI_SF_BLK_MBUSY|FCGI_SF_BLK_MROOM)) {
+ LIST_APPEND(&fconn->send_list, &fstrm->send_list);
+ }
+ }
+ fstrm->flags |= FCGI_SF_WANT_SHUTR;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+}
+
+/* Performs a synchronous or asynchronous shutw(). */
+static void fcgi_do_shutw(struct fcgi_strm *fstrm)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+
+ if (fstrm->state == FCGI_SS_HLOC || fstrm->state == FCGI_SS_CLOSED)
+ goto done;
+
+ if (fstrm->state != FCGI_SS_ERROR && (fstrm->flags & FCGI_SF_BEGIN_SENT)) {
+ if (!(fstrm->flags & (FCGI_SF_ES_SENT|FCGI_SF_ABRT_SENT)) &&
+ !fcgi_strm_send_abort(fconn, fstrm))
+ goto add_to_list;
+
+ if (fstrm->state == FCGI_SS_HREM)
+ fcgi_strm_close(fstrm);
+ else
+ fstrm->state = FCGI_SS_HLOC;
+ } else {
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse.
+ */
+ if (se_fl_test(fstrm->sd, SE_FL_KILL_CONN) &&
+ !(fconn->flags & (FCGI_CF_ABRTS_SENT|FCGI_CF_ABRTS_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ fconn->state = FCGI_CS_CLOSED;
+ }
+
+ fcgi_strm_close(fstrm);
+ }
+
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(fconn->wait_event.tasklet);
+ done:
+ fstrm->flags &= ~FCGI_SF_WANT_SHUTW;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+
+ add_to_list:
+ /* Let the handler know we want to shutw, and add ourselves to the
+ * send list if not yet done. fcgi_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ if (!LIST_INLIST(&fstrm->send_list)) {
+ if (fstrm->flags & (FCGI_SF_BLK_MBUSY|FCGI_SF_BLK_MROOM)) {
+ LIST_APPEND(&fconn->send_list, &fstrm->send_list);
+ }
+ }
+ fstrm->flags |= FCGI_SF_WANT_SHUTW;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+}
+
+/* This is the tasklet referenced in fstrm->shut_tl, it is used for
+ * deferred shutdowns when the fcgi_detach() was done but the mux buffer was full
+ * and prevented the last record from being emitted.
+ */
+struct task *fcgi_deferred_shut(struct task *t, void *ctx, unsigned int state)
+{
+ struct fcgi_strm *fstrm = ctx;
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+
+ if (fstrm->flags & FCGI_SF_NOTIFIED) {
+ /* some data processing remains to be done first */
+ goto end;
+ }
+
+ if (fstrm->flags & FCGI_SF_WANT_SHUTW)
+ fcgi_do_shutw(fstrm);
+
+ if (fstrm->flags & FCGI_SF_WANT_SHUTR)
+ fcgi_do_shutr(fstrm);
+
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW))) {
+ /* We're done trying to send, remove ourself from the send_list */
+ LIST_DEL_INIT(&fstrm->send_list);
+
+ if (!fcgi_strm_sc(fstrm)) {
+ fcgi_strm_destroy(fstrm);
+ if (fcgi_conn_is_dead(fconn))
+ fcgi_release(fconn);
+ }
+ }
+ end:
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT);
+ return NULL;
+}
+
+/* shutr() called by the stream connector (mux_ops.shutr) */
+static void fcgi_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+
+ TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm);
+ if (!mode)
+ return;
+ fcgi_do_shutr(fstrm);
+}
+
+/* shutw() called by the stream connector (mux_ops.shutw) */
+static void fcgi_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+
+ TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm);
+ fcgi_do_shutw(fstrm);
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int fcgi_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(fstrm->subs && fstrm->subs != es);
+
+ es->events |= event_type;
+ fstrm->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("unsubscribe(recv)", FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("unsubscribe(send)", FCGI_EV_STRM_SEND, fconn->conn, fstrm);
+ if (!LIST_INLIST(&fstrm->send_list))
+ LIST_APPEND(&fconn->send_list, &fstrm->send_list);
+ }
+ return 0;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>
+ * (undo fcgi_subscribe). The <es> pointer is not allowed to differ from the one
+ * passed to the subscribe() call. It always returns zero.
+ */
+static int fcgi_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(fstrm->subs && fstrm->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ fstrm->subs = NULL;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("subscribe(send)", FCGI_EV_STRM_SEND, fconn->conn, fstrm);
+ fstrm->flags &= ~FCGI_SF_NOTIFIED;
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)))
+ LIST_DEL_INIT(&fstrm->send_list);
+ }
+ return 0;
+}
+
+/* Called from the upper layer, to receive data
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use kernel splicing or any kind of mux-to-mux
+ * xfer. If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct the
+ * mux it may optimize the data copy to <buf> if necessary. Otherwise, it should
+ * copy as much data as possible.
+ */
+static size_t fcgi_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+ size_t ret = 0;
+
+ TRACE_ENTER(FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+
+ if (!(fconn->flags & FCGI_CF_DEM_SALLOC))
+ ret = fcgi_strm_parse_response(fstrm, buf, count);
+ else
+ TRACE_STATE("fstrm rxbuf not allocated", FCGI_EV_STRM_RECV|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+
+ if (b_data(&fstrm->rxbuf))
+ se_fl_set(fstrm->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ else {
+ se_fl_clr(fstrm->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (fstrm->state == FCGI_SS_ERROR || (fstrm->h1m.state == H1_MSG_DONE)) {
+ se_fl_set(fstrm->sd, SE_FL_EOI);
+ if (!(fstrm->h1m.flags & (H1_MF_VER_11|H1_MF_XFER_LEN)))
+ se_fl_set(fstrm->sd, SE_FL_EOS);
+ }
+ if (fcgi_conn_read0_pending(fconn))
+ se_fl_set(fstrm->sd, SE_FL_EOS);
+ if (se_fl_test(fstrm->sd, SE_FL_ERR_PENDING))
+ se_fl_set(fstrm->sd, SE_FL_ERROR);
+ fcgi_release_buf(fconn, &fstrm->rxbuf);
+ }
+
+ if (ret && fconn->dsi == fstrm->id) {
+ /* demux is blocking on this stream's buffer */
+ fconn->flags &= ~FCGI_CF_DEM_SFULL;
+ fcgi_conn_restart_reading(fconn, 1);
+ }
+
+ TRACE_LEAVE(FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+ return ret;
+}
+
+
+/* Called from the upper layer, to send data from buffer <buf> for no more than
+ * <count> bytes. Returns the number of bytes effectively sent. Some status
+ * flags may be updated on the stream connector.
+ */
+static size_t fcgi_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+ size_t total = 0;
+ size_t ret;
+ struct htx *htx = NULL;
+ struct htx_sl *sl;
+ struct htx_blk *blk;
+ uint32_t bsize;
+
+ TRACE_ENTER(FCGI_EV_STRM_SEND, fconn->conn, fstrm, 0, (size_t[]){count});
+
+ /* If we were not just woken because we wanted to send but couldn't,
+ * and somebody else is already waiting to send, do nothing: we will
+ * subscribe later and be put at the end of the list.
+ */
+ if (!(fstrm->flags & FCGI_SF_NOTIFIED) && !LIST_ISEMPTY(&fconn->send_list)) {
+ TRACE_STATE("other streams already waiting, going to the queue and leaving", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ return 0;
+ }
+ fstrm->flags &= ~FCGI_SF_NOTIFIED;
+
+ if (fconn->state < FCGI_CS_RECORD_H) {
+ TRACE_STATE("connection not ready, leaving", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ return 0;
+ }
+
+ htx = htxbuf(buf);
+ if (fstrm->id == 0) {
+ int32_t id = fcgi_conn_get_next_sid(fconn);
+
+ if (id < 0) {
+ fcgi_strm_close(fstrm);
+ se_fl_set(fstrm->sd, SE_FL_ERROR);
+ TRACE_DEVEL("couldn't get a stream ID, leaving in error", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_ERR|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ return 0;
+ }
+
+ eb32_delete(&fstrm->by_id);
+ fstrm->by_id.key = fstrm->id = id;
+ fconn->max_id = id;
+ fconn->nb_reserved--;
+ eb32_insert(&fconn->streams_by_id, &fstrm->by_id);
+
+ /* Check if length of the body is known or if the message is
+ * full. Otherwise, the request is invalid.
+ */
+ sl = http_get_stline(htx);
+ if (!sl || (!(sl->flags & HTX_SL_F_CLEN) && !(htx->flags & HTX_FL_EOM))) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ fcgi_strm_error(fstrm);
+ goto done;
+ }
+ }
+
+ if (!(fstrm->flags & FCGI_SF_BEGIN_SENT)) {
+ TRACE_PROTO("sending FCGI BEGIN_REQUEST record", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ if (!fcgi_strm_send_begin_request(fconn, fstrm))
+ goto done;
+ }
+
+ if (!(fstrm->flags & FCGI_SF_OUTGOING_DATA) && count)
+ fstrm->flags |= FCGI_SF_OUTGOING_DATA;
+
+ while (fstrm->state < FCGI_SS_HLOC && !(fstrm->flags & FCGI_SF_BLK_ANY) &&
+ count && !htx_is_empty(htx)) {
+ blk = htx_get_head_blk(htx);
+ ALREADY_CHECKED(blk);
+ bsize = htx_get_blksz(blk);
+
+ switch (htx_get_blk_type(blk)) {
+ case HTX_BLK_REQ_SL:
+ case HTX_BLK_HDR:
+ TRACE_USER("sending FCGI PARAMS record", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_params(fconn, fstrm, htx);
+ if (!ret) {
+ goto done;
+ }
+ total += ret;
+ count -= ret;
+ break;
+
+ case HTX_BLK_EOH:
+ if (!(fstrm->flags & FCGI_SF_EP_SENT)) {
+ TRACE_PROTO("sending FCGI PARAMS record", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_empty_params(fconn, fstrm);
+ if (!ret)
+ goto done;
+ }
+ if (htx_is_unique_blk(htx, blk) && (htx->flags & HTX_FL_EOM)) {
+ TRACE_PROTO("sending FCGI STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_empty_stdin(fconn, fstrm);
+ if (!ret)
+ goto done;
+ }
+ goto remove_blk;
+
+ case HTX_BLK_DATA:
+ TRACE_PROTO("sending FCGI STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_stdin(fconn, fstrm, htx, count, buf);
+ if (ret > 0) {
+ htx = htx_from_buf(buf);
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ default:
+ remove_blk:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+ }
+
+ done:
+ if (fstrm->state >= FCGI_SS_HLOC) {
+ /* trim any possibly pending data after we close (extra CR-LF,
+ * unprocessed trailers, abnormal extra data, ...)
+ */
+ total += count;
+ count = 0;
+ }
+
+ if (fstrm->state == FCGI_SS_ERROR) {
+ TRACE_DEVEL("reporting error to the app-layer stream", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_ERR|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ se_fl_set_error(fstrm->sd);
+ if (!(fstrm->flags & FCGI_SF_BEGIN_SENT) || fcgi_strm_send_abort(fconn, fstrm))
+ fcgi_strm_close(fstrm);
+ }
+
+ if (htx)
+ htx_to_buf(htx, buf);
+
+ if (total > 0) {
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_DEVEL("data queued, waking up fconn sender", FCGI_EV_STRM_SEND|FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_WAKE, fconn->conn, fstrm);
+ tasklet_wakeup(fconn->wait_event.tasklet);
+ }
+
+ /* Ok we managed to send something, leave the send_list */
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)))
+ LIST_DEL_INIT(&fstrm->send_list);
+ }
+
+ TRACE_LEAVE(FCGI_EV_STRM_SEND, fconn->conn, fstrm, htx, (size_t[]){total});
+ return total;
+}
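+
+/* The send loop above follows the generic HTX consumption pattern
+ * (illustrative skeleton only, error handling omitted):
+ *
+ *   while (count && !htx_is_empty(htx)) {
+ *       blk = htx_get_head_blk(htx);
+ *       bsize = htx_get_blksz(blk);
+ *       ... encode the block into the mux's output records ...
+ *       htx_remove_blk(htx, blk);   // once fully consumed
+ *       count -= bsize;
+ *   }
+ */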
+
+/* for debugging with CLI's "show fd" command */
+static int fcgi_show_fd(struct buffer *msg, struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+ struct fcgi_strm *fstrm = NULL;
+ struct eb32_node *node;
+ int send_cnt = 0;
+ int tree_cnt = 0;
+ int orph_cnt = 0;
+ struct buffer *hmbuf, *tmbuf;
+
+ if (!fconn)
+ return 0;
+
+ list_for_each_entry(fstrm, &fconn->send_list, send_list)
+ send_cnt++;
+
+ fstrm = NULL;
+ node = eb32_first(&fconn->streams_by_id);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ tree_cnt++;
+ if (!fcgi_strm_sc(fstrm))
+ orph_cnt++;
+ node = eb32_next(node);
+ }
+
+ hmbuf = br_head(fconn->mbuf);
+ tmbuf = br_tail(fconn->mbuf);
+ chunk_appendf(msg, " fconn.st0=%d .maxid=%d .flg=0x%04x .nbst=%u"
+ " .nbcs=%u .send_cnt=%d .tree_cnt=%d .orph_cnt=%d .sub=%d "
+ ".dsi=%d .dbuf=%u@%p+%u/%u .mbuf=[%u..%u|%u],h=[%u@%p+%u/%u],t=[%u@%p+%u/%u]",
+ fconn->state, fconn->max_id, fconn->flags,
+ fconn->nb_streams, fconn->nb_sc, send_cnt, tree_cnt, orph_cnt,
+ fconn->wait_event.events, fconn->dsi,
+ (unsigned int)b_data(&fconn->dbuf), b_orig(&fconn->dbuf),
+ (unsigned int)b_head_ofs(&fconn->dbuf), (unsigned int)b_size(&fconn->dbuf),
+ br_head_idx(fconn->mbuf), br_tail_idx(fconn->mbuf), br_size(fconn->mbuf),
+ (unsigned int)b_data(hmbuf), b_orig(hmbuf),
+ (unsigned int)b_head_ofs(hmbuf), (unsigned int)b_size(hmbuf),
+ (unsigned int)b_data(tmbuf), b_orig(tmbuf),
+ (unsigned int)b_head_ofs(tmbuf), (unsigned int)b_size(tmbuf));
+
+ if (fstrm) {
+ chunk_appendf(msg, " last_fstrm=%p .id=%d .flg=0x%04x .rxbuf=%u@%p+%u/%u .sc=%p",
+ fstrm, fstrm->id, fstrm->flags,
+ (unsigned int)b_data(&fstrm->rxbuf), b_orig(&fstrm->rxbuf),
+ (unsigned int)b_head_ofs(&fstrm->rxbuf), (unsigned int)b_size(&fstrm->rxbuf),
+ fcgi_strm_sc(fstrm));
+
+ chunk_appendf(msg, " .sd.flg=0x%08x", se_fl_get(fstrm->sd));
+ if (!se_fl_test(fstrm->sd, SE_FL_ORPHAN))
+ chunk_appendf(msg, " .sc.flg=0x%08x .sc.app=%p",
+ fcgi_strm_sc(fstrm)->flags, fcgi_strm_sc(fstrm)->app);
+
+ chunk_appendf(msg, " .subs=%p", fstrm->subs);
+ if (fstrm->subs) {
+ chunk_appendf(msg, "(ev=%d tl=%p", fstrm->subs->events, fstrm->subs->tasklet);
+ chunk_appendf(msg, " tl.calls=%d tl.ctx=%p tl.fct=",
+ fstrm->subs->tasklet->calls,
+ fstrm->subs->tasklet->context);
+ resolve_sym_name(msg, NULL, fstrm->subs->tasklet->process);
+ chunk_appendf(msg, ")");
+ }
+ }
+ return 0;
+}
+
+/* Migrate the connection to the current thread.
+ * Return 0 if successful, non-zero otherwise.
+ * Expected to be called with the old thread lock held.
+ */
+static int fcgi_takeover(struct connection *conn, int orig_tid)
+{
+ struct fcgi_conn *fcgi = conn->ctx;
+ struct task *task;
+
+ if (fd_takeover(conn->handle.fd, conn) != 0)
+ return -1;
+
+ if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) {
+ /* We failed to takeover the xprt, even if the connection may
+ * still be valid, flag it as error'd, as we have already
+ * taken over the fd, and wake the tasklet, so that it will
+ * destroy it.
+ */
+ conn->flags |= CO_FL_ERROR;
+ tasklet_wakeup_on(fcgi->wait_event.tasklet, orig_tid);
+ return -1;
+ }
+
+ if (fcgi->wait_event.events)
+ fcgi->conn->xprt->unsubscribe(fcgi->conn, fcgi->conn->xprt_ctx,
+ fcgi->wait_event.events, &fcgi->wait_event);
+ /* To let the tasklet know it should free itself, and do nothing else,
+ * set its context to NULL.
+ */
+ fcgi->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(fcgi->wait_event.tasklet, orig_tid);
+
+ task = fcgi->task;
+ if (task) {
+ task->context = NULL;
+ fcgi->task = NULL;
+ __ha_barrier_store();
+ task_kill(task);
+
+ fcgi->task = task_new_here();
+ if (!fcgi->task) {
+ fcgi_release(fcgi);
+ return -1;
+ }
+ fcgi->task->process = fcgi_timeout_task;
+ fcgi->task->context = fcgi;
+ }
+ fcgi->wait_event.tasklet = tasklet_new();
+ if (!fcgi->wait_event.tasklet) {
+ fcgi_release(fcgi);
+ return -1;
+ }
+ fcgi->wait_event.tasklet->process = fcgi_io_cb;
+ fcgi->wait_event.tasklet->context = fcgi;
+ fcgi->conn->xprt->subscribe(fcgi->conn, fcgi->conn->xprt_ctx,
+ SUB_RETRY_RECV, &fcgi->wait_event);
+
+ return 0;
+}
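+
+/* A minimal caller-side sketch of a takeover (illustrative only; as noted
+ * above, the caller is assumed to hold the original thread's lock):
+ *
+ *   if (conn->mux->takeover && conn->mux->takeover(conn, orig_tid) == 0) {
+ *       // success: the connection now runs on the current thread with a
+ *       // fresh tasklet and timeout task
+ *   } else {
+ *       // failure: the connection stays on (or is destroyed by) the
+ *       // original thread
+ *   }
+ */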
+
+/****************************************/
+/* MUX initialization and instantiation */
+/****************************************/
+
+/* The mux operations */
+static const struct mux_ops mux_fcgi_ops = {
+ .init = fcgi_init,
+ .wake = fcgi_wake,
+ .attach = fcgi_attach,
+ .get_first_sc = fcgi_get_first_sc,
+ .detach = fcgi_detach,
+ .destroy = fcgi_destroy,
+ .avail_streams = fcgi_avail_streams,
+ .used_streams = fcgi_used_streams,
+ .rcv_buf = fcgi_rcv_buf,
+ .snd_buf = fcgi_snd_buf,
+ .subscribe = fcgi_subscribe,
+ .unsubscribe = fcgi_unsubscribe,
+ .shutr = fcgi_shutr,
+ .shutw = fcgi_shutw,
+ .ctl = fcgi_ctl,
+ .show_fd = fcgi_show_fd,
+ .takeover = fcgi_takeover,
+ .flags = MX_FL_HTX|MX_FL_HOL_RISK|MX_FL_NO_UPG,
+ .name = "FCGI",
+};
+
+
+/* this mux registers FCGI proto */
+static struct mux_proto_list mux_proto_fcgi =
+{ .token = IST("fcgi"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BE, .mux = &mux_fcgi_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_fcgi);
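+
+/* The "fcgi" token registered above is the one a configuration selects with
+ * the "proto" keyword on a server line. A minimal sketch, assuming a FastCGI
+ * application declared elsewhere (section and server names are illustrative):
+ *
+ *   backend php-app
+ *       mode http
+ *       use-fcgi-app php-fpm
+ *       server app1 127.0.0.1:9000 proto fcgi
+ */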
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/mux_h1.c b/src/mux_h1.c
new file mode 100644
index 0000000..9bb76c7
--- /dev/null
+++ b/src/mux_h1.c
@@ -0,0 +1,4316 @@
+/*
+ * HTTP/1 mux-demux for connections
+ *
+ * Copyright 2018 Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <import/ebistree.h>
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/h2.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/log.h>
+#include <haproxy/pipe-t.h>
+#include <haproxy/proxy.h>
+#include <haproxy/session-t.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+
+/*
+ * H1 Connection flags (32 bits)
+ */
+#define H1C_F_NONE 0x00000000
+
+/* Flags indicating why writing output data is blocked */
+#define H1C_F_OUT_ALLOC 0x00000001 /* mux is blocked on lack of output buffer */
+#define H1C_F_OUT_FULL 0x00000002 /* mux is blocked on output buffer full */
+/* 0x00000004 - 0x00000008 unused */
+
+/* Flags indicating why reading input data is blocked. */
+#define H1C_F_IN_ALLOC 0x00000010 /* mux is blocked on lack of input buffer */
+#define H1C_F_IN_FULL 0x00000020 /* mux is blocked on input buffer full */
+#define H1C_F_IN_SALLOC 0x00000040 /* mux is blocked on lack of stream's request buffer */
+
+/* Flags indicating the connection state */
+#define H1C_F_ST_EMBRYONIC 0x00000100 /* Set when a H1 stream with no stream connector is attached to the connection */
+#define H1C_F_ST_ATTACHED 0x00000200 /* Set when a H1 stream with a stream connector is attached to the connection (may be not READY) */
+#define H1C_F_ST_IDLE 0x00000400 /* connection is idle and may be reused
+ * (exclusive to all H1C_F_ST flags and never set when an h1s is attached) */
+#define H1C_F_ST_ERROR 0x00000800 /* connection must be closed ASAP because an error occurred (stream connector may still be attached) */
+#define H1C_F_ST_SHUTDOWN 0x00001000 /* connection must be shut down ASAP flushing output first (stream connector may still be attached) */
+#define H1C_F_ST_READY 0x00002000 /* Set in ATTACHED state with a READY stream connector. A stream connector is not ready when
+ * a TCP>H1 upgrade is in progress. Thus this flag is only set if ATTACHED is also set */
+#define H1C_F_ST_ALIVE (H1C_F_ST_IDLE|H1C_F_ST_EMBRYONIC|H1C_F_ST_ATTACHED)
+#define H1C_F_ST_SILENT_SHUT 0x00004000 /* silent (or dirty) shutdown must be performed (implied ST_SHUTDOWN) */
+/* 0x00008000 unused */
+
+#define H1C_F_WANT_SPLICE 0x00010000 /* Don't read into a buffer because we want to use, or are already using, splicing */
+#define H1C_F_ERR_PENDING 0x00020000 /* Send an error and close the connection ASAP (implies H1C_F_ST_ERROR) */
+#define H1C_F_WAIT_NEXT_REQ 0x00040000 /* waiting for the next request to start, use keep-alive timeout */
+#define H1C_F_UPG_H2C 0x00080000 /* set if an upgrade to h2 should be done */
+#define H1C_F_CO_MSG_MORE 0x00100000 /* set if CO_SFL_MSG_MORE must be set when calling xprt->snd_buf() */
+#define H1C_F_CO_STREAMER 0x00200000 /* set if CO_SFL_STREAMER must be set when calling xprt->snd_buf() */
+
+/* 0x00400000 - 0x40000000 unused */
+#define H1C_F_IS_BACK 0x80000000 /* Set on outgoing connection */
+
+/*
+ * H1 Stream flags (32 bits)
+ */
+#define H1S_F_NONE 0x00000000
+
+#define H1S_F_RX_BLK 0x00100000 /* Don't process more input data, waiting sync with output side */
+#define H1S_F_TX_BLK 0x00200000 /* Don't process more output data, waiting sync with input side */
+#define H1S_F_RX_CONGESTED 0x00000004 /* Cannot process input data, RX path is congested (waiting for more space in channel's buffer) */
+
+#define H1S_F_REOS 0x00000008 /* End of input stream seen even if not delivered yet */
+#define H1S_F_WANT_KAL 0x00000010
+#define H1S_F_WANT_TUN 0x00000020
+#define H1S_F_WANT_CLO 0x00000040
+#define H1S_F_WANT_MSK 0x00000070
+#define H1S_F_NOT_FIRST 0x00000080 /* The H1 stream is not the first one */
+#define H1S_F_BODYLESS_RESP 0x00000100 /* Bodyless response message */
+
+/* 0x00000200 unused */
+#define H1S_F_NOT_IMPL_ERROR 0x00000400 /* Set when a feature is not implemented during the message parsing */
+#define H1S_F_PARSING_ERROR 0x00000800 /* Set when an error occurred during the message parsing */
+#define H1S_F_PROCESSING_ERROR 0x00001000 /* Set when an error occurred during the message xfer */
+#define H1S_F_ERROR 0x00001800 /* stream error mask */
+
+#define H1S_F_HAVE_SRV_NAME 0x00002000 /* Set during output process if the server name header was added to the request */
+#define H1S_F_HAVE_O_CONN 0x00004000 /* Set during output process to know connection mode was processed */
+
+/* H1 connection descriptor */
+struct h1c {
+ struct connection *conn;
+ struct proxy *px;
+ uint32_t flags; /* Connection flags: H1C_F_* */
+ unsigned int errcode; /* Status code when an error occurred at the H1 connection level */
+ struct buffer ibuf; /* Input buffer to store data before parsing */
+ struct buffer obuf; /* Output buffer to store data after reformatting */
+
+ struct buffer_wait buf_wait; /* Wait list for buffer allocation */
+ struct wait_event wait_event; /* To be used if we're waiting for I/Os */
+
+ struct h1s *h1s; /* H1 stream descriptor */
+ struct task *task; /* timeout management task */
+ struct h1_counters *px_counters; /* h1 counters attached to proxy */
+ int idle_exp; /* idle expiration date (http-keep-alive or http-request timeout) */
+ int timeout; /* client/server timeout duration */
+ int shut_timeout; /* client-fin/server-fin timeout duration */
+};
+
+/* H1 stream descriptor */
+struct h1s {
+ struct h1c *h1c;
+ struct sedesc *sd;
+ uint32_t flags; /* Stream flags: H1S_F_* */
+
+ struct wait_event *subs; /* Address of the wait_event the associated stream connector is waiting on */
+
+ struct session *sess; /* Associated session */
+ struct buffer rxbuf; /* receive buffer, always valid (buf_empty or real buffer) */
+ struct h1m req;
+ struct h1m res;
+
+ enum http_meth_t meth; /* HTTP request method */
+ uint16_t status; /* HTTP response status */
+
+ char ws_key[25]; /* websocket handshake key */
+};
+
+/* Map of headers used to convert outgoing headers */
+struct h1_hdrs_map {
+ char *name;
+ struct eb_root map;
+};
+
+/* An entry in a headers map */
+struct h1_hdr_entry {
+ struct ist name;
+ struct ebpt_node node;
+};
+
+/* Declare the headers map */
+static struct h1_hdrs_map hdrs_map = { .name = NULL, .map = EB_ROOT };
+static int accept_payload_with_any_method = 0;
+
+/* trace source and events */
+static void h1_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * h1c - internal H1 connection
+ * h1s - internal H1 stream
+ * strm - application layer
+ * rx - data receipt
+ * tx - data transmission
+ *
+ */
+static const struct trace_event h1_trace_events[] = {
+#define H1_EV_H1C_NEW (1ULL << 0)
+ { .mask = H1_EV_H1C_NEW, .name = "h1c_new", .desc = "new H1 connection" },
+#define H1_EV_H1C_RECV (1ULL << 1)
+ { .mask = H1_EV_H1C_RECV, .name = "h1c_recv", .desc = "Rx on H1 connection" },
+#define H1_EV_H1C_SEND (1ULL << 2)
+ { .mask = H1_EV_H1C_SEND, .name = "h1c_send", .desc = "Tx on H1 connection" },
+#define H1_EV_H1C_BLK (1ULL << 3)
+ { .mask = H1_EV_H1C_BLK, .name = "h1c_blk", .desc = "H1 connection blocked" },
+#define H1_EV_H1C_WAKE (1ULL << 4)
+ { .mask = H1_EV_H1C_WAKE, .name = "h1c_wake", .desc = "H1 connection woken up" },
+#define H1_EV_H1C_END (1ULL << 5)
+ { .mask = H1_EV_H1C_END, .name = "h1c_end", .desc = "H1 connection terminated" },
+#define H1_EV_H1C_ERR (1ULL << 6)
+ { .mask = H1_EV_H1C_ERR, .name = "h1c_err", .desc = "error on H1 connection" },
+
+#define H1_EV_RX_DATA (1ULL << 7)
+ { .mask = H1_EV_RX_DATA, .name = "rx_data", .desc = "receipt of any H1 data" },
+#define H1_EV_RX_EOI (1ULL << 8)
+ { .mask = H1_EV_RX_EOI, .name = "rx_eoi", .desc = "receipt of end of H1 input" },
+#define H1_EV_RX_HDRS (1ULL << 9)
+ { .mask = H1_EV_RX_HDRS, .name = "rx_headers", .desc = "receipt of H1 headers" },
+#define H1_EV_RX_BODY (1ULL << 10)
+ { .mask = H1_EV_RX_BODY, .name = "rx_body", .desc = "receipt of H1 body" },
+#define H1_EV_RX_TLRS (1ULL << 11)
+ { .mask = H1_EV_RX_TLRS, .name = "rx_trailers", .desc = "receipt of H1 trailers" },
+
+#define H1_EV_TX_DATA (1ULL << 12)
+ { .mask = H1_EV_TX_DATA, .name = "tx_data", .desc = "transmission of any H1 data" },
+#define H1_EV_TX_EOI (1ULL << 13)
+ { .mask = H1_EV_TX_EOI, .name = "tx_eoi", .desc = "transmission of end of H1 input" },
+#define H1_EV_TX_HDRS (1ULL << 14)
+ { .mask = H1_EV_TX_HDRS, .name = "tx_headers", .desc = "transmission of all headers" },
+#define H1_EV_TX_BODY (1ULL << 15)
+ { .mask = H1_EV_TX_BODY, .name = "tx_body", .desc = "transmission of H1 body" },
+#define H1_EV_TX_TLRS (1ULL << 16)
+ { .mask = H1_EV_TX_TLRS, .name = "tx_trailers", .desc = "transmission of H1 trailers" },
+
+#define H1_EV_H1S_NEW (1ULL << 17)
+ { .mask = H1_EV_H1S_NEW, .name = "h1s_new", .desc = "new H1 stream" },
+#define H1_EV_H1S_BLK (1ULL << 18)
+ { .mask = H1_EV_H1S_BLK, .name = "h1s_blk", .desc = "H1 stream blocked" },
+#define H1_EV_H1S_END (1ULL << 19)
+ { .mask = H1_EV_H1S_END, .name = "h1s_end", .desc = "H1 stream terminated" },
+#define H1_EV_H1S_ERR (1ULL << 20)
+ { .mask = H1_EV_H1S_ERR, .name = "h1s_err", .desc = "error on H1 stream" },
+
+#define H1_EV_STRM_NEW (1ULL << 21)
+ { .mask = H1_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define H1_EV_STRM_RECV (1ULL << 22)
+ { .mask = H1_EV_STRM_RECV, .name = "strm_recv", .desc = "receiving data for stream" },
+#define H1_EV_STRM_SEND (1ULL << 23)
+ { .mask = H1_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" },
+#define H1_EV_STRM_WAKE (1ULL << 24)
+ { .mask = H1_EV_STRM_WAKE, .name = "strm_wake", .desc = "stream woken up" },
+#define H1_EV_STRM_SHUT (1ULL << 25)
+ { .mask = H1_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define H1_EV_STRM_END (1ULL << 26)
+ { .mask = H1_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define H1_EV_STRM_ERR (1ULL << 27)
+ { .mask = H1_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+
+ { }
+};
+
+static const struct name_desc h1_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="h1s", .desc="H1 stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc h1_trace_decoding[] = {
+#define H1_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define H1_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only h1c/h1s state and flags, no real decoding" },
+#define H1_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or htx info when available" },
+#define H1_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or frame decoding when available" },
+#define H1_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_h1 __read_mostly = {
+ .name = IST("h1"),
+ .desc = "HTTP/1 multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = h1_trace,
+ .known_events = h1_trace_events,
+ .lockon_args = h1_trace_lockon_args,
+ .decoding = h1_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_h1
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
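+
+/* The trace source registered above can be driven at runtime from the CLI
+ * using the generic "trace" commands. A minimal sketch (the ring name is
+ * illustrative, "buf0" being a commonly available default):
+ *
+ *   trace h1 sink buf0
+ *   trace h1 level developer
+ *   trace h1 verbosity advanced
+ *   trace h1 start now
+ */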
+
+
+/* h1 stats module */
+enum {
+ H1_ST_OPEN_CONN,
+ H1_ST_OPEN_STREAM,
+ H1_ST_TOTAL_CONN,
+ H1_ST_TOTAL_STREAM,
+
+ H1_ST_BYTES_IN,
+ H1_ST_BYTES_OUT,
+#if defined(USE_LINUX_SPLICE)
+ H1_ST_SPLICED_BYTES_IN,
+ H1_ST_SPLICED_BYTES_OUT,
+#endif
+ H1_STATS_COUNT /* must be the last member of the enum */
+};
+
+
+static struct name_desc h1_stats[] = {
+ [H1_ST_OPEN_CONN] = { .name = "h1_open_connections",
+ .desc = "Count of currently open connections" },
+ [H1_ST_OPEN_STREAM] = { .name = "h1_open_streams",
+ .desc = "Count of currently open streams" },
+ [H1_ST_TOTAL_CONN] = { .name = "h1_total_connections",
+ .desc = "Total number of connections" },
+ [H1_ST_TOTAL_STREAM] = { .name = "h1_total_streams",
+ .desc = "Total number of streams" },
+
+ [H1_ST_BYTES_IN] = { .name = "h1_bytes_in",
+ .desc = "Total number of bytes received" },
+ [H1_ST_BYTES_OUT] = { .name = "h1_bytes_out",
+ .desc = "Total number of bytes send" },
+#if defined(USE_LINUX_SPLICE)
+ [H1_ST_SPLICED_BYTES_IN] = { .name = "h1_spliced_bytes_in",
+ .desc = "Total number of bytes received using kernel splicing" },
+ [H1_ST_SPLICED_BYTES_OUT] = { .name = "h1_spliced_bytes_out",
+ .desc = "Total number of bytes sendusing kernel splicing" },
+#endif
+
+};
+
+static struct h1_counters {
+ long long open_conns; /* count of currently open connections */
+ long long open_streams; /* count of currently open streams */
+ long long total_conns; /* total number of connections */
+ long long total_streams; /* total number of streams */
+
+ long long bytes_in; /* number of bytes received */
+ long long bytes_out; /* number of bytes sent */
+#if defined(USE_LINUX_SPLICE)
+ long long spliced_bytes_in; /* number of bytes received using kernel splicing */
+ long long spliced_bytes_out; /* number of bytes sent using kernel splicing */
+#endif
+} h1_counters;
+
+static void h1_fill_stats(void *data, struct field *stats)
+{
+ struct h1_counters *counters = data;
+
+ stats[H1_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns);
+ stats[H1_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams);
+ stats[H1_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns);
+ stats[H1_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams);
+
+ stats[H1_ST_BYTES_IN] = mkf_u64(FN_COUNTER, counters->bytes_in);
+ stats[H1_ST_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->bytes_out);
+#if defined(USE_LINUX_SPLICE)
+ stats[H1_ST_SPLICED_BYTES_IN] = mkf_u64(FN_COUNTER, counters->spliced_bytes_in);
+ stats[H1_ST_SPLICED_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->spliced_bytes_out);
+#endif
+}
+
+static struct stats_module h1_stats_module = {
+ .name = "h1",
+ .fill_stats = h1_fill_stats,
+ .stats = h1_stats,
+ .stats_count = H1_STATS_COUNT,
+ .counters = &h1_counters,
+ .counters_size = sizeof(h1_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE|STATS_PX_CAP_BE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &h1_stats_module);
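+
+/* These counters are updated atomically by the mux itself, following the
+ * pattern used further down in this file, e.g.:
+ *
+ *   HA_ATOMIC_INC(&h1c->px_counters->open_streams);
+ *   HA_ATOMIC_INC(&h1c->px_counters->total_streams);
+ *   ...
+ *   HA_ATOMIC_DEC(&h1c->px_counters->open_streams);
+ *
+ * Being declared "clearable", they may be reset from the stats interface.
+ */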
+
+
+/* the h1c and h1s pools */
+DECLARE_STATIC_POOL(pool_head_h1c, "h1c", sizeof(struct h1c));
+DECLARE_STATIC_POOL(pool_head_h1s, "h1s", sizeof(struct h1s));
+
+static int h1_recv(struct h1c *h1c);
+static int h1_send(struct h1c *h1c);
+static int h1_process(struct h1c *h1c);
+/* h1_io_cb is exported to see it resolved in "show fd" */
+struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state);
+struct task *h1_timeout_task(struct task *t, void *context, unsigned int state);
+static void h1_shutw_conn(struct connection *conn);
+static void h1_wake_stream_for_recv(struct h1s *h1s);
+static void h1_wake_stream_for_send(struct h1s *h1s);
+static void h1s_destroy(struct h1s *h1s);
+
+/* returns the stconn associated to the H1 stream */
+static forceinline struct stconn *h1s_sc(const struct h1s *h1s)
+{
+ return h1s->sd->sc;
+}
+
+/* the H1 traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive h1c), that arg2, if non-null, is of type h1s, and
+ * that arg3, if non-null, is a htx for rx/tx headers.
+ */
+static void h1_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct h1c *h1c = conn ? conn->ctx : NULL;
+ const struct h1s *h1s = a2;
+ const struct htx *htx = a3;
+ const size_t *val = a4;
+
+ if (!h1c)
+ h1c = (h1s ? h1s->h1c : NULL);
+
+ if (!h1c || src->verbosity < H1_VERB_CLEAN)
+ return;
+
+ /* Display frontend/backend info by default */
+ chunk_appendf(&trace_buf, " : [%c]", ((h1c->flags & H1C_F_IS_BACK) ? 'B' : 'F'));
+
+ /* Display request and response states if h1s is defined */
+ if (h1s) {
+ chunk_appendf(&trace_buf, " [%s, %s]",
+ h1m_state_str(h1s->req.state), h1m_state_str(h1s->res.state));
+
+ if (src->verbosity > H1_VERB_SIMPLE) {
+ chunk_appendf(&trace_buf, " - req=(.fl=0x%08x .curr_len=%lu .body_len=%lu)",
+ h1s->req.flags, (unsigned long)h1s->req.curr_len, (unsigned long)h1s->req.body_len);
+ chunk_appendf(&trace_buf, " res=(.fl=0x%08x .curr_len=%lu .body_len=%lu)",
+ h1s->res.flags, (unsigned long)h1s->res.curr_len, (unsigned long)h1s->res.body_len);
+ }
+
+ }
+
+ if (src->verbosity == H1_VERB_CLEAN)
+ return;
+
+ /* Display the value of the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+ /* Display status-line if possible (verbosity > MINIMAL) */
+ if (src->verbosity > H1_VERB_MINIMAL && htx && htx_nbblks(htx)) {
+ const struct htx_blk *blk = htx_get_head_blk(htx);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " - \"%.*s %.*s %.*s\"",
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+
+ /* Display h1c info and, if defined, h1s info (pointer + flags) */
+ chunk_appendf(&trace_buf, " - h1c=%p(0x%08x)", h1c, h1c->flags);
+ if (h1c->conn)
+ chunk_appendf(&trace_buf, " conn=%p(0x%08x)", h1c->conn, h1c->conn->flags);
+ if (h1s) {
+ chunk_appendf(&trace_buf, " h1s=%p(0x%08x)", h1s, h1s->flags);
+ if (h1s->sd)
+ chunk_appendf(&trace_buf, " sd=%p(0x%08x)", h1s->sd, se_fl_get(h1s->sd));
+ if (h1s->sd && h1s_sc(h1s))
+ chunk_appendf(&trace_buf, " sc=%p(0x%08x)", h1s_sc(h1s), h1s_sc(h1s)->flags);
+ }
+
+ if (src->verbosity == H1_VERB_MINIMAL)
+ return;
+
+ /* Display input and output buffer info (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER) {
+ if (src->verbosity == H1_VERB_COMPLETE ||
+ (src->verbosity == H1_VERB_ADVANCED && (mask & (H1_EV_H1C_RECV|H1_EV_STRM_RECV))))
+ chunk_appendf(&trace_buf, " ibuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&h1c->ibuf), b_orig(&h1c->ibuf),
+ (unsigned int)b_head_ofs(&h1c->ibuf), (unsigned int)b_size(&h1c->ibuf));
+ if (src->verbosity == H1_VERB_COMPLETE ||
+ (src->verbosity == H1_VERB_ADVANCED && (mask & (H1_EV_H1C_SEND|H1_EV_STRM_SEND))))
+ chunk_appendf(&trace_buf, " obuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&h1c->obuf), b_orig(&h1c->obuf),
+ (unsigned int)b_head_ofs(&h1c->obuf), (unsigned int)b_size(&h1c->obuf));
+ }
+
+ /* Display htx info if defined (level > USER) */
+ if (src->level > TRACE_LEVEL_USER && htx) {
+ int full = 0;
+
+ /* Full htx info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == H1_VERB_COMPLETE)
+ full = 1;
+ else if (src->verbosity == H1_VERB_ADVANCED && (mask & (H1_EV_RX_HDRS|H1_EV_TX_HDRS)))
+ full = 1;
+ }
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htx, full);
+ }
+}
+
+
+/*****************************************************/
+/* functions below are for dynamic buffer management */
+/*****************************************************/
+/*
+ * Indicates whether or not we may receive data. The rules are the following :
+ * - if an error or a shutdown for reads was detected on the connection we
+ * must not attempt to receive
+ * - if we are waiting for the connection establishment, we must not attempt
+ * to receive
+ * - if an error was detected on the stream we must not attempt to receive
+ * - if reads are explicitly disabled, we must not attempt to receive
+ * - if the input buffer failed to be allocated or is full, we must not try
+ * to receive
+ * - if the mux is not blocked on an input condition, we may attempt to receive
+ * - otherwise we must not attempt to receive
+ */
+static inline int h1_recv_allowed(const struct h1c *h1c)
+{
+ if (h1c->flags & H1C_F_ST_ERROR) {
+ TRACE_DEVEL("recv not allowed because of error on h1c", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ if (h1c->conn->flags & (CO_FL_ERROR|CO_FL_SOCK_RD_SH|CO_FL_WAIT_L4_CONN|CO_FL_WAIT_L6_CONN)) {
+ TRACE_DEVEL("recv not allowed because of (error|read0|waitl4|waitl6) on connection", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ if (h1c->h1s && (h1c->h1s->flags & H1S_F_ERROR)) {
+ TRACE_DEVEL("recv not allowed because of error on h1s", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ if (!(h1c->flags & (H1C_F_IN_ALLOC|H1C_F_IN_FULL|H1C_F_IN_SALLOC)))
+ return 1;
+
+ TRACE_DEVEL("recv not allowed because input is blocked", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+}
+
+/*
+ * Tries to grab a buffer and to re-enable processing on mux <target>. The h1
+ * flags are used to figure out which buffer was requested. It returns 1 if the
+ * allocation succeeds, in which case the connection is woken up, or 0 if it's
+ * impossible to wake up and we prefer to be woken up later.
+ */
+static int h1_buf_available(void *target)
+{
+ struct h1c *h1c = target;
+
+ if ((h1c->flags & H1C_F_IN_ALLOC) && b_alloc(&h1c->ibuf)) {
+ TRACE_STATE("unblocking h1c, ibuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn);
+ h1c->flags &= ~H1C_F_IN_ALLOC;
+ if (h1_recv_allowed(h1c))
+ tasklet_wakeup(h1c->wait_event.tasklet);
+ return 1;
+ }
+
+ if ((h1c->flags & H1C_F_OUT_ALLOC) && b_alloc(&h1c->obuf)) {
+ TRACE_STATE("unblocking h1s, obuf allocated", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1c->h1s);
+ h1c->flags &= ~H1C_F_OUT_ALLOC;
+ if (h1c->h1s)
+ h1_wake_stream_for_send(h1c->h1s);
+ return 1;
+ }
+
+ if ((h1c->flags & H1C_F_IN_SALLOC) && h1c->h1s && b_alloc(&h1c->h1s->rxbuf)) {
+ TRACE_STATE("unblocking h1c, stream rxbuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn);
+ h1c->flags &= ~H1C_F_IN_SALLOC;
+ tasklet_wakeup(h1c->wait_event.tasklet);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Allocate a buffer. If it fails, it adds the mux to the buffer wait queue.
+ */
+static inline struct buffer *h1_get_buf(struct h1c *h1c, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&h1c->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ h1c->buf_wait.target = h1c;
+ h1c->buf_wait.wakeup_cb = h1_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &h1c->buf_wait.list);
+ }
+ return buf;
+}
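+
+/* Typical usage sketch for h1_get_buf() (illustrative only): on allocation
+ * failure the caller sets the matching blocking flag, which
+ * h1_buf_available() clears once a buffer can finally be allocated:
+ *
+ *   if (!h1_get_buf(h1c, &h1c->ibuf)) {
+ *       h1c->flags |= H1C_F_IN_ALLOC;
+ *       goto end;   // h1_buf_available() will wake the tasklet up later
+ *   }
+ */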
+
+/*
+ * Release a buffer, if any, and try to wake up entities waiting in the buffer
+ * wait queue.
+ */
+static inline void h1_release_buf(struct h1c *h1c, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(h1c->buf_wait.target, 1);
+ }
+}
+
+/* returns the number of streams in use on a connection to figure if it's idle
+ * or not. We rely on H1C_F_ST_IDLE to know if the connection is in-use or
+ * not. This flag is only set when no H1S is attached and when the previous
+ * stream, if any, was fully terminated without any error and in K/A mode.
+ */
+static int h1_used_streams(struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+
+ return ((h1c->flags & H1C_F_ST_IDLE) ? 0 : 1);
+}
+
+/* returns the number of streams still available on a connection */
+static int h1_avail_streams(struct connection *conn)
+{
+ return 1 - h1_used_streams(conn);
+}
+
+/* Refresh the h1c task timeout if necessary */
+static void h1_refresh_timeout(struct h1c *h1c)
+{
+ int is_idle_conn = 0;
+
+ if (h1c->task) {
+ if (!(h1c->flags & H1C_F_ST_ALIVE) || (h1c->flags & H1C_F_ST_SHUTDOWN)) {
+ /* half-closed or dead connections : switch to clientfin/serverfin
+ * timeouts so that we don't hang too long on clients that have
+ * gone away (especially in tunnel mode).
+ */
+ h1c->task->expire = tick_add(now_ms, h1c->shut_timeout);
+ TRACE_DEVEL("refreshing connection's timeout (dead or half-closed)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ is_idle_conn = 1;
+ }
+ else if (b_data(&h1c->obuf)) {
+ /* connection with pending outgoing data, need a timeout (server or client). */
+ h1c->task->expire = tick_add(now_ms, h1c->timeout);
+ TRACE_DEVEL("refreshing connection's timeout (pending outgoing data)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ }
+ else if (!(h1c->flags & (H1C_F_IS_BACK|H1C_F_ST_READY))) {
+ /* front connections waiting for a fully usable stream need a timeout. */
+ h1c->task->expire = tick_add(now_ms, h1c->timeout);
+ TRACE_DEVEL("refreshing connection's timeout (alive front h1c but not ready)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ /* A frontend connection not yet ready could be treated the same way as an idle
+ * one in case of soft-close.
+ */
+ is_idle_conn = 1;
+ }
+ else {
+ /* alive back connections of front connections with a stream connector attached */
+ h1c->task->expire = TICK_ETERNITY;
+ TRACE_DEVEL("no connection timeout (alive back h1c or front h1c with an SC)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ }
+
+ /* Finally set the idle expiration date if shorter */
+ h1c->task->expire = tick_first(h1c->task->expire, h1c->idle_exp);
+
+ if ((h1c->px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) &&
+ is_idle_conn && tick_isset(global.close_spread_end)) {
+ /* If a soft-stop is in progress and a close-spread-time
+ * is set, we want to spread idle connection closing roughly
+ * evenly across the defined window. This should only
+ * act on idle frontend connections.
+ * If the window end is already in the past, we wake the
+ * timeout task up immediately so that it can be closed.
+ */
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* We don't need to reset the expire if it would
+ * already happen before the close window end.
+ */
+ if (tick_is_le(global.close_spread_end, h1c->task->expire)) {
+ /* Set an expire value shorter than the current value
+ * because the close spread window end comes earlier.
+ */
+ h1c->task->expire = tick_add(now_ms, statistical_prng_range(remaining_window));
+ TRACE_DEVEL("connection timeout set to value before close-spread window end", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ }
+ }
+ else {
+ /* We are past the soft close window end, wake the timeout
+ * task up immediately.
+ */
+ task_wakeup(h1c->task, TASK_WOKEN_TIMER);
+ }
+ }
+ TRACE_DEVEL("new expiration date", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn, 0, 0, (size_t[]){h1c->task->expire});
+ task_queue(h1c->task);
+ }
+}
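+
+/* The tick arithmetic used above composes as follows: tick_add(now_ms, d)
+ * yields an absolute expiration date <d> milliseconds from now,
+ * tick_first(a, b) keeps the earliest of two dates (ignoring unset ones),
+ * and TICK_ETERNITY disables the timeout. For example:
+ *
+ *   t->expire = tick_add(now_ms, h1c->timeout);       // fixed delay
+ *   t->expire = tick_first(t->expire, h1c->idle_exp); // clamp to idle date
+ */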
+
+static void h1_set_idle_expiration(struct h1c *h1c)
+{
+ if (h1c->flags & H1C_F_IS_BACK || !h1c->task) {
+ TRACE_DEVEL("no idle expiration (backend connection || no task)", H1_EV_H1C_RECV, h1c->conn);
+ h1c->idle_exp = TICK_ETERNITY;
+ return;
+ }
+
+ if (h1c->flags & H1C_F_ST_IDLE) {
+ if (!tick_isset(h1c->idle_exp)) {
+ if ((h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* Not the first request */
+ !b_data(&h1c->ibuf) && /* No input data */
+ tick_isset(h1c->px->timeout.httpka)) { /* K-A timeout set */
+ h1c->idle_exp = tick_add_ifset(now_ms, h1c->px->timeout.httpka);
+ TRACE_DEVEL("set idle expiration (keep-alive timeout)", H1_EV_H1C_RECV, h1c->conn);
+ }
+ else {
+ h1c->idle_exp = tick_add_ifset(now_ms, h1c->px->timeout.httpreq);
+ TRACE_DEVEL("set idle expiration (http-request timeout)", H1_EV_H1C_RECV, h1c->conn);
+ }
+ }
+ }
+ else if ((h1c->flags & H1C_F_ST_ALIVE) && !(h1c->flags & H1C_F_ST_READY)) {
+ if (!tick_isset(h1c->idle_exp)) {
+ h1c->idle_exp = tick_add_ifset(now_ms, h1c->px->timeout.httpreq);
+ TRACE_DEVEL("set idle expiration (http-request timeout)", H1_EV_H1C_RECV, h1c->conn);
+ }
+ }
+ else { // ST_ATTACHED or SHUTDOWN
+ h1c->idle_exp = TICK_ETERNITY;
+ TRACE_DEVEL("unset idle expiration (attached || shutdown)", H1_EV_H1C_RECV, h1c->conn);
+ }
+}
+/*****************************************************************/
+/* functions below are dedicated to the mux setup and management */
+/*****************************************************************/
+
+/* returns non-zero if there are input data pending for stream h1s. */
+static inline size_t h1s_data_pending(const struct h1s *h1s)
+{
+ const struct h1m *h1m;
+
+ h1m = ((h1s->h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+ return ((h1m->state == H1_MSG_DONE) ? 0 : b_data(&h1s->h1c->ibuf));
+}
+
+/* Creates a new stream connector and the associated stream. <input> is used
+ * as input buffer for the stream. On success, it is transferred to the stream
+ * and the mux is no longer responsible for it. On error, <input> is unchanged,
+ * thus the mux must still take care of it. However, there is nothing special
+ * to do because, on success, <input> is updated to point to BUF_NULL. Thus,
+ * calling b_free() on it is always safe. This function returns the stream
+ * connector on success or NULL on error. */
+static struct stconn *h1s_new_sc(struct h1s *h1s, struct buffer *input)
+{
+ struct h1c *h1c = h1s->h1c;
+
+ TRACE_ENTER(H1_EV_STRM_NEW, h1c->conn, h1s);
+
+ if (h1s->flags & H1S_F_NOT_FIRST)
+ se_fl_set(h1s->sd, SE_FL_NOT_FIRST);
+ if (h1s->req.flags & H1_MF_UPG_WEBSOCKET)
+ se_fl_set(h1s->sd, SE_FL_WEBSOCKET);
+
+ if (!sc_new_from_endp(h1s->sd, h1c->conn->owner, input)) {
+ TRACE_ERROR("SC allocation failure", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, h1c->conn, h1s);
+ goto err;
+ }
+
+ h1c->flags = (h1c->flags & ~H1C_F_ST_EMBRYONIC) | H1C_F_ST_ATTACHED | H1C_F_ST_READY;
+ TRACE_LEAVE(H1_EV_STRM_NEW, h1c->conn, h1s);
+ return h1s_sc(h1s);
+
+ err:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn, h1s);
+ return NULL;
+}
+
+static struct stconn *h1s_upgrade_sc(struct h1s *h1s, struct buffer *input)
+{
+ TRACE_ENTER(H1_EV_STRM_NEW, h1s->h1c->conn, h1s);
+
+ if (stream_upgrade_from_sc(h1s_sc(h1s), input) < 0) {
+ TRACE_ERROR("stream upgrade failure", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, h1s->h1c->conn, h1s);
+ goto err;
+ }
+
+ h1s->h1c->flags |= H1C_F_ST_READY;
+ TRACE_LEAVE(H1_EV_STRM_NEW, h1s->h1c->conn, h1s);
+ return h1s_sc(h1s);
+
+ err:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1s->h1c->conn, h1s);
+ return NULL;
+}
+
+static struct h1s *h1s_new(struct h1c *h1c)
+{
+ struct h1s *h1s;
+
+ TRACE_ENTER(H1_EV_H1S_NEW, h1c->conn);
+
+ h1s = pool_alloc(pool_head_h1s);
+ if (!h1s) {
+ TRACE_ERROR("H1S allocation failure", H1_EV_H1S_NEW|H1_EV_H1S_END|H1_EV_H1S_ERR, h1c->conn);
+ goto fail;
+ }
+ h1s->h1c = h1c;
+ h1c->h1s = h1s;
+ h1s->sess = NULL;
+ h1s->sd = NULL;
+ h1s->flags = H1S_F_WANT_KAL;
+ h1s->subs = NULL;
+ h1s->rxbuf = BUF_NULL;
+ memset(h1s->ws_key, 0, sizeof(h1s->ws_key));
+
+ h1m_init_req(&h1s->req);
+ h1s->req.flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+
+ h1m_init_res(&h1s->res);
+ h1s->res.flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+
+ h1s->status = 0;
+ h1s->meth = HTTP_METH_OTHER;
+
+ if (h1c->flags & H1C_F_WAIT_NEXT_REQ)
+ h1s->flags |= H1S_F_NOT_FIRST;
+ h1c->flags = (h1c->flags & ~(H1C_F_ST_IDLE|H1C_F_WAIT_NEXT_REQ)) | H1C_F_ST_EMBRYONIC;
+
+ TRACE_LEAVE(H1_EV_H1S_NEW, h1c->conn, h1s);
+ return h1s;
+
+ fail:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn);
+ return NULL;
+}
+
+static struct h1s *h1c_frt_stream_new(struct h1c *h1c, struct stconn *sc, struct session *sess)
+{
+ struct h1s *h1s;
+
+ TRACE_ENTER(H1_EV_H1S_NEW, h1c->conn);
+
+ h1s = h1s_new(h1c);
+ if (!h1s)
+ goto fail;
+
+ if (sc) {
+ if (sc_attach_mux(sc, h1s, h1c->conn) < 0)
+ goto fail;
+ h1s->sd = sc->sedesc;
+ }
+ else {
+ h1s->sd = sedesc_new();
+ if (!h1s->sd)
+ goto fail;
+ h1s->sd->se = h1s;
+ h1s->sd->conn = h1c->conn;
+ se_fl_set(h1s->sd, SE_FL_T_MUX | SE_FL_ORPHAN);
+ }
+
+ h1s->sess = sess;
+
+ if (h1c->px->options2 & PR_O2_REQBUG_OK)
+ h1s->req.err_pos = -1;
+
+ HA_ATOMIC_INC(&h1c->px_counters->open_streams);
+ HA_ATOMIC_INC(&h1c->px_counters->total_streams);
+
+ h1c->idle_exp = TICK_ETERNITY;
+ h1_set_idle_expiration(h1c);
+ TRACE_LEAVE(H1_EV_H1S_NEW, h1c->conn, h1s);
+ return h1s;
+
+ fail:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn);
+ h1s_destroy(h1s);
+ return NULL;
+}
+
+static struct h1s *h1c_bck_stream_new(struct h1c *h1c, struct stconn *sc, struct session *sess)
+{
+ struct h1s *h1s;
+
+ TRACE_ENTER(H1_EV_H1S_NEW, h1c->conn);
+
+ h1s = h1s_new(h1c);
+ if (!h1s)
+ goto fail;
+
+ if (sc_attach_mux(sc, h1s, h1c->conn) < 0)
+ goto fail;
+
+ h1s->flags |= H1S_F_RX_BLK;
+ h1s->sd = sc->sedesc;
+ h1s->sess = sess;
+
+ h1c->flags = (h1c->flags & ~H1C_F_ST_EMBRYONIC) | H1C_F_ST_ATTACHED | H1C_F_ST_READY;
+
+ if (h1c->px->options2 & PR_O2_RSPBUG_OK)
+ h1s->res.err_pos = -1;
+
+ HA_ATOMIC_INC(&h1c->px_counters->open_streams);
+ HA_ATOMIC_INC(&h1c->px_counters->total_streams);
+
+ TRACE_LEAVE(H1_EV_H1S_NEW, h1c->conn, h1s);
+ return h1s;
+
+ fail:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn);
+ h1s_destroy(h1s);
+ return NULL;
+}
+
+static void h1s_destroy(struct h1s *h1s)
+{
+ if (h1s) {
+ struct h1c *h1c = h1s->h1c;
+
+ TRACE_POINT(H1_EV_H1S_END, h1c->conn, h1s);
+ h1c->h1s = NULL;
+
+ if (h1s->subs)
+ h1s->subs->events = 0;
+
+ h1_release_buf(h1c, &h1s->rxbuf);
+
+ h1c->flags &= ~(H1C_F_WANT_SPLICE|
+ H1C_F_ST_EMBRYONIC|H1C_F_ST_ATTACHED|H1C_F_ST_READY|
+ H1C_F_OUT_FULL|H1C_F_OUT_ALLOC|H1C_F_IN_SALLOC|
+ H1C_F_CO_MSG_MORE|H1C_F_CO_STREAMER);
+ if (h1s->flags & H1S_F_ERROR) {
+ h1c->flags |= H1C_F_ST_ERROR;
+ TRACE_ERROR("h1s on error, set error on h1c", H1_EV_H1S_END|H1_EV_H1C_ERR, h1c->conn, h1s);
+ }
+
+ if (!(h1c->flags & (H1C_F_ST_ERROR|H1C_F_ST_SHUTDOWN)) && /* No error/shutdown on h1c */
+ !(h1c->conn->flags & (CO_FL_ERROR|CO_FL_SOCK_RD_SH|CO_FL_SOCK_WR_SH)) && /* No error/shutdown on conn */
+ (h1s->flags & H1S_F_WANT_KAL) && /* K/A possible */
+ h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE) { /* req/res in DONE state */
+ h1c->flags |= (H1C_F_ST_IDLE|H1C_F_WAIT_NEXT_REQ);
+ TRACE_STATE("set idle mode on h1c, waiting for the next request", H1_EV_H1C_ERR, h1c->conn, h1s);
+ }
+ else {
+ TRACE_STATE("set shudown on h1c", H1_EV_H1S_END, h1c->conn, h1s);
+ h1c->flags |= H1C_F_ST_SHUTDOWN;
+ }
+
+ HA_ATOMIC_DEC(&h1c->px_counters->open_streams);
+ BUG_ON(h1s->sd && !se_fl_test(h1s->sd, SE_FL_ORPHAN));
+ sedesc_free(h1s->sd);
+ pool_free(pool_head_h1s, h1s);
+ }
+}
+
+/*
+ * Initialize the mux once it's attached. It is expected that conn->ctx points
+ * to the existing stream connector (for outgoing connections or for incoming
+ * ones during a mux upgrade) or NULL (for incoming ones during the connection
+ * establishment). <input> is always used as Input buffer and may contain
+ * data. It is the caller's responsibility not to reuse it afterwards. Returns < 0 on
+ * error.
+ */
+static int h1_init(struct connection *conn, struct proxy *proxy, struct session *sess,
+ struct buffer *input)
+{
+ struct h1c *h1c;
+ struct task *t = NULL;
+ void *conn_ctx = conn->ctx;
+
+ TRACE_ENTER(H1_EV_H1C_NEW);
+
+ h1c = pool_alloc(pool_head_h1c);
+ if (!h1c) {
+ TRACE_ERROR("H1C allocation failure", H1_EV_H1C_NEW|H1_EV_H1C_END|H1_EV_H1C_ERR);
+ goto fail_h1c;
+ }
+ h1c->conn = conn;
+ h1c->px = proxy;
+
+ h1c->flags = H1C_F_ST_IDLE;
+ h1c->errcode = 0;
+ h1c->ibuf = *input;
+ h1c->obuf = BUF_NULL;
+ h1c->h1s = NULL;
+ h1c->task = NULL;
+
+ LIST_INIT(&h1c->buf_wait.list);
+ h1c->wait_event.tasklet = tasklet_new();
+ if (!h1c->wait_event.tasklet)
+ goto fail;
+ h1c->wait_event.tasklet->process = h1_io_cb;
+ h1c->wait_event.tasklet->context = h1c;
+ h1c->wait_event.events = 0;
+ h1c->idle_exp = TICK_ETERNITY;
+
+ if (conn_is_back(conn)) {
+ h1c->flags |= H1C_F_IS_BACK;
+ h1c->shut_timeout = h1c->timeout = proxy->timeout.server;
+ if (tick_isset(proxy->timeout.serverfin))
+ h1c->shut_timeout = proxy->timeout.serverfin;
+
+ h1c->px_counters = EXTRA_COUNTERS_GET(proxy->extra_counters_be,
+ &h1_stats_module);
+ } else {
+ h1c->shut_timeout = h1c->timeout = proxy->timeout.client;
+ if (tick_isset(proxy->timeout.clientfin))
+ h1c->shut_timeout = proxy->timeout.clientfin;
+
+ h1c->px_counters = EXTRA_COUNTERS_GET(proxy->extra_counters_fe,
+ &h1_stats_module);
+
+ LIST_APPEND(&mux_stopping_data[tid].list,
+ &h1c->conn->stopping_list);
+ }
+ if (tick_isset(h1c->timeout)) {
+ t = task_new_here();
+ if (!t) {
+ TRACE_ERROR("H1C task allocation failure", H1_EV_H1C_NEW|H1_EV_H1C_END|H1_EV_H1C_ERR);
+ goto fail;
+ }
+
+ h1c->task = t;
+ t->process = h1_timeout_task;
+ t->context = h1c;
+
+ t->expire = tick_add(now_ms, h1c->timeout);
+ }
+
+ conn->ctx = h1c;
+
+ if (h1c->flags & H1C_F_IS_BACK) {
+ /* Create a new H1S now for backend connection only */
+ if (!h1c_bck_stream_new(h1c, conn_ctx, sess))
+ goto fail;
+ }
+ else if (conn_ctx) {
+ /* Upgraded frontend connection (from TCP) */
+ if (!h1c_frt_stream_new(h1c, conn_ctx, h1c->conn->owner))
+ goto fail;
+
+ /* Attach the SC but Not ready yet */
+ h1c->flags = (h1c->flags & ~H1C_F_ST_EMBRYONIC) | H1C_F_ST_ATTACHED;
+ TRACE_DEVEL("Inherit the SC from TCP connection to perform an upgrade",
+ H1_EV_H1C_NEW|H1_EV_STRM_NEW, h1c->conn, h1c->h1s);
+ }
+
+ if (t) {
+ h1_set_idle_expiration(h1c);
+ t->expire = tick_first(t->expire, h1c->idle_exp);
+ task_queue(t);
+ }
+
+ /* prepare to read something */
+ if (b_data(&h1c->ibuf))
+ tasklet_wakeup(h1c->wait_event.tasklet);
+ else if (h1_recv_allowed(h1c))
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+
+ HA_ATOMIC_INC(&h1c->px_counters->open_conns);
+ HA_ATOMIC_INC(&h1c->px_counters->total_conns);
+
+ /* mux->wake will be called soon to complete the operation */
+ TRACE_LEAVE(H1_EV_H1C_NEW, conn, h1c->h1s);
+ return 0;
+
+ fail:
+ task_destroy(t);
+ if (h1c->wait_event.tasklet)
+ tasklet_free(h1c->wait_event.tasklet);
+ pool_free(pool_head_h1c, h1c);
+ fail_h1c:
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+ conn->ctx = conn_ctx; // restore saved context
+ TRACE_DEVEL("leaving in error", H1_EV_H1C_NEW|H1_EV_H1C_END|H1_EV_H1C_ERR);
+ return -1;
+}
+
+/* release function. This one should be called to free all resources allocated
+ * to the mux.
+ */
+static void h1_release(struct h1c *h1c)
+{
+ struct connection *conn = NULL;
+
+ TRACE_POINT(H1_EV_H1C_END);
+
+ /* The connection must be attached to this mux to be released */
+ if (h1c->conn && h1c->conn->ctx == h1c)
+ conn = h1c->conn;
+
+ if (conn && h1c->flags & H1C_F_UPG_H2C) {
+ TRACE_DEVEL("upgrading H1 to H2", H1_EV_H1C_END, conn);
+ /* Make sure we're no longer subscribed to anything */
+ if (h1c->wait_event.events)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx,
+ h1c->wait_event.events, &h1c->wait_event);
+ if (conn_upgrade_mux_fe(conn, NULL, &h1c->ibuf, ist("h2"), PROTO_MODE_HTTP) != -1) {
+ /* connection successfully upgraded to H2, this
+ * mux was already released */
+ return;
+ }
+ TRACE_ERROR("h2 upgrade failed", H1_EV_H1C_END|H1_EV_H1C_ERR, conn);
+ sess_log(conn->owner); /* Log if the upgrade failed */
+ }
+
+
+ if (LIST_INLIST(&h1c->buf_wait.list))
+ LIST_DEL_INIT(&h1c->buf_wait.list);
+
+ h1_release_buf(h1c, &h1c->ibuf);
+ h1_release_buf(h1c, &h1c->obuf);
+
+ if (h1c->task) {
+ h1c->task->context = NULL;
+ task_wakeup(h1c->task, TASK_WOKEN_OTHER);
+ h1c->task = NULL;
+ }
+
+ if (h1c->wait_event.tasklet) {
+ tasklet_free(h1c->wait_event.tasklet);
+ h1c->wait_event.tasklet = NULL;
+ }
+
+ h1s_destroy(h1c->h1s);
+ if (conn) {
+ if (h1c->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, h1c->wait_event.events,
+ &h1c->wait_event);
+ h1_shutw_conn(conn);
+ }
+
+ HA_ATOMIC_DEC(&h1c->px_counters->open_conns);
+ pool_free(pool_head_h1c, h1c);
+
+ if (conn) {
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", H1_EV_H1C_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+}
+
+/******************************************************/
+/* functions below are for the H1 protocol processing */
+/******************************************************/
+/* Parse the request version and set H1_MF_VER_11 on <h1m> if the version is
+ * greater or equal to 1.1
+ */
+static void h1_parse_req_vsn(struct h1m *h1m, const struct htx_sl *sl)
+{
+ const char *p = HTX_SL_REQ_VPTR(sl);
+
+ if ((HTX_SL_REQ_VLEN(sl) == 8) &&
+ (*(p + 5) > '1' ||
+ (*(p + 5) == '1' && *(p + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+}
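+
+/* Worked example: for the 8-byte version string "HTTP/1.1", p[5] == '1' and
+ * p[7] == '1', so H1_MF_VER_11 is set. For "HTTP/1.0", p[7] == '0' and the
+ * flag is left clear. A higher major version such as "HTTP/2.0" also sets
+ * the flag through the p[5] > '1' test.
+ */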
+
+/* Parse the response version and set H1_MF_VER_11 on <h1m> if the version is
+ * greater or equal to 1.1
+ */
+static void h1_parse_res_vsn(struct h1m *h1m, const struct htx_sl *sl)
+{
+ const char *p = HTX_SL_RES_VPTR(sl);
+
+ if ((HTX_SL_RES_VLEN(sl) == 8) &&
+ (*(p + 5) > '1' ||
+ (*(p + 5) == '1' && *(p + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+}
+
+/* Deduce the connection mode of the client connection, depending on the
+ * configuration and the H1 message flags. This function is called twice, the
+ * first time when the request is parsed and the second time when the response
+ * is parsed.
+ */
+static void h1_set_cli_conn_mode(struct h1s *h1s, struct h1m *h1m)
+{
+ struct proxy *fe = h1s->h1c->px;
+
+ if (h1m->flags & H1_MF_RESP) {
+ /* Output direction: second pass */
+ if ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) ||
+ h1s->status == 101) {
+ /* Either we've established an explicit tunnel, or we're
+ * switching the protocol. In both cases, we're very unlikely to
+ * understand the next protocols. We have to switch to tunnel
+ * mode, so that we transfer the request and responses then let
+ * this protocol pass unmodified. When we later implement
+ * specific parsers for such protocols, we'll want to check the
+ * Upgrade header which contains information about that protocol
+ * for responses with status 101 (eg: see RFC2817 about TLS).
+ */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_TUN;
+ TRACE_STATE("set tunnel mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (h1s->flags & H1S_F_WANT_KAL) {
+ /* By default the client is in KAL mode. CLOSE mode means
+ * it is imposed by the client itself. So only change
+ * KAL mode here. */
+ if (!(h1m->flags & H1_MF_XFER_LEN) || (h1m->flags & H1_MF_CONN_CLO)) {
+ /* no length known or explicit close => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (!(h1m->flags & H1_MF_CONN_KAL) &&
+ (fe->options & PR_O_HTTP_MODE) == PR_O_HTTP_CLO) {
+ /* no explicit keep-alive and option httpclose => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+ }
+ else {
+ /* Input direction: first pass */
+ if (!(h1m->flags & (H1_MF_VER_11|H1_MF_CONN_KAL)) || h1m->flags & H1_MF_CONN_CLO) {
+ /* no explicit keep-alive in HTTP/1.0 or explicit close => close*/
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (req)", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+
+ /* If KAL, check if the frontend is stopping. If yes, switch to CLO mode
+ * unless a 'close-spread-time' option is set, either to define a
+ * soft-close window or to disable active closing (close-spread-time
+ * set to 0).
+ */
+ if (h1s->flags & H1S_F_WANT_KAL && (fe->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ int want_clo = 1;
+ /* If a close-spread-time option is set, we want to avoid
+ * closing all the active HTTP connections at once so we add a
+ * random factor that will spread the closing.
+ */
+ if (tick_isset(global.close_spread_end)) {
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* This should increase the closing rate the further along
+ * the window we are.
+ */
+ want_clo = (remaining_window <= statistical_prng_range(global.close_spread_time));
+ }
+ }
+ else if (global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE)
+ want_clo = 0;
+
+ if (want_clo) {
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("stopping, set close mode", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+}
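+
+/* Worked example of the first pass above: a plain HTTP/1.0 request without a
+ * "Connection: keep-alive" header carries neither H1_MF_VER_11 nor
+ * H1_MF_CONN_KAL, so the stream switches to H1S_F_WANT_CLO. An HTTP/1.1
+ * request with "Connection: close" reaches the same state via H1_MF_CONN_CLO.
+ */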
+
+/* Deduce the connection mode of the server connection, depending on the
+ * configuration and the H1 message flags. This function is called twice, the
+ * first time when the request is parsed and the second time when the response
+ * is parsed.
+ */
+static void h1_set_srv_conn_mode(struct h1s *h1s, struct h1m *h1m)
+{
+ struct session *sess = h1s->sess;
+ struct proxy *be = h1s->h1c->px;
+ int fe_flags = sess ? sess->fe->options : 0;
+
+ if (h1m->flags & H1_MF_RESP) {
+ /* Input direction: second pass */
+ if ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) ||
+ h1s->status == 101) {
+ /* Either we've established an explicit tunnel, or we're
+ * switching the protocol. In both cases, we're very unlikely to
+ * understand the next protocols. We have to switch to tunnel
+ * mode, so that we transfer the request and responses then let
+ * this protocol pass unmodified. When we later implement
+ * specific parsers for such protocols, we'll want to check the
+ * Upgrade header which contains information about that protocol
+ * for responses with status 101 (eg: see RFC2817 about TLS).
+ */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_TUN;
+ TRACE_STATE("set tunnel mode (resp)", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (h1s->flags & H1S_F_WANT_KAL) {
+ /* By default the server is in KAL mode. CLOSE mode means
+ * it is imposed by haproxy itself. So only change KAL
+ * mode here. */
+ if (!(h1m->flags & H1_MF_XFER_LEN) || h1m->flags & H1_MF_CONN_CLO ||
+ !(h1m->flags & (H1_MF_VER_11|H1_MF_CONN_KAL))){
+ /* no length known or explicit close or no explicit keep-alive in HTTP/1.0 => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (resp)", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+ }
+ else {
+ /* Output direction: first pass */
+ if (h1m->flags & H1_MF_CONN_CLO) {
+ /* explicit close => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (req)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (!(h1m->flags & H1_MF_CONN_KAL) &&
+ ((fe_flags & PR_O_HTTP_MODE) == PR_O_HTTP_SCL ||
+ (be->options & PR_O_HTTP_MODE) == PR_O_HTTP_SCL ||
+ (fe_flags & PR_O_HTTP_MODE) == PR_O_HTTP_CLO ||
+ (be->options & PR_O_HTTP_MODE) == PR_O_HTTP_CLO)) {
+ /* no explicit keep-alive and option httpclose/server-close => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (req)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+
+ /* If KAL, check if the backend is stopping. If yes, switch to CLO mode */
+ if (h1s->flags & H1S_F_WANT_KAL && (be->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("stopping, set close mode", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+}
+
+static void h1_update_req_conn_value(struct h1s *h1s, struct h1m *h1m, struct ist *conn_val)
+{
+ struct proxy *px = h1s->h1c->px;
+
+ /* Don't update the "Connection:" header in TUNNEL mode or if an "Upgrade"
+ * token is found
+ */
+ if (h1s->flags & H1S_F_WANT_TUN || h1m->flags & H1_MF_CONN_UPG)
+ return;
+
+ if (h1s->flags & H1S_F_WANT_KAL || px->options2 & PR_O2_FAKE_KA) {
+ if (!(h1m->flags & H1_MF_VER_11)) {
+ TRACE_STATE("add \"Connection: keep-alive\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("keep-alive");
+ }
+ }
+ else { /* H1S_F_WANT_CLO && !PR_O2_FAKE_KA */
+ if (h1m->flags & H1_MF_VER_11) {
+ TRACE_STATE("add \"Connection: close\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("close");
+ }
+ }
+}
+
+static void h1_update_res_conn_value(struct h1s *h1s, struct h1m *h1m, struct ist *conn_val)
+{
+ /* Don't update the "Connection:" header in TUNNEL mode or if an "Upgrade"
+ * token is found
+ */
+ if (h1s->flags & H1S_F_WANT_TUN || h1m->flags & H1_MF_CONN_UPG)
+ return;
+
+ if (h1s->flags & H1S_F_WANT_KAL) {
+ if (!(h1m->flags & H1_MF_VER_11) ||
+ !((h1m->flags & h1s->req.flags) & H1_MF_VER_11)) {
+ TRACE_STATE("add \"Connection: keep-alive\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("keep-alive");
+ }
+ }
+ else { /* H1S_F_WANT_CLO */
+ if (h1m->flags & H1_MF_VER_11) {
+ TRACE_STATE("add \"Connection: close\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("close");
+ }
+ }
+}
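+
+/* To summarize the two helpers above (outside of TUNNEL mode and without an
+ * Upgrade token), a "Connection:" value is only suggested when the desired
+ * mode differs from the HTTP version's default behaviour:
+ *
+ *    mode    HTTP/1.0 message       HTTP/1.1 message
+ *    KAL     "keep-alive" added     nothing added
+ *    CLO     nothing added          "close" added
+ *
+ * On the response side, "keep-alive" is also added when the request was not
+ * HTTP/1.1, since such a client cannot assume persistence by default.
+ */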
+
+static void h1_process_input_conn_mode(struct h1s *h1s, struct h1m *h1m, struct htx *htx)
+{
+ if (!(h1s->h1c->flags & H1C_F_IS_BACK))
+ h1_set_cli_conn_mode(h1s, h1m);
+ else
+ h1_set_srv_conn_mode(h1s, h1m);
+}
+
+static void h1_process_output_conn_mode(struct h1s *h1s, struct h1m *h1m, struct ist *conn_val)
+{
+ if (!(h1s->h1c->flags & H1C_F_IS_BACK))
+ h1_set_cli_conn_mode(h1s, h1m);
+ else
+ h1_set_srv_conn_mode(h1s, h1m);
+
+ if (!(h1m->flags & H1_MF_RESP))
+ h1_update_req_conn_value(h1s, h1m, conn_val);
+ else
+ h1_update_res_conn_value(h1s, h1m, conn_val);
+}
+
+/* Try to adjust the case of the message header name using the global map
+ * <hdrs_map>.
+ */
+static void h1_adjust_case_outgoing_hdr(struct h1s *h1s, struct h1m *h1m, struct ist *name)
+{
+ struct ebpt_node *node;
+ struct h1_hdr_entry *entry;
+
+ /* No entry in the map, do nothing */
+ if (eb_is_empty(&hdrs_map.map))
+ return;
+
+ /* No conversion for the request headers */
+ if (!(h1m->flags & H1_MF_RESP) && !(h1s->h1c->px->options2 & PR_O2_H1_ADJ_BUGSRV))
+ return;
+
+ /* No conversion for the response headers */
+ if ((h1m->flags & H1_MF_RESP) && !(h1s->h1c->px->options2 & PR_O2_H1_ADJ_BUGCLI))
+ return;
+
+ node = ebis_lookup_len(&hdrs_map.map, name->ptr, name->len);
+ if (!node)
+ return;
+ entry = container_of(node, struct h1_hdr_entry, node);
+ name->ptr = entry->name.ptr;
+ name->len = entry->name.len;
+}
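+
+/* As an example (assuming the global "h1-case-adjust" keyword, which is what
+ * feeds <hdrs_map>), with:
+ *
+ *    global
+ *        h1-case-adjust content-length Content-Length
+ *
+ * and "option h1-case-adjust-bogus-server" set on the proxy, an outgoing
+ * request header named "content-length" is rewritten as "Content-Length".
+ */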
+
+/* Append the description of what is present in error snapshot <es> into <out>.
+ * The description must be small enough to always fit in a buffer. The output
+ * buffer may be the trash so the trash must not be used inside this function.
+ */
+static void h1_show_error_snapshot(struct buffer *out, const struct error_snapshot *es)
+{
+ chunk_appendf(out,
+ " H1 connection flags 0x%08x, H1 stream flags 0x%08x\n"
+ " H1 msg state %s(%d), H1 msg flags 0x%08x\n"
+ " H1 chunk len %lld bytes, H1 body len %lld bytes :\n",
+ es->ctx.h1.c_flags, es->ctx.h1.s_flags,
+ h1m_state_str(es->ctx.h1.state), es->ctx.h1.state,
+ es->ctx.h1.m_flags, es->ctx.h1.m_clen, es->ctx.h1.m_blen);
+}
+/*
+ * Capture a bad request or response and archive it in the proxy's structure.
+ * By default it tries to report the error position as h1m->err_pos. However if
+ * this one is not set, it will then report h1m->next, which is the last known
+ * parsing point. The function is able to deal with wrapping buffers. It always
+ * displays buffers as a contiguous area starting at buf->p. The direction is
+ * determined thanks to the h1m's flags.
+ */
+static void h1_capture_bad_message(struct h1c *h1c, struct h1s *h1s,
+ struct h1m *h1m, struct buffer *buf)
+{
+ struct session *sess = h1s->sess;
+ struct proxy *proxy = h1c->px;
+ struct proxy *other_end;
+ union error_snapshot_ctx ctx;
+
+ if ((h1c->flags & H1C_F_ST_ATTACHED) && sc_strm(h1s_sc(h1s))) {
+ if (sess == NULL)
+ sess = __sc_strm(h1s_sc(h1s))->sess;
+ if (!(h1m->flags & H1_MF_RESP))
+ other_end = __sc_strm(h1s_sc(h1s))->be;
+ else
+ other_end = sess->fe;
+ } else
+ other_end = NULL;
+
+ /* http-specific part now */
+ ctx.h1.state = h1m->state;
+ ctx.h1.c_flags = h1c->flags;
+ ctx.h1.s_flags = h1s->flags;
+ ctx.h1.m_flags = h1m->flags;
+ ctx.h1.m_clen = h1m->curr_len;
+ ctx.h1.m_blen = h1m->body_len;
+
+ proxy_capture_error(proxy, !!(h1m->flags & H1_MF_RESP), other_end,
+ h1c->conn->target, sess, buf, 0, 0,
+ (h1m->err_pos >= 0) ? h1m->err_pos : h1m->next,
+ &ctx, h1_show_error_snapshot);
+}
+
+/* Emit the chunk size followed by a CRLF in front of the data of the buffer
+ * <buf>. It goes backwards and starts with the byte before the buffer's
+ * head. The caller is responsible for ensuring there is enough room left before
+ * the buffer's head for the string.
+ */
+static void h1_emit_chunk_size(struct buffer *buf, size_t chksz)
+{
+ char *beg, *end;
+
+ beg = end = b_head(buf);
+ *--beg = '\n';
+ *--beg = '\r';
+ do {
+ *--beg = hextab[chksz & 0xF];
+ } while (chksz >>= 4);
+ buf->head -= (end - beg);
+ b_add(buf, end - beg);
+}
+
+/* Emit a CRLF after the data of the buffer <buf>. The caller is responsible for
+ * ensuring there is enough room left in the buffer for the string. */
+static void h1_emit_chunk_crlf(struct buffer *buf)
+{
+ *(b_peek(buf, b_data(buf))) = '\r';
+ *(b_peek(buf, b_data(buf) + 1)) = '\n';
+ b_add(buf, 2);
+}
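+
+/* Example of the chunk envelope produced by the two helpers above for a
+ * 1000-byte chunk: h1_emit_chunk_size() prepends the hex size and a CRLF
+ * ("3e8" CR LF, the digit case depending on hextab) right before the payload,
+ * and h1_emit_chunk_crlf() appends the trailing CR LF, yielding
+ * "3e8\r\n<1000 bytes>\r\n" on the wire. Both assume the caller reserved the
+ * necessary room, as stated above.
+ */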
+
+/*
+ * Switch the stream to tunnel mode. This function must only be called on 2xx
+ * (successful) replies to CONNECT requests or on 101 (switching protocol).
+ */
+static void h1_set_tunnel_mode(struct h1s *h1s)
+{
+ struct h1c *h1c = h1s->h1c;
+
+ h1s->req.state = H1_MSG_TUNNEL;
+ h1s->req.flags &= ~(H1_MF_XFER_LEN|H1_MF_CLEN|H1_MF_CHNK);
+
+ h1s->res.state = H1_MSG_TUNNEL;
+ h1s->res.flags &= ~(H1_MF_XFER_LEN|H1_MF_CLEN|H1_MF_CHNK);
+
+ TRACE_STATE("switch H1 stream in tunnel mode", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+
+ if (h1s->flags & H1S_F_RX_BLK) {
+ h1s->flags &= ~H1S_F_RX_BLK;
+ h1_wake_stream_for_recv(h1s);
+ TRACE_STATE("Re-enable input processing", H1_EV_RX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ if (h1s->flags & H1S_F_TX_BLK) {
+ h1s->flags &= ~H1S_F_TX_BLK;
+ h1_wake_stream_for_send(h1s);
+ TRACE_STATE("Re-enable output processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+}
+
+/* Search for a websocket key header. The message should have been identified
+ * as a valid websocket handshake.
+ *
+ * On the request side, if found, the key is stored in the H1 stream
+ * (h1s->ws_key). It might be needed to calculate the response key if the
+ * server side is using http/2.
+ *
+ * On the response side, the key might be verified if haproxy has been
+ * responsible for the generation of a key. This happens when an h2 client is
+ * interfaced with an h1 server.
+ *
+ * Returns 0 if the key is missing or invalid, 1 otherwise.
+ */
+static int h1_search_websocket_key(struct h1s *h1s, struct h1m *h1m, struct htx *htx)
+{
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ struct ist n, v;
+ int ws_key_found = 0, idx;
+
+ idx = htx_get_head(htx); // returns the SL that we skip
+ while ((idx = htx_get_next(htx, idx)) != -1) {
+ blk = htx_get_blk(htx, idx);
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type != HTX_BLK_HDR)
+ break;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+
+ /* The websocket key is the base64 encoding of 16 bytes, hence 24 chars */
+ if (isteqi(n, ist("sec-websocket-key")) && v.len == 24 &&
+ !(h1m->flags & H1_MF_RESP)) {
+ /* Copy the key on request side
+ * we might need it if the server is using h2 and does
+ * not provide the response
+ */
+ memcpy(h1s->ws_key, v.ptr, 24);
+ ws_key_found = 1;
+ break;
+ }
+ else if (isteqi(n, ist("sec-websocket-accept")) &&
+ h1m->flags & H1_MF_RESP) {
+ /* Need to verify the response key if the input was
+ * generated by haproxy
+ */
+ if (h1s->ws_key[0]) {
+ char key[29];
+ h1_calculate_ws_output_key(h1s->ws_key, key);
+ if (!isteqi(ist(key), v))
+ break;
+ }
+ ws_key_found = 1;
+ break;
+ }
+ }
+
+ /* missing websocket key, reject the message */
+ if (!ws_key_found) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ return 0;
+ }
+
+ return 1;
+}
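+
+/* Note on the lengths checked above: per RFC 6455, the websocket key is the
+ * base64 encoding of a 16-byte nonce, hence exactly 24 chars, and the accept
+ * token is the base64 encoding of a 20-byte SHA-1 digest, hence 28 chars,
+ * which is why <key> is a 29-byte buffer (28 chars plus a trailing NUL).
+ */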
+
+/*
+ * Parse HTTP/1 headers. It returns the number of bytes parsed if > 0, or 0 if
+ * it couldn't proceed. Parsing errors are reported by setting the
+ * H1S_F_*_ERROR flag. If more room is needed, the H1S_F_RX_CONGESTED flag is
+ * set. It relies on the function h1_parse_msg_hdrs() to do the parsing.
+ */
+static size_t h1_handle_headers(struct h1s *h1s, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ union h1_sl h1sl;
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s, 0, (size_t[]){max});
+
+ if (h1s->meth == HTTP_METH_CONNECT)
+ h1m->flags |= H1_MF_METH_CONNECT;
+ if (h1s->meth == HTTP_METH_HEAD)
+ h1m->flags |= H1_MF_METH_HEAD;
+
+ ret = h1_parse_msg_hdrs(h1m, &h1sl, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ if (ret == -1) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("parsing error, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+ else if (ret == -2) {
+ TRACE_STATE("RX path congested, waiting for more space", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_BLK, h1s->h1c->conn, h1s);
+ h1s->flags |= H1S_F_RX_CONGESTED;
+ }
+ ret = 0;
+ goto end;
+ }
+
+
+ /* Reject HTTP/1.0 GET/HEAD/DELETE requests with a payload, except if
+ * the accept_payload_with_any_method global option is set. There is a
+ * payload if the content-length is not null or the payload is
+ * chunk-encoded. A parsing error is reported, but a
+ * 413-Payload-Too-Large is returned instead of a 400-Bad-Request.
+ */
+ if (!accept_payload_with_any_method &&
+ !(h1m->flags & (H1_MF_RESP|H1_MF_VER_11)) &&
+ (((h1m->flags & H1_MF_CLEN) && h1m->body_len) || (h1m->flags & H1_MF_CHNK)) &&
+ (h1sl.rq.meth == HTTP_METH_GET || h1sl.rq.meth == HTTP_METH_HEAD || h1sl.rq.meth == HTTP_METH_DELETE)) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ h1s->h1c->errcode = 413;
+ TRACE_ERROR("HTTP/1.0 GET/HEAD/DELETE request with a payload forbidden", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ ret = 0;
+ goto end;
+ }
+
+ /* Reject any message with an unknown transfer-encoding, in fact any
+ * encoding other than "chunked". A 422-Unprocessable-Content is
+ * returned for an invalid request, a 502-Bad-Gateway for an invalid
+ * response.
+ */
+ if (h1m->flags & H1_MF_TE_OTHER) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ if (!(h1m->flags & H1_MF_RESP))
+ h1s->h1c->errcode = 422;
+ TRACE_ERROR("Unknown transfer-encoding", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ ret = 0;
+ goto end;
+ }
+
+ /* If websocket handshake, search for the websocket key */
+ if ((h1m->flags & (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) ==
+ (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) {
+ int ws_ret = h1_search_websocket_key(h1s, h1m, htx);
+ if (!ws_ret) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("missing/invalid websocket key, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+
+ ret = 0;
+ goto end;
+ }
+ }
+
+ if (h1m->err_pos >= 0) {
+ /* Maybe we found an error during the parsing while we were
+ * configured not to block on that, so we have to capture it
+ * now.
+ */
+ TRACE_STATE("Ignored parsing error", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+
+ if (!(h1m->flags & H1_MF_RESP)) {
+ h1s->meth = h1sl.rq.meth;
+ if (h1s->meth == HTTP_METH_HEAD)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+ }
+ else {
+ h1s->status = h1sl.st.status;
+ if (h1s->status == 204 || h1s->status == 304)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+ }
+ h1_process_input_conn_mode(h1s, h1m, htx);
+ *ofs += ret;
+
+ end:
+ TRACE_LEAVE(H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+/*
+ * Parse HTTP/1 body. It returns the number of bytes parsed if > 0, or 0 if it
+ * couldn't proceed. Parsing errors are reported by setting the H1S_F_*_ERROR
+ * flag. It relies on the function h1_parse_msg_data() to do the parsing.
+ */
+static size_t h1_handle_data(struct h1s *h1s, struct h1m *h1m, struct htx **htx,
+ struct buffer *buf, size_t *ofs, size_t max,
+ struct buffer *htxbuf)
+{
+ size_t ret;
+
+ TRACE_ENTER(H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s, 0, (size_t[]){max});
+ ret = h1_parse_msg_data(h1m, htx, buf, *ofs, max, htxbuf);
+ if (!ret) {
+ TRACE_DEVEL("leaving on missing data or error", H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s);
+ if ((*htx)->flags & HTX_FL_PARSING_ERROR) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("parsing error, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_BODY|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+ goto end;
+ }
+
+ *ofs += ret;
+
+ end:
+ if (b_data(buf) != *ofs && (h1m->state == H1_MSG_DATA || h1m->state == H1_MSG_TUNNEL)) {
+ TRACE_STATE("RX path congested, waiting for more space", H1_EV_RX_DATA|H1_EV_RX_BODY|H1_EV_H1S_BLK, h1s->h1c->conn, h1s);
+ h1s->flags |= H1S_F_RX_CONGESTED;
+ }
+
+ TRACE_LEAVE(H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+/*
+ * Parse HTTP/1 trailers. It returns the number of bytes parsed if > 0, or 0 if
+ * it couldn't proceed. Parsing errors are reported by setting the H1S_F_*_ERROR
+ * flag and filling the h1s->err_pos and h1s->err_state fields. This function is
+ * responsible for updating the parser state <h1m>. If more room is needed, the
+ * H1S_F_RX_CONGESTED flag is set.
+ */
+static size_t h1_handle_trailers(struct h1s *h1s, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ int ret;
+
+ TRACE_ENTER(H1_EV_RX_DATA|H1_EV_RX_TLRS, h1s->h1c->conn, h1s, 0, (size_t[]){max});
+ ret = h1_parse_msg_tlrs(h1m, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s);
+ if (ret == -1) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("parsing error, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_TLRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+ else if (ret == -2) {
+ TRACE_STATE("RX path congested, waiting for more space", H1_EV_RX_DATA|H1_EV_RX_TLRS|H1_EV_H1S_BLK, h1s->h1c->conn, h1s);
+ h1s->flags |= H1S_F_RX_CONGESTED;
+ }
+ ret = 0;
+ goto end;
+ }
+
+ *ofs += ret;
+
+ end:
+ TRACE_LEAVE(H1_EV_RX_DATA|H1_EV_RX_TLRS, h1s->h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+/*
+ * Process incoming data. It parses data and transfers them from h1c->ibuf into
+ * <buf>. It returns the number of bytes parsed and transferred if > 0, or 0 if
+ * it couldn't proceed.
+ *
+ * WARNING: H1S_F_RX_CONGESTED flag must be removed before processing input data.
+ */
+static size_t h1_process_demux(struct h1c *h1c, struct buffer *buf, size_t count)
+{
+ struct h1s *h1s = h1c->h1s;
+ struct h1m *h1m;
+ struct htx *htx;
+ size_t data;
+ size_t ret = 0;
+ size_t total = 0;
+
+ htx = htx_from_buf(buf);
+ TRACE_ENTER(H1_EV_RX_DATA, h1c->conn, h1s, htx, (size_t[]){count});
+
+ h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res);
+ data = htx->data;
+
+ if (h1s->flags & (H1S_F_PARSING_ERROR|H1S_F_NOT_IMPL_ERROR))
+ goto end;
+
+ if (h1s->flags & H1S_F_RX_BLK)
+ goto out;
+
+ /* Always remove congestion flags and try to process more input data */
+ h1s->flags &= ~H1S_F_RX_CONGESTED;
+
+ do {
+ size_t used = htx_used_space(htx);
+
+ if (h1m->state <= H1_MSG_LAST_LF) {
+ TRACE_PROTO("parsing message headers", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1c->conn, h1s);
+ ret = h1_handle_headers(h1s, h1m, htx, &h1c->ibuf, &total, count);
+ if (!ret)
+ break;
+
+ TRACE_USER((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request headers" : "rcvd H1 response headers"),
+ H1_EV_RX_DATA|H1_EV_RX_HDRS, h1c->conn, h1s, htx, (size_t[]){ret});
+
+ if ((h1m->flags & H1_MF_RESP) &&
+ h1s->status < 200 && (h1s->status == 100 || h1s->status >= 102)) {
+ h1m_init_res(&h1s->res);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ TRACE_STATE("1xx response rcvd", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1c->conn, h1s);
+ }
+ }
+ else if (h1m->state < H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing message payload", H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s);
+ ret = h1_handle_data(h1s, h1m, &htx, &h1c->ibuf, &total, count, buf);
+ if (h1m->state < H1_MSG_TRAILERS)
+ break;
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request payload data" : "rcvd H1 response payload data"),
+ H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s, htx, (size_t[]){ret});
+ }
+ else if (h1m->state == H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing message trailers", H1_EV_RX_DATA|H1_EV_RX_TLRS, h1c->conn, h1s);
+ ret = h1_handle_trailers(h1s, h1m, htx, &h1c->ibuf, &total, count);
+ if (h1m->state != H1_MSG_DONE)
+ break;
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request trailers" : "rcvd H1 response trailers"),
+ H1_EV_RX_DATA|H1_EV_RX_TLRS, h1c->conn, h1s, htx, (size_t[]){ret});
+ }
+ else if (h1m->state == H1_MSG_DONE) {
+ TRACE_USER((!(h1m->flags & H1_MF_RESP) ? "H1 request fully rcvd" : "H1 response fully rcvd"),
+ H1_EV_RX_DATA|H1_EV_RX_EOI, h1c->conn, h1s, htx);
+
+ if ((h1m->flags & H1_MF_RESP) &&
+ ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) || h1s->status == 101))
+ h1_set_tunnel_mode(h1s);
+ else {
+ if (h1s->req.state < H1_MSG_DONE || h1s->res.state < H1_MSG_DONE) {
+ /* Unfinished transaction: block this input side waiting for the end of the output side */
+ h1s->flags |= H1S_F_RX_BLK;
+ TRACE_STATE("Disable input processing", H1_EV_RX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ }
+ if (h1s->flags & H1S_F_TX_BLK) {
+ h1s->flags &= ~H1S_F_TX_BLK;
+ h1_wake_stream_for_send(h1s);
+ TRACE_STATE("Re-enable output processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ break;
+ }
+ }
+ else if (h1m->state == H1_MSG_TUNNEL) {
+ TRACE_PROTO("parsing tunneled data", H1_EV_RX_DATA, h1c->conn, h1s);
+ ret = h1_handle_data(h1s, h1m, &htx, &h1c->ibuf, &total, count, buf);
+ if (!ret)
+ break;
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request tunneled data" : "rcvd H1 response tunneled data"),
+ H1_EV_RX_DATA|H1_EV_RX_EOI, h1c->conn, h1s, htx, (size_t[]){ret});
+ }
+ else {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ break;
+ }
+
+ count -= htx_used_space(htx) - used;
+ } while (!(h1s->flags & (H1S_F_PARSING_ERROR|H1S_F_NOT_IMPL_ERROR|H1S_F_RX_BLK|H1S_F_RX_CONGESTED)));
+
+
+ if (h1s->flags & (H1S_F_PARSING_ERROR|H1S_F_NOT_IMPL_ERROR)) {
+ TRACE_ERROR("parsing or not-implemented error", H1_EV_RX_DATA|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto err;
+ }
+
+ b_del(&h1c->ibuf, total);
+
+ TRACE_DEVEL("incoming data parsed", H1_EV_RX_DATA, h1c->conn, h1s, htx, (size_t[]){ret});
+
+ ret = htx->data - data;
+ if ((h1c->flags & H1C_F_IN_FULL) && buf_room_for_htx_data(&h1c->ibuf)) {
+ h1c->flags &= ~H1C_F_IN_FULL;
+ TRACE_STATE("h1c ibuf not full anymore", H1_EV_RX_DATA|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn, h1s);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+
+ if (!b_data(&h1c->ibuf))
+ h1_release_buf(h1c, &h1c->ibuf);
+
+ if (!(h1c->flags & H1C_F_ST_READY)) {
+ /* The H1 connection is not ready. Most of the time, there is no SC
+ * attached, except for TCP>H1 upgrade, from a TCP frontend. In both
+ * cases, it is only possible on the client side.
+ */
+ BUG_ON(h1c->flags & H1C_F_IS_BACK);
+
+ if (h1m->state <= H1_MSG_LAST_LF) {
+ TRACE_STATE("Incomplete message, subscribing", H1_EV_RX_DATA|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn, h1s);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ goto end;
+ }
+
+ if (!(h1c->flags & H1C_F_ST_ATTACHED)) {
+ TRACE_DEVEL("request headers fully parsed, create and attach the SC", H1_EV_RX_DATA, h1c->conn, h1s);
+ BUG_ON(h1s_sc(h1s));
+ if (!h1s_new_sc(h1s, buf)) {
+ h1c->flags |= H1C_F_ST_ERROR;
+ goto err;
+ }
+ }
+ else {
+ TRACE_DEVEL("request headers fully parsed, upgrade the inherited SC", H1_EV_RX_DATA, h1c->conn, h1s);
+ BUG_ON(h1s_sc(h1s) == NULL);
+ if (!h1s_upgrade_sc(h1s, buf)) {
+ h1c->flags |= H1C_F_ST_ERROR;
+ TRACE_ERROR("H1S upgrade failure", H1_EV_RX_DATA|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto err;
+ }
+ }
+ }
+
+ /* Here h1s_sc(h1s) is always defined */
+ if (!(h1m->flags & H1_MF_CHNK) && (h1m->state == H1_MSG_DATA || (h1m->state == H1_MSG_TUNNEL))) {
+ TRACE_STATE("notify the mux can use splicing", H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s);
+ se_fl_set(h1s->sd, SE_FL_MAY_SPLICE);
+ }
+ else {
+ TRACE_STATE("notify the mux can't use splicing anymore", H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s);
+ se_fl_clr(h1s->sd, SE_FL_MAY_SPLICE);
+ }
+
+ /* Set EOI on the stream connector in DONE state iff:
+ * - it is a response
+ * - it is a request but neither a protocol upgrade nor a CONNECT
+ *
+ * If not set, wait for the response to decide whether to do so,
+ * depending on the status code.
+ */
+ if (((h1m->state == H1_MSG_DONE) && (h1m->flags & H1_MF_RESP)) ||
+ ((h1m->state == H1_MSG_DONE) && (h1s->meth != HTTP_METH_CONNECT) && !(h1m->flags & H1_MF_CONN_UPG)))
+ se_fl_set(h1s->sd, SE_FL_EOI);
+
+ out:
+ /* When input data are pending for this message, notify the upper layer
+ * that the mux needs more space in the HTX buffer to continue if:
+ *
+ * - The parser is blocked in MSG_DATA or MSG_TUNNEL state
+ * - Headers or trailers are pending to be copied.
+ */
+ if (h1s->flags & (H1S_F_RX_CONGESTED)) {
+ se_fl_set(h1s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ TRACE_STATE("waiting for more room", H1_EV_RX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ }
+ else {
+ se_fl_clr(h1s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (h1s->flags & H1S_F_REOS) {
+ se_fl_set(h1s->sd, SE_FL_EOS);
+ if (h1m->state >= H1_MSG_DONE || !(h1m->flags & H1_MF_XFER_LEN)) {
+ /* DONE or TUNNEL or SHUTR without XFER_LEN, set
+ * EOI on the stream connector */
+ se_fl_set(h1s->sd, SE_FL_EOI);
+ }
+ else if (h1m->state > H1_MSG_LAST_LF && h1m->state < H1_MSG_DONE) {
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("message aborted, set error on SC", H1_EV_RX_DATA|H1_EV_H1S_ERR, h1c->conn, h1s);
+ }
+
+ if (h1s->flags & H1S_F_TX_BLK) {
+ h1s->flags &= ~H1S_F_TX_BLK;
+ h1_wake_stream_for_send(h1s);
+ TRACE_STATE("Re-enable output processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ }
+ }
+
+ end:
+ htx_to_buf(htx, buf);
+ TRACE_LEAVE(H1_EV_RX_DATA, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ err:
+ htx_to_buf(htx, buf);
+ se_fl_set(h1s->sd, SE_FL_EOI);
+ TRACE_DEVEL("leaving on error", H1_EV_RX_DATA|H1_EV_STRM_ERR, h1c->conn, h1s);
+ return 0;
+}
+
+/*
+ * Process outgoing data. It parses data and transfers them from the channel buffer into
+ * h1c->obuf. It returns the number of bytes parsed and transferred if > 0, or
+ * 0 if it couldn't proceed.
+ */
+static size_t h1_process_mux(struct h1c *h1c, struct buffer *buf, size_t count)
+{
+ struct h1s *h1s = h1c->h1s;
+ struct h1m *h1m;
+ struct htx *chn_htx = NULL;
+ struct htx_blk *blk;
+ struct buffer tmp;
+ size_t total = 0;
+ int last_data = 0;
+ int ws_key_found = 0;
+
+ chn_htx = htxbuf(buf);
+ TRACE_ENTER(H1_EV_TX_DATA, h1c->conn, h1s, chn_htx, (size_t[]){count});
+
+ if (htx_is_empty(chn_htx))
+ goto end;
+
+ if (h1s->flags & (H1S_F_PROCESSING_ERROR|H1S_F_TX_BLK))
+ goto end;
+
+ if (!h1_get_buf(h1c, &h1c->obuf)) {
+ h1c->flags |= H1C_F_OUT_ALLOC;
+ TRACE_STATE("waiting for h1c obuf allocation", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ goto end;
+ }
+
+ h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+
+ /* the htx is non-empty thus has at least one block */
+ blk = htx_get_head_blk(chn_htx);
+
+ /* Perform some optimizations to reduce the number of buffer copies.
+ * First, if the mux's buffer is empty and the htx area contains
+ * exactly one data block of the same size as the requested count,
+ * then it's possible to simply swap the caller's buffer with the
+ * mux's output buffer and adjust offsets and length to match the
+ * entire DATA HTX block in the middle. In this case we perform a
+ * true zero-copy operation from end-to-end. This is the situation
+ * that happens all the time with large files. Second, if this is not
+ * possible, but the mux's output buffer is empty, we still have an
+ * opportunity to avoid the copy to the intermediary buffer, by making
+ * the intermediary buffer's area point to the output buffer's area.
+ * In this case we want to skip the HTX header to make sure that copies
+ * remain aligned and that this operation remains possible all the
+ * time. This goes for headers, data blocks and any data extracted from
+ * the HTX blocks.
+ */
+ if (!b_data(&h1c->obuf)) {
+ if ((h1m->state == H1_MSG_DATA || h1m->state == H1_MSG_TUNNEL) &&
+ (!(h1m->flags & H1_MF_RESP) || !(h1s->flags & H1S_F_BODYLESS_RESP)) &&
+ htx_nbblks(chn_htx) == 1 &&
+ htx_get_blk_type(blk) == HTX_BLK_DATA &&
+ htx_get_blk_value(chn_htx, blk).len == count) {
+ void *old_area;
+
+ TRACE_PROTO("sending message data (zero-copy)", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, chn_htx, (size_t[]){count});
+ if (h1m->state == H1_MSG_DATA) {
+ if (h1m->flags & H1_MF_CLEN) {
+ if (count > h1m->curr_len) {
+ TRACE_ERROR("too much payload, more than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ h1m->curr_len -= count;
+ if (!h1m->curr_len)
+ last_data = 1;
+ }
+ if (chn_htx->flags & HTX_FL_EOM) {
+ TRACE_DEVEL("last message block", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s);
+ last_data = 1;
+ }
+ }
+
+ old_area = h1c->obuf.area;
+ h1c->obuf.area = buf->area;
+ h1c->obuf.head = sizeof(struct htx) + blk->addr;
+ h1c->obuf.data = count;
+
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+
+ chn_htx = (struct htx *)buf->area;
+ htx_reset(chn_htx);
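+
+ /* Resulting layout after the swap, for a single DATA block of
+ * <count> bytes (the only case reaching this point):
+ * - h1c->obuf.area points to the former channel buffer,
+ * - h1c->obuf.head is sizeof(struct htx) + blk->addr, i.e. the
+ * first payload byte,
+ * - h1c->obuf.data is <count>,
+ * while the channel buffer now owns the old (empty) obuf area,
+ * reset to carry an empty HTX message.
+ */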
+
+ /* The message is chunked. We need to emit the chunk
+ * size and possibly the last chunk. We have at least
+ * the size of the struct htx to write the chunk
+ * envelope. It should be enough.
+ */
+ if (h1m->flags & H1_MF_CHNK) {
+ h1_emit_chunk_size(&h1c->obuf, count);
+ h1_emit_chunk_crlf(&h1c->obuf);
+ if (last_data) {
+ /* Emit the last chunk too at the buffer's end */
+ b_putblk(&h1c->obuf, "0\r\n\r\n", 5);
+ }
+ }
+
+ if (h1m->state == H1_MSG_DATA)
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request payload data xferred" : "H1 response payload data xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){count});
+ else
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request tunneled data xferred" : "H1 response tunneled data xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){count});
+
+ total += count;
+ if (last_data) {
+ h1m->state = H1_MSG_DONE;
+ if (h1s->flags & H1S_F_RX_BLK) {
+ h1s->flags &= ~H1S_F_RX_BLK;
+ h1_wake_stream_for_recv(h1s);
+ TRACE_STATE("Re-enable input processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+
+ TRACE_USER((!(h1m->flags & H1_MF_RESP) ? "H1 request fully xferred" : "H1 response fully xferred"),
+ H1_EV_TX_DATA, h1c->conn, h1s);
+ }
+ goto out;
+ }
+ tmp.area = h1c->obuf.area + h1c->obuf.head;
+ }
+ else
+ tmp.area = trash.area;
+
+ tmp.data = 0;
+ tmp.size = b_room(&h1c->obuf);
+ while (count && !(h1s->flags & (H1S_F_PROCESSING_ERROR|H1S_F_TX_BLK)) && blk) {
+ struct htx_sl *sl;
+ struct ist n, v;
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ uint32_t vlen, chklen;
+
+ vlen = sz;
+ if (type != HTX_BLK_DATA && vlen > count)
+ goto full;
+
+ if (type == HTX_BLK_UNUSED)
+ goto nextblk;
+
+ switch (h1m->state) {
+ case H1_MSG_RQBEFORE:
+ if (type != HTX_BLK_REQ_SL)
+ goto error;
+ TRACE_USER("sending request headers", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, chn_htx);
+ sl = htx_get_blk_ptr(chn_htx, blk);
+ h1s->meth = sl->info.req.meth;
+ h1_parse_req_vsn(h1m, sl);
+ if (!h1_format_htx_reqline(sl, &tmp))
+ goto full;
+ h1m->flags |= H1_MF_XFER_LEN;
+ if (sl->flags & HTX_SL_F_BODYLESS)
+ h1m->flags |= H1_MF_CLEN;
+ h1m->state = H1_MSG_HDR_FIRST;
+ if (h1s->meth == HTTP_METH_HEAD)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+ if (h1s->flags & H1S_F_RX_BLK) {
+ h1s->flags &= ~H1S_F_RX_BLK;
+ h1_wake_stream_for_recv(h1s);
+ TRACE_STATE("Re-enable input processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ break;
+
+ case H1_MSG_RPBEFORE:
+ if (type != HTX_BLK_RES_SL)
+ goto error;
+ TRACE_USER("sending response headers", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, chn_htx);
+ sl = htx_get_blk_ptr(chn_htx, blk);
+ h1s->status = sl->info.res.status;
+ h1_parse_res_vsn(h1m, sl);
+ if (!h1_format_htx_stline(sl, &tmp))
+ goto full;
+ if (sl->flags & HTX_SL_F_XFER_LEN)
+ h1m->flags |= H1_MF_XFER_LEN;
+ if (h1s->status < 200)
+ h1s->flags |= H1S_F_HAVE_O_CONN;
+ else if (h1s->status == 204 || h1s->status == 304)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+ h1m->state = H1_MSG_HDR_FIRST;
+ break;
+
+ case H1_MSG_HDR_FIRST:
+ case H1_MSG_HDR_NAME:
+ case H1_MSG_HDR_L2_LWS:
+ if (type == HTX_BLK_EOH)
+ goto last_lf;
+ if (type != HTX_BLK_HDR)
+ goto error;
+
+ h1m->state = H1_MSG_HDR_NAME;
+ n = htx_get_blk_name(chn_htx, blk);
+ v = htx_get_blk_value(chn_htx, blk);
+
+ /* Skip all pseudo-headers */
+ if (*(n.ptr) == ':')
+ goto skip_hdr;
+
+ if (isteq(n, ist("transfer-encoding"))) {
+ if ((h1m->flags & H1_MF_RESP) && (h1s->status < 200 || h1s->status == 204))
+ goto skip_hdr;
+ h1_parse_xfer_enc_header(h1m, v);
+ }
+ else if (isteq(n, ist("content-length"))) {
+ if ((h1m->flags & H1_MF_RESP) && (h1s->status < 200 || h1s->status == 204))
+ goto skip_hdr;
+ /* Only skip C-L header with invalid value. */
+ if (h1_parse_cont_len_header(h1m, &v) < 0)
+ goto skip_hdr;
+ }
+ else if (isteq(n, ist("connection"))) {
+ h1_parse_connection_header(h1m, &v);
+ if (!v.len)
+ goto skip_hdr;
+ }
+ else if (isteq(n, ist("upgrade"))) {
+ h1_parse_upgrade_header(h1m, v);
+ }
+ else if ((isteq(n, ist("sec-websocket-accept")) &&
+ h1m->flags & H1_MF_RESP) ||
+ (isteq(n, ist("sec-websocket-key")) &&
+ !(h1m->flags & H1_MF_RESP))) {
+ ws_key_found = 1;
+ }
+ else if (isteq(n, ist("te"))) {
+ /* "te" may only be sent with "trailers" if this value
+ * is present, otherwise it must be deleted.
+ */
+ v = istist(v, ist("trailers"));
+ if (!isttest(v) || (v.len > 8 && v.ptr[8] != ','))
+ goto skip_hdr;
+ v = ist("trailers");
+ }
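+
+ /* e.g. "TE: trailers" and "TE: trailers, deflate" are kept
+ * (rewritten as "trailers"), while "TE: deflate" or
+ * "TE: trailersonly" cause the header to be dropped.
+ */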
+
+ /* Skip header if same name is used to add the server name */
+ if (!(h1m->flags & H1_MF_RESP) && isttest(h1c->px->server_id_hdr_name) &&
+ isteqi(n, h1c->px->server_id_hdr_name))
+ goto skip_hdr;
+
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &tmp))
+ goto full;
+ skip_hdr:
+ h1m->state = H1_MSG_HDR_L2_LWS;
+ break;
+
+ case H1_MSG_LAST_LF:
+ if (type != HTX_BLK_EOH)
+ goto error;
+ last_lf:
+ h1m->state = H1_MSG_LAST_LF;
+ if (!(h1s->flags & H1S_F_HAVE_O_CONN)) {
+ if ((chn_htx->flags & HTX_FL_PROXY_RESP) && h1s->req.state != H1_MSG_DONE) {
+ /* If the reply comes from haproxy while the request is
+ * not finished, we force the connection close. */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if ((h1m->flags & (H1_MF_XFER_ENC|H1_MF_CLEN)) == (H1_MF_XFER_ENC|H1_MF_CLEN)) {
+ /* T-E + C-L: force close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (T-E + C-L)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if ((h1m->flags & (H1_MF_VER_11|H1_MF_XFER_ENC)) == H1_MF_XFER_ENC) {
+ /* T-E + HTTP/1.0: force close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (T-E + HTTP/1.0)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+
+ /* the conn_mode must be processed. So do it */
+ n = ist("connection");
+ v = ist("");
+ h1_process_output_conn_mode(h1s, h1m, &v);
+ if (v.len) {
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &tmp))
+ goto full;
+ }
+ h1s->flags |= H1S_F_HAVE_O_CONN;
+ }
+
+ if ((h1s->meth != HTTP_METH_CONNECT &&
+ (h1m->flags & (H1_MF_VER_11|H1_MF_RESP|H1_MF_CLEN|H1_MF_CHNK|H1_MF_XFER_LEN)) ==
+ (H1_MF_VER_11|H1_MF_XFER_LEN)) ||
+ (h1s->status >= 200 && !(h1s->flags & H1S_F_BODYLESS_RESP) &&
+ !(h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) &&
+ (h1m->flags & (H1_MF_VER_11|H1_MF_RESP|H1_MF_CLEN|H1_MF_CHNK|H1_MF_XFER_LEN)) ==
+ (H1_MF_VER_11|H1_MF_RESP|H1_MF_XFER_LEN))) {
+ /* chunking needed but header not seen */
+ n = ist("transfer-encoding");
+ v = ist("chunked");
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &tmp))
+ goto full;
+ TRACE_STATE("add \"Transfer-Encoding: chunked\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ h1m->flags |= H1_MF_CHNK;
+ }
+
+ /* Now add the server name to a header (if requested) */
+ if (!(h1s->flags & H1S_F_HAVE_SRV_NAME) &&
+ !(h1m->flags & H1_MF_RESP) && isttest(h1c->px->server_id_hdr_name)) {
+ struct server *srv = objt_server(h1c->conn->target);
+
+ if (srv) {
+ n = h1c->px->server_id_hdr_name;
+ v = ist(srv->id);
+
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &tmp))
+ goto full;
+ }
+ TRACE_STATE("add server name header", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ h1s->flags |= H1S_F_HAVE_SRV_NAME;
+ }
+
+ /* Add websocket handshake key if needed */
+ if ((h1m->flags & (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) == (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET) &&
+ !ws_key_found) {
+ if (!(h1m->flags & H1_MF_RESP)) {
+ /* generate a random websocket key
+ * stored in the session to
+ * verify it on the response side
+ */
+ h1_generate_random_ws_input_key(h1s->ws_key);
+
+ if (!h1_format_htx_hdr(ist("Sec-Websocket-Key"),
+ ist(h1s->ws_key),
+ &tmp)) {
+ goto full;
+ }
+ }
+ else {
+ /* add the response header key */
+ char key[29];
+ h1_calculate_ws_output_key(h1s->ws_key, key);
+ if (!h1_format_htx_hdr(ist("Sec-Websocket-Accept"),
+ ist(key),
+ &tmp)) {
+ goto full;
+ }
+ }
+ }
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request headers xferred" : "H1 response headers xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+
+ if (!(h1m->flags & H1_MF_RESP) && h1s->meth == HTTP_METH_CONNECT) {
+ if (!chunk_memcat(&tmp, "\r\n", 2))
+ goto full;
+ goto done;
+ }
+ else if ((h1m->flags & H1_MF_RESP) &&
+ ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) || h1s->status == 101)) {
+ if (!chunk_memcat(&tmp, "\r\n", 2))
+ goto full;
+ goto done;
+ }
+ else if ((h1m->flags & H1_MF_RESP) &&
+ h1s->status < 200 && (h1s->status == 100 || h1s->status >= 102)) {
+ if (!chunk_memcat(&tmp, "\r\n", 2))
+ goto full;
+ h1m_init_res(&h1s->res);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ h1s->flags &= ~H1S_F_HAVE_O_CONN;
+ TRACE_STATE("1xx response xferred", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ }
+ else {
+ /* EOM flag is set, or empty payload (C-L set to 0), and it is the last block */
+ if (htx_is_unique_blk(chn_htx, blk) &&
+ ((chn_htx->flags & HTX_FL_EOM) || ((h1m->flags & H1_MF_CLEN) && !h1m->curr_len))) {
+ if ((h1m->flags & H1_MF_CHNK) && !(h1s->flags & H1S_F_BODYLESS_RESP)) {
+ if (!chunk_memcat(&tmp, "\r\n0\r\n\r\n", 7))
+ goto full;
+ }
+ else if (!chunk_memcat(&tmp, "\r\n", 2))
+ goto full;
+ goto done;
+ }
+ else if (!chunk_memcat(&tmp, "\r\n", 2))
+ goto full;
+ h1m->state = H1_MSG_DATA;
+ }
+ break;
+
+ case H1_MSG_DATA:
+ case H1_MSG_TUNNEL:
+ if (type == HTX_BLK_EOT || type == HTX_BLK_TLR) {
+ if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))
+ goto trailers;
+
+ /* If the message is not chunked, never
+ * add the last chunk. */
+ if ((h1m->flags & H1_MF_CHNK) && !chunk_memcat(&tmp, "0\r\n", 3))
+ goto full;
+ TRACE_PROTO("sending message trailers", H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s, chn_htx);
+ goto trailers;
+ }
+ else if (type != HTX_BLK_DATA)
+ goto error;
+
+ TRACE_PROTO("sending message data", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, chn_htx, (size_t[]){sz});
+
+ /* It is the last block of this message. After this one,
+ * only tunneled data may be forwarded. */
+ if (h1m->state == H1_MSG_DATA && htx_is_unique_blk(chn_htx, blk) && (chn_htx->flags & HTX_FL_EOM)) {
+ TRACE_DEVEL("last message block", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s);
+ last_data = 1;
+ }
+
+ if (vlen > count) {
+ /* Get the maximum amount of data we can transfer */
+ vlen = count;
+ last_data = 0;
+ }
+
+ if (h1m->state == H1_MSG_DATA) {
+ if (h1m->flags & H1_MF_CLEN) {
+ if (vlen > h1m->curr_len) {
+ TRACE_ERROR("too much payload, more than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ }
+ if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) {
+ TRACE_PROTO("Skip data for bodyless response", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, chn_htx);
+ goto skip_data;
+ }
+ }
+
+ chklen = 0;
+ if (h1m->flags & H1_MF_CHNK) {
+ chklen = b_room(&tmp);
+ chklen = ((chklen < 16) ? 1 : (chklen < 256) ? 2 :
+ (chklen < 4096) ? 3 : (chklen < 65536) ? 4 :
+ (chklen < 1048576) ? 5 : 8);
+ chklen += 4; /* 2 x CRLF */
+
+ /* If it is the end of the chunked message (without EOT), reserve
+ * room for the last chunk */
+ if (last_data)
+ chklen += 5;
+ }
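+
+ /* e.g. with 10000 bytes of room in <tmp>, the chunk size may
+ * need up to 4 hex digits, so chklen = 4 + 4 = 8 bytes for the
+ * "SIZE\r\n...\r\n" envelope, plus 5 more ("0\r\n\r\n") for the
+ * last chunk. The estimate is conservative since it is derived
+ * from the available room rather than from <vlen> itself.
+ */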
+
+ if (vlen + chklen > b_room(&tmp)) {
+ /* too large for the buffer */
+ if (chklen >= b_room(&tmp))
+ goto full;
+ vlen = b_room(&tmp) - chklen;
+ last_data = 0;
+ }
+ v = htx_get_blk_value(chn_htx, blk);
+ v.len = vlen;
+ if (!h1_format_htx_data(v, &tmp, !!(h1m->flags & H1_MF_CHNK)))
+ goto full;
+
+ /* Space already reserved, so it must succeed */
+ if ((h1m->flags & H1_MF_CHNK) && last_data && !chunk_memcat(&tmp, "0\r\n\r\n", 5))
+ goto error;
+
+ if (h1m->state == H1_MSG_DATA)
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request payload data xferred" : "H1 response payload data xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){v.len});
+ else
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request tunneled data xferred" : "H1 response tunneled data xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){v.len});
+
+ skip_data:
+ if (h1m->state == H1_MSG_DATA && (h1m->flags & H1_MF_CLEN)) {
+ h1m->curr_len -= vlen;
+ if (!h1m->curr_len)
+ last_data = 1;
+ }
+ if (last_data)
+ goto done;
+ break;
+
+ case H1_MSG_TRAILERS:
+ if (type != HTX_BLK_TLR && type != HTX_BLK_EOT)
+ goto error;
+ trailers:
+ h1m->state = H1_MSG_TRAILERS;
+
+ if (!(h1m->flags & H1_MF_CHNK))
+ goto done;
+
+ if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) {
+ TRACE_PROTO("Skip trailers for bodyless response", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, chn_htx);
+ if (type == HTX_BLK_EOT)
+ goto done;
+ break;
+ }
+
+ if (type == HTX_BLK_EOT) {
+ if (!chunk_memcat(&tmp, "\r\n", 2))
+ goto full;
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request trailers xferred" : "H1 response trailers xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s);
+ goto done;
+ }
+ else { // HTX_BLK_TLR
+ n = htx_get_blk_name(chn_htx, blk);
+ v = htx_get_blk_value(chn_htx, blk);
+
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &tmp))
+ goto full;
+ }
+ break;
+
+ case H1_MSG_DONE:
+ /* If the message is not chunked, ignore
+ * trailers. It may happen with H2 messages. */
+ if ((type == HTX_BLK_TLR || type == HTX_BLK_EOT) && !(h1m->flags & H1_MF_CHNK))
+ break;
+
+ TRACE_STATE("unexpected data xferred in done state", H1_EV_TX_DATA|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error; /* For now return an error */
+
+ done:
+ h1m->state = H1_MSG_DONE;
+ if (!(h1m->flags & H1_MF_RESP) && h1s->meth == HTTP_METH_CONNECT) {
+ h1s->flags |= H1S_F_TX_BLK;
+ TRACE_STATE("Disable output processing", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ }
+ else if ((h1m->flags & H1_MF_RESP) &&
+ ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) || h1s->status == 101)) {
+ /* a successful reply to a CONNECT or a protocol switching is sent
+ * to the client. Switch the response to tunnel mode.
+ */
+ h1_set_tunnel_mode(h1s);
+ TRACE_STATE("switch H1 response in tunnel mode", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ }
+
+ if (h1s->flags & H1S_F_RX_BLK) {
+ h1s->flags &= ~H1S_F_RX_BLK;
+ h1_wake_stream_for_recv(h1s);
+ TRACE_STATE("Re-enable input processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+
+ TRACE_USER((!(h1m->flags & H1_MF_RESP) ? "H1 request fully xferred" : "H1 response fully xferred"),
+ H1_EV_TX_DATA, h1c->conn, h1s);
+ break;
+
+ default:
+ error:
+ /* Unexpected error during output processing */
+ chn_htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ h1c->flags |= H1C_F_ST_ERROR;
+ TRACE_ERROR("processing output error, set error on h1c/h1s",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+ }
+
+ nextblk:
+ total += vlen;
+ count -= vlen;
+ if (sz == vlen)
+ blk = htx_remove_blk(chn_htx, blk);
+ else {
+ htx_cut_data_blk(chn_htx, blk, vlen);
+ break;
+ }
+ }
+
+ copy:
+ /* when the output buffer is empty, tmp shares the same area so that we
+ * only have to update pointers and lengths.
+ */
+ if (tmp.area == h1c->obuf.area + h1c->obuf.head)
+ h1c->obuf.data = tmp.data;
+ else
+ b_putblk(&h1c->obuf, tmp.area, tmp.data);
+
+ htx_to_buf(chn_htx, buf);
+ out:
+ if (!buf_room_for_htx_data(&h1c->obuf)) {
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ }
+ end:
+ /* Both the request and the response reached the DONE state. So set EOI
+ * flag on the stream connector. Most of the time, the flag will already be set,
+ * except for protocol upgrades. Report an error if data remains blocked
+ * in the output buffer.
+ */
+ if (h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE) {
+ if (!htx_is_empty(chn_htx)) {
+ h1c->flags |= H1C_F_ST_ERROR;
+ TRACE_ERROR("txn done but data waiting to be sent, set error on h1c", H1_EV_H1C_ERR, h1c->conn, h1s);
+ }
+ se_fl_set(h1s->sd, SE_FL_EOI);
+ }
+
+ TRACE_LEAVE(H1_EV_TX_DATA, h1c->conn, h1s, chn_htx, (size_t[]){total});
+ return total;
+
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto copy;
+}
+
+/*********************************************************/
+/* functions below are I/O callbacks from the connection */
+/*********************************************************/
+static void h1_wake_stream_for_recv(struct h1s *h1s)
+{
+ if (h1s && h1s->subs && h1s->subs->events & SUB_RETRY_RECV) {
+ TRACE_POINT(H1_EV_STRM_WAKE, h1s->h1c->conn, h1s);
+ tasklet_wakeup(h1s->subs->tasklet);
+ h1s->subs->events &= ~SUB_RETRY_RECV;
+ if (!h1s->subs->events)
+ h1s->subs = NULL;
+ }
+}
+static void h1_wake_stream_for_send(struct h1s *h1s)
+{
+ if (h1s && h1s->subs && h1s->subs->events & SUB_RETRY_SEND) {
+ TRACE_POINT(H1_EV_STRM_WAKE, h1s->h1c->conn, h1s);
+ tasklet_wakeup(h1s->subs->tasklet);
+ h1s->subs->events &= ~SUB_RETRY_SEND;
+ if (!h1s->subs->events)
+ h1s->subs = NULL;
+ }
+}
+
+/* alerts the data layer following this sequence:
+ * - if the h1s' data layer is subscribed to recv, then it's woken up for recv
+ * - if it's subscribed to send, then it's woken up for send
+ * - if it was subscribed to neither, its ->wake() callback is called
+ */
+static void h1_alert(struct h1s *h1s)
+{
+ if (h1s->subs) {
+ h1_wake_stream_for_recv(h1s);
+ h1_wake_stream_for_send(h1s);
+ }
+ else if (h1s_sc(h1s) && h1s_sc(h1s)->app_ops->wake != NULL) {
+ TRACE_POINT(H1_EV_STRM_WAKE, h1s->h1c->conn, h1s);
+ h1s_sc(h1s)->app_ops->wake(h1s_sc(h1s));
+ }
+}
+
+/* Try to send an HTTP error with h1c->errcode status code. It returns 1 on success
+ * and 0 on error. The flag H1C_F_ERR_PENDING is set on the H1 connection for
+ * retryable errors (allocation error or buffer full). On success, the error is
+ * copied into the output buffer.
+ */
+static int h1_send_error(struct h1c *h1c)
+{
+ int rc = http_get_status_idx(h1c->errcode);
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_H1C_ERR, h1c->conn, 0, 0, (size_t[]){h1c->errcode});
+
+ /* Verify if the error is mapped on /dev/null or any empty file.
+ * XXX: this should be moved to a dedicated function. */
+ if (h1c->px->replies[rc] &&
+ h1c->px->replies[rc]->type == HTTP_REPLY_ERRMSG &&
+ h1c->px->replies[rc]->body.errmsg &&
+ b_is_null(h1c->px->replies[rc]->body.errmsg)) {
+ /* Empty error, so claim a success */
+ ret = 1;
+ goto out;
+ }
+
+ if (h1c->flags & (H1C_F_OUT_ALLOC|H1C_F_OUT_FULL)) {
+ h1c->flags |= H1C_F_ERR_PENDING;
+ goto out;
+ }
+
+ if (!h1_get_buf(h1c, &h1c->obuf)) {
+ h1c->flags |= (H1C_F_OUT_ALLOC|H1C_F_ERR_PENDING);
+ TRACE_STATE("waiting for h1c obuf allocation", H1_EV_H1C_ERR|H1_EV_H1C_BLK, h1c->conn);
+ goto out;
+ }
+ ret = b_istput(&h1c->obuf, ist(http_err_msgs[rc]));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ h1c->flags |= (H1C_F_OUT_FULL|H1C_F_ERR_PENDING);
+ TRACE_STATE("h1c obuf full", H1_EV_H1C_ERR|H1_EV_H1C_BLK, h1c->conn);
+ goto out;
+ }
+ else {
+ /* we cannot report this error, so claim a success */
+ ret = 1;
+ }
+ }
+ h1c->flags &= ~H1C_F_ERR_PENDING;
+ out:
+ TRACE_LEAVE(H1_EV_H1C_ERR, h1c->conn);
+ return ret;
+}
+
+/* Try to send a 500 internal error. It relies on h1_send_error to send the
+ * error. This function takes care of incrementing stats and tracked counters.
+ */
+static int h1_handle_internal_err(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 1;
+
+ session_inc_http_req_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[5]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+
+ h1c->errcode = 500;
+ ret = h1_send_error(h1c);
+ sess_log(sess);
+ return ret;
+}
+
+/* Try to send an error because of a parsing error. By default a 400 bad request
+ * error is returned. But the status code may be specified by setting
+ * h1c->errcode. It relies on h1_send_error to send the error. This function
+ * takes care of incrementing stats and tracked counters.
+ */
+static int h1_handle_parsing_error(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 1;
+
+ if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB)))
+ goto end;
+
+ session_inc_http_req_ctr(sess);
+ session_inc_http_err_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[4]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ if (!h1c->errcode)
+ h1c->errcode = 400;
+ ret = h1_send_error(h1c);
+ if (b_data(&h1c->ibuf) || !(sess->fe->options & PR_O_NULLNOLOG))
+ sess_log(sess);
+
+ end:
+ return ret;
+}
+
+/* Try to send a 501 not implemented error. It relies on h1_send_error to send
+ * the error. This function takes care of incrementing stats and tracked
+ * counters.
+ */
+static int h1_handle_not_impl_err(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 1;
+
+ if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB)))
+ goto end;
+
+ session_inc_http_req_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[4]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ h1c->errcode = 501;
+ ret = h1_send_error(h1c);
+ if (b_data(&h1c->ibuf) || !(sess->fe->options & PR_O_NULLNOLOG))
+ sess_log(sess);
+
+ end:
+ return ret;
+}
+
+/* Try to send a 408 timeout error. It relies on h1_send_error to send the
+ * error. This function takes care of incrementing stats and tracked counters.
+ */
+static int h1_handle_req_tout(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 1;
+
+ if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB)))
+ goto end;
+
+ session_inc_http_req_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[4]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ h1c->errcode = 408;
+ ret = h1_send_error(h1c);
+ if (b_data(&h1c->ibuf) || !(sess->fe->options & PR_O_NULLNOLOG))
+ sess_log(sess);
+
+ end:
+ return ret;
+}
+
+
+/*
+ * Attempt to read data, and subscribe if none available
+ */
+static int h1_recv(struct h1c *h1c)
+{
+ struct connection *conn = h1c->conn;
+ size_t ret = 0, max;
+ int flags = 0;
+
+ TRACE_ENTER(H1_EV_H1C_RECV, h1c->conn);
+
+ if (h1c->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("leaving on sub_recv", H1_EV_H1C_RECV, h1c->conn);
+ return (b_data(&h1c->ibuf));
+ }
+
+ if ((h1c->flags & H1C_F_WANT_SPLICE) || !h1_recv_allowed(h1c)) {
+ TRACE_DEVEL("leaving on (want_splice|!recv_allowed)", H1_EV_H1C_RECV, h1c->conn);
+ return 1;
+ }
+
+ if (!h1_get_buf(h1c, &h1c->ibuf)) {
+ h1c->flags |= H1C_F_IN_ALLOC;
+ TRACE_STATE("waiting for h1c ibuf allocation", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ /*
+ * If we only have a small amount of data, realign it:
+ * it's probably cheaper than doing 2 recv() calls.
+ */
+ if (b_data(&h1c->ibuf) > 0 && b_data(&h1c->ibuf) < 128)
+ b_slow_realign_ofs(&h1c->ibuf, trash.area, sizeof(struct htx));
+
+ max = buf_room_for_htx_data(&h1c->ibuf);
+
+ /* avoid useless reads after first responses */
+ if (!h1c->h1s ||
+ (!(h1c->flags & H1C_F_IS_BACK) && h1c->h1s->req.state == H1_MSG_RQBEFORE) ||
+ ((h1c->flags & H1C_F_IS_BACK) && h1c->h1s->res.state == H1_MSG_RPBEFORE)) {
+ flags |= CO_RFL_READ_ONCE;
+
+ /* we know that the first read will be constrained to a smaller
+ * read by the stream layer in order to respect the reserve.
+ * Reading too much will result in global.tune.maxrewrite being
+ * left at the end of the buffer, and in a very small read
+ * being performed again to complete them (typically 16 bytes
+ * freed in the index after headers were consumed) before
+ * another larger read. Instead, given that we know we're
+ * waiting for a header and we'll be limited, let's perform a
+ * shorter first read that the upper layer can retrieve by just
+ * a pointer swap and the next read will be doable at once in
+ * an empty buffer.
+ */
+ if (max > global.tune.bufsize - global.tune.maxrewrite)
+ max = global.tune.bufsize - global.tune.maxrewrite;
+ }
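+
+ /* e.g. assuming the default tune.bufsize (16384) and
+ * tune.maxrewrite (1024), this first read is capped at 15360
+ * bytes so that the reserve mentioned above remains available.
+ */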
+
+ if (max) {
+ if (h1c->flags & H1C_F_IN_FULL) {
+ h1c->flags &= ~H1C_F_IN_FULL;
+ TRACE_STATE("h1c ibuf not full anymore", H1_EV_H1C_RECV|H1_EV_H1C_BLK);
+ }
+
+ if (!b_data(&h1c->ibuf)) {
+ /* try to pre-align the buffer like the rxbufs will be
+ * to optimize memory copies.
+ */
+ h1c->ibuf.head = sizeof(struct htx);
+ }
+ ret = conn->xprt->rcv_buf(conn, conn->xprt_ctx, &h1c->ibuf, max, flags);
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_in, ret);
+ }
+ if (max && !ret && h1_recv_allowed(h1c)) {
+ TRACE_STATE("failed to receive data, subscribing", H1_EV_H1C_RECV, h1c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ else {
+ h1_wake_stream_for_recv(h1c->h1s);
+ TRACE_DATA("data received", H1_EV_H1C_RECV, h1c->conn, 0, 0, (size_t[]){ret});
+ }
+
+ if (!b_data(&h1c->ibuf))
+ h1_release_buf(h1c, &h1c->ibuf);
+ else if (!buf_room_for_htx_data(&h1c->ibuf)) {
+ h1c->flags |= H1C_F_IN_FULL;
+ TRACE_STATE("h1c ibuf full", H1_EV_H1C_RECV|H1_EV_H1C_BLK);
+ }
+
+ TRACE_LEAVE(H1_EV_H1C_RECV, h1c->conn);
+ return !!ret || (conn->flags & CO_FL_ERROR) || conn_xprt_read0_pending(conn);
+}
+
+
+/*
+ * Try to send data if possible
+ */
+static int h1_send(struct h1c *h1c)
+{
+ struct connection *conn = h1c->conn;
+ unsigned int flags = 0;
+ size_t ret;
+ int sent = 0;
+
+ TRACE_ENTER(H1_EV_H1C_SEND, h1c->conn);
+
+ if (conn->flags & CO_FL_ERROR) {
+ TRACE_DEVEL("leaving on connection error", H1_EV_H1C_SEND, h1c->conn);
+ b_reset(&h1c->obuf);
+ return 1;
+ }
+
+ if (!b_data(&h1c->obuf))
+ goto end;
+
+ if (h1c->flags & H1C_F_CO_MSG_MORE)
+ flags |= CO_SFL_MSG_MORE;
+ if (h1c->flags & H1C_F_CO_STREAMER)
+ flags |= CO_SFL_STREAMER;
+
+ ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, &h1c->obuf, b_data(&h1c->obuf), flags);
+ if (ret > 0) {
+ TRACE_DATA("data sent", H1_EV_H1C_SEND, h1c->conn, 0, 0, (size_t[]){ret});
+ if (h1c->flags & H1C_F_OUT_FULL) {
+ h1c->flags &= ~H1C_F_OUT_FULL;
+ TRACE_STATE("h1c obuf not full anymore", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn);
+ }
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_out, ret);
+ b_del(&h1c->obuf, ret);
+ sent = 1;
+ }
+
+ if (conn->flags & (CO_FL_ERROR|CO_FL_SOCK_WR_SH)) {
+ TRACE_DEVEL("connection error or output closed", H1_EV_H1C_SEND, h1c->conn);
+ /* error or output closed, nothing to send, clear the buffer to release it */
+ b_reset(&h1c->obuf);
+ }
+
+ end:
+ if (!(h1c->flags & (H1C_F_OUT_FULL|H1C_F_OUT_ALLOC)))
+ h1_wake_stream_for_send(h1c->h1s);
+
+ /* We're done, no more to send */
+ if (!b_data(&h1c->obuf)) {
+ TRACE_DEVEL("leaving with everything sent", H1_EV_H1C_SEND, h1c->conn);
+ h1_release_buf(h1c, &h1c->obuf);
+ if (h1c->flags & H1C_F_ST_SHUTDOWN) {
+ TRACE_STATE("process pending shutdown for writes", H1_EV_H1C_SEND, h1c->conn);
+ h1_shutw_conn(conn);
+ }
+ }
+ else if (!(h1c->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_STATE("more data to send, subscribing", H1_EV_H1C_SEND, h1c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_SEND, &h1c->wait_event);
+ }
+
+ TRACE_LEAVE(H1_EV_H1C_SEND, h1c->conn);
+ return sent;
+}
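+
+/* Note: CO_SFL_MSG_MORE set above is only a hint telling the transport
+ * layer that more data will follow shortly; on a plain TCP transport this
+ * typically translates into the MSG_MORE send flag (assumed behaviour of
+ * the underlying xprt, mentioned here for context).
+ */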
+
+/* callback called on any event by the connection handler.
+ * It applies changes and returns zero, or < 0 if it wants immediate
+ * destruction of the connection.
+ */
+static int h1_process(struct h1c * h1c)
+{
+ struct connection *conn = h1c->conn;
+ struct h1s *h1s = h1c->h1s;
+
+ TRACE_ENTER(H1_EV_H1C_WAKE, conn);
+
+ /* Try to parse now the first block of a request, creating the H1 stream if necessary */
+ if (b_data(&h1c->ibuf) && /* Input data to be processed */
+ (h1c->flags & H1C_F_ST_ALIVE) && !(h1c->flags & H1C_F_ST_READY) && /* ST_IDLE/ST_EMBRYONIC or ST_ATTACH but not ST_READY */
+ !(h1c->flags & (H1C_F_IN_SALLOC|H1C_F_ST_ERROR))) { /* No allocation failure on the stream rxbuf and no ERROR on the H1C */
+ struct buffer *buf;
+ size_t count;
+
+ /* When it happens for a backend connection, we may release it (it is probably a 408) */
+ if (h1c->flags & H1C_F_IS_BACK)
+ goto release;
+
+ /* First of all handle H1 to H2 upgrade (no need to create the H1 stream) */
+ if (!(h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* First request */
+ !(h1c->px->options2 & PR_O2_NO_H2_UPGRADE) && /* H2 upgrade supported by the proxy */
+ !(conn->mux->flags & MX_FL_NO_UPG)) { /* the current mux supports upgrades */
+ /* Try to match H2 preface before parsing the request headers. */
+ if (b_isteq(&h1c->ibuf, 0, b_data(&h1c->ibuf), ist(H2_CONN_PREFACE)) > 0) {
+ h1c->flags |= H1C_F_UPG_H2C;
+ if (h1c->flags & H1C_F_ST_ATTACHED) {
+				/* Force the REOS here to be sure to release the SC.
+				 * Here ATTACHED implies !READY, and h1s is defined.
+				 */
+ BUG_ON(!h1s || (h1c->flags & H1C_F_ST_READY));
+ h1s->flags |= H1S_F_REOS;
+ }
+			TRACE_STATE("release h1c to perform H2 upgrade", H1_EV_RX_DATA|H1_EV_H1C_WAKE);
+ goto release;
+ }
+ }
+
+ /* Create the H1 stream if not already there */
+ if (!h1s) {
+ h1s = h1c_frt_stream_new(h1c, NULL, h1c->conn->owner);
+ if (!h1s) {
+ b_reset(&h1c->ibuf);
+ h1c->flags = (h1c->flags & ~(H1C_F_ST_IDLE|H1C_F_WAIT_NEXT_REQ)) | H1C_F_ST_ERROR;
+ goto no_parsing;
+ }
+ }
+
+ if (h1s->sess->t_idle == -1)
+ h1s->sess->t_idle = tv_ms_elapsed(&h1s->sess->tv_accept, &now) - h1s->sess->t_handshake;
+
+ /* Get the stream rxbuf */
+ buf = h1_get_buf(h1c, &h1s->rxbuf);
+ if (!buf) {
+ h1c->flags |= H1C_F_IN_SALLOC;
+ TRACE_STATE("waiting for stream rxbuf allocation", H1_EV_H1C_WAKE|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ count = (buf->size - sizeof(struct htx) - global.tune.maxrewrite);
+ h1_process_demux(h1c, buf, count);
+ h1_release_buf(h1c, &h1s->rxbuf);
+ h1_set_idle_expiration(h1c);
+
+ no_parsing:
+ if (h1c->flags & H1C_F_ST_ERROR) {
+ h1_handle_internal_err(h1c);
+ h1c->flags &= ~(H1C_F_ST_IDLE|H1C_F_WAIT_NEXT_REQ);
+ TRACE_ERROR("internal error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR);
+ }
+ else if (h1s->flags & H1S_F_PARSING_ERROR) {
+ h1_handle_parsing_error(h1c);
+ h1c->flags = (h1c->flags & ~(H1C_F_ST_IDLE|H1C_F_WAIT_NEXT_REQ)) | H1C_F_ST_ERROR;
+ TRACE_ERROR("parsing error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR);
+ }
+ else if (h1s->flags & H1S_F_NOT_IMPL_ERROR) {
+ h1_handle_not_impl_err(h1c);
+ h1c->flags = (h1c->flags & ~(H1C_F_ST_IDLE|H1C_F_WAIT_NEXT_REQ)) | H1C_F_ST_ERROR;
+ TRACE_ERROR("not-implemented error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR);
+ }
+ }
+ h1_send(h1c);
+
+ /* H1 connection must be released ASAP if:
+ * - an error occurred on the connection or the H1C or
+ * - a read0 was received or
+ * - a silent shutdown was emitted and all outgoing data sent
+ */
+ if ((conn->flags & CO_FL_ERROR) ||
+ conn_xprt_read0_pending(conn) ||
+ (h1c->flags & H1C_F_ST_ERROR) ||
+ ((h1c->flags & H1C_F_ST_SILENT_SHUT) && !b_data(&h1c->obuf))) {
+ if (!(h1c->flags & H1C_F_ST_READY)) {
+ /* No stream connector or not ready */
+			/* shutdown for reads and error on the frontend connection: send an error */
+ if (!(h1c->flags & (H1C_F_IS_BACK|H1C_F_ST_ERROR|H1C_F_ST_SHUTDOWN))) {
+ if (h1_handle_parsing_error(h1c))
+ h1_send(h1c);
+ h1c->flags = (h1c->flags & ~(H1C_F_ST_IDLE|H1C_F_WAIT_NEXT_REQ)) | H1C_F_ST_ERROR;
+ }
+
+ /* Handle pending error, if any (only possible on frontend connection) */
+ if (h1c->flags & H1C_F_ERR_PENDING) {
+ BUG_ON(h1c->flags & H1C_F_IS_BACK);
+ if (h1_send_error(h1c))
+ h1_send(h1c);
+ }
+
+ /* If there is some pending outgoing data or error, just wait */
+ if (b_data(&h1c->obuf) || (h1c->flags & H1C_F_ERR_PENDING))
+ goto end;
+
+ /* Otherwise we can release the H1 connection */
+ goto release;
+ }
+ else {
+ /* Here there is still a H1 stream with a stream connector.
+ * Report the connection state at the stream level
+ */
+ if (conn_xprt_read0_pending(conn)) {
+ h1s->flags |= H1S_F_REOS;
+ TRACE_STATE("read0 on connection", H1_EV_H1C_RECV, conn, h1s);
+ }
+ if ((h1c->flags & H1C_F_ST_ERROR) || ((conn->flags & CO_FL_ERROR) &&
+ (se_fl_test(h1s->sd, SE_FL_EOI | SE_FL_EOS) || !b_data(&h1c->ibuf))))
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_POINT(H1_EV_STRM_WAKE, h1c->conn, h1s);
+ h1_alert(h1s);
+ }
+ }
+
+ if (!b_data(&h1c->ibuf))
+ h1_release_buf(h1c, &h1c->ibuf);
+
+ /* Check if a soft-stop is in progress.
+ * Release idling front connection if this is the case.
+ */
+ if (!(h1c->flags & H1C_F_IS_BACK)) {
+ if (unlikely(h1c->px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ if (!(h1c->px->options & PR_O_IDLE_CLOSE_RESP) &&
+ h1c->flags & H1C_F_WAIT_NEXT_REQ) {
+
+ int send_close = 1;
+				/* If a close-spread-time option is set, we want to avoid
+				 * closing all the active HTTP connections at once so we add a
+				 * random factor that will spread the closing.
+				 */
+ if (tick_isset(global.close_spread_end)) {
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* This should increase the closing rate the
+ * further along the window we are.
+ */
+ send_close = (remaining_window <= statistical_prng_range(global.close_spread_time));
+ }
+ }
+ else if (global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE)
+					send_close = 0; /* let the client close its connection itself */
+ if (send_close)
+ goto release;
+ }
+ }
+ }
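+
+	/* Illustration of the spreading above (assumed numbers): with
+	 * "close-spread-time 10s", a connection evaluated while 2s remain in
+	 * the window gets send_close set with probability ~80%, since the
+	 * uniform draw over the whole window falls at or above the remaining
+	 * 2s about 8 times out of 10; closures thus accelerate as the window
+	 * end approaches.
+	 */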
+
+ if ((h1c->flags & H1C_F_WANT_SPLICE) && !h1s_data_pending(h1s)) {
+ TRACE_DEVEL("xprt rcv_buf blocked (want_splice), notify h1s for recv", H1_EV_H1C_RECV, h1c->conn);
+ h1_wake_stream_for_recv(h1s);
+ }
+
+ end:
+ h1_refresh_timeout(h1c);
+ TRACE_LEAVE(H1_EV_H1C_WAKE, conn);
+ return 0;
+
+ release:
+ if (h1c->flags & H1C_F_ST_ATTACHED) {
+ /* Don't release the H1 connection right now, we must destroy the
+ * attached SC first. Here, the H1C must not be READY */
+ BUG_ON(!h1s || h1c->flags & H1C_F_ST_READY);
+
+ if (conn_xprt_read0_pending(conn) || (h1s->flags & H1S_F_REOS))
+ se_fl_set(h1s->sd, SE_FL_EOS);
+ if ((h1c->flags & H1C_F_ST_ERROR) || (conn->flags & CO_FL_ERROR))
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ h1_alert(h1s);
+ TRACE_DEVEL("waiting to release the SC before releasing the connection", H1_EV_H1C_WAKE);
+ }
+ else {
+ h1_release(h1c);
+ TRACE_DEVEL("leaving after releasing the connection", H1_EV_H1C_WAKE);
+ }
+ return -1;
+}
+
+struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn;
+ struct tasklet *tl = (struct tasklet *)t;
+ int conn_in_list;
+ struct h1c *h1c = ctx;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (tl->context == NULL) {
+ /* The connection has been taken over by another thread,
+ * we're no longer responsible for it, so just free the
+ * tasklet, and do nothing.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ return NULL;
+ }
+ conn = h1c->conn;
+ TRACE_POINT(H1_EV_H1C_WAKE, conn);
+
+ /* Remove the connection from the list, to be sure nobody attempts
+ * to use it while we handle the I/O events
+ */
+ conn_in_list = conn_get_idle_flag(conn);
+ if (conn_in_list)
+ conn_delete_from_tree(&conn->hash_node->node);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ /* we're certain the connection was not in an idle list */
+ conn = h1c->conn;
+ TRACE_ENTER(H1_EV_H1C_WAKE, conn);
+ conn_in_list = 0;
+ }
+
+ if (!(h1c->wait_event.events & SUB_RETRY_SEND))
+ ret = h1_send(h1c);
+ if (!(h1c->wait_event.events & SUB_RETRY_RECV))
+ ret |= h1_recv(h1c);
+ if (ret || b_data(&h1c->ibuf))
+ ret = h1_process(h1c);
+
+	/* If we were in an idle list, we want to add it back into it,
+	 * unless h1_process() returned -1, which means it has destroyed
+	 * the connection (testing !ret is enough: if h1_process() wasn't
+	 * called then ret will be 0 anyway).
+	 */
+ if (ret < 0)
+ t = NULL;
+
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (conn_in_list == CO_FL_SAFE_LIST)
+ eb64_insert(&srv->per_thr[tid].safe_conns, &conn->hash_node->node);
+ else
+ eb64_insert(&srv->per_thr[tid].idle_conns, &conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ return t;
+}
+
+static int h1_wake(struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+ int ret;
+
+ TRACE_POINT(H1_EV_H1C_WAKE, conn);
+
+ h1_send(h1c);
+ ret = h1_process(h1c);
+ if (ret == 0) {
+ struct h1s *h1s = h1c->h1s;
+
+ if (h1c->flags & H1C_F_ST_ATTACHED)
+ h1_alert(h1s);
+ }
+ return ret;
+}
+
+/* Connection timeout management. The principle is that if nothing is received
+ * nor sent for a certain amount of time, the connection is closed.
+ */
+struct task *h1_timeout_task(struct task *t, void *context, unsigned int state)
+{
+ struct h1c *h1c = context;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(H1_EV_H1C_WAKE, h1c ? h1c->conn : NULL);
+
+ if (h1c) {
+ /* Make sure nobody stole the connection from us */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+		/* Somebody already stole the connection from us, so we should not
+		 * free it; we just have to free the task.
+		 */
+ if (!t->context) {
+ h1c = NULL;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ goto do_leave;
+ }
+
+ if (!expired) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("leaving (not expired)", H1_EV_H1C_WAKE, h1c->conn, h1c->h1s);
+ return t;
+ }
+
+		/* If a stream connector is still attached to the mux and ready,
+		 * wait for the stream's timeout
+		 */
+ if (h1c->flags & H1C_F_ST_READY) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ t->expire = TICK_ETERNITY;
+ TRACE_DEVEL("leaving (SC still attached)", H1_EV_H1C_WAKE, h1c->conn, h1c->h1s);
+ return t;
+ }
+
+ /* Try to send an error to the client */
+ if (!(h1c->flags & (H1C_F_IS_BACK|H1C_F_ST_ERROR|H1C_F_ERR_PENDING|H1C_F_ST_SHUTDOWN))) {
+ h1c->flags = (h1c->flags & ~H1C_F_ST_IDLE) | H1C_F_ST_ERROR;
+ TRACE_DEVEL("timeout error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR, h1c->conn, h1c->h1s);
+ if (h1_handle_req_tout(h1c))
+ h1_send(h1c);
+ if (b_data(&h1c->obuf) || (h1c->flags & H1C_F_ERR_PENDING)) {
+ h1_refresh_timeout(h1c);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ return t;
+ }
+ }
+
+ if (h1c->flags & H1C_F_ST_ATTACHED) {
+ /* Don't release the H1 connection right now, we must destroy the
+ * attached SC first. Here, the H1C must not be READY */
+ se_fl_set(h1c->h1s->sd, SE_FL_EOS | SE_FL_ERROR);
+ h1_alert(h1c->h1s);
+ h1_refresh_timeout(h1c);
+			HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("waiting to release the SC before releasing the connection", H1_EV_H1C_WAKE);
+ return t;
+ }
+
+ /* We're about to destroy the connection, so make sure nobody attempts
+ * to steal it from us.
+ */
+ if (h1c->conn->flags & CO_FL_LIST_MASK)
+ conn_delete_from_tree(&h1c->conn->hash_node->node);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ do_leave:
+ task_destroy(t);
+
+ if (!h1c) {
+ /* resources were already deleted */
+		TRACE_DEVEL("leaving (no more h1c)", H1_EV_H1C_WAKE);
+ return NULL;
+ }
+
+ h1c->task = NULL;
+ h1_release(h1c);
+ TRACE_LEAVE(H1_EV_H1C_WAKE);
+ return NULL;
+}
+
+/*******************************************/
+/* functions below are used by the streams */
+/*******************************************/
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int h1_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct h1c *h1c = conn->ctx;
+ struct h1s *h1s;
+
+	/* this connection is no longer idle (if it ever was) */
+ h1c->flags &= ~H1C_F_ST_SILENT_SHUT;
+
+ TRACE_ENTER(H1_EV_STRM_NEW, conn);
+ if (h1c->flags & H1C_F_ST_ERROR) {
+ TRACE_ERROR("h1c on error", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, conn);
+ goto err;
+ }
+
+ h1s = h1c_bck_stream_new(h1c, sd->sc, sess);
+ if (h1s == NULL) {
+ TRACE_ERROR("h1s creation failure", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, conn);
+ goto err;
+ }
+
+ /* the connection is not idle anymore, let's mark this */
+ HA_ATOMIC_AND(&h1c->wait_event.tasklet->state, ~TASK_F_USR1);
+ xprt_set_used(conn, conn->xprt, conn->xprt_ctx);
+
+ TRACE_LEAVE(H1_EV_STRM_NEW, conn, h1s);
+ return 0;
+ err:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, conn);
+ return -1;
+}
+
+/* Retrieves a valid stream connector from this connection, or returns NULL.
+ * For this mux, it's easy as we can only store a single stream connector.
+ */
+static struct stconn *h1_get_first_sc(const struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+ struct h1s *h1s = h1c->h1s;
+
+ if (h1s)
+ return h1s_sc(h1s);
+
+ return NULL;
+}
+
+static void h1_destroy(void *ctx)
+{
+ struct h1c *h1c = ctx;
+
+ TRACE_POINT(H1_EV_H1C_END, h1c->conn);
+ if (!h1c->h1s || h1c->conn->ctx != h1c)
+ h1_release(h1c);
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void h1_detach(struct sedesc *sd)
+{
+ struct h1s *h1s = sd->se;
+ struct h1c *h1c;
+ struct session *sess;
+ int is_not_first;
+
+ TRACE_ENTER(H1_EV_STRM_END, h1s ? h1s->h1c->conn : NULL, h1s);
+
+ if (!h1s) {
+ TRACE_LEAVE(H1_EV_STRM_END);
+ return;
+ }
+
+ sess = h1s->sess;
+ h1c = h1s->h1c;
+
+ sess->accept_date = date;
+ sess->tv_accept = now;
+ sess->t_handshake = 0;
+ sess->t_idle = -1;
+
+ is_not_first = h1s->flags & H1S_F_NOT_FIRST;
+ h1s_destroy(h1s);
+
+ if ((h1c->flags & (H1C_F_IS_BACK|H1C_F_ST_IDLE)) == (H1C_F_IS_BACK|H1C_F_ST_IDLE)) {
+ /* this connection may be killed at any moment, we want it to
+ * die "cleanly" (i.e. only an RST).
+ */
+ h1c->flags |= H1C_F_ST_SILENT_SHUT;
+
+		/* If there is any excess server data in the input buffer,
+		 * release it and close the connection ASAP (some data may
+		 * remain in the output buffer). This happens if a server sends
+		 * invalid responses. In such a case, we don't want to reuse
+		 * the connection.
+		 */
+ if (b_data(&h1c->ibuf)) {
+ h1_release_buf(h1c, &h1c->ibuf);
+ h1c->flags = (h1c->flags & ~H1C_F_ST_IDLE) | H1C_F_ST_SHUTDOWN;
+ TRACE_DEVEL("remaining data on detach, kill connection", H1_EV_STRM_END|H1_EV_H1C_END);
+ goto release;
+ }
+
+ if (h1c->conn->flags & CO_FL_PRIVATE) {
+ /* Add the connection in the session server list, if not already done */
+ if (!session_add_conn(sess, h1c->conn, h1c->conn->target)) {
+ h1c->conn->owner = NULL;
+ h1c->conn->mux->destroy(h1c);
+ goto end;
+ }
+ /* Always idle at this step */
+ if (session_check_idle_conn(sess, h1c->conn)) {
+ /* The connection got destroyed, let's leave */
+ TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END);
+ goto end;
+ }
+ }
+ else {
+ if (h1c->conn->owner == sess)
+ h1c->conn->owner = NULL;
+
+ /* mark that the tasklet may lose its context to another thread and
+ * that the handler needs to check it under the idle conns lock.
+ */
+ HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ xprt_set_idle(h1c->conn, h1c->conn->xprt, h1c->conn->xprt_ctx);
+
+ if (!srv_add_to_idle_list(objt_server(h1c->conn->target), h1c->conn, is_not_first)) {
+ /* The server doesn't want it, let's kill the connection right away */
+ h1c->conn->mux->destroy(h1c);
+ TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END);
+ goto end;
+ }
+ /* At this point, the connection has been added to the
+ * server idle list, so another thread may already have
+ * hijacked it, so we can't do anything with it.
+ */
+ return;
+ }
+ }
+
+ release:
+ /* We don't want to close right now unless the connection is in error or shut down for writes */
+ if ((h1c->flags & H1C_F_ST_ERROR) ||
+ (h1c->conn->flags & (CO_FL_ERROR|CO_FL_SOCK_WR_SH)) ||
+ ((h1c->flags & H1C_F_ST_SHUTDOWN) && !b_data(&h1c->obuf)) ||
+ !h1c->conn->owner) {
+ TRACE_DEVEL("killing dead connection", H1_EV_STRM_END, h1c->conn);
+ h1_release(h1c);
+ }
+ else {
+ if (h1c->flags & H1C_F_ST_IDLE) {
+ /* If we have a new request, process it immediately or
+ * subscribe for reads waiting for new data
+ */
+ if (unlikely(b_data(&h1c->ibuf))) {
+ if (h1_process(h1c) == -1)
+ goto end;
+ }
+ else
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ h1_set_idle_expiration(h1c);
+ h1_refresh_timeout(h1c);
+ }
+ end:
+ TRACE_LEAVE(H1_EV_STRM_END);
+}
+
+
+static void h1_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+
+ if (!h1s)
+ return;
+ h1c = h1s->h1c;
+
+ TRACE_ENTER(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode});
+
+ if (se_fl_test(h1s->sd, SE_FL_SHR))
+ goto end;
+ if (se_fl_test(h1s->sd, SE_FL_KILL_CONN)) {
+ TRACE_STATE("stream wants to kill the connection", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto do_shutr;
+ }
+ if (h1c->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH)) {
+ TRACE_STATE("shutdown on connection (error|rd_sh|wr_sh)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto do_shutr;
+ }
+
+ if (!(h1c->flags & (H1C_F_ST_READY|H1C_F_ST_ERROR))) {
+		/* Here attached is implicit because there is an SC */
+ TRACE_STATE("keep connection alive (ALIVE but not READY nor ERROR)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto end;
+ }
+ if (h1s->flags & H1S_F_WANT_KAL) {
+ TRACE_STATE("keep connection alive (want_kal)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto end;
+ }
+
+ do_shutr:
+ /* NOTE: Be sure to handle abort (cf. h2_shutr) */
+ if (se_fl_test(h1s->sd, SE_FL_SHR))
+ goto end;
+
+ if (conn_xprt_ready(h1c->conn) && h1c->conn->xprt->shutr)
+ h1c->conn->xprt->shutr(h1c->conn, h1c->conn->xprt_ctx, (mode == CO_SHR_DRAIN));
+ end:
+ TRACE_LEAVE(H1_EV_STRM_SHUT, h1c->conn, h1s);
+}
+
+static void h1_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+
+ if (!h1s)
+ return;
+ h1c = h1s->h1c;
+
+ TRACE_ENTER(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode});
+
+ if (se_fl_test(h1s->sd, SE_FL_SHW))
+ goto end;
+ if (se_fl_test(h1s->sd, SE_FL_KILL_CONN)) {
+ TRACE_STATE("stream wants to kill the connection", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto do_shutw;
+ }
+ if (h1c->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH)) {
+ TRACE_STATE("shutdown on connection (error|rd_sh|wr_sh)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto do_shutw;
+ }
+
+ if (!(h1c->flags & (H1C_F_ST_READY|H1C_F_ST_ERROR))) {
+		/* Here attached is implicit because there is an SC */
+ TRACE_STATE("keep connection alive (ALIVE but not READY nor ERROR)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto end;
+ }
+ if (((h1s->flags & H1S_F_WANT_KAL) && h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE)) {
+ TRACE_STATE("keep connection alive (want_kal)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto end;
+ }
+
+ do_shutw:
+ h1c->flags |= H1C_F_ST_SHUTDOWN;
+ if (mode != CO_SHW_NORMAL)
+ h1c->flags |= H1C_F_ST_SILENT_SHUT;
+
+ if (!b_data(&h1c->obuf))
+ h1_shutw_conn(h1c->conn);
+ end:
+ TRACE_LEAVE(H1_EV_STRM_SHUT, h1c->conn, h1s);
+}
+
+static void h1_shutw_conn(struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+
+ if (conn->flags & CO_FL_SOCK_WR_SH)
+ return;
+
+ TRACE_ENTER(H1_EV_H1C_END, conn);
+ conn_xprt_shutw(conn);
+ conn_sock_shutw(conn, (h1c && !(h1c->flags & H1C_F_ST_SILENT_SHUT)));
+
+ if (h1c->wait_event.tasklet && !h1c->wait_event.events)
+ tasklet_wakeup(h1c->wait_event.tasklet);
+
+ TRACE_LEAVE(H1_EV_H1C_END, conn);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int h1_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+
+ if (!h1s)
+ return 0;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h1s->subs && h1s->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ h1s->subs = NULL;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("unsubscribe(recv)", H1_EV_STRM_RECV, h1s->h1c->conn, h1s);
+
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("unsubscribe(send)", H1_EV_STRM_SEND, h1s->h1c->conn, h1s);
+
+ return 0;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0, unless
+ * the stream connector <sc> was already detached, in which case it will return
+ * -1.
+ */
+static int h1_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+
+ if (!h1s)
+ return -1;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h1s->subs && h1s->subs != es);
+
+ es->events |= event_type;
+ h1s->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", H1_EV_STRM_RECV, h1s->h1c->conn, h1s);
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("subscribe(send)", H1_EV_STRM_SEND, h1s->h1c->conn, h1s);
+ /*
+ * If the stconn attempts to subscribe, and the
+ * mux isn't subscribed to the connection, then it
+ * probably means the connection wasn't established
+ * yet, so we have to subscribe.
+ */
+ h1c = h1s->h1c;
+ if (!(h1c->wait_event.events & SUB_RETRY_SEND))
+ h1c->conn->xprt->subscribe(h1c->conn,
+ h1c->conn->xprt_ctx,
+ SUB_RETRY_SEND,
+ &h1c->wait_event);
+ }
+ return 0;
+}
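+
+/* Illustrative caller-side sketch (hypothetical <sc> and <my_we>, not code
+ * from this file): an upper layer that could not receive would register
+ * its wait_event, then drop it once satisfied:
+ *
+ *     conn->mux->subscribe(sc, SUB_RETRY_RECV, &my_we);
+ *     ...
+ *     conn->mux->unsubscribe(sc, SUB_RETRY_RECV, &my_we);
+ */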
+
+/* Called from the upper layer, to receive data.
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use kernel splicing or any kind of mux-to-mux
+ * xfer. If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct the
+ * mux it may optimize the data copy to <buf> if necessary. Otherwise, it should
+ * copy as much data as possible.
+ */
+static size_t h1_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res);
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){count});
+
+ /* Do nothing for now if not READY */
+ if (!(h1c->flags & H1C_F_ST_READY)) {
+ TRACE_DEVEL("h1c not ready yet", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ goto end;
+ }
+
+ if (!(h1c->flags & H1C_F_IN_ALLOC))
+ ret = h1_process_demux(h1c, buf, count);
+ else
+ TRACE_DEVEL("h1c ibuf not allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+
+ if ((flags & CO_RFL_BUF_FLUSH) && se_fl_test(h1s->sd, SE_FL_MAY_SPLICE)) {
+ h1c->flags |= H1C_F_WANT_SPLICE;
+ TRACE_STATE("Block xprt rcv_buf to flush stream's buffer (want_splice)", H1_EV_STRM_RECV, h1c->conn, h1s);
+ }
+ else {
+ if (((flags & CO_RFL_KEEP_RECV) || (h1m->state != H1_MSG_DONE)) && !(h1c->wait_event.events & SUB_RETRY_RECV))
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+
+ end:
+ TRACE_LEAVE(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+
+/* Called from the upper layer, to send data */
+static size_t h1_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+ size_t total = 0;
+
+ if (!h1s)
+ return 0;
+ h1c = h1s->h1c;
+
+ TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){count});
+
+ /* If we're not connected yet, or we're waiting for a handshake, stop
+ * now, as we don't want to remove everything from the channel buffer
+ * before we're sure we can send it.
+ */
+ if (h1c->conn->flags & CO_FL_WAIT_XPRT) {
+ TRACE_LEAVE(H1_EV_STRM_SEND, h1c->conn, h1s);
+ return 0;
+ }
+
+ if (h1c->flags & H1C_F_ST_ERROR) {
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("H1C on error, leaving in error", H1_EV_STRM_SEND|H1_EV_H1C_ERR|H1_EV_H1S_ERR|H1_EV_STRM_ERR, h1c->conn, h1s);
+ return 0;
+ }
+
+ /* Inherit some flags from the upper layer */
+ h1c->flags &= ~(H1C_F_CO_MSG_MORE|H1C_F_CO_STREAMER);
+ if (flags & CO_SFL_MSG_MORE)
+ h1c->flags |= H1C_F_CO_MSG_MORE;
+ if (flags & CO_SFL_STREAMER)
+ h1c->flags |= H1C_F_CO_STREAMER;
+
+ while (count) {
+ size_t ret = 0;
+
+ if (!(h1c->flags & (H1C_F_OUT_FULL|H1C_F_OUT_ALLOC)))
+ ret = h1_process_mux(h1c, buf, count);
+ else
+ TRACE_DEVEL("h1c obuf not allocated", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s);
+
+ if (!ret)
+ break;
+
+ if ((count - ret) > 0)
+ h1c->flags |= H1C_F_CO_MSG_MORE;
+
+ total += ret;
+ count -= ret;
+
+ if ((h1c->wait_event.events & SUB_RETRY_SEND) || !h1_send(h1c))
+ break;
+
+ if ((h1c->conn->flags & (CO_FL_ERROR|CO_FL_SOCK_WR_SH)))
+ break;
+ }
+
+ if ((h1c->flags & H1C_F_ST_ERROR) || ((h1c->conn->flags & CO_FL_ERROR) &&
+ (se_fl_test(h1s->sd, SE_FL_EOI | SE_FL_EOS) || !b_data(&h1c->ibuf)))) {
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("reporting error to the app-layer stream", H1_EV_STRM_SEND|H1_EV_H1S_ERR|H1_EV_STRM_ERR, h1c->conn, h1s);
+ }
+
+ h1_refresh_timeout(h1c);
+ TRACE_LEAVE(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){total});
+ return total;
+}
+
+#if defined(USE_LINUX_SPLICE)
+/* Send and get, using splicing */
+static int h1_rcv_pipe(struct stconn *sc, struct pipe *pipe, unsigned int count)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res);
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){count});
+
+ if ((h1m->flags & H1_MF_CHNK) || (h1m->state != H1_MSG_DATA && h1m->state != H1_MSG_TUNNEL)) {
+ h1c->flags &= ~H1C_F_WANT_SPLICE;
+ TRACE_STATE("Allow xprt rcv_buf on !(msg_data|msg_tunnel)", H1_EV_STRM_RECV, h1c->conn, h1s);
+ goto end;
+ }
+
+ h1c->flags |= H1C_F_WANT_SPLICE;
+ if (h1s_data_pending(h1s)) {
+ TRACE_STATE("flush input buffer before splicing", H1_EV_STRM_RECV, h1c->conn, h1s);
+ goto end;
+ }
+
+ if (!h1_recv_allowed(h1c)) {
+ TRACE_DEVEL("leaving on !recv_allowed", H1_EV_STRM_RECV, h1c->conn, h1s);
+ goto end;
+ }
+
+ if (h1m->state == H1_MSG_DATA && (h1m->flags & H1_MF_CLEN) && count > h1m->curr_len)
+ count = h1m->curr_len;
+ ret = h1c->conn->xprt->rcv_pipe(h1c->conn, h1c->conn->xprt_ctx, pipe, count);
+ if (ret >= 0) {
+ if (h1m->state == H1_MSG_DATA && (h1m->flags & H1_MF_CLEN)) {
+ if (ret > h1m->curr_len) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ h1c->flags |= H1C_F_ST_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("too much payload, more than announced",
+ H1_EV_RX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+ }
+ h1m->curr_len -= ret;
+ if (!h1m->curr_len) {
+ h1m->state = H1_MSG_DONE;
+ h1c->flags &= ~H1C_F_WANT_SPLICE;
+ TRACE_STATE("payload fully received", H1_EV_STRM_RECV, h1c->conn, h1s);
+ }
+ }
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_in, ret);
+ HA_ATOMIC_ADD(&h1c->px_counters->spliced_bytes_in, ret);
+ }
+
+ end:
+ if (conn_xprt_read0_pending(h1c->conn)) {
+ h1s->flags |= H1S_F_REOS;
+ h1c->flags &= ~H1C_F_WANT_SPLICE;
+ TRACE_STATE("Allow xprt rcv_buf on read0", H1_EV_STRM_RECV, h1c->conn, h1s);
+ }
+
+ if (!(h1c->flags & H1C_F_WANT_SPLICE)) {
+ TRACE_STATE("notify the mux can't use splicing anymore", H1_EV_STRM_RECV, h1c->conn, h1s);
+ se_fl_clr(h1s->sd, SE_FL_MAY_SPLICE);
+ if (!(h1c->wait_event.events & SUB_RETRY_RECV)) {
+ TRACE_STATE("restart receiving data, subscribing", H1_EV_STRM_RECV, h1c->conn, h1s);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ }
+
+ TRACE_LEAVE(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+static int h1_snd_pipe(struct stconn *sc, struct pipe *pipe)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){pipe->data});
+
+ if (b_data(&h1c->obuf)) {
+ if (!(h1c->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_STATE("more data to send, subscribing", H1_EV_STRM_SEND, h1c->conn, h1s);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_SEND, &h1c->wait_event);
+ }
+ goto end;
+ }
+
+ ret = h1c->conn->xprt->snd_pipe(h1c->conn, h1c->conn->xprt_ctx, pipe);
+ if (h1m->state == H1_MSG_DATA && (h1m->flags & H1_MF_CLEN)) {
+ if (ret > h1m->curr_len) {
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ h1c->flags |= H1C_F_ST_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("too much payload, more than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+ }
+ h1m->curr_len -= ret;
+ if (!h1m->curr_len) {
+ h1m->state = H1_MSG_DONE;
+ TRACE_STATE("payload fully xferred", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s);
+ }
+ }
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_out, ret);
+ HA_ATOMIC_ADD(&h1c->px_counters->spliced_bytes_out, ret);
+
+ end:
+ TRACE_LEAVE(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+#endif
+
+static int h1_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ const struct h1c *h1c = conn->ctx;
+ int ret = 0;
+
+ switch (mux_ctl) {
+ case MUX_STATUS:
+ if (!(conn->flags & CO_FL_WAIT_XPRT))
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_EXIT_STATUS:
+ if (output)
+ *((int *)output) = h1c->errcode;
+ ret = (h1c->errcode == 408 ? MUX_ES_TOUT_ERR :
+ (h1c->errcode == 501 ? MUX_ES_NOTIMPL_ERR :
+ (h1c->errcode == 500 ? MUX_ES_INTERNAL_ERR :
+ ((h1c->errcode >= 400 && h1c->errcode <= 499) ? MUX_ES_INVALID_ERR :
+ MUX_ES_SUCCESS))));
+ return ret;
+ default:
+ return -1;
+ }
+}
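+
+/* Illustrative caller-side sketch (assumed usage, not code from this
+ * file): after a request timeout, querying the exit status would return
+ * MUX_ES_TOUT_ERR and store 408 in <code>:
+ *
+ *     int code;
+ *     int es = conn->mux->ctl(conn, MUX_EXIT_STATUS, &code);
+ */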
+
+/* for debugging with CLI's "show fd" command */
+static int h1_show_fd(struct buffer *msg, struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+ struct h1s *h1s = h1c->h1s;
+ int ret = 0;
+
+ chunk_appendf(msg, " h1c.flg=0x%x .sub=%d .ibuf=%u@%p+%u/%u .obuf=%u@%p+%u/%u",
+ h1c->flags, h1c->wait_event.events,
+ (unsigned int)b_data(&h1c->ibuf), b_orig(&h1c->ibuf),
+ (unsigned int)b_head_ofs(&h1c->ibuf), (unsigned int)b_size(&h1c->ibuf),
+ (unsigned int)b_data(&h1c->obuf), b_orig(&h1c->obuf),
+ (unsigned int)b_head_ofs(&h1c->obuf), (unsigned int)b_size(&h1c->obuf));
+
+ chunk_appendf(msg, " .task=%p", h1c->task);
+ if (h1c->task) {
+ chunk_appendf(msg, " .exp=%s",
+ h1c->task->expire ? tick_is_expired(h1c->task->expire, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(h1c->task->expire - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ }
+
+ if (h1s) {
+ char *method;
+
+ if (h1s->meth < HTTP_METH_OTHER)
+ method = http_known_methods[h1s->meth].ptr;
+ else
+ method = "UNKNOWN";
+ chunk_appendf(msg, " h1s=%p h1s.flg=0x%x .sd.flg=0x%x .req.state=%s .res.state=%s"
+ " .meth=%s status=%d",
+ h1s, h1s->flags, se_fl_get(h1s->sd),
+ h1m_state_str(h1s->req.state),
+ h1m_state_str(h1s->res.state), method, h1s->status);
+
+ chunk_appendf(msg, " .sd.flg=0x%08x", se_fl_get(h1s->sd));
+ if (!se_fl_test(h1s->sd, SE_FL_ORPHAN))
+ chunk_appendf(msg, " .sc.flg=0x%08x .sc.app=%p",
+ h1s_sc(h1s)->flags, h1s_sc(h1s)->app);
+
+ chunk_appendf(msg, " .subs=%p", h1s->subs);
+ if (h1s->subs) {
+ chunk_appendf(msg, "(ev=%d tl=%p", h1s->subs->events, h1s->subs->tasklet);
+ chunk_appendf(msg, " tl.calls=%d tl.ctx=%p tl.fct=",
+ h1s->subs->tasklet->calls,
+ h1s->subs->tasklet->context);
+ if (h1s->subs->tasklet->calls >= 1000000)
+ ret = 1;
+ resolve_sym_name(msg, NULL, h1s->subs->tasklet->process);
+ chunk_appendf(msg, ")");
+ }
+ }
+ return ret;
+}
+
+
+/* Add an entry in the headers map. Returns -1 on error and 0 on success. */
+static int add_hdr_case_adjust(const char *from, const char *to, char **err)
+{
+ struct h1_hdr_entry *entry;
+
+ /* Be sure there is a non-empty <to> */
+ if (!strlen(to)) {
+ memprintf(err, "expect <to>");
+ return -1;
+ }
+
+ /* Be sure only the case differs between <from> and <to> */
+ if (strcasecmp(from, to) != 0) {
+		memprintf(err, "<from> and <to> must not differ except for the case");
+ return -1;
+ }
+
+	/* Be sure <from> does not already exist in the tree */
+ if (ebis_lookup(&hdrs_map.map, from)) {
+ memprintf(err, "duplicate entry '%s'", from);
+ return -1;
+ }
+
+ /* Create the entry and insert it in the tree */
+ entry = malloc(sizeof(*entry));
+ if (!entry) {
+ memprintf(err, "out of memory");
+ return -1;
+ }
+
+ entry->node.key = strdup(from);
+ entry->name = ist(strdup(to));
+ if (!entry->node.key || !isttest(entry->name)) {
+ free(entry->node.key);
+ istfree(&entry->name);
+ free(entry);
+ memprintf(err, "out of memory");
+ return -1;
+ }
+ ebis_insert(&hdrs_map.map, &entry->node);
+ return 0;
+}
+
+/* Migrate the connection to the current thread.
+ * Return 0 if successful, non-zero otherwise.
+ * Expected to be called with the old thread lock held.
+ */
+static int h1_takeover(struct connection *conn, int orig_tid)
+{
+ struct h1c *h1c = conn->ctx;
+ struct task *task;
+
+ if (fd_takeover(conn->handle.fd, conn) != 0)
+ return -1;
+
+ if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) {
+ /* We failed to takeover the xprt, even if the connection may
+ * still be valid, flag it as error'd, as we have already
+ * taken over the fd, and wake the tasklet, so that it will
+ * destroy it.
+ */
+ conn->flags |= CO_FL_ERROR;
+ tasklet_wakeup_on(h1c->wait_event.tasklet, orig_tid);
+ return -1;
+ }
+
+ if (h1c->wait_event.events)
+ h1c->conn->xprt->unsubscribe(h1c->conn, h1c->conn->xprt_ctx,
+ h1c->wait_event.events, &h1c->wait_event);
+ /* To let the tasklet know it should free itself, and do nothing else,
+ * set its context to NULL.
+ */
+ h1c->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(h1c->wait_event.tasklet, orig_tid);
+
+ task = h1c->task;
+ if (task) {
+ task->context = NULL;
+ h1c->task = NULL;
+ __ha_barrier_store();
+ task_kill(task);
+
+ h1c->task = task_new_here();
+ if (!h1c->task) {
+ h1_release(h1c);
+ return -1;
+ }
+ h1c->task->process = h1_timeout_task;
+ h1c->task->context = h1c;
+ }
+ h1c->wait_event.tasklet = tasklet_new();
+ if (!h1c->wait_event.tasklet) {
+ h1_release(h1c);
+ return -1;
+ }
+ h1c->wait_event.tasklet->process = h1_io_cb;
+ h1c->wait_event.tasklet->context = h1c;
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx,
+ SUB_RETRY_RECV, &h1c->wait_event);
+
+ return 0;
+}
+
+
+static void h1_headers_case_adjust_deinit()
+{
+ struct ebpt_node *node, *next;
+ struct h1_hdr_entry *entry;
+
+ node = ebpt_first(&hdrs_map.map);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ entry = container_of(node, struct h1_hdr_entry, node);
+ free(entry->node.key);
+ istfree(&entry->name);
+ free(entry);
+ node = next;
+ }
+ free(hdrs_map.name);
+}
+
+static int cfg_h1_headers_case_adjust_postparser()
+{
+ FILE *file = NULL;
+ char *c, *key_beg, *key_end, *value_beg, *value_end;
+ char *err;
+ int rc, line = 0, err_code = 0;
+
+ if (!hdrs_map.name)
+ goto end;
+
+ file = fopen(hdrs_map.name, "r");
+ if (!file) {
+ ha_alert("h1-headers-case-adjust-file '%s': failed to open file.\n",
+ hdrs_map.name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+	/* now parse all lines. Each line must contain exactly two header
+	 * names, separated by spaces. All leading and trailing spaces will
+	 * be ignored. Lines starting with a '#' are ignored.
+	 */
+ while (fgets(trash.area, trash.size, file) != NULL) {
+ line++;
+ c = trash.area;
+
+ /* strip leading spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+		/* ignore empty lines, or lines beginning with a '#' */
+ if (*c == '#' || *c == '\0' || *c == '\r' || *c == '\n')
+ continue;
+
+ /* look for the end of the key */
+ key_beg = c;
+ while (*c != '\0' && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
+ c++;
+ key_end = c;
+
+ /* strip middle spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ /* look for the end of the value, it is the end of the line */
+ value_beg = c;
+ while (*c && *c != '\n' && *c != '\r')
+ c++;
+ value_end = c;
+
+ /* trim possibly trailing spaces and tabs */
+ while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t'))
+ value_end--;
+
+ /* set final \0 and check entries */
+ *key_end = '\0';
+ *value_end = '\0';
+
+ err = NULL;
+ rc = add_hdr_case_adjust(key_beg, value_beg, &err);
+ if (rc < 0) {
+ ha_alert("h1-headers-case-adjust-file '%s' : %s at line %d.\n",
+ hdrs_map.name, err, line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(err);
+ goto end;
+ }
+ if (rc > 0) {
+ ha_warning("h1-headers-case-adjust-file '%s' : %s at line %d.\n",
+ hdrs_map.name, err, line);
+ err_code |= ERR_WARN;
+ free(err);
+ }
+ }
+
+ end:
+ if (file)
+ fclose(file);
+	hap_register_post_deinit(h1_headers_case_adjust_deinit);
+ return err_code;
+}
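+
+/* A case-adjustment map file, as parsed above, might look like this
+ * (illustrative content):
+ *
+ *     # one <from> <to> pair per line, differing only by case
+ *     content-length Content-Length
+ *     host Host
+ */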
+
+/* config parser for global "h1-accept-payload-with-any-method" */
+static int cfg_parse_h1_accept_payload_with_any_method(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+ accept_payload_with_any_method = 1;
+ return 0;
+}
+
+
+/* config parser for global "h1-case-adjust" */
+static int cfg_parse_h1_header_case_adjust(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(2, args, err, NULL))
+ return -1;
+ if (!*(args[1]) || !*(args[2])) {
+ memprintf(err, "'%s' expects <from> and <to> as argument.", args[0]);
+ return -1;
+ }
+ return add_hdr_case_adjust(args[1], args[2], err);
+}
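+
+/* Illustrative configuration for the keyword parsed above (the map is
+ * only applied when one of the "option h1-case-adjust-bogus-client" /
+ * "option h1-case-adjust-bogus-server" proxy options is enabled;
+ * "/etc/haproxy/h1-case.map" is a hypothetical path):
+ *
+ *     global
+ *         h1-case-adjust content-length Content-Length
+ *         h1-case-adjust-file /etc/haproxy/h1-case.map
+ */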
+
+/* config parser for global "h1-case-adjust-file" */
+static int cfg_parse_h1_headers_case_adjust_file(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+ if (!*(args[1])) {
+ memprintf(err, "'%s' expects <file> as argument.", args[0]);
+ return -1;
+ }
+ free(hdrs_map.name);
+ hdrs_map.name = strdup(args[1]);
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {{ }, {
+ { CFG_GLOBAL, "h1-accept-payload-with-any-method", cfg_parse_h1_accept_payload_with_any_method },
+ { CFG_GLOBAL, "h1-case-adjust", cfg_parse_h1_header_case_adjust },
+ { CFG_GLOBAL, "h1-case-adjust-file", cfg_parse_h1_headers_case_adjust_file },
+ { 0, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+REGISTER_CONFIG_POSTPARSER("h1-headers-map", cfg_h1_headers_case_adjust_postparser);
+
+
+/****************************************/
+/* MUX initialization and instantiation */
+/****************************************/
+
+/* The mux operations */
+static const struct mux_ops mux_http_ops = {
+ .init = h1_init,
+ .wake = h1_wake,
+ .attach = h1_attach,
+ .get_first_sc = h1_get_first_sc,
+ .detach = h1_detach,
+ .destroy = h1_destroy,
+ .avail_streams = h1_avail_streams,
+ .used_streams = h1_used_streams,
+ .rcv_buf = h1_rcv_buf,
+ .snd_buf = h1_snd_buf,
+#if defined(USE_LINUX_SPLICE)
+ .rcv_pipe = h1_rcv_pipe,
+ .snd_pipe = h1_snd_pipe,
+#endif
+ .subscribe = h1_subscribe,
+ .unsubscribe = h1_unsubscribe,
+ .shutr = h1_shutr,
+ .shutw = h1_shutw,
+ .show_fd = h1_show_fd,
+ .ctl = h1_ctl,
+ .takeover = h1_takeover,
+ .flags = MX_FL_HTX,
+ .name = "H1",
+};
+
+static const struct mux_ops mux_h1_ops = {
+ .init = h1_init,
+ .wake = h1_wake,
+ .attach = h1_attach,
+ .get_first_sc = h1_get_first_sc,
+ .detach = h1_detach,
+ .destroy = h1_destroy,
+ .avail_streams = h1_avail_streams,
+ .used_streams = h1_used_streams,
+ .rcv_buf = h1_rcv_buf,
+ .snd_buf = h1_snd_buf,
+#if defined(USE_LINUX_SPLICE)
+ .rcv_pipe = h1_rcv_pipe,
+ .snd_pipe = h1_snd_pipe,
+#endif
+ .subscribe = h1_subscribe,
+ .unsubscribe = h1_unsubscribe,
+ .shutr = h1_shutr,
+ .shutw = h1_shutw,
+ .show_fd = h1_show_fd,
+ .ctl = h1_ctl,
+ .takeover = h1_takeover,
+ .flags = MX_FL_HTX|MX_FL_NO_UPG,
+ .name = "H1",
+};
+
+/* this mux registers the default HTX proto but also the h1 proto (to be referenced in the conf) */
+static struct mux_proto_list mux_proto_h1 =
+ { .token = IST("h1"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BOTH, .mux = &mux_h1_ops };
+static struct mux_proto_list mux_proto_http =
+ { .token = IST(""), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BOTH, .mux = &mux_http_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_h1);
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_http);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/mux_h2.c b/src/mux_h2.c
new file mode 100644
index 0000000..18a53f5
--- /dev/null
+++ b/src/mux_h2.c
@@ -0,0 +1,7149 @@
+/*
+ * HTTP/2 mux-demux for connections
+ *
+ * Copyright 2017 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <import/ebmbtree.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/h2.h>
+#include <haproxy/hpack-dec.h>
+#include <haproxy/hpack-enc.h>
+#include <haproxy/hpack-tbl.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/session-t.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+
+
+/* dummy streams returned for the closed, error, refused and idle states */
+static const struct h2s *h2_closed_stream;
+static const struct h2s *h2_error_stream;
+static const struct h2s *h2_refused_stream;
+static const struct h2s *h2_idle_stream;
+
+/* Connection flags (32 bit), in h2c->flags */
+#define H2_CF_NONE 0x00000000
+
+/* Flags indicating why writing to the mux is blocked. */
+#define H2_CF_MUX_MALLOC 0x00000001 // mux blocked on lack of connection's mux buffer
+#define H2_CF_MUX_MFULL 0x00000002 // mux blocked on connection's mux buffer full
+#define H2_CF_MUX_BLOCK_ANY 0x00000003 // aggregate of the mux flags above
+
+/* Flags indicating why writing to the demux is blocked.
+ * The first two ones directly affect the ability for the mux to receive data
+ * from the connection. The other ones affect the mux's ability to demux
+ * received data.
+ */
+#define H2_CF_DEM_DALLOC 0x00000004 // demux blocked on lack of connection's demux buffer
+#define H2_CF_DEM_DFULL 0x00000008 // demux blocked on connection's demux buffer full
+
+#define H2_CF_DEM_MBUSY 0x00000010 // demux blocked on connection's mux side busy
+#define H2_CF_DEM_MROOM 0x00000020 // demux blocked on lack of room in mux buffer
+#define H2_CF_DEM_SALLOC 0x00000040 // demux blocked on lack of stream's request buffer
+#define H2_CF_DEM_SFULL 0x00000080 // demux blocked on stream request buffer full
+#define H2_CF_DEM_TOOMANY 0x00000100 // demux blocked waiting for some stream connectors to leave
+#define H2_CF_DEM_BLOCK_ANY 0x000001F0 // aggregate of the demux flags above except DALLOC/DFULL
+ // (SHORT_READ is also excluded)
+
+#define H2_CF_DEM_SHORT_READ 0x00000200 // demux blocked on incomplete frame
+#define H2_CF_DEM_IN_PROGRESS 0x00000400 // demux in progress (dsi,dfl,dft are valid)
+
+/* other flags */
+#define H2_CF_GOAWAY_SENT 0x00001000 // a GOAWAY frame was successfully sent
+#define H2_CF_GOAWAY_FAILED 0x00002000 // a GOAWAY frame failed to be sent
+#define H2_CF_WAIT_FOR_HS 0x00004000 // We did check that at least a stream was waiting for handshake
+#define H2_CF_IS_BACK 0x00008000 // this is an outgoing connection
+#define H2_CF_WINDOW_OPENED 0x00010000 // demux increased window already advertised
+#define H2_CF_RCVD_SHUT 0x00020000 // a recv() attempt already failed on a shutdown
+#define H2_CF_END_REACHED 0x00040000 // pending data too short with RCVD_SHUT present
+
+#define H2_CF_RCVD_RFC8441      0x00100000  // settings from RFC8441 were received, indicating support for Extended CONNECT
+#define H2_CF_SHTS_UPDATED 0x00200000 // SETTINGS_HEADER_TABLE_SIZE updated
+#define H2_CF_DTSU_EMITTED 0x00400000 // HPACK Dynamic Table Size Update opcode emitted
+
+/* H2 connection state, in h2c->st0 */
+enum h2_cs {
+ H2_CS_PREFACE, // init done, waiting for connection preface
+ H2_CS_SETTINGS1, // preface OK, waiting for first settings frame
+ H2_CS_FRAME_H, // first settings frame ok, waiting for frame header
+ H2_CS_FRAME_P, // frame header OK, waiting for frame payload
+ H2_CS_FRAME_A, // frame payload OK, trying to send ACK frame
+ H2_CS_FRAME_E, // frame payload OK, trying to send RST frame
+ H2_CS_ERROR, // send GOAWAY(errcode) and close the connection ASAP
+ H2_CS_ERROR2, // GOAWAY(errcode) sent, close the connection ASAP
+ H2_CS_ENTRIES // must be last
+} __attribute__((packed));
+
+
+/* 32 buffers: one for the ring's root, rest for the mbuf itself */
+#define H2C_MBUF_CNT 32
+
+/* H2 connection descriptor */
+struct h2c {
+ struct connection *conn;
+
+ enum h2_cs st0; /* mux state */
+ enum h2_err errcode; /* H2 err code (H2_ERR_*) */
+
+ /* 16 bit hole here */
+ uint32_t flags; /* connection flags: H2_CF_* */
+ uint32_t streams_limit; /* maximum number of concurrent streams the peer supports */
+ int32_t max_id; /* highest ID known on this connection, <0 before preface */
+ uint32_t rcvd_c; /* newly received data to ACK for the connection */
+ uint32_t rcvd_s; /* newly received data to ACK for the current stream (dsi) or zero */
+
+ /* states for the demux direction */
+ struct hpack_dht *ddht; /* demux dynamic header table */
+ struct buffer dbuf; /* demux buffer */
+
+ int32_t dsi; /* demux stream ID (<0 = idle) */
+ int32_t dfl; /* demux frame length (if dsi >= 0) */
+ int8_t dft; /* demux frame type (if dsi >= 0) */
+ int8_t dff; /* demux frame flags (if dsi >= 0) */
+ uint8_t dpl; /* demux pad length (part of dfl), init to 0 */
+ /* 8 bit hole here */
+ int32_t last_sid; /* last processed stream ID for GOAWAY, <0 before preface */
+
+ /* states for the mux direction */
+ struct buffer mbuf[H2C_MBUF_CNT]; /* mux buffers (ring) */
+ int32_t msi; /* mux stream ID (<0 = idle) */
+	int32_t mfl; /* mux frame length (if msi >= 0) */
+	int8_t  mft; /* mux frame type   (if msi >= 0) */
+	int8_t  mff; /* mux frame flags  (if msi >= 0) */
+ /* 16 bit hole here */
+ int32_t miw; /* mux initial window size for all new streams */
+ int32_t mws; /* mux window size. Can be negative. */
+ int32_t mfs; /* mux's max frame size */
+
+ int timeout; /* idle timeout duration in ticks */
+ int shut_timeout; /* idle timeout duration in ticks after GOAWAY was sent */
+ int idle_start; /* date of the last time the connection went idle */
+ /* 32-bit hole here */
+ unsigned int nb_streams; /* number of streams in the tree */
+ unsigned int nb_sc; /* number of attached stream connectors */
+ unsigned int nb_reserved; /* number of reserved streams */
+ unsigned int stream_cnt; /* total number of streams seen */
+ struct proxy *proxy; /* the proxy this connection was created for */
+ struct task *task; /* timeout management task */
+ struct h2_counters *px_counters; /* h2 counters attached to proxy */
+ struct eb_root streams_by_id; /* all active streams by their ID */
+ struct list send_list; /* list of blocked streams requesting to send */
+ struct list fctl_list; /* list of streams blocked by connection's fctl */
+ struct list blocked_list; /* list of streams blocked for other reasons (e.g. sfctl, dep) */
+ struct buffer_wait buf_wait; /* wait list for buffer allocations */
+ struct wait_event wait_event; /* To be used if we're waiting for I/Os */
+};
+
+/* H2 stream state, in h2s->st */
+enum h2_ss {
+ H2_SS_IDLE = 0, // idle
+ H2_SS_RLOC, // reserved(local)
+ H2_SS_RREM, // reserved(remote)
+ H2_SS_OPEN, // open
+ H2_SS_HREM, // half-closed(remote)
+ H2_SS_HLOC, // half-closed(local)
+ H2_SS_ERROR, // an error needs to be sent using RST_STREAM
+ H2_SS_CLOSED, // closed
+ H2_SS_ENTRIES // must be last
+} __attribute__((packed));
+
+#define H2_SS_MASK(state) (1UL << (state))
+#define H2_SS_IDLE_BIT (1UL << H2_SS_IDLE)
+#define H2_SS_RLOC_BIT (1UL << H2_SS_RLOC)
+#define H2_SS_RREM_BIT (1UL << H2_SS_RREM)
+#define H2_SS_OPEN_BIT (1UL << H2_SS_OPEN)
+#define H2_SS_HREM_BIT (1UL << H2_SS_HREM)
+#define H2_SS_HLOC_BIT (1UL << H2_SS_HLOC)
+#define H2_SS_ERROR_BIT (1UL << H2_SS_ERROR)
+#define H2_SS_CLOSED_BIT (1UL << H2_SS_CLOSED)
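+
+/* These one-bit-per-state encodings allow cheap membership tests over
+ * sets of states. Hypothetical sketch (not a macro defined here): a
+ * stream that can no longer receive could be detected with:
+ *
+ *     if (H2_SS_MASK(h2s->st) & (H2_SS_HREM_BIT | H2_SS_CLOSED_BIT))
+ *         ... no more incoming data expected on this stream ...
+ */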
+
+/* HTTP/2 stream flags (32 bit), in h2s->flags */
+#define H2_SF_NONE 0x00000000
+#define H2_SF_ES_RCVD 0x00000001
+#define H2_SF_ES_SENT 0x00000002
+
+#define H2_SF_RST_RCVD 0x00000004 // received RST_STREAM
+#define H2_SF_RST_SENT 0x00000008 // sent RST_STREAM
+
+/* stream flags indicating the reason the stream is blocked */
+#define H2_SF_BLK_MBUSY 0x00000010 // blocked waiting for mux access (transient)
+#define H2_SF_BLK_MROOM 0x00000020 // blocked waiting for room in the mux (must be in send list)
+#define H2_SF_BLK_MFCTL 0x00000040 // blocked due to mux fctl (must be in fctl list)
+#define H2_SF_BLK_SFCTL 0x00000080 // blocked due to stream fctl (must be in blocked list)
+#define H2_SF_BLK_ANY 0x000000F0 // any of the reasons above
+
+/* stream flags indicating how data is supposed to be sent */
+#define H2_SF_DATA_CLEN 0x00000100 // data sent using content-length
+#define H2_SF_BODYLESS_RESP 0x00000200 /* Bodyless response message */
+#define H2_SF_BODY_TUNNEL 0x00000400 // Attempt to establish a Tunnelled stream (the result depends on the status code)
+
+
+#define H2_SF_NOTIFIED 0x00000800 // a paused stream was notified to try to send again
+#define H2_SF_HEADERS_SENT 0x00001000 // a HEADERS frame was sent for this stream
+#define H2_SF_OUTGOING_DATA 0x00002000 // set whenever we've seen outgoing data
+
+#define H2_SF_HEADERS_RCVD 0x00004000 // a HEADERS frame was received for this stream
+
+#define H2_SF_WANT_SHUTR 0x00008000 // a stream couldn't shutr() (mux full/busy)
+#define H2_SF_WANT_SHUTW 0x00010000 // a stream couldn't shutw() (mux full/busy)
+
+#define H2_SF_EXT_CONNECT_SENT 0x00040000 // rfc 8441 an Extended CONNECT has been sent
+#define H2_SF_EXT_CONNECT_RCVD 0x00080000 // rfc 8441 an Extended CONNECT has been received and parsed
+
+#define H2_SF_TUNNEL_ABRT 0x00100000 // A tunnel attempt was aborted
+
+/* H2 stream descriptor, describing the stream as it appears in the H2C, and as
+ * it is being processed in the internal HTTP representation (HTX).
+ */
+struct h2s {
+ struct sedesc *sd;
+ struct session *sess;
+ struct h2c *h2c;
+ struct eb32_node by_id; /* place in h2c's streams_by_id */
+ int32_t id; /* stream ID */
+ uint32_t flags; /* H2_SF_* */
+ int sws; /* stream window size, to be added to the mux's initial window size */
+ enum h2_err errcode; /* H2 err code (H2_ERR_*) */
+ enum h2_ss st;
+ uint16_t status; /* HTTP response status */
+ unsigned long long body_len; /* remaining body length according to content-length if H2_SF_DATA_CLEN */
+ struct buffer rxbuf; /* receive buffer, always valid (buf_empty or real buffer) */
+ struct wait_event *subs; /* recv wait_event the stream connector associated is waiting on (via h2_subscribe) */
+	struct list list; /* To be used when adding in h2c->send_list or h2c->fctl_list */
+ struct tasklet *shut_tl; /* deferred shutdown tasklet, to retry to send an RST after we failed to,
+ * in case there's no other subscription to do it */
+
+ char upgrade_protocol[16]; /* rfc 8441: requested protocol on Extended CONNECT */
+};
+
+/* descriptor for an h2 frame header */
+struct h2_fh {
+ uint32_t len; /* length, host order, 24 bits */
+ uint32_t sid; /* stream id, host order, 31 bits */
+ uint8_t ft; /* frame type */
+ uint8_t ff; /* frame flags */
+};
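+
+/* This mirrors the 9-octet HTTP/2 frame header (RFC 7540 §4.1): 24-bit
+ * length, 8-bit type, 8-bit flags, one reserved bit and a 31-bit stream
+ * identifier. A minimal decode sketch from a raw byte pointer <b>
+ * (illustrative only, not the parser used in this file):
+ *
+ *     h.len = ((uint32_t)b[0] << 16) | (b[1] << 8) | b[2];
+ *     h.ft  = b[3];
+ *     h.ff  = b[4];
+ *     h.sid = ((uint32_t)(b[5] & 0x7f) << 24) | (b[6] << 16) | (b[7] << 8) | b[8];
+ */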
+
+/* trace source and events */
+static void h2_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this:
+ *   strm - application layer
+ *   h2s  - internal H2 stream
+ *   h2c  - internal H2 connection
+ *   conn - external connection
+ */
+static const struct trace_event h2_trace_events[] = {
+#define H2_EV_H2C_NEW (1ULL << 0)
+ { .mask = H2_EV_H2C_NEW, .name = "h2c_new", .desc = "new H2 connection" },
+#define H2_EV_H2C_RECV (1ULL << 1)
+ { .mask = H2_EV_H2C_RECV, .name = "h2c_recv", .desc = "Rx on H2 connection" },
+#define H2_EV_H2C_SEND (1ULL << 2)
+ { .mask = H2_EV_H2C_SEND, .name = "h2c_send", .desc = "Tx on H2 connection" },
+#define H2_EV_H2C_FCTL (1ULL << 3)
+ { .mask = H2_EV_H2C_FCTL, .name = "h2c_fctl", .desc = "H2 connection flow-controlled" },
+#define H2_EV_H2C_BLK (1ULL << 4)
+ { .mask = H2_EV_H2C_BLK, .name = "h2c_blk", .desc = "H2 connection blocked" },
+#define H2_EV_H2C_WAKE (1ULL << 5)
+ { .mask = H2_EV_H2C_WAKE, .name = "h2c_wake", .desc = "H2 connection woken up" },
+#define H2_EV_H2C_END (1ULL << 6)
+ { .mask = H2_EV_H2C_END, .name = "h2c_end", .desc = "H2 connection terminated" },
+#define H2_EV_H2C_ERR (1ULL << 7)
+ { .mask = H2_EV_H2C_ERR, .name = "h2c_err", .desc = "error on H2 connection" },
+#define H2_EV_RX_FHDR (1ULL << 8)
+ { .mask = H2_EV_RX_FHDR, .name = "rx_fhdr", .desc = "H2 frame header received" },
+#define H2_EV_RX_FRAME (1ULL << 9)
+ { .mask = H2_EV_RX_FRAME, .name = "rx_frame", .desc = "receipt of any H2 frame" },
+#define H2_EV_RX_EOI (1ULL << 10)
+ { .mask = H2_EV_RX_EOI, .name = "rx_eoi", .desc = "receipt of end of H2 input (ES or RST)" },
+#define H2_EV_RX_PREFACE (1ULL << 11)
+ { .mask = H2_EV_RX_PREFACE, .name = "rx_preface", .desc = "receipt of H2 preface" },
+#define H2_EV_RX_DATA (1ULL << 12)
+ { .mask = H2_EV_RX_DATA, .name = "rx_data", .desc = "receipt of H2 DATA frame" },
+#define H2_EV_RX_HDR (1ULL << 13)
+ { .mask = H2_EV_RX_HDR, .name = "rx_hdr", .desc = "receipt of H2 HEADERS frame" },
+#define H2_EV_RX_PRIO (1ULL << 14)
+ { .mask = H2_EV_RX_PRIO, .name = "rx_prio", .desc = "receipt of H2 PRIORITY frame" },
+#define H2_EV_RX_RST (1ULL << 15)
+ { .mask = H2_EV_RX_RST, .name = "rx_rst", .desc = "receipt of H2 RST_STREAM frame" },
+#define H2_EV_RX_SETTINGS (1ULL << 16)
+ { .mask = H2_EV_RX_SETTINGS, .name = "rx_settings", .desc = "receipt of H2 SETTINGS frame" },
+#define H2_EV_RX_PUSH (1ULL << 17)
+ { .mask = H2_EV_RX_PUSH, .name = "rx_push", .desc = "receipt of H2 PUSH_PROMISE frame" },
+#define H2_EV_RX_PING (1ULL << 18)
+ { .mask = H2_EV_RX_PING, .name = "rx_ping", .desc = "receipt of H2 PING frame" },
+#define H2_EV_RX_GOAWAY (1ULL << 19)
+ { .mask = H2_EV_RX_GOAWAY, .name = "rx_goaway", .desc = "receipt of H2 GOAWAY frame" },
+#define H2_EV_RX_WU (1ULL << 20)
+ { .mask = H2_EV_RX_WU, .name = "rx_wu", .desc = "receipt of H2 WINDOW_UPDATE frame" },
+#define H2_EV_RX_CONT (1ULL << 21)
+ { .mask = H2_EV_RX_CONT, .name = "rx_cont", .desc = "receipt of H2 CONTINUATION frame" },
+#define H2_EV_TX_FRAME (1ULL << 22)
+ { .mask = H2_EV_TX_FRAME, .name = "tx_frame", .desc = "transmission of any H2 frame" },
+#define H2_EV_TX_EOI (1ULL << 23)
+ { .mask = H2_EV_TX_EOI, .name = "tx_eoi", .desc = "transmission of H2 end of input (ES or RST)" },
+#define H2_EV_TX_PREFACE (1ULL << 24)
+ { .mask = H2_EV_TX_PREFACE, .name = "tx_preface", .desc = "transmission of H2 preface" },
+#define H2_EV_TX_DATA (1ULL << 25)
+ { .mask = H2_EV_TX_DATA, .name = "tx_data", .desc = "transmission of H2 DATA frame" },
+#define H2_EV_TX_HDR (1ULL << 26)
+ { .mask = H2_EV_TX_HDR, .name = "tx_hdr", .desc = "transmission of H2 HEADERS frame" },
+#define H2_EV_TX_PRIO (1ULL << 27)
+ { .mask = H2_EV_TX_PRIO, .name = "tx_prio", .desc = "transmission of H2 PRIORITY frame" },
+#define H2_EV_TX_RST (1ULL << 28)
+ { .mask = H2_EV_TX_RST, .name = "tx_rst", .desc = "transmission of H2 RST_STREAM frame" },
+#define H2_EV_TX_SETTINGS (1ULL << 29)
+ { .mask = H2_EV_TX_SETTINGS, .name = "tx_settings", .desc = "transmission of H2 SETTINGS frame" },
+#define H2_EV_TX_PUSH (1ULL << 30)
+ { .mask = H2_EV_TX_PUSH, .name = "tx_push", .desc = "transmission of H2 PUSH_PROMISE frame" },
+#define H2_EV_TX_PING (1ULL << 31)
+ { .mask = H2_EV_TX_PING, .name = "tx_ping", .desc = "transmission of H2 PING frame" },
+#define H2_EV_TX_GOAWAY (1ULL << 32)
+ { .mask = H2_EV_TX_GOAWAY, .name = "tx_goaway", .desc = "transmission of H2 GOAWAY frame" },
+#define H2_EV_TX_WU (1ULL << 33)
+ { .mask = H2_EV_TX_WU, .name = "tx_wu", .desc = "transmission of H2 WINDOW_UPDATE frame" },
+#define H2_EV_TX_CONT (1ULL << 34)
+ { .mask = H2_EV_TX_CONT, .name = "tx_cont", .desc = "transmission of H2 CONTINUATION frame" },
+#define H2_EV_H2S_NEW (1ULL << 35)
+ { .mask = H2_EV_H2S_NEW, .name = "h2s_new", .desc = "new H2 stream" },
+#define H2_EV_H2S_RECV (1ULL << 36)
+ { .mask = H2_EV_H2S_RECV, .name = "h2s_recv", .desc = "Rx for H2 stream" },
+#define H2_EV_H2S_SEND (1ULL << 37)
+ { .mask = H2_EV_H2S_SEND, .name = "h2s_send", .desc = "Tx for H2 stream" },
+#define H2_EV_H2S_FCTL (1ULL << 38)
+ { .mask = H2_EV_H2S_FCTL, .name = "h2s_fctl", .desc = "H2 stream flow-controlled" },
+#define H2_EV_H2S_BLK (1ULL << 39)
+ { .mask = H2_EV_H2S_BLK, .name = "h2s_blk", .desc = "H2 stream blocked" },
+#define H2_EV_H2S_WAKE (1ULL << 40)
+ { .mask = H2_EV_H2S_WAKE, .name = "h2s_wake", .desc = "H2 stream woken up" },
+#define H2_EV_H2S_END (1ULL << 41)
+ { .mask = H2_EV_H2S_END, .name = "h2s_end", .desc = "H2 stream terminated" },
+#define H2_EV_H2S_ERR (1ULL << 42)
+ { .mask = H2_EV_H2S_ERR, .name = "h2s_err", .desc = "error on H2 stream" },
+#define H2_EV_STRM_NEW (1ULL << 43)
+ { .mask = H2_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define H2_EV_STRM_RECV (1ULL << 44)
+ { .mask = H2_EV_STRM_RECV, .name = "strm_recv", .desc = "receiving data for stream" },
+#define H2_EV_STRM_SEND (1ULL << 45)
+ { .mask = H2_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" },
+#define H2_EV_STRM_FULL (1ULL << 46)
+ { .mask = H2_EV_STRM_FULL, .name = "strm_full", .desc = "stream buffer full" },
+#define H2_EV_STRM_WAKE (1ULL << 47)
+ { .mask = H2_EV_STRM_WAKE, .name = "strm_wake", .desc = "stream woken up" },
+#define H2_EV_STRM_SHUT (1ULL << 48)
+ { .mask = H2_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define H2_EV_STRM_END (1ULL << 49)
+ { .mask = H2_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define H2_EV_STRM_ERR (1ULL << 50)
+ { .mask = H2_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+#define H2_EV_PROTO_ERR (1ULL << 51)
+ { .mask = H2_EV_PROTO_ERR, .name = "proto_err", .desc = "protocol error" },
+ { }
+};
+
+static const struct name_desc h2_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="h2s", .desc="H2 stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc h2_trace_decoding[] = {
+#define H2_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define H2_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only h2c/h2s state and flags, no real decoding" },
+#define H2_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or frame info when available" },
+#define H2_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or frame decoding when available" },
+#define H2_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_h2 __read_mostly = {
+ .name = IST("h2"),
+ .desc = "HTTP/2 multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = h2_trace,
+ .known_events = h2_trace_events,
+ .lockon_args = h2_trace_lockon_args,
+ .decoding = h2_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_h2
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* h2 stats module */
+enum {
+ H2_ST_HEADERS_RCVD,
+ H2_ST_DATA_RCVD,
+ H2_ST_SETTINGS_RCVD,
+ H2_ST_RST_STREAM_RCVD,
+ H2_ST_GOAWAY_RCVD,
+
+ H2_ST_CONN_PROTO_ERR,
+ H2_ST_STRM_PROTO_ERR,
+ H2_ST_RST_STREAM_RESP,
+ H2_ST_GOAWAY_RESP,
+
+ H2_ST_OPEN_CONN,
+ H2_ST_OPEN_STREAM,
+ H2_ST_TOTAL_CONN,
+ H2_ST_TOTAL_STREAM,
+
+ H2_STATS_COUNT /* must be the last member of the enum */
+};
+
+static struct name_desc h2_stats[] = {
+ [H2_ST_HEADERS_RCVD] = { .name = "h2_headers_rcvd",
+ .desc = "Total number of received HEADERS frames" },
+ [H2_ST_DATA_RCVD] = { .name = "h2_data_rcvd",
+ .desc = "Total number of received DATA frames" },
+ [H2_ST_SETTINGS_RCVD] = { .name = "h2_settings_rcvd",
+ .desc = "Total number of received SETTINGS frames" },
+ [H2_ST_RST_STREAM_RCVD] = { .name = "h2_rst_stream_rcvd",
+ .desc = "Total number of received RST_STREAM frames" },
+ [H2_ST_GOAWAY_RCVD] = { .name = "h2_goaway_rcvd",
+ .desc = "Total number of received GOAWAY frames" },
+
+ [H2_ST_CONN_PROTO_ERR] = { .name = "h2_detected_conn_protocol_errors",
+ .desc = "Total number of connection protocol errors" },
+ [H2_ST_STRM_PROTO_ERR] = { .name = "h2_detected_strm_protocol_errors",
+ .desc = "Total number of stream protocol errors" },
+ [H2_ST_RST_STREAM_RESP] = { .name = "h2_rst_stream_resp",
+ .desc = "Total number of RST_STREAM sent on detected error" },
+ [H2_ST_GOAWAY_RESP] = { .name = "h2_goaway_resp",
+ .desc = "Total number of GOAWAY sent on detected error" },
+
+ [H2_ST_OPEN_CONN] = { .name = "h2_open_connections",
+ .desc = "Count of currently open connections" },
+ [H2_ST_OPEN_STREAM] = { .name = "h2_backend_open_streams",
+ .desc = "Count of currently open streams" },
+ [H2_ST_TOTAL_CONN] = { .name = "h2_total_connections",
+ .desc = "Total number of connections" },
+ [H2_ST_TOTAL_STREAM] = { .name = "h2_backend_total_streams",
+ .desc = "Total number of streams" },
+};
+
+static struct h2_counters {
+ long long headers_rcvd; /* total number of HEADERS frame received */
+ long long data_rcvd; /* total number of DATA frame received */
+ long long settings_rcvd; /* total number of SETTINGS frame received */
+ long long rst_stream_rcvd; /* total number of RST_STREAM frame received */
+ long long goaway_rcvd; /* total number of GOAWAY frame received */
+
+ long long conn_proto_err; /* total number of protocol errors detected */
+ long long strm_proto_err; /* total number of protocol errors detected */
+ long long rst_stream_resp; /* total number of RST_STREAM frame sent on error */
+ long long goaway_resp; /* total number of GOAWAY frame sent on error */
+
+ long long open_conns; /* count of currently open connections */
+ long long open_streams; /* count of currently open streams */
+ long long total_conns; /* total number of connections */
+ long long total_streams; /* total number of streams */
+} h2_counters;
+
+static void h2_fill_stats(void *data, struct field *stats)
+{
+ struct h2_counters *counters = data;
+
+ stats[H2_ST_HEADERS_RCVD] = mkf_u64(FN_COUNTER, counters->headers_rcvd);
+ stats[H2_ST_DATA_RCVD] = mkf_u64(FN_COUNTER, counters->data_rcvd);
+ stats[H2_ST_SETTINGS_RCVD] = mkf_u64(FN_COUNTER, counters->settings_rcvd);
+ stats[H2_ST_RST_STREAM_RCVD] = mkf_u64(FN_COUNTER, counters->rst_stream_rcvd);
+ stats[H2_ST_GOAWAY_RCVD] = mkf_u64(FN_COUNTER, counters->goaway_rcvd);
+
+ stats[H2_ST_CONN_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->conn_proto_err);
+ stats[H2_ST_STRM_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->strm_proto_err);
+ stats[H2_ST_RST_STREAM_RESP] = mkf_u64(FN_COUNTER, counters->rst_stream_resp);
+ stats[H2_ST_GOAWAY_RESP] = mkf_u64(FN_COUNTER, counters->goaway_resp);
+
+ stats[H2_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns);
+ stats[H2_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams);
+ stats[H2_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns);
+ stats[H2_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams);
+}
+
+static struct stats_module h2_stats_module = {
+ .name = "h2",
+ .fill_stats = h2_fill_stats,
+ .stats = h2_stats,
+ .stats_count = H2_STATS_COUNT,
+ .counters = &h2_counters,
+ .counters_size = sizeof(h2_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE|STATS_PX_CAP_BE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &h2_stats_module);
+
+/* the h2c connection pool */
+DECLARE_STATIC_POOL(pool_head_h2c, "h2c", sizeof(struct h2c));
+
+/* the h2s stream pool */
+DECLARE_STATIC_POOL(pool_head_h2s, "h2s", sizeof(struct h2s));
+
+/* The default connection window size is 65535, it may only be enlarged using
+ * a WINDOW_UPDATE message. Since the window must never be larger than 2G-1,
+ * we'll pretend we already received the difference between the two to send
+ * an equivalent window update to enlarge it to 2G-1.
+ */
+#define H2_INITIAL_WINDOW_INCREMENT ((1U<<31)-1 - 65535)
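+
+/* A worked example of the arithmetic above (illustrative sketch, assuming the
+ * write_n32() helper used elsewhere in this file, which stores 32 bits in
+ * network byte order): the increment is (2^31-1) - 65535 = 2147418112, and a
+ * connection-level WINDOW_UPDATE advertising it would be built as:
+ *
+ *   char wu[13];
+ *   memcpy(wu, "\x00\x00\x04\x08\x00\x00\x00\x00\x00", 9); // len=4, type=8 (WINDOW_UPDATE), flags=0, sid=0
+ *   write_n32(wu + 9, H2_INITIAL_WINDOW_INCREMENT);        // R bit clear, 31-bit increment
+ */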
+
+/* maximum amount of data we're OK with re-aligning for buffer optimizations */
+#define MAX_DATA_REALIGN 1024
+
+/* a few settings from the global section */
+static int h2_settings_header_table_size = 4096; /* initial value */
+static int h2_settings_initial_window_size = 65535; /* initial value */
+static unsigned int h2_settings_max_concurrent_streams = 100;
+static int h2_settings_max_frame_size = 0; /* unset */
+
+/* a dummy closed endpoint */
+static const struct sedesc closed_ep = {
+ .sc = NULL,
+ .flags = SE_FL_DETACHED,
+};
+
+/* a dummy closed stream */
+static const struct h2s *h2_closed_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_CLOSED,
+ .errcode = H2_ERR_STREAM_CLOSED,
+ .flags = H2_SF_RST_RCVD,
+ .id = 0,
+};
+
+/* a dummy closed stream returning a PROTOCOL_ERROR error */
+static const struct h2s *h2_error_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_CLOSED,
+ .errcode = H2_ERR_PROTOCOL_ERROR,
+ .flags = 0,
+ .id = 0,
+};
+
+/* a dummy closed stream returning a REFUSED_STREAM error */
+static const struct h2s *h2_refused_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_CLOSED,
+ .errcode = H2_ERR_REFUSED_STREAM,
+ .flags = 0,
+ .id = 0,
+};
+
+/* and a dummy idle stream for use with any unannounced stream */
+static const struct h2s *h2_idle_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_IDLE,
+ .errcode = H2_ERR_STREAM_CLOSED,
+ .id = 0,
+};
+
+struct task *h2_timeout_task(struct task *t, void *context, unsigned int state);
+static int h2_send(struct h2c *h2c);
+static int h2_recv(struct h2c *h2c);
+static int h2_process(struct h2c *h2c);
+/* h2_io_cb is exported to see it resolved in "show fd" */
+struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state);
+static inline struct h2s *h2c_st_by_id(struct h2c *h2c, int id);
+static int h2c_decode_headers(struct h2c *h2c, struct buffer *rxbuf, uint32_t *flags, unsigned long long *body_len, char *upgrade_protocol);
+static int h2_frt_transfer_data(struct h2s *h2s);
+struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state);
+static struct h2s *h2c_bck_stream_new(struct h2c *h2c, struct stconn *sc, struct session *sess);
+static void h2s_alert(struct h2s *h2s);
+
+/* returns a h2c state as an abbreviated 3-letter string, or "???" if unknown */
+static inline const char *h2c_st_to_str(enum h2_cs st)
+{
+ switch (st) {
+ case H2_CS_PREFACE: return "PRF";
+ case H2_CS_SETTINGS1: return "STG";
+ case H2_CS_FRAME_H: return "FRH";
+ case H2_CS_FRAME_P: return "FRP";
+ case H2_CS_FRAME_A: return "FRA";
+ case H2_CS_FRAME_E: return "FRE";
+ case H2_CS_ERROR: return "ERR";
+ case H2_CS_ERROR2: return "ER2";
+ default: return "???";
+ }
+}
+
+/* returns a h2s state as an abbreviated 3-letter string, or "???" if unknown */
+static inline const char *h2s_st_to_str(enum h2_ss st)
+{
+ switch (st) {
+ case H2_SS_IDLE: return "IDL"; // idle
+ case H2_SS_RLOC: return "RSL"; // reserved local
+ case H2_SS_RREM: return "RSR"; // reserved remote
+ case H2_SS_OPEN: return "OPN"; // open
+ case H2_SS_HREM: return "HCR"; // half-closed remote
+ case H2_SS_HLOC: return "HCL"; // half-closed local
+ case H2_SS_ERROR : return "ERR"; // error
+ case H2_SS_CLOSED: return "CLO"; // closed
+ default: return "???";
+ }
+}
+
+/* returns the stconn associated to the H2 stream */
+static forceinline struct stconn *h2s_sc(const struct h2s *h2s)
+{
+ return h2s->sd->sc;
+}
+
+/* the H2 traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive h2c), that arg2, if non-null, is of type h2s, and
+ * that arg3, if non-null, is either of type htx for tx headers, or of type
+ * buffer for everything else.
+ */
+static void h2_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct h2c *h2c = conn ? conn->ctx : NULL;
+ const struct h2s *h2s = a2;
+ const struct buffer *buf = a3;
+ const struct htx *htx;
+ int pos;
+
+ if (!h2c) // nothing to add
+ return;
+
+ if (src->verbosity > H2_VERB_CLEAN) {
+ chunk_appendf(&trace_buf, " : h2c=%p(%c,%s)", h2c, conn_is_back(conn) ? 'B' : 'F', h2c_st_to_str(h2c->st0));
+
+ if (mask & H2_EV_H2C_NEW) // inside h2_init, otherwise it's hard to match conn & h2c
+ conn_append_debug_info(&trace_buf, conn, " : ");
+
+ if (h2c->errcode)
+ chunk_appendf(&trace_buf, " err=%s/%02x", h2_err_str(h2c->errcode), h2c->errcode);
+
+ if (h2c->flags & H2_CF_DEM_IN_PROGRESS && // frame processing has started, type and length are valid
+ (mask & (H2_EV_RX_FRAME|H2_EV_RX_FHDR)) == (H2_EV_RX_FRAME|H2_EV_RX_FHDR)) {
+ chunk_appendf(&trace_buf, " dft=%s/%02x dfl=%d", h2_ft_str(h2c->dft), h2c->dff, h2c->dfl);
+ }
+
+ if (h2s) {
+ if (h2s->id <= 0)
+ chunk_appendf(&trace_buf, " dsi=%d", h2c->dsi);
+ if (h2s == h2_idle_stream)
+ chunk_appendf(&trace_buf, " h2s=IDL");
+ else if (h2s != h2_closed_stream)
+ chunk_appendf(&trace_buf, " h2s=%p(%d,%s)", h2s, h2s->id, h2s_st_to_str(h2s->st));
+ if (h2s->id && h2s->errcode)
+ chunk_appendf(&trace_buf, " err=%s/%02x", h2_err_str(h2s->errcode), h2s->errcode);
+ }
+ }
+
+ /* Let's dump decoded requests and responses right after parsing. They
+ * are traced at level USER with a few recognizable flags.
+ */
+ if ((mask == (H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW) ||
+ mask == (H2_EV_RX_FRAME|H2_EV_RX_HDR)) && buf)
+ htx = htxbuf(buf); // recv req/res
+ else if (mask == (H2_EV_TX_FRAME|H2_EV_TX_HDR))
+ htx = a3; // send req/res
+ else
+ htx = NULL;
+
+ if (level == TRACE_LEVEL_USER && src->verbosity != H2_VERB_MINIMAL && htx && (pos = htx_get_head(htx)) != -1) {
+ const struct htx_blk *blk = htx_get_blk(htx, pos);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL)
+ chunk_appendf(&trace_buf, " : [%d] H2 REQ: %.*s %.*s %.*s",
+ h2s ? h2s->id : h2c->dsi,
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ else if (type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " : [%d] H2 RES: %.*s %.*s %.*s",
+ h2s ? h2s->id : h2c->dsi,
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+}
+
+
+/* Detect a pending read0 for a H2 connection. It happens if a read0 was
+ * already reported on a previous xprt->rcvbuf() AND a frame parser failed
+ * to parse pending data, confirming no more progress is possible because
+ * we're facing a truncated frame. The function returns 1 to report a read0
+ * or 0 otherwise.
+ */
+static inline int h2c_read0_pending(struct h2c *h2c)
+{
+ return !!(h2c->flags & H2_CF_END_REACHED);
+}
+
+/* returns true if the connection is allowed to expire, false otherwise. A
+ * connection may expire when it has no attached streams. As long as streams
+ * are attached, the application layer is responsible for timeout management,
+ * and each layer will detach when it doesn't want to wait anymore. When the
+ * last one leaves, the connection must take over timeout management.
+ */
+static inline int h2c_may_expire(const struct h2c *h2c)
+{
+ return !h2c->nb_sc;
+}
+
+/* update h2c timeout if needed */
+static void h2c_update_timeout(struct h2c *h2c)
+{
+ int is_idle_conn = 0;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ if (!h2c->task)
+ goto leave;
+
+ if (h2c_may_expire(h2c)) {
+ /* no more streams attached */
+ if (h2c->last_sid >= 0) {
+ /* GOAWAY sent, closing in progress */
+ h2c->task->expire = tick_add_ifset(now_ms, h2c->shut_timeout);
+ is_idle_conn = 1;
+ } else if (br_data(h2c->mbuf)) {
+ /* pending output data: always the regular data timeout */
+ h2c->task->expire = tick_add_ifset(now_ms, h2c->timeout);
+ } else if (!(h2c->flags & H2_CF_IS_BACK) && h2c->max_id > 0 && !b_data(&h2c->dbuf)) {
+ /* idle after having seen one stream => keep-alive */
+ int to;
+
+ if (tick_isset(h2c->proxy->timeout.httpka))
+ to = h2c->proxy->timeout.httpka;
+ else
+ to = h2c->proxy->timeout.httpreq;
+
+ h2c->task->expire = tick_add_ifset(h2c->idle_start, to);
+ is_idle_conn = 1;
+ } else {
+ /* before first request, or started to deserialize a
+ * new req => http-request, but only set, not refresh.
+ */
+ int exp = (h2c->flags & H2_CF_IS_BACK) ? TICK_ETERNITY : h2c->proxy->timeout.httpreq;
+ h2c->task->expire = tick_add_ifset(h2c->idle_start, exp);
+ }
+ /* if a timeout above was not set, fall back to the default one */
+ if (!tick_isset(h2c->task->expire))
+ h2c->task->expire = tick_add_ifset(now_ms, h2c->timeout);
+
+ if ((h2c->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) &&
+ is_idle_conn && tick_isset(global.close_spread_end)) {
+ /* If a soft-stop is in progress and a close-spread-time
+ * is set, we want to spread idle connection closing roughly
+ * evenly across the defined window. This should only
+ * act on idle frontend connections.
+ * If the window end is already in the past, we wake the
+ * timeout task up immediately so that it can be closed.
+ */
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* We don't need to reset the expire if it would
+ * already happen before the close window end.
+ */
+ if (tick_isset(h2c->task->expire) &&
+ tick_is_le(global.close_spread_end, h2c->task->expire)) {
+ /* Set an expire value shorter than the current value
+ * because the close spread window end comes earlier.
+ */
+ h2c->task->expire = tick_add(now_ms, statistical_prng_range(remaining_window));
+ }
+ }
+ else {
+ /* We are past the soft close window end, wake the timeout
+ * task up immediately.
+ */
+ task_wakeup(h2c->task, TASK_WOKEN_TIMER);
+ }
+ }
+
+ } else {
+ h2c->task->expire = TICK_ETERNITY;
+ }
+ task_queue(h2c->task);
+ leave:
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+}
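+
+/* Timeline example (illustrative): a frontend connection which served one
+ * stream and is now idle (max_id > 0, no attached streams, empty buffers)
+ * gets its expiry set to idle_start + "timeout http-keep-alive" (or
+ * http-request if unset); once a GOAWAY was emitted (last_sid >= 0), the
+ * shut_timeout applies instead.
+ */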
+
+static __inline int
+h2c_is_dead(const struct h2c *h2c)
+{
+ if (eb_is_empty(&h2c->streams_by_id) && /* don't close if streams exist */
+ ((h2c->conn->flags & CO_FL_ERROR) || /* errors close immediately */
+ (h2c->st0 >= H2_CS_ERROR && !h2c->task) || /* a timeout stroke earlier */
+ (!(h2c->conn->owner)) || /* Nobody's left to take care of the connection, drop it now */
+ (!br_data(h2c->mbuf) && /* mux buffer empty, also process clean events below */
+ (conn_xprt_read0_pending(h2c->conn) ||
+ (h2c->last_sid >= 0 && h2c->max_id >= h2c->last_sid)))))
+ return 1;
+
+ return 0;
+}
+
+/*****************************************************/
+/* functions below are for dynamic buffer management */
+/*****************************************************/
+
+/* indicates whether or not we may call the h2_recv() function to attempt
+ * to receive data into the buffer and/or demux pending data. The condition is
+ * a bit complex due to some API limits for now. The rules are the following :
+ * - if an error or a shutdown was detected on the connection and the buffer
+ * is empty, we must not attempt to receive
+ * - if the demux buf failed to be allocated, we must not try to receive and
+ * we know there is nothing pending
+ * - if no flag indicates a blocking condition, we may attempt to receive,
+ * regardless of whether the demux buffer is full or not, so that only
+ * the demux part decides whether or not to block. This is needed because
+ * the connection API indeed prevents us from re-enabling receipt that is
+ * already enabled in a polled state, so we must always immediately stop
+ * as soon as the demux can't proceed so as never to hit an end of read
+ * with data pending in the buffers.
+ * - otherwise we must not attempt to receive
+ */
+static inline int h2_recv_allowed(const struct h2c *h2c)
+{
+ if (b_data(&h2c->dbuf) == 0 &&
+ (h2c->st0 >= H2_CS_ERROR ||
+ h2c->conn->flags & CO_FL_ERROR ||
+ conn_xprt_read0_pending(h2c->conn)))
+ return 0;
+
+ if (!(h2c->flags & H2_CF_DEM_DALLOC) &&
+ !(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ return 1;
+
+ return 0;
+}
+
+/* restarts reading on the connection if it was not enabled */
+static inline void h2c_restart_reading(const struct h2c *h2c, int consider_buffer)
+{
+ if (!h2_recv_allowed(h2c))
+ return;
+ if ((!consider_buffer || !b_data(&h2c->dbuf))
+ && (h2c->wait_event.events & SUB_RETRY_RECV))
+ return;
+ tasklet_wakeup(h2c->wait_event.tasklet);
+}
+
+
+/* returns true if the front connection has too many stream connectors attached */
+static inline int h2_frt_has_too_many_sc(const struct h2c *h2c)
+{
+ return h2c->nb_sc > h2_settings_max_concurrent_streams;
+}
+
+/* Tries to grab a buffer and to re-enable processing on mux <target>. The h2c
+ * flags are used to figure what buffer was requested. It returns 1 if the
+ * allocation succeeds, in which case the connection is woken up, or 0 if it's
+ * impossible to wake up and we prefer to be woken up later.
+ */
+static int h2_buf_available(void *target)
+{
+ struct h2c *h2c = target;
+ struct h2s *h2s;
+
+ if ((h2c->flags & H2_CF_DEM_DALLOC) && b_alloc(&h2c->dbuf)) {
+ h2c->flags &= ~H2_CF_DEM_DALLOC;
+ h2c_restart_reading(h2c, 1);
+ return 1;
+ }
+
+ if ((h2c->flags & H2_CF_MUX_MALLOC) && b_alloc(br_tail(h2c->mbuf))) {
+ h2c->flags &= ~H2_CF_MUX_MALLOC;
+
+ if (h2c->flags & H2_CF_DEM_MROOM) {
+ h2c->flags &= ~H2_CF_DEM_MROOM;
+ h2c_restart_reading(h2c, 1);
+ }
+ return 1;
+ }
+
+ if ((h2c->flags & H2_CF_DEM_SALLOC) &&
+ (h2s = h2c_st_by_id(h2c, h2c->dsi)) && h2s_sc(h2s) &&
+ b_alloc(&h2s->rxbuf)) {
+ h2c->flags &= ~H2_CF_DEM_SALLOC;
+ h2c_restart_reading(h2c, 1);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline struct buffer *h2_get_buf(struct h2c *h2c, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&h2c->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ h2c->buf_wait.target = h2c;
+ h2c->buf_wait.wakeup_cb = h2_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &h2c->buf_wait.list);
+ }
+ return buf;
+}
+
+static inline void h2_release_buf(struct h2c *h2c, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(NULL, 1);
+ }
+}
+
+static inline void h2_release_mbuf(struct h2c *h2c)
+{
+ struct buffer *buf;
+ unsigned int count = 0;
+
+ while (b_size(buf = br_head_pick(h2c->mbuf))) {
+ b_free(buf);
+ count++;
+ }
+ if (count)
+ offer_buffers(NULL, count);
+}
+
+/* returns the number of allocatable outgoing streams for the connection taking
+ * the last_sid and the reserved ones into account.
+ */
+static inline int h2_streams_left(const struct h2c *h2c)
+{
+ int ret;
+
+ /* consider the number of outgoing streams we're allowed to create before
+ * reaching the last GOAWAY frame seen. max_id is the last assigned id,
+ * nb_reserved is the number of streams which don't yet have an ID.
+ */
+ ret = (h2c->last_sid >= 0) ? h2c->last_sid : 0x7FFFFFFF;
+ ret = (unsigned int)(ret - h2c->max_id) / 2 - h2c->nb_reserved - 1;
+ if (ret < 0)
+ ret = 0;
+ return ret;
+}
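+
+/* Worked example (illustrative): on a fresh outgoing connection where
+ * last_sid=-1, max_id=-1 and nb_reserved=0, this computes
+ * (0x7FFFFFFF - (-1)) / 2 - 0 - 1 = 0x3FFFFFFF, i.e. roughly 2^30 odd,
+ * locally-initiated stream IDs are still allocatable.
+ */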
+
+/* returns the number of streams in use on a connection to figure if it's
+ * idle or not. We check nb_sc and not nb_streams as the caller will want
+ * to know if it was the last one after a detach().
+ */
+static int h2_used_streams(struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+
+ return h2c->nb_sc;
+}
+
+/* returns the number of concurrent streams available on the connection */
+static int h2_avail_streams(struct connection *conn)
+{
+ struct server *srv = objt_server(conn->target);
+ struct h2c *h2c = conn->ctx;
+ int ret1, ret2;
+
+ /* RFC7540#6.8: Receivers of a GOAWAY frame MUST NOT open additional
+ * streams on the connection.
+ */
+ if (h2c->last_sid >= 0)
+ return 0;
+
+ if (h2c->st0 >= H2_CS_ERROR)
+ return 0;
+
+ /* note: may be negative if a SETTINGS frame changes the limit */
+ ret1 = h2c->streams_limit - h2c->nb_streams;
+
+ /* we must also consider the limit imposed by stream IDs */
+ ret2 = h2_streams_left(h2c);
+ ret1 = MIN(ret1, ret2);
+ if (ret1 > 0 && srv && srv->max_reuse >= 0) {
+ ret2 = h2c->stream_cnt <= srv->max_reuse ? srv->max_reuse - h2c->stream_cnt + 1: 0;
+ ret1 = MIN(ret1, ret2);
+ }
+ return ret1;
+}
+
+/* unconditionally produce a trace of the header. Please do not call this one
+ * directly; use h2_trace_header() instead, which first checks if traces are
+ * enabled.
+ */
+void _h2_trace_header(const struct ist hn, const struct ist hv,
+ uint64_t mask, const struct ist trc_loc, const char *func,
+ const struct h2c *h2c, const struct h2s *h2s)
+{
+ struct ist n_ist, v_ist;
+ const char *c_str, *s_str;
+
+ chunk_reset(&trash);
+ c_str = chunk_newstr(&trash);
+ if (h2c) {
+ chunk_appendf(&trash, "h2c=%p(%c,%s) ",
+ h2c, (h2c->flags & H2_CF_IS_BACK) ? 'B' : 'F', h2c_st_to_str(h2c->st0));
+ }
+
+ s_str = chunk_newstr(&trash);
+ if (h2s) {
+ if (h2s->id <= 0)
+ chunk_appendf(&trash, "dsi=%d ", h2s->h2c->dsi);
+ chunk_appendf(&trash, "h2s=%p(%d,%s) ", h2s, h2s->id, h2s_st_to_str(h2s->st));
+ }
+ else if (h2c)
+ chunk_appendf(&trash, "dsi=%d ", h2c->dsi);
+
+ n_ist = ist2(chunk_newstr(&trash), 0);
+ istscpy(&n_ist, hn, 256);
+ trash.data += n_ist.len;
+ if (n_ist.len != hn.len)
+ chunk_appendf(&trash, " (... +%ld)", (long)(hn.len - n_ist.len));
+
+ v_ist = ist2(chunk_newstr(&trash), 0);
+ istscpy(&v_ist, hv, 1024);
+ trash.data += v_ist.len;
+ if (v_ist.len != hv.len)
+ chunk_appendf(&trash, " (... +%ld)", (long)(hv.len - v_ist.len));
+
+ TRACE_PRINTF_LOC(TRACE_LEVEL_USER, mask, trc_loc, func,
+ (h2c ? h2c->conn : 0), 0, 0, 0,
+ "%s%s%s %s: %s", c_str, s_str,
+ (mask & H2_EV_TX_HDR) ? "sndh" : "rcvh",
+ n_ist.ptr, v_ist.ptr);
+}
+
+/* produce a trace of the header after checking that tracing is enabled */
+static inline void h2_trace_header(const struct ist hn, const struct ist hv,
+ uint64_t mask, const struct ist trc_loc, const char *func,
+ const struct h2c *h2c, const struct h2s *h2s)
+{
+ if ((TRACE_SOURCE)->verbosity >= H2_VERB_ADVANCED &&
+ TRACE_ENABLED(TRACE_LEVEL_USER, mask, h2c ? h2c->conn : 0, h2s, 0, 0))
+ _h2_trace_header(hn, hv, mask, trc_loc, func, h2c, h2s);
+}
+
+/* hpack-encode header name <hn> and value <hv>, possibly emitting a trace if
+ * currently enabled. This is done on behalf of function <func> at <trc_loc>
+ * passed as ist(TRC_LOC), h2c <h2c>, and h2s <h2s>, all of which may be NULL.
+ * The trace is only emitted if the header is emitted (in which case non-zero
+ * is returned). The trash is modified. In the traces, the header's name will
+ * be truncated to 256 chars and the header's value to 1024 chars.
+ */
+static inline int h2_encode_header(struct buffer *buf, const struct ist hn, const struct ist hv,
+ uint64_t mask, const struct ist trc_loc, const char *func,
+ const struct h2c *h2c, const struct h2s *h2s)
+{
+ int ret;
+
+ ret = hpack_encode_header(buf, hn, hv);
+ if (ret)
+ h2_trace_header(hn, hv, mask, trc_loc, func, h2c, h2s);
+
+ return ret;
+}
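+
+/* Usage sketch (illustrative; ist(TRC_LOC) and __FUNCTION__ are assumed to be
+ * the usual trace-location arguments of this API):
+ *
+ *   if (!h2_encode_header(&outbuf, ist("content-length"), ist("42"),
+ *                         H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC),
+ *                         __FUNCTION__, h2c, h2s))
+ *       goto full; // not enough room left in outbuf, try again later
+ */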
+
+/*****************************************************************/
+/* functions below are dedicated to the mux setup and management */
+/*****************************************************************/
+
+/* Initialize the mux once it's attached. For outgoing connections, the context
+ * is already initialized before installing the mux, so we detect incoming
+ * connections from the fact that the context is still NULL (even during mux
+ * upgrades). <input> is always used as the input buffer and may contain data.
+ * It is the caller's responsibility not to reuse it afterwards. Returns < 0 on
+ * error.
+ */
+static int h2_init(struct connection *conn, struct proxy *prx, struct session *sess,
+ struct buffer *input)
+{
+ struct h2c *h2c;
+ struct task *t = NULL;
+ void *conn_ctx = conn->ctx;
+
+ TRACE_ENTER(H2_EV_H2C_NEW);
+
+ h2c = pool_alloc(pool_head_h2c);
+ if (!h2c)
+ goto fail_no_h2c;
+
+ if (conn_is_back(conn)) {
+ h2c->flags = H2_CF_IS_BACK;
+ h2c->shut_timeout = h2c->timeout = prx->timeout.server;
+ if (tick_isset(prx->timeout.serverfin))
+ h2c->shut_timeout = prx->timeout.serverfin;
+
+ h2c->px_counters = EXTRA_COUNTERS_GET(prx->extra_counters_be,
+ &h2_stats_module);
+ } else {
+ h2c->flags = H2_CF_NONE;
+ h2c->shut_timeout = h2c->timeout = prx->timeout.client;
+ if (tick_isset(prx->timeout.clientfin))
+ h2c->shut_timeout = prx->timeout.clientfin;
+
+ h2c->px_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe,
+ &h2_stats_module);
+ }
+
+ h2c->proxy = prx;
+ h2c->task = NULL;
+ h2c->wait_event.tasklet = NULL;
+ h2c->idle_start = now_ms;
+ if (tick_isset(h2c->timeout)) {
+ t = task_new_here();
+ if (!t)
+ goto fail;
+
+ h2c->task = t;
+ t->process = h2_timeout_task;
+ t->context = h2c;
+ t->expire = tick_add(now_ms, h2c->timeout);
+ }
+
+ h2c->wait_event.tasklet = tasklet_new();
+ if (!h2c->wait_event.tasklet)
+ goto fail;
+ h2c->wait_event.tasklet->process = h2_io_cb;
+ h2c->wait_event.tasklet->context = h2c;
+ h2c->wait_event.events = 0;
+ if (!conn_is_back(conn)) {
+ /* Connection might already be in the stopping_list if subject
+ * to h1->h2 upgrade.
+ */
+ if (!LIST_INLIST(&conn->stopping_list)) {
+ LIST_APPEND(&mux_stopping_data[tid].list,
+ &conn->stopping_list);
+ }
+ }
+
+ h2c->ddht = hpack_dht_alloc();
+ if (!h2c->ddht)
+ goto fail;
+
+ /* Initialise the context. */
+ h2c->st0 = H2_CS_PREFACE;
+ h2c->conn = conn;
+ h2c->streams_limit = h2_settings_max_concurrent_streams;
+ h2c->max_id = -1;
+ h2c->errcode = H2_ERR_NO_ERROR;
+ h2c->rcvd_c = 0;
+ h2c->rcvd_s = 0;
+ h2c->nb_streams = 0;
+ h2c->nb_sc = 0;
+ h2c->nb_reserved = 0;
+ h2c->stream_cnt = 0;
+
+ h2c->dbuf = *input;
+ h2c->dsi = -1;
+ h2c->msi = -1;
+
+ h2c->last_sid = -1;
+
+ br_init(h2c->mbuf, sizeof(h2c->mbuf) / sizeof(h2c->mbuf[0]));
+ h2c->miw = 65535; /* mux initial window size */
+ h2c->mws = 65535; /* mux window size */
+ h2c->mfs = 16384; /* initial max frame size */
+ h2c->streams_by_id = EB_ROOT;
+ LIST_INIT(&h2c->send_list);
+ LIST_INIT(&h2c->fctl_list);
+ LIST_INIT(&h2c->blocked_list);
+ LIST_INIT(&h2c->buf_wait.list);
+
+ conn->ctx = h2c;
+
+ TRACE_USER("new H2 connection", H2_EV_H2C_NEW, conn);
+
+ if (t)
+ task_queue(t);
+
+ if (h2c->flags & H2_CF_IS_BACK) {
+ /* FIXME: this is temporary, for outgoing connections we need
+ * to immediately allocate a stream until the code is modified
+ * so that the caller calls ->attach(). For now the outgoing sc
+ * is stored as conn->ctx by the caller and saved in conn_ctx.
+ */
+ struct h2s *h2s;
+
+ h2s = h2c_bck_stream_new(h2c, conn_ctx, sess);
+ if (!h2s)
+ goto fail_stream;
+ }
+
+ HA_ATOMIC_INC(&h2c->px_counters->open_conns);
+ HA_ATOMIC_INC(&h2c->px_counters->total_conns);
+
+ /* prepare to read something */
+ h2c_restart_reading(h2c, 1);
+ TRACE_LEAVE(H2_EV_H2C_NEW, conn);
+ return 0;
+ fail_stream:
+ hpack_dht_free(h2c->ddht);
+ fail:
+ task_destroy(t);
+ if (h2c->wait_event.tasklet)
+ tasklet_free(h2c->wait_event.tasklet);
+ pool_free(pool_head_h2c, h2c);
+ fail_no_h2c:
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+ conn->ctx = conn_ctx; /* restore saved ctx */
+ TRACE_DEVEL("leaving in error", H2_EV_H2C_NEW|H2_EV_H2C_END|H2_EV_H2C_ERR);
+ return -1;
+}
+
+/* returns the next allocatable outgoing stream ID for the H2 connection, or
+ * -1 if no more is allocatable.
+ */
+static inline int32_t h2c_get_next_sid(const struct h2c *h2c)
+{
+ int32_t id = (h2c->max_id + 1) | 1;
+
+ if ((id & 0x80000000U) || (h2c->last_sid >= 0 && id > h2c->last_sid))
+ id = -1;
+ return id;
+}
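+
+/* Example (illustrative): with max_id=-1 the first ID handed out is
+ * ((-1 + 1) | 1) = 1, then max_id=1 yields (2 | 1) = 3, then 5, 7, ...
+ * so only odd (client-initiated) IDs are ever produced, as required by
+ * RFC7540#5.1.1, until the 2^31 limit or the peer's last_sid is reached.
+ */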
+
+/* returns the stream associated with id <id> or NULL if not found */
+static inline struct h2s *h2c_st_by_id(struct h2c *h2c, int id)
+{
+ struct eb32_node *node;
+
+ if (id == 0)
+ return (struct h2s *)h2_closed_stream;
+
+ if (id > h2c->max_id)
+ return (struct h2s *)h2_idle_stream;
+
+ node = eb32_lookup(&h2c->streams_by_id, id);
+ if (!node)
+ return (struct h2s *)h2_closed_stream;
+
+ return container_of(node, struct h2s, by_id);
+}
+
+/* release function. This one should be called to free all resources allocated
+ * to the mux.
+ */
+static void h2_release(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+
+ TRACE_ENTER(H2_EV_H2C_END);
+
+ hpack_dht_free(h2c->ddht);
+
+ if (LIST_INLIST(&h2c->buf_wait.list))
+ LIST_DEL_INIT(&h2c->buf_wait.list);
+
+ h2_release_buf(h2c, &h2c->dbuf);
+ h2_release_mbuf(h2c);
+
+ if (h2c->task) {
+ h2c->task->context = NULL;
+ task_wakeup(h2c->task, TASK_WOKEN_OTHER);
+ h2c->task = NULL;
+ }
+ if (h2c->wait_event.tasklet)
+ tasklet_free(h2c->wait_event.tasklet);
+ if (conn && h2c->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, h2c->wait_event.events,
+ &h2c->wait_event);
+
+ HA_ATOMIC_DEC(&h2c->px_counters->open_conns);
+
+ pool_free(pool_head_h2c, h2c);
+
+ if (conn) {
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", H2_EV_H2C_END, conn);
+
+ conn_stop_tracking(conn);
+
+ /* there might be a GOAWAY frame still pending in the TCP
+ * stack, and if the peer continues to send (i.e. window
+ * updates etc), this can result in losing the GOAWAY. For
+ * this reason we try to drain anything received in between.
+ */
+ conn->flags |= CO_FL_WANT_DRAIN;
+
+ conn_xprt_shutw(conn);
+ conn_xprt_close(conn);
+ conn_sock_shutw(conn, !conn_is_back(conn));
+ conn_ctrl_close(conn);
+
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_END);
+}
+
+
+/******************************************************/
+/* functions below are for the H2 protocol processing */
+/******************************************************/
+
+/* returns the stream id of stream <h2s> or 0 if <h2s> is NULL */
+static inline __maybe_unused int h2s_id(const struct h2s *h2s)
+{
+ return h2s ? h2s->id : 0;
+}
+
+/* returns the sum of the stream's own window size and the mux's initial
+ * window, which together form the stream's effective window size.
+ */
+static inline int h2s_mws(const struct h2s *h2s)
+{
+ return h2s->sws + h2s->h2c->miw;
+}
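+
+/* Example (illustrative, assuming sws tracks the per-stream delta and goes
+ * negative as data is sent): if the peer set the initial window to 16384
+ * (miw=16384) and the stream already sent 1000 bytes (sws=-1000), the
+ * effective send window is 16384 - 1000 = 15384 bytes.
+ */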
+
+/* returns true if the mux is currently busy as seen from stream <h2s> */
+static inline __maybe_unused int h2c_mux_busy(const struct h2c *h2c, const struct h2s *h2s)
+{
+ if (h2c->msi < 0)
+ return 0;
+
+ if (h2c->msi == h2s_id(h2s))
+ return 0;
+
+ return 1;
+}
+
+/* marks an error on the connection. Before settings are sent, we must not send
+ * a GOAWAY frame, and the error state will prevent h2c_send_goaway_error()
+ * from verifying this so we set H2_CF_GOAWAY_FAILED to make sure it will not
+ * even try.
+ */
+static inline __maybe_unused void h2c_error(struct h2c *h2c, enum h2_err err)
+{
+ TRACE_POINT(H2_EV_H2C_ERR, h2c->conn, 0, 0, (void *)(long)(err));
+ h2c->errcode = err;
+ if (h2c->st0 < H2_CS_SETTINGS1)
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+ h2c->st0 = H2_CS_ERROR;
+}
+
+/* marks an error on the stream. It may also update an already closed stream
+ * (e.g. to report an error after an RST was received).
+ */
+static inline __maybe_unused void h2s_error(struct h2s *h2s, enum h2_err err)
+{
+ if (h2s->id && h2s->st != H2_SS_ERROR) {
+ TRACE_POINT(H2_EV_H2S_ERR, h2s->h2c->conn, h2s, 0, (void *)(long)(err));
+ h2s->errcode = err;
+ if (h2s->st < H2_SS_ERROR)
+ h2s->st = H2_SS_ERROR;
+ se_fl_set_error(h2s->sd);
+ }
+}
+
+/* attempt to notify the data layer of recv availability */
+static void __maybe_unused h2s_notify_recv(struct h2s *h2s)
+{
+ if (h2s->subs && h2s->subs->events & SUB_RETRY_RECV) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ tasklet_wakeup(h2s->subs->tasklet);
+ h2s->subs->events &= ~SUB_RETRY_RECV;
+ if (!h2s->subs->events)
+ h2s->subs = NULL;
+ }
+}
+
+/* attempt to notify the data layer of send availability */
+static void __maybe_unused h2s_notify_send(struct h2s *h2s)
+{
+ if (h2s->subs && h2s->subs->events & SUB_RETRY_SEND) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ h2s->flags |= H2_SF_NOTIFIED;
+ tasklet_wakeup(h2s->subs->tasklet);
+ h2s->subs->events &= ~SUB_RETRY_SEND;
+ if (!h2s->subs->events)
+ h2s->subs = NULL;
+ }
+ else if (h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW)) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ tasklet_wakeup(h2s->shut_tl);
+ }
+}
+
+/* alerts the data layer, trying to wake it up by all means, following
+ * this sequence :
+ * - if the h2s' data layer is subscribed to recv, then it's woken up for recv
+ * - if its subscribed to send, then it's woken up for send
+ * - if it was subscribed to neither, its ->wake() callback is called
+ * It is safe to call this function with a closed stream which doesn't have a
+ * stream connector anymore.
+ */
+static void __maybe_unused h2s_alert(struct h2s *h2s)
+{
+ TRACE_ENTER(H2_EV_H2S_WAKE, h2s->h2c->conn, h2s);
+
+ if (h2s->subs ||
+ (h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW))) {
+ h2s_notify_recv(h2s);
+ h2s_notify_send(h2s);
+ }
+ else if (h2s_sc(h2s) && h2s_sc(h2s)->app_ops->wake != NULL) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ h2s_sc(h2s)->app_ops->wake(h2s_sc(h2s));
+ }
+
+ TRACE_LEAVE(H2_EV_H2S_WAKE, h2s->h2c->conn, h2s);
+}
+
+/* writes the 24-bit frame size <len> at address <frame> */
+static inline __maybe_unused void h2_set_frame_size(void *frame, uint32_t len)
+{
+ uint8_t *out = frame;
+
+ *out = len >> 16;
+ write_n16(out + 1, len);
+}
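+
+/* Example (illustrative): h2_set_frame_size(frame, 0x014233) stores the
+ * bytes 01 42 33 at frame[0..2], write_n16() being the big-endian 16-bit
+ * store used elsewhere in this file.
+ */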
+
+/* reads <bytes> bytes from buffer <b> starting at relative offset <o> from the
+ * current pointer, dealing with wrapping, and stores the result in <dst>. It's
+ * the caller's responsibility to verify that there are at least <bytes> bytes
+ * available in the buffer's input prior to calling this function. The buffer
+ * is assumed not to hold any output data.
+ */
+static inline __maybe_unused void h2_get_buf_bytes(void *dst, size_t bytes,
+ const struct buffer *b, int o)
+{
+ readv_bytes(dst, bytes, b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+static inline __maybe_unused uint16_t h2_get_n16(const struct buffer *b, int o)
+{
+ return readv_n16(b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+static inline __maybe_unused uint32_t h2_get_n32(const struct buffer *b, int o)
+{
+ return readv_n32(b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+static inline __maybe_unused uint64_t h2_get_n64(const struct buffer *b, int o)
+{
+ return readv_n64(b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+
+/* Peeks an H2 frame header from offset <o> of buffer <b> into descriptor <h>.
+ * The algorithm is not obvious. It turns out that H2 headers are neither
+ * aligned nor do they use regular sizes. And to add to the trouble, the buffer
+ * may wrap so each byte read must be checked. The header is formed like this :
+ *
+ * b0 b1 b2 b3 b4 b5..b8
+ * +----------+---------+--------+----+----+----------------------+
+ * |len[23:16]|len[15:8]|len[7:0]|type|flag|sid[31:0] (big endian)|
+ * +----------+---------+--------+----+----+----------------------+
+ *
+ * Here we read a big-endian 64 bit word from h[1]. This way in a single read
+ * we get the sid properly aligned and ordered, and 16 bits of len properly
+ * ordered as well. The type and flags can be extracted using bit shifts from
+ * the word, and only one extra read is needed to fetch len[16:23].
+ * Returns zero if some bytes are missing, otherwise non-zero on success. The
+ * buffer is assumed not to contain any output data.
+ */
+static __maybe_unused int h2_peek_frame_hdr(const struct buffer *b, int o, struct h2_fh *h)
+{
+ uint64_t w;
+
+ if (b_data(b) < o + 9)
+ return 0;
+
+ w = h2_get_n64(b, o + 1);
+ h->len = *(uint8_t*)b_peek(b, o) << 16;
+ h->sid = w & 0x7FFFFFFF; /* RFC7540#4.1: R bit must be ignored */
+ h->ff = w >> 32;
+ h->ft = w >> 40;
+ h->len += w >> 48;
+ return 1;
+}
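+
+/* Worked example (illustrative): for the 9 header bytes
+ *   00 00 0c 04 00 00 00 00 00
+ * (a 12-byte SETTINGS frame on stream 0), the word read at offset 1 is
+ * w = 0x000c040000000000, hence sid = w & 0x7FFFFFFF = 0,
+ * ff = (uint8_t)(w >> 32) = 0x00, ft = (uint8_t)(w >> 40) = 0x04 (SETTINGS),
+ * and len = (b0 << 16) + (w >> 48) = 0 + 0x000c = 12.
+ */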
+
+/* skip the next 9 bytes corresponding to the frame header possibly parsed by
+ * h2_peek_frame_hdr() above.
+ */
+static inline __maybe_unused void h2_skip_frame_hdr(struct buffer *b)
+{
+ b_del(b, 9);
+}
+
+/* same as above, automatically advances the buffer on success */
+static inline __maybe_unused int h2_get_frame_hdr(struct buffer *b, struct h2_fh *h)
+{
+ int ret;
+
+ ret = h2_peek_frame_hdr(b, 0, h);
+ if (ret > 0)
+ h2_skip_frame_hdr(b);
+ return ret;
+}
+
+
+/* try to fragment the headers frame present at the beginning of buffer <b>,
+ * enforcing a limit of <mfs> bytes per frame. Returns 0 on failure, 1 on
+ * success. Typical causes of failure include a buffer not large enough to
+ * add extra frame headers. The existing frame size is read in the current
+ * frame. Its EH flag will be cleared if CONTINUATION frames need to be added,
+ * and its length will be adjusted. The stream ID for continuation frames will
+ * be copied from the initial frame's.
+ */
+static int h2_fragment_headers(struct buffer *b, uint32_t mfs)
+{
+ size_t remain = b->data - 9;
+ int extra_frames = (remain - 1) / mfs;
+ size_t fsize;
+ char *fptr;
+ int frame;
+
+ if (b->data <= mfs + 9)
+ return 1;
+
+ /* Too large a frame, we need to fragment it using CONTINUATION
+ * frames. We start from the end and move tails as needed.
+ */
+ if (b->data + extra_frames * 9 > b->size)
+ return 0;
+
+ for (frame = extra_frames; frame; frame--) {
+ fsize = ((remain - 1) % mfs) + 1;
+ remain -= fsize;
+
+ /* move data */
+ fptr = b->area + 9 + remain + (frame - 1) * 9;
+ memmove(fptr + 9, b->area + 9 + remain, fsize);
+ b->data += 9;
+
+ /* write new frame header */
+ h2_set_frame_size(fptr, fsize);
+ fptr[3] = H2_FT_CONTINUATION;
+ fptr[4] = (frame == extra_frames) ? H2_F_HEADERS_END_HEADERS : 0;
+ write_n32(fptr + 5, read_n32(b->area + 5));
+ }
+
+ b->area[4] &= ~H2_F_HEADERS_END_HEADERS;
+ h2_set_frame_size(b->area, remain);
+ return 1;
+}
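+
+/* Worked example (illustrative): with mfs=16 and a 41-byte buffer (9-byte
+ * HEADERS header plus 32 bytes of HPACK), remain=32 and
+ * extra_frames = (32-1)/16 = 1. The loop moves the last ((32-1)%16)+1 = 16
+ * payload bytes up by 9 bytes and writes a CONTINUATION header (carrying
+ * END_HEADERS) in front of them; the initial HEADERS frame keeps the first
+ * 16 bytes and has its END_HEADERS flag cleared.
+ */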
+
+
+/* marks stream <h2s> as CLOSED and decrements the number of active streams for
+ * its connection if the stream was not yet closed. Please use this exclusively
+ * before closing a stream to ensure stream count is well maintained. Note that
+ * it does explicitly support being called with a partially initialized h2s
+ * (e.g. sd==NULL).
+ */
+static inline void h2s_close(struct h2s *h2s)
+{
+ if (h2s->st != H2_SS_CLOSED) {
+ TRACE_ENTER(H2_EV_H2S_END, h2s->h2c->conn, h2s);
+ h2s->h2c->nb_streams--;
+ if (!h2s->id)
+ h2s->h2c->nb_reserved--;
+ if (h2s->sd && h2s_sc(h2s)) {
+ if (!se_fl_test(h2s->sd, SE_FL_EOS) && !b_data(&h2s->rxbuf))
+ h2s_notify_recv(h2s);
+ }
+ HA_ATOMIC_DEC(&h2s->h2c->px_counters->open_streams);
+
+ TRACE_LEAVE(H2_EV_H2S_END, h2s->h2c->conn, h2s);
+ }
+ h2s->st = H2_SS_CLOSED;
+}
+
+/* Detaches an H2 stream from its H2C and releases it to the H2S pool.
+ * h2s_destroy should only ever be called by the thread that owns the stream;
+ * that means that a tasklet should be used if we want to destroy the h2s
+ * from another thread.
+ */
+static void h2s_destroy(struct h2s *h2s)
+{
+ struct connection *conn = h2s->h2c->conn;
+
+ TRACE_ENTER(H2_EV_H2S_END, conn, h2s);
+
+ h2s_close(h2s);
+ eb32_delete(&h2s->by_id);
+ if (b_size(&h2s->rxbuf)) {
+ b_free(&h2s->rxbuf);
+ offer_buffers(NULL, 1);
+ }
+
+ if (h2s->subs)
+ h2s->subs->events = 0;
+
+ /* There's no need to explicitly call unsubscribe here, the only
+ * reference left would be in the h2c send_list/fctl_list, and if
+ * we're in it, we're getting out anyway
+ */
+ LIST_DEL_INIT(&h2s->list);
+
+ /* ditto, calling tasklet_free() here should be ok */
+ tasklet_free(h2s->shut_tl);
+ BUG_ON(h2s->sd && !se_fl_test(h2s->sd, SE_FL_ORPHAN));
+ sedesc_free(h2s->sd);
+ pool_free(pool_head_h2s, h2s);
+
+ TRACE_LEAVE(H2_EV_H2S_END, conn);
+}
+
+/* allocates a new stream <id> for connection <h2c> and adds it into h2c's
+ * stream tree. In case of error, nothing is added and NULL is returned. The
+ * causes of errors can be any failed memory allocation. The caller is
+ * responsible for checking if the connection may support an extra stream
+ * prior to calling this function.
+ */
+static struct h2s *h2s_new(struct h2c *h2c, int id)
+{
+ struct h2s *h2s;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, h2c->conn);
+
+ h2s = pool_alloc(pool_head_h2s);
+ if (!h2s)
+ goto out;
+
+ h2s->shut_tl = tasklet_new();
+ if (!h2s->shut_tl) {
+ pool_free(pool_head_h2s, h2s);
+ goto out;
+ }
+ h2s->subs = NULL;
+ h2s->shut_tl->process = h2_deferred_shut;
+ h2s->shut_tl->context = h2s;
+ LIST_INIT(&h2s->list);
+ h2s->h2c = h2c;
+ h2s->sd = NULL;
+ h2s->sws = 0;
+ h2s->flags = H2_SF_NONE;
+ h2s->errcode = H2_ERR_NO_ERROR;
+ h2s->st = H2_SS_IDLE;
+ h2s->status = 0;
+ h2s->body_len = 0;
+ h2s->rxbuf = BUF_NULL;
+ memset(h2s->upgrade_protocol, 0, sizeof(h2s->upgrade_protocol));
+
+ h2s->by_id.key = h2s->id = id;
+ if (id > 0)
+ h2c->max_id = id;
+ else
+ h2c->nb_reserved++;
+
+ eb32_insert(&h2c->streams_by_id, &h2s->by_id);
+ h2c->nb_streams++;
+ h2c->stream_cnt++;
+
+ HA_ATOMIC_INC(&h2c->px_counters->open_streams);
+ HA_ATOMIC_INC(&h2c->px_counters->total_streams);
+
+ TRACE_LEAVE(H2_EV_H2S_NEW, h2c->conn, h2s);
+ return h2s;
+ out:
+ TRACE_DEVEL("leaving in error", H2_EV_H2S_ERR|H2_EV_H2S_END, h2c->conn);
+ return NULL;
+}
+
+/* creates a new stream <id> on the h2c connection and returns it, or NULL in
+ * case of memory allocation error. <input> is used as input buffer for the new
+ * stream. On success, it is transferred to the stream and the mux is no longer
+ * responsible for it. On error, <input> is unchanged, thus the mux must still
+ * take care of it.
+ */
+static struct h2s *h2c_frt_stream_new(struct h2c *h2c, int id, struct buffer *input, uint32_t flags)
+{
+ struct session *sess = h2c->conn->owner;
+ struct h2s *h2s;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, h2c->conn);
+
+ if (h2c->nb_streams >= h2_settings_max_concurrent_streams) {
+ TRACE_ERROR("HEADERS frame causing MAX_CONCURRENT_STREAMS to be exceeded", H2_EV_H2S_NEW|H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ goto out;
+ }
+
+ h2s = h2s_new(h2c, id);
+ if (!h2s)
+ goto out_alloc;
+
+ h2s->sd = sedesc_new();
+ if (!h2s->sd)
+ goto out_close;
+ h2s->sd->se = h2s;
+ h2s->sd->conn = h2c->conn;
+ se_fl_set(h2s->sd, SE_FL_T_MUX | SE_FL_ORPHAN | SE_FL_NOT_FIRST);
+
+ /* FIXME: wrong analogy between ext-connect and websocket, this needs
+ * to be refined.
+ */
+ if (flags & H2_SF_EXT_CONNECT_RCVD)
+ se_fl_set(h2s->sd, SE_FL_WEBSOCKET);
+
+ /* The stream will record the request's accept date (which is either the
+ * end of the connection's or the date immediately after the previous
+ * request) and the idle time, which is the delay since the previous
+ * request. We can set the value now, it will be copied by stream_new().
+ */
+ sess->t_idle = tv_ms_elapsed(&sess->tv_accept, &now) - sess->t_handshake;
+
+ if (!sc_new_from_endp(h2s->sd, sess, input))
+ goto out_close;
+
+ h2c->nb_sc++;
+
+ /* We want the accept date presented to the next stream to be the one
+ * we have now, the handshake time to be null (since the next stream
+ * is not delayed by a handshake), and the idle time to count since
+ * right now.
+ */
+ sess->accept_date = date;
+ sess->tv_accept = now;
+ sess->t_handshake = 0;
+ sess->t_idle = 0;
+
+ /* OK done, the stream lives its own life now */
+ if (h2_frt_has_too_many_sc(h2c))
+ h2c->flags |= H2_CF_DEM_TOOMANY;
+ TRACE_LEAVE(H2_EV_H2S_NEW, h2c->conn);
+ return h2s;
+
+ out_close:
+ h2s_destroy(h2s);
+ out_alloc:
+ TRACE_ERROR("Failed to allocate a new stream", H2_EV_H2S_NEW|H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ out:
+ sess_log(sess);
+ TRACE_LEAVE(H2_EV_H2S_NEW|H2_EV_H2S_ERR|H2_EV_H2S_END, h2c->conn);
+ return NULL;
+}
+
+/* allocates a new stream associated to stream connector <sc> on the h2c
+ * connection and returns it, or NULL in case of memory allocation error or if
+ * the highest possible stream ID was reached.
+ */
+static struct h2s *h2c_bck_stream_new(struct h2c *h2c, struct stconn *sc, struct session *sess)
+{
+ struct h2s *h2s = NULL;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, h2c->conn);
+
+ if (h2c->nb_streams >= h2c->streams_limit) {
+ TRACE_ERROR("Aborting stream since negotiated limit is too low", H2_EV_H2S_NEW, h2c->conn);
+ goto out;
+ }
+
+ if (h2_streams_left(h2c) < 1) {
+ TRACE_ERROR("Aborting stream since no more streams left", H2_EV_H2S_NEW, h2c->conn);
+ goto out;
+ }
+
+ /* Defer choosing the ID until we send the first message to create the stream */
+ h2s = h2s_new(h2c, 0);
+ if (!h2s) {
+ TRACE_ERROR("Failed to allocate a new stream", H2_EV_H2S_NEW, h2c->conn);
+ goto out;
+ }
+
+ if (sc_attach_mux(sc, h2s, h2c->conn) < 0) {
+ TRACE_ERROR("Failed to allocate a new stream", H2_EV_H2S_NEW, h2c->conn);
+ h2s_destroy(h2s);
+ h2s = NULL;
+ goto out;
+ }
+ h2s->sd = sc->sedesc;
+ h2s->sess = sess;
+ h2c->nb_sc++;
+
+ out:
+ if (likely(h2s))
+ TRACE_LEAVE(H2_EV_H2S_NEW, h2c->conn, h2s);
+ else
+ TRACE_LEAVE(H2_EV_H2S_NEW|H2_EV_H2S_ERR|H2_EV_H2S_END, h2c->conn, h2s);
+ return h2s;
+}
+
+/* try to send a settings frame on the connection. Returns > 0 on success, 0 if
+ * it couldn't do anything. It may return an error in h2c. See RFC7540#11.3 for
+ * the various settings codes.
+ */
+static int h2c_send_settings(struct h2c *h2c)
+{
+ struct buffer *res;
+ char buf_data[100]; // enough for 15 settings
+ struct buffer buf;
+ int mfs;
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+
+ if (h2c_mux_busy(h2c, NULL)) {
+ h2c->flags |= H2_CF_DEM_MBUSY;
+ goto out;
+ }
+
+ chunk_init(&buf, buf_data, sizeof(buf_data));
+ chunk_memcpy(&buf,
+ "\x00\x00\x00" /* length : 0 for now */
+ "\x04\x00" /* type : 4 (settings), flags : 0 */
+ "\x00\x00\x00\x00", /* stream ID : 0 */
+ 9);
+
+ if (h2c->flags & H2_CF_IS_BACK) {
+ /* send settings_enable_push=0 */
+ chunk_memcat(&buf, "\x00\x02\x00\x00\x00\x00", 6);
+ }
+
+ /* rfc 8441 #3 SETTINGS_ENABLE_CONNECT_PROTOCOL=1,
+ * sent automatically unless disabled in the global config */
+ if (!(global.tune.options & GTUNE_DISABLE_H2_WEBSOCKET))
+ chunk_memcat(&buf, "\x00\x08\x00\x00\x00\x01", 6);
+
+ if (h2_settings_header_table_size != 4096) {
+ char str[6] = "\x00\x01"; /* header_table_size */
+
+ write_n32(str + 2, h2_settings_header_table_size);
+ chunk_memcat(&buf, str, 6);
+ }
+
+ if (h2_settings_initial_window_size != 65535) {
+ char str[6] = "\x00\x04"; /* initial_window_size */
+
+ write_n32(str + 2, h2_settings_initial_window_size);
+ chunk_memcat(&buf, str, 6);
+ }
+
+ if (h2_settings_max_concurrent_streams != 0) {
+ char str[6] = "\x00\x03"; /* max_concurrent_streams */
+
+ /* Note: 0 means "unlimited" for haproxy's config but not for
+ * the protocol, so never send this value!
+ */
+ write_n32(str + 2, h2_settings_max_concurrent_streams);
+ chunk_memcat(&buf, str, 6);
+ }
+
+ mfs = h2_settings_max_frame_size;
+ if (mfs > global.tune.bufsize)
+ mfs = global.tune.bufsize;
+
+ if (!mfs)
+ mfs = global.tune.bufsize;
+
+ if (mfs != 16384) {
+ char str[6] = "\x00\x05"; /* max_frame_size */
+
+ /* note: similarly we could also emit MAX_HEADER_LIST_SIZE to
+ * match bufsize - rewrite size, but at the moment it seems
+ * that clients don't take care of it.
+ */
+ write_n32(str + 2, mfs);
+ chunk_memcat(&buf, str, 6);
+ }
+
+ h2_set_frame_size(buf.area, buf.data - 9);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(buf.area, buf.data));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+ return ret;
+}
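+
+/* Wire-format note (illustrative): each SETTINGS entry above is 6 bytes, a
+ * 16-bit identifier followed by a 32-bit value, both big-endian. E.g. the
+ * "\x00\x02\x00\x00\x00\x00" literal is SETTINGS_ENABLE_PUSH (0x2) set to 0,
+ * and a header_table_size of 8192 would serialize as
+ * "\x00\x01\x00\x00\x20\x00".
+ */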
+
+/* Try to receive a connection preface, then upon success try to send our
+ * preface which is a SETTINGS frame. Returns > 0 on success or zero on
+ * missing data. It may return an error in h2c.
+ */
+static int h2c_frt_recv_preface(struct h2c *h2c)
+{
+ int ret1;
+ int ret2;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_PREFACE, h2c->conn);
+
+ ret1 = b_isteq(&h2c->dbuf, 0, b_data(&h2c->dbuf), ist(H2_CONN_PREFACE));
+
+ if (unlikely(ret1 <= 0)) {
+ if (!ret1)
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ if (ret1 < 0 || conn_xprt_read0_pending(h2c->conn)) {
+ TRACE_ERROR("I/O error or short read", H2_EV_RX_FRAME|H2_EV_RX_PREFACE, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (b_data(&h2c->dbuf) ||
+ !(((const struct session *)h2c->conn->owner)->fe->options & PR_O_IGNORE_PRB))
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ }
+ ret2 = 0;
+ goto out;
+ }
+
+ ret2 = h2c_send_settings(h2c);
+ if (ret2 > 0)
+ b_del(&h2c->dbuf, ret1);
+ out:
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_PREFACE, h2c->conn);
+ return ret2;
+}
+
+/* Try to send a connection preface, then upon success try to send our
+ * preface which is a SETTINGS frame. Returns > 0 on success or zero on
+ * missing data. It may return an error in h2c.
+ */
+static int h2c_bck_send_preface(struct h2c *h2c)
+{
+ struct buffer *res;
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_PREFACE, h2c->conn);
+
+ if (h2c_mux_busy(h2c, NULL)) {
+ h2c->flags |= H2_CF_DEM_MBUSY;
+ goto out;
+ }
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ if (!b_data(res)) {
+ /* preface not yet sent */
+ ret = b_istput(res, ist(H2_CONN_PREFACE));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto out;
+ }
+ }
+ }
+ ret = h2c_send_settings(h2c);
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_PREFACE, h2c->conn);
+ return ret;
+}
+
+/* try to send a GOAWAY frame on the connection to report an error or a graceful
+ * shutdown, with h2c->errcode as the error code. Returns > 0 on success or zero
+ * if nothing was done. It uses h2c->last_sid as the advertised ID, or copies it
+ * from h2c->max_id if it's not set yet (<0). In case of lack of room to write
+ * the message, it subscribes the requester (either <h2s> or <h2c>) to future
+ * notifications. It sets H2_CF_GOAWAY_SENT on success, and H2_CF_GOAWAY_FAILED
+ * on unrecoverable failure. It will not attempt to send one again in this last
+ * case, nor will it send one if settings were not sent (e.g. still waiting for
+ * a preface) so that it is safe to use h2c_error() to report such errors.
+ */
+static int h2c_send_goaway_error(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer *res;
+ char str[17];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_GOAWAY, h2c->conn);
+
+ if ((h2c->flags & H2_CF_GOAWAY_FAILED) || h2c->st0 < H2_CS_SETTINGS1) {
+ ret = 1; // claim that it worked
+ goto out;
+ }
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ if (h2s)
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ else
+ h2c->flags |= H2_CF_DEM_MBUSY;
+ goto out;
+ }
+
+ /* len: 8, type: 7, flags: none, sid: 0 */
+ memcpy(str, "\x00\x00\x08\x07\x00\x00\x00\x00\x00", 9);
+
+ if (h2c->last_sid < 0)
+ h2c->last_sid = h2c->max_id;
+
+ write_n32(str + 9, h2c->last_sid);
+ write_n32(str + 13, h2c->errcode);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ if (h2s)
+ h2s->flags |= H2_SF_BLK_MROOM;
+ else
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 17));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ if (h2s)
+ h2s->flags |= H2_SF_BLK_MROOM;
+ else
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+ else {
+ /* we cannot report this error using GOAWAY, so we mark
+ * it and claim a success.
+ */
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+ ret = 1;
+ goto out;
+ }
+ }
+ h2c->flags |= H2_CF_GOAWAY_SENT;
+
+ /* some codes are not for real errors, just attempts to close cleanly */
+ switch (h2c->errcode) {
+ case H2_ERR_NO_ERROR:
+ case H2_ERR_ENHANCE_YOUR_CALM:
+ case H2_ERR_REFUSED_STREAM:
+ case H2_ERR_CANCEL:
+ break;
+ default:
+ HA_ATOMIC_INC(&h2c->px_counters->goaway_resp);
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_GOAWAY, h2c->conn);
+ return ret;
+}
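+
+/* For reference, the 17-byte GOAWAY frame built above follows the
+ * RFC7540#6.8 layout, with no trailing debug data:
+ *
+ *   bytes  0-2  : length = 8      (payload size, header excluded)
+ *   byte   3    : type   = 0x07   (GOAWAY)
+ *   byte   4    : flags  = 0x00
+ *   bytes  5-8  : stream id = 0   (connection-level frame)
+ *   bytes  9-12 : last processed stream id (h2c->last_sid)
+ *   bytes 13-16 : error code (h2c->errcode)
+ */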
+
+/* Try to send an RST_STREAM frame on the connection for the indicated stream
+ * during mux operations. This stream must be valid and cannot be closed
+ * already. h2s->id will be used for the stream ID and h2s->errcode will be
+ * used for the error code. h2s->st will be updated to H2_SS_CLOSED if it was
+ * not yet.
+ *
+ * Returns > 0 on success or zero if nothing was done. In case of lack of room
+ * to write the message, it subscribes the stream to future notifications.
+ */
+static int h2s_send_rst_stream(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer *res;
+ char str[13];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+
+ if (!h2s || h2s->st == H2_SS_CLOSED) {
+ ret = 1;
+ goto out;
+ }
+
+ /* RFC7540#5.4.2: To avoid looping, an endpoint MUST NOT send a
+ * RST_STREAM in response to a RST_STREAM frame.
+ */
+ if (h2c->dsi == h2s->id && h2c->dft == H2_FT_RST_STREAM) {
+ ret = 1;
+ goto ignore;
+ }
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ goto out;
+ }
+
+ /* len: 4, type: 3, flags: none */
+ memcpy(str, "\x00\x00\x04\x03\x00", 5);
+ write_n32(str + 5, h2s->id);
+ write_n32(str + 9, h2s->errcode);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 13));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ goto out;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ignore:
+ h2s->flags |= H2_SF_RST_SENT;
+ h2s_close(h2s);
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+ return ret;
+}
+
+/* Try to send an RST_STREAM frame on the connection for the stream being
+ * demuxed using h2c->dsi for the stream ID. It will use h2s->errcode as the
+ * error code, even if the stream is one of the dummy ones, and will update
+ * h2s->st to H2_SS_CLOSED if it was not yet.
+ *
+ * Returns > 0 on success or zero if nothing was done. In case of lack of room
+ * to write the message, it blocks the demuxer and subscribes it to future
+ * notifications. It's worth mentioning that an RST may even be sent for a
+ * closed stream.
+ */
+static int h2c_send_rst_stream(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer *res;
+ char str[13];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+
+ /* RFC7540#5.4.2: To avoid looping, an endpoint MUST NOT send a
+ * RST_STREAM in response to a RST_STREAM frame.
+ */
+ if (h2c->dft == H2_FT_RST_STREAM) {
+ ret = 1;
+ goto ignore;
+ }
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ h2c->flags |= H2_CF_DEM_MBUSY;
+ goto out;
+ }
+
+ /* len: 4, type: 3, flags: none */
+ memcpy(str, "\x00\x00\x04\x03\x00", 5);
+
+ write_n32(str + 5, h2c->dsi);
+ write_n32(str + 9, h2s->errcode);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 13));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ignore:
+ if (h2s->id) {
+ h2s->flags |= H2_SF_RST_SENT;
+ h2s_close(h2s);
+ }
+
+ out:
+ HA_ATOMIC_INC(&h2c->px_counters->rst_stream_resp);
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+ return ret;
+}
+
+/* try to send an empty DATA frame with the ES flag set to notify about the
+ * end of stream and match a shutdown(write). If an ES was already sent as
+ * indicated by the HLOC/ERROR/CLOSED states, nothing is done. Returns > 0
+ * on success or zero if nothing was done. In case of lack of room to write the
+ * message, it subscribes the requesting stream to future notifications.
+ */
+static int h2_send_empty_data_es(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+ struct buffer *res;
+ char str[9];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_TX_EOI, h2c->conn, h2s);
+
+ if (h2s->st == H2_SS_HLOC || h2s->st == H2_SS_ERROR || h2s->st == H2_SS_CLOSED) {
+ ret = 1;
+ goto out;
+ }
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ goto out;
+ }
+
+ /* len: 0x000000, type: 0(DATA), flags: ES=1 */
+ memcpy(str, "\x00\x00\x00\x00\x01", 5);
+ write_n32(str + 5, h2s->id);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 9));
+ if (likely(ret > 0)) {
+ h2s->flags |= H2_SF_ES_SENT;
+ }
+ else if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_TX_EOI, h2c->conn, h2s);
+ return ret;
+}
+
+/* wake a specific stream and assign its stream connector some SE_FL_* flags
+ * among SE_FL_ERR_PENDING and SE_FL_ERROR if needed. The stream's state
+ * is automatically updated accordingly. If the stream is orphaned, it is
+ * destroyed.
+ */
+static void h2s_wake_one_stream(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+
+ TRACE_ENTER(H2_EV_H2S_WAKE, h2c->conn, h2s);
+
+ if (!h2s_sc(h2s)) {
+ /* this stream was already orphaned */
+ h2s_destroy(h2s);
+ TRACE_DEVEL("leaving with no h2s", H2_EV_H2S_WAKE, h2c->conn);
+ return;
+ }
+
+ if (h2c_read0_pending(h2s->h2c)) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else if (h2s->st == H2_SS_HLOC)
+ h2s_close(h2s);
+ }
+
+ if ((h2s->h2c->st0 >= H2_CS_ERROR || h2s->h2c->conn->flags & CO_FL_ERROR) ||
+ (h2s->h2c->last_sid > 0 && (!h2s->id || h2s->id > h2s->h2c->last_sid))) {
+ se_fl_set(h2s->sd, SE_FL_ERR_PENDING);
+ if (se_fl_test(h2s->sd, SE_FL_EOS))
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+
+ if (h2s->st < H2_SS_ERROR)
+ h2s->st = H2_SS_ERROR;
+ }
+
+ h2s_alert(h2s);
+ TRACE_LEAVE(H2_EV_H2S_WAKE, h2c->conn);
+}
+
+/* wake the streams attached to the connection, whose id is greater than <last>
+ * or unassigned.
+ */
+static void h2_wake_some_streams(struct h2c *h2c, int last)
+{
+ struct eb32_node *node;
+ struct h2s *h2s;
+
+ TRACE_ENTER(H2_EV_H2S_WAKE, h2c->conn);
+
+ /* Wake all streams with ID > last */
+ node = eb32_lookup_ge(&h2c->streams_by_id, last + 1);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ node = eb32_next(node);
+ h2s_wake_one_stream(h2s);
+ }
+
+ /* Wake all streams with unassigned ID (ID == 0) */
+ node = eb32_lookup(&h2c->streams_by_id, 0);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (h2s->id > 0)
+ break;
+ node = eb32_next(node);
+ h2s_wake_one_stream(h2s);
+ }
+
+ TRACE_LEAVE(H2_EV_H2S_WAKE, h2c->conn);
+}
+
+/* Wake up all blocked streams whose window size has become positive after the
+ * mux's initial window was adjusted. This should be done after having processed
+ * SETTINGS frames which have updated the mux's initial window size.
+ */
+static void h2c_unblock_sfctl(struct h2c *h2c)
+{
+ struct h2s *h2s;
+ struct eb32_node *node;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ node = eb32_first(&h2c->streams_by_id);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (h2s->flags & H2_SF_BLK_SFCTL && h2s_mws(h2s) > 0) {
+ h2s->flags &= ~H2_SF_BLK_SFCTL;
+ LIST_DEL_INIT(&h2s->list);
+ if ((h2s->subs && h2s->subs->events & SUB_RETRY_SEND) ||
+ h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ node = eb32_next(node);
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_WAKE, h2c->conn);
+}
+
+/* processes the SETTINGS frame pending in the demux buffer, and ACKs it if
+ * needed. Returns > 0 on success or zero on missing data. It may
+ * return an error in h2c. The caller must have already verified frame length
+ * and stream ID validity. Described in RFC7540#6.5.
+ */
+static int h2c_handle_settings(struct h2c *h2c)
+{
+ unsigned int offset;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+
+ if (h2c->dff & H2_F_SETTINGS_ACK) {
+ if (h2c->dfl) {
+ error = H2_ERR_FRAME_SIZE_ERROR;
+ goto fail;
+ }
+ goto done;
+ }
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out0;
+ }
+
+ /* parse the frame */
+ for (offset = 0; offset < h2c->dfl; offset += 6) {
+ uint16_t type = h2_get_n16(&h2c->dbuf, offset);
+ int32_t arg = h2_get_n32(&h2c->dbuf, offset + 2);
+
+ switch (type) {
+ case H2_SETTINGS_INITIAL_WINDOW_SIZE:
+ /* we need to update all existing streams with the
+ * difference from the previous iws.
+ */
+ if (arg < 0) { // RFC7540#6.5.2
+ error = H2_ERR_FLOW_CONTROL_ERROR;
+ goto fail;
+ }
+ h2c->miw = arg;
+ break;
+ case H2_SETTINGS_MAX_FRAME_SIZE:
+ if (arg < 16384 || arg > 16777215) { // RFC7540#6.5.2
+ TRACE_ERROR("MAX_FRAME_SIZE out of range", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+ h2c->mfs = arg;
+ break;
+ case H2_SETTINGS_HEADER_TABLE_SIZE:
+ h2c->flags |= H2_CF_SHTS_UPDATED;
+ break;
+ case H2_SETTINGS_ENABLE_PUSH:
+ if (arg < 0 || arg > 1) { // RFC7540#6.5.2
+ TRACE_ERROR("ENABLE_PUSH out of range", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+ break;
+ case H2_SETTINGS_MAX_CONCURRENT_STREAMS:
+ if (h2c->flags & H2_CF_IS_BACK) {
+ /* the limit is only for the backend; for the frontend it is our limit */
+ if ((unsigned int)arg > h2_settings_max_concurrent_streams)
+ arg = h2_settings_max_concurrent_streams;
+ h2c->streams_limit = arg;
+ }
+ break;
+ case H2_SETTINGS_ENABLE_CONNECT_PROTOCOL:
+ if (arg == 1)
+ h2c->flags |= H2_CF_RCVD_RFC8441;
+ break;
+ }
+ }
+
+ /* need to ACK this frame now */
+ h2c->st0 = H2_CS_FRAME_A;
+ done:
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ return 1;
+ fail:
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ h2c_error(h2c, error);
+ out0:
+ TRACE_DEVEL("leaving with missing data or error", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ return 0;
+}
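+
+/* For reference, each SETTINGS entry is exactly 6 octets: a 16-bit identifier
+ * followed by a 32-bit value (RFC7540#6.5.1), hence the "offset += 6" stride
+ * in the parsing loop above. E.g. a frame with dfl=12 carries two settings; a
+ * length that is not a multiple of 6 is expected to have been rejected as a
+ * FRAME_SIZE_ERROR by the frame checks run before this handler.
+ */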
+
+/* try to send an ACK for a SETTINGS frame on the connection. Returns > 0 on
+ * success or zero if nothing was done. It may return an error in h2c.
+ */
+static int h2c_ack_settings(struct h2c *h2c)
+{
+ struct buffer *res;
+ char str[9];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+
+ if (h2c_mux_busy(h2c, NULL)) {
+ h2c->flags |= H2_CF_DEM_MBUSY;
+ goto out;
+ }
+
+ memcpy(str,
+ "\x00\x00\x00" /* length : 0 (no data) */
+ "\x04" "\x01" /* type : 4, flags : ACK */
+ "\x00\x00\x00\x00" /* stream ID */, 9);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 9));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+ return ret;
+}
+
+/* processes a PING frame and schedules an ACK if needed. Returns > 0 on
+ * success or zero on missing data. The caller must have already verified
+ * frame length and stream ID validity.
+ */
+static int h2c_handle_ping(struct h2c *h2c)
+{
+ /* schedule a response */
+ if (!(h2c->dff & H2_F_PING_ACK))
+ h2c->st0 = H2_CS_FRAME_A;
+ return 1;
+}
+
+/* Try to send a window update for stream id <sid> and value <increment>.
+ * Returns > 0 on success or zero on missing room or failure. It may return an
+ * error in h2c.
+ */
+static int h2c_send_window_update(struct h2c *h2c, int sid, uint32_t increment)
+{
+ struct buffer *res;
+ char str[13];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+
+ if (h2c_mux_busy(h2c, NULL)) {
+ h2c->flags |= H2_CF_DEM_MBUSY;
+ goto out;
+ }
+
+ /* length: 4, type: 8, flags: none */
+ memcpy(str, "\x00\x00\x04\x08\x00", 5);
+ write_n32(str + 5, sid);
+ write_n32(str + 9, increment);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 13));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ return ret;
+}
+
+/* try to send pending window update for the connection. It's safe to call it
+ * with no pending updates. Returns > 0 on success or zero on missing room or
+ * failure. It may return an error in h2c.
+ */
+static int h2c_send_conn_wu(struct h2c *h2c)
+{
+ int ret = 1;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+
+ if (h2c->rcvd_c <= 0)
+ goto out;
+
+ if (!(h2c->flags & H2_CF_WINDOW_OPENED)) {
+ /* increase the advertised connection window to 2G on
+ * first update.
+ */
+ h2c->flags |= H2_CF_WINDOW_OPENED;
+ h2c->rcvd_c += H2_INITIAL_WINDOW_INCREMENT;
+ }
+
+ /* send WU for the connection */
+ ret = h2c_send_window_update(h2c, 0, h2c->rcvd_c);
+ if (ret > 0)
+ h2c->rcvd_c = 0;
+
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ return ret;
+}
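+
+/* A worked example of the one-time increase above: RFC7540#6.9.2 sets the
+ * initial connection window to 65535 octets and caps any window at 2^31-1.
+ * Assuming H2_INITIAL_WINDOW_INCREMENT is defined elsewhere in this file as
+ * (2^31 - 1) - 65535 = 2147418112, the first WINDOW_UPDATE raises the
+ * advertised window to its maximum once, and later updates merely refill
+ * what the peer has consumed.
+ */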
+
+/* try to send pending window update for the current dmux stream. It's safe to
+ * call it with no pending updates. Returns > 0 on success or zero on missing
+ * room or failure. It may return an error in h2c.
+ */
+static int h2c_send_strm_wu(struct h2c *h2c)
+{
+ int ret = 1;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+
+ if (h2c->rcvd_s <= 0)
+ goto out;
+
+ /* send WU for the stream */
+ ret = h2c_send_window_update(h2c, h2c->dsi, h2c->rcvd_s);
+ if (ret > 0)
+ h2c->rcvd_s = 0;
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ return ret;
+}
+
+/* try to send an ACK for a PING frame on the connection. Returns > 0 on
+ * success, or zero on missing data or lack of room. It may return an error
+ * in h2c.
+ */
+static int h2c_ack_ping(struct h2c *h2c)
+{
+ struct buffer *res;
+ char str[17];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_PING, h2c->conn);
+
+ if (b_data(&h2c->dbuf) < 8)
+ goto out;
+
+ if (h2c_mux_busy(h2c, NULL)) {
+ h2c->flags |= H2_CF_DEM_MBUSY;
+ goto out;
+ }
+
+ memcpy(str,
+ "\x00\x00\x08" /* length : 8 (same payload) */
+ "\x06" "\x01" /* type : 6, flags : ACK */
+ "\x00\x00\x00\x00" /* stream ID */, 9);
+
+ /* copy the original payload */
+ h2_get_buf_bytes(str + 9, 8, &h2c->dbuf, 0);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 17));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_PING, h2c->conn);
+ return ret;
+}
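+
+/* For reference, the PING ACK emitted above is 17 bytes on the wire: the
+ * 9-byte frame header (length=8, type=0x06, flags=ACK, sid=0) followed by
+ * the peer's 8-byte opaque payload echoed verbatim, as RFC7540#6.7 requires.
+ */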
+
+/* processes the WINDOW_UPDATE frame pending in the demux buffer.
+ * Returns > 0 on success or zero on missing data. It may return an error in
+ * h2c or h2s. The caller must have already verified frame length and stream ID
+ * validity. Described in RFC7540#6.9.
+ */
+static int h2c_handle_window_update(struct h2c *h2c, struct h2s *h2s)
+{
+ int32_t inc;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out0;
+ }
+
+ inc = h2_get_n32(&h2c->dbuf, 0);
+
+ if (h2c->dsi != 0) {
+ /* stream window update */
+
+ /* it's not an error to receive WU on a closed stream */
+ if (h2s->st == H2_SS_CLOSED)
+ goto done;
+
+ if (!inc) {
+ TRACE_ERROR("stream WINDOW_UPDATE inc=0", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn, h2s);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+
+ if (h2s_mws(h2s) >= 0 && h2s_mws(h2s) + inc < 0) {
+ TRACE_ERROR("stream WINDOW_UPDATE inc<0", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn, h2s);
+ error = H2_ERR_FLOW_CONTROL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+
+ h2s->sws += inc;
+ if (h2s_mws(h2s) > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) {
+ h2s->flags &= ~H2_SF_BLK_SFCTL;
+ LIST_DEL_INIT(&h2s->list);
+ if ((h2s->subs && h2s->subs->events & SUB_RETRY_SEND) ||
+ h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ }
+ else {
+ /* connection window update */
+ if (!inc) {
+ TRACE_ERROR("conn WINDOW_UPDATE inc=0", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto conn_err;
+ }
+
+ if (h2c->mws >= 0 && h2c->mws + inc < 0) {
+ error = H2_ERR_FLOW_CONTROL_ERROR;
+ goto conn_err;
+ }
+
+ h2c->mws += inc;
+ }
+
+ done:
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ return 1;
+
+ conn_err:
+ h2c_error(h2c, error);
+ out0:
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ return 0;
+
+ strm_err:
+ h2s_error(h2s, error);
+ h2c->st0 = H2_CS_FRAME_E;
+ TRACE_DEVEL("leaving on stream error", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ return 0;
+}
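+
+/* A worked example of the overflow checks above: windows are capped at
+ * 2^31-1 (RFC7540#6.9.1). With a current window of 2^31-1 and inc=1 the sum
+ * wraps negative in 32-bit arithmetic, so the "mws >= 0 && mws + inc < 0"
+ * test catches exactly the updates that would exceed the cap and turns them
+ * into FLOW_CONTROL_ERROR.
+ */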
+
+/* processes a GOAWAY frame, and signals all streams whose ID is greater than
+ * the last ID. Returns > 0 on success or zero on missing data. The caller must
+ * have already verified frame length and stream ID validity. Described in
+ * RFC7540#6.8.
+ */
+static int h2c_handle_goaway(struct h2c *h2c)
+{
+ int last;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn);
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ TRACE_DEVEL("leaving on missing data", H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ return 0;
+ }
+
+ last = h2_get_n32(&h2c->dbuf, 0);
+ h2c->errcode = h2_get_n32(&h2c->dbuf, 4);
+ if (h2c->last_sid < 0)
+ h2c->last_sid = last;
+ h2_wake_some_streams(h2c, last);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn);
+ return 1;
+}
+
+/* processes a PRIORITY frame, and either skips it or rejects if it is
+ * invalid. Returns > 0 on success or zero on missing data. It may return an
+ * error in h2c. The caller must have already verified frame length and stream
+ * ID validity. Described in RFC7540#6.3.
+ */
+static int h2c_handle_priority(struct h2c *h2c)
+{
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ TRACE_DEVEL("leaving on missing data", H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ return 0;
+ }
+
+ if (h2_get_n32(&h2c->dbuf, 0) == h2c->dsi) {
+ /* 7540#5.3 : can't depend on itself */
+ TRACE_ERROR("PRIORITY depends on itself", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ TRACE_DEVEL("leaving on error", H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+ return 0;
+ }
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+ return 1;
+}
+
+/* processes an RST_STREAM frame, and sets the 32-bit error code on the stream.
+ * Returns > 0 on success or zero on missing data. The caller must have already
+ * verified frame length and stream ID validity. Described in RFC7540#6.4.
+ */
+static int h2c_handle_rst_stream(struct h2c *h2c, struct h2s *h2s)
+{
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ TRACE_DEVEL("leaving on missing data", H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ return 0;
+ }
+
+ /* late RST, already handled */
+ if (h2s->st == H2_SS_CLOSED) {
+ TRACE_DEVEL("leaving on stream closed", H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ return 1;
+ }
+
+ h2s->errcode = h2_get_n32(&h2c->dbuf, 0);
+ h2s_close(h2s);
+
+ if (h2s_sc(h2s)) {
+ se_fl_set_error(h2s->sd);
+ h2s_alert(h2s);
+ }
+
+ h2s->flags |= H2_SF_RST_RCVD;
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ return 1;
+}
+
+/* processes a HEADERS frame. Returns h2s on success or NULL on missing data.
+ * It may return an error in h2c or h2s. The caller must consider that the
+ * return value is the new h2s in case one was allocated (most common case).
+ * Described in RFC7540#6.2. Most of the errors here are reported as
+ * connection errors since it's impossible to recover from such errors after
+ * the compression context has been altered.
+ */
+static struct h2s *h2c_frt_handle_headers(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer rxbuf = BUF_NULL;
+ unsigned long long body_len = 0;
+ uint32_t flags = 0;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+
+ if (!b_size(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out; // empty buffer
+ }
+
+ if (b_data(&h2c->dbuf) < h2c->dfl && !b_full(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out; // incomplete frame
+ }
+
+ /* now either the frame is complete or the buffer is full */
+ if (h2s->st != H2_SS_IDLE) {
+ /* The stream exists/existed, this must be a trailers frame */
+ if (h2s->st != H2_SS_CLOSED) {
+ error = h2c_decode_headers(h2c, &h2s->rxbuf, &h2s->flags, &body_len, NULL);
+ /* unrecoverable error ? */
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_USER("Unrecoverable error decoding H2 trailers", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ sess_log(h2c->conn->owner);
+ goto out;
+ }
+
+ if (error == 0) {
+ /* Demux not blocked because of the stream, it is an incomplete frame */
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out; // missing data
+ }
+
+ if (error < 0) {
+ /* Failed to decode this frame (e.g. too large request)
+ * but the HPACK decompressor is still synchronized.
+ */
+ sess_log(h2c->conn->owner);
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ TRACE_USER("Stream error decoding H2 trailers", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ h2c->st0 = H2_CS_FRAME_E;
+ goto out;
+ }
+ goto done;
+ }
+ /* the connection was already killed by an RST, let's consume
+ * the data and send another RST.
+ */
+ error = h2c_decode_headers(h2c, &rxbuf, &flags, &body_len, NULL);
+ sess_log(h2c->conn->owner);
+ h2s = (struct h2s*)h2_error_stream;
+ goto send_rst;
+ }
+ else if (h2c->dsi <= h2c->max_id || !(h2c->dsi & 1)) {
+ /* RFC7540#5.1.1 stream id > prev ones, and must be odd here */
+ error = H2_ERR_PROTOCOL_ERROR;
+ TRACE_ERROR("HEADERS on invalid stream ID", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ sess_log(h2c->conn->owner);
+ goto conn_err;
+ }
+ else if (h2c->flags & H2_CF_DEM_TOOMANY)
+ goto out; // IDLE but too many sc still present
+
+ error = h2c_decode_headers(h2c, &rxbuf, &flags, &body_len, NULL);
+
+ /* unrecoverable error ? */
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_USER("Unrecoverable error decoding H2 request", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ sess_log(h2c->conn->owner);
+ goto out;
+ }
+
+ if (error <= 0) {
+ if (error == 0) {
+ /* Demux not blocked because of the stream, it is an incomplete frame */
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out; // missing data
+ }
+
+ /* Failed to decode this stream (e.g. too large request)
+ * but the HPACK decompressor is still synchronized.
+ */
+ sess_log(h2c->conn->owner);
+ h2s = (struct h2s*)h2_error_stream;
+ goto send_rst;
+ }
+
+ TRACE_USER("rcvd H2 request ", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW, h2c->conn, 0, &rxbuf);
+
+ /* Now we cannot roll back and we won't come back here anymore for this
+ * stream, this stream ID is open.
+ */
+ if (h2c->dsi > h2c->max_id)
+ h2c->max_id = h2c->dsi;
+
+ /* Note: we don't emit any other logs below because if we return
+ * positively from h2c_frt_stream_new(), the stream will report the error,
+ * and if we return in error, h2c_frt_stream_new() will emit the error.
+ *
+ * Xfer the rxbuf to the stream. On success, the new stream owns the
+ * rxbuf. On error, it is released here.
+ */
+ h2s = h2c_frt_stream_new(h2c, h2c->dsi, &rxbuf, flags);
+ if (!h2s) {
+ h2s = (struct h2s*)h2_refused_stream;
+ goto send_rst;
+ }
+
+ h2s->st = H2_SS_OPEN;
+ h2s->flags |= flags;
+ h2s->body_len = body_len;
+
+ done:
+ if (h2c->dff & H2_F_HEADERS_END_STREAM)
+ h2s->flags |= H2_SF_ES_RCVD;
+
+ if (h2s->flags & H2_SF_ES_RCVD) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else
+ h2s_close(h2s);
+ }
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return h2s;
+
+ conn_err:
+ h2c_error(h2c, error);
+ goto out;
+
+ out:
+ h2_release_buf(h2c, &rxbuf);
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return NULL;
+
+ send_rst:
+ /* make the demux send an RST for the current stream. We may only
+ * do this if we're certain that the HEADERS frame was properly
+ * decompressed so that the HPACK decoder is still kept up to date.
+ */
+ h2_release_buf(h2c, &rxbuf);
+ h2c->st0 = H2_CS_FRAME_E;
+
+ TRACE_USER("rejected H2 request", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ TRACE_DEVEL("leaving on error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return h2s;
+}
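+
+/* Summary of the h2c_decode_headers() return convention as relied upon by
+ * the call sites above: > 0 means the HEADERS frame (and any following
+ * CONTINUATIONs) was fully decoded; 0 means the frame is still incomplete;
+ * < 0 means the frame could not be converted (e.g. request too large) but
+ * the HPACK decompressor remained synchronized, so the stream may be reset
+ * without killing the connection.
+ */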
+
+/* processes a HEADERS frame. Returns h2s on success or NULL on missing data.
+ * It may return an error in h2c or h2s. Described in RFC7540#6.2. Most of the
+ * errors here are reported as connection errors since it's impossible to
+ * recover from such errors after the compression context has been altered.
+ */
+static struct h2s *h2c_bck_handle_headers(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer rxbuf = BUF_NULL;
+ unsigned long long body_len = 0;
+ uint32_t flags = 0;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+
+ if (!b_size(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // empty buffer
+ }
+
+ if (b_data(&h2c->dbuf) < h2c->dfl && !b_full(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // incomplete frame
+ }
+
+ if (h2s->st != H2_SS_CLOSED) {
+ error = h2c_decode_headers(h2c, &h2s->rxbuf, &h2s->flags, &h2s->body_len, h2s->upgrade_protocol);
+ }
+ else {
+ /* the connection was already killed by an RST, let's consume
+ * the data and send another RST.
+ */
+ error = h2c_decode_headers(h2c, &rxbuf, &flags, &body_len, NULL);
+ h2s = (struct h2s*)h2_error_stream;
+ h2c->st0 = H2_CS_FRAME_E;
+ goto send_rst;
+ }
+
+ /* unrecoverable error ? */
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_USER("Unrecoverable error decoding H2 HEADERS", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ if (h2s->st != H2_SS_OPEN && h2s->st != H2_SS_HLOC) {
+ /* RFC7540#5.1 */
+ TRACE_ERROR("response HEADERS in invalid state", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_STREAM_CLOSED);
+ h2c->st0 = H2_CS_FRAME_E;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto fail;
+ }
+
+ if (error <= 0) {
+ if (error == 0) {
+ /* Demux not blocked because of the stream, it is an incomplete frame */
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // missing data
+ }
+
+ /* stream error : send RST_STREAM */
+ TRACE_ERROR("couldn't decode response HEADERS", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_PROTOCOL_ERROR);
+ h2c->st0 = H2_CS_FRAME_E;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto fail;
+ }
+
+ if (h2c->dff & H2_F_HEADERS_END_STREAM)
+ h2s->flags |= H2_SF_ES_RCVD;
+
+ if (se_fl_test(h2s->sd, SE_FL_ERROR) && h2s->st < H2_SS_ERROR)
+ h2s->st = H2_SS_ERROR;
+ else if (h2s->flags & H2_SF_ES_RCVD) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else if (h2s->st == H2_SS_HLOC)
+ h2s_close(h2s);
+ }
+
+ /* Unblock busy server h2s waiting for the response headers to validate
+ * the tunnel establishment or the end of the response of an aborted
+ * tunnel.
+ */
+ if ((h2s->flags & (H2_SF_BODY_TUNNEL|H2_SF_BLK_MBUSY)) == (H2_SF_BODY_TUNNEL|H2_SF_BLK_MBUSY) ||
+ (h2s->flags & (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) == (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) {
+ TRACE_STATE("Unblock h2s blocked on tunnel establishment/abort", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ h2s->flags &= ~H2_SF_BLK_MBUSY;
+ }
+
+ TRACE_USER("rcvd H2 response ", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, 0, &h2s->rxbuf);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return h2s;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return NULL;
+
+ send_rst:
+ /* make the demux send an RST for the current stream. We may only
+ * do this if we're certain that the HEADERS frame was properly
+ * decompressed so that the HPACK decoder is still kept up to date.
+ */
+ h2_release_buf(h2c, &rxbuf);
+ h2c->st0 = H2_CS_FRAME_E;
+
+ TRACE_USER("rejected H2 response", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ TRACE_DEVEL("leaving on error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return h2s;
+}
+
+/* processes a DATA frame. Returns > 0 on success or zero on missing data.
+ * It may return an error in h2c or h2s. Described in RFC7540#6.1.
+ */
+static int h2c_handle_data(struct h2c *h2c, struct h2s *h2s)
+{
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+
+ /* note that empty DATA frames are perfectly valid and sometimes used
+ * to signal an end of stream (with the ES flag).
+ */
+
+ if (!b_size(&h2c->dbuf) && h2c->dfl) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // empty buffer
+ }
+
+ if (b_data(&h2c->dbuf) < h2c->dfl && !b_full(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // incomplete frame
+ }
+
+ /* now either the frame is complete or the buffer is full */
+
+ if (h2s->st != H2_SS_OPEN && h2s->st != H2_SS_HLOC) {
+ /* RFC7540#6.1 */
+ error = H2_ERR_STREAM_CLOSED;
+ goto strm_err;
+ }
+
+ if ((h2s->flags & H2_SF_DATA_CLEN) && (h2c->dfl - h2c->dpl) > h2s->body_len) {
+ /* RFC7540#8.1.2 */
+ TRACE_ERROR("DATA frame larger than content-length", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+ if (!(h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_TUNNEL_ABRT|H2_SF_ES_SENT)) == (H2_SF_TUNNEL_ABRT|H2_SF_ES_SENT) &&
+ ((h2c->dfl - h2c->dpl) || !(h2c->dff & H2_F_DATA_END_STREAM))) {
+ /* a tunnel attempt was aborted but the client still tries to send some raw
+ * data. Thus the stream is closed with the CANCEL error. Here we take care
+ * that it is not an empty DATA frame with the ES flag. The error is only
+ * handled if ES was already sent to the client because, depending on the
+ * scheduling, these data may have been sent before the server response but
+ * not handled here.
+ */
+ TRACE_ERROR("Request DATA frame for aborted tunnel", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ error = H2_ERR_CANCEL;
+ goto strm_err;
+ }
+
+ if (!h2_frt_transfer_data(h2s))
+ goto fail;
+
+ /* call the upper layers to process the frame, then let the upper layer
+ * notify the stream about any change.
+ */
+ if (!h2s_sc(h2s)) {
+ /* The upper layer has already closed, this may happen on
+ * 4xx/redirects during POST, or when receiving a response
+ * from an H2 server after the client has aborted.
+ */
+ error = H2_ERR_CANCEL;
+ goto strm_err;
+ }
+
+ if (h2c->st0 >= H2_CS_ERROR)
+ goto fail;
+
+ if (h2s->st >= H2_SS_ERROR) {
+ /* stream error : send RST_STREAM */
+ h2c->st0 = H2_CS_FRAME_E;
+ }
+
+ /* check for completion : the callee will change this to FRAME_A or
+ * FRAME_H once done.
+ */
+ if (h2c->st0 == H2_CS_FRAME_P)
+ goto fail;
+
+ /* last frame */
+ if (h2c->dff & H2_F_DATA_END_STREAM) {
+ h2s->flags |= H2_SF_ES_RCVD;
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else
+ h2s_close(h2s);
+
+ if (h2s->flags & H2_SF_DATA_CLEN && h2s->body_len) {
+ /* RFC7540#8.1.2 */
+ TRACE_ERROR("ES on DATA frame before content-length", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+ }
+
+ /* Unblock busy server h2s waiting for the end of the response for an
+ * aborted tunnel
+ */
+ if ((h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) == (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) {
+ TRACE_STATE("Unblock h2s blocked on tunnel abort", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ h2s->flags &= ~H2_SF_BLK_MBUSY;
+ }
+
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 1;
+
+ strm_err:
+ h2s_error(h2s, error);
+ h2c->st0 = H2_CS_FRAME_E;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 0;
+}
+
+/* check that the current frame described in h2c->{dsi,dft,dfl,dff,...} is
+ * valid for the current stream state. This is needed only after parsing the
+ * frame header but in practice it can be performed at any time during
+ * H2_CS_FRAME_P since no state transition happens there. Returns >0 on success
+ * or 0 in case of error, in which case either h2s or h2c will carry an error.
+ */
+static int h2_frame_check_vs_state(struct h2c *h2c, struct h2s *h2s)
+{
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+
+ if (h2s->st == H2_SS_IDLE &&
+ h2c->dft != H2_FT_HEADERS && h2c->dft != H2_FT_PRIORITY) {
+ /* RFC7540#5.1: any frame other than HEADERS or PRIORITY in
+ * this state MUST be treated as a connection error
+ */
+ TRACE_ERROR("invalid frame type for IDLE state", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (!h2c->nb_streams && !(h2c->flags & H2_CF_IS_BACK)) {
+ /* only log if no other stream can report the error */
+ sess_log(h2c->conn->owner);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ TRACE_DEVEL("leaving in error (idle&!hdrs&!prio)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->st == H2_SS_IDLE && (h2c->flags & H2_CF_IS_BACK)) {
+ /* only PUSH_PROMISE would be permitted here */
+ TRACE_ERROR("invalid frame type for IDLE state (back)", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ TRACE_DEVEL("leaving in error (idle&back)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->st == H2_SS_HREM && h2c->dft != H2_FT_WINDOW_UPDATE &&
+ h2c->dft != H2_FT_RST_STREAM && h2c->dft != H2_FT_PRIORITY) {
+ /* RFC7540#5.1: any frame other than WU/PRIO/RST in
+ * this state MUST be treated as a stream error.
+ * 6.2, 6.6 and 6.10 further mandate that HEADERS/
+ * PUSH_PROMISE/CONTINUATION cause connection errors.
+ */
+ if (h2_ft_bit(h2c->dft) & H2_FT_HDR_MASK) {
+ TRACE_ERROR("invalid frame type for HREM state", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ }
+ else {
+ h2s_error(h2s, H2_ERR_STREAM_CLOSED);
+ }
+ TRACE_DEVEL("leaving in error (hrem&!wu&!rst&!prio)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ /* Below the management of frames received in closed state is a
+ * bit hackish because the spec makes strong differences between
+ * streams closed by receiving RST, sending RST, and seeing ES
+ * in both directions. In addition to this, the creation of a
+ * new stream reusing the identifier of a closed one will be
+ * detected here. Given that we cannot keep track of all closed
+ * streams forever, we consider that unknown closed streams were
+ * closed on RST received, which allows us to respond with an
+ * RST without breaking the connection (eg: to abort a transfer).
+ * Some frames have to be silently ignored as well.
+ */
+ if (h2s->st == H2_SS_CLOSED && h2c->dsi) {
+ if (!(h2c->flags & H2_CF_IS_BACK) && h2_ft_bit(h2c->dft) & H2_FT_HDR_MASK) {
+ /* #5.1.1: The identifier of a newly
+ * established stream MUST be numerically
+ * greater than all streams that the initiating
+ * endpoint has opened or reserved. This
+ * governs streams that are opened using a
+ * HEADERS frame and streams that are reserved
+ * using PUSH_PROMISE. An endpoint that
+ * receives an unexpected stream identifier
+ * MUST respond with a connection error.
+ */
+ h2c_error(h2c, H2_ERR_STREAM_CLOSED);
+ TRACE_DEVEL("leaving in error (closed&hdrmask)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->flags & H2_SF_RST_RCVD &&
+ !(h2_ft_bit(h2c->dft) & (H2_FT_HDR_MASK | H2_FT_RST_STREAM_BIT | H2_FT_PRIORITY_BIT | H2_FT_WINDOW_UPDATE_BIT))) {
+ /* RFC7540#5.1:closed: an endpoint that
+ * receives any frame other than PRIORITY after
+ * receiving a RST_STREAM MUST treat that as a
+ * stream error of type STREAM_CLOSED.
+ *
+ * Note that old streams fall into this category
+ * and will lead to an RST being sent.
+ *
+ * However, we cannot generalize this to all frame types. Those
+ * carrying compression state must still be processed before
+ * being dropped or we'll desynchronize the decoder. This can
+ * happen with request trailers received after sending an
+ * RST_STREAM, or with header/trailers responses received after
+ * sending RST_STREAM (aborted stream).
+ *
+ * In addition, since our CLOSED streams always carry the
+ * RST_RCVD bit, we don't want to accidentally catch valid
+ * frames for a closed stream, i.e. RST/PRIO/WU.
+ */
+ h2s_error(h2s, H2_ERR_STREAM_CLOSED);
+ h2c->st0 = H2_CS_FRAME_E;
+ TRACE_DEVEL("leaving in error (rst_rcvd&!hdrmask)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ /* RFC7540#5.1:closed: if this state is reached as a
+ * result of sending a RST_STREAM frame, the peer that
+ * receives the RST_STREAM might have already sent
+ * frames on the stream that cannot be withdrawn. An
+ * endpoint MUST ignore frames that it receives on
+ * closed streams after it has sent a RST_STREAM
+ * frame. An endpoint MAY choose to limit the period
+ * over which it ignores frames and treat frames that
+ * arrive after this time as being in error.
+ */
+ if (h2s->id && !(h2s->flags & H2_SF_RST_SENT)) {
+ /* RFC7540#5.1:closed: any frame other than
+ * PRIO/WU/RST in this state MUST be treated as
+ * a connection error
+ */
+ if (h2c->dft != H2_FT_RST_STREAM &&
+ h2c->dft != H2_FT_PRIORITY &&
+ h2c->dft != H2_FT_WINDOW_UPDATE) {
+ h2c_error(h2c, H2_ERR_STREAM_CLOSED);
+ TRACE_DEVEL("leaving in error (rst_sent&!rst&!prio&!wu)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+ }
+ }
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ return 1;
+}
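+
+/* In short, the acceptance rules enforced above are: IDLE streams only admit
+ * HEADERS/PRIORITY (and nothing at all on the backend side); HREM admits
+ * WINDOW_UPDATE/RST_STREAM/PRIORITY, header-class frames being connection
+ * errors; on CLOSED streams, a client header-class frame reusing an old ID
+ * is a connection error, most frames following a received RST are stream
+ * errors, frames arriving after we sent an RST are ignored, and otherwise
+ * anything but RST/PRIORITY/WINDOW_UPDATE is a connection error.
+ */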
+
+/* process Rx frames to be demultiplexed */
+static void h2_process_demux(struct h2c *h2c)
+{
+ struct h2s *h2s = NULL, *tmp_h2s;
+ struct h2_fh hdr;
+ unsigned int padlen = 0;
+ int32_t old_iw = h2c->miw;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ if (h2c->st0 >= H2_CS_ERROR)
+ goto out;
+
+ if (unlikely(h2c->st0 < H2_CS_FRAME_H)) {
+ if (h2c->st0 == H2_CS_PREFACE) {
+ TRACE_STATE("expecting preface", H2_EV_RX_PREFACE, h2c->conn);
+ if (h2c->flags & H2_CF_IS_BACK)
+ goto out;
+
+ if (unlikely(h2c_frt_recv_preface(h2c) <= 0)) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ if (h2c->st0 == H2_CS_ERROR) {
+ TRACE_PROTO("failed to receive preface", H2_EV_RX_PREFACE|H2_EV_PROTO_ERR, h2c->conn);
+ h2c->st0 = H2_CS_ERROR2;
+ if (b_data(&h2c->dbuf) ||
+ !(((const struct session *)h2c->conn->owner)->fe->options & (PR_O_NULLNOLOG|PR_O_IGNORE_PRB)))
+ sess_log(h2c->conn->owner);
+ }
+ goto done;
+ }
+ TRACE_PROTO("received preface", H2_EV_RX_PREFACE, h2c->conn);
+
+ h2c->max_id = 0;
+ h2c->st0 = H2_CS_SETTINGS1;
+ TRACE_STATE("switching to SETTINGS1", H2_EV_RX_PREFACE, h2c->conn);
+ }
+
+ if (h2c->st0 == H2_CS_SETTINGS1) {
+ /* ensure that what is pending is a valid SETTINGS frame
+ * without an ACK.
+ */
+ TRACE_STATE("expecting settings", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS, h2c->conn);
+ if (!h2_get_frame_hdr(&h2c->dbuf, &hdr)) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ if (h2c->st0 == H2_CS_ERROR) {
+ TRACE_ERROR("failed to receive settings", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS|H2_EV_PROTO_ERR, h2c->conn);
+ h2c->st0 = H2_CS_ERROR2;
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ }
+ goto done;
+ }
+
+ if (hdr.sid || hdr.ft != H2_FT_SETTINGS || hdr.ff & H2_F_SETTINGS_ACK) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ TRACE_ERROR("unexpected frame type or flags", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ h2c->st0 = H2_CS_ERROR2;
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+
+ if ((int)hdr.len < 0 || (int)hdr.len > global.tune.bufsize) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ TRACE_ERROR("invalid settings frame length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ h2c->st0 = H2_CS_ERROR2;
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ goto done;
+ }
+
+ /* that's OK, switch to FRAME_P to process it. This is
+ * a SETTINGS frame whose header has already been
+ * deleted above.
+ */
+ padlen = 0;
+ HA_ATOMIC_INC(&h2c->px_counters->settings_rcvd);
+ goto new_frame;
+ }
+ }
+
+ /* process as many incoming frames as possible below */
+ while (1) {
+ int ret = 0;
+
+ if (!b_data(&h2c->dbuf)) {
+ TRACE_DEVEL("no more Rx data", H2_EV_RX_FRAME, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ break;
+ }
+
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_STATE("end of connection reported", H2_EV_RX_FRAME|H2_EV_RX_EOI, h2c->conn);
+ break;
+ }
+
+ if (h2c->st0 == H2_CS_FRAME_H) {
+ TRACE_STATE("expecting H2 frame header", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
+ if (!h2_peek_frame_hdr(&h2c->dbuf, 0, &hdr)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ break;
+ }
+
+ if ((int)hdr.len < 0 || (int)hdr.len > global.tune.bufsize) {
+ TRACE_ERROR("invalid H2 frame length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ if (!h2c->nb_streams && !(h2c->flags & H2_CF_IS_BACK)) {
+ /* only log if no other stream can report the error */
+ sess_log(h2c->conn->owner);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ break;
+ }
+
+ if (h2c->rcvd_s && h2c->dsi != hdr.sid) {
+ /* changed stream with a pending WU, need to
+ * send it now.
+ */
+ TRACE_PROTO("sending stream WINDOW_UPDATE frame on stream switch", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ ret = h2c_send_strm_wu(h2c);
+ if (ret <= 0)
+ break;
+ }
+
+ padlen = 0;
+ if (h2_ft_bit(hdr.ft) & H2_FT_PADDED_MASK && hdr.ff & H2_F_PADDED) {
+ /* If the frame is padded (HEADERS, PUSH_PROMISE or DATA),
+ * we read the pad length and drop it from the remaining
+ * payload (one byte + the 9 remaining ones = 10 total
+ * removed), so we have a frame payload starting after the
+ * pad len. Flow controlled frames (DATA) also count the
+ * padlen in the flow control, so it must be adjusted.
+ */
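+ /* Worked example: a padded DATA frame arriving with hdr.len=10
+ * and a pad length byte of 3 ends up below with dfl=9 (the
+ * padlen byte is removed) and dpl=3, i.e. 6 octets of real
+ * payload; the padlen byte itself is credited to rcvd_c/rcvd_s
+ * since flow control accounts for it.
+ */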
+ if (hdr.len < 1) {
+ TRACE_ERROR("invalid H2 padded frame length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+ hdr.len--;
+
+ if (b_data(&h2c->dbuf) < 10) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ break; // missing padlen
+ }
+
+ padlen = *(uint8_t *)b_peek(&h2c->dbuf, 9);
+
+ if (padlen > hdr.len) {
+ TRACE_ERROR("invalid H2 padding length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ /* RFC7540#6.1 : pad length = length of
+ * frame payload or greater => error.
+ */
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+
+ if (h2_ft_bit(hdr.ft) & H2_FT_FC_MASK) {
+ h2c->rcvd_c++;
+ h2c->rcvd_s++;
+ }
+ b_del(&h2c->dbuf, 1);
+ }
+ h2_skip_frame_hdr(&h2c->dbuf);
+
+ new_frame:
+ h2c->dfl = hdr.len;
+ h2c->dsi = hdr.sid;
+ h2c->dft = hdr.ft;
+ h2c->dff = hdr.ff;
+ h2c->dpl = padlen;
+ h2c->flags |= H2_CF_DEM_IN_PROGRESS;
+ TRACE_STATE("rcvd H2 frame header, switching to FRAME_P state", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
+ h2c->st0 = H2_CS_FRAME_P;
+
+ /* check for minimum basic frame format validity */
+ ret = h2_frame_check(h2c->dft, 1, h2c->dsi, h2c->dfl, global.tune.bufsize);
+ if (ret != H2_ERR_NO_ERROR) {
+ TRACE_ERROR("received invalid H2 frame header", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, ret);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+
+ /* transition to HEADERS frame ends the keep-alive idle
+ * timer and starts the http-request idle delay.
+ */
+ if (hdr.ft == H2_FT_HEADERS)
+ h2c->idle_start = now_ms;
+ }
+
+ /* Only H2_CS_FRAME_P, H2_CS_FRAME_A and H2_CS_FRAME_E here.
+ * H2_CS_FRAME_P indicates an incomplete previous operation
+ * (most often the first attempt) and requires some validity
+ * checks for the frame and the current state. The two other
+ * ones are set after completion (or abortion) and must skip
+ * validity checks.
+ */
+ tmp_h2s = h2c_st_by_id(h2c, h2c->dsi);
+
+ if (tmp_h2s != h2s && h2s && h2s_sc(h2s) &&
+ (b_data(&h2s->rxbuf) ||
+ h2c_read0_pending(h2c) ||
+ h2s->st == H2_SS_CLOSED ||
+ (h2s->flags & H2_SF_ES_RCVD) ||
+ se_fl_test(h2s->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", H2_EV_RX_FRAME|H2_EV_STRM_WAKE, h2c->conn, h2s);
+ se_fl_set(h2s->sd, SE_FL_RCV_MORE);
+ h2s_notify_recv(h2s);
+ }
+ h2s = tmp_h2s;
+
+ if (h2c->st0 == H2_CS_FRAME_E ||
+ (h2c->st0 == H2_CS_FRAME_P && !h2_frame_check_vs_state(h2c, h2s))) {
+ TRACE_PROTO("stream error reported", H2_EV_RX_FRAME|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ goto strm_err;
+ }
+
+ switch (h2c->dft) {
+ case H2_FT_SETTINGS:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 SETTINGS frame", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn, h2s);
+ ret = h2c_handle_settings(h2c);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->settings_rcvd);
+
+ if (h2c->st0 == H2_CS_FRAME_A) {
+ TRACE_PROTO("sending H2 SETTINGS ACK frame", H2_EV_TX_FRAME|H2_EV_RX_SETTINGS, h2c->conn, h2s);
+ ret = h2c_ack_settings(h2c);
+ }
+ break;
+
+ case H2_FT_PING:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 PING frame", H2_EV_RX_FRAME|H2_EV_RX_PING, h2c->conn, h2s);
+ ret = h2c_handle_ping(h2c);
+ }
+
+ if (h2c->st0 == H2_CS_FRAME_A) {
+ TRACE_PROTO("sending H2 PING ACK frame", H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn, h2s);
+ ret = h2c_ack_ping(h2c);
+ }
+ break;
+
+ case H2_FT_WINDOW_UPDATE:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 WINDOW_UPDATE frame", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn, h2s);
+ ret = h2c_handle_window_update(h2c, h2s);
+ }
+ break;
+
+ case H2_FT_CONTINUATION:
+ /* RFC7540#6.10: CONTINUATION may only be preceded by
+ * a HEADERS/PUSH_PROMISE/CONTINUATION frame. These
+ * frames' parsers consume all following CONTINUATION
+ * frames so this one is out of sequence.
+ */
+ TRACE_ERROR("received unexpected H2 CONTINUATION frame", H2_EV_RX_FRAME|H2_EV_RX_CONT|H2_EV_H2C_ERR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+
+ case H2_FT_HEADERS:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 HEADERS frame", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ if (h2c->flags & H2_CF_IS_BACK)
+ tmp_h2s = h2c_bck_handle_headers(h2c, h2s);
+ else
+ tmp_h2s = h2c_frt_handle_headers(h2c, h2s);
+ if (tmp_h2s) {
+ h2s = tmp_h2s;
+ ret = 1;
+ }
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->headers_rcvd);
+ break;
+
+ case H2_FT_DATA:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 DATA frame", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ ret = h2c_handle_data(h2c, h2s);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->data_rcvd);
+
+ if (h2c->st0 == H2_CS_FRAME_A) {
+ /* rcvd_s will suffice to trigger the sending of a WU */
+ h2c->st0 = H2_CS_FRAME_H;
+ }
+ break;
+
+ case H2_FT_PRIORITY:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 PRIORITY frame", H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn, h2s);
+ ret = h2c_handle_priority(h2c);
+ }
+ break;
+
+ case H2_FT_RST_STREAM:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 RST_STREAM frame", H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ ret = h2c_handle_rst_stream(h2c, h2s);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->rst_stream_rcvd);
+ break;
+
+ case H2_FT_GOAWAY:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 GOAWAY frame", H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn, h2s);
+ ret = h2c_handle_goaway(h2c);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->goaway_rcvd);
+ break;
+
+ /* implement all extra frame types here */
+ default:
+ TRACE_PROTO("receiving H2 ignored frame", H2_EV_RX_FRAME, h2c->conn, h2s);
+ /* drop frames that we ignore. They may be larger than
+ * the buffer so we drain all of their contents until
+ * we reach the end.
+ */
+ ret = MIN(b_data(&h2c->dbuf), h2c->dfl);
+ b_del(&h2c->dbuf, ret);
+ h2c->dfl -= ret;
+ ret = h2c->dfl == 0;
+ }
+
+ strm_err:
+ /* We may have to send an RST if not done yet */
+ if (h2s->st == H2_SS_ERROR) {
+ TRACE_STATE("stream error, switching to FRAME_E", H2_EV_RX_FRAME|H2_EV_H2S_ERR, h2c->conn, h2s);
+ h2c->st0 = H2_CS_FRAME_E;
+ }
+
+ if (h2c->st0 == H2_CS_FRAME_E) {
+ TRACE_PROTO("sending H2 RST_STREAM frame", H2_EV_TX_FRAME|H2_EV_TX_RST|H2_EV_TX_EOI, h2c->conn, h2s);
+ ret = h2c_send_rst_stream(h2c, h2s);
+ }
+
+ /* error or missing data condition met above ? */
+ if (ret <= 0)
+ break;
+
+ if (h2c->st0 != H2_CS_FRAME_H) {
+ if (h2c->dfl)
+ TRACE_DEVEL("skipping remaining frame payload", H2_EV_RX_FRAME, h2c->conn, h2s);
+ ret = MIN(b_data(&h2c->dbuf), h2c->dfl);
+ b_del(&h2c->dbuf, ret);
+ h2c->dfl -= ret;
+ if (!h2c->dfl) {
+ h2c->flags &= ~H2_CF_DEM_IN_PROGRESS;
+ TRACE_STATE("switching to FRAME_H", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
+ h2c->st0 = H2_CS_FRAME_H;
+ }
+ }
+ }
+
+ if (h2c->rcvd_s > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))) {
+ TRACE_PROTO("sending stream WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn, h2s);
+ h2c_send_strm_wu(h2c);
+ }
+
+ if (h2c->rcvd_c > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))) {
+ TRACE_PROTO("sending H2 WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ h2c_send_conn_wu(h2c);
+ }
+
+ done:
+ if (h2c->st0 >= H2_CS_ERROR || (h2c->flags & H2_CF_DEM_SHORT_READ)) {
+ if (h2c->flags & H2_CF_RCVD_SHUT)
+ h2c->flags |= H2_CF_END_REACHED;
+ }
+
+ if (h2s && h2s_sc(h2s) &&
+ (b_data(&h2s->rxbuf) ||
+ h2c_read0_pending(h2c) ||
+ h2s->st == H2_SS_CLOSED ||
+ (h2s->flags & H2_SF_ES_RCVD) ||
+ se_fl_test(h2s->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", H2_EV_RX_FRAME|H2_EV_H2S_WAKE, h2c->conn, h2s);
+ se_fl_set(h2s->sd, SE_FL_RCV_MORE);
+ h2s_notify_recv(h2s);
+ }
+
+ if (old_iw != h2c->miw) {
+ TRACE_STATE("notifying streams about SFCTL increase", H2_EV_RX_FRAME|H2_EV_H2S_WAKE, h2c->conn);
+ h2c_unblock_sfctl(h2c);
+ }
+
+ h2c_restart_reading(h2c, 0);
+ out:
+ TRACE_LEAVE(H2_EV_H2C_WAKE, h2c->conn);
+ return;
+}
+
+/* resume each h2s eligible for sending in list head <head> */
+static void h2_resume_each_sending_h2s(struct h2c *h2c, struct list *head)
+{
+ struct h2s *h2s, *h2s_back;
+
+ TRACE_ENTER(H2_EV_H2C_SEND|H2_EV_H2S_WAKE, h2c->conn);
+
+ list_for_each_entry_safe(h2s, h2s_back, head, list) {
+ if (h2c->mws <= 0 ||
+ h2c->flags & H2_CF_MUX_BLOCK_ANY ||
+ h2c->st0 >= H2_CS_ERROR)
+ break;
+
+ h2s->flags &= ~H2_SF_BLK_ANY;
+
+ if (h2s->flags & H2_SF_NOTIFIED)
+ continue;
+
+ /* If the sender changed his mind and unsubscribed, let's just
+ * remove the stream from the send_list.
+ */
+ if (!(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW)) &&
+ (!h2s->subs || !(h2s->subs->events & SUB_RETRY_SEND))) {
+ LIST_DEL_INIT(&h2s->list);
+ continue;
+ }
+
+ if (h2s->subs && h2s->subs->events & SUB_RETRY_SEND) {
+ h2s->flags |= H2_SF_NOTIFIED;
+ tasklet_wakeup(h2s->subs->tasklet);
+ h2s->subs->events &= ~SUB_RETRY_SEND;
+ if (!h2s->subs->events)
+ h2s->subs = NULL;
+ }
+ else if (h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW)) {
+ tasklet_wakeup(h2s->shut_tl);
+ }
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_SEND|H2_EV_H2S_WAKE, h2c->conn);
+}
+
+/* process Tx frames from streams to be multiplexed. Returns > 0 if it reached
+ * the end.
+ */
+static int h2_process_mux(struct h2c *h2c)
+{
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ if (unlikely(h2c->st0 < H2_CS_FRAME_H)) {
+ if (unlikely(h2c->st0 == H2_CS_PREFACE && (h2c->flags & H2_CF_IS_BACK))) {
+ if (unlikely(h2c_bck_send_preface(h2c) <= 0)) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ if (h2c->st0 == H2_CS_ERROR)
+ h2c->st0 = H2_CS_ERROR2;
+ goto fail;
+ }
+ h2c->st0 = H2_CS_SETTINGS1;
+ }
+ /* need to wait for the other side */
+ if (h2c->st0 < H2_CS_FRAME_H)
+ goto done;
+ }
+
+ /* start by sending possibly pending window updates */
+ if (h2c->rcvd_s > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_MUX_MALLOC)) &&
+ h2c_send_strm_wu(h2c) < 0)
+ goto fail;
+
+ if (h2c->rcvd_c > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_MUX_MALLOC)) &&
+ h2c_send_conn_wu(h2c) < 0)
+ goto fail;
+
+ /* First we always process the flow control list because the streams
+ * waiting there were already elected for immediate emission but were
+ * blocked just on this.
+ */
+ h2_resume_each_sending_h2s(h2c, &h2c->fctl_list);
+ h2_resume_each_sending_h2s(h2c, &h2c->send_list);
+
+ fail:
+ if (unlikely(h2c->st0 >= H2_CS_ERROR)) {
+ if (h2c->st0 == H2_CS_ERROR) {
+ if (h2c->max_id >= 0) {
+ h2c_send_goaway_error(h2c, NULL);
+ if (h2c->flags & H2_CF_MUX_BLOCK_ANY)
+ goto out0;
+ }
+
+ h2c->st0 = H2_CS_ERROR2; // sent (or failed hard) !
+ }
+ }
+ done:
+ TRACE_LEAVE(H2_EV_H2C_WAKE, h2c->conn);
+ return 1;
+ out0:
+ TRACE_DEVEL("leaving in blocked situation", H2_EV_H2C_WAKE, h2c->conn);
+ return 0;
+}
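+
+/* Illustrative sketch, not part of the patch: the wire format of the
+ * WINDOW_UPDATE frames that h2c_send_strm_wu()/h2c_send_conn_wu() emit
+ * above to replenish the peer's window from the rcvd_s/rcvd_c counters
+ * (RFC 7540 #6.9). The helper name and the caller-provided buffer are
+ * hypothetical, for demonstration only.
+ */
+#if 0 /* example only */
+static size_t example_mk_window_update(uint8_t *out, uint32_t sid, uint32_t incr)
+{
+	out[0] = 0; out[1] = 0; out[2] = 4; /* 24-bit payload length: 4 */
+	out[3] = 0x08;                      /* type: WINDOW_UPDATE */
+	out[4] = 0;                         /* flags: none */
+	out[5] = (sid >> 24) & 0x7f;        /* 31-bit stream id, R bit clear */
+	out[6] = (sid >> 16) & 0xff;
+	out[7] = (sid >> 8) & 0xff;
+	out[8] = sid & 0xff;                /* sid 0 = connection-level update */
+	out[9]  = (incr >> 24) & 0x7f;      /* 31-bit window increment */
+	out[10] = (incr >> 16) & 0xff;
+	out[11] = (incr >> 8) & 0xff;
+	out[12] = incr & 0xff;
+	return 13;                          /* 9-byte header + 4-byte payload */
+}
+#endif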
+
+
+/* Attempt to read data, and subscribe if none available.
+ * The function returns 1 if data has been received, otherwise zero.
+ */
+static int h2_recv(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+ struct buffer *buf;
+ int max;
+ size_t ret;
+
+ TRACE_ENTER(H2_EV_H2C_RECV, h2c->conn);
+
+ if (h2c->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("leaving on sub_recv", H2_EV_H2C_RECV, h2c->conn);
+ return (b_data(&h2c->dbuf));
+ }
+
+ if (!h2_recv_allowed(h2c)) {
+ TRACE_DEVEL("leaving on !recv_allowed", H2_EV_H2C_RECV, h2c->conn);
+ return 1;
+ }
+
+ buf = h2_get_buf(h2c, &h2c->dbuf);
+ if (!buf) {
+ h2c->flags |= H2_CF_DEM_DALLOC;
+ TRACE_DEVEL("leaving on !alloc", H2_EV_H2C_RECV, h2c->conn);
+ return 0;
+ }
+
+ if (h2c->flags & H2_CF_RCVD_SHUT) {
+ TRACE_DEVEL("leaving on rcvd_shut", H2_EV_H2C_RECV, h2c->conn);
+ return 1;
+ }
+
+ if (!b_data(buf)) {
+ /* try to pre-align the buffer the same way the
+ * rxbufs will be, to optimize memory copies. We'll
+ * make sure that the frame header lands at the end
+ * of the HTX block so it can be aliased upon recv.
+ * We cannot use the head because rcv_buf() will
+ * realign the buffer if it's empty. Thus we cheat
+ * and pretend we already have a few bytes there.
+ */
+ max = buf_room_for_htx_data(buf) + 9;
+ buf->head = sizeof(struct htx) - 9;
+ }
+ else
+ max = b_room(buf);
+
+ ret = max ? conn->xprt->rcv_buf(conn, conn->xprt_ctx, buf, max, 0) : 0;
+
+ if (max && !ret && h2_recv_allowed(h2c)) {
+ TRACE_DATA("failed to receive data, subscribing", H2_EV_H2C_RECV, h2c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &h2c->wait_event);
+ } else if (ret) {
+ TRACE_DATA("received data", H2_EV_H2C_RECV, h2c->conn, 0, 0, (void*)(long)ret);
+ h2c->flags &= ~H2_CF_DEM_SHORT_READ;
+ }
+
+ if (conn_xprt_read0_pending(h2c->conn)) {
+ TRACE_DATA("received read0", H2_EV_H2C_RECV, h2c->conn);
+ h2c->flags |= H2_CF_RCVD_SHUT;
+ }
+
+ if (!b_data(buf)) {
+ h2_release_buf(h2c, &h2c->dbuf);
+ TRACE_LEAVE(H2_EV_H2C_RECV, h2c->conn);
+ return (conn->flags & CO_FL_ERROR || conn_xprt_read0_pending(conn));
+ }
+
+ if (b_data(buf) == buf->size) {
+ h2c->flags |= H2_CF_DEM_DFULL;
+ TRACE_STATE("demux buffer full", H2_EV_H2C_RECV|H2_EV_H2C_BLK, h2c->conn);
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_RECV, h2c->conn);
+ return !!ret || (conn->flags & CO_FL_ERROR) || conn_xprt_read0_pending(conn);
+}
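+
+/* Illustrative sketch, not part of the patch: the pre-alignment arithmetic
+ * used above. <ex_htx_sz> is a hypothetical stand-in for sizeof(struct htx)
+ * and 9 is the H2 frame header size. By starting the read 9 bytes before
+ * the point where an HTX payload area would begin, a DATA frame's payload
+ * can later be aliased in place instead of being copied.
+ */
+#if 0 /* example only */
+static size_t example_prealign_head(size_t ex_htx_sz)
+{
+	/* e.g. ex_htx_sz = 64 -> head = 55: the 9-byte frame header then
+	 * occupies bytes 55..63 and the payload starts exactly at 64.
+	 */
+	return ex_htx_sz - 9;
+}
+#endif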
+
+/* Try to send data if possible.
+ * The function returns 1 if data has been sent, otherwise zero.
+ */
+static int h2_send(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+ int done;
+ int sent = 0;
+
+ TRACE_ENTER(H2_EV_H2C_SEND, h2c->conn);
+
+ if (conn->flags & CO_FL_ERROR) {
+ TRACE_DEVEL("leaving on error", H2_EV_H2C_SEND, h2c->conn);
+ return 1;
+ }
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ /* a handshake was requested */
+ goto schedule;
+ }
+
+ /* This loop is quite simple : it tries to fill as much as it can from
+ * pending streams into the existing buffer until it's reportedly full
+ * or the end of send requests is reached. Then it tries to send this
+ * buffer's contents out, marks it not full if at least one byte could
+ * be sent, and tries again.
+ *
+ * The snd_buf() function normally takes a "flags" argument which may
+ * be made of a combination of CO_SFL_MSG_MORE to indicate that more
+ * data immediately comes and CO_SFL_STREAMER to indicate that the
+ * connection is streaming lots of data (used to increase TLS record
+ * size at the expense of latency). The former can be sent any time
+ * there's a buffer full flag, as it indicates at least one stream
+ * attempted to send and failed so there are pending data. An
+ * alternative would be to set it as long as there's an active stream
+ * but that would be problematic for ACKs until we have an absolute
+ * guarantee that all waiters have at least one byte to send. The
+ * latter should possibly not be set for now.
+ */
+
+ done = 0;
+ while (!done) {
+ unsigned int flags = 0;
+ unsigned int released = 0;
+ struct buffer *buf;
+
+ /* fill as much as we can into the current buffer */
+ while (((h2c->flags & (H2_CF_MUX_MFULL|H2_CF_MUX_MALLOC)) == 0) && !done)
+ done = h2_process_mux(h2c);
+
+ if (h2c->flags & H2_CF_MUX_MALLOC)
+ done = 1; // we won't go further without extra buffers
+
+ if ((conn->flags & (CO_FL_SOCK_WR_SH|CO_FL_ERROR)) ||
+ (h2c->flags & H2_CF_GOAWAY_FAILED))
+ break;
+
+ if (h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))
+ flags |= CO_SFL_MSG_MORE;
+
+ if (!br_single(h2c->mbuf)) {
+ /* usually we want to emit small TLS records to speed
+ * up the decoding on the client. That's what is being
+ * done by default. However if there is more than one
+ * buffer being allocated, we're streaming large data
+ * so we switch to large records.
+ */
+ flags |= CO_SFL_STREAMER;
+ }
+
+ for (buf = br_head(h2c->mbuf); b_size(buf); buf = br_del_head(h2c->mbuf)) {
+ if (b_data(buf)) {
+ int ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, buf, b_data(buf), flags);
+ if (!ret) {
+ done = 1;
+ break;
+ }
+ sent = 1;
+ TRACE_DATA("sent data", H2_EV_H2C_SEND, h2c->conn, 0, buf, (void*)(long)ret);
+ b_del(buf, ret);
+ if (b_data(buf)) {
+ done = 1;
+ break;
+ }
+ }
+ b_free(buf);
+ released++;
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+
+ /* Normally if we wrote at least one byte, the buffer is not full
+ * anymore. However, if it was marked full because all of its
+ * buffers were used, we don't want to instantly wake up many
+ * streams because we'd create a thundering herd effect, notably
+ * when data are flushed in small chunks. Instead we wait for
+ * the buffer to be decongested again before allowing to send
+ * again. It also has the added benefit of not pumping more
+ * data from the other side when it's known that this one is
+ * still congested.
+ */
+ if (sent && br_single(h2c->mbuf))
+ h2c->flags &= ~(H2_CF_MUX_MFULL | H2_CF_DEM_MROOM);
+ }
+
+ if (conn->flags & CO_FL_SOCK_WR_SH) {
+ /* output closed, nothing to send, clear the buffer to release it */
+ b_reset(br_tail(h2c->mbuf));
+ }
+ /* We're not full anymore, so we can wake any tasks that are waiting
+ * for us.
+ */
+ if (!(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MROOM)) && h2c->st0 >= H2_CS_FRAME_H)
+ h2_resume_each_sending_h2s(h2c, &h2c->send_list);
+
+ /* We're done, no more to send */
+ if (!br_data(h2c->mbuf)) {
+ TRACE_DEVEL("leaving with everything sent", H2_EV_H2C_SEND, h2c->conn);
+ return sent;
+ }
+schedule:
+ if (!(conn->flags & CO_FL_ERROR) && !(h2c->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_STATE("more data to send, subscribing", H2_EV_H2C_SEND, h2c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_SEND, &h2c->wait_event);
+ }
+
+ TRACE_DEVEL("leaving with some data left to send", H2_EV_H2C_SEND, h2c->conn);
+ return sent;
+}
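+
+/* Illustrative sketch, not part of the patch: a condensed restatement of
+ * the snd_buf() flag selection performed in the loop above. The helper
+ * name and the <multiple_mbufs> parameter are hypothetical; in the real
+ * code the latter corresponds to !br_single(h2c->mbuf).
+ */
+#if 0 /* example only */
+static unsigned int example_snd_flags(uint32_t h2c_flags, int multiple_mbufs)
+{
+	unsigned int flags = 0;
+
+	/* a full mux buffer means at least one stream failed to emit,
+	 * so more data are known to be pending behind this send.
+	 */
+	if (h2c_flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))
+		flags |= CO_SFL_MSG_MORE;
+
+	/* several mbufs allocated: large transfer, favor large TLS records */
+	if (multiple_mbufs)
+		flags |= CO_SFL_STREAMER;
+
+	return flags;
+}
+#endif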
+
+/* this is the tasklet referenced in h2c->wait_event.tasklet */
+struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn;
+ struct tasklet *tl = (struct tasklet *)t;
+ int conn_in_list;
+ struct h2c *h2c = ctx;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (t->context == NULL) {
+ /* The connection has been taken over by another thread,
+ * we're no longer responsible for it, so just free the
+ * tasklet, and do nothing.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ t = NULL;
+ goto leave;
+ }
+ conn = h2c->conn;
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+
+ /* Remove the connection from the list, to be sure nobody attempts
+ * to use it while we handle the I/O events
+ */
+ conn_in_list = conn_get_idle_flag(conn);
+ if (conn_in_list)
+ conn_delete_from_tree(&conn->hash_node->node);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ /* we're certain the connection was not in an idle list */
+ conn = h2c->conn;
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+ conn_in_list = 0;
+ }
+
+ if (!(h2c->wait_event.events & SUB_RETRY_SEND))
+ ret = h2_send(h2c);
+ if (!(h2c->wait_event.events & SUB_RETRY_RECV))
+ ret |= h2_recv(h2c);
+ if (ret || b_data(&h2c->dbuf))
+ ret = h2_process(h2c);
+
+ /* If we were in an idle list, we want to add it back into it,
+ * unless h2_process() returned -1, which means it has destroyed
+ * the connection (testing !ret is enough: if h2_process() wasn't
+ * called then ret will be 0 anyway).
+ */
+ if (ret < 0)
+ t = NULL;
+
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (conn_in_list == CO_FL_SAFE_LIST)
+ eb64_insert(&srv->per_thr[tid].safe_conns, &conn->hash_node->node);
+ else
+ eb64_insert(&srv->per_thr[tid].idle_conns, &conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+leave:
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+ return t;
+}
+
+/* callback called on any event by the connection handler.
+ * It applies changes and returns zero, or < 0 if it wants immediate
+ * destruction of the connection (which normally does not happen in h2).
+ */
+static int h2_process(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY) &&
+ (b_data(&h2c->dbuf) || (h2c->flags & H2_CF_RCVD_SHUT))) {
+ h2_process_demux(h2c);
+
+ if (h2c->st0 >= H2_CS_ERROR || conn->flags & CO_FL_ERROR)
+ b_reset(&h2c->dbuf);
+
+ if (!b_full(&h2c->dbuf))
+ h2c->flags &= ~H2_CF_DEM_DFULL;
+ }
+ h2_send(h2c);
+
+ if (unlikely(h2c->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && !(h2c->flags & H2_CF_IS_BACK)) {
+ int send_goaway = 1;
+ /* If a close-spread-time option is set, we want to avoid
+ * closing all the active HTTP2 connections at once so we add a
+ * random factor that will spread the closing.
+ */
+ if (tick_isset(global.close_spread_end)) {
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* This should increase the closing rate the
+ * further along the window we are. */
+ send_goaway = (remaining_window <= statistical_prng_range(global.close_spread_time));
+ }
+ }
+ else if (global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE)
+ send_goaway = 0; /* let the client close the connection itself */
+ /* frontend is stopping, reload likely in progress, let's try
+ * to announce a graceful shutdown if not yet done. We don't
+ * care if it fails, it will be tried again later.
+ */
+ if (send_goaway) {
+ TRACE_STATE("proxy stopped, sending GOAWAY", H2_EV_H2C_WAKE|H2_EV_TX_FRAME, conn);
+ if (!(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
+ if (h2c->last_sid < 0)
+ h2c->last_sid = (1U << 31) - 1;
+ h2c_send_goaway_error(h2c, NULL);
+ }
+ }
+ }
+
+ /*
+ * If we received early data, and the handshake is done, wake
+ * any stream that was waiting for it.
+ */
+ if (!(h2c->flags & H2_CF_WAIT_FOR_HS) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_WAIT_XPRT | CO_FL_EARLY_DATA)) == CO_FL_EARLY_DATA) {
+ struct eb32_node *node;
+ struct h2s *h2s;
+
+ h2c->flags |= H2_CF_WAIT_FOR_HS;
+ node = eb32_lookup_ge(&h2c->streams_by_id, 1);
+
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (se_fl_test(h2s->sd, SE_FL_WAIT_FOR_HS))
+ h2s_notify_recv(h2s);
+ node = eb32_next(node);
+ }
+ }
+
+ if (conn->flags & CO_FL_ERROR || h2c_read0_pending(h2c) ||
+ h2c->st0 == H2_CS_ERROR2 || h2c->flags & H2_CF_GOAWAY_FAILED ||
+ (eb_is_empty(&h2c->streams_by_id) && h2c->last_sid >= 0 &&
+ h2c->max_id >= h2c->last_sid)) {
+ h2_wake_some_streams(h2c, 0);
+
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ /* no more stream, kill the connection now */
+ h2_release(h2c);
+ TRACE_DEVEL("leaving after releasing the connection", H2_EV_H2C_WAKE);
+ return -1;
+ }
+
+ /* connections in error must be removed from the idle lists */
+ if (conn->flags & CO_FL_LIST_MASK) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(&conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ }
+ else if (h2c->st0 == H2_CS_ERROR) {
+ /* connections in error must be removed from the idle lists */
+ if (conn->flags & CO_FL_LIST_MASK) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(&conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ }
+
+ if (!b_data(&h2c->dbuf))
+ h2_release_buf(h2c, &h2c->dbuf);
+
+ if ((conn->flags & CO_FL_SOCK_WR_SH) ||
+ h2c->st0 == H2_CS_ERROR2 || (h2c->flags & H2_CF_GOAWAY_FAILED) ||
+ (h2c->st0 != H2_CS_ERROR &&
+ !br_data(h2c->mbuf) &&
+ (h2c->mws <= 0 || LIST_ISEMPTY(&h2c->fctl_list)) &&
+ ((h2c->flags & H2_CF_MUX_BLOCK_ANY) || LIST_ISEMPTY(&h2c->send_list))))
+ h2_release_mbuf(h2c);
+
+ h2c_update_timeout(h2c);
+ h2_send(h2c);
+ TRACE_LEAVE(H2_EV_H2C_WAKE, conn);
+ return 0;
+}
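+
+/* Illustrative sketch, not part of the patch: the close-spread decision
+ * above draws a uniform random value in [0, close_spread_time) and sends
+ * the GOAWAY when it is >= the remaining window, i.e. with probability
+ * roughly 1 - remaining/spread, which rises to 1 as the window closes.
+ * rand() stands in for statistical_prng_range() here.
+ */
+#if 0 /* example only */
+#include <stdlib.h>
+
+static int example_should_send_goaway(unsigned int remaining_ms, unsigned int spread_ms)
+{
+	unsigned int r = (unsigned int)rand() % spread_ms; /* uniform in [0, spread) */
+
+	/* e.g. remaining=1000, spread=10000 -> GOAWAY sent ~90% of the time */
+	return remaining_ms <= r;
+}
+#endif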
+
+/* wake-up function called by the connection layer (mux_ops.wake) */
+static int h2_wake(struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+ int ret;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+ ret = h2_process(h2c);
+ if (ret >= 0)
+ h2_wake_some_streams(h2c, 0);
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+ return ret;
+}
+
+/* Connection timeout management. The principle is that if there's neither
+ * receipt nor sending for a certain amount of time, the connection is
+ * closed. If the MUX buffer still holds pending data or is not allocatable,
+ * the connection is immediately killed. If it's allocatable and empty, we
+ * attempt to send a GOAWAY frame.
+ */
+struct task *h2_timeout_task(struct task *t, void *context, unsigned int state)
+{
+ struct h2c *h2c = context;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c ? h2c->conn : NULL);
+
+ if (h2c) {
+ /* Make sure nobody stole the connection from us */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* Somebody already stole the connection from us, so we should not
+ * free it; we just have to free the task.
+ */
+ if (!t->context) {
+ h2c = NULL;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ goto do_leave;
+ }
+
+
+ if (!expired) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("leaving (not expired)", H2_EV_H2C_WAKE, h2c->conn);
+ return t;
+ }
+
+ if (!h2c_may_expire(h2c)) {
+ /* we do still have streams but all of them are idle, waiting
+ * for the data layer, so we must not enforce the timeout here.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ t->expire = TICK_ETERNITY;
+ return t;
+ }
+
+ /* We're about to destroy the connection, so make sure nobody attempts
+ * to steal it from us.
+ */
+ if (h2c->conn->flags & CO_FL_LIST_MASK)
+ conn_delete_from_tree(&h2c->conn->hash_node->node);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+do_leave:
+ task_destroy(t);
+
+ if (!h2c) {
+ /* resources were already deleted */
+ TRACE_DEVEL("leaving (not more h2c)", H2_EV_H2C_WAKE);
+ return NULL;
+ }
+
+ h2c->task = NULL;
+ h2c_error(h2c, H2_ERR_NO_ERROR);
+ h2_wake_some_streams(h2c, 0);
+
+ if (br_data(h2c->mbuf)) {
+ /* don't even try to send a GOAWAY, the buffer is stuck */
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+ }
+
+ /* try to send but no need to insist */
+ h2c->last_sid = h2c->max_id;
+ if (h2c_send_goaway_error(h2c, NULL) <= 0)
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+
+ if (br_data(h2c->mbuf) && !(h2c->flags & H2_CF_GOAWAY_FAILED) && conn_xprt_ready(h2c->conn)) {
+ unsigned int released = 0;
+ struct buffer *buf;
+
+ for (buf = br_head(h2c->mbuf); b_size(buf); buf = br_del_head(h2c->mbuf)) {
+ if (b_data(buf)) {
+ int ret = h2c->conn->xprt->snd_buf(h2c->conn, h2c->conn->xprt_ctx, buf, b_data(buf), 0);
+ if (!ret)
+ break;
+ b_del(buf, ret);
+ if (b_data(buf))
+ break;
+ b_free(buf);
+ released++;
+ }
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+ }
+
+ /* in any case this connection must not be considered idle anymore */
+ if (h2c->conn->flags & CO_FL_LIST_MASK) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(&h2c->conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ /* either we can release everything now or it will be done later once
+ * the last stream closes.
+ */
+ if (eb_is_empty(&h2c->streams_by_id))
+ h2_release(h2c);
+
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+ return NULL;
+}
+
+
+/*******************************************/
+/* functions below are used by the streams */
+/*******************************************/
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int h2_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct h2s *h2s;
+ struct h2c *h2c = conn->ctx;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, conn);
+ h2s = h2c_bck_stream_new(h2c, sd->sc, sess);
+ if (!h2s) {
+ TRACE_DEVEL("leaving on stream creation failure", H2_EV_H2S_NEW|H2_EV_H2S_ERR, conn);
+ return -1;
+ }
+
+ /* the connection is not idle anymore, let's mark this */
+ HA_ATOMIC_AND(&h2c->wait_event.tasklet->state, ~TASK_F_USR1);
+ xprt_set_used(h2c->conn, h2c->conn->xprt, h2c->conn->xprt_ctx);
+
+ TRACE_LEAVE(H2_EV_H2S_NEW, conn, h2s);
+ return 0;
+}
+
+/* Retrieves the first valid stream connector from this connection, or returns
+ * NULL. We have to scan because we may have some orphan streams. It might be
+ * beneficial to scan backwards from the end to reduce the likelihood of finding
+ * orphans.
+ */
+static struct stconn *h2_get_first_sc(const struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+ struct h2s *h2s;
+ struct eb32_node *node;
+
+ node = eb32_first(&h2c->streams_by_id);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (h2s_sc(h2s))
+ return h2s_sc(h2s);
+ node = eb32_next(node);
+ }
+ return NULL;
+}
+
+static int h2_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ int ret = 0;
+ struct h2c *h2c = conn->ctx;
+
+ switch (mux_ctl) {
+ case MUX_STATUS:
+ /* Only consider the mux to be ready if we're done with
+ * the preface and settings, and we had no error.
+ */
+ if (h2c->st0 >= H2_CS_FRAME_H && h2c->st0 < H2_CS_ERROR)
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_EXIT_STATUS:
+ return MUX_ES_UNKNOWN;
+ default:
+ return -1;
+ }
+}
+
+/*
+ * Destroy the mux and the associated connection, if it is no longer used
+ */
+static void h2_destroy(void *ctx)
+{
+ struct h2c *h2c = ctx;
+
+ TRACE_ENTER(H2_EV_H2C_END, h2c->conn);
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ BUG_ON(h2c->conn->ctx != h2c);
+ h2_release(h2c);
+ }
+ TRACE_LEAVE(H2_EV_H2C_END);
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void h2_detach(struct sedesc *sd)
+{
+ struct h2s *h2s = sd->se;
+ struct h2c *h2c;
+ struct session *sess;
+
+ TRACE_ENTER(H2_EV_STRM_END, h2s ? h2s->h2c->conn : NULL, h2s);
+
+ if (!h2s) {
+ TRACE_LEAVE(H2_EV_STRM_END);
+ return;
+ }
+
+ /* there's no txbuf so we're certain not to be able to send anything */
+ h2s->flags &= ~H2_SF_NOTIFIED;
+
+ sess = h2s->sess;
+ h2c = h2s->h2c;
+ h2c->nb_sc--;
+ if (!h2c->nb_sc)
+ h2c->idle_start = now_ms;
+
+ if ((h2c->flags & (H2_CF_IS_BACK|H2_CF_DEM_TOOMANY)) == H2_CF_DEM_TOOMANY &&
+ !h2_frt_has_too_many_sc(h2c)) {
+ /* frontend connection was blocking new streams creation */
+ h2c->flags &= ~H2_CF_DEM_TOOMANY;
+ h2c_restart_reading(h2c, 1);
+ }
+
+ /* this stream may be blocked waiting for some data to leave (possibly
+ * an ES or RST frame), so orphan it in this case.
+ */
+ if (!(h2c->conn->flags & CO_FL_ERROR) &&
+ (h2c->st0 < H2_CS_ERROR) &&
+ (h2s->flags & (H2_SF_BLK_MBUSY | H2_SF_BLK_MROOM | H2_SF_BLK_MFCTL)) &&
+ ((h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW)) || h2s->subs)) {
+ TRACE_DEVEL("leaving on stream blocked", H2_EV_STRM_END|H2_EV_H2S_BLK, h2c->conn, h2s);
+ /* refresh the timeout if none was active, so that the last
+ * leaving stream may arm it.
+ */
+ if (h2c->task && !tick_isset(h2c->task->expire))
+ h2c_update_timeout(h2c);
+ return;
+ }
+
+ if ((h2c->flags & H2_CF_DEM_BLOCK_ANY && h2s->id == h2c->dsi) ||
+ (h2c->flags & H2_CF_MUX_BLOCK_ANY && h2s->id == h2c->msi)) {
+ /* unblock the connection if it was blocked on this
+ * stream.
+ */
+ h2c->flags &= ~H2_CF_DEM_BLOCK_ANY;
+ h2c->flags &= ~H2_CF_MUX_BLOCK_ANY;
+ h2c_restart_reading(h2c, 1);
+ }
+
+ h2s_destroy(h2s);
+
+ if (h2c->flags & H2_CF_IS_BACK) {
+ if (!(h2c->conn->flags &
+ (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH))) {
+ if (h2c->conn->flags & CO_FL_PRIVATE) {
+ /* Add the connection in the session server list, if not already done */
+ if (!session_add_conn(sess, h2c->conn, h2c->conn->target)) {
+ h2c->conn->owner = NULL;
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ h2c->conn->mux->destroy(h2c);
+ TRACE_DEVEL("leaving on error after killing outgoing connection", H2_EV_STRM_END|H2_EV_H2C_ERR);
+ return;
+ }
+ }
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ if (session_check_idle_conn(h2c->conn->owner, h2c->conn) != 0) {
+ /* At this point either the connection is destroyed, or it's been added to the server idle list, just stop */
+ TRACE_DEVEL("leaving without reusable idle connection", H2_EV_STRM_END);
+ return;
+ }
+ }
+ }
+ else {
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ /* If the connection is owned by the session, first remove it
+ * from its list
+ */
+ if (h2c->conn->owner) {
+ session_unown_conn(h2c->conn->owner, h2c->conn);
+ h2c->conn->owner = NULL;
+ }
+
+ /* mark that the tasklet may lose its context to another thread and
+ * that the handler needs to check it under the idle conns lock.
+ */
+ HA_ATOMIC_OR(&h2c->wait_event.tasklet->state, TASK_F_USR1);
+ xprt_set_idle(h2c->conn, h2c->conn->xprt, h2c->conn->xprt_ctx);
+
+ if (!srv_add_to_idle_list(objt_server(h2c->conn->target), h2c->conn, 1)) {
+ /* The server doesn't want it, let's kill the connection right away */
+ h2c->conn->mux->destroy(h2c);
+ TRACE_DEVEL("leaving on error after killing outgoing connection", H2_EV_STRM_END|H2_EV_H2C_ERR);
+ return;
+ }
+ /* At this point, the connection has been added to the
+ * server idle list, so another thread may already have
+ * hijacked it, so we can't do anything with it.
+ */
+ TRACE_DEVEL("reusable idle connection", H2_EV_STRM_END);
+ return;
+
+ }
+ else if (!h2c->conn->hash_node->node.node.leaf_p &&
+ h2_avail_streams(h2c->conn) > 0 && objt_server(h2c->conn->target) &&
+ !LIST_INLIST(&h2c->conn->session_list)) {
+ eb64_insert(&__objt_server(h2c->conn->target)->per_thr[tid].avail_conns,
+ &h2c->conn->hash_node->node);
+ }
+ }
+ }
+ }
+
+ /* We don't want to close right now unless we're removing the
+ * last stream, and either the connection is in error, or it
+ * reached the ID already specified in a GOAWAY frame received
+ * or sent (as seen by last_sid >= 0).
+ */
+ if (h2c_is_dead(h2c)) {
+ /* no more stream will come, kill it now */
+ TRACE_DEVEL("leaving and killing dead connection", H2_EV_STRM_END, h2c->conn);
+ h2_release(h2c);
+ }
+ else if (h2c->task) {
+ h2c_update_timeout(h2c);
+ TRACE_DEVEL("leaving, refreshing connection's timeout", H2_EV_STRM_END, h2c->conn);
+ }
+ else
+ TRACE_DEVEL("leaving", H2_EV_STRM_END, h2c->conn);
+}
+
+/* Performs a synchronous or asynchronous shutr(). */
+static void h2_do_shutr(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+
+ if (h2s->st == H2_SS_CLOSED)
+ goto done;
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse. In this case we schedule a goaway to
+ * close the connection.
+ */
+ if (se_fl_test(h2s->sd, SE_FL_KILL_CONN) &&
+ !(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM);
+ h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM);
+ }
+ else if (!(h2s->flags & H2_SF_HEADERS_SENT)) {
+ /* Nothing was ever sent for this stream, so reset with
+ * REFUSED_STREAM error to let the client retry the
+ * request.
+ */
+ TRACE_STATE("no headers sent yet, trying a retryable abort", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_REFUSED_STREAM);
+ }
+ else {
+ /* a final response was already provided, we don't want this
+ * stream anymore. This may happen when the server responds
+ * before the end of an upload and closes quickly (redirect,
+ * deny, ...)
+ */
+ h2s_error(h2s, H2_ERR_CANCEL);
+ }
+
+ if (!(h2s->flags & H2_SF_RST_SENT) &&
+ h2s_send_rst_stream(h2c, h2s) <= 0)
+ goto add_to_list;
+
+ if (!(h2c->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(h2c->wait_event.tasklet);
+ h2s_close(h2s);
+ done:
+ h2s->flags &= ~H2_SF_WANT_SHUTR;
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+ return;
+add_to_list:
+ /* Let the handler know we want to shutr, and add ourselves to the
+ * most relevant list if not yet done. h2_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ h2s->flags |= H2_SF_WANT_SHUTR;
+ if (!LIST_INLIST(&h2s->list)) {
+ if (h2s->flags & H2_SF_BLK_MFCTL)
+ LIST_APPEND(&h2c->fctl_list, &h2s->list);
+ else if (h2s->flags & (H2_SF_BLK_MBUSY|H2_SF_BLK_MROOM))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+ return;
+}
+
+/* Performs a synchronous or asynchronous shutw(). */
+static void h2_do_shutw(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+
+ if (h2s->st == H2_SS_HLOC || h2s->st == H2_SS_CLOSED)
+ goto done;
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ if (h2s->st != H2_SS_ERROR && (h2s->flags & H2_SF_HEADERS_SENT)) {
+ /* we can cleanly close using an empty data frame only after headers */
+
+ if (!(h2s->flags & (H2_SF_ES_SENT|H2_SF_RST_SENT)) &&
+ h2_send_empty_data_es(h2s) <= 0)
+ goto add_to_list;
+
+ if (h2s->st == H2_SS_HREM)
+ h2s_close(h2s);
+ else
+ h2s->st = H2_SS_HLOC;
+ } else {
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse. In this case we schedule a goaway to
+ * close the connection.
+ */
+ if (se_fl_test(h2s->sd, SE_FL_KILL_CONN) &&
+ !(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM);
+ h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM);
+ }
+ else {
+ /* Nothing was ever sent for this stream, so reset with
+ * REFUSED_STREAM error to let the client retry the
+ * request.
+ */
+ TRACE_STATE("no headers sent yet, trying a retryable abort", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_REFUSED_STREAM);
+ }
+
+ if (!(h2s->flags & H2_SF_RST_SENT) &&
+ h2s_send_rst_stream(h2c, h2s) <= 0)
+ goto add_to_list;
+
+ h2s_close(h2s);
+ }
+
+ if (!(h2c->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(h2c->wait_event.tasklet);
+
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ done:
+ h2s->flags &= ~H2_SF_WANT_SHUTW;
+ return;
+
+ add_to_list:
+ /* Let the handler know we want to shutw, and add ourselves to the
+ * most relevant list if not yet done. h2_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ h2s->flags |= H2_SF_WANT_SHUTW;
+ if (!LIST_INLIST(&h2s->list)) {
+ if (h2s->flags & H2_SF_BLK_MFCTL)
+ LIST_APPEND(&h2c->fctl_list, &h2s->list);
+ else if (h2s->flags & (H2_SF_BLK_MBUSY|H2_SF_BLK_MROOM))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+ return;
+}
+
+/* This is the tasklet referenced in h2s->shut_tl, it is used for
+ * deferred shutdowns when the h2_detach() was done but the mux buffer was full
+ * and prevented the last frame from being emitted.
+ */
+struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state)
+{
+ struct h2s *h2s = ctx;
+ struct h2c *h2c = h2s->h2c;
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ if (h2s->flags & H2_SF_NOTIFIED) {
+ /* some data processing remains to be done first */
+ goto end;
+ }
+
+ if (h2s->flags & H2_SF_WANT_SHUTW)
+ h2_do_shutw(h2s);
+
+ if (h2s->flags & H2_SF_WANT_SHUTR)
+ h2_do_shutr(h2s);
+
+ if (!(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))) {
+ /* We're done trying to send, remove ourselves from the send_list */
+ LIST_DEL_INIT(&h2s->list);
+
+ if (!h2s_sc(h2s)) {
+ h2s_destroy(h2s);
+ if (h2c_is_dead(h2c)) {
+ h2_release(h2c);
+ t = NULL;
+ }
+ }
+ }
+ end:
+ TRACE_LEAVE(H2_EV_STRM_SHUT);
+ return t;
+}
+
+/* shutr() called by the stream connector (mux_ops.shutr) */
+static void h2_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+ if (mode)
+ h2_do_shutr(h2s);
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+}
+
+/* shutw() called by the stream connector (mux_ops.shutw) */
+static void h2_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+ h2_do_shutw(h2s);
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+}
+
+/* Decode the payload of a HEADERS frame and produce the HTX request or response
+ * depending on the connection's side. Returns a positive value on success, a
+ * negative value on failure, or 0 if it couldn't proceed. May report connection
+ * errors in h2c->errcode if the frame is non-decodable and the connection
+ * unrecoverable. In the absence of a connection error when a failure is reported, the
+ * caller must assume a stream error.
+ *
+ * The function may fold CONTINUATION frames into the initial HEADERS frame
+ * by removing padding and next frame header, then moving the CONTINUATION
+ * frame's payload and adjusting h2c->dfl to match the new aggregated frame,
+ * leaving a hole between the main frame and the beginning of the next one.
+ * The possibly remaining incomplete or next frame at the end may be moved
+ * if the aggregated frame is not deleted, in order to fill the hole. Wrapped
+ * HEADERS frames are unwrapped into a temporary buffer before decoding.
+ *
+ * A buffer at the beginning of processing may look like this :
+ *
+ * ,---.---------.-----.--------------.--------------.------.---.
+ * |///| HEADERS | PAD | CONTINUATION | CONTINUATION | DATA |///|
+ * `---^---------^-----^--------------^--------------^------^---'
+ * | | <-----> | |
+ * area | dpl | wrap
+ * |<--------------> |
+ * | dfl |
+ * |<-------------------------------------------------->|
+ * head data
+ *
+ * Padding is automatically overwritten when folding, participating to the
+ * hole size after dfl :
+ *
+ * ,---.------------------------.-----.--------------.------.---.
+ * |///| HEADERS : CONTINUATION |/////| CONTINUATION | DATA |///|
+ * `---^------------------------^-----^--------------^------^---'
+ * | | <-----> | |
+ * area | hole | wrap
+ * |<-----------------------> |
+ * | dfl |
+ * |<-------------------------------------------------->|
+ * head data
+ *
+ * Please note that the HEADERS frame is always deprived of its PADLEN byte;
+ * however it may start with the 5 stream-dep+weight bytes if the PRIORITY
+ * bit is set.
+ *
+ * The <flags> field must point to either the stream's flags or to a copy of it
+ * so that the function can update the following flags :
+ * - H2_SF_DATA_CLEN when content-length is seen
+ * - H2_SF_HEADERS_RCVD once the frame is successfully decoded
+ *
+ * The H2_SF_HEADERS_RCVD flag is also looked at in the <flags> field prior to
+ * decoding, in order to detect if we're dealing with a headers or a trailers
+ * block (the trailers block appears after H2_SF_HEADERS_RCVD was seen).
+ */
+static int h2c_decode_headers(struct h2c *h2c, struct buffer *rxbuf, uint32_t *flags, unsigned long long *body_len, char *upgrade_protocol)
+{
+ const uint8_t *hdrs = (uint8_t *)b_head(&h2c->dbuf);
+ struct buffer *tmp = get_trash_chunk();
+ struct http_hdr list[global.tune.max_http_hdr * 2];
+ struct buffer *copy = NULL;
+ unsigned int msgf;
+ struct htx *htx = NULL;
+ int flen; // header frame len
+ int hole = 0;
+ int ret = 0;
+ int outlen;
+ int wrap;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+
+next_frame:
+ if (b_data(&h2c->dbuf) - hole < h2c->dfl)
+ goto leave; // incomplete input frame
+
+ /* No END_HEADERS means there's one or more CONTINUATION frames. In
+ * this case, we'll try to paste it immediately after the initial
+ * HEADERS frame payload and kill any possible padding. The initial
+ * frame's length will be increased to represent the concatenation
+ * of the two frames. The next frame's header is read at position <dfl+hole>
+ * and its payload is pasted right after the current frame's payload
+ * (minus padding if some is present).
+ */
+ if (unlikely(!(h2c->dff & H2_F_HEADERS_END_HEADERS))) {
+ struct h2_fh hdr;
+ int clen; // CONTINUATION frame's payload length
+
+ TRACE_STATE("EH missing, expecting continuation frame", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR, h2c->conn);
+ if (!h2_peek_frame_hdr(&h2c->dbuf, h2c->dfl + hole, &hdr)) {
+ /* no more data, the buffer may be full, either due to
+ * too large a frame or because of too large a hole that
+ * we're going to compact at the end.
+ */
+ goto leave;
+ }
+
+ if (hdr.ft != H2_FT_CONTINUATION) {
+ /* RFC7540#6.10: frame of unexpected type */
+ TRACE_STATE("not continuation!", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR|H2_EV_RX_CONT|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ if (hdr.sid != h2c->dsi) {
+ /* RFC7540#6.10: frame of different stream */
+ TRACE_STATE("different stream ID!", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR|H2_EV_RX_CONT|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ if ((unsigned)hdr.len > (unsigned)global.tune.bufsize) {
+ /* RFC7540#4.2: invalid frame length */
+ TRACE_STATE("too large frame!", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR|H2_EV_RX_CONT|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ goto fail;
+ }
+
+ /* detect when we must stop aggregating frames */
+ h2c->dff |= hdr.ff & H2_F_HEADERS_END_HEADERS;
+
+ /* Take as much as we can of the CONTINUATION frame's payload */
+ clen = b_data(&h2c->dbuf) - (h2c->dfl + hole + 9);
+ if (clen > hdr.len)
+ clen = hdr.len;
+
+ /* Move the frame's payload over the padding, hole and frame
+ * header. At least one of hole or dpl is null (see diagrams
+ * above). The hole moves after the new aggregated frame.
+ */
+ b_move(&h2c->dbuf, b_peek_ofs(&h2c->dbuf, h2c->dfl + hole + 9), clen, -(h2c->dpl + hole + 9));
+ h2c->dfl += hdr.len - h2c->dpl;
+ hole += h2c->dpl + 9;
+ h2c->dpl = 0;
+ TRACE_STATE("waiting for next continuation frame", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_CONT|H2_EV_RX_HDR, h2c->conn);
+ goto next_frame;
+ }
+
+ flen = h2c->dfl - h2c->dpl;
+
+ /* if the input buffer wraps, take a temporary copy of it (rare) */
+ wrap = b_wrap(&h2c->dbuf) - b_head(&h2c->dbuf);
+ if (wrap < h2c->dfl) {
+ copy = alloc_trash_chunk();
+ if (!copy) {
+ TRACE_DEVEL("failed to allocate temporary buffer", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ goto fail;
+ }
+ memcpy(copy->area, b_head(&h2c->dbuf), wrap);
+ memcpy(copy->area + wrap, b_orig(&h2c->dbuf), h2c->dfl - wrap);
+ hdrs = (uint8_t *) copy->area;
+ }
+
+ /* Skip StreamDep and weight for now (we don't support PRIORITY) */
+ if (h2c->dff & H2_F_HEADERS_PRIORITY) {
+ if (read_n32(hdrs) == h2c->dsi) {
+ /* RFC7540#5.3.1 : stream dep may not depend on itself */
+ TRACE_STATE("invalid stream dependency!", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ if (flen < 5) {
+ TRACE_STATE("frame too short for priority!", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ goto fail;
+ }
+
+ hdrs += 5; // stream dep = 4, weight = 1
+ flen -= 5;
+ }
+
+ if (!h2_get_buf(h2c, rxbuf)) {
+ TRACE_STATE("waiting for h2c rxbuf allocation", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_BLK, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SALLOC;
+ goto leave;
+ }
+
+ /* we can't retry a failed decompression operation so we must be very
+ * careful not to take any risks. In practice the output buffer is
+ * always empty except maybe for trailers, in which case we simply have
+ * to wait for the upper layer to finish consuming what is available.
+ */
+ htx = htx_from_buf(rxbuf);
+ if (!htx_is_empty(htx)) {
+ TRACE_STATE("waiting for room in h2c rxbuf", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_BLK, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SFULL;
+ goto leave;
+ }
+
+ /* past this point we cannot roll back in case of error */
+ outlen = hpack_decode_frame(h2c->ddht, hdrs, flen, list,
+ sizeof(list)/sizeof(list[0]), tmp);
+
+ if (outlen > 0 &&
+ (TRACE_SOURCE)->verbosity >= H2_VERB_ADVANCED &&
+ TRACE_ENABLED(TRACE_LEVEL_USER, H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, 0, 0, 0)) {
+ struct ist n;
+ int i;
+
+ for (i = 0; list[i].n.len; i++) {
+ n = list[i].n;
+
+ if (!isttest(n)) {
+ /* this is in fact a pseudo header whose number is in n.len */
+ n = h2_phdr_to_ist(n.len);
+ }
+
+ h2_trace_header(n, list[i].v, H2_EV_RX_FRAME|H2_EV_RX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, NULL);
+ }
+ }
+
+ if (outlen < 0) {
+ TRACE_STATE("failed to decompress HPACK", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_COMPRESSION_ERROR);
+ goto fail;
+ }
+
+ /* The HPACK decompressor was updated, let's update the input buffer and
+ * the parser's state to commit these changes and allow us to later
+ * fail solely on the stream if needed.
+ */
+ b_del(&h2c->dbuf, h2c->dfl + hole);
+ h2c->dfl = hole = 0;
+ h2c->st0 = H2_CS_FRAME_H;
+
+ /* OK now we have our header list in <list> */
+ msgf = (h2c->dff & H2_F_HEADERS_END_STREAM) ? 0 : H2_MSGF_BODY;
+ msgf |= (*flags & H2_SF_BODY_TUNNEL) ? H2_MSGF_BODY_TUNNEL: 0;
+ /* If an Extended CONNECT has been sent on this stream, set message flag
+ * to convert 200 response to 101 htx response */
+ msgf |= (*flags & H2_SF_EXT_CONNECT_SENT) ? H2_MSGF_EXT_CONNECT: 0;
+
+ if (*flags & H2_SF_HEADERS_RCVD)
+ goto trailers;
+
+ /* This is the first HEADERS frame so it's a headers block */
+ if (h2c->flags & H2_CF_IS_BACK)
+ outlen = h2_make_htx_response(list, htx, &msgf, body_len, upgrade_protocol);
+ else
+ outlen = h2_make_htx_request(list, htx, &msgf, body_len);
+
+ if (outlen < 0 || htx_free_space(htx) < global.tune.maxrewrite) {
+ /* too large headers? this is a stream error only */
+ TRACE_STATE("message headers too large", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2S_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ goto fail;
+ }
+
+ if (msgf & H2_MSGF_BODY) {
+ /* a payload is present */
+ if (msgf & H2_MSGF_BODY_CL) {
+ *flags |= H2_SF_DATA_CLEN;
+ htx->extra = *body_len;
+ }
+ }
+ if (msgf & H2_MSGF_BODYLESS_RSP)
+ *flags |= H2_SF_BODYLESS_RESP;
+
+ if (msgf & H2_MSGF_BODY_TUNNEL)
+ *flags |= H2_SF_BODY_TUNNEL;
+ else {
+ /* Abort the tunnel attempt, if any */
+ if (*flags & H2_SF_BODY_TUNNEL)
+ *flags |= H2_SF_TUNNEL_ABRT;
+ *flags &= ~H2_SF_BODY_TUNNEL;
+ }
+
+ done:
+ /* indicate that a HEADERS frame was received for this stream, except
+ * for 1xx responses. For 1xx responses, another HEADERS frame is
+ * expected.
+ */
+ if (!(msgf & H2_MSGF_RSP_1XX))
+ *flags |= H2_SF_HEADERS_RCVD;
+
+ if (h2c->dff & H2_F_HEADERS_END_STREAM) {
+ if (msgf & H2_MSGF_RSP_1XX) {
+ /* RFC9113#8.1 : HEADERS frame with the ES flag set that carries an informational status code is malformed */
+ TRACE_STATE("invalid interim response with ES flag!", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ goto fail;
+ }
+ /* no more data are expected for this message */
+ htx->flags |= HTX_FL_EOM;
+ }
+
+ if (msgf & H2_MSGF_EXT_CONNECT)
+ *flags |= H2_SF_EXT_CONNECT_RCVD;
+
+ /* success */
+ ret = 1;
+
+ leave:
+ /* If there is a hole left and it's not at the end, we are forced to
+ * move the remaining data over it.
+ */
+ if (hole) {
+ if (b_data(&h2c->dbuf) > h2c->dfl + hole)
+ b_move(&h2c->dbuf, b_peek_ofs(&h2c->dbuf, h2c->dfl + hole),
+ b_data(&h2c->dbuf) - (h2c->dfl + hole), -hole);
+ b_sub(&h2c->dbuf, hole);
+ }
+
+ if (b_full(&h2c->dbuf) && h2c->dfl) {
+ /* too large frames */
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = -1;
+ }
+
+ if (htx)
+ htx_to_buf(htx, rxbuf);
+ free_trash_chunk(copy);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ return ret;
+
+ fail:
+ ret = -1;
+ goto leave;
+
+ trailers:
+ /* This is the last HEADERS frame hence a trailer */
+ if (!(h2c->dff & H2_F_HEADERS_END_STREAM)) {
+ /* It's a trailer but it's missing ES flag */
+ TRACE_STATE("missing EH on trailers frame", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ /* Trailers terminate a DATA sequence */
+ if (h2_make_htx_trailers(list, htx) <= 0) {
+ TRACE_STATE("failed to append HTX trailers into rxbuf", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2S_ERR, h2c->conn);
+ goto fail;
+ }
+ goto done;
+}
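+
+/* Illustrative sketch, not part of the patch: the CONTINUATION folding
+ * bookkeeping from the loop above, applied to concrete numbers and assuming
+ * the whole CONTINUATION frame is already in the buffer (clen == hdr.len).
+ * Its payload is moved back over the padding, the previous hole and its own
+ * 9-byte header, all of which become part of the new hole.
+ */
+#if 0 /* example only */
+static void example_fold_step(void)
+{
+	unsigned int dfl = 100, dpl = 10, hole = 0; /* HEADERS: 100 bytes incl. 10 pad */
+	unsigned int clen = 50;                     /* CONTINUATION payload length */
+	unsigned int shift;
+
+	shift = dpl + hole + 9; /* payload moves back by 19 bytes */
+	dfl += clen - dpl;      /* 100 + 50 - 10 = 140: new aggregated length */
+	hole += dpl + 9;        /* 0 + 10 + 9 = 19: hole after the payload */
+	dpl = 0;                /* padding was overwritten */
+	(void)shift; (void)dfl; (void)hole; (void)dpl;
+}
+#endif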
+
+/* Transfer the payload of a DATA frame to the HTTP/1 side. The HTTP/2 frame
+ * parser state is automatically updated. Returns > 0 if it could completely
+ * send the current frame, 0 if it couldn't complete, in which case
+ * SE_FL_RCV_MORE must be checked to know if some data remain pending (an empty
+ * DATA frame can return 0 as a valid result). Stream errors are reported in
+ * h2s->errcode and connection errors in h2c->errcode. The caller must already
+ * have checked the frame header and ensured that the frame was complete or the
+ * buffer full. It changes the frame state to FRAME_A once done.
+ */
+static int h2_frt_transfer_data(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+ int block;
+ unsigned int flen = 0;
+ struct htx *htx = NULL;
+ struct buffer *scbuf;
+ unsigned int sent;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+
+ h2c->flags &= ~H2_CF_DEM_SFULL;
+
+ scbuf = h2_get_buf(h2c, &h2s->rxbuf);
+ if (!scbuf) {
+ h2c->flags |= H2_CF_DEM_SALLOC;
+ TRACE_STATE("waiting for an h2s rxbuf", H2_EV_RX_FRAME|H2_EV_RX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto fail;
+ }
+ htx = htx_from_buf(scbuf);
+
+try_again:
+ flen = h2c->dfl - h2c->dpl;
+ if (!flen)
+ goto end_transfer;
+
+ if (flen > b_data(&h2c->dbuf)) {
+ flen = b_data(&h2c->dbuf);
+ if (!flen)
+ goto fail;
+ }
+
+ block = htx_free_data_space(htx);
+ if (!block) {
+ h2c->flags |= H2_CF_DEM_SFULL;
+ TRACE_STATE("h2s rxbuf is full", H2_EV_RX_FRAME|H2_EV_RX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto fail;
+ }
+ if (flen > block)
+ flen = block;
+
+ /* here, flen is the max we can copy into the output buffer */
+ block = b_contig_data(&h2c->dbuf, 0);
+ if (flen > block)
+ flen = block;
+
+ sent = htx_add_data(htx, ist2(b_head(&h2c->dbuf), flen));
+ TRACE_DATA("move some data to h2s rxbuf", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s, 0, (void *)(long)sent);
+
+ b_del(&h2c->dbuf, sent);
+ h2c->dfl -= sent;
+ h2c->rcvd_c += sent;
+ h2c->rcvd_s += sent; // warning, this can also affect the closed streams!
+
+ if (h2s->flags & H2_SF_DATA_CLEN) {
+ h2s->body_len -= sent;
+ htx->extra = h2s->body_len;
+ }
+
+ if (sent < flen) {
+ h2c->flags |= H2_CF_DEM_SFULL;
+ TRACE_STATE("h2s rxbuf is full", H2_EV_RX_FRAME|H2_EV_RX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto fail;
+ }
+
+ goto try_again;
+
+ end_transfer:
+ /* here we're done with the frame, all the payload (except padding) was
+ * transferred.
+ */
+
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL) && (h2c->dff & H2_F_DATA_END_STREAM)) {
+ /* no more data are expected for this message. This adds the EOM
+ * flag but only on the response path or if no tunnel attempt
+ * was aborted. Otherwise (request path + tunnel aborted), the
+ * EOM was already reported.
+ */
+ if ((h2c->flags & H2_CF_IS_BACK) || !(h2s->flags & H2_SF_TUNNEL_ABRT)) {
+ /* If we receive an empty DATA frame with ES flag while the HTX
+ * message is empty, we must push a block to make sure the
+ * HTX EOM flag will be handled on the other side. It is a
+ * workaround because for now it is not possible to push an empty
+ * HTX DATA block. And without this block, there is no way to
+ * "commit" the end of the message.
+ */
+ if (htx_is_empty(htx)) {
+ if (!htx_add_endof(htx, HTX_BLK_EOT))
+ goto fail;
+ }
+ htx->flags |= HTX_FL_EOM;
+ }
+ }
+
+ h2c->rcvd_c += h2c->dpl;
+ h2c->rcvd_s += h2c->dpl;
+ h2c->dpl = 0;
+ h2c->st0 = H2_CS_FRAME_A; // send the corresponding window update
+ htx_to_buf(htx, scbuf);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 1;
+ fail:
+ if (htx)
+ htx_to_buf(htx, scbuf);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 0;
+}
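+
+/* Illustrative sketch, not part of the patch: the successive clamps applied
+ * in try_again above to decide how many DATA payload bytes can be moved in
+ * one pass. All parameter names are hypothetical.
+ */
+#if 0 /* example only */
+static unsigned int example_clamp_flen(unsigned int frame_left, unsigned int buf_data,
+                                       unsigned int htx_free, unsigned int contig)
+{
+	unsigned int flen = frame_left; /* what remains of the frame minus padding */
+
+	if (flen > buf_data)
+		flen = buf_data;        /* can't read more than was received */
+	if (flen > htx_free)
+		flen = htx_free;        /* can't write more than the HTX free space */
+	if (flen > contig)
+		flen = contig;          /* copy only the contiguous input part */
+	return flen;
+}
+#endif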
+
+/* Try to send a HEADERS frame matching HTX response present in HTX message
+ * <htx> for the H2 stream <h2s>. Returns the number of bytes sent. The caller
+ * must check the stream's status to detect any error which might have happened
+ * subsequently to a successful send. The htx blocks are automatically removed
+ * from the message. The htx message is assumed to be valid since produced from
+ * the internal code, hence it contains a start line, an optional series of
+ * header blocks and an end of header, otherwise an invalid frame could be
+ * emitted and the resulting htx message could be left in an inconsistent state.
+ */
+static size_t h2s_frt_make_resp_headers(struct h2s *h2s, struct htx *htx)
+{
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct h2c *h2c = h2s->h2c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_sl *sl;
+ enum htx_blk_type type;
+ int es_now = 0;
+ int ret = 0;
+ int hdr;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ TRACE_STATE("mux output busy", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ return 0;
+ }
+
+ /* get the start line (we do have one) and the rest of the headers,
+ * that we dump starting at header 0 */
+ sl = NULL;
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOH)
+ break;
+
+ if (type == HTX_BLK_HDR) {
+ BUG_ON(!sl); /* The start-line must be defined before any headers */
+ if (unlikely(hdr >= sizeof(list)/sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+ hdr++;
+ }
+ else if (type == HTX_BLK_RES_SL) {
+ BUG_ON(sl); /* Only one start-line expected */
+ sl = htx_get_blk_ptr(htx, blk);
+ h2s->status = sl->info.res.status;
+ if (h2s->status == 204 || h2s->status == 304)
+ h2s->flags |= H2_SF_BODYLESS_RESP;
+ if (h2s->status < 100 || h2s->status > 999) {
+ TRACE_ERROR("will not encode an invalid status code", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ else if (h2s->status == 101) {
+ if (unlikely(h2s->flags & H2_SF_EXT_CONNECT_RCVD)) {
+ /* If an Extended CONNECT has been received, we need to convert 101 to 200 */
+ h2s->status = 200;
+ h2s->flags &= ~H2_SF_EXT_CONNECT_RCVD;
+ }
+ else {
+ /* Otherwise, 101 responses are not supported in H2, so return an error (RFC7540#8.1.1) */
+ TRACE_ERROR("will not encode an invalid status code", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+ else if ((h2s->flags & H2_SF_BODY_TUNNEL) && h2s->status >= 300) {
+ /* Abort the tunnel attempt */
+ h2s->flags &= ~H2_SF_BODY_TUNNEL;
+ h2s->flags |= H2_SF_TUNNEL_ABRT;
+ }
+ }
+ else {
+ TRACE_ERROR("will not encode unexpected htx block", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+
+ /* The start-line must be defined */
+ BUG_ON(!sl);
+
+ /* marker for end of headers */
+ list[hdr].n = ist("");
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ return 0;
+ }
+
+ chunk_reset(&outbuf);
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9)
+ goto full;
+
+ /* len: 0x000000 (fill later), type: 1(HEADERS), flags: ENDH=4 */
+ memcpy(outbuf.area, "\x00\x00\x00\x01\x04", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
+
+ if ((h2c->flags & (H2_CF_SHTS_UPDATED|H2_CF_DTSU_EMITTED)) == H2_CF_SHTS_UPDATED) {
+ /* SETTINGS_HEADER_TABLE_SIZE changed, we must send an HPACK
+ * dynamic table size update so that some clients are not
+ * confused. In practice we only need to send the DTSU when the
+ * advertised size is lower than the current one, and since we
+ * don't use it and don't care about the default 4096 bytes,
+ * we only ack it with a zero size thus we at most have to deal
+ * with this once. See RFC7541#4.2 and #6.3 for the spec, and
+ * below for the whole context and interoperability risks:
+ * https://lists.w3.org/Archives/Public/ietf-http-wg/2021OctDec/0235.html
+ */
+ if (b_room(&outbuf) < 1)
+ goto full;
+ outbuf.area[outbuf.data++] = 0x20; // HPACK DTSU 0 bytes
+
+ /* let's not update the flags now but only once the buffer is
+ * really committed.
+ */
+ }
+
+ /* encode status, which necessarily is the first one */
+ if (!hpack_encode_int_status(&outbuf, h2s->status)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ if ((TRACE_SOURCE)->verbosity >= H2_VERB_ADVANCED) {
+ char sts[4];
+
+ h2_trace_header(ist(":status"), ist(ultoa_r(h2s->status, sts, sizeof(sts))),
+ H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__,
+ h2c, h2s);
+ }
+
+ /* encode all headers, stop at empty name */
+ for (hdr = 0; hdr < sizeof(list)/sizeof(list[0]); hdr++) {
+ /* these ones do not exist in H2 and must be dropped. */
+ if (isteq(list[hdr].n, ist("connection")) ||
+ isteq(list[hdr].n, ist("proxy-connection")) ||
+ isteq(list[hdr].n, ist("keep-alive")) ||
+ isteq(list[hdr].n, ist("upgrade")) ||
+ isteq(list[hdr].n, ist("transfer-encoding")))
+ continue;
+
+ /* Skip all pseudo-headers */
+ if (*(list[hdr].n.ptr) == ':')
+ continue;
+
+ if (isteq(list[hdr].n, ist("")))
+ break; // end
+
+ if (!h2_encode_header(&outbuf, list[hdr].n, list[hdr].v, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, outbuf.data - 9);
+
+ if (outbuf.data > h2c->mfs + 9) {
+ if (!h2_fragment_headers(&outbuf, h2c->mfs)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ TRACE_USER("sent H2 response ", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+
+ /* remove all header blocks including the EOH and compute the
+ * corresponding size.
+ */
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ /* The removed block is the EOH */
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ if (!h2s_sc(h2s) || se_fl_test(h2s->sd, SE_FL_SHW)) {
+ /* Response already closed: add END_STREAM */
+ es_now = 1;
+ }
+ else if ((htx->flags & HTX_FL_EOM) && htx_is_empty(htx) && h2s->status >= 200) {
+ /* EOM+empty: we may need to add END_STREAM except for 1xx
+ * responses and tunneled responses.
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL) || h2s->status >= 300)
+ es_now = 1;
+ }
+
+ if (es_now)
+ outbuf.area[4] |= H2_F_HEADERS_END_STREAM;
+
+ /* commit the H2 response */
+ b_add(mbuf, outbuf.data);
+
+ /* indicates the HEADERS frame was sent, except for 1xx responses. For
+ * 1xx responses, another HEADERS frame is expected.
+ */
+ if (h2s->status >= 200)
+ h2s->flags |= H2_SF_HEADERS_SENT;
+
+ if (h2c->flags & H2_CF_SHTS_UPDATED) {
+ /* was sent above */
+ h2c->flags |= H2_CF_DTSU_EMITTED;
+ h2c->flags &= ~H2_CF_SHTS_UPDATED;
+ }
+
+ if (es_now) {
+ h2s->flags |= H2_SF_ES_SENT;
+ TRACE_PROTO("setting ES on HEADERS frame", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+ }
+
+ /* OK we could properly deliver the response */
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ ret = 0;
+ TRACE_STATE("mux buffer full", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ fail:
+ /* unparsable HTX messages, messages too large to be produced into the
+ * local list, etc. go here (unrecoverable errors).
+ */
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto end;
+}
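+
+/* Illustrative sketch, not part of the patch: the 9 octets written above by
+ * the memcpy()/write_n32() pair before HPACK encoding starts, plus the
+ * optional 0x20 octet, which is an HPACK dynamic table size update to zero
+ * (RFC 7541 #6.3: pattern 001xxxxx carrying a 5-bit value). The helper name
+ * is hypothetical.
+ */
+#if 0 /* example only */
+static size_t example_mk_headers_frame_hdr(uint8_t *out, uint32_t sid, int send_dtsu)
+{
+	size_t len = 0;
+
+	out[len++] = 0; out[len++] = 0; out[len++] = 0; /* 24-bit length, fixed later */
+	out[len++] = 0x01;                              /* type: HEADERS */
+	out[len++] = 0x04;                              /* flags: END_HEADERS */
+	out[len++] = (sid >> 24) & 0x7f;                /* 31-bit stream id */
+	out[len++] = (sid >> 16) & 0xff;
+	out[len++] = (sid >> 8) & 0xff;
+	out[len++] = sid & 0xff;
+	if (send_dtsu)
+		out[len++] = 0x20;                      /* HPACK DTSU, size 0 */
+	return len;
+}
+#endif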
+
+/* Try to send a HEADERS frame matching HTX request present in HTX message
+ * <htx> for the H2 stream <h2s>. Returns the number of bytes sent. The caller
+ * must check the stream's status to detect any error which might have happened
+ * subsequently to a successful send. The htx blocks are automatically removed
+ * from the message. The htx message is assumed to be valid since produced from
+ * the internal code, hence it contains a start line, an optional series of
+ * header blocks and an end of header, otherwise an invalid frame could be
+ * emitted and the resulting htx message could be left in an inconsistent state.
+ */
+static size_t h2s_bck_make_req_headers(struct h2s *h2s, struct htx *htx)
+{
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct h2c *h2c = h2s->h2c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_sl *sl;
+ struct ist meth, uri, auth, host = IST_NULL;
+ enum htx_blk_type type;
+ int es_now = 0;
+ int ret = 0;
+ int hdr;
+ int extended_connect = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ TRACE_STATE("mux output busy", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ return 0;
+ }
+
+ /* get the start line (we do have one) and the rest of the headers,
+ * that we dump starting at header 0 */
+ sl = NULL;
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOH)
+ break;
+
+ if (type == HTX_BLK_HDR) {
+ BUG_ON(!sl); /* The start-line must be defined before any headers */
+ if (unlikely(hdr >= sizeof(list)/sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+
+ /* Skip header if same name is used to add the server name */
+ if ((h2c->flags & H2_CF_IS_BACK) && isttest(h2c->proxy->server_id_hdr_name) &&
+ isteq(list[hdr].n, h2c->proxy->server_id_hdr_name))
+ continue;
+
+ /* Convert connection: upgrade to Extended connect from rfc 8441 */
+ if ((sl->flags & HTX_SL_F_CONN_UPG) && isteqi(list[hdr].n, ist("connection"))) {
+ /* rfc 7230 #6.1 Connection = list of tokens */
+ struct ist connection_ist = list[hdr].v;
+ do {
+ if (isteqi(iststop(connection_ist, ','),
+ ist("upgrade"))) {
+ if (!(h2c->flags & H2_CF_RCVD_RFC8441)) {
+ TRACE_STATE("reject upgrade because of no RFC8441 support", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ TRACE_STATE("convert upgrade to extended connect method", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ h2s->flags |= (H2_SF_BODY_TUNNEL|H2_SF_EXT_CONNECT_SENT);
+ sl->info.req.meth = HTTP_METH_CONNECT;
+ meth = ist("CONNECT");
+
+ extended_connect = 1;
+ break;
+ }
+
+ connection_ist = istadv(istfind(connection_ist, ','), 1);
+ } while (istlen(connection_ist));
+ }
+
+ if ((sl->flags & HTX_SL_F_CONN_UPG) && isteq(list[hdr].n, ist("upgrade"))) {
+ /* rfc 7230 #6.7 Upgrade = list of protocols
+ * rfc 8441 #4 Extended connect = :protocol is single-valued
+ *
+ * only the first HTTP/1 protocol is preserved
+ */
+ const struct ist protocol = iststop(list[hdr].v, ',');
+ /* upgrade_protocol field is 16 bytes long in h2s */
+ istpad(h2s->upgrade_protocol, isttrim(protocol, 15));
+ }
+
+ if (isteq(list[hdr].n, ist("host")))
+ host = list[hdr].v;
+
+ hdr++;
+ }
+ else if (type == HTX_BLK_REQ_SL) {
+ BUG_ON(sl); /* Only one start-line expected */
+ sl = htx_get_blk_ptr(htx, blk);
+ meth = htx_sl_req_meth(sl);
+ uri = htx_sl_req_uri(sl);
+ if (sl->info.req.meth == HTTP_METH_HEAD)
+ h2s->flags |= H2_SF_BODYLESS_RESP;
+ if (unlikely(uri.len == 0)) {
+ TRACE_ERROR("no URI in HTX request", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+ else {
+ TRACE_ERROR("will not encode unexpected htx block", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+
+ /* The start-line must be defined */
+ BUG_ON(!sl);
+
+ /* Now add the server name to a header (if requested) */
+ if ((h2c->flags & H2_CF_IS_BACK) && isttest(h2c->proxy->server_id_hdr_name)) {
+ struct server *srv = objt_server(h2c->conn->target);
+
+ if (srv) {
+ list[hdr].n = h2c->proxy->server_id_hdr_name;
+ list[hdr].v = ist(srv->id);
+ hdr++;
+ }
+ }
+
+ /* marker for end of headers */
+ list[hdr].n = ist("");
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ return 0;
+ }
+
+ chunk_reset(&outbuf);
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9)
+ goto full;
+
+ /* len: 0x000000 (fill later), type: 1(HEADERS), flags: ENDH=4 */
+ memcpy(outbuf.area, "\x00\x00\x00\x01\x04", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
+
+ /* encode the method, which necessarily is the first one */
+ if (!hpack_encode_method(&outbuf, sl->info.req.meth, meth)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ h2_trace_header(ist(":method"), meth, H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__, h2c, h2s);
+
+ auth = ist(NULL);
+
+ /* RFC7540 #8.3: the CONNECT method must have:
+ * - :authority set to the URI part (host:port)
+ * - :method set to CONNECT
+ * - :scheme and :path omitted
+ *
+ * Note that this is not applicable in case of the Extended CONNECT
+ * protocol from rfc 8441.
+ */
+ if (unlikely(sl->info.req.meth == HTTP_METH_CONNECT) && !extended_connect) {
+ auth = uri;
+
+ if (!h2_encode_header(&outbuf, ist(":authority"), auth, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ h2s->flags |= H2_SF_BODY_TUNNEL;
+ } else {
+ /* other methods need a :scheme. If an authority is known from
+ * the request line, it must be sent, otherwise only host is
+ * sent. Host is never sent as the authority.
+ *
+ * This code is also applicable for Extended CONNECT protocol
+ * from rfc 8441.
+ */
+ struct ist scheme = { };
+
+ if (uri.ptr[0] != '/' && uri.ptr[0] != '*') {
+ /* the URI seems to start with a scheme */
+ int len = 1;
+
+ while (len < uri.len && uri.ptr[len] != ':')
+ len++;
+
+ if (len + 2 < uri.len && uri.ptr[len + 1] == '/' && uri.ptr[len + 2] == '/') {
+ /* make the uri start at the authority now */
+ scheme = ist2(uri.ptr, len);
+ uri = istadv(uri, len + 3);
+
+ /* find the auth part of the URI */
+ auth = ist2(uri.ptr, 0);
+ while (auth.len < uri.len && auth.ptr[auth.len] != '/')
+ auth.len++;
+
+ uri = istadv(uri, auth.len);
+ }
+ }
+
+ /* For Extended CONNECT, the :authority must be present.
+ * Use host value for it.
+ */
+ if (unlikely(extended_connect) && isttest(host))
+ auth = host;
+
+ if (!scheme.len) {
+ /* no explicit scheme, we're using an origin-form URI,
+ * probably from an H1 request transcoded to H2 via an
+ * external layer, then received as H2 without authority.
+ * So we have to look up the scheme from the HTX flags.
+ * In such a case only http and https are possible, and
+ * https is the default (sent by browsers).
+ */
+ if ((sl->flags & (HTX_SL_F_HAS_SCHM|HTX_SL_F_SCHM_HTTP)) == (HTX_SL_F_HAS_SCHM|HTX_SL_F_SCHM_HTTP))
+ scheme = ist("http");
+ else
+ scheme = ist("https");
+ }
+
+ if (!hpack_encode_scheme(&outbuf, scheme)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ if (auth.len &&
+ !h2_encode_header(&outbuf, ist(":authority"), auth, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ /* encode the path. RFC7540#8.1.2.3: if path is empty it must
+ * be sent as '/' or '*'.
+ */
+ if (unlikely(!uri.len)) {
+ if (sl->info.req.meth == HTTP_METH_OPTIONS)
+ uri = ist("*");
+ else
+ uri = ist("/");
+ }
+
+ if (!hpack_encode_path(&outbuf, uri)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ h2_trace_header(ist(":path"), uri, H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__, h2c, h2s);
+
+ /* encode the pseudo-header protocol from rfc8441 if using
+ * Extended CONNECT method.
+ */
+ if (unlikely(extended_connect)) {
+ const struct ist protocol = ist(h2s->upgrade_protocol);
+ if (isttest(protocol)) {
+ if (!h2_encode_header(&outbuf, ist(":protocol"), protocol, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+ }
+ }
+
+ /* encode all headers, stop at empty name. Host is only sent if we
+ * do not provide an authority.
+ */
+ for (hdr = 0; hdr < sizeof(list)/sizeof(list[0]); hdr++) {
+ struct ist n = list[hdr].n;
+ struct ist v = list[hdr].v;
+
+ /* these ones do not exist in H2 and must be dropped. */
+ if (isteq(n, ist("connection")) ||
+ (auth.len && isteq(n, ist("host"))) ||
+ isteq(n, ist("proxy-connection")) ||
+ isteq(n, ist("keep-alive")) ||
+ isteq(n, ist("upgrade")) ||
+ isteq(n, ist("transfer-encoding")))
+ continue;
+
+ if (isteq(n, ist("te"))) {
+ /* "te" may only be sent with "trailers" if this value
+ * is present, otherwise it must be deleted.
+ */
+ v = istist(v, ist("trailers"));
+ if (!isttest(v) || (v.len > 8 && v.ptr[8] != ','))
+ continue;
+ v = ist("trailers");
+ }
+
+ /* Skip all pseudo-headers */
+ if (*(n.ptr) == ':')
+ continue;
+
+ if (isteq(n, ist("")))
+ break; // end
+
+ if (!h2_encode_header(&outbuf, n, v, H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, outbuf.data - 9);
+
+ if (outbuf.data > h2c->mfs + 9) {
+ if (!h2_fragment_headers(&outbuf, h2c->mfs)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ TRACE_USER("sent H2 request ", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+
+ /* remove all header blocks including the EOH and compute the
+ * corresponding size.
+ */
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ /* The removed block is the EOH */
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ if (!h2s_sc(h2s) || se_fl_test(h2s->sd, SE_FL_SHW)) {
+ /* Request already closed: add END_STREAM */
+ es_now = 1;
+ }
+ if ((htx->flags & HTX_FL_EOM) && htx_is_empty(htx)) {
+ /* EOM+empty: we may need to add END_STREAM (except for CONNECT
+ * request)
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL))
+ es_now = 1;
+ }
+
+ if (es_now)
+ outbuf.area[4] |= H2_F_HEADERS_END_STREAM;
+
+ /* commit the H2 response */
+ b_add(mbuf, outbuf.data);
+ h2s->flags |= H2_SF_HEADERS_SENT;
+ h2s->st = H2_SS_OPEN;
+
+ if (es_now) {
+ TRACE_PROTO("setting ES on HEADERS frame", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+ // trim any possibly pending data (eg: inconsistent content-length)
+ h2s->flags |= H2_SF_ES_SENT;
+ h2s->st = H2_SS_HLOC;
+ }
+
+ end:
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ ret = 0;
+ TRACE_STATE("mux buffer full", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ fail:
+ /* unparsable HTX messages, messages too large to be built in the local
+ * list, etc. end up here (unrecoverable errors).
+ */
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto end;
+}
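+
+/* As an illustration of the absolute-form URI split performed above, here
+ * is a standalone sketch using plain C strings instead of ist (hypothetical
+ * helper, kept under #if 0 and not part of the build):
+ */
+#if 0
+#include <string.h>
+
+/* splits "https://example.com/img/x.png" into scheme "https", authority
+ * "example.com" and path "/img/x.png"; origin-form URIs ("/path") keep
+ * scheme and authority empty, as in h2s_bck_make_req_headers().
+ */
+static void split_uri_sketch(const char *uri,
+                             const char **sch, size_t *sch_len,
+                             const char **auth, size_t *auth_len,
+                             const char **path)
+{
+ const char *p = strstr(uri, "://");
+
+ *sch = *auth = NULL;
+ *sch_len = *auth_len = 0;
+ *path = uri;
+
+ if (uri[0] == '/' || uri[0] == '*' || !p)
+  return; /* origin-form or asterisk-form, no scheme/authority */
+
+ *sch = uri;
+ *sch_len = p - uri;
+ *auth = p + 3;
+ *path = *auth + strcspn(*auth, "/"); /* an empty path will be sent as "/" */
+ *auth_len = *path - *auth;
+}
+#endif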
+
+/* Try to send a DATA frame matching the HTTP response present in the HTX
+ * structure present in <buf>, for stream <h2s>. The caller must check the
+ * stream's status to detect any error which might have happened subsequently
+ * to a successful send. Returns the number of data bytes consumed, or zero
+ * if nothing was done.
+ */
+static size_t h2s_make_data(struct h2s *h2s, struct buffer *buf, size_t count)
+{
+ struct h2c *h2c = h2s->h2c;
+ struct htx *htx;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ size_t total = 0;
+ int es_now = 0;
+ int bsize; /* htx block size */
+ int fsize; /* h2 frame size */
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ int trunc_out; /* non-zero if truncated on out buf */
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ TRACE_STATE("mux output busy", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+
+ htx = htx_from_buf(buf);
+
+ /* We only come here with HTX_BLK_DATA blocks */
+
+ new_frame:
+ if (!count || htx_is_empty(htx))
+ goto end;
+
+ if ((h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_HEADERS_RCVD|H2_SF_BODY_TUNNEL)) == H2_SF_BODY_TUNNEL) {
+ /* The response HEADERS frame has not been received yet, thus the
+ * tunnel is not fully established. In this situation, we block
+ * data sending.
+ */
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ TRACE_STATE("Request DATA frame blocked waiting for tunnel establishment", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+ else if ((h2c->flags & H2_CF_IS_BACK) && (h2s->flags & H2_SF_TUNNEL_ABRT)) {
+ /* A tunnel attempt was aborted but there is pending raw data to
+ * transfer to the server. Thus the stream is closed with the CANCEL
+ * error. The error will be reported to the upper layer as a server
+ * abort. But at this stage there is nothing more we can do. We just
+ * wait for the end of the response to be sure not to truncate it.
+ */
+ if (!(h2s->flags & H2_SF_ES_RCVD)) {
+ TRACE_STATE("Request DATA frame blocked waiting end of aborted tunnel", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ }
+ else {
+ TRACE_ERROR("Request DATA frame for aborted tunnel", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_CANCEL);
+ }
+ goto end;
+ }
+
+ blk = htx_get_head_blk(htx);
+ type = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+ fsize = bsize;
+ trunc_out = 0;
+ if (type != HTX_BLK_DATA)
+ goto end;
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ /* Perform some optimizations to reduce the number of buffer copies.
+ * First, if the mux's buffer is empty and the htx area contains
+ * exactly one data block of the same size as the requested count, and
+ * this count fits within the frame size, the stream's window size, and
+ * the connection's window size, then it's possible to simply swap the
+ * caller's buffer with the mux's output buffer and adjust offsets and
+ * length to match the entire DATA HTX block in the middle. In this
+ * case we perform a true zero-copy operation from end-to-end. This is
+ * the situation that happens all the time with large files. Second, if
+ * this is not possible, but the mux's output buffer is empty, we still
+ * have an opportunity to avoid the copy to the intermediary buffer, by
+ * making the intermediary buffer's area point to the output buffer's
+ * area. In this case we want to skip the HTX header to make sure that
+ * copies remain aligned and that this operation remains possible all
+ * the time. This goes for headers, data blocks and any data extracted
+ * from the HTX blocks.
+ */
+ if (unlikely(fsize == count &&
+ htx_nbblks(htx) == 1 && type == HTX_BLK_DATA &&
+ fsize <= h2s_mws(h2s) && fsize <= h2c->mws && fsize <= h2c->mfs)) {
+ void *old_area = mbuf->area;
+
+ if (b_data(mbuf)) {
+ /* Too bad there are data left there. We're willing to memcpy/memmove
+ * up to 1/4 of the buffer, which means that it's OK to copy a large
+ * frame into a buffer containing few data if it needs to be realigned,
+ * and that it's also OK to copy few data without realigning. Otherwise
+ * we'll pretend the mbuf is full and wait for it to become empty.
+ */
+ if (fsize + 9 <= b_room(mbuf) &&
+ (b_data(mbuf) <= b_size(mbuf) / 4 ||
+ (fsize <= b_size(mbuf) / 4 && fsize + 9 <= b_contig_space(mbuf)))) {
+ TRACE_STATE("small data present in output buffer, appending", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto copy;
+ }
+
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("too large data present in output buffer, waiting for emptiness", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (htx->flags & HTX_FL_EOM) {
+ /* EOM+empty: we may need to add END_STREAM (except for tunneled
+ * message)
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL))
+ es_now = 1;
+ }
+ /* map an H2 frame to the HTX block so that we can put the
+ * frame header there.
+ */
+ *mbuf = b_make(buf->area, buf->size, sizeof(struct htx) + blk->addr - 9, fsize + 9);
+ outbuf.area = b_head(mbuf);
+
+ /* prepend an H2 DATA frame header just before the DATA block */
+ memcpy(outbuf.area, "\x00\x00\x00\x00\x00", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ if (es_now)
+ outbuf.area[4] |= H2_F_DATA_END_STREAM;
+ h2_set_frame_size(outbuf.area, fsize);
+
+ /* update windows */
+ h2s->sws -= fsize;
+ h2c->mws -= fsize;
+
+ /* and exchange with our old area */
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+ total += fsize;
+ fsize = 0;
+
+ TRACE_PROTO("sent H2 DATA frame (zero-copy)", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto out;
+ }
+
+ copy:
+ /* for DATA and EOM we'll have to emit a frame, even if empty */
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9) {
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("output buffer full", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+
+ /* len: 0x000000 (fill later), type: 0(DATA), flags: none=0 */
+ memcpy(outbuf.area, "\x00\x00\x00\x00\x00", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
+
+ /* we have in <fsize> the exact number of bytes we need to copy from
+ * the HTX buffer. We need to check this against the connection's and
+ * the stream's send windows, and to ensure that this fits in the max
+ * frame size and in the buffer's available space minus 9 bytes (for
+ * the frame header). The connection's flow control is applied last so
+ * that we can use a separate list of streams which are immediately
+ * unblocked on window opening. Note: we don't implement padding.
+ */
+
+ if (!fsize)
+ goto send_empty;
+
+ if (h2s_mws(h2s) <= 0) {
+ h2s->flags |= H2_SF_BLK_SFCTL;
+ if (LIST_INLIST(&h2s->list))
+ LIST_DEL_INIT(&h2s->list);
+ LIST_APPEND(&h2c->blocked_list, &h2s->list);
+ TRACE_STATE("stream window <=0, flow-controlled", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2S_FCTL, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (fsize > count)
+ fsize = count;
+
+ if (fsize > h2s_mws(h2s))
+ fsize = h2s_mws(h2s); // >0
+
+ if (h2c->mfs && fsize > h2c->mfs)
+ fsize = h2c->mfs; // >0
+
+ if (fsize + 9 > outbuf.size) {
+ /* It doesn't fit at once. If it at least fits once the
+ * buffer is realigned and the amount of data to move is low,
+ * let's defragment the buffer now.
+ */
+ if (b_space_wraps(mbuf) &&
+ (fsize + 9 <= b_room(mbuf)) &&
+ b_data(mbuf) <= MAX_DATA_REALIGN)
+ goto realign_again;
+ fsize = outbuf.size - 9;
+ trunc_out = 1;
+
+ if (fsize <= 0) {
+ /* no need to send an empty frame here */
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("output buffer full", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+ }
+
+ if (h2c->mws <= 0) {
+ h2s->flags |= H2_SF_BLK_MFCTL;
+ TRACE_STATE("connection window <=0, stream flow-controlled", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2C_FCTL, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (fsize > h2c->mws)
+ fsize = h2c->mws;
+
+ /* now let's copy this into the output buffer */
+ memcpy(outbuf.area + 9, htx_get_blk_ptr(htx, blk), fsize);
+ h2s->sws -= fsize;
+ h2c->mws -= fsize;
+ count -= fsize;
+
+ send_empty:
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, fsize);
+
+ /* consume incoming HTX block */
+ total += fsize;
+ if (fsize == bsize) {
+ htx_remove_blk(htx, blk);
+ if ((htx->flags & HTX_FL_EOM) && htx_is_empty(htx)) {
+ /* EOM+empty: we may need to add END_STREAM (except for tunneled
+ * message)
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL))
+ es_now = 1;
+ }
+ }
+ else {
+ /* we've truncated this block */
+ htx_cut_data_blk(htx, blk, fsize);
+ }
+
+ if (es_now)
+ outbuf.area[4] |= H2_F_DATA_END_STREAM;
+
+ /* commit the H2 response */
+ b_add(mbuf, fsize + 9);
+
+ out:
+ if (es_now) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+
+ h2s->flags |= H2_SF_ES_SENT;
+ TRACE_PROTO("ES flag set on outgoing frame", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_TX_EOI, h2c->conn, h2s);
+ }
+ else if (fsize) {
+ if (fsize == bsize) {
+ TRACE_DEVEL("more data may be available, trying to send another frame", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto new_frame;
+ }
+ else if (trunc_out) {
+ /* we've truncated this block */
+ goto new_frame;
+ }
+ }
+
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ return total;
+}
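+
+/* The successive clamps applied to <fsize> in h2s_make_data() boil down to
+ * a minimum over the block size, the caller's count, both flow-control
+ * windows and the negotiated max frame size, with the connection window
+ * applied last. A compact equivalent, as a sketch (hypothetical helper,
+ * not part of the build):
+ */
+#if 0
+static int h2_clamp_fsize_sketch(int bsize, int count, int sws, int mws, int mfs)
+{
+ int fsize = bsize;
+
+ if (fsize > count)
+  fsize = count;
+ if (fsize > sws)
+  fsize = sws;  /* stream send window */
+ if (mfs && fsize > mfs)
+  fsize = mfs;  /* SETTINGS_MAX_FRAME_SIZE */
+ if (fsize > mws)
+  fsize = mws;  /* connection window, applied last */
+ return fsize;
+}
+#endif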
+
+/* Skip the message payload (DATA blocks) and emit an empty DATA frame with the
+ * ES flag set for stream <h2s>. This function is called for responses known to
+ * have no payload. Only DATA blocks are skipped, which means trailers are
+ * still emitted. The caller must check the stream's status to detect any error
+ * which might have happened subsequently to a successful send. Returns the
+ * number of data bytes consumed, or zero if nothing was done.
+ */
+static size_t h2s_skip_data(struct h2s *h2s, struct buffer *buf, size_t count)
+{
+ struct h2c *h2c = h2s->h2c;
+ struct htx *htx;
+ int bsize; /* htx block size */
+ int fsize; /* h2 frame size */
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ size_t total = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ TRACE_STATE("mux output busy", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+
+ htx = htx_from_buf(buf);
+
+ next_data:
+ if (!count || htx_is_empty(htx))
+ goto end;
+ blk = htx_get_head_blk(htx);
+ type = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+ fsize = bsize;
+ if (type != HTX_BLK_DATA)
+ goto end;
+
+ if (fsize > count)
+ fsize = count;
+
+ if (fsize != bsize)
+ goto skip_data;
+
+ if (!(htx->flags & HTX_FL_EOM) || !htx_is_unique_blk(htx, blk))
+ goto skip_data;
+
+ /* Here, it is the last block and it is also the end of the message. So
+ * we can emit an empty DATA frame with the ES flag set.
+ */
+ if (h2_send_empty_data_es(h2s) <= 0)
+ goto end;
+
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+
+ skip_data:
+ /* consume incoming HTX block */
+ total += fsize;
+ if (fsize == bsize) {
+ TRACE_DEVEL("more data may be available, trying to skip another frame", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ htx_remove_blk(htx, blk);
+ goto next_data;
+ }
+ else {
+ /* we've truncated this block */
+ htx_cut_data_blk(htx, blk, fsize);
+ }
+
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ return total;
+}
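+
+/* Note that the zero-copy path of h2s_make_data() above never moves the
+ * payload: it exchanges the storage areas of the two buffers and only
+ * rewrites the 9-byte frame header in front of the data. Stripped of the
+ * H2 specifics, the swap is the following (sketch, using the same struct
+ * buffer fields as above; not part of the build):
+ */
+#if 0
+static void zero_copy_swap_sketch(struct buffer *out, struct buffer *in,
+                                  size_t head, size_t data)
+{
+ void *old_area = out->area;
+
+ /* the output buffer adopts the input's storage area ... */
+ *out = b_make(in->area, in->size, head, data);
+
+ /* ... and the input keeps the old, now empty, area */
+ in->area = old_area;
+ in->data = in->head = 0;
+}
+#endif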
+
+/* Try to send a HEADERS frame matching HTX_BLK_TLR series of blocks present in
+ * HTX message <htx> for the H2 stream <h2s>. Returns the number of bytes
+ * processed. The caller must check the stream's status to detect any error
+ * which might have happened subsequently to a successful send. The htx blocks
+ * are automatically removed from the message. The htx message is assumed to be
+ * valid since produced from the internal code. Processing stops when meeting
+ * the EOT, which *is* removed. All trailers are processed at once and sent as a
+ * single frame. The ES flag is always set.
+ */
+static size_t h2s_make_trailers(struct h2s *h2s, struct htx *htx)
+{
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct h2c *h2c = h2s->h2c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ enum htx_blk_type type;
+ int ret = 0;
+ int hdr;
+ int idx;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+
+ if (h2c_mux_busy(h2c, h2s)) {
+ TRACE_STATE("mux output busy", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ goto end;
+ }
+
+ /* get trailers. */
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_TLR) {
+ if (unlikely(hdr >= sizeof(list)/sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+ hdr++;
+ }
+ else {
+ TRACE_ERROR("will not encode unexpected htx block", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+
+ /* marker for end of trailers */
+ list[hdr].n = ist("");
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ chunk_reset(&outbuf);
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9)
+ goto full;
+
+ /* len: 0x000000 (fill later), type: 1(HEADERS), flags: ENDH=4,ES=1 */
+ memcpy(outbuf.area, "\x00\x00\x00\x01\x05", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
+
+ /* encode all headers */
+ for (idx = 0; idx < hdr; idx++) {
+ /* these ones do not exist in H2 or must not appear in
+ * trailers and must be dropped.
+ */
+ if (isteq(list[idx].n, ist("host")) ||
+ isteq(list[idx].n, ist("content-length")) ||
+ isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("te")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ continue;
+
+ /* Skip all pseudo-headers */
+ if (*(list[idx].n.ptr) == ':')
+ continue;
+
+ if (!h2_encode_header(&outbuf, list[idx].n, list[idx].v, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ if (outbuf.data == 9) {
+ /* here we have a problem: we have nothing to emit (either we
+ * received an empty trailers block or we removed all of its
+ * contents above). Because of this we can't send a HEADERS
+ * frame, so we have to cheat and instead send an empty DATA
+ * frame conveying the ES flag.
+ */
+ outbuf.area[3] = H2_FT_DATA;
+ outbuf.area[4] = H2_F_DATA_END_STREAM;
+ }
+
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, outbuf.data - 9);
+
+ if (outbuf.data > h2c->mfs + 9) {
+ if (!h2_fragment_headers(&outbuf, h2c->mfs)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ /* commit the H2 response */
+ TRACE_PROTO("sent H2 trailers HEADERS frame", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_TX_EOI, h2c->conn, h2s);
+ b_add(mbuf, outbuf.data);
+ h2s->flags |= H2_SF_ES_SENT;
+
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+
+ /* OK we could properly deliver the response */
+ done:
+ /* remove all header blocks till the end and compute the corresponding size. */
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ /* The removed block is the EOT */
+ if (type == HTX_BLK_EOT)
+ break;
+ }
+
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ ret = 0;
+ TRACE_STATE("mux buffer full", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ fail:
+ /* unparsable HTX messages, messages too large to be built in the local
+ * list, etc. end up here (unrecoverable errors).
+ */
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto end;
+}
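+
+/* The list of field names dropped from trailers above (plus the
+ * pseudo-header rule) could be factored into a small predicate such as
+ * this one (hypothetical helper, not part of the build):
+ */
+#if 0
+static int h2_tlr_forbidden_sketch(const struct ist n)
+{
+ return isteq(n, ist("host")) ||
+        isteq(n, ist("content-length")) ||
+        isteq(n, ist("connection")) ||
+        isteq(n, ist("proxy-connection")) ||
+        isteq(n, ist("keep-alive")) ||
+        isteq(n, ist("upgrade")) ||
+        isteq(n, ist("te")) ||
+        isteq(n, ist("transfer-encoding")) ||
+        *(n.ptr) == ':'; /* pseudo-headers never appear in trailers */
+}
+#endif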
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int h2_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ struct h2c *h2c = h2s->h2c;
+
+ TRACE_ENTER(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2c->conn, h2s);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h2s->subs && h2s->subs != es);
+
+ es->events |= event_type;
+ h2s->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", H2_EV_STRM_RECV, h2c->conn, h2s);
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("subscribe(send)", H2_EV_STRM_SEND, h2c->conn, h2s);
+ if (!(h2s->flags & H2_SF_BLK_SFCTL) &&
+ !LIST_INLIST(&h2s->list)) {
+ if (h2s->flags & H2_SF_BLK_MFCTL)
+ LIST_APPEND(&h2c->fctl_list, &h2s->list);
+ else
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ }
+ TRACE_LEAVE(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2c->conn, h2s);
+ return 0;
+}
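+
+/* Seen from the upper layer, the subscription contract above is typically
+ * used as below: a sender that could not make progress parks itself until
+ * the mux wakes its tasklet (sketch only; the wiring of <we> and its
+ * tasklet is hypothetical, not part of the build):
+ */
+#if 0
+static void send_or_park_sketch(struct stconn *sc, struct buffer *buf,
+                                size_t to_send, struct wait_event *we)
+{
+ if (!h2_snd_buf(sc, buf, to_send, 0))
+  h2_subscribe(sc, SUB_RETRY_SEND, we); /* woken once room is back */
+}
+#endif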
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int h2_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+
+ TRACE_ENTER(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2s->h2c->conn, h2s);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h2s->subs && h2s->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ h2s->subs = NULL;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("unsubscribe(recv)", H2_EV_STRM_RECV, h2s->h2c->conn, h2s);
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("unsubscribe(send)", H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+ h2s->flags &= ~H2_SF_NOTIFIED;
+ if (!(h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW)))
+ LIST_DEL_INIT(&h2s->list);
+ }
+
+ TRACE_LEAVE(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2s->h2c->conn, h2s);
+ return 0;
+}
+
+
+/* Called from the upper layer, to receive data
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use kernel splicing or any kind of mux-to-mux
+ * xfer. If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct the
+ * mux it may optimize the data copy to <buf> if necessary. Otherwise, it should
+ * copy as much data as possible.
+ */
+static size_t h2_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ struct h2c *h2c = h2s->h2c;
+ struct htx *h2s_htx = NULL;
+ struct htx *buf_htx = NULL;
+ size_t ret = 0;
+
+ TRACE_ENTER(H2_EV_STRM_RECV, h2c->conn, h2s);
+
+ /* transfer possibly pending data to the upper layer */
+ h2s_htx = htx_from_buf(&h2s->rxbuf);
+ if (htx_is_empty(h2s_htx) && !(h2s_htx->flags & HTX_FL_PARSING_ERROR)) {
+ /* Here htx_to_buf() will set buffer data to 0 because
+ * the HTX is empty.
+ */
+ htx_to_buf(h2s_htx, &h2s->rxbuf);
+ goto end;
+ }
+
+ ret = h2s_htx->data;
+ buf_htx = htx_from_buf(buf);
+
+ /* <buf> is empty and the message is small enough, swap the
+ * buffers. */
+ if (htx_is_empty(buf_htx) && htx_used_space(h2s_htx) <= count) {
+ htx_to_buf(buf_htx, buf);
+ htx_to_buf(h2s_htx, &h2s->rxbuf);
+ b_xfer(buf, &h2s->rxbuf, b_data(&h2s->rxbuf));
+ goto end;
+ }
+
+ htx_xfer_blks(buf_htx, h2s_htx, count, HTX_BLK_UNUSED);
+
+ if (h2s_htx->flags & HTX_FL_PARSING_ERROR) {
+ buf_htx->flags |= HTX_FL_PARSING_ERROR;
+ if (htx_is_empty(buf_htx))
+ se_fl_set(h2s->sd, SE_FL_EOI);
+ }
+ else if (htx_is_empty(h2s_htx))
+ buf_htx->flags |= (h2s_htx->flags & HTX_FL_EOM);
+
+ buf_htx->extra = (h2s_htx->extra ? (h2s_htx->data + h2s_htx->extra) : 0);
+ htx_to_buf(buf_htx, buf);
+ htx_to_buf(h2s_htx, &h2s->rxbuf);
+ ret -= h2s_htx->data;
+
+ end:
+ if (b_data(&h2s->rxbuf))
+ se_fl_set(h2s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ else {
+ se_fl_clr(h2s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (h2s->flags & H2_SF_ES_RCVD) {
+ se_fl_set(h2s->sd, SE_FL_EOI);
+ /* Add EOS flag for tunnel */
+ if (h2s->flags & H2_SF_BODY_TUNNEL)
+ se_fl_set(h2s->sd, SE_FL_EOS);
+ }
+ if (h2c_read0_pending(h2c) || h2s->st == H2_SS_CLOSED)
+ se_fl_set(h2s->sd, SE_FL_EOS);
+ if (se_fl_test(h2s->sd, SE_FL_ERR_PENDING))
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+ if (b_size(&h2s->rxbuf)) {
+ b_free(&h2s->rxbuf);
+ offer_buffers(NULL, 1);
+ }
+ }
+
+ if (ret && h2c->dsi == h2s->id) {
+ /* demux is blocking on this stream's buffer */
+ h2c->flags &= ~H2_CF_DEM_SFULL;
+ h2c_restart_reading(h2c, 1);
+ }
+
+ TRACE_LEAVE(H2_EV_STRM_RECV, h2c->conn, h2s);
+ return ret;
+}
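+
+/* The htx_from_buf()/htx_to_buf() pairing used above is the general
+ * pattern for manipulating an HTX message stored in a buffer: get a typed
+ * view, work on it, then commit the view back (sketch, not part of the
+ * build):
+ */
+#if 0
+static void htx_view_sketch(struct buffer *b)
+{
+ struct htx *htx = htx_from_buf(b); /* typed HTX view of <b> */
+
+ /* ... inspect or consume htx blocks here ... */
+
+ htx_to_buf(htx, b); /* commit back; resets <b> to 0 bytes if empty */
+}
+#endif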
+
+
+/* Called from the upper layer, to send data from buffer <buf> for no more than
+ * <count> bytes. Returns the number of bytes effectively sent. Some status
+ * flags may be updated on the stream connector.
+ */
+static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ size_t total = 0;
+ size_t ret;
+ struct htx *htx;
+ struct htx_blk *blk;
+ enum htx_blk_type btype;
+ uint32_t bsize;
+ int32_t idx;
+
+ TRACE_ENTER(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+
+ /* If we were not just woken because we wanted to send but couldn't,
+ * and there's somebody else waiting to send, do nothing: we will
+ * subscribe later and be put at the end of the list.
+ */
+ if (!(h2s->flags & H2_SF_NOTIFIED) &&
+ (!LIST_ISEMPTY(&h2s->h2c->send_list) || !LIST_ISEMPTY(&h2s->h2c->fctl_list))) {
+ TRACE_DEVEL("other streams already waiting, going to the queue and leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
+ return 0;
+ }
+ h2s->flags &= ~H2_SF_NOTIFIED;
+
+ if (h2s->h2c->st0 < H2_CS_FRAME_H) {
+ TRACE_DEVEL("connection not ready, leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->h2c->st0 >= H2_CS_ERROR) {
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+ TRACE_DEVEL("connection is in error, leaving in error", H2_EV_H2S_SEND|H2_EV_H2S_BLK|H2_EV_H2S_ERR|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ return 0;
+ }
+
+ htx = htx_from_buf(buf);
+
+ if (!(h2s->flags & H2_SF_OUTGOING_DATA) && count)
+ h2s->flags |= H2_SF_OUTGOING_DATA;
+
+ if (h2s->id == 0) {
+ int32_t id = h2c_get_next_sid(h2s->h2c);
+
+ if (id < 0) {
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+ TRACE_DEVEL("couldn't get a stream ID, leaving in error", H2_EV_H2S_SEND|H2_EV_H2S_BLK|H2_EV_H2S_ERR|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ return 0;
+ }
+
+ eb32_delete(&h2s->by_id);
+ h2s->by_id.key = h2s->id = id;
+ h2s->h2c->max_id = id;
+ h2s->h2c->nb_reserved--;
+ eb32_insert(&h2s->h2c->streams_by_id, &h2s->by_id);
+ }
+
+ while (h2s->st < H2_SS_HLOC && !(h2s->flags & H2_SF_BLK_ANY) &&
+ count && !htx_is_empty(htx)) {
+ idx = htx_get_head(htx);
+ blk = htx_get_blk(htx, idx);
+ btype = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+
+ switch (btype) {
+ case HTX_BLK_REQ_SL:
+ /* start-line before headers */
+ ret = h2s_bck_make_req_headers(h2s, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_RES_SL:
+ /* start-line before headers */
+ ret = h2s_frt_make_resp_headers(h2s, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_DATA:
+ /* all these cause the emission of a DATA frame (possibly empty) */
+ if (!(h2s->h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_BODY_TUNNEL|H2_SF_BODYLESS_RESP)) == H2_SF_BODYLESS_RESP)
+ ret = h2s_skip_data(h2s, buf, count);
+ else
+ ret = h2s_make_data(h2s, buf, count);
+ if (ret > 0) {
+ htx = htx_from_buf(buf);
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ /* This is the first trailers block; h2s_make_trailers() will
+ * process all the subsequent ones and the EOT at once */
+ ret = h2s_make_trailers(h2s, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ default:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+ }
+
+ done:
+ if (h2s->st >= H2_SS_HLOC) {
+ /* trim any possibly pending data after we close (extra CR-LF,
+ * unprocessed trailers, abnormal extra data, ...)
+ */
+ total += count;
+ count = 0;
+ }
+
+ /* RST are sent similarly to frame acks */
+ if (h2s->st == H2_SS_ERROR || h2s->flags & H2_SF_RST_RCVD) {
+ TRACE_DEVEL("reporting RST/error to the app-layer stream", H2_EV_H2S_SEND|H2_EV_H2S_ERR|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ se_fl_set_error(h2s->sd);
+ if (h2s_send_rst_stream(h2s->h2c, h2s) > 0)
+ h2s_close(h2s);
+ }
+
+ htx_to_buf(htx, buf);
+
+ if (total > 0) {
+ if (!(h2s->h2c->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_DEVEL("data queued, waking up h2c sender", H2_EV_H2S_SEND|H2_EV_H2C_SEND, h2s->h2c->conn, h2s);
+ tasklet_wakeup(h2s->h2c->wait_event.tasklet);
+ }
+
+ }
+ /* If we're waiting for flow control, and we got a shutr on the
+ * connection, we will never be unlocked, so add an error on
+ * the stream connector.
+ */
+ if (conn_xprt_read0_pending(h2s->h2c->conn) &&
+ !b_data(&h2s->h2c->dbuf) &&
+ (h2s->flags & (H2_SF_BLK_SFCTL | H2_SF_BLK_MFCTL))) {
+ TRACE_DEVEL("fctl with shutr, reporting error to app-layer", H2_EV_H2S_SEND|H2_EV_STRM_SEND|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ if (se_fl_test(h2s->sd, SE_FL_EOS))
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+ else
+ se_fl_set(h2s->sd, SE_FL_ERR_PENDING);
+ }
+
+ if (total > 0 && !(h2s->flags & H2_SF_BLK_SFCTL) &&
+ !(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))) {
+ /* Ok we managed to send something, leave the send_list if we were still there */
+ LIST_DEL_INIT(&h2s->list);
+ }
+
+ TRACE_LEAVE(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+ return total;
+}
+
+/* for debugging with CLI's "show fd" command */
+static int h2_show_fd(struct buffer *msg, struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+ struct h2s *h2s = NULL;
+ struct eb32_node *node;
+ int fctl_cnt = 0;
+ int send_cnt = 0;
+ int tree_cnt = 0;
+ int orph_cnt = 0;
+ struct buffer *hmbuf, *tmbuf;
+ int ret = 0;
+
+ if (!h2c)
+ return ret;
+
+ list_for_each_entry(h2s, &h2c->fctl_list, list)
+ fctl_cnt++;
+
+ list_for_each_entry(h2s, &h2c->send_list, list)
+ send_cnt++;
+
+ h2s = NULL;
+ node = eb32_first(&h2c->streams_by_id);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ tree_cnt++;
+ if (!h2s_sc(h2s))
+ orph_cnt++;
+ node = eb32_next(node);
+ }
+
+ hmbuf = br_head(h2c->mbuf);
+ tmbuf = br_tail(h2c->mbuf);
+ chunk_appendf(msg, " h2c.st0=%s .err=%d .maxid=%d .lastid=%d .flg=0x%04x"
+ " .nbst=%u .nbcs=%u .fctl_cnt=%d .send_cnt=%d .tree_cnt=%d"
+ " .orph_cnt=%d .sub=%d .dsi=%d .dbuf=%u@%p+%u/%u .msi=%d"
+ " .mbuf=[%u..%u|%u],h=[%u@%p+%u/%u],t=[%u@%p+%u/%u]",
+ h2c_st_to_str(h2c->st0), h2c->errcode, h2c->max_id, h2c->last_sid, h2c->flags,
+ h2c->nb_streams, h2c->nb_sc, fctl_cnt, send_cnt, tree_cnt, orph_cnt,
+ h2c->wait_event.events, h2c->dsi,
+ (unsigned int)b_data(&h2c->dbuf), b_orig(&h2c->dbuf),
+ (unsigned int)b_head_ofs(&h2c->dbuf), (unsigned int)b_size(&h2c->dbuf),
+ h2c->msi,
+ br_head_idx(h2c->mbuf), br_tail_idx(h2c->mbuf), br_size(h2c->mbuf),
+ (unsigned int)b_data(hmbuf), b_orig(hmbuf),
+ (unsigned int)b_head_ofs(hmbuf), (unsigned int)b_size(hmbuf),
+ (unsigned int)b_data(tmbuf), b_orig(tmbuf),
+ (unsigned int)b_head_ofs(tmbuf), (unsigned int)b_size(tmbuf));
+
+ chunk_appendf(msg, " .task=%p", h2c->task);
+ if (h2c->task) {
+ chunk_appendf(msg, " .exp=%s",
+ h2c->task->expire ? tick_is_expired(h2c->task->expire, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(h2c->task->expire - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ }
+
+ if (h2s) {
+ chunk_appendf(msg, " last_h2s=%p .id=%d .st=%s .flg=0x%04x .rxbuf=%u@%p+%u/%u .sc=%p",
+ h2s, h2s->id, h2s_st_to_str(h2s->st), h2s->flags,
+ (unsigned int)b_data(&h2s->rxbuf), b_orig(&h2s->rxbuf),
+ (unsigned int)b_head_ofs(&h2s->rxbuf), (unsigned int)b_size(&h2s->rxbuf),
+ h2s_sc(h2s));
+ if (h2s_sc(h2s))
+ chunk_appendf(msg, "(.flg=0x%08x .app=%p)",
+ h2s_sc(h2s)->flags, h2s_sc(h2s)->app);
+
+ chunk_appendf(msg, "sd=%p", h2s->sd);
+ chunk_appendf(msg, "(.flg=0x%08x)", se_fl_get(h2s->sd));
+
+ chunk_appendf(msg, " .subs=%p", h2s->subs);
+ if (h2s->subs) {
+ chunk_appendf(msg, "(ev=%d tl=%p", h2s->subs->events, h2s->subs->tasklet);
+ chunk_appendf(msg, " tl.calls=%d tl.ctx=%p tl.fct=",
+ h2s->subs->tasklet->calls,
+ h2s->subs->tasklet->context);
+ if (h2s->subs->tasklet->calls >= 1000000)
+ ret = 1;
+ resolve_sym_name(msg, NULL, h2s->subs->tasklet->process);
+ chunk_appendf(msg, ")");
+ }
+ }
+ return ret;
+}
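+
+/* Example: assuming a stats socket is configured (the socket path below is
+ * only an illustration), the dump above can be obtained with:
+ *
+ *    echo "show fd" | socat stdio /var/run/haproxy.sock
+ *
+ * Connections handled by this mux then show the "h2c.st0=..." fields
+ * emitted by h2_show_fd().
+ */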
+
+/* Migrate the connection to the current thread.
+ * Return 0 if successful, non-zero otherwise.
+ * Expected to be called with the old thread lock held.
+ */
+static int h2_takeover(struct connection *conn, int orig_tid)
+{
+ struct h2c *h2c = conn->ctx;
+ struct task *task;
+
+ if (fd_takeover(conn->handle.fd, conn) != 0)
+ return -1;
+
+ if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) {
+ /* We failed to takeover the xprt, even if the connection may
+ * still be valid, flag it as error'd, as we have already
+ * taken over the fd, and wake the tasklet, so that it will
+ * destroy it.
+ */
+ conn->flags |= CO_FL_ERROR;
+ tasklet_wakeup_on(h2c->wait_event.tasklet, orig_tid);
+ return -1;
+ }
+
+ if (h2c->wait_event.events)
+ h2c->conn->xprt->unsubscribe(h2c->conn, h2c->conn->xprt_ctx,
+ h2c->wait_event.events, &h2c->wait_event);
+ /* To let the tasklet know it should free itself, and do nothing else,
+ * set its context to NULL.
+ */
+ h2c->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(h2c->wait_event.tasklet, orig_tid);
+
+ task = h2c->task;
+ if (task) {
+ task->context = NULL;
+ h2c->task = NULL;
+ __ha_barrier_store();
+ task_kill(task);
+
+ h2c->task = task_new_here();
+ if (!h2c->task) {
+ h2_release(h2c);
+ return -1;
+ }
+ h2c->task->process = h2_timeout_task;
+ h2c->task->context = h2c;
+ }
+ h2c->wait_event.tasklet = tasklet_new();
+ if (!h2c->wait_event.tasklet) {
+ h2_release(h2c);
+ return -1;
+ }
+ h2c->wait_event.tasklet->process = h2_io_cb;
+ h2c->wait_event.tasklet->context = h2c;
+ h2c->conn->xprt->subscribe(h2c->conn, h2c->conn->xprt_ctx,
+ SUB_RETRY_RECV, &h2c->wait_event);
+
+ return 0;
+}
+
+/*******************************************************/
+/* functions below are dedicated to the config parsers */
+/*******************************************************/
+
+/* config parser for global "tune.h2.header-table-size" */
+static int h2_parse_header_table_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ h2_settings_header_table_size = atoi(args[1]);
+ if (h2_settings_header_table_size < 4096 || h2_settings_header_table_size > 65536) {
+ memprintf(err, "'%s' expects a numeric value between 4096 and 65536.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h2.initial-window-size" */
+static int h2_parse_initial_window_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ h2_settings_initial_window_size = atoi(args[1]);
+ if (h2_settings_initial_window_size < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h2.max-concurrent-streams" */
+static int h2_parse_max_concurrent_streams(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ h2_settings_max_concurrent_streams = atoi(args[1]);
+ if ((int)h2_settings_max_concurrent_streams < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h2.max-frame-size" */
+static int h2_parse_max_frame_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ h2_settings_max_frame_size = atoi(args[1]);
+ if (h2_settings_max_frame_size < 16384 || h2_settings_max_frame_size > 16777215) {
+ memprintf(err, "'%s' expects a numeric value between 16384 and 16777215.", args[0]);
+ return -1;
+ }
+ return 0;
+}
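+
+/* Example global-section tuning using the four keywords registered below
+ * (values are mere illustrations within the accepted ranges, not
+ * recommendations):
+ *
+ *    global
+ *        tune.h2.header-table-size      4096
+ *        tune.h2.initial-window-size    65535
+ *        tune.h2.max-concurrent-streams 100
+ *        tune.h2.max-frame-size         16384
+ */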
+
+
+/****************************************/
+/* MUX initialization and instantiation */
+/****************************************/
+
+/* The mux operations */
+static const struct mux_ops h2_ops = {
+ .init = h2_init,
+ .wake = h2_wake,
+ .snd_buf = h2_snd_buf,
+ .rcv_buf = h2_rcv_buf,
+ .subscribe = h2_subscribe,
+ .unsubscribe = h2_unsubscribe,
+ .attach = h2_attach,
+ .get_first_sc = h2_get_first_sc,
+ .detach = h2_detach,
+ .destroy = h2_destroy,
+ .avail_streams = h2_avail_streams,
+ .used_streams = h2_used_streams,
+ .shutr = h2_shutr,
+ .shutw = h2_shutw,
+ .ctl = h2_ctl,
+ .show_fd = h2_show_fd,
+ .takeover = h2_takeover,
+ .flags = MX_FL_HTX|MX_FL_HOL_RISK|MX_FL_NO_UPG,
+ .name = "H2",
+};
+
+static struct mux_proto_list mux_proto_h2 =
+ { .token = IST("h2"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BOTH, .mux = &h2_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_h2);
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.h2.header-table-size", h2_parse_header_table_size },
+ { CFG_GLOBAL, "tune.h2.initial-window-size", h2_parse_initial_window_size },
+ { CFG_GLOBAL, "tune.h2.max-concurrent-streams", h2_parse_max_concurrent_streams },
+ { CFG_GLOBAL, "tune.h2.max-frame-size", h2_parse_max_frame_size },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* initialize internal structs after the config is parsed.
+ * Returns zero on success, non-zero on error.
+ */
+static int init_h2()
+{
+ pool_head_hpack_tbl = create_pool("hpack_tbl",
+ h2_settings_header_table_size,
+ MEM_F_SHARED|MEM_F_EXACT);
+ if (!pool_head_hpack_tbl) {
+ ha_alert("failed to allocate hpack_tbl memory pool\n");
+ return (ERR_ALERT | ERR_FATAL);
+ }
+ return ERR_NONE;
+}
+
+REGISTER_POST_CHECK(init_h2);
diff --git a/src/mux_pt.c b/src/mux_pt.c
new file mode 100644
index 0000000..baa67ce
--- /dev/null
+++ b/src/mux_pt.c
@@ -0,0 +1,709 @@
+/*
+ * Pass-through mux-demux for connections
+ *
+ * Copyright 2017 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/connection.h>
+#include <haproxy/pipe-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/trace.h>
+
+struct mux_pt_ctx {
+ struct sedesc *sd;
+ struct connection *conn;
+ struct wait_event wait_event;
+};
+
+DECLARE_STATIC_POOL(pool_head_pt_ctx, "mux_pt", sizeof(struct mux_pt_ctx));
+
+/* trace source and events */
+static void pt_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * pt_ctx - internal PT context
+ * strm - application layer
+ */
+static const struct trace_event pt_trace_events[] = {
+#define PT_EV_CONN_NEW (1ULL << 0)
+ { .mask = PT_EV_CONN_NEW, .name = "pt_conn_new", .desc = "new PT connection" },
+#define PT_EV_CONN_WAKE (1ULL << 1)
+ { .mask = PT_EV_CONN_WAKE, .name = "pt_conn_wake", .desc = "PT connection woken up" },
+#define PT_EV_CONN_END (1ULL << 2)
+ { .mask = PT_EV_CONN_END, .name = "pt_conn_end", .desc = "PT connection terminated" },
+#define PT_EV_CONN_ERR (1ULL << 3)
+ { .mask = PT_EV_CONN_ERR, .name = "pt_conn_err", .desc = "error on PT connection" },
+#define PT_EV_STRM_NEW (1ULL << 4)
+ { .mask = PT_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define PT_EV_STRM_SHUT (1ULL << 5)
+ { .mask = PT_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define PT_EV_STRM_END (1ULL << 6)
+ { .mask = PT_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define PT_EV_STRM_ERR (1ULL << 7)
+ { .mask = PT_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+#define PT_EV_RX_DATA (1ULL << 8)
+ { .mask = PT_EV_RX_DATA, .name = "pt_rx_data", .desc = "Rx on PT connection" },
+#define PT_EV_TX_DATA (1ULL << 9)
+ { .mask = PT_EV_TX_DATA, .name = "pt_tx_data", .desc = "Tx on PT connection" },
+
+ {}
+};
+
+
+static const struct name_desc pt_trace_decoding[] = {
+#define PT_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define PT_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only h1c/h1s state and flags, no real decoding" },
+#define PT_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or htx info when available" },
+#define PT_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or frame decoding when available" },
+#define PT_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_pt __read_mostly = {
+ .name = IST("pt"),
+ .desc = "Passthrough multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = pt_trace,
+ .known_events = pt_trace_events,
+ .lockon_args = NULL,
+ .decoding = pt_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_pt
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* returns the stconn associated to the stream */
+static forceinline struct stconn *pt_sc(const struct mux_pt_ctx *pt)
+{
+ return pt->sd->sc;
+}
+
+static inline void pt_trace_buf(const struct buffer *buf, size_t ofs, size_t len)
+{
+ size_t block1, block2;
+ int line, ptr, newptr;
+
+ block1 = b_contig_data(buf, ofs);
+ block2 = 0;
+ if (block1 > len)
+ block1 = len;
+ block2 = len - block1;
+
+ ofs = b_peek_ofs(buf, ofs);
+
+ line = 0;
+ ptr = ofs;
+ while (ptr < ofs + block1) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), ofs + block1, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+
+ line = ptr = 0;
+ while (ptr < block2) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), block2, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+}
+
+/* the PT traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive the pt context), that arg2, if non-null, is a
+ * stream connector, and that arg3, if non-null, is a buffer.
+ */
+static void pt_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct mux_pt_ctx *ctx = conn ? conn->ctx : NULL;
+ const struct stconn *sc = a2;
+ const struct buffer *buf = a3;
+ const size_t *val = a4;
+
+ if (!ctx || src->verbosity < PT_VERB_CLEAN)
+ return;
+
+ /* Display frontend/backend info by default */
+ chunk_appendf(&trace_buf, " : [%c]", (conn_is_back(conn) ? 'B' : 'F'));
+
+ if (src->verbosity == PT_VERB_CLEAN)
+ return;
+
+ if (!sc)
+ sc = pt_sc(ctx);
+
+ /* Display the value to the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+ /* Display conn and sc info, if defined (pointer + flags) */
+ chunk_appendf(&trace_buf, " - conn=%p(0x%08x)", conn, conn->flags);
+ chunk_appendf(&trace_buf, " sd=%p(0x%08x)", ctx->sd, se_fl_get(ctx->sd));
+ if (sc)
+ chunk_appendf(&trace_buf, " sc=%p(0x%08x)", sc, sc->flags);
+
+ if (src->verbosity == PT_VERB_MINIMAL)
+ return;
+
+ /* Display buffer info, if defined (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER && buf) {
+ int full = 0, max = 3000, chunk = 1024;
+
+ /* Full info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == PT_VERB_COMPLETE)
+ full = 1;
+ else if (src->verbosity == PT_VERB_ADVANCED) {
+ full = 1;
+ max = 256;
+ chunk = 64;
+ }
+ }
+
+ chunk_appendf(&trace_buf, " buf=%u@%p+%u/%u",
+ (unsigned int)b_data(buf), b_orig(buf),
+ (unsigned int)b_head_ofs(buf), (unsigned int)b_size(buf));
+
+ if (b_data(buf) && full) {
+ chunk_memcat(&trace_buf, "\n", 1);
+ if (b_data(buf) < max)
+ pt_trace_buf(buf, 0, b_data(buf));
+ else {
+ pt_trace_buf(buf, 0, chunk);
+ chunk_memcat(&trace_buf, " ...\n", 6);
+ pt_trace_buf(buf, b_data(buf) - chunk, chunk);
+ }
+ }
+ }
+}
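+
+/* Example: assuming the usual trace CLI (command names shown as an
+ * illustration), these dumps can be enabled at runtime with:
+ *
+ *    trace pt sink buf0
+ *    trace pt level developer
+ *    trace pt verbosity complete
+ *    trace pt start now
+ *
+ * and then inspected with "show events buf0".
+ */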
+
+static void mux_pt_destroy(struct mux_pt_ctx *ctx)
+{
+ struct connection *conn = NULL;
+
+ TRACE_POINT(PT_EV_CONN_END);
+
+ /* The connection must be attached to this mux to be released */
+ if (ctx->conn && ctx->conn->ctx == ctx)
+ conn = ctx->conn;
+
+ tasklet_free(ctx->wait_event.tasklet);
+
+ if (conn && ctx->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, ctx->wait_event.events,
+ &ctx->wait_event);
+ BUG_ON(ctx->sd && !se_fl_test(ctx->sd, SE_FL_ORPHAN));
+ sedesc_free(ctx->sd);
+ pool_free(pool_head_pt_ctx, ctx);
+
+ if (conn) {
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", PT_EV_CONN_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+}
+
+/* Callback, used when we get I/Os while in idle mode. This one is exported so
+ * that "show fd" can resolve it.
+ */
+struct task *mux_pt_io_cb(struct task *t, void *tctx, unsigned int status)
+{
+ struct mux_pt_ctx *ctx = tctx;
+
+ TRACE_ENTER(PT_EV_CONN_WAKE, ctx->conn);
+ if (!se_fl_test(ctx->sd, SE_FL_ORPHAN)) {
+ /* There's a small race condition.
+ * mux_pt_io_cb() is only supposed to be called if we have no
+ * stream attached. However, maybe the tasklet got woken up,
+ * and this connection was then attached to a new stream.
+ * If this happened, just wake the tasklet up if anybody
+ * subscribed to receive events, and otherwise call the wake
+ * method, to make sure the event is noticed.
+ */
+ if (ctx->conn->subs) {
+ ctx->conn->subs->events = 0;
+ tasklet_wakeup(ctx->conn->subs->tasklet);
+ ctx->conn->subs = NULL;
+ } else if (pt_sc(ctx)->app_ops->wake)
+ pt_sc(ctx)->app_ops->wake(pt_sc(ctx));
+ TRACE_DEVEL("leaving waking up SC", PT_EV_CONN_WAKE, ctx->conn);
+ return t;
+ }
+ conn_ctrl_drain(ctx->conn);
+ if (ctx->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH)) {
+ TRACE_DEVEL("leaving destroying pt context", PT_EV_CONN_WAKE, ctx->conn);
+ mux_pt_destroy(ctx);
+ t = NULL;
+ }
+ else {
+ ctx->conn->xprt->subscribe(ctx->conn, ctx->conn->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ TRACE_DEVEL("leaving subscribing for reads", PT_EV_CONN_WAKE, ctx->conn);
+ }
+
+ return t;
+}
+
+/* Initialize the mux once it's attached. It is expected that conn->ctx points
+ * to the existing stream connector (for outgoing connections) or NULL (for
+ * incoming ones, in which case one will be allocated and a new stream will be
+ * instantiated). Returns < 0 on error.
+ */
+static int mux_pt_init(struct connection *conn, struct proxy *prx, struct session *sess,
+ struct buffer *input)
+{
+ struct stconn *sc = conn->ctx;
+ struct mux_pt_ctx *ctx = pool_alloc(pool_head_pt_ctx);
+
+ TRACE_ENTER(PT_EV_CONN_NEW);
+
+ if (!ctx) {
+ TRACE_ERROR("PT context allocation failure", PT_EV_CONN_NEW|PT_EV_CONN_END|PT_EV_CONN_ERR);
+ goto fail;
+ }
+
+ ctx->wait_event.tasklet = tasklet_new();
+ if (!ctx->wait_event.tasklet)
+ goto fail_free_ctx;
+ ctx->wait_event.tasklet->context = ctx;
+ ctx->wait_event.tasklet->process = mux_pt_io_cb;
+ ctx->wait_event.events = 0;
+ ctx->conn = conn;
+
+ if (!sc) {
+ ctx->sd = sedesc_new();
+ if (!ctx->sd) {
+ TRACE_ERROR("SC allocation failure", PT_EV_STRM_NEW|PT_EV_STRM_END|PT_EV_STRM_ERR, conn);
+ goto fail_free_ctx;
+ }
+ ctx->sd->se = ctx;
+ ctx->sd->conn = conn;
+ se_fl_set(ctx->sd, SE_FL_T_MUX | SE_FL_ORPHAN);
+
+ sc = sc_new_from_endp(ctx->sd, sess, input);
+ if (!sc) {
+ TRACE_ERROR("SC allocation failure", PT_EV_STRM_NEW|PT_EV_STRM_END|PT_EV_STRM_ERR, conn);
+ goto fail_free_sd;
+ }
+ TRACE_POINT(PT_EV_STRM_NEW, conn, sc);
+ }
+ else {
+ if (sc_attach_mux(sc, ctx, conn) < 0)
+ goto fail_free_ctx;
+ ctx->sd = sc->sedesc;
+ }
+ conn->ctx = ctx;
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE);
+ if (global.tune.options & GTUNE_USE_SPLICE)
+ se_fl_set(ctx->sd, SE_FL_MAY_SPLICE);
+
+ TRACE_LEAVE(PT_EV_CONN_NEW, conn);
+ return 0;
+
+ fail_free_sd:
+ sedesc_free(ctx->sd);
+ fail_free_ctx:
+ if (ctx->wait_event.tasklet)
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(pool_head_pt_ctx, ctx);
+ fail:
+ TRACE_DEVEL("leaving in error", PT_EV_CONN_NEW|PT_EV_CONN_END|PT_EV_CONN_ERR);
+ return -1;
+}
+
+/* callback to be used by default for the pass-through mux. It calls the data
+ * layer wake() callback if it is set otherwise returns 0.
+ */
+static int mux_pt_wake(struct connection *conn)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+ int ret = 0;
+
+ TRACE_ENTER(PT_EV_CONN_WAKE, ctx->conn);
+ if (!se_fl_test(ctx->sd, SE_FL_ORPHAN)) {
+ ret = pt_sc(ctx)->app_ops->wake ? pt_sc(ctx)->app_ops->wake(pt_sc(ctx)) : 0;
+
+ if (ret < 0) {
+ TRACE_DEVEL("leaving waking up SC", PT_EV_CONN_WAKE, ctx->conn);
+ return ret;
+ }
+ } else {
+ conn_ctrl_drain(conn);
+ if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH)) {
+ TRACE_DEVEL("leaving destroying PT context", PT_EV_CONN_WAKE, ctx->conn);
+ mux_pt_destroy(ctx);
+ return -1;
+ }
+ }
+
+ /* If we had early data, and we're done with the handshake
+ * then we know the data are safe, and we can remove the flag.
+ */
+ if ((conn->flags & (CO_FL_EARLY_DATA | CO_FL_EARLY_SSL_HS | CO_FL_WAIT_XPRT)) ==
+ CO_FL_EARLY_DATA)
+ conn->flags &= ~CO_FL_EARLY_DATA;
+
+ TRACE_LEAVE(PT_EV_CONN_WAKE, ctx->conn);
+ return ret;
+}
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int mux_pt_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ TRACE_ENTER(PT_EV_STRM_NEW, conn);
+ if (ctx->wait_event.events)
+ conn->xprt->unsubscribe(ctx->conn, conn->xprt_ctx, SUB_RETRY_RECV, &ctx->wait_event);
+ if (sc_attach_mux(sd->sc, ctx, conn) < 0)
+ return -1;
+ ctx->sd = sd;
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE);
+
+ TRACE_LEAVE(PT_EV_STRM_NEW, conn, sd->sc);
+ return 0;
+}
+
+/* Retrieves a valid stream connector from this connection, or returns NULL.
+ * For this mux, it's easy as we can only store a single stream connector.
+ */
+static struct stconn *mux_pt_get_first_sc(const struct connection *conn)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ return pt_sc(ctx);
+}
+
+/* Destroy the mux and the associated connection if still attached to this mux
+ * and no longer used */
+static void mux_pt_destroy_meth(void *ctx)
+{
+ struct mux_pt_ctx *pt = ctx;
+
+ TRACE_POINT(PT_EV_CONN_END, pt->conn, pt_sc(pt));
+ if (se_fl_test(pt->sd, SE_FL_ORPHAN) || pt->conn->ctx != pt) {
+ if (pt->conn->ctx != pt) {
+ pt->sd = NULL;
+ }
+ mux_pt_destroy(pt);
+ }
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void mux_pt_detach(struct sedesc *sd)
+{
+ struct connection *conn = sd->conn;
+ struct mux_pt_ctx *ctx;
+
+ TRACE_ENTER(PT_EV_STRM_END, conn, sd->sc);
+
+ ctx = conn->ctx;
+
+ /* Subscribe, to know if we got disconnected */
+ if (!conn_is_back(conn) && conn->owner != NULL &&
+ !(conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH))) {
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &ctx->wait_event);
+ } else {
+ /* There's no session attached to that connection, destroy it */
+ TRACE_DEVEL("killing dead connection", PT_EV_STRM_END, conn, sd->sc);
+ mux_pt_destroy(ctx);
+ }
+
+ TRACE_LEAVE(PT_EV_STRM_END);
+}
+
+/* returns the number of streams in use on a connection */
+static int mux_pt_used_streams(struct connection *conn)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ return (!se_fl_test(ctx->sd, SE_FL_ORPHAN) ? 1 : 0);
+}
+
+/* returns the number of streams still available on a connection */
+static int mux_pt_avail_streams(struct connection *conn)
+{
+ return 1 - mux_pt_used_streams(conn);
+}
+
+static void mux_pt_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc);
+
+ if (se_fl_test(ctx->sd, SE_FL_SHR))
+ return;
+ se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (conn_xprt_ready(conn) && conn->xprt->shutr)
+ conn->xprt->shutr(conn, conn->xprt_ctx,
+ (mode == CO_SHR_DRAIN));
+ else if (mode == CO_SHR_DRAIN)
+ conn_ctrl_drain(conn);
+ if (se_fl_test(ctx->sd, SE_FL_SHW))
+ conn_full_close(conn);
+
+ TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc);
+}
+
+static void mux_pt_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc);
+
+ if (se_fl_test(ctx->sd, SE_FL_SHW))
+ return;
+ if (conn_xprt_ready(conn) && conn->xprt->shutw)
+ conn->xprt->shutw(conn, conn->xprt_ctx,
+ (mode == CO_SHW_NORMAL));
+ if (!se_fl_test(ctx->sd, SE_FL_SHR))
+ conn_sock_shutw(conn, (mode == CO_SHW_NORMAL));
+ else
+ conn_full_close(conn);
+
+ TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc);
+}
+
+/*
+ * Called from the upper layer, to get more data
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use kernel splicing or any kind of mux-to-mux
+ * xfer. If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct the
+ * mux that it may optimize the data copy to <buf> if necessary. Otherwise, it should
+ * copy as much data as possible.
+ */
+static size_t mux_pt_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ size_t ret = 0;
+
+ TRACE_ENTER(PT_EV_RX_DATA, conn, sc, buf, (size_t[]){count});
+
+ if (!count) {
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ goto end;
+ }
+ b_realign_if_empty(buf);
+ ret = conn->xprt->rcv_buf(conn, conn->xprt_ctx, buf, count, flags);
+ if (conn_xprt_read0_pending(conn)) {
+ se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ se_fl_set(ctx->sd, SE_FL_EOS);
+ TRACE_DEVEL("read0 on connection", PT_EV_RX_DATA, conn, sc);
+ }
+ if (conn->flags & CO_FL_ERROR) {
+ se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ se_fl_set(ctx->sd, SE_FL_ERROR);
+ TRACE_DEVEL("error on connection", PT_EV_RX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+ end:
+ TRACE_LEAVE(PT_EV_RX_DATA, conn, sc, buf, (size_t[]){ret});
+ return ret;
+}
+
+/* Called from the upper layer, to send data */
+static size_t mux_pt_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ size_t ret;
+
+ TRACE_ENTER(PT_EV_TX_DATA, conn, sc, buf, (size_t[]){count});
+
+ ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, buf, count, flags);
+
+ if (ret > 0)
+ b_del(buf, ret);
+
+ if (conn->flags & CO_FL_ERROR) {
+ se_fl_set(ctx->sd, SE_FL_ERROR);
+ TRACE_DEVEL("error on connection", PT_EV_TX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+
+ TRACE_LEAVE(PT_EV_TX_DATA, conn, sc, buf, (size_t[]){ret});
+ return ret;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int mux_pt_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct connection *conn = __sc_conn(sc);
+
+ TRACE_POINT(PT_EV_RX_DATA|PT_EV_TX_DATA, conn, sc, 0, (size_t[]){event_type});
+ return conn->xprt->subscribe(conn, conn->xprt_ctx, event_type, es);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int mux_pt_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct connection *conn = __sc_conn(sc);
+
+ TRACE_POINT(PT_EV_RX_DATA|PT_EV_TX_DATA, conn, sc, 0, (size_t[]){event_type});
+ return conn->xprt->unsubscribe(conn, conn->xprt_ctx, event_type, es);
+}
+
+#if defined(USE_LINUX_SPLICE)
+/* Send and get, using splicing */
+static int mux_pt_rcv_pipe(struct stconn *sc, struct pipe *pipe, unsigned int count)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ int ret;
+
+ TRACE_ENTER(PT_EV_RX_DATA, conn, sc, 0, (size_t[]){count});
+
+ ret = conn->xprt->rcv_pipe(conn, conn->xprt_ctx, pipe, count);
+ if (conn_xprt_read0_pending(conn)) {
+ se_fl_set(ctx->sd, SE_FL_EOS);
+ TRACE_DEVEL("read0 on connection", PT_EV_RX_DATA, conn, sc);
+ }
+ if (conn->flags & CO_FL_ERROR) {
+ se_fl_set(ctx->sd, SE_FL_ERROR);
+ TRACE_DEVEL("error on connection", PT_EV_RX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+
+ TRACE_LEAVE(PT_EV_RX_DATA, conn, sc, 0, (size_t[]){ret});
+ return (ret);
+}
+
+static int mux_pt_snd_pipe(struct stconn *sc, struct pipe *pipe)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ int ret;
+
+ TRACE_ENTER(PT_EV_TX_DATA, conn, sc, 0, (size_t[]){pipe->data});
+
+ ret = conn->xprt->snd_pipe(conn, conn->xprt_ctx, pipe);
+
+ if (conn->flags & CO_FL_ERROR) {
+ se_fl_set(ctx->sd, SE_FL_ERROR);
+ TRACE_DEVEL("error on connection", PT_EV_TX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+
+ TRACE_LEAVE(PT_EV_TX_DATA, conn, sc, 0, (size_t[]){ret});
+ return ret;
+}
+#endif
+
+static int mux_pt_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ int ret = 0;
+ switch (mux_ctl) {
+ case MUX_STATUS:
+ if (!(conn->flags & CO_FL_WAIT_XPRT))
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_EXIT_STATUS:
+ return MUX_ES_UNKNOWN;
+ default:
+ return -1;
+ }
+}
+
+/* The mux operations */
+const struct mux_ops mux_tcp_ops = {
+ .init = mux_pt_init,
+ .wake = mux_pt_wake,
+ .rcv_buf = mux_pt_rcv_buf,
+ .snd_buf = mux_pt_snd_buf,
+ .subscribe = mux_pt_subscribe,
+ .unsubscribe = mux_pt_unsubscribe,
+#if defined(USE_LINUX_SPLICE)
+ .rcv_pipe = mux_pt_rcv_pipe,
+ .snd_pipe = mux_pt_snd_pipe,
+#endif
+ .attach = mux_pt_attach,
+ .get_first_sc = mux_pt_get_first_sc,
+ .detach = mux_pt_detach,
+ .avail_streams = mux_pt_avail_streams,
+ .used_streams = mux_pt_used_streams,
+ .destroy = mux_pt_destroy_meth,
+ .ctl = mux_pt_ctl,
+ .shutr = mux_pt_shutr,
+ .shutw = mux_pt_shutw,
+ .flags = MX_FL_NONE,
+ .name = "PASS",
+};
+
+
+const struct mux_ops mux_pt_ops = {
+ .init = mux_pt_init,
+ .wake = mux_pt_wake,
+ .rcv_buf = mux_pt_rcv_buf,
+ .snd_buf = mux_pt_snd_buf,
+ .subscribe = mux_pt_subscribe,
+ .unsubscribe = mux_pt_unsubscribe,
+#if defined(USE_LINUX_SPLICE)
+ .rcv_pipe = mux_pt_rcv_pipe,
+ .snd_pipe = mux_pt_snd_pipe,
+#endif
+ .attach = mux_pt_attach,
+ .get_first_sc = mux_pt_get_first_sc,
+ .detach = mux_pt_detach,
+ .avail_streams = mux_pt_avail_streams,
+ .used_streams = mux_pt_used_streams,
+ .destroy = mux_pt_destroy_meth,
+ .ctl = mux_pt_ctl,
+ .shutr = mux_pt_shutr,
+ .shutw = mux_pt_shutw,
+ .flags = MX_FL_NONE|MX_FL_NO_UPG,
+ .name = "PASS",
+};
+
+/* PROT selection: the default mux has an empty name */
+static struct mux_proto_list mux_proto_none =
+ { .token = IST("none"), .mode = PROTO_MODE_TCP, .side = PROTO_SIDE_BOTH, .mux = &mux_pt_ops };
+static struct mux_proto_list mux_proto_tcp =
+ { .token = IST(""), .mode = PROTO_MODE_TCP, .side = PROTO_SIDE_BOTH, .mux = &mux_tcp_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_none);
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_tcp);
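+
+/* Note: given the tokens registered above, the pass-through mux can
+ * presumably be forced from the configuration with "proto none" on a
+ * "bind" or "server" line, the empty token acting as the implicit TCP
+ * default; the parsing side is outside this file, so treat this as an
+ * illustration rather than a guarantee.
+ */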
diff --git a/src/mux_quic.c b/src/mux_quic.c
new file mode 100644
index 0000000..a8c7e19
--- /dev/null
+++ b/src/mux_quic.c
@@ -0,0 +1,2321 @@
+#include <haproxy/mux_quic.h>
+
+#include <import/eb64tree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/ncbuf.h>
+#include <haproxy/pool.h>
+#include <haproxy/qmux_http.h>
+#include <haproxy/qmux_trace.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_stream.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/ssl_sock-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/trace.h>
+
+DECLARE_POOL(pool_head_qcc, "qcc", sizeof(struct qcc));
+DECLARE_POOL(pool_head_qcs, "qcs", sizeof(struct qcs));
+
+/* Emit a CONNECTION_CLOSE with error <err>. This will interrupt all future
+ * send/receive operations.
+ */
+static void qcc_emit_cc(struct qcc *qcc, int err)
+{
+ TRACE_ENTER(QMUX_EV_QCC_END, qcc->conn);
+
+ TRACE_STATE("set CONNECTION_CLOSE on quic-conn", QMUX_EV_QCC_WAKE, qcc->conn);
+ quic_set_connection_close(qcc->conn->handle.qc, quic_err_transport(err));
+ qcc->flags |= QC_CF_CC_EMIT;
+ tasklet_wakeup(qcc->wait_event.tasklet);
+
+ TRACE_LEAVE(QMUX_EV_QCC_END, qcc->conn);
+}
+
+static void qc_free_ncbuf(struct qcs *qcs, struct ncbuf *ncbuf)
+{
+ struct buffer buf;
+
+ if (ncb_is_null(ncbuf))
+ return;
+
+ buf = b_make(ncbuf->area, ncbuf->size, 0, 0);
+ b_free(&buf);
+ offer_buffers(NULL, 1);
+
+ *ncbuf = NCBUF_NULL;
+}
+
+/* Free <qcs> instance. This function is reserved for internal usage: it must
+ * only be called on qcs allocation error or on connection shutdown. Otherwise
+ * qcs_destroy must be preferred to handle QUIC flow-control increase.
+ */
+static void qcs_free(struct qcs *qcs)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_QCS_END, qcc->conn, qcs);
+
+ /* Safe to use even if already removed from the list. */
+ LIST_DEL_INIT(&qcs->el_opening);
+
+ /* Release stream endpoint descriptor. */
+ BUG_ON(qcs->sd && !se_fl_test(qcs->sd, SE_FL_ORPHAN));
+ sedesc_free(qcs->sd);
+
+ /* Release app-layer context. */
+ if (qcs->ctx && qcc->app_ops->detach)
+ qcc->app_ops->detach(qcs);
+
+ /* Release qc_stream_desc buffer from quic-conn layer. */
+ qc_stream_desc_release(qcs->stream);
+
+ /* Free Rx/Tx buffers. */
+ qc_free_ncbuf(qcs, &qcs->rx.ncbuf);
+ b_free(&qcs->tx.buf);
+
+ BUG_ON(!qcc->strms[qcs_id_type(qcs->id)].nb_streams);
+ --qcc->strms[qcs_id_type(qcs->id)].nb_streams;
+
+ /* Remove qcs from qcc tree. */
+ eb64_delete(&qcs->by_id);
+
+ pool_free(pool_head_qcs, qcs);
+
+ TRACE_LEAVE(QMUX_EV_QCS_END, qcc->conn);
+}
+
+/* Allocate a new QUIC stream with id <id> and type <type>. */
+static struct qcs *qcs_new(struct qcc *qcc, uint64_t id, enum qcs_type type)
+{
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCS_NEW, qcc->conn);
+
+ qcs = pool_alloc(pool_head_qcs);
+ if (!qcs) {
+ TRACE_ERROR("alloc failure", QMUX_EV_QCS_NEW, qcc->conn);
+ return NULL;
+ }
+
+ qcs->stream = NULL;
+ qcs->qcc = qcc;
+ qcs->sd = NULL;
+ qcs->flags = QC_SF_NONE;
+ qcs->st = QC_SS_IDLE;
+ qcs->ctx = NULL;
+
+ /* The app-layer attach callback may register the stream for http-request
+ * wait. These fields must be initialized beforehand.
+ */
+ LIST_INIT(&qcs->el_opening);
+ qcs->start = TICK_ETERNITY;
+
+ /* store transport layer stream descriptor in qcc tree */
+ qcs->id = qcs->by_id.key = id;
+ eb64_insert(&qcc->streams_by_id, &qcs->by_id);
+
+ qcc->strms[type].nb_streams++;
+
+ /* Allocate transport layer stream descriptor. Only needed for TX. */
+ if (!quic_stream_is_uni(id) || !quic_stream_is_remote(qcc, id)) {
+ struct quic_conn *qc = qcc->conn->handle.qc;
+ qcs->stream = qc_stream_desc_new(id, type, qcs, qc);
+ if (!qcs->stream) {
+ TRACE_ERROR("qc_stream_desc alloc failure", QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ goto err;
+ }
+ }
+
+ if (qcc->app_ops->attach) {
+ if (qcc->app_ops->attach(qcs, qcc->ctx)) {
+ TRACE_ERROR("app proto failure", QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ goto err;
+ }
+ }
+
+ /* For a locally initiated stream, the peer's "remote" limit applies (the
+ * stream is remote from its point of view); otherwise use the opposite. */
+ if (quic_stream_is_bidi(id)) {
+ qcs->tx.msd = quic_stream_is_local(qcc, id) ? qcc->rfctl.msd_bidi_r :
+ qcc->rfctl.msd_bidi_l;
+ }
+ else if (quic_stream_is_local(qcc, id)) {
+ qcs->tx.msd = qcc->rfctl.msd_uni_l;
+ }
+
+ qcs->rx.ncbuf = NCBUF_NULL;
+ qcs->rx.app_buf = BUF_NULL;
+ qcs->rx.offset = qcs->rx.offset_max = 0;
+
+ if (quic_stream_is_bidi(id)) {
+ qcs->rx.msd = quic_stream_is_local(qcc, id) ? qcc->lfctl.msd_bidi_l :
+ qcc->lfctl.msd_bidi_r;
+ }
+ else if (quic_stream_is_remote(qcc, id)) {
+ qcs->rx.msd = qcc->lfctl.msd_uni_r;
+ }
+ qcs->rx.msd_init = qcs->rx.msd;
+
+ qcs->tx.buf = BUF_NULL;
+ qcs->tx.offset = 0;
+ qcs->tx.sent_offset = 0;
+
+ qcs->wait_event.tasklet = NULL;
+ qcs->wait_event.events = 0;
+ qcs->subs = NULL;
+
+ qcs->err = 0;
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ return qcs;
+
+ err:
+ qcs_free(qcs);
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn);
+ return NULL;
+}
+
+static forceinline struct stconn *qcs_sc(const struct qcs *qcs)
+{
+ return qcs->sd ? qcs->sd->sc : NULL;
+}
+
+/* Reset the <qcc> inactivity timeout for http-keep-alive timeout. */
+static forceinline void qcc_reset_idle_start(struct qcc *qcc)
+{
+ qcc->idle_start = now_ms;
+}
+
+/* Decrement the <qcc> stream connector count. */
+static forceinline void qcc_rm_sc(struct qcc *qcc)
+{
+ BUG_ON_HOT(!qcc->nb_sc);
+ --qcc->nb_sc;
+
+ /* Reset qcc idle start for http-keep-alive timeout. Timeout will be
+ * refreshed after this on stream detach.
+ */
+ if (!qcc->nb_sc && !qcc->nb_hreq)
+ qcc_reset_idle_start(qcc);
+}
+
+/* Decrement the <qcc> HTTP request count. */
+static forceinline void qcc_rm_hreq(struct qcc *qcc)
+{
+ BUG_ON_HOT(!qcc->nb_hreq);
+ --qcc->nb_hreq;
+
+ /* Reset qcc idle start for http-keep-alive timeout. Timeout will be
+ * refreshed after this on I/O handler.
+ */
+ if (!qcc->nb_sc && !qcc->nb_hreq)
+ qcc_reset_idle_start(qcc);
+}
+
+static inline int qcc_is_dead(const struct qcc *qcc)
+{
+ /* The mux connection is considered dead if all stream-descs are
+ * detached, AND either the connection is on error OR the mux timeout
+ * has already fired or is unset.
+ */
+ if (!qcc->nb_sc && ((qcc->conn->flags & CO_FL_ERROR) || !qcc->task))
+ return 1;
+
+ return 0;
+}
+
+/* Return true if the mux timeout should be armed. */
+static inline int qcc_may_expire(struct qcc *qcc)
+{
+ return !qcc->nb_sc;
+}
+
+/* Refresh the timeout on <qcc> if needed depending on its state. */
+static void qcc_refresh_timeout(struct qcc *qcc)
+{
+ const struct proxy *px = qcc->proxy;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn);
+
+ if (!qcc->task) {
+ TRACE_DEVEL("already expired", QMUX_EV_QCC_WAKE, qcc->conn);
+ goto leave;
+ }
+
+ /* Check if the upper layer is responsible for timeout management. */
+ if (!qcc_may_expire(qcc)) {
+ TRACE_DEVEL("not eligible for timeout", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = TICK_ETERNITY;
+ task_queue(qcc->task);
+ goto leave;
+ }
+
+ /* TODO if connection is idle on frontend and proxy is disabled, remove
+ * it with global close_spread delay applied.
+ */
+
+ /* TODO implement client/server-fin timeout for graceful shutdown */
+
+ /* Frontend timeout management
+ * - detached streams with data left to send -> default timeout
+ * - stream waiting on incomplete request or no stream yet activated -> timeout http-request
+ * - idle after stream processing -> timeout http-keep-alive
+ */
+ if (!conn_is_back(qcc->conn)) {
+ if (qcc->nb_hreq) {
+ TRACE_DEVEL("one or more requests still in progress", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = tick_add_ifset(now_ms, qcc->timeout);
+ task_queue(qcc->task);
+ goto leave;
+ }
+
+ if (!LIST_ISEMPTY(&qcc->opening_list) || unlikely(!qcc->largest_bidi_r)) {
+ int timeout = px->timeout.httpreq;
+ struct qcs *qcs = NULL;
+ int base_time;
+
+ /* Use the start time of the first stream waiting on HTTP, or
+ * the qcc idle start if no stream has been used yet.
+ */
+ if (likely(!LIST_ISEMPTY(&qcc->opening_list)))
+ qcs = LIST_ELEM(qcc->opening_list.n, struct qcs *, el_opening);
+ base_time = qcs ? qcs->start : qcc->idle_start;
+
+ TRACE_DEVEL("waiting on http request", QMUX_EV_QCC_WAKE, qcc->conn, qcs);
+ qcc->task->expire = tick_add_ifset(base_time, timeout);
+ }
+ else {
+ /* Use http-request timeout if keep-alive timeout not set */
+ int timeout = tick_isset(px->timeout.httpka) ?
+ px->timeout.httpka : px->timeout.httpreq;
+
+ TRACE_DEVEL("at least one request achieved but none currently in progress", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = tick_add_ifset(qcc->idle_start, timeout);
+ }
+ }
+
+ /* fallback to default timeout if frontend specific undefined or for
+ * backend connections.
+ */
+ if (!tick_isset(qcc->task->expire)) {
+ TRACE_DEVEL("fallback to default timeout", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = tick_add_ifset(now_ms, qcc->timeout);
+ }
+
+ task_queue(qcc->task);
+
+ leave:
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn);
+}
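+
+/* A worked example of the selection above, assuming hypothetical settings
+ * "timeout http-request 10s" and "timeout http-keep-alive 2s": a connection
+ * with a stream still waiting on a complete request expires 10s after that
+ * stream's start time, an idle connection which already served at least one
+ * request expires 2s after idle_start, and only when neither case applies
+ * is the generic qcc->timeout fallback armed.
+ */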
+
+/* Mark a stream as open if it was idle. This can be used on every
+ * successful emission/reception operation to update the stream state.
+ */
+static void qcs_idle_open(struct qcs *qcs)
+{
+ /* This operation must not be used if the stream is already closed. */
+ BUG_ON_HOT(qcs->st == QC_SS_CLO);
+
+ if (qcs->st == QC_SS_IDLE) {
+ TRACE_STATE("opening stream", QMUX_EV_QCS_NEW, qcs->qcc->conn, qcs);
+ qcs->st = QC_SS_OPEN;
+ }
+}
+
+/* Close the local channel of <qcs> instance. */
+static void qcs_close_local(struct qcs *qcs)
+{
+ TRACE_STATE("closing stream locally", QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+
+ /* The stream must have already been opened. */
+ BUG_ON_HOT(qcs->st == QC_SS_IDLE);
+
+ /* This operation cannot be used multiple times. */
+ BUG_ON_HOT(qcs->st == QC_SS_HLOC || qcs->st == QC_SS_CLO);
+
+ if (quic_stream_is_bidi(qcs->id)) {
+ qcs->st = (qcs->st == QC_SS_HREM) ? QC_SS_CLO : QC_SS_HLOC;
+
+ if (qcs->flags & QC_SF_HREQ_RECV)
+ qcc_rm_hreq(qcs->qcc);
+ }
+ else {
+ /* Only local uni streams are valid for this operation. */
+ BUG_ON_HOT(quic_stream_is_remote(qcs->qcc, qcs->id));
+ qcs->st = QC_SS_CLO;
+ }
+}
+
+/* Close the remote channel of <qcs> instance. */
+static void qcs_close_remote(struct qcs *qcs)
+{
+ TRACE_STATE("closing stream remotely", QMUX_EV_QCS_RECV, qcs->qcc->conn, qcs);
+
+ /* The stream must have already been opened. */
+ BUG_ON_HOT(qcs->st == QC_SS_IDLE);
+
+ /* This operation cannot be used multiple times. */
+ BUG_ON_HOT(qcs->st == QC_SS_HREM || qcs->st == QC_SS_CLO);
+
+ if (quic_stream_is_bidi(qcs->id)) {
+ qcs->st = (qcs->st == QC_SS_HLOC) ? QC_SS_CLO : QC_SS_HREM;
+ }
+ else {
+ /* Only remote uni streams are valid for this operation. */
+ BUG_ON_HOT(quic_stream_is_local(qcs->qcc, qcs->id));
+ qcs->st = QC_SS_CLO;
+ }
+}
+
+static int qcs_is_close_local(struct qcs *qcs)
+{
+ return qcs->st == QC_SS_HLOC || qcs->st == QC_SS_CLO;
+}
+
+static __maybe_unused int qcs_is_close_remote(struct qcs *qcs)
+{
+ return qcs->st == QC_SS_HREM || qcs->st == QC_SS_CLO;
+}
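+
+/* For reference, the QC_SS_* transitions implemented above mirror the
+ * RFC 9000 stream state machines, collapsed into a single enum per stream:
+ *
+ *   QC_SS_IDLE -> QC_SS_OPEN -+-> QC_SS_HLOC -+
+ *                             |               +-> QC_SS_CLO
+ *                             +-> QC_SS_HREM -+
+ *
+ * where HLOC/HREM read as "half-closed local/remote". Unidirectional
+ * streams jump straight from OPEN to CLO when their single side closes.
+ */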
+
+struct buffer *qc_get_buf(struct qcs *qcs, struct buffer *bptr)
+{
+ struct buffer *buf = b_alloc(bptr);
+ BUG_ON(!buf);
+ return buf;
+}
+
+static struct ncbuf *qc_get_ncbuf(struct qcs *qcs, struct ncbuf *ncbuf)
+{
+ struct buffer buf = BUF_NULL;
+
+ if (ncb_is_null(ncbuf)) {
+ b_alloc(&buf);
+ BUG_ON(b_is_null(&buf));
+
+ *ncbuf = ncb_make(buf.area, buf.size, 0);
+ ncb_init(ncbuf, 0);
+ }
+
+ return ncbuf;
+}
+
+/* Notify any subscriber on <qcs>, or else wake up the stconn layer if
+ * initialized.
+ */
+static void qcs_alert(struct qcs *qcs)
+{
+ if (qcs->subs) {
+ qcs_notify_recv(qcs);
+ qcs_notify_send(qcs);
+ }
+ else if (qcs_sc(qcs) && qcs->sd->sc->app_ops->wake) {
+ qcs->sd->sc->app_ops->wake(qcs->sd->sc);
+ }
+}
+
+int qcs_subscribe(struct qcs *qcs, int event_type, struct wait_event *es)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND|QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(qcs->subs && qcs->subs != es);
+
+ es->events |= event_type;
+ qcs->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("subscribe(send)", QMUX_EV_STRM_SEND, qcc->conn, qcs);
+
+ TRACE_LEAVE(QMUX_EV_STRM_SEND|QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ return 0;
+}
+
+void qcs_notify_recv(struct qcs *qcs)
+{
+ if (qcs->subs && qcs->subs->events & SUB_RETRY_RECV) {
+ tasklet_wakeup(qcs->subs->tasklet);
+ qcs->subs->events &= ~SUB_RETRY_RECV;
+ if (!qcs->subs->events)
+ qcs->subs = NULL;
+ }
+}
+
+void qcs_notify_send(struct qcs *qcs)
+{
+ if (qcs->subs && qcs->subs->events & SUB_RETRY_SEND) {
+ tasklet_wakeup(qcs->subs->tasklet);
+ qcs->subs->events &= ~SUB_RETRY_SEND;
+ if (!qcs->subs->events)
+ qcs->subs = NULL;
+ }
+}
+
+/* Open a locally initiated stream for the connection <qcc>. Set <bidi> for a
+ * bidirectional stream, else a unidirectional stream is opened. The next
+ * available ID on the connection will be used according to the stream type.
+ *
+ * Returns the allocated stream instance or NULL on error.
+ */
+struct qcs *qcc_init_stream_local(struct qcc *qcc, int bidi)
+{
+ struct qcs *qcs;
+ enum qcs_type type;
+ uint64_t *next;
+
+ TRACE_ENTER(QMUX_EV_QCS_NEW, qcc->conn);
+
+ if (bidi) {
+ next = &qcc->next_bidi_l;
+ type = conn_is_back(qcc->conn) ? QCS_CLT_BIDI : QCS_SRV_BIDI;
+ }
+ else {
+ next = &qcc->next_uni_l;
+ type = conn_is_back(qcc->conn) ? QCS_CLT_UNI : QCS_SRV_UNI;
+ }
+
+ /* TODO ensure that we won't overflow remote peer flow control limit on
+ * streams. Otherwise, we should emit a STREAMS_BLOCKED frame.
+ */
+
+ qcs = qcs_new(qcc, *next, type);
+ if (!qcs) {
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn);
+ return NULL;
+ }
+
+ TRACE_PROTO("opening local stream", QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ *next += 4;
+
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ return qcs;
+}
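+
+/* The "+= 4" increment above follows RFC 9000 2.1: the two low-order bits
+ * of a stream ID encode its type, so consecutive streams of one type are
+ * spaced by 4. A minimal sketch of the decoding (illustrative only; the
+ * real helpers live in the quic headers):
+ *
+ *   is_uni    = !!(id & 0x2);   - 0x2 and 0x3 are unidirectional
+ *   by_server = !!(id & 0x1);   - 0x1 and 0x3 are server-initiated
+ */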
+
+/* Open a remote initiated stream for the connection <qcc> with ID <id>. The
+ * caller is responsible for ensuring that a stream with the same ID has not
+ * already been opened. This function will also create all intermediary
+ * streams with a smaller ID that were not already opened.
+ *
+ * Returns the allocated stream instance or NULL on error.
+ */
+static struct qcs *qcc_init_stream_remote(struct qcc *qcc, uint64_t id)
+{
+ struct qcs *qcs = NULL;
+ enum qcs_type type;
+ uint64_t *largest, max_id;
+
+ TRACE_ENTER(QMUX_EV_QCS_NEW, qcc->conn);
+
+ BUG_ON_HOT(quic_stream_is_local(qcc, id));
+
+ if (quic_stream_is_bidi(id)) {
+ largest = &qcc->largest_bidi_r;
+ type = conn_is_back(qcc->conn) ? QCS_SRV_BIDI : QCS_CLT_BIDI;
+ }
+ else {
+ largest = &qcc->largest_uni_r;
+ type = conn_is_back(qcc->conn) ? QCS_SRV_UNI : QCS_CLT_UNI;
+ }
+
+ /* RFC 9000 4.6. Controlling Concurrency
+ *
+ * An endpoint that receives a frame with a stream ID exceeding the
+ * limit it has sent MUST treat this as a connection error of type
+ * STREAM_LIMIT_ERROR
+ */
+ max_id = quic_stream_is_bidi(id) ? qcc->lfctl.ms_bidi * 4 :
+ qcc->lfctl.ms_uni * 4;
+ if (id >= max_id) {
+ TRACE_ERROR("flow control error", QMUX_EV_QCS_NEW|QMUX_EV_PROTO_ERR, qcc->conn);
+ qcc_emit_cc(qcc, QC_ERR_STREAM_LIMIT_ERROR);
+ goto err;
+ }
+
+ /* Only stream ID not already opened can be used. */
+ BUG_ON(id < *largest);
+
+ while (id >= *largest) {
+ const char *str = *largest < id ? "initializing intermediary remote stream" :
+ "initializing remote stream";
+
+ qcs = qcs_new(qcc, *largest, type);
+ if (!qcs) {
+ /* TODO emit RESET_STREAM */
+ TRACE_ERROR("stream fallocation failure", QMUX_EV_QCS_NEW, qcc->conn);
+ goto err;
+ }
+
+ TRACE_PROTO(str, QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ *largest += 4;
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ return qcs;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn);
+ return NULL;
+}
+
+/* Use this function for a stream <id> which is not in <qcc> stream tree. It
+ * returns true if the associated stream is closed.
+ */
+static int qcc_stream_id_is_closed(struct qcc *qcc, uint64_t id)
+{
+ uint64_t *largest;
+
+ /* This function must only be used for a stream not present in the stream tree. */
+ BUG_ON_HOT(eb64_lookup(&qcc->streams_by_id, id));
+
+ if (quic_stream_is_local(qcc, id)) {
+ largest = quic_stream_is_uni(id) ? &qcc->next_uni_l :
+ &qcc->next_bidi_l;
+ }
+ else {
+ largest = quic_stream_is_uni(id) ? &qcc->largest_uni_r :
+ &qcc->largest_bidi_r;
+ }
+
+ return id < *largest;
+}
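+
+/* Example of the test above: for client-initiated bidi streams (IDs 0, 4,
+ * 8, ...), if largest_bidi_r is 12 then IDs 0 to 8 were opened at some
+ * point; since the caller guarantees the ID is absent from the tree, any
+ * such smaller ID necessarily refers to an already closed stream.
+ */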
+
+/* Retrieve the stream instance from <id> ID. This can be used when receiving
+ * STREAM, STREAM_DATA_BLOCKED, RESET_STREAM, MAX_STREAM_DATA or STOP_SENDING
+ * frames. Set <receive_only> or <send_only> to false if these particular types
+ * of streams are not allowed. If the stream instance is found, it is stored in
+ * <out>.
+ *
+ * Returns 0 on success else non-zero. On error, a RESET_STREAM or a
+ * CONNECTION_CLOSE is automatically emitted. Beware that <out> may be NULL
+ * on success if the stream has already been closed.
+ */
+int qcc_get_qcs(struct qcc *qcc, uint64_t id, int receive_only, int send_only,
+ struct qcs **out)
+{
+ struct eb64_node *node;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+ *out = NULL;
+
+ if (!receive_only && quic_stream_is_uni(id) && quic_stream_is_remote(qcc, id)) {
+ TRACE_ERROR("receive-only stream not allowed", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS|QMUX_EV_PROTO_ERR, qcc->conn, NULL, &id);
+ qcc_emit_cc(qcc, QC_ERR_STREAM_STATE_ERROR);
+ goto err;
+ }
+
+ if (!send_only && quic_stream_is_uni(id) && quic_stream_is_local(qcc, id)) {
+ TRACE_ERROR("send-only stream not allowed", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS|QMUX_EV_PROTO_ERR, qcc->conn, NULL, &id);
+ qcc_emit_cc(qcc, QC_ERR_STREAM_STATE_ERROR);
+ goto err;
+ }
+
+ /* Search the stream in the connection tree. */
+ node = eb64_lookup(&qcc->streams_by_id, id);
+ if (node) {
+ *out = eb64_entry(node, struct qcs, by_id);
+ TRACE_DEVEL("using stream from connection tree", QMUX_EV_QCC_RECV, qcc->conn, *out);
+ goto out;
+ }
+
+ /* Check if stream is already closed. */
+ if (qcc_stream_id_is_closed(qcc, id)) {
+ TRACE_DATA("already closed stream", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS, qcc->conn, NULL, &id);
+ /* Consider this as a success even if <out> is left NULL. */
+ goto out;
+ }
+
+ /* Create the stream. This is valid only for remote-initiated ones. A
+ * local stream must have already been explicitly created by the
+ * application protocol layer.
+ */
+ if (quic_stream_is_local(qcc, id)) {
+ /* RFC 9000 19.8. STREAM Frames
+ *
+ * An endpoint MUST terminate the connection with error
+ * STREAM_STATE_ERROR if it receives a STREAM frame for a locally
+ * initiated stream that has not yet been created, or for a send-only
+ * stream.
+ */
+ TRACE_ERROR("locally initiated stream not yet created", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS|QMUX_EV_PROTO_ERR, qcc->conn, NULL, &id);
+ qcc_emit_cc(qcc, QC_ERR_STREAM_STATE_ERROR);
+ goto err;
+ }
+ else {
+ /* Remote stream not found - try to open it. */
+ *out = qcc_init_stream_remote(qcc, id);
+ if (!*out) {
+ TRACE_ERROR("stream creation error", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS, qcc->conn, NULL, &id);
+ goto err;
+ }
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn, *out);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+}
+
+/* Simple function to duplicate a buffer */
+static inline struct buffer qcs_b_dup(const struct ncbuf *b)
+{
+ return b_make(ncb_orig(b), b->size, b->head, ncb_data(b, 0));
+}
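+
+/* Note that the duplication above is shallow: the returned buffer wraps the
+ * ncbuf storage without copying, which is enough for the read-only decoding
+ * done below before qcs_consume() actually advances the underlying buffer.
+ */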
+
+/* Remove <bytes> from <qcs> Rx buffer. Flow-control for received offsets may
+ * be allocated for the peer if needed.
+ */
+static void qcs_consume(struct qcs *qcs, uint64_t bytes)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct quic_frame *frm;
+ struct ncbuf *buf = &qcs->rx.ncbuf;
+ enum ncb_ret ret;
+
+ TRACE_ENTER(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ ret = ncb_advance(buf, bytes);
+ if (ret) {
+ ABORT_NOW(); /* should not happen because removal is only done within data */
+ }
+
+ if (ncb_is_empty(buf))
+ qc_free_ncbuf(qcs, buf);
+
+ qcs->rx.offset += bytes;
+ if (qcs->rx.msd - qcs->rx.offset < qcs->rx.msd_init / 2) {
+ TRACE_DATA("increase stream credit via MAX_STREAM_DATA", QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ frm = pool_zalloc(pool_head_quic_frame);
+ BUG_ON(!frm); /* TODO handle this properly */
+
+ qcs->rx.msd = qcs->rx.offset + qcs->rx.msd_init;
+
+ LIST_INIT(&frm->reflist);
+ frm->type = QUIC_FT_MAX_STREAM_DATA;
+ frm->max_stream_data.id = qcs->id;
+ frm->max_stream_data.max_stream_data = qcs->rx.msd;
+
+ LIST_APPEND(&qcc->lfctl.frms, &frm->list);
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+
+ qcc->lfctl.offsets_consume += bytes;
+ if (qcc->lfctl.md - qcc->lfctl.offsets_consume < qcc->lfctl.md_init / 2) {
+ TRACE_DATA("increase conn credit via MAX_DATA", QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ frm = pool_zalloc(pool_head_quic_frame);
+ BUG_ON(!frm); /* TODO handle this properly */
+
+ qcc->lfctl.md = qcc->lfctl.offsets_consume + qcc->lfctl.md_init;
+
+ LIST_INIT(&frm->reflist);
+ frm->type = QUIC_FT_MAX_DATA;
+ frm->max_data.max_data = qcc->lfctl.md;
+
+ LIST_APPEND(&qcs->qcc->lfctl.frms, &frm->list);
+ tasklet_wakeup(qcs->qcc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+}
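+
+/* A worked example of the credit refresh above, assuming msd_init = 64k:
+ * the stream starts with msd = 64k; once the consumed offset passes 32k,
+ * the "msd - offset < msd_init / 2" test fires and a MAX_STREAM_DATA frame
+ * re-advertises msd = offset + 64k, keeping the peer's window between 32k
+ * and 64k. The connection-level MAX_DATA block works the same way with md.
+ */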
+
+/* Decode the content of STREAM frames already received on the stream instance
+ * <qcs>.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int qcc_decode_qcs(struct qcc *qcc, struct qcs *qcs)
+{
+ struct buffer b;
+ ssize_t ret;
+ int fin = 0;
+
+ TRACE_ENTER(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ b = qcs_b_dup(&qcs->rx.ncbuf);
+
+ /* Signal FIN to application if STREAM FIN received with all data. */
+ if (qcs_is_close_remote(qcs))
+ fin = 1;
+
+ ret = qcc->app_ops->decode_qcs(qcs, &b, fin);
+ if (ret < 0) {
+ TRACE_ERROR("decoding error", QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto err;
+ }
+
+ if (ret)
+ qcs_consume(qcs, ret);
+ if (ret || (!b_data(&b) && fin))
+ qcs_notify_recv(qcs);
+
+ TRACE_LEAVE(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ return 1;
+}
+
+/* Emit a CONNECTION_CLOSE_APP with error <err>. Reserved for application error
+ * code. To close the connection right away, set <immediate>: this is useful
+ * when dealing with a fatal connection error. Else a graceful shutdown will be
+ * conducted: the error code is only registered. The lower layer is
+ * responsible for closing the connection when deemed suitable. Note that in this
+ * case the error code might be overwritten if an immediate close is requested
+ * in the interval.
+ */
+void qcc_emit_cc_app(struct qcc *qcc, int err, int immediate)
+{
+ TRACE_ENTER(QMUX_EV_QCC_END, qcc->conn);
+
+ if (immediate) {
+ quic_set_connection_close(qcc->conn->handle.qc, quic_err_app(err));
+ qcc->flags |= QC_CF_CC_EMIT;
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+ else {
+ /* Only register the error code for graceful shutdown. */
+ qcc->conn->handle.qc->err = quic_err_app(err);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_END, qcc->conn);
+}
+
+/* Prepare for the emission of RESET_STREAM on <qcs> with error code <err>. */
+void qcc_reset_stream(struct qcs *qcs, int err)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ if ((qcs->flags & QC_SF_TO_RESET) || qcs_is_close_local(qcs))
+ return;
+
+ TRACE_STATE("reset stream", QMUX_EV_QCS_END, qcc->conn, qcs);
+ qcs->flags |= QC_SF_TO_RESET;
+ qcs->err = err;
+
+ /* Remove prepared stream data from the connection flow-control calculation. */
+ if (qcs->tx.offset > qcs->tx.sent_offset) {
+ const uint64_t diff = qcs->tx.offset - qcs->tx.sent_offset;
+ BUG_ON(qcc->tx.offsets - diff < qcc->tx.sent_offsets);
+ qcc->tx.offsets -= diff;
+ /* Reset qcs offset to prevent BUG_ON() on qcs_destroy(). */
+ qcs->tx.offset = qcs->tx.sent_offset;
+ }
+
+ tasklet_wakeup(qcc->wait_event.tasklet);
+}
+
+/* Install the <app_ops> applicative layer of a QUIC connection on mux <qcc>.
+ * Returns 0 on success else non-zero.
+ */
+int qcc_install_app_ops(struct qcc *qcc, const struct qcc_app_ops *app_ops)
+{
+ TRACE_ENTER(QMUX_EV_QCC_NEW, qcc->conn);
+
+ qcc->app_ops = app_ops;
+ if (qcc->app_ops->init && !qcc->app_ops->init(qcc)) {
+ TRACE_ERROR("app ops init error", QMUX_EV_QCC_NEW, qcc->conn);
+ goto err;
+ }
+
+ TRACE_PROTO("application layer initialized", QMUX_EV_QCC_NEW, qcc->conn);
+
+ TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn);
+ return 1;
+}
+
+/* Handle a new STREAM frame for stream with id <id>. Payload is pointed by
+ * <data> with length <len> and represents the offset <offset>. <fin> is set if
+ * the QUIC frame FIN bit is set.
+ *
+ * Returns 0 on success else non-zero. On error, the received frame should not
+ * be acknowledged.
+ */
+int qcc_recv(struct qcc *qcc, uint64_t id, uint64_t len, uint64_t offset,
+ char fin, char *data)
+{
+ struct qcs *qcs;
+ enum ncb_ret ret;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_CC_EMIT) {
+ TRACE_DATA("connection closed", QMUX_EV_QCC_RECV, qcc->conn);
+ goto err;
+ }
+
+ /* RFC 9000 19.8. STREAM Frames
+ *
+ * An endpoint MUST terminate the connection with error
+ * STREAM_STATE_ERROR if it receives a STREAM frame for a locally
+ * initiated stream that has not yet been created, or for a send-only
+ * stream.
+ */
+ if (qcc_get_qcs(qcc, id, 1, 0, &qcs)) {
+ TRACE_DATA("qcs retrieval error", QMUX_EV_QCC_RECV, qcc->conn);
+ goto err;
+ }
+
+ if (!qcs) {
+ TRACE_DATA("already closed stream", QMUX_EV_QCC_RECV, qcc->conn);
+ goto out;
+ }
+
+ /* RFC 9000 4.5. Stream Final Size
+ *
+ * Once a final size for a stream is known, it cannot change. If a
+ * RESET_STREAM or STREAM frame is received indicating a change in the
+ * final size for the stream, an endpoint SHOULD respond with an error
+ * of type FINAL_SIZE_ERROR; see Section 11 for details on error
+ * handling.
+ */
+ if (qcs->flags & QC_SF_SIZE_KNOWN &&
+ (offset + len > qcs->rx.offset_max || (fin && offset + len < qcs->rx.offset_max))) {
+ TRACE_ERROR("final size error", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV|QMUX_EV_PROTO_ERR, qcc->conn, qcs);
+ qcc_emit_cc(qcc, QC_ERR_FINAL_SIZE_ERROR);
+ goto err;
+ }
+
+ if (offset + len < qcs->rx.offset ||
+ (offset + len == qcs->rx.offset && (!fin || (qcs->flags & QC_SF_SIZE_KNOWN)))) {
+ TRACE_DATA("already received offset", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto out;
+ }
+
+ TRACE_PROTO("receiving STREAM", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ qcs_idle_open(qcs);
+
+ if (offset + len > qcs->rx.offset_max) {
+ uint64_t diff = offset + len - qcs->rx.offset_max;
+ qcs->rx.offset_max = offset + len;
+ qcc->lfctl.offsets_recv += diff;
+
+ if (offset + len > qcs->rx.msd ||
+ qcc->lfctl.offsets_recv > qcc->lfctl.md) {
+ /* RFC 9000 4.1. Data Flow Control
+ *
+ * A receiver MUST close the connection with an error
+ * of type FLOW_CONTROL_ERROR if the sender violates
+ * the advertised connection or stream data limits
+ */
+ TRACE_ERROR("flow control error", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV|QMUX_EV_PROTO_ERR,
+ qcc->conn, qcs);
+ qcc_emit_cc(qcc, QC_ERR_FLOW_CONTROL_ERROR);
+ goto err;
+ }
+ }
+
+ if (!qc_get_ncbuf(qcs, &qcs->rx.ncbuf) || ncb_is_null(&qcs->rx.ncbuf)) {
+ /* TODO should mark qcs as full */
+ ABORT_NOW();
+ return 1;
+ }
+
+ TRACE_DATA("newly received offset", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ if (offset < qcs->rx.offset) {
+ size_t diff = qcs->rx.offset - offset;
+
+ len -= diff;
+ data += diff;
+ offset = qcs->rx.offset;
+ }
+
+ if (len) {
+ ret = ncb_add(&qcs->rx.ncbuf, offset - qcs->rx.offset, data, len, NCB_ADD_COMPARE);
+ switch (ret) {
+ case NCB_RET_OK:
+ break;
+
+ case NCB_RET_DATA_REJ:
+ /* RFC 9000 2.2. Sending and Receiving Data
+ *
+ * An endpoint could receive data for a stream at the
+ * same stream offset multiple times. Data that has
+ * already been received can be discarded. The data at
+ * a given offset MUST NOT change if it is sent
+ * multiple times; an endpoint MAY treat receipt of
+ * different data at the same offset within a stream as
+ * a connection error of type PROTOCOL_VIOLATION.
+ */
+ TRACE_ERROR("overlapping data rejected", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV|QMUX_EV_PROTO_ERR,
+ qcc->conn, qcs);
+ qcc_emit_cc(qcc, QC_ERR_PROTOCOL_VIOLATION);
+ return 1;
+
+ case NCB_RET_GAP_SIZE:
+ TRACE_DATA("cannot bufferize frame due to gap size limit", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV,
+ qcc->conn, qcs);
+ return 1;
+ }
+ }
+
+ if (fin)
+ qcs->flags |= QC_SF_SIZE_KNOWN;
+
+ if (qcs->flags & QC_SF_SIZE_KNOWN &&
+ qcs->rx.offset_max == qcs->rx.offset + ncb_data(&qcs->rx.ncbuf, 0)) {
+ qcs_close_remote(qcs);
+ }
+
+ if ((ncb_data(&qcs->rx.ncbuf, 0) && !(qcs->flags & QC_SF_DEM_FULL)) || fin) {
+ qcc_decode_qcs(qcc, qcs);
+ qcc_refresh_timeout(qcc);
+ }
+
+ if (qcs->flags & QC_SF_READ_ABORTED) {
+ /* TODO should send a STOP_SENDING */
+ qcs_free(qcs);
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+}
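+
+/* A worked example of the partial-overlap handling above: with rx.offset at
+ * 100, a retransmitted frame carrying offset 80 and len 50 has its first 20
+ * bytes already consumed, so data/offset are advanced by 20 and only the 30
+ * new bytes (covering 100 to 130) are inserted into the ncbuf.
+ */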
+
+/* Handle a new MAX_DATA frame. <max> must contain the maximum data field of
+ * the frame.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qcc_recv_max_data(struct qcc *qcc, uint64_t max)
+{
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ TRACE_PROTO("receiving MAX_DATA", QMUX_EV_QCC_RECV, qcc->conn);
+ if (qcc->rfctl.md < max) {
+ qcc->rfctl.md = max;
+ TRACE_DEVEL("increase remote max-data", QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_BLK_MFCTL) {
+ qcc->flags &= ~QC_CF_BLK_MFCTL;
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+}
+
+/* Handle a new MAX_STREAM_DATA frame. <max> must contain the maximum data
+ * field of the frame and <id> is the identifier of the QUIC stream.
+ *
+ * Returns 0 on success else non-zero. On error, the received frame should not
+ * be acknowledged.
+ */
+int qcc_recv_max_stream_data(struct qcc *qcc, uint64_t id, uint64_t max)
+{
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ /* RFC 9000 19.10. MAX_STREAM_DATA Frames
+ *
+ * Receiving a MAX_STREAM_DATA frame for a locally
+ * initiated stream that has not yet been created MUST be treated as a
+ * connection error of type STREAM_STATE_ERROR. An endpoint that
+ * receives a MAX_STREAM_DATA frame for a receive-only stream MUST
+ * terminate the connection with error STREAM_STATE_ERROR.
+ */
+ if (qcc_get_qcs(qcc, id, 0, 1, &qcs)) {
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+ }
+
+ if (qcs) {
+ TRACE_PROTO("receiving MAX_STREAM_DATA", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ if (max > qcs->tx.msd) {
+ qcs->tx.msd = max;
+ TRACE_DEVEL("increase remote max-stream-data", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ if (qcs->flags & QC_SF_BLK_SFCTL) {
+ qcs->flags &= ~QC_SF_BLK_SFCTL;
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+ }
+ }
+
+ if (qcc_may_expire(qcc) && !qcc->nb_hreq)
+ qcc_refresh_timeout(qcc);
+
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+}
+
+/* Handle a new STOP_SENDING frame for stream ID <id>. The error code should be
+ * specified in <err>.
+ *
+ * Returns 0 on success else non-zero. On error, the received frame should not
+ * be acknowledged.
+ */
+int qcc_recv_stop_sending(struct qcc *qcc, uint64_t id, uint64_t err)
+{
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ /* RFC 9000 19.5. STOP_SENDING Frames
+ *
+ * Receiving a STOP_SENDING frame for a
+ * locally initiated stream that has not yet been created MUST be
+ * treated as a connection error of type STREAM_STATE_ERROR. An
+ * endpoint that receives a STOP_SENDING frame for a receive-only stream
+ * MUST terminate the connection with error STREAM_STATE_ERROR.
+ */
+ if (qcc_get_qcs(qcc, id, 0, 1, &qcs)) {
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+ }
+
+ if (!qcs)
+ goto out;
+
+ TRACE_PROTO("receiving STOP_SENDING", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ /* RFC 9000 3.5. Solicited State Transitions
+ *
+ * An endpoint is expected to send another STOP_SENDING frame if a
+ * packet containing a previous STOP_SENDING is lost. However, once
+ * either all stream data or a RESET_STREAM frame has been received for
+ * the stream -- that is, the stream is in any state other than "Recv"
+ * or "Size Known" -- sending a STOP_SENDING frame is unnecessary.
+ */
+
+ /* TODO per the previous RFC clause, STOP_SENDING is ignored if the current
+ * stream has already been closed locally. This is useful to avoid emitting
+ * multiple RESET_STREAM frames for a single stream. This works if the
+ * stream was locally closed because all data were transmitted, but in this
+ * case the RFC advises using an explicit RESET_STREAM.
+ */
+ if (qcs_is_close_local(qcs)) {
+ TRACE_STATE("ignoring STOP_SENDING", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto out;
+ }
+
+ qcs_idle_open(qcs);
+
+ if (qcc->app_ops->close) {
+ if (qcc->app_ops->close(qcs, QCC_APP_OPS_CLOSE_SIDE_WR)) {
+ TRACE_ERROR("closure rejected by app layer", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto out;
+ }
+ }
+
+ /* RFC 9000 3.5. Solicited State Transitions
+ *
+ * An endpoint that receives a STOP_SENDING frame
+ * MUST send a RESET_STREAM frame if the stream is in the "Ready" or
+ * "Send" state. If the stream is in the "Data Sent" state, the
+ * endpoint MAY defer sending the RESET_STREAM frame until the packets
+ * containing outstanding data are acknowledged or declared lost. If
+ * any outstanding data is declared lost, the endpoint SHOULD send a
+ * RESET_STREAM frame instead of retransmitting the data.
+ *
+ * An endpoint SHOULD copy the error code from the STOP_SENDING frame to
+ * the RESET_STREAM frame it sends, but it can use any application error
+ * code.
+ */
+ qcc_reset_stream(qcs, err);
+
+ if (qcc_may_expire(qcc) && !qcc->nb_hreq)
+ qcc_refresh_timeout(qcc);
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+}
+
+/* Signal the closing of the remote stream with ID <id>. Flow-control credit
+ * for new streams may be granted to the peer if needed.
+ */
+static int qcc_release_remote_stream(struct qcc *qcc, uint64_t id)
+{
+ struct quic_frame *frm;
+
+ TRACE_ENTER(QMUX_EV_QCS_END, qcc->conn);
+
+ if (quic_stream_is_bidi(id)) {
+ ++qcc->lfctl.cl_bidi_r;
+ if (qcc->lfctl.cl_bidi_r > qcc->lfctl.ms_bidi_init / 2) {
+ TRACE_DATA("increase max stream limit with MAX_STREAMS_BIDI", QMUX_EV_QCC_SEND, qcc->conn);
+ frm = pool_zalloc(pool_head_quic_frame);
+ BUG_ON(!frm); /* TODO handle this properly */
+
+ LIST_INIT(&frm->reflist);
+ frm->type = QUIC_FT_MAX_STREAMS_BIDI;
+ frm->max_streams_bidi.max_streams = qcc->lfctl.ms_bidi +
+ qcc->lfctl.cl_bidi_r;
+ LIST_APPEND(&qcc->lfctl.frms, &frm->list);
+ tasklet_wakeup(qcc->wait_event.tasklet);
+
+ qcc->lfctl.ms_bidi += qcc->lfctl.cl_bidi_r;
+ qcc->lfctl.cl_bidi_r = 0;
+ }
+ }
+ else {
+ /* TODO unidirectional stream flow control with MAX_STREAMS_UNI
+ * emission not implemented. It should be unnecessary for
+ * HTTP/3 but may be required if other application protocols
+ * are supported.
+ */
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCS_END, qcc->conn);
+
+ return 0;
+}
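+
+/* The same half-window batching as for data flow control applies above:
+ * with ms_bidi_init = 100, closed remote bidi streams accumulate in
+ * cl_bidi_r and, once more than 50 are pending, a single MAX_STREAMS_BIDI
+ * frame raises the advertised limit by that amount in one update.
+ */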
+
+/* detaches the QUIC stream from its QCC and releases it to the QCS pool. */
+static void qcs_destroy(struct qcs *qcs)
+{
+ struct connection *conn = qcs->qcc->conn;
+ const uint64_t id = qcs->id;
+
+ TRACE_ENTER(QMUX_EV_QCS_END, conn, qcs);
+
+ /* A stream MUST NOT be removed while prepared data is left to send. This
+ * is to ensure consistency of the connection flow-control calculation.
+ */
+ BUG_ON(qcs->tx.offset < qcs->tx.sent_offset);
+
+ if (quic_stream_is_remote(qcs->qcc, id))
+ qcc_release_remote_stream(qcs->qcc, id);
+
+ qcs_free(qcs);
+
+ TRACE_LEAVE(QMUX_EV_QCS_END, conn);
+}
+
+/* Transfer as much data as possible on <qcs> from <in> to <out>, within the
+ * available flow-control at the stream and connection levels.
+ *
+ * Returns the total bytes of transferred data.
+ */
+static int qcs_xfer_data(struct qcs *qcs, struct buffer *out, struct buffer *in)
+{
+ struct qcc *qcc = qcs->qcc;
+ int left, to_xfer;
+ int total = 0;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ qc_get_buf(qcs, out);
+
+ /*
+ * QCS out buffer diagram
+ * head left to_xfer
+ * -------------> ----------> ----->
+ * --------------------------------------------------
+ * |...............|xxxxxxxxxxx|<<<<<
+ * --------------------------------------------------
+ * ^ ack-off ^ sent-off ^ off
+ *
+ * STREAM frame
+ * ^ ^
+ * |xxxxxxxxxxxxxxxxx|
+ */
+
+ BUG_ON_HOT(qcs->tx.sent_offset < qcs->stream->ack_offset);
+ BUG_ON_HOT(qcs->tx.offset < qcs->tx.sent_offset);
+ BUG_ON_HOT(qcc->tx.offsets < qcc->tx.sent_offsets);
+
+ left = qcs->tx.offset - qcs->tx.sent_offset;
+ to_xfer = QUIC_MIN(b_data(in), b_room(out));
+
+ BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd);
+ /* do not exceed flow control limit */
+ if (qcs->tx.offset + to_xfer > qcs->tx.msd)
+ to_xfer = qcs->tx.msd - qcs->tx.offset;
+
+ BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md);
+ /* do not exceed the connection flow control limit */
+ if (qcc->tx.offsets + to_xfer > qcc->rfctl.md)
+ to_xfer = qcc->rfctl.md - qcc->tx.offsets;
+
+ if (!left && !to_xfer)
+ goto out;
+
+ total = b_force_xfer(out, in, to_xfer);
+
+ out:
+ {
+ struct qcs_xfer_data_trace_arg arg = {
+ .prep = b_data(out), .xfer = total,
+ };
+ TRACE_LEAVE(QMUX_EV_QCS_SEND|QMUX_EV_QCS_XFER_DATA,
+ qcc->conn, qcs, &arg);
+ }
+
+ return total;
+}
+
+/* Prepare a STREAM frame for <qcs> instance using <out> as payload. The frame
+ * is appended in <frm_list>. Set <fin> if this is supposed to be the last
+ * stream frame.
+ *
+ * Returns the length of the STREAM frame or a negative error code.
+ */
+static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin,
+ struct list *frm_list)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct quic_frame *frm;
+ int head, total;
+ uint64_t base_off;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ /* if ack_offset < buf_offset, it points to an older buffer. */
+ base_off = MAX(qcs->stream->buf_offset, qcs->stream->ack_offset);
+ BUG_ON(qcs->tx.sent_offset < base_off);
+
+ head = qcs->tx.sent_offset - base_off;
+ total = b_data(out) - head;
+ BUG_ON(total < 0);
+
+ if (!total && !fin) {
+ /* No need to send anything if total is zero and there is no FIN to signal. */
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ return 0;
+ }
+ BUG_ON((!total && qcs->tx.sent_offset > qcs->tx.offset) ||
+ (total && qcs->tx.sent_offset >= qcs->tx.offset));
+ BUG_ON(qcs->tx.sent_offset + total > qcs->tx.offset);
+ BUG_ON(qcc->tx.sent_offsets + total > qcc->rfctl.md);
+
+ TRACE_PROTO("sending STREAM frame", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ frm = pool_zalloc(pool_head_quic_frame);
+ if (!frm) {
+ TRACE_ERROR("frame alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto err;
+ }
+
+ LIST_INIT(&frm->reflist);
+ frm->type = QUIC_FT_STREAM_8;
+ frm->stream.stream = qcs->stream;
+ frm->stream.id = qcs->id;
+ frm->stream.buf = out;
+ frm->stream.data = (unsigned char *)b_peek(out, head);
+ frm->stream.dup = 0;
+
+ /* FIN is positioned only when the buffer has been totally emptied. */
+ if (fin)
+ frm->type |= QUIC_STREAM_FRAME_TYPE_FIN_BIT;
+
+ if (qcs->tx.sent_offset) {
+ frm->type |= QUIC_STREAM_FRAME_TYPE_OFF_BIT;
+ frm->stream.offset.key = qcs->tx.sent_offset;
+ }
+
+ frm->type |= QUIC_STREAM_FRAME_TYPE_LEN_BIT;
+ frm->stream.len = total;
+
+ LIST_APPEND(frm_list, &frm->list);
+
+ out:
+ {
+ struct qcs_build_stream_trace_arg arg = {
+ .len = frm->stream.len, .fin = fin,
+ .offset = frm->stream.offset.key,
+ };
+ TRACE_LEAVE(QMUX_EV_QCS_SEND|QMUX_EV_QCS_BUILD_STRM,
+ qcc->conn, qcs, &arg);
+ }
+
+ return total;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ return -1;
+}
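+
+/* The frame-type arithmetic above matches RFC 9000 19.8: STREAM frames use
+ * types 0x08 to 0x0f, the base 0x08 being OR'ed with the OFF (0x04), LEN
+ * (0x02) and FIN (0x01) bits. OFF is omitted for a zero offset, and LEN is
+ * always set here so the frame does not have to extend to the packet's end.
+ */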
+
+/* Check after transferring data from qcs.tx.buf if FIN must be set on the next
+ * STREAM frame for <qcs>.
+ *
+ * Returns true if FIN must be set else false.
+ */
+static int qcs_stream_fin(struct qcs *qcs)
+{
+ return qcs->flags & QC_SF_FIN_STREAM && !b_data(&qcs->tx.buf);
+}
+
+/* This function must be called by the upper layer to report the sending of a
+ * STREAM frame for the <qcs> instance. The frame is <data> bytes long at
+ * offset <offset>.
+ */
+void qcc_streams_sent_done(struct qcs *qcs, uint64_t data, uint64_t offset)
+{
+ struct qcc *qcc = qcs->qcc;
+ uint64_t diff;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ BUG_ON(offset > qcs->tx.sent_offset);
+ BUG_ON(offset + data > qcs->tx.offset);
+
+ /* check if the STREAM frame has already been notified. It can happen
+ * on retransmission.
+ */
+ if (offset + data < qcs->tx.sent_offset) {
+ TRACE_DEVEL("offset already notified", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto out;
+ }
+
+ qcs_idle_open(qcs);
+
+ diff = offset + data - qcs->tx.sent_offset;
+ if (diff) {
+ /* increase offset sum on connection */
+ qcc->tx.sent_offsets += diff;
+ BUG_ON_HOT(qcc->tx.sent_offsets > qcc->rfctl.md);
+ if (qcc->tx.sent_offsets == qcc->rfctl.md) {
+ qcc->flags |= QC_CF_BLK_MFCTL;
+ TRACE_STATE("connection flow-control reached", QMUX_EV_QCS_SEND, qcc->conn);
+ }
+
+ /* increase offset on stream */
+ qcs->tx.sent_offset += diff;
+ BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.msd);
+ BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.offset);
+ if (qcs->tx.sent_offset == qcs->tx.msd) {
+ qcs->flags |= QC_SF_BLK_SFCTL;
+ TRACE_STATE("stream flow-control reached", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ }
+
+ if (qcs->tx.offset == qcs->tx.sent_offset &&
+ b_full(&qcs->stream->buf->buf)) {
+ qc_stream_buf_release(qcs->stream);
+ /* prepare qcs for immediate send retry if data to send */
+ if (b_data(&qcs->tx.buf))
+ LIST_APPEND(&qcc->send_retry_list, &qcs->el);
+ }
+ }
+
+ if (qcs->tx.offset == qcs->tx.sent_offset && !b_data(&qcs->tx.buf) &&
+ qcs->flags & (QC_SF_FIN_STREAM|QC_SF_DETACH)) {
+ /* Close stream locally. */
+ qcs_close_local(qcs);
+ /* Reset flag to not emit multiple FIN STREAM frames. */
+ qcs->flags &= ~QC_SF_FIN_STREAM;
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+}
+
+/* Wrapper for send on transport layer. Send a list of frames <frms> for the
+ * connection <qcc>.
+ *
+ * Returns 0 if all data was sent successfully, else non-zero.
+ */
+static int qc_send_frames(struct qcc *qcc, struct list *frms)
+{
+ TRACE_ENTER(QMUX_EV_QCC_SEND, qcc->conn);
+
+ if (LIST_ISEMPTY(frms)) {
+ TRACE_DEVEL("no frames to send", QMUX_EV_QCC_SEND, qcc->conn);
+ goto err;
+ }
+
+ LIST_INIT(&qcc->send_retry_list);
+
+ if (!qc_send_mux(qcc->conn->handle.qc, frms))
+ goto err;
+
+ /* If there are frames left at this stage, the transport layer is blocked.
+ * Subscribe to it to retry later.
+ */
+ if (!LIST_ISEMPTY(frms)) {
+ TRACE_DEVEL("remaining frames to send, subscribing", QMUX_EV_QCC_SEND, qcc->conn);
+ qcc->conn->xprt->subscribe(qcc->conn, qcc->conn->xprt_ctx,
+ SUB_RETRY_SEND, &qcc->wait_event);
+ goto err;
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_SEND, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_SEND, qcc->conn);
+ return 1;
+}
+
+/* Emit a RESET_STREAM on <qcs>.
+ *
+ * Returns 0 if the frame has been successfully sent else non-zero.
+ */
+static int qcs_send_reset(struct qcs *qcs)
+{
+ struct list frms = LIST_HEAD_INIT(frms);
+ struct quic_frame *frm;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+
+ frm = pool_zalloc(pool_head_quic_frame);
+ if (!frm) {
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ LIST_INIT(&frm->reflist);
+ frm->type = QUIC_FT_RESET_STREAM;
+ frm->reset_stream.id = qcs->id;
+ frm->reset_stream.app_error_code = qcs->err;
+ frm->reset_stream.final_size = qcs->tx.sent_offset;
+
+ LIST_APPEND(&frms, &frm->list);
+ if (qc_send_frames(qcs->qcc, &frms)) {
+ pool_free(pool_head_quic_frame, frm);
+ TRACE_DEVEL("cannot send RESET_STREAM", QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ if (qcs_sc(qcs)) {
+ se_fl_set_error(qcs->sd);
+ qcs_alert(qcs);
+ }
+
+ qcs_close_local(qcs);
+ qcs->flags &= ~QC_SF_TO_RESET;
+
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 0;
+}
+
+/* Used internally by the qc_send function. Proceed to send for <qcs>. This
+ * will transfer data from the qcs buffer to its quic_stream counterpart. A
+ * STREAM frame is then generated and inserted into the <frms> list.
+ *
+ * Returns the total number of bytes transferred between the qcs and
+ * quic_stream buffers. It can be zero if the out buffer cannot be allocated.
+ */
+static int _qc_send_qcs(struct qcs *qcs, struct list *frms)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct buffer *buf = &qcs->tx.buf;
+ struct buffer *out = qc_stream_buf_get(qcs->stream);
+ int xfer = 0;
+ char fin = 0;
+
+ /* Allocate <out> buffer if necessary. */
+ if (!out) {
+ if (qcc->flags & QC_CF_CONN_FULL)
+ return 0;
+
+ out = qc_stream_buf_alloc(qcs->stream, qcs->tx.offset);
+ if (!out) {
+ qcc->flags |= QC_CF_CONN_FULL;
+ return 0;
+ }
+ }
+
+ /* Transfer data from <buf> to <out>. */
+ if (b_data(buf)) {
+ xfer = qcs_xfer_data(qcs, out, buf);
+ if (xfer > 0) {
+ qcs_notify_send(qcs);
+ qcs->flags &= ~QC_SF_BLK_MROOM;
+ }
+
+ qcs->tx.offset += xfer;
+ BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd);
+ qcc->tx.offsets += xfer;
+ BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md);
+ }
+
+ /* the out buffer cannot be empty if the qcs offsets differ. */
+ BUG_ON(!b_data(out) && qcs->tx.sent_offset != qcs->tx.offset);
+
+ /* FIN is set if all incoming data were transferred. */
+ fin = qcs_stream_fin(qcs);
+
+ /* Build a new STREAM frame with <out> buffer. */
+ if (qcs->tx.sent_offset != qcs->tx.offset || fin) {
+ int ret;
+ ret = qcs_build_stream_frm(qcs, out, fin, frms);
+ if (ret < 0) { ABORT_NOW(); /* TODO handle this properly */ }
+ }
+
+ return xfer;
+}
+
+/* Proceed to sending. Loop through all available streams for the <qcc>
+ * instance and try to send as much as possible.
+ *
+ * Returns the total number of bytes sent to the transport layer.
+ */
+static int qc_send(struct qcc *qcc)
+{
+ struct list frms = LIST_HEAD_INIT(frms);
+ struct eb64_node *node;
+ struct qcs *qcs, *qcs_tmp;
+ int total = 0, tmp_total = 0;
+
+ TRACE_ENTER(QMUX_EV_QCC_SEND, qcc->conn);
+
+ if (qcc->conn->flags & CO_FL_SOCK_WR_SH || qcc->flags & QC_CF_CC_EMIT) {
+ qcc->conn->flags |= CO_FL_ERROR;
+ TRACE_DEVEL("connection on error", QMUX_EV_QCC_SEND, qcc->conn);
+ goto err;
+ }
+
+ if (!LIST_ISEMPTY(&qcc->lfctl.frms)) {
+ if (qc_send_frames(qcc, &qcc->lfctl.frms)) {
+ TRACE_DEVEL("flow-control frames rejected by transport, aborting send", QMUX_EV_QCC_SEND, qcc->conn);
+ goto out;
+ }
+ }
+
+ if (qcc->flags & QC_CF_BLK_MFCTL)
+ return 0;
+
+ if (!(qcc->flags & QC_CF_APP_FINAL) && !eb_is_empty(&qcc->streams_by_id) &&
+ qcc->app_ops->finalize) {
+ /* Finalize the application layer before sending any stream.
+ * For h3 this consists in preparing the control stream data (SETTINGS h3).
+ */
+ qcc->app_ops->finalize(qcc->ctx);
+ qcc->flags |= QC_CF_APP_FINAL;
+ }
+
+ /* Loop through all streams and construct STREAM frames if data is available.
+ * TODO optimize the loop to favor streams which are not too heavy.
+ */
+ node = eb64_first(&qcc->streams_by_id);
+ while (node) {
+ int ret;
+ uint64_t id;
+
+ qcs = eb64_entry(node, struct qcs, by_id);
+ id = qcs->id;
+
+ if (quic_stream_is_uni(id) && quic_stream_is_remote(qcc, id)) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ if (qcs->flags & QC_SF_TO_RESET) {
+ qcs_send_reset(qcs);
+ node = eb64_next(node);
+ continue;
+ }
+
+ if (qcs_is_close_local(qcs)) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ if (qcs->flags & QC_SF_BLK_SFCTL) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ /* Check if there is something to send. */
+ if (!b_data(&qcs->tx.buf) && !qcs_stream_fin(qcs) &&
+ !qc_stream_buf_get(qcs->stream)) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ ret = _qc_send_qcs(qcs, &frms);
+ total += ret;
+ node = eb64_next(node);
+ }
+
+ if (qc_send_frames(qcc, &frms)) {
+ /* data rejected by transport layer, do not retry. */
+ goto out;
+ }
+
+ retry:
+ tmp_total = 0;
+ list_for_each_entry_safe(qcs, qcs_tmp, &qcc->send_retry_list, el) {
+ int ret;
+ BUG_ON(!b_data(&qcs->tx.buf));
+ BUG_ON(qc_stream_buf_get(qcs->stream));
+
+ ret = _qc_send_qcs(qcs, &frms);
+ tmp_total += ret;
+ LIST_DELETE(&qcs->el);
+ }
+
+ total += tmp_total;
+ if (!qc_send_frames(qcc, &frms) && !LIST_ISEMPTY(&qcc->send_retry_list))
+ goto retry;
+
+ out:
+ /* Deallocate frames that the transport layer has rejected. */
+ if (!LIST_ISEMPTY(&frms)) {
+ struct quic_frame *frm, *frm2;
+ list_for_each_entry_safe(frm, frm2, &frms, list) {
+ LIST_DELETE(&frm->list);
+ pool_free(pool_head_quic_frame, frm);
+ }
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_SEND, qcc->conn);
+ return total;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_SEND, qcc->conn);
+ return 0;
+}
+
+/* Proceed with receiving. Loop through all streams of <qcc> and use the
+ * decode_qcs operation on each of them.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int qc_recv(struct qcc *qcc)
+{
+ struct eb64_node *node;
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_CC_EMIT) {
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+ }
+
+ node = eb64_first(&qcc->streams_by_id);
+ while (node) {
+ uint64_t id;
+
+ qcs = eb64_entry(node, struct qcs, by_id);
+ id = qcs->id;
+
+ if (!ncb_data(&qcs->rx.ncbuf, 0) || (qcs->flags & QC_SF_DEM_FULL)) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ if (quic_stream_is_uni(id) && quic_stream_is_local(qcc, id)) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ qcc_decode_qcs(qcc, qcs);
+ node = eb64_next(node);
+
+ if (qcs->flags & QC_SF_READ_ABORTED) {
+ /* TODO should send a STOP_SENDING */
+ qcs_free(qcs);
+ }
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+}
+
+
+/* Release all streams whose transfer operation is completed.
+ *
+ * Returns true if at least one stream is released.
+ */
+static int qc_purge_streams(struct qcc *qcc)
+{
+ struct eb64_node *node;
+ int release = 0;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn);
+
+ node = eb64_first(&qcc->streams_by_id);
+ while (node) {
+ struct qcs *qcs = eb64_entry(node, struct qcs, by_id);
+ node = eb64_next(node);
+
+ /* Release closed streams which are not attached to a stream connector. */
+ if (qcs->st == QC_SS_CLO && !qcs_sc(qcs)) {
+ TRACE_STATE("purging closed stream", QMUX_EV_QCC_WAKE, qcs->qcc->conn, qcs);
+ qcs_destroy(qcs);
+ release = 1;
+ continue;
+ }
+
+ /* Release detached streams with empty buffer. */
+ if (qcs->flags & QC_SF_DETACH) {
+ if (qcs_is_close_local(qcs)) {
+ TRACE_STATE("purging detached stream", QMUX_EV_QCC_WAKE, qcs->qcc->conn, qcs);
+ qcs_destroy(qcs);
+ release = 1;
+ continue;
+ }
+
+ qcc->conn->xprt->subscribe(qcc->conn, qcc->conn->xprt_ctx,
+ SUB_RETRY_SEND, &qcc->wait_event);
+ }
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn);
+ return release;
+}
+
+/* Release function. This one must be called to free all the resources
+ * allocated to the mux.
+ */
+static void qc_release(struct qcc *qcc)
+{
+ struct connection *conn = qcc->conn;
+ struct eb64_node *node;
+
+ TRACE_ENTER(QMUX_EV_QCC_END, conn);
+
+ if (!(qcc->flags & QC_CF_CC_EMIT)) {
+ if (qcc->app_ops && qcc->app_ops->shutdown) {
+ /* Application protocol with dedicated connection closing
+ * procedure.
+ */
+ qcc->app_ops->shutdown(qcc->ctx);
+
+ /* useful if the application protocol must emit some closing
+ * frames, for example the HTTP/3 GOAWAY frame.
+ */
+ qc_send(qcc);
+ }
+ else {
+ qcc_emit_cc_app(qcc, QC_ERR_NO_ERROR, 0);
+ }
+ }
+
+ if (qcc->task) {
+ task_destroy(qcc->task);
+ qcc->task = NULL;
+ }
+
+ if (qcc->wait_event.tasklet)
+ tasklet_free(qcc->wait_event.tasklet);
+ if (conn && qcc->wait_event.events) {
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx,
+ qcc->wait_event.events,
+ &qcc->wait_event);
+ }
+
+ /* free the remaining qcs instances */
+ node = eb64_first(&qcc->streams_by_id);
+ while (node) {
+ struct qcs *qcs = eb64_entry(node, struct qcs, by_id);
+ node = eb64_next(node);
+ qcs_free(qcs);
+ }
+
+ while (!LIST_ISEMPTY(&qcc->lfctl.frms)) {
+ struct quic_frame *frm = LIST_ELEM(qcc->lfctl.frms.n, struct quic_frame *, list);
+ LIST_DELETE(&frm->list);
+ pool_free(pool_head_quic_frame, frm);
+ }
+
+ if (qcc->app_ops && qcc->app_ops->release)
+ qcc->app_ops->release(qcc->ctx);
+ TRACE_PROTO("application layer released", QMUX_EV_QCC_END, conn);
+
+ pool_free(pool_head_qcc, qcc);
+
+ if (conn) {
+ LIST_DEL_INIT(&conn->stopping_list);
+
+ conn->handle.qc->conn = NULL;
+ conn->mux = NULL;
+ conn->ctx = NULL;
+
+ TRACE_DEVEL("freeing conn", QMUX_EV_QCC_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_END);
+}
+
+static struct task *qc_io_cb(struct task *t, void *ctx, unsigned int status)
+{
+ struct qcc *qcc = ctx;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn);
+
+ qc_send(qcc);
+
+ if (qc_purge_streams(qcc)) {
+ if (qcc_is_dead(qcc)) {
+ TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn);
+ goto release;
+ }
+ }
+
+ qc_recv(qcc);
+
+ /* TODO check if qcc proxy is disabled. If yes, use graceful shutdown
+ * to close the connection.
+ */
+
+ qcc_refresh_timeout(qcc);
+
+ end:
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn);
+ return NULL;
+
+ release:
+ qc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return NULL;
+}
+
+static struct task *qc_timeout_task(struct task *t, void *ctx, unsigned int state)
+{
+ struct qcc *qcc = ctx;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc ? qcc->conn : NULL);
+
+ if (qcc) {
+ if (!expired) {
+ TRACE_DEVEL("not expired", QMUX_EV_QCC_WAKE, qcc->conn);
+ goto requeue;
+ }
+
+ if (!qcc_may_expire(qcc)) {
+ TRACE_DEVEL("cannot expired", QMUX_EV_QCC_WAKE, qcc->conn);
+ t->expire = TICK_ETERNITY;
+ goto requeue;
+ }
+ }
+
+ task_destroy(t);
+
+ if (!qcc) {
+ TRACE_DEVEL("no more qcc", QMUX_EV_QCC_WAKE);
+ goto out;
+ }
+
+ qcc->task = NULL;
+
+ /* TODO depending on the timeout condition, different shutdown modes
+ * should be used. For HTTP keep-alive or a disabled proxy, a graceful
+ * shutdown should occur. For all other cases, an immediate close
+ * seems legitimate.
+ */
+ if (qcc_is_dead(qcc)) {
+ TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn);
+ qc_release(qcc);
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return NULL;
+
+ requeue:
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return t;
+}
+
+static int qc_init(struct connection *conn, struct proxy *prx,
+ struct session *sess, struct buffer *input)
+{
+ struct qcc *qcc;
+ struct quic_transport_params *lparams, *rparams;
+
+ TRACE_ENTER(QMUX_EV_QCC_NEW);
+
+ qcc = pool_alloc(pool_head_qcc);
+ if (!qcc) {
+ TRACE_ERROR("alloc failure", QMUX_EV_QCC_NEW);
+ goto fail_no_qcc;
+ }
+
+ qcc->conn = conn;
+ conn->ctx = qcc;
+ qcc->nb_hreq = qcc->nb_sc = 0;
+ qcc->flags = 0;
+
+ qcc->app_ops = NULL;
+ if (qcc_install_app_ops(qcc, conn->handle.qc->app_ops)) {
+ TRACE_PROTO("Cannot install app layer", QMUX_EV_QCC_NEW, qcc->conn);
+ /* prepare a CONNECTION_CLOSE frame */
+ quic_set_connection_close(conn->handle.qc, quic_err_transport(QC_ERR_APPLICATION_ERROR));
+ goto fail_no_tasklet;
+ }
+
+ qcc->streams_by_id = EB_ROOT_UNIQUE;
+
+ /* Server parameters, params used for RX flow control. */
+ lparams = &conn->handle.qc->rx.params;
+
+ qcc->rx.max_data = lparams->initial_max_data;
+ qcc->tx.sent_offsets = qcc->tx.offsets = 0;
+
+ /* Client initiated streams must respect the server flow control. */
+ qcc->strms[QCS_CLT_BIDI].max_streams = lparams->initial_max_streams_bidi;
+ qcc->strms[QCS_CLT_BIDI].nb_streams = 0;
+ qcc->strms[QCS_CLT_BIDI].rx.max_data = 0;
+ qcc->strms[QCS_CLT_BIDI].tx.max_data = lparams->initial_max_stream_data_bidi_remote;
+
+ qcc->strms[QCS_CLT_UNI].max_streams = lparams->initial_max_streams_uni;
+ qcc->strms[QCS_CLT_UNI].nb_streams = 0;
+ qcc->strms[QCS_CLT_UNI].rx.max_data = 0;
+ qcc->strms[QCS_CLT_UNI].tx.max_data = lparams->initial_max_stream_data_uni;
+
+ /* Server initiated streams must respect the server flow control. */
+ qcc->strms[QCS_SRV_BIDI].max_streams = 0;
+ qcc->strms[QCS_SRV_BIDI].nb_streams = 0;
+ qcc->strms[QCS_SRV_BIDI].rx.max_data = lparams->initial_max_stream_data_bidi_local;
+ qcc->strms[QCS_SRV_BIDI].tx.max_data = 0;
+
+ qcc->strms[QCS_SRV_UNI].max_streams = 0;
+ qcc->strms[QCS_SRV_UNI].nb_streams = 0;
+ qcc->strms[QCS_SRV_UNI].rx.max_data = lparams->initial_max_stream_data_uni;
+ qcc->strms[QCS_SRV_UNI].tx.max_data = 0;
+
+ LIST_INIT(&qcc->lfctl.frms);
+ qcc->lfctl.ms_bidi = qcc->lfctl.ms_bidi_init = lparams->initial_max_streams_bidi;
+ qcc->lfctl.ms_uni = lparams->initial_max_streams_uni;
+ qcc->lfctl.msd_bidi_l = lparams->initial_max_stream_data_bidi_local;
+ qcc->lfctl.msd_bidi_r = lparams->initial_max_stream_data_bidi_remote;
+ qcc->lfctl.msd_uni_r = lparams->initial_max_stream_data_uni;
+ qcc->lfctl.cl_bidi_r = 0;
+
+ qcc->lfctl.md = qcc->lfctl.md_init = lparams->initial_max_data;
+ qcc->lfctl.offsets_recv = qcc->lfctl.offsets_consume = 0;
+
+ rparams = &conn->handle.qc->tx.params;
+ qcc->rfctl.md = rparams->initial_max_data;
+ qcc->rfctl.msd_bidi_l = rparams->initial_max_stream_data_bidi_local;
+ qcc->rfctl.msd_bidi_r = rparams->initial_max_stream_data_bidi_remote;
+ qcc->rfctl.msd_uni_l = rparams->initial_max_stream_data_uni;
+
+ if (conn_is_back(conn)) {
+ qcc->next_bidi_l = 0x00;
+ qcc->largest_bidi_r = 0x01;
+ qcc->next_uni_l = 0x02;
+ qcc->largest_uni_r = 0x03;
+ }
+ else {
+ qcc->largest_bidi_r = 0x00;
+ qcc->next_bidi_l = 0x01;
+ qcc->largest_uni_r = 0x02;
+ qcc->next_uni_l = 0x03;
+ }
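+
+ /* Note: the hard-coded IDs above follow the QUIC stream ID encoding of
+ * RFC 9000, section 2.1: the two least significant bits select the
+ * stream type, bit 0 giving the initiator (0 = client, 1 = server) and
+ * bit 1 the directionality (0 = bidirectional, 1 = unidirectional),
+ * hence 0x00/0x01 for bidi streams and 0x02/0x03 for uni streams.
+ */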
+
+ qcc->wait_event.tasklet = tasklet_new();
+ if (!qcc->wait_event.tasklet) {
+ TRACE_ERROR("taslket alloc failure", QMUX_EV_QCC_NEW);
+ goto fail_no_tasklet;
+ }
+
+ LIST_INIT(&qcc->send_retry_list);
+
+ qcc->wait_event.tasklet->process = qc_io_cb;
+ qcc->wait_event.tasklet->context = qcc;
+ qcc->wait_event.events = 0;
+
+ qcc->proxy = prx;
+ /* haproxy timeouts */
+ qcc->task = NULL;
+ qcc->timeout = conn_is_back(qcc->conn) ? prx->timeout.server :
+ prx->timeout.client;
+ if (tick_isset(qcc->timeout)) {
+ qcc->task = task_new_here();
+ if (!qcc->task) {
+ TRACE_ERROR("timeout task alloc failure", QMUX_EV_QCC_NEW);
+ goto fail_no_timeout_task;
+ }
+ qcc->task->process = qc_timeout_task;
+ qcc->task->context = qcc;
+ qcc->task->expire = tick_add(now_ms, qcc->timeout);
+ }
+ qcc_reset_idle_start(qcc);
+ LIST_INIT(&qcc->opening_list);
+
+ if (!conn_is_back(conn)) {
+ if (!LIST_INLIST(&conn->stopping_list)) {
+ LIST_APPEND(&mux_stopping_data[tid].list,
+ &conn->stopping_list);
+ }
+ }
+
+ HA_ATOMIC_STORE(&conn->handle.qc->qcc, qcc);
+ /* init read cycle */
+ tasklet_wakeup(qcc->wait_event.tasklet);
+
+ TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn);
+ return 0;
+
+ fail_no_timeout_task:
+ tasklet_free(qcc->wait_event.tasklet);
+ fail_no_tasklet:
+ if (qcc->app_ops && qcc->app_ops->release)
+ qcc->app_ops->release(qcc->ctx);
+ pool_free(pool_head_qcc, qcc);
+ fail_no_qcc:
+ TRACE_LEAVE(QMUX_EV_QCC_NEW);
+ return -1;
+}
+
+static void qc_destroy(void *ctx)
+{
+ struct qcc *qcc = ctx;
+
+ TRACE_ENTER(QMUX_EV_QCC_END, qcc->conn);
+ qc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_QCC_END);
+}
+
+static void qc_detach(struct sedesc *sd)
+{
+ struct qcs *qcs = sd->se;
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_STRM_END, qcc->conn, qcs);
+
+ /* TODO this BUG_ON_HOT() is not correct as the stconn layer may detach
+ * from the stream even if it is not closed remotely at the QUIC layer.
+ * This happens for example when a stream must be closed due to a
+ * rejected request. To better handle these cases, it will be required
+ * to implement shutr/shutw MUX operations. Once this is done, this
+ * BUG_ON_HOT() statement can be adjusted.
+ */
+ //BUG_ON_HOT(!qcs_is_close_remote(qcs));
+
+ qcc_rm_sc(qcc);
+
+ if (!qcs_is_close_local(qcs) && !(qcc->conn->flags & CO_FL_ERROR)) {
+ TRACE_STATE("remaining data, detaching qcs", QMUX_EV_STRM_END, qcc->conn, qcs);
+ qcs->flags |= QC_SF_DETACH;
+ qcc_refresh_timeout(qcc);
+
+ TRACE_LEAVE(QMUX_EV_STRM_END, qcc->conn, qcs);
+ return;
+ }
+
+ qcs_destroy(qcs);
+
+ if (qcc_is_dead(qcc)) {
+ TRACE_STATE("killing dead connection", QMUX_EV_STRM_END, qcc->conn);
+ goto release;
+ }
+ else if (qcc->task) {
+ TRACE_DEVEL("refreshing connection's timeout", QMUX_EV_STRM_END, qcc->conn);
+ qcc_refresh_timeout(qcc);
+ }
+ else {
+ TRACE_DEVEL("completed", QMUX_EV_STRM_END, qcc->conn);
+ }
+
+ TRACE_LEAVE(QMUX_EV_STRM_END, qcc->conn);
+ return;
+
+ release:
+ qc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_STRM_END);
+ return;
+}
+
+/* Called from the upper layer, to receive data */
+static size_t qc_rcv_buf(struct stconn *sc, struct buffer *buf,
+ size_t count, int flags)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+ size_t ret = 0;
+ char fin = 0;
+
+ TRACE_ENTER(QMUX_EV_STRM_RECV, qcs->qcc->conn, qcs);
+
+ ret = qcs_http_rcv_buf(qcs, buf, count, &fin);
+
+ if (b_data(&qcs->rx.app_buf)) {
+ se_fl_set(qcs->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ }
+ else {
+ se_fl_clr(qcs->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (se_fl_test(qcs->sd, SE_FL_ERR_PENDING))
+ se_fl_set(qcs->sd, SE_FL_ERROR);
+
+ /* Set end-of-input if FIN received and all data extracted. */
+ if (fin)
+ se_fl_set(qcs->sd, SE_FL_EOI);
+
+ if (b_size(&qcs->rx.app_buf)) {
+ b_free(&qcs->rx.app_buf);
+ offer_buffers(NULL, 1);
+ }
+ }
+
+ if (ret) {
+ qcs->flags &= ~QC_SF_DEM_FULL;
+ tasklet_wakeup(qcs->qcc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QMUX_EV_STRM_RECV, qcs->qcc->conn, qcs);
+
+ return ret;
+}
+
+static size_t qc_snd_buf(struct stconn *sc, struct buffer *buf,
+ size_t count, int flags)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+ size_t ret;
+ char fin;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ /* The stream layer has been detached, so no transfer must occur afterwards. */
+ BUG_ON_HOT(qcs->flags & QC_SF_DETACH);
+
+ if (qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)) {
+ ret = qcs_http_reset_buf(qcs, buf, count);
+ goto end;
+ }
+
+ ret = qcs_http_snd_buf(qcs, buf, count, &fin);
+ if (fin)
+ qcs->flags |= QC_SF_FIN_STREAM;
+
+ if (ret || fin) {
+ if (!(qcs->qcc->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(qcs->qcc->wait_event.tasklet);
+ }
+
+ end:
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ return ret;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int qc_subscribe(struct stconn *sc, int event_type,
+ struct wait_event *es)
+{
+ return qcs_subscribe(__sc_mux_strm(sc), event_type, es);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int qc_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(qcs->subs && qcs->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ qcs->subs = NULL;
+
+ return 0;
+}
+
+/* Loop through all qcs from <qcc>. If CO_FL_ERROR is set on the connection,
+ * report SE_FL_ERR_PENDING|SE_FL_ERROR on the attached stream connectors and
+ * wake them.
+ */
+static int qc_wake_some_streams(struct qcc *qcc)
+{
+ struct qcs *qcs;
+ struct eb64_node *node;
+
+ for (node = eb64_first(&qcc->streams_by_id); node;
+ node = eb64_next(node)) {
+ qcs = eb64_entry(node, struct qcs, by_id);
+
+ if (!qcs_sc(qcs))
+ continue;
+
+ if (qcc->conn->flags & CO_FL_ERROR) {
+ se_fl_set(qcs->sd, SE_FL_ERR_PENDING);
+ if (se_fl_test(qcs->sd, SE_FL_EOS))
+ se_fl_set(qcs->sd, SE_FL_ERROR);
+
+ qcs_alert(qcs);
+ }
+ }
+
+ return 0;
+}
+
+static int qc_wake(struct connection *conn)
+{
+ struct qcc *qcc = conn->ctx;
+ struct proxy *prx = conn->handle.qc->li->bind_conf->frontend;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, conn);
+
+ /* Check if a soft-stop is in progress.
+ *
+ * TODO this is relevant for frontend connections only.
+ *
+ * TODO Client should be notified with a H3 GOAWAY and then a
+ * CONNECTION_CLOSE. However, quic-conn uses the listener socket for
+ * sending which at this stage is already closed.
+ */
+ if (unlikely(prx->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ qcc->conn->flags |= (CO_FL_SOCK_RD_SH|CO_FL_SOCK_WR_SH);
+
+ if (conn->handle.qc->flags & QUIC_FL_CONN_NOTIFY_CLOSE)
+ qcc->conn->flags |= (CO_FL_SOCK_RD_SH|CO_FL_SOCK_WR_SH);
+
+ qc_send(qcc);
+
+ qc_wake_some_streams(qcc);
+
+ if (qcc_is_dead(qcc))
+ goto release;
+
+ qcc_refresh_timeout(qcc);
+
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE, conn);
+ return 0;
+
+ release:
+ TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn);
+ qc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return 1;
+}
+
+
+static const struct mux_ops qc_ops = {
+ .init = qc_init,
+ .destroy = qc_destroy,
+ .detach = qc_detach,
+ .rcv_buf = qc_rcv_buf,
+ .snd_buf = qc_snd_buf,
+ .subscribe = qc_subscribe,
+ .unsubscribe = qc_unsubscribe,
+ .wake = qc_wake,
+ .flags = MX_FL_HTX|MX_FL_NO_UPG|MX_FL_FRAMED,
+ .name = "QUIC",
+};
+
+static struct mux_proto_list mux_proto_quic =
+ { .token = IST("quic"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_FE, .mux = &qc_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_quic);
diff --git a/src/mworker-prog.c b/src/mworker-prog.c
new file mode 100644
index 0000000..75cfc42
--- /dev/null
+++ b/src/mworker-prog.c
@@ -0,0 +1,356 @@
+/*
+ * Master Worker - program
+ *
+ * Copyright HAProxy Technologies - William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <errno.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/mworker.h>
+#include <haproxy/task.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+static int use_program = 0; /* do we use the program section? */
+
+/*
+ * Launch every configured program
+ */
+int mworker_ext_launch_all()
+{
+ int ret;
+ struct mworker_proc *child;
+ struct mworker_proc *tmp;
+ int reexec = 0;
+
+ if (!use_program)
+ return 0;
+
+ reexec = getenv("HAPROXY_MWORKER_REEXEC") ? 1 : 0;
+
+ /* find the right mworker_proc */
+ list_for_each_entry_safe(child, tmp, &proc_list, list) {
+ if (child->reloads == 0 && (child->options & PROC_O_TYPE_PROG)) {
+
+ if (reexec && (!(child->options & PROC_O_START_RELOAD))) {
+ struct mworker_proc *old_child;
+
+ /*
+ * This is a reload and we don't want to fork a
+ * new program, so we have to remove the entry
+ * from the list.
+ *
+ * But before that, we need to mark the
+ * previous program as not leaving, if we find one.
+ */
+
+ list_for_each_entry(old_child, &proc_list, list) {
+ if (!(old_child->options & PROC_O_TYPE_PROG) || (!(old_child->options & PROC_O_LEAVING)))
+ continue;
+
+ if (strcmp(old_child->id, child->id) == 0)
+ old_child->options &= ~PROC_O_LEAVING;
+ }
+
+
+ LIST_DELETE(&child->list);
+ mworker_free_child(child);
+ child = NULL;
+
+ continue;
+ }
+
+ child->timestamp = now.tv_sec;
+
+ ret = fork();
+ if (ret < 0) {
+ ha_alert("Cannot fork program '%s'.\n", child->id);
+ exit(EXIT_FAILURE); /* there has been an error */
+ } else if (ret > 0) { /* parent */
+ child->pid = ret;
+ ha_notice("New program '%s' (%d) forked\n", child->id, ret);
+ continue;
+ } else if (ret == 0) {
+ /* In child */
+ mworker_unblock_signals();
+ mworker_cleanlisteners();
+ mworker_cleantasks();
+
+ /* setgid / setuid */
+ if (child->gid != -1) {
+ if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
+ ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
+ " without 'uid'/'user' is generally useless.\n", child->command[0]);
+
+ if (setgid(child->gid) == -1) {
+ ha_alert("[%s.main()] Cannot set gid %d.\n", child->command[0], child->gid);
+ exit(1);
+ }
+ }
+
+ if (child->uid != -1 && setuid(child->uid) == -1) {
+ ha_alert("[%s.main()] Cannot set uid %d.\n", child->command[0], child->gid);
+ exit(1);
+ }
+
+ /* This one must not be exported, it's internal! */
+ unsetenv("HAPROXY_MWORKER_REEXEC");
+ execvp(child->command[0], child->command);
+
+ ha_alert("Cannot execute %s: %s\n", child->command[0], strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+
+ return 0;
+
+}
+
+
+/* Configuration */
+
+int cfg_parse_program(const char *file, int linenum, char **args, int kwm)
+{
+ static struct mworker_proc *ext_child = NULL;
+ struct mworker_proc *child;
+ int err_code = 0;
+
+ if (strcmp(args[0], "program") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto error;
+ }
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an <id> argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ ext_child = calloc(1, sizeof(*ext_child));
+ if (!ext_child) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ ext_child->options |= PROC_O_TYPE_PROG; /* external process */
+ ext_child->command = NULL;
+ ext_child->path = NULL;
+ ext_child->id = NULL;
+ ext_child->pid = -1;
+ ext_child->reloads = 0;
+ ext_child->timestamp = -1;
+ ext_child->ipc_fd[0] = -1;
+ ext_child->ipc_fd[1] = -1;
+ ext_child->options |= PROC_O_START_RELOAD; /* restart the programs by default */
+ ext_child->uid = -1;
+ ext_child->gid = -1;
+ LIST_INIT(&ext_child->list);
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads == 0 && (child->options & PROC_O_TYPE_PROG)) {
+ if (strcmp(args[1], child->id) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' program section already exists in the configuration.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+ }
+ }
+
+ ext_child->id = strdup(args[1]);
+ if (!ext_child->id) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ LIST_APPEND(&proc_list, &ext_child->list);
+
+ } else if (strcmp(args[0], "command") == 0) {
+ int arg_nb = 0;
+ int i = 0;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects a command with optional arguments separated in words.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ while (*args[arg_nb+1])
+ arg_nb++;
+
+ ext_child->command = calloc(arg_nb+1, sizeof(*ext_child->command));
+
+ if (!ext_child->command) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ while (i < arg_nb) {
+ ext_child->command[i] = strdup(args[i+1]);
+ if (!ext_child->command[i]) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+ i++;
+ }
+ ext_child->command[i] = NULL;
+
+ } else if (strcmp(args[0], "option") == 0) {
+
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (strcmp(args[1], "start-on-reload") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto error;
+ if (kwm == KWM_STD)
+ ext_child->options |= PROC_O_START_RELOAD;
+ else if (kwm == KWM_NO)
+ ext_child->options &= ~PROC_O_START_RELOAD;
+ goto out;
+
+ } else {
+ ha_alert("parsing [%s:%d] : unknown option '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ } else if (strcmp(args[0], "user") == 0) {
+ struct passwd *ext_child_user;
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects a user name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto error;
+
+ if (ext_child->uid != -1) {
+ ha_alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ ext_child_user = getpwnam(args[1]);
+ if (ext_child_user != NULL) {
+ ext_child->uid = (int)ext_child_user->pw_uid;
+ } else {
+ ha_alert("parsing [%s:%d] : cannot find user id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ } else if (strcmp(args[0], "group") == 0) {
+ struct group *ext_child_group;
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects a group name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto error;
+
+ if (ext_child->gid != -1) {
+ ha_alert("parsing [%s:%d] : group/gid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ ext_child_group = getgrnam(args[1]);
+ if (ext_child_group != NULL) {
+ ext_child->gid = (int)ext_child_group->gr_gid;
+ } else {
+ ha_alert("parsing [%s:%d] : cannot find group id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ } else {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "program");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ use_program = 1;
+
+ return err_code;
+
+error:
+ if (ext_child) {
+ LIST_DELETE(&ext_child->list);
+ if (ext_child->command) {
+ int i;
+
+ for (i = 0; ext_child->command[i]; i++) {
+ ha_free(&ext_child->command[i]);
+ }
+ ha_free(&ext_child->command);
+ }
+ ha_free(&ext_child->id);
+ }
+
+ ha_free(&ext_child);
+
+out:
+ return err_code;
+
+}
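+
+/* For illustration, a minimal (hypothetical) "program" section using the
+ * keywords parsed above could look like this in the configuration file;
+ * "my-sidecar" and its paths are made-up names:
+ *
+ *   program my-sidecar
+ *       command /usr/local/bin/my-sidecar -f /etc/my-sidecar.conf
+ *       user nobody
+ *       group nogroup
+ *       no option start-on-reload
+ *
+ * Such a section is only valid in master worker mode, as enforced by
+ * cfg_program_postparser() below.
+ */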
+
+int cfg_program_postparser()
+{
+ int err_code = 0;
+ struct mworker_proc *child;
+
+ /* we only need to check this during configuration parsing, since
+ * wait mode doesn't have the complete description of a program */
+ if (global.mode & MODE_MWORKER_WAIT)
+ return err_code;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads == 0 && (child->options & PROC_O_TYPE_PROG)) {
+ if (child->command == NULL) {
+ ha_alert("The program section '%s' lacks a command to launch.\n", child->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ }
+
+ if (use_program && !(global.mode & MODE_MWORKER)) {
+ ha_alert("Can't use a 'program' section without master worker mode.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ return err_code;
+}
+
+
+REGISTER_CONFIG_SECTION("program", cfg_parse_program, NULL);
+REGISTER_CONFIG_POSTPARSER("program", cfg_program_postparser);
diff --git a/src/mworker.c b/src/mworker.c
new file mode 100644
index 0000000..26b16cc
--- /dev/null
+++ b/src/mworker.c
@@ -0,0 +1,743 @@
+/*
+ * Master Worker
+ *
+ * Copyright HAProxy Technologies 2019 - William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+
+#if defined(USE_SYSTEMD)
+#include <systemd/sd-daemon.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/mworker.h>
+#include <haproxy/peers.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/signal.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+static int exitcode = -1;
+static int max_reloads = -1; /* maximum number of reloads a worker can survive before being killed */
+struct mworker_proc *proc_self = NULL; /* process structure of the current process */
+
+/* ----- children processes handling ----- */
+
+/*
+ * Send a signal to every known child.
+ */
+
+static void mworker_kill(int sig)
+{
+ struct mworker_proc *child;
+
+ list_for_each_entry(child, &proc_list, list) {
+ /* careful there, we must be sure that the pid > 0, we don't want to emit a kill -1 */
+ if ((child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)) && (child->pid > 0))
+ kill(child->pid, sig);
+ }
+}
+
+void mworker_kill_max_reloads(int sig)
+{
+ struct mworker_proc *child;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (max_reloads != -1 && (child->options & PROC_O_TYPE_WORKER) &&
+ (child->pid > 0) && (child->reloads > max_reloads))
+ kill(child->pid, sig);
+ }
+}
+
+/* return 1 if a pid is a current child otherwise 0 */
+int mworker_current_child(int pid)
+{
+ struct mworker_proc *child;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if ((child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)) && (!(child->options & PROC_O_LEAVING)) && (child->pid == pid))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Return the number of new and old children (including workers and external
+ * processes)
+ */
+int mworker_child_nb()
+{
+ struct mworker_proc *child;
+ int ret = 0;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG))
+ ret++;
+ }
+
+ return ret;
+}
+
+
+/*
+ * serialize the proc list and put it in the environment
+ */
+void mworker_proc_list_to_env()
+{
+ char *msg = NULL;
+ struct mworker_proc *child;
+ int minreloads = INT_MAX; /* minimum number of reloads to choose which processes are "current" ones */
+
+ list_for_each_entry(child, &proc_list, list) {
+ char type = '?';
+
+ if (child->options & PROC_O_TYPE_MASTER)
+ type = 'm';
+ else if (child->options & PROC_O_TYPE_PROG)
+ type = 'e';
+ else if (child->options &= PROC_O_TYPE_WORKER)
+ type = 'w';
+
+ if (child->reloads < minreloads)
+ minreloads = child->reloads;
+
+ if (child->pid > -1)
+ memprintf(&msg, "%s|type=%c;fd=%d;pid=%d;reloads=%d;failedreloads=%d;timestamp=%d;id=%s;version=%s", msg ? msg : "", type, child->ipc_fd[0], child->pid, child->reloads, child->failedreloads, child->timestamp, child->id ? child->id : "", child->version);
+ }
+ if (msg)
+ setenv("HAPROXY_PROCESSES", msg, 1);
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads > minreloads && !(child->options & PROC_O_TYPE_MASTER)) {
+ child->options |= PROC_O_LEAVING;
+ }
+ }
+
+}
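+
+/* Given the memprintf() format above, the serialized variable is expected to
+ * look like this (PIDs, FDs and timestamps are illustrative):
+ *
+ *   HAPROXY_PROCESSES=|type=m;fd=-1;pid=1200;reloads=1;failedreloads=0;
+ *       timestamp=1650000000;id=;version=2.6.0|type=w;fd=5;pid=1201;...
+ *
+ * (shown wrapped here; the real value is a single line). Each process entry
+ * is prefixed with '|' and its fields are separated by ';', which is exactly
+ * what mworker_env_to_proc_list() tokenizes on the other side.
+ */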
+
+struct mworker_proc *mworker_proc_new()
+{
+ struct mworker_proc *child;
+
+ child = calloc(1, sizeof(*child));
+ if (!child)
+ return NULL;
+
+ child->failedreloads = 0;
+ child->reloads = 0;
+ child->pid = -1;
+ child->ipc_fd[0] = -1;
+ child->ipc_fd[1] = -1;
+ child->timestamp = -1;
+
+ return child;
+}
+
+
+/*
+ * unserialize the proc list from the environment
+ * Return < 0 upon error.
+ */
+int mworker_env_to_proc_list()
+{
+ char *env, *msg, *omsg = NULL, *token = NULL, *s1;
+ struct mworker_proc *child;
+ int minreloads = INT_MAX; /* minimum number of reloads to choose which processes are "current" ones */
+ int err = 0;
+
+ env = getenv("HAPROXY_PROCESSES");
+ if (!env)
+ goto no_env;
+
+ omsg = msg = strdup(env);
+ if (!msg) {
+ ha_alert("Out of memory while trying to allocate a worker process structure.");
+ err = -1;
+ goto out;
+ }
+
+ while ((token = strtok_r(msg, "|", &s1))) {
+ char *subtoken = NULL;
+ char *s2;
+
+ msg = NULL;
+
+ child = mworker_proc_new();
+ if (!child) {
+ ha_alert("out of memory while trying to allocate a worker process structure.");
+ err = -1;
+ goto out;
+ }
+
+ while ((subtoken = strtok_r(token, ";", &s2))) {
+
+ token = NULL;
+
+ if (strncmp(subtoken, "type=", 5) == 0) {
+ char type;
+
+ type = *(subtoken+5);
+ if (type == 'm') { /* we are in the master, assign it */
+ proc_self = child;
+ child->options |= PROC_O_TYPE_MASTER;
+ } else if (type == 'e') {
+ child->options |= PROC_O_TYPE_PROG;
+ } else if (type == 'w') {
+ child->options |= PROC_O_TYPE_WORKER;
+ }
+
+ } else if (strncmp(subtoken, "fd=", 3) == 0) {
+ child->ipc_fd[0] = atoi(subtoken+3);
+ } else if (strncmp(subtoken, "pid=", 4) == 0) {
+ child->pid = atoi(subtoken+4);
+ } else if (strncmp(subtoken, "reloads=", 8) == 0) {
+ /* number of reloads that were requested for this process */
+ child->reloads = atoi(subtoken+8);
+
+ if (child->reloads < minreloads)
+ minreloads = child->reloads;
+ } else if (strncmp(subtoken, "failedreloads=", 14) == 0) {
+ child->failedreloads = atoi(subtoken+14);
+ } else if (strncmp(subtoken, "timestamp=", 10) == 0) {
+ child->timestamp = atoi(subtoken+10);
+ } else if (strncmp(subtoken, "id=", 3) == 0) {
+ child->id = strdup(subtoken+3);
+ } else if (strncmp(subtoken, "version=", 8) == 0) {
+ child->version = strdup(subtoken+8);
+ }
+ }
+ if (child->pid) {
+ LIST_APPEND(&proc_list, &child->list);
+ } else {
+ mworker_free_child(child);
+ }
+ }
+
+ /* set the leaving processes once we know which number of reloads are the current processes */
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads > minreloads)
+ child->options |= PROC_O_LEAVING;
+ }
+
+ unsetenv("HAPROXY_PROCESSES");
+
+no_env:
+
+ if (!proc_self) {
+
+ proc_self = mworker_proc_new();
+ if (!proc_self) {
+ ha_alert("Cannot allocate process structures.\n");
+ err = -1;
+ goto out;
+ }
+ proc_self->options |= PROC_O_TYPE_MASTER;
+ proc_self->pid = pid;
+ proc_self->timestamp = 0; /* we don't know the start time anymore */
+
+ LIST_APPEND(&proc_list, &proc_self->list);
+ ha_warning("The master internals are corrupted or it was started with a too old version (< 1.9). Please restart the master process.\n");
+ }
+
+out:
+ free(omsg);
+ return err;
+}
+
+/* Signal blocking and unblocking */
+
+void mworker_block_signals()
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGUSR2);
+ sigaddset(&set, SIGTTIN);
+ sigaddset(&set, SIGTTOU);
+ sigaddset(&set, SIGHUP);
+ sigaddset(&set, SIGCHLD);
+ ha_sigmask(SIG_SETMASK, &set, NULL);
+}
+
+void mworker_unblock_signals()
+{
+ haproxy_unblock_signals();
+}
+
+/* ----- mworker signal handlers ----- */
+
+/* broadcast the configured signal to the workers */
+void mworker_broadcast_signal(struct sig_handler *sh)
+{
+ mworker_kill(sh->arg);
+}
+
+/*
+ * When called, this function re-executes haproxy with -sf followed by the
+ * current children's PIDs, and possibly the old children's PIDs if they have not left yet.
+ */
+void mworker_catch_sighup(struct sig_handler *sh)
+{
+ mworker_reload();
+}
+
+void mworker_catch_sigterm(struct sig_handler *sh)
+{
+ int sig = sh->arg;
+
+#if defined(USE_SYSTEMD)
+ if (global.tune.options & GTUNE_USE_SYSTEMD) {
+ sd_notify(0, "STOPPING=1");
+ }
+#endif
+ ha_warning("Exiting Master process...\n");
+ mworker_kill(sig);
+}
+
+/*
+ * Wait for every child to exit
+ */
+
+void mworker_catch_sigchld(struct sig_handler *sh)
+{
+ int exitpid = -1;
+ int status = 0;
+ int childfound;
+
+restart_wait:
+
+ childfound = 0;
+
+ exitpid = waitpid(-1, &status, WNOHANG);
+ if (exitpid > 0) {
+ struct mworker_proc *child, *it;
+
+ if (WIFEXITED(status))
+ status = WEXITSTATUS(status);
+ else if (WIFSIGNALED(status))
+ status = 128 + WTERMSIG(status);
+ else if (WIFSTOPPED(status))
+ status = 128 + WSTOPSIG(status);
+ else
+ status = 255;
+
+ /* delete the child from the process list */
+ list_for_each_entry_safe(child, it, &proc_list, list) {
+ if (child->pid != exitpid)
+ continue;
+
+ LIST_DELETE(&child->list);
+ close(child->ipc_fd[0]);
+ childfound = 1;
+ break;
+ }
+
+ if (!childfound) {
+ /* We didn't find the PID in the list, that shouldn't happen but we can emit a warning */
+ ha_warning("Process %d exited with code %d (%s)\n", exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+ } else {
+ /* check if exited child is a current child */
+ if (!(child->options & PROC_O_LEAVING)) {
+ if (child->options & PROC_O_TYPE_WORKER) {
+ if (status < 128)
+ ha_warning("Current worker (%d) exited with code %d (%s)\n", exitpid, status, "Exit");
+ else
+ ha_alert("Current worker (%d) exited with code %d (%s)\n", exitpid, status, strsignal(status - 128));
+ }
+ else if (child->options & PROC_O_TYPE_PROG)
+ ha_alert("Current program '%s' (%d) exited with code %d (%s)\n", child->id, exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+
+ if (status != 0 && status != 130 && status != 143
+ && !(global.tune.options & GTUNE_NOEXIT_ONFAILURE)) {
+ ha_alert("exit-on-failure: killing every processes with SIGTERM\n");
+ mworker_kill(SIGTERM);
+ }
+ /* 0 & SIGTERM (143) are normal, but we should report SIGINT (130) and other signals */
+ if (exitcode < 0 && status != 0 && status != 143)
+ exitcode = status;
+ } else {
+ if (child->options & PROC_O_TYPE_WORKER) {
+ ha_warning("Former worker (%d) exited with code %d (%s)\n", exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+ delete_oldpid(exitpid);
+ } else if (child->options & PROC_O_TYPE_PROG) {
+ ha_warning("Former program '%s' (%d) exited with code %d (%s)\n", child->id, exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+ }
+ }
+ mworker_free_child(child);
+ child = NULL;
+ }
+
+ /* do it again to check if it was the last worker */
+ goto restart_wait;
+ }
+ /* Better rely on the system than on a list of processes to check if it was the last one */
+ else if (exitpid == -1 && errno == ECHILD) {
+ ha_warning("All workers exited. Exiting... (%d)\n", (exitcode > 0) ? exitcode : EXIT_SUCCESS);
+ atexit_flag = 0;
+ if (exitcode > 0)
+ exit(exitcode); /* parent must leave using the status code that provoked the exit */
+ exit(EXIT_SUCCESS);
+ }
+
+}
+
+/* ----- IPC FD (sockpair) related ----- */
+
+/* This wrapper is called from the workers. It is registered instead of the
+ * normal listener_accept() so the worker can exit() when it detects that the
+ * master closed the IPC FD. If it's not a close, we just call the regular
+ * listener_accept() function.
+ */
+void mworker_accept_wrapper(int fd)
+{
+ char c;
+ int ret;
+
+ while (1) {
+ ret = recv(fd, &c, 1, MSG_PEEK);
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(fd);
+ return;
+ }
+ break;
+ } else if (ret > 0) {
+ struct listener *l = fdtab[fd].owner;
+
+ if (l)
+ listener_accept(l);
+ return;
+ } else if (ret == 0) {
+ /* At this step the master went down before
+ * this worker performed a 'normal' exit.
+ * We want to exit with an error, but other
+ * threads could currently be processing
+ * some work, so we can't perform a clean
+ * deinit().
+ */
+ exit(EXIT_FAILURE);
+ }
+ }
+ return;
+}
+
+/*
+ * This function registers the accept wrapper for the sockpair of the master
+ * worker. It's only handled by worker thread #0. Other threads and master do
+ * nothing here. It always returns 1 (success).
+ */
+static int mworker_pipe_register_per_thread()
+{
+ if (!(global.mode & MODE_MWORKER) || master)
+ return 1;
+
+ if (tid != 0)
+ return 1;
+
+ if (proc_self->ipc_fd[1] < 0) /* proc_self was incomplete and we can't find the socketpair */
+ return 1;
+
+ fd_set_nonblock(proc_self->ipc_fd[1]);
+ /* In multi-thread mode, we need only one thread to process
+ * events on the pipe with the master
+ */
+ fdtab[proc_self->ipc_fd[1]].iocb = mworker_accept_wrapper;
+ fd_want_recv(proc_self->ipc_fd[1]);
+ return 1;
+}
+
+REGISTER_PER_THREAD_INIT(mworker_pipe_register_per_thread);
+
+/* ----- proxies ----- */
+/*
+ * Upon a reload, the master worker needs to close all listener FDs except the mworker_pipe
+ * FD and the FDs provided by fd@
+ */
+void mworker_cleanlisteners()
+{
+ struct listener *l, *l_next;
+ struct proxy *curproxy;
+ struct peers *curpeers;
+
+ /* we might have to unbind some peers sections from some processes */
+ for (curpeers = cfg_peers; curpeers; curpeers = curpeers->next) {
+ if (!curpeers->peers_fe)
+ continue;
+
+ stop_proxy(curpeers->peers_fe);
+ /* disable this peer section so that it kills itself */
+ if (curpeers->sighandler)
+ signal_unregister_handler(curpeers->sighandler);
+ if (curpeers->sync_task)
+ task_destroy(curpeers->sync_task);
+ curpeers->sync_task = NULL;
+ task_destroy(curpeers->peers_fe->task);
+ curpeers->peers_fe->task = NULL;
+ curpeers->peers_fe = NULL;
+ }
+
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ int listen_in_master = 0;
+
+ list_for_each_entry_safe(l, l_next, &curproxy->conf.listeners, by_fe) {
+ /* remove the listener, but not those we need in the master... */
+ if (!(l->rx.flags & RX_F_MWORKER)) {
+ unbind_listener(l);
+ delete_listener(l);
+ } else {
+ listen_in_master = 1;
+ }
+ }
+ /* if the proxy shouldn't be in the master, we stop it */
+ if (!listen_in_master)
+ curproxy->flags |= PR_FL_DISABLED;
+ }
+}
+
+/* Upon a configuration loading error, some mworker_proc structures and their
+ * FDs/servers were assigned but the worker was never forked; we must close
+ * the FDs and remove the servers.
+ */
+void mworker_cleanup_proc()
+{
+ struct mworker_proc *child, *it;
+
+ list_for_each_entry_safe(child, it, &proc_list, list) {
+
+ if (child->pid == -1) {
+ /* Close the socketpair master side. We don't need to
+ * close the worker side, because it's stored in the
+ * GLOBAL cli listener which was supposed to be in the
+ * worker and which will be closed in
+ * mworker_cleanlisteners()
+ */
+ if (child->ipc_fd[0] > -1)
+ close(child->ipc_fd[0]);
+ if (child->srv) {
+ /* only exists if we created a master CLI listener */
+ srv_drop(child->srv);
+ }
+ LIST_DELETE(&child->list);
+ mworker_free_child(child);
+ }
+ }
+}
+
+
+/* Displays workers and processes */
+static int cli_io_handler_show_proc(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct mworker_proc *child;
+ int old = 0;
+ int up = date.tv_sec - proc_self->timestamp;
+ char *uptime = NULL;
+ char *reloadtxt = NULL;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ chunk_reset(&trash);
+
+ memprintf(&reloadtxt, "%d [failed: %d]", proc_self->reloads, proc_self->failedreloads);
+ chunk_printf(&trash, "#%-14s %-15s %-15s %-15s %-15s\n", "<PID>", "<type>", "<reloads>", "<uptime>", "<version>");
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15s %-15s %-15s\n", (unsigned int)getpid(), "master", reloadtxt, uptime, haproxy_version);
+ ha_free(&reloadtxt);
+ ha_free(&uptime);
+
+ /* displays current processes */
+
+ chunk_appendf(&trash, "# workers\n");
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ old++;
+ continue;
+ }
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, "worker", child->reloads, uptime, child->version);
+ ha_free(&uptime);
+ }
+
+ /* displays old processes */
+
+ if (old) {
+ chunk_appendf(&trash, "# old workers\n");
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, "worker", child->reloads, uptime, child->version);
+ ha_free(&uptime);
+ }
+ }
+ }
+
+ /* displays external process */
+ chunk_appendf(&trash, "# programs\n");
+ old = 0;
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_PROG))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ old++;
+ continue;
+ }
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, child->id, child->reloads, uptime, "-");
+ ha_free(&uptime);
+ }
+
+ if (old) {
+ chunk_appendf(&trash, "# old programs\n");
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_PROG))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, child->id, child->reloads, uptime, "-");
+ ha_free(&uptime);
+ }
+ }
+ }
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ /* dump complete */
+ return 1;
+}
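+
+/* For reference, a hedged sketch of the output built above, as seen on the
+ * master CLI with "show proc" (PIDs, uptimes and versions are illustrative):
+ *
+ *   #<PID>          <type>          <reloads>       <uptime>        <version>
+ *   1200            master          1 [failed: 0]   0d00h02m12s     2.6.0
+ *   # workers
+ *   1201            worker          1               0d00h02m12s     2.6.0
+ *   # programs
+ *   1202            my-sidecar      1               0d00h02m12s     -
+ */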
+
+/* reload the master process */
+static int cli_parse_reload(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ mworker_reload();
+
+ return 1;
+}
+
+
+static int mworker_parse_global_max_reloads(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int linenum, char **err)
+{
+
+ int err_code = 0;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "%sparsing [%s:%d] : '%s' expects an integer argument.\n", *err, file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ max_reloads = atol(args[1]);
+ if (max_reloads < 0) {
+ memprintf(err, "%sparsing [%s:%d] '%s' : invalid value %d, must be >= 0", *err, file, linenum, args[0], max_reloads);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ return err_code;
+}
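+
+/* Illustrative use of the keyword registered below, in the "global" section
+ * of the configuration (the value 3 is only an example):
+ *
+ *   global
+ *       mworker-max-reloads 3
+ *
+ * Workers that exceed this number of reloads become candidates for
+ * mworker_kill_max_reloads().
+ */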
+
+void mworker_free_child(struct mworker_proc *child)
+{
+ int i;
+
+ if (child == NULL)
+ return;
+
+ for (i = 0; child->command && child->command[i]; i++)
+ ha_free(&child->command[i]);
+
+ ha_free(&child->command);
+ ha_free(&child->id);
+ ha_free(&child->version);
+ free(child);
+}
+
+static struct cfg_kw_list mworker_kws = {{ }, {
+ { CFG_GLOBAL, "mworker-max-reloads", mworker_parse_global_max_reloads },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &mworker_kws);
+
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "@<relative pid>", NULL }, "@<relative pid> : send a command to the <relative pid> process", NULL, cli_io_handler_show_proc, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "@!<pid>", NULL }, "@!<pid> : send a command to the <pid> process", cli_parse_default, NULL, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "@master", NULL }, "@master : send a command to the master process", cli_parse_default, NULL, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "show", "proc", NULL }, "show proc : show processes status", cli_parse_default, cli_io_handler_show_proc, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "reload", NULL }, "reload : reload haproxy", cli_parse_reload, NULL, NULL, NULL, ACCESS_MASTER_ONLY},
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/namespace.c b/src/namespace.c
new file mode 100644
index 0000000..1fc8439
--- /dev/null
+++ b/src/namespace.c
@@ -0,0 +1,131 @@
+#define _GNU_SOURCE
+
+#include <sched.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/chunk.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/hash.h>
+#include <haproxy/namespace.h>
+#include <haproxy/signal.h>
+
+/* Opens the namespace <ns_name> and returns the FD or -1 in case of error
+ * (check errno).
+ */
+static int open_named_namespace(const char *ns_name)
+{
+ if (chunk_printf(&trash, "/var/run/netns/%s", ns_name) < 0)
+ return -1;
+ return open(trash.area, O_RDONLY | O_CLOEXEC);
+}
+
+static int default_namespace = -1;
+
+static int init_default_namespace()
+{
+ if (chunk_printf(&trash, "/proc/%d/ns/net", getpid()) < 0)
+ return -1;
+ default_namespace = open(trash.area, O_RDONLY | O_CLOEXEC);
+ return default_namespace;
+}
+
+static struct eb_root namespace_tree_root = EB_ROOT;
+
+static void netns_sig_stop(struct sig_handler *sh)
+{
+ struct ebpt_node *node, *next;
+ struct netns_entry *entry;
+
+ /* close namespace file descriptors and remove registered namespaces from the
+ * tree when stopping */
+ node = ebpt_first(&namespace_tree_root);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ entry = container_of(node, struct netns_entry, node);
+ free(entry->node.key);
+ close(entry->fd);
+ node = next;
+ }
+}
+
+int netns_init(void)
+{
+ int err_code = 0;
+
+ /* if no namespaces have been defined in the config then
+ * there is no point in trying to initialize anything:
+ * my_socketat() will never be called with a valid namespace
+ * structure and thus switching back to the default namespace
+ * is not needed either */
+ if (!eb_is_empty(&namespace_tree_root)) {
+ if (init_default_namespace() < 0) {
+ ha_alert("Failed to open the default namespace.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ signal_register_fct(0, netns_sig_stop, 0);
+
+ return err_code;
+}
+
+struct netns_entry* netns_store_insert(const char *ns_name)
+{
+ struct netns_entry *entry = NULL;
+ int fd = open_named_namespace(ns_name);
+ if (fd == -1)
+ goto out;
+
+ entry = calloc(1, sizeof(*entry));
+ if (!entry)
+ goto out;
+ entry->fd = fd;
+ entry->node.key = strdup(ns_name);
+ entry->name_len = strlen(ns_name);
+ ebis_insert(&namespace_tree_root, &entry->node);
+out:
+ return entry;
+}
+
+const struct netns_entry* netns_store_lookup(const char *ns_name, size_t ns_name_len)
+{
+ struct ebpt_node *node;
+
+ node = ebis_lookup_len(&namespace_tree_root, ns_name, ns_name_len);
+ if (node)
+ return ebpt_entry(node, struct netns_entry, node);
+ else
+ return NULL;
+}
+
+/* Opens a socket in the namespace described by <ns> with the parameters <domain>,
+ * <type> and <protocol> and returns the FD or -1 in case of error (check errno).
+ */
+int my_socketat(const struct netns_entry *ns, int domain, int type, int protocol)
+{
+ int sock;
+
+ if (default_namespace >= 0 && ns && setns(ns->fd, CLONE_NEWNET) == -1)
+ return -1;
+
+ sock = socket(domain, type, protocol);
+
+ if (default_namespace >= 0 && ns && setns(default_namespace, CLONE_NEWNET) == -1) {
+ if (sock >= 0)
+ close(sock);
+ return -1;
+ }
+ return sock;
+}
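+
+/* A minimal usage sketch combining the helpers above; error handling is
+ * elided and "mynetns" is a hypothetical namespace name:
+ *
+ *   const struct netns_entry *ns;
+ *
+ *   ns = netns_store_lookup("mynetns", strlen("mynetns"));
+ *   if (ns) {
+ *       int fd = my_socketat(ns, AF_INET, SOCK_STREAM, 0);
+ *       // <fd> was created inside "mynetns"; the thread is back in the
+ *       // default namespace once my_socketat() returns.
+ *       if (fd >= 0)
+ *           close(fd);
+ *   }
+ */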
+
+REGISTER_BUILD_OPTS("Built with network namespace support.");
diff --git a/src/ncbuf.c b/src/ncbuf.c
new file mode 100644
index 0000000..a96cf59
--- /dev/null
+++ b/src/ncbuf.c
@@ -0,0 +1,984 @@
+#include <haproxy/ncbuf.h>
+
+#include <string.h>
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifdef STANDALONE
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <haproxy/list.h>
+#endif /* STANDALONE */
+
+#ifdef DEBUG_STRICT
+# include <haproxy/bug.h>
+#else
+# include <stdio.h>
+# include <stdlib.h>
+
+# undef BUG_ON
+# define BUG_ON(x) if (x) { fprintf(stderr, "CRASH ON %s:%d\n", __func__, __LINE__); abort(); }
+
+# undef BUG_ON_HOT
+# define BUG_ON_HOT(x) if (x) { fprintf(stderr, "CRASH ON %s:%d\n", __func__, __LINE__); abort(); }
+#endif /* DEBUG_STRICT */
+
+#include <haproxy/compiler.h>
+
+/* ******** internal API ******** */
+
+#define NCB_BLK_NULL ((struct ncb_blk){ .st = NULL })
+
+#define NCB_BK_F_GAP 0x01 /* block represents a gap */
+#define NCB_BK_F_FIN 0x02 /* special reduced gap present at the end of the buffer */
+struct ncb_blk {
+ char *st; /* first byte of the block */
+ char *end; /* first byte after this block */
+
+ char *sz_ptr; /* pointer to size element - NULL for reduced gap */
+ ncb_sz_t sz; /* size of the block */
+ ncb_sz_t sz_data; /* size of the data following the block - invalid for reduced GAP */
+ ncb_sz_t off; /* offset of block in buffer */
+
+ char flag;
+};
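+
+/* A hedged sketch of the layout walked by this internal API: the first block
+ * is always DATA and blocks strictly alternate between DATA and GAP (see
+ * ncb_blk_next() below). A regular gap stores its own size and the size of
+ * the following data block in its header, while the optional final "reduced"
+ * gap is too small to hold such a header:
+ *
+ *   head                                                          end
+ *   | DATA | GAP(sz, sz_data) | DATA | ... | DATA | reduced GAP |
+ */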
+
+/* Return pointer to <off> relative to <buf> head. Support buffer wrapping. */
+static char *ncb_peek(const struct ncbuf *buf, ncb_sz_t off)
+{
+ char *ptr = ncb_head(buf) + off;
+ if (ptr >= buf->area + buf->size)
+ ptr -= buf->size;
+ return ptr;
+}
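+
+/* Worked example: with buf->size == 100 and ncb_head(buf) at buf->area + 90,
+ * an offset of 15 would yield buf->area + 105, which is past the end of the
+ * storage and thus wraps around to buf->area + 5.
+ */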
+
+/* Returns the reserved space of <buf> which contains the size of the first
+ * data block.
+ */
+static char *ncb_reserved(const struct ncbuf *buf)
+{
+ return ncb_peek(buf, buf->size - NCB_RESERVED_SZ);
+}
+
+/* Encode <off> at <st> position in <buf>. Support wrapping. */
+static forceinline void ncb_write_off(const struct ncbuf *buf, char *st, ncb_sz_t off)
+{
+ int i;
+
+ BUG_ON_HOT(st >= buf->area + buf->size);
+
+ for (i = 0; i < sizeof(ncb_sz_t); ++i) {
+ (*st) = off >> (8 * i) & 0xff;
+
+ if ((++st) == ncb_wrap(buf))
+ st = ncb_orig(buf);
+ }
+}
+
+/* Decode offset stored at <st> position in <buf>. Support wrapping. */
+static forceinline ncb_sz_t ncb_read_off(const struct ncbuf *buf, char *st)
+{
+ int i;
+ ncb_sz_t off = 0;
+
+ BUG_ON_HOT(st >= buf->area + buf->size);
+
+ for (i = 0; i < sizeof(ncb_sz_t); ++i) {
+ off |= (unsigned char )(*st) << (8 * i);
+
+ if ((++st) == ncb_wrap(buf))
+ st = ncb_orig(buf);
+ }
+
+ return off;
+}
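+
+/* Worked example for the two helpers above, assuming a 4-byte ncb_sz_t:
+ * ncb_write_off(buf, st, 0x01020304) stores the bytes 04 03 02 01 starting at
+ * <st>, least significant byte first, hopping back to ncb_orig(buf) whenever
+ * the write position reaches ncb_wrap(buf); ncb_read_off() performs the
+ * reverse walk and returns 0x01020304.
+ */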
+
+/* Add <off> to the offset stored at <st> in <buf>. Support wrapping. */
+static forceinline void ncb_inc_off(const struct ncbuf *buf, char *st, ncb_sz_t off)
+{
+ const ncb_sz_t old = ncb_read_off(buf, st);
+ ncb_write_off(buf, st, old + off);
+}
+
+/* Returns true if a gap cannot be inserted at <off>: a reduced gap must be used. */
+static forceinline int ncb_off_reduced(const struct ncbuf *b, ncb_sz_t off)
+{
+ return off + NCB_GAP_MIN_SZ > ncb_size(b);
+}
+
+/* Returns true if <blk> is the special NULL block. */
+static forceinline int ncb_blk_is_null(const struct ncb_blk *blk)
+{
+ return !blk->st;
+}
+
+/* Returns true if <blk> is the last block of <buf>. */
+static forceinline int ncb_blk_is_last(const struct ncbuf *buf, const struct ncb_blk *blk)
+{
+ BUG_ON_HOT(blk->off + blk->sz > ncb_size(buf));
+ return blk->off + blk->sz == ncb_size(buf);
+}
+
+/* Returns the first block of <buf> which is always a DATA. */
+static struct ncb_blk ncb_blk_first(const struct ncbuf *buf)
+{
+ struct ncb_blk blk;
+
+ if (ncb_is_null(buf))
+ return NCB_BLK_NULL;
+
+ blk.st = ncb_head(buf);
+
+ blk.sz_ptr = ncb_reserved(buf);
+ blk.sz = ncb_read_off(buf, ncb_reserved(buf));
+ blk.sz_data = 0;
+ BUG_ON_HOT(blk.sz > ncb_size(buf));
+
+ blk.end = ncb_peek(buf, blk.sz);
+ blk.off = 0;
+ blk.flag = 0;
+
+ return blk;
+}
+
+/* Returns the block following <prev> in the buffer <buf>. */
+static struct ncb_blk ncb_blk_next(const struct ncbuf *buf,
+ const struct ncb_blk *prev)
+{
+ struct ncb_blk blk;
+
+ BUG_ON_HOT(ncb_blk_is_null(prev));
+
+ if (ncb_blk_is_last(buf, prev))
+ return NCB_BLK_NULL;
+
+ blk.st = prev->end;
+ blk.off = prev->off + prev->sz;
+ blk.flag = ~prev->flag & NCB_BK_F_GAP;
+
+ if (blk.flag & NCB_BK_F_GAP) {
+ if (ncb_off_reduced(buf, blk.off)) {
+ blk.flag |= NCB_BK_F_FIN;
+ blk.sz_ptr = NULL;
+ blk.sz = ncb_size(buf) - blk.off;
+ blk.sz_data = 0;
+
+ /* A reduced gap can only be the last block. */
+ BUG_ON_HOT(!ncb_blk_is_last(buf, &blk));
+ }
+ else {
+ blk.sz_ptr = ncb_peek(buf, blk.off + NCB_GAP_SZ_OFF);
+ blk.sz = ncb_read_off(buf, blk.sz_ptr);
+ blk.sz_data = ncb_read_off(buf, ncb_peek(buf, blk.off + NCB_GAP_SZ_DATA_OFF));
+ BUG_ON_HOT(blk.sz < NCB_GAP_MIN_SZ);
+ }
+ }
+ else {
+ blk.sz_ptr = ncb_peek(buf, prev->off + NCB_GAP_SZ_DATA_OFF);
+ blk.sz = prev->sz_data;
+ blk.sz_data = 0;
+
+ /* Only the first DATA block can be empty. If this happens, a GAP
+ * merge should have been performed.
+ */
+ BUG_ON_HOT(!blk.sz);
+ }
+
+ BUG_ON_HOT(blk.off + blk.sz > ncb_size(buf));
+ blk.end = ncb_peek(buf, blk.off + blk.sz);
+
+ return blk;
+}
+
+/* Returns the block containing offset <off>. Note that if <off> is at the
+ * frontier between two blocks, this function will return the preceding one.
+ * This is done to easily merge blocks on insertion/deletion.
+ */
+static struct ncb_blk ncb_blk_find(const struct ncbuf *buf, ncb_sz_t off)
+{
+ struct ncb_blk blk;
+
+ if (ncb_is_null(buf))
+ return NCB_BLK_NULL;
+
+ BUG_ON_HOT(off >= ncb_size(buf));
+
+ for (blk = ncb_blk_first(buf); off > blk.off + blk.sz;
+ blk = ncb_blk_next(buf, &blk)) {
+ }
+
+ return blk;
+}
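+
+/* Illustrative example: if <buf> starts with a 16-byte DATA block followed
+ * by a gap, ncb_blk_find(buf, 16) returns the DATA block (off=0, sz=16) and
+ * not the gap, because offset 16 sits exactly at the frontier. An insertion
+ * starting there can thus extend the preceding block without a special case.
+ */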
+
+/* Transform absolute offset <off> to a relative one from <blk> start. */
+static forceinline ncb_sz_t ncb_blk_off(const struct ncb_blk *blk, ncb_sz_t off)
+{
+ BUG_ON_HOT(off < blk->off || off > blk->off + blk->sz);
+ BUG_ON_HOT(off - blk->off > blk->sz);
+ return off - blk->off;
+}
+
+/* Simulate insertion in <buf> of <data> of length <len> at offset <off>. This
+ * ensures that minimal block sizes are respected for newly formed gaps. <blk>
+ * must be the block where the insert operation begins. If <mode> is
+ * NCB_ADD_COMPARE, old and new overlapped data are compared to validate the
+ * insertion.
+ *
+ * Returns NCB_RET_OK if insertion can proceed.
+ */
+static enum ncb_ret ncb_check_insert(const struct ncbuf *buf,
+ const struct ncb_blk *blk, ncb_sz_t off,
+ const char *data, ncb_sz_t len,
+ enum ncb_add_mode mode)
+{
+ struct ncb_blk next;
+ ncb_sz_t off_blk = ncb_blk_off(blk, off);
+ ncb_sz_t to_copy;
+ ncb_sz_t left = len;
+
+ /* If insertion starts in a gap, it must leave enough space to keep the
+ * gap header.
+ */
+ if (left && (blk->flag & NCB_BK_F_GAP)) {
+ if (off_blk < NCB_GAP_MIN_SZ)
+ return NCB_RET_GAP_SIZE;
+ }
+
+ next = *blk;
+ while (left) {
+ off_blk = ncb_blk_off(&next, off);
+ to_copy = MIN(left, next.sz - off_blk);
+
+ if (next.flag & NCB_BK_F_GAP && off_blk + to_copy < next.sz) {
+ /* Insertion must leave enough space for a new gap
+ * header if it stops in the middle of a gap.
+ */
+ const ncb_sz_t gap_sz = next.sz - (off_blk + to_copy);
+ if (gap_sz < NCB_GAP_MIN_SZ && !ncb_blk_is_last(buf, &next))
+ return NCB_RET_GAP_SIZE;
+ }
+ else if (!(next.flag & NCB_BK_F_GAP) && mode == NCB_ADD_COMPARE) {
+ /* Compare memory of data block in NCB_ADD_COMPARE mode. */
+ const ncb_sz_t off_blk = ncb_blk_off(&next, off);
+ char *st = ncb_peek(buf, off);
+
+ to_copy = MIN(left, next.sz - off_blk);
+ if (st + to_copy > ncb_wrap(buf)) {
+ const ncb_sz_t sz1 = ncb_wrap(buf) - st;
+ if (memcmp(st, data, sz1))
+ return NCB_RET_DATA_REJ;
+ if (memcmp(ncb_orig(buf), data + sz1, to_copy - sz1))
+ return NCB_RET_DATA_REJ;
+ }
+ else {
+ if (memcmp(st, data, to_copy))
+ return NCB_RET_DATA_REJ;
+ }
+ }
+
+ left -= to_copy;
+ data += to_copy;
+ off += to_copy;
+
+ next = ncb_blk_next(buf, &next);
+ }
+
+ return NCB_RET_OK;
+}
+
+/* Fill new <data> of length <len> inside an already existing data <blk> at
+ * offset <off>. Offset is relative to <blk> so it cannot be greater than the
+ * block size. <mode> specifies if old data are preserved or overwritten.
+ */
+static ncb_sz_t ncb_fill_data_blk(const struct ncbuf *buf,
+ const struct ncb_blk *blk, ncb_sz_t off,
+ const char *data, ncb_sz_t len,
+ enum ncb_add_mode mode)
+{
+ const ncb_sz_t to_copy = MIN(len, blk->sz - off);
+ char *ptr = NULL;
+
+ BUG_ON_HOT(off > blk->sz);
+ /* This can happen due to a previous ncb_blk_find() call. In this
+ * case the current fill is a noop.
+ */
+ if (off == blk->sz)
+ return 0;
+
+ if (mode == NCB_ADD_OVERWRT) {
+ ptr = ncb_peek(buf, blk->off + off);
+
+ if (ptr + to_copy >= ncb_wrap(buf)) {
+ const ncb_sz_t sz1 = ncb_wrap(buf) - ptr;
+ memcpy(ptr, data, sz1);
+ memcpy(ncb_orig(buf), data + sz1, to_copy - sz1);
+ }
+ else {
+ memcpy(ptr, data, to_copy);
+ }
+ }
+
+ return to_copy;
+}
+
+/* Fill the gap <blk> starting at <off> with new <data> of length <len>. <off>
+ * is relative to <blk> so it cannot be greater than the block size.
+ */
+static ncb_sz_t ncb_fill_gap_blk(const struct ncbuf *buf,
+ const struct ncb_blk *blk, ncb_sz_t off,
+ const char *data, ncb_sz_t len)
+{
+ const ncb_sz_t to_copy = MIN(len, blk->sz - off);
+ char *ptr;
+
+ BUG_ON_HOT(off > blk->sz);
+ /* This can happen due to a previous ncb_blk_find() call. In this
+ * case the current fill is a noop.
+ */
+ if (off == blk->sz)
+ return 0;
+
+ /* A new gap must be created if insertion stopped before gap end. */
+ if (off + to_copy < blk->sz) {
+ const ncb_sz_t gap_off = blk->off + off + to_copy;
+ const ncb_sz_t gap_sz = blk->sz - off - to_copy;
+
+ BUG_ON_HOT(!ncb_off_reduced(buf, gap_off) &&
+ blk->off + blk->sz - gap_off < NCB_GAP_MIN_SZ);
+
+ /* write the new gap header unless this is a reduced gap. */
+ if (!ncb_off_reduced(buf, gap_off)) {
+ char *gap_ptr = ncb_peek(buf, gap_off + NCB_GAP_SZ_OFF);
+ char *gap_data_ptr = ncb_peek(buf, gap_off + NCB_GAP_SZ_DATA_OFF);
+
+ ncb_write_off(buf, gap_ptr, gap_sz);
+ ncb_write_off(buf, gap_data_ptr, blk->sz_data);
+ }
+ }
+
+ /* fill the gap with new data */
+ ptr = ncb_peek(buf, blk->off + off);
+ if (ptr + to_copy >= ncb_wrap(buf)) {
+ ncb_sz_t sz1 = ncb_wrap(buf) - ptr;
+ memcpy(ptr, data, sz1);
+ memcpy(ncb_orig(buf), data + sz1, to_copy - sz1);
+ }
+ else {
+ memcpy(ptr, data, to_copy);
+ }
+
+ return to_copy;
+}
+
+/* ******** public API ******** */
+
+/* Initialize or reset <buf> by clearing all data. Its size is untouched.
+ * Buffer is positioned to <head> offset. Use 0 to realign it. <buf> must not
+ * be NCBUF_NULL.
+ */
+void ncb_init(struct ncbuf *buf, ncb_sz_t head)
+{
+ BUG_ON_HOT(ncb_is_null(buf));
+
+ BUG_ON_HOT(head >= buf->size);
+ buf->head = head;
+
+ ncb_write_off(buf, ncb_reserved(buf), 0);
+ ncb_write_off(buf, ncb_head(buf), ncb_size(buf));
+ ncb_write_off(buf, ncb_peek(buf, sizeof(ncb_sz_t)), 0);
+}
+
+/* Construct a ncbuf with all its parameters. */
+struct ncbuf ncb_make(char *area, ncb_sz_t size, ncb_sz_t head)
+{
+ struct ncbuf buf;
+
+ /* Ensure that there is enough space for the reserved space and data.
+ * This is the minimal value to not crash later.
+ */
+ BUG_ON_HOT(size <= NCB_RESERVED_SZ);
+
+ buf.area = area;
+ buf.size = size;
+ buf.head = head;
+
+ return buf;
+}
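+
+/* Usage sketch (illustrative): build the descriptor, then initialize the
+ * internal bookkeeping before any insertion. ncb_make() only fills the
+ * structure fields and does not touch the storage area:
+ *
+ *   char area[16384];
+ *   struct ncbuf b = ncb_make(area, sizeof(area), 0);
+ *   ncb_init(&b, 0);
+ *   // usable payload is ncb_size(&b), i.e. sizeof(area) - NCB_RESERVED_SZ
+ */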
+
+/* Returns the total number of bytes stored in whole <buf>. */
+ncb_sz_t ncb_total_data(const struct ncbuf *buf)
+{
+ struct ncb_blk blk;
+ ncb_sz_t total = 0;
+
+ for (blk = ncb_blk_first(buf); !ncb_blk_is_null(&blk); blk = ncb_blk_next(buf, &blk)) {
+ if (!(blk.flag & NCB_BK_F_GAP))
+ total += blk.sz;
+ }
+
+ return total;
+}
+
+/* Returns true if there is no data anywhere in <buf>. */
+int ncb_is_empty(const struct ncbuf *buf)
+{
+ int first_data, first_gap;
+
+ if (ncb_is_null(buf))
+ return 1;
+
+ first_data = ncb_read_off(buf, ncb_reserved(buf));
+ BUG_ON_HOT(first_data > ncb_size(buf));
+ /* Buffer is not empty if the first data block is not null. */
+ if (first_data)
+ return 0;
+
+ /* Head contains the first gap size if first data block is empty. */
+ first_gap = ncb_read_off(buf, ncb_head(buf));
+ BUG_ON_HOT(first_gap > ncb_size(buf));
+ return first_gap == ncb_size(buf);
+}
+
+/* Returns true if no more data can be inserted in <buf>. */
+int ncb_is_full(const struct ncbuf *buf)
+{
+ int first_data;
+
+ if (ncb_is_null(buf))
+ return 0;
+
+ /* First data block must cover whole buffer if full. */
+ first_data = ncb_read_off(buf, ncb_reserved(buf));
+ BUG_ON_HOT(first_data > ncb_size(buf));
+ return first_data == ncb_size(buf);
+}
+
+/* Returns true if <buf> contains data fragmented by gaps. */
+int ncb_is_fragmented(const struct ncbuf *buf)
+{
+ struct ncb_blk data, gap;
+
+ if (ncb_is_null(buf))
+ return 0;
+
+ /* check if buffer is empty or full */
+ if (ncb_is_empty(buf) || ncb_is_full(buf))
+ return 0;
+
+ /* check that following gap is the last block */
+ data = ncb_blk_first(buf);
+ gap = ncb_blk_next(buf, &data);
+ return !ncb_blk_is_last(buf, &gap);
+}
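+
+/* Illustrative example: after inserting data at [0,16) and [32,48), the
+ * layout is DATA/GAP/DATA/GAP and ncb_is_fragmented() returns true. Filling
+ * [16,32) merges everything into a single leading DATA block, and it
+ * returns false again.
+ */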
+
+/* Returns the number of bytes of data available in <buf> starting at offset
+ * <off> until the next gap or the buffer end. The counted data may wrap if
+ * the buffer storage is not aligned.
+ */
+ncb_sz_t ncb_data(const struct ncbuf *buf, ncb_sz_t off)
+{
+ struct ncb_blk blk;
+ ncb_sz_t off_blk;
+
+ if (ncb_is_null(buf))
+ return 0;
+
+ blk = ncb_blk_find(buf, off);
+ off_blk = ncb_blk_off(&blk, off);
+
+ /* If <off> is at the frontier between two blocks and <blk> is a gap,
+ * retrieve the next data block.
+ */
+ if (blk.flag & NCB_BK_F_GAP && off_blk == blk.sz &&
+ !ncb_blk_is_last(buf, &blk)) {
+ blk = ncb_blk_next(buf, &blk);
+ off_blk = ncb_blk_off(&blk, off);
+ }
+
+ if (blk.flag & NCB_BK_F_GAP)
+ return 0;
+
+ return blk.sz - off_blk;
+}
+
+/* Add a new block at <data> of size <len> in <buf> at offset <off>.
+ *
+ * Returns NCB_RET_OK on success. On error the following codes are returned :
+ * - NCB_RET_GAP_SIZE : cannot add data because the gap formed is too small
+ * - NCB_RET_DATA_REJ : old data would be overwritten by different ones in
+ * NCB_ADD_COMPARE mode.
+ */
+enum ncb_ret ncb_add(struct ncbuf *buf, ncb_sz_t off,
+ const char *data, ncb_sz_t len, enum ncb_add_mode mode)
+{
+ struct ncb_blk blk;
+ ncb_sz_t left = len;
+ enum ncb_ret ret;
+ char *new_sz;
+
+ if (!len)
+ return NCB_RET_OK;
+
+ BUG_ON_HOT(off + len > ncb_size(buf));
+
+ /* Get block where insertion begins. */
+ blk = ncb_blk_find(buf, off);
+
+ /* Check if insertion is possible. */
+ ret = ncb_check_insert(buf, &blk, off, data, len, mode);
+ if (ret != NCB_RET_OK)
+ return ret;
+
+ if (blk.flag & NCB_BK_F_GAP) {
+ /* Reduce gap size if insertion begins in a gap. Gap data size
+ * is reset and will be recalculated during insertion.
+ */
+ const ncb_sz_t gap_sz = off - blk.off;
+ BUG_ON_HOT(gap_sz < NCB_GAP_MIN_SZ);
+
+ /* pointer to data size to increase. */
+ new_sz = ncb_peek(buf, blk.off + NCB_GAP_SZ_DATA_OFF);
+
+ ncb_write_off(buf, blk.sz_ptr, gap_sz);
+ ncb_write_off(buf, new_sz, 0);
+ }
+ else {
+ /* pointer to data size to increase. */
+ new_sz = blk.sz_ptr;
+ }
+
+ /* insert data */
+ while (left) {
+ struct ncb_blk next;
+ const ncb_sz_t off_blk = ncb_blk_off(&blk, off);
+ ncb_sz_t done;
+
+ /* Retrieve the next block now: this must be done before
+ * overwriting a gap.
+ */
+ next = ncb_blk_next(buf, &blk);
+
+ if (blk.flag & NCB_BK_F_GAP) {
+ done = ncb_fill_gap_blk(buf, &blk, off_blk, data, left);
+
+ /* update the inserted data block size */
+ if (off + done == blk.off + blk.sz) {
+ /* merge next data block if insertion reached gap end */
+ ncb_inc_off(buf, new_sz, done + blk.sz_data);
+ }
+ else {
+ /* insertion stopped before gap end */
+ ncb_inc_off(buf, new_sz, done);
+ }
+ }
+ else {
+ done = ncb_fill_data_blk(buf, &blk, off_blk, data, left, mode);
+ }
+
+ BUG_ON_HOT(done > blk.sz || done > left);
+ left -= done;
+ data += done;
+ off += done;
+
+ blk = next;
+ }
+
+ return NCB_RET_OK;
+}
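+
+/* Usage sketch (illustrative; frag1/frag2 are hypothetical fragments):
+ * buffering out-of-order data, as a QUIC stream would. Offsets are absolute
+ * within the buffer:
+ *
+ *   ncb_add(&b, 100, frag2, frag2_len, NCB_ADD_COMPARE);  // future data first
+ *   ncb_add(&b, 0, frag1, frag1_len, NCB_ADD_COMPARE);    // then the head
+ *   // ncb_data(&b, 0) now reports the contiguous bytes ready at the head
+ *
+ * NCB_ADD_COMPARE is the safest mode for retransmitted data: an overlap
+ * carrying different bytes is rejected with NCB_RET_DATA_REJ. Any call may
+ * still fail with NCB_RET_GAP_SIZE if it would leave a gap smaller than
+ * NCB_GAP_MIN_SZ.
+ */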
+
+/* Advance the head of <buf> to the offset <adv>. Data at the start of buffer
+ * will be lost while some space will be formed at the end to be able to insert
+ * new data.
+ *
+ * Returns NCB_RET_OK on success. It may return NCB_RET_GAP_SIZE if the
+ * operation is rejected because it would form a too-small gap at the front.
+ * An advance done only inside a data block is guaranteed to succeed.
+ */
+enum ncb_ret ncb_advance(struct ncbuf *buf, ncb_sz_t adv)
+{
+ struct ncb_blk start, last;
+ ncb_sz_t off_blk;
+ ncb_sz_t first_data_sz;
+
+ BUG_ON_HOT(adv > ncb_size(buf));
+ if (!adv)
+ return NCB_RET_OK;
+
+ /* Special case if adv is full size. This is equivalent to a reset. */
+ if (adv == ncb_size(buf)) {
+ ncb_init(buf, buf->head);
+ return NCB_RET_OK;
+ }
+
+ start = ncb_blk_find(buf, adv);
+
+ /* Special case if advance until the last block which is a GAP. The
+ * buffer will be left empty and is thus equivalent to a reset.
+ */
+ if (ncb_blk_is_last(buf, &start) && (start.flag & NCB_BK_F_GAP)) {
+ ncb_sz_t new_head = buf->head + adv;
+ if (new_head >= buf->size)
+ new_head -= buf->size;
+
+ ncb_init(buf, new_head);
+ return NCB_RET_OK;
+ }
+
+ last = start;
+ while (!ncb_blk_is_last(buf, &last))
+ last = ncb_blk_next(buf, &last);
+
+ off_blk = ncb_blk_off(&start, adv);
+
+ if (start.flag & NCB_BK_F_GAP) {
+ /* If advance in a GAP, its new size must be big enough. */
+ if (start.sz == off_blk) {
+ /* GAP removed. Buffer will start with following DATA block. */
+ first_data_sz = start.sz_data;
+ }
+ else if (start.sz - off_blk < NCB_GAP_MIN_SZ) {
+ return NCB_RET_GAP_SIZE;
+ }
+ else {
+ /* Buffer will start with this GAP block. */
+ first_data_sz = 0;
+ }
+ }
+ else {
+ /* If off_blk is less than start.sz, the data block becomes the
+ * first block. If they are equal, the data block is completely
+ * removed and the following GAP becomes the first block.
+ */
+ first_data_sz = start.sz - off_blk;
+ }
+
+ if (last.flag & NCB_BK_F_GAP) {
+ /* Extend the last GAP, or convert a reduced gap into a full one if it becomes big enough. */
+ if (!(last.flag & NCB_BK_F_FIN) || last.sz + adv >= NCB_GAP_MIN_SZ) {
+ /* use .st instead of .sz_ptr which can be NULL if reduced gap */
+ ncb_write_off(buf, last.st, last.sz + adv);
+ ncb_write_off(buf, ncb_peek(buf, last.off + NCB_GAP_SZ_DATA_OFF), 0);
+ }
+ }
+ else {
+ /* Insert a GAP after the last DATA block. */
+ if (adv >= NCB_GAP_MIN_SZ) {
+ ncb_write_off(buf, ncb_peek(buf, last.off + last.sz + NCB_GAP_SZ_OFF), adv);
+ ncb_write_off(buf, ncb_peek(buf, last.off + last.sz + NCB_GAP_SZ_DATA_OFF), 0);
+ }
+ }
+
+ /* Advance head and update reserved header with new first data size. */
+ buf->head += adv;
+ if (buf->head >= buf->size)
+ buf->head -= buf->size;
+ ncb_write_off(buf, ncb_reserved(buf), first_data_sz);
+
+ /* If advance in a GAP, reduce its size. */
+ if (start.flag & NCB_BK_F_GAP && !first_data_sz) {
+ ncb_write_off(buf, ncb_head(buf), start.sz - off_blk);
+ /* Recopy the block sz_data at the new position. */
+ ncb_write_off(buf, ncb_peek(buf, NCB_GAP_SZ_DATA_OFF), start.sz_data);
+ }
+
+ return NCB_RET_OK;
+}
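+
+/* Usage sketch (illustrative): consume the contiguous bytes at the head,
+ * then release them to create room at the end of the buffer:
+ *
+ *   ncb_sz_t len = ncb_data(&b, 0);   // contiguous data at offset 0
+ *   // ... copy <len> bytes out of the buffer ...
+ *   ret = ncb_advance(&b, len);       // NCB_RET_OK: stays inside a data block
+ */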
+
+/* ******** testing API ******** */
+/* To build it:
+ * gcc -Wall -DSTANDALONE -lasan -I./include -o ncbuf src/ncbuf.c
+ */
+#ifdef STANDALONE
+
+int ncb_print = 0;
+
+static void ncbuf_printf(char *str, ...)
+{
+ va_list args;
+
+ va_start(args, str);
+ if (ncb_print)
+ vfprintf(stderr, str, args);
+ va_end(args);
+}
+
+struct rand_off {
+ struct list el;
+ ncb_sz_t off;
+ ncb_sz_t len;
+};
+
+static struct rand_off *ncb_generate_rand_off(const struct ncbuf *buf)
+{
+ struct rand_off *roff;
+ roff = calloc(1, sizeof(*roff));
+ BUG_ON(!roff);
+
+ roff->off = rand() % (ncb_size(buf));
+ if (roff->off > 0 && roff->off < NCB_GAP_MIN_SZ)
+ roff->off = 0;
+
+ roff->len = rand() % (ncb_size(buf) - roff->off + 1);
+
+ return roff;
+}
+
+static void ncb_print_blk(const struct ncb_blk *blk)
+{
+ if (ncb_print) {
+ fprintf(stderr, "%s(%s): %2u/%u.\n",
+ blk->flag & NCB_BK_F_GAP ? "GAP " : "DATA",
+ blk->flag & NCB_BK_F_FIN ? "F" : "-", blk->off, blk->sz);
+ }
+}
+
+static int ncb_is_null_blk(const struct ncb_blk *blk)
+{
+ return !blk->st;
+}
+
+static void ncb_loop(const struct ncbuf *buf)
+{
+ struct ncb_blk blk;
+
+ blk = ncb_blk_first(buf);
+ do {
+ ncb_print_blk(&blk);
+ blk = ncb_blk_next(buf, &blk);
+ } while (!ncb_is_null_blk(&blk));
+
+ ncbuf_printf("\n");
+}
+
+static void ncbuf_print_buf(struct ncbuf *b, ncb_sz_t len,
+ unsigned char *area, int line)
+{
+ int i;
+
+ ncbuf_printf("buffer status at line %d\n", line);
+ for (i = 0; i < len; ++i) {
+ ncbuf_printf("%02x.", area[i]);
+ if (i && i % 32 == 31) ncbuf_printf("\n");
+ else if (i && i % 8 == 7) ncbuf_printf(" ");
+ }
+ ncbuf_printf("\n");
+
+ ncb_loop(b);
+
+ if (ncb_print)
+ getchar();
+}
+
+static struct ncbuf b;
+static unsigned char *bufarea = NULL;
+static ncb_sz_t bufsize = 16384;
+static ncb_sz_t bufhead = 15;
+
+#define NCB_INIT(buf) \
+ if ((reset)) { memset(bufarea, 0xaa, bufsize); } \
+ ncb_init(buf, bufhead); \
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+
+#define NCB_ADD_EQ(buf, off, data, sz, mode, ret) \
+ BUG_ON(ncb_add((buf), (off), (data), (sz), (mode)) != (ret)); \
+ ncbuf_print_buf(buf, bufsize, bufarea, __LINE__);
+
+#define NCB_ADD_NEQ(buf, off, data, sz, mode, ret) \
+ BUG_ON(ncb_add((buf), (off), (data), (sz), (mode)) == (ret)); \
+ ncbuf_print_buf(buf, bufsize, bufarea, __LINE__);
+
+#define NCB_ADVANCE_EQ(buf, off, ret) \
+ BUG_ON(ncb_advance((buf), (off)) != (ret)); \
+ ncbuf_print_buf(buf, bufsize, bufarea, __LINE__);
+
+#define NCB_TOTAL_DATA_EQ(buf, data) \
+ BUG_ON(ncb_total_data((buf)) != (data));
+
+#define NCB_DATA_EQ(buf, off, data) \
+ BUG_ON(ncb_data((buf), (off)) != (data));
+
+static int ncbuf_test(ncb_sz_t head, int reset, int print_delay)
+{
+ char *data0, data1[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
+ struct list list = LIST_HEAD_INIT(list);
+ struct rand_off *roff, *roff_tmp;
+ enum ncb_ret ret;
+
+ data0 = malloc(bufsize);
+ memset(data0, 0xff, bufsize);
+
+ bufarea = malloc(bufsize);
+
+ fprintf(stderr, "running unit tests\n");
+
+ b = NCBUF_NULL;
+ BUG_ON(!ncb_is_null(&b));
+ NCB_DATA_EQ(&b, 0, 0);
+ NCB_TOTAL_DATA_EQ(&b, 0);
+ BUG_ON(ncb_size(&b) != 0);
+ BUG_ON(!ncb_is_empty(&b));
+ BUG_ON(ncb_is_full(&b));
+ BUG_ON(ncb_is_fragmented(&b));
+
+ b.area = (char *)bufarea;
+ b.size = bufsize;
+ b.head = head;
+ NCB_INIT(&b);
+
+ /* insertion test suite */
+ NCB_INIT(&b);
+ NCB_DATA_EQ(&b, 0, 0); NCB_DATA_EQ(&b, bufsize - NCB_RESERVED_SZ - 1, 0); /* first and last offset */
+ NCB_ADD_EQ(&b, 24, data0, 9, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 24, 9);
+ /* insert new data at the same offset as old */
+ NCB_ADD_EQ(&b, 24, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 24, 16);
+
+ NCB_INIT(&b); NCB_DATA_EQ(&b, 0, 0);
+ NCB_ADD_EQ(&b, 0, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 16);
+ BUG_ON(ncb_is_fragmented(&b));
+ NCB_ADD_EQ(&b, 24, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 16);
+ BUG_ON(!ncb_is_fragmented(&b));
+ /* insert data overlapping two data blocks and a gap */
+ NCB_ADD_EQ(&b, 12, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 40);
+ BUG_ON(ncb_is_fragmented(&b));
+
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 32, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 0); NCB_DATA_EQ(&b, 16, 0); NCB_DATA_EQ(&b, 32, 16);
+ BUG_ON(!ncb_is_fragmented(&b));
+ NCB_ADD_EQ(&b, 0, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 16); NCB_DATA_EQ(&b, 16, 0); NCB_DATA_EQ(&b, 32, 16);
+ BUG_ON(!ncb_is_fragmented(&b));
+ /* insert data to exactly cover a gap between two data blocks */
+ NCB_ADD_EQ(&b, 16, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 48); NCB_DATA_EQ(&b, 16, 32); NCB_DATA_EQ(&b, 32, 16);
+ BUG_ON(ncb_is_fragmented(&b));
+
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 0, data0, 8, NCB_ADD_PRESERVE, NCB_RET_OK);
+ /* this insertion must be rejected because of minimal gap size */
+ NCB_ADD_EQ(&b, 10, data0, 8, NCB_ADD_PRESERVE, NCB_RET_GAP_SIZE);
+
+ /* Test reduced gap support */
+ NCB_INIT(&b);
+ /* this insertion will form a reduced gap */
+ NCB_ADD_EQ(&b, 0, data0, bufsize - (NCB_GAP_MIN_SZ - 1), NCB_ADD_COMPARE, NCB_RET_OK);
+
+ /* Test the various insertion modes */
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 10, data1, 16, NCB_ADD_PRESERVE, NCB_RET_OK);
+ NCB_ADD_EQ(&b, 12, data1, 16, NCB_ADD_COMPARE, NCB_RET_DATA_REJ);
+ NCB_ADD_EQ(&b, 12, data1, 16, NCB_ADD_PRESERVE, NCB_RET_OK); BUG_ON(*ncb_peek(&b, 12) != data1[2]);
+ NCB_ADD_EQ(&b, 12, data1, 16, NCB_ADD_OVERWRT, NCB_RET_OK); BUG_ON(*ncb_peek(&b, 12) == data1[2]);
+
+ /* advance test suite */
+ NCB_INIT(&b);
+ NCB_ADVANCE_EQ(&b, 10, NCB_RET_OK); /* advance in an empty buffer; this ensures we do not leave an empty DATA in the middle of the buffer */
+ NCB_ADVANCE_EQ(&b, ncb_size(&b) - 2, NCB_RET_OK);
+
+ NCB_INIT(&b);
+ /* first fill the buffer */
+ NCB_ADD_EQ(&b, 0, data0, bufsize - NCB_RESERVED_SZ, NCB_ADD_COMPARE, NCB_RET_OK);
+ /* delete 2 bytes : a reduced gap must be created */
+ NCB_ADVANCE_EQ(&b, 2, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, ncb_size(&b) - 2);
+ /* delete 1 byte : extend the reduced gap */
+ NCB_ADVANCE_EQ(&b, 1, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, ncb_size(&b) - 3);
+ /* delete 5 bytes : a full gap must be present */
+ NCB_ADVANCE_EQ(&b, 5, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, ncb_size(&b) - 8);
+ /* completely clear the buffer */
+ NCB_ADVANCE_EQ(&b, bufsize - NCB_RESERVED_SZ, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, 0);
+
+
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 10, data0, 10, NCB_ADD_PRESERVE, NCB_RET_OK);
+ NCB_ADVANCE_EQ(&b, 2, NCB_RET_OK); /* reduce a gap in front of the buffer */
+ NCB_ADVANCE_EQ(&b, 1, NCB_RET_GAP_SIZE); /* reject */
+ NCB_ADVANCE_EQ(&b, 8, NCB_RET_OK); /* remove completely the gap */
+ NCB_ADVANCE_EQ(&b, 8, NCB_RET_OK); /* remove inside the data */
+ NCB_ADVANCE_EQ(&b, 10, NCB_RET_OK); /* remove completely the data */
+
+ fprintf(stderr, "first random pass\n");
+ NCB_INIT(&b);
+
+ /* generate random data offsets until the buffer is full */
+ while (!ncb_is_full(&b)) {
+ roff = ncb_generate_rand_off(&b);
+ LIST_INSERT(&list, &roff->el);
+
+ ret = ncb_add(&b, roff->off, data0, roff->len, NCB_ADD_COMPARE);
+ BUG_ON(ret == NCB_RET_DATA_REJ);
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+ usleep(print_delay);
+ }
+
+ fprintf(stderr, "buf full, prepare for reverse random\n");
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+
+ /* insert the previously generated random offsets in the reverse order.
+ * At the end, the buffer should be full.
+ */
+ NCB_INIT(&b);
+ list_for_each_entry_safe(roff, roff_tmp, &list, el) {
+ int full = ncb_is_full(&b);
+ if (!full) {
+ ret = ncb_add(&b, roff->off, data0, roff->len, NCB_ADD_COMPARE);
+ BUG_ON(ret == NCB_RET_DATA_REJ);
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+ usleep(print_delay);
+ }
+
+ LIST_DELETE(&roff->el);
+ free(roff);
+ }
+
+ if (!ncb_is_full(&b))
+ abort();
+
+ fprintf(stderr, "done\n");
+
+ free(bufarea);
+ free(data0);
+
+ return 1;
+}
+
+int main(int argc, char **argv)
+{
+ int reset = 0;
+ int print_delay = 100000;
+ int c;
+
+ opterr = 0;
+ while ((c = getopt(argc, argv, "h:s:rp::")) != -1) {
+ switch (c) {
+ case 'h':
+ bufhead = atoi(optarg);
+ break;
+ case 's':
+ bufsize = atoi(optarg);
+ if (bufsize < 64) {
+ fprintf(stderr, "bufsize should be at least 64 bytes for unit test suite\n");
+ exit(127);
+ }
+ break;
+ case 'r':
+ reset = 1;
+ break;
+ case 'p':
+ if (optarg)
+ print_delay = atoi(optarg);
+ ncb_print = 1;
+ break;
+ case '?':
+ default:
+ fprintf(stderr, "usage: %s [-r] [-s bufsize] [-h bufhead] [-p <delay_msec>]\n", argv[0]);
+ exit(127);
+ }
+ }
+
+ ncbuf_test(bufhead, reset, print_delay);
+ return EXIT_SUCCESS;
+}
+
+#endif /* STANDALONE */
diff --git a/src/pattern.c b/src/pattern.c
new file mode 100644
index 0000000..a2557de
--- /dev/null
+++ b/src/pattern.c
@@ -0,0 +1,2702 @@
+/*
+ * Pattern management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <import/ebsttree.h>
+#include <import/lru.h>
+
+#include <haproxy/api.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/pattern.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+const char *const pat_match_names[PAT_MATCH_NUM] = {
+ [PAT_MATCH_FOUND] = "found",
+ [PAT_MATCH_BOOL] = "bool",
+ [PAT_MATCH_INT] = "int",
+ [PAT_MATCH_IP] = "ip",
+ [PAT_MATCH_BIN] = "bin",
+ [PAT_MATCH_LEN] = "len",
+ [PAT_MATCH_STR] = "str",
+ [PAT_MATCH_BEG] = "beg",
+ [PAT_MATCH_SUB] = "sub",
+ [PAT_MATCH_DIR] = "dir",
+ [PAT_MATCH_DOM] = "dom",
+ [PAT_MATCH_END] = "end",
+ [PAT_MATCH_REG] = "reg",
+ [PAT_MATCH_REGM] = "regm",
+};
+
+int (*const pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = {
+ [PAT_MATCH_FOUND] = pat_parse_nothing,
+ [PAT_MATCH_BOOL] = pat_parse_nothing,
+ [PAT_MATCH_INT] = pat_parse_int,
+ [PAT_MATCH_IP] = pat_parse_ip,
+ [PAT_MATCH_BIN] = pat_parse_bin,
+ [PAT_MATCH_LEN] = pat_parse_int,
+ [PAT_MATCH_STR] = pat_parse_str,
+ [PAT_MATCH_BEG] = pat_parse_str,
+ [PAT_MATCH_SUB] = pat_parse_str,
+ [PAT_MATCH_DIR] = pat_parse_str,
+ [PAT_MATCH_DOM] = pat_parse_str,
+ [PAT_MATCH_END] = pat_parse_str,
+ [PAT_MATCH_REG] = pat_parse_reg,
+ [PAT_MATCH_REGM] = pat_parse_reg,
+};
+
+int (*const pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = {
+ [PAT_MATCH_FOUND] = pat_idx_list_val,
+ [PAT_MATCH_BOOL] = pat_idx_list_val,
+ [PAT_MATCH_INT] = pat_idx_list_val,
+ [PAT_MATCH_IP] = pat_idx_tree_ip,
+ [PAT_MATCH_BIN] = pat_idx_list_ptr,
+ [PAT_MATCH_LEN] = pat_idx_list_val,
+ [PAT_MATCH_STR] = pat_idx_tree_str,
+ [PAT_MATCH_BEG] = pat_idx_tree_pfx,
+ [PAT_MATCH_SUB] = pat_idx_list_str,
+ [PAT_MATCH_DIR] = pat_idx_list_str,
+ [PAT_MATCH_DOM] = pat_idx_list_str,
+ [PAT_MATCH_END] = pat_idx_list_str,
+ [PAT_MATCH_REG] = pat_idx_list_reg,
+ [PAT_MATCH_REGM] = pat_idx_list_regm,
+};
+
+void (*const pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = {
+ [PAT_MATCH_FOUND] = pat_prune_gen,
+ [PAT_MATCH_BOOL] = pat_prune_gen,
+ [PAT_MATCH_INT] = pat_prune_gen,
+ [PAT_MATCH_IP] = pat_prune_gen,
+ [PAT_MATCH_BIN] = pat_prune_gen,
+ [PAT_MATCH_LEN] = pat_prune_gen,
+ [PAT_MATCH_STR] = pat_prune_gen,
+ [PAT_MATCH_BEG] = pat_prune_gen,
+ [PAT_MATCH_SUB] = pat_prune_gen,
+ [PAT_MATCH_DIR] = pat_prune_gen,
+ [PAT_MATCH_DOM] = pat_prune_gen,
+ [PAT_MATCH_END] = pat_prune_gen,
+ [PAT_MATCH_REG] = pat_prune_gen,
+ [PAT_MATCH_REGM] = pat_prune_gen,
+};
+
+struct pattern *(*const pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = {
+ [PAT_MATCH_FOUND] = NULL,
+ [PAT_MATCH_BOOL] = pat_match_nothing,
+ [PAT_MATCH_INT] = pat_match_int,
+ [PAT_MATCH_IP] = pat_match_ip,
+ [PAT_MATCH_BIN] = pat_match_bin,
+ [PAT_MATCH_LEN] = pat_match_len,
+ [PAT_MATCH_STR] = pat_match_str,
+ [PAT_MATCH_BEG] = pat_match_beg,
+ [PAT_MATCH_SUB] = pat_match_sub,
+ [PAT_MATCH_DIR] = pat_match_dir,
+ [PAT_MATCH_DOM] = pat_match_dom,
+ [PAT_MATCH_END] = pat_match_end,
+ [PAT_MATCH_REG] = pat_match_reg,
+ [PAT_MATCH_REGM] = pat_match_regm,
+};
+
+/* Just used for checking configuration compatibility */
+int const pat_match_types[PAT_MATCH_NUM] = {
+ [PAT_MATCH_FOUND] = SMP_T_SINT,
+ [PAT_MATCH_BOOL] = SMP_T_SINT,
+ [PAT_MATCH_INT] = SMP_T_SINT,
+ [PAT_MATCH_IP] = SMP_T_ADDR,
+ [PAT_MATCH_BIN] = SMP_T_BIN,
+ [PAT_MATCH_LEN] = SMP_T_STR,
+ [PAT_MATCH_STR] = SMP_T_STR,
+ [PAT_MATCH_BEG] = SMP_T_STR,
+ [PAT_MATCH_SUB] = SMP_T_STR,
+ [PAT_MATCH_DIR] = SMP_T_STR,
+ [PAT_MATCH_DOM] = SMP_T_STR,
+ [PAT_MATCH_END] = SMP_T_STR,
+ [PAT_MATCH_REG] = SMP_T_STR,
+ [PAT_MATCH_REGM] = SMP_T_STR,
+};
+
+/* this struct is used to return information */
+static THREAD_LOCAL struct pattern static_pattern;
+static THREAD_LOCAL struct sample_data static_sample_data;
+
+/* This is the root of the list of all available pattern_refs. */
+struct list pattern_reference = LIST_HEAD_INIT(pattern_reference);
+
+static THREAD_LOCAL struct lru64_head *pat_lru_tree;
+static unsigned long long pat_lru_seed __read_mostly;
+
+/*
+ *
+ * The following functions are not exported and are used internally by the
+ * pattern matching process.
+ *
+ */
+
+/* Background: Fast way to find a zero byte in a word
+ * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ * hasZeroByte = (v - 0x01010101UL) & ~v & 0x80808080UL;
+ *
+ * To look for 4 different byte values, xor the word with those bytes and
+ * then check for zero bytes:
+ *
+ * v = (((unsigned char)c * 0x1010101U) ^ delimiter)
+ * where <delimiter> is the 4 byte values to look for (as an uint)
+ * and <c> is the character that is being tested
+ */
+static inline unsigned int is_delimiter(unsigned char c, unsigned int mask)
+{
+ mask ^= (c * 0x01010101); /* propagate the char to all 4 bytes */
+ return (mask - 0x01010101) & ~mask & 0x80808080U;
+}
+
+static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, unsigned char d3, unsigned char d4)
+{
+ return d1 << 24 | d2 << 16 | d3 << 8 | d4;
+}
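+
+/* Worked example: mask = make_4delim('/', '?', '?', '?') is 0x2f3f3f3f. For
+ * c = '/', c * 0x01010101 = 0x2f2f2f2f and the xor yields 0x00101010; the
+ * zero byte makes (v - 0x01010101) & ~v & 0x80808080 evaluate to 0x80000000,
+ * so is_delimiter() reports a match. For a byte absent from the mask, no byte
+ * of the xor is zero and the expression yields 0. The order of d1..d4 does
+ * not matter since <c> is replicated into all 4 bytes before the xor.
+ */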
+
+
+/*
+ *
+ * These functions are exported and may be used by any other component.
+ *
+ * The following functions are used for parsing pattern matching input values.
+ * <text> contains the string to be parsed. <pattern> must be a preallocated
+ * pattern. The pat_parse_* functions fill this structure with the parsed value.
+ * <err> is filled with an error message built with the memprintf() function. It
+ * is allowed to use a trash chunk as temporary storage for the returned
+ * pattern, as the next call after these functions will be pat_idx_*.
+ *
+ * On success, the pat_parse_* functions return 1. If a function fails, it
+ * returns 0 and <err> is filled.
+ */
+
+/* ignore the current line */
+int pat_parse_nothing(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ return 1;
+}
+
+/* Parse a string. The pattern references <text> as-is; it is duplicated at indexing time. */
+int pat_parse_str(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ pattern->type = SMP_T_STR;
+ pattern->ptr.str = (char *)text;
+ pattern->len = strlen(text);
+ return 1;
+}
+
+/* Parse a binary value written in hex. The result is stored in a trash chunk. */
+int pat_parse_bin(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ struct buffer *trash;
+
+ pattern->type = SMP_T_BIN;
+ trash = get_trash_chunk();
+ pattern->len = trash->size;
+ pattern->ptr.str = trash->area;
+ return !!parse_binary(text, &pattern->ptr.str, &pattern->len, err);
+}
+
+/* Parse a regex. The pattern references <text> as-is; the regex is compiled at indexing time. */
+int pat_parse_reg(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ pattern->ptr.str = (char *)text;
+ return 1;
+}
+
+/* Parse a range of positive integers delimited by either ':' or '-'. If only
+ * one integer is read, it is set as both min and max. An operator may be
+ * specified as the prefix, among this list of 5:
+ *
+ * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
+ *
+ * The default operator is "eq". It supports range matching. Ranges are
+ * rejected for other operators. The operator may be changed at any time.
+ * The operator is stored in the 'opaque' argument.
+ *
+ * If err is non-NULL, an error message will be returned there on errors and
+ * the caller will have to free it. The function returns zero on error, and
+ * non-zero on success.
+ *
+ */
+int pat_parse_int(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ const char *ptr = text;
+
+ pattern->type = SMP_T_SINT;
+
+ /* Empty string is not valid */
+ if (!*text)
+ goto not_valid_range;
+
+ /* Search ':' or '-' separator. */
+ while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
+ ptr++;
+
+ /* If separator not found. */
+ if (!*ptr) {
+ if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a number", text);
+ return 0;
+ }
+ pattern->val.range.max = pattern->val.range.min;
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If the separator is the first character. */
+ if (ptr == text && *(ptr + 1) != '\0') {
+ if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
+ goto not_valid_range;
+
+ pattern->val.range.min_set = 0;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If separator is the last character. */
+ if (*(ptr + 1) == '\0') {
+ if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
+ goto not_valid_range;
+
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 0;
+ return 1;
+ }
+
+ /* Else, parse two numbers. */
+ if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
+ goto not_valid_range;
+
+ if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
+ goto not_valid_range;
+
+ if (pattern->val.range.min > pattern->val.range.max)
+ goto not_valid_range;
+
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+
+ not_valid_range:
+ memprintf(err, "'%s' is not a valid number range", text);
+ return 0;
+}
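+
+/* Examples of accepted inputs (illustrative):
+ *   "1024"       -> min = max = 1024
+ *   "1024:2048"  -> min = 1024, max = 2048
+ *   ":2048"      -> max = 2048, no lower bound
+ *   "1024:"      -> min = 1024, no upper bound
+ * An empty string or a reversed range such as "2048:1024" is rejected.
+ */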
+
+/* Parse a range of positive 2-component versions delimited by either ':' or
+ * '-'. The version consists in a major and a minor, both of which must be
+ * smaller than 65536, because internally they will be represented as a 32-bit
+ * integer.
+ * If only one version is read, it is set as both min and max. Just like for
+ * pure integers, an operator may be specified as the prefix, among this list
+ * of 5:
+ *
+ * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
+ *
+ * The default operator is "eq". It supports range matching. Ranges are
+ * rejected for other operators. The operator may be changed at any time.
+ * The operator is stored in the 'opaque' argument. This allows constructs
+ * such as the following one:
+ *
+ * acl obsolete_ssl ssl_req_proto lt 3
+ * acl unsupported_ssl ssl_req_proto gt 3.1
+ * acl valid_ssl ssl_req_proto 3.0-3.1
+ *
+ */
+int pat_parse_dotted_ver(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ const char *ptr = text;
+
+ pattern->type = SMP_T_SINT;
+
+ /* Search ':' or '-' separator. */
+ while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
+ ptr++;
+
+ /* If separator not found. */
+ if (*ptr == '\0' && ptr > text) {
+ if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a dotted number", text);
+ return 0;
+ }
+ pattern->val.range.max = pattern->val.range.min;
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If the separator is the first character. */
+ if (ptr == text && *(ptr+1) != '\0') {
+ if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ pattern->val.range.min_set = 0;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If separator is the last character. */
+ if (ptr == &text[strlen(text)-1]) {
+ if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 0;
+ return 1;
+ }
+
+ /* Else, parse two numbers. */
+ if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ if (pattern->val.range.min > pattern->val.range.max) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+}
+
+/* Parse an IP address and an optional mask in the form addr[/mask].
+ * The addr may either be an IPv4 address or a hostname. The mask
+ * may either be a dotted mask or a number of bits. Returns 1 if OK,
+ * otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6).
+ */
+int pat_parse_ip(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ if (str2net(text, !(mflags & PAT_MF_NO_DNS) && (global.mode & MODE_STARTING),
+ &pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) {
+ pattern->type = SMP_T_IPV4;
+ return 1;
+ }
+ else if (str62net(text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) {
+ pattern->type = SMP_T_IPV6;
+ return 1;
+ }
+ else {
+ memprintf(err, "'%s' is not a valid IPv4 or IPv6 address", text);
+ return 0;
+ }
+}
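+
+/* Examples of accepted inputs (illustrative):
+ *   "192.168.0.10"        -> single IPv4 host
+ *   "192.168.0.0/24"      -> IPv4 network with a prefix length
+ *   "10.0.0.0/255.0.0.0"  -> IPv4 network with a dotted mask
+ *   "2001:db8::/32"       -> IPv6 network
+ * A hostname is also accepted; it is only resolved during startup, and only
+ * if PAT_MF_NO_DNS is not set.
+ */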
+
+/*
+ *
+ * These functions are exported and may be used by any other component.
+ *
+ * This function just takes a sample <smp> and checks if this sample matches
+ * with the pattern <pattern>. This function returns only PAT_MATCH or
+ * PAT_NOMATCH.
+ *
+ */
+
+/* Matches the boolean value of the sample: a true value matches without any pattern. */
+struct pattern *pat_match_nothing(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ if (smp->data.u.sint) {
+ if (fill) {
+ static_pattern.data = NULL;
+ static_pattern.ref = NULL;
+ static_pattern.type = 0;
+ static_pattern.ptr.str = NULL;
+ }
+ return &static_pattern;
+ }
+ else
+ return NULL;
+}
+
+
+/* NB: For two strings to be identical, it is required that their lengths match */
+struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ /* Lookup a string in the expression's pattern tree. */
+ if (!eb_is_empty(&expr->pattern_tree)) {
+ char prev = 0;
+
+ if (smp->data.u.str.data < smp->data.u.str.size) {
+ /* we may have to force a trailing zero on the test pattern and
+ * the buffer is large enough to accommodate it. If the flag
+ * CONST is set, duplicate the string
+ */
+ prev = smp->data.u.str.area[smp->data.u.str.data];
+ if (prev) {
+ if (smp->flags & SMP_F_CONST) {
+ if (!smp_dup(smp))
+ return NULL;
+ } else {
+ smp->data.u.str.area[smp->data.u.str.data] = '\0';
+ }
+ }
+ }
+ else {
+ /* Otherwise, the sample is duplicated. A trailing zero
+ * is automatically added to the string.
+ */
+ if (!smp_dup(smp))
+ return NULL;
+ }
+
+ node = ebst_lookup(&expr->pattern_tree, smp->data.u.str.area);
+ if (prev)
+ smp->data.u.str.area[smp->data.u.str.data] = prev;
+
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_next_dup(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_STR;
+ static_pattern.ptr.str = (char *)elt->node.key;
+ }
+ return &static_pattern;
+ }
+ }
+
+ /* look in the list */
+ if (pat_lru_tree) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len != smp->data.u.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0)) {
+ ret = pattern;
+ break;
+ }
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* NB: For two binary buffers to be identical, it is required that their lengths match */
+struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len != smp->data.u.str.data)
+ continue;
+
+ if (memcmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) {
+ ret = pattern;
+ break;
+ }
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Executes a regex. It temporarily changes the data to add a trailing zero,
+ * and restores the previous character when leaving. This function fills
+ * a matching array.
+ */
+struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data,
+ MAX_MATCH, pmatch, 0)) {
+ ret = pattern;
+ smp->ctx.a[0] = pmatch;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* Executes a regex. It temporarily changes the data to add a trailing zero,
+ * and restores the previous character when leaving.
+ */
+struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (regex_exec2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data)) {
+ ret = pattern;
+ break;
+ }
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Checks that the pattern matches the beginning of the tested string. */
+struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ /* Lookup a string in the expression's pattern tree. */
+ if (!eb_is_empty(&expr->pattern_tree)) {
+ char prev = 0;
+
+ if (smp->data.u.str.data < smp->data.u.str.size) {
+ /* we may have to force a trailing zero on the test pattern and
+ * the buffer is large enough to accommodate it.
+ */
+ prev = smp->data.u.str.area[smp->data.u.str.data];
+ if (prev)
+ smp->data.u.str.area[smp->data.u.str.data] = '\0';
+ }
+ else {
+ /* Otherwise, the sample is duplicated. A trailing zero
+ * is automatically added to the string.
+ */
+ if (!smp_dup(smp))
+ return NULL;
+ }
+
+ node = ebmb_lookup_longest(&expr->pattern_tree,
+ smp->data.u.str.area);
+ if (prev)
+ smp->data.u.str.area[smp->data.u.str.data] = prev;
+
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_STR;
+ static_pattern.ptr.str = (char *)elt->node.key;
+ }
+ return &static_pattern;
+ }
+ }
+
+ /* look in the list */
+ if (pat_lru_tree) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len > smp->data.u.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0))
+ continue;
+
+ ret = pattern;
+ break;
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Checks that the pattern matches the end of the tested string. */
+struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len > smp->data.u.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0))
+ continue;
+
+ ret = pattern;
+ break;
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Checks that the pattern is included inside the tested string.
+ * NB: Suboptimal, should be rewritten using a Boyer-Moore method.
+ */
+struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ char *end;
+ char *c;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len > smp->data.u.str.data)
+ continue;
+
+ end = smp->data.u.str.area + smp->data.u.str.data - pattern->len;
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if (icase) {
+ for (c = smp->data.u.str.area; c <= end; c++) {
+ if (tolower((unsigned char)*c) != tolower((unsigned char)*pattern->ptr.str))
+ continue;
+ if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) {
+ ret = pattern;
+ goto leave;
+ }
+ }
+ } else {
+ for (c = smp->data.u.str.area; c <= end; c++) {
+ if (*c != *pattern->ptr.str)
+ continue;
+ if (strncmp(pattern->ptr.str, c, pattern->len) == 0) {
+ ret = pattern;
+ goto leave;
+ }
+ }
+ }
+ }
+ leave:
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* This helper is used by the actual matching functions. It checks that the pattern is
+ * included inside the tested string, but enclosed between the specified
+ * delimiters or at the beginning or end of the string. The delimiters are
+ * provided as an unsigned int made by make_4delim() and match up to 4 different
+ * delimiters. Delimiters are stripped at the beginning and end of the pattern.
+ */
+static int match_word(struct sample *smp, struct pattern *pattern, int mflags, unsigned int delimiters)
+{
+ int may_match, icase;
+ char *c, *end;
+ char *ps;
+ int pl;
+
+ pl = pattern->len;
+ ps = pattern->ptr.str;
+
+ while (pl > 0 && is_delimiter(*ps, delimiters)) {
+ pl--;
+ ps++;
+ }
+
+ while (pl > 0 && is_delimiter(ps[pl - 1], delimiters))
+ pl--;
+
+ if (pl > smp->data.u.str.data)
+ return PAT_NOMATCH;
+
+ may_match = 1;
+ icase = mflags & PAT_MF_IGNORE_CASE;
+ end = smp->data.u.str.area + smp->data.u.str.data - pl;
+ for (c = smp->data.u.str.area; c <= end; c++) {
+ if (is_delimiter(*c, delimiters)) {
+ may_match = 1;
+ continue;
+ }
+
+ if (!may_match)
+ continue;
+
+ if (icase) {
+ if ((tolower((unsigned char)*c) == tolower((unsigned char)*ps)) &&
+ (strncasecmp(ps, c, pl) == 0) &&
+ (c == end || is_delimiter(c[pl], delimiters)))
+ return PAT_MATCH;
+ } else {
+ if ((*c == *ps) &&
+ (strncmp(ps, c, pl) == 0) &&
+ (c == end || is_delimiter(c[pl], delimiters)))
+ return PAT_MATCH;
+ }
+ may_match = 0;
+ }
+ return PAT_NOMATCH;
+}
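+
+/* Illustrative example: with delimiters '/' and '?', the pattern "img"
+ * matches "/img/logo.png" and "/static/img?v=1", since each occurrence is
+ * enclosed between delimiters (or string boundaries), but it does not match
+ * "/imgs/logo.png", where "img" is followed by a regular character.
+ */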
+
+/* Checks that the pattern is included inside the tested string, but enclosed
+ * between the delimiters '?' or '/' or at the beginning or end of the string.
+ * Delimiters at the beginning or end of the pattern are ignored.
+ */
+struct pattern *pat_match_dir(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '?', '?')))
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Checks that the pattern is included inside the tested string, but enclosed
+ * between the delimiters '/', '?', '.' or ':' or at the beginning or end of
+ * the string. Delimiters at the beginning or end of the pattern are ignored.
+ */
+struct pattern *pat_match_dom(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '.', ':')))
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Checks that the integer in <smp> is included between min and max */
+struct pattern *pat_match_int(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.sint) &&
+ (!pattern->val.range.max_set || smp->data.u.sint <= pattern->val.range.max))
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Checks that the length of the string in <smp> is included between min and max */
+struct pattern *pat_match_len(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.str.data) &&
+ (!pattern->val.range.max_set || smp->data.u.str.data <= pattern->val.range.max))
+ return pattern;
+ }
+ return NULL;
+}
+
+struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ unsigned int v4; /* in network byte order */
+ struct in6_addr tmp6;
+ struct in_addr *s;
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ /* The input sample is IPv4. Try to match in the trees. */
+ if (smp->data.type == SMP_T_IPV4) {
+ /* Lookup an IPv4 address in the expression's pattern tree using
+ * the longest match method.
+ */
+ s = &smp->data.u.ipv4;
+ node = ebmb_lookup_longest(&expr->pattern_tree, &s->s_addr);
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_IPV4;
+ static_pattern.val.ipv4.addr.s_addr = read_u32(elt->node.key);
+ if (!cidr2dotted(elt->node.node.pfx, &static_pattern.val.ipv4.mask))
+ return NULL;
+ }
+ return &static_pattern;
+ }
+
+ /* The IPv4 sample doesn't match the IPv4 tree. Convert the IPv4
+ * sample address to IPv6 with the mapping method using the ::ffff:
+ * prefix, and try to lookup in the IPv6 tree.
+ */
+ memset(&tmp6, 0, 10);
+ write_u16(&tmp6.s6_addr[10], htons(0xffff));
+ write_u32(&tmp6.s6_addr[12], smp->data.u.ipv4.s_addr);
+ node = ebmb_lookup_longest(&expr->pattern_tree_2, &tmp6);
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_IPV6;
+ memcpy(&static_pattern.val.ipv6.addr, elt->node.key, 16);
+ static_pattern.val.ipv6.mask = elt->node.node.pfx;
+ }
+ return &static_pattern;
+ }
+ }
+
+ /* The input sample is IPv6. Try to match in the trees. */
+ if (smp->data.type == SMP_T_IPV6) {
+ /* Lookup an IPv6 address in the expression's pattern tree using
+ * the longest match method.
+ */
+ node = ebmb_lookup_longest(&expr->pattern_tree_2, &smp->data.u.ipv6);
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_IPV6;
+ memcpy(&static_pattern.val.ipv6.addr, elt->node.key, 16);
+ static_pattern.val.ipv6.mask = elt->node.node.pfx;
+ }
+ return &static_pattern;
+ }
+
+ /* Try to convert 6 to 4 when the start of the ipv6 address
+ * matches one of the following forms:
+ * - ::ffff:ip:v4 (ipv4 mapped)
+ * - ::0000:ip:v4 (old ipv4 mapped)
+ * - 2002:ip:v4:: (6to4)
+ */
+ if ((read_u64(&smp->data.u.ipv6.s6_addr[0]) == 0 &&
+ (read_u32(&smp->data.u.ipv6.s6_addr[8]) == 0 ||
+ read_u32(&smp->data.u.ipv6.s6_addr[8]) == htonl(0xFFFF))) ||
+ read_u16(&smp->data.u.ipv6.s6_addr[0]) == htons(0x2002)) {
+ if (read_u32(&smp->data.u.ipv6.s6_addr[0]) == 0)
+ v4 = read_u32(&smp->data.u.ipv6.s6_addr[12]);
+ else
+ v4 = htonl((ntohs(read_u16(&smp->data.u.ipv6.s6_addr[2])) << 16) +
+ ntohs(read_u16(&smp->data.u.ipv6.s6_addr[4])));
+
+ /* Lookup an IPv4 address in the expression's pattern tree using the longest
+ * match method.
+ */
+ node = ebmb_lookup_longest(&expr->pattern_tree, &v4);
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_IPV4;
+ static_pattern.val.ipv4.addr.s_addr = read_u32(elt->node.key);
+ if (!cidr2dotted(elt->node.node.pfx, &static_pattern.val.ipv4.mask))
+ return NULL;
+ }
+ return &static_pattern;
+ }
+ }
+ }
+
+ /* Lookup in the list. The list contains only IPv4 patterns. */
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ /* The input sample is IPv4, use it as is. */
+ if (smp->data.type == SMP_T_IPV4) {
+ v4 = smp->data.u.ipv4.s_addr;
+ }
+ else if (smp->data.type == SMP_T_IPV6) {
+ /* v4 match on a V6 sample. We want to check at least for
+ * the following forms :
+ * - ::ffff:ip:v4 (ipv4 mapped)
+ * - ::0000:ip:v4 (old ipv4 mapped)
+ * - 2002:ip:v4:: (6to4)
+ */
+ if (read_u64(&smp->data.u.ipv6.s6_addr[0]) == 0 &&
+ (read_u32(&smp->data.u.ipv6.s6_addr[8]) == 0 ||
+ read_u32(&smp->data.u.ipv6.s6_addr[8]) == htonl(0xFFFF))) {
+ v4 = read_u32(&smp->data.u.ipv6.s6_addr[12]);
+ }
+ else if (read_u16(&smp->data.u.ipv6.s6_addr[0]) == htons(0x2002)) {
+ v4 = htonl((ntohs(read_u16(&smp->data.u.ipv6.s6_addr[2])) << 16) +
+ ntohs(read_u16(&smp->data.u.ipv6.s6_addr[4])));
+ }
+ else
+ continue;
+ } else {
+ /* impossible */
+ continue;
+ }
+
+ /* Check if the input sample match the current pattern. */
+ if (((v4 ^ pattern->val.ipv4.addr.s_addr) & pattern->val.ipv4.mask.s_addr) == 0)
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Finds the pattern holding <list> from list head <head> and deletes it.
+ * It is meant for pattern removal within an expression.
+ */
+static void pat_unlink_from_head(void **head, void **list)
+{
+ while (*head) {
+ if (*head == list) {
+ *head = *list;
+ return;
+ }
+ head = *head;
+ }
+}
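+
+/* Illustrative sketch of the chaining walked above and built by the
+ * pat_idx_list_* functions below: each from_ref slot stores the address of
+ * the next element's from_ref slot (NULL at the end), most recently indexed
+ * first. With two hypothetical elements patl1 then patl2:
+ *
+ *   ref->list_head --> &patl2->from_ref --> &patl1->from_ref --> NULL
+ *
+ * Unlinking patl2 simply rewrites ref->list_head to &patl1->from_ref; no
+ * back-pointer is needed.
+ */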
+
+void free_pattern_tree(struct eb_root *root)
+{
+ struct eb_node *node, *next;
+ struct pattern_tree *elt;
+
+ node = eb_first(root);
+ while (node) {
+ next = eb_next(node);
+ eb_delete(node);
+ elt = container_of(node, struct pattern_tree, node);
+ pat_unlink_from_head(&elt->ref->tree_head, &elt->from_ref);
+ free(elt->data);
+ free(elt);
+ node = next;
+ }
+}
+
+void pat_prune_gen(struct pattern_expr *expr)
+{
+ struct pattern_list *pat, *tmp;
+
+ list_for_each_entry_safe(pat, tmp, &expr->patterns, list) {
+ LIST_DELETE(&pat->list);
+ pat_unlink_from_head(&pat->pat.ref->list_head, &pat->from_ref);
+ if (pat->pat.sflags & PAT_SF_REGFREE)
+ regex_free(pat->pat.ptr.ptr);
+ else
+ free(pat->pat.ptr.ptr);
+ free(pat->pat.data);
+ free(pat);
+ }
+
+ free_pattern_tree(&expr->pattern_tree);
+ free_pattern_tree(&expr->pattern_tree_2);
+ LIST_INIT(&expr->patterns);
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt = 0;
+}
+
+/*
+ *
+ * The following functions are used for pattern indexing
+ *
+ */
+
+int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+ patl->pat.ptr.ptr = malloc(patl->pat.len);
+ if (!patl->pat.ptr.ptr) {
+ free(patl);
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+ memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+ patl->pat.ptr.str = malloc(patl->pat.len + 1);
+ if (!patl->pat.ptr.str) {
+ free(patl);
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+ memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
+ patl->pat.ptr.str[patl->pat.len] = '\0';
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+
+ /* compile regex */
+ patl->pat.sflags |= PAT_SF_REGFREE;
+ if (!(patl->pat.ptr.reg = regex_comp(pat->ptr.str, !(expr->mflags & PAT_MF_IGNORE_CASE),
+ cap, err))) {
+ free(patl);
+ return 0;
+ }
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ return pat_idx_list_reg_cap(expr, pat, 0, err);
+}
+
+int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ return pat_idx_list_reg_cap(expr, pat, 1, err);
+}
+
+int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ unsigned int mask;
+ struct pattern_tree *node;
+
+	/* IPv4 addresses are indexed into <pattern_tree> */
+ if (pat->type == SMP_T_IPV4) {
+		/* in the IPv4 case, check if the mask is contiguous so that
+		 * we can insert the network into the tree. A contiguous mask
+		 * has only ones on the left, which means that adding its
+		 * lowest set bit to it once again yields zero.
+		 */
+ mask = ntohl(pat->val.ipv4.mask.s_addr);
+ if (mask + (mask & -mask) == 0) {
+ mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */
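+			/* e.g. (editor's note): mask 255.255.254.0 is
+			 * 0xfffffe00; mask & -mask = 0x200, and 0xfffffe00 +
+			 * 0x200 wraps to 0, so the mask is contiguous and the
+			 * CIDR length is 33 - flsnz(0x200) = 33 - 10 = 23.
+			 */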
+
+ /* node memory allocation */
+ node = calloc(1, sizeof(*node) + 4);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* FIXME: insert <addr>/<mask> into the tree here */
+ memcpy(node->node.key, &pat->val.ipv4.addr, 4); /* network byte order */
+ node->node.node.pfx = mask;
+
+ /* Insert the entry. */
+ ebmb_insert_prefix(&expr->pattern_tree, &node->node, 4);
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+ }
+ else {
+ /* If the mask is not contiguous, just add the pattern to the list */
+ return pat_idx_list_val(expr, pat, err);
+ }
+ }
+ else if (pat->type == SMP_T_IPV6) {
+		/* IPv6 addresses are indexed into <pattern_tree_2> */
+ node = calloc(1, sizeof(*node) + 16);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* FIXME: insert <addr>/<mask> into the tree here */
+ memcpy(node->node.key, &pat->val.ipv6.addr, 16); /* network byte order */
+ node->node.node.pfx = pat->val.ipv6.mask;
+
+ /* Insert the entry. */
+ ebmb_insert_prefix(&expr->pattern_tree_2, &node->node, 16);
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+ }
+
+ return 0;
+}
+
+int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ int len;
+ struct pattern_tree *node;
+
+	/* Only strings can be indexed */
+ if (pat->type != SMP_T_STR) {
+ memprintf(err, "internal error: string expected, but the type is '%s'",
+ smp_to_type[pat->type]);
+ return 0;
+ }
+
+	/* If the flag PAT_MF_IGNORE_CASE is set, we cannot use trees */
+ if (expr->mflags & PAT_MF_IGNORE_CASE)
+ return pat_idx_list_str(expr, pat, err);
+
+ /* Process the key len */
+ len = strlen(pat->ptr.str) + 1;
+
+ /* node memory allocation */
+ node = calloc(1, sizeof(*node) + len);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* copy the string */
+ memcpy(node->node.key, pat->ptr.str, len);
+
+ /* index the new node */
+ ebst_insert(&expr->pattern_tree, &node->node);
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ int len;
+ struct pattern_tree *node;
+
+	/* Only strings can be indexed */
+ if (pat->type != SMP_T_STR) {
+ memprintf(err, "internal error: string expected, but the type is '%s'",
+ smp_to_type[pat->type]);
+ return 0;
+ }
+
+	/* If the flag PAT_MF_IGNORE_CASE is set, we cannot use trees */
+ if (expr->mflags & PAT_MF_IGNORE_CASE)
+ return pat_idx_list_str(expr, pat, err);
+
+ /* Process the key len */
+ len = strlen(pat->ptr.str);
+
+ /* node memory allocation */
+ node = calloc(1, sizeof(*node) + len + 1);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* copy the string and the trailing zero */
+ memcpy(node->node.key, pat->ptr.str, len + 1);
+ node->node.node.pfx = len * 8;
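+	/* e.g. (editor's note): the prefix "/img" is indexed with
+	 * node.pfx = 32 bits, so a longest-prefix lookup on the string
+	 * "/img/logo.png" will match this node.
+	 */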
+
+ /* index the new node */
+ ebmb_insert_prefix(&expr->pattern_tree, &node->node, len);
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+/* Deletes all patterns instantiated from the reference element <elt> under
+ * reference <ref>. Note that all the expressions using them must be locked,
+ * and the pattern reference lock must be held as well.
+ */
+void pat_delete_gen(struct pat_ref *ref, struct pat_ref_elt *elt)
+{
+ struct pattern_tree *tree;
+ struct pattern_list *pat;
+ void **node;
+
+ /* delete all known tree nodes. They are all allocated inline */
+ for (node = elt->tree_head; node;) {
+ tree = container_of(node, struct pattern_tree, from_ref);
+ node = *node;
+ BUG_ON(tree->ref != elt);
+
+ ebmb_delete(&tree->node);
+ free(tree->data);
+ free(tree);
+ }
+
+ /* delete all list nodes and free their pattern entries (str/reg) */
+ for (node = elt->list_head; node;) {
+ pat = container_of(node, struct pattern_list, from_ref);
+ node = *node;
+ BUG_ON(pat->pat.ref != elt);
+
+ /* Delete and free entry. */
+ LIST_DELETE(&pat->list);
+ if (pat->pat.sflags & PAT_SF_REGFREE)
+ regex_free(pat->pat.ptr.reg);
+ else
+ free(pat->pat.ptr.ptr);
+ free(pat->pat.data);
+ free(pat);
+ }
+
+ /* update revision number to refresh the cache */
+ ref->revision = rdtsc();
+ ref->entry_cnt--;
+ elt->tree_head = NULL;
+ elt->list_head = NULL;
+}
+
+void pattern_init_expr(struct pattern_expr *expr)
+{
+ LIST_INIT(&expr->patterns);
+ expr->pattern_tree = EB_ROOT;
+ expr->pattern_tree_2 = EB_ROOT;
+}
+
+void pattern_init_head(struct pattern_head *head)
+{
+ LIST_INIT(&head->head);
+}
+
+/* The following functions are relative to the management of the reference
+ * lists. These lists are used to store the original patterns and their
+ * associated values in string form.
+ *
+ * This is used with modifiable ACLs and MAPs.
+ *
+ * Pattern references are stored with two identifiers: the unique_id and
+ * the reference.
+ *
+ * The reference identifies a file. Each file with the same name points to
+ * the same reference. One file may be registered several times. If the
+ * file is modified, all its dependencies are modified as well. The
+ * reference can be used with maps or ACLs.
+ *
+ * The unique_id identifies an inline ACL. The unique_id is unique for
+ * each ACL. The same id cannot be forced twice in the configuration file,
+ * as this reports an error.
+ *
+ * A particular case appears when the filename is a number. In this case,
+ * the unique_id is set to the number represented by the filename and the
+ * reference is also set. This method prevents duplicate unique_ids.
+ */
+
+/* This function looks up a reference by name. If the reference is found, a
+ * pointer to the struct pat_ref is returned, otherwise NULL is returned.
+ */
+struct pat_ref *pat_ref_lookup(const char *reference)
+{
+ struct pat_ref *ref;
+
+ list_for_each_entry(ref, &pattern_reference, list)
+ if (ref->reference && strcmp(reference, ref->reference) == 0)
+ return ref;
+ return NULL;
+}
+
+/* This function looks up a reference's unique id. If the reference is found, a
+ * pointer to the struct pat_ref is returned, otherwise NULL is returned.
+ */
+struct pat_ref *pat_ref_lookupid(int unique_id)
+{
+ struct pat_ref *ref;
+
+ list_for_each_entry(ref, &pattern_reference, list)
+ if (ref->unique_id == unique_id)
+ return ref;
+ return NULL;
+}
+
+/* This function removes from the pattern reference <ref> all the patterns
+ * attached to the reference element <elt>, and the element itself. The
+ * reference must be locked.
+ */
+void pat_ref_delete_by_ptr(struct pat_ref *ref, struct pat_ref_elt *elt)
+{
+ struct pattern_expr *expr;
+ struct bref *bref, *back;
+
+ /*
+ * we have to unlink all watchers from this reference pattern. We must
+ * not relink them if this elt was the last one in the list.
+ */
+ list_for_each_entry_safe(bref, back, &elt->back_refs, users) {
+ LIST_DELETE(&bref->users);
+ LIST_INIT(&bref->users);
+ if (elt->list.n != &ref->head)
+ LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
+ bref->ref = elt->list.n;
+ }
+
+ /* delete all entries from all expressions for this pattern */
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+
+ pat_delete_gen(ref, elt);
+
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+
+ LIST_DELETE(&elt->list);
+ free(elt->sample);
+ free(elt->pattern);
+ free(elt);
+}
+
+/* This function removes all the patterns matching the pointer <refelt> from
+ * the reference and from each expr member of this reference. This function
+ * returns 1 if the entry was found and deleted, otherwise zero.
+ */
+int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt)
+{
+ struct pat_ref_elt *elt, *safe;
+
+ /* delete pattern from reference */
+ list_for_each_entry_safe(elt, safe, &ref->head, list) {
+ if (elt == refelt) {
+ pat_ref_delete_by_ptr(ref, elt);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* This function removes all patterns matching <key> from the reference
+ * and from each expr member of the reference. This function returns 1
+ * if the deletion is done and returns 0 if the entry is not found.
+ */
+int pat_ref_delete(struct pat_ref *ref, const char *key)
+{
+ struct pat_ref_elt *elt, *safe;
+ int found = 0;
+
+ /* delete pattern from reference */
+ list_for_each_entry_safe(elt, safe, &ref->head, list) {
+ if (strcmp(key, elt->pattern) == 0) {
+ pat_ref_delete_by_ptr(ref, elt);
+ found = 1;
+ }
+ }
+
+ return found;
+}
+
+/*
+ * Finds and returns the element matching <key> in reference <ref>,
+ * or NULL if not found.
+ */
+struct pat_ref_elt *pat_ref_find_elt(struct pat_ref *ref, const char *key)
+{
+ struct pat_ref_elt *elt;
+
+ list_for_each_entry(elt, &ref->head, list) {
+ if (strcmp(key, elt->pattern) == 0)
+ return elt;
+ }
+
+ return NULL;
+}
+
+
+/* This function modifies the sample of pat_ref_elt <elt> in all expressions
+ * found under <ref> to become <value>. It is assumed that the caller has
+ * already verified that <elt> belongs to <ref>.
+ */
+static inline int pat_ref_set_elt(struct pat_ref *ref, struct pat_ref_elt *elt,
+ const char *value, char **err)
+{
+ struct pattern_expr *expr;
+ struct sample_data **data;
+ char *sample;
+ struct sample_data test;
+
+ /* Try all needed converters. */
+ list_for_each_entry(expr, &ref->pat, list) {
+ if (!expr->pat_head->parse_smp)
+ continue;
+
+ if (!expr->pat_head->parse_smp(value, &test)) {
+ memprintf(err, "unable to parse '%s'", value);
+ return 0;
+ }
+ }
+
+ /* Modify pattern from reference. */
+ sample = strdup(value);
+ if (!sample) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+	/* Load the sample in each expression. All the conversions were
+	 * already tested above, so these calls normally don't fail.
+	 */
+ list_for_each_entry(expr, &ref->pat, list) {
+ if (!expr->pat_head->parse_smp)
+ continue;
+
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+ data = pattern_find_smp(expr, elt);
+ if (data && *data && !expr->pat_head->parse_smp(sample, *data))
+ *data = NULL;
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+ }
+
+ /* free old sample only when all exprs are updated */
+ free(elt->sample);
+ elt->sample = sample;
+
+ return 1;
+}
+
+/* This function modifies the sample of pat_ref_elt <refelt> in all expressions
+ * found under <ref> to become <value>, after checking that <refelt> really
+ * belongs to <ref>.
+ */
+int pat_ref_set_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt, const char *value, char **err)
+{
+ struct pat_ref_elt *elt;
+
+ /* Look for pattern in the reference. */
+ list_for_each_entry(elt, &ref->head, list) {
+ if (elt == refelt) {
+ if (!pat_ref_set_elt(ref, elt, value, err))
+ return 0;
+ return 1;
+ }
+ }
+
+ memprintf(err, "key or pattern not found");
+ return 0;
+}
+
+/* This function modifies to <value> the sample of all patterns matching <key>
+ * under <ref>.
+ */
+int pat_ref_set(struct pat_ref *ref, const char *key, const char *value, char **err)
+{
+ struct pat_ref_elt *elt;
+ int found = 0;
+ char *_merr;
+ char **merr;
+
+ if (err) {
+ merr = &_merr;
+ *merr = NULL;
+ }
+ else
+ merr = NULL;
+
+ /* Look for pattern in the reference. */
+ list_for_each_entry(elt, &ref->head, list) {
+ if (strcmp(key, elt->pattern) == 0) {
+ if (!pat_ref_set_elt(ref, elt, value, merr)) {
+ if (err && merr) {
+ if (!found) {
+ *err = *merr;
+ } else {
+ memprintf(err, "%s, %s", *err, *merr);
+ ha_free(merr);
+ }
+ }
+ }
+ found = 1;
+ }
+ }
+
+ if (!found) {
+ memprintf(err, "entry not found");
+ return 0;
+ }
+ return 1;
+}
+
+/* This function creates a new reference. <reference> is the reference name.
+ * <flags> are PAT_REF_*. /!\ The reference is not checked, and must
+ * be unique. The user must check the reference with "pat_ref_lookup()"
+ * before calling this function. If the function fails, it returns NULL,
+ * otherwise it returns the new struct pat_ref.
+ */
+struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned int flags)
+{
+ struct pat_ref *ref;
+
+ ref = calloc(1, sizeof(*ref));
+ if (!ref)
+ return NULL;
+
+ if (display) {
+ ref->display = strdup(display);
+ if (!ref->display) {
+ free(ref);
+ return NULL;
+ }
+ }
+
+ ref->reference = strdup(reference);
+ if (!ref->reference) {
+ free(ref->display);
+ free(ref);
+ return NULL;
+ }
+
+ ref->flags = flags;
+ ref->unique_id = -1;
+ ref->revision = 0;
+ ref->entry_cnt = 0;
+
+ LIST_INIT(&ref->head);
+ LIST_INIT(&ref->pat);
+ HA_SPIN_INIT(&ref->lock);
+ LIST_APPEND(&pattern_reference, &ref->list);
+
+ return ref;
+}
+
+/* This function creates a new reference. <unique_id> is the unique id. If
+ * the value of <unique_id> is -1, the unique id is calculated later.
+ * <flags> are PAT_REF_*. /!\ The reference is not checked, and must
+ * be unique. The user must check the reference with "pat_ref_lookup()"
+ * or pat_ref_lookupid before calling this function. If the function
+ * fails, it returns NULL, otherwise it returns the new struct pat_ref.
+ */
+struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int flags)
+{
+ struct pat_ref *ref;
+
+ ref = calloc(1, sizeof(*ref));
+ if (!ref)
+ return NULL;
+
+ if (display) {
+ ref->display = strdup(display);
+ if (!ref->display) {
+ free(ref);
+ return NULL;
+ }
+ }
+
+ ref->reference = NULL;
+ ref->flags = flags;
+ ref->curr_gen = 0;
+ ref->next_gen = 0;
+ ref->unique_id = unique_id;
+ LIST_INIT(&ref->head);
+ LIST_INIT(&ref->pat);
+ HA_SPIN_INIT(&ref->lock);
+ LIST_APPEND(&pattern_reference, &ref->list);
+
+ return ref;
+}
+
+/* This function adds an entry to <ref>. It can fail on memory error. It returns
+ * the newly added element on success, or NULL on failure. The PATREF_LOCK on
+ * <ref> must be held. It sets the newly created pattern's generation number
+ * to the same value as the reference's.
+ */
+struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, const char *sample, int line)
+{
+ struct pat_ref_elt *elt;
+
+ elt = calloc(1, sizeof(*elt));
+ if (!elt)
+ goto fail;
+
+ elt->gen_id = ref->curr_gen;
+ elt->line = line;
+
+ elt->pattern = strdup(pattern);
+ if (!elt->pattern)
+ goto fail;
+
+ if (sample) {
+ elt->sample = strdup(sample);
+ if (!elt->sample)
+ goto fail;
+ }
+
+ LIST_INIT(&elt->back_refs);
+ elt->list_head = NULL;
+ elt->tree_head = NULL;
+ LIST_APPEND(&ref->head, &elt->list);
+ return elt;
+ fail:
+ if (elt)
+ free(elt->pattern);
+ free(elt);
+ return NULL;
+}
+
+/* This function creates the sample found in <elt>, parses the pattern also
+ * found in <elt> and inserts it in <expr>. <patflags> is currently unused
+ * here. If the function fails, it returns 0 and <err> is filled. On
+ * success, the function returns 1.
+ */
+int pat_ref_push(struct pat_ref_elt *elt, struct pattern_expr *expr,
+ int patflags, char **err)
+{
+ struct sample_data *data;
+ struct pattern pattern;
+
+ /* Create sample */
+ if (elt->sample && expr->pat_head->parse_smp) {
+ /* New sample. */
+ data = malloc(sizeof(*data));
+ if (!data)
+ return 0;
+
+ /* Parse value. */
+ if (!expr->pat_head->parse_smp(elt->sample, data)) {
+ memprintf(err, "unable to parse '%s'", elt->sample);
+ free(data);
+ return 0;
+ }
+
+ }
+ else
+ data = NULL;
+
+ /* initialise pattern */
+ memset(&pattern, 0, sizeof(pattern));
+ pattern.data = data;
+ pattern.ref = elt;
+
+ /* parse pattern */
+ if (!expr->pat_head->parse(elt->pattern, &pattern, expr->mflags, err)) {
+ free(data);
+ return 0;
+ }
+
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+ /* index pattern */
+ if (!expr->pat_head->index(expr, &pattern, err)) {
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+ free(data);
+ return 0;
+ }
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+
+ return 1;
+}
+
+/* This function tries to commit entry <elt> into <ref>. The new entry must
+ * have already been inserted using pat_ref_append(), and its generation number
+ * may have been adjusted as it will not be changed. <err> must point to a NULL
+ * pointer. The PATREF lock on <ref> must be held. All the pattern_expr for
+ * this reference will be updated (parsing, indexing). On success, non-zero is
+ * returned. On failure, the whole operation is rolled back (the element is
+ * deleted from all expressions and is freed), zero is returned and the error
+ * pointer <err> may have been updated (and the caller must free it). Failure
+ * causes include memory allocation, parsing error or indexing error.
+ */
+int pat_ref_commit_elt(struct pat_ref *ref, struct pat_ref_elt *elt, char **err)
+{
+ struct pattern_expr *expr;
+
+ list_for_each_entry(expr, &ref->pat, list) {
+ if (!pat_ref_push(elt, expr, 0, err)) {
+ pat_ref_delete_by_ptr(ref, elt);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* Loads <pattern>:<sample> into <ref> for generation <gen>. <sample> may be
+ * NULL if none exists (e.g. ACL). If not needed, the generation number should
+ * be set to ref->curr_gen. The error pointer must initially point to NULL. The
+ * new entry will be propagated to all use places, involving allocation, parsing
+ * and indexing. On error (parsing, allocation), the operation will be rolled
+ * back, an error may be reported, and NULL will be returned. On success, the
+ * freshly allocated element will be returned. The PATREF lock on <ref> must be
+ * held during the operation.
+ */
+struct pat_ref_elt *pat_ref_load(struct pat_ref *ref, unsigned int gen,
+ const char *pattern, const char *sample,
+ int line, char **err)
+{
+ struct pat_ref_elt *elt;
+
+ elt = pat_ref_append(ref, pattern, sample, line);
+ if (elt) {
+ elt->gen_id = gen;
+ if (!pat_ref_commit_elt(ref, elt, err))
+ elt = NULL;
+ } else
+ memprintf(err, "out of memory error");
+
+ return elt;
+}
+
+/* This function adds an entry to <ref>. It can fail on memory error. The
+ * new entry is added to all the pattern_expr registered with this
+ * reference. The function stops on the first error encountered, in which
+ * case it returns 0, <err> is filled and the whole add operation is
+ * cancelled. If the insertion succeeds, the function returns 1.
+ */
+int pat_ref_add(struct pat_ref *ref,
+ const char *pattern, const char *sample,
+ char **err)
+{
+ return !!pat_ref_load(ref, ref->curr_gen, pattern, sample, -1, err);
+}
+
+/* This function purges all elements from <ref> whose generation is included in
+ * the range of <from> to <to> (inclusive), taking wrapping into consideration.
+ * It will not purge more than <budget> entries at once, in order to remain
+ * responsive. If budget is negative, no limit is applied.
+ * The caller must already hold the PATREF_LOCK on <ref>. The function will
+ * take the PATEXP_LOCK on all expressions of the pattern as needed. It returns
+ * non-zero on completion, or zero if it had to stop before the end after
+ * <budget> was depleted.
+ */
+int pat_ref_purge_range(struct pat_ref *ref, uint from, uint to, int budget)
+{
+ struct pat_ref_elt *elt, *elt_bck;
+ struct bref *bref, *bref_bck;
+ struct pattern_expr *expr;
+ int done;
+
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+
+	/* all exprs are locked, we can safely remove all pat_ref elements */
+
+ /* assume completion for e.g. empty lists */
+ done = 1;
+ list_for_each_entry_safe(elt, elt_bck, &ref->head, list) {
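+		/* Wrap-safe range test (editor's note): an element is purged
+		 * when its gen_id lies within [from..to] modulo 2^32. E.g.
+		 * with from=0xfffffffe and to=1, to-from is 3, so generations
+		 * 0xfffffffe, 0xffffffff, 0 and 1 are selected.
+		 */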
+ if (elt->gen_id - from > to - from)
+ continue;
+
+ if (budget >= 0 && !budget--) {
+ done = 0;
+ break;
+ }
+
+ /*
+ * we have to unlink all watchers from this reference pattern. We must
+ * not relink them if this elt was the last one in the list.
+ */
+ list_for_each_entry_safe(bref, bref_bck, &elt->back_refs, users) {
+ LIST_DELETE(&bref->users);
+ LIST_INIT(&bref->users);
+ if (elt->list.n != &ref->head)
+ LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
+ bref->ref = elt->list.n;
+ }
+
+ /* delete the storage for all representations of this pattern. */
+ pat_delete_gen(ref, elt);
+
+ LIST_DELETE(&elt->list);
+ free(elt->pattern);
+ free(elt->sample);
+ free(elt);
+ }
+
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+
+#if defined(HA_HAVE_MALLOC_TRIM)
+ if (done) {
+ malloc_trim(0);
+ }
+#endif
+
+ return done;
+}
+
+/* This function prunes all entries of <ref> and all their associated
+ * pattern_expr. It may return before the end of the list is reached,
+ * returning 0 to yield and indicate to the caller that it must be called
+ * again until it returns non-zero. All patterns are purged, both current
+ * ones and future or incomplete ones. This is used by "clear map" or
+ * "clear acl".
+ */
+int pat_ref_prune(struct pat_ref *ref)
+{
+ return pat_ref_purge_range(ref, 0, ~0, 100);
+}
+
+/* This function looks up any existing reference <ref> in pattern_head <head>, and
+ * returns the associated pattern_expr pointer if found, otherwise NULL.
+ */
+struct pattern_expr *pattern_lookup_expr(struct pattern_head *head, struct pat_ref *ref)
+{
+ struct pattern_expr_list *expr;
+
+ list_for_each_entry(expr, &head->head, list)
+ if (expr->expr->ref == ref)
+ return expr->expr;
+ return NULL;
+}
+
+/* This function creates a new pattern_expr associated with reference <ref>.
+ * <ref> can be NULL. If an error occurs, the function returns NULL and
+ * <err> is filled. Otherwise, the function returns the new pattern_expr
+ * linked with <head> and <ref>.
+ *
+ * The returned value can be an already filled pattern expression; in this
+ * case the flag <reuse> is set.
+ */
+struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref *ref,
+ int patflags, char **err, int *reuse)
+{
+ struct pattern_expr *expr;
+ struct pattern_expr_list *list;
+
+ if (reuse)
+ *reuse = 0;
+
+ /* Memory and initialization of the chain element. */
+ list = calloc(1, sizeof(*list));
+ if (!list) {
+ memprintf(err, "out of memory");
+ return NULL;
+ }
+
+	/* Look for an existing similar expr. Note that only the index, parse
+	 * and parse_smp functions must be identical for two patterns to be
+	 * considered similar. The other functions depend on these first ones.
+	 */
+ if (ref) {
+ list_for_each_entry(expr, &ref->pat, list)
+ if (expr->pat_head->index == head->index &&
+ expr->pat_head->parse == head->parse &&
+ expr->pat_head->parse_smp == head->parse_smp &&
+ expr->mflags == patflags)
+ break;
+ if (&expr->list == &ref->pat)
+ expr = NULL;
+ }
+ else
+ expr = NULL;
+
+ /* If no similar expr was found, we create new expr. */
+ if (!expr) {
+		/* Allocate memory for the expr struct. */
+ expr = calloc(1, sizeof(*expr));
+ if (!expr) {
+ free(list);
+ memprintf(err, "out of memory");
+ return NULL;
+ }
+
+ /* Initialize this new expr. */
+ pattern_init_expr(expr);
+
+ /* Copy the pattern matching and indexing flags. */
+ expr->mflags = patflags;
+
+		/* This new pattern expression references its head. */
+ expr->pat_head = head;
+
+ /* Link with ref, or to self to facilitate LIST_DELETE() */
+ if (ref)
+ LIST_APPEND(&ref->pat, &expr->list);
+ else
+ LIST_INIT(&expr->list);
+
+ expr->ref = ref;
+
+ HA_RWLOCK_INIT(&expr->lock);
+
+		/* We must free this pattern if it is no longer used. */
+ list->do_free = 1;
+ }
+ else {
+		/* If the pattern expression already exists, it is already
+		 * linked with <ref> and we must not free it.
+		 */
+ list->do_free = 0;
+ if (reuse)
+ *reuse = 1;
+ }
+
+	/* The new list element references the pattern_expr. */
+ list->expr = expr;
+
+ /* Link the list element with the pattern_head. */
+ LIST_APPEND(&head->head, &list->list);
+ return expr;
+}
+
+/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
+ * be returned there on errors and the caller will have to free it.
+ *
+ * The file contains one key + value per line. Lines which start with '#' are
+ * ignored, just like empty lines. Leading tabs/spaces are stripped. The key is
+ * then the first "word" (series of non-space/tabs characters), and the value is
+ * what follows this series of space/tab till the end of the line excluding
+ * trailing spaces/tabs.
+ *
+ * Example :
+ *
+ * # this is a comment and is ignored
+ * 62.212.114.60 1wt.eu \n
+ * <-><-----------><---><----><---->
+ * | | | | `--- trailing spaces ignored
+ * | | | `-------- value
+ * | | `--------------- middle spaces ignored
+ * | `------------------------ key
+ * `-------------------------------- leading spaces ignored
+ *
+ * Return non-zero in case of success, otherwise 0.
+ */
+int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char **err)
+{
+ FILE *file;
+ char *c;
+ int ret = 0;
+ int line = 0;
+ char *key_beg;
+ char *key_end;
+ char *value_beg;
+ char *value_end;
+
+ file = fopen(filename, "r");
+ if (!file) {
+ memprintf(err, "failed to open pattern file <%s>", filename);
+ return 0;
+ }
+
+	/* now parse all patterns. The file may contain only one pattern
+	 * followed by one value per line. Leading spaces, separator spaces
+	 * and trailing spaces are stripped. Lines whose first character is
+	 * '#' are treated as comments and ignored.
+	 */
+ while (fgets(trash.area, trash.size, file) != NULL) {
+ line++;
+ c = trash.area;
+
+		/* ignore comment lines beginning with a '#' */
+ if (*c == '#')
+ continue;
+
+ /* strip leading spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ /* empty lines are ignored too */
+ if (*c == '\0' || *c == '\r' || *c == '\n')
+ continue;
+
+ /* look for the end of the key */
+ key_beg = c;
+ while (*c && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
+ c++;
+
+ key_end = c;
+
+ /* strip middle spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ /* look for the end of the value, it is the end of the line */
+ value_beg = c;
+ while (*c && *c != '\n' && *c != '\r')
+ c++;
+ value_end = c;
+
+ /* trim possibly trailing spaces and tabs */
+ while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t'))
+ value_end--;
+
+ /* set final \0 and check entries */
+ *key_end = '\0';
+ *value_end = '\0';
+
+ /* insert values */
+ if (!pat_ref_append(ref, key_beg, value_beg, line)) {
+ memprintf(err, "out of memory");
+ goto out_close;
+ }
+ }
+
+ if (ferror(file)) {
+ memprintf(err, "error encountered while reading <%s> : %s",
+ filename, strerror(errno));
+ goto out_close;
+ }
+ /* success */
+ ret = 1;
+
+ out_close:
+ fclose(file);
+ return ret;
+}
+
+/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
+ * be returned there on errors and the caller will have to free it.
+ */
+int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err)
+{
+ FILE *file;
+ char *c;
+ char *arg;
+ int ret = 0;
+ int line = 0;
+
+ file = fopen(filename, "r");
+ if (!file) {
+ memprintf(err, "failed to open pattern file <%s>", filename);
+ return 0;
+ }
+
+ /* now parse all patterns. The file may contain only one pattern per
+ * line. If the line contains spaces, they will be part of the pattern.
+ * The pattern stops at the first CR, LF or EOF encountered.
+ */
+ while (fgets(trash.area, trash.size, file) != NULL) {
+ line++;
+ c = trash.area;
+
+		/* ignore comment lines beginning with a '#' */
+ if (*c == '#')
+ continue;
+
+ /* strip leading spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ arg = c;
+ while (*c && *c != '\n' && *c != '\r')
+ c++;
+ *c = 0;
+
+ /* empty lines are ignored too */
+ if (c == arg)
+ continue;
+
+ if (!pat_ref_append(ref, arg, NULL, line)) {
+ memprintf(err, "out of memory when loading patterns from file <%s>", filename);
+ goto out_close;
+ }
+ }
+
+ if (ferror(file)) {
+ memprintf(err, "error encountered while reading <%s> : %s",
+ filename, strerror(errno));
+ goto out_close;
+ }
+ ret = 1; /* success */
+
+ out_close:
+ fclose(file);
+ return ret;
+}
+
+int pattern_read_from_file(struct pattern_head *head, unsigned int refflags,
+ const char *filename, int patflags, int load_smp,
+ char **err, const char *file, int line)
+{
+ struct pat_ref *ref;
+ struct pattern_expr *expr;
+ struct pat_ref_elt *elt;
+ int reuse = 0;
+
+ /* Lookup for the existing reference. */
+ ref = pat_ref_lookup(filename);
+
+	/* If the reference doesn't exist, create it and load the associated file. */
+ if (!ref) {
+ chunk_printf(&trash,
+ "pattern loaded from file '%s' used by %s at file '%s' line %d",
+ filename, refflags & PAT_REF_MAP ? "map" : "acl", file, line);
+
+ ref = pat_ref_new(filename, trash.area, refflags);
+ if (!ref) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+
+ if (load_smp) {
+ ref->flags |= PAT_REF_SMP;
+ if (!pat_ref_read_from_file_smp(ref, filename, err))
+ return 0;
+ }
+ else {
+ if (!pat_ref_read_from_file(ref, filename, err))
+ return 0;
+ }
+ }
+ else {
+ /* The reference already exists, check the map compatibility. */
+
+		/* If the load requires samples and the flag PAT_REF_SMP is
+		 * not set, the reference doesn't contain samples and cannot
+		 * be used.
+		 */
+		if (load_smp) {
+			if (!(ref->flags & PAT_REF_SMP)) {
+				memprintf(err, "The file \"%s\" is already used as a one-column file "
+				               "and cannot be used as a two-column file.",
+				               filename);
+ return 0;
+ }
+ }
+		else {
+			/* The load doesn't require samples. If the flag
+			 * PAT_REF_SMP is set, the reference contains samples
+			 * and cannot be used.
+			 */
+			if (ref->flags & PAT_REF_SMP) {
+				memprintf(err, "The file \"%s\" is already used as a two-column file "
+				               "and cannot be used as a one-column file.",
+				               filename);
+ return 0;
+ }
+ }
+
+		/* Extend the display string */
+ chunk_printf(&trash, "%s", ref->display);
+ chunk_appendf(&trash, ", by %s at file '%s' line %d",
+ refflags & PAT_REF_MAP ? "map" : "acl", file, line);
+ free(ref->display);
+ ref->display = strdup(trash.area);
+ if (!ref->display) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+
+ /* Merge flags. */
+ ref->flags |= refflags;
+ }
+
+	/* Now we can load the patterns from the reference. */
+
+	/* Look up the existing expression for this reference in the head.
+	 * If it doesn't exist, create it.
+	 */
+ expr = pattern_lookup_expr(head, ref);
+ if (!expr || (expr->mflags != patflags)) {
+ expr = pattern_new_expr(head, ref, patflags, err, &reuse);
+ if (!expr)
+ return 0;
+ }
+
+	/* The returned expression may not be empty, because the function
+	 * "pattern_new_expr" looks for a similar pattern list and can reuse
+	 * an already filled one. In this case, we must not reload the
+	 * patterns.
+	 */
+ if (reuse)
+ return 1;
+
+ /* Load reference content in the pattern expression. */
+ list_for_each_entry(elt, &ref->head, list) {
+ if (!pat_ref_push(elt, expr, patflags, err)) {
+ if (elt->line > 0)
+ memprintf(err, "%s at line %d of file '%s'",
+ *err, elt->line, filename);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/* This function executes a pattern match on a sample. It applies the
+ * patterns from <head> to sample <smp>. The function returns NULL if the
+ * sample doesn't match, and a non-null pattern if it does. If <fill> is
+ * true and the sample matches, the function returns the matched pattern.
+ * In many cases, this pattern can be a static buffer.
+ */
+struct pattern *pattern_exec_match(struct pattern_head *head, struct sample *smp, int fill)
+{
+ struct pattern_expr_list *list;
+ struct pattern *pat;
+
+ if (!head->match) {
+ if (fill) {
+ static_pattern.data = NULL;
+ static_pattern.ref = NULL;
+ static_pattern.sflags = 0;
+ static_pattern.type = SMP_T_SINT;
+ static_pattern.val.i = 1;
+ }
+ return &static_pattern;
+ }
+
+	/* convert input to the expected type */
+ if (!sample_convert(smp, head->expect_type))
+ return NULL;
+
+ list_for_each_entry(list, &head->head, list) {
+ HA_RWLOCK_RDLOCK(PATEXP_LOCK, &list->expr->lock);
+ pat = head->match(smp, list->expr, fill);
+ if (pat) {
+			/* We duplicate the pattern because it could be
+			 * modified by another thread.
+			 */
+ if (pat != &static_pattern) {
+ memcpy(&static_pattern, pat, sizeof(struct pattern));
+ pat = &static_pattern;
+ }
+
+			/* We also duplicate the sample data for the
+			 * same reason.
+			 */
+ if (pat->data && (pat->data != &static_sample_data)) {
+ switch(pat->data->type) {
+ case SMP_T_STR:
+ static_sample_data.type = SMP_T_STR;
+ static_sample_data.u.str = *get_trash_chunk();
+ static_sample_data.u.str.data = pat->data->u.str.data;
+ if (static_sample_data.u.str.data >= static_sample_data.u.str.size)
+ static_sample_data.u.str.data = static_sample_data.u.str.size - 1;
+ memcpy(static_sample_data.u.str.area,
+ pat->data->u.str.area, static_sample_data.u.str.data);
+ static_sample_data.u.str.area[static_sample_data.u.str.data] = 0;
+ pat->data = &static_sample_data;
+ break;
+
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ case SMP_T_SINT:
+ memcpy(&static_sample_data, pat->data, sizeof(struct sample_data));
+ pat->data = &static_sample_data;
+ break;
+ default:
+ /* unimplemented pattern type */
+ pat->data = NULL;
+ break;
+ }
+ }
+ HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
+ return pat;
+ }
+ HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
+ }
+ return NULL;
+}
+
+/* This function prunes the pattern expressions starting at pattern_head <head>. */
+void pattern_prune(struct pattern_head *head)
+{
+ struct pattern_expr_list *list, *safe;
+
+ list_for_each_entry_safe(list, safe, &head->head, list) {
+ LIST_DELETE(&list->list);
+ if (list->do_free) {
+ LIST_DELETE(&list->expr->list);
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &list->expr->lock);
+ head->prune(list->expr);
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &list->expr->lock);
+ free(list->expr);
+ }
+ free(list);
+ }
+}
+
+/* This function searches for occurrences of the pattern reference element
+ * <ref> in expression <expr> and returns a pointer to the sample storage
+ * pointer. If <ref> is not found, NULL is returned.
+ */
+struct sample_data **pattern_find_smp(struct pattern_expr *expr, struct pat_ref_elt *ref)
+{
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+ struct pattern_list *pat;
+
+ for (node = ebmb_first(&expr->pattern_tree);
+ node;
+ node = ebmb_next(node)) {
+ elt = container_of(node, struct pattern_tree, node);
+ if (elt->ref == ref)
+ return &elt->data;
+ }
+
+ for (node = ebmb_first(&expr->pattern_tree_2);
+ node;
+ node = ebmb_next(node)) {
+ elt = container_of(node, struct pattern_tree, node);
+ if (elt->ref == ref)
+ return &elt->data;
+ }
+
+ list_for_each_entry(pat, &expr->patterns, list)
+ if (pat->pat.ref == ref)
+ return &pat->pat.data;
+
+ return NULL;
+}
+
+/* This function compares two pat_ref** on their unique_id, and returns -1/0/1
+ * depending on their order (suitable for sorting).
+ */
+static int cmp_pat_ref(const void *_a, const void *_b)
+{
+ struct pat_ref * const *a = _a;
+ struct pat_ref * const *b = _b;
+
+ if ((*a)->unique_id < (*b)->unique_id)
+ return -1;
+ else if ((*a)->unique_id > (*b)->unique_id)
+ return 1;
+ return 0;
+}
+
+/* This function finalizes the configuration parsing. It sets all the
+ * automatic ids.
+ */
+int pattern_finalize_config(void)
+{
+ size_t len = 0;
+ size_t unassigned_pos = 0;
+ int next_unique_id = 0;
+ size_t i, j;
+ struct pat_ref *ref, **arr;
+ struct list pr = LIST_HEAD_INIT(pr);
+
+ pat_lru_seed = ha_random();
+
+	/* Count the pat_refs with a user-defined unique_id, and the total count */
+ list_for_each_entry(ref, &pattern_reference, list) {
+ len++;
+ if (ref->unique_id != -1)
+ unassigned_pos++;
+ }
+
+ if (len == 0) {
+ return 0;
+ }
+
+ arr = calloc(len, sizeof(*arr));
+ if (arr == NULL) {
+ ha_alert("Out of memory error.\n");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ i = 0;
+ j = unassigned_pos;
+ list_for_each_entry(ref, &pattern_reference, list) {
+ if (ref->unique_id != -1)
+ arr[i++] = ref;
+ else
+ arr[j++] = ref;
+ }
+
+ /* Sort first segment of array with user-defined unique ids for
+ * fast lookup when generating unique ids
+ */
+ qsort(arr, unassigned_pos, sizeof(*arr), cmp_pat_ref);
+
+ /* Assign unique ids to the rest of the elements */
+ for (i = unassigned_pos; i < len; i++) {
+ do {
+ arr[i]->unique_id = next_unique_id++;
+ } while (bsearch(&arr[i], arr, unassigned_pos, sizeof(*arr), cmp_pat_ref));
+ }
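+
+	/* e.g. (editor's note): with user-assigned ids {0, 2}, the loop
+	 * above hands out 1 to the first unassigned ref (0 collides), then
+	 * 3 (2 collides); each collision test is a bsearch() over the
+	 * sorted user-assigned segment.
+	 */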
+
+ /* Sort complete array */
+ qsort(arr, len, sizeof(*arr), cmp_pat_ref);
+
+ /* Convert back to linked list */
+ for (i = 0; i < len; i++)
+ LIST_APPEND(&pr, &arr[i]->list);
+
+ /* swap root */
+ LIST_INSERT(&pr, &pattern_reference);
+ LIST_DELETE(&pr);
+
+ free(arr);
+ return 0;
+}
+
+static int pattern_per_thread_lru_alloc()
+{
+ if (!global.tune.pattern_cache)
+ return 1;
+ pat_lru_tree = lru64_new(global.tune.pattern_cache);
+ return !!pat_lru_tree;
+}
+
+static void pattern_per_thread_lru_free()
+{
+ lru64_destroy(pat_lru_tree);
+}
+
+REGISTER_PER_THREAD_ALLOC(pattern_per_thread_lru_alloc);
+REGISTER_PER_THREAD_FREE(pattern_per_thread_lru_free);
diff --git a/src/payload.c b/src/payload.c
new file mode 100644
index 0000000..04d3a57
--- /dev/null
+++ b/src/payload.c
@@ -0,0 +1,1448 @@
+/*
+ * General protocol-agnostic payload-based sample fetches and ACLs
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/channel.h>
+#include <haproxy/connection.h>
+#include <haproxy/htx.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/pattern.h>
+#include <haproxy/payload.h>
+#include <haproxy/sample.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+
+/************************************************************************/
+/* All supported sample fetch functions must be declared here */
+/************************************************************************/
+
+/* wait for more data as long as possible, then return TRUE. This should be
+ * used with content inspection.
+ */
+static int
+smp_fetch_wait_end(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!(smp->opt & SMP_OPT_FINAL)) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* return the number of bytes in the request or response buffer */
+static int
+smp_fetch_len(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (smp->strm) {
+ struct channel *chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+		/* Not accurate but kept for backward compatibility purposes */
+ if (IS_HTX_STRM(smp->strm)) {
+ struct htx *htx = htxbuf(&chn->buf);
+ smp->data.u.sint = htx->data - co_data(chn);
+ }
+ else
+ smp->data.u.sint = ci_data(chn);
+ }
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
+ struct check *check = __objt_check(smp->sess->origin);
+
+		/* Not accurate but kept for backward compatibility purposes */
+ smp->data.u.sint = ((check->sc && IS_HTX_SC(check->sc)) ? (htxbuf(&check->bi))->data: b_data(&check->bi));
+ }
+ else
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
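+
+/* Usage sketch (editor's note; assumes the usual "req.len" keyword binding
+ * for the fetch above):
+ *
+ *     tcp-request inspect-delay 5s
+ *     tcp-request content accept if { req.len gt 0 }
+ */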
+
+/* Returns 0 if the client didn't send a SessionTicket extension,
+ * 1 if the client sent an empty SessionTicket extension, and
+ * 2 if the client also sent a non-zero length SessionTicket.
+ * The result is of type SMP_T_SINT.
+ */
+static int
+smp_fetch_req_ssl_st_ext(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+ /* Check for SSLv3 or later (SSL version >= 3.0) in the record layer*/
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+ data[0] < 1 || data[0] > hs_len) /* minimum 1 bytes for a method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, ext_len;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+		/* SessionTicket extension */
+ if (ext_type == 35) {
+ smp->data.type = SMP_T_SINT;
+			/* non-zero length SessionTicket present */
+			if (ext_len > 0)
+				smp->data.u.sint = 2;
+			/* extension present but SessionTicket empty */
+			else
+				smp->data.u.sint = 1;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+ /* SessionTicket Extension not found */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+ return 0;
+}
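+
+/* Usage sketch (editor's note; assumes the usual "req.ssl_st_ext" keyword
+ * binding for the fetch above): route clients presenting a non-empty
+ * session ticket to a resumption-friendly backend.
+ *
+ *     acl has_ticket req.ssl_st_ext 2
+ *     use_backend bk_resume if has_ticket
+ */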
+
+/* Returns TRUE if the client sent the Supported Elliptic Curves extension
+ * (0x000a). Mainly used to detect whether the client supports ECC cipher
+ * suites.
+ */
+static int
+smp_fetch_req_ssl_ec_ext(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+ /* Check for SSLv3 or later (SSL version >= 3.0) in the record layer*/
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+ data[0] < 1 || data[0] > hs_len) /* minimum 1 bytes for a method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, ext_len;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+ /* Elliptic curves extension */
+ if (ext_type == 10) {
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+	/* Elliptic curves extension not found */
+ goto not_ssl_hello;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
+
+/* returns the type of the SSL hello message (mainly used to detect an SSL hello) */
+static int
+smp_fetch_ssl_hello_type(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len;
+ int hs_type, bleft;
+ struct channel *chn;
+ const unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (const unsigned char *)ci_head(chn);
+
+ if (!bleft)
+ goto too_short;
+
+ if ((*data >= 0x14 && *data <= 0x17) || (*data == 0xFF)) {
+ /* SSLv3 header format */
+ if (bleft < 9)
+ goto too_short;
+
+ /* ssl version 3 */
+ if ((data[1] << 16) + data[2] < 0x00030000)
+ goto not_ssl_hello;
+
+		/* the SSL message length must at least cover the handshake type and length */
+ if ((data[3] << 8) + data[4] < 4)
+ goto not_ssl_hello;
+
+ /* format introduced with SSLv3 */
+
+ hs_type = (int)data[5];
+ hs_len = ( data[6] << 16 ) + ( data[7] << 8 ) + data[8];
+
+ /* not a full handshake */
+ if (bleft < (9 + hs_len))
+ goto too_short;
+
+ }
+ else {
+ goto not_ssl_hello;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = hs_type;
+ smp->flags = SMP_F_VOLATILE;
+
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
+
+/* Return the version of the SSL protocol in the request. It supports both
+ * SSLv3 (TLSv1) header format for any message, and SSLv2 header format for
+ * the hello message. The SSLv3 format is described in RFC 2246 p49, and the
+ * SSLv2 format is described at the URL below, and completed on p67 of RFC 2246 :
+ * http://wp.netscape.com/eng/security/SSL_2.html
+ *
+ * Note: this decoder only works with non-wrapping data.
+ */
+static int
+smp_fetch_req_ssl_ver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int version, bleft, msg_len;
+ const unsigned char *data;
+ struct channel *req;
+
+ if (!smp->strm)
+ goto not_ssl;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl;
+
+ req = &smp->strm->req;
+ msg_len = 0;
+ bleft = ci_data(req);
+ if (!bleft)
+ goto too_short;
+
+ data = (const unsigned char *)ci_head(req);
+ if ((*data >= 0x14 && *data <= 0x17) || (*data == 0xFF)) {
+ /* SSLv3 header format */
+ if (bleft < 11)
+ goto too_short;
+
+ version = (data[1] << 16) + data[2]; /* record layer version: major, minor */
+ msg_len = (data[3] << 8) + data[4]; /* record length */
+
+ /* format introduced with SSLv3 */
+ if (version < 0x00030000)
+ goto not_ssl;
+
+ /* message length between 6 and 2^14 + 2048 */
+ if (msg_len < 6 || msg_len > ((1<<14) + 2048))
+ goto not_ssl;
+
+ bleft -= 5; data += 5;
+
+ /* return the client hello client version, not the record layer version */
+ version = (data[4] << 16) + data[5]; /* client hello version: major, minor */
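+		/* e.g. (editor's note): a TLSv1.2 ClientHello yields
+		 * version 0x030003 here, i.e. "3.3" in the dotted notation
+		 * used by the matching ACL.
+		 */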
+ } else {
+ /* SSLv2 header format, only supported for hello (msg type 1) */
+ int rlen, plen, cilen, silen, chlen;
+
+ if (*data & 0x80) {
+ if (bleft < 3)
+ goto too_short;
+ /* short header format : 15 bits for length */
+ rlen = ((data[0] & 0x7F) << 8) | data[1];
+ plen = 0;
+ bleft -= 2; data += 2;
+ } else {
+ if (bleft < 4)
+ goto too_short;
+ /* long header format : 14 bits for length + pad length */
+ rlen = ((data[0] & 0x3F) << 8) | data[1];
+ plen = data[2];
+ bleft -= 3; data += 3;
+ }
+
+ if (*data != 0x01)
+ goto not_ssl;
+ bleft--; data++;
+
+ if (bleft < 8)
+ goto too_short;
+ version = (data[0] << 16) + data[1]; /* version: major, minor */
+ cilen = (data[2] << 8) + data[3]; /* cipher len, multiple of 3 */
+ silen = (data[4] << 8) + data[5]; /* session_id_len: 0 or 16 */
+ chlen = (data[6] << 8) + data[7]; /* 16<=challenge length<=32 */
+
+ bleft -= 8; data += 8;
+ if (cilen % 3 != 0)
+ goto not_ssl;
+ if (silen && silen != 16)
+ goto not_ssl;
+ if (chlen < 16 || chlen > 32)
+ goto not_ssl;
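+		/* (editor's note) the 9 fixed bytes below are: msg_type(1) +
+		 * version(2) + cipher_len(2) + session_id_len(2) +
+		 * challenge_len(2); the record must then exactly cover the
+		 * three variable-length fields.
+		 */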
+ if (rlen != 9 + cilen + silen + chlen)
+ goto not_ssl;
+
+ /* focus on the remaining data length */
+ msg_len = cilen + silen + chlen + plen;
+ }
+ /* We could recursively check that the buffer ends exactly on an SSL
+ * fragment boundary and that a possible next segment is still SSL,
+ * but that's a bit pointless. However, we could still check that
+ * all the part of the request which fits in a buffer is already
+ * there.
+ */
+ if (msg_len > channel_recv_limit(req) + b_orig(&req->buf) - ci_head(req))
+ msg_len = channel_recv_limit(req) + b_orig(&req->buf) - ci_head(req);
+
+ if (bleft < msg_len)
+ goto too_short;
+
+ /* OK that's enough. We have at least the whole message, and we have
+ * the protocol version.
+ */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = version;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+ not_ssl:
+ return 0;
+}
+
+/* Try to extract the Server Name Indication that may be presented in a TLS
+ * client hello handshake message. The format of the message is the following
+ * (cf RFC5246 + RFC6066) :
+ * TLS frame :
+ * - uint8 type = 0x16 (Handshake)
+ * - uint16 version >= 0x0301 (TLSv1)
+ * - uint16 length (frame length)
+ * - TLS handshake :
+ * - uint8 msg_type = 0x01 (ClientHello)
+ * - uint24 length (handshake message length)
+ * - ClientHello :
+ * - uint16 client_version >= 0x0301 (TLSv1)
+ * - uint8 Random[32] (4 first ones are timestamp)
+ * - SessionID :
+ * - uint8 session_id_len (0..32) (SessionID len in bytes)
+ * - uint8 session_id[session_id_len]
+ * - CipherSuite :
+ * - uint16 cipher_len >= 2 (Cipher length in bytes)
+ * - uint16 ciphers[cipher_len/2]
+ * - CompressionMethod :
+ * - uint8 compression_len >= 1 (# of supported methods)
+ * - uint8 compression_methods[compression_len]
+ * - optional client_extension_len (in bytes)
+ * - optional sequence of ClientHelloExtensions (as many bytes as above):
+ * - uint16 extension_type = 0 for server_name
+ * - uint16 extension_len
+ * - opaque extension_data[extension_len]
+ * - uint16 server_name_list_len (# of bytes here)
+ * - opaque server_names[server_name_list_len bytes]
+ * - uint8 name_type = 0 for host_name
+ * - uint16 name_len
+ * - opaque hostname[name_len bytes]
+ */
+static int
+smp_fetch_ssl_hello_sni(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+	/* Check for SSLv3 or later (SSL version >= 3.0) in the record layer */
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+	    data[0] < 1 || data[0] > hs_len) /* minimum 1 byte per method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, name_type, srv_len, name_len;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+ if (ext_type == 0) { /* Server name */
+ if (ext_len < 2) /* need one list length */
+ goto not_ssl_hello;
+
+ srv_len = (data[4] << 8) + data[5];
+ if (srv_len < 4 || srv_len > hs_len - 6)
+ goto not_ssl_hello; /* at least 4 bytes per server name */
+
+ name_type = data[6];
+ name_len = (data[7] << 8) + data[8];
+
+ if (name_type == 0) { /* hostname */
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char *)data + 9;
+ smp->data.u.str.data = name_len;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ return 1;
+ }
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+ /* server name not found */
+ goto not_ssl_hello;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
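+
+/* Worked example (illustrative only): a server_name extension carrying the
+ * single host name "localhost" is laid out as follows, matching the offsets
+ * parsed above (data[4-5] = list length, data[6] = name type, data[7-8] =
+ * name length, host name starting at data + 9):
+ *
+ *   00 00                        extension_type = 0 (server_name)
+ *   00 0e                        extension_len = 14
+ *   00 0c                        server_name_list_len = 12
+ *   00                           name_type = 0 (host_name)
+ *   00 09                        name_len = 9
+ *   6c 6f 63 61 6c 68 6f 73 74   "localhost"
+ */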
+
+/* Try to extract the Application-Layer Protocol Negotiation (ALPN) protocol
+ * names that may be presented in a TLS client hello handshake message. As the
+ * message presents a list of protocol names in descending order of preference,
+ * the fetch may return them iteratively. The format of the message is the following
+ * (cf RFC5246 + RFC7301) :
+ * TLS frame :
+ * - uint8 type = 0x16 (Handshake)
+ * - uint16 version >= 0x0301 (TLSv1)
+ * - uint16 length (frame length)
+ * - TLS handshake :
+ * - uint8 msg_type = 0x01 (ClientHello)
+ * - uint24 length (handshake message length)
+ * - ClientHello :
+ * - uint16 client_version >= 0x0301 (TLSv1)
+ *             - uint8 Random[32]                  (the first 4 bytes are a timestamp)
+ * - SessionID :
+ * - uint8 session_id_len (0..32) (SessionID len in bytes)
+ * - uint8 session_id[session_id_len]
+ * - CipherSuite :
+ * - uint16 cipher_len >= 2 (Cipher length in bytes)
+ * - uint16 ciphers[cipher_len/2]
+ * - CompressionMethod :
+ * - uint8 compression_len >= 1 (# of supported methods)
+ * - uint8 compression_methods[compression_len]
+ * - optional client_extension_len (in bytes)
+ * - optional sequence of ClientHelloExtensions (as many bytes as above):
+ * - uint16 extension_type = 16 for application_layer_protocol_negotiation
+ * - uint16 extension_len
+ * - opaque extension_data[extension_len]
+ * - uint16 protocol_names_len (# of bytes here)
+ * - opaque protocol_names[protocol_names_len bytes]
+ * - uint8 name_len
+ * - opaque protocol_name[name_len bytes]
+ */
+static int
+smp_fetch_ssl_hello_alpn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+	/* Check for SSLv3 or later (SSL version >= 3.0) in the record layer */
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+	    data[0] < 1 || data[0] > hs_len) /* minimum 1 byte per method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, name_len, name_offset;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+ if (ext_type == 16) { /* ALPN */
+ if (ext_len < 3) /* one list length [uint16] + at least one name length [uint8] */
+ goto not_ssl_hello;
+
+ /* Name cursor in ctx, must begin after protocol_names_len */
+ name_offset = smp->ctx.i < 6 ? 6 : smp->ctx.i;
+ name_len = data[name_offset];
+
+ if (name_len + name_offset - 3 > ext_len)
+ goto not_ssl_hello;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char *)data + name_offset + 1; /* +1 to skip name_len */
+ smp->data.u.str.data = name_len;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+
+ /* May have more protocol names remaining */
+ if (name_len + name_offset - 3 < ext_len) {
+ smp->ctx.i = name_offset + name_len + 1;
+ smp->flags |= SMP_F_NOT_LAST;
+ }
+
+ return 1;
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+ /* alpn not found */
+ goto not_ssl_hello;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
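+
+/* Worked example (illustrative only): with an ALPN extension offering "h2"
+ * then "http/1.1" (ext_len = 14, protocol_names_len = 12), a first call
+ * starts at name_offset 6 and returns "h2" with SMP_F_NOT_LAST set and
+ * smp->ctx.i = 9; the next call resumes at offset 9 and returns "http/1.1"
+ * as the last name of the list.
+ */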
+
+/* Fetch the request RDP cookie identified in <cname>:<clen>, or any cookie if
+ * <clen> is empty (cname is then ignored). It returns the data into sample <smp>
+ * as a constant string (SMP_T_STR with SMP_F_CONST set). Note: this decoder
+ * only works with non-wrapping data.
+ */
+int
+fetch_rdp_cookie_name(struct stream *s, struct sample *smp, const char *cname, int clen)
+{
+ int bleft;
+ const unsigned char *data;
+
+ smp->flags = SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ bleft = ci_data(&s->req);
+ if (bleft <= 11)
+ goto too_short;
+
+ data = (const unsigned char *)ci_head(&s->req) + 11;
+ bleft -= 11;
+
+ if (bleft <= 7)
+ goto too_short;
+
+ if (strncasecmp((const char *)data, "Cookie:", 7) != 0)
+ goto not_cookie;
+
+ data += 7;
+ bleft -= 7;
+
+ while (bleft > 0 && *data == ' ') {
+ data++;
+ bleft--;
+ }
+
+ if (clen) {
+ if (bleft <= clen)
+ goto too_short;
+
+ if ((data[clen] != '=') ||
+ strncasecmp(cname, (const char *)data, clen) != 0)
+ goto not_cookie;
+
+ data += clen + 1;
+ bleft -= clen + 1;
+ } else {
+ while (bleft > 0 && *data != '=') {
+ if (*data == '\r' || *data == '\n')
+ goto not_cookie;
+ data++;
+ bleft--;
+ }
+
+ if (bleft < 1)
+ goto too_short;
+
+ if (*data != '=')
+ goto not_cookie;
+
+ data++;
+ bleft--;
+ }
+
+ /* data points to cookie value */
+ smp->data.u.str.area = (char *)data;
+ smp->data.u.str.data = 0;
+
+ while (bleft > 0 && *data != '\r') {
+ data++;
+ bleft--;
+ }
+
+ if (bleft < 2)
+ goto too_short;
+
+ if (data[0] != '\r' || data[1] != '\n')
+ goto not_cookie;
+
+ smp->data.u.str.data = (char *)data - smp->data.u.str.area;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ not_cookie:
+ return 0;
+}
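+
+/* Example (illustrative only): the function above expects, 11 bytes into the
+ * request (past the TPKT and X.224 headers), an ASCII token of the form
+ * "Cookie: <name>=<value>\r\n", e.g. "Cookie: mstshash=jdoe\r\n", from which
+ * it returns the value ("jdoe" here). "mstshash" is the cookie name commonly
+ * sent by RDP clients.
+ */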
+
+/* Fetch the request RDP cookie identified in the args, or any cookie if no arg
+ * is passed. It is usable both for ACL and for samples. Note: this decoder
+ * only works with non-wrapping data. Accepts either 0 or 1 argument. The
+ * argument must be a string (the cookie name); other types lead to undefined
+ * behaviour. The returned sample is a constant string (SMP_T_STR, SMP_F_CONST).
+ */
+int
+smp_fetch_rdp_cookie(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+
+ return fetch_rdp_cookie_name(smp->strm, smp,
+ args ? args->data.str.area : NULL,
+ args ? args->data.str.data : 0);
+}
+
+/* returns either 1 or 0 depending on whether an RDP cookie is found or not */
+static int
+smp_fetch_rdp_cookie_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret;
+
+ ret = smp_fetch_rdp_cookie(args, smp, kw, private);
+
+ if (smp->flags & SMP_F_MAY_CHANGE)
+ return 0;
+
+ smp->flags = SMP_F_VOLATILE;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ret;
+ return 1;
+}
+
+/* extracts a chunk of payload whose length is read within the payload itself, at a given position (length-value) */
+static int
+smp_fetch_payload_lv(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int len_offset = arg_p[0].data.sint;
+ unsigned int len_size = arg_p[1].data.sint;
+ unsigned int buf_offset;
+ unsigned int buf_size = 0;
+ struct channel *chn = NULL;
+ char *head = NULL;
+ size_t max, data;
+ int i;
+
+ /* Format is (len offset, len size, buf offset) or (len offset, len size) */
+ /* by default buf offset == len offset + len size */
+ /* buf offset could be absolute or relative to len offset + len size if prefixed by + or - */
+
+ if (smp->strm) {
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ head = ci_head(chn);
+ data = ci_data(chn);
+ }
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
+ struct check *check = __objt_check(smp->sess->origin);
+
+ /* meaningless for HTX buffers */
+ if (check->sc && IS_HTX_SC(check->sc))
+ return 0;
+ head = b_head(&check->bi);
+ data = b_data(&check->bi);
+ }
+ max = global.tune.bufsize;
+ if (!head)
+ goto too_short;
+
+ if (len_offset + len_size > data)
+ goto too_short;
+
+ for (i = 0; i < len_size; i++) {
+ buf_size = (buf_size << 8) + ((unsigned char *)head)[i + len_offset];
+ }
+
+ /* buf offset may be implicit, absolute or relative. If the LSB
+ * is set, then the offset is relative otherwise it is absolute.
+ */
+ buf_offset = len_offset + len_size;
+ if (arg_p[2].type == ARGT_SINT) {
+ if (arg_p[2].data.sint & 1)
+ buf_offset += arg_p[2].data.sint >> 1;
+ else
+ buf_offset = arg_p[2].data.sint >> 1;
+ }
+
+ if (!buf_size || buf_size > max || buf_offset + buf_size > max) {
+ /* will never match */
+ smp->flags = 0;
+ return 0;
+ }
+
+ if (buf_offset + buf_size > data)
+ goto too_short;
+
+ /* init chunk as read only */
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ chunk_initlen(&smp->data.u.str, head + buf_offset, 0, buf_size);
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+}
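+
+/* Example (illustrative only): for a protocol framing records as a 2-byte
+ * big-endian length at offset 4 followed by the body, "payload_lv(4,2)" reads
+ * the length from bytes 4-5 and returns that many bytes starting at offset 6
+ * (the implicit buf offset is len_offset + len_size). A third argument such
+ * as "+2" would skip 2 more bytes, while an unsigned value would be used as
+ * an absolute offset from the start of the buffer.
+ */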
+
+/* extracts some payload at a fixed position and length */
+static int
+smp_fetch_payload(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int buf_offset = arg_p[0].data.sint;
+ unsigned int buf_size = arg_p[1].data.sint;
+ struct channel *chn = NULL;
+ char *head = NULL;
+ size_t max, data;
+
+ if (smp->strm) {
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ head = ci_head(chn);
+ data = ci_data(chn);
+ }
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
+ struct check *check = __objt_check(smp->sess->origin);
+
+ /* meaningless for HTX buffers */
+ if (check->sc && IS_HTX_SC(check->sc))
+ return 0;
+ head = b_head(&check->bi);
+ data = b_data(&check->bi);
+ }
+ max = global.tune.bufsize;
+ if (!head)
+ goto too_short;
+
+ if (buf_size > max || buf_offset + buf_size > max) {
+ /* will never match */
+ smp->flags = 0;
+ return 0;
+ }
+ if (buf_offset + buf_size > data)
+ goto too_short;
+
+ /* init chunk as read only */
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ chunk_initlen(&smp->data.u.str, head + buf_offset, 0, buf_size ? buf_size : (data - buf_offset));
+
+ if (!buf_size && chn && channel_may_recv(chn) && !channel_input_closed(chn))
+ smp->flags |= SMP_F_MAY_CHANGE;
+
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+}
+
+/* This function is used to validate the arguments passed to a "payload_lv" fetch
+ * keyword. This keyword allows two positive integers and an optional signed one,
+ * with the second one being strictly positive and the third one, when negative,
+ * being no smaller than the negated sum of the other two. It is assumed that the types are
+ * already the correct ones. Returns 0 on error, non-zero if OK. If <err_msg> is
+ * not NULL, it will be filled with a pointer to an error message in case of
+ * error, that the caller is responsible for freeing. The initial location must
+ * either be freeable or NULL.
+ *
+ * Note that offset2 is stored with SINT type, but it's not directly usable as is.
+ * The value is contained in the 63 MSB and the LSB is used as a flag for marking
+ * the "relative" property of the value.
+ */
+int val_payload_lv(struct arg *arg, char **err_msg)
+{
+ int relative = 0;
+ const char *str;
+
+ if (arg[0].data.sint < 0) {
+ memprintf(err_msg, "payload offset1 must be positive");
+ return 0;
+ }
+
+ if (!arg[1].data.sint) {
+ memprintf(err_msg, "payload length must be > 0");
+ return 0;
+ }
+
+ if (arg[2].type == ARGT_STR && arg[2].data.str.data > 0) {
+ long long int i;
+
+ if (arg[2].data.str.area[0] == '+' || arg[2].data.str.area[0] == '-')
+ relative = 1;
+ str = arg[2].data.str.area;
+ i = read_int64(&str, str + arg[2].data.str.data);
+ if (*str != '\0') {
+ memprintf(err_msg, "payload offset2 is not a number");
+ return 0;
+ }
+ chunk_destroy(&arg[2].data.str);
+ arg[2].type = ARGT_SINT;
+ arg[2].data.sint = i;
+
+ if (arg[0].data.sint + arg[1].data.sint + arg[2].data.sint < 0) {
+ memprintf(err_msg, "payload offset2 too negative");
+ return 0;
+ }
+		/* shift the value in both cases so that the LSB only carries the
+		 * "relative" flag, as expected by smp_fetch_payload_lv()
+		 */
+		if (relative)
+			arg[2].data.sint = ( arg[2].data.sint << 1 ) + 1;
+		else
+			arg[2].data.sint = ( arg[2].data.sint << 1 );
+ }
+ return 1;
+}
+
+/* extracts the parameter value of a distcc token */
+static int
+smp_fetch_distcc_param(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int match_tok = arg_p[0].data.sint;
+ unsigned int match_occ = arg_p[1].data.sint;
+ unsigned int token;
+ unsigned int param;
+ unsigned int body;
+ unsigned int ofs;
+ unsigned int occ;
+ struct channel *chn;
+ int i;
+
+ /* Format is (token[,occ]). occ starts at 1. */
+
+ if (!smp->strm)
+ return 0;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+ ofs = 0; occ = 0;
+ while (1) {
+ if (ofs + 12 > ci_data(chn)) {
+ /* not there yet but could it at least fit ? */
+ if (!chn->buf.size)
+ goto too_short;
+
+ if (ofs + 12 <= channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn))
+ goto too_short;
+
+ goto no_match;
+ }
+
+ token = read_n32(ci_head(chn) + ofs);
+ ofs += 4;
+
+ for (i = param = 0; i < 8; i++) {
+ int c = hex2i(ci_head(chn)[ofs + i]);
+
+ if (c < 0)
+ goto no_match;
+ param = (param << 4) + c;
+ }
+ ofs += 8;
+
+ /* these tokens don't have a body */
+ if (token != 0x41524743 /* ARGC */ && token != 0x44495354 /* DIST */ &&
+ token != 0x4E46494C /* NFIL */ && token != 0x53544154 /* STAT */ &&
+ token != 0x444F4E45 /* DONE */)
+ body = param;
+ else
+ body = 0;
+
+ if (token == match_tok) {
+ occ++;
+ if (!match_occ || match_occ == occ) {
+ /* found */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = param;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ return 1;
+ }
+ }
+ ofs += body;
+ }
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+ no_match:
+ /* will never match (end of buffer, or bad contents) */
+ smp->flags = 0;
+ return 0;
+
+}
+
+/* extracts the (possibly truncated) body of a distcc token */
+static int
+smp_fetch_distcc_body(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int match_tok = arg_p[0].data.sint;
+ unsigned int match_occ = arg_p[1].data.sint;
+ unsigned int token;
+ unsigned int param;
+ unsigned int ofs;
+ unsigned int occ;
+ unsigned int body;
+ struct channel *chn;
+ int i;
+
+ /* Format is (token[,occ]). occ starts at 1. */
+
+ if (!smp->strm)
+ return 0;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+ ofs = 0; occ = 0;
+ while (1) {
+ if (ofs + 12 > ci_data(chn)) {
+ if (!chn->buf.size)
+ goto too_short;
+
+ if (ofs + 12 <= channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn))
+ goto too_short;
+
+ goto no_match;
+ }
+
+ token = read_n32(ci_head(chn) + ofs);
+ ofs += 4;
+
+ for (i = param = 0; i < 8; i++) {
+ int c = hex2i(ci_head(chn)[ofs + i]);
+
+ if (c < 0)
+ goto no_match;
+ param = (param << 4) + c;
+ }
+ ofs += 8;
+
+ /* these tokens don't have a body */
+ if (token != 0x41524743 /* ARGC */ && token != 0x44495354 /* DIST */ &&
+ token != 0x4E46494C /* NFIL */ && token != 0x53544154 /* STAT */ &&
+ token != 0x444F4E45 /* DONE */)
+ body = param;
+ else
+ body = 0;
+
+ if (token == match_tok) {
+ occ++;
+ if (!match_occ || match_occ == occ) {
+ /* found */
+
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+
+ if (ofs + body > ci_head(chn) - b_orig(&chn->buf) + ci_data(chn)) {
+ /* incomplete body */
+
+ if (ofs + body > channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn)) {
+ /* truncate it to whatever will fit */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ body = channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn) - ofs;
+ }
+ }
+
+ chunk_initlen(&smp->data.u.str, ci_head(chn) + ofs, 0, body);
+ return 1;
+ }
+ }
+ ofs += body;
+ }
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+ no_match:
+ /* will never match (end of buffer, or bad contents) */
+ smp->flags = 0;
+ return 0;
+
+}
+
+/* This function is used to validate the arguments passed to a "distcc_param" or
+ * "distcc_body" sample fetch keyword. They take a mandatory token name of exactly
+ * 4 characters, followed by an optional occurrence number starting at 1. It is
+ * assumed that the types are already the correct ones. Returns 0 on error, non-
+ * zero if OK. If <err_msg> is not NULL, it will be filled with a pointer to an
+ * error message in case of error, that the caller is responsible for freeing.
+ * The initial location must either be freeable or NULL.
+ */
+int val_distcc(struct arg *arg, char **err_msg)
+{
+ unsigned int token;
+
+ if (arg[0].data.str.data != 4) {
+ memprintf(err_msg, "token name must be exactly 4 characters");
+ return 0;
+ }
+
+ /* convert the token name to an unsigned int (one byte per character,
+ * big endian format).
+ */
+ token = (arg[0].data.str.area[0] << 24) + (arg[0].data.str.area[1] << 16) +
+ (arg[0].data.str.area[2] << 8) + (arg[0].data.str.area[3] << 0);
+
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = token;
+
+ if (arg[1].type != ARGT_SINT) {
+ arg[1].type = ARGT_SINT;
+ arg[1].data.sint = 0;
+ }
+ return 1;
+}
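+
+/* Example (illustrative only): each distcc token is a 4-character name
+ * followed by an 8-digit hexadecimal parameter, so the "DIST" argument of
+ * a "distcc_param(DIST)" fetch is converted above to 0x44495354, the
+ * big-endian packing of the ASCII codes of 'D','I','S','T', and is then
+ * compared with the tokens read from the stream.
+ */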
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types must be declared as the lowest
+ * common denominator, the type that can be cast into all other ones. For
+ * instance IPv4/IPv6 must be declared IPv4.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "distcc_body", smp_fetch_distcc_body, ARG2(1,STR,SINT), val_distcc, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "distcc_param", smp_fetch_distcc_param, ARG2(1,STR,SINT), val_distcc, SMP_T_SINT, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "payload", smp_fetch_payload, ARG2(2,SINT,SINT), NULL, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "payload_lv", smp_fetch_payload_lv, ARG3(2,SINT,SINT,STR), val_payload_lv, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "rdp_cookie", smp_fetch_rdp_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "rdp_cookie_cnt", smp_fetch_rdp_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "rep_ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6RES },
+ { "req_len", smp_fetch_len, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req_ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req_ssl_sni", smp_fetch_ssl_hello_sni, 0, NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req_ssl_ver", smp_fetch_req_ssl_ver, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+
+ { "req.len", smp_fetch_len, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.payload", smp_fetch_payload, ARG2(2,SINT,SINT), NULL, SMP_T_BIN, SMP_USE_L6REQ },
+ { "req.payload_lv", smp_fetch_payload_lv, ARG3(2,SINT,SINT,STR), val_payload_lv, SMP_T_BIN, SMP_USE_L6REQ },
+ { "req.rdp_cookie", smp_fetch_rdp_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req.rdp_cookie_cnt", smp_fetch_rdp_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.ssl_ec_ext", smp_fetch_req_ssl_ec_ext, 0, NULL, SMP_T_BOOL, SMP_USE_L6REQ },
+ { "req.ssl_st_ext", smp_fetch_req_ssl_st_ext, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.ssl_sni", smp_fetch_ssl_hello_sni, 0, NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req.ssl_alpn", smp_fetch_ssl_hello_alpn, 0, NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req.ssl_ver", smp_fetch_req_ssl_ver, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "res.len", smp_fetch_len, 0, NULL, SMP_T_SINT, SMP_USE_L6RES },
+ { "res.payload", smp_fetch_payload, ARG2(2,SINT,SINT), NULL, SMP_T_BIN, SMP_USE_L6RES },
+ { "res.payload_lv", smp_fetch_payload_lv, ARG3(2,SINT,SINT,STR), val_payload_lv, SMP_T_BIN, SMP_USE_L6RES },
+ { "res.ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6RES },
+ { "wait_end", smp_fetch_wait_end, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { "payload", "req.payload", PAT_MATCH_BIN },
+ { "payload_lv", "req.payload_lv", PAT_MATCH_BIN },
+ { "req_rdp_cookie", "req.rdp_cookie", PAT_MATCH_STR },
+ { "req_rdp_cookie_cnt", "req.rdp_cookie_cnt", PAT_MATCH_INT },
+ { "req_ssl_sni", "req.ssl_sni", PAT_MATCH_STR },
+ { "req_ssl_ver", "req.ssl_ver", PAT_MATCH_INT, pat_parse_dotted_ver },
+ { "req.ssl_ver", "req.ssl_ver", PAT_MATCH_INT, pat_parse_dotted_ver },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/peers.c b/src/peers.c
new file mode 100644
index 0000000..e3981a6
--- /dev/null
+++ b/src/peers.c
@@ -0,0 +1,4076 @@
+/*
+ * Peer synchro management.
+ *
+ * Copyright 2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <import/eb32tree.h>
+#include <import/ebmbtree.h>
+#include <import/ebpttree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/dict.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/obj_type-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session-t.h>
+#include <haproxy/signal.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+
+/*******************************/
+/* Current peer learning state */
+/*******************************/
+
+/******************************/
+/* Current peers section resync state */
+/******************************/
+#define PEERS_F_RESYNC_LOCAL 0x00000001 /* Learn from local finished or no more needed */
+#define PEERS_F_RESYNC_REMOTE 0x00000002 /* Learn from remote finished or no more needed */
+#define PEERS_F_RESYNC_ASSIGN 0x00000004 /* A peer was assigned to learn our lesson */
+#define PEERS_F_RESYNC_PROCESS 0x00000008 /* The assigned peer was requested for resync */
+#define PEERS_F_RESYNC_LOCALTIMEOUT 0x00000010 /* Timeout waiting for a full resync from a local node */
+#define PEERS_F_RESYNC_REMOTETIMEOUT 0x00000020 /* Timeout waiting for a full resync from a remote node */
+#define PEERS_F_RESYNC_LOCALABORT 0x00000040 /* Session aborted learning from a local node */
+#define PEERS_F_RESYNC_REMOTEABORT 0x00000080 /* Session aborted learning from a remote node */
+#define PEERS_F_RESYNC_LOCALFINISHED    0x00000100 /* A local node taught us and was fully up to date */
+#define PEERS_F_RESYNC_REMOTEFINISHED   0x00000200 /* A remote node taught us and was fully up to date */
+#define PEERS_F_RESYNC_LOCALPARTIAL     0x00000400 /* A local node taught us but was only partially up to date */
+#define PEERS_F_RESYNC_REMOTEPARTIAL    0x00000800 /* A remote node taught us but was only partially up to date */
+#define PEERS_F_RESYNC_LOCALASSIGN 0x00001000 /* A local node was assigned for a full resync */
+#define PEERS_F_RESYNC_REMOTEASSIGN 0x00002000 /* A remote node was assigned for a full resync */
+#define PEERS_F_RESYNC_REQUESTED 0x00004000 /* A resync was explicitly requested */
+#define PEERS_F_DONOTSTOP               0x00010000 /* Main table sync task blocks the process during soft stop
+                                                      to push data to the new process */
+
+#define PEERS_RESYNC_STATEMASK (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE)
+#define PEERS_RESYNC_FROMLOCAL 0x00000000
+#define PEERS_RESYNC_FROMREMOTE PEERS_F_RESYNC_LOCAL
+#define PEERS_RESYNC_FINISHED (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE)
+
+/***********************************/
+/* Current shared table sync state */
+/***********************************/
+#define SHTABLE_F_TEACH_STAGE1 0x00000001 /* Teach state 1 complete */
+#define SHTABLE_F_TEACH_STAGE2 0x00000002 /* Teach state 2 complete */
+
+/******************************/
+/* Remote peer teaching state */
+/******************************/
+#define PEER_F_TEACH_PROCESS 0x00000001 /* Teach a lesson to current peer */
+#define PEER_F_TEACH_FINISHED         0x00000008 /* Teaching concluded (wait for confirm) */
+#define PEER_F_TEACH_COMPLETE         0x00000010 /* All we know was already taught to the current peer, used only for a local peer */
+#define PEER_F_LEARN_ASSIGN 0x00000100 /* Current peer was assigned for a lesson */
+#define PEER_F_LEARN_NOTUP2DATE 0x00000200 /* Learn from peer finished but peer is not up to date */
+#define PEER_F_ALIVE                  0x20000000 /* Used to flag a peer as alive. */
+#define PEER_F_HEARTBEAT 0x40000000 /* Heartbeat message to send. */
+#define PEER_F_DWNGRD 0x80000000 /* When this flag is enabled, we must downgrade the supported version announced during peer sessions. */
+
+#define PEER_TEACH_RESET ~(PEER_F_TEACH_PROCESS|PEER_F_TEACH_FINISHED) /* PEER_F_TEACH_COMPLETE should never be reset */
+#define PEER_LEARN_RESET ~(PEER_F_LEARN_ASSIGN|PEER_F_LEARN_NOTUP2DATE)
+
+#define PEER_RESYNC_TIMEOUT 5000 /* 5 seconds */
+#define PEER_RECONNECT_TIMEOUT 5000 /* 5 seconds */
+#define PEER_LOCAL_RECONNECT_TIMEOUT 500 /* 500ms */
+#define PEER_HEARTBEAT_TIMEOUT 3000 /* 3 seconds */
+
+/* default maximum of updates sent at once */
+#define PEER_DEF_MAX_UPDATES_AT_ONCE 200
+
+/* flags for "show peers" */
+#define PEERS_SHOW_F_DICT 0x00000001 /* also show the contents of the dictionary */
+
+/*****************************/
+/* Sync message class */
+/*****************************/
+enum {
+ PEER_MSG_CLASS_CONTROL = 0,
+ PEER_MSG_CLASS_ERROR,
+ PEER_MSG_CLASS_STICKTABLE = 10,
+ PEER_MSG_CLASS_RESERVED = 255,
+};
+
+/*****************************/
+/* control message types */
+/*****************************/
+enum {
+ PEER_MSG_CTRL_RESYNCREQ = 0,
+ PEER_MSG_CTRL_RESYNCFINISHED,
+ PEER_MSG_CTRL_RESYNCPARTIAL,
+ PEER_MSG_CTRL_RESYNCCONFIRM,
+ PEER_MSG_CTRL_HEARTBEAT,
+};
+
+/*****************************/
+/* error message types */
+/*****************************/
+enum {
+ PEER_MSG_ERR_PROTOCOL = 0,
+ PEER_MSG_ERR_SIZELIMIT,
+};
+
+/* network key types;
+ * network types were directly and mistakenly
+ * mapped on sample types; to keep backward
+ * compatibility we keep those values but
+ * we now use an internal/network mapping
+ * to avoid further mistakes when adding or
+ * modifying internal types
+ */
+enum {
+ PEER_KT_ANY = 0, /* any type */
+ PEER_KT_RESV1, /* UNUSED */
+ PEER_KT_SINT, /* signed 64bits integer type */
+ PEER_KT_RESV3, /* UNUSED */
+ PEER_KT_IPV4, /* ipv4 type */
+ PEER_KT_IPV6, /* ipv6 type */
+ PEER_KT_STR, /* char string type */
+ PEER_KT_BIN, /* buffer type */
+ PEER_KT_TYPES /* number of types, must always be last */
+};
+
+/* Map used to retrieve network type from internal type
+ * Note: Undeclared mapping maps entry to PEER_KT_ANY == 0
+ */
+static int peer_net_key_type[SMP_TYPES] = {
+ [SMP_T_SINT] = PEER_KT_SINT,
+ [SMP_T_IPV4] = PEER_KT_IPV4,
+ [SMP_T_IPV6] = PEER_KT_IPV6,
+ [SMP_T_STR] = PEER_KT_STR,
+ [SMP_T_BIN] = PEER_KT_BIN,
+};
+
+/* Map used to retrieve internal type from external type
+ * Note: Undeclared mapping maps entry to SMP_T_ANY == 0
+ */
+static int peer_int_key_type[PEER_KT_TYPES] = {
+ [PEER_KT_SINT] = SMP_T_SINT,
+ [PEER_KT_IPV4] = SMP_T_IPV4,
+ [PEER_KT_IPV6] = SMP_T_IPV6,
+ [PEER_KT_STR] = SMP_T_STR,
+ [PEER_KT_BIN] = SMP_T_BIN,
+};
+
+/*
+ * Parameters used by functions to build peer protocol messages.
+ */
+struct peer_prep_params {
+ struct {
+ struct peer *peer;
+ } hello;
+ struct {
+ unsigned int st1;
+ } error_status;
+ struct {
+ struct stksess *stksess;
+ struct shared_table *shared_table;
+ unsigned int updateid;
+ int use_identifier;
+ int use_timed;
+ struct peer *peer;
+ } updt;
+ struct {
+ struct shared_table *shared_table;
+ } swtch;
+ struct {
+ struct shared_table *shared_table;
+ } ack;
+ struct {
+ unsigned char head[2];
+ } control;
+ struct {
+ unsigned char head[2];
+ } error;
+};
+
+/*******************************/
+/* stick table sync msg types  */
+/* Note: messages with an id   */
+/* >= 128 also carry data      */
+/*******************************/
+#define PEER_MSG_STKT_UPDATE 0x80
+#define PEER_MSG_STKT_INCUPDATE 0x81
+#define PEER_MSG_STKT_DEFINE 0x82
+#define PEER_MSG_STKT_SWITCH 0x83
+#define PEER_MSG_STKT_ACK 0x84
+#define PEER_MSG_STKT_UPDATE_TIMED 0x85
+#define PEER_MSG_STKT_INCUPDATE_TIMED 0x86
+/* All the stick-table message identifiers above have bit #7 set */
+#define PEER_MSG_STKT_BIT 7
+#define PEER_MSG_STKT_BIT_MASK (1 << PEER_MSG_STKT_BIT)
+
+/* The maximum length of an encoded data length. */
+#define PEER_MSG_ENC_LENGTH_MAXLEN 5
+
+/* Minimum 64-bit value encoded with 2 bytes */
+#define PEER_ENC_2BYTES_MIN 0xf0 /* 0xf0 (or 240) */
+/* 3 bytes */
+#define PEER_ENC_3BYTES_MIN ((1ULL << 11) | PEER_ENC_2BYTES_MIN) /* 0x8f0 (or 2288) */
+/* 4 bytes */
+#define PEER_ENC_4BYTES_MIN ((1ULL << 18) | PEER_ENC_3BYTES_MIN) /* 0x408f0 (or 264432) */
+/* 5 bytes */
+#define PEER_ENC_5BYTES_MIN ((1ULL << 25) | PEER_ENC_4BYTES_MIN) /* 0x20408f0 (or 33818864) */
+/* 6 bytes */
+#define PEER_ENC_6BYTES_MIN ((1ULL << 32) | PEER_ENC_5BYTES_MIN) /* 0x1020408f0 (or 4328786160) */
+/* 7 bytes */
+#define PEER_ENC_7BYTES_MIN ((1ULL << 39) | PEER_ENC_6BYTES_MIN) /* 0x81020408f0 (or 554084600048) */
+/* 8 bytes */
+#define PEER_ENC_8BYTES_MIN ((1ULL << 46) | PEER_ENC_7BYTES_MIN) /* 0x4081020408f0 (or 70922828777712) */
+/* 9 bytes */
+#define PEER_ENC_9BYTES_MIN ((1ULL << 53) | PEER_ENC_8BYTES_MIN) /* 0x204081020408f0 (or 9078122083518704) */
+/* 10 bytes */
+#define PEER_ENC_10BYTES_MIN ((1ULL << 60) | PEER_ENC_9BYTES_MIN) /* 0x10204081020408f0 (or 1161999626690365680) */
+
+/* Bit #7 is used to detect the last byte of an encoded integer (it is clear on the last byte only) */
+#define PEER_ENC_STOP_BIT       7
+/* The minimum byte value with bit #7 set */
+#define PEER_ENC_STOP_BYTE      (1 << PEER_ENC_STOP_BIT)
+/* The number of leading bits set in PEER_ENC_2BYTES_MIN (0xf0) */
+#define PEER_ENC_2BYTES_MIN_BITS 4
+
+#define PEER_MSG_HEADER_LEN 2
+
+#define PEER_STKT_CACHE_MAX_ENTRIES 128
+
+/**********************************/
+/* Peer Session IO handler states */
+/**********************************/
+
+enum {
+	PEER_SESS_ST_ACCEPT = 0,     /* Initial state for sessions created by an accept, must be zero! */
+	PEER_SESS_ST_GETVERSION,     /* Validate supported protocol version */
+	PEER_SESS_ST_GETHOST,        /* Validate that the host ID corresponds to the local host id */
+	PEER_SESS_ST_GETPEER,        /* Validate that the peer ID corresponds to a known remote peer id */
+	/* after this point, data may have been exchanged */
+	PEER_SESS_ST_SENDSUCCESS,    /* Send ret code 200 (success) and wait for message */
+	PEER_SESS_ST_CONNECT,        /* Initial state for sessions created on a connect, push presentation into buffer */
+ PEER_SESS_ST_GETSTATUS, /* Wait for the welcome message */
+ PEER_SESS_ST_WAITMSG, /* Wait for data messages */
+ PEER_SESS_ST_EXIT, /* Exit with status code */
+ PEER_SESS_ST_ERRPROTO, /* Send error proto message before exit */
+ PEER_SESS_ST_ERRSIZE, /* Send error size message before exit */
+ PEER_SESS_ST_END, /* Killed session */
+};
+
+/***************************************************/
+/* Peer Session status code - part of the protocol */
+/***************************************************/
+
+#define PEER_SESS_SC_CONNECTCODE 100 /* connect in progress */
+#define PEER_SESS_SC_CONNECTEDCODE 110 /* tcp connect success */
+
+#define PEER_SESS_SC_SUCCESSCODE 200 /* accept or connect successful */
+
+#define PEER_SESS_SC_TRYAGAIN 300 /* try again later */
+
+#define PEER_SESS_SC_ERRPROTO 501 /* error protocol */
+#define PEER_SESS_SC_ERRVERSION 502 /* unknown protocol version */
+#define PEER_SESS_SC_ERRHOST 503 /* bad host name */
+#define PEER_SESS_SC_ERRPEER 504 /* unknown peer */
+
+#define PEER_SESSION_PROTO_NAME "HAProxyS"
+#define PEER_MAJOR_VER 2
+#define PEER_MINOR_VER 1
+#define PEER_DWNGRD_MINOR_VER 0
+
+static size_t proto_len = sizeof(PEER_SESSION_PROTO_NAME) - 1;
+struct peers *cfg_peers = NULL;
+static int peers_max_updates_at_once = PEER_DEF_MAX_UPDATES_AT_ONCE;
+static void peer_session_forceshutdown(struct peer *peer);
+
+static struct ebpt_node *dcache_tx_insert(struct dcache *dc,
+ struct dcache_tx_entry *i);
+static inline void flush_dcache(struct peer *peer);
+
+/* trace source and events */
+static void peers_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct trace_event peers_trace_events[] = {
+#define PEERS_EV_UPDTMSG (1 << 0)
+ { .mask = PEERS_EV_UPDTMSG, .name = "updtmsg", .desc = "update message received" },
+#define PEERS_EV_ACKMSG (1 << 1)
+ { .mask = PEERS_EV_ACKMSG, .name = "ackmsg", .desc = "ack message received" },
+#define PEERS_EV_SWTCMSG (1 << 2)
+ { .mask = PEERS_EV_SWTCMSG, .name = "swtcmsg", .desc = "switch message received" },
+#define PEERS_EV_DEFMSG (1 << 3)
+ { .mask = PEERS_EV_DEFMSG, .name = "defmsg", .desc = "definition message received" },
+#define PEERS_EV_CTRLMSG (1 << 4)
+ { .mask = PEERS_EV_CTRLMSG, .name = "ctrlmsg", .desc = "control message sent/received" },
+#define PEERS_EV_SESSREL (1 << 5)
+ { .mask = PEERS_EV_SESSREL, .name = "sessrl", .desc = "peer session releasing" },
+#define PEERS_EV_PROTOERR (1 << 6)
+ { .mask = PEERS_EV_PROTOERR, .name = "protoerr", .desc = "protocol error" },
+};
+
+static const struct name_desc peers_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="peers", .desc="Peers protocol" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc peers_trace_decoding[] = {
+#define PEERS_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+ { /* end */ }
+};
+
+
+struct trace_source trace_peers = {
+ .name = IST("peers"),
+ .desc = "Peers protocol",
+ .arg_def = TRC_ARG1_CONN, /* TRACE()'s first argument is always a connection */
+ .default_cb = peers_trace,
+ .known_events = peers_trace_events,
+ .lockon_args = peers_trace_lockon_args,
+ .decoding = peers_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+/* Return peer control message types as strings (only for debugging purposes). */
+static inline char *ctrl_msg_type_str(unsigned int type)
+{
+ switch (type) {
+ case PEER_MSG_CTRL_RESYNCREQ:
+ return "RESYNCREQ";
+ case PEER_MSG_CTRL_RESYNCFINISHED:
+ return "RESYNCFINISHED";
+ case PEER_MSG_CTRL_RESYNCPARTIAL:
+ return "RESYNCPARTIAL";
+ case PEER_MSG_CTRL_RESYNCCONFIRM:
+ return "RESYNCCONFIRM";
+ case PEER_MSG_CTRL_HEARTBEAT:
+ return "HEARTBEAT";
+ default:
+ return "???";
+ }
+}
+
+#define TRACE_SOURCE &trace_peers
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+static void peers_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ if (mask & (PEERS_EV_UPDTMSG|PEERS_EV_ACKMSG|PEERS_EV_SWTCMSG)) {
+ if (a2) {
+ const struct peer *peer = a2;
+
+ chunk_appendf(&trace_buf, " peer=%s", peer->id);
+ }
+ if (a3) {
+ const char *p = a3;
+
+ chunk_appendf(&trace_buf, " @%p", p);
+ }
+ if (a4) {
+ const size_t *val = a4;
+
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*val);
+ }
+ }
+
+ if (mask & PEERS_EV_DEFMSG) {
+ if (a2) {
+ const struct peer *peer = a2;
+
+ chunk_appendf(&trace_buf, " peer=%s", peer->id);
+ }
+ if (a3) {
+ const char *p = a3;
+
+ chunk_appendf(&trace_buf, " @%p", p);
+ }
+ if (a4) {
+ const int *val = a4;
+
+ chunk_appendf(&trace_buf, " %d", *val);
+ }
+ }
+
+ if (mask & PEERS_EV_CTRLMSG) {
+ if (a2) {
+ const unsigned char *ctrl_msg_type = a2;
+
+ chunk_appendf(&trace_buf, " %s", ctrl_msg_type_str(*ctrl_msg_type));
+
+ }
+ if (a3) {
+ const char *local_peer = a3;
+
+ chunk_appendf(&trace_buf, " %s", local_peer);
+ }
+
+ if (a4) {
+ const char *remote_peer = a4;
+
+ chunk_appendf(&trace_buf, " -> %s", remote_peer);
+ }
+ }
+
+ if (mask & (PEERS_EV_SESSREL|PEERS_EV_PROTOERR)) {
+ if (a2) {
+ const struct peer *peer = a2;
+ struct peers *peers = NULL;
+
+ if (peer->appctx)
+ peers = peer->peers;
+
+ if (peers)
+ chunk_appendf(&trace_buf, " %s", peers->local->id);
+ chunk_appendf(&trace_buf, " -> %s", peer->id);
+ }
+
+ if (a3) {
+ const int *prev_state = a3;
+
+ chunk_appendf(&trace_buf, " prev_state=%d\n", *prev_state);
+ }
+ }
+}
+
+static const char *statuscode_str(int statuscode)
+{
+ switch (statuscode) {
+ case PEER_SESS_SC_CONNECTCODE:
+ return "CONN";
+ case PEER_SESS_SC_CONNECTEDCODE:
+ return "HSHK";
+ case PEER_SESS_SC_SUCCESSCODE:
+ return "ESTA";
+ case PEER_SESS_SC_TRYAGAIN:
+ return "RETR";
+ case PEER_SESS_SC_ERRPROTO:
+ return "PROT";
+ case PEER_SESS_SC_ERRVERSION:
+ return "VERS";
+ case PEER_SESS_SC_ERRHOST:
+ return "NAME";
+ case PEER_SESS_SC_ERRPEER:
+ return "UNKN";
+ default:
+ return "NONE";
+ }
+}
+
+/* This function encodes a uint64 to the 'dynamic' length format.
+   The encoded value is written at address *str, and the
+   caller must ensure that the area after *str is large enough.
+   On return, *str is set to the next byte after the
+   encoded integer. The function returns the length of the
+   encoded integer in bytes */
+int intencode(uint64_t i, char **str) {
+ int idx = 0;
+ unsigned char *msg;
+
+ msg = (unsigned char *)*str;
+ if (i < PEER_ENC_2BYTES_MIN) {
+ msg[0] = (unsigned char)i;
+ *str = (char *)&msg[idx+1];
+ return (idx+1);
+ }
+
+	msg[idx] = (unsigned char)i | PEER_ENC_2BYTES_MIN;
+ i = (i - PEER_ENC_2BYTES_MIN) >> PEER_ENC_2BYTES_MIN_BITS;
+ while (i >= PEER_ENC_STOP_BYTE) {
+ msg[++idx] = (unsigned char)i | PEER_ENC_STOP_BYTE;
+ i = (i - PEER_ENC_STOP_BYTE) >> PEER_ENC_STOP_BIT;
+ }
+ msg[++idx] = (unsigned char)i;
+ *str = (char *)&msg[idx+1];
+ return (idx+1);
+}
+
+
+/* This function returns a decoded 64-bit unsigned integer
+ * from a varint
+ *
+ * Calling:
+ * - *str must point to the first byte of the buffer to decode.
+ * - end must point to the next byte after the end of the buffer
+ *   we are authorized to parse (buf + buflen)
+ *
+ * On return:
+ *
+ * On success *str will point to the byte following
+ * the fully decoded integer in the buffer, and
+ * the decoded value is returned.
+ *
+ * If end is reached before the integer was fully decoded,
+ * *str is set to NULL and the caller has to check this
+ * to know there was a decoding error. In this case
+ * the returned integer is also forced to 0
+ */
+uint64_t intdecode(char **str, char *end)
+{
+ unsigned char *msg;
+ uint64_t i;
+ int shift;
+
+ if (!*str)
+ return 0;
+
+ msg = (unsigned char *)*str;
+ if (msg >= (unsigned char *)end)
+ goto fail;
+
+ i = *(msg++);
+ if (i >= PEER_ENC_2BYTES_MIN) {
+ shift = PEER_ENC_2BYTES_MIN_BITS;
+ do {
+ if (msg >= (unsigned char *)end)
+ goto fail;
+ i += (uint64_t)*msg << shift;
+ shift += PEER_ENC_STOP_BIT;
+ } while (*(msg++) >= PEER_ENC_STOP_BYTE);
+ }
+ *str = (char *)msg;
+ return i;
+
+ fail:
+ *str = NULL;
+ return 0;
+}
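+
+/* Illustrative sketch (not part of the original source): round-trip of the
+ * encoding above. 42 fits in a single byte; 300 needs two bytes, 0xfc 0x03,
+ * because (300 & 0xff) | 0xf0 = 0xfc and (300 - 240) >> 4 = 0x03, which
+ * intdecode() reverses as 0xfc + (0x03 << 4) = 300.
+ */
+static void __maybe_unused peer_varint_example(void)
+{
+	char buf[2 * PEER_MSG_ENC_LENGTH_MAXLEN];
+	char *wr = buf;
+	char *rd = buf;
+
+	intencode(42, &wr);   /* writes 0x2a, one byte */
+	intencode(300, &wr);  /* writes 0xfc 0x03, two bytes */
+
+	/* intdecode() advances <rd>; it would set it to NULL if the buffer
+	 * ended in the middle of an encoded integer.
+	 */
+	BUG_ON(intdecode(&rd, wr) != 42);
+	BUG_ON(intdecode(&rd, wr) != 300);
+}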
+
+/*
+ * Build a "hello" peer protocol message.
+ * Return the number of written bytes written to build this messages if succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_hellomsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ int min_ver, ret;
+ struct peer *peer;
+
+ peer = p->hello.peer;
+ min_ver = (peer->flags & PEER_F_DWNGRD) ? PEER_DWNGRD_MINOR_VER : PEER_MINOR_VER;
+ /* Prepare headers */
+ ret = snprintf(msg, size, PEER_SESSION_PROTO_NAME " %d.%d\n%s\n%s %d %d\n",
+ (int)PEER_MAJOR_VER, min_ver, peer->id, localpeer, (int)getpid(), (int)1);
+ if (ret >= size)
+ return 0;
+
+ return ret;
+}
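+
+/* Example (illustrative only): for a local peer "hap1" (pid 1234) greeting a
+ * remote peer "hap2" without version downgrade, the message built above is:
+ *
+ *   HAProxyS 2.1
+ *   hap2
+ *   hap1 1234 1
+ */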
+
+/*
+ * Build a "handshake succeeded" status message.
+ * Return the number of bytes written to build this message if it succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_status_successmsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ int ret;
+
+ ret = snprintf(msg, size, "%d\n", (int)PEER_SESS_SC_SUCCESSCODE);
+ if (ret >= size)
+ return 0;
+
+ return ret;
+}
+
+/*
+ * Build an error status message.
+ * Return the number of bytes written to build this message if it succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_status_errormsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ int ret;
+ unsigned int st1;
+
+ st1 = p->error_status.st1;
+ ret = snprintf(msg, size, "%d\n", st1);
+ if (ret >= size)
+ return 0;
+
+ return ret;
+}
+
+/* Set the stick-table UPDATE message type byte at <msg_type> address,
+ * depending on <use_identifier> and <use_timed> boolean parameters.
+ * Always successful.
+ */
+static inline void peer_set_update_msg_type(char *msg_type, int use_identifier, int use_timed)
+{
+ if (use_timed) {
+ if (use_identifier)
+ *msg_type = PEER_MSG_STKT_UPDATE_TIMED;
+ else
+ *msg_type = PEER_MSG_STKT_INCUPDATE_TIMED;
+ }
+ else {
+ if (use_identifier)
+ *msg_type = PEER_MSG_STKT_UPDATE;
+ else
+ *msg_type = PEER_MSG_STKT_INCUPDATE;
+ }
+}
+/*
+ * This prepares the data update message for the stick session <ts>; <st> is the
+ * considered stick table.
+ * <msg> is a buffer of <size> bytes to receive the data message content.
+ * If the function returns 0, the caller should consider we were unable to encode
+ * this message (TODO: check size)
+ */
+static int peer_prepare_updatemsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ uint32_t netinteger;
+ unsigned short datalen;
+ char *cursor, *datamsg;
+ unsigned int data_type;
+ void *data_ptr;
+ struct stksess *ts;
+ struct shared_table *st;
+ unsigned int updateid;
+ int use_identifier;
+ int use_timed;
+ struct peer *peer;
+
+ ts = p->updt.stksess;
+ st = p->updt.shared_table;
+ updateid = p->updt.updateid;
+ use_identifier = p->updt.use_identifier;
+ use_timed = p->updt.use_timed;
+ peer = p->updt.peer;
+
+ cursor = datamsg = msg + PEER_MSG_HEADER_LEN + PEER_MSG_ENC_LENGTH_MAXLEN;
+
+ /* construct message */
+
+ /* check if we need to send the update identifier */
+ if (!st->last_pushed || updateid < st->last_pushed || ((updateid - st->last_pushed) != 1)) {
+ use_identifier = 1;
+ }
+
+ /* encode update identifier if needed */
+ if (use_identifier) {
+ netinteger = htonl(updateid);
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+ }
+
+ if (use_timed) {
+ netinteger = htonl(tick_remain(now_ms, ts->expire));
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+ }
+
+ /* encode the key */
+ if (st->table->type == SMP_T_STR) {
+ int stlen = strlen((char *)ts->key.key);
+
+ intencode(stlen, &cursor);
+ memcpy(cursor, ts->key.key, stlen);
+ cursor += stlen;
+ }
+ else if (st->table->type == SMP_T_SINT) {
+ netinteger = htonl(read_u32(ts->key.key));
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+ }
+ else {
+ memcpy(cursor, ts->key.key, st->table->key_size);
+ cursor += st->table->key_size;
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ /* encode values */
+ for (data_type = 0 ; data_type < STKTABLE_DATA_TYPES ; data_type++) {
+
+ data_ptr = stktable_data_ptr(st->table, ts, data_type);
+ if (data_ptr) {
+ /* in case of array all elements use
+ * the same std_type and they are linearly
+ * encoded.
+ */
+ if (stktable_data_types[data_type].is_array) {
+ unsigned int idx = 0;
+
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT: {
+ int data;
+
+ do {
+ data = stktable_data_cast(data_ptr, std_t_sint);
+ intencode(data, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ case STD_T_UINT: {
+ unsigned int data;
+
+ do {
+ data = stktable_data_cast(data_ptr, std_t_uint);
+ intencode(data, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ case STD_T_ULL: {
+ unsigned long long data;
+
+ do {
+ data = stktable_data_cast(data_ptr, std_t_ull);
+ intencode(data, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ case STD_T_FRQP: {
+ struct freq_ctr *frqp;
+
+ do {
+ frqp = &stktable_data_cast(data_ptr, std_t_frqp);
+ intencode((unsigned int)(now_ms - frqp->curr_tick), &cursor);
+ intencode(frqp->curr_ctr, &cursor);
+ intencode(frqp->prev_ctr, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ }
+
+ /* array elements fully encoded
+ * proceed next data_type.
+ */
+ continue;
+ }
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT: {
+ int data;
+
+ data = stktable_data_cast(data_ptr, std_t_sint);
+ intencode(data, &cursor);
+ break;
+ }
+ case STD_T_UINT: {
+ unsigned int data;
+
+ data = stktable_data_cast(data_ptr, std_t_uint);
+ intencode(data, &cursor);
+ break;
+ }
+ case STD_T_ULL: {
+ unsigned long long data;
+
+ data = stktable_data_cast(data_ptr, std_t_ull);
+ intencode(data, &cursor);
+ break;
+ }
+ case STD_T_FRQP: {
+ struct freq_ctr *frqp;
+
+ frqp = &stktable_data_cast(data_ptr, std_t_frqp);
+ intencode((unsigned int)(now_ms - frqp->curr_tick), &cursor);
+ intencode(frqp->curr_ctr, &cursor);
+ intencode(frqp->prev_ctr, &cursor);
+ break;
+ }
+ case STD_T_DICT: {
+ struct dict_entry *de;
+ struct ebpt_node *cached_de;
+ struct dcache_tx_entry cde = { };
+ char *beg, *end;
+ size_t value_len, data_len;
+ struct dcache *dc;
+
+ de = stktable_data_cast(data_ptr, std_t_dict);
+ if (!de) {
+ /* No entry */
+ intencode(0, &cursor);
+ break;
+ }
+
+ dc = peer->dcache;
+ cde.entry.key = de;
+ cached_de = dcache_tx_insert(dc, &cde);
+ if (cached_de == &cde.entry) {
+ if (cde.id + 1 >= PEER_ENC_2BYTES_MIN)
+ break;
+ /* Encode the length of the remaining data -> 1 */
+ intencode(1, &cursor);
+ /* Encode the cache entry ID */
+ intencode(cde.id + 1, &cursor);
+ }
+ else {
+ /* Leave enough room to encode the remaining data length. */
+ end = beg = cursor + PEER_MSG_ENC_LENGTH_MAXLEN;
+ /* Encode the dictionary entry key */
+ intencode(cde.id + 1, &end);
+ /* Encode the length of the dictionary entry data */
+ value_len = de->len;
+ intencode(value_len, &end);
+ /* Copy the data */
+ memcpy(end, de->value.key, value_len);
+ end += value_len;
+ /* Encode the length of the data */
+ data_len = end - beg;
+ intencode(data_len, &cursor);
+ memmove(cursor, beg, data_len);
+ cursor += data_len;
+ }
+ break;
+ }
+ }
+ }
+ }
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* Compute datalen */
+ datalen = (cursor - datamsg);
+
+ /* prepare message header */
+ msg[0] = PEER_MSG_CLASS_STICKTABLE;
+ peer_set_update_msg_type(&msg[1], use_identifier, use_timed);
+ cursor = &msg[2];
+ intencode(datalen, &cursor);
+
+ /* move data after header */
+ memmove(cursor, datamsg, datalen);
+
+ /* return header size + data_len */
+ return (cursor - msg) + datalen;
+}
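+
+/* Example (illustrative only): an incremental update without identifier nor
+ * expiry carrying a 3-byte encoded entry is framed as 0x0a 0x81 0x03 followed
+ * by the 3 data bytes: class PEER_MSG_CLASS_STICKTABLE (10), type
+ * PEER_MSG_STKT_INCUPDATE (0x81), then the varint-encoded data length.
+ */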
+
+/*
+ * This prepares the switch table message for the targeted shared table <st>.
+ * <msg> is a buffer of <size> bytes to receive the data message content.
+ * If the function returns 0, the caller should consider we were unable to encode
+ * this message (TODO: check size)
+ */
+static int peer_prepare_switchmsg(char *msg, size_t size, struct peer_prep_params *params)
+{
+ int len;
+ unsigned short datalen;
+ struct buffer *chunk;
+ char *cursor, *datamsg, *chunkp, *chunkq;
+ uint64_t data = 0;
+ unsigned int data_type;
+ struct shared_table *st;
+
+ st = params->swtch.shared_table;
+ cursor = datamsg = msg + PEER_MSG_HEADER_LEN + PEER_MSG_ENC_LENGTH_MAXLEN;
+
+ /* Encode data */
+
+ /* encode local id */
+ intencode(st->local_id, &cursor);
+
+ /* encode table name */
+ len = strlen(st->table->nid);
+ intencode(len, &cursor);
+ memcpy(cursor, st->table->nid, len);
+ cursor += len;
+
+ /* encode table type */
+
+ intencode(peer_net_key_type[st->table->type], &cursor);
+
+ /* encode table key size */
+ intencode(st->table->key_size, &cursor);
+
+ chunk = get_trash_chunk();
+ chunkp = chunkq = chunk->area;
+ /* encode available known data types in table */
+ for (data_type = 0 ; data_type < STKTABLE_DATA_TYPES ; data_type++) {
+ if (st->table->data_ofs[data_type]) {
+ /* stored data types parameters are all linearly encoded
+ * at the end of the 'table definition' message.
+ *
+			 * Currently only array data_types and data_types
+ * using freq_counter base type have parameters:
+ *
+ * - array has always at least one parameter set to the
+			 * - an array always has at least one parameter set to the
+ *
+ * - array of base-type freq_counters has an additional
+ * parameter set to the period used to compute those
+ * freq_counters.
+ *
+			 * - a simple freq counter has a parameter set to the
+			 *   period used to compute it.
+			 *
+			 * A set of parameters for a datatype MUST BE prefixed
+ * by the data-type id itself:
+ * This is useless because the data_types are ordered and
+ * the data_type bitfield already gives the information of
+ * stored types, but it was designed this way when the
+			 * push of the period parameter was added for freq counters
+ * and we don't want to break the compatibility.
+ *
+ */
+ if (stktable_data_types[data_type].is_array) {
+ /* This is an array type so we first encode
+ * the data_type itself to prefix parameters
+ */
+ intencode(data_type, &chunkq);
+
+ /* We encode the first parameter which is
+ * the number of elements of this array
+ */
+ intencode(st->table->data_nbelem[data_type], &chunkq);
+
+ /* for array of freq counters, there is an additional
+ * period parameter to encode
+ */
+ if (stktable_data_types[data_type].std_type == STD_T_FRQP)
+ intencode(st->table->data_arg[data_type].u, &chunkq);
+ }
+ else if (stktable_data_types[data_type].std_type == STD_T_FRQP) {
+ /* this datatype is a simple freq counter not part
+ * of an array. We encode the data_type itself
+ * to prefix the 'period' parameter
+ */
+ intencode(data_type, &chunkq);
+ intencode(st->table->data_arg[data_type].u, &chunkq);
+ }
+ /* set the bit corresponding to stored data type */
+ data |= 1ULL << data_type;
+ }
+ }
+ intencode(data, &cursor);
+
+ /* Encode stick-table entries duration. */
+ intencode(st->table->expire, &cursor);
+
+ if (chunkq > chunkp) {
+ chunk->data = chunkq - chunkp;
+ memcpy(cursor, chunk->area, chunk->data);
+ cursor += chunk->data;
+ }
+
+ /* Compute datalen */
+ datalen = (cursor - datamsg);
+
+ /* prepare message header */
+ msg[0] = PEER_MSG_CLASS_STICKTABLE;
+ msg[1] = PEER_MSG_STKT_DEFINE;
+ cursor = &msg[2];
+ intencode(datalen, &cursor);
+
+ /* move data after header */
+ memmove(cursor, datamsg, datalen);
+
+ /* return header size + data_len */
+ return (cursor - msg) + datalen;
+}
+
+/*
+ * This prepares the acknowledgement message for the considered shared
+ * stick table <st>.
+ * <msg> is a buffer of <size> bytes to receive the data message content.
+ * If the function returns 0, the caller should consider we were unable to
+ * encode this message (TODO: check size).
+ */
+static int peer_prepare_ackmsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ unsigned short datalen;
+ char *cursor, *datamsg;
+ uint32_t netinteger;
+ struct shared_table *st;
+
+ cursor = datamsg = msg + PEER_MSG_HEADER_LEN + PEER_MSG_ENC_LENGTH_MAXLEN;
+
+ st = p->ack.shared_table;
+ intencode(st->remote_id, &cursor);
+ netinteger = htonl(st->last_get);
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+
+ /* Compute datalen */
+ datalen = (cursor - datamsg);
+
+ /* prepare message header */
+ msg[0] = PEER_MSG_CLASS_STICKTABLE;
+ msg[1] = PEER_MSG_STKT_ACK;
+ cursor = &msg[2];
+ intencode(datalen, &cursor);
+
+ /* move data after header */
+ memmove(cursor, datamsg, datalen);
+
+ /* return header size + data_len */
+ return (cursor - msg) + datalen;
+}
+
+/*
+ * Function to deinitialize a connected peer
+ */
+void __peer_session_deinit(struct peer *peer)
+{
+ struct peers *peers = peer->peers;
+ int thr;
+
+ if (!peers || !peer->appctx)
+ return;
+
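+	/* the applet's task runs on a single thread: derive its index from
+	 * the task's thread mask to update the per-thread applet count
+	 */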
+ thr = my_ffsl(peer->appctx->t->thread_mask) - 1;
+ HA_ATOMIC_DEC(&peers->applet_count[thr]);
+
+ if (peer->appctx->st0 == PEER_SESS_ST_WAITMSG)
+ HA_ATOMIC_DEC(&connected_peers);
+
+ HA_ATOMIC_DEC(&active_peers);
+
+ flush_dcache(peer);
+
+ /* Re-init current table pointers to force announcement on re-connect */
+ peer->remote_table = peer->last_local_table = NULL;
+ peer->appctx = NULL;
+ if (peer->flags & PEER_F_LEARN_ASSIGN) {
+ /* unassign current peer for learning */
+ peer->flags &= ~(PEER_F_LEARN_ASSIGN);
+ peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+
+ if (peer->local)
+ peers->flags |= PEERS_F_RESYNC_LOCALABORT;
+ else
+ peers->flags |= PEERS_F_RESYNC_REMOTEABORT;
+ /* reschedule a resync */
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(5000));
+ }
+ /* reset teaching and learning flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ peer->flags &= PEER_LEARN_RESET;
+ task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+}
+
+static int peer_session_init(struct appctx *appctx)
+{
+ struct peer *peer = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+
+ if (!sockaddr_alloc(&addr, &peer->addr, sizeof(peer->addr)))
+ goto out_error;
+
+ if (appctx_finalize_startup(appctx, peer->peers->peers_fe, &BUF_NULL) == -1)
+ goto out_free_addr;
+
+ s = appctx_strm(appctx);
+ /* applet is waiting for data */
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ /* initiate an outgoing connection */
+ s->scb->dst = addr;
+ s->scb->flags |= SC_FL_NOLINGER;
+ s->flags = SF_ASSIGNED;
+ s->target = peer_session_target(peer, s);
+
+ s->do_log = NULL;
+ s->uniq_id = 0;
+
+ s->res.flags |= CF_READ_DONTWAIT;
+
+ _HA_ATOMIC_INC(&active_peers);
+ return 0;
+
+ out_free_addr:
+ sockaddr_free(&addr);
+ out_error:
+ return -1;
+}
+
+/*
+ * Callback to release a session with a peer
+ */
+static void peer_session_release(struct appctx *appctx)
+{
+ struct peer *peer = appctx->svcctx;
+
+ TRACE_PROTO("releasing peer session", PEERS_EV_SESSREL, NULL, peer);
+ /* appctx->svcctx is not a peer session */
+ if (appctx->st0 < PEER_SESS_ST_SENDSUCCESS)
+ return;
+
+ /* peer session identified */
+ if (peer) {
+ HA_SPIN_LOCK(PEER_LOCK, &peer->lock);
+ if (peer->appctx == appctx)
+ __peer_session_deinit(peer);
+ peer->flags &= ~PEER_F_ALIVE;
+ HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock);
+ }
+}
+
+/* Retrieve the major and minor versions of the peers protocol
+ * announced by a remote peer. <str> is a null-terminated
+ * string with the following format: "<maj_ver>.<min_ver>".
+ * Returns 0 on success, -1 if the string could not be parsed.
+ */
+static int peer_get_version(const char *str,
+ unsigned int *maj_ver, unsigned int *min_ver)
+{
+ unsigned int majv, minv;
+ const char *pos, *saved;
+ const char *end;
+
+ saved = pos = str;
+ end = str + strlen(str);
+
+ majv = read_uint(&pos, end);
+ if (saved == pos || *pos++ != '.')
+ return -1;
+
+ saved = pos;
+ minv = read_uint(&pos, end);
+ if (saved == pos || pos != end)
+ return -1;
+
+ *maj_ver = majv;
+ *min_ver = minv;
+
+ return 0;
+}
+
+/*
+ * Parse a line terminated by an optional '\r' character, followed by a
+ * mandatory '\n' character.
+ * Returns the line length (> 0) if it succeeded, 0 if a complete line could
+ * not be found yet, and -1 on read error or if no '\n' character could be
+ * found, in which case the appctx st0 state is set to PEER_SESS_ST_END.
+ */
+static inline int peer_getline(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int n;
+
+ n = co_getline(sc_oc(sc), trash.area, trash.size);
+ if (!n)
+ return 0;
+
+ if (n < 0 || trash.area[n - 1] != '\n') {
+ appctx->st0 = PEER_SESS_ST_END;
+ return -1;
+ }
+
+ if (n > 1 && (trash.area[n - 2] == '\r'))
+ trash.area[n - 2] = 0;
+ else
+ trash.area[n - 1] = 0;
+
+ co_skip(sc_oc(sc), n);
+
+ return n;
+}
+
+/*
+ * Send a message after having called <peer_prepare_msg> to build it.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_msg(struct appctx *appctx,
+ int (*peer_prepare_msg)(char *, size_t, struct peer_prep_params *),
+ struct peer_prep_params *params)
+{
+ int ret, msglen;
+ struct stconn *sc = appctx_sc(appctx);
+
+ msglen = peer_prepare_msg(trash.area, trash.size, params);
+ if (!msglen) {
+ /* internal error: message does not fit in trash */
+ appctx->st0 = PEER_SESS_ST_END;
+ return 0;
+ }
+
+ /* message to buffer */
+ ret = ci_putblk(sc_ic(sc), trash.area, msglen);
+ if (ret <= 0) {
+ if (ret == -1) {
+ /* No more write possible */
+ sc_need_room(sc);
+ return -1;
+ }
+ appctx->st0 = PEER_SESS_ST_END;
+ }
+
+ return ret;
+}
+
+/*
+ * Send a hello message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_hellomsg(struct appctx *appctx, struct peer *peer)
+{
+ struct peer_prep_params p = {
+ .hello.peer = peer,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_hellomsg, &p);
+}
+
+/*
+ * Send a success peer handshake status message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_status_successmsg(struct appctx *appctx)
+{
+ return peer_send_msg(appctx, peer_prepare_status_successmsg, NULL);
+}
+
+/*
+ * Send a peer handshake status error message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_status_errormsg(struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .error_status.st1 = appctx->st1,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_status_errormsg, &p);
+}
+
+/*
+ * Send a stick-table switch message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_switchmsg(struct shared_table *st, struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .swtch.shared_table = st,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_switchmsg, &p);
+}
+
+/*
+ * Send a stick-table update acknowledgement message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_ackmsg(struct shared_table *st, struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .ack.shared_table = st,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_ackmsg, &p);
+}
+
+/*
+ * Send a stick-table update message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_updatemsg(struct shared_table *st, struct appctx *appctx, struct stksess *ts,
+ unsigned int updateid, int use_identifier, int use_timed)
+{
+ struct peer_prep_params p = {
+ .updt = {
+ .stksess = ts,
+ .shared_table = st,
+ .updateid = updateid,
+ .use_identifier = use_identifier,
+ .use_timed = use_timed,
+ .peer = appctx->svcctx,
+ },
+ };
+
+ return peer_send_msg(appctx, peer_prepare_updatemsg, &p);
+}
+
+/*
+ * Build a peer protocol control class message.
+ * Returns the number of written bytes used to build the message if succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_control_msg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ if (size < sizeof p->control.head)
+ return 0;
+
+ msg[0] = p->control.head[0];
+ msg[1] = p->control.head[1];
+
+ return 2;
+}
+
+/*
+ * Send a stick-table synchronization request message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_resync_reqmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, PEER_MSG_CTRL_RESYNCREQ, },
+ };
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Send a stick-table synchronization confirmation message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_resync_confirmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, PEER_MSG_CTRL_RESYNCCONFIRM, },
+ };
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Send a stick-table synchronization finished message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_resync_finishedmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, },
+ };
+
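+	/* a full resync ends with a RESYNCFINISHED message; otherwise only a
+	 * partial resync could be performed and RESYNCPARTIAL is sent instead
+	 */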
+ p.control.head[1] = (peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED ?
+ PEER_MSG_CTRL_RESYNCFINISHED : PEER_MSG_CTRL_RESYNCPARTIAL;
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Send a heartbeat message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_heartbeatmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, PEER_MSG_CTRL_HEARTBEAT, },
+ };
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Build a peer protocol error class message.
+ * Returns the number of written bytes used to build the message if succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_error_msg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ if (size < sizeof p->error.head)
+ return 0;
+
+ msg[0] = p->error.head[0];
+ msg[1] = p->error.head[1];
+
+ return 2;
+}
+
+/*
+ * Send a "size limit reached" error message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_error_size_limitmsg(struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .error.head = { PEER_MSG_CLASS_ERROR, PEER_MSG_ERR_SIZELIMIT, },
+ };
+
+ return peer_send_msg(appctx, peer_prepare_error_msg, &p);
+}
+
+/*
+ * Send a "peer protocol" error message.
+ * Returns 0 if the message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_error_protomsg(struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .error.head = { PEER_MSG_CLASS_ERROR, PEER_MSG_ERR_PROTOCOL, },
+ };
+
+ return peer_send_msg(appctx, peer_prepare_error_msg, &p);
+}
+
+/*
+ * Function used to look up recent stick-table updates associated with the
+ * <st> shared stick-table when a lesson must be taught to a peer
+ * (PEER_F_LEARN_ASSIGN flag set).
+ */
+static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_table *st)
+{
+ struct eb32_node *eb;
+
+ eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1);
+ if (!eb) {
+ eb = eb32_first(&st->table->updates);
+ if (!eb || (eb->key == st->last_pushed)) {
+ st->table->commitupdate = st->last_pushed = st->table->localupdate;
+ return NULL;
+ }
+ }
+
+	/* if the distance between the last pushed and the retrieved key
+	 * is greater than the distance between last_pushed and localupdate,
+	 * this means we are beyond localupdate (distances are computed in
+	 * wrapping unsigned 32-bit arithmetic).
+	 */
+ if ((eb->key - st->last_pushed) > (st->table->localupdate - st->last_pushed)) {
+ st->table->commitupdate = st->last_pushed = st->table->localupdate;
+ return NULL;
+ }
+
+ return eb32_entry(eb, struct stksess, upd);
+}
+
+/*
+ * Function used to look up recent stick-table updates associated with the
+ * <st> shared stick-table during the teach state 1 step.
+ */
+static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_table *st)
+{
+ struct eb32_node *eb;
+
+ eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1);
+ if (!eb) {
+ st->flags |= SHTABLE_F_TEACH_STAGE1;
+ eb = eb32_first(&st->table->updates);
+ if (eb)
+ st->last_pushed = eb->key - 1;
+ return NULL;
+ }
+
+ return eb32_entry(eb, struct stksess, upd);
+}
+
+/*
+ * Function used to look up recent stick-table updates associated with the
+ * <st> shared stick-table during the teach state 2 step.
+ */
+static inline struct stksess *peer_teach_stage2_stksess_lookup(struct shared_table *st)
+{
+ struct eb32_node *eb;
+
+ eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1);
+ if (!eb || eb->key > st->teaching_origin) {
+ st->flags |= SHTABLE_F_TEACH_STAGE2;
+ return NULL;
+ }
+
+ return eb32_entry(eb, struct stksess, upd);
+}
+
+/*
+ * Generic function to emit update messages for the <st> stick-table when a
+ * lesson must be taught to the peer <p>.
+ * <locked> must be set to 1 if the shared table <st> is already locked when
+ * entering this function, 0 if not.
+ *
+ * This function temporarily unlocks/relocks <st> when it sends stick-table
+ * updates, or when decrementing an entry's refcount after a send error.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ * If it returns 0 or -1, this function leaves <st> locked if it was already
+ * locked when entering, and unlocked if it was not.
+ */
+static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p,
+ struct stksess *(*peer_stksess_lookup)(struct shared_table *),
+ struct shared_table *st, int locked)
+{
+ int ret, new_pushed, use_timed;
+ int updates_sent = 0;
+
+ ret = 1;
+ use_timed = 0;
+ if (st != p->last_local_table) {
+ ret = peer_send_switchmsg(st, appctx);
+ if (ret <= 0)
+ return ret;
+
+ p->last_local_table = st;
+ }
+
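+	/* timed updates (carrying the remaining expiry) are used during the
+	 * teach stages, unless the remote peer announced the downgraded
+	 * protocol version, which presumably does not support them
+	 */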
+ if (peer_stksess_lookup != peer_teach_process_stksess_lookup)
+ use_timed = !(p->flags & PEER_F_DWNGRD);
+
+	/* We force new_pushed to 1 to force the identifier in the first update message */
+ new_pushed = 1;
+
+ if (!locked)
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &st->table->lock);
+
+ while (1) {
+ struct stksess *ts;
+ unsigned updateid;
+
+ /* push local updates */
+ ts = peer_stksess_lookup(st);
+ if (!ts) {
+ ret = 1; // done
+ break;
+ }
+
+ updateid = ts->upd.key;
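+		/* hold a reference on the entry so that it cannot be purged
+		 * while the table lock is released for the send below
+		 */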
+ ts->ref_cnt++;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &st->table->lock);
+
+ ret = peer_send_updatemsg(st, appctx, ts, updateid, new_pushed, use_timed);
+ if (ret <= 0) {
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &st->table->lock);
+ ts->ref_cnt--;
+ break;
+ }
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &st->table->lock);
+ ts->ref_cnt--;
+ st->last_pushed = updateid;
+
+ if (peer_stksess_lookup == peer_teach_process_stksess_lookup &&
+ (int)(st->last_pushed - st->table->commitupdate) > 0)
+ st->table->commitupdate = st->last_pushed;
+
+		/* the identifier may not be needed in the next update message */
+ new_pushed = 0;
+
+ updates_sent++;
+ if (updates_sent >= peers_max_updates_at_once) {
+ /* pretend we're full so that we get back ASAP */
+ struct stconn *sc = appctx_sc(appctx);
+
+ sc_need_room(sc);
+ ret = -1;
+ break;
+ }
+ }
+
+ out:
+ if (!locked)
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ return ret;
+}
+
+/*
+ * Function to emit update messages for the <st> stick-table when a lesson
+ * must be taught to the peer <p> (PEER_F_LEARN_ASSIGN flag set).
+ *
+ * Note that the <st> shared stick-table is locked when calling this function.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_teach_process_msgs(struct appctx *appctx, struct peer *p,
+ struct shared_table *st)
+{
+ return peer_send_teachmsgs(appctx, p, peer_teach_process_stksess_lookup, st, 1);
+}
+
+/*
+ * Function to emit update messages for the <st> stick-table when a lesson
+ * must be taught to the peer <p> during the teach state 1 step.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_teach_stage1_msgs(struct appctx *appctx, struct peer *p,
+ struct shared_table *st)
+{
+ return peer_send_teachmsgs(appctx, p, peer_teach_stage1_stksess_lookup, st, 0);
+}
+
+/*
+ * Function to emit update messages for the <st> stick-table when a lesson
+ * must be taught to the peer <p> during the teach state 2 step.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 state
+ * to PEER_SESS_ST_END.
+ * Returns -1 if there was not enough room left to send the message; any other
+ * negative return value must be considered as an error, with the appctx st0
+ * state set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_teach_stage2_msgs(struct appctx *appctx, struct peer *p,
+ struct shared_table *st)
+{
+ return peer_send_teachmsgs(appctx, p, peer_teach_stage2_stksess_lookup, st, 0);
+}
+
+
+/*
+ * Function used to parse a stick-table update message after it has been
+ * received from peer <p>, with <msg_cur> as the address of the pointer to
+ * the current position in the reception buffer and <msg_end> being the
+ * position of the end of the stick-table message.
+ * Updates <msg_cur> according to the peer protocol specs if no peer protocol
+ * error was encountered.
+ * <exp> must be set if the stick-table entry expires.
+ * <updt> must be set for PEER_MSG_STKT_UPDATE or PEER_MSG_STKT_UPDATE_TIMED
+ * stick-table messages; in this case the stick-table update message is
+ * received with a stick-table update ID.
+ * <totl> is the length of the stick-table update message computed upon receipt.
+ */
+static int peer_treat_updatemsg(struct appctx *appctx, struct peer *p, int updt, int exp,
+ char **msg_cur, char *msg_end, int msg_len, int totl)
+{
+ struct shared_table *st = p->remote_table;
+ struct stksess *ts, *newts;
+ uint32_t update;
+ int expire;
+ unsigned int data_type;
+ void *data_ptr;
+
+ TRACE_ENTER(PEERS_EV_UPDTMSG, NULL, p);
+ /* Here we have data message */
+ if (!st)
+ goto ignore_msg;
+
+ expire = MS_TO_TICKS(st->table->expire);
+
+ if (updt) {
+ if (msg_len < sizeof(update)) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ memcpy(&update, *msg_cur, sizeof(update));
+ *msg_cur += sizeof(update);
+ st->last_get = htonl(update);
+ }
+ else {
+ st->last_get++;
+ }
+
+ if (exp) {
+ size_t expire_sz = sizeof expire;
+
+ if (*msg_cur + expire_sz > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &expire_sz);
+ goto malformed_exit;
+ }
+
+ memcpy(&expire, *msg_cur, expire_sz);
+ *msg_cur += expire_sz;
+ expire = ntohl(expire);
+ }
+
+ newts = stksess_new(st->table, NULL);
+ if (!newts)
+ goto ignore_msg;
+
+ if (st->table->type == SMP_T_STR) {
+ unsigned int to_read, to_store;
+
+ to_read = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_free_newts;
+ }
+
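+		/* the key may be truncated to the local key size; note that
+		 * the full encoded length is still skipped below so that
+		 * parsing stays in sync with the sender
+		 */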
+ to_store = MIN(to_read, st->table->key_size - 1);
+ if (*msg_cur + to_store > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &to_store);
+ goto malformed_free_newts;
+ }
+
+ memcpy(newts->key.key, *msg_cur, to_store);
+ newts->key.key[to_store] = 0;
+ *msg_cur += to_read;
+ }
+ else if (st->table->type == SMP_T_SINT) {
+ unsigned int netinteger;
+
+ if (*msg_cur + sizeof(netinteger) > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end);
+ goto malformed_free_newts;
+ }
+
+ memcpy(&netinteger, *msg_cur, sizeof(netinteger));
+ netinteger = ntohl(netinteger);
+ memcpy(newts->key.key, &netinteger, sizeof(netinteger));
+ *msg_cur += sizeof(netinteger);
+ }
+ else {
+ if (*msg_cur + st->table->key_size > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &st->table->key_size);
+ goto malformed_free_newts;
+ }
+
+ memcpy(newts->key.key, *msg_cur, st->table->key_size);
+ *msg_cur += st->table->key_size;
+ }
+
+	/* look up an existing entry */
+ ts = stktable_set_entry(st->table, newts);
+ if (ts != newts) {
+ stksess_free(st->table, newts);
+ newts = NULL;
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ for (data_type = 0 ; data_type < STKTABLE_DATA_TYPES ; data_type++) {
+ uint64_t decoded_int;
+ unsigned int idx;
+ int ignore;
+
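+		/* skip data types which were not announced in the remote
+		 * table definition message
+		 */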
+ if (!((1ULL << data_type) & st->remote_data))
+ continue;
+
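+		/* purely local data types are still decoded to keep the
+		 * stream in sync, but their values are not stored
+		 */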
+ ignore = stktable_data_types[data_type].is_local;
+
+ if (stktable_data_types[data_type].is_array) {
+			/* in the case of an array, all elements use the same
+			 * std_type and they are linearly encoded.
+			 * The number of elements was provided by the table
+			 * definition message.
+			 */
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_sint) = decoded_int;
+ }
+ break;
+ case STD_T_UINT:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_uint) = decoded_int;
+ }
+ break;
+ case STD_T_ULL:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_ull) = decoded_int;
+ }
+ break;
+ case STD_T_FRQP:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ struct freq_ctr data;
+
+					/* First bit is reserved for the freq_ctr lock
+					 * Note: here we're still protected by the stksess lock
+					 * so we don't need to update the freq_ctr
+					 * using its internal lock.
+					 */
+
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data.curr_tick = tick_add(now_ms, -decoded_int) & ~0x1;
+ data.curr_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data.prev_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_frqp) = data;
+ }
+ break;
+ }
+
+			/* the array is fully decoded,
+			 * proceed to the next data_type.
+			 */
+ continue;
+ }
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT:
+ data_ptr = stktable_data_ptr(st->table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_sint) = decoded_int;
+ break;
+
+ case STD_T_UINT:
+ data_ptr = stktable_data_ptr(st->table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_uint) = decoded_int;
+ break;
+
+ case STD_T_ULL:
+ data_ptr = stktable_data_ptr(st->table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_ull) = decoded_int;
+ break;
+
+ case STD_T_FRQP: {
+ struct freq_ctr data;
+
+			/* First bit is reserved for the freq_ctr lock
+			 * Note: here we're still protected by the stksess lock
+			 * so we don't need to update the freq_ctr
+			 * using its internal lock.
+			 */
+
+ data.curr_tick = tick_add(now_ms, -decoded_int) & ~0x1;
+ data.curr_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data.prev_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr(st->table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_frqp) = data;
+ break;
+ }
+ case STD_T_DICT: {
+ struct buffer *chunk;
+ size_t data_len, value_len;
+ unsigned int id;
+ struct dict_entry *de;
+ struct dcache *dc;
+ char *end;
+
+ if (!decoded_int) {
+ /* No entry. */
+ break;
+ }
+ data_len = decoded_int;
+ if (*msg_cur + data_len > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &data_len);
+ goto malformed_unlock;
+ }
+
+ /* Compute the end of the current data, <msg_end> being at the end of
+ * the entire message.
+ */
+ end = *msg_cur + data_len;
+ id = intdecode(msg_cur, end);
+ if (!*msg_cur || !id) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur, &id);
+ goto malformed_unlock;
+ }
+
+ dc = p->dcache;
+ if (*msg_cur == end) {
+ /* Dictionary entry key without value. */
+ if (id > dc->max_entries) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, NULL, &id);
+ goto malformed_unlock;
+ }
+ /* IDs sent over the network are numbered from 1. */
+ de = dc->rx[id - 1].de;
+ }
+ else {
+ chunk = get_trash_chunk();
+ value_len = intdecode(msg_cur, end);
+ if (!*msg_cur || *msg_cur + value_len > end ||
+ unlikely(value_len + 1 >= chunk->size)) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur, &value_len);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, end, &chunk->size);
+ goto malformed_unlock;
+ }
+
+ chunk_memcpy(chunk, *msg_cur, value_len);
+ chunk->area[chunk->data] = '\0';
+ *msg_cur += value_len;
+
+ de = dict_insert(&server_key_dict, chunk->area);
+ dict_entry_unref(&server_key_dict, dc->rx[id - 1].de);
+ dc->rx[id - 1].de = de;
+ }
+ if (de) {
+ data_ptr = stktable_data_ptr(st->table, ts, data_type);
+ if (data_ptr && !ignore) {
+ HA_ATOMIC_INC(&de->refcount);
+ stktable_data_cast(data_ptr, std_t_dict) = de;
+ }
+ }
+ break;
+ }
+ }
+ }
+ /* Force new expiration */
+ ts->expire = tick_add(now_ms, expire);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
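+	/* requeue the entry as a remote update and drop the reference held
+	 * on it since stktable_set_entry()
+	 */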
+ stktable_touch_remote(st->table, ts, 1);
+
+ ignore_msg:
+ TRACE_LEAVE(PEERS_EV_UPDTMSG, NULL, p);
+ return 1;
+
+ malformed_unlock:
+ /* malformed message */
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_remote(st->table, ts, 1);
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ TRACE_DEVEL("leaving in error", PEERS_EV_UPDTMSG);
+ return 0;
+
+ malformed_free_newts:
+ /* malformed message */
+ stksess_free(st->table, newts);
+ malformed_exit:
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ TRACE_DEVEL("leaving in error", PEERS_EV_UPDTMSG);
+ return 0;
+}
+
+/*
+ * Function used to parse a stick-table update acknowledgement message after
+ * it has been received from peer <p>, with <msg_cur> as the address of the
+ * pointer to the current position in the reception buffer and <msg_end>
+ * being the position of the end of the stick-table message.
+ * Updates <msg_cur> according to the peer protocol specs if no peer protocol
+ * error was encountered.
+ * Returns 1 if it succeeded, 0 if not, with the appctx st0 state set to
+ * PEER_SESS_ST_ERRPROTO.
+ */
+static inline int peer_treat_ackmsg(struct appctx *appctx, struct peer *p,
+ char **msg_cur, char *msg_end)
+{
+ /* ack message */
+ uint32_t table_id ;
+ uint32_t update;
+ struct shared_table *st;
+
+ /* ignore ack during teaching process */
+ if (p->flags & PEER_F_TEACH_PROCESS)
+ return 1;
+
+ table_id = intdecode(msg_cur, msg_end);
+ if (!*msg_cur || (*msg_cur + sizeof(update) > msg_end)) {
+ /* malformed message */
+
+ TRACE_PROTO("malformed message", PEERS_EV_ACKMSG,
+ NULL, p, *msg_cur);
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+ }
+
+ memcpy(&update, *msg_cur, sizeof(update));
+ update = ntohl(update);
+
+ for (st = p->tables; st; st = st->next) {
+ if (st->local_id == table_id) {
+ st->update = update;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Function used to parse a stick-table switch message after it has been
+ * received from peer <p>, with <msg_cur> as the address of the pointer to
+ * the current position in the reception buffer and <msg_end> being the
+ * position of the end of the stick-table message.
+ * Updates <msg_cur> according to the peer protocol specs if no peer protocol
+ * error was encountered.
+ * Returns 1 if it succeeded, 0 if not, with the appctx st0 state set to
+ * PEER_SESS_ST_ERRPROTO.
+ */
+static inline int peer_treat_switchmsg(struct appctx *appctx, struct peer *p,
+ char **msg_cur, char *msg_end)
+{
+ struct shared_table *st;
+ int table_id;
+
+ table_id = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_SWTCMSG, NULL, p);
+ /* malformed message */
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+ }
+
+ p->remote_table = NULL;
+ for (st = p->tables; st; st = st->next) {
+ if (st->remote_id == table_id) {
+ p->remote_table = st;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Function used to parse a stick-table definition message after it has been
+ * received from peer <p>, with <msg_cur> as the address of the pointer to
+ * the current position in the reception buffer and <msg_end> being the
+ * position of the end of the stick-table message.
+ * Updates <msg_cur> according to the peer protocol specs if no peer protocol
+ * error was encountered.
+ * <totl> is the length of the stick-table definition message computed upon receipt.
+ * Returns 1 if it succeeded, 0 if not, with the appctx st0 state set to
+ * PEER_SESS_ST_ERRPROTO.
+ */
+static inline int peer_treat_definemsg(struct appctx *appctx, struct peer *p,
+ char **msg_cur, char *msg_end, int totl)
+{
+ int table_id_len;
+ struct shared_table *st;
+ int table_type;
+ int table_keylen;
+ int table_id;
+ uint64_t table_data;
+
+ table_id = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_id_len = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p, *msg_cur);
+ goto malformed_exit;
+ }
+
+ p->remote_table = NULL;
+ if (!table_id_len || (*msg_cur + table_id_len) >= msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p, *msg_cur, &table_id_len);
+ goto malformed_exit;
+ }
+
+ for (st = p->tables; st; st = st->next) {
+ /* Reset IDs */
+ if (st->remote_id == table_id)
+ st->remote_id = 0;
+
+ if (!p->remote_table && (table_id_len == strlen(st->table->nid)) &&
+ (memcmp(st->table->nid, *msg_cur, table_id_len) == 0))
+ p->remote_table = st;
+ }
+
+ if (!p->remote_table) {
+ TRACE_PROTO("ignored message", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ *msg_cur += table_id_len;
+ if (*msg_cur >= msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_type = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_keylen = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_data = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ if (p->remote_table->table->type != peer_int_key_type[table_type]
+ || p->remote_table->table->key_size != table_keylen) {
+ p->remote_table = NULL;
+ TRACE_PROTO("ignored message", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+	/* Check if there is the additional expire data */
+ intdecode(msg_cur, msg_end);
+ if (*msg_cur) {
+ uint64_t data_type;
+ uint64_t type;
+
+		/* This define contains the expire data so we consider
+		 * it also contains all data_type parameters.
+		 */
+ for (data_type = 0; data_type < STKTABLE_DATA_TYPES; data_type++) {
+ if (table_data & (1ULL << data_type)) {
+ if (stktable_data_types[data_type].is_array) {
+ /* This should be an array
+ * so we parse the data_type prefix
+ * because we must have parameters.
+ */
+ type = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing meta data for array", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+					/* check that the data_type matches the current one from the bitfield */
+ if (type != data_type) {
+ p->remote_table = NULL;
+ TRACE_PROTO("meta data mismatch type", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ /* decode the nbelem of the array */
+ p->remote_table->remote_data_nbelem[type] = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing array size meta data for array", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ /* if it is an array of frqp, we must also have the period to decode */
+ if (stktable_data_types[data_type].std_type == STD_T_FRQP) {
+ intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing period for frqp", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+ }
+ }
+ else if (stktable_data_types[data_type].std_type == STD_T_FRQP) {
+ /* This should be a std freq counter data_type
+ * so we parse the data_type prefix
+ * because we must have parameters.
+ */
+ type = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing meta data for frqp", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+					/* check that the data_type matches the current one from the bitfield */
+ if (type != data_type) {
+ p->remote_table = NULL;
+ TRACE_PROTO("meta data mismatch type", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ /* decode the period */
+ intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing period for frqp", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+ }
+ }
+ }
+ }
+ else {
+ uint64_t data_type;
+
+		/* There is no additional data, but the array size parameter
+		 * is mandatory to parse an array, so we consider it an error
+		 * if an array data_type is defined but there is no additional
+		 * data.
+		 */
+ for (data_type = 0; data_type < STKTABLE_DATA_TYPES; data_type++) {
+ if (table_data & (1ULL << data_type)) {
+ if (stktable_data_types[data_type].is_array) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing array size meta data for array", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+ }
+ }
+ }
+
+ p->remote_table->remote_data = table_data;
+ p->remote_table->remote_id = table_id;
+
+ ignore_msg:
+ return 1;
+
+ malformed_exit:
+ /* malformed message */
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+}
+
+/*
+ * Receive a stick-table message or pre-parse any other message.
+ * The message's header will be stored into <msg_head> which must be at least
+ * <msg_head_sz> bytes long (at least 7 to store 32-bit variable lengths).
+ * The first two bytes are always read, and the rest is only read if the
+ * first bytes indicate a stick-table message. If the message is a stick-table
+ * message, the varint is decoded and the equivalent number of bytes will be
+ * copied into the trash at trash.area. <totl> is incremented by the number of
+ * bytes read EVEN IN CASE OF INCOMPLETE MESSAGES.
+ * Returns 1 if there was no error, 0 if not enough data was available, and
+ * -1 if there was an error, updating the appctx st0 state accordingly.
+ */
+static inline int peer_recv_msg(struct appctx *appctx, char *msg_head, size_t msg_head_sz,
+ uint32_t *msg_len, int *totl)
+{
+ int reql;
+ struct stconn *sc = appctx_sc(appctx);
+ char *cur;
+
+ reql = co_getblk(sc_oc(sc), msg_head, 2 * sizeof(char), *totl);
+ if (reql <= 0) /* closed or EOL not found */
+ goto incomplete;
+
+ *totl += reql;
+
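+	/* messages without the stick-table bit carry no payload (control and
+	 * error messages are just two bytes), so there is nothing more to read
+	 */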
+ if (!(msg_head[1] & PEER_MSG_STKT_BIT_MASK))
+ return 1;
+
+ /* This is a stick-table message, let's go on */
+
+ /* Read and Decode message length */
+ msg_head += *totl;
+ msg_head_sz -= *totl;
+ reql = co_data(sc_oc(sc)) - *totl;
+ if (reql > msg_head_sz)
+ reql = msg_head_sz;
+
+ reql = co_getblk(sc_oc(sc), msg_head, reql, *totl);
+ if (reql <= 0) /* closed */
+ goto incomplete;
+
+ cur = msg_head;
+ *msg_len = intdecode(&cur, cur + reql);
+ if (!cur) {
+ /* the number is truncated, did we read enough ? */
+ if (reql < msg_head_sz)
+ goto incomplete;
+
+ /* malformed message */
+ TRACE_PROTO("malformed message: too large length encoding", PEERS_EV_UPDTMSG);
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return -1;
+ }
+ *totl += cur - msg_head;
+
+ /* Read message content */
+ if (*msg_len) {
+ if (*msg_len > trash.size) {
+ /* Status code is not success, abort */
+ appctx->st0 = PEER_SESS_ST_ERRSIZE;
+ return -1;
+ }
+
+ reql = co_getblk(sc_oc(sc), trash.area, *msg_len, *totl);
+ if (reql <= 0) /* closed */
+ goto incomplete;
+ *totl += reql;
+ }
+
+ return 1;
+
+ incomplete:
+ if (reql < 0 || (sc_oc(sc)->flags & (CF_SHUTW|CF_SHUTW_NOW))) {
+ /* there was an error or the message was truncated */
+ appctx->st0 = PEER_SESS_ST_END;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Treat the awaited message with <msg_head> as its header.
+ * Returns 1 if it succeeded, 0 if not.
+ */
+static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *peer, unsigned char *msg_head,
+ char **msg_cur, char *msg_end, int msg_len, int totl)
+{
+ struct peers *peers = peer->peers;
+
+ if (msg_head[0] == PEER_MSG_CLASS_CONTROL) {
+ if (msg_head[1] == PEER_MSG_CTRL_RESYNCREQ) {
+ struct shared_table *st;
+			/* Reset message: the remote needs a resync */
+
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ /* prepare tables for a global push */
+ for (st = peer->tables; st; st = st->next) {
+ st->teaching_origin = st->last_pushed = st->update;
+ st->flags = 0;
+ }
+
+ /* reset teaching flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+
+			/* flag to start teaching a lesson */
+ peer->flags |= PEER_F_TEACH_PROCESS;
+ peers->flags |= PEERS_F_RESYNC_REQUESTED;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_RESYNCFINISHED) {
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ if (peer->flags & PEER_F_LEARN_ASSIGN) {
+ peer->flags &= ~PEER_F_LEARN_ASSIGN;
+ peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+ peers->flags |= (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE);
+ if (peer->local)
+ peers->flags |= PEERS_F_RESYNC_LOCALFINISHED;
+ else
+ peers->flags |= PEERS_F_RESYNC_REMOTEFINISHED;
+ }
+ peer->confirm++;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_RESYNCPARTIAL) {
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ if (peer->flags & PEER_F_LEARN_ASSIGN) {
+ peer->flags &= ~PEER_F_LEARN_ASSIGN;
+ peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+
+ if (peer->local)
+ peers->flags |= PEERS_F_RESYNC_LOCALPARTIAL;
+ else
+ peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL;
+ peer->flags |= PEER_F_LEARN_NOTUP2DATE;
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+ task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+ }
+ peer->confirm++;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_RESYNCCONFIRM) {
+ struct shared_table *st;
+
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ /* If stopping state */
+ if (stopping) {
+				/* Close session, resync push no longer needed */
+ peer->flags |= PEER_F_TEACH_COMPLETE;
+ appctx->st0 = PEER_SESS_ST_END;
+ return 0;
+ }
+ for (st = peer->tables; st; st = st->next) {
+ st->update = st->last_pushed = st->teaching_origin;
+ st->flags = 0;
+ }
+
+ /* reset teaching flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_HEARTBEAT) {
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ peer->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT));
+ peer->rx_hbt++;
+ }
+ }
+ else if (msg_head[0] == PEER_MSG_CLASS_STICKTABLE) {
+ if (msg_head[1] == PEER_MSG_STKT_DEFINE) {
+ if (!peer_treat_definemsg(appctx, peer, msg_cur, msg_end, totl))
+ return 0;
+ }
+ else if (msg_head[1] == PEER_MSG_STKT_SWITCH) {
+ if (!peer_treat_switchmsg(appctx, peer, msg_cur, msg_end))
+ return 0;
+ }
+ else if (msg_head[1] == PEER_MSG_STKT_UPDATE ||
+ msg_head[1] == PEER_MSG_STKT_INCUPDATE ||
+ msg_head[1] == PEER_MSG_STKT_UPDATE_TIMED ||
+ msg_head[1] == PEER_MSG_STKT_INCUPDATE_TIMED) {
+ int update, expire;
+
+ update = msg_head[1] == PEER_MSG_STKT_UPDATE || msg_head[1] == PEER_MSG_STKT_UPDATE_TIMED;
+ expire = msg_head[1] == PEER_MSG_STKT_UPDATE_TIMED || msg_head[1] == PEER_MSG_STKT_INCUPDATE_TIMED;
+ if (!peer_treat_updatemsg(appctx, peer, update, expire,
+ msg_cur, msg_end, msg_len, totl))
+ return 0;
+
+ }
+ else if (msg_head[1] == PEER_MSG_STKT_ACK) {
+ if (!peer_treat_ackmsg(appctx, peer, msg_cur, msg_end))
+ return 0;
+ }
+ }
+ else if (msg_head[0] == PEER_MSG_CLASS_RESERVED) {
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/*
+ * Send any message to <peer> peer.
+ * Returns 1 if succeeded, or -1 or 0 if failed.
+ * -1 means an internal error occurred, 0 is for a peer protocol error leading
+ * to a peer state change (from the peer I/O handler point of view).
+ */
+static inline int peer_send_msgs(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ int repl;
+
+ /* Need to request a resync */
+ if ((peer->flags & PEER_F_LEARN_ASSIGN) &&
+ (peers->flags & PEERS_F_RESYNC_ASSIGN) &&
+ !(peers->flags & PEERS_F_RESYNC_PROCESS)) {
+
+ repl = peer_send_resync_reqmsg(appctx, peer, peers);
+ if (repl <= 0)
+ return repl;
+
+ peers->flags |= PEERS_F_RESYNC_PROCESS;
+ }
+
+ /* Nothing to read, now we start to write */
+ if (peer->tables) {
+ struct shared_table *st;
+ struct shared_table *last_local_table;
+ int updates_sent = 0;
+
+ last_local_table = peer->last_local_table;
+ if (!last_local_table)
+ last_local_table = peer->tables;
+ st = last_local_table->next;
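+		/* resume the walk right after the last table taught, wrapping
+		 * around the list so that every table gets its turn
+		 */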
+
+ while (1) {
+ if (!st)
+ st = peer->tables;
+
+			/* Some updates remain to be acked */
+ if (st->last_get != st->last_acked) {
+ repl = peer_send_ackmsg(st, appctx);
+ if (repl <= 0)
+ return repl;
+
+ st->last_acked = st->last_get;
+ }
+
+ if (!(peer->flags & PEER_F_TEACH_PROCESS)) {
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &st->table->lock);
+ if (!(peer->flags & PEER_F_LEARN_ASSIGN) &&
+ (st->last_pushed != st->table->localupdate)) {
+
+ repl = peer_send_teach_process_msgs(appctx, peer, st);
+ if (repl <= 0) {
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ return repl;
+ }
+ }
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ }
+ else if (!(peer->flags & PEER_F_TEACH_FINISHED)) {
+ if (!(st->flags & SHTABLE_F_TEACH_STAGE1)) {
+ repl = peer_send_teach_stage1_msgs(appctx, peer, st);
+ if (repl <= 0)
+ return repl;
+ }
+
+ if (!(st->flags & SHTABLE_F_TEACH_STAGE2)) {
+ repl = peer_send_teach_stage2_msgs(appctx, peer, st);
+ if (repl <= 0)
+ return repl;
+ }
+ }
+
+ if (st == last_local_table)
+ break;
+ st = st->next;
+
+ updates_sent++;
+ if (updates_sent >= peers_max_updates_at_once) {
+ /* pretend we're full so that we get back ASAP */
+ struct stconn *sc = appctx_sc(appctx);
+
+ sc_need_room(sc);
+ return -1;
+ }
+ }
+ }
+
+ if ((peer->flags & PEER_F_TEACH_PROCESS) && !(peer->flags & PEER_F_TEACH_FINISHED)) {
+ repl = peer_send_resync_finishedmsg(appctx, peer, peers);
+ if (repl <= 0)
+ return repl;
+
+ /* flag finished message sent */
+ peer->flags |= PEER_F_TEACH_FINISHED;
+ }
+
+ /* Confirm finished or partial messages */
+ while (peer->confirm) {
+ repl = peer_send_resync_confirmsg(appctx, peer, peers);
+ if (repl <= 0)
+ return repl;
+
+ peer->confirm--;
+ }
+
+ return 1;
+}
+
+/*
+ * Read and parse the first line of a "hello" peer protocol message.
+ * Returns 0 if a line could not be read, -1 if there was a read error or
+ * the line is malformed, 1 if it succeeded.
+ */
+static inline int peer_getline_version(struct appctx *appctx,
+ unsigned int *maj_ver, unsigned int *min_ver)
+{
+ int reql;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ return 0;
+
+ if (reql < 0)
+ return -1;
+
+ /* test protocol */
+ if (strncmp(PEER_SESSION_PROTO_NAME " ", trash.area, proto_len + 1) != 0) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRPROTO;
+ return -1;
+ }
+ if (peer_get_version(trash.area + proto_len + 1, maj_ver, min_ver) == -1 ||
+ *maj_ver != PEER_MAJOR_VER || *min_ver > PEER_MINOR_VER) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRVERSION;
+ return -1;
+ }
+
+ return 1;
+}
+
+/*
+ * Read and parse the second line of a "hello" peer protocol message.
+ * Returns 0 if a line could not be read, -1 if there was a read error or
+ * the line is malformed, 1 if it succeeded.
+ */
+static inline int peer_getline_host(struct appctx *appctx)
+{
+ int reql;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ return 0;
+
+ if (reql < 0)
+ return -1;
+
+ /* test hostname match */
+ if (strcmp(localpeer, trash.area) != 0) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRHOST;
+ return -1;
+ }
+
+ return 1;
+}
+
+/*
+ * Read and parse the last line of a "hello" peer protocol message.
+ * Returns 0 if a line could not be read, -1 if there was a read error or
+ * the line is malformed, 1 if it succeeded.
+ * Sets <curpeer> accordingly (the remote peer sending the "hello" message).
+ */
+static inline int peer_getline_last(struct appctx *appctx, struct peer **curpeer)
+{
+ char *p;
+ int reql;
+ struct peer *peer;
+ struct stream *s = appctx_strm(appctx);
+ struct peers *peers = strm_fe(s)->parent;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ return 0;
+
+ if (reql < 0)
+ return -1;
+
+ /* parse line "<peer name> <pid> <relative_pid>" */
+ p = strchr(trash.area, ' ');
+ if (!p) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRPROTO;
+ return -1;
+ }
+ *p = 0;
+
+ /* lookup known peer */
+ for (peer = peers->remote; peer; peer = peer->next) {
+ if (strcmp(peer->id, trash.area) == 0)
+ break;
+ }
+
+ /* if unknown peer */
+ if (!peer) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRPEER;
+ return -1;
+ }
+ *curpeer = peer;
+
+ return 1;
+}
+
+/*
+ * Init <peer> peer after having accepted it at peer protocol level.
+ */
+static inline void init_accepted_peer(struct peer *peer, struct peers *peers)
+{
+ struct shared_table *st;
+
+ peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ /* Register status code */
+ peer->statuscode = PEER_SESS_SC_SUCCESSCODE;
+ peer->last_hdshk = now_ms;
+
+ /* Awake main task */
+ task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+
+ /* Init confirm counter */
+ peer->confirm = 0;
+
+ /* Init cursors */
+ for (st = peer->tables; st ; st = st->next) {
+ st->last_get = st->last_acked = 0;
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &st->table->lock);
+		/* if st->update appears to be in the future, it means that
+		 * the last acked value is very old and we remained
+		 * disconnected for too long to use this acknowledgement as
+		 * a reset.
+		 * We should update the protocol to be able to signal the
+		 * remote peer that it needs a full resync.
+		 * Here a partial fix consists in setting st->update to the
+		 * farthest past value, i.e. 2^31 behind localupdate in the
+		 * wrapping 32-bit update space.
+		 */
+ if ((int)(st->table->localupdate - st->update) < 0)
+ st->update = st->table->localupdate + (2147483648U);
+ st->teaching_origin = st->last_pushed = st->update;
+ st->flags = 0;
+ if ((int)(st->last_pushed - st->table->commitupdate) > 0)
+ st->table->commitupdate = st->last_pushed;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ }
+
+ /* reset teaching and learning flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ peer->flags &= PEER_LEARN_RESET;
+
+ /* if current peer is local */
+ if (peer->local) {
+		/* if the current host needs a resync from local and no process is assigned */
+ if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+ /* assign local peer for a lesson, consider lesson already requested */
+ peer->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= (PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+ peers->flags |= PEERS_F_RESYNC_LOCALASSIGN;
+ }
+
+ }
+ else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+ /* assign peer for a lesson */
+ peer->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN;
+ }
+}
+
+/*
+ * Init <peer> peer after having connected it at peer protocol level.
+ */
+static inline void init_connected_peer(struct peer *peer, struct peers *peers)
+{
+ struct shared_table *st;
+
+ peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ /* Init cursors */
+ for (st = peer->tables; st ; st = st->next) {
+ st->last_get = st->last_acked = 0;
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &st->table->lock);
+		/* if st->update appears to be in the future, it means that
+		 * the last acked value is very old and we remained
+		 * disconnected for too long to use this acknowledgement as
+		 * a reset.
+		 * We should update the protocol to be able to signal the
+		 * remote peer that it needs a full resync.
+		 * Here a partial fix consists in setting st->update to the
+		 * farthest past value, i.e. 2^31 behind localupdate in the
+		 * wrapping 32-bit update space.
+		 */
+ if ((int)(st->table->localupdate - st->update) < 0)
+ st->update = st->table->localupdate + (2147483648U);
+ st->teaching_origin = st->last_pushed = st->update;
+ st->flags = 0;
+ if ((int)(st->last_pushed - st->table->commitupdate) > 0)
+ st->table->commitupdate = st->last_pushed;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ }
+
+ /* Init confirm counter */
+ peer->confirm = 0;
+
+ /* reset teaching and learning flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ peer->flags &= PEER_LEARN_RESET;
+
+ /* If current peer is local */
+ if (peer->local) {
+		/* flag to start teaching a lesson */
+ peer->flags |= PEER_F_TEACH_PROCESS;
+ }
+ else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+		/* If the peer is remote, a resync from remote is needed,
+		 * and no peer is currently assigned */
+
+ /* assign peer for a lesson */
+ peer->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN;
+ }
+}
+
+/*
+ * I/O handler for the message exchange with a peer
+ */
+static void peer_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct peers *curpeers = strm_fe(s)->parent;
+ struct peer *curpeer = NULL;
+ int reql = 0;
+ int repl = 0;
+ unsigned int maj_ver, min_ver;
+ int prev_state;
+
+ /* Check if the input buffer is available. */
+ if (sc_ib(sc)->size == 0) {
+ sc_need_room(sc);
+ goto out;
+ }
+
+ while (1) {
+ prev_state = appctx->st0;
+switchstate:
+ maj_ver = min_ver = (unsigned int)-1;
+ switch(appctx->st0) {
+ case PEER_SESS_ST_ACCEPT:
+ prev_state = appctx->st0;
+ appctx->svcctx = NULL;
+ appctx->st0 = PEER_SESS_ST_GETVERSION;
+ /* fall through */
+ case PEER_SESS_ST_GETVERSION:
+ prev_state = appctx->st0;
+ reql = peer_getline_version(appctx, &maj_ver, &min_ver);
+ if (reql <= 0) {
+ if (!reql)
+ goto out;
+ goto switchstate;
+ }
+
+ appctx->st0 = PEER_SESS_ST_GETHOST;
+ /* fall through */
+ case PEER_SESS_ST_GETHOST:
+ prev_state = appctx->st0;
+ reql = peer_getline_host(appctx);
+ if (reql <= 0) {
+ if (!reql)
+ goto out;
+ goto switchstate;
+ }
+
+ appctx->st0 = PEER_SESS_ST_GETPEER;
+ /* fall through */
+ case PEER_SESS_ST_GETPEER: {
+ prev_state = appctx->st0;
+ reql = peer_getline_last(appctx, &curpeer);
+ if (reql <= 0) {
+ if (!reql)
+ goto out;
+ goto switchstate;
+ }
+
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx && curpeer->appctx != appctx) {
+ if (curpeer->local) {
+ /* Local connection, reply a retry */
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_TRYAGAIN;
+ goto switchstate;
+ }
+
+ /* we're killing a connection, we must apply a random delay before
+ * retrying otherwise the other end will do the same and we can loop
+ * for a while.
+ */
+ curpeer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000));
+ peer_session_forceshutdown(curpeer);
+ curpeer->heartbeat = TICK_ETERNITY;
+ curpeer->coll++;
+ }
+ if (maj_ver != (unsigned int)-1 && min_ver != (unsigned int)-1) {
+ if (min_ver == PEER_DWNGRD_MINOR_VER) {
+ curpeer->flags |= PEER_F_DWNGRD;
+ }
+ else {
+ curpeer->flags &= ~PEER_F_DWNGRD;
+ }
+ }
+ curpeer->appctx = appctx;
+ curpeer->flags |= PEER_F_ALIVE;
+ appctx->svcctx = curpeer;
+ appctx->st0 = PEER_SESS_ST_SENDSUCCESS;
+ _HA_ATOMIC_INC(&active_peers);
+ }
+ /* fall through */
+ case PEER_SESS_ST_SENDSUCCESS: {
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ repl = peer_send_status_successmsg(appctx);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+
+ init_accepted_peer(curpeer, curpeers);
+
+ /* switch to waiting message state */
+ _HA_ATOMIC_INC(&connected_peers);
+ appctx->st0 = PEER_SESS_ST_WAITMSG;
+ goto switchstate;
+ }
+ case PEER_SESS_ST_CONNECT: {
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ repl = peer_send_hellomsg(appctx, curpeer);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+
+ /* switch to the waiting statuscode state */
+ appctx->st0 = PEER_SESS_ST_GETSTATUS;
+ }
+ /* fall through */
+ case PEER_SESS_ST_GETSTATUS: {
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ if (sc_ic(sc)->flags & CF_WROTE_DATA)
+ curpeer->statuscode = PEER_SESS_SC_CONNECTEDCODE;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ goto out;
+
+ if (reql < 0)
+ goto switchstate;
+
+ /* Register status code */
+ curpeer->statuscode = atoi(trash.area);
+ curpeer->last_hdshk = now_ms;
+
+ /* Awake main task */
+ task_wakeup(curpeers->sync_task, TASK_WOKEN_MSG);
+
+ /* If status code is success */
+ if (curpeer->statuscode == PEER_SESS_SC_SUCCESSCODE) {
+ init_connected_peer(curpeer, curpeers);
+ }
+ else {
+ if (curpeer->statuscode == PEER_SESS_SC_ERRVERSION)
+ curpeer->flags |= PEER_F_DWNGRD;
+ /* Status code is not success, abort */
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ _HA_ATOMIC_INC(&connected_peers);
+ appctx->st0 = PEER_SESS_ST_WAITMSG;
+ }
+ /* fall through */
+ case PEER_SESS_ST_WAITMSG: {
+ uint32_t msg_len = 0;
+ char *msg_cur = trash.area;
+ char *msg_end = trash.area;
+ unsigned char msg_head[7]; // 2 + 5 for varint32
+ int totl = 0;
+
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ reql = peer_recv_msg(appctx, (char *)msg_head, sizeof msg_head, &msg_len, &totl);
+ if (reql <= 0) {
+ if (reql == -1)
+ goto switchstate;
+ goto send_msgs;
+ }
+
+ msg_end += msg_len;
+ if (!peer_treat_awaited_msg(appctx, curpeer, msg_head, &msg_cur, msg_end, msg_len, totl))
+ goto switchstate;
+
+ curpeer->flags |= PEER_F_ALIVE;
+
+ /* skip consumed message */
+ co_skip(sc_oc(sc), totl);
+ /* loop on that state to peek next message */
+ goto switchstate;
+
+send_msgs:
+ if (curpeer->flags & PEER_F_HEARTBEAT) {
+ curpeer->flags &= ~PEER_F_HEARTBEAT;
+ repl = peer_send_heartbeatmsg(appctx, curpeer, curpeers);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+ curpeer->tx_hbt++;
+ }
+ /* we get here when a peer_recv_msg() returns 0 in reql */
+ repl = peer_send_msgs(appctx, curpeer, curpeers);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+
+ /* nothing more to do */
+ goto out;
+ }
+ case PEER_SESS_ST_EXIT:
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (peer_send_status_errormsg(appctx) == -1)
+ goto out;
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ case PEER_SESS_ST_ERRSIZE: {
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (peer_send_error_size_limitmsg(appctx) == -1)
+ goto out;
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ case PEER_SESS_ST_ERRPROTO: {
+ TRACE_PROTO("protocol error", PEERS_EV_PROTOERR,
+ NULL, curpeer, &prev_state);
+ if (curpeer)
+ curpeer->proto_err++;
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (peer_send_error_protomsg(appctx) == -1) {
+ TRACE_PROTO("could not send error message", PEERS_EV_PROTOERR);
+ goto out;
+ }
+ appctx->st0 = PEER_SESS_ST_END;
+ prev_state = appctx->st0;
+ }
+ /* fall through */
+ case PEER_SESS_ST_END: {
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (curpeer) {
+ HA_SPIN_UNLOCK(PEER_LOCK, &curpeer->lock);
+ curpeer = NULL;
+ }
+ sc_shutw(sc);
+ sc_shutr(sc);
+ sc_ic(sc)->flags |= CF_READ_NULL;
+ goto out;
+ }
+ }
+ }
+out:
+ sc_oc(sc)->flags |= CF_READ_DONTWAIT;
+
+ if (curpeer)
+ HA_SPIN_UNLOCK(PEER_LOCK, &curpeer->lock);
+ return;
+}
+
+static struct applet peer_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<PEER>", /* used for logging */
+ .fct = peer_io_handler,
+ .init = peer_session_init,
+ .release = peer_session_release,
+};
+
+
+/*
+ * Use this function to force a close of a peer session
+ */
+static void peer_session_forceshutdown(struct peer *peer)
+{
+ struct appctx *appctx = peer->appctx;
+
+ /* Note that the peer sessions which have just been created
+ * (->st0 == PEER_SESS_ST_CONNECT) must not be shut down,
+ * otherwise the TCP session will never be closed and will
+ * remain in CLOSE_WAIT state after having been closed by
+ * the remote side.
+ */
+ if (!appctx || appctx->st0 == PEER_SESS_ST_CONNECT)
+ return;
+
+ if (appctx->applet != &peer_applet)
+ return;
+
+ __peer_session_deinit(peer);
+
+ appctx->st0 = PEER_SESS_ST_END;
+ appctx_wakeup(appctx);
+}
+
+/* Pre-configures a peers frontend to accept incoming connections */
+void peers_setup_frontend(struct proxy *fe)
+{
+ fe->last_change = now.tv_sec;
+ fe->cap = PR_CAP_FE | PR_CAP_BE;
+ fe->mode = PR_MODE_PEERS;
+ fe->maxconn = 0;
+ fe->conn_retries = CONN_RETRIES;
+ fe->timeout.connect = MS_TO_TICKS(1000);
+ fe->timeout.client = MS_TO_TICKS(5000);
+ fe->timeout.server = MS_TO_TICKS(5000);
+ fe->accept = frontend_accept;
+ fe->default_target = &peer_applet.obj_type;
+ fe->options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
+}
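+
+/* For reference, a minimal configuration exercising this frontend could look
+ * like the following (hypothetical names and addresses):
+ *
+ *   peers mypeers
+ *       peer hap1 10.0.0.1:1024
+ *       peer hap2 10.0.0.2:1024
+ *
+ *   backend per_ip
+ *       stick-table type ip size 100k expire 30m peers mypeers
+ *
+ * Each "peer" line materializes one entry handled by the applet above, and
+ * every stick-table referencing the section is synchronized through it.
+ */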
+
+/*
+ * Create a new peer session in assigned state (connect will start automatically)
+ */
+static struct appctx *peer_session_create(struct peers *peers, struct peer *peer)
+{
+ struct appctx *appctx;
+ unsigned int thr = 0;
+ int idx;
+
+ peer->new_conn++;
+ peer->reconnect = tick_add(now_ms, (stopping ? MS_TO_TICKS(PEER_LOCAL_RECONNECT_TIMEOUT) : MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)));
+ peer->heartbeat = TICK_ETERNITY;
+ peer->statuscode = PEER_SESS_SC_CONNECTCODE;
+ peer->last_hdshk = now_ms;
+
+ for (idx = 0; idx < global.nbthread; idx++)
+ thr = peers->applet_count[idx] < peers->applet_count[thr] ? idx : thr;
+ appctx = appctx_new_on(&peer_applet, NULL, thr);
+ if (!appctx)
+ goto out_close;
+ appctx->svcctx = (void *)peer;
+
+ appctx->st0 = PEER_SESS_ST_CONNECT;
+ peer->appctx = appctx;
+
+ HA_ATOMIC_INC(&peers->applet_count[thr]);
+ appctx_wakeup(appctx);
+ return appctx;
+
+ out_close:
+ return NULL;
+}
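+
+/* The thread selection in peer_session_create() is a plain arg-min scan over
+ * the per-section applet counters, so each new peer applet lands on the
+ * least loaded thread. A hypothetical standalone equivalent:
+ *
+ *   static unsigned int least_loaded_thread(const unsigned int *counts, int nbthread)
+ *   {
+ *           unsigned int thr = 0;
+ *           int idx;
+ *
+ *           for (idx = 1; idx < nbthread; idx++)
+ *                   if (counts[idx] < counts[thr])
+ *                           thr = idx;
+ *           return thr;
+ *   }
+ *
+ * The chosen counter is then bumped atomically once the appctx is bound.
+ */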
+
+/*
+ * Task processing function in charge of re-connects, of waking peer
+ * session tasks up on local updates, and of heartbeats. Let's keep it
+ * exported so that it resolves in stack traces and "show tasks".
+ */
+struct task *process_peer_sync(struct task * task, void *context, unsigned int state)
+{
+ struct peers *peers = context;
+ struct peer *ps;
+ struct shared_table *st;
+
+ task->expire = TICK_ETERNITY;
+
+ if (!peers->peers_fe) {
+ /* this one was never started, kill it */
+ signal_unregister_handler(peers->sighandler);
+ task_destroy(peers->sync_task);
+ peers->sync_task = NULL;
+ return NULL;
+ }
+
+ /* Acquire lock for all peers of the section */
+ for (ps = peers->remote; ps; ps = ps->next)
+ HA_SPIN_LOCK(PEER_LOCK, &ps->lock);
+
+ if (!stopping) {
+ /* Normal case (not soft stop) */
+
+ /* A resync timeout set to TICK_ETERNITY means we have just
+ * started a new process and the timer was never initialized.
+ * We must arm it so that we can switch to a resync request to
+ * a remote node if the incoming connection from the old local
+ * process never comes.
+ */
+ if (peers->resync_timeout == TICK_ETERNITY)
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+
+ if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL) &&
+ (!nb_oldpids || tick_is_expired(peers->resync_timeout, now_ms)) &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+ /* Resync from the local peer is needed,
+ * no peer was assigned for the lesson,
+ * and either no old local peer was found
+ * or the resync timeout expired */
+
+ /* flag no more resync from local, to try resync from remotes */
+ peers->flags |= PEERS_F_RESYNC_LOCAL;
+ peers->flags |= PEERS_F_RESYNC_LOCALTIMEOUT;
+
+ /* reschedule a resync */
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+ }
+
+ /* For each session */
+ for (ps = peers->remote; ps; ps = ps->next) {
+ /* For each remote peers */
+ if (!ps->local) {
+ if (!ps->appctx) {
+ /* no active peer connection */
+ if (ps->statuscode == 0 ||
+ ((ps->statuscode == PEER_SESS_SC_CONNECTCODE ||
+ ps->statuscode == PEER_SESS_SC_SUCCESSCODE ||
+ ps->statuscode == PEER_SESS_SC_CONNECTEDCODE) &&
+ tick_is_expired(ps->reconnect, now_ms))) {
+ /* connection never tried,
+ * or previous peer connection was successfully established,
+ * or previous peer connection failed while connecting,
+ * and the reconnection timer has expired */
+
+ /* retry a connect */
+ ps->appctx = peer_session_create(peers, ps);
+ }
+ else if (!tick_is_expired(ps->reconnect, now_ms)) {
+ /* If previous session failed during connection
+ * but reconnection timer is not expired */
+
+ /* reschedule task for reconnect */
+ task->expire = tick_first(task->expire, ps->reconnect);
+ }
+ /* else do nothing */
+ } /* !ps->appctx */
+ else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE) {
+ /* current peer connection is active and established */
+ if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN) &&
+ !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) {
+ /* Resync from a remote is needed,
+ * no peer was assigned for the lesson,
+ * and the current peer may be up-to-date */
+
+ /* assign peer for the lesson */
+ ps->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN;
+
+ /* wake up peer handler to handle a request of resync */
+ appctx_wakeup(ps->appctx);
+ }
+ else {
+ int update_to_push = 0;
+
+ /* Awake session if there is data to push */
+ for (st = ps->tables; st ; st = st->next) {
+ if (st->last_pushed != st->table->localupdate) {
+ /* wake up the peer handler to push local updates */
+ update_to_push = 1;
+ /* There is no need to send a heartbeat message
+ * when some updates must be pushed. The remote
+ * peer will consider <ps> peer as alive when it will
+ * receive these updates.
+ */
+ ps->flags &= ~PEER_F_HEARTBEAT;
+ /* Re-schedule another one later. */
+ ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ /* We are going to send updates, let's ensure we will
+ * come back to send heartbeat messages or to reconnect.
+ */
+ task->expire = tick_first(ps->reconnect, ps->heartbeat);
+ appctx_wakeup(ps->appctx);
+ break;
+ }
+ }
+ /* When there are updates to send we do not reconnect
+ * and do not send heartbeat message either.
+ */
+ if (!update_to_push) {
+ if (tick_is_expired(ps->reconnect, now_ms)) {
+ if (ps->flags & PEER_F_ALIVE) {
+ /* This peer was alive during a 'reconnect' period.
+ * Flag it as not alive again for the next period.
+ */
+ ps->flags &= ~PEER_F_ALIVE;
+ ps->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT));
+ }
+ else {
+ ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000));
+ ps->heartbeat = TICK_ETERNITY;
+ peer_session_forceshutdown(ps);
+ ps->no_hbt++;
+ }
+ }
+ else if (tick_is_expired(ps->heartbeat, now_ms)) {
+ ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ ps->flags |= PEER_F_HEARTBEAT;
+ appctx_wakeup(ps->appctx);
+ }
+ task->expire = tick_first(ps->reconnect, ps->heartbeat);
+ }
+ }
+ /* else do nothing */
+ } /* SUCCESSCODE */
+ } /* !ps->peer->local */
+ } /* for */
+
+ /* Resync from remotes expired: consider resync is finished */
+ if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN) &&
+ tick_is_expired(peers->resync_timeout, now_ms)) {
+ /* Resync from a remote peer is needed,
+ * no peer was assigned for the lesson,
+ * and the resync timeout expired */
+
+ /* flag no more resync from remote, consider resync is finished */
+ peers->flags |= PEERS_F_RESYNC_REMOTE;
+ peers->flags |= PEERS_F_RESYNC_REMOTETIMEOUT;
+ }
+
+ if ((peers->flags & PEERS_RESYNC_STATEMASK) != PEERS_RESYNC_FINISHED) {
+ /* Resync not finished */
+ /* reschedule the task to the resync timeout if not expired,
+ * so that the resync can be ended if needed */
+ if (!tick_is_expired(peers->resync_timeout, now_ms))
+ task->expire = tick_first(task->expire, peers->resync_timeout);
+ }
+ } /* !stopping */
+ else {
+ /* soft stop case */
+ if (state & TASK_WOKEN_SIGNAL) {
+ /* We've just received the signal */
+ if (!(peers->flags & PEERS_F_DONOTSTOP)) {
+ /* add DO NOT STOP flag if not present */
+ _HA_ATOMIC_INC(&jobs);
+ peers->flags |= PEERS_F_DONOTSTOP;
+
+ /* disconnect all connected peers to process a local sync
+ * this must be done only the first time we are switching
+ * in stopping state
+ */
+ for (ps = peers->remote; ps; ps = ps->next) {
+ /* we're killing a connection, we must apply a random delay before
+ * retrying otherwise the other end will do the same and we can loop
+ * for a while.
+ */
+ ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000));
+ if (ps->appctx) {
+ peer_session_forceshutdown(ps);
+ }
+ }
+
+ /* Set the resync timeout for the local peer and request an immediate reconnect */
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+ peers->local->reconnect = now_ms;
+ }
+ }
+
+ ps = peers->local;
+ if (ps->flags & PEER_F_TEACH_COMPLETE) {
+ if (peers->flags & PEERS_F_DONOTSTOP) {
+ /* resync of new process was complete, current process can die now */
+ _HA_ATOMIC_DEC(&jobs);
+ peers->flags &= ~PEERS_F_DONOTSTOP;
+ for (st = ps->tables; st ; st = st->next)
+ HA_ATOMIC_DEC(&st->table->refcnt);
+ }
+ }
+ else if (!ps->appctx) {
+ /* Re-arm resync timeout if necessary */
+ if (!tick_isset(peers->resync_timeout))
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+
+ /* If there's no active peer connection */
+ if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED &&
+ !tick_is_expired(peers->resync_timeout, now_ms) &&
+ (ps->statuscode == 0 ||
+ ps->statuscode == PEER_SESS_SC_SUCCESSCODE ||
+ ps->statuscode == PEER_SESS_SC_CONNECTEDCODE ||
+ ps->statuscode == PEER_SESS_SC_TRYAGAIN)) {
+ /* The resync is finished for the local peer,
+ * the resync timeout is not expired, and
+ * either the connection was never tried,
+ * or the previous peer connection was successfully established,
+ * or the previous tcp connect succeeded but the init state is incomplete,
+ * or during the previous connect the peer replied with a "try again" status code */
+
+ if (!tick_is_expired(ps->reconnect, now_ms)) {
+ /* reconnection timer is not expired. reschedule task for reconnect */
+ task->expire = tick_first(task->expire, ps->reconnect);
+ }
+ else {
+ /* connect to the local peer if we must push a local sync */
+ if (peers->flags & PEERS_F_DONOTSTOP) {
+ peer_session_create(peers, ps);
+ }
+ }
+ }
+ else {
+ /* Other error cases */
+ if (peers->flags & PEERS_F_DONOTSTOP) {
+ /* unable to resync new process, current process can die now */
+ _HA_ATOMIC_DEC(&jobs);
+ peers->flags &= ~PEERS_F_DONOTSTOP;
+ for (st = ps->tables; st ; st = st->next)
+ HA_ATOMIC_DEC(&st->table->refcnt);
+ }
+ }
+ }
+ else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE ) {
+ /* Reset resync timeout during a resync */
+ peers->resync_timeout = TICK_ETERNITY;
+
+ /* current peer connection is active and established
+ * wake up all peer handlers to push remaining local updates */
+ for (st = ps->tables; st ; st = st->next) {
+ if (st->last_pushed != st->table->localupdate) {
+ appctx_wakeup(ps->appctx);
+ break;
+ }
+ }
+ }
+ } /* stopping */
+
+ /* Release lock for all peers of the section */
+ for (ps = peers->remote; ps; ps = ps->next)
+ HA_SPIN_UNLOCK(PEER_LOCK, &ps->lock);
+
+ /* Wakeup for re-connect */
+ return task;
+}
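+
+/* Simplified model of the tick helpers this task relies on (the real ones
+ * live in haproxy/ticks.h): ticks are wrapping 32-bit millisecond dates
+ * where 0 (TICK_ETERNITY) means "not set", and ordering uses signed
+ * differences so it survives wrap-around. A sketch under these assumptions:
+ *
+ *   static inline int my_tick_is_expired(unsigned int t, unsigned int now)
+ *   {
+ *           return t != 0 && (int)(t - now) <= 0; // 0 == eternity
+ *   }
+ *
+ *   static inline unsigned int my_tick_first(unsigned int a, unsigned int b)
+ *   {
+ *           if (!a) return b;
+ *           if (!b) return a;
+ *           return (int)(a - b) <= 0 ? a : b; // earliest set date wins
+ *   }
+ *
+ * This is why task->expire is reset to TICK_ETERNITY on entry and then
+ * narrowed down with tick_first() against each pending deadline.
+ */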
+
+
+/*
+ * Initialize the sync task of a "peers" section.
+ * Returns 0 in case of error, 1 otherwise.
+ */
+int peers_init_sync(struct peers *peers)
+{
+ struct peer * curpeer;
+
+ for (curpeer = peers->remote; curpeer; curpeer = curpeer->next) {
+ peers->peers_fe->maxconn += 3;
+ }
+
+ peers->sync_task = task_new_anywhere();
+ if (!peers->sync_task)
+ return 0;
+
+ memset(peers->applet_count, 0, sizeof(peers->applet_count));
+ peers->sync_task->process = process_peer_sync;
+ peers->sync_task->context = (void *)peers;
+ peers->sighandler = signal_register_task(0, peers->sync_task, 0);
+ task_wakeup(peers->sync_task, TASK_WOKEN_INIT);
+ return 1;
+}
+
+/*
+ * Allocate a cache of dictionary entries used upon transmission.
+ */
+static struct dcache_tx *new_dcache_tx(size_t max_entries)
+{
+ struct dcache_tx *d;
+ struct ebpt_node *entries;
+
+ d = malloc(sizeof *d);
+ entries = calloc(max_entries, sizeof *entries);
+ if (!d || !entries)
+ goto err;
+
+ d->lru_key = 0;
+ d->prev_lookup = NULL;
+ d->cached_entries = EB_ROOT_UNIQUE;
+ d->entries = entries;
+
+ return d;
+
+ err:
+ free(d);
+ free(entries);
+ return NULL;
+}
+
+/*
+ * Allocate a cache of dictionary entries with <max_entries> as the maximum
+ * number of entries.
+ * Return the dictionary cache if it succeeded, NULL if not.
+ * Must be deallocated by calling free_dcache().
+ */
+static struct dcache *new_dcache(size_t max_entries)
+{
+ struct dcache_tx *dc_tx;
+ struct dcache *dc;
+ struct dcache_rx *dc_rx;
+
+ dc = calloc(1, sizeof *dc);
+ dc_tx = new_dcache_tx(max_entries);
+ dc_rx = calloc(max_entries, sizeof *dc_rx);
+ if (!dc || !dc_tx || !dc_rx)
+ goto err;
+
+ dc->tx = dc_tx;
+ dc->rx = dc_rx;
+ dc->max_entries = max_entries;
+
+ return dc;
+
+ err:
+ free(dc);
+ free(dc_tx);
+ free(dc_rx);
+ return NULL;
+}
+
+/*
+ * Look for the dictionary entry with the value of <i> in <d> cache of dictionary
+ * entries used upon transmission.
+ * Return the entry if found, NULL if not.
+ */
+static struct ebpt_node *dcache_tx_lookup_value(struct dcache_tx *d,
+ struct dcache_tx_entry *i)
+{
+ return ebpt_lookup(&d->cached_entries, i->entry.key);
+}
+
+/*
+ * Flush the dictionary cache of <peer>.
+ * Always succeeds.
+ */
+static inline void flush_dcache(struct peer *peer)
+{
+ int i;
+ struct dcache *dc = peer->dcache;
+
+ for (i = 0; i < dc->max_entries; i++) {
+ ebpt_delete(&dc->tx->entries[i]);
+ dc->tx->entries[i].key = NULL;
+ dict_entry_unref(&server_key_dict, dc->rx[i].de);
+ dc->rx[i].de = NULL;
+ }
+ dc->tx->prev_lookup = NULL;
+ dc->tx->lru_key = 0;
+
+ memset(dc->rx, 0, dc->max_entries * sizeof *dc->rx);
+}
+
+/*
+ * Insert a dictionary entry in the part of <dc> cache used upon transmission
+ * (->tx), with the information provided by <i> dictionary cache entry
+ * (especially the value to be inserted if not already present). Return <i>'s
+ * node if the entry was already present in the cache, or a different node if
+ * a new one had to be used.
+ */
+static struct ebpt_node *dcache_tx_insert(struct dcache *dc, struct dcache_tx_entry *i)
+{
+ struct dcache_tx *dc_tx;
+ struct ebpt_node *o;
+
+ dc_tx = dc->tx;
+
+ if (dc_tx->prev_lookup && dc_tx->prev_lookup->key == i->entry.key) {
+ o = dc_tx->prev_lookup;
+ } else {
+ o = dcache_tx_lookup_value(dc_tx, i);
+ if (o) {
+ /* Save it */
+ dc_tx->prev_lookup = o;
+ }
+ }
+
+ if (o) {
+ /* Copy the ID. */
+ i->id = o - dc->tx->entries;
+ return &i->entry;
+ }
+
+ /* The new entry to put in cache */
+ dc_tx->prev_lookup = o = &dc_tx->entries[dc_tx->lru_key];
+
+ ebpt_delete(o);
+ o->key = i->entry.key;
+ ebpt_insert(&dc_tx->cached_entries, o);
+ i->id = dc_tx->lru_key;
+
+ /* Update the index for the next entry to put in cache */
+ dc_tx->lru_key = (dc_tx->lru_key + 1) & (dc->max_entries - 1);
+
+ return o;
+}
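+
+/* Note on the wrap above: (lru_key + 1) & (max_entries - 1) is only a valid
+ * modulo when max_entries is a power of two, which the cache size is
+ * expected to be. A hypothetical generic form of the idiom:
+ *
+ *   static inline size_t ring_next(size_t idx, size_t size_pow2)
+ *   {
+ *           return (idx + 1) & (size_pow2 - 1); // same as (idx + 1) % size_pow2
+ *   }
+ *
+ * Entries are thus recycled in strict insertion order, which makes the TX
+ * cache behave as a simple FIFO despite the "lru" field name.
+ */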
+
+/*
+ * Allocate a dictionary cache for each peer of <peers> section.
+ * Return 1 if succeeded, 0 if not.
+ */
+int peers_alloc_dcache(struct peers *peers)
+{
+ struct peer *p;
+
+ for (p = peers->remote; p; p = p->next) {
+ p->dcache = new_dcache(PEER_STKT_CACHE_MAX_ENTRIES);
+ if (!p->dcache)
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Function used to register a table for sync on a group of peers.
+ * Returns 0 in case of success, non-zero otherwise.
+ */
+int peers_register_table(struct peers *peers, struct stktable *table)
+{
+ struct shared_table *st;
+ struct peer * curpeer;
+ int id = 0;
+ int retval = 0;
+
+ for (curpeer = peers->remote; curpeer; curpeer = curpeer->next) {
+ st = calloc(1, sizeof(*st));
+ if (!st) {
+ retval = 1;
+ break;
+ }
+ st->table = table;
+ st->next = curpeer->tables;
+ if (curpeer->tables)
+ id = curpeer->tables->local_id;
+ st->local_id = id + 1;
+
+ /* If peer is local we inc table
+ * refcnt to protect against flush
+ * until this process pushed all
+ * table content to the new one
+ */
+ if (curpeer->local)
+ HA_ATOMIC_INC(&st->table->refcnt);
+ curpeer->tables = st;
+ }
+
+ table->sync_task = peers->sync_task;
+
+ return retval;
+}
+
+/* context used by a "show peers" command */
+struct show_peers_ctx {
+ void *target; /* if non-null, dump only this section and stop */
+ struct peers *peers; /* "peers" section being currently dumped. */
+ struct peer *peer; /* "peer" being currently dumped. */
+ int flags; /* non-zero if "dict" dump requested */
+ enum {
+ STATE_HEAD = 0, /* dump the section's header */
+ STATE_PEER, /* dump the whole peer */
+ STATE_DONE, /* finished */
+ } state; /* parser's state */
+};
+
+/*
+ * Parse the "show peers" command arguments.
+ * Returns 0 if succeeded, 1 if not with the ->msg of the appctx set as
+ * error message.
+ */
+static int cli_parse_show_peers(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_peers_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[2], "dict") == 0) {
+ /* show the dictionaries (large dump) */
+ ctx->flags |= PEERS_SHOW_F_DICT;
+ args++;
+ } else if (strcmp(args[2], "-") == 0)
+ args++; // allows showing a section called "dict"
+
+ if (*args[2]) {
+ struct peers *p;
+
+ for (p = cfg_peers; p; p = p->next) {
+ if (strcmp(p->id, args[2]) == 0) {
+ ctx->target = p;
+ break;
+ }
+ }
+
+ if (!p)
+ return cli_err(appctx, "No such peers\n");
+ }
+
+ /* where to start from */
+ ctx->peers = ctx->target ? ctx->target : cfg_peers;
+ return 0;
+}
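+
+/* Usage example on the CLI (the socket path is an assumption and depends on
+ * the local "stats socket" configuration):
+ *
+ *   $ echo "show peers dict mypeers" | socat stdio /var/run/haproxy.sock
+ *
+ * Without "dict" the potentially large dictionary caches are skipped, and
+ * "-" may be passed in its place to dump a section literally named "dict".
+ */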
+
+/*
+ * This function dumps the peer state information of <peers> "peers" section.
+ * Returns 0 if the output buffer is full and needs to be called again, non-zero if not.
+ * Dedicated to be called by cli_io_handler_show_peers() cli I/O handler.
+ */
+static int peers_dump_head(struct buffer *msg, struct appctx *appctx, struct peers *peers)
+{
+ struct tm tm;
+
+ get_localtime(peers->last_change, &tm);
+ chunk_appendf(msg, "%p: [%02d/%s/%04d:%02d:%02d:%02d] id=%s disabled=%d flags=0x%x resync_timeout=%s task_calls=%u\n",
+ peers,
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec,
+ peers->id, peers->disabled, peers->flags,
+ peers->resync_timeout ?
+ tick_is_expired(peers->resync_timeout, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(peers->resync_timeout - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ peers->sync_task ? peers->sync_task->calls : 0);
+
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * This function dumps <peer> state information.
+ * Returns 0 if the output buffer is full and needs to be called again, non-zero
+ * if not. Dedicated to be called by cli_io_handler_show_peers() cli I/O handler.
+ */
+static int peers_dump_peer(struct buffer *msg, struct appctx *appctx, struct peer *peer, int flags)
+{
+ struct connection *conn;
+ char pn[INET6_ADDRSTRLEN];
+ struct stconn *peer_cs;
+ struct stream *peer_s;
+ struct shared_table *st;
+
+ addr_to_str(&peer->addr, pn, sizeof pn);
+ chunk_appendf(msg, " %p: id=%s(%s,%s) addr=%s:%d last_status=%s",
+ peer, peer->id,
+ peer->local ? "local" : "remote",
+ peer->appctx ? "active" : "inactive",
+ pn, get_host_port(&peer->addr),
+ statuscode_str(peer->statuscode));
+
+ chunk_appendf(msg, " last_hdshk=%s\n",
+ peer->last_hdshk ? human_time(TICKS_TO_MS(now_ms - peer->last_hdshk),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(msg, " reconnect=%s",
+ peer->reconnect ?
+ tick_is_expired(peer->reconnect, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(peer->reconnect - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(msg, " heartbeat=%s",
+ peer->heartbeat ?
+ tick_is_expired(peer->heartbeat, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(peer->heartbeat - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(msg, " confirm=%u tx_hbt=%u rx_hbt=%u no_hbt=%u new_conn=%u proto_err=%u coll=%u\n",
+ peer->confirm, peer->tx_hbt, peer->rx_hbt,
+ peer->no_hbt, peer->new_conn, peer->proto_err, peer->coll);
+
+ chunk_appendf(&trash, " flags=0x%x", peer->flags);
+
+ if (!peer->appctx)
+ goto table_info;
+
+ chunk_appendf(&trash, " appctx:%p st0=%d st1=%d task_calls=%u",
+ peer->appctx, peer->appctx->st0, peer->appctx->st1,
+ peer->appctx->t ? peer->appctx->t->calls : 0);
+
+ peer_cs = appctx_sc(peer->appctx);
+ if (!peer_cs) {
+ /* the appctx might exist but not yet be initialized due to
+ * deferred initialization used to balance applets across
+ * threads.
+ */
+ goto table_info;
+ }
+
+ peer_s = __sc_strm(peer_cs);
+
+ chunk_appendf(&trash, " state=%s", sc_state_str(sc_opposite(peer_cs)->state));
+
+ conn = objt_conn(strm_orig(peer_s));
+ if (conn)
+ chunk_appendf(&trash, "\n xprt=%s", conn_get_xprt_name(conn));
+
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " src=%s:%d", pn, get_host_port(conn->src));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " src=unix:%d", strm_li(peer_s)->luid);
+ break;
+ }
+
+ switch (conn && conn_get_dst(conn) ? addr_to_str(conn->dst, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " addr=%s:%d", pn, get_host_port(conn->dst));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " addr=unix:%d", strm_li(peer_s)->luid);
+ break;
+ }
+
+ table_info:
+ if (peer->remote_table)
+ chunk_appendf(&trash, "\n remote_table:%p id=%s local_id=%d remote_id=%d",
+ peer->remote_table,
+ peer->remote_table->table->id,
+ peer->remote_table->local_id,
+ peer->remote_table->remote_id);
+
+ if (peer->last_local_table)
+ chunk_appendf(&trash, "\n last_local_table:%p id=%s local_id=%d remote_id=%d",
+ peer->last_local_table,
+ peer->last_local_table->table->id,
+ peer->last_local_table->local_id,
+ peer->last_local_table->remote_id);
+
+ if (peer->tables) {
+ chunk_appendf(&trash, "\n shared tables:");
+ for (st = peer->tables; st; st = st->next) {
+ int i, count;
+ struct stktable *t;
+ struct dcache *dcache;
+
+ t = st->table;
+ dcache = peer->dcache;
+
+ chunk_appendf(&trash, "\n %p local_id=%d remote_id=%d "
+ "flags=0x%x remote_data=0x%llx",
+ st, st->local_id, st->remote_id,
+ st->flags, (unsigned long long)st->remote_data);
+ chunk_appendf(&trash, "\n last_acked=%u last_pushed=%u last_get=%u"
+ " teaching_origin=%u update=%u",
+ st->last_acked, st->last_pushed, st->last_get,
+ st->teaching_origin, st->update);
+ chunk_appendf(&trash, "\n table:%p id=%s update=%u localupdate=%u"
+ " commitupdate=%u refcnt=%u",
+ t, t->id, t->update, t->localupdate, t->commitupdate, t->refcnt);
+ if (flags & PEERS_SHOW_F_DICT) {
+ chunk_appendf(&trash, "\n TX dictionary cache:");
+ count = 0;
+ for (i = 0; i < dcache->max_entries; i++) {
+ struct ebpt_node *node;
+ struct dict_entry *de;
+
+ node = &dcache->tx->entries[i];
+ if (!node->key)
+ break;
+
+ if (!count++)
+ chunk_appendf(&trash, "\n ");
+ de = node->key;
+ chunk_appendf(&trash, " %3u -> %s", i, (char *)de->value.key);
+ count &= 0x3;
+ }
+ chunk_appendf(&trash, "\n RX dictionary cache:");
+ count = 0;
+ for (i = 0; i < dcache->max_entries; i++) {
+ if (!count++)
+ chunk_appendf(&trash, "\n ");
+ chunk_appendf(&trash, " %3u -> %s", i,
+ dcache->rx[i].de ?
+ (char *)dcache->rx[i].de->value.key : "-");
+ count &= 0x3;
+ }
+ } else {
+ chunk_appendf(&trash, "\n Dictionary cache not dumped (use \"show peers dict\")");
+ }
+ }
+ }
+
+ end:
+ chunk_appendf(&trash, "\n");
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * This function dumps all the peers of "peers" section.
+ * Returns 0 if the output buffer is full and needs to be called
+ * again, non-zero if not. It proceeds under thread isolation, so
+ * there is no thread safety issue here.
+ */
+static int cli_io_handler_show_peers(struct appctx *appctx)
+{
+ struct show_peers_ctx *ctx = appctx->svcctx;
+ int ret = 0, first_peers = 1;
+
+ thread_isolate();
+
+ chunk_reset(&trash);
+
+ while (ctx->state != STATE_DONE) {
+ switch (ctx->state) {
+ case STATE_HEAD:
+ if (!ctx->peers) {
+ /* No more peers list. */
+ ctx->state = STATE_DONE;
+ }
+ else {
+ if (!first_peers)
+ chunk_appendf(&trash, "\n");
+ else
+ first_peers = 0;
+ if (!peers_dump_head(&trash, appctx, ctx->peers))
+ goto out;
+
+ ctx->peer = ctx->peers->remote;
+ ctx->peers = ctx->peers->next;
+ ctx->state = STATE_PEER;
+ }
+ break;
+
+ case STATE_PEER:
+ if (!ctx->peer) {
+ /* End of peer list */
+ if (!ctx->target)
+ ctx->state = STATE_HEAD; // next one
+ else
+ ctx->state = STATE_DONE;
+ }
+ else {
+ if (!peers_dump_peer(&trash, appctx, ctx->peer, ctx->flags))
+ goto out;
+
+ ctx->peer = ctx->peer->next;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ ret = 1;
+ out:
+ thread_release();
+ return ret;
+}
+
+/* config parser for global "tune.peers.max-updates-at-once" */
+static int cfg_parse_max_updt_at_once(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int arg = -1;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) != 0)
+ arg = atoi(args[1]);
+
+ if (arg < 1) {
+ memprintf(err, "'%s' expects an integer argument greater than 0.", args[0]);
+ return -1;
+ }
+
+ peers_max_updates_at_once = arg;
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.peers.max-updates-at-once", cfg_parse_max_updt_at_once },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * CLI keywords.
+ */
+static struct cli_kw_list cli_kws = {{ }, {
+ { { "show", "peers", NULL }, "show peers [dict|-] [section] : dump some information about all the peers or this peers section", cli_parse_show_peers, cli_io_handler_show_peers, },
+ {},
+}};
+
+/* Register cli keywords */
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/pipe.c b/src/pipe.c
new file mode 100644
index 0000000..5599fe0
--- /dev/null
+++ b/src/pipe.c
@@ -0,0 +1,136 @@
+/*
+ * Pipe management
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <haproxy/api.h>
+#include <haproxy/global.h>
+#include <haproxy/pipe-t.h>
+#include <haproxy/pool.h>
+#include <haproxy/thread.h>
+
+
+DECLARE_STATIC_POOL(pool_head_pipe, "pipe", sizeof(struct pipe));
+
+struct pipe *pipes_live = NULL; /* pipes which are still ready to use */
+
+__decl_spinlock(pipes_lock); /* lock used to protect pipes list */
+
+static THREAD_LOCAL int local_pipes_free = 0;  /* number of pipes in this thread's local cache */
+static THREAD_LOCAL struct pipe *local_pipes = NULL;
+
+int pipes_used = 0; /* # of pipes in use (2 fds each) */
+int pipes_free = 0; /* # of pipes unused */
+
+/* return a pre-allocated empty pipe. Try to allocate one if there isn't any
+ * left. NULL is returned if a pipe could not be allocated.
+ */
+struct pipe *get_pipe()
+{
+ struct pipe *ret = NULL;
+ int pipefd[2];
+
+ ret = local_pipes;
+ if (likely(ret)) {
+ local_pipes = ret->next;
+ local_pipes_free--;
+ HA_ATOMIC_DEC(&pipes_free);
+ HA_ATOMIC_INC(&pipes_used);
+ goto out;
+ }
+
+ if (likely(pipes_live)) {
+ HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
+ ret = pipes_live;
+ if (likely(ret))
+ pipes_live = ret->next;
+ HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
+ if (ret) {
+ HA_ATOMIC_DEC(&pipes_free);
+ HA_ATOMIC_INC(&pipes_used);
+ goto out;
+ }
+ }
+
+ HA_ATOMIC_INC(&pipes_used);
+ if (pipes_used + pipes_free >= global.maxpipes)
+ goto fail;
+
+ ret = pool_alloc(pool_head_pipe);
+ if (!ret)
+ goto fail;
+
+ if (pipe(pipefd) < 0)
+ goto fail;
+
+#ifdef F_SETPIPE_SZ
+ if (global.tune.pipesize)
+ fcntl(pipefd[0], F_SETPIPE_SZ, global.tune.pipesize);
+#endif
+ ret->data = 0;
+ ret->prod = pipefd[1];
+ ret->cons = pipefd[0];
+ ret->next = NULL;
+ out:
+ return ret;
+ fail:
+ pool_free(pool_head_pipe, ret);
+ HA_ATOMIC_DEC(&pipes_used);
+ return NULL;
+
+}
+
+/* destroy a pipe, possibly because an error was encountered on it. Its FDs
+ * will be closed and it will not be reinjected into the live pool.
+ */
+void kill_pipe(struct pipe *p)
+{
+ close(p->prod);
+ close(p->cons);
+ pool_free(pool_head_pipe, p);
+ HA_ATOMIC_DEC(&pipes_used);
+}
+
+/* put back an unused pipe into the live pool. If it still has data in it, it is
+ * closed and not reinjected into the live pool. The caller is not allowed to
+ * use it once released.
+ */
+void put_pipe(struct pipe *p)
+{
+ if (unlikely(p->data)) {
+ kill_pipe(p);
+ return;
+ }
+
+ if (likely(local_pipes_free * global.nbthread < global.maxpipes - pipes_used)) {
+ p->next = local_pipes;
+ local_pipes = p;
+ local_pipes_free++;
+ goto out;
+ }
+
+ HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
+ p->next = pipes_live;
+ pipes_live = p;
+ HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
+ out:
+ HA_ATOMIC_INC(&pipes_free);
+ HA_ATOMIC_DEC(&pipes_used);
+}
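+
+/* get_pipe()/put_pipe() above implement a common two-level caching pattern:
+ * a lock-free thread-local free list absorbs most of the traffic, and a
+ * spinlock-protected shared list catches the overflow. A stripped-down
+ * sketch of the fast path (hypothetical, without the accounting):
+ *
+ *   static THREAD_LOCAL struct pipe *tl_head;
+ *
+ *   static struct pipe *tl_get(void)
+ *   {
+ *           struct pipe *p = tl_head;
+ *
+ *           if (p)
+ *                   tl_head = p->next; // no lock: the list is per-thread
+ *           return p;                  // NULL -> fall back to shared list
+ *   }
+ */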
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/pool.c b/src/pool.c
new file mode 100644
index 0000000..54ae25b
--- /dev/null
+++ b/src/pool.c
@@ -0,0 +1,1108 @@
+/*
+ * Memory management functions.
+ *
+ * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/applet-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+/* These ones are initialized per-thread on startup by init_pools() */
+THREAD_LOCAL size_t pool_cache_bytes = 0; /* total cache size */
+THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */
+
+static struct list pools __read_mostly = LIST_HEAD_INIT(pools);
+int mem_poison_byte __read_mostly = 'P';
+uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */
+#ifdef DEBUG_FAIL_ALLOC
+ POOL_DBG_FAIL_ALLOC |
+#endif
+#ifdef DEBUG_DONT_SHARE_POOLS
+ POOL_DBG_DONT_MERGE |
+#endif
+#ifdef DEBUG_POOL_INTEGRITY
+ POOL_DBG_COLD_FIRST |
+#endif
+#ifdef DEBUG_POOL_INTEGRITY
+ POOL_DBG_INTEGRITY |
+#endif
+#ifdef CONFIG_HAP_NO_GLOBAL_POOLS
+ POOL_DBG_NO_GLOBAL |
+#endif
+#ifndef CONFIG_HAP_POOLS
+ POOL_DBG_NO_CACHE |
+#endif
+#if defined(DEBUG_POOL_TRACING)
+ POOL_DBG_CALLER |
+#endif
+#if defined(DEBUG_MEMORY_POOLS)
+ POOL_DBG_TAG |
+#endif
+ 0;
+
+static const struct {
+ uint flg;
+ const char *set;
+ const char *clr;
+ const char *hlp;
+} dbg_options[] = {
+ /* flg, set, clr, hlp */
+ { POOL_DBG_FAIL_ALLOC, "fail", "no-fail", "randomly fail allocations" },
+ { POOL_DBG_DONT_MERGE, "no-merge", "merge", "disable merging of similar pools" },
+ { POOL_DBG_COLD_FIRST, "cold-first", "hot-first", "pick cold objects first" },
+ { POOL_DBG_INTEGRITY, "integrity", "no-integrity", "enable cache integrity checks" },
+ { POOL_DBG_NO_GLOBAL, "no-global", "global", "disable global shared cache" },
+ { POOL_DBG_NO_CACHE, "no-cache", "cache", "disable thread-local cache" },
+ { POOL_DBG_CALLER, "caller", "no-caller", "save caller information in cache" },
+ { POOL_DBG_TAG, "tag", "no-tag", "add tag at end of allocated objects" },
+ { POOL_DBG_POISON, "poison", "no-poison", "poison newly allocated objects" },
+ { 0 /* end */ }
+};
+
+static int mem_fail_rate __read_mostly = 0;
+static int using_default_allocator __read_mostly = 1;
+static int disable_trim __read_mostly = 0;
+static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = NULL;
+
+/* ask the allocator to trim memory pools.
+ * This must run under thread isolation so that competing threads trying to
+ * allocate or release memory do not prevent the allocator from completing
+ * its job. We just have to be careful as callers might already be isolated
+ * themselves.
+ */
+static void trim_all_pools(void)
+{
+ int isolated = thread_isolated();
+
+ if (disable_trim)
+ return;
+
+ if (!isolated)
+ thread_isolate();
+
+ if (my_mallctl) {
+ unsigned int i, narenas = 0;
+ size_t len = sizeof(narenas);
+
+ if (my_mallctl("arenas.narenas", &narenas, &len, NULL, 0) == 0) {
+ for (i = 0; i < narenas; i ++) {
+ char mib[32] = {0};
+ snprintf(mib, sizeof(mib), "arena.%u.purge", i);
+ (void)my_mallctl(mib, NULL, NULL, NULL, 0);
+ }
+ }
+ } else {
+#if defined(HA_HAVE_MALLOC_TRIM)
+ if (using_default_allocator)
+ malloc_trim(0);
+#elif defined(HA_HAVE_MALLOC_ZONE)
+ if (using_default_allocator) {
+ vm_address_t *zones;
+ unsigned int i, nzones;
+
+ if (malloc_get_all_zones(0, NULL, &zones, &nzones) == KERN_SUCCESS) {
+ for (i = 0; i < nzones; i ++) {
+ malloc_zone_t *zone = (malloc_zone_t *)zones[i];
+
+ /* we cannot purge anonymous zones */
+ if (zone->zone_name)
+ malloc_zone_pressure_relief(zone, 0);
+ }
+ }
+ }
+#endif
+ }
+
+ if (!isolated)
+ thread_release();
+}
+
+/* check if we're using the same allocator as the one that provides
+ * malloc_trim() and mallinfo(). The principle is that on glibc, both
+ * malloc_trim() and mallinfo() are provided, and using mallinfo() we
+ * can check if malloc() is performed through glibc or any other one
+ * the executable was linked against (e.g. jemalloc). Prior to this we
+ * have to check whether we're running on jemalloc by verifying if the
+ * mallctl() function is provided. Its pointer will be used later.
+ */
+static void detect_allocator(void)
+{
+#if defined(__ELF__)
+ extern int mallctl(const char *, void *, size_t *, void *, size_t) __attribute__((weak));
+
+ my_mallctl = mallctl;
+#endif
+
+ if (!my_mallctl) {
+ my_mallctl = get_sym_curr_addr("mallctl");
+ using_default_allocator = (my_mallctl == NULL);
+ }
+
+ if (!my_mallctl) {
+#if defined(HA_HAVE_MALLOC_TRIM)
+#ifdef HA_HAVE_MALLINFO2
+ struct mallinfo2 mi1, mi2;
+#else
+ struct mallinfo mi1, mi2;
+#endif
+ void *ptr;
+
+#ifdef HA_HAVE_MALLINFO2
+ mi1 = mallinfo2();
+#else
+ mi1 = mallinfo();
+#endif
+ ptr = DISGUISE(malloc(1));
+#ifdef HA_HAVE_MALLINFO2
+ mi2 = mallinfo2();
+#else
+ mi2 = mallinfo();
+#endif
+ free(DISGUISE(ptr));
+
+ using_default_allocator = !!memcmp(&mi1, &mi2, sizeof(mi1));
+#elif defined(HA_HAVE_MALLOC_ZONE)
+ using_default_allocator = (malloc_default_zone() != NULL);
+#endif
+ }
+}
+
+static int is_trim_enabled(void)
+{
+ return using_default_allocator;
+}
+
+static int mem_should_fail(const struct pool_head *pool)
+{
+ int ret = 0;
+
+ if (mem_fail_rate > 0 && !(global.mode & MODE_STARTING)) {
+ if (mem_fail_rate > statistical_prng_range(100))
+ ret = 1;
+ else
+ ret = 0;
+ }
+ return ret;
+}
+
+/* Try to find an existing shared pool with the same characteristics and
+ * returns it, otherwise creates this one. NULL is returned if no memory
+ * is available for a new creation. Two flags are supported :
+ * - MEM_F_SHARED to indicate that the pool may be shared with other users
+ * - MEM_F_EXACT to indicate that the size must not be rounded up
+ */
+struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags)
+{
+ unsigned int extra_mark, extra_caller, extra;
+ struct pool_head *pool;
+ struct pool_head *entry;
+ struct list *start;
+ unsigned int align;
+ int thr __maybe_unused;
+
+ /* We need to store a (void *) at the end of the chunks. Since we know
+ * that the malloc() function will never return such a small size,
+ * let's round the size up to something slightly bigger, in order to
+ * ease merging of entries. Note that the rounding is a power of two.
+ * This extra (void *) is not accounted for in the size computation
+ * so that the visible parts outside are not affected.
+ *
+ * Note: for the LRU cache, we need to store 2 doubly-linked lists.
+ */
+
+ extra_mark = (pool_debugging & POOL_DBG_TAG) ? POOL_EXTRA_MARK : 0;
+ extra_caller = (pool_debugging & POOL_DBG_CALLER) ? POOL_EXTRA_CALLER : 0;
+ extra = extra_mark + extra_caller;
+
+ if (!(flags & MEM_F_EXACT)) {
+ align = 4 * sizeof(void *); // 2 lists = 4 pointers min
+ size = ((size + extra + align - 1) & -align) - extra;
+ }
+
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ /* we'll store two lists there, we need the room for this. This is
+ * guaranteed by the test above, except if MEM_F_EXACT is set, or if
+ * the only EXTRA part is in fact the one that's stored in the cache
+ * in addition to the pci struct.
+ */
+ if (size + extra - extra_caller < sizeof(struct pool_cache_item))
+ size = sizeof(struct pool_cache_item) + extra_caller - extra;
+ }
+
+ /* TODO: thread: we do not lock pool list for now because all pools are
+ * created during HAProxy startup (so before threads creation) */
+ start = &pools;
+ pool = NULL;
+
+ list_for_each_entry(entry, &pools, list) {
+ if (entry->size == size) {
+ /* either we can share this place and we take it, or
+ * we look for a shareable one or for the next position
+ * before which we will insert a new one.
+ */
+ if ((flags & entry->flags & MEM_F_SHARED) &&
+ (!(pool_debugging & POOL_DBG_DONT_MERGE) ||
+ strcmp(name, entry->name) == 0)) {
+ /* we can share this one */
+ pool = entry;
+ DPRINTF(stderr, "Sharing %s with %s\n", name, pool->name);
+ break;
+ }
+ }
+ else if (entry->size > size) {
+ /* insert before this one */
+ start = &entry->list;
+ break;
+ }
+ }
+
+ if (!pool) {
+ void *pool_addr;
+
+ pool_addr = calloc(1, sizeof(*pool) + __alignof__(*pool));
+ if (!pool_addr)
+ return NULL;
+
+ /* always provide an aligned pool */
+ pool = (struct pool_head*)((((size_t)pool_addr) + __alignof__(*pool)) & -(size_t)__alignof__(*pool));
+ pool->base_addr = pool_addr; // keep it, it's the address to free later
+
+ if (name)
+ strlcpy2(pool->name, name, sizeof(pool->name));
+ pool->alloc_sz = size + extra;
+ pool->size = size;
+ pool->flags = flags;
+ LIST_APPEND(start, &pool->list);
+
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ /* update per-thread pool cache if necessary */
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ LIST_INIT(&pool->cache[thr].list);
+ pool->cache[thr].tid = thr;
+ pool->cache[thr].pool = pool;
+ }
+ }
+ }
+ pool->users++;
+ return pool;
+}
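+
+/* The size rounding in create_pool() is the usual align-up idiom; assuming
+ * <align> is a power of two, a hypothetical standalone form is:
+ *
+ *   static inline unsigned int align_up(unsigned int size, unsigned int align)
+ *   {
+ *           return (size + align - 1) & -align; // -align == ~(align - 1)
+ *   }
+ *
+ * Here the rounding is applied to size+extra and <extra> is subtracted back,
+ * so the hidden mark/caller areas never inflate the visible object size.
+ */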
+
+/* Tries to allocate an object for the pool <pool> using the system's allocator
+ * and directly returns it. The pool's allocated counter is checked and updated,
+ * but no other checks are performed.
+ */
+void *pool_get_from_os(struct pool_head *pool)
+{
+ if (!pool->limit || pool->allocated < pool->limit) {
+ void *ptr = pool_alloc_area(pool->alloc_sz);
+ if (ptr) {
+ _HA_ATOMIC_INC(&pool->allocated);
+ return ptr;
+ }
+ _HA_ATOMIC_INC(&pool->failed);
+ }
+ activity[tid].pool_fail++;
+ return NULL;
+
+}
+
+/* Releases a pool item back to the operating system and atomically updates
+ * the allocation counter.
+ */
+void pool_put_to_os(struct pool_head *pool, void *ptr)
+{
+#ifdef DEBUG_UAF
+ /* This object will be released for real in order to detect a use after
+ * free. We also force a write to the area to ensure we crash on double
+ * free or free of a const area.
+ */
+ *(uint32_t *)ptr = 0xDEADADD4;
+#endif /* DEBUG_UAF */
+
+ pool_free_area(ptr, pool->alloc_sz);
+ _HA_ATOMIC_DEC(&pool->allocated);
+}
+
+/* Tries to allocate an object for the pool <pool> using the system's allocator
+ * and directly returns it. The pool's counters are updated but the object is
+ * never cached, so this is usable with and without local or shared caches.
+ */
+void *pool_alloc_nocache(struct pool_head *pool)
+{
+ void *ptr = NULL;
+
+ ptr = pool_get_from_os(pool);
+ if (!ptr)
+ return NULL;
+
+ swrate_add_scaled(&pool->needed_avg, POOL_AVG_SAMPLES, pool->used, POOL_AVG_SAMPLES/4);
+ _HA_ATOMIC_INC(&pool->used);
+
+ /* keep track of where the element was allocated from */
+ POOL_DEBUG_SET_MARK(pool, ptr);
+ POOL_DEBUG_TRACE_CALLER(pool, (struct pool_cache_item *)ptr, NULL);
+ return ptr;
+}
+
+/* Releases a pool item back to the OS and keeps the pool's counters up to date.
+ * This is always defined even when pools are not enabled (their usage stats
+ * are maintained).
+ */
+void pool_free_nocache(struct pool_head *pool, void *ptr)
+{
+ _HA_ATOMIC_DEC(&pool->used);
+ swrate_add(&pool->needed_avg, POOL_AVG_SAMPLES, pool->used);
+ pool_put_to_os(pool, ptr);
+}
+
+
+/* Updates <pch>'s fill_pattern and fills the free area after <item> with it,
+ * up to <size> bytes. The item part is left untouched.
+ */
+void pool_fill_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size)
+{
+ ulong *ptr = (ulong *)item;
+ uint ofs;
+ ulong u;
+
+ if (size <= sizeof(*item))
+ return;
+
+ /* Upgrade the fill_pattern to change about half of the bits
+ * (to be sure to catch static flag corruption), and apply it.
+ */
+ u = pch->fill_pattern += ~0UL / 3; // 0x55...55
+ ofs = sizeof(*item) / sizeof(*ptr);
+ while (ofs < size / sizeof(*ptr))
+ ptr[ofs++] = u;
+}
+
+/* check a pool_cache_item's integrity after extracting it from the cache. It
+ * must have been previously initialized using pool_fill_pattern(). If any
+ * corruption is detected, the function provokes an immediate crash.
+ */
+void pool_check_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size)
+{
+ const ulong *ptr = (const ulong *)item;
+ uint ofs;
+ ulong u;
+
+ if (size <= sizeof(*item))
+ return;
+
+ /* let's check that all words past *item are equal */
+ ofs = sizeof(*item) / sizeof(*ptr);
+ u = ptr[ofs++];
+ while (ofs < size / sizeof(*ptr)) {
+ if (unlikely(ptr[ofs] != u))
+ ABORT_NOW();
+ ofs++;
+ }
+}
+
+/* removes up to <count> items from the end of the local pool cache <ph> for
+ * pool <pool>. The shared pool is refilled with these objects in the limit
+ * of the number of acceptable objects, and the rest will be released to the
+ * OS. It is not a problem if <count> is larger than the number of objects in
+ * the local cache. The counters are automatically updated. Must not be used
+ * with pools disabled.
+ */
+static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head *ph, uint count)
+{
+ struct pool_cache_item *item;
+ struct pool_item *pi, *head = NULL;
+ uint released = 0;
+ uint cluster = 0;
+ uint to_free_max;
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ /* Note: this will be zero when global pools are disabled */
+ to_free_max = pool_releasable(pool);
+
+ while (released < count && !LIST_ISEMPTY(&ph->list)) {
+ item = LIST_PREV(&ph->list, typeof(item), by_pool);
+ BUG_ON(&item->by_pool == &ph->list);
+ if (unlikely(pool_debugging & POOL_DBG_INTEGRITY))
+ pool_check_pattern(ph, item, pool->size);
+ LIST_DELETE(&item->by_pool);
+ LIST_DELETE(&item->by_lru);
+
+ if (to_free_max > released || cluster) {
+ /* will never match when global pools are disabled */
+ pi = (struct pool_item *)item;
+ pi->next = NULL;
+ pi->down = head;
+ head = pi;
+ cluster++;
+ if (cluster >= CONFIG_HAP_POOL_CLUSTER_SIZE) {
+ /* enough to make a cluster */
+ pool_put_to_shared_cache(pool, head, cluster);
+ cluster = 0;
+ head = NULL;
+ }
+ } else
+ pool_free_nocache(pool, item);
+
+ released++;
+ }
+
+ /* incomplete cluster left */
+ if (cluster)
+ pool_put_to_shared_cache(pool, head, cluster);
+
+ ph->count -= released;
+ pool_cache_count -= released;
+ pool_cache_bytes -= released * pool->size;
+}
+
+/* Evicts some of the oldest objects from one local cache, until its number of
+ * objects is no more than 16+1/8 of the total number of locally cached objects
+ * or the total size of the local cache is no more than 75% of its maximum (i.e.
+ * we don't want a single cache to use all the cache for itself). For this, the
+ * list is scanned in reverse. If <full> is non-zero, all objects are evicted.
+ * Must not be used when pools are disabled.
+ */
+void pool_evict_from_local_cache(struct pool_head *pool, int full)
+{
+ struct pool_cache_head *ph = &pool->cache[tid];
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ while ((ph->count && full) ||
+ (ph->count >= CONFIG_HAP_POOL_CLUSTER_SIZE &&
+ ph->count >= 16 + pool_cache_count / 8 &&
+ pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 3 / 4)) {
+ pool_evict_last_items(pool, ph, CONFIG_HAP_POOL_CLUSTER_SIZE);
+ }
+}
+
+/* Evicts some of the oldest objects from the local cache, pushing them to the
+ * global pool. Must not be used when pools are disabled.
+ */
+void pool_evict_from_local_caches()
+{
+ struct pool_cache_item *item;
+ struct pool_cache_head *ph;
+ struct pool_head *pool;
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ do {
+ item = LIST_PREV(&th_ctx->pool_lru_head, struct pool_cache_item *, by_lru);
+ BUG_ON(&item->by_lru == &th_ctx->pool_lru_head);
+ /* note: by definition we remove oldest objects so they also are the
+ * oldest in their own pools, thus their next is the pool's head.
+ */
+ ph = LIST_NEXT(&item->by_pool, struct pool_cache_head *, list);
+ BUG_ON(ph->tid != tid);
+
+ pool = container_of(ph - tid, struct pool_head, cache);
+ BUG_ON(pool != ph->pool);
+
+ pool_evict_last_items(pool, ph, CONFIG_HAP_POOL_CLUSTER_SIZE);
+ } while (pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 7 / 8);
+}
+
+/* Frees an object to the local cache, possibly pushing oldest objects to the
+ * shared cache, which itself may decide to release some of them to the OS.
+ * While it is unspecified what the object becomes past this point, it is
+ * guaranteed to be released from the users' perpective. A caller address may
+ * be passed and stored into the area when DEBUG_POOL_TRACING is set. Must not
+ * be used with pools disabled.
+ */
+void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
+{
+ struct pool_cache_item *item = (struct pool_cache_item *)ptr;
+ struct pool_cache_head *ph = &pool->cache[tid];
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ LIST_INSERT(&ph->list, &item->by_pool);
+ LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
+ POOL_DEBUG_TRACE_CALLER(pool, item, caller);
+ ph->count++;
+ if (unlikely(pool_debugging & POOL_DBG_INTEGRITY))
+ pool_fill_pattern(ph, item, pool->size);
+ pool_cache_count++;
+ pool_cache_bytes += pool->size;
+
+ if (unlikely(pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 3 / 4)) {
+ if (ph->count >= 16 + pool_cache_count / 8 + CONFIG_HAP_POOL_CLUSTER_SIZE)
+ pool_evict_from_local_cache(pool, 0);
+ if (pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE)
+ pool_evict_from_local_caches();
+ }
+}
+
+/* Tries to refill the local cache <pch> from the shared one for pool <pool>.
+ * This is only used when pools are in use and shared pools are enabled. No
+ * malloc() is attempted, and poisoning is never performed. The purpose is to
+ * get the fastest possible refilling so that the caller can easily check if
+ * the cache has enough objects for its use. Must not be used when pools are
+ * disabled.
+ */
+void pool_refill_local_from_shared(struct pool_head *pool, struct pool_cache_head *pch)
+{
+ struct pool_cache_item *item;
+ struct pool_item *ret, *down;
+ uint count;
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ /* we'll need to reference the first element to figure the next one. We
+ * must temporarily lock it so that nobody allocates then releases it,
+ * or the dereference could fail.
+ */
+ ret = _HA_ATOMIC_LOAD(&pool->free_list);
+ do {
+ while (unlikely(ret == POOL_BUSY)) {
+ __ha_cpu_relax();
+ ret = _HA_ATOMIC_LOAD(&pool->free_list);
+ }
+ if (ret == NULL)
+ return;
+ } while (unlikely((ret = _HA_ATOMIC_XCHG(&pool->free_list, POOL_BUSY)) == POOL_BUSY));
+
+ if (unlikely(ret == NULL)) {
+ HA_ATOMIC_STORE(&pool->free_list, NULL);
+ return;
+ }
+
+ /* this releases the lock */
+ HA_ATOMIC_STORE(&pool->free_list, ret->next);
+
+ /* now store the retrieved object(s) into the local cache */
+ count = 0;
+ for (; ret; ret = down) {
+ down = ret->down;
+ item = (struct pool_cache_item *)ret;
+ POOL_DEBUG_TRACE_CALLER(pool, item, NULL);
+ LIST_INSERT(&pch->list, &item->by_pool);
+ LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
+ count++;
+ if (unlikely(pool_debugging & POOL_DBG_INTEGRITY))
+ pool_fill_pattern(pch, item, pool->size);
+ }
+ HA_ATOMIC_ADD(&pool->used, count);
+ pch->count += count;
+ pool_cache_count += count;
+ pool_cache_bytes += count * pool->size;
+}
+
+/* Adds pool item cluster <item> to the shared cache, which contains <count>
+ * elements. The caller is advised to first check using pool_releasable() if
+ * it's wise to add this series of objects there. Both the pool and the item's
+ * head must be valid.
+ */
+void pool_put_to_shared_cache(struct pool_head *pool, struct pool_item *item, uint count)
+{
+ struct pool_item *free_list;
+
+ _HA_ATOMIC_SUB(&pool->used, count);
+ free_list = _HA_ATOMIC_LOAD(&pool->free_list);
+ do {
+ while (unlikely(free_list == POOL_BUSY)) {
+ __ha_cpu_relax();
+ free_list = _HA_ATOMIC_LOAD(&pool->free_list);
+ }
+ _HA_ATOMIC_STORE(&item->next, free_list);
+ __ha_barrier_atomic_store();
+ } while (!_HA_ATOMIC_CAS(&pool->free_list, &free_list, item));
+ __ha_barrier_atomic_store();
+ swrate_add(&pool->needed_avg, POOL_AVG_SAMPLES, pool->used);
+}
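+
+/* The free_list manipulations above use a tiny lock-free protocol: the list
+ * head is atomically exchanged with the POOL_BUSY sentinel, which both takes
+ * ownership of the list and makes concurrent threads spin. A reduced sketch
+ * of the acquire/release sequence (error handling omitted):
+ *
+ *   void *head;
+ *
+ *   do {
+ *           head = _HA_ATOMIC_XCHG(&pool->free_list, POOL_BUSY);
+ *           if (head != POOL_BUSY)
+ *                   break;       // we now own the detached list
+ *           __ha_cpu_relax();    // another thread holds it, retry
+ *   } while (1);
+ *   ... detach one or more items from <head> ...
+ *   _HA_ATOMIC_STORE(&pool->free_list, head); // releases the "lock"
+ *
+ * The real code first spins on plain loads to avoid hammering the cache
+ * line with exchanges while the sentinel is present.
+ */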
+
+/*
+ * This function frees whatever can be freed in pool <pool>.
+ */
+void pool_flush(struct pool_head *pool)
+{
+ struct pool_item *next, *temp, *down;
+
+ if (!pool || (pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL)))
+ return;
+
+ /* The loop below atomically detaches the head of the free list and
+ * replaces it with a NULL. Then the list can be released.
+ */
+ next = pool->free_list;
+ do {
+ while (unlikely(next == POOL_BUSY)) {
+ __ha_cpu_relax();
+ next = _HA_ATOMIC_LOAD(&pool->free_list);
+ }
+ if (next == NULL)
+ return;
+ } while (unlikely((next = _HA_ATOMIC_XCHG(&pool->free_list, POOL_BUSY)) == POOL_BUSY));
+ _HA_ATOMIC_STORE(&pool->free_list, NULL);
+ __ha_barrier_atomic_store();
+
+ while (next) {
+ temp = next;
+ next = temp->next;
+ for (; temp; temp = down) {
+ down = temp->down;
+ pool_put_to_os(pool, temp);
+ }
+ }
+ /* here, we should have pool->allocated == pool->used */
+}
+
+/*
+ * This function frees whatever can be freed in all pools, but respecting
+ * the minimum thresholds imposed by owners. It makes sure to be alone to
+ * run by using thread_isolate(). <pool_ctx> is unused.
+ */
+void pool_gc(struct pool_head *pool_ctx)
+{
+ struct pool_head *entry;
+ int isolated = thread_isolated();
+
+ if (!isolated)
+ thread_isolate();
+
+ list_for_each_entry(entry, &pools, list) {
+ struct pool_item *temp, *down;
+
+ while (entry->free_list &&
+ (int)(entry->allocated - entry->used) > (int)entry->minavail) {
+ temp = entry->free_list;
+ entry->free_list = temp->next;
+ for (; temp; temp = down) {
+ down = temp->down;
+ pool_put_to_os(entry, temp);
+ }
+ }
+ }
+
+ trim_all_pools();
+
+ if (!isolated)
+ thread_release();
+}
+
+/*
+ * Returns a pointer to an object taken from pool <pool>, or dynamically
+ * allocated when the pool's cache is empty. Returns NULL if the allocation
+ * fails. <flags> is a binary-OR of POOL_F_* flags.
+ * Prefer using pool_alloc() which does the right thing without flags.
+ */
+void *__pool_alloc(struct pool_head *pool, unsigned int flags)
+{
+ void *p = NULL;
+ void *caller = __builtin_return_address(0);
+
+ if (unlikely(pool_debugging & POOL_DBG_FAIL_ALLOC))
+ if (!(flags & POOL_F_NO_FAIL) && mem_should_fail(pool))
+ return NULL;
+
+ if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p)
+ p = pool_get_from_cache(pool, caller);
+
+ if (unlikely(!p))
+ p = pool_alloc_nocache(pool);
+
+ if (likely(p)) {
+ if (unlikely(flags & POOL_F_MUST_ZERO))
+ memset(p, 0, pool->size);
+ else if (unlikely(!(flags & POOL_F_NO_POISON) && (pool_debugging & POOL_DBG_POISON)))
+ memset(p, mem_poison_byte, pool->size);
+ }
+ return p;
+}
+
+/*
+ * Puts a memory area back to the corresponding pool. <ptr> must be valid. Using
+ * pool_free() is preferred.
+ */
+void __pool_free(struct pool_head *pool, void *ptr)
+{
+ const void *caller = __builtin_return_address(0);
+
+ /* we'll get late corruption if we refill to the wrong pool or double-free */
+ POOL_DEBUG_CHECK_MARK(pool, ptr);
+ POOL_DEBUG_RESET_MARK(pool, ptr);
+
+ if (unlikely(pool_debugging & POOL_DBG_NO_CACHE)) {
+ pool_free_nocache(pool, ptr);
+ return;
+ }
+
+ pool_put_to_cache(pool, ptr, caller);
+}
+
+
+#ifdef DEBUG_UAF
+
+/************* use-after-free allocator *************/
+
+/* allocates an area of size <size> and returns it. The semantics are similar
+ * to those of malloc(). However the allocation is rounded up to 4kB so that a
+ * full page is allocated. This ensures the object can be freed alone so that
+ * future dereferences are easily detected. The returned object is always
+ * 16-byte aligned to avoid issues with unaligned structure objects. In case
+ * some padding is added, the area's start address is copied at the end of the
+ * padding to help detect underflows.
+ */
+void *pool_alloc_area_uaf(size_t size)
+{
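+ /* the padding is the remainder of the page, rounded down to a
+ * multiple of 16 so that the returned pointer stays 16-byte aligned
+ * while ending as close as possible to the page boundary.
+ */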
+ size_t pad = (4096 - size) & 0xFF0;
+ void *ret;
+
+ ret = mmap(NULL, (size + 4095) & -4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (ret != MAP_FAILED) {
+ /* let's dereference the page before returning so that the real
+ * allocation in the system is performed without holding the lock.
+ */
+ *(int *)ret = 0;
+ if (pad >= sizeof(void *))
+ *(void **)(ret + pad - sizeof(void *)) = ret + pad;
+ ret += pad;
+ } else {
+ ret = NULL;
+ }
+ return ret;
+}
+
+/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
+ * semantics are identical to free() except that the size must absolutely match
+ * the one passed to pool_alloc_area(). In case some padding is added, the
+ * area's start address is compared to the one at the end of the padding, and
+ * a segfault is triggered if they don't match, indicating an underflow.
+ */
+void pool_free_area_uaf(void *area, size_t size)
+{
+ size_t pad = (4096 - size) & 0xFF0;
+
+ if (pad >= sizeof(void *) && *(void **)(area - sizeof(void *)) != area)
+ ABORT_NOW();
+
+ munmap(area - pad, (size + 4095) & -4096);
+}
+
+#endif /* DEBUG_UAF */
+
+/*
+ * This function destroys a pool by freeing it completely, unless it's still
+ * in use. This should be called only under extreme circumstances. It always
+ * returns NULL if the resulting pool is empty, easing the clearing of the old
+ * pointer, otherwise it returns the pool.
+ */
+void *pool_destroy(struct pool_head *pool)
+{
+ if (pool) {
+ if (!(pool_debugging & POOL_DBG_NO_CACHE))
+ pool_evict_from_local_cache(pool, 1);
+
+ pool_flush(pool);
+ if (pool->used)
+ return pool;
+ pool->users--;
+ if (!pool->users) {
+ LIST_DELETE(&pool->list);
+ /* note that if used == 0, the cache is empty */
+ free(pool->base_addr);
+ }
+ }
+ return NULL;
+}
+
+/* This destroys all pools on exit. It is *not* thread safe. */
+void pool_destroy_all()
+{
+ struct pool_head *entry, *back;
+
+ list_for_each_entry_safe(entry, back, &pools, list) {
+ /* there's only one occurrence of each pool in the list, and since
+ * we're exiting instead of looping on the whole list just to
+ * decrement users, we force it to 1 here.
+ */
+ entry->users = 1;
+ pool_destroy(entry);
+ }
+}
+
+/* This function dumps memory usage information into the trash buffer. */
+void dump_pools_to_trash()
+{
+ struct pool_head *entry;
+ unsigned long long allocated, used;
+ int nbpools;
+ unsigned long long cached_bytes = 0;
+ uint cached = 0;
+
+ allocated = used = nbpools = 0;
+ chunk_printf(&trash, "Dumping pools usage. Use SIGQUIT to flush them.\n");
+ list_for_each_entry(entry, &pools, list) {
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ int i;
+ for (cached = i = 0; i < global.nbthread; i++)
+ cached += entry->cache[i].count;
+ cached_bytes += cached * (ullong)entry->size;
+ }
+ chunk_appendf(&trash, " - Pool %s (%u bytes) : %u allocated (%llu bytes), %u used"
+ " (~%u by thread caches)"
+ ", needed_avg %u, %u failures, %u users, @%p%s\n",
+ entry->name, entry->size, entry->allocated,
+ (ullong)entry->size * entry->allocated, entry->used,
+ cached,
+ swrate_avg(entry->needed_avg, POOL_AVG_SAMPLES), entry->failed,
+ entry->users, entry,
+ (entry->flags & MEM_F_SHARED) ? " [SHARED]" : "");
+
+ allocated += entry->allocated * (ullong)entry->size;
+ used += entry->used * (ullong)entry->size;
+ nbpools++;
+ }
+ chunk_appendf(&trash, "Total: %d pools, %llu bytes allocated, %llu used"
+ " (~%llu by thread caches)"
+ ".\n",
+ nbpools, allocated, used, cached_bytes
+ );
+}
+
+/* Dump statistics on pools usage. */
+void dump_pools(void)
+{
+ dump_pools_to_trash();
+ qfprintf(stderr, "%s", trash.area);
+}
+
+/* This function returns the total number of failed pool allocations */
+int pool_total_failures()
+{
+ struct pool_head *entry;
+ int failed = 0;
+
+ list_for_each_entry(entry, &pools, list)
+ failed += entry->failed;
+ return failed;
+}
+
+/* This function returns the total amount of memory allocated in pools (in bytes) */
+unsigned long long pool_total_allocated()
+{
+ struct pool_head *entry;
+ unsigned long long allocated = 0;
+
+ list_for_each_entry(entry, &pools, list)
+ allocated += entry->allocated * (ullong)entry->size;
+ return allocated;
+}
+
+/* This function returns the total amount of memory used in pools (in bytes) */
+unsigned long long pool_total_used()
+{
+ struct pool_head *entry;
+ unsigned long long used = 0;
+
+ list_for_each_entry(entry, &pools, list)
+ used += entry->used * (ullong)entry->size;
+ return used;
+}
+
+/* This function parses a string made of a set of debugging features as
+ * specified after -dM on the command line, and will set pool_debugging
+ * accordingly. On success it returns a strictly positive value. It may return
+ * 0 with the first warning in <err>, -1 with a help message in <err>, or -2
+ * with the first error in <err>. <err> is undefined on success, and will be
+ * non-null and locally allocated on help/error/warning.
+ * The caller must free it. Warnings are used to report features that were not
+ * enabled at build time, and errors are used to report unknown features.
+ */
+int pool_parse_debugging(const char *str, char **err)
+{
+ struct ist args;
+ char *end;
+ uint new_dbg;
+ int v;
+
+ /* if it's empty or starts with a number, it's the mem poisoning byte */
+ v = strtol(str, &end, 0);
+ if (!*end || *end == ',') {
+ mem_poison_byte = *str ? v : 'P';
+ if (mem_poison_byte >= 0)
+ pool_debugging |= POOL_DBG_POISON;
+ else
+ pool_debugging &= ~POOL_DBG_POISON;
+ str = end;
+ }
+
+ new_dbg = pool_debugging;
+
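+ /* now parse the optional comma-delimited list of set/clear keywords */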
+ for (args = ist(str); istlen(args); args = istadv(istfind(args, ','), 1)) {
+ struct ist feat = iststop(args, ',');
+
+ if (!istlen(feat))
+ continue;
+
+ if (isteq(feat, ist("help"))) {
+ ha_free(err);
+ memprintf(err,
+ "-dM alone enables memory poisonning with byte 0x50 on allocation. A numeric\n"
+ "value may be appended immediately after -dM to use another value (0 supported).\n"
+ "Then an optional list of comma-delimited keywords may be appended to set or\n"
+ "clear some debugging options ('*' marks the current setting):\n\n"
+ " set clear description\n"
+ " -----------------+-----------------+-----------------------------------------\n");
+
+ for (v = 0; dbg_options[v].flg; v++) {
+ memprintf(err, "%s %c %-15s|%c %-15s| %s\n",
+ *err,
+ (pool_debugging & dbg_options[v].flg) ? '*' : ' ',
+ dbg_options[v].set,
+ (pool_debugging & dbg_options[v].flg) ? ' ' : '*',
+ dbg_options[v].clr,
+ dbg_options[v].hlp);
+ }
+ return -1;
+ }
+
+ for (v = 0; dbg_options[v].flg; v++) {
+ if (isteq(feat, ist(dbg_options[v].set))) {
+ new_dbg |= dbg_options[v].flg;
+ break;
+ }
+ else if (isteq(feat, ist(dbg_options[v].clr))) {
+ new_dbg &= ~dbg_options[v].flg;
+ break;
+ }
+ }
+
+ if (!dbg_options[v].flg) {
+ memprintf(err, "unknown pool debugging feature <%.*s>", (int)istlen(feat), istptr(feat));
+ return -2;
+ }
+ }
+
+ pool_debugging = new_dbg;
+ return 1;
+}
+
+/* This function dumps memory usage information onto the stream connector's
+ * read buffer. It returns 0 as long as it does not complete, non-zero upon
+ * completion. No state is used.
+ */
+static int cli_io_handler_dump_pools(struct appctx *appctx)
+{
+ dump_pools_to_trash();
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ return 1;
+}
+
+/* callback used to create early pool <name> of size <size> and store the
+ * resulting pointer into <ptr>. If the allocation fails, it exits after
+ * emitting an error message.
+ */
+void create_pool_callback(struct pool_head **ptr, char *name, unsigned int size)
+{
+ *ptr = create_pool(name, size, MEM_F_SHARED);
+ if (!*ptr) {
+ ha_alert("Failed to allocate pool '%s' of size %u : %s. Aborting.\n",
+ name, size, strerror(errno));
+ exit(1);
+ }
+}
+
+/* Initializes all per-thread arrays on startup */
+static void init_pools()
+{
+ int thr;
+
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ LIST_INIT(&ha_thread_ctx[thr].pool_lru_head);
+ }
+
+ detect_allocator();
+}
+
+INITCALL0(STG_PREPARE, init_pools);
+
+/* Report in build options if trim is supported */
+static void pools_register_build_options(void)
+{
+ if (is_trim_enabled()) {
+ char *ptr = NULL;
+ memprintf(&ptr, "Support for malloc_trim() is enabled.");
+ hap_register_build_opts(ptr, 1);
+ }
+}
+INITCALL0(STG_REGISTER, pools_register_build_options);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "pools", NULL }, "show pools : report information about the memory pools usage", NULL, cli_io_handler_dump_pools },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+
+/* config parser for global "tune.fail-alloc" */
+static int mem_parse_global_fail_alloc(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+ mem_fail_rate = atoi(args[1]);
+ if (mem_fail_rate < 0 || mem_fail_rate > 100) {
+ memprintf(err, "'%s' expects a numeric value between 0 and 100.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "no-memory-trimming" */
+static int mem_parse_global_no_mem_trim(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+ disable_trim = 1;
+ return 0;
+}
+
+/* register global config keywords */
+static struct cfg_kw_list mem_cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.fail-alloc", mem_parse_global_fail_alloc },
+ { CFG_GLOBAL, "no-memory-trimming", mem_parse_global_no_mem_trim },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &mem_cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_quic.c b/src/proto_quic.c
new file mode 100644
index 0000000..d4a5a13
--- /dev/null
+++ b/src/proto_quic.c
@@ -0,0 +1,719 @@
+/*
+ * AF_INET/AF_INET6 QUIC protocol layer.
+ *
+ * Copyright 2020 Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/udp.h>
+#include <netinet/in.h>
+
+#include <import/ebtree-t.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cbuf.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/proto_udp.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+/* per-thread quic datagram handlers */
+struct quic_dghdlr *quic_dghdlrs;
+
+/* Size of the internal buffer of QUIC RX buffer at the fd level */
+#define QUIC_RX_BUFSZ (1UL << 18)
+
+DECLARE_STATIC_POOL(pool_head_quic_rxbuf, "quic_rxbuf", QUIC_RX_BUFSZ);
+
+static void quic_add_listener(struct protocol *proto, struct listener *listener);
+static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static int quic_connect_server(struct connection *conn, int flags);
+static void quic_enable_listener(struct listener *listener);
+static void quic_disable_listener(struct listener *listener);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_quic4 = {
+ .name = "quic4",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = quic_bind_listener,
+ .enable = quic_enable_listener,
+ .disable = quic_disable_listener,
+ .add = quic_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = quic_sock_accept_conn,
+ .get_src = quic_sock_get_src,
+ .get_dst = quic_sock_get_dst,
+ .connect = quic_connect_server,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet4,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = quic_sock_accepting_conn,
+ .default_iocb = quic_sock_fd_iocb,
+ .receivers = LIST_HEAD_INIT(proto_quic4.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_quic4);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_quic6 = {
+ .name = "quic6",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = quic_bind_listener,
+ .enable = quic_enable_listener,
+ .disable = quic_disable_listener,
+ .add = quic_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = quic_sock_accept_conn,
+ .get_src = quic_sock_get_src,
+ .get_dst = quic_sock_get_dst,
+ .connect = quic_connect_server,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet6,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = quic_sock_accepting_conn,
+ .default_iocb = quic_sock_fd_iocb,
+ .receivers = LIST_HEAD_INIT(proto_quic6.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_quic6);
+
+/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
+ * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
+ * - 0 : ignore remote address (may even be a NULL pointer)
+ * - 1 : use provided address
+ * - 2 : use provided port
+ * - 3 : use both
+ *
+ * The function supports one foreign binding method :
+ * - linux_tproxy: we directly bind to the foreign address
+ * This function returns 0 when everything's OK, 1 if it could not bind to the
+ * local address, 2 if it could not bind to the foreign address.
+ */
+int quic_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
+{
+ struct sockaddr_storage bind_addr;
+ int foreign_ok = 0;
+ int ret;
+ static THREAD_LOCAL int ip_transp_working = 1;
+ static THREAD_LOCAL int ip6_transp_working = 1;
+
+ switch (local->ss_family) {
+ case AF_INET:
+ if (flags && ip_transp_working) {
+ /* This deserves some explanation. Some platforms will support
+ * multiple combinations of certain methods, so we try the
+ * supported ones until one succeeds.
+ */
+ if (sock_inet4_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip_transp_working = 0;
+ }
+ break;
+ case AF_INET6:
+ if (flags && ip6_transp_working) {
+ if (sock_inet6_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip6_transp_working = 0;
+ }
+ break;
+ }
+
+ if (flags) {
+ memset(&bind_addr, 0, sizeof(bind_addr));
+ bind_addr.ss_family = remote->ss_family;
+ switch (remote->ss_family) {
+ case AF_INET:
+ if (flags & 1)
+ ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
+ if (flags & 2)
+ ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
+ break;
+ case AF_INET6:
+ if (flags & 1)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
+ if (flags & 2)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
+ break;
+ default:
+ /* we don't want to try to bind to an unknown address family */
+ foreign_ok = 0;
+ }
+ }
+
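+ /* allow the address to be reused, otherwise an explicit bind() below may fail */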
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ if (foreign_ok) {
+ if (is_inet_addr(&bind_addr)) {
+ ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
+ if (ret < 0)
+ return 2;
+ }
+ }
+ else {
+ if (is_inet_addr(local)) {
+ ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
+ if (ret < 0)
+ return 1;
+ }
+ }
+
+ if (!flags)
+ return 0;
+
+ if (!foreign_ok)
+ /* we could not bind to a foreign address */
+ return 2;
+
+ return 0;
+}
+
+/*
+ * This function initiates a QUIC connection establishment to the target assigned
+ * to connection <conn> using (si->{target,dst}). A source address may be
+ * pointed to by conn->src in case of transparent proxying. Normal source
+ * bind addresses are still determined locally (due to the possible need of a
+ * source port). conn->target may point either to a valid server or to a backend,
+ * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
+ * supported. The <flags> argument is not used.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
+ */
+
+int quic_connect_server(struct connection *conn, int flags)
+{
+ int fd;
+ struct server *srv;
+ struct proxy *be;
+ struct conn_src *src;
+ struct sockaddr_storage *addr;
+
+ BUG_ON(!conn->dst);
+
+ conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
+
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_PROXY:
+ be = __objt_proxy(conn->target);
+ srv = NULL;
+ break;
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ be = srv->proxy;
+ break;
+ default:
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ fd = conn->handle.fd = sock_create_server_socket(conn);
+
+ if (fd == -1) {
+ qfprintf(stderr, "Cannot get a server socket.\n");
+
+ if (errno == ENFILE) {
+ conn->err_code = CO_ER_SYS_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EMFILE) {
+ conn->err_code = CO_ER_PROC_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == ENOBUFS || errno == ENOMEM) {
+ conn->err_code = CO_ER_SYS_MEMLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ conn->err_code = CO_ER_NOPROTO;
+ }
+ else
+ conn->err_code = CO_ER_SOCK_ERR;
+
+ /* this is a resource error */
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ if (fd >= global.maxsock) {
+ /* do not log anything there, it's a normal condition when this option
+ * is used to serialize connections to a server !
+ */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(fd);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ /* allow specific binding :
+ * - server-specific at first
+ * - proxy-specific next
+ */
+ if (srv && srv->conn_src.opts & CO_SRC_BIND)
+ src = &srv->conn_src;
+ else if (be->conn_src.opts & CO_SRC_BIND)
+ src = &be->conn_src;
+ else
+ src = NULL;
+
+ if (src) {
+ int ret, flags = 0;
+
+ if (conn->src && is_inet_addr(conn->src)) {
+ switch (src->opts & CO_SRC_TPROXY_MASK) {
+ case CO_SRC_TPROXY_CLI:
+ conn_set_private(conn);
+ /* fall through */
+ case CO_SRC_TPROXY_ADDR:
+ flags = 3;
+ break;
+ case CO_SRC_TPROXY_CIP:
+ case CO_SRC_TPROXY_DYN:
+ conn_set_private(conn);
+ flags = 1;
+ break;
+ }
+ }
+
+#ifdef SO_BINDTODEVICE
+ /* Note: this might fail if not CAP_NET_RAW */
+ if (src->iface_name)
+ setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
+#endif
+
+ if (src->sport_range) {
+ int attempts = 10; /* should be more than enough to find a spare port */
+ struct sockaddr_storage sa;
+
+ ret = 1;
+ memcpy(&sa, &src->source_addr, sizeof(sa));
+
+ do {
+ /* note: in case of retry, we may have to release a previously
+ * allocated port, hence this loop's construct.
+ */
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+
+ if (!attempts)
+ break;
+ attempts--;
+
+ fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
+ if (!fdinfo[fd].local_port) {
+ conn->err_code = CO_ER_PORT_RANGE;
+ break;
+ }
+
+ fdinfo[fd].port_range = src->sport_range;
+ set_host_port(&sa, fdinfo[fd].local_port);
+
+ ret = quic_bind_socket(fd, flags, &sa, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ } while (ret != 0); /* binding NOK */
+ }
+ else {
+#ifdef IP_BIND_ADDRESS_NO_PORT
+ static THREAD_LOCAL int bind_address_no_port = 1;
+ setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
+#endif
+ ret = quic_bind_socket(fd, flags, &src->source_addr, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ }
+
+ if (unlikely(ret != 0)) {
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+
+ if (ret == 1) {
+ ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to source address before connect() for backend %s.\n",
+ be->id);
+ } else {
+ ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to tproxy source address before connect() for backend %s.\n",
+ be->id);
+ }
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+ }
+
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
+ if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
+ if (errno == EINPROGRESS || errno == EALREADY) {
+ /* common case, let's wait for connect status */
+ conn->flags |= CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EISCONN) {
+ /* should normally not happen but if so, indicates that it's OK */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
+ char *msg;
+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRNOTAVAIL) {
+ msg = "no free ports";
+ conn->err_code = CO_ER_FREE_PORTS;
+ }
+ else {
+ msg = "local address already in use";
+ conn->err_code = CO_ER_ADDR_INUSE;
+ }
+
+ qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ } else if (errno == ETIMEDOUT) {
+ //qfprintf(stderr,"Connect(): ETIMEDOUT");
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVTO;
+ } else {
+ // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
+ //qfprintf(stderr,"Connect(): %d", errno);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVCL;
+ }
+ }
+ else {
+ /* connect() == 0, this is great! */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_LINGER_RISK); /* close hard if needed */
+
+ if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ fd_want_send(fd);
+ fd_cant_send(fd);
+ fd_cant_recv(fd);
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
+
+/* Add listener <listener> to protocol <proto>. Technically speaking we just
+ * initialize a few entries which should be doable during quic_bind_listener().
+ * The end of the initialization goes on with the default function.
+ */
+static void quic_add_listener(struct protocol *proto, struct listener *listener)
+{
+ listener->flags |= LI_F_QUIC_LISTENER;
+ listener->rx.flags |= RX_F_LOCAL_ACCEPT;
+
+ default_add_listener(proto, listener);
+}
+
+/* Allocates the RX buffers for listener <l>.
+ * Returns 1 on success, 0 on failure.
+ */
+static int quic_alloc_rxbufs_listener(struct listener *l)
+{
+ int i;
+ struct quic_receiver_buf *tmp;
+
+ MT_LIST_INIT(&l->rx.rxbuf_list);
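+ /* allocate one receive buffer per thread */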
+ for (i = 0; i < global.nbthread; i++) {
+ struct quic_receiver_buf *rxbuf;
+ char *buf;
+
+ rxbuf = calloc(1, sizeof(*rxbuf));
+ if (!rxbuf)
+ goto err;
+
+ buf = pool_alloc(pool_head_quic_rxbuf);
+ if (!buf) {
+ free(rxbuf);
+ goto err;
+ }
+
+ rxbuf->buf = b_make(buf, QUIC_RX_BUFSZ, 0, 0);
+ LIST_INIT(&rxbuf->dgram_list);
+ MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
+ }
+
+ return 1;
+
+ err:
+ while ((tmp = MT_LIST_POP(&l->rx.rxbuf_list, typeof(tmp), rxbuf_el))) {
+ pool_free(pool_head_quic_rxbuf, tmp->buf.area);
+ free(tmp);
+ }
+ return 0;
+}
+
+/* This function tries to bind a QUIC4/6 listener. It may return a warning or
+ * an error message in <errmsg> if the message is at most <errlen> bytes long
+ * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if <errlen> is zero.
+ */
+static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ const struct sockaddr_storage addr = listener->rx.addr;
+ int fd, err = ERR_NONE;
+ char *msg = NULL;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ goto udp_return;
+ }
+
+ /* Set IP_PKTINFO to retrieve destination address on recv. */
+ fd = listener->rx.fd;
+ switch (addr.ss_family) {
+ case AF_INET:
+#if defined(IP_PKTINFO)
+ setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
+#elif defined(IP_RECVDSTADDR)
+ setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one));
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+ case AF_INET6:
+#ifdef IPV6_RECVPKTINFO
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
+#endif
+ break;
+ default:
+ break;
+ }
+
+ if (!quic_alloc_rxbufs_listener(listener)) {
+ msg = "could not initialize tx/rx rings";
+ err |= ERR_WARN;
+ goto udp_return;
+ }
+
+ listener_set_state(listener, LI_LISTEN);
+
+ udp_return:
+ if (msg && errlen) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&listener->rx.addr, pn, sizeof(pn));
+ snprintf(errmsg, errlen, "%s for [%s:%d]", msg, pn, get_host_port(&listener->rx.addr));
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid. Does nothing in early boot (needs fd_updt).
+ */
+static void quic_enable_listener(struct listener *l)
+{
+ /* FIXME: The following statements are incorrect. This
+ * is the responsibility of the QUIC xprt to stop accepting new
+ * connections.
+ */
+ if (fd_updt)
+ fd_want_recv(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid. Does nothing in early boot (needs fd_updt).
+ */
+static void quic_disable_listener(struct listener *l)
+{
+ /* FIXME: The following statements are incorrect. This
+ * is the responsibility of the QUIC xprt to start accepting new
+ * connections again.
+ */
+ if (fd_updt)
+ fd_stop_recv(l->rx.fd);
+}
+
+static int quic_alloc_dghdlrs(void)
+{
+ int i;
+
+ quic_dghdlrs = calloc(global.nbthread, sizeof(*quic_dghdlrs));
+ if (!quic_dghdlrs) {
+ ha_alert("Failed to allocate the quic datagram handlers.\n");
+ return 0;
+ }
+
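+ /* set up one datagram handler tasklet per thread */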
+ for (i = 0; i < global.nbthread; i++) {
+ struct quic_dghdlr *dghdlr = &quic_dghdlrs[i];
+
+ dghdlr->task = tasklet_new();
+ if (!dghdlr->task) {
+ ha_alert("Failed to allocate the quic datagram handler on thread %d.\n", i);
+ return 0;
+ }
+
+ tasklet_set_tid(dghdlr->task, i);
+ dghdlr->task->context = dghdlr;
+ dghdlr->task->process = quic_lstnr_dghdlr;
+
+ dghdlr->odcids = EB_ROOT_UNIQUE;
+ dghdlr->cids = EB_ROOT_UNIQUE;
+
+ MT_LIST_INIT(&dghdlr->dgrams);
+ }
+
+ return 1;
+}
+REGISTER_POST_CHECK(quic_alloc_dghdlrs);
+
+static int quic_deallocate_dghdlrs(void)
+{
+ int i;
+
+ if (quic_dghdlrs) {
+ for (i = 0; i < global.nbthread; ++i)
+ tasklet_free(quic_dghdlrs[i].task);
+ free(quic_dghdlrs);
+ }
+
+ return 1;
+}
+REGISTER_POST_DEINIT(quic_deallocate_dghdlrs);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_sockpair.c b/src/proto_sockpair.c
new file mode 100644
index 0000000..e140fda
--- /dev/null
+++ b/src/proto_sockpair.c
@@ -0,0 +1,564 @@
+/*
+ * Socket Pair protocol layer (sockpair)
+ *
+ * Copyright HAProxy Technologies - William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <time.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_sockpair.h>
+#include <haproxy/sock.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static void sockpair_enable_listener(struct listener *listener);
+static void sockpair_disable_listener(struct listener *listener);
+static int sockpair_connect_server(struct connection *conn, int flags);
+static int sockpair_accepting_conn(const struct receiver *rx);
+struct connection *sockpair_accept_conn(struct listener *l, int *status);
+
+struct proto_fam proto_fam_sockpair = {
+ .name = "sockpair",
+ .sock_domain = AF_CUST_SOCKPAIR,
+ .sock_family = AF_UNIX,
+ .sock_addrlen = sizeof(struct sockaddr_un),
+ .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
+ .addrcmp = NULL,
+ .bind = sockpair_bind_receiver,
+ .get_src = NULL,
+ .get_dst = NULL,
+};
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_sockpair = {
+ .name = "sockpair",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = sockpair_bind_listener,
+ .enable = sockpair_enable_listener,
+ .disable = sockpair_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .accept_conn = sockpair_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = sockpair_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ /* Note: suspend/resume not supported */
+
+ /* address family */
+ .fam = &proto_fam_sockpair,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = 0,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sockpair_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_sockpair.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_sockpair);
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void sockpair_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void sockpair_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback
+ * and context, respectively, with ->bind_thread as the thread mask. Returns an
+ * error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
+ * an error message may be passed into <errmsg>. Note that the binding address
+ * is only an FD to receive the incoming FDs on. Thus by definition there is no
+ * real "bind" operation, this only completes the receiver. Such FDs are not
+ * inherited upon reload.
+ */
+int sockpair_bind_receiver(struct receiver *rx, char **errmsg)
+{
+ int err;
+
+ /* ensure we never return garbage */
+ if (errmsg)
+ *errmsg = 0;
+
+ err = ERR_NONE;
+
+ if (rx->flags & RX_F_BOUND)
+ return ERR_NONE;
+
+ if (rx->fd == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "sockpair may be only used with inherited FDs");
+ goto bind_return;
+ }
+
+ if (rx->fd >= global.maxsock) {
+ err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
+ memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
+ goto bind_close_return;
+ }
+
+ if (fd_set_nonblock(rx->fd) == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot make socket non-blocking");
+ goto bind_close_return;
+ }
+
+ rx->flags |= RX_F_BOUND;
+
+ fd_insert(rx->fd, rx->owner, rx->iocb, thread_mask(rx->bind_thread) & all_threads_mask);
+ return err;
+
+ bind_return:
+ if (errmsg && *errmsg)
+ memprintf(errmsg, "%s for [fd %d]", *errmsg, rx->fd);
+
+ return err;
+
+ bind_close_return:
+ close(rx->fd);
+ goto bind_return;
+}
+
+/* This function changes the state from ASSIGNED to LISTEN. The socket is NOT
+ * enabled for polling. The return value is composed from ERR_NONE,
+ * ERR_RETRYABLE and ERR_FATAL. It may return a warning or an error message in
+ * <errmsg> if the message is at most <errlen> bytes long (including '\0').
+ * Note that <errmsg> may be NULL if <errlen> is also zero.
+ */
+static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int err;
+ char *msg = NULL;
+
+ err = ERR_NONE;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ goto err_return;
+ }
+
+ listener_set_state(listener, LI_LISTEN);
+ return err;
+
+ err_return:
+ if (msg && errlen)
+ snprintf(errmsg, errlen, "%s [fd %d]", msg, listener->rx.fd);
+ return err;
+}
+
+/*
+ * Send FD over a unix socket
+ *
+ * <send_fd> is the FD to send
+ * <fd> is the fd of the unix socket to use for the transfer
+ *
+ * The iobuf variable could be used in the future to enhance the protocol.
+ */
+int send_fd_uxst(int fd, int send_fd)
+{
+ char iobuf[2];
+ struct iovec iov;
+ struct msghdr msghdr;
+
+ char cmsgbuf[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg;
+
+ int *fdptr;
+
+ iov.iov_base = iobuf;
+ iov.iov_len = sizeof(iobuf);
+
+ memset(&msghdr, 0, sizeof(msghdr));
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+
+ /* Now send the fds */
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int));
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+
+ fdptr = (int *)CMSG_DATA(cmsg);
+ memcpy(fdptr, &send_fd, sizeof(send_fd));
+
+ if (sendmsg(fd, &msghdr, 0) != sizeof(iobuf)) {
+ ha_warning("Failed to transfer socket\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ *
+ * This function works like uxst_connect_server but instead of creating a
+ * socket and establishing a connection, it creates a pair of connected
+ * sockets, and send one of them through the destination FD. The destination FD
+ * is stored in conn->dst->sin_addr.s_addr during configuration parsing.
+ *
+ * conn->target may point either to a valid server or to a backend, depending
+ * on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are supported. The
+ * <flags> argument is ignored.
+ *
+ * Note that a pending send_proxy message accounts for data.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
+ */
+static int sockpair_connect_server(struct connection *conn, int flags)
+{
+ int sv[2], fd, dst_fd = -1;
+
+ BUG_ON(!conn->dst);
+
+ /* the FD is stored in the sockaddr struct */
+ dst_fd = ((struct sockaddr_in *)conn->dst)->sin_addr.s_addr;
+
+ if (obj_type(conn->target) != OBJ_TYPE_PROXY &&
+ obj_type(conn->target) != OBJ_TYPE_SERVER) {
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+ ha_alert("socketpair(): Cannot create socketpair. Giving up.\n");
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ fd = conn->handle.fd = sv[1];
+
+ if (fd >= global.maxsock) {
+ /* do not log anything there, it's a normal condition when this option
+ * is used to serialize connections to a server !
+ */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ /* The new socket is sent on the other side, it should be retrieved and
+ * considered as an 'accept' socket on the server side */
+ if (send_fd_uxst(dst_fd, sv[0]) == -1) {
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ close(sv[0]); /* we don't need this side anymore */
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ /* Prepare to send a few handshakes related to the on-wire protocol. */
+ if (conn->send_proxy_ofs)
+ conn->flags |= CO_FL_SEND_PROXY;
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK); /* no need to disable lingering */
+
+ return SF_ERR_NONE; /* connection is OK */
+}
+
+
+/*
+ * Receives a file descriptor transferred from a unix socket.
+ *
+ * Returns -1 on error, or the received socket fd.
+ *
+ * The iobuf variable could be used in the future to enhance the protocol.
+ */
+int recv_fd_uxst(int sock)
+{
+ struct msghdr msghdr;
+ struct iovec iov;
+ char iobuf[2];
+
+ char cmsgbuf[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg;
+
+ int recv_fd = -1;
+ int ret = -1;
+
+ memset(&msghdr, 0, sizeof(msghdr));
+
+ iov.iov_base = iobuf;
+ iov.iov_len = sizeof(iobuf);
+
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int));
+
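+ /* receive the message, retrying if interrupted by a signal */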
+ while (1) {
+ ret = recvmsg(sock, &msghdr, 0);
+ if (ret == -1 && errno == EINTR)
+ continue;
+ else
+ break;
+ }
+
+ if (ret == -1)
+ return ret;
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ size_t totlen = cmsg->cmsg_len -
+ CMSG_LEN(0);
+ memcpy(&recv_fd, CMSG_DATA(cmsg), totlen);
+ }
+ return recv_fd;
+}
+
+/* Tests if the receiver supports accepting connections. Returns positive on
+ * success, 0 if not possible, negative if the socket is non-recoverable. In
+ * practice zero is never returned since we don't support suspending sockets.
+ * The real test consists in verifying we have a connected SOCK_STREAM of
+ * family AF_UNIX.
+ */
+static int sockpair_accepting_conn(const struct receiver *rx)
+{
+ struct sockaddr sa;
+ socklen_t len;
+ int val;
+
+ len = sizeof(val);
+ if (getsockopt(rx->fd, SOL_SOCKET, SO_TYPE, &val, &len) == -1)
+ return -1;
+
+ if (val != SOCK_STREAM)
+ return -1;
+
+ len = sizeof(sa);
+ if (getsockname(rx->fd, &sa, &len) != 0)
+ return -1;
+
+ if (sa.sa_family != AF_UNIX)
+ return -1;
+
+ len = sizeof(val);
+ if (getsockopt(rx->fd, SOL_SOCKET, SO_ACCEPTCONN, &val, &len) == -1)
+ return -1;
+
+ /* Note: cannot be a listening socket, must be established */
+ if (val)
+ return -1;
+
+ return 1;
+}
+
+/* Accept an incoming connection from listener <l>, and return it, as well as
+ * a CO_AC_* status code into <status> if not null. Null is returned on error.
+ * <l> must be a valid listener with a valid frontend.
+ */
+struct connection *sockpair_accept_conn(struct listener *l, int *status)
+{
+ struct proxy *p = l->bind_conf->frontend;
+ struct connection *conn = NULL;
+ int ret;
+ int cfd;
+
+ if ((cfd = recv_fd_uxst(l->rx.fd)) != -1)
+ fd_set_nonblock(cfd);
+
+ if (likely(cfd != -1)) {
+ /* Perfect, the connection was accepted */
+ conn = conn_new(&l->obj_type);
+ if (!conn)
+ goto fail_conn;
+
+ if (!sockaddr_alloc(&conn->src, NULL, 0))
+ goto fail_addr;
+
+ /* just like with UNIX sockets, only the family is filled */
+ conn->src->ss_family = AF_UNIX;
+ conn->handle.fd = cfd;
+ ret = CO_AC_DONE;
+ goto done;
+ }
+
+ switch (errno) {
+#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
+ case EWOULDBLOCK:
+#endif
+ case EAGAIN:
+ ret = CO_AC_DONE; /* nothing more to accept */
+ if (fdtab[l->rx.fd].state & (FD_POLL_HUP|FD_POLL_ERR)) {
+ /* the listening socket might have been disabled in a shared
+ * process and we're a collateral victim. We'll just pause for
+ * a while in case it comes back. In the meantime, we need to
+ * clear this sticky flag.
+ */
+ _HA_ATOMIC_AND(&fdtab[l->rx.fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ ret = CO_AC_PAUSE;
+ }
+ fd_cant_recv(l->rx.fd);
+ break;
+
+ case EINVAL:
+ /* might be trying to accept on a shut fd (eg: soft stop) */
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EINTR:
+ case ECONNABORTED:
+ ret = CO_AC_RETRY;
+ break;
+
+ case ENFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EMFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case ENOBUFS:
+ case ENOMEM:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ default:
+ /* unexpected result, let's give up and let other tasks run */
+ ret = CO_AC_YIELD;
+ }
+ done:
+ if (status)
+ *status = ret;
+ return conn;
+
+ fail_addr:
+ conn_free(conn);
+ conn = NULL;
+ fail_conn:
+ ret = CO_AC_PAUSE;
+ goto done;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_tcp.c b/src/proto_tcp.c
new file mode 100644
index 0000000..0c86d6e
--- /dev/null
+++ b/src/proto_tcp.c
@@ -0,0 +1,824 @@
+/*
+ * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/tools.h>
+
+
+static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static int tcp_suspend_receiver(struct receiver *rx);
+static int tcp_resume_receiver(struct receiver *rx);
+static void tcp_enable_listener(struct listener *listener);
+static void tcp_disable_listener(struct listener *listener);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_tcpv4 = {
+ .name = "tcpv4",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = tcp_bind_listener,
+ .enable = tcp_enable_listener,
+ .disable = tcp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = sock_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = tcp_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ .rx_suspend = tcp_suspend_receiver,
+ .rx_resume = tcp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet4,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = IPPROTO_TCP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sock_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_tcpv4.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv4);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_tcpv6 = {
+ .name = "tcpv6",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = tcp_bind_listener,
+ .enable = tcp_enable_listener,
+ .disable = tcp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = sock_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = tcp_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ .rx_suspend = tcp_suspend_receiver,
+ .rx_resume = tcp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet6,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = IPPROTO_TCP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sock_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_tcpv6.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv6);
+
+/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
+ * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
+ * - 0 : ignore remote address (may even be a NULL pointer)
+ * - 1 : use provided address
+ * - 2 : use provided port
+ * - 3 : use both
+ *
+ * The function supports one foreign binding method :
+ * - linux_tproxy: we directly bind to the foreign address
+ * This function returns 0 when everything's OK, 1 if it could not bind to the
+ * local address, 2 if it could not bind to the foreign address.
+ */
+int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
+{
+ struct sockaddr_storage bind_addr;
+ int foreign_ok = 0;
+ int ret;
+ static THREAD_LOCAL int ip_transp_working = 1;
+ static THREAD_LOCAL int ip6_transp_working = 1;
+
+ switch (local->ss_family) {
+ case AF_INET:
+ if (flags && ip_transp_working) {
+ /* This deserves some explanation. Some platforms will support
+ * multiple combinations of certain methods, so we try the
+ * supported ones until one succeeds.
+ */
+ if (sock_inet4_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip_transp_working = 0;
+ }
+ break;
+ case AF_INET6:
+ if (flags && ip6_transp_working) {
+ if (sock_inet6_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip6_transp_working = 0;
+ }
+ break;
+ }
+
+ if (flags) {
+ memset(&bind_addr, 0, sizeof(bind_addr));
+ bind_addr.ss_family = remote->ss_family;
+ switch (remote->ss_family) {
+ case AF_INET:
+ if (flags & 1)
+ ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
+ if (flags & 2)
+ ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
+ break;
+ case AF_INET6:
+ if (flags & 1)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
+ if (flags & 2)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
+ break;
+ default:
+ /* we don't want to try to bind to an unknown address family */
+ foreign_ok = 0;
+ }
+ }
+
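+ /* allow the address to be reused, otherwise an explicit bind() below may fail */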
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ if (foreign_ok) {
+ if (is_inet_addr(&bind_addr)) {
+ ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
+ if (ret < 0)
+ return 2;
+ }
+ }
+ else {
+ if (is_inet_addr(local)) {
+ ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
+ if (ret < 0)
+ return 1;
+ }
+ }
+
+ if (!flags)
+ return 0;
+
+ if (!foreign_ok)
+ /* we could not bind to a foreign address */
+ return 2;
+
+ return 0;
+}
+
+/*
+ * This function initiates a TCP connection establishment to the target assigned
+ * to connection <conn> using (conn->{target,dst}). A source address may be
+ * pointed to by conn->src in case of transparent proxying. Normal source
+ * bind addresses are still determined locally (due to the possible need of a
+ * source port). conn->target may point either to a valid server or to a
+ * backend, depending on the object type. Only OBJ_TYPE_PROXY and
+ * OBJ_TYPE_SERVER are supported. The CONNECT_HAS_DATA bit in <flags>
+ * indicates whether there is data waiting to be sent, in order to adjust
+ * data write polling and, on some platforms, to avoid an empty initial ACK.
+ * The <flags> argument also allows the caller to force a delayed ACK when
+ * establishing the connection :
+ * - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
+ * - CONNECT_DELACK_SMART_CONNECT = delayed ACK if backend has tcp-smart-connect, regardless of data
+ * - CONNECT_DELACK_ALWAYS = delayed ACK regardless of backend options
+ *
+ * Note that a pending send_proxy message accounts for data.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
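+ *
+ * A sketched (hypothetical) call sequence from an upper layer :
+ *
+ *	int err = tcp_connect_server(conn, CONNECT_HAS_DATA);
+ *	if (err != SF_ERR_NONE)
+ *		return err;  // CO_FL_ERROR was set on the connection
+ *	// else the fd is registered, and polling stays armed as long as
+ *	// the connection is pending (CO_FL_WAIT_L4_CONN)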
+ */
+
+int tcp_connect_server(struct connection *conn, int flags)
+{
+ int fd;
+ struct server *srv;
+ struct proxy *be;
+ struct conn_src *src;
+ int use_fastopen = 0;
+ struct sockaddr_storage *addr;
+
+ BUG_ON(!conn->dst);
+
+ conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
+
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_PROXY:
+ be = __objt_proxy(conn->target);
+ srv = NULL;
+ break;
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ be = srv->proxy;
+ /* Make sure we check that we have data before activating
+ * TFO, or we could trigger a kernel issue whereby after
+ * a successful connect() == 0, any subsequent connect()
+ * will return EINPROGRESS instead of EISCONN.
+ */
+ use_fastopen = (srv->flags & SRV_F_FASTOPEN) &&
+ ((flags & (CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA)) ==
+ (CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA));
+ break;
+ default:
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ fd = conn->handle.fd = sock_create_server_socket(conn);
+
+ if (fd == -1) {
+ qfprintf(stderr, "Cannot get a server socket.\n");
+
+ if (errno == ENFILE) {
+ conn->err_code = CO_ER_SYS_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EMFILE) {
+ conn->err_code = CO_ER_PROC_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == ENOBUFS || errno == ENOMEM) {
+ conn->err_code = CO_ER_SYS_MEMLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ conn->err_code = CO_ER_NOPROTO;
+ }
+ else
+ conn->err_code = CO_ER_SOCK_ERR;
+
+ /* this is a resource error */
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ if (fd >= global.maxsock) {
+		/* no emergency log is sent there because hitting this limit is
+		 * a normal condition when maxsock is used to serialize
+		 * connections to a server ; the alert on stderr is enough.
+		 */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(fd);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1 ||
+ (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) {
+		qfprintf(stderr,"Cannot set client socket to non-blocking mode.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (be->options & PR_O_TCP_SRV_KA) {
+ setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
+
+#ifdef TCP_KEEPCNT
+ if (be->srvtcpka_cnt)
+ setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &be->srvtcpka_cnt, sizeof(be->srvtcpka_cnt));
+#endif
+
+#ifdef TCP_KEEPIDLE
+ if (be->srvtcpka_idle)
+ setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &be->srvtcpka_idle, sizeof(be->srvtcpka_idle));
+#endif
+
+#ifdef TCP_KEEPINTVL
+ if (be->srvtcpka_intvl)
+ setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &be->srvtcpka_intvl, sizeof(be->srvtcpka_intvl));
+#endif
+ }
+
+ /* allow specific binding :
+ * - server-specific at first
+ * - proxy-specific next
+ */
+ if (srv && srv->conn_src.opts & CO_SRC_BIND)
+ src = &srv->conn_src;
+ else if (be->conn_src.opts & CO_SRC_BIND)
+ src = &be->conn_src;
+ else
+ src = NULL;
+
+ if (src) {
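+		/* note: this local <flags> holds the bind flags passed to
+		 * tcp_bind_socket() below, and shadows the function's <flags>
+		 * argument (the connect flags) inside this block.
+		 */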
+ int ret, flags = 0;
+
+ if (conn->src && is_inet_addr(conn->src)) {
+ switch (src->opts & CO_SRC_TPROXY_MASK) {
+ case CO_SRC_TPROXY_CLI:
+ case CO_SRC_TPROXY_ADDR:
+ flags = 3;
+ break;
+ case CO_SRC_TPROXY_CIP:
+ case CO_SRC_TPROXY_DYN:
+ flags = 1;
+ break;
+ }
+ }
+
+#ifdef SO_BINDTODEVICE
+ /* Note: this might fail if not CAP_NET_RAW */
+ if (src->iface_name)
+ setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
+#endif
+
+ if (src->sport_range) {
+ int attempts = 10; /* should be more than enough to find a spare port */
+ struct sockaddr_storage sa;
+
+ ret = 1;
+ memcpy(&sa, &src->source_addr, sizeof(sa));
+
+ do {
+ /* note: in case of retry, we may have to release a previously
+ * allocated port, hence this loop's construct.
+ */
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+
+ if (!attempts)
+ break;
+ attempts--;
+
+ fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
+ if (!fdinfo[fd].local_port) {
+ conn->err_code = CO_ER_PORT_RANGE;
+ break;
+ }
+
+ fdinfo[fd].port_range = src->sport_range;
+ set_host_port(&sa, fdinfo[fd].local_port);
+
+ ret = tcp_bind_socket(fd, flags, &sa, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ } while (ret != 0); /* binding NOK */
+ }
+ else {
+#ifdef IP_BIND_ADDRESS_NO_PORT
+ static THREAD_LOCAL int bind_address_no_port = 1;
+ setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
+#endif
+ ret = tcp_bind_socket(fd, flags, &src->source_addr, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ }
+
+ if (unlikely(ret != 0)) {
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+
+ if (ret == 1) {
+ ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to source address before connect() for backend %s.\n",
+ be->id);
+ } else {
+ ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to tproxy source address before connect() for backend %s.\n",
+ be->id);
+ }
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+ }
+
+#if defined(TCP_QUICKACK)
+	/* disabling tcp quick ack now allows the first request to leave the
+	 * machine with the first ACK. We do this when the caller forces it,
+	 * or when data (including a pending send_proxy message) are expected
+	 * and the backend has tcp-smart-connect enabled.
+	 */
+ if (flags & (CONNECT_DELACK_ALWAYS) ||
+ ((flags & CONNECT_DELACK_SMART_CONNECT ||
+ (flags & CONNECT_HAS_DATA) || conn->send_proxy_ofs) &&
+ (be->options2 & PR_O2_SMARTCON)))
+ setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
+#endif
+
+#ifdef TCP_USER_TIMEOUT
+	/* there is not much more we can do here if it fails; it's a minor setting anyway */
+ if (srv && srv->tcp_ut)
+ setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
+#endif
+
+ if (use_fastopen) {
+#if defined(TCP_FASTOPEN_CONNECT)
+ setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
+#endif
+ }
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
+ if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
+ if (errno == EINPROGRESS || errno == EALREADY) {
+ /* common case, let's wait for connect status */
+ conn->flags |= CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EISCONN) {
+ /* should normally not happen but if so, indicates that it's OK */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
+ char *msg;
+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRNOTAVAIL) {
+ msg = "no free ports";
+ conn->err_code = CO_ER_FREE_PORTS;
+ }
+ else {
+ msg = "local address already in use";
+ conn->err_code = CO_ER_ADDR_INUSE;
+ }
+
+ qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ } else if (errno == ETIMEDOUT) {
+ //qfprintf(stderr,"Connect(): ETIMEDOUT");
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVTO;
+ } else {
+ // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
+ //qfprintf(stderr,"Connect(): %d", errno);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVCL;
+ }
+ }
+ else {
+ /* connect() == 0, this is great! */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_LINGER_RISK); /* close hard if needed */
+
+ if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ fd_want_send(fd);
+ fd_cant_send(fd);
+ fd_cant_recv(fd);
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
+
+/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
+ * an error message in <errmsg> if the message is at most <errlen> bytes long
+ * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if <errlen> is zero.
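+ *
+ * Since the return value is a bit field, callers are expected to test the
+ * bits individually rather than compare for equality, e.g. (sketch only) :
+ *
+ *	err = tcp_bind_listener(l, errmsg, sizeof(errmsg));
+ *	if (err & ERR_ABORT)
+ *		... // pointless to start the remaining listeners
+ *	else if (err & ERR_RETRYABLE)
+ *		... // e.g. port in use, may be retried
+ *
+ * See protocol_bind_all() in protocol.c for the actual handling.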
+ */
+int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int fd, err;
+ int ready;
+ struct buffer *msg = alloc_trash_chunk();
+
+ err = ERR_NONE;
+
+ if (!msg) {
+ if (errlen)
+ snprintf(errmsg, errlen, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+	if (listener->state != LI_ASSIGNED) {
+		free_trash_chunk(msg); /* do not leak the trash chunk */
+		return ERR_NONE; /* already bound */
+	}
+
+	if (!(listener->rx.flags & RX_F_BOUND)) {
+		chunk_appendf(msg, "%sreceiving socket not bound", msg->data ? ", " : "");
+		err |= ERR_FATAL | ERR_ALERT;
+		goto tcp_return;
+	}
+
+ fd = listener->rx.fd;
+
+ if (listener->options & LI_O_NOLINGER)
+ setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));
+ else {
+ struct linger tmplinger;
+ socklen_t len = sizeof(tmplinger);
+ if (getsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger, &len) == 0 &&
+ (tmplinger.l_onoff == 1 || tmplinger.l_linger == 0)) {
+ tmplinger.l_onoff = 0;
+ tmplinger.l_linger = 0;
+ setsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger,
+ sizeof(tmplinger));
+ }
+ }
+
+#if defined(TCP_MAXSEG)
+ if (listener->maxseg > 0) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
+ &listener->maxseg, sizeof(listener->maxseg)) == -1) {
+ chunk_appendf(msg, "%scannot set MSS to %d", msg->data ? ", " : "", listener->maxseg);
+ err |= ERR_WARN;
+ }
+ } else {
+ /* we may want to try to restore the default MSS if the socket was inherited */
+ int tmpmaxseg = -1;
+ int defaultmss;
+ socklen_t len = sizeof(tmpmaxseg);
+
+ if (listener->rx.addr.ss_family == AF_INET)
+ defaultmss = sock_inet_tcp_maxseg_default;
+ else
+ defaultmss = sock_inet6_tcp_maxseg_default;
+
+ getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len);
+ if (defaultmss > 0 &&
+ tmpmaxseg != defaultmss &&
+ setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &defaultmss, sizeof(defaultmss)) == -1) {
+ chunk_appendf(msg, "%scannot set MSS to %d", msg->data ? ", " : "", defaultmss);
+ err |= ERR_WARN;
+ }
+ }
+#endif
+#if defined(TCP_USER_TIMEOUT)
+ if (listener->tcp_ut) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
+ &listener->tcp_ut, sizeof(listener->tcp_ut)) == -1) {
+ chunk_appendf(msg, "%scannot set TCP User Timeout", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ } else
+ setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &zero,
+ sizeof(zero));
+#endif
+#if defined(TCP_DEFER_ACCEPT)
+ if (listener->options & LI_O_DEF_ACCEPT) {
+ /* defer accept by up to one second */
+ int accept_delay = 1;
+ if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
+ chunk_appendf(msg, "%scannot enable DEFER_ACCEPT", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ } else
+ setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &zero,
+ sizeof(zero));
+#endif
+#if defined(TCP_FASTOPEN)
+ if (listener->options & LI_O_TCP_FO) {
+ /* TFO needs a queue length, let's use the configured backlog */
+ int qlen = listener_backlog(listener);
+ if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
+ chunk_appendf(msg, "%scannot enable TCP_FASTOPEN", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ } else {
+ socklen_t len;
+ int qlen;
+ len = sizeof(qlen);
+ /* Only disable fast open if it was enabled, we don't want
+ * the kernel to create a fast open queue if there's none.
+ */
+ if (getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, &len) == 0 &&
+ qlen != 0) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &zero,
+ sizeof(zero)) == -1) {
+ chunk_appendf(msg, "%scannot disable TCP_FASTOPEN", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ }
+ }
+#endif
+
+ ready = sock_accepting_conn(&listener->rx) > 0;
+
+ if (!ready && /* only listen if not already done by external process */
+ listen(fd, listener_backlog(listener)) == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ chunk_appendf(msg, "%scannot listen to socket", msg->data ? ", " : "");
+ goto tcp_close_return;
+ }
+
+#if !defined(TCP_DEFER_ACCEPT) && defined(SO_ACCEPTFILTER)
+ /* the socket needs to listen first */
+ if (listener->options & LI_O_DEF_ACCEPT) {
+ struct accept_filter_arg accept;
+ memset(&accept, 0, sizeof(accept));
+ strcpy(accept.af_name, "dataready");
+ if (setsockopt(fd, SOL_SOCKET, SO_ACCEPTFILTER, &accept, sizeof(accept)) == -1) {
+ chunk_appendf(msg, "%scannot enable ACCEPT_FILTER", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ }
+#endif
+#if defined(TCP_QUICKACK)
+ if (listener->options & LI_O_NOQUICKACK)
+ setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
+ else
+ setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
+#endif
+
+ /* the socket is ready */
+ listener_set_state(listener, LI_LISTEN);
+ goto tcp_return;
+
+ tcp_close_return:
+ free_trash_chunk(msg);
+ msg = NULL;
+ close(fd);
+ tcp_return:
+ if (msg && errlen && msg->data) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&listener->rx.addr, pn, sizeof(pn));
+ snprintf(errmsg, errlen, "%s for [%s:%d]", msg->area, pn, get_host_port(&listener->rx.addr));
+ }
+ free_trash_chunk(msg);
+ msg = NULL;
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void tcp_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void tcp_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended. Note that inherited FDs
+ * are neither suspended nor resumed, we only enable/disable polling on them.
+ */
+static int tcp_suspend_receiver(struct receiver *rx)
+{
+ const struct sockaddr sa = { .sa_family = AF_UNSPEC };
+ int ret;
+
+ /* We never disconnect a shared FD otherwise we'd break it in the
+ * parent process and any possible subsequent worker inheriting it.
+ * Thus we just stop receiving from it.
+ */
+ if (rx->flags & RX_F_INHERITED)
+ goto done;
+
+ if (connect(rx->fd, &sa, sizeof(sa)) < 0)
+ goto check_already_done;
+ done:
+ fd_stop_recv(rx->fd);
+ return 1;
+
+ check_already_done:
+	/* in case the connect() above fails, it might be because we're
+ * dealing with a socket that is shared with other processes doing the
+ * same. Let's check if it's still accepting connections.
+ */
+ ret = sock_accepting_conn(rx);
+ if (ret <= 0) {
+ /* unrecoverable or paused by another process */
+ fd_stop_recv(rx->fd);
+ return ret == 0;
+ }
+
+ /* still listening, that's not good */
+ return -1;
+}
+
+/* Resume a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly resumed. Note that inherited FDs
+ * are neither suspended nor resumed, we only enable/disable polling on them.
+ */
+static int tcp_resume_receiver(struct receiver *rx)
+{
+ struct listener *l = LIST_ELEM(rx, struct listener *, rx);
+
+ if (rx->fd < 0)
+ return 0;
+
+ if ((rx->flags & RX_F_INHERITED) || listen(rx->fd, listener_backlog(l)) == 0) {
+ fd_want_recv(l->rx.fd);
+ return 1;
+ }
+ return -1;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_udp.c b/src/proto_udp.c
new file mode 100644
index 0000000..1fd92c7
--- /dev/null
+++ b/src/proto_udp.c
@@ -0,0 +1,234 @@
+/*
+ * UDP protocol layer on top of AF_INET/AF_INET6
+ *
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * Partial merge by Emeric Brun <ebrun@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/udp.h>
+#include <netinet/in.h>
+
+#include <haproxy/fd.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_udp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/server.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+static int udp_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static void udp_enable_listener(struct listener *listener);
+static void udp_disable_listener(struct listener *listener);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_udp4 = {
+ .name = "udp4",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_DGRAM,
+ .listen = udp_bind_listener,
+ .enable = udp_enable_listener,
+ .disable = udp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet4,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .receivers = LIST_HEAD_INIT(proto_udp4.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_udp4);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_udp6 = {
+ .name = "udp6",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_DGRAM,
+ .listen = udp_bind_listener,
+ .enable = udp_enable_listener,
+ .disable = udp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet6,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .receivers = LIST_HEAD_INIT(proto_udp6.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_udp6);
+
+/* This function tries to bind a UDPv4/v6 listener. It may return a warning or
+ * an error message in <errmsg> if the message is at most <errlen> bytes long
+ * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if <errlen> is zero.
+ */
+int udp_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int err = ERR_NONE;
+ char *msg = NULL;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+	if (!(listener->rx.flags & RX_F_BOUND)) {
+		msg = "receiving socket not bound";
+		err |= ERR_FATAL | ERR_ALERT;
+		goto udp_return;
+	}
+
+ listener_set_state(listener, LI_LISTEN);
+
+ udp_return:
+ if (msg && errlen) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&listener->rx.addr, pn, sizeof(pn));
+ snprintf(errmsg, errlen, "%s for [%s:%d]", msg, pn, get_host_port(&listener->rx.addr));
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void udp_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void udp_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended.
+ * The principle is a bit ugly but works well, at least on Linux: in order to
+ * suspend the receiver, we want it to stop receiving traffic, which means that
+ * the socket must be unhashed from the kernel's socket table. The simple way
+ * to do this is to connect to any address that is reachable and will not be
+ * used by regular traffic, and a great one is reconnecting to self. Note that
+ * inherited FDs are neither suspended nor resumed, we only enable/disable
+ * polling on them.
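+ *
+ * In short, the suspend/resume pair performs roughly :
+ *	getsockname(fd, &ss, &len);    // learn the socket's own address
+ *	connect(fd, &ss, len);         // connect to self : traffic stops
+ * and later, in udp_resume_receiver() :
+ *	connect(fd, {AF_UNSPEC}, len); // break the association : traffic resumes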
+ */
+int udp_suspend_receiver(struct receiver *rx)
+{
+ struct sockaddr_storage ss;
+ socklen_t len = sizeof(ss);
+
+ if (rx->fd < 0)
+ return 0;
+
+ /* we never do that with a shared FD otherwise we'd break it in the
+ * parent process and any possible subsequent worker inheriting it.
+ */
+ if (rx->flags & RX_F_INHERITED)
+ goto done;
+
+ if (getsockname(rx->fd, (struct sockaddr *)&ss, &len) < 0)
+ return -1;
+
+ if (connect(rx->fd, (struct sockaddr *)&ss, len) < 0)
+ return -1;
+ done:
+ /* not necessary but may make debugging clearer */
+ fd_stop_recv(rx->fd);
+ return 1;
+}
+
+/* Resume a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly resumed.
+ * The principle is to reverse the change above, we'll break the connection by
+ * connecting to AF_UNSPEC. The association breaks and the socket starts to
+ * receive from everywhere again. Note that inherited FDs are neither suspended
+ * nor resumed, we only enable/disable polling on them.
+ */
+int udp_resume_receiver(struct receiver *rx)
+{
+ const struct sockaddr sa = { .sa_family = AF_UNSPEC };
+
+ if (rx->fd < 0)
+ return 0;
+
+ if (!(rx->flags & RX_F_INHERITED) && connect(rx->fd, &sa, sizeof(sa)) < 0)
+ return -1;
+
+ fd_want_recv(rx->fd);
+ return 1;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_uxdg.c b/src/proto_uxdg.c
new file mode 100644
index 0000000..68fe207
--- /dev/null
+++ b/src/proto_uxdg.c
@@ -0,0 +1,152 @@
+/*
+ * DGRAM protocol layer on top of AF_UNIX
+ *
+ * Copyright 2020 HAProxy Technologies, Emeric Brun <ebrun@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/fd.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/protocol.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_unix.h>
+
+static int uxdg_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static void uxdg_enable_listener(struct listener *listener);
+static void uxdg_disable_listener(struct listener *listener);
+static int uxdg_suspend_receiver(struct receiver *rx);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_uxdg = {
+ .name = "uxdg",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_DGRAM,
+ .listen = uxdg_bind_listener,
+ .enable = uxdg_enable_listener,
+ .disable = uxdg_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+
+ /* binding layer */
+ .rx_suspend = uxdg_suspend_receiver,
+
+ /* address family */
+ .fam = &proto_fam_unix,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = 0,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .receivers = LIST_HEAD_INIT(proto_uxdg.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_uxdg);
+
+/* This function tries to bind dgram unix socket listener. It may return a warning or
+ * an error message in <errmsg> if the message is at most <errlen> bytes long
+ * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if <errlen> is zero.
+ */
+int uxdg_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int err = ERR_NONE;
+ char *msg = NULL;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ err |= ERR_FATAL | ERR_ALERT;
+ goto uxdg_return;
+ }
+
+ listener_set_state(listener, LI_LISTEN);
+
+ uxdg_return:
+ if (msg && errlen) {
+ const char *path = ((struct sockaddr_un *)&listener->rx.addr)->sun_path;
+ snprintf(errmsg, errlen, "%s for [%s]", msg, path);
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxdg_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxdg_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended. Nothing is done for
+ * plain unix sockets since currently it's the new process which handles
+ * the renaming. Abstract sockets are completely unbound and closed so
+ * there's no need to stop the poller.
+ */
+static int uxdg_suspend_receiver(struct receiver *rx)
+{
+ struct listener *l = LIST_ELEM(rx, struct listener *, rx);
+
+ if (((struct sockaddr_un *)&rx->addr)->sun_path[0])
+ return 1;
+
+ /* Listener's lock already held. Call lockless version of
+ * unbind_listener. */
+ do_unbind_listener(l);
+ return 0;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_uxst.c b/src/proto_uxst.c
new file mode 100644
index 0000000..fd22e95
--- /dev/null
+++ b/src/proto_uxst.c
@@ -0,0 +1,361 @@
+/*
+ * UNIX SOCK_STREAM protocol layer (uxst)
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <time.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_uxst.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_unix.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+static int uxst_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static int uxst_connect_server(struct connection *conn, int flags);
+static void uxst_enable_listener(struct listener *listener);
+static void uxst_disable_listener(struct listener *listener);
+static int uxst_suspend_receiver(struct receiver *rx);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_uxst = {
+ .name = "unix_stream",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = uxst_bind_listener,
+ .enable = uxst_enable_listener,
+ .disable = uxst_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .accept_conn = sock_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = uxst_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ .rx_suspend = uxst_suspend_receiver,
+
+ /* address family */
+ .fam = &proto_fam_unix,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = 0,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sock_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_uxst.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_uxst);
+
+/********************************
+ * 1) low-level socket functions
+ ********************************/
+
+
+/********************************
+ * 2) listener-oriented functions
+ ********************************/
+
+/* This function creates a UNIX socket associated to the listener. It changes
+ * the state from ASSIGNED to LISTEN. The socket is NOT enabled for polling.
+ * The return value is composed from ERR_NONE, ERR_RETRYABLE and ERR_FATAL. It
+ * may return a warning or an error message in <errmsg> if the message is at
+ * most <errlen> bytes long (including '\0'). Note that <errmsg> may be NULL if
+ * <errlen> is also zero.
+ */
+static int uxst_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int fd, err;
+ int ready;
+ char *msg = NULL;
+
+ err = ERR_NONE;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ err |= ERR_FATAL | ERR_ALERT;
+ goto uxst_return;
+ }
+
+ fd = listener->rx.fd;
+ ready = sock_accepting_conn(&listener->rx) > 0;
+
+ if (!ready && /* only listen if not already done by external process */
+ listen(fd, listener_backlog(listener)) < 0) {
+ err |= ERR_FATAL | ERR_ALERT;
+ msg = "cannot listen to UNIX socket";
+ goto uxst_close_return;
+ }
+
+ /* the socket is now listening */
+ listener_set_state(listener, LI_LISTEN);
+ return err;
+
+ uxst_close_return:
+ close(fd);
+ uxst_return:
+ if (msg && errlen) {
+ const char *path = ((struct sockaddr_un *)&listener->rx.addr)->sun_path;
+ snprintf(errmsg, errlen, "%s for [%s]", msg, path);
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxst_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxst_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended. Nothing is done for
+ * plain unix sockets since currently it's the new process which handles
+ * the renaming. Abstract sockets are completely unbound and closed so
+ * there's no need to stop the poller.
+ */
+static int uxst_suspend_receiver(struct receiver *rx)
+{
+ struct listener *l = LIST_ELEM(rx, struct listener *, rx);
+
+ if (((struct sockaddr_un *)&rx->addr)->sun_path[0])
+ return 1;
+
+ /* Listener's lock already held. Call lockless version of
+ * unbind_listener. */
+ do_unbind_listener(l);
+ return 0;
+}
+
+
+/*
+ * This function initiates a UNIX connection establishment to the target assigned
+ * to connection <conn> using (conn->{target,dst}). The source address is
+ * ignored and will be selected by the system. conn->target may point either
+ * to a valid server or to a backend, depending on the object type. Only
+ * OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are supported. The <flags> argument is
+ * currently unused here : delayed ACK handling does not apply to UNIX
+ * sockets, and a pending send_proxy message is detected directly from
+ * conn->send_proxy_ofs.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
+ */
+static int uxst_connect_server(struct connection *conn, int flags)
+{
+ int fd;
+ struct server *srv;
+ struct proxy *be;
+
+ BUG_ON(!conn->dst);
+
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_PROXY:
+ be = __objt_proxy(conn->target);
+ srv = NULL;
+ break;
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ be = srv->proxy;
+ break;
+ default:
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if ((fd = conn->handle.fd = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) {
+ qfprintf(stderr, "Cannot get a server socket.\n");
+
+ if (errno == ENFILE) {
+ conn->err_code = CO_ER_SYS_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EMFILE) {
+ conn->err_code = CO_ER_PROC_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == ENOBUFS || errno == ENOMEM) {
+ conn->err_code = CO_ER_SYS_MEMLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ conn->err_code = CO_ER_NOPROTO;
+ }
+ else
+ conn->err_code = CO_ER_SOCK_ERR;
+
+ /* this is a resource error */
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ if (fd >= global.maxsock) {
+ /* do not log anything there, it's a normal condition when this option
+ * is used to serialize connections to a server !
+ */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(fd);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+		qfprintf(stderr,"Cannot set client socket to non-blocking mode.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ if (connect(fd, (struct sockaddr *)conn->dst, get_addr_len(conn->dst)) == -1) {
+ if (errno == EINPROGRESS || errno == EALREADY) {
+ conn->flags |= CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EISCONN) {
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
+ char *msg;
+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRNOTAVAIL) {
+ msg = "can't connect to destination unix socket, check backlog size on the server";
+ conn->err_code = CO_ER_FREE_PORTS;
+ }
+ else {
+ msg = "local address already in use";
+ conn->err_code = CO_ER_ADDR_INUSE;
+ }
+
+ qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
+ close(fd);
+ send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+ else if (errno == ETIMEDOUT) {
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVTO;
+ }
+ else { // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVCL;
+ }
+ }
+ else {
+ /* connect() already succeeded, which is quite usual for unix
+ * sockets. Let's avoid a second connect() probe to complete it.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ /* Prepare to send a few handshakes related to the on-wire protocol. */
+ if (conn->send_proxy_ofs)
+ conn->flags |= CO_FL_SEND_PROXY;
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK); /* no need to disable lingering */
+
+ if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ fd_want_send(fd);
+ fd_cant_send(fd);
+ fd_cant_recv(fd);
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/protocol.c b/src/protocol.c
new file mode 100644
index 0000000..146733a
--- /dev/null
+++ b/src/protocol.c
@@ -0,0 +1,237 @@
+/*
+ * Protocol registration functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/tools.h>
+
+
+/* List head of all registered protocols */
+static struct list protocols = LIST_HEAD_INIT(protocols);
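+
+/* Fast protocol lookup array, indexed by [sock_domain][proto_type][control
+ * layer is dgram], and filled by protocol_register() below.
+ */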
+struct protocol *__protocol_by_family[AF_CUST_MAX][PROTO_NUM_TYPES][2] __read_mostly = { };
+
+/* This is the global spinlock we may need to register/unregister listeners or
+ * protocols. Its main purpose is in fact to serialize the rare stop/deinit()
+ * phases.
+ */
+__decl_spinlock(proto_lock);
+
+/* Registers the protocol <proto> */
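+/* Note: protocols typically register themselves at boot time through an
+ * INITCALL rather than by calling this function directly, e.g. :
+ *	INITCALL1(STG_REGISTER, protocol_register, &proto_udp4);
+ */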
+void protocol_register(struct protocol *proto)
+{
+ int sock_domain = proto->fam->sock_domain;
+
+ BUG_ON(sock_domain < 0 || sock_domain >= AF_CUST_MAX);
+ BUG_ON(proto->proto_type >= PROTO_NUM_TYPES);
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ LIST_APPEND(&protocols, &proto->list);
+ __protocol_by_family[sock_domain]
+ [proto->proto_type]
+ [proto->xprt_type == PROTO_TYPE_DGRAM] = proto;
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* Unregisters the protocol <proto>. Note that all listeners must have
+ * previously been unbound.
+ */
+void protocol_unregister(struct protocol *proto)
+{
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ LIST_DELETE(&proto->list);
+ LIST_INIT(&proto->list);
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* binds all listeners of all registered protocols. Returns a composition
+ * of ERR_NONE, ERR_RETRYABLE, ERR_FATAL and ERR_ABORT.
+ */
+int protocol_bind_all(int verbose)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ struct receiver *receiver;
+ char msg[1000];
+ char *errmsg;
+ int err, lerr;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(receiver, &proto->receivers, proto_list) {
+ listener = LIST_ELEM(receiver, struct listener *, rx);
+
+ lerr = proto->fam->bind(receiver, &errmsg);
+ err |= lerr;
+
+ /* errors are reported if <verbose> is set or if they are fatal */
+ if (verbose || (lerr & (ERR_FATAL | ERR_ABORT))) {
+ struct proxy *px = listener->bind_conf->frontend;
+
+ if (lerr & ERR_ALERT)
+ ha_alert("Binding [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, errmsg);
+ else if (lerr & ERR_WARN)
+ ha_warning("Binding [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, errmsg);
+ }
+ if (lerr != ERR_NONE)
+ ha_free(&errmsg);
+
+ if (lerr & ERR_ABORT)
+ break;
+
+ if (lerr & ~ERR_WARN)
+ continue;
+
+ /* for now there's still always a listening function */
+ BUG_ON(!proto->listen);
+ lerr = proto->listen(listener, msg, sizeof(msg));
+ err |= lerr;
+
+ if (verbose || (lerr & (ERR_FATAL | ERR_ABORT))) {
+ struct proxy *px = listener->bind_conf->frontend;
+
+ if (lerr & ERR_ALERT)
+ ha_alert("Starting [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, msg);
+ else if (lerr & ERR_WARN)
+ ha_warning("Starting [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, msg);
+ }
+ if (lerr & ERR_ABORT)
+ break;
+ }
+ if (err & ERR_ABORT)
+ break;
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* unbinds all listeners of all registered protocols. They are also closed.
+ * This must be performed before calling exit() in order to get a chance to
+ * remove file-system based sockets and pipes.
+ * The return value is currently always ERR_NONE.
+ */
+int protocol_unbind_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ int err;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ unbind_listener(listener);
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* stops all listeners of all registered protocols. This will normally catch
+ * every single listener, all protocols included. This is to be used during
+ * soft_stop() only. It does not return any error.
+ */
+void protocol_stop_now(void)
+{
+ struct protocol *proto;
+ struct listener *listener, *lback;
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry_safe(listener, lback, &proto->receivers, rx.proto_list)
+ stop_listener(listener, 0, 1);
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* pauses all listeners of all registered protocols. This is typically
+ * used on SIG_TTOU to release all listening sockets for the time needed to
+ * try to bind a new process. The listeners enter LI_PAUSED. It returns
+ * ERR_NONE on success, or ERR_FATAL if at least one listener could not be
+ * paused.
+ */
+int protocol_pause_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ int err;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ if (!pause_listener(listener, 0))
+ err |= ERR_FATAL;
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* resumes all listeners of all registered protocols. This is typically used on
+ * SIG_TTIN to re-enable listening sockets after a new process failed to bind.
+ * The listeners switch to LI_READY/LI_FULL. It returns ERR_NONE on success,
+ * or ERR_FATAL if at least one listener could not be resumed.
+ */
+int protocol_resume_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ int err;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ if (!resume_listener(listener, 0))
+ err |= ERR_FATAL;
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* enables all listeners of all registered protocols. This is intended to be
+ * used after a fork() to enable reading on all file descriptors. Always
+ * returns ERR_NONE.
+ */
+int protocol_enable_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ enable_listener(listener);
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return ERR_NONE;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proxy.c b/src/proxy.c
new file mode 100644
index 0000000..3937809
--- /dev/null
+++ b/src/proxy.c
@@ -0,0 +1,3373 @@
+/*
+ * Proxy variables and functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <import/eb32tree.h>
+#include <import/ebistree.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/filters.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/obj_type-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/server-t.h>
+#include <haproxy/signal.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+int listeners; /* # of proxy listeners, set by cfgparse */
+struct proxy *proxies_list = NULL; /* list of all existing proxies */
+struct eb_root used_proxy_id = EB_ROOT; /* list of proxy IDs in use */
+struct eb_root proxy_by_name = EB_ROOT; /* tree of proxies sorted by name */
+struct eb_root defproxy_by_name = EB_ROOT; /* tree of default proxies sorted by name (dups possible) */
+unsigned int error_snapshot_id = 0; /* global ID assigned to each error then incremented */
+
+/* CLI context used during "show servers {state|conn}" */
+struct show_srv_ctx {
+ struct proxy *px; /* current proxy to dump or NULL */
+ struct server *sv; /* current server to dump or NULL */
+ uint only_pxid; /* dump only this proxy ID when explicit */
+ int show_conn; /* non-zero = "conn" otherwise "state" */
+ enum {
+ SHOW_SRV_HEAD = 0,
+ SHOW_SRV_LIST,
+ } state;
+};
+
+/* proxy->options */
+const struct cfg_opt cfg_opts[] =
+{
+ { "abortonclose", PR_O_ABRT_CLOSE, PR_CAP_BE, 0, 0 },
+ { "allbackups", PR_O_USE_ALL_BK, PR_CAP_BE, 0, 0 },
+ { "checkcache", PR_O_CHK_CACHE, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "clitcpka", PR_O_TCP_CLI_KA, PR_CAP_FE, 0, 0 },
+ { "contstats", PR_O_CONTSTATS, PR_CAP_FE, 0, 0 },
+ { "dontlognull", PR_O_NULLNOLOG, PR_CAP_FE, 0, 0 },
+ { "http-buffer-request", PR_O_WREQ_BODY, PR_CAP_FE | PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "http-ignore-probes", PR_O_IGNORE_PRB, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "idle-close-on-response", PR_O_IDLE_CLOSE_RESP, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "prefer-last-server", PR_O_PREF_LAST, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "logasap", PR_O_LOGASAP, PR_CAP_FE, 0, 0 },
+ { "nolinger", PR_O_TCP_NOLING, PR_CAP_FE | PR_CAP_BE, 0, 0 },
+ { "persist", PR_O_PERSIST, PR_CAP_BE, 0, 0 },
+ { "srvtcpka", PR_O_TCP_SRV_KA, PR_CAP_BE, 0, 0 },
+#ifdef USE_TPROXY
+ { "transparent", PR_O_TRANSP, PR_CAP_BE, 0, 0 },
+#else
+ { "transparent", 0, 0, 0, 0 },
+#endif
+
+ { NULL, 0, 0, 0, 0 }
+};
+
+/* proxy->options2 */
+const struct cfg_opt cfg_opts2[] =
+{
+#ifdef USE_LINUX_SPLICE
+ { "splice-request", PR_O2_SPLIC_REQ, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+ { "splice-response", PR_O2_SPLIC_RTR, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+ { "splice-auto", PR_O2_SPLIC_AUT, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+#else
+ { "splice-request", 0, 0, 0, 0 },
+ { "splice-response", 0, 0, 0, 0 },
+ { "splice-auto", 0, 0, 0, 0 },
+#endif
+ { "accept-invalid-http-request", PR_O2_REQBUG_OK, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "accept-invalid-http-response", PR_O2_RSPBUG_OK, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "dontlog-normal", PR_O2_NOLOGNORM, PR_CAP_FE, 0, 0 },
+ { "log-separate-errors", PR_O2_LOGERRORS, PR_CAP_FE, 0, 0 },
+ { "log-health-checks", PR_O2_LOGHCHKS, PR_CAP_BE, 0, 0 },
+ { "socket-stats", PR_O2_SOCKSTAT, PR_CAP_FE, 0, 0 },
+ { "tcp-smart-accept", PR_O2_SMARTACC, PR_CAP_FE, 0, 0 },
+ { "tcp-smart-connect", PR_O2_SMARTCON, PR_CAP_BE, 0, 0 },
+ { "independent-streams", PR_O2_INDEPSTR, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+ { "http-use-proxy-header", PR_O2_USE_PXHDR, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "http-pretend-keepalive", PR_O2_FAKE_KA, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "http-no-delay", PR_O2_NODELAY, PR_CAP_FE|PR_CAP_BE, 0, PR_MODE_HTTP },
+
+ {"h1-case-adjust-bogus-client", PR_O2_H1_ADJ_BUGCLI, PR_CAP_FE, 0, 0 },
+ {"h1-case-adjust-bogus-server", PR_O2_H1_ADJ_BUGSRV, PR_CAP_BE, 0, 0 },
+ {"disable-h2-upgrade", PR_O2_NO_H2_UPGRADE, PR_CAP_FE, 0, PR_MODE_HTTP },
+	{ NULL, 0, 0, 0, 0 }
+};
+
+static void free_stick_rules(struct list *rules)
+{
+ struct sticking_rule *rule, *ruleb;
+
+ list_for_each_entry_safe(rule, ruleb, rules, list) {
+ LIST_DELETE(&rule->list);
+ free_acl_cond(rule->cond);
+ release_sample_expr(rule->expr);
+ free(rule);
+ }
+}
+
+void free_proxy(struct proxy *p)
+{
+ struct server *s;
+ struct cap_hdr *h,*h_next;
+ struct listener *l,*l_next;
+ struct bind_conf *bind_conf, *bind_back;
+ struct acl_cond *cond, *condb;
+ struct acl *acl, *aclb;
+ struct server_rule *srule, *sruleb;
+ struct switching_rule *rule, *ruleb;
+ struct redirect_rule *rdr, *rdrb;
+ struct logsrv *log, *logb;
+ struct logformat_node *lf, *lfb;
+ struct proxy_deinit_fct *pxdf;
+ struct server_deinit_fct *srvdf;
+
+ if (!p)
+ return;
+
+ free(p->conf.file);
+ free(p->id);
+ free(p->cookie_name);
+ free(p->cookie_domain);
+ free(p->cookie_attrs);
+ free(p->lbprm.arg_str);
+ release_sample_expr(p->lbprm.expr);
+ free(p->server_state_file_name);
+ free(p->capture_name);
+ istfree(&p->monitor_uri);
+ free(p->rdp_cookie_name);
+ free(p->invalid_rep);
+ free(p->invalid_req);
+#if defined(CONFIG_HAP_TRANSPARENT)
+ free(p->conn_src.bind_hdr_name);
+#endif
+ if (p->conf.logformat_string != default_http_log_format &&
+ p->conf.logformat_string != default_tcp_log_format &&
+ p->conf.logformat_string != clf_http_log_format &&
+ p->conf.logformat_string != default_https_log_format)
+ free(p->conf.logformat_string);
+
+ free(p->conf.lfs_file);
+ free(p->conf.uniqueid_format_string);
+ istfree(&p->header_unique_id);
+ free(p->conf.uif_file);
+ if ((p->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_MAP)
+ free(p->lbprm.map.srv);
+
+ if (p->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ free(p->conf.logformat_sd_string);
+ free(p->conf.lfsd_file);
+
+ free(p->conf.error_logformat_string);
+ free(p->conf.elfs_file);
+
+ list_for_each_entry_safe(cond, condb, &p->mon_fail_cond, list) {
+ LIST_DELETE(&cond->list);
+ prune_acl_cond(cond);
+ free(cond);
+ }
+
+ EXTRA_COUNTERS_FREE(p->extra_counters_fe);
+ EXTRA_COUNTERS_FREE(p->extra_counters_be);
+
+ list_for_each_entry_safe(acl, aclb, &p->acl, list) {
+ LIST_DELETE(&acl->list);
+ prune_acl(acl);
+ free(acl);
+ }
+
+ list_for_each_entry_safe(srule, sruleb, &p->server_rules, list) {
+ LIST_DELETE(&srule->list);
+ prune_acl_cond(srule->cond);
+ list_for_each_entry_safe(lf, lfb, &srule->expr, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ free(srule->file);
+ free(srule->cond);
+ free(srule);
+ }
+
+ list_for_each_entry_safe(rule, ruleb, &p->switching_rules, list) {
+ LIST_DELETE(&rule->list);
+ if (rule->cond) {
+ prune_acl_cond(rule->cond);
+ free(rule->cond);
+ }
+ free(rule->file);
+ free(rule);
+ }
+
+ list_for_each_entry_safe(rdr, rdrb, &p->redirect_rules, list) {
+ LIST_DELETE(&rdr->list);
+ if (rdr->cond) {
+ prune_acl_cond(rdr->cond);
+ free(rdr->cond);
+ }
+ free(rdr->rdr_str);
+ list_for_each_entry_safe(lf, lfb, &rdr->rdr_fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ free(rdr);
+ }
+
+ list_for_each_entry_safe(log, logb, &p->logsrvs, list) {
+ LIST_DEL_INIT(&log->list);
+ free_logsrv(log);
+ }
+
+ list_for_each_entry_safe(lf, lfb, &p->logformat, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+
+ list_for_each_entry_safe(lf, lfb, &p->logformat_sd, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+
+ list_for_each_entry_safe(lf, lfb, &p->format_unique_id, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+
+ list_for_each_entry_safe(lf, lfb, &p->logformat_error, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+
+ free_act_rules(&p->tcp_req.inspect_rules);
+ free_act_rules(&p->tcp_rep.inspect_rules);
+ free_act_rules(&p->tcp_req.l4_rules);
+ free_act_rules(&p->tcp_req.l5_rules);
+ free_act_rules(&p->http_req_rules);
+ free_act_rules(&p->http_res_rules);
+ free_act_rules(&p->http_after_res_rules);
+
+ free_stick_rules(&p->storersp_rules);
+ free_stick_rules(&p->sticking_rules);
+
+ h = p->req_cap;
+ while (h) {
+ if (p->defpx && h == p->defpx->req_cap)
+ break;
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }/* end while(h) */
+
+ h = p->rsp_cap;
+ while (h) {
+ if (p->defpx && h == p->defpx->rsp_cap)
+ break;
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }/* end while(h) */
+
+ s = p->srv;
+ while (s) {
+ list_for_each_entry(srvdf, &server_deinit_list, list)
+ srvdf->fct(s);
+ s = srv_drop(s);
+ }/* end while(s) */
+
+ list_for_each_entry_safe(l, l_next, &p->conf.listeners, by_fe) {
+ LIST_DELETE(&l->by_fe);
+ LIST_DELETE(&l->by_bind);
+ free(l->name);
+ free(l->per_thr);
+ free(l->counters);
+
+ EXTRA_COUNTERS_FREE(l->extra_counters);
+ free(l);
+ }
+
+ /* Release unused SSL configs. */
+ list_for_each_entry_safe(bind_conf, bind_back, &p->conf.bind, by_fe) {
+ if (bind_conf->xprt->destroy_bind_conf)
+ bind_conf->xprt->destroy_bind_conf(bind_conf);
+ free(bind_conf->file);
+ free(bind_conf->arg);
+ LIST_DELETE(&bind_conf->by_fe);
+ free(bind_conf);
+ }
+
+ flt_deinit(p);
+
+ list_for_each_entry(pxdf, &proxy_deinit_list, list)
+ pxdf->fct(p);
+
+ free(p->desc);
+ istfree(&p->fwdfor_hdr_name);
+ istfree(&p->orgto_hdr_name);
+
+ task_destroy(p->task);
+
+ pool_destroy(p->req_cap_pool);
+ pool_destroy(p->rsp_cap_pool);
+ if (p->table)
+ pool_destroy(p->table->pool);
+
+ HA_RWLOCK_DESTROY(&p->lbprm.lock);
+ HA_RWLOCK_DESTROY(&p->lock);
+
+ proxy_unref_defaults(p);
+ ha_free(&p);
+}
+
+/*
+ * This function returns a string naming the proxy's capabilities, suitable
+ * for comprehensible error messages. Specifically, it will return the words
+ * "frontend", "backend" when appropriate, "defaults" if it corresponds to a
+ * defaults section, or "proxy" for all other cases including the proxies
+ * declared in "listen" mode.
+ */
+const char *proxy_cap_str(int cap)
+{
+ if (cap & PR_CAP_DEF)
+ return "defaults";
+
+ if ((cap & PR_CAP_LISTEN) != PR_CAP_LISTEN) {
+ if (cap & PR_CAP_FE)
+ return "frontend";
+ else if (cap & PR_CAP_BE)
+ return "backend";
+ }
+ return "proxy";
+}
+
+/*
+ * This function returns a string containing the mode of the proxy in a format
+ * suitable for error messages.
+ */
+const char *proxy_mode_str(int mode)
+{
+ if (mode == PR_MODE_TCP)
+ return "tcp";
+ else if (mode == PR_MODE_HTTP)
+ return "http";
+ else if (mode == PR_MODE_CLI)
+ return "cli";
+ else if (mode == PR_MODE_SYSLOG)
+ return "syslog";
+ else if (mode == PR_MODE_PEERS)
+ return "peers";
+ else
+ return "unknown";
+}
+
+/* try to find among known options the one that looks closest to <word> by
+ * counting transitions between letters, digits and other characters. Will
+ * return the best matching word if found, otherwise NULL. An optional array
+ * of extra words to compare may be passed in <extra>, but it must then be
+ * terminated by a NULL entry. If unused it may be NULL.
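+ *
+ * For illustration, a caller reporting a typo might do something like this
+ * (hypothetical sketch):
+ *   const char *best = proxy_find_best_option("ftplog", NULL);
+ *   if (best)
+ *       memprintf(err, "unknown option 'ftplog'; did you mean '%s' maybe?", best);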
+ */
+const char *proxy_find_best_option(const char *word, const char **extra)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ int index;
+
+ make_word_fingerprint(word_sig, word);
+
+ for (index = 0; cfg_opts[index].name; index++) {
+ make_word_fingerprint(list_sig, cfg_opts[index].name);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = cfg_opts[index].name;
+ }
+ }
+
+ for (index = 0; cfg_opts2[index].name; index++) {
+ make_word_fingerprint(list_sig, cfg_opts2[index].name);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = cfg_opts2[index].name;
+ }
+ }
+
+ while (extra && *extra) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = *extra;
+ }
+ extra++;
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+ return best_ptr;
+}
+
+/*
+ * This function scans the list of backends and servers to retrieve the first
+ * backend and the first server with the given names, and sets them in both
+ * parameters. It returns non-zero when both are found, otherwise zero, and
+ * any pointer it could not resolve is set to NULL. If a NULL pointer is
+ * passed for the
+ * backend, only the pointer to the server will be updated.
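+ *
+ * Typical use (hypothetical example, e.g. from a CLI handler):
+ *   struct proxy *px; struct server *sv;
+ *   if (!get_backend_server("app", "#3", &px, &sv))
+ *       ... report whichever of <px>/<sv> is NULL as not found ...
+ * Here "#3" selects the server whose numeric ID (puid) is 3.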
+ */
+int get_backend_server(const char *bk_name, const char *sv_name,
+ struct proxy **bk, struct server **sv)
+{
+ struct proxy *p;
+ struct server *s;
+ int sid;
+
+ *sv = NULL;
+
+ sid = -1;
+ if (*sv_name == '#')
+ sid = atoi(sv_name + 1);
+
+ p = proxy_be_by_name(bk_name);
+ if (bk)
+ *bk = p;
+ if (!p)
+ return 0;
+
+ for (s = p->srv; s; s = s->next)
+ if ((sid >= 0 && s->puid == sid) ||
+ (sid < 0 && strcmp(s->id, sv_name) == 0))
+ break;
+ *sv = s;
+ if (!s)
+ return 0;
+ return 1;
+}
+
+/* This function parses a "timeout" statement in a proxy section. It returns
+ * -1 if there is any error, 1 for a warning, otherwise zero. If it does not
+ * return zero, it will write an error or warning message into a preallocated
+ * buffer returned at <err>. The trailing newline must not be written. The
+ * function must
+ * be called with <args> pointing to the first command line word, with <proxy>
+ * pointing to the proxy being parsed, and <defpx> to the default proxy or NULL.
+ * As a special case for compatibility with older configs, it also accepts
+ * "{cli|srv|con}timeout" in args[0].
+ */
+static int proxy_parse_timeout(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ unsigned timeout;
+ int retval, cap;
+ const char *res, *name;
+ int *tv = NULL;
+ const int *td = NULL;
+
+ retval = 0;
+
+ /* simply skip "timeout" but remain compatible with old form */
+ if (strcmp(args[0], "timeout") == 0)
+ args++;
+
+ name = args[0];
+ if (strcmp(args[0], "client") == 0) {
+ name = "client";
+ tv = &proxy->timeout.client;
+ td = &defpx->timeout.client;
+ cap = PR_CAP_FE;
+ } else if (strcmp(args[0], "tarpit") == 0) {
+ tv = &proxy->timeout.tarpit;
+ td = &defpx->timeout.tarpit;
+ cap = PR_CAP_FE | PR_CAP_BE;
+ } else if (strcmp(args[0], "http-keep-alive") == 0) {
+ tv = &proxy->timeout.httpka;
+ td = &defpx->timeout.httpka;
+ cap = PR_CAP_FE | PR_CAP_BE;
+ } else if (strcmp(args[0], "http-request") == 0) {
+ tv = &proxy->timeout.httpreq;
+ td = &defpx->timeout.httpreq;
+ cap = PR_CAP_FE | PR_CAP_BE;
+ } else if (strcmp(args[0], "server") == 0) {
+ name = "server";
+ tv = &proxy->timeout.server;
+ td = &defpx->timeout.server;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "connect") == 0) {
+ name = "connect";
+ tv = &proxy->timeout.connect;
+ td = &defpx->timeout.connect;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "check") == 0) {
+ tv = &proxy->timeout.check;
+ td = &defpx->timeout.check;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "queue") == 0) {
+ tv = &proxy->timeout.queue;
+ td = &defpx->timeout.queue;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "tunnel") == 0) {
+ tv = &proxy->timeout.tunnel;
+ td = &defpx->timeout.tunnel;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "client-fin") == 0) {
+ tv = &proxy->timeout.clientfin;
+ td = &defpx->timeout.clientfin;
+ cap = PR_CAP_FE;
+ } else if (strcmp(args[0], "server-fin") == 0) {
+ tv = &proxy->timeout.serverfin;
+ td = &defpx->timeout.serverfin;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "clitimeout") == 0) {
+ memprintf(err, "the '%s' directive is not supported anymore since HAProxy 2.1. Use 'timeout client'.", args[0]);
+ return -1;
+ } else if (strcmp(args[0], "srvtimeout") == 0) {
+ memprintf(err, "the '%s' directive is not supported anymore since HAProxy 2.1. Use 'timeout server'.", args[0]);
+ return -1;
+ } else if (strcmp(args[0], "contimeout") == 0) {
+ memprintf(err, "the '%s' directive is not supported anymore since HAProxy 2.1. Use 'timeout connect'.", args[0]);
+ return -1;
+ } else {
+ memprintf(err,
+ "'timeout' supports 'client', 'server', 'connect', 'check', "
+ "'queue', 'http-keep-alive', 'http-request', 'tunnel', 'tarpit', "
+ "'client-fin' and 'server-fin' (got '%s')",
+ args[0]);
+ return -1;
+ }
+
+ if (*args[1] == 0) {
+ memprintf(err, "'timeout %s' expects an integer value (in milliseconds)", name);
+ return -1;
+ }
+
+ res = parse_time_err(args[1], &timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to 'timeout %s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], name);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to 'timeout %s' (minimum non-null value is 1 ms)",
+ args[1], name);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in 'timeout %s'", *res, name);
+ return -1;
+ }
+
+ if (!(proxy->cap & cap)) {
+ memprintf(err, "'timeout %s' will be ignored because %s '%s' has no %s capability",
+ name, proxy_type_str(proxy), proxy->id,
+ (cap & PR_CAP_BE) ? "backend" : "frontend");
+ retval = 1;
+ }
+ else if (defpx && *tv != *td) {
+ memprintf(err, "overwriting 'timeout %s' which was already specified", name);
+ retval = 1;
+ }
+
+ if (*args[2] != 0) {
+ memprintf(err, "'timeout %s' : unexpected extra argument '%s' after value '%s'.", name, args[2], args[1]);
+ retval = -1;
+ }
+
+ *tv = MS_TO_TICKS(timeout);
+ return retval;
+}
+
+/* This function parses a "rate-limit" statement in a proxy section. It returns
+ * -1 if there is any error, 1 for a warning, otherwise zero. If it does not
+ * return zero, it will write an error or warning message into a preallocated
+ * buffer returned at <err>. The function must be called with <args> pointing
+ * to the first command line word, with <proxy> pointing to the proxy being
+ * parsed, and <defpx> to the default proxy or NULL.
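+ *
+ * Example of a configuration line this parser handles (the value is only an
+ * example):
+ *   rate-limit sessions 100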
+ */
+static int proxy_parse_rate_limit(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ char *res;
+ unsigned int *tv = NULL;
+ const unsigned int *td = NULL;
+ unsigned int val;
+
+ retval = 0;
+
+ if (strcmp(args[1], "sessions") == 0) {
+ tv = &proxy->fe_sps_lim;
+ td = &defpx->fe_sps_lim;
+ }
+ else {
+ memprintf(err, "'%s' only supports 'sessions' (got '%s')", args[0], args[1]);
+ return -1;
+ }
+
+ if (*args[2] == 0) {
+ memprintf(err, "'%s %s' expects expects an integer value (in sessions/second)", args[0], args[1]);
+ return -1;
+ }
+
+ val = strtoul(args[2], &res, 0);
+ if (*res) {
+ memprintf(err, "'%s %s' : unexpected character '%c' in integer value '%s'", args[0], args[1], *res, args[2]);
+ return -1;
+ }
+
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s %s will be ignored because %s '%s' has no frontend capability",
+ args[0], args[1], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ else if (defpx && *tv != *td) {
+ memprintf(err, "overwriting %s %s which was already specified", args[0], args[1]);
+ retval = 1;
+ }
+
+ *tv = val;
+ return retval;
+}
+
+/* This function parses a "max-keep-alive-queue" statement in a proxy section.
+ * It returns -1 if there is any error, 1 for a warning, otherwise zero. If it
+ * does not return zero, it will write an error or warning message into a
+ * preallocated buffer returned at <err>. The function must be called with
+ * <args> pointing to the first command line word, with <proxy> pointing to
+ * the proxy being parsed, and <defpx> to the default proxy or NULL.
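+ *
+ * Example (hypothetical values; per the parser, -1 disables the limit):
+ *   max-keep-alive-queue 10
+ *   max-keep-alive-queue -1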
+ */
+static int proxy_parse_max_ka_queue(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ char *res;
+ unsigned int val;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects expects an integer value (or -1 to disable)", args[0]);
+ return -1;
+ }
+
+ val = strtol(args[1], &res, 0);
+ if (*res) {
+ memprintf(err, "'%s' : unexpected character '%c' in integer value '%s'", args[0], *res, args[1]);
+ return -1;
+ }
+
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+
+ /* we store <val+1> so that a user-specified value of -1 is stored as zero (the default) */
+ proxy->max_ka_queue = val + 1;
+ return retval;
+}
+
+/* This function parses a "declare" statement in a proxy section. It returns -1
+ * if there is any error, 1 for warning, otherwise 0. If it does not return zero,
+ * it will write an error or warning message into a preallocated buffer returned
+ * at <err>. The function must be called with <args> pointing to the first command
+ * line word, with <proxy> pointing to the proxy being parsed, and <defpx> to the
+ * default proxy or NULL.
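+ *
+ * Example of the only form currently supported (length value is arbitrary):
+ *   declare capture request len 32
+ *   declare capture response len 32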
+ */
+static int proxy_parse_declare(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ /* The capture keyword cannot be declared in a default proxy. */
+ if (curpx == defpx) {
+ memprintf(err, "'%s' not available in default section", args[0]);
+ return -1;
+ }
+
+ /* Capture keyword is only available in frontend. */
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err, "'%s' only available in frontend or listen section", args[0]);
+ return -1;
+ }
+
+ /* Check mandatory second keyword. */
+ if (!args[1] || !*args[1]) {
+ memprintf(err, "'%s' needs a second keyword that specify the type of declaration ('capture')", args[0]);
+ return -1;
+ }
+
+ /* For now, 'declare' can only declare capture slots, but in the
+ * future it may declare maps or variables as well. This section
+ * checks the second keyword and dispatches accordingly.
+ */
+ if (strcmp(args[1], "capture") == 0) {
+ char *error = NULL;
+ long len;
+ struct cap_hdr *hdr;
+
+ /* Check the next keyword. */
+ if (!args[2] || !*args[2] ||
+ (strcmp(args[2], "response") != 0 &&
+ strcmp(args[2], "request") != 0)) {
+ memprintf(err, "'%s %s' requires a direction ('request' or 'response')", args[0], args[1]);
+ return -1;
+ }
+
+ /* Check the 'len' keyword. */
+ if (!args[3] || !*args[3] || strcmp(args[3], "len") != 0) {
+ memprintf(err, "'%s %s' requires a capture length ('len')", args[0], args[1]);
+ return -1;
+ }
+
+ /* Check the length value. */
+ if (!args[4] || !*args[4]) {
+ memprintf(err, "'%s %s': 'len' requires a numeric value that represents the "
+ "capture length",
+ args[0], args[1]);
+ return -1;
+ }
+
+ /* convert the length value. */
+ len = strtol(args[4], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "'%s %s': cannot parse the length '%s'.",
+ args[0], args[1], args[3]);
+ return -1;
+ }
+
+ /* check length. */
+ if (len <= 0) {
+ memprintf(err, "length must be > 0");
+ return -1;
+ }
+
+ /* register the capture. */
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(err, "proxy '%s': out of memory while registering a capture", curpx->id);
+ return -1;
+ }
+ hdr->name = NULL; /* not a header capture */
+ hdr->namelen = 0;
+ hdr->len = len;
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+
+ if (strcmp(args[2], "request") == 0) {
+ hdr->next = curpx->req_cap;
+ hdr->index = curpx->nb_req_cap++;
+ curpx->req_cap = hdr;
+ }
+ if (strcmp(args[2], "response") == 0) {
+ hdr->next = curpx->rsp_cap;
+ hdr->index = curpx->nb_rsp_cap++;
+ curpx->rsp_cap = hdr;
+ }
+ return 0;
+ }
+ else {
+ memprintf(err, "unknown declaration type '%s' (supports 'capture')", args[1]);
+ return -1;
+ }
+}
+
+/* This function parses a "retry-on" statement */
+static int
+proxy_parse_retry_on(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int i;
+
+ if (!(*args[1])) {
+ memprintf(err, "'%s' needs at least one keyword to specify when to retry", args[0]);
+ return -1;
+ }
+ if (!(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "'%s' only available in backend or listen section", args[0]);
+ return -1;
+ }
+ curpx->retry_type = 0;
+ for (i = 1; *(args[i]); i++) {
+ if (strcmp(args[i], "conn-failure") == 0)
+ curpx->retry_type |= PR_RE_CONN_FAILED;
+ else if (strcmp(args[i], "empty-response") == 0)
+ curpx->retry_type |= PR_RE_DISCONNECTED;
+ else if (strcmp(args[i], "response-timeout") == 0)
+ curpx->retry_type |= PR_RE_TIMEOUT;
+ else if (strcmp(args[i], "401") == 0)
+ curpx->retry_type |= PR_RE_401;
+ else if (strcmp(args[i], "403") == 0)
+ curpx->retry_type |= PR_RE_403;
+ else if (strcmp(args[i], "404") == 0)
+ curpx->retry_type |= PR_RE_404;
+ else if (strcmp(args[i], "408") == 0)
+ curpx->retry_type |= PR_RE_408;
+ else if (strcmp(args[i], "425") == 0)
+ curpx->retry_type |= PR_RE_425;
+ else if (strcmp(args[i], "500") == 0)
+ curpx->retry_type |= PR_RE_500;
+ else if (strcmp(args[i], "501") == 0)
+ curpx->retry_type |= PR_RE_501;
+ else if (strcmp(args[i], "502") == 0)
+ curpx->retry_type |= PR_RE_502;
+ else if (strcmp(args[i], "503") == 0)
+ curpx->retry_type |= PR_RE_503;
+ else if (strcmp(args[i], "504") == 0)
+ curpx->retry_type |= PR_RE_504;
+ else if (strcmp(args[i], "0rtt-rejected") == 0)
+ curpx->retry_type |= PR_RE_EARLY_ERROR;
+ else if (strcmp(args[i], "junk-response") == 0)
+ curpx->retry_type |= PR_RE_JUNK_REQUEST;
+ else if (!(strcmp(args[i], "all-retryable-errors")))
+ curpx->retry_type |= PR_RE_CONN_FAILED | PR_RE_DISCONNECTED |
+ PR_RE_TIMEOUT | PR_RE_500 | PR_RE_502 |
+ PR_RE_503 | PR_RE_504 | PR_RE_EARLY_ERROR |
+ PR_RE_JUNK_REQUEST;
+ else if (strcmp(args[i], "none") == 0) {
+ if (i != 1 || *args[i + 1]) {
+ memprintf(err, "'%s' 'none' keyworld only usable alone", args[0]);
+ return -1;
+ }
+ } else {
+ memprintf(err, "'%s': unknown keyword '%s'", args[0], args[i]);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+#ifdef TCP_KEEPCNT
+/* This function parses "{cli|srv}tcpka-cnt" statements */
+static int proxy_parse_tcpka_cnt(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ char *res;
+ unsigned int tcpka_cnt;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects an integer value", args[0]);
+ return -1;
+ }
+
+ tcpka_cnt = strtol(args[1], &res, 0);
+ if (*res) {
+ memprintf(err, "'%s' : unexpected character '%c' in integer value '%s'", args[0], *res, args[1]);
+ return -1;
+ }
+
+ if (strcmp(args[0], "clitcpka-cnt") == 0) {
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no frontend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->clitcpka_cnt = tcpka_cnt;
+ } else if (strcmp(args[0], "srvtcpka-cnt") == 0) {
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->srvtcpka_cnt = tcpka_cnt;
+ } else {
+ /* unreachable */
+ memprintf(err, "'%s': unknown keyword", args[0]);
+ return -1;
+ }
+
+ return retval;
+}
+#endif
+
+#ifdef TCP_KEEPIDLE
+/* This function parses "{cli|srv}tcpka-idle" statements */
+static int proxy_parse_tcpka_idle(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ const char *res;
+ unsigned int tcpka_idle;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects an integer value", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &tcpka_idle, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+
+ if (strcmp(args[0], "clitcpka-idle") == 0) {
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no frontend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->clitcpka_idle = tcpka_idle;
+ } else if (strcmp(args[0], "srvtcpka-idle") == 0) {
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->srvtcpka_idle = tcpka_idle;
+ } else {
+ /* unreachable */
+ memprintf(err, "'%s': unknown keyword", args[0]);
+ return -1;
+ }
+
+ return retval;
+}
+#endif
+
+#ifdef TCP_KEEPINTVL
+/* This function parses "{cli|srv}tcpka-intvl" statements */
+static int proxy_parse_tcpka_intvl(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ const char *res;
+ unsigned int tcpka_intvl;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects an integer value", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &tcpka_intvl, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+
+ if (strcmp(args[0], "clitcpka-intvl") == 0) {
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no frontend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->clitcpka_intvl = tcpka_intvl;
+ } else if (strcmp(args[0], "srvtcpka-intvl") == 0) {
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->srvtcpka_intvl = tcpka_intvl;
+ } else {
+ /* unreachable */
+ memprintf(err, "'%s': unknown keyword", args[0]);
+ return -1;
+ }
+
+ return retval;
+}
+#endif
+
+/* This function inserts proxy <px> into the tree of known proxies (regular
+ * ones or defaults depending on px->cap & PR_CAP_DEF). The proxy's name is
+ * used as the storing key so it must already have been initialized.
+ */
+void proxy_store_name(struct proxy *px)
+{
+ struct eb_root *root = (px->cap & PR_CAP_DEF) ? &defproxy_by_name : &proxy_by_name;
+
+ px->conf.by_name.key = px->id;
+ ebis_insert(root, &px->conf.by_name);
+}
+
+/* Returns a pointer to the first proxy matching capabilities <cap> and id
+ * <id>. NULL is returned if no match is found. If <table> is non-zero, it
+ * only considers proxies having a table.
+ */
+struct proxy *proxy_find_by_id(int id, int cap, int table)
+{
+ struct eb32_node *n;
+
+ for (n = eb32_lookup(&used_proxy_id, id); n; n = eb32_next(n)) {
+ struct proxy *px = container_of(n, struct proxy, conf.id);
+
+ if (px->uuid != id)
+ break;
+
+ if ((px->cap & cap) != cap)
+ continue;
+
+ if (table && (!px->table || !px->table->size))
+ continue;
+
+ return px;
+ }
+ return NULL;
+}
+
+/* Returns a pointer to the first proxy matching either name <name>, or id
+ * <name> if <name> begins with a '#'. NULL is returned if no match is found.
+ * If <table> is non-zero, it only considers proxies having a table. The
+ * search is performed on the regular proxies, unless <cap> has PR_CAP_DEF
+ * set, in which case only the defaults tree is searched.
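+ * For example (hypothetical), proxy_find_by_name("#5", 0, 0) resolves the
+ * proxy with ID 5, while proxy_find_by_name("app", PR_CAP_BE, 0) resolves a
+ * backend named "app".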
+ */
+struct proxy *proxy_find_by_name(const char *name, int cap, int table)
+{
+ struct proxy *curproxy;
+
+ if (*name == '#' && !(cap & PR_CAP_DEF)) {
+ curproxy = proxy_find_by_id(atoi(name + 1), cap, table);
+ if (curproxy)
+ return curproxy;
+ }
+ else {
+ struct eb_root *root;
+ struct ebpt_node *node;
+
+ root = (cap & PR_CAP_DEF) ? &defproxy_by_name : &proxy_by_name;
+ for (node = ebis_lookup(root, name); node; node = ebpt_next(node)) {
+ curproxy = container_of(node, struct proxy, conf.by_name);
+
+ if (strcmp(curproxy->id, name) != 0)
+ break;
+
+ if ((curproxy->cap & cap) != cap)
+ continue;
+
+ if (table && (!curproxy->table || !curproxy->table->size))
+ continue;
+
+ return curproxy;
+ }
+ }
+ return NULL;
+}
+
+/* Finds the best match for a proxy with capabilities <cap>, name <name> and id
+ * <id>. At most one of <id> or <name> may be different provided that <cap> is
+ * valid. Either <id> or <name> may be left unspecified (0). The purpose is to
+ * find a proxy based on some information from a previous configuration, across
+ * reloads or during information exchange between peers.
+ *
+ * Names are looked up first if present, then IDs are compared if present. In
+ * case of an inexact match whatever is forced in the configuration has
+ * precedence in the following order :
+ * - 1) forced ID (proves a renaming / change of proxy type)
+ * - 2) proxy name+type (may indicate a move if ID differs)
+ * - 3) automatic ID+type (may indicate a renaming)
+ *
+ * Depending on what is found, we can end up in the following situations :
+ *
+ * name id cap | possible causes
+ * -------------+-----------------
+ * -- -- -- | nothing found
+ * -- -- ok | nothing found
+ * -- ok -- | proxy deleted, ID points to next one
+ * -- ok ok | proxy renamed, or deleted with ID pointing to next one
+ * ok -- -- | proxy deleted, but other half with same name still here (before)
+ * ok -- ok | proxy's ID changed (proxy moved in the config file)
+ * ok ok -- | proxy deleted, but other half with same name still here (after)
+ * ok ok ok | perfect match
+ *
+ * Upon return if <diff> is not NULL, it is zeroed then filled with up to 3 bits :
+ * - PR_FBM_MISMATCH_ID : proxy was found but ID differs
+ * (and ID was not zero)
+ * - PR_FBM_MISMATCH_NAME : proxy was found by ID but name differs
+ * (and name was not NULL)
+ * - PR_FBM_MISMATCH_PROXYTYPE : a proxy of different type was found with
+ * the same name and/or id
+ *
+ * Only a valid proxy is returned. If capabilities do not match, NULL is
+ * returned. The caller can check <diff> to report detailed warnings / errors,
+ * and decide whether or not to use what was found.
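+ *
+ * A caller might use it like this (hypothetical sketch):
+ *   int diff;
+ *   struct proxy *px = proxy_find_best_match(PR_CAP_BE, "app", 12, &diff);
+ *   if (px && (diff & PR_FBM_MISMATCH_ID))
+ *       ha_warning("proxy 'app' was found but its ID changed\n");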
+ */
+struct proxy *proxy_find_best_match(int cap, const char *name, int id, int *diff)
+{
+ struct proxy *byname;
+ struct proxy *byid;
+
+ if (!name && !id)
+ return NULL;
+
+ if (diff)
+ *diff = 0;
+
+ byname = byid = NULL;
+
+ if (name) {
+ byname = proxy_find_by_name(name, cap, 0);
+ if (byname && (!id || byname->uuid == id))
+ return byname;
+ }
+
+ /* remaining possibilities :
+ * - name not set
+ * - name set but not found
+ * - name found, but ID doesn't match.
+ */
+ if (id) {
+ byid = proxy_find_by_id(id, cap, 0);
+ if (byid) {
+ if (byname) {
+ /* id+type found, name+type found, but not all 3.
+ * ID wins only if forced, otherwise name wins.
+ */
+ if (byid->options & PR_O_FORCED_ID) {
+ if (diff)
+ *diff |= PR_FBM_MISMATCH_NAME;
+ return byid;
+ }
+ else {
+ if (diff)
+ *diff |= PR_FBM_MISMATCH_ID;
+ return byname;
+ }
+ }
+
+ /* remaining possibilities :
+ * - name not set
+ * - name set but not found
+ */
+ if (name && diff)
+ *diff |= PR_FBM_MISMATCH_NAME;
+ return byid;
+ }
+
+ /* ID not found */
+ if (byname) {
+ if (diff)
+ *diff |= PR_FBM_MISMATCH_ID;
+ return byname;
+ }
+ }
+
+ /* All remaining possibilities will lead to NULL. If we can report more
+ * detailed information to the caller about changed types and/or name,
+ * we'll do it. For example, we could detect that "listen foo" was
+ * split into "frontend foo_ft" and "backend foo_bk" if IDs are forced.
+ * - name not set, ID not found
+ * - name not found, ID not set
+ * - name not found, ID not found
+ */
+ if (!diff)
+ return NULL;
+
+ if (name) {
+ byname = proxy_find_by_name(name, 0, 0);
+ if (byname && (!id || byname->uuid == id))
+ *diff |= PR_FBM_MISMATCH_PROXYTYPE;
+ }
+
+ if (id) {
+ byid = proxy_find_by_id(id, 0, 0);
+ if (byid) {
+ if (!name)
+ *diff |= PR_FBM_MISMATCH_PROXYTYPE; /* only type changed */
+ else if (byid->options & PR_O_FORCED_ID)
+ *diff |= PR_FBM_MISMATCH_NAME | PR_FBM_MISMATCH_PROXYTYPE; /* name and type changed */
+ /* otherwise it's a different proxy that was returned */
+ }
+ }
+ return NULL;
+}
+
+/*
+ * This function finds a server with a matching name within the selected
+ * proxy. It also checks whether several servers share the requested name,
+ * since such duplicates often lead to unexpected behaviour.
+ */
+
+struct server *findserver(const struct proxy *px, const char *name)
+{
+ struct server *cursrv, *target = NULL;
+
+ if (!px)
+ return NULL;
+
+ for (cursrv = px->srv; cursrv; cursrv = cursrv->next) {
+ if (strcmp(cursrv->id, name) != 0)
+ continue;
+
+ if (!target) {
+ target = cursrv;
+ continue;
+ }
+
+ ha_alert("Refusing to use duplicated server '%s' found in proxy: %s!\n",
+ name, px->id);
+
+ return NULL;
+ }
+
+ return target;
+}
+
+/* This function checks that the designated proxy has no http directives
+ * enabled. It will output a warning if there are, and will fix some of them.
+ * It returns the number of fatal errors encountered. This should be called
+ * at the end of the configuration parsing if the proxy is not in http mode.
+ */
+int proxy_cfg_ensure_no_http(struct proxy *curproxy)
+{
+ if (curproxy->cookie_name != NULL) {
+ ha_warning("cookie will be ignored for %s '%s' (needs 'mode http').\n",
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (isttest(curproxy->monitor_uri)) {
+ ha_warning("monitor-uri will be ignored for %s '%s' (needs 'mode http').\n",
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (curproxy->lbprm.algo & BE_LB_NEED_HTTP) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ ha_warning("Layer 7 hash not possible for %s '%s' (needs 'mode http'). Falling back to round robin.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (curproxy->to_log & (LW_REQ | LW_RESP)) {
+ curproxy->to_log &= ~(LW_REQ | LW_RESP);
+ ha_warning("parsing [%s:%d] : HTTP log/header format not usable with %s '%s' (needs 'mode http').\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line,
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (curproxy->conf.logformat_string == default_http_log_format ||
+ curproxy->conf.logformat_string == clf_http_log_format) {
+ /* Note: we don't change the directive's file:line number */
+ curproxy->conf.logformat_string = default_tcp_log_format;
+ ha_warning("parsing [%s:%d] : 'option httplog' not usable with %s '%s' (needs 'mode http'). Falling back to 'option tcplog'.\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line,
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ else if (curproxy->conf.logformat_string == default_https_log_format) {
+ /* Note: we don't change the directive's file:line number */
+ curproxy->conf.logformat_string = default_tcp_log_format;
+ ha_warning("parsing [%s:%d] : 'option httpslog' not usable with %s '%s' (needs 'mode http'). Falling back to 'option tcplog'.\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line,
+ proxy_type_str(curproxy), curproxy->id);
+ }
+
+ return 0;
+}
+
+/* Perform the most basic initialization of a proxy:
+ * memset(), list_init(*), reset_timeouts(*).
+ * Any new proxy or peer should be initialized via this function.
+ */
+void init_new_proxy(struct proxy *p)
+{
+ memset(p, 0, sizeof(struct proxy));
+ p->obj_type = OBJ_TYPE_PROXY;
+ queue_init(&p->queue, p, NULL);
+ LIST_INIT(&p->acl);
+ LIST_INIT(&p->http_req_rules);
+ LIST_INIT(&p->http_res_rules);
+ LIST_INIT(&p->http_after_res_rules);
+ LIST_INIT(&p->redirect_rules);
+ LIST_INIT(&p->mon_fail_cond);
+ LIST_INIT(&p->switching_rules);
+ LIST_INIT(&p->server_rules);
+ LIST_INIT(&p->persist_rules);
+ LIST_INIT(&p->sticking_rules);
+ LIST_INIT(&p->storersp_rules);
+ LIST_INIT(&p->tcp_req.inspect_rules);
+ LIST_INIT(&p->tcp_rep.inspect_rules);
+ LIST_INIT(&p->tcp_req.l4_rules);
+ LIST_INIT(&p->tcp_req.l5_rules);
+ MT_LIST_INIT(&p->listener_queue);
+ LIST_INIT(&p->logsrvs);
+ LIST_INIT(&p->logformat);
+ LIST_INIT(&p->logformat_sd);
+ LIST_INIT(&p->format_unique_id);
+ LIST_INIT(&p->logformat_error);
+ LIST_INIT(&p->conf.bind);
+ LIST_INIT(&p->conf.listeners);
+ LIST_INIT(&p->conf.errors);
+ LIST_INIT(&p->conf.args.list);
+ LIST_INIT(&p->filter_configs);
+ LIST_INIT(&p->tcpcheck_rules.preset_vars);
+
+ p->defsrv.id = "default-server";
+ p->conf.used_listener_id = EB_ROOT;
+ p->conf.used_server_id = EB_ROOT;
+ p->used_server_addr = EB_ROOT_UNIQUE;
+
+ /* Timeouts are defined as -1 */
+ proxy_reset_timeouts(p);
+ p->tcp_rep.inspect_delay = TICK_ETERNITY;
+
+ /* initial uuid is unassigned (-1) */
+ p->uuid = -1;
+
+ /* Default to only allow L4 retries */
+ p->retry_type = PR_RE_CONN_FAILED;
+
+ p->extra_counters_fe = NULL;
+ p->extra_counters_be = NULL;
+
+ HA_RWLOCK_INIT(&p->lock);
+}
+
+/* Preset default settings onto proxy <defproxy>. */
+void proxy_preset_defaults(struct proxy *defproxy)
+{
+ defproxy->mode = PR_MODE_TCP;
+ defproxy->flags = 0;
+ if (!(defproxy->cap & PR_CAP_INT)) {
+ defproxy->maxconn = cfg_maxpconn;
+ defproxy->conn_retries = CONN_RETRIES;
+ }
+ defproxy->redispatch_after = 0;
+ defproxy->options = PR_O_REUSE_SAFE;
+ if (defproxy->cap & PR_CAP_INT)
+ defproxy->options2 |= PR_O2_INDEPSTR;
+ defproxy->max_out_conns = MAX_SRV_LIST;
+
+ defproxy->defsrv.check.inter = DEF_CHKINTR;
+ defproxy->defsrv.check.fastinter = 0;
+ defproxy->defsrv.check.downinter = 0;
+ defproxy->defsrv.agent.inter = DEF_CHKINTR;
+ defproxy->defsrv.agent.fastinter = 0;
+ defproxy->defsrv.agent.downinter = 0;
+ defproxy->defsrv.check.rise = DEF_RISETIME;
+ defproxy->defsrv.check.fall = DEF_FALLTIME;
+ defproxy->defsrv.agent.rise = DEF_AGENT_RISETIME;
+ defproxy->defsrv.agent.fall = DEF_AGENT_FALLTIME;
+ defproxy->defsrv.check.port = 0;
+ defproxy->defsrv.agent.port = 0;
+ defproxy->defsrv.maxqueue = 0;
+ defproxy->defsrv.minconn = 0;
+ defproxy->defsrv.maxconn = 0;
+ defproxy->defsrv.max_reuse = -1;
+ defproxy->defsrv.max_idle_conns = -1;
+ defproxy->defsrv.pool_purge_delay = 5000;
+ defproxy->defsrv.slowstart = 0;
+ defproxy->defsrv.onerror = DEF_HANA_ONERR;
+ defproxy->defsrv.consecutive_errors_limit = DEF_HANA_ERRLIMIT;
+ defproxy->defsrv.uweight = defproxy->defsrv.iweight = 1;
+
+ defproxy->email_alert.level = LOG_ALERT;
+ defproxy->load_server_state_from_file = PR_SRV_STATE_FILE_UNSPEC;
+#if defined(USE_QUIC)
+ quic_transport_params_init(&defproxy->defsrv.quic_params, 0);
+#endif
+
+ if (defproxy->cap & PR_CAP_INT)
+ defproxy->timeout.connect = 5000;
+}
+
+/* Frees all dynamic settings allocated on a default proxy that's about to be
+ * destroyed. This is a subset of the complete proxy deinit code, but these
+ * should probably be merged ultimately. Note that most of the fields are not
+ * even reset, so extreme care is required here, and calling
+ * proxy_preset_defaults() afterwards would be safer.
+ */
+void proxy_free_defaults(struct proxy *defproxy)
+{
+ struct acl *acl, *aclb;
+ struct logsrv *log, *logb;
+ struct cap_hdr *h, *h_next;
+
+ ha_free(&defproxy->id);
+ ha_free(&defproxy->conf.file);
+ ha_free(&defproxy->check_command);
+ ha_free(&defproxy->check_path);
+ ha_free(&defproxy->cookie_name);
+ ha_free(&defproxy->rdp_cookie_name);
+ ha_free(&defproxy->dyncookie_key);
+ ha_free(&defproxy->cookie_domain);
+ ha_free(&defproxy->cookie_attrs);
+ ha_free(&defproxy->lbprm.arg_str);
+ ha_free(&defproxy->capture_name);
+ istfree(&defproxy->monitor_uri);
+ ha_free(&defproxy->defbe.name);
+ ha_free(&defproxy->conn_src.iface_name);
+ istfree(&defproxy->fwdfor_hdr_name);
+ istfree(&defproxy->orgto_hdr_name);
+ istfree(&defproxy->server_id_hdr_name);
+
+ list_for_each_entry_safe(acl, aclb, &defproxy->acl, list) {
+ LIST_DELETE(&acl->list);
+ prune_acl(acl);
+ free(acl);
+ }
+
+ free_act_rules(&defproxy->tcp_req.inspect_rules);
+ free_act_rules(&defproxy->tcp_rep.inspect_rules);
+ free_act_rules(&defproxy->tcp_req.l4_rules);
+ free_act_rules(&defproxy->tcp_req.l5_rules);
+ free_act_rules(&defproxy->http_req_rules);
+ free_act_rules(&defproxy->http_res_rules);
+ free_act_rules(&defproxy->http_after_res_rules);
+
+ h = defproxy->req_cap;
+ while (h) {
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }
+
+ h = defproxy->rsp_cap;
+ while (h) {
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }
+
+ if (defproxy->conf.logformat_string != default_http_log_format &&
+ defproxy->conf.logformat_string != default_tcp_log_format &&
+ defproxy->conf.logformat_string != clf_http_log_format &&
+ defproxy->conf.logformat_string != default_https_log_format) {
+ ha_free(&defproxy->conf.logformat_string);
+ }
+
+ if (defproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ ha_free(&defproxy->conf.logformat_sd_string);
+
+ list_for_each_entry_safe(log, logb, &defproxy->logsrvs, list) {
+ LIST_DEL_INIT(&log->list);
+ free_logsrv(log);
+ }
+
+ ha_free(&defproxy->conf.uniqueid_format_string);
+ ha_free(&defproxy->conf.error_logformat_string);
+ ha_free(&defproxy->conf.lfs_file);
+ ha_free(&defproxy->conf.lfsd_file);
+ ha_free(&defproxy->conf.uif_file);
+ ha_free(&defproxy->conf.elfs_file);
+ chunk_destroy(&defproxy->log_tag);
+
+ free_email_alert(defproxy);
+ proxy_release_conf_errors(defproxy);
+ deinit_proxy_tcpcheck(defproxy);
+
+ /* FIXME: we cannot free uri_auth because it might already be used by
+ * another proxy (legacy code for stats URI ...). Refcount anyone ?
+ */
+}
+
+/* Deletes a defproxy from the tree if still in it, then frees its content
+ * and its storage. Nothing is done if <px> is NULL or if it doesn't have
+ * PR_CAP_DEF set, so it is safe to pass it the direct result of a lookup
+ * function.
+ */
+void proxy_destroy_defaults(struct proxy *px)
+{
+ if (!px)
+ return;
+ if (!(px->cap & PR_CAP_DEF))
+ return;
+ BUG_ON(px->conf.refcount != 0);
+ ebpt_delete(&px->conf.by_name);
+ proxy_free_defaults(px);
+ free(px);
+}
+
+/* delete all unreferenced default proxies. A default proxy is unreferenced if
+ * its refcount is equal to zero.
+ */
+void proxy_destroy_all_unref_defaults(void)
+{
+ struct ebpt_node *n;
+
+ n = ebpt_first(&defproxy_by_name);
+ while (n) {
+ struct proxy *px = container_of(n, struct proxy, conf.by_name);
+ BUG_ON(!(px->cap & PR_CAP_DEF));
+ n = ebpt_next(n);
+ if (!px->conf.refcount)
+ proxy_destroy_defaults(px);
+ }
+}
+
+/* Add a reference on the default proxy <defpx> for the proxy <px>. Nothing is
+ * done if <px> already references <defpx>. Otherwise, the default proxy
+ * refcount is incremented by one. For now, this operation is not thread safe
+ * and is performed during the init stage only.
+ */
+void proxy_ref_defaults(struct proxy *px, struct proxy *defpx)
+{
+ if (px->defpx == defpx)
+ return;
+ BUG_ON(px->defpx != NULL);
+ px->defpx = defpx;
+ defpx->conf.refcount++;
+}
+
+/* proxy <px> removes its reference on its default proxy. The default proxy
+ * refcount is decremented by one. If it was the last reference, the
+ * corresponding default proxy is destroyed. For now this operation is not
+ * thread safe and is performed during the deinit stage only.
+ */
+void proxy_unref_defaults(struct proxy *px)
+{
+ if (px->defpx == NULL)
+ return;
+ if (!--px->defpx->conf.refcount)
+ proxy_destroy_defaults(px->defpx);
+ px->defpx = NULL;
+}
+
+/* Allocates a new proxy <name> of type <cap>.
+ * Returns the proxy instance on success. On error, NULL is returned.
+ */
+struct proxy *alloc_new_proxy(const char *name, unsigned int cap, char **errmsg)
+{
+ struct proxy *curproxy;
+
+ if ((curproxy = calloc(1, sizeof(*curproxy))) == NULL) {
+ memprintf(errmsg, "proxy '%s': out of memory", name);
+ goto fail;
+ }
+
+ init_new_proxy(curproxy);
+ curproxy->last_change = now.tv_sec;
+ curproxy->id = strdup(name);
+ curproxy->cap = cap;
+
+ if (!(cap & PR_CAP_INT))
+ proxy_store_name(curproxy);
+
+ return curproxy;
+
+ fail:
+ /* Note: in case of fatal error here, we WILL make valgrind unhappy,
+ * but it's not worth trying to unroll everything here just before
+ * quitting.
+ */
+ free(curproxy);
+ return NULL;
+}
+
+/* Copy the proxy settings from <defproxy> to <curproxy>.
+ * Returns 0 on success.
+ * Returns 1 on error. <errmsg> will be allocated with an error description.
+ */
+static int proxy_defproxy_cpy(struct proxy *curproxy, const struct proxy *defproxy,
+ char **errmsg)
+{
+ struct logsrv *tmplogsrv;
+ char *tmpmsg = NULL;
+
+ /* set default values from the specified default proxy */
+ srv_settings_cpy(&curproxy->defsrv, &defproxy->defsrv, 0);
+
+ curproxy->flags = (defproxy->flags & PR_FL_DISABLED); /* Only inherit from disabled flag */
+ curproxy->options = defproxy->options;
+ curproxy->options2 = defproxy->options2;
+ curproxy->no_options = defproxy->no_options;
+ curproxy->no_options2 = defproxy->no_options2;
+ curproxy->except_xff_net = defproxy->except_xff_net;
+ curproxy->except_xot_net = defproxy->except_xot_net;
+ curproxy->retry_type = defproxy->retry_type;
+ curproxy->tcp_req.inspect_delay = defproxy->tcp_req.inspect_delay;
+ curproxy->tcp_rep.inspect_delay = defproxy->tcp_rep.inspect_delay;
+
+ if (isttest(defproxy->fwdfor_hdr_name))
+ curproxy->fwdfor_hdr_name = istdup(defproxy->fwdfor_hdr_name);
+
+ if (isttest(defproxy->orgto_hdr_name))
+ curproxy->orgto_hdr_name = istdup(defproxy->orgto_hdr_name);
+
+ if (isttest(defproxy->server_id_hdr_name))
+ curproxy->server_id_hdr_name = istdup(defproxy->server_id_hdr_name);
+
+ /* initialize error relocations */
+ if (!proxy_dup_default_conf_errors(curproxy, defproxy, &tmpmsg)) {
+ memprintf(errmsg, "proxy '%s' : %s", curproxy->id, tmpmsg);
+ free(tmpmsg);
+ return 1;
+ }
+
+ if (curproxy->cap & PR_CAP_FE) {
+ curproxy->maxconn = defproxy->maxconn;
+ curproxy->backlog = defproxy->backlog;
+ curproxy->fe_sps_lim = defproxy->fe_sps_lim;
+
+ curproxy->to_log = defproxy->to_log & ~LW_COOKIE & ~LW_REQHDR & ~LW_RSPHDR;
+ curproxy->max_out_conns = defproxy->max_out_conns;
+
+ curproxy->clitcpka_cnt = defproxy->clitcpka_cnt;
+ curproxy->clitcpka_idle = defproxy->clitcpka_idle;
+ curproxy->clitcpka_intvl = defproxy->clitcpka_intvl;
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ curproxy->lbprm.algo = defproxy->lbprm.algo;
+ curproxy->lbprm.hash_balance_factor = defproxy->lbprm.hash_balance_factor;
+ curproxy->fullconn = defproxy->fullconn;
+ curproxy->conn_retries = defproxy->conn_retries;
+ curproxy->redispatch_after = defproxy->redispatch_after;
+ curproxy->max_ka_queue = defproxy->max_ka_queue;
+
+ curproxy->tcpcheck_rules.flags = (defproxy->tcpcheck_rules.flags & ~TCPCHK_RULES_UNUSED_RS);
+ curproxy->tcpcheck_rules.list = defproxy->tcpcheck_rules.list;
+ if (!LIST_ISEMPTY(&defproxy->tcpcheck_rules.preset_vars)) {
+ if (!dup_tcpcheck_vars(&curproxy->tcpcheck_rules.preset_vars,
+ &defproxy->tcpcheck_rules.preset_vars)) {
+ memprintf(errmsg, "proxy '%s': failed to duplicate tcpcheck preset-vars", curproxy->id);
+ return 1;
+ }
+ }
+
+ curproxy->ck_opts = defproxy->ck_opts;
+ if (defproxy->cookie_name)
+ curproxy->cookie_name = strdup(defproxy->cookie_name);
+ curproxy->cookie_len = defproxy->cookie_len;
+
+ if (defproxy->dyncookie_key)
+ curproxy->dyncookie_key = strdup(defproxy->dyncookie_key);
+ if (defproxy->cookie_domain)
+ curproxy->cookie_domain = strdup(defproxy->cookie_domain);
+
+ if (defproxy->cookie_maxidle)
+ curproxy->cookie_maxidle = defproxy->cookie_maxidle;
+
+ if (defproxy->cookie_maxlife)
+ curproxy->cookie_maxlife = defproxy->cookie_maxlife;
+
+ if (defproxy->rdp_cookie_name)
+ curproxy->rdp_cookie_name = strdup(defproxy->rdp_cookie_name);
+ curproxy->rdp_cookie_len = defproxy->rdp_cookie_len;
+
+ if (defproxy->cookie_attrs)
+ curproxy->cookie_attrs = strdup(defproxy->cookie_attrs);
+
+ if (defproxy->lbprm.arg_str)
+ curproxy->lbprm.arg_str = strdup(defproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_len = defproxy->lbprm.arg_len;
+ curproxy->lbprm.arg_opt1 = defproxy->lbprm.arg_opt1;
+ curproxy->lbprm.arg_opt2 = defproxy->lbprm.arg_opt2;
+ curproxy->lbprm.arg_opt3 = defproxy->lbprm.arg_opt3;
+
+ if (defproxy->conn_src.iface_name)
+ curproxy->conn_src.iface_name = strdup(defproxy->conn_src.iface_name);
+ curproxy->conn_src.iface_len = defproxy->conn_src.iface_len;
+ curproxy->conn_src.opts = defproxy->conn_src.opts;
+#if defined(CONFIG_HAP_TRANSPARENT)
+ curproxy->conn_src.tproxy_addr = defproxy->conn_src.tproxy_addr;
+#endif
+ curproxy->load_server_state_from_file = defproxy->load_server_state_from_file;
+
+ curproxy->srvtcpka_cnt = defproxy->srvtcpka_cnt;
+ curproxy->srvtcpka_idle = defproxy->srvtcpka_idle;
+ curproxy->srvtcpka_intvl = defproxy->srvtcpka_intvl;
+ }
+
+ if (curproxy->cap & PR_CAP_FE) {
+ if (defproxy->capture_name)
+ curproxy->capture_name = strdup(defproxy->capture_name);
+ curproxy->capture_namelen = defproxy->capture_namelen;
+ curproxy->capture_len = defproxy->capture_len;
+
+ curproxy->nb_req_cap = defproxy->nb_req_cap;
+ curproxy->req_cap = defproxy->req_cap;
+
+ curproxy->nb_rsp_cap = defproxy->nb_rsp_cap;
+ curproxy->rsp_cap = defproxy->rsp_cap;
+ }
+
+ if (curproxy->cap & PR_CAP_FE) {
+ curproxy->timeout.client = defproxy->timeout.client;
+ curproxy->timeout.clientfin = defproxy->timeout.clientfin;
+ curproxy->timeout.tarpit = defproxy->timeout.tarpit;
+ curproxy->timeout.httpreq = defproxy->timeout.httpreq;
+ curproxy->timeout.httpka = defproxy->timeout.httpka;
+ if (isttest(defproxy->monitor_uri))
+ curproxy->monitor_uri = istdup(defproxy->monitor_uri);
+ if (defproxy->defbe.name)
+ curproxy->defbe.name = strdup(defproxy->defbe.name);
+
+ /* get either a pointer to the logformat string or a copy of it */
+ curproxy->conf.logformat_string = defproxy->conf.logformat_string;
+ if (curproxy->conf.logformat_string &&
+ curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ curproxy->conf.logformat_string = strdup(curproxy->conf.logformat_string);
+
+ if (defproxy->conf.lfs_file) {
+ curproxy->conf.lfs_file = strdup(defproxy->conf.lfs_file);
+ curproxy->conf.lfs_line = defproxy->conf.lfs_line;
+ }
+
+ /* get either a pointer to the logformat string for RFC5424 structured-data or a copy of it */
+ curproxy->conf.logformat_sd_string = defproxy->conf.logformat_sd_string;
+ if (curproxy->conf.logformat_sd_string &&
+ curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ curproxy->conf.logformat_sd_string = strdup(curproxy->conf.logformat_sd_string);
+
+ if (defproxy->conf.lfsd_file) {
+ curproxy->conf.lfsd_file = strdup(defproxy->conf.lfsd_file);
+ curproxy->conf.lfsd_line = defproxy->conf.lfsd_line;
+ }
+
+ curproxy->conf.error_logformat_string = defproxy->conf.error_logformat_string;
+ if (curproxy->conf.error_logformat_string)
+ curproxy->conf.error_logformat_string = strdup(curproxy->conf.error_logformat_string);
+
+ if (defproxy->conf.elfs_file) {
+ curproxy->conf.elfs_file = strdup(defproxy->conf.elfs_file);
+ curproxy->conf.elfs_line = defproxy->conf.elfs_line;
+ }
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ curproxy->timeout.connect = defproxy->timeout.connect;
+ curproxy->timeout.server = defproxy->timeout.server;
+ curproxy->timeout.serverfin = defproxy->timeout.serverfin;
+ curproxy->timeout.check = defproxy->timeout.check;
+ curproxy->timeout.queue = defproxy->timeout.queue;
+ curproxy->timeout.tarpit = defproxy->timeout.tarpit;
+ curproxy->timeout.httpreq = defproxy->timeout.httpreq;
+ curproxy->timeout.httpka = defproxy->timeout.httpka;
+ curproxy->timeout.tunnel = defproxy->timeout.tunnel;
+ curproxy->conn_src.source_addr = defproxy->conn_src.source_addr;
+ }
+
+ curproxy->mode = defproxy->mode;
+ curproxy->uri_auth = defproxy->uri_auth; /* for stats */
+
+ /* copy default logsrvs to curproxy */
+ list_for_each_entry(tmplogsrv, &defproxy->logsrvs, list) {
+ struct logsrv *node = malloc(sizeof(*node));
+
+ if (!node) {
+ memprintf(errmsg, "proxy '%s': out of memory", curproxy->id);
+ return 1;
+ }
+ memcpy(node, tmplogsrv, sizeof(struct logsrv));
+ node->ref = tmplogsrv->ref;
+ LIST_INIT(&node->list);
+ LIST_APPEND(&curproxy->logsrvs, &node->list);
+ node->ring_name = tmplogsrv->ring_name ? strdup(tmplogsrv->ring_name) : NULL;
+ node->conf.file = strdup(tmplogsrv->conf.file);
+ node->conf.line = tmplogsrv->conf.line;
+ }
+
+ curproxy->conf.uniqueid_format_string = defproxy->conf.uniqueid_format_string;
+ if (curproxy->conf.uniqueid_format_string)
+ curproxy->conf.uniqueid_format_string = strdup(curproxy->conf.uniqueid_format_string);
+
+ chunk_dup(&curproxy->log_tag, &defproxy->log_tag);
+
+ if (defproxy->conf.uif_file) {
+ curproxy->conf.uif_file = strdup(defproxy->conf.uif_file);
+ curproxy->conf.uif_line = defproxy->conf.uif_line;
+ }
+
+ /* copy default header unique id */
+ if (isttest(defproxy->header_unique_id)) {
+ const struct ist copy = istdup(defproxy->header_unique_id);
+
+ if (!isttest(copy)) {
+ memprintf(errmsg, "proxy '%s': out of memory for unique-id-header", curproxy->id);
+ return 1;
+ }
+ curproxy->header_unique_id = copy;
+ }
+
+ /* default compression options */
+ if (defproxy->comp != NULL) {
+ curproxy->comp = calloc(1, sizeof(*curproxy->comp));
+ if (!curproxy->comp) {
+ memprintf(errmsg, "proxy '%s': out of memory for default compression options", curproxy->id);
+ return 1;
+ }
+ curproxy->comp->algos = defproxy->comp->algos;
+ curproxy->comp->types = defproxy->comp->types;
+ }
+
+ if (defproxy->check_path)
+ curproxy->check_path = strdup(defproxy->check_path);
+ if (defproxy->check_command)
+ curproxy->check_command = strdup(defproxy->check_command);
+
+ if (defproxy->email_alert.mailers.name)
+ curproxy->email_alert.mailers.name = strdup(defproxy->email_alert.mailers.name);
+ if (defproxy->email_alert.from)
+ curproxy->email_alert.from = strdup(defproxy->email_alert.from);
+ if (defproxy->email_alert.to)
+ curproxy->email_alert.to = strdup(defproxy->email_alert.to);
+ if (defproxy->email_alert.myhostname)
+ curproxy->email_alert.myhostname = strdup(defproxy->email_alert.myhostname);
+ curproxy->email_alert.level = defproxy->email_alert.level;
+ curproxy->email_alert.set = defproxy->email_alert.set;
+
+ return 0;
+}
+
+/* Allocates a new proxy <name> of type <cap> found at position <file:linenum>,
+ * presets it from the defaults of <defproxy> and returns it. In case of error,
+ * an alert is printed and NULL is returned.
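+ *
+ * Example (hypothetical call from a section parser):
+ *   curproxy = parse_new_proxy(args[1], PR_CAP_FE, file, linenum, curr_defproxy);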
+ */
+struct proxy *parse_new_proxy(const char *name, unsigned int cap,
+ const char *file, int linenum,
+ const struct proxy *defproxy)
+{
+ struct proxy *curproxy = NULL;
+ char *errmsg = NULL;
+
+ if (!(curproxy = alloc_new_proxy(name, cap, &errmsg))) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ free(errmsg);
+ return NULL;
+ }
+
+ if (defproxy) {
+ if (proxy_defproxy_cpy(curproxy, defproxy, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ free(errmsg);
+
+ ha_free(&curproxy);
+ return NULL;
+ }
+ }
+ else {
+ proxy_preset_defaults(curproxy);
+ }
+
+ curproxy->conf.args.file = curproxy->conf.file = strdup(file);
+ curproxy->conf.args.line = curproxy->conf.line = linenum;
+
+ return curproxy;
+}
+
+/* to be called under the proxy lock after pausing some listeners. This will
+ * automatically set PR_FL_PAUSED in p->flags once no listener remains ready.
+ */
+void proxy_cond_pause(struct proxy *p)
+{
+ if (p->li_ready)
+ return;
+ p->flags |= PR_FL_PAUSED;
+}
+
+/* to be called under the proxy lock after resuming some listeners. This will
+ * automatically clear PR_FL_PAUSED in p->flags once a listener is ready again.
+ */
+void proxy_cond_resume(struct proxy *p)
+{
+ if (!p->li_ready)
+ return;
+ p->flags &= ~PR_FL_PAUSED;
+}
+
+/* to be called under the proxy lock after stopping some listeners. This will
+ * automatically set PR_FL_STOPPED in p->flags after stopping the last one, and
+ * will emit a log indicating the proxy's condition. The function is idempotent
+ * so that it will not emit multiple logs; a proxy will be disabled only once.
+ */
+void proxy_cond_disable(struct proxy *p)
+{
+ if (p->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return;
+
+ if (p->li_ready + p->li_paused > 0)
+ return;
+
+ p->flags |= PR_FL_STOPPED;
+
+ /* Note: syslog proxies use their own loggers so while it's somewhat OK
+ * to report them being stopped as a warning, we must not spam their log
+ * servers which are in fact production servers. For other types (CLI,
+ * peers, etc) we must not report them at all as they're not really on
+ * the data plane but on the control plane.
+ */
+ if (p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP || p->mode == PR_MODE_SYSLOG)
+ ha_warning("Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n",
+ p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+
+ if (p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP)
+ send_log(p, LOG_WARNING, "Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n",
+ p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+
+ if (p->table && p->table->size && p->table->sync_task)
+ task_wakeup(p->table->sync_task, TASK_WOKEN_MSG);
+
+ if (p->task)
+ task_wakeup(p->task, TASK_WOKEN_MSG);
+}
+
+/*
+ * This is the proxy management task. It enables proxies when there are enough
+ * free streams, or stops them when the table is full. It is designed to be
+ * called as a task which is woken up upon stopping or when rate limiting must
+ * be enforced.
+ */
+struct task *manage_proxy(struct task *t, void *context, unsigned int state)
+{
+ struct proxy *p = context;
+ int next = TICK_ETERNITY;
+ unsigned int wait;
+
+ /* We should periodically try to enable listeners waiting for a
+ * global resource here.
+ */
+
+ /* first, let's check if we need to stop the proxy */
+ if (unlikely(stopping && !(p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))) {
+ int remain;
+ remain = tick_remain(now_ms, p->stop_time);
+ if (remain == 0) {
+ stop_proxy(p);
+ /* try to free more memory */
+ pool_gc(NULL);
+ }
+ else {
+ next = tick_first(next, p->stop_time);
+ }
+ }
+
+ /* If the proxy holds a stick table, we need to purge all unused
+ * entries. These are all the ones in the table with ref_cnt == 0
+ * and all the ones in the pool used to allocate new entries. Any
+ * entry attached to an existing stream waiting for a store will
+ * be in neither list. Any entry being dumped will have ref_cnt > 0.
+ * However we protect tables that are being synced to peers.
+ */
+ if (unlikely(stopping && (p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && p->table && p->table->current)) {
+
+ if (!p->table->refcnt) {
+ /* !table->refcnt means there
+ * is no more pending full resync
+ * to push to a new process and
+ * we are free to flush the table.
+ */
+ stktable_trash_oldest(p->table, p->table->current);
+ pool_gc(NULL);
+ }
+ if (p->table->current) {
+ /* some entries still remain, let's recheck in one second */
+ next = tick_first(next, tick_add(now_ms, 1000));
+ }
+ }
+
+ /* the rest below is just for frontends */
+ if (!(p->cap & PR_CAP_FE))
+ goto out;
+
+ /* check the various reasons we may find to block the frontend */
+ if (unlikely(p->feconn >= p->maxconn))
+ goto out;
+
+ if (p->fe_sps_lim &&
+ (wait = next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0))) {
+ /* we're blocking because a limit was reached on the number of
+ * requests/s on the frontend. We want to re-check ASAP, which
+ * means in 1 ms before estimated expiration date, because the
+ * timer will have settled down.
+ */
+ next = tick_first(next, tick_add(now_ms, wait));
+ goto out;
+ }
+
+ /* The proxy is not limited so we can re-enable any waiting listener */
+ dequeue_proxy_listeners(p);
+ out:
+ t->expire = next;
+ task_queue(t);
+ return t;
+}
+
+
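+/* This function parses the global "grace" statement (e.g. "grace 10s", the
+ * value being for illustration only) and stores the result in
+ * global.grace_delay. It returns -1 on error, otherwise zero.
+ */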
+static int proxy_parse_grace(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (!*args[1]) {
+ memprintf(err, "'%s' expects <time> as argument.\n", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &global.grace_delay, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+ return 0;
+}
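+
+/* Example (illustrative, global section):
+ *     global
+ *         grace 10s
+ * With this setting, soft_stop() schedules grace_expired() 10 seconds in the
+ * future instead of unbinding the listeners immediately.
+ */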
+
+static int proxy_parse_hard_stop_after(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (!*args[1]) {
+ memprintf(err, "'%s' expects <time> as argument.\n", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &global.hard_stop_after, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+ return 0;
+}
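+
+/* Example (illustrative, global section): "hard-stop-after 30s" arms the
+ * hard_stop() task 30 seconds after soft-stop starts; any stream still alive
+ * at that point is shut down with SF_ERR_KILLED.
+ */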
+
+static int proxy_parse_close_spread_time(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (!*args[1]) {
+ memprintf(err, "'%s' expects <time> as argument.\n", args[0]);
+ return -1;
+ }
+
+ /* If close-spread-time is set to "infinite", disable the active connection
+ * closing during soft-stop.
+ */
+ if (strcmp(args[1], "infinite") == 0) {
+ global.tune.options |= GTUNE_DISABLE_ACTIVE_CLOSE;
+ global.close_spread_time = TICK_ETERNITY;
+ return 0;
+ }
+
+ res = parse_time_err(args[1], &global.close_spread_time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+ global.tune.options &= ~GTUNE_DISABLE_ACTIVE_CLOSE;
+
+ return 0;
+}
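+
+/* Example (illustrative, global section): "close-spread-time 30s" spreads
+ * active connection closures over a 30-second window during soft-stop, while
+ * "close-spread-time infinite" disables active closures entirely.
+ */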
+
+struct task *hard_stop(struct task *t, void *context, unsigned int state)
+{
+ struct proxy *p;
+ struct stream *s;
+ int thr;
+
+ if (killed) {
+ ha_warning("Some tasks resisted to hard-stop, exiting now.\n");
+ send_log(NULL, LOG_WARNING, "Some tasks resisted to hard-stop, exiting now.\n");
+ killed = 2;
+ for (thr = 0; thr < global.nbthread; thr++)
+ if (((all_threads_mask & ~tid_bit) >> thr) & 1)
+ wake_thread(thr);
+ t->expire = TICK_ETERNITY;
+ return t;
+ }
+
+ ha_warning("soft-stop running for too long, performing a hard-stop.\n");
+ send_log(NULL, LOG_WARNING, "soft-stop running for too long, performing a hard-stop.\n");
+ p = proxies_list;
+ while (p) {
+ if ((p->cap & PR_CAP_FE) && (p->feconn > 0)) {
+ ha_warning("Proxy %s hard-stopped (%d remaining conns will be closed).\n",
+ p->id, p->feconn);
+ send_log(p, LOG_WARNING, "Proxy %s hard-stopped (%d remaining conns will be closed).\n",
+ p->id, p->feconn);
+ }
+ p = p->next;
+ }
+
+ thread_isolate();
+
+ for (thr = 0; thr < global.nbthread; thr++) {
+ list_for_each_entry(s, &ha_thread_ctx[thr].streams, list) {
+ stream_shutdown(s, SF_ERR_KILLED);
+ }
+ }
+
+ thread_release();
+
+ killed = 1;
+ t->expire = tick_add(now_ms, MS_TO_TICKS(1000));
+ return t;
+}
+
+/* perform the soft-stop right now (i.e. unbind listeners) */
+static void do_soft_stop_now()
+{
+ struct proxy *p;
+ struct task *task;
+
+ /* disable busy polling to avoid cpu eating for the new process */
+ global.tune.options &= ~GTUNE_BUSY_POLLING;
+
+ if (tick_isset(global.close_spread_time)) {
+ global.close_spread_end = tick_add(now_ms, global.close_spread_time);
+ }
+
+ /* schedule a hard-stop after a delay if needed */
+ if (tick_isset(global.hard_stop_after)) {
+ task = task_new_anywhere();
+ if (task) {
+ task->process = hard_stop;
+ task_schedule(task, tick_add(now_ms, global.hard_stop_after));
+ }
+ else {
+ ha_alert("out of memory trying to allocate the hard-stop task.\n");
+ }
+ }
+
+ /* stop all stoppable listeners */
+ protocol_stop_now();
+
+ /* Loop on proxies to stop backends */
+ p = proxies_list;
+ while (p) {
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+ proxy_cond_disable(p);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ p = p->next;
+ }
+
+ /* signal zero is used to broadcast the "stopping" event */
+ signal_handler(0);
+}
+
+/* triggered by a soft-stop delayed with `grace` */
+static struct task *grace_expired(struct task *t, void *context, unsigned int state)
+{
+ ha_notice("Grace period expired, proceeding with soft-stop now.\n");
+ send_log(NULL, LOG_NOTICE, "Grace period expired, proceeding with soft-stop now.\n");
+ do_soft_stop_now();
+ task_destroy(t);
+ return NULL;
+}
+
+/*
+ * this function disables health-check servers so that the process will quickly be ignored
+ * by load balancers.
+ */
+void soft_stop(void)
+{
+ struct task *task;
+
+ stopping = 1;
+
+ if (tick_isset(global.grace_delay)) {
+ task = task_new_anywhere();
+ if (task) {
+ ha_notice("Scheduling a soft-stop in %u ms.\n", global.grace_delay);
+ send_log(NULL, LOG_WARNING, "Scheduling a soft-stop in %u ms.\n", global.grace_delay);
+ task->process = grace_expired;
+ task_schedule(task, tick_add(now_ms, global.grace_delay));
+ return;
+ }
+ else {
+ ha_alert("out of memory trying to allocate the stop-stop task, stopping now.\n");
+ }
+ }
+
+ /* no grace (or failure to enforce it): stop now */
+ do_soft_stop_now();
+}
+
+
+/* Temporarily disables listening on all of the proxy's listeners. Upon
+ * success, the proxy enters the PR_PAUSED state. The function returns 0
+ * if it fails, or non-zero on success.
+ * The function takes the proxy's lock so it's safe to
+ * call from multiple places.
+ */
+int pause_proxy(struct proxy *p)
+{
+ struct listener *l;
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+
+ if (!(p->cap & PR_CAP_FE) || (p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) || !p->li_ready)
+ goto end;
+
+ list_for_each_entry(l, &p->conf.listeners, by_fe)
+ pause_listener(l, 1);
+
+ if (p->li_ready) {
+ ha_warning("%s %s failed to enter pause mode.\n", proxy_cap_str(p->cap), p->id);
+ send_log(p, LOG_WARNING, "%s %s failed to enter pause mode.\n", proxy_cap_str(p->cap), p->id);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ return 0;
+ }
+end:
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ return 1;
+}
+
+/*
+ * This function completely stops a proxy and releases its listeners. It has
+ * to be called when going down in order to release the ports so that another
+ * process may bind to them. It must also be called on disabled proxies at the
+ * end of start-up. If all listeners are closed, the proxy is set to the
+ * PR_STOPPED state.
+ * The function takes the proxy's lock so it's safe to
+ * call from multiple places.
+ */
+void stop_proxy(struct proxy *p)
+{
+ struct listener *l;
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+
+ list_for_each_entry(l, &p->conf.listeners, by_fe)
+ stop_listener(l, 1, 0);
+
+ if (!(p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && !p->li_ready) {
+ /* might be just a backend */
+ p->flags |= PR_FL_STOPPED;
+ }
+
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+}
+
+/* This function resumes listening on the specified proxy. It scans all of its
+ * listeners and tries to enable them all. If any of them fails, the proxy is
+ * put back to the paused state. It returns 1 upon success, or zero if an error
+ * is encountered.
+ * The function takes the proxy's lock so it's safe to
+ * call from multiple places.
+ */
+int resume_proxy(struct proxy *p)
+{
+ struct listener *l;
+ int fail;
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+
+ if ((p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) || !p->li_paused)
+ goto end;
+
+ fail = 0;
+ list_for_each_entry(l, &p->conf.listeners, by_fe) {
+ if (!resume_listener(l, 1)) {
+ int port;
+
+ port = get_host_port(&l->rx.addr);
+ if (port) {
+ ha_warning("Port %d busy while trying to enable %s %s.\n",
+ port, proxy_cap_str(p->cap), p->id);
+ send_log(p, LOG_WARNING, "Port %d busy while trying to enable %s %s.\n",
+ port, proxy_cap_str(p->cap), p->id);
+ }
+ else {
+ ha_warning("Bind on socket %d busy while trying to enable %s %s.\n",
+ l->luid, proxy_cap_str(p->cap), p->id);
+ send_log(p, LOG_WARNING, "Bind on socket %d busy while trying to enable %s %s.\n",
+ l->luid, proxy_cap_str(p->cap), p->id);
+ }
+
+ /* Another port might have been enabled. Let's stop everything. */
+ fail = 1;
+ break;
+ }
+ }
+
+ if (fail) {
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ /* pause_proxy will take PROXY_LOCK */
+ pause_proxy(p);
+ return 0;
+ }
+end:
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ return 1;
+}
+
+/* Set current stream's backend to <be>. Nothing is done if the
+ * stream already had a backend assigned, which is indicated by
+ * s->flags & SF_BE_ASSIGNED.
+ * All flags, stats and counters which need be updated are updated.
+ * Returns 1 if done, 0 in case of internal error, e.g. lack of resources.
+ */
+int stream_set_backend(struct stream *s, struct proxy *be)
+{
+ unsigned int req_ana;
+
+ if (s->flags & SF_BE_ASSIGNED)
+ return 1;
+
+ if (flt_set_stream_backend(s, be) < 0)
+ return 0;
+
+ s->be = be;
+ HA_ATOMIC_UPDATE_MAX(&be->be_counters.conn_max,
+ HA_ATOMIC_ADD_FETCH(&be->beconn, 1));
+ proxy_inc_be_ctr(be);
+
+ /* assign new parameters to the stream from the new backend */
+ s->scb->flags &= ~SC_FL_INDEP_STR;
+ if (be->options2 & PR_O2_INDEPSTR)
+ s->scb->flags |= SC_FL_INDEP_STR;
+
+ if (tick_isset(be->timeout.serverfin))
+ s->scb->hcto = be->timeout.serverfin;
+
+ /* We want to enable the backend-specific analysers except those which
+ * were already run as part of the frontend/listener. Note that it would
+ * be more reliable to store the list of analysers that have been run,
+ * but what we do here is OK for now.
+ */
+ req_ana = be->be_req_ana;
+ if (!(strm_fe(s)->options & PR_O_WREQ_BODY) && be->options & PR_O_WREQ_BODY) {
+ /* The backend requests to parse the request body while it was
+ * not done on the frontend, so add the corresponding analyser
+ */
+ req_ana |= AN_REQ_HTTP_BODY;
+ }
+ if (IS_HTX_STRM(s) && strm_fe(s)->mode != PR_MODE_HTTP) {
+ /* The stream was already upgraded to HTTP, so remove analysers
+ * set during the upgrade
+ */
+ req_ana &= ~(AN_REQ_WAIT_HTTP|AN_REQ_HTTP_PROCESS_FE);
+ }
+ s->req.analysers |= req_ana & ~(strm_li(s) ? strm_li(s)->analysers : 0);
+
+ if (!IS_HTX_STRM(s) && be->mode == PR_MODE_HTTP) {
+ /* If we chain a TCP frontend to an HTX backend, we must upgrade
+ * the client mux */
+ if (!stream_set_http_mode(s, NULL))
+ return 0;
+ }
+ else if (IS_HTX_STRM(s) && be->mode != PR_MODE_HTTP) {
+ /* If a TCP backend is assigned to an HTX stream, return an
+ * error. This may happen for a new stream on a previously
+ * upgraded connection. */
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ return 0;
+ }
+ else {
+ /* If the target backend requires HTTP processing, we have to allocate
+ * the HTTP transaction if we did not have one.
+ */
+ if (unlikely(!s->txn && be->http_needed && !http_create_txn(s)))
+ return 0;
+ }
+
+ s->flags |= SF_BE_ASSIGNED;
+ if (be->options2 & PR_O2_NODELAY) {
+ s->req.flags |= CF_NEVER_WAIT;
+ s->res.flags |= CF_NEVER_WAIT;
+ }
+
+ return 1;
+}
+
+/* Capture a bad request or response and archive it in the proxy's structure.
+ * It is relatively protocol-agnostic so it requires that a number of elements
+ * are passed :
+ * - <proxy> is the proxy where the error was detected and where the snapshot
+ * needs to be stored
+ * - <is_back> indicates that the error happened when receiving the response
+ * - <other_end> is a pointer to the proxy on the other side when known
+ * - <target> is the target of the connection, usually a server or a proxy
+ * - <sess> is the session which experienced the error
+ * - <ctx> may be NULL or should contain any info relevant to the protocol
+ * - <buf> is the buffer containing the offending data
+ * - <buf_ofs> is the position of this buffer's input data in the input
+ * stream, starting at zero. It may be passed as zero if unknown.
+ * - <buf_out> is the portion of <buf->data> which was already forwarded and
+ * which precedes the buffer's input. The buffer's input starts at
+ * buf->head + buf_out.
+ * - <err_pos> is the position of the faulty byte in the buffer's input.
+ * - <show> is the callback to use to display <ctx>. It may be NULL.
+ */
+void proxy_capture_error(struct proxy *proxy, int is_back,
+ struct proxy *other_end, enum obj_type *target,
+ const struct session *sess,
+ const struct buffer *buf, long buf_ofs,
+ unsigned int buf_out, unsigned int err_pos,
+ const union error_snapshot_ctx *ctx,
+ void (*show)(struct buffer *, const struct error_snapshot *))
+{
+ struct error_snapshot *es;
+ unsigned int buf_len;
+ int len1, len2;
+ unsigned int ev_id;
+
+ ev_id = HA_ATOMIC_FETCH_ADD(&error_snapshot_id, 1);
+
+ buf_len = b_data(buf) - buf_out;
+
+ es = malloc(sizeof(*es) + buf_len);
+ if (!es)
+ return;
+
+ es->buf_len = buf_len;
+ es->ev_id = ev_id;
+
+ len1 = b_size(buf) - b_peek_ofs(buf, buf_out);
+ if (len1 > buf_len)
+ len1 = buf_len;
+
+ if (len1) {
+ memcpy(es->buf, b_peek(buf, buf_out), len1);
+ len2 = buf_len - len1;
+ if (len2)
+ memcpy(es->buf + len1, b_orig(buf), len2);
+ }
+
+ es->buf_err = err_pos;
+ es->when = date; // user-visible date
+ es->srv = objt_server(target);
+ es->oe = other_end;
+ if (sess && objt_conn(sess->origin) && conn_get_src(__objt_conn(sess->origin)))
+ es->src = *__objt_conn(sess->origin)->src;
+ else
+ memset(&es->src, 0, sizeof(es->src));
+
+ es->buf_wrap = b_wrap(buf) - b_peek(buf, buf_out);
+ es->buf_out = buf_out;
+ es->buf_ofs = buf_ofs;
+
+ /* be sure to indicate the offset of the first IN byte */
+ if (es->buf_ofs >= es->buf_len)
+ es->buf_ofs -= es->buf_len;
+ else
+ es->buf_ofs = 0;
+
+ /* protocol-specific part now */
+ if (ctx)
+ es->ctx = *ctx;
+ else
+ memset(&es->ctx, 0, sizeof(es->ctx));
+ es->show = show;
+
+ /* note: we still lock since we have to be certain that nobody is
+ * dumping the output while we free.
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &proxy->lock);
+ if (is_back) {
+ es = HA_ATOMIC_XCHG(&proxy->invalid_rep, es);
+ } else {
+ es = HA_ATOMIC_XCHG(&proxy->invalid_req, es);
+ }
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &proxy->lock);
+ ha_free(&es);
+}
+
+/* Configure all proxies which lack a maxconn setting to use the global one by
+ * default. This avoids the common mistake consisting in setting maxconn only
+ * in the global section and discovering the hard way that it doesn't propagate
+ * through the frontends. These values are also propagated through the various
+ * targeted backends, whose fullconn is finally calculated if not yet set.
+ */
+void proxy_adjust_all_maxconn()
+{
+ struct proxy *curproxy;
+ struct switching_rule *swrule1, *swrule2;
+
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ if (!(curproxy->cap & PR_CAP_FE))
+ continue;
+
+ if (!curproxy->maxconn)
+ curproxy->maxconn = global.maxconn;
+
+ /* update the target backend's fullconn count : default_backend */
+ if (curproxy->defbe.be)
+ curproxy->defbe.be->tot_fe_maxconn += curproxy->maxconn;
+ else if ((curproxy->cap & PR_CAP_LISTEN) == PR_CAP_LISTEN)
+ curproxy->tot_fe_maxconn += curproxy->maxconn;
+
+ list_for_each_entry(swrule1, &curproxy->switching_rules, list) {
+ /* For each target of switching rules, we update their
+ * tot_fe_maxconn, except if a previous rule points to
+ * the same backend or to the default backend.
+ */
+ if (swrule1->be.backend != curproxy->defbe.be) {
+ /* note: swrule1->be.backend isn't a backend if the rule
+ * is dynamic, it's an expression instead, so it must not
+ * be dereferenced as a backend before being certain it is.
+ */
+ list_for_each_entry(swrule2, &curproxy->switching_rules, list) {
+ if (swrule2 == swrule1) {
+ if (!swrule1->dynamic)
+ swrule1->be.backend->tot_fe_maxconn += curproxy->maxconn;
+ break;
+ }
+ else if (!swrule2->dynamic && swrule2->be.backend == swrule1->be.backend) {
+ /* there are multiple refs of this backend */
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* automatically compute fullconn if not set. We must not do it in the
+ * loop above because cross-references are not yet fully resolved.
+ */
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ /* If <fullconn> is not set, let's set it to 10% of the sum of
+ * the possible incoming frontend's maxconns.
+ */
+ if (!curproxy->fullconn && (curproxy->cap & PR_CAP_BE)) {
+ /* we have the sum of the maxconns in tot_fe_maxconn. We only
+ * keep 10% of that sum to set the default fullconn, with
+ * a hard minimum of 1 (to avoid a divide by zero).
+ */
+ curproxy->fullconn = (curproxy->tot_fe_maxconn + 9) / 10;
+ if (!curproxy->fullconn)
+ curproxy->fullconn = 1;
+ }
+ }
+}
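+
+/* Worked example (illustrative, hypothetical backend "bk"): two frontends
+ * with maxconn 100 and 150 both targeting "bk" give bk->tot_fe_maxconn = 250,
+ * so an unset fullconn defaults to (250 + 9) / 10 = 25.
+ */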
+
+/* Config keywords below */
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "grace", proxy_parse_grace },
+ { CFG_GLOBAL, "hard-stop-after", proxy_parse_hard_stop_after },
+ { CFG_GLOBAL, "close-spread-time", proxy_parse_close_spread_time },
+ { CFG_LISTEN, "timeout", proxy_parse_timeout },
+ { CFG_LISTEN, "clitimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
+ { CFG_LISTEN, "contimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
+ { CFG_LISTEN, "srvtimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
+ { CFG_LISTEN, "rate-limit", proxy_parse_rate_limit },
+ { CFG_LISTEN, "max-keep-alive-queue", proxy_parse_max_ka_queue },
+ { CFG_LISTEN, "declare", proxy_parse_declare },
+ { CFG_LISTEN, "retry-on", proxy_parse_retry_on },
+#ifdef TCP_KEEPCNT
+ { CFG_LISTEN, "clitcpka-cnt", proxy_parse_tcpka_cnt },
+ { CFG_LISTEN, "srvtcpka-cnt", proxy_parse_tcpka_cnt },
+#endif
+#ifdef TCP_KEEPIDLE
+ { CFG_LISTEN, "clitcpka-idle", proxy_parse_tcpka_idle },
+ { CFG_LISTEN, "srvtcpka-idle", proxy_parse_tcpka_idle },
+#endif
+#ifdef TCP_KEEPINTVL
+ { CFG_LISTEN, "clitcpka-intvl", proxy_parse_tcpka_intvl },
+ { CFG_LISTEN, "srvtcpka-intvl", proxy_parse_tcpka_intvl },
+#endif
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* Expects to find a frontend named <arg> and returns it, otherwise displays an
+ * appropriate error message and returns NULL. This function is designed to be
+ * used by functions requiring a frontend on the CLI.
+ */
+struct proxy *cli_find_frontend(struct appctx *appctx, const char *arg)
+{
+ struct proxy *px;
+
+ if (!*arg) {
+ cli_err(appctx, "A frontend name is expected.\n");
+ return NULL;
+ }
+
+ px = proxy_fe_by_name(arg);
+ if (!px) {
+ cli_err(appctx, "No such frontend.\n");
+ return NULL;
+ }
+ return px;
+}
+
+/* Expects to find a backend named <arg> and returns it, otherwise displays an
+ * appropriate error message and returns NULL. This function is designed to be
+ * used by functions requiring a backend on the CLI.
+ */
+struct proxy *cli_find_backend(struct appctx *appctx, const char *arg)
+{
+ struct proxy *px;
+
+ if (!*arg) {
+ cli_err(appctx, "A backend name is expected.\n");
+ return NULL;
+ }
+
+ px = proxy_be_by_name(arg);
+ if (!px) {
+ cli_err(appctx, "No such backend.\n");
+ return NULL;
+ }
+ return px;
+}
+
+
+/* parse a "show servers [state|conn]" CLI line, returns 0 if it wants to start
+ * the dump or 1 if it stops immediately. If an argument is specified, it will
+ * reserve a show_srv_ctx context and set the proxy pointer into ->px, its ID
+ * into ->only_pxid, and ->show_conn to 0 for "state", or 1 for "conn".
+ */
+static int cli_parse_show_servers(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_srv_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct proxy *px;
+
+ ctx->show_conn = *args[2] == 'c'; // "conn" vs "state"
+
+ /* check if a backend name has been provided */
+ if (*args[3]) {
+ /* look up the designated backend by name */
+ px = proxy_be_by_name(args[3]);
+
+ if (!px)
+ return cli_err(appctx, "Can't find backend.\n");
+
+ ctx->px = px;
+ ctx->only_pxid = px->uuid;
+ }
+ return 0;
+}
+
+/* helper to dump server addr */
+static void dump_server_addr(const struct sockaddr_storage *addr, char *addr_str)
+{
+ addr_str[0] = '\0';
+ switch (addr->ss_family) {
+ case AF_INET:
+ case AF_INET6:
+ addr_to_str(addr, addr_str, INET6_ADDRSTRLEN + 1);
+ break;
+ default:
+ memcpy(addr_str, "-\0", 2);
+ break;
+ }
+}
+
+/* Dumps server state information for all the servers found in backend ctx->px.
+ * This information covers all the parameters which may change during HAProxy's
+ * runtime. By default, we only export to the last known server state file
+ * format. This information can be used at next startup to recover the same
+ * level of server state. It takes its context from show_srv_ctx, with the
+ * proxy pointer from ->px, the proxy's id from ->only_pxid, the server's
+ * pointer from ->sv, and the choice of what to dump from ->show_conn.
+ */
+static int dump_servers_state(struct stconn *sc)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_srv_ctx *ctx = appctx->svcctx;
+ struct proxy *px = ctx->px;
+ struct server *srv;
+ char srv_addr[INET6_ADDRSTRLEN + 1];
+ char srv_agent_addr[INET6_ADDRSTRLEN + 1];
+ char srv_check_addr[INET6_ADDRSTRLEN + 1];
+ time_t srv_time_since_last_change;
+ int bk_f_forced_id, srv_f_forced_id;
+ char *srvrecord;
+
+ if (!ctx->sv)
+ ctx->sv = px->srv;
+
+ for (; ctx->sv != NULL; ctx->sv = srv->next) {
+ srv = ctx->sv;
+
+ dump_server_addr(&srv->addr, srv_addr);
+ dump_server_addr(&srv->check.addr, srv_check_addr);
+ dump_server_addr(&srv->agent.addr, srv_agent_addr);
+
+ srv_time_since_last_change = now.tv_sec - srv->last_change;
+ bk_f_forced_id = px->options & PR_O_FORCED_ID ? 1 : 0;
+ srv_f_forced_id = srv->flags & SRV_F_FORCED_ID ? 1 : 0;
+
+ srvrecord = NULL;
+ if (srv->srvrq && srv->srvrq->name)
+ srvrecord = srv->srvrq->name;
+
+ if (ctx->show_conn == 0) {
+ /* show servers state */
+ chunk_printf(&trash,
+ "%d %s "
+ "%d %s %s "
+ "%d %d %d %d %ld "
+ "%d %d %d %d %d "
+ "%d %d %s %u "
+ "%s %d %d "
+ "%s %s %d"
+ "\n",
+ px->uuid, px->id,
+ srv->puid, srv->id, srv_addr,
+ srv->cur_state, srv->cur_admin, srv->uweight, srv->iweight, (long int)srv_time_since_last_change,
+ srv->check.status, srv->check.result, srv->check.health, srv->check.state & 0x0F, srv->agent.state & 0x1F,
+ bk_f_forced_id, srv_f_forced_id, srv->hostname ? srv->hostname : "-", srv->svc_port,
+ srvrecord ? srvrecord : "-", srv->use_ssl, srv->check.port,
+ srv_check_addr, srv_agent_addr, srv->agent.port);
+ } else {
+ /* show servers conn */
+ int thr;
+
+ chunk_printf(&trash,
+ "%s/%s %d/%d %s %u - %u %u %u %u %u %u %d %u",
+ px->id, srv->id, px->uuid, srv->puid, srv_addr, srv->svc_port,
+ srv->pool_purge_delay,
+ srv->curr_used_conns, srv->max_used_conns, srv->est_need_conns,
+ srv->curr_idle_nb, srv->curr_safe_nb, (int)srv->max_idle_conns, srv->curr_idle_conns);
+
+ for (thr = 0; thr < global.nbthread && srv->curr_idle_thr; thr++)
+ chunk_appendf(&trash, " %u", srv->curr_idle_thr[thr]);
+
+ chunk_appendf(&trash, "\n");
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* Parses the backend list or simply uses the backend name provided by the user
+ * to return server states to stdout. It takes its context from show_srv_ctx,
+ * dumps proxy ->px, and stops if ->only_pxid is non-null.
+ */
+static int cli_io_handler_servers_state(struct appctx *appctx)
+{
+ struct show_srv_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct proxy *curproxy;
+
+ if (ctx->state == SHOW_SRV_HEAD) {
+ if (ctx->show_conn == 0)
+ chunk_printf(&trash, "%d\n# %s\n", SRV_STATE_FILE_VERSION, SRV_STATE_FILE_FIELD_NAMES);
+ else
+ chunk_printf(&trash,
+ "# bkname/svname bkid/svid addr port - purge_delay used_cur used_max need_est unsafe_nb safe_nb idle_lim idle_cur idle_per_thr[%d]\n",
+ global.nbthread);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ ctx->state = SHOW_SRV_LIST;
+
+ if (!ctx->px)
+ ctx->px = proxies_list;
+ }
+
+ for (; ctx->px != NULL; ctx->px = curproxy->next) {
+ curproxy = ctx->px;
+ /* servers are only in backends */
+ if ((curproxy->cap & PR_CAP_BE) && !(curproxy->cap & PR_CAP_INT)) {
+ if (!dump_servers_state(sc))
+ return 0;
+ }
+ /* only the selected proxy is dumped */
+ if (ctx->only_pxid)
+ break;
+ }
+
+ return 1;
+}
+
+/* Parses the backend list and simply reports backend names. It keeps the proxy
+ * pointer in svcctx since there's nothing else to store there.
+ */
+static int cli_io_handler_show_backend(struct appctx *appctx)
+{
+ struct proxy *curproxy;
+
+ chunk_reset(&trash);
+
+ if (!appctx->svcctx) {
+ chunk_printf(&trash, "# name\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ appctx->svcctx = proxies_list;
+ }
+
+ for (; appctx->svcctx != NULL; appctx->svcctx = curproxy->next) {
+ curproxy = appctx->svcctx;
+
+ /* looking for non-internal backends only */
+ if ((curproxy->cap & (PR_CAP_BE|PR_CAP_INT)) != PR_CAP_BE)
+ continue;
+
+ chunk_appendf(&trash, "%s\n", curproxy->id);
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Parses the "enable dynamic-cookies backend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock and each server's lock.
+ */
+static int cli_parse_enable_dyncookie_backend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *s;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_backend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ /* Note: this lock is to make sure this doesn't change while another
+ * thread is in srv_set_dyncookie().
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+ px->ck_opts |= PR_CK_DYNAMIC;
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ for (s = px->srv; s != NULL; s = s->next) {
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ srv_set_dyncookie(s);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ }
+
+ return 1;
+}
+
+/* Parses the "disable dynamic-cookies backend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock and each server's lock.
+ */
+static int cli_parse_disable_dyncookie_backend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *s;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_backend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ /* Note: this lock is to make sure this doesn't change while another
+ * thread is in srv_set_dyncookie().
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+ px->ck_opts &= ~PR_CK_DYNAMIC;
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ for (s = px->srv; s != NULL; s = s->next) {
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ if (!(s->flags & SRV_F_COOKIESET))
+ ha_free(&s->cookie);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ }
+
+ return 1;
+}
+
+/* Parses the "set dynamic-cookie-key backend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock and each server's lock.
+ */
+static int cli_parse_set_dyncookie_key_backend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *s;
+ char *newkey;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_backend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ if (!*args[4])
+ return cli_err(appctx, "String value expected.\n");
+
+ newkey = strdup(args[4]);
+ if (!newkey)
+ return cli_err(appctx, "Failed to allocate memory.\n");
+
+ /* Note: this lock is to make sure this doesn't change while another
+ * thread is in srv_set_dyncookie().
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+ free(px->dyncookie_key);
+ px->dyncookie_key = newkey;
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ for (s = px->srv; s != NULL; s = s->next) {
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ srv_set_dyncookie(s);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ }
+
+ return 1;
+}
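+
+/* Example (illustrative, hypothetical backend "bk_web"):
+ *     enable dynamic-cookie backend bk_web
+ *     set dynamic-cookie-key backend bk_web mysecretkey
+ *     disable dynamic-cookie backend bk_web
+ * Each command walks the backend's server list to recompute (or release) the
+ * per-server dynamic cookies under the proxy and server locks.
+ */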
+
+/* Parses the "set maxconn frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_set_maxconn_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct listener *l;
+ int v;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ if (!*args[4])
+ return cli_err(appctx, "Integer value expected.\n");
+
+ v = atoi(args[4]);
+ if (v < 0)
+ return cli_err(appctx, "Value out of range.\n");
+
+ /* OK, the value is fine, so we assign it to the proxy and to all of
+ * its listeners. The blocked ones will be dequeued.
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ px->maxconn = v;
+ list_for_each_entry(l, &px->conf.listeners, by_fe) {
+ if (l->state == LI_FULL)
+ resume_listener(l, 1);
+ }
+
+ if (px->maxconn > px->feconn)
+ dequeue_proxy_listeners(px);
+
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ return 1;
+}
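+
+/* Example (illustrative): "set maxconn frontend fe_main 1000" (with a
+ * hypothetical frontend named "fe_main") raises the limit and immediately
+ * resumes listeners left in LI_FULL state under the previous value.
+ */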
+
+/* Parses the "shutdown frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_shutdown_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[2]);
+ if (!px)
+ return 1;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return cli_msg(appctx, LOG_NOTICE, "Frontend was already shut down.\n");
+
+ stop_proxy(px);
+ return 1;
+}
+
+/* Parses the "disable frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_disable_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ int ret;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[2]);
+ if (!px)
+ return 1;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return cli_msg(appctx, LOG_NOTICE, "Frontend was previously shut down, cannot disable.\n");
+
+ if (!px->li_ready)
+ return cli_msg(appctx, LOG_NOTICE, "All sockets are already disabled.\n");
+
+ /* pause_proxy will take PROXY_LOCK */
+ ret = pause_proxy(px);
+
+ if (!ret)
+ return cli_err(appctx, "Failed to pause frontend, check logs for precise cause.\n");
+
+ return 1;
+}
+
+/* Parses the "enable frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_enable_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ int ret;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[2]);
+ if (!px)
+ return 1;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return cli_err(appctx, "Frontend was previously shut down, cannot enable.\n");
+
+ if (px->li_ready == px->li_all)
+ return cli_msg(appctx, LOG_NOTICE, "All sockets are already enabled.\n");
+
+ /* resume_proxy will take PROXY_LOCK */
+ ret = resume_proxy(px);
+
+ if (!ret)
+ return cli_err(appctx, "Failed to resume frontend, check logs for precise cause (port conflict?).\n");
+ return 1;
+}
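+
+/* Example (illustrative, hypothetical frontend "fe_main"): "disable frontend
+ * fe_main" pauses all of the frontend's listeners through pause_proxy(), and
+ * "enable frontend fe_main" resumes them through resume_proxy(), which fails
+ * if a port was taken by another process in the meantime.
+ */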
+
+/* appctx context used during "show errors" */
+struct show_errors_ctx {
+ struct proxy *px; /* current proxy being dumped, NULL = not started yet. */
+ unsigned int flag; /* bit0: buffer being dumped, 0 = req, 1 = resp ; bit1=skip req ; bit2=skip resp. */
+ unsigned int ev_id; /* event ID of error being dumped */
+ int iid; /* if >= 0, ID of the proxy to filter on */
+ int ptr; /* <0: headers, >=0 : text pointer to restart from */
+ int bol; /* pointer to beginning of current line */
+};
+
+/* "show errors" handler for the CLI. Returns 0 if wants to continue, 1 to stop
+ * now.
+ */
+static int cli_parse_show_errors(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_errors_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (*args[2]) {
+ struct proxy *px;
+
+ px = proxy_find_by_name(args[2], 0, 0);
+ if (px)
+ ctx->iid = px->uuid;
+ else
+ ctx->iid = atoi(args[2]);
+
+ if (!ctx->iid)
+ return cli_err(appctx, "No such proxy.\n");
+ }
+ else
+ ctx->iid = -1; // dump all proxies
+
+ ctx->flag = 0;
+ if (strcmp(args[3], "request") == 0)
+ ctx->flag |= 4; // ignore response
+ else if (strcmp(args[3], "response") == 0)
+ ctx->flag |= 2; // ignore request
+ ctx->px = NULL;
+ return 0;
+}
+
+/* This function dumps all captured errors onto the stream connector's
+ * read buffer. It returns 0 if the output buffer is full and it needs
+ * to be called again, otherwise non-zero.
+ */
+static int cli_io_handler_show_errors(struct appctx *appctx)
+{
+ struct show_errors_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ extern const char *monthname[12];
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ chunk_reset(&trash);
+
+ if (!ctx->px) {
+ /* the function had not been called yet, let's prepare the
+ * buffer for a response.
+ */
+ struct tm tm;
+
+ get_localtime(date.tv_sec, &tm);
+ chunk_appendf(&trash, "Total events captured on [%02d/%s/%04d:%02d:%02d:%02d.%03d] : %u\n",
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, (int)(date.tv_usec/1000),
+ error_snapshot_id);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send;
+
+ ctx->px = proxies_list;
+ ctx->bol = 0;
+ ctx->ptr = -1;
+ }
+
+ /* we have two inner loops here, one for the proxy, the other one for
+ * the buffer.
+ */
+ while (ctx->px) {
+ struct error_snapshot *es;
+
+ HA_RWLOCK_RDLOCK(PROXY_LOCK, &ctx->px->lock);
+
+ if ((ctx->flag & 1) == 0) {
+ es = ctx->px->invalid_req;
+ if (ctx->flag & 2) // skip req
+ goto next;
+ }
+ else {
+ es = ctx->px->invalid_rep;
+ if (ctx->flag & 4) // skip resp
+ goto next;
+ }
+
+ if (!es)
+ goto next;
+
+ if (ctx->iid >= 0 &&
+ ctx->px->uuid != ctx->iid &&
+ (!es->oe || es->oe->uuid != ctx->iid))
+ goto next;
+
+ if (ctx->ptr < 0) {
+ /* just print headers now */
+
+ char pn[INET6_ADDRSTRLEN];
+ struct tm tm;
+ int port;
+
+ get_localtime(es->when.tv_sec, &tm);
+ chunk_appendf(&trash, " \n[%02d/%s/%04d:%02d:%02d:%02d.%03d]",
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, (int)(es->when.tv_usec/1000));
+
+ switch (addr_to_str(&es->src, pn, sizeof(pn))) {
+ case AF_INET:
+ case AF_INET6:
+ port = get_host_port(&es->src);
+ break;
+ default:
+ port = 0;
+ }
+
+ switch (ctx->flag & 1) {
+ case 0:
+ chunk_appendf(&trash,
+ " frontend %s (#%d): invalid request\n"
+ " backend %s (#%d)",
+ ctx->px->id, ctx->px->uuid,
+ (es->oe && es->oe->cap & PR_CAP_BE) ? es->oe->id : "<NONE>",
+ (es->oe && es->oe->cap & PR_CAP_BE) ? es->oe->uuid : -1);
+ break;
+ case 1:
+ chunk_appendf(&trash,
+ " backend %s (#%d): invalid response\n"
+ " frontend %s (#%d)",
+ ctx->px->id, ctx->px->uuid,
+ es->oe ? es->oe->id : "<NONE>" , es->oe ? es->oe->uuid : -1);
+ break;
+ }
+
+ chunk_appendf(&trash,
+ ", server %s (#%d), event #%u, src %s:%d\n"
+ " buffer starts at %llu (including %u out), %u free,\n"
+ " len %u, wraps at %u, error at position %u\n",
+ es->srv ? es->srv->id : "<NONE>",
+ es->srv ? es->srv->puid : -1,
+ es->ev_id, pn, port,
+ es->buf_ofs, es->buf_out,
+ global.tune.bufsize - es->buf_out - es->buf_len,
+ es->buf_len, es->buf_wrap, es->buf_err);
+
+ if (es->show)
+ es->show(&trash, es);
+
+ chunk_appendf(&trash, " \n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send_unlock;
+
+ ctx->ptr = 0;
+ ctx->ev_id = es->ev_id;
+ }
+
+ if (ctx->ev_id != es->ev_id) {
+ /* the snapshot changed while we were dumping it */
+ chunk_appendf(&trash,
+ " WARNING! update detected on this snapshot, dump interrupted. Please re-check!\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send_unlock;
+
+ goto next;
+ }
+
+ /* OK, ptr >= 0, so we have to dump the current line */
+ while (ctx->ptr < es->buf_len && ctx->ptr < global.tune.bufsize) {
+ int newptr;
+ int newline;
+
+ newline = ctx->bol;
+ newptr = dump_text_line(&trash, es->buf, global.tune.bufsize, es->buf_len, &newline, ctx->ptr);
+ if (newptr == ctx->ptr)
+ goto cant_send_unlock;
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send_unlock;
+
+ ctx->ptr = newptr;
+ ctx->bol = newline;
+ }
+ next:
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &ctx->px->lock);
+ ctx->bol = 0;
+ ctx->ptr = -1;
+ ctx->flag ^= 1;
+ if (!(ctx->flag & 1))
+ ctx->px = ctx->px->next;
+ }
+
+ /* dump complete */
+ return 1;
+
+ cant_send_unlock:
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &ctx->px->lock);
+ cant_send:
+ sc_need_room(sc);
+ return 0;
+}
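+
+/* Example (illustrative, hypothetical proxy "fe_main"): on the CLI,
+ * "show errors fe_main request" dumps only the last captured invalid request
+ * for that proxy, while "show errors" alone walks all proxies and dumps both
+ * invalid requests and responses.
+ */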
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "disable", "frontend", NULL }, "disable frontend <frontend> : temporarily disable specific frontend", cli_parse_disable_frontend, NULL, NULL },
+ { { "enable", "frontend", NULL }, "enable frontend <frontend> : re-enable specific frontend", cli_parse_enable_frontend, NULL, NULL },
+ { { "set", "maxconn", "frontend", NULL }, "set maxconn frontend <frontend> <value> : change a frontend's maxconn setting", cli_parse_set_maxconn_frontend, NULL },
+ { { "show","servers", "conn", NULL }, "show servers conn [<backend>] : dump server connections status (all or for a single backend)", cli_parse_show_servers, cli_io_handler_servers_state },
+ { { "show","servers", "state", NULL }, "show servers state [<backend>] : dump volatile server information (all or for a single backend)", cli_parse_show_servers, cli_io_handler_servers_state },
+ { { "show", "backend", NULL }, "show backend : list backends in the current running config", NULL, cli_io_handler_show_backend },
+ { { "shutdown", "frontend", NULL }, "shutdown frontend <frontend> : stop a specific frontend", cli_parse_shutdown_frontend, NULL, NULL },
+ { { "set", "dynamic-cookie-key", "backend", NULL }, "set dynamic-cookie-key backend <bk> <k> : change a backend secret key for dynamic cookies", cli_parse_set_dyncookie_key_backend, NULL },
+ { { "enable", "dynamic-cookie", "backend", NULL }, "enable dynamic-cookie backend <bk> : enable dynamic cookies on a specific backend", cli_parse_enable_dyncookie_backend, NULL },
+ { { "disable", "dynamic-cookie", "backend", NULL }, "disable dynamic-cookie backend <bk> : disable dynamic cookies on a specific backend", cli_parse_disable_dyncookie_backend, NULL },
+ { { "show", "errors", NULL }, "show errors [<px>] [request|response] : report last request and/or response errors for each proxy", cli_parse_show_errors, cli_io_handler_show_errors, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/qmux_http.c b/src/qmux_http.c
new file mode 100644
index 0000000..6eedf0c
--- /dev/null
+++ b/src/qmux_http.c
@@ -0,0 +1,131 @@
+#include <haproxy/qmux_http.h>
+
+#include <haproxy/api-t.h>
+#include <haproxy/htx.h>
+#include <haproxy/qmux_trace.h>
+
+/* QUIC MUX rcv_buf operation using HTX data. Received data from stream <qcs>
+ * will be transferred as HTX in <buf>. Output buffer is expected to be of
+ * length <count>. <fin> will be set to signal the last data to receive on this
+ * stream.
+ *
+ * Return the size in bytes of transferred data.
+ */
+size_t qcs_http_rcv_buf(struct qcs *qcs, struct buffer *buf, size_t count,
+ char *fin)
+{
+ struct htx *qcs_htx = NULL;
+ struct htx *cs_htx = NULL;
+ size_t ret = 0;
+
+ TRACE_ENTER(QMUX_EV_STRM_RECV, qcs->qcc->conn, qcs);
+
+ *fin = 0;
+ qcs_htx = htx_from_buf(&qcs->rx.app_buf);
+ if (htx_is_empty(qcs_htx)) {
+ /* Set buffer data to 0 as HTX is empty. */
+ htx_to_buf(qcs_htx, &qcs->rx.app_buf);
+ goto end;
+ }
+
+ ret = qcs_htx->data;
+
+ cs_htx = htx_from_buf(buf);
+ if (htx_is_empty(cs_htx) && htx_used_space(qcs_htx) <= count) {
+ /* EOM will be copied to cs_htx via b_xfer(). */
+ if (qcs_htx->flags & HTX_FL_EOM)
+ *fin = 1;
+
+ htx_to_buf(cs_htx, buf);
+ htx_to_buf(qcs_htx, &qcs->rx.app_buf);
+ b_xfer(buf, &qcs->rx.app_buf, b_data(&qcs->rx.app_buf));
+ goto end;
+ }
+
+ htx_xfer_blks(cs_htx, qcs_htx, count, HTX_BLK_UNUSED);
+ BUG_ON(qcs_htx->flags & HTX_FL_PARSING_ERROR);
+
+ /* Copy EOM from src to dst buffer if all data copied. */
+ if (htx_is_empty(qcs_htx) && (qcs_htx->flags & HTX_FL_EOM)) {
+ cs_htx->flags |= HTX_FL_EOM;
+ *fin = 1;
+ }
+
+ cs_htx->extra = qcs_htx->extra ? (qcs_htx->data + qcs_htx->extra) : 0;
+ htx_to_buf(cs_htx, buf);
+ htx_to_buf(qcs_htx, &qcs->rx.app_buf);
+ ret -= qcs_htx->data;
+
+ end:
+ TRACE_LEAVE(QMUX_EV_STRM_RECV, qcs->qcc->conn, qcs);
+
+ return ret;
+}
+
+/* QUIC MUX snd_buf operation using HTX data. HTX data will be transferred from
+ * <buf> to <qcs> stream buffer. Input buffer is expected to be of length
+ * <count>. <fin> will be set to signal the last data to send for this stream.
+ *
+ * Return the size in bytes of transferred data.
+ */
+size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count,
+ char *fin)
+{
+ struct htx *htx;
+ size_t ret;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ htx = htx_from_buf(buf);
+
+ ret = qcs->qcc->app_ops->snd_buf(qcs, htx, count);
+ *fin = (htx->flags & HTX_FL_EOM) && htx_is_empty(htx);
+
+ htx_to_buf(htx, buf);
+
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ return ret;
+}
+
+/* QUIC MUX snd_buf reset. HTX data stored in <buf> of length <count> will be
+ * cleared. This can be used when data should not be transmitted any longer.
+ *
+ * Return the size in bytes of cleared data.
+ */
+size_t qcs_http_reset_buf(struct qcs *qcs, struct buffer *buf, size_t count)
+{
+ struct htx *htx;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ htx = htx_from_buf(buf);
+ htx_reset(htx);
+ htx_to_buf(htx, buf);
+
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ return count;
+}
+
+/* Utility function which can be used by the app layer when an empty STREAM
+ * frame is received with the FIN bit set for <qcs> stream. It will ensure
+ * that HTX EOM is properly inserted in <qcs> app_buf.
+ */
+void qcs_http_handle_standalone_fin(struct qcs *qcs)
+{
+ struct buffer *appbuf;
+ struct htx *htx = NULL;
+
+ appbuf = qc_get_buf(qcs, &qcs->rx.app_buf);
+ BUG_ON(!appbuf);
+
+ htx = htx_from_buf(appbuf);
+ if (htx_is_empty(htx)) {
+ if (!htx_add_endof(htx, HTX_BLK_EOT)) {
+ ABORT_NOW(); /* cannot happen for empty HTX message. */
+ }
+ }
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, appbuf);
+}
diff --git a/src/qmux_trace.c b/src/qmux_trace.c
new file mode 100644
index 0000000..5f2e262
--- /dev/null
+++ b/src/qmux_trace.c
@@ -0,0 +1,108 @@
+#include <haproxy/qmux_trace.h>
+
+#include <import/ist.h>
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/chunk.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/quic_frame-t.h>
+
+/* trace source and events */
+static void qmux_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct name_desc qmux_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="qcs", .desc="QUIC stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc qmux_trace_decoding[] = {
+#define QMUX_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define QMUX_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only qcc/qcs state and flags, no real decoding" },
+ { /* end */ }
+};
+
+struct trace_source trace_qmux = {
+ .name = IST("qmux"),
+ .desc = "QUIC multiplexer",
+ .arg_def = TRC_ARG1_CONN, /* TRACE()'s first argument is always a connection */
+ .default_cb = qmux_trace,
+ .known_events = qmux_trace_events,
+ .lockon_args = qmux_trace_lockon_args,
+ .decoding = qmux_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+
+static void qmux_trace_frm(const struct quic_frame *frm)
+{
+ switch (frm->type) {
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ chunk_appendf(&trace_buf, " max_streams=%llu",
+ (ullong)frm->max_streams_bidi.max_streams);
+ break;
+
+ case QUIC_FT_MAX_STREAMS_UNI:
+ chunk_appendf(&trace_buf, " max_streams=%llu",
+ (ullong)frm->max_streams_uni.max_streams);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/* quic-mux trace handler */
+static void qmux_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct qcc *qcc = conn ? conn->ctx : NULL;
+ const struct qcs *qcs = a2;
+
+ if (!qcc)
+ return;
+
+ if (src->verbosity > QMUX_VERB_CLEAN) {
+ chunk_appendf(&trace_buf, " : qcc=%p(F)", qcc);
+ if (qcc->conn->handle.qc)
+ chunk_appendf(&trace_buf, " qc=%p", qcc->conn->handle.qc);
+
+ if (qcs)
+ chunk_appendf(&trace_buf, " qcs=%p .id=%llu .st=%s",
+ qcs, (ullong)qcs->id,
+ qcs_st_to_str(qcs->st));
+
+ if (mask & QMUX_EV_QCC_NQCS) {
+ const uint64_t *id = a3;
+ chunk_appendf(&trace_buf, " id=%llu", (ullong)*id);
+ }
+
+ if (mask & QMUX_EV_SEND_FRM)
+ qmux_trace_frm(a3);
+
+ if (mask & QMUX_EV_QCS_XFER_DATA) {
+ const struct qcs_xfer_data_trace_arg *arg = a3;
+ chunk_appendf(&trace_buf, " prep=%lu xfer=%d",
+ (ulong)arg->prep, arg->xfer);
+ }
+
+ if (mask & QMUX_EV_QCS_BUILD_STRM) {
+ const struct qcs_build_stream_trace_arg *arg = a3;
+ chunk_appendf(&trace_buf, " len=%lu fin=%d offset=%llu",
+ (ulong)arg->len, arg->fin, (ullong)arg->offset);
+ }
+ }
+}
+
+
+/* register qmux traces */
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
diff --git a/src/qpack-dec.c b/src/qpack-dec.c
new file mode 100644
index 0000000..a6e2923
--- /dev/null
+++ b/src/qpack-dec.c
@@ -0,0 +1,563 @@
+/*
+ * QPACK decompressor
+ *
+ * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/h3.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/qpack-t.h>
+#include <haproxy/qpack-dec.h>
+#include <haproxy/qpack-tbl.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/hpack-tbl.h>
+#include <haproxy/http-hdr.h>
+#include <haproxy/tools.h>
+
+#if defined(DEBUG_QPACK)
+#define qpack_debug_printf fprintf
+#define qpack_debug_hexdump debug_hexdump
+#else
+#define qpack_debug_printf(...) do { } while (0)
+#define qpack_debug_hexdump(...) do { } while (0)
+#endif
+
+/* Encoded field line bitmask */
+#define QPACK_EFL_BITMASK 0xf0
+#define QPACK_LFL_WPBNM 0x00 // Literal field line with post-base name reference
+#define QPACK_IFL_WPBI 0x10 // Indexed field line with post-based index
+#define QPACK_LFL_WLN_BIT 0x20 // Literal field line with literal name
+#define QPACK_LFL_WNR_BIT 0x40 // Literal field line with name reference
+#define QPACK_IFL_BIT 0x80 // Indexed field line
+
+/* reads a varint from <raw>'s lowest <b> bits and <len> bytes max (raw included).
+ * returns the 64-bit value on success after updating buf and len_in. Forces
+ * len_in to (uint64_t)-1 on truncated input.
+ * Note that this function is similar to the one used for HPACK (except that
+ * it supports up to 62-bit integers).
+ */
+static uint64_t qpack_get_varint(const unsigned char **buf, uint64_t *len_in, int b)
+{
+ uint64_t ret = 0;
+ int len = *len_in;
+ const uint8_t *raw = *buf;
+ uint8_t shift = 0;
+
+ len--;
+ ret = *raw++ & ((1ULL << b) - 1);
+ if (ret != (uint64_t)((1ULL << b) - 1))
+ goto end;
+
+ while (len && (*raw & 128)) {
+ ret += ((uint64_t)*raw++ & 127) << shift;
+ shift += 7;
+ len--;
+ }
+
+ /* last 7 bits */
+ if (!len)
+ goto too_short;
+
+ len--;
+ ret += ((uint64_t)*raw++ & 127) << shift;
+
+ end:
+ *buf = raw;
+ *len_in = len;
+ return ret;
+
+ too_short:
+ *len_in = (uint64_t)-1;
+ return 0;
+}
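+
+/* Worked example (illustrative, mirroring the HPACK example of RFC 7541
+ * C.1.2): with b=5, the bytes 0x1f 0x9a 0x0a decode to
+ * 31 + (26 << 0) + (10 << 7), i.e. 1337.
+ */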
+
+/* Decode an encoder stream.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qpack_decode_enc(struct buffer *buf, int fin, void *ctx)
+{
+ struct qcs *qcs = ctx;
+ size_t len;
+ unsigned char inst;
+
+ /* RFC 9204 4.2. Encoder and Decoder Streams
+ *
+ * The sender MUST NOT close either of these streams, and the receiver
+ * MUST NOT request that the sender close either of these streams.
+ * Closure of either unidirectional stream type MUST be treated as a
+ * connection error of type H3_CLOSED_CRITICAL_STREAM.
+ */
+ if (fin) {
+ qcc_emit_cc_app(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ return -1;
+ }
+
+ len = b_data(buf);
+ qpack_debug_hexdump(stderr, "[QPACK-DEC-ENC] ", b_head(buf), 0, len);
+
+ if (!len) {
+ qpack_debug_printf(stderr, "[QPACK-DEC-ENC] empty stream\n");
+ return 0;
+ }
+
+ inst = (unsigned char)*b_head(buf) & QPACK_ENC_INST_BITMASK;
+ if (inst == QPACK_ENC_INST_DUP) {
+ /* Duplicate */
+ }
+ else if (inst & QPACK_ENC_INST_IWNR_BIT) {
+ /* Insert With Name Reference */
+ }
+ else if (inst & QPACK_ENC_INST_IWLN_BIT) {
+ /* Insert with literal name */
+ }
+ else if (inst & QPACK_ENC_INST_SDTC_BIT) {
+ /* Set dynamic table capacity */
+ }
+
+ return 0;
+}
+
+/* Decode a decoder stream.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qpack_decode_dec(struct buffer *buf, int fin, void *ctx)
+{
+ struct qcs *qcs = ctx;
+ size_t len;
+ unsigned char inst;
+
+ /* RFC 9204 4.2. Encoder and Decoder Streams
+ *
+ * The sender MUST NOT close either of these streams, and the receiver
+ * MUST NOT request that the sender close either of these streams.
+ * Closure of either unidirectional stream type MUST be treated as a
+ * connection error of type H3_CLOSED_CRITICAL_STREAM.
+ */
+ if (fin) {
+ qcc_emit_cc_app(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ return -1;
+ }
+
+ len = b_data(buf);
+ qpack_debug_hexdump(stderr, "[QPACK-DEC-DEC] ", b_head(buf), 0, len);
+
+ if (!len) {
+ qpack_debug_printf(stderr, "[QPACK-DEC-DEC] empty stream\n");
+ return 0;
+ }
+
+ inst = (unsigned char)*b_head(buf) & QPACK_DEC_INST_BITMASK;
+ if (inst == QPACK_DEC_INST_ICINC) {
+ /* Insert count increment */
+ }
+ else if (inst & QPACK_DEC_INST_SACK) {
+ /* Section Acknowledgment */
+ }
+ else if (inst & QPACK_DEC_INST_SCCL) {
+ /* Stream cancellation */
+ }
+
+ return 0;
+}
+
+/* Decode a field section prefix made of two varints, <enc_ric> and <db>.
+ * Also sets the 'S' sign bit for <db>.
+ * Returns a negative error code on failure, 0 on success.
+ */
+static int qpack_decode_fs_pfx(uint64_t *enc_ric, uint64_t *db, int *sign_bit,
+ const unsigned char **raw, uint64_t *len)
+{
+ *enc_ric = qpack_get_varint(raw, len, 8);
+ if (*len == (uint64_t)-1)
+ return -QPACK_ERR_RIC;
+
+ *sign_bit = **raw & 0x80; /* 'S' is the top bit of the Delta Base byte */
+ *db = qpack_get_varint(raw, len, 7);
+ if (*len == (uint64_t)-1)
+ return -QPACK_ERR_DB;
+
+ return 0;
+}
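+
+/* For reference, the wire layout of this prefix (RFC 9204 4.5.1):
+ *
+ *     0   1   2   3   4   5   6   7
+ *   +---+---+---+---+---+---+---+---+
+ *   |   Required Insert Count (8+)  |
+ *   +---+---------------------------+
+ *   | S |      Delta Base (7+)      |
+ *   +---+---------------------------+
+ */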
+
+/* Decode a field section from the <raw> buffer of <len> bytes. Each parsed
+ * header is inserted into <list> of <list_size> entries max and uses <tmp> as
+ * a storage for some elements pointing into it. An end marker is inserted at
+ * the end of the list with empty strings as name/value.
+ *
+ * Returns the number of headers inserted into list excluding the end marker.
+ * In case of error, a negative code QPACK_ERR_* is returned.
+ */
+int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp,
+ struct http_hdr *list, int list_size)
+{
+ struct ist name, value;
+ uint64_t enc_ric, db;
+ int s;
+ unsigned int efl_type;
+ int ret;
+ int hdr_idx = 0;
+
+ qpack_debug_hexdump(stderr, "[QPACK-DEC-FS] ", (const char *)raw, 0, len);
+
+ /* parse field section prefix */
+ ret = qpack_decode_fs_pfx(&enc_ric, &db, &s, &raw, &len);
+ if (ret < 0) {
+ qpack_debug_printf(stderr, "##ERR@%d(%d)\n", __LINE__, ret);
+ goto out;
+ }
+
+ chunk_reset(tmp);
+ qpack_debug_printf(stderr, "enc_ric: %llu db: %llu s=%d\n",
+ (unsigned long long)enc_ric, (unsigned long long)db, !!s);
+ /* Decode field lines */
+ while (len) {
+ if (hdr_idx >= list_size) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TOO_LARGE;
+ goto out;
+ }
+
+ /* parse field line representation */
+ efl_type = *raw & QPACK_EFL_BITMASK;
+ qpack_debug_printf(stderr, "efl_type=0x%02x\n", efl_type);
+
+ if (efl_type == QPACK_LFL_WPBNM) {
+ /* Literal field line with post-base name reference
+ * TODO adjust this when dynamic table support is implemented.
+ */
+#if 0
+ uint64_t index __maybe_unused, length;
+ unsigned int n __maybe_unused, h __maybe_unused;
+
+ qpack_debug_printf(stderr, "literal field line with post-base name reference:");
+ n = *raw & 0x08;
+ index = qpack_get_varint(&raw, &len, 3);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " n=%d index=%llu", !!n, (unsigned long long)index);
+ h = *raw & 0x80;
+ length = qpack_get_varint(&raw, &len, 7);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " h=%d length=%llu", !!h, (unsigned long long)length);
+
+ if (len < length) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ raw += length;
+ len -= length;
+#endif
+
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+ else if (efl_type == QPACK_IFL_WPBI) {
+ /* Indexed field line with post-base index
+ * TODO adjust this when dynamic table support is implemented.
+ */
+#if 0
+ uint64_t index __maybe_unused;
+
+ qpack_debug_printf(stderr, "indexed field line with post-base index:");
+ index = qpack_get_varint(&raw, &len, 4);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " index=%llu", (unsigned long long)index);
+#endif
+
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+ else if (efl_type & QPACK_IFL_BIT) {
+ /* Indexed field line */
+ uint64_t index;
+ unsigned int static_tbl;
+
+ qpack_debug_printf(stderr, "indexed field line:");
+ static_tbl = efl_type & 0x40;
+ index = qpack_get_varint(&raw, &len, 6);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (static_tbl && index < QPACK_SHT_SIZE) {
+ name = qpack_sht[index].n;
+ value = qpack_sht[index].v;
+ }
+ else {
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ *
+ * TODO adjust this when dynamic table support is implemented.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+
+ qpack_debug_printf(stderr, " t=%d index=%llu", !!static_tbl, (unsigned long long)index);
+ }
+ else if (efl_type & QPACK_LFL_WNR_BIT) {
+ /* Literal field line with name reference */
+ uint64_t index, length;
+ unsigned int static_tbl, n __maybe_unused, h;
+
+ qpack_debug_printf(stderr, "Literal field line with name reference:");
+ n = efl_type & 0x20;
+ static_tbl = efl_type & 0x10;
+ index = qpack_get_varint(&raw, &len, 4);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (static_tbl && index < QPACK_SHT_SIZE) {
+ name = qpack_sht[index].n;
+ }
+ else {
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ *
+ * TODO adjust this when dynamic table support is implemented.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+
+ qpack_debug_printf(stderr, " n=%d t=%d index=%llu", !!n, !!static_tbl, (unsigned long long)index);
+ h = *raw & 0x80;
+ length = qpack_get_varint(&raw, &len, 7);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " h=%d length=%llu", !!h, (unsigned long long)length);
+ if (h) {
+ char *trash;
+ int nlen;
+
+ trash = chunk_newstr(tmp);
+ if (!trash) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+ nlen = huff_dec(raw, length, trash, tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ qpack_debug_printf(stderr, " can't decode huffman.\n");
+ ret = -QPACK_ERR_HUFFMAN;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " [name huff %d->%d '%s']", (int)length, (int)nlen, trash);
+ /* makes an ist from tmp storage */
+ b_add(tmp, nlen);
+ value = ist2(trash, nlen);
+ }
+ else {
+ value = ist2(raw, length);
+ }
+
+ if (len < length) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ raw += length;
+ len -= length;
+ }
+ else if (efl_type & QPACK_LFL_WLN_BIT) {
+ /* Literal field line with literal name */
+ unsigned int n __maybe_unused, hname, hvalue;
+ uint64_t name_len, value_len;
+
+ qpack_debug_printf(stderr, "Literal field line with literal name:");
+ n = *raw & 0x10;
+ hname = *raw & 0x08;
+ name_len = qpack_get_varint(&raw, &len, 3);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " n=%d hname=%d name_len=%llu", !!n, !!hname, (unsigned long long)name_len);
+ /* Name string */
+
+ if (len < name_len) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (hname) {
+ char *trash;
+ int nlen;
+
+ trash = chunk_newstr(tmp);
+ if (!trash) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+ nlen = huff_dec(raw, name_len, trash, tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ qpack_debug_printf(stderr, " can't decode huffman.\n");
+ ret = -QPACK_ERR_HUFFMAN;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " [name huff %d->%d '%s']", (int)name_len, (int)nlen, trash);
+ /* makes an ist from tmp storage */
+ b_add(tmp, nlen);
+ name = ist2(trash, nlen);
+ }
+ else {
+ name = ist2(raw, name_len);
+ }
+
+ raw += name_len;
+ len -= name_len;
+
+ hvalue = *raw & 0x80;
+ value_len = qpack_get_varint(&raw, &len, 7);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " hvalue=%d value_len=%llu", !!hvalue, (unsigned long long)value_len);
+
+ if (len < value_len) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (hvalue) {
+ char *trash;
+ int nlen;
+
+ trash = chunk_newstr(tmp);
+ if (!trash) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+ nlen = huff_dec(raw, value_len, trash, tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ qpack_debug_printf(stderr, " can't decode huffman.\n");
+ ret = -QPACK_ERR_HUFFMAN;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " [name huff %d->%d '%s']", (int)value_len, (int)nlen, trash);
+ /* makes an ist from tmp storage */
+ b_add(tmp, nlen);
+ value = ist2(trash, nlen);
+ }
+ else {
+ value = ist2(raw, value_len);
+ }
+
+ raw += value_len;
+ len -= value_len;
+ }
+
+ /* We must not accept empty header names (forbidden by the spec and used
+ * as a list termination).
+ */
+ if (!name.len) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+
+ list[hdr_idx].n = name;
+ list[hdr_idx].v = value;
+ ++hdr_idx;
+
+ qpack_debug_printf(stderr, "\n");
+ }
+
+ if (hdr_idx >= list_size) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TOO_LARGE;
+ goto out;
+ }
+
+ /* put an end marker */
+ list[hdr_idx].n = list[hdr_idx].v = IST_NULL;
+ ret = hdr_idx;
+
+ out:
+ qpack_debug_printf(stderr, "-- done: ret=%d\n", ret);
+ return ret;
+}
diff --git a/src/qpack-enc.c b/src/qpack-enc.c
new file mode 100644
index 0000000..59bb97f
--- /dev/null
+++ b/src/qpack-enc.c
@@ -0,0 +1,185 @@
+#include <haproxy/qpack-enc.h>
+
+#include <haproxy/buf.h>
+#include <haproxy/intops.h>
+
+/* Returns the byte size required to encode <i> as a <prefix_size>-bit prefix
+ * integer. Note that the continuation bytes are counted from <i> instead of
+ * <i> minus the prefix maximum, so the result may overestimate by one byte,
+ * which is harmless for buffer room checks.
+ */
+static size_t qpack_get_prefix_int_size(int i, int prefix_size)
+{
+ int n = (1 << prefix_size) - 1;
+ if (i < n) {
+ return 1;
+ }
+ else {
+ size_t result = 0;
+ while (i) {
+ ++result;
+ i >>= 7;
+ }
+ return 1 + result;
+ }
+}
+
+/* Encode the integer <i> in the buffer <out> in a <prefix_size>-bit prefix
+ * integer. The caller must ensure there is enough size in the buffer. The
+ * prefix is OR-ed with <before_prefix> byte.
+ *
+ * Returns 0 if success else non-zero.
+ */
+static int qpack_encode_prefix_integer(struct buffer *out, int i,
+ int prefix_size,
+ unsigned char before_prefix)
+{
+ const int mod = (1 << prefix_size) - 1;
+ BUG_ON_HOT(!prefix_size);
+
+ if (i < mod) {
+ if (b_room(out) < 1)
+ return 1;
+
+ b_putchr(out, before_prefix | i);
+ }
+	else {
+		int to_encode = i - mod;
+		/* 1 prefix byte plus one byte per 7-bit chunk of the remainder */
+		size_t sz = 2;
+		int v = to_encode >> 7;
+		while (v) {
+			++sz;
+			v >>= 7;
+		}
+		if (b_room(out) < sz)
+			return 1;
+
+ b_putchr(out, before_prefix | mod);
+ while (1) {
+ if (to_encode > 0x7f) {
+ b_putchr(out, 0x80 | (to_encode & 0x7f));
+ to_encode >>= 7;
+ }
+ else {
+ b_putchr(out, to_encode & 0x7f);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
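+
+/* Worked example (illustrative): encoding 1337 with a 5-bit prefix, as in
+ * RFC 7541 section C.1 whose integer representation QPACK reuses, yields
+ * three bytes: before_prefix|0x1f (saturated prefix), then 0x9a
+ * (0x80 | (1306 & 0x7f)) and 0x0a (1306 >> 7), since 1337 - 31 = 1306.
+ */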
+
+/* Returns 0 on success else non-zero. */
+int qpack_encode_int_status(struct buffer *out, unsigned int status)
+{
+ int status_size, idx = 0;
+
+ if (status < 100 || status > 599)
+ return 1;
+
+ switch (status) {
+ case 103: idx = 24; break;
+ case 200: idx = 25; break;
+ case 304: idx = 26; break;
+ case 404: idx = 27; break;
+ case 503: idx = 28; break;
+ case 100: idx = 63; break;
+ case 204: idx = 64; break;
+ case 206: idx = 65; break;
+ case 302: idx = 66; break;
+ case 400: idx = 67; break;
+ case 403: idx = 68; break;
+ case 421: idx = 69; break;
+ case 425: idx = 70; break;
+ case 500: idx = 71; break;
+
+	/* status code not in the QPACK static table, idx remains 0. */
+ default: break;
+ }
+
+ if (idx) {
+ /* status code present in QPACK static table
+ * -> indexed field line
+ */
+ status_size = qpack_get_prefix_int_size(idx, 6);
+ if (b_room(out) < status_size)
+ return 1;
+
+ qpack_encode_prefix_integer(out, idx, 6, 0xc0);
+ }
+ else {
+ /* status code not present in QPACK static table
+ * -> literal field line with name reference
+ */
+ char a, b, c;
+ a = '0' + status / 100;
+ status -= (status / 100 * 100);
+ b = '0' + status / 10;
+ status -= (status / 10 * 10);
+ c = '0' + status;
+
+ /* field name */
+ if (qpack_encode_prefix_integer(out, 24, 4, 0x50))
+ return 1;
+
+ /* field value length */
+ if (qpack_encode_prefix_integer(out, 3, 7, 0x00))
+ return 1;
+
+ if (b_room(out) < 3)
+ return 1;
+
+ b_putchr(out, a);
+ b_putchr(out, b);
+ b_putchr(out, c);
+ }
+
+ return 0;
+}
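+
+/* Illustrative byte sequences produced by qpack_encode_int_status():
+ * - status 200 is in the static table (index 25), so a single indexed
+ *   field line byte is emitted: 0xc0 | 25 = 0xd9 ;
+ * - status 418 is not, so a literal field line with a name reference to
+ *   ":status" (index 24) is emitted: 0x5f 0x09 (0x50 | 15, then 24 - 15),
+ *   followed by the value length 0x03 and the ASCII digits '4' '1' '8'.
+ */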
+
+/* Encodes the QPACK field section prefix with a Required Insert Count and a
+ * Delta Base both null, as the dynamic table is not used.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qpack_encode_field_section_line(struct buffer *out)
+{
+	char qpack_field_section[] = {
+		'\x00',      /* Required Insert Count */
+		'\x00',      /* S + Delta Base */
+	};
+
+ if (b_room(out) < 2)
+ return 1;
+
+ b_putblk(out, qpack_field_section, 2);
+
+ return 0;
+}
+
+#define QPACK_LFL_WLN_BIT 0x20 // Literal field line with literal name
+
+/* Encode a header in literal field line with literal name.
+ * Returns 0 on success else non-zero.
+ */
+int qpack_encode_header(struct buffer *out, const struct ist n, const struct ist v)
+{
+ int i;
+ size_t sz = qpack_get_prefix_int_size(n.len, 3) + n.len +
+ qpack_get_prefix_int_size(v.len, 7) + v.len;
+
+ if (sz > b_room(out))
+ return 1;
+
+	/* literal field line with literal name
+	 * | 0 | 0 | 1 | N | H | . | . | . |
+	 * N: when set, intermediaries must not add this header to a dynamic table
+	 * H: huffman encoded (not used here)
+	 * then the name length as a 3-bit prefix integer
+	 */
+ qpack_encode_prefix_integer(out, n.len, 3, QPACK_LFL_WLN_BIT);
+ /* name */
+ for (i = 0; i < n.len; ++i)
+ b_putchr(out, n.ptr[i]);
+
+ /* | 0 | . | . | . | . | . | . | . |
+ * value len
+ */
+ qpack_encode_prefix_integer(out, v.len, 7, 0x00);
+ /* value */
+ for (i = 0; i < v.len; ++i)
+ b_putchr(out, v.ptr[i]);
+
+ return 0;
+}
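+
+/* Illustrative example: qpack_encode_header(out, ist("cookie"), ist("a=b"))
+ * emits 0x26 (0x20 | name length 6), the 6 raw name bytes, then 0x03 and the
+ * 3 raw value bytes, since neither string is Huffman-encoded here (H=0).
+ */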
diff --git a/src/qpack-tbl.c b/src/qpack-tbl.c
new file mode 100644
index 0000000..6713f6d
--- /dev/null
+++ b/src/qpack-tbl.c
@@ -0,0 +1,415 @@
+/*
+ * QPACK header table management (draft-ietf-quic-qpack-20)
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <import/ist.h>
+#include <haproxy/http-hdr-t.h>
+#include <haproxy/qpack-tbl.h>
+
+/* static header table as in draft-ietf-quic-qpack-20 Appendix A. */
+const struct http_hdr qpack_sht[QPACK_SHT_SIZE] = {
+ [ 0] = { .n = IST(":authority"), .v = IST("") },
+ [ 1] = { .n = IST(":path"), .v = IST("/") },
+ [ 2] = { .n = IST("age"), .v = IST("0") },
+ [ 3] = { .n = IST("content-disposition"), .v = IST("") },
+ [ 4] = { .n = IST("content-length"), .v = IST("0") },
+ [ 5] = { .n = IST("cookie"), .v = IST("") },
+ [ 6] = { .n = IST("date"), .v = IST("") },
+ [ 7] = { .n = IST("etag"), .v = IST("") },
+ [ 8] = { .n = IST("if-modified-since"), .v = IST("") },
+ [ 9] = { .n = IST("if-none-match"), .v = IST("") },
+ [10] = { .n = IST("last-modified"), .v = IST("") },
+ [11] = { .n = IST("link"), .v = IST("") },
+ [12] = { .n = IST("location"), .v = IST("") },
+ [13] = { .n = IST("referer"), .v = IST("") },
+ [14] = { .n = IST("set-cookie"), .v = IST("") },
+ [15] = { .n = IST(":method"), .v = IST("CONNECT") },
+ [16] = { .n = IST(":method"), .v = IST("DELETE") },
+ [17] = { .n = IST(":method"), .v = IST("GET") },
+ [18] = { .n = IST(":method"), .v = IST("HEAD") },
+ [19] = { .n = IST(":method"), .v = IST("OPTIONS") },
+ [20] = { .n = IST(":method"), .v = IST("POST") },
+ [21] = { .n = IST(":method"), .v = IST("PUT") },
+ [22] = { .n = IST(":scheme"), .v = IST("http") },
+ [23] = { .n = IST(":scheme"), .v = IST("https") },
+ [24] = { .n = IST(":status"), .v = IST("103") },
+ [25] = { .n = IST(":status"), .v = IST("200") },
+ [26] = { .n = IST(":status"), .v = IST("304") },
+ [27] = { .n = IST(":status"), .v = IST("404") },
+ [28] = { .n = IST(":status"), .v = IST("503") },
+ [29] = { .n = IST("accept"), .v = IST("*/*") },
+ [30] = { .n = IST("accept"), .v = IST("application/dns-message") },
+ [31] = { .n = IST("accept-encoding"), .v = IST("gzip, deflate, br") },
+ [32] = { .n = IST("accept-ranges"), .v = IST("bytes") },
+ [33] = { .n = IST("access-control-allow-headers"), .v = IST("cache-control") },
+ [34] = { .n = IST("access-control-allow-headers"), .v = IST("content-type") },
+ [35] = { .n = IST("access-control-allow-origin"), .v = IST("*") },
+ [36] = { .n = IST("cache-control"), .v = IST("max-age=0") },
+ [37] = { .n = IST("cache-control"), .v = IST("max-age=2592000") },
+ [38] = { .n = IST("cache-control"), .v = IST("max-age=604800") },
+ [39] = { .n = IST("cache-control"), .v = IST("no-cache") },
+ [40] = { .n = IST("cache-control"), .v = IST("no-store") },
+ [41] = { .n = IST("cache-control"), .v = IST("public, max-age=31536000") },
+ [42] = { .n = IST("content-encoding"), .v = IST("br") },
+ [43] = { .n = IST("content-encoding"), .v = IST("gzip") },
+ [44] = { .n = IST("content-type"), .v = IST("application/dns-message") },
+ [45] = { .n = IST("content-type"), .v = IST("application/javascript") },
+ [46] = { .n = IST("content-type"), .v = IST("application/json") },
+ [47] = { .n = IST("content-type"), .v = IST("application/"
+ "x-www-form-urlencoded") },
+ [48] = { .n = IST("content-type"), .v = IST("image/gif") },
+ [49] = { .n = IST("content-type"), .v = IST("image/jpeg") },
+ [50] = { .n = IST("content-type"), .v = IST("image/png") },
+ [51] = { .n = IST("content-type"), .v = IST("text/css") },
+ [52] = { .n = IST("content-type"), .v = IST("text/html;"
+ " charset=utf-8") },
+ [53] = { .n = IST("content-type"), .v = IST("text/plain") },
+ [54] = { .n = IST("content-type"), .v = IST("text/plain;"
+ "charset=utf-8") },
+ [55] = { .n = IST("range"), .v = IST("bytes=0-") },
+ [56] = { .n = IST("strict-transport-security"), .v = IST("max-age=31536000") },
+ [57] = { .n = IST("strict-transport-security"), .v = IST("max-age=31536000;"
+ " includesubdomains") },
+ [58] = { .n = IST("strict-transport-security"), .v = IST("max-age=31536000;"
+ " includesubdomains;"
+ " preload") },
+ [59] = { .n = IST("vary"), .v = IST("accept-encoding") },
+ [60] = { .n = IST("vary"), .v = IST("origin") },
+ [61] = { .n = IST("x-content-type-options"), .v = IST("nosniff") },
+ [62] = { .n = IST("x-xss-protection"), .v = IST("1; mode=block") },
+ [63] = { .n = IST(":status"), .v = IST("100") },
+ [64] = { .n = IST(":status"), .v = IST("204") },
+ [65] = { .n = IST(":status"), .v = IST("206") },
+ [66] = { .n = IST(":status"), .v = IST("302") },
+ [67] = { .n = IST(":status"), .v = IST("400") },
+ [68] = { .n = IST(":status"), .v = IST("403") },
+ [69] = { .n = IST(":status"), .v = IST("421") },
+ [70] = { .n = IST(":status"), .v = IST("425") },
+ [71] = { .n = IST(":status"), .v = IST("500") },
+ [72] = { .n = IST("accept-language"), .v = IST("") },
+ [73] = { .n = IST("access-control-allow-credentials"), .v = IST("FALSE") },
+ [74] = { .n = IST("access-control-allow-credentials"), .v = IST("TRUE") },
+ [75] = { .n = IST("access-control-allow-headers"), .v = IST("*") },
+ [76] = { .n = IST("access-control-allow-methods"), .v = IST("get") },
+ [77] = { .n = IST("access-control-allow-methods"), .v = IST("get, post, options") },
+ [78] = { .n = IST("access-control-allow-methods"), .v = IST("options") },
+ [79] = { .n = IST("access-control-expose-headers"), .v = IST("content-length") },
+ [80] = { .n = IST("access-control-request-headers"), .v = IST("content-type") },
+ [81] = { .n = IST("access-control-request-method"), .v = IST("get") },
+ [82] = { .n = IST("access-control-request-method"), .v = IST("post") },
+ [83] = { .n = IST("alt-svc"), .v = IST("clear") },
+ [84] = { .n = IST("authorization"), .v = IST("") },
+ [85] = { .n = IST("content-security-policy"), .v = IST("script-src 'none';"
+ " object-src 'none';"
+ " base-uri 'none'") },
+ [86] = { .n = IST("early-data"), .v = IST("1") },
+ [87] = { .n = IST("expect-ct"), .v = IST("") },
+ [88] = { .n = IST("forwarded"), .v = IST("") },
+ [89] = { .n = IST("if-range"), .v = IST("") },
+ [90] = { .n = IST("origin"), .v = IST("") },
+ [91] = { .n = IST("purpose"), .v = IST("prefetch") },
+ [92] = { .n = IST("server"), .v = IST("") },
+ [93] = { .n = IST("timing-allow-origin"), .v = IST("*") },
+ [94] = { .n = IST("upgrade-insecure-requests"), .v = IST("1") },
+ [95] = { .n = IST("user-agent"), .v = IST("") },
+ [96] = { .n = IST("x-forwarded-for"), .v = IST("") },
+ [97] = { .n = IST("x-frame-options"), .v = IST("deny") },
+ [98] = { .n = IST("x-frame-options"), .v = IST("sameorigin") },
+};
+
+struct pool_head *pool_head_qpack_tbl = NULL;
+
+#ifdef DEBUG_QPACK
+/* dump the whole dynamic header table */
+void qpack_dht_dump(FILE *out, const struct qpack_dht *dht)
+{
+ unsigned int i;
+ unsigned int slot;
+ char name[4096], value[4096];
+
+	for (i = QPACK_SHT_SIZE; i < QPACK_SHT_SIZE + dht->used; i++) {
+		slot = (qpack_get_dte(dht, i - QPACK_SHT_SIZE + 1) - dht->dte);
+ fprintf(out, "idx=%d slot=%u name=<%s> value=<%s> addr=%u-%u\n",
+ i, slot,
+ istpad(name, qpack_idx_to_name(dht, i)).ptr,
+ istpad(value, qpack_idx_to_value(dht, i)).ptr,
+ dht->dte[slot].addr, dht->dte[slot].addr+dht->dte[slot].nlen+dht->dte[slot].vlen-1);
+ }
+}
+
+/* check for the whole dynamic header table consistency, abort on failures */
+void qpack_dht_check_consistency(const struct qpack_dht *dht)
+{
+ unsigned slot = qpack_dht_get_tail(dht);
+ unsigned used2 = dht->used;
+ unsigned total = 0;
+
+ if (!dht->used)
+ return;
+
+ if (dht->front >= dht->wrap)
+ abort();
+
+ if (dht->used > dht->wrap)
+ abort();
+
+ if (dht->head >= dht->wrap)
+ abort();
+
+ while (used2--) {
+ total += dht->dte[slot].nlen + dht->dte[slot].vlen;
+ slot++;
+ if (slot >= dht->wrap)
+ slot = 0;
+ }
+
+ if (total != dht->total) {
+ fprintf(stderr, "%d: total=%u dht=%u\n", __LINE__, total, dht->total);
+ abort();
+ }
+}
+#endif // DEBUG_QPACK
+
+/* rebuild a new dynamic header table from <dht> with an unwrapped index and
+ * contents at the end. The new table is returned, the caller must not use the
+ * previous one anymore. NULL may be returned if no table could be allocated.
+ */
+static struct qpack_dht *qpack_dht_defrag(struct qpack_dht *dht)
+{
+ struct qpack_dht *alt_dht;
+ uint16_t old, new;
+ uint32_t addr;
+
+ /* Note: for small tables we could use alloca() instead but
+ * portability especially for large tables can be problematic.
+ */
+ alt_dht = qpack_dht_alloc();
+ if (!alt_dht)
+ return NULL;
+
+ alt_dht->total = dht->total;
+ alt_dht->used = dht->used;
+ alt_dht->wrap = dht->used;
+
+ new = 0;
+ addr = alt_dht->size;
+
+ if (dht->used) {
+ /* start from the tail */
+ old = qpack_dht_get_tail(dht);
+ do {
+ alt_dht->dte[new].nlen = dht->dte[old].nlen;
+ alt_dht->dte[new].vlen = dht->dte[old].vlen;
+ addr -= dht->dte[old].nlen + dht->dte[old].vlen;
+ alt_dht->dte[new].addr = addr;
+
+ memcpy((void *)alt_dht + alt_dht->dte[new].addr,
+ (void *)dht + dht->dte[old].addr,
+ dht->dte[old].nlen + dht->dte[old].vlen);
+
+ old++;
+ if (old >= dht->wrap)
+ old = 0;
+ new++;
+ } while (new < dht->used);
+ }
+
+ alt_dht->front = alt_dht->head = new - 1;
+
+ memcpy(dht, alt_dht, dht->size);
+ qpack_dht_free(alt_dht);
+
+ return dht;
+}
+
+/* Purges table <dht> until a header field of <needed> bytes fits according to
+ * the protocol (adding 32 bytes of overhead). Returns non-zero on success,
+ * zero on failure (ie: table empty but still not sufficient). It must only be
+ * called when the table is not large enough to suit the new entry and there
+ * are some entries left. In case of doubt, use qpack_dht_make_room() instead.
+ */
+int __qpack_dht_make_room(struct qpack_dht *dht, unsigned int needed)
+{
+ unsigned int used = dht->used;
+ unsigned int wrap = dht->wrap;
+ unsigned int tail;
+
+ do {
+ tail = ((dht->head + 1U < used) ? wrap : 0) + dht->head + 1U - used;
+ dht->total -= dht->dte[tail].nlen + dht->dte[tail].vlen;
+ if (tail == dht->front)
+ dht->front = dht->head;
+ used--;
+ } while (used && used * 32 + dht->total + needed + 32 > dht->size);
+
+ dht->used = used;
+
+ /* realign if empty */
+ if (!used)
+ dht->front = dht->head = 0;
+
+ /* pack the table if it doesn't wrap anymore */
+ if (dht->head + 1U >= used)
+ dht->wrap = dht->head + 1;
+
+ /* no need to check for 'used' here as if it doesn't fit, used==0 */
+ return needed + 32 <= dht->size;
+}
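+
+/* Illustrative example: with a 4096-byte table holding dht->used = 10
+ * entries totalling dht->total = 4000 bytes, inserting a header field of
+ * <needed> = 100 bytes evicts entries from the tail until
+ * used * 32 + total + 100 + 32 <= 4096, i.e. until the per-entry 32-byte
+ * overhead mandated by the protocol is absorbed as well.
+ */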
+
+/* tries to insert a new header <name>:<value> in front of the current head. A
+ * negative value is returned on error.
+ */
+int qpack_dht_insert(struct qpack_dht *dht, struct ist name, struct ist value)
+{
+ unsigned int used;
+ unsigned int head;
+ unsigned int prev;
+ unsigned int wrap;
+ unsigned int tail;
+ uint32_t headroom, tailroom;
+
+ if (!qpack_dht_make_room(dht, name.len + value.len))
+ return 0;
+
+ /* Now there is enough room in the table, that's guaranteed by the
+ * protocol, but not necessarily where we need it.
+ */
+
+ used = dht->used;
+ if (!used) {
+ /* easy, the table was empty */
+ dht->front = dht->head = 0;
+ dht->wrap = dht->used = 1;
+ dht->total = 0;
+ head = 0;
+ dht->dte[head].addr = dht->size - (name.len + value.len);
+ goto copy;
+ }
+
+ /* compute the new head, used and wrap position */
+ prev = head = dht->head;
+ wrap = dht->wrap;
+ tail = qpack_dht_get_tail(dht);
+
+ used++;
+ head++;
+
+ if (head >= wrap) {
+ /* head is leading the entries, we either need to push the
+ * table further or to loop back to released entries. We could
+ * force to loop back when at least half of the allocatable
+ * entries are free but in practice it never happens.
+ */
+ if ((sizeof(*dht) + (wrap + 1) * sizeof(dht->dte[0]) <= dht->dte[dht->front].addr))
+ wrap++;
+ else if (head >= used) /* there's a hole at the beginning */
+ head = 0;
+ else {
+ /* no more room, head hits tail and the index cannot be
+ * extended, we have to realign the whole table.
+ */
+ if (!qpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+ }
+ else if (used >= wrap) {
+ /* we've hit the tail, we need to reorganize the index so that
+ * the head is at the end (but not necessarily move the data).
+ */
+ if (!qpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+
+ /* Now we have updated head, used and wrap, we know that there is some
+ * available room at least from the protocol's perspective. This space
+ * is split in two areas :
+ *
+ * 1: if the previous head was the front cell, the space between the
+ * end of the index table and the front cell's address.
+ * 2: if the previous head was the front cell, the space between the
+ * end of the tail and the end of the table ; or if the previous
+ * head was not the front cell, the space between the end of the
+ * tail and the head's address.
+ */
+ if (prev == dht->front) {
+ /* the area was contiguous */
+ headroom = dht->dte[dht->front].addr - (sizeof(*dht) + wrap * sizeof(dht->dte[0]));
+ tailroom = dht->size - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+ else {
+ /* it's already wrapped so we can't store anything in the headroom */
+ headroom = 0;
+ tailroom = dht->dte[prev].addr - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+
+ /* We can decide to stop filling the headroom as soon as there's enough
+ * room left in the tail to suit the protocol, but tests show that in
+ * practice it almost never happens in other situations so the extra
+ * test is useless and we simply fill the headroom as long as it's
+ * available and we don't wrap.
+ */
+ if (prev == dht->front && headroom >= name.len + value.len) {
+ /* install upfront and update ->front */
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+ else if (tailroom >= name.len + value.len) {
+ dht->dte[head].addr = dht->dte[tail].addr + dht->dte[tail].nlen + dht->dte[tail].vlen + tailroom - (name.len + value.len);
+ }
+ else {
+ /* need to defragment the table before inserting upfront */
+ dht = qpack_dht_defrag(dht);
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+
+ dht->wrap = wrap;
+ dht->head = head;
+ dht->used = used;
+
+ copy:
+ dht->total += name.len + value.len;
+ dht->dte[head].nlen = name.len;
+ dht->dte[head].vlen = value.len;
+
+ memcpy((void *)dht + dht->dte[head].addr, name.ptr, name.len);
+ memcpy((void *)dht + dht->dte[head].addr + name.len, value.ptr, value.len);
+ return 0;
+}
diff --git a/src/queue.c b/src/queue.c
new file mode 100644
index 0000000..ebe3b75
--- /dev/null
+++ b/src/queue.c
@@ -0,0 +1,761 @@
+/*
+ * Queue management functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* Short explanation on the locking, which is far from being trivial : a
+ * pendconn is a list element which necessarily is associated with an existing
+ * stream. It has pendconn->strm always valid. A pendconn may only be in one of
+ * these three states :
+ * - unlinked : in this case it is an empty list head ;
+ * - linked into the server's queue ;
+ * - linked into the proxy's queue.
+ *
+ * A stream does not necessarily have such a pendconn. Thus the pendconn is
+ * designated by the stream->pend_pos pointer. This results in some properties :
+ * - pendconn->strm->pend_pos is never NULL for any valid pendconn
+ * - if p->node.node.leaf_p is NULL, the element is unlinked,
+ * otherwise it necessarily belongs to one of the other lists ; this may
+ * not be atomically checked under threads though ;
+ * - pendconn->px is never NULL if pendconn->list is not empty
+ * - pendconn->srv is never NULL if pendconn->list is in the server's queue,
+ * and is always NULL if pendconn->list is in the backend's queue or empty.
+ * - pendconn->target is NULL while the element is queued, and points to the
+ * assigned server when the pendconn is picked.
+ *
+ * Threads complicate the design a little bit but rules remain simple :
+ * - the server's queue lock must be held at least when manipulating the
+ * server's queue, which is when adding a pendconn to the queue and when
+ * removing a pendconn from the queue. It protects the queue's integrity.
+ *
+ * - the proxy's queue lock must be held at least when manipulating the
+ * proxy's queue, which is when adding a pendconn to the queue and when
+ * removing a pendconn from the queue. It protects the queue's integrity.
+ *
+ * - both locks are compatible and may be held at the same time.
+ *
+ * - a pendconn_add() is only performed by the stream which will own the
+ * pendconn ; the pendconn is allocated at this moment and returned ; it is
+ * added to either the server or the proxy's queue while holding this
+ *   queue's lock.
+ *
+ * - the pendconn is then met by a thread walking over the proxy or server's
+ * queue with the respective lock held. This lock is exclusive and the
+ * pendconn can only appear in one queue so by definition a single thread
+ * may find this pendconn at a time.
+ *
+ * - the pendconn is unlinked either by its own stream upon success/abort/
+ * free, or by another one offering it its server slot. This is achieved by
+ * pendconn_process_next_strm() under either the server or proxy's lock,
+ * pendconn_redistribute() under the server's lock, pendconn_grab_from_px()
+ * under the proxy's lock, or pendconn_unlink() under either the proxy's or
+ * the server's lock depending on the queue the pendconn is attached to.
+ *
+ * - no single operation except the pendconn initialisation prior to the
+ *   insertion is performed without either a queue lock held or the element
+ *   being unlinked and visible exclusively to its stream.
+ *
+ * - pendconn_grab_from_px() and pendconn_process_next_strm() assign ->target
+ * so that the stream knows what server to work with (via
+ * pendconn_dequeue() which sets it on strm->target).
+ *
+ * - a pendconn doesn't switch between queues, it stays where it is.
+ */
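+
+/* Typical stream-side sequence (illustrative sketch of the rules above):
+ *
+ *	p = pendconn_add(strm);        // queued under the queue's lock
+ *	...                            // later woken up with TASK_WOKEN_RES
+ *	if (!pendconn_dequeue(strm))   // unlinked: flags/target updated
+ *		...                    // proceed with strm->target
+ *
+ * Abort paths instead call pendconn_cond_unlink()/pendconn_unlink() so the
+ * element safely leaves its queue before the pendconn is released.
+ */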
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/pool.h>
+#include <haproxy/queue.h>
+#include <haproxy/sample.h>
+#include <haproxy/server-t.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+#define NOW_OFFSET_BOUNDARY() ((now_ms - (TIMER_LOOK_BACK >> 12)) & 0xfffff)
+#define KEY_CLASS(key) ((u32)key & 0xfff00000)
+#define KEY_OFFSET(key) ((u32)key & 0x000fffff)
+#define KEY_CLASS_OFFSET_BOUNDARY(key) (KEY_CLASS(key) | NOW_OFFSET_BOUNDARY())
+#define MAKE_KEY(class, offset) (((u32)(class + 0x7ff) << 20) | ((u32)(now_ms + offset) & 0xfffff))
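+
+/* Illustrative key layout: the 12 upper bits hold the priority class biased
+ * by +2047 so that negative classes sort first (class 0 maps to 0x7ff), and
+ * the 20 lower bits hold now_ms + offset truncated to 20 bits. E.g. with
+ * class=1 and offset=0 at now_ms=0x12345, MAKE_KEY() yields
+ * (0x800 << 20) | 0x12345 = 0x80012345.
+ */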
+
+DECLARE_POOL(pool_head_pendconn, "pendconn", sizeof(struct pendconn));
+
+/* returns the effective dynamic maxconn for a server, considering the minconn
+ * and the proxy's usage relative to its dynamic connections limit. It is
+ * expected that 0 < s->minconn <= s->maxconn when this is called. If the
+ * server is currently warming up, the slowstart is also applied to the
+ * resulting value, which can be lower than minconn in this case, but never
+ * less than 1.
+ */
+unsigned int srv_dynamic_maxconn(const struct server *s)
+{
+ unsigned int max;
+
+ if (s->proxy->beconn >= s->proxy->fullconn)
+ /* no fullconn or proxy is full */
+ max = s->maxconn;
+ else if (s->minconn == s->maxconn)
+ /* static limit */
+ max = s->maxconn;
+ else max = MAX(s->minconn,
+ s->proxy->beconn * s->maxconn / s->proxy->fullconn);
+
+ if ((s->cur_state == SRV_ST_STARTING) &&
+ now.tv_sec < s->last_change + s->slowstart &&
+ now.tv_sec >= s->last_change) {
+ unsigned int ratio;
+ ratio = 100 * (now.tv_sec - s->last_change) / s->slowstart;
+ max = MAX(1, max * ratio / 100);
+ }
+ return max;
+}
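+
+/* Worked example (illustrative): with minconn=10, maxconn=100, fullconn=1000
+ * and beconn=300, the dynamic limit is MAX(10, 300 * 100 / 1000) = 30. If the
+ * server is additionally halfway through its slowstart window, ratio=50 and
+ * the result becomes MAX(1, 30 * 50 / 100) = 15.
+ */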
+
+/* Remove the pendconn from the server's queue. At this stage, the connection
+ * is not really dequeued. It will be done during the process_stream. It is
+ * up to the caller to atomically decrement the pending counts.
+ *
+ * The caller must own the lock on the server queue. The pendconn must still be
+ * queued (p->node.leaf_p != NULL) and must be in a server (p->srv != NULL).
+ */
+static void __pendconn_unlink_srv(struct pendconn *p)
+{
+ p->strm->logs.srv_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
+ eb32_delete(&p->node);
+}
+
+/* Remove the pendconn from the proxy's queue. At this stage, the connection
+ * is not really dequeued. It will be done during the process_stream. It is
+ * up to the caller to atomically decrement the pending counts.
+ *
+ * The caller must own the lock on the proxy queue. The pendconn must still be
+ * queued (p->node.leaf_p != NULL) and must be in the proxy (p->srv == NULL).
+ */
+static void __pendconn_unlink_prx(struct pendconn *p)
+{
+ p->strm->logs.prx_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
+ eb32_delete(&p->node);
+}
+
+/* Locks the queue the pendconn element belongs to. This relies on both p->px
+ * and p->srv to be properly initialized (which is always the case once the
+ * element has been added).
+ */
+static inline void pendconn_queue_lock(struct pendconn *p)
+{
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->queue->lock);
+}
+
+/* Unlocks the queue the pendconn element belongs to. This relies on both p->px
+ * and p->srv to be properly initialized (which is always the case once the
+ * element has been added).
+ */
+static inline void pendconn_queue_unlock(struct pendconn *p)
+{
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->queue->lock);
+}
+
+/* Removes the pendconn from the server/proxy queue. At this stage, the
+ * connection is not really dequeued. It will be done during process_stream().
+ * This function takes all the required locks for the operation. The pendconn
+ * must be valid, though it doesn't matter if it was already unlinked. Prefer
+ * pendconn_cond_unlink() to first check <p>. It also forces a serialization
+ * on p->del_lock to make sure another thread currently waking it up finishes
+ * first.
+ */
+void pendconn_unlink(struct pendconn *p)
+{
+ struct queue *q = p->queue;
+ struct proxy *px = q->px;
+ struct server *sv = q->sv;
+ uint oldidx;
+ int done = 0;
+
+ oldidx = _HA_ATOMIC_LOAD(&p->queue->idx);
+ HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
+
+ if (p->node.node.leaf_p) {
+ eb32_delete(&p->node);
+ done = 1;
+ }
+
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
+
+ if (done) {
+ oldidx -= p->queue_idx;
+ if (sv)
+ p->strm->logs.srv_queue_pos += oldidx;
+ else
+ p->strm->logs.prx_queue_pos += oldidx;
+
+ _HA_ATOMIC_DEC(&q->length);
+ _HA_ATOMIC_DEC(&px->totpend);
+ }
+}
+
+/* Retrieve the first pendconn from tree <pendconns>. Classes are always
+ * considered first, then the time offset. The time does wrap, so the
+ * lookup is performed twice, once to retrieve the first class and a second
+ * time to retrieve the earliest time in this class.
+ */
+static struct pendconn *pendconn_first(struct eb_root *pendconns)
+{
+ struct eb32_node *node, *node2 = NULL;
+ u32 key;
+
+ node = eb32_first(pendconns);
+ if (!node)
+ return NULL;
+
+ key = KEY_CLASS_OFFSET_BOUNDARY(node->key);
+ node2 = eb32_lookup_ge(pendconns, key);
+
+ if (!node2 ||
+ KEY_CLASS(node2->key) != KEY_CLASS(node->key)) {
+ /* no other key in the tree, or in this class */
+ return eb32_entry(node, struct pendconn, node);
+ }
+
+ /* found a better key */
+ return eb32_entry(node2, struct pendconn, node);
+}
+
+/* Process the next pending connection from either a server or a proxy, and
+ * returns 1 if a pendconn was dequeued, otherwise 0. Note that neither <srv>
+ * nor <px> may be NULL. Priority is given to the oldest request in the queue
+ * if both <srv> and <px> have pending requests, which ensures that no request
+ * will be left unserved. The <px> queue is not considered if the server (or a
+ * tracked server) is not RUNNING, is disabled, or has a null weight (server
+ * going down). The <srv> queue is still considered in this case, because if
+ * some connections remain there, it means that some requests have been forced
+ * there after the server was seen down (eg: due to option persist). The
+ * stream is immediately marked as "assigned", and both its <srv> and
+ * <srv_conn> are set to <srv>.
+ *
+ * The proxy's queue will be consulted only if <px_ok> is non-zero.
+ *
+ * This function must only be called if the server queue is locked _AND_ the
+ * proxy queue is not. Today it is only called by process_srv_queue().
+ */
+static int pendconn_process_next_strm(struct server *srv, struct proxy *px, int px_ok)
+{
+ struct pendconn *p = NULL;
+ struct pendconn *pp = NULL;
+ u32 pkey, ppkey;
+
+ p = NULL;
+ if (srv->queue.length)
+ p = pendconn_first(&srv->queue.head);
+
+ pp = NULL;
+ if (px_ok && px->queue.length) {
+ /* the lock only remains held as long as the pp is
+ * in the proxy's queue.
+ */
+ HA_SPIN_LOCK(QUEUE_LOCK, &px->queue.lock);
+ pp = pendconn_first(&px->queue.head);
+ if (!pp)
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+ }
+
+ if (!p && !pp)
+ return 0;
+ else if (!pp)
+ goto use_p; /* p != NULL */
+ else if (!p)
+ goto use_pp; /* pp != NULL */
+
+ /* p != NULL && pp != NULL*/
+
+ if (KEY_CLASS(p->node.key) < KEY_CLASS(pp->node.key))
+ goto use_p;
+
+ if (KEY_CLASS(pp->node.key) < KEY_CLASS(p->node.key))
+ goto use_pp;
+
+ pkey = KEY_OFFSET(p->node.key);
+ ppkey = KEY_OFFSET(pp->node.key);
+
+ if (pkey < NOW_OFFSET_BOUNDARY())
+ pkey += 0x100000; // key in the future
+
+ if (ppkey < NOW_OFFSET_BOUNDARY())
+ ppkey += 0x100000; // key in the future
+
+ if (pkey <= ppkey)
+ goto use_p;
+
+ use_pp:
+ /* we'd like to release the proxy lock ASAP to let other threads
+ * work with other servers. But for this we must first hold the
+ * pendconn alive to prevent a removal from its owning stream.
+ */
+ HA_SPIN_LOCK(QUEUE_LOCK, &pp->del_lock);
+
+ /* now the element won't go, we can release the proxy */
+ __pendconn_unlink_prx(pp);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+
+ pp->strm_flags |= SF_ASSIGNED;
+ pp->target = srv;
+ stream_add_srv_conn(pp->strm, srv);
+
+ /* we must wake the task up before releasing the lock as it's the only
+ * way to make sure the task still exists. The pendconn cannot vanish
+ * under us since the task will need to take the lock anyway and to wait
+ * if it wakes up on a different thread.
+ */
+ task_instant_wakeup(pp->strm->task, TASK_WOKEN_RES);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &pp->del_lock);
+
+ _HA_ATOMIC_DEC(&px->queue.length);
+ _HA_ATOMIC_INC(&px->queue.idx);
+ return 1;
+
+ use_p:
+ /* we don't need the px queue lock anymore, we have the server's lock */
+ if (pp)
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+
+ p->strm_flags |= SF_ASSIGNED;
+ p->target = srv;
+ stream_add_srv_conn(p->strm, srv);
+
+ /* we must wake the task up before releasing the lock as it's the only
+ * way to make sure the task still exists. The pendconn cannot vanish
+ * under us since the task will need to take the lock anyway and to wait
+ * if it wakes up on a different thread.
+ */
+ task_instant_wakeup(p->strm->task, TASK_WOKEN_RES);
+ __pendconn_unlink_srv(p);
+
+ _HA_ATOMIC_DEC(&srv->queue.length);
+ _HA_ATOMIC_INC(&srv->queue.idx);
+ return 1;
+}
+
+/* Manages a server's connection queue. This function will try to dequeue as
+ * many pending streams as possible, and wake them up.
+ */
+void process_srv_queue(struct server *s)
+{
+ struct server *ref = s->track ? s->track : s;
+ struct proxy *p = s->proxy;
+ int maxconn;
+ int stop = 0;
+ int done = 0;
+ int px_ok;
+
+	/* The proxy's queue may only be used if the (tracked) server is
+	 * usable and, for a backup server, only if no active server is up
+	 * and it is either the elected backup or all backups may be used.
+	 */
+ px_ok = srv_currently_usable(ref) &&
+ (!(s->flags & SRV_F_BACKUP) ||
+ (!p->srv_act &&
+ (s == p->lbprm.fbck || (p->options & PR_O_USE_ALL_BK))));
+
+ /* let's repeat that under the lock on each round. Threads competing
+ * for the same server will give up, knowing that at least one of
+ * them will check the conditions again before quitting. In order
+ * to avoid the deadly situation where one thread spends its time
+ * dequeueing for others, we limit the number of rounds it does.
+ * However we still re-enter the loop for one pass if there's no
+ * more served, otherwise we could end up with no other thread
+ * trying to dequeue them.
+ */
+ while (!stop && (done < global.tune.maxpollevents || !s->served) &&
+ s->served < (maxconn = srv_dynamic_maxconn(s))) {
+ if (HA_SPIN_TRYLOCK(QUEUE_LOCK, &s->queue.lock) != 0)
+ break;
+
+ while (s->served < maxconn) {
+ stop = !pendconn_process_next_strm(s, p, px_ok);
+ if (stop)
+ break;
+ _HA_ATOMIC_INC(&s->served);
+ done++;
+ if (done >= global.tune.maxpollevents)
+ break;
+ }
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
+ }
+
+ if (done) {
+ _HA_ATOMIC_SUB(&p->totpend, done);
+ _HA_ATOMIC_ADD(&p->served, done);
+ __ha_barrier_atomic_store();
+ if (p->lbprm.server_take_conn)
+ p->lbprm.server_take_conn(s);
+ }
+}
+
+/* Adds the stream <strm> to the pending connection queue of server <strm>->srv
+ * or to the one of <strm>->proxy if srv is NULL. All counters and back pointers
+ * are updated accordingly. Returns NULL if no memory is available, otherwise the
+ * pendconn itself. If the stream was already marked as served, its flag is
+ * cleared. It is illegal to call this function with a non-NULL strm->srv_conn.
+ * The stream's queue position is counted with an offset of -1 because we want
+ * to make sure that being at the first position in the queue reports 1.
+ *
+ * The queue is sorted by the composition of the priority_class, and the current
+ * timestamp offset by strm->priority_offset. The timestamp is in milliseconds
+ * and truncated to 20 bits, so will wrap every 17m28s575ms.
+ * The offset can be positive or negative, and an offset of 0 puts it in the
+ * middle of this range (~ 8 min). Note that this also means if the adjusted
+ * timestamp wraps around, the request will be misinterpreted as being of
+ * the highest priority for that priority class.
+ *
+ * This function must be called by the stream itself, so in the context of
+ * process_stream.
+ */
+struct pendconn *pendconn_add(struct stream *strm)
+{
+ struct pendconn *p;
+ struct proxy *px;
+ struct server *srv;
+ struct queue *q;
+ unsigned int *max_ptr;
+ unsigned int old_max, new_max;
+
+ p = pool_alloc(pool_head_pendconn);
+ if (!p)
+ return NULL;
+
+ p->target = NULL;
+ p->node.key = MAKE_KEY(strm->priority_class, strm->priority_offset);
+ p->strm = strm;
+ p->strm_flags = strm->flags;
+ HA_SPIN_INIT(&p->del_lock);
+ strm->pend_pos = p;
+
+ px = strm->be;
+ if (strm->flags & SF_ASSIGNED)
+ srv = objt_server(strm->target);
+ else
+ srv = NULL;
+
+ if (srv) {
+ q = &srv->queue;
+ max_ptr = &srv->counters.nbpend_max;
+ }
+ else {
+ q = &px->queue;
+ max_ptr = &px->be_counters.nbpend_max;
+ }
+
+ p->queue = q;
+ p->queue_idx = _HA_ATOMIC_LOAD(&q->idx) - 1; // for logging only
+ new_max = _HA_ATOMIC_ADD_FETCH(&q->length, 1);
+ old_max = _HA_ATOMIC_LOAD(max_ptr);
+ while (new_max > old_max) {
+ if (likely(_HA_ATOMIC_CAS(max_ptr, &old_max, new_max)))
+ break;
+ }
+ __ha_barrier_atomic_store();
+
+ HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
+ eb32_insert(&q->head, &p->node);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
+
+ _HA_ATOMIC_INC(&px->totpend);
+ return p;
+}
+
+/* Redistribute pending connections when a server goes down. The number of
+ * connections redistributed is returned. It will take the server queue lock
+ * and does not use nor depend on other locks.
+ */
+int pendconn_redistribute(struct server *s)
+{
+ struct pendconn *p;
+ struct eb32_node *node, *nodeb;
+ int xferred = 0;
+
+ /* The REDISP option was specified. We will ignore cookie and force to
+ * balance or use the dispatcher. */
+ if ((s->proxy->options & (PR_O_REDISP|PR_O_PERSIST)) != PR_O_REDISP)
+ return 0;
+
+ HA_SPIN_LOCK(QUEUE_LOCK, &s->queue.lock);
+ for (node = eb32_first(&s->queue.head); node; node = nodeb) {
+ nodeb = eb32_next(node);
+
+ p = eb32_entry(node, struct pendconn, node);
+ if (p->strm_flags & SF_FORCE_PRST)
+ continue;
+
+ /* it's left to the dispatcher to choose a server */
+ __pendconn_unlink_srv(p);
+ p->strm_flags &= ~(SF_DIRECT | SF_ASSIGNED);
+
+ task_instant_wakeup(p->strm->task, TASK_WOKEN_RES);
+ xferred++;
+ }
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
+
+ if (xferred) {
+ _HA_ATOMIC_SUB(&s->queue.length, xferred);
+ _HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
+ }
+ return xferred;
+}
+
+/* Check for pending connections at the backend, and assign some of them to
+ * the server coming up. The server's weight is checked before being assigned
+ * connections it may not be able to handle. The total number of transferred
+ * connections is returned. It will take the proxy's queue lock and will not
+ * use nor depend on other locks.
+ */
+int pendconn_grab_from_px(struct server *s)
+{
+ struct pendconn *p;
+ int maxconn, xferred = 0;
+
+ if (!srv_currently_usable(s))
+ return 0;
+
+ /* if this is a backup server and there are active servers or at
+ * least another backup server was elected, then this one must
+ * not dequeue requests from the proxy.
+ */
+ if ((s->flags & SRV_F_BACKUP) &&
+ (s->proxy->srv_act ||
+ ((s != s->proxy->lbprm.fbck) && !(s->proxy->options & PR_O_USE_ALL_BK))))
+ return 0;
+
+ HA_SPIN_LOCK(QUEUE_LOCK, &s->proxy->queue.lock);
+ maxconn = srv_dynamic_maxconn(s);
+ while ((p = pendconn_first(&s->proxy->queue.head))) {
+ if (s->maxconn && s->served + xferred >= maxconn)
+ break;
+
+ __pendconn_unlink_prx(p);
+ p->target = s;
+
+ task_instant_wakeup(p->strm->task, TASK_WOKEN_RES);
+ xferred++;
+ }
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &s->proxy->queue.lock);
+ if (xferred) {
+ _HA_ATOMIC_SUB(&s->proxy->queue.length, xferred);
+ _HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
+ }
+ return xferred;
+}
+
+/* Try to dequeue pending connection attached to the stream <strm>. It must
+ * always exist here. If the pendconn is still linked to the server or the
+ * proxy queue, nothing is done and the function returns 1. Otherwise,
+ * <strm>->flags and <strm>->target are updated, the pendconn is released and 0
+ * is returned.
+ *
+ * This function must be called by the stream itself, so in the context of
+ * process_stream.
+ */
+int pendconn_dequeue(struct stream *strm)
+{
+ struct pendconn *p;
+ int is_unlinked;
+
+ /* unexpected case because it is called by the stream itself and
+ * only the stream can release a pendconn. So it is only
+ * possible if a pendconn is released by someone else or if the
+ * stream is supposed to be queued but without its associated
+ * pendconn. In both cases it is a bug! */
+ BUG_ON(!strm->pend_pos);
+
+ p = strm->pend_pos;
+
+ /* note below : we need to grab the queue's lock to check for emptiness
+ * because we don't want a partial _grab_from_px() or _redistribute()
+ * to be called in parallel and show an empty list without having the
+ * time to finish. With this we know that if we see the element
+ * unlinked, these functions were completely done.
+ */
+ pendconn_queue_lock(p);
+ is_unlinked = !p->node.node.leaf_p;
+ pendconn_queue_unlock(p);
+
+ /* serialize to make sure the element was finished processing */
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
+
+ if (!is_unlinked)
+ return 1;
+
+ /* the pendconn is not queued anymore and will not be so we're safe
+ * to proceed.
+ */
+ strm->flags &= ~(SF_DIRECT | SF_ASSIGNED);
+ strm->flags |= p->strm_flags & (SF_DIRECT | SF_ASSIGNED);
+
+ /* the entry might have been redistributed to another server */
+ if (!(strm->flags & SF_ASSIGNED))
+ sockaddr_free(&strm->scb->dst);
+
+ if (p->target) {
+ /* a server picked this pendconn, it must skip LB */
+ strm->target = &p->target->obj_type;
+ strm->flags |= SF_ASSIGNED;
+ }
+
+ strm->pend_pos = NULL;
+ pool_free(pool_head_pendconn, p);
+ return 0;
+}
+
+static enum act_return action_set_priority_class(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (!smp)
+ return ACT_RET_CONT;
+
+ s->priority_class = queue_limit_class(smp->data.u.sint);
+ return ACT_RET_CONT;
+}
+
+static enum act_return action_set_priority_offset(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (!smp)
+ return ACT_RET_CONT;
+
+ s->priority_offset = queue_limit_offset(smp->data.u.sint);
+
+ return ACT_RET_CONT;
+}
+
+static enum act_parse_ret parse_set_priority_class(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ unsigned int where = 0;
+
+ rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.expr)
+ return ACT_RET_PRS_ERR;
+
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(rule->arg.expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], sample_src_names(rule->arg.expr->fetch->use));
+ free(rule->arg.expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_set_priority_class;
+ return ACT_RET_PRS_OK;
+}
+
+static enum act_parse_ret parse_set_priority_offset(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ unsigned int where = 0;
+
+ rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.expr)
+ return ACT_RET_PRS_ERR;
+
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(rule->arg.expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], sample_src_names(rule->arg.expr->fetch->use));
+ free(rule->arg.expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_set_priority_offset;
+ return ACT_RET_PRS_OK;
+}
+
+static struct action_kw_list tcp_cont_kws = {ILH, {
+ { "set-priority-class", parse_set_priority_class },
+ { "set-priority-offset", parse_set_priority_offset },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_cont_kws);
+
+static struct action_kw_list http_req_kws = {ILH, {
+ { "set-priority-class", parse_set_priority_class },
+ { "set-priority-offset", parse_set_priority_offset },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static int
+smp_fetch_priority_class(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->priority_class;
+
+ return 1;
+}
+
+static int
+smp_fetch_priority_offset(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->priority_offset;
+
+ return 1;
+}
+
+
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "prio_class", smp_fetch_priority_class, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "prio_offset", smp_fetch_priority_offset, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */},
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
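+
+/* Illustrative configuration usage of the actions and fetches registered
+ * above (the ACL names are hypothetical):
+ *
+ *	tcp-request content set-priority-class int(-10) if payment_path
+ *	http-request set-priority-offset int(-1000) if logged_in
+ */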
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/quic_cc.c b/src/quic_cc.c
new file mode 100644
index 0000000..8fd99d3
--- /dev/null
+++ b/src/quic_cc.c
@@ -0,0 +1,49 @@
+/*
+ * Congestion controller handling.
+ *
+ * This file contains definitions for QUIC congestion control.
+ *
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <haproxy/quic_cc.h>
+
+struct quic_cc_algo *default_quic_cc_algo = &quic_cc_algo_cubic;
+
+/*
+ * Initialize <cc> congestion control with <algo> as algorithm for the
+ * connection <qc>.
+ */
+void quic_cc_init(struct quic_cc *cc,
+ struct quic_cc_algo *algo, struct quic_conn *qc)
+{
+ cc->qc = qc;
+ cc->algo = algo;
+ if (cc->algo->init)
+		cc->algo->init(cc);
+}
+
+/* Send <ev> event to <cc> congestion controller. */
+void quic_cc_event(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ cc->algo->event(cc, ev);
+}
+
+void quic_cc_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+ cc->algo->state_trace(buf, cc);
+}
diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c
new file mode 100644
index 0000000..dc6ef9f
--- /dev/null
+++ b/src/quic_cc_cubic.c
@@ -0,0 +1,283 @@
+#include <haproxy/trace.h>
+#include <haproxy/quic_cc.h>
+
+/* This source file is highly inspired by the Linux kernel implementation
+ * of TCP Cubic (see net/ipv4/tcp_cubic.c): integer arithmetic is used
+ * throughout because floating point operations would be too slow here.
+ */
+#define TRACE_SOURCE &trace_quic
+
+#define CUBIC_BETA_SCALE 1024
+#define CUBIC_BETA_SCALE_SHIFT 10
+/* beta = 0.7 ; C = 0.4 */
+#define CUBIC_BETA 717 /* CUBIC_BETA / CUBIC_BETA_SCALE = 0.7 */
+#define CUBIC_C 410 /* CUBIC_C / CUBIC_BETA_SCALE = 0.4 */
+
+#define CUBIC_BETA_SCALE_FACTOR_SHIFT (3 * CUBIC_BETA_SCALE_SHIFT)
+#define TIME_SCALE_FACTOR_SHIFT 10
+
+/* The maximum value which may be cubed and multiplied by CUBIC_BETA */
+#define CUBIC_DIFF_TIME_LIMIT 355535ULL /* ms */
+
+/* K cube factor: (1 - beta) / c */
+struct cubic {
+ uint32_t ssthresh;
+ uint32_t remaining_inc;
+ uint32_t remaining_tcp_inc;
+ uint32_t epoch_start;
+ uint32_t origin_point;
+ uint32_t K;
+ uint32_t last_w_max;
+ uint32_t tcp_wnd;
+ uint32_t recovery_start_time;
+};
+
+static void quic_cc_cubic_reset(struct quic_cc *cc)
+{
+ struct cubic *c = quic_cc_priv(cc);
+
+ cc->algo->state = QUIC_CC_ST_SS;
+
+ c->ssthresh = QUIC_CC_INFINITE_SSTHESH;
+ c->remaining_inc = 0;
+ c->remaining_tcp_inc = 0;
+ c->epoch_start = 0;
+ c->origin_point = 0;
+ c->K = 0;
+ c->last_w_max = 0;
+ c->tcp_wnd = 0;
+ c->recovery_start_time = 0;
+}
+
+static int quic_cc_cubic_init(struct quic_cc *cc)
+{
+ quic_cc_cubic_reset(cc);
+ return 1;
+}
+
+/* Cubic root.
+ * Highly inspired from Linux kernel sources.
+ * See net/ipv4/tcp_cubic.c
+ */
+static uint32_t cubic_root(uint64_t val)
+{
+ uint32_t x, b, shift;
+
+ static const uint8_t v[] = {
+ 0, 54, 54, 54, 118, 118, 118, 118,
+ 123, 129, 134, 138, 143, 147, 151, 156,
+ 157, 161, 164, 168, 170, 173, 176, 179,
+ 181, 185, 187, 190, 192, 194, 197, 199,
+ 200, 202, 204, 206, 209, 211, 213, 215,
+ 217, 219, 221, 222, 224, 225, 227, 229,
+ 231, 232, 234, 236, 237, 239, 240, 242,
+ 244, 245, 246, 248, 250, 251, 252, 254,
+ };
+
+ if (!val || (b = my_flsl(val)) < 7) {
+ /* val in [0..63] */
+ return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
+ }
+
+	b = ((b * 84) >> 8) - 1;
+	shift = (val >> (b * 3));
+
+	/* rough estimate from the lookup table, scaled back up */
+	x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;
+
+	/* one Newton-Raphson iteration: x = (2 * x + val / x^2) / 3, where
+	 * x * (x - 1) approximates x^2 and the multiplication by 341/1024
+	 * approximates the division by 3.
+	 */
+	x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
+	x = ((x * 341) >> 10);
+
+ return x;
+}
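+
+/* Illustrative values: cubic_root(8) = 2 and cubic_root(27) = 3 come straight
+ * from the lookup table, while e.g. cubic_root(64) = 4 goes through the rough
+ * estimate plus the Newton-Raphson iteration above.
+ */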
+
+static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
+{
+ struct cubic *c = quic_cc_priv(cc);
+ struct quic_path *path = container_of(cc, struct quic_path, cc);
+ uint32_t t, target, inc, inc_diff;
+ uint64_t delta, diff;
+
+ if (!c->epoch_start) {
+ c->epoch_start = now_ms;
+ if (c->last_w_max <= path->cwnd) {
+ c->K = 0;
+ c->origin_point = path->cwnd;
+ }
+ else {
+ /* K = cubic_root((1 - beta) * W_max / C) */
+ c->K = cubic_root((c->last_w_max - path->cwnd) *
+ (CUBIC_BETA_SCALE - CUBIC_BETA) / CUBIC_C / path->mtu) << TIME_SCALE_FACTOR_SHIFT;
+ c->origin_point = c->last_w_max;
+ }
+
+ c->tcp_wnd = path->cwnd;
+ c->remaining_inc = 0;
+ c->remaining_tcp_inc = 0;
+ }
+
+ t = now_ms + path->loss.rtt_min - c->epoch_start;
+ if (t < c->K) {
+ diff = c->K - t;
+ }
+ else {
+ diff = t - c->K;
+ }
+
+ if (diff > CUBIC_DIFF_TIME_LIMIT) {
+ /* TODO : should not happen if we handle the case
+ * of very late acks receipt. This must be handled as a congestion
+ * control event: a very late ack should trigger a congestion
+ * control algorithm reset.
+ */
+ quic_cc_cubic_reset(cc);
+ return;
+ }
+
+ delta = path->mtu * ((CUBIC_C * diff * diff * diff) >> (10 + 3 * TIME_SCALE_FACTOR_SHIFT));
+ if (t < c->K)
+ target = c->origin_point - delta;
+ else
+ target = c->origin_point + delta;
+
+ if (target > path->cwnd) {
+ inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd);
+ c->remaining_inc = inc_diff % path->cwnd;
+ inc = inc_diff / path->cwnd;
+ }
+ else {
+		/* Small increment: the window has reached the cubic target,
+		 * keep it growing, but very slowly (the equivalent of
+		 * tcp_cubic's 100 * cwnd clamp).
+		 */
+ inc_diff = c->remaining_inc + path->mtu;
+ c->remaining_inc = inc_diff % (100 * path->cwnd);
+ inc = inc_diff / (100 * path->cwnd);
+ }
+
+	/* TCP friendliness: maintain an estimation of the window a standard
+	 * TCP (Reno) flow would have reached and never let the CUBIC window
+	 * fall below it (see RFC 8312 4.2).
+	 */
+	inc_diff = c->remaining_tcp_inc + path->mtu * acked;
+	c->tcp_wnd += inc_diff / path->cwnd;
+	c->remaining_tcp_inc = inc_diff % path->cwnd;
+	if (c->tcp_wnd > path->cwnd) {
+		uint32_t tcp_inc = path->mtu * (c->tcp_wnd - path->cwnd) / path->cwnd;
+		if (tcp_inc > inc)
+			inc = tcp_inc;
+	}
+
+ path->cwnd += inc;
+}
+
+static void quic_cc_cubic_slow_start(struct quic_cc *cc)
+{
+ quic_cc_cubic_reset(cc);
+}
+
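+/* Enter the recovery period: remember the reached window maximum, apply the
+ * multiplicative decrease factor beta to the congestion window and reset the
+ * epoch so that the next ACK starts a new cubic growth curve.
+ */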
+static void quic_enter_recovery(struct quic_cc *cc)
+{
+ struct quic_path *path = container_of(cc, struct quic_path, cc);
+ struct cubic *c = quic_cc_priv(cc);
+
+ c->epoch_start = 0;
+ c->recovery_start_time = now_ms;
+ /* Fast convergence */
+ if (path->cwnd < c->last_w_max) {
+ /* (1 + beta) * path->cwnd / 2 */
+ c->last_w_max = (path->cwnd * (CUBIC_BETA_SCALE + CUBIC_BETA) / 2) >> CUBIC_BETA_SCALE_SHIFT;
+ }
+ else {
+ c->last_w_max = path->cwnd;
+ }
+ path->cwnd = (CUBIC_BETA * path->cwnd) >> CUBIC_BETA_SCALE_SHIFT;
+ c->ssthresh = QUIC_MAX(path->cwnd, path->min_cwnd);
+}
+
+/* Congestion slow-start callback. */
+static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_path *path = container_of(cc, struct quic_path, cc);
+ struct cubic *c = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ /* Do not increase the congestion window in recovery period. */
+ if (ev->ack.time_sent <= c->recovery_start_time)
+ goto out;
+
+ path->cwnd += ev->ack.acked;
+ /* Exit to congestion avoidance if slow start threshold is reached. */
+ if (path->cwnd >= c->ssthresh)
+ cc->algo->state = QUIC_CC_ST_CA;
+ break;
+
+ case QUIC_CC_EVT_LOSS:
+ /* Do not decrease the congestion window when already in recovery period. */
+ if (ev->loss.time_sent <= c->recovery_start_time)
+ goto out;
+
+ quic_enter_recovery(cc);
+ /* Exit to congestion avoidance. */
+ cc->algo->state = QUIC_CC_ST_CA;
+ break;
+
+ case QUIC_CC_EVT_ECN_CE:
+ /* TODO */
+ break;
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
+
+/* Congestion avoidance callback. */
+static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct cubic *c = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ /* Do not increase the congestion window when already in recovery period. */
+ if (ev->ack.time_sent <= c->recovery_start_time)
+ goto out;
+
+ quic_cubic_update(cc, ev->ack.acked);
+ break;
+ case QUIC_CC_EVT_LOSS:
+ /* Do not decrease the congestion window when already in recovery period. */
+ if (ev->loss.time_sent <= c->recovery_start_time)
+ goto out;
+
+ quic_enter_recovery(cc);
+ break;
+ case QUIC_CC_EVT_ECN_CE:
+ /* TODO */
+ break;
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
+
+static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
+ struct quic_cc_event *ev) = {
+ [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
+ [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
+};
+
+static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+	quic_cc_cubic_state_cbs[cc->algo->state](cc, ev);
+}
+
+static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+}
+
+struct quic_cc_algo quic_cc_algo_cubic = {
+ .type = QUIC_CC_ALGO_TP_CUBIC,
+ .init = quic_cc_cubic_init,
+ .event = quic_cc_cubic_event,
+ .slow_start = quic_cc_cubic_slow_start,
+ .state_trace = quic_cc_cubic_state_trace,
+};
diff --git a/src/quic_cc_newreno.c b/src/quic_cc_newreno.c
new file mode 100644
index 0000000..e18ba6f
--- /dev/null
+++ b/src/quic_cc_newreno.c
@@ -0,0 +1,173 @@
+/*
+ * NewReno congestion control algorithm.
+ *
+ * This file contains definitions for QUIC congestion control.
+ *
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <haproxy/api-t.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/quic_cc.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_quic
+
+/* NewReno state */
+struct nr {
+	uint32_t ssthresh;            /* slow start threshold (bytes) */
+	uint32_t recovery_start_time; /* start time (ms) of the current recovery period */
+	uint32_t remain_acked;        /* remainder of the congestion window increase computation */
+};
+
+static int quic_cc_nr_init(struct quic_cc *cc)
+{
+ struct nr *nr = quic_cc_priv(cc);
+
+ cc->algo->state = QUIC_CC_ST_SS;
+ nr->ssthresh = QUIC_CC_INFINITE_SSTHESH;
+ nr->recovery_start_time = 0;
+ nr->remain_acked = 0;
+
+ return 1;
+}
+
+/* Re-enter slow start state. */
+static void quic_cc_nr_slow_start(struct quic_cc *cc)
+{
+ struct quic_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ path = container_of(cc, struct quic_path, cc);
+ path->cwnd = path->min_cwnd;
+ /* Re-entering slow start state. */
+ cc->algo->state = QUIC_CC_ST_SS;
+ /* Recovery start time reset */
+ nr->recovery_start_time = 0;
+}
+
+/* Slow start callback. */
+static void quic_cc_nr_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
+ path = container_of(cc, struct quic_path, cc);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ /* Do not increase the congestion window in recovery period. */
+ if (ev->ack.time_sent <= nr->recovery_start_time)
+			goto out;
+
+ path->cwnd += ev->ack.acked;
+ /* Exit to congestion avoidance if slow start threshold is reached. */
+ if (path->cwnd > nr->ssthresh)
+ cc->algo->state = QUIC_CC_ST_CA;
+ break;
+
+ case QUIC_CC_EVT_LOSS:
+ path->cwnd = QUIC_MAX(path->cwnd >> 1, path->min_cwnd);
+ nr->ssthresh = path->cwnd;
+ /* Exit to congestion avoidance. */
+ cc->algo->state = QUIC_CC_ST_CA;
+ break;
+
+ case QUIC_CC_EVT_ECN_CE:
+ /* XXX TO DO XXX */
+ break;
+ }
+ out:
+	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
+
+/* Congestion avoidance callback. */
+static void quic_cc_nr_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
+ path = container_of(cc, struct quic_path, cc);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ {
+ uint64_t acked;
+ /* Do not increase the congestion window in recovery period. */
+ if (ev->ack.time_sent <= nr->recovery_start_time)
+ goto out;
+
+			/* Increase the congestion window by about
+			 * (acked / cwnd) of the MTU: acknowledging a full
+			 * window grows it by one MTU (one packet per RTT),
+			 * the remainder being kept for the next ACKs.
+			 */
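+			/* For instance with mtu = 1252 and cwnd = 125200 bytes,
+			 * acknowledging a full window grows cwnd by
+			 * 125200 * 1252 / 125200 = 1252 bytes.
+			 */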
+ acked = ev->ack.acked * path->mtu + nr->remain_acked;
+ nr->remain_acked = acked % path->cwnd;
+ path->cwnd += acked / path->cwnd;
+ break;
+ }
+
+ case QUIC_CC_EVT_LOSS:
+ /* Do not decrease the congestion window when already in recovery period. */
+ if (ev->loss.time_sent <= nr->recovery_start_time)
+ goto out;
+
+ nr->recovery_start_time = now_ms;
+ nr->ssthresh = path->cwnd;
+ path->cwnd = QUIC_MAX(path->cwnd >> 1, path->min_cwnd);
+ break;
+
+ case QUIC_CC_EVT_ECN_CE:
+ /* XXX TO DO XXX */
+ break;
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
+
+static void quic_cc_nr_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+ struct quic_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ path = container_of(cc, struct quic_path, cc);
+ chunk_appendf(buf, " state=%s cwnd=%llu ssthresh=%ld recovery_start_time=%llu",
+ quic_cc_state_str(cc->algo->state),
+ (unsigned long long)path->cwnd,
+ (long)nr->ssthresh,
+ (unsigned long long)nr->recovery_start_time);
+}
+
+static void (*quic_cc_nr_state_cbs[])(struct quic_cc *cc,
+ struct quic_cc_event *ev) = {
+ [QUIC_CC_ST_SS] = quic_cc_nr_ss_cb,
+ [QUIC_CC_ST_CA] = quic_cc_nr_ca_cb,
+};
+
+static void quic_cc_nr_event(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+	quic_cc_nr_state_cbs[cc->algo->state](cc, ev);
+}
+
+struct quic_cc_algo quic_cc_algo_nr = {
+ .type = QUIC_CC_ALGO_TP_NEWRENO,
+ .init = quic_cc_nr_init,
+ .event = quic_cc_nr_event,
+ .slow_start = quic_cc_nr_slow_start,
+ .state_trace = quic_cc_nr_state_trace,
+};
+
diff --git a/src/quic_conn.c b/src/quic_conn.c
new file mode 100644
index 0000000..c768160
--- /dev/null
+++ b/src/quic_conn.c
@@ -0,0 +1,7565 @@
+/*
+ * QUIC protocol implementation. Lower layer with internal features implemented
+ * here such as QUIC encryption, idle timeout, acknowledgement and
+ * retransmission.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/quic_conn.h>
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/buf-t.h>
+#include <haproxy/compat.h>
+#include <haproxy/api.h>
+#include <haproxy/debug.h>
+#include <haproxy/tools.h>
+#include <haproxy/ticks.h>
+
+#include <haproxy/connection.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global.h>
+#include <haproxy/h3.h>
+#include <haproxy/hq_interop.h>
+#include <haproxy/log.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/ncbuf.h>
+#include <haproxy/pipe.h>
+#include <haproxy/proxy.h>
+#include <haproxy/quic_cc.h>
+#include <haproxy/quic_frame.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_loss.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_stats.h>
+#include <haproxy/quic_stream.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/cbuf.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/task.h>
+#include <haproxy/trace.h>
+
+/* list of QUIC versions supported by this implementation */
+const struct quic_version quic_versions[] = {
+ {
+ .num = QUIC_PROTOCOL_VERSION_DRAFT_29,
+ .initial_salt = initial_salt_draft_29,
+ .initial_salt_len = sizeof initial_salt_draft_29,
+ .key_label = (const unsigned char *)QUIC_HKDF_KEY_LABEL_V1,
+ .key_label_len = sizeof(QUIC_HKDF_KEY_LABEL_V1) - 1,
+ .iv_label = (const unsigned char *)QUIC_HKDF_IV_LABEL_V1,
+ .iv_label_len = sizeof(QUIC_HKDF_IV_LABEL_V1) - 1,
+ .hp_label = (const unsigned char *)QUIC_HKDF_HP_LABEL_V1,
+ .hp_label_len = sizeof(QUIC_HKDF_HP_LABEL_V1) - 1,
+ .ku_label = (const unsigned char *)QUIC_HKDF_KU_LABEL_V1,
+ .ku_label_len = sizeof(QUIC_HKDF_KU_LABEL_V1) - 1,
+ .retry_tag_key = (const unsigned char *)QUIC_TLS_RETRY_KEY_DRAFT,
+ .retry_tag_nonce = (const unsigned char *)QUIC_TLS_RETRY_NONCE_DRAFT,
+ },
+ {
+ .num = QUIC_PROTOCOL_VERSION_1,
+ .initial_salt = initial_salt_v1,
+ .initial_salt_len = sizeof initial_salt_v1,
+ .key_label = (const unsigned char *)QUIC_HKDF_KEY_LABEL_V1,
+ .key_label_len = sizeof(QUIC_HKDF_KEY_LABEL_V1) - 1,
+ .iv_label = (const unsigned char *)QUIC_HKDF_IV_LABEL_V1,
+ .iv_label_len = sizeof(QUIC_HKDF_IV_LABEL_V1) - 1,
+ .hp_label = (const unsigned char *)QUIC_HKDF_HP_LABEL_V1,
+ .hp_label_len = sizeof(QUIC_HKDF_HP_LABEL_V1) - 1,
+ .ku_label = (const unsigned char *)QUIC_HKDF_KU_LABEL_V1,
+ .ku_label_len = sizeof(QUIC_HKDF_KU_LABEL_V1) - 1,
+ .retry_tag_key = (const unsigned char *)QUIC_TLS_RETRY_KEY_V1,
+ .retry_tag_nonce = (const unsigned char *)QUIC_TLS_RETRY_NONCE_V1,
+ },
+ {
+ .num = QUIC_PROTOCOL_VERSION_2_DRAFT,
+ .initial_salt = initial_salt_v2_draft,
+ .initial_salt_len = sizeof initial_salt_v2_draft,
+ .key_label = (const unsigned char *)QUIC_HKDF_KEY_LABEL_V2,
+ .key_label_len = sizeof(QUIC_HKDF_KEY_LABEL_V2) - 1,
+ .iv_label = (const unsigned char *)QUIC_HKDF_IV_LABEL_V2,
+ .iv_label_len = sizeof(QUIC_HKDF_IV_LABEL_V2) - 1,
+ .hp_label = (const unsigned char *)QUIC_HKDF_HP_LABEL_V2,
+ .hp_label_len = sizeof(QUIC_HKDF_HP_LABEL_V2) - 1,
+ .ku_label = (const unsigned char *)QUIC_HKDF_KU_LABEL_V2,
+ .ku_label_len = sizeof(QUIC_HKDF_KU_LABEL_V2) - 1,
+ .retry_tag_key = (const unsigned char *)QUIC_TLS_RETRY_KEY_V2_DRAFT,
+ .retry_tag_nonce = (const unsigned char *)QUIC_TLS_RETRY_NONCE_V2_DRAFT,
+ },
+};
+
+/* The total number of supported versions */
+const size_t quic_versions_nb = sizeof quic_versions / sizeof *quic_versions;
+/* Listener only preferred version */
+const struct quic_version *preferred_version;
+
+/* trace source and events */
+static void quic_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct trace_event quic_trace_events[] = {
+ { .mask = QUIC_EV_CONN_NEW, .name = "new_conn", .desc = "new QUIC connection" },
+ { .mask = QUIC_EV_CONN_INIT, .name = "new_conn_init", .desc = "new QUIC connection initialization" },
+ { .mask = QUIC_EV_CONN_ISEC, .name = "init_secs", .desc = "initial secrets derivation" },
+ { .mask = QUIC_EV_CONN_RSEC, .name = "read_secs", .desc = "read secrets derivation" },
+ { .mask = QUIC_EV_CONN_WSEC, .name = "write_secs", .desc = "write secrets derivation" },
+ { .mask = QUIC_EV_CONN_LPKT, .name = "lstnr_packet", .desc = "new listener received packet" },
+ { .mask = QUIC_EV_CONN_SPKT, .name = "srv_packet", .desc = "new server received packet" },
+	{ .mask = QUIC_EV_CONN_ENCPKT, .name = "enc_hdshk_pkt", .desc = "handshake packet encryption" },
+ { .mask = QUIC_EV_CONN_TXPKT, .name = "tx_pkt", .desc = "TX packet" },
+	{ .mask = QUIC_EV_CONN_PAPKT, .name = "phdshk_apkt", .desc = "post handshake application packet preparation" },
+	{ .mask = QUIC_EV_CONN_PAPKTS, .name = "phdshk_apkts", .desc = "post handshake application packets preparation" },
+ { .mask = QUIC_EV_CONN_IO_CB, .name = "qc_io_cb", .desc = "QUIC conn. I/O processing" },
+ { .mask = QUIC_EV_CONN_RMHP, .name = "rm_hp", .desc = "Remove header protection" },
+ { .mask = QUIC_EV_CONN_PRSHPKT, .name = "parse_hpkt", .desc = "parse handshake packet" },
+ { .mask = QUIC_EV_CONN_PRSAPKT, .name = "parse_apkt", .desc = "parse application packet" },
+ { .mask = QUIC_EV_CONN_PRSFRM, .name = "parse_frm", .desc = "parse frame" },
+ { .mask = QUIC_EV_CONN_PRSAFRM, .name = "parse_ack_frm", .desc = "parse ACK frame" },
+ { .mask = QUIC_EV_CONN_BFRM, .name = "build_frm", .desc = "build frame" },
+	{ .mask = QUIC_EV_CONN_PHPKTS, .name = "phdshk_pkts", .desc = "handshake packets preparation" },
+ { .mask = QUIC_EV_CONN_TRMHP, .name = "rm_hp_try", .desc = "header protection removing try" },
+ { .mask = QUIC_EV_CONN_ELRMHP, .name = "el_rm_hp", .desc = "handshake enc. level header protection removing" },
+ { .mask = QUIC_EV_CONN_RXPKT, .name = "rx_pkt", .desc = "RX packet" },
+ { .mask = QUIC_EV_CONN_SSLDATA, .name = "ssl_provide_data", .desc = "CRYPTO data provision to TLS stack" },
+ { .mask = QUIC_EV_CONN_RXCDATA, .name = "el_treat_rx_cfrms",.desc = "enc. level RX CRYPTO frames processing"},
+ { .mask = QUIC_EV_CONN_ADDDATA, .name = "add_hdshk_data", .desc = "TLS stack ->add_handshake_data() call"},
+ { .mask = QUIC_EV_CONN_FFLIGHT, .name = "flush_flight", .desc = "TLS stack ->flush_flight() call"},
+ { .mask = QUIC_EV_CONN_SSLALERT, .name = "send_alert", .desc = "TLS stack ->send_alert() call"},
+ { .mask = QUIC_EV_CONN_RTTUPDT, .name = "rtt_updt", .desc = "RTT sampling" },
+ { .mask = QUIC_EV_CONN_SPPKTS, .name = "sppkts", .desc = "send prepared packets" },
+ { .mask = QUIC_EV_CONN_PKTLOSS, .name = "pktloss", .desc = "detect packet loss" },
+ { .mask = QUIC_EV_CONN_STIMER, .name = "stimer", .desc = "set timer" },
+ { .mask = QUIC_EV_CONN_PTIMER, .name = "ptimer", .desc = "process timer" },
+ { .mask = QUIC_EV_CONN_SPTO, .name = "spto", .desc = "set PTO" },
+ { .mask = QUIC_EV_CONN_BCFRMS, .name = "bcfrms", .desc = "build CRYPTO data frames" },
+	{ .mask = QUIC_EV_CONN_XPRTSEND, .name = "xprt_send", .desc = "sending XPRT subscription" },
+	{ .mask = QUIC_EV_CONN_XPRTRECV, .name = "xprt_recv", .desc = "receiving XPRT subscription" },
+ { .mask = QUIC_EV_CONN_FREED, .name = "conn_freed", .desc = "releasing conn. memory" },
+ { .mask = QUIC_EV_CONN_CLOSE, .name = "conn_close", .desc = "closing conn." },
+ { .mask = QUIC_EV_CONN_ACKSTRM, .name = "ack_strm", .desc = "STREAM ack."},
+ { .mask = QUIC_EV_CONN_FRMLIST, .name = "frm_list", .desc = "frame list"},
+ { .mask = QUIC_EV_STATELESS_RST, .name = "stateless_reset", .desc = "stateless reset sent"},
+ { .mask = QUIC_EV_TRANSP_PARAMS, .name = "transport_params", .desc = "transport parameters"},
+ { .mask = QUIC_EV_CONN_IDLE_TIMER, .name = "idle_timer", .desc = "idle timer task"},
+	{ .mask = QUIC_EV_CONN_SUB, .name = "xprt_sub", .desc = "RX/TX subscription or unsubscription to QUIC xprt"},
+ { /* end */ }
+};
+
+static const struct name_desc quic_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="quic", .desc="QUIC transport" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc quic_trace_decoding[] = {
+#define QUIC_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+ { /* end */ }
+};
+
+
+struct trace_source trace_quic = {
+ .name = IST("quic"),
+ .desc = "QUIC xprt",
+ .arg_def = TRC_ARG1_QCON, /* TRACE()'s first argument is always a quic_conn */
+ .default_cb = quic_trace,
+ .known_events = quic_trace_events,
+ .lockon_args = quic_trace_lockon_args,
+ .decoding = quic_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+#define TRACE_SOURCE &trace_quic
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+static BIO_METHOD *ha_quic_meth;
+
+DECLARE_POOL(pool_head_quic_tx_ring, "quic_tx_ring", QUIC_TX_RING_BUFSZ);
+DECLARE_POOL(pool_head_quic_conn_rxbuf, "quic_conn_rxbuf", QUIC_CONN_RX_BUFSZ);
+DECLARE_STATIC_POOL(pool_head_quic_conn_ctx,
+ "quic_conn_ctx", sizeof(struct ssl_sock_ctx));
+DECLARE_STATIC_POOL(pool_head_quic_conn, "quic_conn", sizeof(struct quic_conn));
+DECLARE_POOL(pool_head_quic_connection_id,
+ "quic_connnection_id", sizeof(struct quic_connection_id));
+DECLARE_POOL(pool_head_quic_dgram, "quic_dgram", sizeof(struct quic_dgram));
+DECLARE_POOL(pool_head_quic_rx_packet, "quic_rx_packet", sizeof(struct quic_rx_packet));
+DECLARE_POOL(pool_head_quic_tx_packet, "quic_tx_packet", sizeof(struct quic_tx_packet));
+DECLARE_STATIC_POOL(pool_head_quic_rx_crypto_frm, "quic_rx_crypto_frm", sizeof(struct quic_rx_crypto_frm));
+DECLARE_STATIC_POOL(pool_head_quic_crypto_buf, "quic_crypto_buf", sizeof(struct quic_crypto_buf));
+DECLARE_STATIC_POOL(pool_head_quic_cstream, "quic_cstream", sizeof(struct quic_cstream));
+DECLARE_POOL(pool_head_quic_frame, "quic_frame", sizeof(struct quic_frame));
+DECLARE_STATIC_POOL(pool_head_quic_arng, "quic_arng", sizeof(struct quic_arng_node));
+
+static struct quic_tx_packet *qc_build_pkt(unsigned char **pos, const unsigned char *buf_end,
+ struct quic_enc_level *qel, struct quic_tls_ctx *ctx,
+ struct list *frms, struct quic_conn *qc,
+ const struct quic_version *ver, size_t dglen, int pkt_type,
+ int force_ack, int padding, int probe, int cc, int *err);
+struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int state);
+static void qc_idle_timer_do_rearm(struct quic_conn *qc);
+static void qc_idle_timer_rearm(struct quic_conn *qc, int read);
+static int qc_conn_alloc_ssl_ctx(struct quic_conn *qc);
+static int quic_conn_init_timer(struct quic_conn *qc);
+static int quic_conn_init_idle_timer_task(struct quic_conn *qc);
+
+/* Only for debug purpose */
+struct enc_debug_info {
+ unsigned char *payload;
+ size_t payload_len;
+ unsigned char *aad;
+ size_t aad_len;
+ uint64_t pn;
+};
+
+/* Initializes an enc_debug_info struct (only for debug purposes) */
+static inline void enc_debug_info_init(struct enc_debug_info *edi,
+ unsigned char *payload, size_t payload_len,
+ unsigned char *aad, size_t aad_len, uint64_t pn)
+{
+ edi->payload = payload;
+ edi->payload_len = payload_len;
+ edi->aad = aad;
+ edi->aad_len = aad_len;
+ edi->pn = pn;
+}
+
+/* Trace callback for QUIC.
+ * These traces always expect that arg1, if non-null, is of type quic_conn.
+ */
+static void quic_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct quic_conn *qc = a1;
+
+ if (qc) {
+ const struct quic_tls_ctx *tls_ctx;
+
+ chunk_appendf(&trace_buf, " : qc@%p", qc);
+ if (mask & QUIC_EV_CONN_INIT) {
+ chunk_appendf(&trace_buf, "\n odcid");
+ quic_cid_dump(&trace_buf, &qc->odcid);
+ chunk_appendf(&trace_buf, "\n dcid");
+ quic_cid_dump(&trace_buf, &qc->dcid);
+ chunk_appendf(&trace_buf, "\n scid");
+ quic_cid_dump(&trace_buf, &qc->scid);
+ }
+
+ if (mask & QUIC_EV_TRANSP_PARAMS) {
+ const struct quic_transport_params *p = a2;
+
+ if (p)
+ quic_transport_params_dump(&trace_buf, qc, p);
+ }
+
+ if (mask & QUIC_EV_CONN_ADDDATA) {
+ const enum ssl_encryption_level_t *level = a2;
+ const size_t *len = a3;
+
+ if (level) {
+ enum quic_tls_enc_level lvl = ssl_to_quic_enc_level(*level);
+
+ chunk_appendf(&trace_buf, " el=%c(%d)", quic_enc_level_char(lvl), lvl);
+ }
+ if (len)
+ chunk_appendf(&trace_buf, " len=%llu", (unsigned long long)*len);
+ }
+ if ((mask & QUIC_EV_CONN_ISEC) && qc) {
+ /* Initial read & write secrets. */
+ enum quic_tls_enc_level level = QUIC_TLS_ENC_LEVEL_INITIAL;
+ const unsigned char *rx_sec = a2;
+ const unsigned char *tx_sec = a3;
+
+ tls_ctx = &qc->els[level].tls_ctx;
+ chunk_appendf(&trace_buf, "\n RX el=%c", quic_enc_level_char(level));
+ if (rx_sec)
+ quic_tls_secret_hexdump(&trace_buf, rx_sec, 32);
+ quic_tls_keys_hexdump(&trace_buf, &tls_ctx->rx);
+ chunk_appendf(&trace_buf, "\n TX el=%c", quic_enc_level_char(level));
+ if (tx_sec)
+ quic_tls_secret_hexdump(&trace_buf, tx_sec, 32);
+ quic_tls_keys_hexdump(&trace_buf, &tls_ctx->tx);
+ }
+ if (mask & (QUIC_EV_CONN_RSEC|QUIC_EV_CONN_RWSEC)) {
+ const enum ssl_encryption_level_t *level = a2;
+
+ if (level) {
+ enum quic_tls_enc_level lvl = ssl_to_quic_enc_level(*level);
+
+ chunk_appendf(&trace_buf, "\n RX el=%c", quic_enc_level_char(lvl));
+ if (quic_tls_has_rx_sec(&qc->els[lvl])) {
+ tls_ctx = &qc->els[lvl].tls_ctx;
+ quic_tls_keys_hexdump(&trace_buf, &tls_ctx->rx);
+ }
+ else
+ chunk_appendf(&trace_buf, " (none)");
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_WSEC|QUIC_EV_CONN_RWSEC)) {
+ const enum ssl_encryption_level_t *level = a2;
+
+ if (level) {
+ enum quic_tls_enc_level lvl = ssl_to_quic_enc_level(*level);
+
+ chunk_appendf(&trace_buf, "\n TX el=%c", quic_enc_level_char(lvl));
+ if (quic_tls_has_tx_sec(&qc->els[lvl])) {
+ tls_ctx = &qc->els[lvl].tls_ctx;
+ quic_tls_keys_hexdump(&trace_buf, &tls_ctx->tx);
+ }
+ else
+ chunk_appendf(&trace_buf, " (none)");
+ }
+
+ }
+
+ if (mask & QUIC_EV_CONN_FRMLIST) {
+ const struct list *l = a2;
+
+ if (l) {
+ const struct quic_frame *frm;
+ list_for_each_entry(frm, l, list) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_TXPKT|QUIC_EV_CONN_PAPKT)) {
+ const struct quic_tx_packet *pkt = a2;
+ const struct quic_enc_level *qel = a3;
+ const ssize_t *room = a4;
+
+ if (qel) {
+ const struct quic_pktns *pktns = qel->pktns;
+ chunk_appendf(&trace_buf, " qel=%c cwnd=%llu ppif=%lld pif=%llu "
+ "if=%llu pp=%u",
+ quic_enc_level_char_from_qel(qel, qc),
+ (unsigned long long)qc->path->cwnd,
+ (unsigned long long)qc->path->prep_in_flight,
+ (unsigned long long)qc->path->in_flight,
+ (unsigned long long)pktns->tx.in_flight,
+ pktns->tx.pto_probe);
+ }
+ if (pkt) {
+ const struct quic_frame *frm;
+ if (pkt->pn_node.key != (uint64_t)-1)
+				chunk_appendf(&trace_buf, " pn=%llu", (ull)pkt->pn_node.key);
+ list_for_each_entry(frm, &pkt->frms, list) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ }
+
+ if (room) {
+ chunk_appendf(&trace_buf, " room=%lld", (long long)*room);
+ chunk_appendf(&trace_buf, " dcid.len=%llu scid.len=%llu",
+ (unsigned long long)qc->dcid.len, (unsigned long long)qc->scid.len);
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_IO_CB) {
+ const enum quic_handshake_state *state = a2;
+ const int *err = a3;
+
+ if (state)
+ chunk_appendf(&trace_buf, " state=%s", quic_hdshk_state_str(*state));
+ if (err)
+ chunk_appendf(&trace_buf, " err=%s", ssl_error_str(*err));
+ }
+
+ if (mask & (QUIC_EV_CONN_TRMHP|QUIC_EV_CONN_ELRMHP|QUIC_EV_CONN_SPKT)) {
+ const struct quic_rx_packet *pkt = a2;
+ const unsigned long *pktlen = a3;
+ const SSL *ssl = a4;
+
+ if (pkt) {
+ chunk_appendf(&trace_buf, " pkt@%p", pkt);
+ if (pkt->type == QUIC_PACKET_TYPE_SHORT && pkt->data)
+ chunk_appendf(&trace_buf, " kp=%d",
+ !!(*pkt->data & QUIC_PACKET_KEY_PHASE_BIT));
+ chunk_appendf(&trace_buf, " el=%c",
+ quic_packet_type_enc_level_char(pkt->type));
+ if (pkt->pnl)
+ chunk_appendf(&trace_buf, " pnl=%u pn=%llu", pkt->pnl,
+ (unsigned long long)pkt->pn);
+ if (pkt->token_len)
+ chunk_appendf(&trace_buf, " toklen=%llu",
+ (unsigned long long)pkt->token_len);
+ if (pkt->aad_len)
+ chunk_appendf(&trace_buf, " aadlen=%llu",
+ (unsigned long long)pkt->aad_len);
+ chunk_appendf(&trace_buf, " flags=0x%x len=%llu",
+ pkt->flags, (unsigned long long)pkt->len);
+ }
+ if (pktlen)
+ chunk_appendf(&trace_buf, " (%ld)", *pktlen);
+ if (ssl) {
+ enum ssl_encryption_level_t level = SSL_quic_read_level(ssl);
+ chunk_appendf(&trace_buf, " el=%c",
+ quic_enc_level_char(ssl_to_quic_enc_level(level)));
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_RXPKT|QUIC_EV_CONN_PRSHPKT|QUIC_EV_CONN_SSLDATA)) {
+ const struct quic_rx_packet *pkt = a2;
+ const struct quic_rx_crypto_frm *cf = a3;
+ const SSL *ssl = a4;
+
+ if (pkt)
+ chunk_appendf(&trace_buf, " pkt@%p el=%c pn=%llu", pkt,
+ quic_packet_type_enc_level_char(pkt->type),
+ (unsigned long long)pkt->pn);
+ if (cf)
+ chunk_appendf(&trace_buf, " cfoff=%llu cflen=%llu",
+ (unsigned long long)cf->offset_node.key,
+ (unsigned long long)cf->len);
+ if (ssl) {
+ enum ssl_encryption_level_t level = SSL_quic_read_level(ssl);
+ chunk_appendf(&trace_buf, " rel=%c",
+ quic_enc_level_char(ssl_to_quic_enc_level(level)));
+ }
+
+ if (qc->err.code)
+ chunk_appendf(&trace_buf, " err_code=0x%llx", (ull)qc->err.code);
+ }
+
+ if (mask & (QUIC_EV_CONN_PRSFRM|QUIC_EV_CONN_BFRM)) {
+ const struct quic_frame *frm = a2;
+
+ if (frm)
+ chunk_appendf(&trace_buf, " %s", quic_frame_type_string(frm->type));
+ }
+
+ if (mask & QUIC_EV_CONN_PHPKTS) {
+ const struct quic_enc_level *qel = a2;
+
+ if (qel) {
+ const struct quic_pktns *pktns = qel->pktns;
+ chunk_appendf(&trace_buf,
+ " qel=%c state=%s ack?%d cwnd=%llu ppif=%lld pif=%llu if=%llu pp=%u off=%llu",
+ quic_enc_level_char_from_qel(qel, qc),
+ quic_hdshk_state_str(qc->state),
+ !!(qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED),
+ (unsigned long long)qc->path->cwnd,
+ (unsigned long long)qc->path->prep_in_flight,
+ (unsigned long long)qc->path->in_flight,
+ (unsigned long long)pktns->tx.in_flight,
+ pktns->tx.pto_probe,
+ qel->cstream ? (unsigned long long)qel->cstream->rx.offset : 0);
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_ENCPKT) {
+ const struct enc_debug_info *edi = a2;
+
+ if (edi)
+ chunk_appendf(&trace_buf,
+ " payload=@%p payload_len=%llu"
+ " aad=@%p aad_len=%llu pn=%llu",
+ edi->payload, (unsigned long long)edi->payload_len,
+ edi->aad, (unsigned long long)edi->aad_len,
+ (unsigned long long)edi->pn);
+ }
+
+ if (mask & QUIC_EV_CONN_RMHP) {
+ const struct quic_rx_packet *pkt = a2;
+
+ if (pkt) {
+ const int *ret = a3;
+
+ chunk_appendf(&trace_buf, " pkt@%p", pkt);
+ if (ret && *ret)
+ chunk_appendf(&trace_buf, " pnl=%u pn=%llu",
+ pkt->pnl, (unsigned long long)pkt->pn);
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_PRSAFRM) {
+ const struct quic_frame *frm = a2;
+ const unsigned long *val1 = a3;
+ const unsigned long *val2 = a4;
+
+ if (frm) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ if (val1)
+ chunk_appendf(&trace_buf, " %lu", *val1);
+ if (val2)
+ chunk_appendf(&trace_buf, "..%lu", *val2);
+ }
+
+ if (mask & QUIC_EV_CONN_ACKSTRM) {
+ const struct quic_stream *s = a2;
+ const struct qc_stream_desc *stream = a3;
+
+ if (s)
+ chunk_appendf(&trace_buf, " off=%llu len=%llu", (ull)s->offset.key, (ull)s->len);
+ if (stream)
+ chunk_appendf(&trace_buf, " ack_offset=%llu", (ull)stream->ack_offset);
+ }
+
+ if (mask & QUIC_EV_CONN_RTTUPDT) {
+ const unsigned int *rtt_sample = a2;
+ const unsigned int *ack_delay = a3;
+ const struct quic_loss *ql = a4;
+
+ if (rtt_sample)
+ chunk_appendf(&trace_buf, " rtt_sample=%ums", *rtt_sample);
+ if (ack_delay)
+ chunk_appendf(&trace_buf, " ack_delay=%ums", *ack_delay);
+ if (ql)
+ chunk_appendf(&trace_buf,
+ " srtt=%ums rttvar=%ums min_rtt=%ums",
+ ql->srtt >> 3, ql->rtt_var >> 2, ql->rtt_min);
+ }
+ if (mask & QUIC_EV_CONN_CC) {
+ const struct quic_cc_event *ev = a2;
+ const struct quic_cc *cc = a3;
+
+		if (ev)
+			quic_cc_event_trace(&trace_buf, ev);
+		if (cc)
+			quic_cc_state_trace(&trace_buf, cc);
+ }
+
+ if (mask & QUIC_EV_CONN_PKTLOSS) {
+ const struct quic_pktns *pktns = a2;
+ const struct list *lost_pkts = a3;
+
+ if (pktns) {
+ chunk_appendf(&trace_buf, " pktns=%s",
+ pktns == &qc->pktns[QUIC_TLS_PKTNS_INITIAL] ? "I" :
+ pktns == &qc->pktns[QUIC_TLS_PKTNS_01RTT] ? "01RTT": "H");
+ if (pktns->tx.loss_time)
+ chunk_appendf(&trace_buf, " loss_time=%dms",
+ TICKS_TO_MS(tick_remain(now_ms, pktns->tx.loss_time)));
+ }
+ if (lost_pkts && !LIST_ISEMPTY(lost_pkts)) {
+ struct quic_tx_packet *pkt;
+
+ chunk_appendf(&trace_buf, " lost_pkts:");
+ list_for_each_entry(pkt, lost_pkts, list)
+ chunk_appendf(&trace_buf, " %lu", (unsigned long)pkt->pn_node.key);
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_STIMER|QUIC_EV_CONN_PTIMER|QUIC_EV_CONN_SPTO)) {
+ const struct quic_pktns *pktns = a2;
+ const int *duration = a3;
+ const uint64_t *ifae_pkts = a4;
+
+ if (ifae_pkts)
+ chunk_appendf(&trace_buf, " ifae_pkts=%llu",
+ (unsigned long long)*ifae_pkts);
+ if (pktns) {
+ chunk_appendf(&trace_buf, " pktns=%s pp=%d",
+ pktns == &qc->pktns[QUIC_TLS_PKTNS_INITIAL] ? "I" :
+ pktns == &qc->pktns[QUIC_TLS_PKTNS_01RTT] ? "01RTT": "H",
+ pktns->tx.pto_probe);
+ if (mask & (QUIC_EV_CONN_STIMER|QUIC_EV_CONN_SPTO)) {
+ if (pktns->tx.in_flight)
+ chunk_appendf(&trace_buf, " if=%llu", (ull)pktns->tx.in_flight);
+ if (pktns->tx.loss_time)
+ chunk_appendf(&trace_buf, " loss_time=%dms",
+ TICKS_TO_MS(pktns->tx.loss_time - now_ms));
+ }
+ if (mask & QUIC_EV_CONN_SPTO) {
+ if (pktns->tx.time_of_last_eliciting)
+ chunk_appendf(&trace_buf, " tole=%dms",
+ TICKS_TO_MS(pktns->tx.time_of_last_eliciting - now_ms));
+ if (duration)
+ chunk_appendf(&trace_buf, " dur=%dms", TICKS_TO_MS(*duration));
+ }
+ }
+
+ if (!(mask & (QUIC_EV_CONN_SPTO|QUIC_EV_CONN_PTIMER)) && qc->timer_task) {
+ chunk_appendf(&trace_buf,
+ " expire=%dms", TICKS_TO_MS(qc->timer - now_ms));
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_SPPKTS) {
+ const struct quic_tx_packet *pkt = a2;
+
+ chunk_appendf(&trace_buf, " cwnd=%llu ppif=%llu pif=%llu",
+ (unsigned long long)qc->path->cwnd,
+ (unsigned long long)qc->path->prep_in_flight,
+ (unsigned long long)qc->path->in_flight);
+ if (pkt) {
+ const struct quic_frame *frm;
+ chunk_appendf(&trace_buf, " pn=%lu(%s) iflen=%llu",
+ (unsigned long)pkt->pn_node.key,
+ pkt->pktns == &qc->pktns[QUIC_TLS_PKTNS_INITIAL] ? "I" :
+ pkt->pktns == &qc->pktns[QUIC_TLS_PKTNS_01RTT] ? "01RTT": "H",
+ (unsigned long long)pkt->in_flight_len);
+ chunk_appendf(&trace_buf, " rx.bytes=%llu tx.bytes=%llu",
+ (unsigned long long)qc->rx.bytes,
+ (unsigned long long)qc->tx.bytes);
+ list_for_each_entry(frm, &pkt->frms, list) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_SSLALERT) {
+ const uint8_t *alert = a2;
+ const enum ssl_encryption_level_t *level = a3;
+
+ if (alert)
+ chunk_appendf(&trace_buf, " alert=0x%02x", *alert);
+ if (level)
+ chunk_appendf(&trace_buf, " el=%c",
+ quic_enc_level_char(ssl_to_quic_enc_level(*level)));
+ }
+
+ if (mask & QUIC_EV_CONN_BCFRMS) {
+ const size_t *sz1 = a2;
+ const size_t *sz2 = a3;
+ const size_t *sz3 = a4;
+
+ if (sz1)
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*sz1);
+ if (sz2)
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*sz2);
+ if (sz3)
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*sz3);
+ }
+
+ if (mask & QUIC_EV_CONN_PSTRM) {
+ const struct quic_frame *frm = a2;
+
+ if (frm) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_ELEVELSEL) {
+ const enum quic_handshake_state *state = a2;
+ const enum quic_tls_enc_level *level = a3;
+ const enum quic_tls_enc_level *next_level = a4;
+
+ if (state)
+ chunk_appendf(&trace_buf, " state=%s", quic_hdshk_state_str(qc->state));
+ if (level)
+ chunk_appendf(&trace_buf, " level=%c", quic_enc_level_char(*level));
+ if (next_level)
+ chunk_appendf(&trace_buf, " next_level=%c", quic_enc_level_char(*next_level));
+
+ }
+ }
+ if (mask & QUIC_EV_CONN_LPKT) {
+ const struct quic_rx_packet *pkt = a2;
+ const uint64_t *len = a3;
+ const struct quic_version *ver = a4;
+
+ if (pkt) {
+ chunk_appendf(&trace_buf, " pkt@%p type=0x%02x %s",
+ pkt, pkt->type, qc_pkt_long(pkt) ? "long" : "short");
+ if (pkt->pn_node.key != (uint64_t)-1)
+				chunk_appendf(&trace_buf, " pn=%llu", (ull)pkt->pn_node.key);
+ }
+
+ if (len)
+ chunk_appendf(&trace_buf, " len=%llu", (ull)*len);
+
+ if (ver)
+ chunk_appendf(&trace_buf, " ver=0x%08x", ver->num);
+ }
+
+ if (mask & QUIC_EV_STATELESS_RST) {
+ const struct quic_cid *cid = a2;
+
+ if (cid)
+ quic_cid_dump(&trace_buf, cid);
+ }
+
+}
+
+/* Returns 1 if the address of <qc> connection peer has been validated, 0 if not. */
+static inline int quic_peer_validated_addr(struct quic_conn *qc)
+{
+ struct quic_pktns *hdshk_pktns, *app_pktns;
+
+ if (!qc_is_listener(qc))
+ return 1;
+
+ hdshk_pktns = qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE].pktns;
+ app_pktns = qc->els[QUIC_TLS_ENC_LEVEL_APP].pktns;
+ if ((hdshk_pktns->flags & QUIC_FL_PKTNS_PKT_RECEIVED) ||
+ (app_pktns->flags & QUIC_FL_PKTNS_PKT_RECEIVED) ||
+ qc->state >= QUIC_HS_ST_COMPLETE)
+ return 1;
+
+ return 0;
+}
+
+/* To be called to kill a connection as soon as possible (without sending any packet). */
+void qc_kill_conn(struct quic_conn *qc)
+{
+ qc->flags |= QUIC_FL_CONN_TO_KILL;
+ task_wakeup(qc->idle_timer_task, TASK_WOKEN_OTHER);
+}
+
+/* Set the timer attached to the QUIC connection with <ctx> as I/O handler, used
+ * for both loss detection and PTO, and schedule the task associated to this
+ * timer if needed.
+ */
+static inline void qc_set_timer(struct quic_conn *qc)
+{
+ struct quic_pktns *pktns;
+ unsigned int pto;
+ int handshake_confirmed;
+
+ TRACE_ENTER(QUIC_EV_CONN_STIMER, qc,
+ NULL, NULL, &qc->path->ifae_pkts);
+
+ pktns = quic_loss_pktns(qc);
+ if (tick_isset(pktns->tx.loss_time)) {
+ qc->timer = pktns->tx.loss_time;
+ goto out;
+ }
+
+ /* anti-amplification: the timer must be
+ * cancelled for a server which reached the anti-amplification limit.
+ */
+ if (!quic_peer_validated_addr(qc) &&
+ (qc->flags & QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED)) {
+ TRACE_PROTO("anti-amplification reached", QUIC_EV_CONN_STIMER, qc);
+ qc->timer = TICK_ETERNITY;
+ goto out;
+ }
+
+ if (!qc->path->ifae_pkts && quic_peer_validated_addr(qc)) {
+ TRACE_PROTO("timer cancellation", QUIC_EV_CONN_STIMER, qc);
+ /* Timer cancellation. */
+ qc->timer = TICK_ETERNITY;
+ goto out;
+ }
+
+ handshake_confirmed = qc->state >= QUIC_HS_ST_CONFIRMED;
+ pktns = quic_pto_pktns(qc, handshake_confirmed, &pto);
+ if (tick_isset(pto))
+ qc->timer = pto;
+ out:
+ if (qc->timer_task) {
+ if (qc->timer == TICK_ETERNITY) {
+ qc->timer_task->expire = TICK_ETERNITY;
+ }
+ else if (tick_is_expired(qc->timer, now_ms)) {
+ TRACE_DEVEL("wakeup asap timer task", QUIC_EV_CONN_STIMER, qc);
+ task_wakeup(qc->timer_task, TASK_WOKEN_MSG);
+ }
+ else {
+ TRACE_DEVEL("timer task scheduling", QUIC_EV_CONN_STIMER, qc);
+ task_schedule(qc->timer_task, qc->timer);
+ }
+ }
+ TRACE_LEAVE(QUIC_EV_CONN_STIMER, qc, pktns);
+}
+
+/* Derive new keys and IVs required for the Key Update feature for <qc> QUIC
+ * connection.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_tls_key_update(struct quic_conn *qc)
+{
+ struct quic_tls_ctx *tls_ctx = &qc->els[QUIC_TLS_ENC_LEVEL_APP].tls_ctx;
+ struct quic_tls_secrets *rx, *tx;
+ struct quic_tls_kp *nxt_rx = &qc->ku.nxt_rx;
+ struct quic_tls_kp *nxt_tx = &qc->ku.nxt_tx;
+ const struct quic_version *ver =
+ qc->negotiated_version ? qc->negotiated_version : qc->original_version;
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_RWSEC, qc);
+
+	rx = &tls_ctx->rx;
+	tx = &tls_ctx->tx;
+
+ /* Prepare new RX secrets */
+ if (!quic_tls_sec_update(rx->md, ver, nxt_rx->secret, nxt_rx->secretlen,
+ rx->secret, rx->secretlen)) {
+ TRACE_ERROR("New RX secret update failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_derive_keys(rx->aead, NULL, rx->md, ver,
+ nxt_rx->key, nxt_rx->keylen,
+ nxt_rx->iv, nxt_rx->ivlen, NULL, 0,
+ nxt_rx->secret, nxt_rx->secretlen)) {
+ TRACE_ERROR("New RX key derivation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ /* Prepare new TX secrets */
+ if (!quic_tls_sec_update(tx->md, ver, nxt_tx->secret, nxt_tx->secretlen,
+ tx->secret, tx->secretlen)) {
+ TRACE_ERROR("New TX secret update failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_derive_keys(tx->aead, NULL, tx->md, ver,
+ nxt_tx->key, nxt_tx->keylen,
+ nxt_tx->iv, nxt_tx->ivlen, NULL, 0,
+ nxt_tx->secret, nxt_tx->secretlen)) {
+ TRACE_ERROR("New TX key derivation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (nxt_rx->ctx) {
+ EVP_CIPHER_CTX_free(nxt_rx->ctx);
+ nxt_rx->ctx = NULL;
+ }
+
+ if (!quic_tls_rx_ctx_init(&nxt_rx->ctx, tls_ctx->rx.aead, nxt_rx->key)) {
+		TRACE_ERROR("could not initialize RX TLS cipher context", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (nxt_tx->ctx) {
+ EVP_CIPHER_CTX_free(nxt_tx->ctx);
+ nxt_tx->ctx = NULL;
+ }
+
+	if (!quic_tls_tx_ctx_init(&nxt_tx->ctx, tls_ctx->tx.aead, nxt_tx->key)) {
+		TRACE_ERROR("could not initialize TX TLS cipher context", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RWSEC, qc);
+ return ret;
+}
+
+/* Rotate the Key Update information for <qc> QUIC connection.
+ * Must be used after having updated them.
+ * Always succeeds.
+ */
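+/* The RX secrets rotate through three sets of keys: the current keys become
+ * the previous ones (still usable to decrypt reordered packets from the old
+ * key phase), the next keys become the current ones, and the old previous
+ * buffers are recycled to receive the next update. TX only needs the current
+ * and next sets.
+ */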
+static void quic_tls_rotate_keys(struct quic_conn *qc)
+{
+ struct quic_tls_ctx *tls_ctx = &qc->els[QUIC_TLS_ENC_LEVEL_APP].tls_ctx;
+ unsigned char *curr_secret, *curr_iv, *curr_key;
+ EVP_CIPHER_CTX *curr_ctx;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ /* Rotate the RX secrets */
+ curr_ctx = tls_ctx->rx.ctx;
+ curr_secret = tls_ctx->rx.secret;
+ curr_iv = tls_ctx->rx.iv;
+ curr_key = tls_ctx->rx.key;
+
+ tls_ctx->rx.ctx = qc->ku.nxt_rx.ctx;
+ tls_ctx->rx.secret = qc->ku.nxt_rx.secret;
+ tls_ctx->rx.iv = qc->ku.nxt_rx.iv;
+ tls_ctx->rx.key = qc->ku.nxt_rx.key;
+
+ qc->ku.nxt_rx.ctx = qc->ku.prv_rx.ctx;
+ qc->ku.nxt_rx.secret = qc->ku.prv_rx.secret;
+ qc->ku.nxt_rx.iv = qc->ku.prv_rx.iv;
+ qc->ku.nxt_rx.key = qc->ku.prv_rx.key;
+
+ qc->ku.prv_rx.ctx = curr_ctx;
+ qc->ku.prv_rx.secret = curr_secret;
+ qc->ku.prv_rx.iv = curr_iv;
+ qc->ku.prv_rx.key = curr_key;
+ qc->ku.prv_rx.pn = tls_ctx->rx.pn;
+
+ /* Update the TX secrets */
+ curr_ctx = tls_ctx->tx.ctx;
+ curr_secret = tls_ctx->tx.secret;
+ curr_iv = tls_ctx->tx.iv;
+ curr_key = tls_ctx->tx.key;
+
+ tls_ctx->tx.ctx = qc->ku.nxt_tx.ctx;
+ tls_ctx->tx.secret = qc->ku.nxt_tx.secret;
+ tls_ctx->tx.iv = qc->ku.nxt_tx.iv;
+ tls_ctx->tx.key = qc->ku.nxt_tx.key;
+
+ qc->ku.nxt_tx.ctx = curr_ctx;
+ qc->ku.nxt_tx.secret = curr_secret;
+ qc->ku.nxt_tx.iv = curr_iv;
+ qc->ku.nxt_tx.key = curr_key;
+
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+}
+
+/* returns 0 on error, 1 on success */
+int ha_quic_set_encryption_secrets(SSL *ssl, enum ssl_encryption_level_t level,
+ const uint8_t *read_secret,
+ const uint8_t *write_secret, size_t secret_len)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ struct quic_tls_ctx *tls_ctx = &qc->els[ssl_to_quic_enc_level(level)].tls_ctx;
+ const SSL_CIPHER *cipher = SSL_get_current_cipher(ssl);
+ struct quic_tls_secrets *rx = NULL, *tx = NULL;
+ const struct quic_version *ver =
+ qc->negotiated_version ? qc->negotiated_version : qc->original_version;
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_RWSEC, qc);
+ BUG_ON(secret_len > QUIC_TLS_SECRET_LEN);
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+		TRACE_PROTO("connection to be killed", QUIC_EV_CONN_RWSEC, qc);
+ goto out;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) {
+ TRACE_PROTO("CC required", QUIC_EV_CONN_RWSEC, qc);
+ goto out;
+ }
+
+ if (!read_secret)
+ goto write;
+
+ rx = &tls_ctx->rx;
+ if (!quic_tls_secrets_keys_alloc(rx)) {
+ TRACE_ERROR("RX keys allocation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ rx->aead = tls_aead(cipher);
+ rx->md = tls_md(cipher);
+ rx->hp = tls_hp(cipher);
+
+ if (!quic_tls_derive_keys(rx->aead, rx->hp, rx->md, ver, rx->key, rx->keylen,
+ rx->iv, rx->ivlen, rx->hp_key, sizeof rx->hp_key,
+ read_secret, secret_len)) {
+		TRACE_ERROR("RX key derivation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_rx_ctx_init(&rx->ctx, rx->aead, rx->key)) {
+		TRACE_ERROR("could not initialize RX TLS cipher context", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_dec_aes_ctx_init(&rx->hp_ctx, rx->hp, rx->hp_key)) {
+		TRACE_ERROR("could not initialize RX TLS cipher context for HP", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+	/* Enqueue this connection asap if we could derive 0-RTT secrets as
+ * listener. Note that a listener derives only RX secrets for this
+ * level.
+ */
+ if (qc_is_listener(qc) && level == ssl_encryption_early_data) {
+ TRACE_DEVEL("pushing connection into accept queue", QUIC_EV_CONN_RWSEC, qc);
+ quic_accept_push_qc(qc);
+ }
+
+write:
+
+ if (!write_secret)
+ goto out;
+
+ tx = &tls_ctx->tx;
+ if (!quic_tls_secrets_keys_alloc(tx)) {
+ TRACE_ERROR("TX keys allocation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ tx->aead = tls_aead(cipher);
+ tx->md = tls_md(cipher);
+ tx->hp = tls_hp(cipher);
+
+ if (!quic_tls_derive_keys(tx->aead, tx->hp, tx->md, ver, tx->key, tx->keylen,
+ tx->iv, tx->ivlen, tx->hp_key, sizeof tx->hp_key,
+ write_secret, secret_len)) {
+ TRACE_ERROR("TX key derivation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_tx_ctx_init(&tx->ctx, tx->aead, tx->key)) {
+		TRACE_ERROR("could not initialize TX TLS cipher context", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_enc_aes_ctx_init(&tx->hp_ctx, tx->hp, tx->hp_key)) {
+		TRACE_ERROR("could not initialize TX TLS cipher context for HP", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (level == ssl_encryption_handshake && qc_is_listener(qc)) {
+ qc->enc_params_len =
+ quic_transport_params_encode(qc->enc_params,
+ qc->enc_params + sizeof qc->enc_params,
+ &qc->rx.params, ver, 1);
+ if (!qc->enc_params_len) {
+			TRACE_ERROR("quic_transport_params_encode() failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!SSL_set_quic_transport_params(qc->xprt_ctx->ssl, qc->enc_params, qc->enc_params_len)) {
+			TRACE_ERROR("SSL_set_quic_transport_params() failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+ }
+
+ if (level == ssl_encryption_application) {
+ struct quic_tls_kp *prv_rx = &qc->ku.prv_rx;
+ struct quic_tls_kp *nxt_rx = &qc->ku.nxt_rx;
+ struct quic_tls_kp *nxt_tx = &qc->ku.nxt_tx;
+
+ if (rx) {
+ if (!(rx->secret = pool_alloc(pool_head_quic_tls_secret))) {
+				TRACE_ERROR("Could not allocate RX application secret keys", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ memcpy(rx->secret, read_secret, secret_len);
+ rx->secretlen = secret_len;
+ }
+
+ if (tx) {
+ if (!(tx->secret = pool_alloc(pool_head_quic_tls_secret))) {
+				TRACE_ERROR("Could not allocate TX application secret keys", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ memcpy(tx->secret, write_secret, secret_len);
+ tx->secretlen = secret_len;
+ }
+
+ /* Initialize all the secret keys lengths */
+ prv_rx->secretlen = nxt_rx->secretlen = nxt_tx->secretlen = secret_len;
+ }
+
+ out:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RWSEC, qc, &level);
+ return ret;
+}
+
+/* Copy the CRYPTO data provided by the TLS stack at <data> with <len> as size
+ * into CRYPTO buffers dedicated to storing the information about outgoing
+ * CRYPTO frames, so that the CRYPTO data streams may be replayed.
+ * It fails (returns 0) only if it could not manage to allocate enough CRYPTO
+ * buffers to store all the data.
+ * Note that CRYPTO data may exist at any encryption level except 0-RTT.
+ */
+static int quic_crypto_data_cpy(struct quic_conn *qc, struct quic_enc_level *qel,
+ const unsigned char *data, size_t len)
+{
+ struct quic_crypto_buf **qcb;
+ /* The remaining byte to store in CRYPTO buffers. */
+ size_t cf_offset, cf_len, *nb_buf;
+ unsigned char *pos;
+ int ret = 0;
+
+ nb_buf = &qel->tx.crypto.nb_buf;
+ qcb = &qel->tx.crypto.bufs[*nb_buf - 1];
+ cf_offset = (*nb_buf - 1) * QUIC_CRYPTO_BUF_SZ + (*qcb)->sz;
+ cf_len = len;
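+	/* <cf_offset> is the absolute offset in the outgoing CRYPTO stream of
+	 * the first byte to be buffered: all the previous buffers are full
+	 * except possibly the last one.
+	 */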
+
+ TRACE_ENTER(QUIC_EV_CONN_ADDDATA, qc);
+
+ while (len) {
+ size_t to_copy, room;
+
+ pos = (*qcb)->data + (*qcb)->sz;
+ room = QUIC_CRYPTO_BUF_SZ - (*qcb)->sz;
+ to_copy = len > room ? room : len;
+ if (to_copy) {
+ memcpy(pos, data, to_copy);
+			/* Increment the total size of these CRYPTO buffers by <to_copy>. */
+ qel->tx.crypto.sz += to_copy;
+ (*qcb)->sz += to_copy;
+ len -= to_copy;
+ data += to_copy;
+ }
+ else {
+ struct quic_crypto_buf **tmp;
+
+ // FIXME: realloc!
+ tmp = realloc(qel->tx.crypto.bufs,
+ (*nb_buf + 1) * sizeof *qel->tx.crypto.bufs);
+ if (tmp) {
+ qel->tx.crypto.bufs = tmp;
+ qcb = &qel->tx.crypto.bufs[*nb_buf];
+ *qcb = pool_alloc(pool_head_quic_crypto_buf);
+ if (!*qcb) {
+ TRACE_ERROR("Could not allocate crypto buf", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ (*qcb)->sz = 0;
+ ++*nb_buf;
+ }
+ else {
+ break;
+ }
+ }
+ }
+
+ /* Allocate a TX CRYPTO frame only if all the CRYPTO data
+ * have been buffered.
+ */
+ if (!len) {
+ struct quic_frame *frm;
+ struct quic_frame *found = NULL;
+
+ /* There is at most one CRYPTO frame in this packet number
+ * space. Let's look for it.
+ */
+ list_for_each_entry(frm, &qel->pktns->tx.frms, list) {
+ if (frm->type != QUIC_FT_CRYPTO)
+ continue;
+
+ /* Found */
+ found = frm;
+ break;
+ }
+
+ if (found) {
+ found->crypto.len += cf_len;
+ }
+ else {
+ frm = pool_zalloc(pool_head_quic_frame);
+ if (!frm) {
+ TRACE_ERROR("Could not allocate quic frame", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ LIST_INIT(&frm->reflist);
+ frm->type = QUIC_FT_CRYPTO;
+ frm->crypto.offset = cf_offset;
+ frm->crypto.len = cf_len;
+ frm->crypto.qel = qel;
+ LIST_APPEND(&qel->pktns->tx.frms, &frm->list);
+ }
+ }
+ ret = len == 0;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_ADDDATA, qc);
+ return ret;
+}
+
+/* Prepare the emission of CONNECTION_CLOSE with error <err>. All send/receive
+ * activity for <qc> will be interrupted.
+ */
+void quic_set_connection_close(struct quic_conn *qc, const struct quic_err err)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE)
+ goto leave;
+
+ TRACE_STATE("setting immediate close", QUIC_EV_CONN_CLOSE, qc);
+ qc->flags |= QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ qc->err.code = err.code;
+ qc->err.app = err.app;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Set <alert> TLS alert as QUIC CRYPTO_ERROR error */
+void quic_set_tls_alert(struct quic_conn *qc, int alert)
+{
+ TRACE_ENTER(QUIC_EV_CONN_SSLALERT, qc);
+
+ if (!(qc->flags & QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED)) {
+ qc->flags |= QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED;
+ TRACE_DEVEL("dec half open counter", QUIC_EV_CONN_SSLALERT, qc);
+ HA_ATOMIC_DEC(&qc->prx_counters->half_open_conn);
+ }
+ quic_set_connection_close(qc, quic_err_tls(alert));
+ qc->flags |= QUIC_FL_CONN_TLS_ALERT;
+ TRACE_STATE("Alert set", QUIC_EV_CONN_SSLALERT, qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_SSLALERT, qc);
+}
+
+/* Set the application for <qc> QUIC connection.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_set_app_ops(struct quic_conn *qc, const unsigned char *alpn, size_t alpn_len)
+{
+ if (alpn_len >= 2 && memcmp(alpn, "h3", 2) == 0)
+ qc->app_ops = &h3_ops;
+ else if (alpn_len >= 10 && memcmp(alpn, "hq-interop", 10) == 0)
+ qc->app_ops = &hq_interop_ops;
+ else
+ return 0;
+
+ return 1;
+}
+
+/* ->add_handshake_data QUIC TLS callback used by the QUIC TLS stack when it
+ * wants to provide the QUIC layer with CRYPTO data.
+ * Returns 1 if succeeded, 0 if not.
+ */
+int ha_quic_add_handshake_data(SSL *ssl, enum ssl_encryption_level_t level,
+ const uint8_t *data, size_t len)
+{
+ struct quic_conn *qc;
+ enum quic_tls_enc_level tel;
+ struct quic_enc_level *qel;
+ int ret = 0;
+
+ qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ TRACE_ENTER(QUIC_EV_CONN_ADDDATA, qc);
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_PROTO("connection to be killed", QUIC_EV_CONN_ADDDATA, qc);
+ goto out;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) {
+ TRACE_PROTO("CC required", QUIC_EV_CONN_ADDDATA, qc);
+ goto out;
+ }
+
+ tel = ssl_to_quic_enc_level(level);
+ if (tel == -1) {
+ TRACE_ERROR("Wrong encryption level", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ qel = &qc->els[tel];
+ if (!quic_crypto_data_cpy(qc, qel, data, len)) {
+ TRACE_ERROR("Could not bufferize", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ TRACE_DEVEL("CRYPTO data buffered", QUIC_EV_CONN_ADDDATA,
+ qc, &level, &len);
+ out:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_ADDDATA, qc);
+ return ret;
+}
+
+int ha_quic_flush_flight(SSL *ssl)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_FFLIGHT, qc);
+ TRACE_LEAVE(QUIC_EV_CONN_FFLIGHT, qc);
+
+ return 1;
+}
+
+int ha_quic_send_alert(SSL *ssl, enum ssl_encryption_level_t level, uint8_t alert)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_SSLALERT, qc);
+
+ TRACE_PROTO("Received TLS alert", QUIC_EV_CONN_SSLALERT, qc, &alert, &level);
+
+ quic_set_tls_alert(qc, alert);
+ TRACE_LEAVE(QUIC_EV_CONN_SSLALERT, qc);
+ return 1;
+}
+
+/* QUIC TLS methods */
+static SSL_QUIC_METHOD ha_quic_method = {
+ .set_encryption_secrets = ha_quic_set_encryption_secrets,
+ .add_handshake_data = ha_quic_add_handshake_data,
+ .flush_flight = ha_quic_flush_flight,
+ .send_alert = ha_quic_send_alert,
+};
+
+/* Initialize the TLS context of a listener with <bind_conf> as configuration.
+ * Returns an error count.
+ */
+int ssl_quic_initial_ctx(struct bind_conf *bind_conf)
+{
+ struct ssl_bind_conf __maybe_unused *ssl_conf_cur;
+ int cfgerr = 0;
+
+ long options =
+ (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) |
+ SSL_OP_SINGLE_ECDH_USE |
+ SSL_OP_CIPHER_SERVER_PREFERENCE;
+ SSL_CTX *ctx;
+
+ ctx = SSL_CTX_new(TLS_server_method());
+ bind_conf->initial_ctx = ctx;
+
+ SSL_CTX_set_options(ctx, options);
+ SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS);
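+	/* QUIC connections require TLS 1.3 (RFC 9001). */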
+ SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
+ SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+# if defined(HAVE_SSL_CLIENT_HELLO_CB)
+# if defined(SSL_OP_NO_ANTI_REPLAY)
+ if (bind_conf->ssl_conf.early_data) {
+ SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY);
+ SSL_CTX_set_max_early_data(ctx, 0xffffffff);
+ }
+# endif /* !SSL_OP_NO_ANTI_REPLAY */
+ SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL);
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk);
+# else /* ! HAVE_SSL_CLIENT_HELLO_CB */
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_cbk);
+# endif
+ SSL_CTX_set_tlsext_servername_arg(ctx, bind_conf);
+#endif
+ SSL_CTX_set_quic_method(ctx, &ha_quic_method);
+
+ return cfgerr;
+}
+
+/* Decode an expected packet number from <truncated_pn>, its truncated value,
+ * depending on <largest_pn>, the largest received packet number, and <pn_nbits>,
+ * the number of bits used to encode this packet number (its length in bytes * 8).
+ * See https://quicwg.org/base-drafts/draft-ietf-quic-transport.html#packet-encoding
+ */
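+/* Example from RFC 9000 appendix A.3: with <largest_pn> 0xa82f30ea and a
+ * 16-bit <truncated_pn> of 0x9b32, the expected next packet number is
+ * 0xa82f30eb and the function returns 0xa82f9b32.
+ */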
+static uint64_t decode_packet_number(uint64_t largest_pn,
+ uint32_t truncated_pn, unsigned int pn_nbits)
+{
+ uint64_t expected_pn = largest_pn + 1;
+ uint64_t pn_win = (uint64_t)1 << pn_nbits;
+ uint64_t pn_hwin = pn_win / 2;
+ uint64_t pn_mask = pn_win - 1;
+ uint64_t candidate_pn;
+
+ candidate_pn = (expected_pn & ~pn_mask) | truncated_pn;
+ /* Note that <pn_win> > <pn_hwin>. */
+ if (candidate_pn < QUIC_MAX_PACKET_NUM - pn_win &&
+ candidate_pn + pn_hwin <= expected_pn)
+ return candidate_pn + pn_win;
+
+ if (candidate_pn > expected_pn + pn_hwin && candidate_pn >= pn_win)
+ return candidate_pn - pn_win;
+
+ return candidate_pn;
+}
+
+/* Remove the header protection of <pkt> QUIC packet using <tls_ctx> as QUIC TLS
+ * cryptographic context.
+ * <largest_pn> is the largest received packet number and <pn> the address of
+ * the packet number field for this packet with <byte0> address of its first byte.
+ * <end> points to one byte past the end of this packet.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int qc_do_rm_hp(struct quic_conn *qc,
+ struct quic_rx_packet *pkt, struct quic_tls_ctx *tls_ctx,
+ int64_t largest_pn, unsigned char *pn, unsigned char *byte0)
+{
+ int ret, i, pnlen;
+ uint64_t packet_number;
+ uint32_t truncated_pn = 0;
+ unsigned char mask[5] = {0};
+ unsigned char *sample;
+ EVP_CIPHER_CTX *cctx = NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_RMHP, qc);
+
+ ret = 0;
+
+ /* Check there is enough data in this packet. */
+ if (pkt->len - (pn - byte0) < QUIC_PACKET_PN_MAXLEN + sizeof mask) {
+ TRACE_PROTO("too short packet", QUIC_EV_CONN_RMHP, qc, pkt);
+ goto leave;
+ }
+
+ cctx = EVP_CIPHER_CTX_new();
+ if (!cctx) {
+ TRACE_ERROR("memory allocation failed", QUIC_EV_CONN_RMHP, qc, pkt);
+ goto leave;
+ }
+
+ sample = pn + QUIC_PACKET_PN_MAXLEN;
+
+ if (!quic_tls_aes_decrypt(mask, sample, sizeof mask, tls_ctx->rx.hp_ctx)) {
+ TRACE_ERROR("HP removing failed", QUIC_EV_CONN_RMHP, qc, pkt);
+ goto leave;
+ }
+
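+	/* Remove the protection of the first byte (its 4 low bits for long
+	 * headers, 5 for short ones), then of the packet number bytes
+	 * (see RFC 9001 5.4.1).
+	 */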
+ *byte0 ^= mask[0] & (*byte0 & QUIC_PACKET_LONG_HEADER_BIT ? 0xf : 0x1f);
+ pnlen = (*byte0 & QUIC_PACKET_PNL_BITMASK) + 1;
+ for (i = 0; i < pnlen; i++) {
+ pn[i] ^= mask[i + 1];
+ truncated_pn = (truncated_pn << 8) | pn[i];
+ }
+
+ packet_number = decode_packet_number(largest_pn, truncated_pn, pnlen * 8);
+ /* Store remaining information for this unprotected header */
+ pkt->pn = packet_number;
+ pkt->pnl = pnlen;
+
+ ret = 1;
+ leave:
+ if (cctx)
+ EVP_CIPHER_CTX_free(cctx);
+ TRACE_LEAVE(QUIC_EV_CONN_RMHP, qc);
+ return ret;
+}
+
+/* Encrypt the payload of a QUIC packet with <pn> as number found at <payload>
+ * address, with <payload_len> as payload length, <aad> as address of
+ * the AAD and <aad_len> as AAD length depending on the <tls_ctx> QUIC TLS
+ * context.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_packet_encrypt(unsigned char *payload, size_t payload_len,
+ unsigned char *aad, size_t aad_len, uint64_t pn,
+ struct quic_tls_ctx *tls_ctx, struct quic_conn *qc)
+{
+ int ret = 0;
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ unsigned char *tx_iv = tls_ctx->tx.iv;
+ size_t tx_iv_sz = tls_ctx->tx.ivlen;
+ struct enc_debug_info edi;
+
+ TRACE_ENTER(QUIC_EV_CONN_ENCPKT, qc);
+
+ if (!quic_aead_iv_build(iv, sizeof iv, tx_iv, tx_iv_sz, pn)) {
+ TRACE_ERROR("AEAD IV building for encryption failed", QUIC_EV_CONN_ENCPKT, qc);
+ goto err;
+ }
+
+ if (!quic_tls_encrypt(payload, payload_len, aad, aad_len,
+ tls_ctx->tx.ctx, tls_ctx->tx.aead, tls_ctx->tx.key, iv)) {
+ TRACE_ERROR("QUIC packet encryption failed", QUIC_EV_CONN_ENCPKT, qc);
+ goto err;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_ENCPKT, qc);
+ return ret;
+
+ err:
+ enc_debug_info_init(&edi, payload, payload_len, aad, aad_len, pn);
+ goto leave;
+}
+
+/* Decrypt <pkt> QUIC packet with <tls_ctx> as QUIC TLS cryptographic context.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int qc_pkt_decrypt(struct quic_rx_packet *pkt, struct quic_enc_level *qel,
+ struct quic_conn *qc)
+{
+ int ret, kp_changed;
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ struct quic_tls_ctx *tls_ctx = &qel->tls_ctx;
+ EVP_CIPHER_CTX *rx_ctx = tls_ctx->rx.ctx;
+ unsigned char *rx_iv = tls_ctx->rx.iv;
+ size_t rx_iv_sz = tls_ctx->rx.ivlen;
+ unsigned char *rx_key = tls_ctx->rx.key;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ ret = 0;
+ kp_changed = 0;
+
+ if (pkt->type == QUIC_PACKET_TYPE_SHORT) {
+ /* The two tested bits are not at the same position: this is why
+ * they are first both normalized to booleans before being XORed.
+ */
+ if (!(*pkt->data & QUIC_PACKET_KEY_PHASE_BIT) ^ !(tls_ctx->flags & QUIC_FL_TLS_KP_BIT_SET)) {
+ if (pkt->pn < tls_ctx->rx.pn) {
+ /* The lowest packet number of a previous key phase
+ * cannot be null if it really stores previous key phase
+ * secrets.
+ */
+ // TODO: check if BUG_ON() would be more suitable
+ if (!pkt->qc->ku.prv_rx.pn) {
+ TRACE_ERROR("null previous packet number", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+
+ rx_ctx = pkt->qc->ku.prv_rx.ctx;
+ rx_iv = pkt->qc->ku.prv_rx.iv;
+ rx_key = pkt->qc->ku.prv_rx.key;
+ }
+ else if (pkt->pn > qel->pktns->rx.largest_pn) {
+ /* Next key phase */
+ kp_changed = 1;
+ rx_ctx = pkt->qc->ku.nxt_rx.ctx;
+ rx_iv = pkt->qc->ku.nxt_rx.iv;
+ rx_key = pkt->qc->ku.nxt_rx.key;
+ }
+ }
+ }
+
+ if (!quic_aead_iv_build(iv, sizeof iv, rx_iv, rx_iv_sz, pkt->pn)) {
+ TRACE_ERROR("quic_aead_iv_build() failed", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+
+ ret = quic_tls_decrypt(pkt->data + pkt->aad_len, pkt->len - pkt->aad_len,
+ pkt->data, pkt->aad_len,
+ rx_ctx, tls_ctx->rx.aead, rx_key, iv);
+ if (!ret) {
+ TRACE_ERROR("quic_tls_decrypt() failed", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+
+ /* Update the keys only if the packet decryption succeeded. */
+ if (kp_changed) {
+ quic_tls_rotate_keys(pkt->qc);
+ /* Toggle the Key Phase bit */
+ tls_ctx->flags ^= QUIC_FL_TLS_KP_BIT_SET;
+ /* Store the lowest packet number received for the current key phase */
+ tls_ctx->rx.pn = pkt->pn;
+ /* Prepare the next key update */
+ if (!quic_tls_key_update(pkt->qc)) {
+ TRACE_ERROR("quic_tls_key_update() failed", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+ }
+
+ /* Update the packet length (required to parse the frames). */
+ pkt->len -= QUIC_TLS_TAG_LEN;
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return ret;
+}
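+
+/* Note on the key phase test above, with hypothetical values: for a short
+ * header first byte 0x44 (Key Phase bit 0x04 set) and a TLS context where
+ * QUIC_FL_TLS_KP_BIT_SET is not set, !(0x44 & 0x04) ^ !0 == 0 ^ 1 == 1:
+ * the packet belongs to another key phase, either the previous one (if its
+ * packet number is older) or the next one (key update in progress).
+ */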
+
+/* Remove references to <frm> frame */
+static void qc_frm_unref(struct quic_conn *qc, struct quic_frame *frm)
+{
+ struct quic_frame *f, *tmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(f, tmp, &frm->reflist, ref) {
+ f->origin = NULL;
+ LIST_DELETE(&f->ref);
+ if (f->pkt) {
+ TRACE_DEVEL("remove frame reference",
+ QUIC_EV_CONN_PRSAFRM, qc, f, &f->pkt->pn_node.key);
+ }
+ else {
+ TRACE_DEVEL("remove frame reference for unsent frame",
+ QUIC_EV_CONN_PRSAFRM, qc, f);
+ }
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Release <frm> frame and mark its copies as acknowledged */
+void qc_release_frm(struct quic_conn *qc, struct quic_frame *frm)
+{
+ uint64_t pn;
+ struct quic_frame *origin, *f, *tmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ /* Identify the original frame: <frm> may itself be the original or one of its copies */
+ origin = frm->origin ? frm->origin : frm;
+ /* Ensure the source of the copies is flagged as acked, <frm> being
+ * possibly a copy of <origin>
+ */
+ origin->flags |= QUIC_FL_TX_FRAME_ACKED;
+ /* Mark all the copies of <origin> as acknowledged. We must
+ * not release the packets (releasing the frames) at this time as
+ * they are possibly also to be acknowledged alongside the
+ * current one.
+ */
+ list_for_each_entry_safe(f, tmp, &origin->reflist, ref) {
+ if (f->pkt) {
+ f->flags |= QUIC_FL_TX_FRAME_ACKED;
+ f->origin = NULL;
+ LIST_DELETE(&f->ref);
+ pn = f->pkt->pn_node.key;
+ TRACE_DEVEL("mark frame as acked from packet",
+ QUIC_EV_CONN_PRSAFRM, qc, f, &pn);
+ }
+ else {
+ TRACE_DEVEL("freeing unsent frame",
+ QUIC_EV_CONN_PRSAFRM, qc, f);
+ LIST_DELETE(&f->ref);
+ LIST_DELETE(&f->list);
+ pool_free(pool_head_quic_frame, f);
+ }
+ }
+ LIST_DELETE(&frm->list);
+ pn = frm->pkt->pn_node.key;
+ quic_tx_packet_refdec(frm->pkt);
+ TRACE_DEVEL("freeing frame from packet",
+ QUIC_EV_CONN_PRSAFRM, qc, frm, &pn);
+ pool_free(pool_head_quic_frame, frm);
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Schedule a CONNECTION_CLOSE emission on <qc> if the MUX has been released
+ * and all STREAM data are acknowledged. The MUX is responsible for having set
+ * <qc.err> beforehand, as it is reused for the CONNECTION_CLOSE frame.
+ *
+ * TODO this should also be called on lost packet detection
+ */
+void qc_check_close_on_released_mux(struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ if (qc->mux_state == QC_MUX_RELEASED && eb_is_empty(&qc->streams_by_id)) {
+ /* Reuse errcode which should have been previously set by the MUX on release. */
+ quic_set_connection_close(qc, qc->err);
+ tasklet_wakeup(qc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Remove from <stream> the acknowledged frames.
+ *
+ * Returns 1 if at least one frame was removed else 0.
+ */
+static int quic_stream_try_to_consume(struct quic_conn *qc,
+ struct qc_stream_desc *stream)
+{
+ int ret;
+ struct eb64_node *frm_node;
+
+ TRACE_ENTER(QUIC_EV_CONN_ACKSTRM, qc);
+
+ ret = 0;
+ frm_node = eb64_first(&stream->acked_frms);
+ while (frm_node) {
+ struct quic_stream *strm;
+ struct quic_frame *frm;
+ size_t offset, len;
+
+ strm = eb64_entry(frm_node, struct quic_stream, offset);
+ offset = strm->offset.key;
+ len = strm->len;
+
+ if (offset > stream->ack_offset)
+ break;
+
+ if (qc_stream_desc_ack(&stream, offset, len)) {
+ /* cf. next comment: the frame may have been freed at this stage. */
+ TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
+ qc, stream ? strm : NULL, stream);
+ ret = 1;
+ }
+
+ /* If <stream> is NULL after qc_stream_desc_ack(), it means the
+ * stream has been freed along with its stream frames tree. Nothing
+ * more to do here.
+ */
+ if (!stream) {
+ qc_check_close_on_released_mux(qc);
+ ret = 1;
+ goto leave;
+ }
+
+ frm_node = eb64_next(frm_node);
+ eb64_delete(&strm->offset);
+
+ frm = container_of(strm, struct quic_frame, stream);
+ qc_release_frm(qc, frm);
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_ACKSTRM, qc);
+ return ret;
+}
+
+/* Handle <frm> frame, the packet it is attached to having just been acknowledged. */
+static inline void qc_treat_acked_tx_frm(struct quic_conn *qc,
+ struct quic_frame *frm)
+{
+ int stream_acked;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc, frm);
+
+ stream_acked = 0;
+ switch (frm->type) {
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct quic_stream *strm_frm = &frm->stream;
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream = NULL;
+ const size_t offset = strm_frm->offset.key;
+ const size_t len = strm_frm->len;
+
+ /* do not use strm_frm->stream as the qc_stream_desc instance
+ * might be freed at this stage. Use the id to do a proper
+ * lookup.
+ *
+ * TODO if the lookup operation's impact on performance is noticeable,
+ * implement a refcount on qc_stream_desc instances.
+ */
+ node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
+ if (!node) {
+ TRACE_DEVEL("acked stream for released stream", QUIC_EV_CONN_ACKSTRM, qc, strm_frm);
+ qc_release_frm(qc, frm);
+ /* early return */
+ goto leave;
+ }
+ stream = eb64_entry(node, struct qc_stream_desc, by_id);
+
+ TRACE_DEVEL("acked stream", QUIC_EV_CONN_ACKSTRM, qc, strm_frm, stream);
+ if (offset <= stream->ack_offset) {
+ if (qc_stream_desc_ack(&stream, offset, len)) {
+ stream_acked = 1;
+ TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
+ qc, strm_frm, stream);
+ }
+
+ if (!stream) {
+ /* no need to continue if stream freed. */
+ TRACE_DEVEL("stream released and freed", QUIC_EV_CONN_ACKSTRM, qc);
+ qc_release_frm(qc, frm);
+ qc_check_close_on_released_mux(qc);
+ break;
+ }
+
+ TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
+ qc, strm_frm, stream);
+ qc_release_frm(qc, frm);
+ }
+ else {
+ eb64_insert(&stream->acked_frms, &strm_frm->offset);
+ }
+
+ stream_acked |= quic_stream_try_to_consume(qc, stream);
+ }
+ break;
+ default:
+ qc_release_frm(qc, frm);
+ }
+
+ if (stream_acked) {
+ if (qc->subs && qc->subs->events & SUB_RETRY_SEND) {
+ tasklet_wakeup(qc->subs->tasklet);
+ qc->subs->events &= ~SUB_RETRY_SEND;
+ if (!qc->subs->events)
+ qc->subs = NULL;
+ }
+ }
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Remove <largest> down to <smallest> node entries from <pkts> tree of TX packets,
+ * deallocating them along with their TX frames.
+ * Returns the last node reached, to be used for the next range.
+ * May be NULL if <largest> node could not be found.
+ */
+static inline struct eb64_node *qc_ackrng_pkts(struct quic_conn *qc,
+ struct eb_root *pkts,
+ unsigned int *pkt_flags,
+ struct list *newly_acked_pkts,
+ struct eb64_node *largest_node,
+ uint64_t largest, uint64_t smallest)
+{
+ struct eb64_node *node;
+ struct quic_tx_packet *pkt;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ node = largest_node ? largest_node : eb64_lookup_le(pkts, largest);
+ while (node && node->key >= smallest) {
+ struct quic_frame *frm, *frmbak;
+
+ pkt = eb64_entry(node, struct quic_tx_packet, pn_node);
+ *pkt_flags |= pkt->flags;
+ LIST_INSERT(newly_acked_pkts, &pkt->list);
+ TRACE_DEVEL("Removing packet #", QUIC_EV_CONN_PRSAFRM, qc, NULL, &pkt->pn_node.key);
+ list_for_each_entry_safe(frm, frmbak, &pkt->frms, list)
+ qc_treat_acked_tx_frm(qc, frm);
+ /* If there are other packets in the same datagram <pkt> is attached to,
+ * detach the previous one and the next one from <pkt>.
+ */
+ quic_tx_packet_dgram_detach(pkt);
+ node = eb64_prev(node);
+ eb64_delete(&pkt->pn_node);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+ return node;
+}
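+
+/* Example: called with <largest> 12 and <smallest> 10, the loop above
+ * acknowledges packets 12, 11 and 10 (only walking nodes which really
+ * exist in the tree) and returns the node preceding 10, if any, so that
+ * it can be reused as the starting point for the next ACK range.
+ */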
+
+/* Remove all frames from <pkt_frm_list> and reinsert them into
+ * <pktns_frm_list>, in the same order they have been sent.
+ */
+static inline void qc_requeue_nacked_pkt_tx_frms(struct quic_conn *qc,
+ struct quic_tx_packet *pkt,
+ struct list *pktns_frm_list)
+{
+ struct quic_frame *frm, *frmbak;
+ struct list tmp = LIST_HEAD_INIT(tmp);
+ struct list *pkt_frm_list = &pkt->frms;
+ uint64_t pn = pkt->pn_node.key;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(frm, frmbak, pkt_frm_list, list) {
+ /* First remove this frame from the packet it was attached to */
+ LIST_DELETE(&frm->list);
+ quic_tx_packet_refdec(pkt);
+ /* At this time, this frame is not freed but removed from its packet */
+ frm->pkt = NULL;
+ /* Remove any reference to this frame */
+ qc_frm_unref(qc, frm);
+ switch (frm->type) {
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct quic_stream *strm_frm = &frm->stream;
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream_desc;
+
+ node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
+ if (!node) {
+ TRACE_DEVEL("released stream", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ TRACE_DEVEL("freeing frame from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, frm, &pn);
+ pool_free(pool_head_quic_frame, frm);
+ continue;
+ }
+
+ stream_desc = eb64_entry(node, struct qc_stream_desc, by_id);
+ /* Do not resend this frame if in the "already acked range" */
+ if (strm_frm->offset.key + strm_frm->len <= stream_desc->ack_offset) {
+ TRACE_DEVEL("ignored frame in already acked range",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ pool_free(pool_head_quic_frame, frm);
+ continue;
+ }
+ else if (strm_frm->offset.key < stream_desc->ack_offset) {
+ uint64_t diff = stream_desc->ack_offset - strm_frm->offset.key;
+
+ qc_stream_frm_mv_fwd(frm, diff);
+ TRACE_DEVEL("updated partially acked frame",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ /* Do not resend probing packet with old data */
+ if (pkt->flags & QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA) {
+ TRACE_DEVEL("ignored frame with old data from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, frm, &pn);
+ if (frm->origin)
+ LIST_DELETE(&frm->ref);
+ pool_free(pool_head_quic_frame, frm);
+ continue;
+ }
+
+ if (frm->flags & QUIC_FL_TX_FRAME_ACKED) {
+ TRACE_DEVEL("already acked frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ TRACE_DEVEL("freeing frame from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, frm, &pn);
+ pool_free(pool_head_quic_frame, frm);
+ }
+ else {
+ LIST_APPEND(&tmp, &frm->list);
+ TRACE_DEVEL("frame requeued", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ }
+ }
+
+ LIST_SPLICE(pktns_frm_list, &tmp);
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
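+
+/* Example of the partial-ack adjustment above: a requeued frame covering
+ * stream bytes [500..800) with a stream <ack_offset> of 600 is not fully
+ * acked (500 + 300 > 600), so it is moved forward by 600 - 500 = 100 bytes
+ * and resent as [600..800).
+ */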
+
+/* Free <pkt> TX packet and its attached frames.
+ * It is the caller's responsibility to remove this packet from
+ * any data structure it was possibly attached to.
+ */
+static inline void free_quic_tx_packet(struct quic_conn *qc,
+ struct quic_tx_packet *pkt)
+{
+ struct quic_frame *frm, *frmbak;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (!pkt)
+ goto leave;
+
+ list_for_each_entry_safe(frm, frmbak, &pkt->frms, list) {
+ LIST_DELETE(&frm->list);
+ pool_free(pool_head_quic_frame, frm);
+ }
+ pool_free(pool_head_quic_tx_packet, pkt);
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
+
+/* Free the TX packets of <pkts> list */
+static inline void free_quic_tx_pkts(struct quic_conn *qc, struct list *pkts)
+{
+ struct quic_tx_packet *pkt, *tmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ list_for_each_entry_safe(pkt, tmp, pkts, list) {
+ LIST_DELETE(&pkt->list);
+ eb64_delete(&pkt->pn_node);
+ free_quic_tx_packet(qc, pkt);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
+
+/* Remove already sent ranges of acknowledged packet numbers from
+ * <pktns> packet number space tree below <largest_acked_pn>, possibly
+ * updating the range which contains <largest_acked_pn>.
+ * Never fails.
+ */
+static void qc_treat_ack_of_ack(struct quic_conn *qc,
+ struct quic_pktns *pktns,
+ int64_t largest_acked_pn)
+{
+ struct eb64_node *ar, *next_ar;
+ struct quic_arngs *arngs = &pktns->rx.arngs;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ ar = eb64_first(&arngs->root);
+ while (ar) {
+ struct quic_arng_node *ar_node;
+
+ next_ar = eb64_next(ar);
+ ar_node = eb64_entry(ar, struct quic_arng_node, first);
+
+ if ((int64_t)ar_node->first.key > largest_acked_pn) {
+ TRACE_DEVEL("first.key > largest", QUIC_EV_CONN_PRSAFRM, qc);
+ break;
+ }
+
+ if (largest_acked_pn < ar_node->last) {
+ eb64_delete(ar);
+ ar_node->first.key = largest_acked_pn + 1;
+ eb64_insert(&arngs->root, ar);
+ break;
+ }
+
+ eb64_delete(ar);
+ pool_free(pool_head_quic_arng, ar_node);
+ arngs->sz--;
+ ar = next_ar;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
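+
+/* Example: with stored ranges [3..5] and [9..12] and <largest_acked_pn> 10,
+ * range [3..5] is released entirely and [9..12] is shrunk to [11..12]: the
+ * peer has proven reception of every acknowledgment up to 10, so these
+ * ranges no longer need to be resent.
+ */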
+
+/* Send a packet ack event notification for each newly acked packet of
+ * <newly_acked_pkts> list and free them.
+ * Always succeeds.
+ */
+static inline void qc_treat_newly_acked_pkts(struct quic_conn *qc,
+ struct list *newly_acked_pkts)
+{
+ struct quic_tx_packet *pkt, *tmp;
+ struct quic_cc_event ev = { .type = QUIC_CC_EVT_ACK, };
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
+ pkt->pktns->tx.in_flight -= pkt->in_flight_len;
+ qc->path->prep_in_flight -= pkt->in_flight_len;
+ qc->path->in_flight -= pkt->in_flight_len;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
+ qc->path->ifae_pkts--;
+ /* If this packet contained an ACK frame, proceed to the
+ * acknowledging of range of acks from the largest acknowledged
+ * packet number which was sent in an ACK frame by this packet.
+ */
+ if (pkt->largest_acked_pn != -1)
+ qc_treat_ack_of_ack(qc, pkt->pktns, pkt->largest_acked_pn);
+ ev.ack.acked = pkt->in_flight_len;
+ ev.ack.time_sent = pkt->time_sent;
+ quic_cc_event(&qc->path->cc, &ev);
+ LIST_DELETE(&pkt->list);
+ eb64_delete(&pkt->pn_node);
+ quic_tx_packet_refdec(pkt);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+
+}
+
+/* Release all the frames attached to <pktns> packet number space */
+static inline void qc_release_pktns_frms(struct quic_conn *qc,
+ struct quic_pktns *pktns)
+{
+ struct quic_frame *frm, *frmbak;
+
+ TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+ list_for_each_entry_safe(frm, frmbak, &pktns->tx.frms, list) {
+ LIST_DELETE(&frm->list);
+ pool_free(pool_head_quic_frame, frm);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+}
+
+/* Handle <pkts> list of lost packets detected at <now_us>, handling
+ * their TX frames.
+ * Send a packet loss event to the congestion controller if
+ * in-flight packets have been lost.
+ * Also frees the packets in <pkts> list.
+ * Never fails.
+ */
+static inline void qc_release_lost_pkts(struct quic_conn *qc,
+ struct quic_pktns *pktns,
+ struct list *pkts,
+ uint64_t now_us)
+{
+ struct quic_tx_packet *pkt, *tmp, *oldest_lost, *newest_lost;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ if (LIST_ISEMPTY(pkts))
+ goto leave;
+
+ oldest_lost = newest_lost = NULL;
+ list_for_each_entry_safe(pkt, tmp, pkts, list) {
+ struct list tmp = LIST_HEAD_INIT(tmp);
+
+ pkt->pktns->tx.in_flight -= pkt->in_flight_len;
+ qc->path->prep_in_flight -= pkt->in_flight_len;
+ qc->path->in_flight -= pkt->in_flight_len;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
+ qc->path->ifae_pkts--;
+ /* Treat the frames of this lost packet. */
+ qc_requeue_nacked_pkt_tx_frms(qc, pkt, &pktns->tx.frms);
+ LIST_DELETE(&pkt->list);
+ if (!oldest_lost) {
+ oldest_lost = newest_lost = pkt;
+ }
+ else {
+ if (newest_lost != oldest_lost)
+ quic_tx_packet_refdec(newest_lost);
+ newest_lost = pkt;
+ }
+ }
+
+ if (newest_lost) {
+ /* Send a congestion event to the controller */
+ struct quic_cc_event ev = { };
+
+ ev.type = QUIC_CC_EVT_LOSS;
+ ev.loss.time_sent = newest_lost->time_sent;
+
+ quic_cc_event(&qc->path->cc, &ev);
+ }
+
+ /* If an RTT has already been sampled, <rtt_min> has been set.
+ * We must check if we are experiencing a persistent congestion.
+ * If this is the case, the congestion controller must re-enter
+ * slow start state.
+ */
+ if (qc->path->loss.rtt_min && newest_lost != oldest_lost) {
+ unsigned int period = newest_lost->time_sent - oldest_lost->time_sent;
+
+ if (quic_loss_persistent_congestion(&qc->path->loss, period,
+ now_ms, qc->max_ack_delay))
+ qc->path->cc.algo->slow_start(&qc->path->cc);
+ }
+
+ /* <oldest_lost> cannot be NULL at this stage because we have ensured
+ * that <pkts> list is not empty. Without this, GCC 12.2.0 reports a
+ * possible overflow on a 0 byte region with O2 optimization.
+ */
+ ALREADY_CHECKED(oldest_lost);
+ quic_tx_packet_refdec(oldest_lost);
+ if (newest_lost != oldest_lost)
+ quic_tx_packet_refdec(newest_lost);
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
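+
+/* As a reminder of the check above (RFC 9002, section 7.6): persistent
+ * congestion is declared when the span between the oldest and newest lost
+ * packets exceeds roughly (smoothed_rtt + max(4*rttvar, granularity) +
+ * max_ack_delay) multiplied by a threshold of 3, in which case the
+ * congestion controller re-enters the slow start state.
+ */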
+
+/* Parse an ACK frame into <frm> from a buffer at <pos> address, with <end> pointing
+ * to one byte past the end of this buffer. Also update <rtt_sample> if needed, i.e.
+ * if the largest acked packet was newly acked and if there was at least one newly
+ * acked ack-eliciting packet.
+ * Return 1, if succeeded, 0 if not.
+ */
+static inline int qc_parse_ack_frm(struct quic_conn *qc,
+ struct quic_frame *frm,
+ struct quic_enc_level *qel,
+ unsigned int *rtt_sample,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct quic_ack *ack = &frm->ack;
+ uint64_t smallest, largest;
+ struct eb_root *pkts;
+ struct eb64_node *largest_node;
+ unsigned int time_sent, pkt_flags;
+ struct list newly_acked_pkts = LIST_HEAD_INIT(newly_acked_pkts);
+ struct list lost_pkts = LIST_HEAD_INIT(lost_pkts);
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ if (ack->largest_ack > qel->pktns->tx.next_pn) {
+ TRACE_DEVEL("ACK for not sent packet", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &ack->largest_ack);
+ goto err;
+ }
+
+ if (ack->first_ack_range > ack->largest_ack) {
+ TRACE_DEVEL("too big first ACK range", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &ack->first_ack_range);
+ goto err;
+ }
+
+ largest = ack->largest_ack;
+ smallest = largest - ack->first_ack_range;
+ pkts = &qel->pktns->tx.pkts;
+ pkt_flags = 0;
+ largest_node = NULL;
+ time_sent = 0;
+
+ if ((int64_t)ack->largest_ack > qel->pktns->rx.largest_acked_pn) {
+ largest_node = eb64_lookup(pkts, largest);
+ if (!largest_node) {
+ TRACE_DEVEL("Largest acked packet not found",
+ QUIC_EV_CONN_PRSAFRM, qc);
+ }
+ else {
+ time_sent = eb64_entry(largest_node,
+ struct quic_tx_packet, pn_node)->time_sent;
+ }
+ }
+
+ TRACE_PROTO("rcvd ack range", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &largest, &smallest);
+ do {
+ uint64_t gap, ack_range;
+
+ qc_ackrng_pkts(qc, pkts, &pkt_flags, &newly_acked_pkts,
+ largest_node, largest, smallest);
+ if (!ack->ack_range_num--)
+ break;
+
+ if (!quic_dec_int(&gap, pos, end)) {
+ TRACE_ERROR("quic_dec_int(gap) failed", QUIC_EV_CONN_PRSAFRM, qc);
+ goto err;
+ }
+
+ if (smallest < gap + 2) {
+ TRACE_DEVEL("wrong gap value", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &gap, &smallest);
+ goto err;
+ }
+
+ largest = smallest - gap - 2;
+ if (!quic_dec_int(&ack_range, pos, end)) {
+ TRACE_ERROR("quic_dec_int(ack_range) failed", QUIC_EV_CONN_PRSAFRM, qc);
+ goto err;
+ }
+
+ if (largest < ack_range) {
+ TRACE_DEVEL("wrong ack range value", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &largest, &ack_range);
+ goto err;
+ }
+
+ /* Do not use this node anymore. */
+ largest_node = NULL;
+ /* Next range */
+ smallest = largest - ack_range;
+
+ TRACE_PROTO("rcvd next ack range", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &largest, &smallest);
+ } while (1);
+
+ if (time_sent && (pkt_flags & QUIC_FL_TX_PACKET_ACK_ELICITING)) {
+ *rtt_sample = tick_remain(time_sent, now_ms);
+ qel->pktns->rx.largest_acked_pn = ack->largest_ack;
+ }
+
+ if (!LIST_ISEMPTY(&newly_acked_pkts)) {
+ if (!eb_is_empty(&qel->pktns->tx.pkts)) {
+ qc_packet_loss_lookup(qel->pktns, qc, &lost_pkts);
+ qc_release_lost_pkts(qc, qel->pktns, &lost_pkts, now_ms);
+ }
+ qc_treat_newly_acked_pkts(qc, &newly_acked_pkts);
+ if (quic_peer_validated_addr(qc))
+ qc->path->loss.pto_count = 0;
+ qc_set_timer(qc);
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+ return ret;
+
+ err:
+ free_quic_tx_pkts(qc, &newly_acked_pkts);
+ goto leave;
+}
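+
+/* Worked example of the range walk above (RFC 9000, section 19.3.1): an ACK
+ * frame with Largest Acknowledged 100, First ACK Range 3, then one
+ * (Gap, ACK Range) pair of (5, 2), acknowledges [97..100], then jumps to
+ * largest = 97 - 5 - 2 = 90 and acknowledges [88..90].
+ */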
+
+/* This function gives the details of SSL errors. It is used only
+ * if the debug mode and the verbose mode are activated. It dumps all
+ * the SSL errors until the stack is empty.
+ */
+static forceinline void qc_ssl_dump_errors(struct connection *conn)
+{
+ if (unlikely(global.mode & MODE_DEBUG)) {
+ while (1) {
+ const char *func = NULL;
+ unsigned long ret;
+
+ ERR_peek_error_func(&func);
+ ret = ERR_get_error();
+ if (!ret)
+ return;
+
+ fprintf(stderr, "conn. @%p OpenSSL error[0x%lx] %s: %s\n", conn, ret,
+ func, ERR_reason_error_string(ret));
+ }
+ }
+}
+
+int ssl_sock_get_alpn(const struct connection *conn, void *xprt_ctx,
+ const char **str, int *len);
+
+/* Finalize <qc> QUIC connection:
+ * - initialize the Initial QUIC TLS context for negotiated version,
+ * - derive the secrets for this context,
+ * - set them into the TLS stack,
+ *
+ * MUST be called after having received the remote transport parameters which
+ * are parsed when the TLS callback for the ClientHello message is called upon
+ * SSL_do_handshake() calls, not necessarily on the first one as this TLS
+ * message may be split across several packets.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int qc_conn_finalize(struct quic_conn *qc, int server)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ if (qc->flags & QUIC_FL_CONN_FINALIZED)
+ goto finalized;
+
+ if (qc->negotiated_version &&
+ !qc_new_isecs(qc, &qc->negotiated_ictx, qc->negotiated_version,
+ qc->odcid.data, qc->odcid.len, server))
+ goto out;
+
+ /* This connection is functional (ready to send/receive) */
+ qc->flags |= QUIC_FL_CONN_FINALIZED;
+
+ finalized:
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
+/* Provide the CRYPTO data found at <data> with <len> as length to the TLS stack
+ * from <el> encryption level, with <ctx> as QUIC connection context.
+ * The remaining parameters are only there for debugging purposes.
+ * Return 1 if succeeded, 0 if not.
+ */
+static inline int qc_provide_cdata(struct quic_enc_level *el,
+ struct ssl_sock_ctx *ctx,
+ const unsigned char *data, size_t len,
+ struct quic_rx_packet *pkt,
+ struct quic_rx_crypto_frm *cf)
+{
+#ifdef DEBUG_STRICT
+ enum ncb_ret ncb_ret;
+#endif
+ int ssl_err, state;
+ struct quic_conn *qc;
+ int ret = 0;
+ struct ncbuf *ncbuf = &el->cstream->rx.ncbuf;
+
+ ssl_err = SSL_ERROR_NONE;
+ qc = ctx->qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_SSLDATA, qc);
+
+ if (SSL_provide_quic_data(ctx->ssl, el->level, data, len) != 1) {
+ TRACE_ERROR("SSL_provide_quic_data() error",
+ QUIC_EV_CONN_SSLDATA, qc, pkt, cf, ctx->ssl);
+ goto leave;
+ }
+
+ TRACE_PROTO("in order CRYPTO data",
+ QUIC_EV_CONN_SSLDATA, qc, NULL, cf, ctx->ssl);
+
+ state = qc->state;
+ if (state < QUIC_HS_ST_COMPLETE) {
+ ssl_err = SSL_do_handshake(ctx->ssl);
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_IO_CB, qc);
+ goto leave;
+ }
+
+ /* Finalize the connection as soon as possible if the peer transport parameters
+ * have been received. This may be useful to send packets even if this
+ * handshake fails.
+ */
+ if ((qc->flags & QUIC_FL_CONN_TX_TP_RECEIVED) && !qc_conn_finalize(qc, 1)) {
+ TRACE_ERROR("connection finalization failed", QUIC_EV_CONN_IO_CB, qc, &state);
+ goto leave;
+ }
+
+ if (ssl_err != 1) {
+ ssl_err = SSL_get_error(ctx->ssl, ssl_err);
+ if (ssl_err == SSL_ERROR_WANT_READ || ssl_err == SSL_ERROR_WANT_WRITE) {
+ TRACE_PROTO("SSL handshake in progress",
+ QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ goto out;
+ }
+
+ /* TODO: Should close the connection asap */
+ if (!(qc->flags & QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED)) {
+ qc->flags |= QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED;
+ HA_ATOMIC_DEC(&qc->prx_counters->half_open_conn);
+ HA_ATOMIC_INC(&qc->prx_counters->hdshk_fail);
+ }
+ TRACE_ERROR("SSL handshake error", QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ qc_ssl_dump_errors(ctx->conn);
+ ERR_clear_error();
+ goto leave;
+ }
+
+ TRACE_PROTO("SSL handshake OK", QUIC_EV_CONN_IO_CB, qc, &state);
+
+ /* Check that the ALPN could be negotiated */
+ if (!qc->app_ops) {
+ TRACE_ERROR("No negotiated ALPN", QUIC_EV_CONN_IO_CB, qc, &state);
+ quic_set_tls_alert(qc, SSL_AD_NO_APPLICATION_PROTOCOL);
+ goto leave;
+ }
+
+ if (!(qc->flags & QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED)) {
+ TRACE_DEVEL("dec half open counter", QUIC_EV_CONN_IO_CB, qc, &state);
+ qc->flags |= QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED;
+ HA_ATOMIC_DEC(&qc->prx_counters->half_open_conn);
+ }
+ /* I/O callback switch */
+ qc->wait_event.tasklet->process = quic_conn_app_io_cb;
+ if (qc_is_listener(ctx->qc)) {
+ qc->state = QUIC_HS_ST_CONFIRMED;
+ /* The connection is ready to be accepted. */
+ quic_accept_push_qc(qc);
+ }
+ else {
+ qc->state = QUIC_HS_ST_COMPLETE;
+ }
+
+ /* Prepare the next key update */
+ if (!quic_tls_key_update(qc)) {
+ TRACE_ERROR("quic_tls_key_update() failed", QUIC_EV_CONN_IO_CB, qc);
+ goto leave;
+ }
+ } else {
+ ssl_err = SSL_process_quic_post_handshake(ctx->ssl);
+ if (ssl_err != 1) {
+ ssl_err = SSL_get_error(ctx->ssl, ssl_err);
+ if (ssl_err == SSL_ERROR_WANT_READ || ssl_err == SSL_ERROR_WANT_WRITE) {
+ TRACE_PROTO("SSL post handshake in progress",
+ QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ goto out;
+ }
+
+ TRACE_ERROR("SSL post handshake error",
+ QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ goto leave;
+ }
+
+ TRACE_STATE("SSL post handshake succeeded", QUIC_EV_CONN_IO_CB, qc, &state);
+ }
+
+ out:
+ ret = 1;
+ leave:
+ /* The CRYPTO data are consumed even in case of an error to release
+ * the memory asap.
+ */
+ if (!ncb_is_null(ncbuf)) {
+#ifdef DEBUG_STRICT
+ ncb_ret = ncb_advance(ncbuf, len);
+ /* ncb_advance() must always succeed. This is guaranteed as
+ * this is only done inside a data block. If false, this will
+ * lead to handshake failure with quic_enc_level offset shifted
+ * from buffer data.
+ */
+ BUG_ON(ncb_ret != NCB_RET_OK);
+#else
+ ncb_advance(ncbuf, len);
+#endif
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_SSLDATA, qc);
+ return ret;
+}
+
+/* Parse a STREAM frame <strm_frm>
+ *
+ * Return 1 on success. On error, 0 is returned. In this case, the packet
+ * containing the frame must not be acknowledged.
+ */
+static inline int qc_handle_strm_frm(struct quic_rx_packet *pkt,
+ struct quic_stream *strm_frm,
+ struct quic_conn *qc)
+{
+ int ret;
+
+ /* RFC9000 13.1. Packet Processing
+ *
+ * A packet MUST NOT be acknowledged until packet protection has been
+ * successfully removed and all frames contained in the packet have
+ * been processed. For STREAM frames, this means the data has been
+ * enqueued in preparation to be received by the application protocol,
+ * but it does not require that data be delivered and consumed.
+ */
+ TRACE_ENTER(QUIC_EV_CONN_PRSFRM, qc);
+
+ ret = qcc_recv(qc->qcc, strm_frm->id, strm_frm->len,
+ strm_frm->offset.key, strm_frm->fin,
+ (char *)strm_frm->data);
+
+ /* frame rejected - packet must not be acknowledged */
+ TRACE_LEAVE(QUIC_EV_CONN_PRSFRM, qc);
+ return !ret;
+}
+
+/* Duplicate all frames from <pkt_frm_list> list into <out_frm_list> list
+ * for <qc> QUIC connection.
+ * This is a best effort function which never fails even if no memory could be
+ * allocated to duplicate these frames.
+ */
+static void qc_dup_pkt_frms(struct quic_conn *qc,
+ struct list *pkt_frm_list, struct list *out_frm_list)
+{
+ struct quic_frame *frm, *frmbak;
+ struct list tmp = LIST_HEAD_INIT(tmp);
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(frm, frmbak, pkt_frm_list, list) {
+ struct quic_frame *dup_frm, *origin;
+
+ if (frm->flags & QUIC_FL_TX_FRAME_ACKED) {
+ TRACE_DEVEL("already acknowledged frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ continue;
+ }
+
+ switch (frm->type) {
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct quic_stream *strm_frm = &frm->stream;
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream_desc;
+
+ node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
+ if (!node) {
+ TRACE_DEVEL("ignored frame for a released stream", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ continue;
+ }
+
+ stream_desc = eb64_entry(node, struct qc_stream_desc, by_id);
+ /* Do not resend this frame if in the "already acked range" */
+ if (strm_frm->offset.key + strm_frm->len <= stream_desc->ack_offset) {
+ TRACE_DEVEL("ignored frame in already acked range",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ continue;
+ }
+ else if (strm_frm->offset.key < stream_desc->ack_offset) {
+ uint64_t diff = stream_desc->ack_offset - strm_frm->offset.key;
+
+ qc_stream_frm_mv_fwd(frm, diff);
+ TRACE_DEVEL("updated partially acked frame",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ }
+
+ strm_frm->dup = 1;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ dup_frm = pool_alloc(pool_head_quic_frame);
+ if (!dup_frm) {
+ TRACE_ERROR("could not duplicate frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ break;
+ }
+
+ /* If <frm> is already a copy of another frame, we must take
+ * its original frame as source for the copy.
+ */
+ origin = frm->origin ? frm->origin : frm;
+ TRACE_DEVEL("built probing frame", QUIC_EV_CONN_PRSAFRM, qc, origin);
+ if (origin->pkt)
+ TRACE_DEVEL("duplicated from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &origin->pkt->pn_node.key);
+ else {
+ /* <origin> is a frame which was sent from a packet detected as lost. */
+ TRACE_DEVEL("duplicated from lost packet", QUIC_EV_CONN_PRSAFRM, qc);
+ }
+ *dup_frm = *origin;
+ dup_frm->pkt = NULL;
+ dup_frm->origin = origin;
+ dup_frm->flags = 0;
+ LIST_INIT(&dup_frm->reflist);
+ LIST_APPEND(&origin->reflist, &dup_frm->ref);
+ LIST_APPEND(&tmp, &dup_frm->list);
+ }
+
+ LIST_SPLICE(out_frm_list, &tmp);
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Boolean function which returns 1 if <pkt> TX packet is only made of
+ * already acknowledged frames.
+ */
+static inline int qc_pkt_with_only_acked_frms(struct quic_tx_packet *pkt)
+{
+ struct quic_frame *frm;
+
+ list_for_each_entry(frm, &pkt->frms, list)
+ if (!(frm->flags & QUIC_FL_TX_FRAME_ACKED))
+ return 0;
+
+ return 1;
+}
+
+/* Prepare a fast retransmission from <qel> encryption level */
+static void qc_prep_fast_retrans(struct quic_conn *qc,
+ struct quic_enc_level *qel,
+ struct list *frms1, struct list *frms2)
+{
+ struct eb_root *pkts = &qel->pktns->tx.pkts;
+ struct list *frms = frms1;
+ struct eb64_node *node;
+ struct quic_tx_packet *pkt;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
+
+ BUG_ON(frms1 == frms2);
+
+ pkt = NULL;
+ node = eb64_first(pkts);
+ start:
+ while (node) {
+ struct quic_tx_packet *p;
+
+ p = eb64_entry(node, struct quic_tx_packet, pn_node);
+ node = eb64_next(node);
+ /* Skip the empty and coalesced packets */
+ if (!LIST_ISEMPTY(&p->frms) && !qc_pkt_with_only_acked_frms(p)) {
+ pkt = p;
+ break;
+ }
+ }
+
+ if (!pkt)
+ goto leave;
+
+ /* When building a packet from another one, the field which may increase the
+ * packet size is the packet number. And the maximum increase is 4 bytes.
+ */
+ if (!quic_peer_validated_addr(qc) && qc_is_listener(qc) &&
+ pkt->len + 4 > 3 * qc->rx.bytes - qc->tx.prep_bytes) {
+ qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ goto leave;
+ }
+
+ TRACE_DEVEL("duplicating packet", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ qc_dup_pkt_frms(qc, &pkt->frms, frms);
+ if (frms == frms1 && frms2) {
+ frms = frms2;
+ goto start;
+ }
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
+}
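+
+/* Numeric example of the anti-amplification check above (RFC 9000,
+ * section 8.1): with 1200 bytes received from a not yet validated peer and
+ * 2500 bytes already prepared, only 3 * 1200 - 2500 = 1100 bytes may still
+ * be sent, so duplicating a 1252-byte packet would exceed the limit and
+ * the retransmission is skipped.
+ */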
+
+/* Prepare a fast retransmission during a handshake after a client
+ * has resent Initial packets. According to the RFC, a server may retransmit
+ * Initial packets, coalescing them with others (Handshake packets here).
+ * (Listener only function).
+ */
+static void qc_prep_hdshk_fast_retrans(struct quic_conn *qc,
+ struct list *ifrms, struct list *hfrms)
+{
+ struct list itmp = LIST_HEAD_INIT(itmp);
+ struct list htmp = LIST_HEAD_INIT(htmp);
+
+ struct quic_enc_level *iqel = &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL];
+ struct quic_enc_level *hqel = &qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE];
+ struct quic_enc_level *qel = iqel;
+ struct eb_root *pkts;
+ struct eb64_node *node;
+ struct quic_tx_packet *pkt;
+ struct list *tmp = &itmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
+ start:
+ pkt = NULL;
+ pkts = &qel->pktns->tx.pkts;
+ node = eb64_first(pkts);
+ /* Skip the empty and coalesced packets (they have already been retransmitted) */
+ while (node) {
+ struct quic_tx_packet *p;
+
+ p = eb64_entry(node, struct quic_tx_packet, pn_node);
+ if (!LIST_ISEMPTY(&p->frms) && !(p->flags & QUIC_FL_TX_PACKET_COALESCED) &&
+ !qc_pkt_with_only_acked_frms(p)) {
+ pkt = p;
+ break;
+ }
+
+ node = eb64_next(node);
+ }
+
+ if (!pkt)
+ goto end;
+
+ /* When building a packet from another one, the field which may increase the
+ * packet size is the packet number. And the maximum increase is 4 bytes.
+ */
+ if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) {
+ size_t dglen = pkt->len + 4;
+
+ dglen += pkt->next ? pkt->next->len + 4 : 0;
+ if (dglen > 3 * qc->rx.bytes - qc->tx.prep_bytes) {
+ qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ if (pkt->next)
+ TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt->next);
+ goto end;
+ }
+ }
+
+ qel->pktns->tx.pto_probe += 1;
+
+ /* No risk of looping here, the number of packets per datagram is bounded */
+ requeue:
+ TRACE_DEVEL("duplicating packet", QUIC_EV_CONN_PRSAFRM, qc, NULL, &pkt->pn_node.key);
+ qc_dup_pkt_frms(qc, &pkt->frms, tmp);
+ if (qel == iqel) {
+ if (pkt->next && pkt->next->type == QUIC_PACKET_TYPE_HANDSHAKE) {
+ pkt = pkt->next;
+ tmp = &htmp;
+ hqel->pktns->tx.pto_probe += 1;
+ TRACE_DEVEL("looping for next packet", QUIC_EV_CONN_SPPKTS, qc);
+ goto requeue;
+ }
+ }
+
+ end:
+ LIST_SPLICE(ifrms, &itmp);
+ LIST_SPLICE(hfrms, &htmp);
+
+ TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
+}
+
+static void qc_cc_err_count_inc(struct quic_conn *qc, struct quic_frame *frm)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ if (frm->type == QUIC_FT_CONNECTION_CLOSE)
+ quic_stats_transp_err_count_inc(qc->prx_counters, frm->connection_close.error_code);
+ else if (frm->type == QUIC_FT_CONNECTION_CLOSE_APP) {
+ if (qc->mux_state != QC_MUX_READY || !qc->qcc->app_ops->inc_err_cnt)
+ goto out;
+
+ qc->qcc->app_ops->inc_err_cnt(qc->qcc->ctx, frm->connection_close_app.error_code);
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Cancel a request on connection <qc> for stream id <id>. This is useful when
+ * the client opens a new stream but the MUX has already been released.
+ * STOP_SENDING and RESET_STREAM frames are prepared for emission.
+ *
+ * TODO this function is closely related to H3. Its place should be in H3 layer
+ * instead of quic-conn but this requires an architecture adjustment.
+ *
+ * Returns 1 on success else 0.
+ */
+static int qc_h3_request_reject(struct quic_conn *qc, uint64_t id)
+{
+ int ret = 0;
+ struct quic_frame *ss, *rs;
+ struct quic_enc_level *qel = &qc->els[QUIC_TLS_ENC_LEVEL_APP];
+ const uint64_t app_error_code = H3_REQUEST_REJECTED;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+
+ /* Do not emit rejection for unknown unidirectional stream as it is
+ * forbidden to close some of them (H3 control stream and QPACK
+ * encoder/decoder streams).
+ */
+ if (quic_stream_is_uni(id)) {
+ ret = 1;
+ goto out;
+ }
+
+ // fixme: zalloc
+ ss = pool_zalloc(pool_head_quic_frame);
+ if (!ss) {
+ TRACE_ERROR("failed to allocate quic_frame", QUIC_EV_CONN_PRSHPKT, qc);
+ goto out;
+ }
+
+ ss->type = QUIC_FT_STOP_SENDING;
+ ss->stop_sending.id = id;
+ ss->stop_sending.app_error_code = app_error_code;
+ LIST_INIT(&ss->reflist);
+
+ rs = pool_zalloc(pool_head_quic_frame);
+ if (!rs) {
+ TRACE_ERROR("failed to allocate quic_frame", QUIC_EV_CONN_PRSHPKT, qc);
+ pool_free(pool_head_quic_frame, ss);
+ goto out;
+ }
+
+ rs->type = QUIC_FT_RESET_STREAM;
+ rs->reset_stream.id = id;
+ rs->reset_stream.app_error_code = app_error_code;
+ rs->reset_stream.final_size = 0;
+ LIST_INIT(&rs->reflist);
+
+ LIST_APPEND(&qel->pktns->tx.frms, &ss->list);
+ LIST_APPEND(&qel->pktns->tx.frms, &rs->list);
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+}
+
+/* Release the underlying memory used by <ncbuf> non-contiguous buffer */
+static void quic_free_ncbuf(struct ncbuf *ncbuf)
+{
+ struct buffer buf;
+
+ if (ncb_is_null(ncbuf))
+ return;
+
+ buf = b_make(ncbuf->area, ncbuf->size, 0, 0);
+ b_free(&buf);
+ offer_buffers(NULL, 1);
+
+ *ncbuf = NCBUF_NULL;
+}
+
+/* Allocate the underlying required memory for <ncbuf> non-contiguous buffer */
+static struct ncbuf *quic_get_ncbuf(struct ncbuf *ncbuf)
+{
+ struct buffer buf = BUF_NULL;
+
+ if (!ncb_is_null(ncbuf))
+ return ncbuf;
+
+ b_alloc(&buf);
+ BUG_ON(b_is_null(&buf));
+
+ *ncbuf = ncb_make(buf.area, buf.size, 0);
+ ncb_init(ncbuf, 0);
+
+ return ncbuf;
+}
+
+/* Parse <frm> CRYPTO frame coming with <pkt> packet at <qel> encryption level
+ * for <qc> connection. Returns 1 if succeeded, 0 if not. Also set
+ * <*fast_retrans> to 1 if the handshake completion speed-up may be run after
+ * having received duplicated CRYPTO data.
+ */
+static int qc_handle_crypto_frm(struct quic_conn *qc,
+ struct quic_crypto *frm, struct quic_rx_packet *pkt,
+ struct quic_enc_level *qel, int *fast_retrans)
+{
+ int ret = 0;
+ enum ncb_ret ncb_ret;
+ /* XXX TO DO: <cfdebug> is used only for the traces. */
+ struct quic_rx_crypto_frm cfdebug = {
+ .offset_node.key = frm->offset,
+ .len = frm->len,
+ };
+ struct quic_cstream *cstream = qel->cstream;
+ struct ncbuf *ncbuf = &qel->cstream->rx.ncbuf;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+ if (unlikely(qel->tls_ctx.flags & QUIC_FL_TLS_SECRETS_DCD)) {
+ TRACE_PROTO("CRYPTO data discarded",
+ QUIC_EV_CONN_RXPKT, qc, pkt, &cfdebug);
+ goto done;
+ }
+
+ if (unlikely(frm->offset < cstream->rx.offset)) {
+ size_t diff;
+
+ if (frm->offset + frm->len <= cstream->rx.offset) {
+ /* Nothing to do */
+ TRACE_PROTO("Already received CRYPTO data",
+ QUIC_EV_CONN_RXPKT, qc, pkt, &cfdebug);
+ if (qc_is_listener(qc) && qel == &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL] &&
+ !(qc->flags & QUIC_FL_CONN_HANDSHAKE_SPEED_UP))
+ *fast_retrans = 1;
+ goto done;
+ }
+
+ TRACE_PROTO("Partially already received CRYPTO data",
+ QUIC_EV_CONN_RXPKT, qc, pkt, &cfdebug);
+
+ diff = cstream->rx.offset - frm->offset;
+ frm->len -= diff;
+ frm->data += diff;
+ frm->offset = cstream->rx.offset;
+ }
+
+ if (frm->offset == cstream->rx.offset && ncb_is_empty(ncbuf)) {
+ if (!qc_provide_cdata(qel, qc->xprt_ctx, frm->data, frm->len,
+ pkt, &cfdebug)) {
+ // trace already emitted by function above
+ goto leave;
+ }
+
+ cstream->rx.offset += frm->len;
+ TRACE_DEVEL("increment crypto level offset", QUIC_EV_CONN_PHPKTS, qc, qel);
+ goto done;
+ }
+
+ if (!quic_get_ncbuf(ncbuf) ||
+ ncb_is_null(ncbuf)) {
+ TRACE_ERROR("CRYPTO ncbuf allocation failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+
+ /* frm->offset > cstream->rx.offset */
+ ncb_ret = ncb_add(ncbuf, frm->offset - cstream->rx.offset,
+ (const char *)frm->data, frm->len, NCB_ADD_COMPARE);
+ if (ncb_ret != NCB_RET_OK) {
+ if (ncb_ret == NCB_RET_DATA_REJ) {
+ TRACE_ERROR("overlapping data rejected", QUIC_EV_CONN_PRSHPKT, qc);
+ quic_set_connection_close(qc, quic_err_transport(QC_ERR_PROTOCOL_VIOLATION));
+ }
+ else if (ncb_ret == NCB_RET_GAP_SIZE) {
+ TRACE_ERROR("cannot bufferize frame due to gap size limit",
+ QUIC_EV_CONN_PRSHPKT, qc);
+ }
+ goto leave;
+ }
+
+ done:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+}
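+
+/* Example of the trimming above: if 1000 bytes of CRYPTO data have already
+ * been forwarded to the TLS stack (cstream->rx.offset == 1000) and a frame
+ * arrives with offset 800 and length 400, its first 200 bytes are dropped
+ * and the frame is handled as 200 bytes at offset 1000.
+ */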
+
+/* Parse all the frames of <pkt> QUIC packet for QUIC connection <qc> and <qel>
+ * as encryption level.
+ * Returns 1 if succeeded, 0 if failed.
+ */
+static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt,
+ struct quic_enc_level *qel)
+{
+ struct quic_frame frm;
+ const unsigned char *pos, *end;
+ int fast_retrans = 0, ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+ /* Skip the AAD */
+ pos = pkt->data + pkt->aad_len;
+ end = pkt->data + pkt->len;
+
+ while (pos < end) {
+ if (!qc_parse_frm(&frm, pkt, &pos, end, qc)) {
+ // trace already emitted by function above
+ goto leave;
+ }
+
+ TRACE_PROTO("RX frame", QUIC_EV_CONN_PSTRM, qc, &frm);
+ switch (frm.type) {
+ case QUIC_FT_PADDING:
+ break;
+ case QUIC_FT_PING:
+ break;
+ case QUIC_FT_ACK:
+ {
+ unsigned int rtt_sample;
+
+ rtt_sample = 0;
+ if (!qc_parse_ack_frm(qc, &frm, qel, &rtt_sample, &pos, end)) {
+ // trace already emitted by function above
+ goto leave;
+ }
+
+ if (rtt_sample) {
+ unsigned int ack_delay;
+
+ ack_delay = !quic_application_pktns(qel->pktns, qc) ? 0 :
+ qc->state >= QUIC_HS_ST_CONFIRMED ?
+ MS_TO_TICKS(QUIC_MIN(quic_ack_delay_ms(&frm.ack, qc), qc->max_ack_delay)) :
+ MS_TO_TICKS(quic_ack_delay_ms(&frm.ack, qc));
+ quic_loss_srtt_update(&qc->path->loss, rtt_sample, ack_delay, qc);
+ }
+ break;
+ }
+ case QUIC_FT_RESET_STREAM:
+ /* TODO: handle this frame at STREAM level */
+ break;
+ case QUIC_FT_STOP_SENDING:
+ {
+ struct quic_stop_sending *stop_sending = &frm.stop_sending;
+ if (qc->mux_state == QC_MUX_READY) {
+ if (qcc_recv_stop_sending(qc->qcc, stop_sending->id,
+ stop_sending->app_error_code)) {
+ TRACE_ERROR("qcc_recv_stop_sending() failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+ }
+ break;
+ }
+ case QUIC_FT_CRYPTO:
+ if (!qc_handle_crypto_frm(qc, &frm.crypto, pkt, qel, &fast_retrans))
+ goto leave;
+ break;
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct quic_stream *stream = &frm.stream;
+ unsigned nb_streams = qc->rx.strms[qcs_id_type(stream->id)].nb_streams;
+
+ /* The upper layer may not be allocated. */
+ if (qc->mux_state != QC_MUX_READY) {
+ if ((stream->id >> QCS_ID_TYPE_SHIFT) < nb_streams) {
+ TRACE_DATA("Already closed stream", QUIC_EV_CONN_PRSHPKT, qc);
+ }
+ else {
+ TRACE_DEVEL("No mux for new stream", QUIC_EV_CONN_PRSHPKT, qc);
+ if (qc->app_ops == &h3_ops) {
+ if (!qc_h3_request_reject(qc, stream->id)) {
+ TRACE_ERROR("error on request rejection", QUIC_EV_CONN_PRSHPKT, qc);
+ /* This packet will not be acknowledged */
+ goto leave;
+ }
+ }
+ else {
+ /* This packet will not be acknowledged */
+ goto leave;
+ }
+ }
+
+ break;
+ }
+
+ if (!qc_handle_strm_frm(pkt, stream, qc)) {
+ TRACE_ERROR("qc_handle_strm_frm() failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+
+ break;
+ }
+ case QUIC_FT_MAX_DATA:
+ if (qc->mux_state == QC_MUX_READY) {
+ struct quic_max_data *data = &frm.max_data;
+ qcc_recv_max_data(qc->qcc, data->max_data);
+ }
+ break;
+ case QUIC_FT_MAX_STREAM_DATA:
+ if (qc->mux_state == QC_MUX_READY) {
+ struct quic_max_stream_data *data = &frm.max_stream_data;
+ if (qcc_recv_max_stream_data(qc->qcc, data->id,
+ data->max_stream_data)) {
+ TRACE_ERROR("qcc_recv_max_stream_data() failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+ }
+ break;
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ case QUIC_FT_MAX_STREAMS_UNI:
+ break;
+ case QUIC_FT_DATA_BLOCKED:
+ HA_ATOMIC_INC(&qc->prx_counters->data_blocked);
+ break;
+ case QUIC_FT_STREAM_DATA_BLOCKED:
+ HA_ATOMIC_INC(&qc->prx_counters->stream_data_blocked);
+ break;
+ case QUIC_FT_STREAMS_BLOCKED_BIDI:
+ HA_ATOMIC_INC(&qc->prx_counters->streams_data_blocked_bidi);
+ break;
+ case QUIC_FT_STREAMS_BLOCKED_UNI:
+ HA_ATOMIC_INC(&qc->prx_counters->streams_data_blocked_uni);
+ break;
+ case QUIC_FT_NEW_CONNECTION_ID:
+ case QUIC_FT_RETIRE_CONNECTION_ID:
+ /* XXX TO DO XXX */
+ break;
+ case QUIC_FT_CONNECTION_CLOSE:
+ case QUIC_FT_CONNECTION_CLOSE_APP:
+ /* Increment the error counters */
+ qc_cc_err_count_inc(qc, &frm);
+ if (!(qc->flags & QUIC_FL_CONN_DRAINING)) {
+ if (!(qc->flags & QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED)) {
+ qc->flags |= QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED;
+ HA_ATOMIC_DEC(&qc->prx_counters->half_open_conn);
+ }
+ TRACE_STATE("Entering draining state", QUIC_EV_CONN_PRSHPKT, qc);
+ /* RFC 9000 10.2. Immediate Close:
+ * The closing and draining connection states exist to ensure
+ * that connections close cleanly and that delayed or reordered
+ * packets are properly discarded. These states SHOULD persist
+ * for at least three times the current PTO interval...
+ *
+ * Rearm the idle timeout only one time when entering draining
+ * state.
+ */
+ qc_idle_timer_do_rearm(qc);
+ qc->flags |= QUIC_FL_CONN_DRAINING|QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ qc_notify_close(qc);
+ }
+ break;
+ case QUIC_FT_HANDSHAKE_DONE:
+ if (qc_is_listener(qc)) {
+ TRACE_ERROR("non accepted QUIC_FT_HANDSHAKE_DONE frame",
+ QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+
+ qc->state = QUIC_HS_ST_CONFIRMED;
+ break;
+ default:
+ TRACE_ERROR("unknosw frame type", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+ }
+
+ /* Flag this packet number space as having received a packet. */
+ qel->pktns->flags |= QUIC_FL_PKTNS_PKT_RECEIVED;
+
+ if (fast_retrans) {
+ struct quic_enc_level *iqel = &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL];
+ struct quic_enc_level *hqel = &qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE];
+
+ TRACE_PROTO("speeding up handshake completion", QUIC_EV_CONN_PRSHPKT, qc);
+ qc_prep_hdshk_fast_retrans(qc, &iqel->pktns->tx.frms, &hqel->pktns->tx.frms);
+ qc->flags |= QUIC_FL_CONN_HANDSHAKE_SPEED_UP;
+ }
+
+ /* The server must switch from INITIAL to HANDSHAKE handshake state when it
+ * has successfully parsed a Handshake packet. The Initial encryption keys must
+ * also be discarded.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_HANDSHAKE && qc_is_listener(qc)) {
+ if (qc->state >= QUIC_HS_ST_SERVER_INITIAL) {
+ if (!(qc->els[QUIC_TLS_ENC_LEVEL_INITIAL].tls_ctx.flags &
+ QUIC_FL_TLS_SECRETS_DCD)) {
+ quic_tls_discard_keys(&qc->els[QUIC_TLS_ENC_LEVEL_INITIAL]);
+ TRACE_PROTO("discarding Initial pktns", QUIC_EV_CONN_PRSHPKT, qc);
+ quic_pktns_discard(qc->els[QUIC_TLS_ENC_LEVEL_INITIAL].pktns, qc);
+ qc_set_timer(qc);
+ qc_el_rx_pkts_del(&qc->els[QUIC_TLS_ENC_LEVEL_INITIAL]);
+ qc_release_pktns_frms(qc, qc->els[QUIC_TLS_ENC_LEVEL_INITIAL].pktns);
+ }
+ if (qc->state < QUIC_HS_ST_SERVER_HANDSHAKE)
+ qc->state = QUIC_HS_ST_SERVER_HANDSHAKE;
+ }
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+}
+
+/* Allocate Tx buffer from <qc> quic-conn if needed.
+ *
+ * Returns allocated buffer or NULL on error.
+ */
+static struct buffer *qc_txb_alloc(struct quic_conn *qc)
+{
+ struct buffer *buf = &qc->tx.buf;
+ if (!b_alloc(buf))
+ return NULL;
+
+ return buf;
+}
+
+/* Free Tx buffer from <qc> if it is empty. */
+static void qc_txb_release(struct quic_conn *qc)
+{
+ struct buffer *buf = &qc->tx.buf;
+
+ /* For the moment, the sending function is responsible for purging the buffer
+ * entirely. This may change in the future but would require the ability
+ * to reuse old data.
+ */
+ BUG_ON_HOT(buf && b_data(buf));
+
+ if (!b_data(buf)) {
+ b_free(buf);
+ offer_buffers(NULL, 1);
+ }
+}
+
+/* Commit a datagram payload written into <buf> of length <length>. <first_pkt>
+ * must contain the address of the first packet stored in the payload.
+ *
+ * The caller is responsible for ensuring there is enough space in the buffer.
+ */
+static void qc_txb_store(struct buffer *buf, uint16_t length,
+ struct quic_tx_packet *first_pkt)
+{
+ const size_t hdlen = sizeof(uint16_t) + sizeof(void *);
+ BUG_ON_HOT(b_contig_space(buf) < hdlen); /* this must not happen */
+
+ write_u16(b_tail(buf), length);
+ write_ptr(b_tail(buf) + sizeof(length), first_pkt);
+ b_add(buf, hdlen + length);
+}
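+
+/* Layout of one datagram record as written above into the TX buffer and
+ * later consumed by qc_send_ppkts():
+ *
+ *  +------------+------------------+---------------------------+
+ *  | u16 length | void * first_pkt | <length> bytes of payload |
+ *  +------------+------------------+---------------------------+
+ */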
+
+/* Returns 1 if a packet may be built for <qc> from <qel> encryption level
+ * with <frms> as ack-eliciting frame list to send, 0 if not.
+ * <cc> must be set to 1 if an immediate close was asked for, 0 if not.
+ * <probe> must be set to 1 if a probing packet is required, 0 if not.
+ * <force_ack> may be set to 1 to force an ACK to be sent.
+ */
+static int qc_may_build_pkt(struct quic_conn *qc, struct list *frms,
+ struct quic_enc_level *qel, int cc, int probe, int force_ack)
+{
+ unsigned int must_ack = force_ack ||
+ (LIST_ISEMPTY(frms) && (qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED));
+
+ /* Do not build any more packets if the TX secrets are not available, or
+ * if there is nothing to send, i.e. if no CONNECTION_CLOSE or ACK are required,
+ * if there are no more packets to send upon PTO expiration,
+ * and if there are no more ack-eliciting frames to send or the in-flight
+ * congestion control limit is reached for prepared data.
+ */
+ if (!quic_tls_has_tx_sec(qel) ||
+ (!cc && !probe && !must_ack &&
+ (LIST_ISEMPTY(frms) || qc->path->prep_in_flight >= qc->path->cwnd))) {
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Prepare as much as possible QUIC packets for sending from prebuilt frames
+ * <frms>. Each packet is stored in a distinct datagram written to <buf>.
+ *
+ * Each datagram is prepended with a two-field header: the datagram length and
+ * the address of the packet contained in the datagram.
+ *
+ * Returns the number of bytes prepared in packets if succeeded (may be 0), or
+ * -1 if something wrong happened.
+ */
+static int qc_prep_app_pkts(struct quic_conn *qc, struct buffer *buf,
+ struct list *frms)
+{
+ int ret = -1;
+ struct quic_enc_level *qel;
+ unsigned char *end, *pos;
+ struct quic_tx_packet *pkt;
+ size_t total;
+ /* Each datagram is prepended with its length followed by the address
+ * of the first packet in the datagram.
+ */
+ const size_t dg_headlen = sizeof(uint16_t) + sizeof(pkt);
+
+ TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+ qel = &qc->els[QUIC_TLS_ENC_LEVEL_APP];
+ total = 0;
+ pos = (unsigned char *)b_tail(buf);
+ while (b_contig_space(buf) >= (int)qc->path->mtu + dg_headlen) {
+ int err, probe, cc;
+
+ TRACE_POINT(QUIC_EV_CONN_PHPKTS, qc, qel);
+ probe = 0;
+ cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ /* We do not probe if an immediate close was asked */
+ if (!cc)
+ probe = qel->pktns->tx.pto_probe;
+
+ if (!qc_may_build_pkt(qc, frms, qel, cc, probe, 0))
+ break;
+
+ /* Leave room for the datagram header */
+ pos += dg_headlen;
+ if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) {
+ end = pos + QUIC_MIN((uint64_t)qc->path->mtu, 3 * qc->rx.bytes - qc->tx.prep_bytes);
+ }
+ else {
+ end = pos + qc->path->mtu;
+ }
+
+ pkt = qc_build_pkt(&pos, end, qel, &qel->tls_ctx, frms, qc, NULL, 0,
+ QUIC_PACKET_TYPE_SHORT, 0, 0, probe, cc, &err);
+ switch (err) {
+ case -2:
+ // trace already emitted by function above
+ goto leave;
+ case -1:
+ /* As we provide qc_build_pkt() with a big enough buffer to fill an
+ * MTU, we are here because of the congestion control window. There is
+ * no need to try to reuse this buffer.
+ */
+ TRACE_DEVEL("could not prepare anymore packet", QUIC_EV_CONN_PHPKTS, qc);
+ goto out;
+ default:
+ break;
+ }
+
+ /* This is to please GCC. We cannot have (err >= 0 && !pkt) */
+ BUG_ON(!pkt);
+
+ if (qc->flags & QUIC_FL_CONN_RETRANS_OLD_DATA)
+ pkt->flags |= QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA;
+
+ total += pkt->len;
+
+ /* Write datagram header. */
+ qc_txb_store(buf, pkt->len, pkt);
+ }
+
+ out:
+ ret = total;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+ return ret;
+}
+
+/* Prepare as much as possible QUIC packets for sending from prebuilt frames
+ * <frms>. Several packets can be regrouped in a single datagram. The result is
+ * written into <buf>.
+ *
+ * Each datagram is prepended with a two-field header: the datagram length and
+ * the address of the first packet in the datagram.
+ *
+ * Returns the number of bytes prepared in packets if succeeded (may be 0), or
+ * -1 if something wrong happened.
+ */
+static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf,
+ enum quic_tls_enc_level tel, struct list *tel_frms,
+ enum quic_tls_enc_level next_tel, struct list *next_tel_frms)
+{
+ struct quic_enc_level *qel;
+ unsigned char *end, *pos;
+ struct quic_tx_packet *first_pkt, *cur_pkt, *prv_pkt;
+ /* length of datagrams */
+ uint16_t dglen;
+ size_t total;
+ int ret = -1, padding;
+ /* Each datagram is prepended with its length followed by the address
+ * of the first packet in the datagram.
+ */
+ const size_t dg_headlen = sizeof(uint16_t) + sizeof(first_pkt);
+ struct list *frms;
+
+ TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+ /* Currently qc_prep_pkts() does not handle buffer wrapping so the
+ * caller must ensure that <buf> is reset.
+ */
+ BUG_ON_HOT(buf->head || buf->data);
+
+ total = 0;
+ qel = &qc->els[tel];
+ frms = tel_frms;
+ dglen = 0;
+ padding = 0;
+ pos = (unsigned char *)b_head(buf);
+ first_pkt = prv_pkt = NULL;
+ while (b_contig_space(buf) >= (int)qc->path->mtu + dg_headlen || prv_pkt) {
+ int err, probe, cc;
+ enum quic_pkt_type pkt_type;
+ struct quic_tls_ctx *tls_ctx;
+ const struct quic_version *ver;
+ int force_ack = (qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) &&
+ (qel == &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL] ||
+ qel == &qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE]);
+
+ TRACE_POINT(QUIC_EV_CONN_PHPKTS, qc, qel);
+ probe = 0;
+ cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ /* We do not probe if an immediate close was asked */
+ if (!cc)
+ probe = qel->pktns->tx.pto_probe;
+
+ if (!qc_may_build_pkt(qc, frms, qel, cc, probe, force_ack)) {
+ if (prv_pkt)
+ qc_txb_store(buf, dglen, first_pkt);
+ /* Let's select the next encryption level */
+ if (tel != next_tel && next_tel != QUIC_TLS_ENC_LEVEL_NONE) {
+ tel = next_tel;
+ frms = next_tel_frms;
+ qel = &qc->els[tel];
+ /* Build a new datagram */
+ prv_pkt = NULL;
+ TRACE_DEVEL("next encryption level selected", QUIC_EV_CONN_PHPKTS, qc);
+ continue;
+ }
+ break;
+ }
+
+ pkt_type = quic_tls_level_pkt_type(tel);
+ if (!prv_pkt) {
+ /* Leave room for the datagram header */
+ pos += dg_headlen;
+ if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) {
+ end = pos + QUIC_MIN((uint64_t)qc->path->mtu, 3 * qc->rx.bytes - qc->tx.prep_bytes);
+ }
+ else {
+ end = pos + qc->path->mtu;
+ }
+ }
+
+ /* RFC 9000 14.1 Initial datagram size
+ * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting
+ * Initial packets to at least the smallest allowed maximum datagram size of
+ * 1200 bytes.
+ *
+ * Ensure that no ack-eliciting packets are sent into too small datagrams
+ */
+ if (pkt_type == QUIC_PACKET_TYPE_INITIAL && !LIST_ISEMPTY(tel_frms)) {
+ if (end - pos < QUIC_INITIAL_PACKET_MINLEN) {
+ TRACE_PROTO("No more enough room to build an Initial packet",
+ QUIC_EV_CONN_PHPKTS, qc);
+ goto out;
+ }
+
+ /* Pad this Initial packet if there are no ack-eliciting frames to send from
+ * the next packet number space.
+ */
+ if (LIST_ISEMPTY(next_tel_frms))
+ padding = 1;
+ }
+
+ if (qc->negotiated_version) {
+ ver = qc->negotiated_version;
+ if (qel == &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL])
+ tls_ctx = &qc->negotiated_ictx;
+ else
+ tls_ctx = &qel->tls_ctx;
+ }
+ else {
+ ver = qc->original_version;
+ tls_ctx = &qel->tls_ctx;
+ }
+
+ cur_pkt = qc_build_pkt(&pos, end, qel, tls_ctx, frms,
+ qc, ver, dglen, pkt_type,
+ force_ack, padding, probe, cc, &err);
+ switch (err) {
+ case -2:
+ // trace already emitted by function above
+ goto leave;
+ case -1:
+ /* If there was already a correct packet present, set the
+ * current datagram as prepared into <buf>.
+ */
+ if (prv_pkt)
+ qc_txb_store(buf, dglen, first_pkt);
+ TRACE_DEVEL("could not prepare anymore packet", QUIC_EV_CONN_PHPKTS, qc);
+ goto out;
+ default:
+ break;
+ }
+
+ /* This is to please GCC. We cannot have (err >= 0 && !cur_pkt). */
+ BUG_ON(!cur_pkt);
+
+ if (qc->flags & QUIC_FL_CONN_RETRANS_OLD_DATA)
+ cur_pkt->flags |= QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA;
+
+ total += cur_pkt->len;
+ /* keep track of the first packet in the datagram */
+ if (!first_pkt)
+ first_pkt = cur_pkt;
+ /* Attach the current one to the previous one and vice versa */
+ if (prv_pkt) {
+ prv_pkt->next = cur_pkt;
+ cur_pkt->prev = prv_pkt;
+ cur_pkt->flags |= QUIC_FL_TX_PACKET_COALESCED;
+ }
+ /* By default, consider that a new datagram has to be built */
+ prv_pkt = NULL;
+ dglen += cur_pkt->len;
+ /* Client: discard the Initial encryption keys as soon as
+ * a handshake packet could be built.
+ */
+ if (qc->state == QUIC_HS_ST_CLIENT_INITIAL &&
+ pkt_type == QUIC_PACKET_TYPE_HANDSHAKE) {
+ quic_tls_discard_keys(&qc->els[QUIC_TLS_ENC_LEVEL_INITIAL]);
+ TRACE_PROTO("discarding Initial pktns", QUIC_EV_CONN_PHPKTS, qc);
+ quic_pktns_discard(qc->els[QUIC_TLS_ENC_LEVEL_INITIAL].pktns, qc);
+ qc_set_timer(qc);
+ qc_el_rx_pkts_del(&qc->els[QUIC_TLS_ENC_LEVEL_INITIAL]);
+ qc_release_pktns_frms(qc, qc->els[QUIC_TLS_ENC_LEVEL_INITIAL].pktns);
+ qc->state = QUIC_HS_ST_CLIENT_HANDSHAKE;
+ }
+ /* If the data for the current encryption level have all been sent,
+ * select the next level.
+ */
+ if ((tel == QUIC_TLS_ENC_LEVEL_INITIAL || tel == QUIC_TLS_ENC_LEVEL_HANDSHAKE) &&
+ next_tel != QUIC_TLS_ENC_LEVEL_NONE && (LIST_ISEMPTY(frms) && !qel->pktns->tx.pto_probe)) {
+ /* If QUIC_TLS_ENC_LEVEL_HANDSHAKE was already reached let's try QUIC_TLS_ENC_LEVEL_APP */
+ if (tel == QUIC_TLS_ENC_LEVEL_HANDSHAKE && next_tel == tel)
+ next_tel = QUIC_TLS_ENC_LEVEL_APP;
+ tel = next_tel;
+ if (tel == QUIC_TLS_ENC_LEVEL_APP)
+ frms = &qc->els[tel].pktns->tx.frms;
+ else
+ frms = next_tel_frms;
+ qel = &qc->els[tel];
+ if (!LIST_ISEMPTY(frms)) {
+ /* If there is data for the next level, do not
+ * consume a datagram.
+ */
+ prv_pkt = cur_pkt;
+ }
+ }
+
+ /* If we have to build a new datagram, set the current datagram as
+ * prepared into <buf>.
+ */
+ if (!prv_pkt) {
+ qc_txb_store(buf, dglen, first_pkt);
+ first_pkt = NULL;
+ dglen = 0;
+ padding = 0;
+ }
+ else if (prv_pkt->type == QUIC_PACKET_TYPE_INITIAL &&
+ (!qc_is_listener(qc) ||
+ prv_pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)) {
+ padding = 1;
+ }
+ }
+
+ out:
+ ret = total;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+ return ret;
+}
+
+/* Send datagrams stored in <buf>.
+ *
+ * This function always returns 1 for success. Even if the sendto() syscall
+ * fails, the buffer is drained and the packets are considered as emitted. The
+ * QUIC loss detection mechanism is then relied upon as a fallback to retry
+ * sending.
+ */
+int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
+{
+ struct quic_conn *qc;
+ char skip_sendto = 0;
+
+ qc = ctx->qc;
+ TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
+ while (b_contig_data(buf, 0)) {
+ unsigned char *pos;
+ struct buffer tmpbuf = { };
+ struct quic_tx_packet *first_pkt, *pkt, *next_pkt;
+ uint16_t dglen;
+ size_t headlen = sizeof dglen + sizeof first_pkt;
+ unsigned int time_sent;
+
+ pos = (unsigned char *)b_head(buf);
+ dglen = read_u16(pos);
+ BUG_ON_HOT(!dglen); /* this should not happen */
+
+ pos += sizeof dglen;
+ first_pkt = read_ptr(pos);
+ pos += sizeof first_pkt;
+ tmpbuf.area = (char *)pos;
+ tmpbuf.size = tmpbuf.data = dglen;
+
+ TRACE_DATA("send dgram", QUIC_EV_CONN_SPPKTS, qc);
+ /* If sendto fails, just skip the call to it for the rest
+ * of the loop but continue to purge the buffer. Data will be
+ * transmitted when QUIC packets are detected as lost on our
+ * side.
+ *
+ * TODO use fd-monitoring to detect when the send operation can be
+ * retried. This should improve the bandwidth without relying on the
+ * retransmission timer. However, it requires a major rework of
+ * quic-conn fd management.
+ */
+ if (!skip_sendto) {
+ if (qc_snd_buf(qc, &tmpbuf, tmpbuf.data, 0)) {
+ skip_sendto = 1;
+ TRACE_ERROR("sendto error, simulate sending for the rest of data", QUIC_EV_CONN_SPPKTS, qc);
+ }
+ }
+
+ b_del(buf, dglen + headlen);
+ qc->tx.bytes += tmpbuf.data;
+ time_sent = now_ms;
+
+ for (pkt = first_pkt; pkt; pkt = next_pkt) {
+ pkt->time_sent = time_sent;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) {
+ pkt->pktns->tx.time_of_last_eliciting = time_sent;
+ qc->path->ifae_pkts++;
+ if (qc->flags & QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ)
+ qc_idle_timer_rearm(qc, 0);
+ }
+ if (!(qc->flags & QUIC_FL_CONN_CLOSING) &&
+ (pkt->flags & QUIC_FL_TX_PACKET_CC)) {
+ qc->flags |= QUIC_FL_CONN_CLOSING;
+ qc_notify_close(qc);
+
+ /* RFC 9000 10.2. Immediate Close:
+ * The closing and draining connection states exist to ensure
+ * that connections close cleanly and that delayed or reordered
+ * packets are properly discarded. These states SHOULD persist
+ * for at least three times the current PTO interval...
+ *
+ * Rearm the idle timeout only once when entering the closing
+ * state.
+ */
+ qc_idle_timer_do_rearm(qc);
+ if (qc->timer_task) {
+ task_destroy(qc->timer_task);
+ qc->timer_task = NULL;
+ }
+ }
+ qc->path->in_flight += pkt->in_flight_len;
+ pkt->pktns->tx.in_flight += pkt->in_flight_len;
+ if (pkt->in_flight_len)
+ qc_set_timer(qc);
+ TRACE_DATA("sent pkt", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ next_pkt = pkt->next;
+ quic_tx_packet_refinc(pkt);
+ eb64_insert(&pkt->pktns->tx.pkts, &pkt->pn_node);
+ }
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
+
+ return 1;
+}
+
+/* Copy into <buf> buffer a stateless reset token derived from the <salt>
+ * input. The cluster secret is used as the HKDF input secret to generate
+ * this token.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_stateless_reset_token_cpy(struct quic_conn *qc,
+ unsigned char *buf, size_t len,
+ const unsigned char *salt, size_t saltlen)
+{
+ /* Input secret */
+ const unsigned char *key = (const unsigned char *)global.cluster_secret;
+ size_t keylen = strlen(global.cluster_secret);
+ /* Info */
+ const unsigned char label[] = "stateless token";
+ size_t labellen = sizeof label - 1;
+ int ret;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ ret = quic_hkdf_extract_and_expand(EVP_sha256(), buf, len,
+ key, keylen, salt, saltlen, label, labellen);
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
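+
+/* Usage sketch (illustrative only, assuming QUIC_STATELESS_RESET_TOKEN_LEN
+ * is the token size): deriving the token for a connection ID would look like:
+ *
+ *   unsigned char token[QUIC_STATELESS_RESET_TOKEN_LEN];
+ *   quic_stateless_reset_token_cpy(qc, token, sizeof token,
+ *                                  cid->cid.data, cid->cid.len);
+ *
+ * The same cluster secret and CID always yield the same token, which allows
+ * emitting stateless resets without keeping per-connection state.
+ */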
+
+/* Initialize the stateless reset token attached to <quic_cid> connection ID.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_stateless_reset_token_init(struct quic_conn *qc,
+ struct quic_connection_id *quic_cid)
+{
+ int ret;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (global.cluster_secret) {
+ /* Output secret */
+ unsigned char *token = quic_cid->stateless_reset_token;
+ size_t tokenlen = sizeof quic_cid->stateless_reset_token;
+ /* Salt */
+ const unsigned char *cid = quic_cid->cid.data;
+ size_t cidlen = quic_cid->cid.len;
+
+ ret = quic_stateless_reset_token_cpy(qc, token, tokenlen, cid, cidlen);
+ }
+ else {
+ /* TODO: RAND_bytes() should be replaced */
+ ret = RAND_bytes(quic_cid->stateless_reset_token,
+ sizeof quic_cid->stateless_reset_token) == 1;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Allocate a new CID with <seq_num> as sequence number and attach it to <root>
+ * ebtree.
+ *
+ * The CID is randomly generated, with part of the result altered so that it
+ * is associated with the current thread ID. This means this function must only
+ * be called by the quic_conn thread.
+ *
+ * Returns the new CID if succeeded, NULL if not.
+ */
+static struct quic_connection_id *new_quic_cid(struct eb_root *root,
+ struct quic_conn *qc,
+ int seq_num)
+{
+ struct quic_connection_id *cid;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ cid = pool_alloc(pool_head_quic_connection_id);
+ if (!cid) {
+ TRACE_ERROR("cid allocation failed", QUIC_EV_CONN_TXPKT, qc);
+ goto err;
+ }
+
+ cid->cid.len = QUIC_HAP_CID_LEN;
+ /* TODO: RAND_bytes() should be replaced */
+ if (RAND_bytes(cid->cid.data, cid->cid.len) != 1) {
+ TRACE_ERROR("RAND_bytes() failed", QUIC_EV_CONN_TXPKT, qc);
+ goto err;
+ }
+
+ quic_pin_cid_to_tid(cid->cid.data, tid);
+ if (quic_stateless_reset_token_init(qc, cid) != 1) {
+ TRACE_ERROR("quic_stateless_reset_token_init() failed", QUIC_EV_CONN_TXPKT, qc);
+ goto err;
+ }
+
+ cid->qc = qc;
+
+ cid->seq_num.key = seq_num;
+ cid->retire_prior_to = 0;
+ /* insert the allocated CID in the quic_conn tree */
+ eb64_insert(root, &cid->seq_num);
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return cid;
+
+ err:
+ pool_free(pool_head_quic_connection_id, cid);
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return NULL;
+}
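+
+/* Note on the design choice (sketch): quic_pin_cid_to_tid() overwrites part
+ * of the random CID bytes so that quic_get_cid_tid() can later map any
+ * packet carrying this CID back to its owning thread, which is why CID
+ * creation is restricted to the quic_conn thread.
+ */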
+
+/* Build all the frames which must be sent just after the handshake has succeeded.
+ * This is essentially NEW_CONNECTION_ID frames. A QUIC server must also send
+ * a HANDSHAKE_DONE frame.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_build_post_handshake_frames(struct quic_conn *qc)
+{
+ int ret = 0, i, first, max;
+ struct quic_enc_level *qel;
+ struct quic_frame *frm, *frmbak;
+ struct list frm_list = LIST_HEAD_INIT(frm_list);
+ struct eb64_node *node;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+
+ qel = &qc->els[QUIC_TLS_ENC_LEVEL_APP];
+ /* Only servers must send a HANDSHAKE_DONE frame. */
+ if (qc_is_listener(qc)) {
+ frm = pool_zalloc(pool_head_quic_frame);
+ if (!frm) {
+ TRACE_ERROR("frame allocation error", QUIC_EV_CONN_IO_CB, qc);
+ goto leave;
+ }
+
+ LIST_INIT(&frm->reflist);
+ frm->type = QUIC_FT_HANDSHAKE_DONE;
+ LIST_APPEND(&frm_list, &frm->list);
+ }
+
+ /* Build <max> - 1 connection IDs: the CID with sequence number 0 is
+ * already in use for the current connection.
+ */
+ first = 1;
+ max = qc->tx.params.active_connection_id_limit;
+
+ /* TODO: check limit */
+ for (i = first; i < max; i++) {
+ struct quic_connection_id *cid;
+
+ frm = pool_zalloc(pool_head_quic_frame);
+ if (!frm) {
+ TRACE_ERROR("frame allocation error", QUIC_EV_CONN_IO_CB, qc);
+ goto err;
+ }
+
+ LIST_INIT(&frm->reflist);
+ cid = new_quic_cid(&qc->cids, qc, i);
+ if (!cid) {
+ pool_free(pool_head_quic_frame, frm);
+ TRACE_ERROR("CID allocation error", QUIC_EV_CONN_IO_CB, qc);
+ goto err;
+ }
+
+ /* insert the allocated CID in the receiver datagram handler tree */
+ ebmb_insert(&quic_dghdlrs[tid].cids, &cid->node, cid->cid.len);
+
+ quic_connection_id_to_frm_cpy(frm, cid);
+ LIST_APPEND(&frm_list, &frm->list);
+ }
+
+ LIST_SPLICE(&qel->pktns->tx.frms, &frm_list);
+ qc->flags |= QUIC_FL_CONN_POST_HANDSHAKE_FRAMES_BUILT;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB, qc);
+ return ret;
+
+ err:
+ /* free the frames */
+ list_for_each_entry_safe(frm, frmbak, &frm_list, list)
+ pool_free(pool_head_quic_frame, frm);
+
+ node = eb64_lookup_ge(&qc->cids, first);
+ while (node) {
+ struct quic_connection_id *cid;
+
+ cid = eb64_entry(node, struct quic_connection_id, seq_num);
+ if (cid->seq_num.key >= max)
+ break;
+
+ node = eb64_next(node);
+ ebmb_delete(&cid->node);
+ eb64_delete(&cid->seq_num);
+ pool_free(pool_head_quic_connection_id, cid);
+ }
+ goto leave;
+}
+
+/* Deallocate <arngs> tree of ACK ranges. */
+void quic_free_arngs(struct quic_conn *qc, struct quic_arngs *arngs)
+{
+ struct eb64_node *n;
+ struct quic_arng_node *ar;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ n = eb64_first(&arngs->root);
+ while (n) {
+ struct eb64_node *next;
+
+ ar = eb64_entry(n, struct quic_arng_node, first);
+ next = eb64_next(n);
+ eb64_delete(n);
+ pool_free(pool_head_quic_arng, ar);
+ n = next;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Return the gap value between <p> and <q> ACK ranges where <q> follows <p> in
+ * descending order.
+ */
+static inline size_t sack_gap(struct quic_arng_node *p,
+ struct quic_arng_node *q)
+{
+ return p->first.key - q->last - 2;
+}
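+
+/* Worked example (illustrative only): with <p> covering packet numbers
+ * [5..7] and <q> covering [1..2], two packets (3 and 4) are unacknowledged
+ * between them, and sack_gap() returns 5 - 2 - 2 = 1, i.e. the RFC 9000
+ * 19.3.1 Gap encoding (number of skipped packets minus one).
+ */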
+
+
+/* Remove the last elements of <arngs> tree of ACK ranges, updating its
+ * encoded size, until it goes below <limit>.
+ * Returns 1 if succeeded, 0 if not (no more elements to remove).
+ */
+static int quic_rm_last_ack_ranges(struct quic_conn *qc,
+ struct quic_arngs *arngs, size_t limit)
+{
+ int ret = 0;
+ struct eb64_node *last, *prev;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ last = eb64_last(&arngs->root);
+ while (last && arngs->enc_sz > limit) {
+ struct quic_arng_node *last_node, *prev_node;
+
+ prev = eb64_prev(last);
+ if (!prev) {
+ TRACE_DEVEL("<last> not found", QUIC_EV_CONN_TXPKT, qc);
+ goto out;
+ }
+
+ last_node = eb64_entry(last, struct quic_arng_node, first);
+ prev_node = eb64_entry(prev, struct quic_arng_node, first);
+ arngs->enc_sz -= quic_int_getsize(last_node->last - last_node->first.key);
+ arngs->enc_sz -= quic_int_getsize(sack_gap(prev_node, last_node));
+ arngs->enc_sz -= quic_decint_size_diff(arngs->sz);
+ --arngs->sz;
+ eb64_delete(last);
+ pool_free(pool_head_quic_arng, last_node);
+ last = prev;
+ }
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Set the encoded size of <arngs> QUIC ack ranges. */
+static void quic_arngs_set_enc_sz(struct quic_conn *qc, struct quic_arngs *arngs)
+{
+ struct eb64_node *node, *next;
+ struct quic_arng_node *ar, *ar_next;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ node = eb64_last(&arngs->root);
+ if (!node)
+ goto leave;
+
+ ar = eb64_entry(node, struct quic_arng_node, first);
+ arngs->enc_sz = quic_int_getsize(ar->last) +
+ quic_int_getsize(ar->last - ar->first.key) + quic_int_getsize(arngs->sz - 1);
+
+ while ((next = eb64_prev(node))) {
+ ar_next = eb64_entry(next, struct quic_arng_node, first);
+ arngs->enc_sz += quic_int_getsize(sack_gap(ar, ar_next)) +
+ quic_int_getsize(ar_next->last - ar_next->first.key);
+ node = next;
+ ar = eb64_entry(node, struct quic_arng_node, first);
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
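+
+/* Sizing example (illustrative only): for a single range [0..10], enc_sz is
+ * quic_int_getsize(10) + quic_int_getsize(10 - 0) + quic_int_getsize(0)
+ * = 1 + 1 + 1 = 3 bytes, since QUIC varints below 64 fit into a single byte.
+ */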
+
+/* Insert <ar> ack range into <arngs> tree of ack ranges.
+ * Returns the ack range node which has been inserted if succeeded, NULL if not.
+ */
+static inline
+struct quic_arng_node *quic_insert_new_range(struct quic_conn *qc,
+ struct quic_arngs *arngs,
+ struct quic_arng *ar)
+{
+ struct quic_arng_node *new_ar;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ new_ar = pool_alloc(pool_head_quic_arng);
+ if (!new_ar) {
+ TRACE_ERROR("ack range allocation failed", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+
+ new_ar->first.key = ar->first;
+ new_ar->last = ar->last;
+ eb64_insert(&arngs->root, &new_ar->first);
+ arngs->sz++;
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return new_ar;
+}
+
+/* Update <arngs> tree of ACK ranges with <ar> as new ACK range value.
+ * Note that this function computes the number of bytes required to encode
+ * this tree of ACK ranges in descending order.
+ *
+ * Descending order
+ * ------------->
+ * range1 range2
+ * ..........|--------|..............|--------|
+ * ^ ^ ^ ^
+ * | | | |
+ * last1 first1 last2 first2
+ * ..........+--------+--------------+--------+......
+ * diff1 gap12 diff2
+ *
+ * To encode the previous list of ranges we must encode integers as follows in
+ * descending order:
+ * enc(last1),enc(diff1),enc(gap12),enc(diff2)
+ * with diff1 = last1 - first1
+ * diff2 = last2 - first2
+ * gap12 = first1 - last2 - 2 (>= 0)
+ *
+ * Returns 1 if succeeded, 0 on error.
+ */
+int quic_update_ack_ranges_list(struct quic_conn *qc,
+ struct quic_arngs *arngs,
+ struct quic_arng *ar)
+{
+ int ret = 0;
+ struct eb64_node *le;
+ struct quic_arng_node *new_node;
+ struct eb64_node *new;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ new = NULL;
+ if (eb_is_empty(&arngs->root)) {
+ new_node = quic_insert_new_range(qc, arngs, ar);
+ if (new_node)
+ ret = 1;
+
+ goto leave;
+ }
+
+ le = eb64_lookup_le(&arngs->root, ar->first);
+ if (!le) {
+ new_node = quic_insert_new_range(qc, arngs, ar);
+ if (!new_node)
+ goto leave;
+
+ new = &new_node->first;
+ }
+ else {
+ struct quic_arng_node *le_ar =
+ eb64_entry(le, struct quic_arng_node, first);
+
+ /* Already existing range */
+ if (le_ar->last >= ar->last) {
+ ret = 1;
+ }
+ else if (le_ar->last + 1 >= ar->first) {
+ le_ar->last = ar->last;
+ new = le;
+ new_node = le_ar;
+ }
+ else {
+ new_node = quic_insert_new_range(qc, arngs, ar);
+ if (!new_node)
+ goto leave;
+
+ new = &new_node->first;
+ }
+ }
+
+ /* Verify that the newly inserted node does not overlap the nodes
+ * which follow it.
+ */
+ if (new) {
+ struct eb64_node *next;
+ struct quic_arng_node *next_node;
+
+ while ((next = eb64_next(new))) {
+ next_node =
+ eb64_entry(next, struct quic_arng_node, first);
+ if (new_node->last + 1 < next_node->first.key)
+ break;
+
+ if (next_node->last > new_node->last)
+ new_node->last = next_node->last;
+ eb64_delete(next);
+ pool_free(pool_head_quic_arng, next_node);
+ /* Decrement the number of these ranges. */
+ arngs->sz--;
+ }
+ }
+
+ ret = 1;
+ leave:
+ quic_arngs_set_enc_sz(qc, arngs);
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return ret;
+}
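+
+/* Merging example (illustrative only): starting from the ranges
+ * {[1..2], [5..7]}, inserting <ar> = [3..6] first extends [1..2] into [1..6]
+ * through the "le_ar->last + 1 >= ar->first" test, then the overlap loop
+ * above absorbs [5..7], leaving the single range [1..7].
+ */
+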
+/* Remove the header protection of packets at <el> encryption level.
+ * Always succeeds.
+ */
+static inline void qc_rm_hp_pkts(struct quic_conn *qc, struct quic_enc_level *el)
+{
+ struct quic_tls_ctx *tls_ctx;
+ struct quic_rx_packet *pqpkt, *pkttmp;
+ struct quic_enc_level *app_qel;
+
+ TRACE_ENTER(QUIC_EV_CONN_ELRMHP, qc);
+ app_qel = &qc->els[QUIC_TLS_ENC_LEVEL_APP];
+ /* A server must not process incoming 1-RTT packets before the handshake is complete. */
+ if (el == app_qel && qc_is_listener(qc) && qc->state < QUIC_HS_ST_COMPLETE) {
+ TRACE_DEVEL("hp not removed (handshake not completed)",
+ QUIC_EV_CONN_ELRMHP, qc);
+ goto out;
+ }
+ tls_ctx = &el->tls_ctx;
+ list_for_each_entry_safe(pqpkt, pkttmp, &el->rx.pqpkts, list) {
+ if (!qc_do_rm_hp(qc, pqpkt, tls_ctx, el->pktns->rx.largest_pn,
+ pqpkt->data + pqpkt->pn_offset, pqpkt->data)) {
+ TRACE_ERROR("hp removing error", QUIC_EV_CONN_ELRMHP, qc);
+ }
+ else {
+ /* The AAD includes the packet number field */
+ pqpkt->aad_len = pqpkt->pn_offset + pqpkt->pnl;
+ /* Store the packet into the tree of packets to decrypt. */
+ pqpkt->pn_node.key = pqpkt->pn;
+ eb64_insert(&el->rx.pkts, &pqpkt->pn_node);
+ quic_rx_packet_refinc(pqpkt);
+ TRACE_DEVEL("hp removed", QUIC_EV_CONN_ELRMHP, qc, pqpkt);
+ }
+ LIST_DELETE(&pqpkt->list);
+ quic_rx_packet_refdec(pqpkt);
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_ELRMHP, qc);
+}
+
+/* Process all the CRYPTO frames at <el> encryption level. It is the caller's
+ * responsibility to ensure that a CRYPTO data stream exists for this level.
+ * Return 1 if succeeded, 0 if not.
+ */
+static inline int qc_treat_rx_crypto_frms(struct quic_conn *qc,
+ struct quic_enc_level *el,
+ struct ssl_sock_ctx *ctx)
+{
+ int ret = 0;
+ struct ncbuf *ncbuf;
+ struct quic_cstream *cstream = el->cstream;
+ ncb_sz_t data;
+
+ TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc, el);
+
+ BUG_ON(!cstream);
+ ncbuf = &cstream->rx.ncbuf;
+ if (ncb_is_null(ncbuf))
+ goto done;
+
+ /* TODO not working if buffer is wrapping */
+ while ((data = ncb_data(ncbuf, 0))) {
+ const unsigned char *cdata = (const unsigned char *)ncb_head(ncbuf);
+
+ if (!qc_provide_cdata(el, ctx, cdata, data, NULL, NULL))
+ goto leave;
+
+ cstream->rx.offset += data;
+ TRACE_DEVEL("buffered crypto data were provided to TLS stack",
+ QUIC_EV_CONN_PHPKTS, qc, el);
+ }
+
+ done:
+ ret = 1;
+ leave:
+ if (!ncb_is_null(ncbuf) && ncb_is_empty(ncbuf)) {
+ TRACE_DEVEL("freeing crypto buf", QUIC_EV_CONN_PHPKTS, qc, el);
+ quic_free_ncbuf(ncbuf);
+ }
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+ return ret;
+}
+
+/* Process all the packets at <cur_el> and <next_el> encryption levels.
+ * It is the caller's responsibility to ensure that <cur_el> and <next_el>
+ * differ as pointer values.
+ * Return 1 if succeeded, 0 if not.
+ */
+int qc_treat_rx_pkts(struct quic_conn *qc, struct quic_enc_level *cur_el,
+ struct quic_enc_level *next_el)
+{
+ int ret = 0;
+ struct eb64_node *node;
+ int64_t largest_pn = -1;
+ unsigned int largest_pn_time_received = 0;
+ struct quic_enc_level *qel = cur_el;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+ next_tel:
+ if (!qel)
+ goto out;
+
+ node = eb64_first(&qel->rx.pkts);
+ while (node) {
+ struct quic_rx_packet *pkt;
+
+ pkt = eb64_entry(node, struct quic_rx_packet, pn_node);
+ TRACE_DATA("new packet", QUIC_EV_CONN_RXPKT,
+ qc, pkt, NULL, qc->xprt_ctx->ssl);
+ if (!qc_pkt_decrypt(pkt, qel, qc)) {
+ /* Drop the packet */
+ TRACE_ERROR("packet decryption failed -> dropped",
+ QUIC_EV_CONN_RXPKT, qc, pkt);
+ }
+ else {
+ if (!qc_parse_pkt_frms(qc, pkt, qel)) {
+ /* Drop the packet */
+ TRACE_ERROR("packet parsing failed -> dropped",
+ QUIC_EV_CONN_RXPKT, qc, pkt);
+ HA_ATOMIC_INC(&qc->prx_counters->dropped_parsing);
+ }
+ else {
+ struct quic_arng ar = { .first = pkt->pn, .last = pkt->pn };
+
+ if (pkt->flags & QUIC_FL_RX_PACKET_ACK_ELICITING) {
+ qel->pktns->flags |= QUIC_FL_PKTNS_ACK_REQUIRED;
+ qel->pktns->rx.nb_aepkts_since_last_ack++;
+ qc_idle_timer_rearm(qc, 1);
+ }
+ if (pkt->pn > largest_pn) {
+ largest_pn = pkt->pn;
+ largest_pn_time_received = pkt->time_received;
+ }
+ /* Update the list of ranges to acknowledge. */
+ if (!quic_update_ack_ranges_list(qc, &qel->pktns->rx.arngs, &ar))
+ TRACE_ERROR("Could not update ack range list",
+ QUIC_EV_CONN_RXPKT, qc);
+ }
+ }
+ node = eb64_next(node);
+ eb64_delete(&pkt->pn_node);
+ quic_rx_packet_refdec(pkt);
+ }
+
+ if (largest_pn != -1 && largest_pn > qel->pktns->rx.largest_pn) {
+ /* Update the largest packet number. */
+ qel->pktns->rx.largest_pn = largest_pn;
+ /* Update the largest acknowledged packet timestamps */
+ qel->pktns->rx.largest_time_received = largest_pn_time_received;
+ qel->pktns->flags |= QUIC_FL_PKTNS_NEW_LARGEST_PN;
+ }
+
+ if (qel->cstream && !qc_treat_rx_crypto_frms(qc, qel, qc->xprt_ctx)) {
+ // trace already emitted by function above
+ goto leave;
+ }
+
+ if (qel == cur_el) {
+ BUG_ON(qel == next_el);
+ qel = next_el;
+ largest_pn = -1;
+ goto next_tel;
+ }
+
+ out:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return ret;
+}
+
+/* Check if it's possible to remove header protection for packets related to
+ * encryption level <qel>. If <qel> is NULL, assume it's false.
+ *
+ * Return true if the operation is possible else false.
+ */
+static int qc_qel_may_rm_hp(struct quic_conn *qc, struct quic_enc_level *qel)
+{
+ int ret = 0;
+ enum quic_tls_enc_level tel;
+
+ TRACE_ENTER(QUIC_EV_CONN_TRMHP, qc);
+
+ if (!qel)
+ goto cant_rm_hp;
+
+ tel = ssl_to_quic_enc_level(qel->level);
+
+ /* check if tls secrets are available */
+ if (qel->tls_ctx.flags & QUIC_FL_TLS_SECRETS_DCD) {
+ TRACE_DEVEL("Discarded keys", QUIC_EV_CONN_TRMHP, qc);
+ goto cant_rm_hp;
+ }
+
+ if (!quic_tls_has_rx_sec(qel)) {
+ TRACE_DEVEL("non available secrets", QUIC_EV_CONN_TRMHP, qc);
+ goto cant_rm_hp;
+ }
+
+ if (tel == QUIC_TLS_ENC_LEVEL_APP && qc->state < QUIC_HS_ST_COMPLETE) {
+ TRACE_DEVEL("handshake not complete", QUIC_EV_CONN_TRMHP, qc);
+ goto cant_rm_hp;
+ }
+
+ /* check if the connection layer is ready before using app level */
+ if ((tel == QUIC_TLS_ENC_LEVEL_APP || tel == QUIC_TLS_ENC_LEVEL_EARLY_DATA) &&
+ qc->mux_state == QC_MUX_NULL) {
+ TRACE_DEVEL("connection layer not ready", QUIC_EV_CONN_TRMHP, qc);
+ goto cant_rm_hp;
+ }
+
+ ret = 1;
+ cant_rm_hp:
+ TRACE_LEAVE(QUIC_EV_CONN_TRMHP, qc);
+ return ret;
+}
+
+/* Try to send application frames from list <frms> on connection <qc>.
+ *
+ * Use qc_send_app_probing wrapper when probing with old data.
+ *
+ * Returns 1 on success. Some data might not have been sent due to congestion,
+ * in this case they are left in <frms> input list. The caller may subscribe on
+ * quic-conn to retry later.
+ *
+ * Returns 0 on critical error.
+ * TODO review and classify more distinctly transient and definitive errors to
+ * allow callers to properly handle them.
+ */
+static int qc_send_app_pkts(struct quic_conn *qc, struct list *frms)
+{
+ int status = 0;
+ struct buffer *buf;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ buf = qc_txb_alloc(qc);
+ if (!buf) {
+ TRACE_ERROR("buffer allocation failed", QUIC_EV_CONN_TXPKT, qc);
+ goto leave;
+ }
+
+ /* Prepare and send packets until no more packets can be prepared. */
+ while (1) {
+ int ret;
+ /* buf must be empty at this stage. Even if a
+ * previous sendto() has failed it is emptied to simulate
+ * packet emission and rely on QUIC loss detection to try to
+ * emit it again.
+ */
+ BUG_ON_HOT(b_data(buf));
+ b_reset(buf);
+
+ ret = qc_prep_app_pkts(qc, buf, frms);
+ if (ret == -1)
+ goto err;
+ else if (ret == 0)
+ goto out;
+
+ if (!qc_send_ppkts(buf, qc->xprt_ctx))
+ goto err;
+ }
+
+ out:
+ status = 1;
+ qc_txb_release(qc);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return status;
+
+ err:
+ qc_txb_release(qc);
+ goto leave;
+}
+
+/* Try to send application frames from list <frms> on connection <qc>. Use this
+ * function when probing is required.
+ *
+ * Returns the result from qc_send_app_pkts function.
+ */
+static forceinline int qc_send_app_probing(struct quic_conn *qc,
+ struct list *frms)
+{
+ int ret;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ TRACE_STATE("preparing old data (probing)", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA;
+ ret = qc_send_app_pkts(qc, frms);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA;
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Try to send application frames from list <frms> on connection <qc>. This
+ * function is provided for MUX upper layer usage only.
+ *
+ * Returns the result from qc_send_app_pkts function.
+ */
+int qc_send_mux(struct quic_conn *qc, struct list *frms)
+{
+ int ret;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+ BUG_ON(qc->mux_state != QC_MUX_READY); /* Only the MUX can use this function so it must be ready. */
+
+ TRACE_STATE("preparing data (from MUX)", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_TX_MUX_CONTEXT;
+ ret = qc_send_app_pkts(qc, frms);
+ qc->flags &= ~QUIC_FL_CONN_TX_MUX_CONTEXT;
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Send handshake packets from up to two encryption levels <tel> and <next_tel>
+ * with <tel_frms> and <next_tel_frms> as their respective frame lists for <qc>
+ * QUIC connection. The <old_data> boolean, when set, asks to send data already
+ * sent but not yet acknowledged (in flight).
+ * Returns 1 if succeeded, 0 if not.
+ */
+int qc_send_hdshk_pkts(struct quic_conn *qc, int old_data,
+ enum quic_tls_enc_level tel, struct list *tel_frms,
+ enum quic_tls_enc_level next_tel, struct list *next_tel_frms)
+{
+ int ret, status = 0;
+ struct buffer *buf = qc_txb_alloc(qc);
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (!buf) {
+ TRACE_ERROR("buffer allocation failed", QUIC_EV_CONN_TXPKT, qc);
+ goto leave;
+ }
+
+ /* buf must be empty at this stage. Even if a previous
+ * sendto() has failed it is emptied to simulate packet emission and
+ * rely on QUIC loss detection to try to emit it again.
+ */
+ BUG_ON_HOT(b_data(buf));
+ b_reset(buf);
+
+ if (old_data) {
+ TRACE_STATE("old data for probing asked", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA;
+ }
+
+ ret = qc_prep_pkts(qc, buf, tel, tel_frms, next_tel, next_tel_frms);
+ if (ret == -1)
+ goto out;
+ else if (ret == 0)
+ goto skip_send;
+
+ if (!qc_send_ppkts(buf, qc->xprt_ctx))
+ goto out;
+
+ skip_send:
+ status = 1;
+ out:
+ TRACE_STATE("no more need old data for probing", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA;
+ qc_txb_release(qc);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return status;
+}
+
+/* Retransmit up to two datagrams depending on the packet number space. */
+static void qc_dgrams_retransmit(struct quic_conn *qc)
+{
+ struct quic_enc_level *iqel = &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL];
+ struct quic_enc_level *hqel = &qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE];
+ struct quic_enc_level *aqel = &qc->els[QUIC_TLS_ENC_LEVEL_APP];
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (iqel->pktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED) {
+ int i;
+
+ for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) {
+ struct list ifrms = LIST_HEAD_INIT(ifrms);
+ struct list hfrms = LIST_HEAD_INIT(hfrms);
+
+ qc_prep_hdshk_fast_retrans(qc, &ifrms, &hfrms);
+ TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &ifrms);
+ TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &hfrms);
+ if (!LIST_ISEMPTY(&ifrms)) {
+ iqel->pktns->tx.pto_probe = 1;
+ if (!LIST_ISEMPTY(&hfrms))
+ hqel->pktns->tx.pto_probe = 1;
+ qc_send_hdshk_pkts(qc, 1, QUIC_TLS_ENC_LEVEL_INITIAL, &ifrms,
+ QUIC_TLS_ENC_LEVEL_HANDSHAKE, &hfrms);
+ /* Put back unsent frames in their packet number spaces */
+ LIST_SPLICE(&iqel->pktns->tx.frms, &ifrms);
+ LIST_SPLICE(&hqel->pktns->tx.frms, &hfrms);
+ }
+ }
+ TRACE_STATE("no more need to probe Initial packet number space",
+ QUIC_EV_CONN_TXPKT, qc);
+ iqel->pktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ hqel->pktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ else {
+ int i;
+
+ if (hqel->pktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED) {
+ hqel->pktns->tx.pto_probe = 0;
+ for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) {
+ struct list frms1 = LIST_HEAD_INIT(frms1);
+
+ qc_prep_fast_retrans(qc, hqel, &frms1, NULL);
+ TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms1);
+ if (!LIST_ISEMPTY(&frms1)) {
+ hqel->pktns->tx.pto_probe = 1;
+ qc_send_hdshk_pkts(qc, 1, QUIC_TLS_ENC_LEVEL_HANDSHAKE, &frms1,
+ QUIC_TLS_ENC_LEVEL_NONE, NULL);
+ /* Put back unsent frames into their packet number spaces */
+ LIST_SPLICE(&hqel->pktns->tx.frms, &frms1);
+ }
+ }
+ TRACE_STATE("no more need to probe Handshake packet number space",
+ QUIC_EV_CONN_TXPKT, qc);
+ hqel->pktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ else if (aqel->pktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED) {
+ struct list frms2 = LIST_HEAD_INIT(frms2);
+ struct list frms1 = LIST_HEAD_INIT(frms1);
+
+ aqel->pktns->tx.pto_probe = 0;
+ qc_prep_fast_retrans(qc, aqel, &frms1, &frms2);
+ TRACE_PROTO("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms1);
+ TRACE_PROTO("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms2);
+ if (!LIST_ISEMPTY(&frms1)) {
+ aqel->pktns->tx.pto_probe = 1;
+ qc_send_app_probing(qc, &frms1);
+ /* Put back unsent frames into their packet number spaces */
+ LIST_SPLICE(&aqel->pktns->tx.frms, &frms1);
+ }
+ if (!LIST_ISEMPTY(&frms2)) {
+ aqel->pktns->tx.pto_probe = 1;
+ qc_send_app_probing(qc, &frms2);
+ /* Put back unsent frames into their packet number spaces */
+ LIST_SPLICE(&aqel->pktns->tx.frms, &frms2);
+ }
+ TRACE_STATE("no more need to probe 01RTT packet number space",
+ QUIC_EV_CONN_TXPKT, qc);
+ aqel->pktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ }
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
+
+/* QUIC connection packet handler task (post handshake) */
+struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int state)
+{
+ struct quic_conn *qc = context;
+ struct quic_enc_level *qel;
+
+ qel = &qc->els[QUIC_TLS_ENC_LEVEL_APP];
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+ TRACE_STATE("connection handshake state", QUIC_EV_CONN_IO_CB, qc, &qc->state);
+
+ /* Retransmissions */
+ if (qc->flags & QUIC_FL_CONN_RETRANS_NEEDED) {
+ TRACE_STATE("retransmission needed", QUIC_EV_CONN_IO_CB, qc);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_NEEDED;
+ qc_dgrams_retransmit(qc);
+ }
+
+ if (!LIST_ISEMPTY(&qel->rx.pqpkts) && qc_qel_may_rm_hp(qc, qel))
+ qc_rm_hp_pkts(qc, qel);
+
+ if (!qc_treat_rx_pkts(qc, qel, NULL)) {
+ TRACE_DEVEL("qc_treat_rx_pkts() failed", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ if ((qc->flags & QUIC_FL_CONN_DRAINING) &&
+ !(qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE)) {
+ TRACE_STATE("draining connection (must not send packets)", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ /* XXX TODO: how to limit the list of frames to send */
+ if (!qc_send_app_pkts(qc, &qel->pktns->tx.frms)) {
+ TRACE_DEVEL("qc_send_app_pkts() failed", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB, qc);
+ return t;
+}
+
+/* Returns a boolean stating whether <qc> needs to emit frames for <qel> encryption level. */
+static int qc_need_sending(struct quic_conn *qc, struct quic_enc_level *qel)
+{
+ return (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) ||
+ (qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) ||
+ qel->pktns->tx.pto_probe ||
+ !LIST_ISEMPTY(&qel->pktns->tx.frms);
+}
+
+/* QUIC connection packet handler task. */
+struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state)
+{
+ int ret, ssl_err;
+ struct quic_conn *qc = context;
+ enum quic_tls_enc_level tel, next_tel;
+ struct quic_enc_level *qel, *next_qel;
+ /* Early-data encryption level */
+ struct quic_enc_level *eqel;
+ struct buffer *buf = NULL;
+ int st, zero_rtt;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+ eqel = &qc->els[QUIC_TLS_ENC_LEVEL_EARLY_DATA];
+ st = qc->state;
+ TRACE_PROTO("connection state", QUIC_EV_CONN_IO_CB, qc, &st);
+
+ /* Retransmissions */
+ if (qc->flags & QUIC_FL_CONN_RETRANS_NEEDED) {
+ TRACE_DEVEL("retransmission needed", QUIC_EV_CONN_PHPKTS, qc);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_NEEDED;
+ qc_dgrams_retransmit(qc);
+ }
+
+ ssl_err = SSL_ERROR_NONE;
+ zero_rtt = st < QUIC_HS_ST_COMPLETE &&
+ quic_tls_has_rx_sec(eqel) &&
+ (!LIST_ISEMPTY(&eqel->rx.pqpkts) || qc_el_rx_pkts(eqel));
+ start:
+ if (st >= QUIC_HS_ST_COMPLETE &&
+ qc_el_rx_pkts(&qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE])) {
+ TRACE_DEVEL("remaining Handshake packets", QUIC_EV_CONN_PHPKTS, qc);
+ /* There may be remaining Handshake packets to treat and acknowledge. */
+ tel = QUIC_TLS_ENC_LEVEL_HANDSHAKE;
+ next_tel = QUIC_TLS_ENC_LEVEL_APP;
+ }
+ else if (!quic_get_tls_enc_levels(&tel, &next_tel, qc, st, zero_rtt))
+ goto out;
+
+ qel = &qc->els[tel];
+ next_qel = next_tel == QUIC_TLS_ENC_LEVEL_NONE ? NULL : &qc->els[next_tel];
+
+ next_level:
+ /* Treat packets waiting for header packet protection decryption */
+ if (!LIST_ISEMPTY(&qel->rx.pqpkts) && qc_qel_may_rm_hp(qc, qel))
+ qc_rm_hp_pkts(qc, qel);
+
+ if (!qc_treat_rx_pkts(qc, qel, next_qel))
+ goto out;
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_PHPKTS, qc);
+ goto out;
+ }
+
+ if ((qc->flags & QUIC_FL_CONN_DRAINING) &&
+ !(qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE))
+ goto out;
+
+ zero_rtt = st < QUIC_HS_ST_COMPLETE &&
+ quic_tls_has_rx_sec(eqel) &&
+ (!LIST_ISEMPTY(&eqel->rx.pqpkts) || qc_el_rx_pkts(eqel));
+ if (next_qel && next_qel == eqel && zero_rtt) {
+ TRACE_DEVEL("select 0RTT as next encryption level",
+ QUIC_EV_CONN_PHPKTS, qc);
+ qel = next_qel;
+ next_qel = NULL;
+ goto next_level;
+ }
+
+ st = qc->state;
+ if (st >= QUIC_HS_ST_COMPLETE) {
+ if (!(qc->flags & QUIC_FL_CONN_POST_HANDSHAKE_FRAMES_BUILT) &&
+ !quic_build_post_handshake_frames(qc))
+ goto out;
+
+ if (!(qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE].tls_ctx.flags &
+ QUIC_FL_TLS_SECRETS_DCD)) {
+ /* Discard the Handshake keys. */
+ quic_tls_discard_keys(&qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE]);
+ TRACE_PROTO("discarding Handshake pktns", QUIC_EV_CONN_PHPKTS, qc);
+ quic_pktns_discard(qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE].pktns, qc);
+ qc_set_timer(qc);
+ qc_el_rx_pkts_del(&qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE]);
+ qc_release_pktns_frms(qc, qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE].pktns);
+ }
+
+ if (qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE].pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) {
+ /* There may be remaining handshake to build (acks) */
+ st = QUIC_HS_ST_SERVER_HANDSHAKE;
+ }
+ }
+
+ /* A listener does not send any 0-RTT packet. The 0-RTT packet number space
+ * must not be considered.
+ */
+ if (!quic_get_tls_enc_levels(&tel, &next_tel, qc, st, 0))
+ goto out;
+
+ if (!qc_need_sending(qc, qel) &&
+ (!next_qel || !qc_need_sending(qc, next_qel))) {
+ goto skip_send;
+ }
+
+ buf = qc_txb_alloc(qc);
+ if (!buf)
+ goto out;
+
+ /* buf must be empty at this stage. Even if a previous
+ * sendto() has failed it is emptied to simulate packet emission and
+ * rely on QUIC loss detection to try to emit it again.
+ */
+ BUG_ON_HOT(b_data(buf));
+ b_reset(buf);
+
+ ret = qc_prep_pkts(qc, buf, tel, &qc->els[tel].pktns->tx.frms,
+ next_tel, &qc->els[next_tel].pktns->tx.frms);
+ if (ret == -1)
+ goto out;
+ else if (ret == 0)
+ goto skip_send;
+
+ if (!qc_send_ppkts(buf, qc->xprt_ctx))
+ goto out;
+
+ skip_send:
+ /* Check if there is something to do for the next level.
+ */
+ if (next_qel && next_qel != qel &&
+ quic_tls_has_rx_sec(next_qel) &&
+ (!LIST_ISEMPTY(&next_qel->rx.pqpkts) || qc_el_rx_pkts(next_qel))) {
+ qel = next_qel;
+ next_qel = NULL;
+ goto next_level;
+ }
+
+ out:
+ qc_txb_release(qc);
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB, qc, &st, &ssl_err);
+ return t;
+}
+
+/* Release the memory allocated for <cs> CRYPTO stream */
+void quic_cstream_free(struct quic_cstream *cs)
+{
+ if (!cs) {
+ /* This is the case for the 0-RTT encryption level */
+ return;
+ }
+
+ quic_free_ncbuf(&cs->rx.ncbuf);
+
+ qc_stream_desc_release(cs->desc);
+ pool_free(pool_head_quic_cstream, cs);
+}
+
+/* Allocate a new CRYPTO stream for <qc>.
+ * Return it if succeeded, NULL if not.
+ */
+struct quic_cstream *quic_cstream_new(struct quic_conn *qc)
+{
+ struct quic_cstream *cs, *ret_cs = NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+ cs = pool_alloc(pool_head_quic_cstream);
+ if (!cs) {
+ TRACE_ERROR("crypto stream allocation failed", QUIC_EV_CONN_INIT, qc);
+ goto leave;
+ }
+
+ cs->rx.offset = 0;
+ cs->rx.ncbuf = NCBUF_NULL;
+
+ cs->tx.offset = 0;
+ cs->tx.sent_offset = 0;
+ cs->tx.buf = BUF_NULL;
+ cs->desc = qc_stream_desc_new((uint64_t)-1, -1, cs, qc);
+ if (!cs->desc) {
+ TRACE_ERROR("crypto stream allocation failed", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ ret_cs = cs;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return ret_cs;
+
+ err:
+ pool_free(pool_head_quic_cstream, cs);
+ goto leave;
+}
+
+/* Uninitialize <qel> QUIC encryption level. Never fails. */
+static void quic_conn_enc_level_uninit(struct quic_conn *qc, struct quic_enc_level *qel)
+{
+ int i;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ for (i = 0; i < qel->tx.crypto.nb_buf; i++) {
+ if (qel->tx.crypto.bufs[i]) {
+ pool_free(pool_head_quic_crypto_buf, qel->tx.crypto.bufs[i]);
+ qel->tx.crypto.bufs[i] = NULL;
+ }
+ }
+ ha_free(&qel->tx.crypto.bufs);
+ quic_cstream_free(qel->cstream);
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Initialize the QUIC TLS encryption level <level> for <qc> QUIC connection,
+ * allocating everything needed.
+ *
+ * Returns 1 if succeeded, 0 if not. On error the caller is responsible to use
+ * quic_conn_enc_level_uninit() to cleanup partially allocated content.
+ */
+static int quic_conn_enc_level_init(struct quic_conn *qc,
+ enum quic_tls_enc_level level)
+{
+ int ret = 0;
+ struct quic_enc_level *qel;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ qel = &qc->els[level];
+ qel->level = quic_to_ssl_enc_level(level);
+ qel->tls_ctx.rx.aead = qel->tls_ctx.tx.aead = NULL;
+ qel->tls_ctx.rx.md = qel->tls_ctx.tx.md = NULL;
+ qel->tls_ctx.rx.hp = qel->tls_ctx.tx.hp = NULL;
+ qel->tls_ctx.flags = 0;
+
+ qel->rx.pkts = EB_ROOT;
+ LIST_INIT(&qel->rx.pqpkts);
+
+ /* Allocate only one buffer. */
+ /* TODO: use a pool */
+ qel->tx.crypto.bufs = malloc(sizeof *qel->tx.crypto.bufs);
+ if (!qel->tx.crypto.bufs)
+ goto leave;
+
+ qel->tx.crypto.bufs[0] = pool_alloc(pool_head_quic_crypto_buf);
+ if (!qel->tx.crypto.bufs[0])
+ goto leave;
+
+ qel->tx.crypto.bufs[0]->sz = 0;
+ qel->tx.crypto.nb_buf = 1;
+
+ qel->tx.crypto.sz = 0;
+ qel->tx.crypto.offset = 0;
+ /* No CRYPTO data for early data TLS encryption level */
+ if (level == QUIC_TLS_ENC_LEVEL_EARLY_DATA)
+ qel->cstream = NULL;
+ else {
+ qel->cstream = quic_cstream_new(qc);
+ if (!qel->cstream)
+ goto leave;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+ return ret;
+}
+
+/* Return 1 if <qc> connection may probe the Initial packet number space, 0 if not.
+ * This is not the case if the remote peer address has not been validated and
+ * the connection cannot send at least QUIC_INITIAL_PACKET_MINLEN bytes.
+ */
+static int qc_may_probe_ipktns(struct quic_conn *qc)
+{
+ return quic_peer_validated_addr(qc) ||
+ (int)(3 * qc->rx.bytes - qc->tx.prep_bytes) >= QUIC_INITIAL_PACKET_MINLEN;
+}
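+
+/* Illustrative figures: with 1200 bytes received from a not yet validated
+ * peer and 2000 bytes already prepared, the remaining anti-amplification
+ * credit is 3 * 1200 - 2000 = 1600 bytes, which is >=
+ * QUIC_INITIAL_PACKET_MINLEN (the 1200-byte minimum quoted above), so the
+ * Initial packet number space may be probed.
+ */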
+
+/* Callback called upon loss detection and PTO timer expirations. */
+struct task *qc_process_timer(struct task *task, void *ctx, unsigned int state)
+{
+ struct quic_conn *qc = ctx;
+ struct quic_pktns *pktns;
+
+ TRACE_ENTER(QUIC_EV_CONN_PTIMER, qc,
+ NULL, NULL, &qc->path->ifae_pkts);
+ task->expire = TICK_ETERNITY;
+ pktns = quic_loss_pktns(qc);
+ if (tick_isset(pktns->tx.loss_time)) {
+ struct list lost_pkts = LIST_HEAD_INIT(lost_pkts);
+
+ qc_packet_loss_lookup(pktns, qc, &lost_pkts);
+ if (!LIST_ISEMPTY(&lost_pkts))
+ tasklet_wakeup(qc->wait_event.tasklet);
+ qc_release_lost_pkts(qc, pktns, &lost_pkts, now_ms);
+ qc_set_timer(qc);
+ goto out;
+ }
+
+ if (qc->path->in_flight) {
+ pktns = quic_pto_pktns(qc, qc->state >= QUIC_HS_ST_CONFIRMED, NULL);
+ if (pktns == &qc->pktns[QUIC_TLS_PKTNS_INITIAL]) {
+ if (qc_may_probe_ipktns(qc)) {
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ pktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ TRACE_STATE("needs to probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ else {
+ TRACE_STATE("Cannot probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ if (qc->pktns[QUIC_TLS_PKTNS_HANDSHAKE].tx.in_flight) {
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ qc->pktns[QUIC_TLS_PKTNS_HANDSHAKE].flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ TRACE_STATE("needs to probe Handshake packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ }
+ else if (pktns == &qc->pktns[QUIC_TLS_PKTNS_HANDSHAKE]) {
+ TRACE_STATE("needs to probe Handshake packet number space", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ pktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ if (qc->pktns[QUIC_TLS_PKTNS_INITIAL].tx.in_flight) {
+ if (qc_may_probe_ipktns(qc)) {
+ qc->pktns[QUIC_TLS_PKTNS_INITIAL].flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ TRACE_STATE("needs to probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ else {
+ TRACE_STATE("Cannot probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ }
+ }
+ else if (pktns == &qc->pktns[QUIC_TLS_PKTNS_01RTT]) {
+ /* Wake up upper layer if waiting to send new data. */
+ if (qc->subs && qc->subs->events & SUB_RETRY_SEND) {
+ pktns->tx.pto_probe = QUIC_MAX_NB_PTO_DGRAMS;
+ tasklet_wakeup(qc->subs->tasklet);
+ qc->subs->events &= ~SUB_RETRY_SEND;
+ if (!qc->subs->events)
+ qc->subs = NULL;
+ }
+ else {
+ TRACE_STATE("needs to probe 01RTT packet number space", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ pktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ }
+ }
+ else if (!qc_is_listener(qc) && qc->state <= QUIC_HS_ST_COMPLETE) {
+ struct quic_enc_level *iel = &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL];
+ struct quic_enc_level *hel = &qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE];
+
+ if (quic_tls_has_tx_sec(hel))
+ hel->pktns->tx.pto_probe = 1;
+ if (quic_tls_has_tx_sec(iel))
+ iel->pktns->tx.pto_probe = 1;
+ }
+
+ tasklet_wakeup(qc->wait_event.tasklet);
+ qc->path->loss.pto_count++;
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_PTIMER, qc, pktns);
+
+ return task;
+}
+
+/* Parse the Retry token from buffer <token>, with <end> a pointer to
+ * one byte past the end of this buffer. This extracts the ODCID which
+ * is stored into <odcid>.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int parse_retry_token(struct quic_conn *qc,
+ const unsigned char *token, const unsigned char *end,
+ struct quic_cid *odcid)
+{
+ int ret = 0;
+ uint64_t odcid_len;
+ uint32_t timestamp;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+
+ if (!quic_dec_int(&odcid_len, &token, end)) {
+ TRACE_ERROR("quic_dec_int() error", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ /* RFC 9000 7.2. Negotiating Connection IDs:
+ * When an Initial packet is sent by a client that has not previously
+ * received an Initial or Retry packet from the server, the client
+ * populates the Destination Connection ID field with an unpredictable
+ * value. This Destination Connection ID MUST be at least 8 bytes in length.
+ */
+ if (odcid_len < QUIC_ODCID_MINLEN || odcid_len > QUIC_CID_MAXLEN) {
+ TRACE_ERROR("wrong ODCID length", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ if (end - token < odcid_len + sizeof timestamp) {
+ TRACE_ERROR("too long ODCID length", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ timestamp = ntohl(read_u32(token + odcid_len));
+ if (timestamp + MS_TO_TICKS(QUIC_RETRY_DURATION_MS) <= now_ms) {
+ TRACE_ERROR("token has expired", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ ret = 1;
+ memcpy(odcid->data, token, odcid_len);
+ odcid->len = odcid_len;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return !ret;
+}
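+
+/* Token layout as consumed by the parser above (illustrative; any other
+ * token fields, e.g. its integrity tag, are assumed to be handled elsewhere):
+ *
+ *   | varint odcid_len | ODCID (8..20 bytes) | timestamp (4 bytes, BE) |
+ */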
+
+/* Allocate a new QUIC connection with <qv> as QUIC version. <ipv4>
+ * boolean is set to 1 for IPv4 connection, 0 for IPv6. <server> is set to 1
+ * for QUIC servers (or haproxy listeners).
+ * <dcid> is the destination connection ID and <scid> is the source connection
+ * ID. <token> is a boolean set to 1 if a Retry token was received, with
+ * <token_odcid> as the ODCID extracted from it. Endpoint addresses are
+ * specified via <local_addr> and <peer_addr>.
+ * Returns the connection if succeeded, NULL if not.
+ */
+static struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4,
+ struct quic_cid *dcid, struct quic_cid *scid,
+ const struct quic_cid *token_odcid,
+ struct sockaddr_storage *local_addr,
+ struct sockaddr_storage *peer_addr,
+ int server, int token, void *owner)
+{
+ int i;
+ struct quic_conn *qc;
+ /* Initial CID. */
+ struct quic_connection_id *icid;
+ char *buf_area = NULL;
+ struct listener *l = NULL;
+ struct quic_cc_algo *cc_algo = NULL;
+ struct quic_tls_ctx *ictx;
+ TRACE_ENTER(QUIC_EV_CONN_INIT);
+ /* TODO replace pool_zalloc by pool_alloc(). This requires special care
+ * to properly initialize internal quic_conn members to safely use
+ * quic_conn_release() on alloc failure.
+ */
+ qc = pool_zalloc(pool_head_quic_conn);
+ if (!qc) {
+ TRACE_ERROR("Could not allocate a new connection", QUIC_EV_CONN_INIT);
+ goto err;
+ }
+
+ /* Initialize in priority qc members required for a safe dealloc. */
+
+ /* required to use MTLIST_IN_LIST */
+ MT_LIST_INIT(&qc->accept_list);
+
+ LIST_INIT(&qc->rx.pkt_list);
+
+ /* Now proceed to the allocation of qc members. */
+
+ buf_area = pool_alloc(pool_head_quic_conn_rxbuf);
+ if (!buf_area) {
+ TRACE_ERROR("Could not allocate a new RX buffer", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ qc->cids = EB_ROOT;
+ /* QUIC Server (or listener). */
+ if (server) {
+ struct proxy *prx;
+
+ l = owner;
+ prx = l->bind_conf->frontend;
+ cc_algo = l->bind_conf->quic_cc_algo;
+
+ qc->prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe,
+ &quic_stats_module);
+ qc->flags |= QUIC_FL_CONN_LISTENER;
+ qc->state = QUIC_HS_ST_SERVER_INITIAL;
+ /* Copy the initial DCID with the address. */
+ qc->odcid.len = dcid->len;
+ qc->odcid.addrlen = dcid->addrlen;
+ memcpy(qc->odcid.data, dcid->data, dcid->len + dcid->addrlen);
+
+ /* copy the packet SCID to reuse it as DCID for sending */
+ if (scid->len)
+ memcpy(qc->dcid.data, scid->data, scid->len);
+ qc->dcid.len = scid->len;
+ qc->tx.buf = BUF_NULL;
+ qc->li = l;
+ }
+ /* QUIC Client (outgoing connection to servers) */
+ else {
+ qc->state = QUIC_HS_ST_CLIENT_INITIAL;
+ if (dcid->len)
+ memcpy(qc->dcid.data, dcid->data, dcid->len);
+ qc->dcid.len = dcid->len;
+ }
+ qc->mux_state = QC_MUX_NULL;
+ qc->err = quic_err_transport(QC_ERR_NO_ERROR);
+
+ icid = new_quic_cid(&qc->cids, qc, 0);
+ if (!icid) {
+ TRACE_ERROR("Could not allocate a new connection ID", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ /* insert the allocated CID in the receiver datagram handler tree */
+ if (server)
+ ebmb_insert(&quic_dghdlrs[tid].cids, &icid->node, icid->cid.len);
+
+ /* Select our SCID which is the first CID with 0 as sequence number. */
+ qc->scid = icid->cid;
+
+ /* Packet number spaces initialization. */
+ for (i = 0; i < QUIC_TLS_PKTNS_MAX; i++)
+ quic_pktns_init(&qc->pktns[i]);
+ /* QUIC encryption level context initialization. */
+ for (i = 0; i < QUIC_TLS_ENC_LEVEL_MAX; i++) {
+ if (!quic_conn_enc_level_init(qc, i)) {
+ TRACE_ERROR("Could not initialize an encryption level", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+ /* Initialize the packet number space. */
+ qc->els[i].pktns = &qc->pktns[quic_tls_pktns(i)];
+ }
+
+ qc->original_version = qv;
+ qc->tps_tls_ext = (qc->original_version->num & 0xff000000) == 0xff000000 ?
+ TLS_EXTENSION_QUIC_TRANSPORT_PARAMETERS_DRAFT:
+ TLS_EXTENSION_QUIC_TRANSPORT_PARAMETERS;
+ /* TX part. */
+ LIST_INIT(&qc->tx.frms_to_send);
+ qc->tx.nb_buf = QUIC_CONN_TX_BUFS_NB;
+ qc->tx.wbuf = qc->tx.rbuf = 0;
+ qc->tx.bytes = 0;
+ qc->tx.buf = BUF_NULL;
+ /* RX part. */
+ qc->rx.bytes = 0;
+ qc->rx.buf = b_make(buf_area, QUIC_CONN_RX_BUFSZ, 0, 0);
+ for (i = 0; i < QCS_MAX_TYPES; i++)
+ qc->rx.strms[i].nb_streams = 0;
+
+ qc->nb_pkt_for_cc = 1;
+ qc->nb_pkt_since_cc = 0;
+
+ if (!quic_tls_ku_init(qc)) {
+ TRACE_ERROR("Key update initialization failed", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ /* XXX TO DO: Only one path at this time. */
+ qc->path = &qc->paths[0];
+ quic_path_init(qc->path, ipv4, cc_algo ? cc_algo : default_quic_cc_algo, qc);
+
+ qc->streams_by_id = EB_ROOT_UNIQUE;
+ qc->stream_buf_count = 0;
+ memcpy(&qc->local_addr, local_addr, sizeof(qc->local_addr));
+ memcpy(&qc->peer_addr, peer_addr, sizeof qc->peer_addr);
+
+ if (server && !qc_lstnr_params_init(qc, &l->bind_conf->quic_params,
+ icid->stateless_reset_token,
+ dcid->data, dcid->len,
+ qc->scid.data, qc->scid.len, token_odcid))
+ goto err;
+
+ qc->wait_event.tasklet = tasklet_new();
+ if (!qc->wait_event.tasklet) {
+ TRACE_ERROR("tasklet_new() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+ qc->wait_event.tasklet->process = quic_conn_io_cb;
+ qc->wait_event.tasklet->context = qc;
+ qc->wait_event.events = 0;
+ /* Set tasklet tid based on the SCID selected by us for this
+ * connection. The upper layer will also be bound to the same thread.
+ */
+ qc->tid = qc->wait_event.tasklet->tid = quic_get_cid_tid(qc->scid.data);
+ qc->subs = NULL;
+
+ if (qc_conn_alloc_ssl_ctx(qc) ||
+ !quic_conn_init_timer(qc) ||
+ !quic_conn_init_idle_timer_task(qc))
+ goto err;
+
+ ictx = &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL].tls_ctx;
+ if (!qc_new_isecs(qc, ictx, qc->original_version, dcid->data, dcid->len, 1))
+ goto err;
+
+ TRACE_LEAVE(QUIC_EV_CONN_INIT, qc);
+
+ return qc;
+
+ err:
+ pool_free(pool_head_quic_conn_rxbuf, buf_area);
+ if (qc) {
+ qc->rx.buf.area = NULL;
+ quic_conn_release(qc);
+ }
+ TRACE_LEAVE(QUIC_EV_CONN_INIT);
+ return NULL;
+}
+
+/* Release the quic_conn <qc>. The connection is removed from the CIDs tree.
+ * The connection tasklet is killed.
+ *
+ * This function must only be called by the thread responsible for the quic_conn
+ * tasklet.
+ */
+void quic_conn_release(struct quic_conn *qc)
+{
+ int i;
+ struct ssl_sock_ctx *conn_ctx;
+ struct eb64_node *node;
+ struct quic_tls_ctx *app_tls_ctx;
+ struct quic_rx_packet *pkt, *pktback;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ /* We must not free the quic-conn if the MUX is still allocated. */
+ BUG_ON(qc->mux_state == QC_MUX_READY);
+
+ /* in the unlikely (but possible) case the connection was just added to
+ * the accept_list we must delete it from there.
+ */
+ MT_LIST_DELETE(&qc->accept_list);
+
+ /* free remaining stream descriptors */
+ node = eb64_first(&qc->streams_by_id);
+ while (node) {
+ struct qc_stream_desc *stream;
+
+ stream = eb64_entry(node, struct qc_stream_desc, by_id);
+ node = eb64_next(node);
+
+ /* all streams attached to the quic-conn are released, so
+ * qc_stream_desc_free will free the stream instance.
+ */
+ BUG_ON(!stream->release);
+ qc_stream_desc_free(stream, 1);
+ }
+
+ /* Purge Rx packet list. */
+ list_for_each_entry_safe(pkt, pktback, &qc->rx.pkt_list, qc_rx_pkt_list) {
+ LIST_DELETE(&pkt->qc_rx_pkt_list);
+ pool_free(pool_head_quic_rx_packet, pkt);
+ }
+
+ if (qc->idle_timer_task) {
+ task_destroy(qc->idle_timer_task);
+ qc->idle_timer_task = NULL;
+ }
+
+ if (qc->timer_task) {
+ task_destroy(qc->timer_task);
+ qc->timer_task = NULL;
+ }
+
+ if (qc->wait_event.tasklet)
+ tasklet_free(qc->wait_event.tasklet);
+
+ /* remove the connection from receiver cids trees */
+ ebmb_delete(&qc->odcid_node);
+ ebmb_delete(&qc->scid_node);
+ free_quic_conn_cids(qc);
+
+ conn_ctx = qc->xprt_ctx;
+ if (conn_ctx) {
+ SSL_free(conn_ctx->ssl);
+ pool_free(pool_head_quic_conn_ctx, conn_ctx);
+ }
+
+ quic_tls_ku_free(qc);
+ for (i = 0; i < QUIC_TLS_ENC_LEVEL_MAX; i++) {
+ quic_tls_ctx_secs_free(&qc->els[i].tls_ctx);
+ quic_conn_enc_level_uninit(qc, &qc->els[i]);
+ }
+ quic_tls_ctx_secs_free(&qc->negotiated_ictx);
+
+ app_tls_ctx = &qc->els[QUIC_TLS_ENC_LEVEL_APP].tls_ctx;
+ pool_free(pool_head_quic_tls_secret, app_tls_ctx->rx.secret);
+ pool_free(pool_head_quic_tls_secret, app_tls_ctx->tx.secret);
+
+ for (i = 0; i < QUIC_TLS_PKTNS_MAX; i++) {
+ quic_pktns_tx_pkts_release(&qc->pktns[i], qc);
+ quic_free_arngs(qc, &qc->pktns[i].rx.arngs);
+ }
+
+ pool_free(pool_head_quic_conn_rxbuf, qc->rx.buf.area);
+ /* Emit the trace before freeing <qc> to avoid a use-after-free */
+ TRACE_PROTO("QUIC conn. freed", QUIC_EV_CONN_FREED, qc);
+ pool_free(pool_head_quic_conn, qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Initialize the timer task of <qc> QUIC connection.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_conn_init_timer(struct quic_conn *qc)
+{
+ int ret = 0;
+ /* Attach this task to the same thread ID used for the connection */
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ qc->timer_task = task_new_on(qc->tid);
+ if (!qc->timer_task) {
+ TRACE_ERROR("timer task allocation failed", QUIC_EV_CONN_NEW, qc);
+ goto leave;
+ }
+
+ qc->timer = TICK_ETERNITY;
+ qc->timer_task->process = qc_process_timer;
+ qc->timer_task->context = qc;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
+/* Rearm the idle timer for <qc> QUIC connection. */
+static void qc_idle_timer_do_rearm(struct quic_conn *qc)
+{
+ unsigned int expire;
+
+ expire = QUIC_MAX(3 * quic_pto(qc), qc->max_idle_timeout);
+ qc->idle_timer_task->expire = tick_add(now_ms, MS_TO_TICKS(expire));
+ task_queue(qc->idle_timer_task);
+}
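+
+/* Example (illustrative, assuming quic_pto() returns the PTO in
+ * milliseconds): with a PTO of 200ms and a negotiated max_idle_timeout of
+ * 30000ms, the timer is armed QUIC_MAX(3 * 200, 30000) = 30000ms from now;
+ * RFC 9000 10.1 requires enduring at least three PTOs before abandoning an
+ * idle connection.
+ */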
+
+/* Rearm the idle timer for <qc> QUIC connection depending on the <read>
+ * boolean, which is set to 1 when receiving a packet and 0 when sending one.
+ */
+static void qc_idle_timer_rearm(struct quic_conn *qc, int read)
+{
+ TRACE_ENTER(QUIC_EV_CONN_IDLE_TIMER, qc);
+
+ if (read) {
+ qc->flags |= QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ;
+ }
+ else {
+ qc->flags &= ~QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ;
+ }
+ qc_idle_timer_do_rearm(qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_IDLE_TIMER, qc);
+}
+
+/* The task handling the idle timeout */
+struct task *qc_idle_timer_task(struct task *t, void *ctx, unsigned int state)
+{
+ struct quic_conn *qc = ctx;
+ struct quic_counters *prx_counters = qc->prx_counters;
+ unsigned int qc_flags = qc->flags;
+
+ TRACE_ENTER(QUIC_EV_CONN_IDLE_TIMER, qc);
+
+ /* Notify the MUX before setting QUIC_FL_CONN_EXP_TIMER or the MUX
+ * might free the quic-conn too early via quic_close().
+ */
+ qc_notify_close(qc);
+
+ /* If the MUX is still alive, keep the quic-conn. The MUX is
+ * responsible for calling quic_close() to release it.
+ */
+ qc->flags |= QUIC_FL_CONN_EXP_TIMER;
+ if (qc->mux_state != QC_MUX_READY)
+ quic_conn_release(qc);
+
+ /* TODO if the quic-conn cannot be freed because of the MUX, we may at
+ * least clean some parts of it such as the tasklet.
+ */
+
+ if (!(qc_flags & QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED)) {
+ qc_flags |= QUIC_FL_CONN_HALF_OPEN_CNT_DECREMENTED;
+ TRACE_DEVEL("dec half open counter", QUIC_EV_CONN_SSLALERT, qc);
+ HA_ATOMIC_DEC(&prx_counters->half_open_conn);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_IDLE_TIMER, qc);
+ return NULL;
+}
+
+/* Initialize the idle timeout task for <qc>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_conn_init_idle_timer_task(struct quic_conn *qc)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ qc->idle_timer_task = task_new_here();
+ if (!qc->idle_timer_task) {
+ TRACE_ERROR("Idle timer task allocation failed", QUIC_EV_CONN_NEW, qc);
+ goto leave;
+ }
+
+ qc->idle_timer_task->process = qc_idle_timer_task;
+ qc->idle_timer_task->context = qc;
+ qc_idle_timer_rearm(qc, 1);
+ task_queue(qc->idle_timer_task);
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
+/* Parse into <pkt> a long header located in <*buf> buffer, with <end> a
+ * pointer to one byte past the end of this buffer.
+ */
+static inline int quic_packet_read_long_header(unsigned char **buf, const unsigned char *end,
+ struct quic_rx_packet *pkt)
+{
+ int ret = 0;
+ unsigned char dcid_len, scid_len;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+
+ if (end == *buf) {
+ TRACE_ERROR("buffer data consumed", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ /* Destination Connection ID Length */
+ dcid_len = *(*buf)++;
+ /* We want to be sure we can read <dcid_len> bytes and one more for <scid_len> value */
+ if (dcid_len > QUIC_CID_MAXLEN || end - *buf < dcid_len + 1) {
+ TRACE_ERROR("too long DCID", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ if (dcid_len) {
+ /* Check that the length of this received DCID matches the CID lengths
+ * of our implementation for non-Initial packets only.
+ */
+ if (pkt->type != QUIC_PACKET_TYPE_INITIAL &&
+ pkt->type != QUIC_PACKET_TYPE_0RTT &&
+ dcid_len != QUIC_HAP_CID_LEN) {
+ TRACE_ERROR("wrong DCID length", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ memcpy(pkt->dcid.data, *buf, dcid_len);
+ }
+
+ pkt->dcid.len = dcid_len;
+ *buf += dcid_len;
+
+ /* Source Connection ID Length */
+ scid_len = *(*buf)++;
+ if (scid_len > QUIC_CID_MAXLEN || end - *buf < scid_len) {
+ TRACE_ERROR("too long SCID", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ if (scid_len)
+ memcpy(pkt->scid.data, *buf, scid_len);
+ pkt->scid.len = scid_len;
+ *buf += scid_len;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT);
+ return ret;
+}
+
+/* Insert <pkt> RX packet in its <qel> RX packets tree */
+static void qc_pkt_insert(struct quic_conn *qc,
+ struct quic_rx_packet *pkt, struct quic_enc_level *qel)
+{
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ pkt->pn_node.key = pkt->pn;
+ quic_rx_packet_refinc(pkt);
+ eb64_insert(&qel->rx.pkts, &pkt->pn_node);
+
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+}
+
+/* Try to remove the header protection of <pkt> QUIC packet with <beg> the
+ * address of the packet first byte, using the keys from encryption level <el>.
+ *
+ * If header protection has been successfully removed, packet data are copied
+ * into <qc> Rx buffer. If <el> secrets are not yet available, the copy is
+ * still performed, and the packet is inserted into <qc> protected packets
+ * tree. In both cases, the packet can now be considered handled by the <qc>
+ * connection.
+ *
+ * If header protection cannot be removed because <el> secrets were already
+ * discarded, no operation is conducted.
+ *
+ * Returns 1 on success: packet data is now handled by the connection. On
+ * error 0 is returned: the packet should be dropped by the caller.
+ */
+static inline int qc_try_rm_hp(struct quic_conn *qc,
+ struct quic_rx_packet *pkt,
+ unsigned char *beg,
+ struct quic_enc_level **el)
+{
+ int ret = 0;
+ unsigned char *pn = NULL; /* Packet number field */
+ enum quic_tls_enc_level tel;
+ struct quic_enc_level *qel;
+ /* Only for traces. */
+ struct quic_rx_packet *qpkt_trace;
+
+ qpkt_trace = NULL;
+ TRACE_ENTER(QUIC_EV_CONN_TRMHP, qc);
+ BUG_ON(!pkt->pn_offset);
+
+ /* The packet number field starts here. This is also the start, minus
+ * QUIC_PACKET_PN_MAXLEN, of the sample used to add/remove the header
+ * protection.
+ */
+ pn = beg + pkt->pn_offset;
+
+ tel = quic_packet_type_enc_level(pkt->type);
+ qel = &qc->els[tel];
+
+ if (qc_qel_may_rm_hp(qc, qel)) {
+ /* Note that the following function enables us to unprotect the packet
+ * number and its length which are subsequently used to decrypt the
+ * entire packet.
+ */
+ if (!qc_do_rm_hp(qc, pkt, &qel->tls_ctx,
+ qel->pktns->rx.largest_pn, pn, beg)) {
+ TRACE_PROTO("hp error", QUIC_EV_CONN_TRMHP, qc);
+ goto out;
+ }
+
+ /* The AAD includes the packet number field. */
+ pkt->aad_len = pkt->pn_offset + pkt->pnl;
+ if (pkt->len - pkt->aad_len < QUIC_TLS_TAG_LEN) {
+ TRACE_PROTO("Too short packet", QUIC_EV_CONN_TRMHP, qc);
+ goto out;
+ }
+
+ qpkt_trace = pkt;
+ }
+ else {
+ if (qel->tls_ctx.flags & QUIC_FL_TLS_SECRETS_DCD) {
+ /* If the packet number space has been discarded, this packet
+ * will not be parsed.
+ */
+ TRACE_PROTO("Discarded pktns", QUIC_EV_CONN_TRMHP, qc, pkt);
+ goto out;
+ }
+
+ TRACE_PROTO("hp not removed", QUIC_EV_CONN_TRMHP, qc, pkt);
+ LIST_APPEND(&qel->rx.pqpkts, &pkt->list);
+ quic_rx_packet_refinc(pkt);
+ }
+
+ *el = qel;
+ /* No reference counter incrementation here!!! */
+ LIST_APPEND(&qc->rx.pkt_list, &pkt->qc_rx_pkt_list);
+ memcpy(b_tail(&qc->rx.buf), beg, pkt->len);
+ pkt->data = (unsigned char *)b_tail(&qc->rx.buf);
+ b_add(&qc->rx.buf, pkt->len);
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TRMHP, qc, qpkt_trace);
+ return ret;
+}
+
+/* Parse the header form of <pkt> packet from its first byte to set its type.
+ * Also set <*long_header> to 1 if this form is long, 0 if not, and store the
+ * version of this packet into <*version>.
+ */
+static inline int qc_parse_hd_form(struct quic_rx_packet *pkt,
+ unsigned char **buf, const unsigned char *end,
+ int *long_header, uint32_t *version)
+{
+ int ret = 0;
+ const unsigned char byte0 = **buf;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+
+ (*buf)++;
+ if (byte0 & QUIC_PACKET_LONG_HEADER_BIT) {
+ unsigned char type =
+ (byte0 >> QUIC_PACKET_TYPE_SHIFT) & QUIC_PACKET_TYPE_BITMASK;
+
+ *long_header = 1;
+ /* Version */
+ if (!quic_read_uint32(version, (const unsigned char **)buf, end)) {
+ TRACE_ERROR("could not read the packet version", QUIC_EV_CONN_RXPKT);
+ goto out;
+ }
+
+ if (*version != QUIC_PROTOCOL_VERSION_2_DRAFT) {
+ pkt->type = type;
+ }
+ else {
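+ /* QUIC v2 remaps the long header packet types relative to v1
+ * (Retry=0, Initial=1, 0-RTT=2, Handshake=3), hence the
+ * translation below (see draft-ietf-quic-v2).
+ */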
+ switch (type) {
+ case 0:
+ pkt->type = QUIC_PACKET_TYPE_RETRY;
+ break;
+ case 1:
+ pkt->type = QUIC_PACKET_TYPE_INITIAL;
+ break;
+ case 2:
+ pkt->type = QUIC_PACKET_TYPE_0RTT;
+ break;
+ case 3:
+ pkt->type = QUIC_PACKET_TYPE_HANDSHAKE;
+ break;
+ }
+ }
+ }
+ else {
+ pkt->type = QUIC_PACKET_TYPE_SHORT;
+ *long_header = 0;
+ }
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT);
+ return ret;
+}
+
+/* Return the QUIC version (quic_version struct) with <version> as version number
+ * if supported or NULL if not.
+ */
+static inline const struct quic_version *qc_supported_version(uint32_t version)
+{
+ int i;
+
+ for (i = 0; i < quic_versions_nb; i++)
+ if (quic_versions[i].num == version)
+ return &quic_versions[i];
+
+ return NULL;
+}
+
+/*
+ * Send a Version Negotiation packet in response to <pkt> on socket <fd> to
+ * address <addr>.
+ * Implementation of RFC 9000 6. Version Negotiation.
+ *
+ * TODO implement a rate-limited sending of Version Negotiation packets
+ *
+ * Returns 0 on success else non-zero.
+ */
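+/* Version Negotiation packet layout for reference (RFC 9000, 17.2.1):
+ *
+ *   Version Negotiation Packet {
+ *     Header Form (1) = 1,
+ *     Unused (7),
+ *     Version (32) = 0,
+ *     Destination Connection ID Length (8),
+ *     Destination Connection ID (0..2040),
+ *     Source Connection ID Length (8),
+ *     Source Connection ID (0..2040),
+ *     Supported Version (32) ...,
+ *   }
+ */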
+static int send_version_negotiation(int fd, struct sockaddr_storage *addr,
+ struct quic_rx_packet *pkt)
+{
+ char buf[256];
+ int ret = 0, i = 0, j;
+ uint32_t version;
+ const socklen_t addrlen = get_addr_len(addr);
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT);
+ /*
+ * header form
+ * long header, fixed bit to 0 for Version Negotiation
+ */
+ /* TODO: RAND_bytes() should be replaced? */
+ if (RAND_bytes((unsigned char *)buf, 1) != 1) {
+ TRACE_ERROR("RAND_bytes() error", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ buf[i++] |= '\x80';
+ /* null version for Version Negotiation */
+ buf[i++] = '\x00';
+ buf[i++] = '\x00';
+ buf[i++] = '\x00';
+ buf[i++] = '\x00';
+
+ /* source connection id */
+ buf[i++] = pkt->scid.len;
+ memcpy(&buf[i], pkt->scid.data, pkt->scid.len);
+ i += pkt->scid.len;
+
+ /* destination connection id */
+ buf[i++] = pkt->dcid.len;
+ memcpy(&buf[i], pkt->dcid.data, pkt->dcid.len);
+ i += pkt->dcid.len;
+
+ /* supported version */
+ for (j = 0; j < quic_versions_nb; j++) {
+ version = htonl(quic_versions[j].num);
+ memcpy(&buf[i], &version, sizeof(version));
+ i += sizeof(version);
+ }
+
+ if (sendto(fd, buf, i, 0, (struct sockaddr *)addr, addrlen) < 0)
+ goto out;
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
+ return !ret;
+}
+
+/* Send a stateless reset packet depending on <rxpkt> RX packet information
+ * from the UDP socket of listener <l> to <dstaddr>.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int send_stateless_reset(struct listener *l, struct sockaddr_storage *dstaddr,
+ struct quic_rx_packet *rxpkt)
+{
+ int ret = 0, pktlen, rndlen;
+ unsigned char pkt[64];
+ const socklen_t addrlen = get_addr_len(dstaddr);
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+
+ TRACE_ENTER(QUIC_EV_STATELESS_RST);
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+ /* 10.3 Stateless Reset (https://www.rfc-editor.org/rfc/rfc9000.html#section-10.3)
+ * The resulting minimum size of 21 bytes does not guarantee that a Stateless
+ * Reset is difficult to distinguish from other packets if the recipient requires
+ * the use of a connection ID. To achieve that end, the endpoint SHOULD ensure
+ * that all packets it sends are at least 22 bytes longer than the minimum
+ * connection ID length that it requests the peer to include in its packets,
+ * adding PADDING frames as necessary. This ensures that any Stateless Reset
+ * sent by the peer is indistinguishable from a valid packet sent to the endpoint.
+ * An endpoint that sends a Stateless Reset in response to a packet that is
+ * 43 bytes or shorter SHOULD send a Stateless Reset that is one byte shorter
+ * than the packet it responds to.
+ */
+
+ /* Note that we build at most a 42-byte QUIC packet to mimic a short packet */
+ pktlen = rxpkt->len <= 43 ? rxpkt->len - 1 : 0;
+ pktlen = QUIC_MAX(QUIC_STATELESS_RESET_PACKET_MINLEN, pktlen);
+ rndlen = pktlen - QUIC_STATELESS_RESET_TOKEN_LEN;
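+ /* Resulting packet layout (a sketch):
+ *
+ *   [ 0b01xxxxxx | <rndlen - 1 random bytes> | stateless reset token ]
+ *
+ * i.e. <rndlen> unpredictable bytes whose first byte is patched below
+ * to mimic a short header, followed by the reset token derived from
+ * the packet DCID.
+ */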
+
+ /* Put a header of random bytes */
+ /* TODO: RAND_bytes() should be replaced */
+ if (RAND_bytes(pkt, rndlen) != 1) {
+ TRACE_ERROR("RAND_bytes() failed", QUIC_EV_STATELESS_RST);
+ goto leave;
+ }
+
+ /* Clear the most significant bit, and set the second one */
+ *pkt = (*pkt & ~0x80) | 0x40;
+ if (!quic_stateless_reset_token_cpy(NULL, pkt + rndlen, QUIC_STATELESS_RESET_TOKEN_LEN,
+ rxpkt->dcid.data, rxpkt->dcid.len))
+ goto leave;
+
+ if (sendto(l->rx.fd, pkt, pktlen, 0, (struct sockaddr *)dstaddr, addrlen) < 0)
+ goto leave;
+
+ ret = 1;
+ HA_ATOMIC_INC(&prx_counters->stateless_reset_sent);
+ TRACE_PROTO("stateless reset sent", QUIC_EV_STATELESS_RST, NULL, &rxpkt->dcid);
+ leave:
+ TRACE_LEAVE(QUIC_EV_STATELESS_RST);
+ return ret;
+}
+
+/* QUIC server only function.
+ * Build the AAD into <aad> buffer from <version>, <cid> connection ID and
+ * <addr> socket address. It is the responsibility of the caller to check
+ * that <aad> is big enough to contain these data.
+ * Return the number of bytes copied to <aad>.
+ */
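+/* AAD layout (a sketch, assuming quic_saddr_cpy() copies the peer address
+ * and port): [ version (4 bytes) | peer address | connection ID ]. The token
+ * checking side must rebuild the exact same AAD for decryption to succeed.
+ */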
+static int quic_generate_retry_token_aad(unsigned char *aad,
+ uint32_t version,
+ const struct quic_cid *cid,
+ const struct sockaddr_storage *addr)
+{
+ unsigned char *p;
+
+ p = aad;
+ memcpy(p, &version, sizeof version);
+ p += sizeof version;
+ p += quic_saddr_cpy(p, addr);
+ memcpy(p, cid->data, cid->len);
+ p += cid->len;
+
+ return p - aad;
+}
+
+/* QUIC server only function.
+ * Generate the token to be used in Retry packets. The token is written to
+ * <buf> with <len> as length. <odcid> is the original destination connection
+ * ID and <dcid> is our side destination connection ID (or client source
+ * connection ID).
+ * Returns the length of the encoded token or 0 on error.
+ */
+static int quic_generate_retry_token(unsigned char *buf, size_t len,
+ const uint32_t version,
+ const struct quic_cid *odcid,
+ const struct quic_cid *dcid,
+ struct sockaddr_storage *addr)
+{
+ int ret = 0;
+ unsigned char *p;
+ unsigned char aad[sizeof(uint32_t) + sizeof(in_port_t) +
+ sizeof(struct in6_addr) + QUIC_CID_MAXLEN];
+ size_t aadlen;
+ unsigned char salt[QUIC_RETRY_TOKEN_SALTLEN];
+ unsigned char key[QUIC_TLS_KEY_LEN];
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ const unsigned char *sec = (const unsigned char *)global.cluster_secret;
+ size_t seclen = strlen(global.cluster_secret);
+ EVP_CIPHER_CTX *ctx = NULL;
+ const EVP_CIPHER *aead = EVP_aes_128_gcm();
+ uint32_t timestamp = now_ms;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT);
+
+ /* We copy the ODCID into the token, prefixed by the token format byte
+ * and its one-byte length. It is followed by an AEAD TAG, and finally
+ * the random bytes used to derive the secret to encrypt the token.
+ */
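+ /* Resulting token layout (a sketch):
+ *
+ *   [ fmt (1) | ODCID len (1) | ODCID | timestamp (4) |
+ *     AEAD tag (QUIC_TLS_TAG_LEN) | salt (QUIC_RETRY_TOKEN_SALTLEN) ]
+ *
+ * Everything between the format byte and the AEAD tag is encrypted
+ * in place below.
+ */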
+ if (1 + dcid->len + 1 + QUIC_TLS_TAG_LEN + sizeof salt > len)
+ goto err;
+
+ aadlen = quic_generate_retry_token_aad(aad, version, dcid, addr);
+ /* TODO: RAND_bytes() should be replaced */
+ if (RAND_bytes(salt, sizeof salt) != 1) {
+ TRACE_ERROR("RAND_bytes()", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ if (!quic_tls_derive_retry_token_secret(EVP_sha256(), key, sizeof key, iv, sizeof iv,
+ salt, sizeof salt, sec, seclen)) {
+ TRACE_ERROR("quic_tls_derive_retry_token_secret() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ if (!quic_tls_tx_ctx_init(&ctx, aead, key)) {
+ TRACE_ERROR("quic_tls_tx_ctx_init() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ /* Token build */
+ p = buf;
+ *p++ = QUIC_TOKEN_FMT_RETRY;
+ *p++ = odcid->len;
+ memcpy(p, odcid->data, odcid->len);
+ p += odcid->len;
+ write_u32(p, htonl(timestamp));
+ p += sizeof timestamp;
+
+ /* Do not encrypt the QUIC_TOKEN_FMT_RETRY byte */
+ if (!quic_tls_encrypt(buf + 1, p - buf - 1, aad, aadlen, ctx, aead, key, iv)) {
+ TRACE_ERROR("quic_tls_encrypt() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ p += QUIC_TLS_TAG_LEN;
+ memcpy(p, salt, sizeof salt);
+ p += sizeof salt;
+ EVP_CIPHER_CTX_free(ctx);
+
+ ret = p - buf;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
+ return ret;
+
+ err:
+ if (ctx)
+ EVP_CIPHER_CTX_free(ctx);
+ goto leave;
+}
+
+/* QUIC server only function.
+ *
+ * Check the validity of the Retry token from Initial packet <pkt>. <dgram> is
+ * the UDP datagram containing <pkt> and <l> is the listener instance on which
+ * it was received. If the token is valid, the ODCID of <qc> QUIC connection
+ * will be put into <odcid>. <qc> is used to retrieve the QUIC version needed
+ * to validate the token but it can be NULL: in this case the version will be
+ * retrieved from the packet.
+ *
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_retry_token_check(struct quic_rx_packet *pkt,
+ struct quic_dgram *dgram,
+ struct listener *l,
+ struct quic_conn *qc,
+ struct quic_cid *odcid)
+{
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+ int ret = 0;
+ unsigned char *token = pkt->token;
+ const uint64_t tokenlen = pkt->token_len;
+ unsigned char buf[128];
+ unsigned char aad[sizeof(uint32_t) + sizeof(in_port_t) +
+ sizeof(struct in6_addr) + QUIC_CID_MAXLEN];
+ size_t aadlen;
+ const unsigned char *salt;
+ unsigned char key[QUIC_TLS_KEY_LEN];
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ const unsigned char *sec = (const unsigned char *)global.cluster_secret;
+ size_t seclen = strlen(global.cluster_secret);
+ EVP_CIPHER_CTX *ctx = NULL;
+ const EVP_CIPHER *aead = EVP_aes_128_gcm();
+ const struct quic_version *qv = qc ? qc->original_version :
+ pkt->version;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+
+ /* The caller must ensure this. */
+ BUG_ON(!global.cluster_secret || !pkt->token_len);
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+
+ if (*pkt->token != QUIC_TOKEN_FMT_RETRY) {
+ /* TODO: New token check */
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT, qc, NULL, NULL, pkt->version);
+ goto leave;
+ }
+
+ if (sizeof buf < tokenlen) {
+ TRACE_ERROR("too short buffer", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ aadlen = quic_generate_retry_token_aad(aad, qv->num, &pkt->scid, &dgram->saddr);
+ salt = token + tokenlen - QUIC_RETRY_TOKEN_SALTLEN;
+ if (!quic_tls_derive_retry_token_secret(EVP_sha256(), key, sizeof key, iv, sizeof iv,
+ salt, QUIC_RETRY_TOKEN_SALTLEN, sec, seclen)) {
+ TRACE_ERROR("Could not derive retry secret", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ if (!quic_tls_rx_ctx_init(&ctx, aead, key)) {
+ TRACE_ERROR("quic_tls_rx_ctx_init() failed", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ /* Do not decrypt the QUIC_TOKEN_FMT_RETRY byte */
+ if (!quic_tls_decrypt2(buf, token + 1, tokenlen - QUIC_RETRY_TOKEN_SALTLEN - 1, aad, aadlen,
+ ctx, aead, key, iv)) {
+ TRACE_ERROR("Could not decrypt retry token", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ if (parse_retry_token(qc, buf, buf + tokenlen - QUIC_RETRY_TOKEN_SALTLEN - 1, odcid)) {
+ TRACE_ERROR("Error during Initial token parsing", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ EVP_CIPHER_CTX_free(ctx);
+
+ ret = 1;
+ HA_ATOMIC_INC(&prx_counters->retry_validated);
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return ret;
+
+ err:
+ HA_ATOMIC_INC(&prx_counters->retry_error);
+ if (ctx)
+ EVP_CIPHER_CTX_free(ctx);
+ goto leave;
+}
+
+/* Generate a Retry packet and send it on <fd> socket to <addr> in response to
+ * the Initial <pkt> packet.
+ *
+ * Returns 0 on success else non-zero.
+ */
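+/* Retry packet layout for reference (RFC 9000, 17.2.5):
+ *
+ *   Retry Packet {
+ *     Header Form (1) = 1,
+ *     Fixed Bit (1) = 1,
+ *     Long Packet Type (2) = 3,
+ *     Unused (4),
+ *     Version (32),
+ *     Destination Connection ID Length (8),
+ *     Destination Connection ID (0..160),
+ *     Source Connection ID Length (8),
+ *     Source Connection ID (0..160),
+ *     Retry Token (..),
+ *     Retry Integrity Tag (128),
+ *   }
+ */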
+static int send_retry(int fd, struct sockaddr_storage *addr,
+ struct quic_rx_packet *pkt, const struct quic_version *qv)
+{
+ int ret = 0;
+ unsigned char buf[128];
+ int i = 0, token_len;
+ const socklen_t addrlen = get_addr_len(addr);
+ struct quic_cid scid;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT);
+
+ /* long header + fixed bit + packet type QUIC_PACKET_TYPE_RETRY */
+ buf[i++] = (QUIC_PACKET_LONG_HEADER_BIT | QUIC_PACKET_FIXED_BIT) |
+ (quic_pkt_type(QUIC_PACKET_TYPE_RETRY, qv->num) << QUIC_PACKET_TYPE_SHIFT);
+ /* version */
+ buf[i++] = *((unsigned char *)&qv->num + 3);
+ buf[i++] = *((unsigned char *)&qv->num + 2);
+ buf[i++] = *((unsigned char *)&qv->num + 1);
+ buf[i++] = *(unsigned char *)&qv->num;
+
+ /* Use the SCID from <pkt> for Retry DCID. */
+ buf[i++] = pkt->scid.len;
+ memcpy(&buf[i], pkt->scid.data, pkt->scid.len);
+ i += pkt->scid.len;
+
+ /* Generate a new CID to be used as SCID for the Retry packet. */
+ scid.len = QUIC_HAP_CID_LEN;
+ /* TODO: RAND_bytes() should be replaced */
+ if (RAND_bytes(scid.data, scid.len) != 1) {
+ TRACE_ERROR("RAND_bytes() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ buf[i++] = scid.len;
+ memcpy(&buf[i], scid.data, scid.len);
+ i += scid.len;
+
+ /* token */
+ if (!(token_len = quic_generate_retry_token(&buf[i], sizeof(buf) - i, qv->num,
+ &pkt->dcid, &pkt->scid, addr))) {
+ TRACE_ERROR("quic_generate_retry_token() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ i += token_len;
+
+ /* token integrity tag */
+ if ((&buf[i] - buf < QUIC_TLS_TAG_LEN) ||
+ !quic_tls_generate_retry_integrity_tag(pkt->dcid.data,
+ pkt->dcid.len, buf, i, qv)) {
+ TRACE_ERROR("quic_tls_generate_retry_integrity_tag() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ i += QUIC_TLS_TAG_LEN;
+
+ if (sendto(fd, buf, i, 0, (struct sockaddr *)addr, addrlen) < 0) {
+ TRACE_ERROR("quic_tls_generate_retry_integrity_tag() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
+ return !ret;
+}
+
+/* Retrieve a quic_conn instance from the <pkt> DCID field. If the packet is of
+ * type INITIAL, the ODCID tree is first used. In this case, <saddr> is
+ * concatenated to the <pkt> DCID field.
+ *
+ * Returns the instance or NULL if not found.
+ */
+static struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt,
+ struct listener *l,
+ struct sockaddr_storage *saddr)
+{
+ struct quic_conn *qc = NULL;
+ struct ebmb_node *node;
+ struct quic_connection_id *id;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+
+ /* Look first into ODCIDs tree for INITIAL/0-RTT packets. */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL ||
+ pkt->type == QUIC_PACKET_TYPE_0RTT) {
+ /* DCIDs of first packets coming from multiple clients may have
+ * the same values. Let's distinguish them by concatenating the
+ * socket addresses.
+ */
+ quic_cid_saddr_cat(&pkt->dcid, saddr);
+ node = ebmb_lookup(&quic_dghdlrs[tid].odcids, pkt->dcid.data,
+ pkt->dcid.len + pkt->dcid.addrlen);
+ if (node) {
+ qc = ebmb_entry(node, struct quic_conn, odcid_node);
+ goto end;
+ }
+ }
+
+ /* Look into DCIDs tree for non-INITIAL/0-RTT packets. This may also be
+ * used for INITIAL/0-RTT non-first packets with the final DCID in
+ * use.
+ */
+ node = ebmb_lookup(&quic_dghdlrs[tid].cids, pkt->dcid.data, pkt->dcid.len);
+ if (!node)
+ goto end;
+
+ id = ebmb_entry(node, struct quic_connection_id, node);
+ qc = id->qc;
+
+ /* If found in DCIDs tree, remove the quic_conn from the ODCIDs tree.
+ * If already done, this is a noop.
+ */
+ if (qc)
+ ebmb_delete(&qc->odcid_node);
+
+ end:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return qc;
+}
+
+/* Try to allocate the <*ssl> SSL session object for <qc> QUIC connection
+ * with <ssl_ctx> as SSL context inherited settings. Also set the transport
+ * parameters of this session.
+ * It is the responsibility of the caller to check the validity of all the
+ * pointers passed as parameters to this function.
+ * Return 0 if succeeded, -1 if not. If it failed, the ->err_code member of
+ * <qc->conn> is set to CO_ER_SSL_NO_MEM.
+ */
+static int qc_ssl_sess_init(struct quic_conn *qc, SSL_CTX *ssl_ctx, SSL **ssl,
+ unsigned char *params, size_t params_len)
+{
+ int retry, ret = -1;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ retry = 1;
+ retry:
+ *ssl = SSL_new(ssl_ctx);
+ if (!*ssl) {
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ if (!SSL_set_quic_method(*ssl, &ha_quic_method) ||
+ !SSL_set_ex_data(*ssl, ssl_qc_app_data_index, qc)) {
+ SSL_free(*ssl);
+ *ssl = NULL;
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ ret = 0;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+
+ err:
+ qc->conn->err_code = CO_ER_SSL_NO_MEM;
+ goto leave;
+}
+
+/* Allocate the ssl_sock_ctx for connection <qc>. This creates the tasklet
+ * used to process <qc> received packets. The allocated context is stored in
+ * <qc.xprt_ctx>.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int qc_conn_alloc_ssl_ctx(struct quic_conn *qc)
+{
+ int ret = 0;
+ struct bind_conf *bc = qc->li->bind_conf;
+ struct ssl_sock_ctx *ctx = NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ ctx = pool_zalloc(pool_head_quic_conn_ctx);
+ if (!ctx) {
+ TRACE_ERROR("SSL context allocation failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ ctx->subs = NULL;
+ ctx->xprt_ctx = NULL;
+ ctx->qc = qc;
+
+ if (qc_is_listener(qc)) {
+ if (qc_ssl_sess_init(qc, bc->initial_ctx, &ctx->ssl,
+ qc->enc_params, qc->enc_params_len) == -1) {
+ goto err;
+ }
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+ /* Enabling 0-RTT */
+ if (bc->ssl_conf.early_data)
+ SSL_set_quic_early_data_enabled(ctx->ssl, 1);
+#endif
+
+ SSL_set_accept_state(ctx->ssl);
+ }
+
+ ctx->xprt = xprt_get(XPRT_QUIC);
+
+ /* Store the allocated context in <qc>. */
+ qc->xprt_ctx = ctx;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return !ret;
+
+ err:
+ pool_free(pool_head_quic_conn_ctx, ctx);
+ goto leave;
+}
+
+/* Check that all the bytes between <buf> included and <end> address
+ * excluded are null. It is the responsibility of the caller to
+ * check that there is at least one byte between <buf> and <end>.
+ * Return 1 if all the bytes are null, 0 if not.
+ */
+static inline int quic_padding_check(const unsigned char *buf,
+ const unsigned char *end)
+{
+ while (buf < end && !*buf)
+ buf++;
+
+ return buf == end;
+}
+
+/* Find the associated connection to the packet <pkt> or create a new one if
+ * this is an Initial packet. <dgram> is the datagram containing the packet and
+ * <l> is the listener instance on which it was received.
+ *
+ * Returns the quic-conn instance or NULL.
+ */
+static struct quic_conn *quic_rx_pkt_retrieve_conn(struct quic_rx_packet *pkt,
+ struct quic_dgram *dgram,
+ struct listener *l)
+{
+ struct quic_cid token_odcid = { .len = 0 };
+ struct quic_conn *qc = NULL;
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT);
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+
+ qc = retrieve_qc_conn_from_cid(pkt, l, &dgram->saddr);
+
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL) {
+ BUG_ON(!pkt->version); /* This must not happen. */
+
+ if (global.cluster_secret && pkt->token_len) {
+ if (!quic_retry_token_check(pkt, dgram, l, qc, &token_odcid))
+ goto err;
+ }
+
+ if (!qc) {
+ int ipv4;
+
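+ /* Address validation via Retry (RFC 9000, 8.1.2): when the
+ * number of half-open connections exceeds the configured
+ * threshold, make the client prove it owns its source address
+ * before allocating any connection resources.
+ */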
+ if (global.cluster_secret && !pkt->token_len && !(l->bind_conf->options & BC_O_QUIC_FORCE_RETRY) &&
+ HA_ATOMIC_LOAD(&prx_counters->half_open_conn) >= global.tune.quic_retry_threshold) {
+ TRACE_PROTO("Initial without token, sending retry",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ if (send_retry(l->rx.fd, &dgram->saddr, pkt, pkt->version)) {
+ TRACE_ERROR("Error during Retry generation",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto out;
+ }
+
+ HA_ATOMIC_INC(&prx_counters->retry_sent);
+ goto out;
+ }
+
+ /* RFC 9000 7.2. Negotiating Connection IDs:
+ * When an Initial packet is sent by a client that has not previously
+ * received an Initial or Retry packet from the server, the client
+ * populates the Destination Connection ID field with an unpredictable
+ * value. This Destination Connection ID MUST be at least 8 bytes in length.
+ */
+ if (pkt->dcid.len < QUIC_ODCID_MINLEN) {
+ TRACE_PROTO("dropped packet",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto err;
+ }
+
+ pkt->saddr = dgram->saddr;
+ ipv4 = dgram->saddr.ss_family == AF_INET;
+
+ qc = qc_new_conn(pkt->version, ipv4, &pkt->dcid, &pkt->scid, &token_odcid,
+ &dgram->daddr, &pkt->saddr, 1,
+ !!pkt->token_len, l);
+ if (qc == NULL)
+ goto err;
+
+ HA_ATOMIC_INC(&prx_counters->half_open_conn);
+ /* Insert the DCID the QUIC client has chosen (only for listeners) */
+ ebmb_insert(&quic_dghdlrs[tid].odcids, &qc->odcid_node,
+ qc->odcid.len + qc->odcid.addrlen);
+ }
+ }
+ else if (!qc) {
+ TRACE_PROTO("No connection on a non Initial packet", QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ if (global.cluster_secret && !send_stateless_reset(l, &dgram->saddr, pkt))
+ TRACE_ERROR("stateless reset not sent", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ pkt->qc = qc;
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return qc;
+
+ err:
+ HA_ATOMIC_INC(&prx_counters->dropped_pkt);
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return NULL;
+}
+
+/* Parse a QUIC packet starting at <buf>. Data won't be read after <end> even
+ * if the packet is incomplete. This function will populate fields of <pkt>
+ * instance, most notably its length. <dgram> is the UDP datagram which
+ * contains the parsed packet. <l> is the listener instance on which it was
+ * received.
+ *
+ * Returns 0 on success else non-zero. Packet length is guaranteed to be set to
+ * the real packet value or to cover all data between <buf> and <end>: this is
+ * useful to reject a whole datagram.
+ */
+static int quic_rx_pkt_parse(struct quic_rx_packet *pkt,
+ unsigned char *buf, const unsigned char *end,
+ struct quic_dgram *dgram, struct listener *l)
+{
+ const unsigned char *beg = buf;
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+ int long_header = 0;
+ uint32_t version;
+ const struct quic_version *qv = NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT);
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+ /* This is set only to help the traces distinguish a packet
+ * with a parsed packet number from others.
+ */
+ pkt->pn_node.key = (uint64_t)-1;
+ if (end <= buf) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* Fixed bit */
+ if (!(*buf & QUIC_PACKET_FIXED_BIT)) {
+ if (!(pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST) &&
+ quic_padding_check(buf, end)) {
+ /* Some browsers may pad the remaining datagram space with null bytes.
+ * That is what we call padding added outside of QUIC packets. Such
+ * datagrams must be considered as valid. But we can only consume
+ * the remaining space.
+ */
+ pkt->len = end - buf;
+ goto drop_silent;
+ }
+
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* Header form */
+ if (!qc_parse_hd_form(pkt, &buf, end, &long_header, &version)) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ if (long_header) {
+ uint64_t len;
+
+ TRACE_PROTO("long header packet received", QUIC_EV_CONN_LPKT);
+ if (!quic_packet_read_long_header(&buf, end, pkt)) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* When multiple QUIC packets are coalesced on the same UDP datagram,
+ * they must have the same DCID.
+ */
+ if (!(pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST) &&
+ (pkt->dcid.len != dgram->dcid_len ||
+ memcmp(dgram->dcid, pkt->dcid.data, pkt->dcid.len))) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* Retry or Version Negotiation packets are only sent by servers */
+ if (pkt->type == QUIC_PACKET_TYPE_RETRY || !version) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* RFC9000 6. Version Negotiation */
+ qv = qc_supported_version(version);
+ if (!qv) {
+ /* unsupported version, send Negotiation packet */
+ if (send_version_negotiation(l->rx.fd, &dgram->saddr, pkt)) {
+ TRACE_ERROR("VN packet not sent", QUIC_EV_CONN_LPKT);
+ goto drop_silent;
+ }
+
+ TRACE_PROTO("VN packet sent", QUIC_EV_CONN_LPKT);
+ goto drop_silent;
+ }
+ pkt->version = qv;
+
+ /* For Initial packets, and for servers (QUIC client connections),
+ * there is no Initial connection ID storage.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL) {
+ uint64_t token_len;
+
+ if (!quic_dec_int(&token_len, (const unsigned char **)&buf, end) ||
+ end - buf < token_len) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, qv);
+ goto drop;
+ }
+
+ /* TODO Retry should be automatically activated if
+ * suspicious network usage is detected.
+ */
+ if (global.cluster_secret && !token_len) {
+ if (l->bind_conf->options & BC_O_QUIC_FORCE_RETRY) {
+ TRACE_PROTO("Initial without token, sending retry",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, qv);
+ if (send_retry(l->rx.fd, &dgram->saddr, pkt, qv)) {
+ TRACE_PROTO("Error during Retry generation",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, qv);
+ goto drop_silent;
+ }
+
+ HA_ATOMIC_INC(&prx_counters->retry_sent);
+ goto drop_silent;
+ }
+ }
+ else if (!global.cluster_secret && token_len) {
+ /* Impossible case: a token was received without configured
+ * cluster secret.
+ */
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT,
+ NULL, NULL, NULL, qv);
+ goto drop;
+ }
+
+ pkt->token = buf;
+ pkt->token_len = token_len;
+ buf += pkt->token_len;
+ }
+ else if (pkt->type != QUIC_PACKET_TYPE_0RTT) {
+ if (pkt->dcid.len != QUIC_HAP_CID_LEN) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, qv);
+ goto drop;
+ }
+ }
+
+ if (!quic_dec_int(&len, (const unsigned char **)&buf, end) ||
+ end - buf < len) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, qv);
+ goto drop;
+ }
+
+ /* The Packet Number is stored here. The Packet Length covers the
+ * rest of the content.
+ */
+ pkt->pn_offset = buf - beg;
+ pkt->len = pkt->pn_offset + len;
+
+ /* RFC 9000. Initial Datagram Size
+ *
+ * A server MUST discard an Initial packet that is carried in a UDP datagram
+ * with a payload that is smaller than the smallest allowed maximum datagram
+ * size of 1200 bytes.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL &&
+ dgram->len < QUIC_INITIAL_PACKET_MINLEN) {
+ TRACE_PROTO("Too short datagram with an Initial packet", QUIC_EV_CONN_LPKT);
+ HA_ATOMIC_INC(&prx_counters->too_short_initial_dgram);
+ goto drop;
+ }
+
+ /* Interrupt parsing after packet length retrieval: this
+ * ensures that only the packet is dropped but not the whole
+ * datagram.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_0RTT && !l->bind_conf->ssl_conf.early_data) {
+ TRACE_PROTO("0-RTT packet not supported", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+ }
+ else {
+ TRACE_PROTO("short header packet received", QUIC_EV_CONN_LPKT);
+ if (end - buf < QUIC_HAP_CID_LEN) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ memcpy(pkt->dcid.data, buf, QUIC_HAP_CID_LEN);
+ pkt->dcid.len = QUIC_HAP_CID_LEN;
+
+ /* When multiple QUIC packets are coalesced on the same UDP datagram,
+ * they must have the same DCID.
+ */
+ if (!(pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST) &&
+ (pkt->dcid.len != dgram->dcid_len ||
+ memcmp(dgram->dcid, pkt->dcid.data, pkt->dcid.len))) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ buf += QUIC_HAP_CID_LEN;
+
+ pkt->pn_offset = buf - beg;
+ /* A short packet is the last one of a UDP datagram. */
+ pkt->len = end - beg;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, NULL, pkt, NULL, qv);
+ return 0;
+
+ drop:
+ HA_ATOMIC_INC(&prx_counters->dropped_pkt);
+ drop_silent:
+ if (!pkt->len)
+ pkt->len = end - beg;
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, NULL, pkt, NULL, qv);
+ return -1;
+}
+
+/* Check if received packet <pkt> should be dropped because <qc> is already in
+ * closing state. This can be true if a CONNECTION_CLOSE has already been
+ * emitted for this connection.
+ *
+ * Returns false if connection is not in closing state else true. In the
+ * latter case, the caller should drop the whole datagram so as not to mess
+ * up the <qc> CONNECTION_CLOSE rate limit counter.
+ */
+static int qc_rx_check_closing(struct quic_conn *qc,
+ struct quic_rx_packet *pkt)
+{
+ if (!(qc->flags & QUIC_FL_CONN_CLOSING))
+ return 0;
+
+ TRACE_STATE("Closing state connection", QUIC_EV_CONN_LPKT, qc, NULL, NULL, pkt->version);
+
+ /* Check if the CONNECTION_CLOSE re-emission threshold is reached. The
+ * threshold grows by one after each re-emission so that the frame is
+ * sent less and less often.
+ */
+ if (++qc->nb_pkt_since_cc >= qc->nb_pkt_for_cc) {
+ qc->flags |= QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ qc->nb_pkt_for_cc++;
+ qc->nb_pkt_since_cc = 0;
+ }
+
+ return 1;
+}
+
+/* Handle a parsed packet <pkt> by the connection <qc>. Data will be copied
+ * into <qc> receive buffer after the header protection removal procedure.
+ *
+ * <dgram> must be set to the datagram which contains the QUIC packet. <beg>
+ * must point to the packet buffer first byte.
+ *
+ * <tasklist_head> may be non-NULL when the caller treats several datagrams
+ * for different quic-conn instances. In this case, each quic-conn tasklet
+ * will be appended to it in order to be woken up after the current task.
+ *
+ * The caller can safely remove the packet data. If the packet refcount was
+ * not incremented by this function, it means that the connection did not
+ * handle it and it should be freed by the caller.
+ */
+static void qc_rx_pkt_handle(struct quic_conn *qc, struct quic_rx_packet *pkt,
+ struct quic_dgram *dgram, unsigned char *beg,
+ struct list **tasklist_head)
+{
+ const struct quic_version *qv = pkt->version;
+ struct quic_enc_level *qel = NULL;
+ size_t b_cspace;
+
+ if (pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST &&
+ qc->flags & QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED) {
+ TRACE_PROTO("PTO timer must be armed after anti-amplication was reached",
+ QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ TRACE_DEVEL("needs to wakeup the timer task after the amplification limit was reached",
+ QUIC_EV_CONN_LPKT, qc);
+ /* Reset the anti-amplification bit. It will be set again
+ * when sending the next packet if reached again.
+ */
+ qc->flags &= ~QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ qc_set_timer(qc);
+ if (qc->timer_task && tick_isset(qc->timer) && tick_is_lt(qc->timer, now_ms))
+ task_wakeup(qc->timer_task, TASK_WOKEN_MSG);
+ }
+
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) {
+ TRACE_PROTO("Connection error",
+ QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ goto out;
+ }
+
+ pkt->raw_len = pkt->len;
+ quic_rx_pkts_del(qc);
+ b_cspace = b_contig_space(&qc->rx.buf);
+ if (b_cspace < pkt->len) {
+ /* Do not consume buf if space not at the end. */
+ if (b_tail(&qc->rx.buf) + b_cspace < b_wrap(&qc->rx.buf)) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ HA_ATOMIC_INC(&qc->prx_counters->dropped_pkt_bufoverrun);
+ goto drop_silent;
+ }
+
+ /* Let us consume the remaining contiguous space. */
+ if (b_cspace) {
+ b_putchr(&qc->rx.buf, 0x00);
+ b_cspace--;
+ }
+ b_add(&qc->rx.buf, b_cspace);
+ if (b_contig_space(&qc->rx.buf) < pkt->len) {
+ TRACE_PROTO("Too big packet",
+ QUIC_EV_CONN_LPKT, qc, pkt, &pkt->len, qv);
+ HA_ATOMIC_INC(&qc->prx_counters->dropped_pkt_bufoverrun);
+ goto drop_silent;
+ }
+ }
+
+ if (!qc_try_rm_hp(qc, pkt, beg, &qel)) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ goto drop;
+ }
+
+ TRACE_DATA("New packet", QUIC_EV_CONN_LPKT, qc, pkt, NULL, qv);
+ if (pkt->aad_len)
+ qc_pkt_insert(qc, pkt, qel);
+ out:
+ *tasklist_head = tasklet_wakeup_after(*tasklist_head,
+ qc->wait_event.tasklet);
+
+ drop_silent:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc, pkt, NULL, qv);
+ return;
+
+ drop:
+ HA_ATOMIC_INC(&qc->prx_counters->dropped_pkt);
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc, pkt, NULL, qv);
+}
+
+/* This function builds into <buf> buffer a QUIC long packet header.
+ * Return 1 if enough room to build this header, 0 if not.
+ */
+static int quic_build_packet_long_header(unsigned char **buf, const unsigned char *end,
+ int type, size_t pn_len,
+ struct quic_conn *qc, const struct quic_version *ver)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+
+ if (end - *buf < sizeof ver->num + qc->dcid.len + qc->scid.len + 3) {
+ TRACE_DEVEL("not enough room", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ type = quic_pkt_type(type, ver->num);
+ /* #0 byte flags */
+ *(*buf)++ = QUIC_PACKET_FIXED_BIT | QUIC_PACKET_LONG_HEADER_BIT |
+ (type << QUIC_PACKET_TYPE_SHIFT) | (pn_len - 1);
+ /* Version */
+ quic_write_uint32(buf, end, ver->num);
+ *(*buf)++ = qc->dcid.len;
+ /* Destination connection ID */
+ if (qc->dcid.len) {
+ memcpy(*buf, qc->dcid.data, qc->dcid.len);
+ *buf += qc->dcid.len;
+ }
+ /* Source connection ID */
+ *(*buf)++ = qc->scid.len;
+ if (qc->scid.len) {
+ memcpy(*buf, qc->scid.data, qc->scid.len);
+ *buf += qc->scid.len;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return ret;
+}
+
+/* This function builds into <buf> buffer a QUIC short packet header.
+ * Return 1 if enough room to build this header, 0 if not.
+ */
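+/* 1-RTT packet layout for reference (RFC 9000, 17.3.1); the Packet Number
+ * and payload are appended by the caller:
+ *
+ *   1-RTT Packet {
+ *     Header Form (1) = 0,
+ *     Fixed Bit (1) = 1,
+ *     Spin Bit (1),
+ *     Reserved Bits (2),
+ *     Key Phase (1),
+ *     Packet Number Length (2),
+ *     Destination Connection ID (0..160),
+ *     Packet Number (8..32),
+ *     Packet Payload (8..),
+ *   }
+ */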
+static int quic_build_packet_short_header(unsigned char **buf, const unsigned char *end,
+ size_t pn_len, struct quic_conn *qc,
+ unsigned char tls_flags)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (end - *buf < 1 + qc->dcid.len) {
+ TRACE_DEVEL("not enough room", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ /* #0 byte flags */
+ *(*buf)++ = QUIC_PACKET_FIXED_BIT |
+ ((tls_flags & QUIC_FL_TLS_KP_BIT_SET) ? QUIC_PACKET_KEY_PHASE_BIT : 0) | (pn_len - 1);
+ /* Destination connection ID */
+ if (qc->dcid.len) {
+ memcpy(*buf, qc->dcid.data, qc->dcid.len);
+ *buf += qc->dcid.len;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Apply QUIC header protection to the packet with <buf> as first byte address,
+ * <pn> as address of the Packet number field, <pnlen> being this field length
+ * with <aead> as AEAD cipher and <key> as secret key.
+ * Returns 1 if succeeded or 0 if failed.
+ */
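+/* For reference (RFC 9001, 5.4): the mask is derived by ciphering the
+ * sample taken QUIC_PACKET_PN_MAXLEN (4) bytes past the start of the
+ * Packet Number field. mask[0] protects the low bits of the first byte
+ * (4 bits for long headers, 5 for short ones) and mask[1..4] protect the
+ * Packet Number bytes.
+ */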
+static int quic_apply_header_protection(struct quic_conn *qc, unsigned char *buf,
+ unsigned char *pn, size_t pnlen,
+ struct quic_tls_ctx *tls_ctx)
+
+{
+ int i, ret = 0;
+ /* We need an IV of at least 5 bytes: one byte for bytes #0
+ * and at most 4 bytes for the packet number
+ */
+ unsigned char mask[5] = {0};
+ EVP_CIPHER_CTX *aes_ctx = tls_ctx->tx.hp_ctx;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (!quic_tls_aes_encrypt(mask, pn + QUIC_PACKET_PN_MAXLEN, sizeof mask, aes_ctx)) {
+ TRACE_ERROR("could not apply header protection", QUIC_EV_CONN_TXPKT, qc);
+ goto out;
+ }
+
+ *buf ^= mask[0] & (*buf & QUIC_PACKET_LONG_HEADER_BIT ? 0xf : 0x1f);
+ for (i = 0; i < pnlen; i++)
+ pn[i] ^= mask[i + 1];
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Reduce the encoded size of <ack_frm> ACK frame, removing the last
+ * ACK ranges if needed, to a value below <limit> in bytes.
+ * Return the size of the reduced frame if succeeded, 0 if not.
+ */
+static int quic_ack_frm_reduce_sz(struct quic_conn *qc,
+ struct quic_frame *ack_frm, size_t limit)
+{
+ size_t room, ack_delay_sz;
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ ack_delay_sz = quic_int_getsize(ack_frm->tx_ack.ack_delay);
+ /* A frame is made of 1 byte for the frame type. */
+ room = limit - ack_delay_sz - 1;
+ if (!quic_rm_last_ack_ranges(qc, ack_frm->tx_ack.arngs, room))
+ goto leave;
+
+ ret = 1 + ack_delay_sz + ack_frm->tx_ack.arngs->enc_sz;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Prepare into <outlist> as many ack-eliciting frames as possible from the
+ * <inlist> prebuilt frames for <qel> encryption level, to be encoded in a
+ * buffer with <room> as available room, and <*len> the packet Length field
+ * initialized with the number of bytes already present in this buffer which
+ * must be taken into account for the Length packet field value. <headlen> is
+ * the number of bytes already present in this packet before building frames.
+ *
+ * Update consequently <*len> to reflect the size of these frames built
+ * by this function. Also attach these frames to the <outlist> frame list.
+ * Return 1 if at least one ack-eliciting frame could be built, 0 if not.
+ */
+static inline int qc_build_frms(struct list *outlist, struct list *inlist,
+ size_t room, size_t *len, size_t headlen,
+ struct quic_enc_level *qel,
+ struct quic_conn *qc)
+{
+ int ret;
+ struct quic_frame *cf, *cfbak;
+
+ TRACE_ENTER(QUIC_EV_CONN_BCFRMS, qc);
+
+ ret = 0;
+ if (*len > room)
+ goto leave;
+
+ /* If we are not probing we must take into account the congestion
+ * control window.
+ */
+ if (!qel->pktns->tx.pto_probe) {
+ size_t remain = quic_path_prep_data(qc->path);
+
+ if (headlen > remain)
+ goto leave;
+
+ room = QUIC_MIN(room, remain - headlen);
+ }
+
+ TRACE_PROTO("************** frames build (headlen)",
+ QUIC_EV_CONN_BCFRMS, qc, &headlen);
+
+ /* NOTE: switch/case block inside a loop, a successful status must be
+ * returned by this function only if at least one frame could be built
+ * in the switch/case block.
+ */
+ list_for_each_entry_safe(cf, cfbak, inlist, list) {
+ /* header length, data length, frame length. */
+ size_t hlen, dlen, dlen_sz, avail_room, flen;
+
+ if (!room)
+ break;
+
+ switch (cf->type) {
+ case QUIC_FT_CRYPTO:
+ TRACE_DEVEL(" New CRYPTO frame build (room, len)",
+ QUIC_EV_CONN_BCFRMS, qc, &room, len);
+ /* Compute the length of this CRYPTO frame header */
+ hlen = 1 + quic_int_getsize(cf->crypto.offset);
+ /* Compute the data length of this CRYPTO frame. */
+ dlen = max_stream_data_size(room, *len + hlen, cf->crypto.len);
+ TRACE_DEVEL(" CRYPTO data length (hlen, crypto.len, dlen)",
+ QUIC_EV_CONN_BCFRMS, qc, &hlen, &cf->crypto.len, &dlen);
+ if (!dlen)
+ continue;
+
+ /* CRYPTO frame length. */
+ flen = hlen + quic_int_getsize(dlen) + dlen;
+ TRACE_DEVEL(" CRYPTO frame length (flen)",
+ QUIC_EV_CONN_BCFRMS, qc, &flen);
+ /* Account for the whole CRYPTO frame length: its header, the
+ * encoded data length and the data itself.
+ */
+ *len += flen;
+ room -= flen;
+ if (dlen == cf->crypto.len) {
+ /* <cf> CRYPTO data have been consumed. */
+ LIST_DELETE(&cf->list);
+ LIST_APPEND(outlist, &cf->list);
+ }
+ else {
+ struct quic_frame *new_cf;
+
+ new_cf = pool_zalloc(pool_head_quic_frame);
+ if (!new_cf) {
+ TRACE_ERROR("No memory for new crypto frame", QUIC_EV_CONN_BCFRMS, qc);
+ continue;
+ }
+
+ LIST_INIT(&new_cf->reflist);
+ new_cf->type = QUIC_FT_CRYPTO;
+ new_cf->crypto.len = dlen;
+ new_cf->crypto.offset = cf->crypto.offset;
+ new_cf->crypto.qel = qel;
+ TRACE_DEVEL("splitted frame", QUIC_EV_CONN_PRSAFRM, qc, new_cf);
+ if (cf->origin) {
+ TRACE_DEVEL("duplicated frame", QUIC_EV_CONN_PRSAFRM, qc);
+ /* This <cf> frame was duplicated */
+ LIST_APPEND(&cf->origin->reflist, &new_cf->ref);
+ new_cf->origin = cf->origin;
+ /* Detach the remaining CRYPTO frame from its original frame */
+ LIST_DEL_INIT(&cf->ref);
+ cf->origin = NULL;
+ }
+ LIST_APPEND(outlist, &new_cf->list);
+ /* Consume <dlen> bytes of the current frame. */
+ cf->crypto.len -= dlen;
+ cf->crypto.offset += dlen;
+ }
+ break;
+
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ if (cf->stream.dup) {
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream_desc = NULL;
+ struct quic_stream *strm = &cf->stream;
+
+ /* As this frame has already been lost, ensure the stream is
+ * still available and that the range of this frame has not
+ * already been consumed before resending it.
+ */
+ node = eb64_lookup(&qc->streams_by_id, strm->id);
+ if (!node) {
+ TRACE_DEVEL("released stream", QUIC_EV_CONN_PRSAFRM, qc, cf);
+ LIST_DELETE(&cf->list);
+ pool_free(pool_head_quic_frame, cf);
+ continue;
+ }
+
+ stream_desc = eb64_entry(node, struct qc_stream_desc, by_id);
+ if (strm->offset.key + strm->len <= stream_desc->ack_offset) {
+ TRACE_DEVEL("ignored frame frame in already acked range",
+ QUIC_EV_CONN_PRSAFRM, qc, cf);
+ LIST_DELETE(&cf->list);
+ pool_free(pool_head_quic_frame, cf);
+ continue;
+ }
+ else if (strm->offset.key < stream_desc->ack_offset) {
+ uint64_t diff = stream_desc->ack_offset - strm->offset.key;
+
+ qc_stream_frm_mv_fwd(cf, diff);
+ TRACE_DEVEL("updated partially acked frame",
+ QUIC_EV_CONN_PRSAFRM, qc, cf);
+ }
+ }
+ /* Note that these frames are accepted in short packets only, without
+ * a "Length" packet field. Here, <*len> is used only to compute the
+ * sum of the lengths of the already built frames for this packet.
+ *
+ * Compute the length of this STREAM frame "header" made of all the
+ * fields except the variable ones. Note that +1 is for the type of
+ * this frame.
+ */
+ hlen = 1 + quic_int_getsize(cf->stream.id) +
+ ((cf->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT) ? quic_int_getsize(cf->stream.offset.key) : 0);
+ /* Compute the data length of this STREAM frame. */
+ avail_room = room - hlen - *len;
+ if ((ssize_t)avail_room <= 0)
+ continue;
+
+ TRACE_DEVEL(" New STREAM frame build (room, len)",
+ QUIC_EV_CONN_BCFRMS, qc, &room, len);
+
+ /* hlen contains STREAM id and offset. Ensure there is
+ * enough room for length field.
+ */
+ if (cf->type & QUIC_STREAM_FRAME_TYPE_LEN_BIT) {
+ dlen = QUIC_MIN((uint64_t)max_available_room(avail_room, &dlen_sz),
+ cf->stream.len);
+ dlen_sz = quic_int_getsize(dlen);
+ flen = hlen + dlen_sz + dlen;
+ }
+ else {
+ dlen = QUIC_MIN((uint64_t)avail_room, cf->stream.len);
+ flen = hlen + dlen;
+ }
+
+ if (cf->stream.len && !dlen) {
+ /* Only a small gap is left in the buffer, not
+ * enough to encode the STREAM data length.
+ */
+ continue;
+ }
+
+ TRACE_DEVEL(" STREAM data length (hlen, stream.len, dlen)",
+ QUIC_EV_CONN_BCFRMS, qc, &hlen, &cf->stream.len, &dlen);
+ TRACE_DEVEL(" STREAM frame length (flen)",
+ QUIC_EV_CONN_BCFRMS, qc, &flen);
+ /* Account for the whole STREAM frame length: its header, the
+ * encoded data length and the data itself.
+ */
+ *len += flen;
+ room -= flen;
+ if (dlen == cf->stream.len) {
+ /* <cf> STREAM data have been consumed. */
+ LIST_DELETE(&cf->list);
+ LIST_APPEND(outlist, &cf->list);
+
+ /* Do not notify MUX on retransmission. */
+ if (qc->flags & QUIC_FL_CONN_TX_MUX_CONTEXT) {
+ qcc_streams_sent_done(cf->stream.stream->ctx,
+ cf->stream.len,
+ cf->stream.offset.key);
+ }
+ }
+ else {
+ struct quic_frame *new_cf;
+ struct buffer cf_buf;
+
+ new_cf = pool_zalloc(pool_head_quic_frame);
+ if (!new_cf) {
+ TRACE_ERROR("No memory for new STREAM frame", QUIC_EV_CONN_BCFRMS, qc);
+ continue;
+ }
+
+ LIST_INIT(&new_cf->reflist);
+ new_cf->type = cf->type;
+ new_cf->stream.stream = cf->stream.stream;
+ new_cf->stream.buf = cf->stream.buf;
+ new_cf->stream.id = cf->stream.id;
+ if (cf->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT)
+ new_cf->stream.offset = cf->stream.offset;
+ new_cf->stream.len = dlen;
+ new_cf->type |= QUIC_STREAM_FRAME_TYPE_LEN_BIT;
+ /* FIN bit reset */
+ new_cf->type &= ~QUIC_STREAM_FRAME_TYPE_FIN_BIT;
+ new_cf->stream.data = cf->stream.data;
+ new_cf->stream.dup = cf->stream.dup;
+ TRACE_DEVEL("split frame", QUIC_EV_CONN_PRSAFRM, qc, new_cf);
+ if (cf->origin) {
+ TRACE_DEVEL("duplicated frame", QUIC_EV_CONN_PRSAFRM, qc);
+ /* This <cf> frame was duplicated */
+ LIST_APPEND(&cf->origin->reflist, &new_cf->ref);
+ new_cf->origin = cf->origin;
+ /* Detach this STREAM frame from its origin */
+ LIST_DEL_INIT(&cf->ref);
+ cf->origin = NULL;
+ }
+ LIST_APPEND(outlist, &new_cf->list);
+ cf->type |= QUIC_STREAM_FRAME_TYPE_OFF_BIT;
+ /* Consume <dlen> bytes of the current frame. */
+ cf_buf = b_make(b_orig(cf->stream.buf),
+ b_size(cf->stream.buf),
+ (char *)cf->stream.data - b_orig(cf->stream.buf), 0);
+ cf->stream.len -= dlen;
+ cf->stream.offset.key += dlen;
+ cf->stream.data = (unsigned char *)b_peek(&cf_buf, dlen);
+
+ /* Do not notify MUX on retransmission. */
+ if (qc->flags & QUIC_FL_CONN_TX_MUX_CONTEXT) {
+ qcc_streams_sent_done(new_cf->stream.stream->ctx,
+ new_cf->stream.len,
+ new_cf->stream.offset.key);
+ }
+ }
+
+ /* TODO the MUX is notified about the frame sending via
+ * the previous qcc_streams_sent_done() call. However, the
+ * sending can fail later, for example if the sendto
+ * system call returns an error. As the MUX has been
+ * notified, the transport layer is responsible for
+ * buffering and resending the announced data later.
+ */
+
+ break;
+
+ default:
+ flen = qc_frm_len(cf);
+ BUG_ON(!flen);
+ if (flen > room)
+ continue;
+
+ *len += flen;
+ room -= flen;
+ LIST_DELETE(&cf->list);
+ LIST_APPEND(outlist, &cf->list);
+ break;
+ }
+
+ /* Successful status as soon as a frame could be built */
+ ret = 1;
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_BCFRMS, qc);
+ return ret;
+}
+
+/* Generate a CONNECTION_CLOSE frame for <qc> on <qel> encryption level. <out>
+ * is used as return parameter and should be zeroed by the caller.
+ */
+static void qc_build_cc_frm(struct quic_conn *qc, struct quic_enc_level *qel,
+ struct quic_frame *out)
+{
+ /* TODO improve CONNECTION_CLOSE on Initial/Handshake encryption levels
+ *
+ * A CONNECTION_CLOSE frame should be sent in several packets with
+ * different encryption levels depending on the client context. This is
+ * to ensure that the client can decrypt it. See RFC 9000 10.2.3 for
+ * more details on how to implement it.
+ */
+ TRACE_ENTER(QUIC_EV_CONN_BFRM, qc);
+
+ if (qc->err.app) {
+ if (unlikely(qel == &qc->els[QUIC_TLS_ENC_LEVEL_INITIAL] ||
+ qel == &qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE])) {
+ /* RFC 9000 10.2.3. Immediate Close during the Handshake
+ *
+ * Sending a CONNECTION_CLOSE of type 0x1d in an Initial or Handshake
+ * packet could expose application state or be used to alter application
+ * state. A CONNECTION_CLOSE of type 0x1d MUST be replaced by a
+ * CONNECTION_CLOSE of type 0x1c when sending the frame in Initial or
+ * Handshake packets. Otherwise, information about the application
+ * state might be revealed. Endpoints MUST clear the value of the
+ * Reason Phrase field and SHOULD use the APPLICATION_ERROR code when
+ * converting to a CONNECTION_CLOSE of type 0x1c.
+ */
+ out->type = QUIC_FT_CONNECTION_CLOSE;
+ out->connection_close.error_code = QC_ERR_APPLICATION_ERROR;
+ out->connection_close.reason_phrase_len = 0;
+ }
+ else {
+ out->type = QUIC_FT_CONNECTION_CLOSE_APP;
+ out->connection_close.error_code = qc->err.code;
+ }
+ }
+ else {
+ out->type = QUIC_FT_CONNECTION_CLOSE;
+ out->connection_close.error_code = qc->err.code;
+ }
+ TRACE_LEAVE(QUIC_EV_CONN_BFRM, qc);
+}
+
+/* This function builds a clear packet from <pkt> information (its type)
+ * into a buffer with <pos> as position pointer for <qc> QUIC connection
+ * and <qel> as QUIC TLS encryption level, filling the buffer with as many
+ * frames as possible from the <frms> list of prebuilt frames.
+ * The trailing QUIC_TLS_TAG_LEN bytes of this packet are not built, but they
+ * are reserved so as to ensure there is enough room to build this AEAD TAG
+ * after having returned from this function.
+ * This function also updates the value of <buf_pn> pointer to point to the
+ * packet number field in this packet. <pn_len> will also have the packet
+ * number length as value.
+ *
+ * Return 1 if succeeded (enough room to build this packet), 0 if not.
+ */
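+/* Rough layout of the packet built below (a sketch; the Length field only
+ * exists for long header packets, and the token length byte only for
+ * Initial ones):
+ *
+ *   [ header | (Length) | Packet Number | frames ... | <AEAD tag room> ]
+ *                         ^ *buf_pn                     (QUIC_TLS_TAG_LEN)
+ */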
+static int qc_do_build_pkt(unsigned char *pos, const unsigned char *end,
+ size_t dglen, struct quic_tx_packet *pkt,
+ int64_t pn, size_t *pn_len, unsigned char **buf_pn,
+ int force_ack, int padding, int cc, int probe,
+ struct quic_enc_level *qel, struct quic_conn *qc,
+ const struct quic_version *ver, struct list *frms)
+{
+ unsigned char *beg, *payload;
+ size_t len, len_sz, len_frms, padding_len;
+ struct quic_frame frm = { .type = QUIC_FT_CRYPTO, };
+ struct quic_frame ack_frm = { .type = QUIC_FT_ACK, };
+ struct quic_frame cc_frm = { };
+ size_t ack_frm_len, head_len;
+ int64_t rx_largest_acked_pn;
+ int add_ping_frm;
+ struct list frm_list = LIST_HEAD_INIT(frm_list);
+ struct quic_frame *cf;
+ int must_ack, ret = 0;
+ int nb_aepkts_since_last_ack;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ /* Length field value with CRYPTO frames if present. */
+ len_frms = 0;
+ beg = pos;
+ /* When not probing, and no immediate close is required, reduce the size of this
+ * buffer to respect the congestion controller window.
+ * This size will be limited if we have ack-eliciting frames to send from <frms>.
+ */
+ if (!probe && !LIST_ISEMPTY(frms) && !cc) {
+ size_t path_room;
+
+ path_room = quic_path_prep_data(qc->path);
+ if (end - beg > path_room)
+ end = beg + path_room;
+ }
+
+ /* Ensure there is enough room for the TLS encryption tag and a zero token
+ * length field if any.
+ */
+ if (end - pos < QUIC_TLS_TAG_LEN +
+ (pkt->type == QUIC_PACKET_TYPE_INITIAL ? 1 : 0))
+ goto no_room;
+
+ end -= QUIC_TLS_TAG_LEN;
+ rx_largest_acked_pn = qel->pktns->rx.largest_acked_pn;
+ /* packet number length */
+ *pn_len = quic_packet_number_length(pn, rx_largest_acked_pn);
+ /* Build the header */
+ if ((pkt->type == QUIC_PACKET_TYPE_SHORT &&
+ !quic_build_packet_short_header(&pos, end, *pn_len, qc, qel->tls_ctx.flags)) ||
+ (pkt->type != QUIC_PACKET_TYPE_SHORT &&
+ !quic_build_packet_long_header(&pos, end, pkt->type, *pn_len, qc, ver)))
+ goto no_room;
+
+ /* Encode the token length (0) for an Initial packet. */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL)
+ *pos++ = 0;
+ head_len = pos - beg;
+ /* Build an ACK frame if required. */
+ ack_frm_len = 0;
+ nb_aepkts_since_last_ack = qel->pktns->rx.nb_aepkts_since_last_ack;
+ must_ack = !qel->pktns->tx.pto_probe &&
+ (force_ack || ((qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) &&
+ (LIST_ISEMPTY(frms) || nb_aepkts_since_last_ack >= QUIC_MAX_RX_AEPKTS_SINCE_LAST_ACK)));
+ if (must_ack) {
+ struct quic_arngs *arngs = &qel->pktns->rx.arngs;
+ BUG_ON(eb_is_empty(&qel->pktns->rx.arngs.root));
+ ack_frm.tx_ack.arngs = arngs;
+ if (qel->pktns->flags & QUIC_FL_PKTNS_NEW_LARGEST_PN) {
+ qel->pktns->tx.ack_delay =
+ quic_compute_ack_delay_us(qel->pktns->rx.largest_time_received, qc);
+ qel->pktns->flags &= ~QUIC_FL_PKTNS_NEW_LARGEST_PN;
+ }
+ ack_frm.tx_ack.ack_delay = qel->pktns->tx.ack_delay;
+ /* XXX BE CAREFUL XXX: here we reserve at least one byte for the
+ * smallest frame (PING) and <*pn_len> more for the packet number. Note
+ * that from here, we do not know if we will have to send a PING frame.
+ * This will be decided after having computed the ack-eliciting frames
+ * to be added to this packet.
+ */
+ ack_frm_len = quic_ack_frm_reduce_sz(qc, &ack_frm, end - 1 - *pn_len - pos);
+ if (!ack_frm_len)
+ goto no_room;
+ }
+
+ /* Length field value without the ack-eliciting frames. */
+ len = ack_frm_len + *pn_len;
+ len_frms = 0;
+ if (!cc && !LIST_ISEMPTY(frms)) {
+ ssize_t room = end - pos;
+
+ TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, frms);
+ /* Initialize the length of the frames built below to <len>.
+ * If any frame could be successfully built by qc_build_frms(),
+ * we will have len_frms > len.
+ */
+ len_frms = len;
+ if (!qc_build_frms(&frm_list, frms,
+ end - pos, &len_frms, pos - beg, qel, qc)) {
+ TRACE_DEVEL("Not enough room", QUIC_EV_CONN_TXPKT,
+ qc, NULL, NULL, &room);
+ if (!ack_frm_len && !qel->pktns->tx.pto_probe)
+ goto no_room;
+ }
+ }
+
+ /* Length (of the remaining data). Must not fail because the buffer size
+ * has been checked above. Note that we have reserved QUIC_TLS_TAG_LEN bytes
+ * for the encryption tag. It must be taken into account for the length
+ * of this packet.
+ */
+ if (len_frms)
+ len = len_frms + QUIC_TLS_TAG_LEN;
+ else
+ len += QUIC_TLS_TAG_LEN;
+ /* CONNECTION_CLOSE frame */
+ if (cc) {
+ qc_build_cc_frm(qc, qel, &cc_frm);
+ len += qc_frm_len(&cc_frm);
+ }
+ add_ping_frm = 0;
+ padding_len = 0;
+ len_sz = quic_int_getsize(len);
+ /* Add this packet size to <dglen> */
+ dglen += head_len + len_sz + len;
+ if (padding && dglen < QUIC_INITIAL_PACKET_MINLEN) {
+ /* This is a maximum padding size */
+ padding_len = QUIC_INITIAL_PACKET_MINLEN - dglen;
+ /* The length field value of this packet is <len> + <padding_len>,
+ * the size of which may be greater than the initially computed size
+ * <len_sz>. So, let's deduce the difference between these two sizes
+ * from <padding_len>.
+ */
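+ /* Worked example with hypothetical numbers: if <len> is 60
+ * (1-byte varint) and <padding_len> starts at 1000, then
+ * len + padding_len = 1060 needs a 2-byte varint, so one byte
+ * moves from the padding to the Length field and <padding_len>
+ * becomes 999.
+ */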
+ padding_len -= quic_int_getsize(len + padding_len) - len_sz;
+ len += padding_len;
+ }
+ else if (len_frms && len_frms < QUIC_PACKET_PN_MAXLEN) {
+ len += padding_len = QUIC_PACKET_PN_MAXLEN - len_frms;
+ }
+ else if (LIST_ISEMPTY(&frm_list)) {
+ if (qel->pktns->tx.pto_probe) {
+ /* If we cannot send a frame, we send a PING frame. */
+ add_ping_frm = 1;
+ len += 1;
+ }
+ /* If there is no frame at all to follow, add at least a PADDING frame. */
+ if (!ack_frm_len && !cc)
+ len += padding_len = QUIC_PACKET_PN_MAXLEN - *pn_len;
+ }
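+ /* Worked example of the <len_sz> adjustment above: with dglen = 1000 and
+ * QUIC_INITIAL_PACKET_MINLEN at its RFC 9000 value of 1200, padding_len is
+ * first set to 200. If <len> was 50 (a one-byte varint), <len> + 200 = 250
+ * needs a two-byte varint: this extra byte goes to the Length field, so
+ * <padding_len> is reduced by one to keep the datagram size unchanged.
+ */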
+
+ if (pkt->type != QUIC_PACKET_TYPE_SHORT && !quic_enc_int(&pos, end, len))
+ goto no_room;
+
+ /* Packet number field address. */
+ *buf_pn = pos;
+
+ /* Packet number encoding. */
+ if (!quic_packet_number_encode(&pos, end, pn, *pn_len))
+ goto no_room;
+
+ /* Payload building (ack-eliciting frames or not). */
+ payload = pos;
+ if (ack_frm_len) {
+ if (!qc_build_frm(&pos, end, &ack_frm, pkt, qc))
+ goto no_room;
+
+ pkt->largest_acked_pn = quic_pktns_get_largest_acked_pn(qel->pktns);
+ pkt->flags |= QUIC_FL_TX_PACKET_ACK;
+ }
+
+ /* Ack-eliciting frames */
+ if (!LIST_ISEMPTY(&frm_list)) {
+ struct quic_frame *tmp_cf;
+ list_for_each_entry_safe(cf, tmp_cf, &frm_list, list) {
+ if (!qc_build_frm(&pos, end, cf, pkt, qc)) {
+ ssize_t room = end - pos;
+ TRACE_DEVEL("Not enough room", QUIC_EV_CONN_TXPKT,
+ qc, NULL, NULL, &room);
+ /* Note that <cf> was added from <frms> to <frm_list> list by
+ * qc_build_frms().
+ */
+ LIST_DELETE(&cf->list);
+ LIST_INSERT(frms, &cf->list);
+ continue;
+ }
+
+ quic_tx_packet_refinc(pkt);
+ cf->pkt = pkt;
+ }
+ }
+
+ /* Build a PING frame if needed. */
+ if (add_ping_frm) {
+ frm.type = QUIC_FT_PING;
+ if (!qc_build_frm(&pos, end, &frm, pkt, qc))
+ goto no_room;
+ }
+
+ /* Build a CONNECTION_CLOSE frame if needed. */
+ if (cc) {
+ if (!qc_build_frm(&pos, end, &cc_frm, pkt, qc))
+ goto no_room;
+
+ pkt->flags |= QUIC_FL_TX_PACKET_CC;
+ }
+
+ /* Build a PADDING frame if needed. */
+ if (padding_len) {
+ frm.type = QUIC_FT_PADDING;
+ frm.padding.len = padding_len;
+ if (!qc_build_frm(&pos, end, &frm, pkt, qc))
+ goto no_room;
+ }
+
+ if (pos == payload) {
+ /* No payload was built because of congestion control */
+ TRACE_DEVEL("limited by congestion control", QUIC_EV_CONN_TXPKT, qc);
+ goto no_room;
+ }
+
+ /* If this packet is ack-eliciting and we are probing, let's
+ * decrement the PTO probe counter.
+ */
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING &&
+ qel->pktns->tx.pto_probe)
+ qel->pktns->tx.pto_probe--;
+
+ pkt->len = pos - beg;
+ LIST_SPLICE(&pkt->frms, &frm_list);
+
+ ret = 1;
+ TRACE_DEVEL("Packet ack-eliciting frames", QUIC_EV_CONN_TXPKT, qc, pkt);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+
+ no_room:
+ /* Put back the pre-built frames which could not be added to this packet. */
+ LIST_SPLICE(frms, &frm_list);
+ TRACE_DEVEL("Remaining ack-eliciting frames", QUIC_EV_CONN_FRMLIST, qc, frms);
+ goto leave;
+}
+
+static inline void quic_tx_packet_init(struct quic_tx_packet *pkt, int type)
+{
+ pkt->type = type;
+ pkt->len = 0;
+ pkt->in_flight_len = 0;
+ pkt->pn_node.key = (uint64_t)-1;
+ LIST_INIT(&pkt->frms);
+ pkt->time_sent = TICK_ETERNITY;
+ pkt->next = NULL;
+ pkt->prev = NULL;
+ pkt->largest_acked_pn = -1;
+ pkt->flags = 0;
+ pkt->refcnt = 0;
+}
+
+/* Build a packet into <buf> packet buffer with <pkt_type> as packet
+ * type for <qc> QUIC connection from <qel> encryption level from <frms> list
+ * of prebuilt frames.
+ *
+ * Returns the built packet if succeeded, or NULL with <*err> set to -2 if the
+ * packet could not be allocated or encrypted for any reason, and to -1 if
+ * there was not enough room to build a packet.
+ * XXX NOTE XXX
+ * If you provide qc_build_pkt() with a big enough buffer to build a packet as
+ * big as possible (to fill an MTU), the only reason why this function may
+ * fail is the congestion control window limitation.
+ */
+static struct quic_tx_packet *qc_build_pkt(unsigned char **pos,
+ const unsigned char *buf_end,
+ struct quic_enc_level *qel,
+ struct quic_tls_ctx *tls_ctx, struct list *frms,
+ struct quic_conn *qc, const struct quic_version *ver,
+ size_t dglen, int pkt_type, int force_ack,
+ int padding, int probe, int cc, int *err)
+{
+ struct quic_tx_packet *ret_pkt = NULL;
+ /* The pointer to the packet number field. */
+ unsigned char *buf_pn;
+ unsigned char *beg, *end, *payload;
+ int64_t pn;
+ size_t pn_len, payload_len, aad_len;
+ struct quic_tx_packet *pkt;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc, NULL, qel);
+ *err = 0;
+ pkt = pool_alloc(pool_head_quic_tx_packet);
+ if (!pkt) {
+ TRACE_DEVEL("Not enough memory for a new packet", QUIC_EV_CONN_TXPKT, qc);
+ *err = -2;
+ goto err;
+ }
+
+ quic_tx_packet_init(pkt, pkt_type);
+ beg = *pos;
+ pn_len = 0;
+ buf_pn = NULL;
+
+ pn = qel->pktns->tx.next_pn + 1;
+ if (!qc_do_build_pkt(*pos, buf_end, dglen, pkt, pn, &pn_len, &buf_pn,
+ force_ack, padding, cc, probe, qel, qc, ver, frms)) {
+ // trace already emitted by function above
+ *err = -1;
+ goto err;
+ }
+
+ end = beg + pkt->len;
+ payload = buf_pn + pn_len;
+ payload_len = end - payload;
+ aad_len = payload - beg;
+
+ if (!quic_packet_encrypt(payload, payload_len, beg, aad_len, pn, tls_ctx, qc)) {
+ // trace already emitted by function above
+ *err = -2;
+ goto err;
+ }
+
+ end += QUIC_TLS_TAG_LEN;
+ pkt->len += QUIC_TLS_TAG_LEN;
+ if (!quic_apply_header_protection(qc, beg, buf_pn, pn_len, tls_ctx)) {
+ // trace already emitted by function above
+ *err = -2;
+ goto err;
+ }
+
+ /* Consume a packet number */
+ qel->pktns->tx.next_pn++;
+ qc->tx.prep_bytes += pkt->len;
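+ /* Anti-amplification: before the peer address is validated, an endpoint
+ * must not send more than three times the number of bytes received from
+ * it (RFC 9000, section 8.1).
+ */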
+ if (qc->tx.prep_bytes >= 3 * qc->rx.bytes && !quic_peer_validated_addr(qc)) {
+ qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ TRACE_PROTO("anti-amplification limit reached", QUIC_EV_CONN_TXPKT, qc);
+ }
+ /* Now that a correct packet is built, let us consume <*pos> buffer. */
+ *pos = end;
+ /* Attach the built packet to its tree. */
+ pkt->pn_node.key = pn;
+ /* Set the in-flight length for in-flight packets only. */
+ if (pkt->flags & QUIC_FL_TX_PACKET_IN_FLIGHT) {
+ pkt->in_flight_len = pkt->len;
+ qc->path->prep_in_flight += pkt->len;
+ }
+ /* Always reset this flag. */
+ qc->flags &= ~QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK) {
+ qel->pktns->flags &= ~QUIC_FL_PKTNS_ACK_REQUIRED;
+ qel->pktns->rx.nb_aepkts_since_last_ack = 0;
+ }
+
+ pkt->pktns = qel->pktns;
+
+ ret_pkt = pkt;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc, ret_pkt);
+ return ret_pkt;
+
+ err:
+ /* TODO: what about the frames which have been built
+ * for this packet.
+ */
+ free_quic_tx_packet(qc, pkt);
+ goto leave;
+}
+
+
+static void __quic_conn_init(void)
+{
+ ha_quic_meth = BIO_meth_new(0x666, "ha QUIC methods");
+}
+INITCALL0(STG_REGISTER, __quic_conn_init);
+
+static void __quic_conn_deinit(void)
+{
+ BIO_meth_free(ha_quic_meth);
+}
+REGISTER_POST_DEINIT(__quic_conn_deinit);
+
+/* QUIC datagram handler task. Pop all the datagrams attached to the <ctx>
+ * datagram handler list and parse all the QUIC packets found in each of them.
+ * Always returns the task <t>.
+ */
+struct task *quic_lstnr_dghdlr(struct task *t, void *ctx, unsigned int state)
+{
+ unsigned char *pos;
+ const unsigned char *end;
+ struct quic_dghdlr *dghdlr = ctx;
+ struct quic_dgram *dgram;
+ struct list *tasklist_head = NULL;
+ int max_dgrams = global.tune.maxpollevents;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT);
+
+ while ((dgram = MT_LIST_POP(&dghdlr->dgrams, typeof(dgram), handler_list))) {
+ pos = dgram->buf;
+ end = pos + dgram->len;
+ do {
+ struct quic_rx_packet *pkt;
+ struct quic_conn *qc;
+
+ /* TODO replace zalloc -> alloc. */
+ pkt = pool_zalloc(pool_head_quic_rx_packet);
+ if (!pkt) {
+ TRACE_ERROR("RX packet allocation failed", QUIC_EV_CONN_LPKT);
+ /* TODO count lost datagram. */
+ goto leave;
+ }
+
+ pkt->version = NULL;
+ pkt->pn_offset = 0;
+
+ /* Set flag if pkt is the first one in dgram. */
+ if (pos == dgram->buf)
+ pkt->flags |= QUIC_FL_RX_PACKET_DGRAM_FIRST;
+
+ LIST_INIT(&pkt->qc_rx_pkt_list);
+ pkt->time_received = now_ms;
+ quic_rx_packet_refinc(pkt);
+ if (quic_rx_pkt_parse(pkt, pos, end, dgram, dgram->owner))
+ goto next;
+
+ qc = quic_rx_pkt_retrieve_conn(pkt, dgram, dgram->owner);
+ if (!qc)
+ goto next;
+
+ BUG_ON(dgram->qc && dgram->qc != qc);
+ dgram->qc = qc;
+
+ if (qc_rx_check_closing(qc, pkt)) {
+ /* Skip the entire datagram. */
+ pkt->len = end - pos;
+ goto next;
+ }
+
+ qc_rx_pkt_handle(qc, pkt, dgram, pos, &tasklist_head);
+
+ next:
+ pos += pkt->len;
+ quic_rx_packet_refdec(pkt);
+
+ /* Free rejected packets */
+ if (!pkt->refcnt) {
+ BUG_ON(LIST_INLIST(&pkt->qc_rx_pkt_list));
+ pool_free(pool_head_quic_rx_packet, pkt);
+ }
+ } while (pos < end);
+
+ /* Increase the received bytes counter by the UDP datagram length
+ * if this datagram could be associated with a connection.
+ */
+ if (dgram->qc)
+ dgram->qc->rx.bytes += dgram->len;
+
+ /* Mark this datagram as consumed */
+ HA_ATOMIC_STORE(&dgram->buf, NULL);
+
+ if (--max_dgrams <= 0)
+ goto stop_here;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+
+ return t;
+
+ stop_here:
+ /* too much work done at once, come back here later */
+ if (!MT_LIST_ISEMPTY(&dghdlr->dgrams))
+ tasklet_wakeup((struct tasklet *)t);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return t;
+}
+
+/* Retrieve the DCID from a QUIC datagram or packet with <buf> as first octet.
+ * Returns 1 if succeeded, 0 if not.
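+ * In a long header, the DCID length byte follows the first byte and the
+ * 4-byte version field (QUIC_LONG_PACKET_DCID_OFF); in a short header, the
+ * DCID directly follows the first byte (QUIC_SHORT_PACKET_DCID_OFF) and has
+ * the locally chosen QUIC_HAP_CID_LEN length.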
+ */
+int quic_get_dgram_dcid(unsigned char *buf, const unsigned char *end,
+ unsigned char **dcid, size_t *dcid_len)
+{
+ int ret = 0, long_header;
+ size_t minlen, skip;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+
+ if (!(*buf & QUIC_PACKET_FIXED_BIT)) {
+ TRACE_PROTO("fixed bit not set", QUIC_EV_CONN_RXPKT);
+ goto err;
+ }
+
+ long_header = *buf & QUIC_PACKET_LONG_HEADER_BIT;
+ minlen = long_header ? QUIC_LONG_PACKET_MINLEN :
+ QUIC_SHORT_PACKET_MINLEN + QUIC_HAP_CID_LEN + QUIC_TLS_TAG_LEN;
+ skip = long_header ? QUIC_LONG_PACKET_DCID_OFF : QUIC_SHORT_PACKET_DCID_OFF;
+ if (end - buf < minlen)
+ goto err;
+
+ buf += skip;
+ *dcid_len = long_header ? *buf++ : QUIC_HAP_CID_LEN;
+ if (*dcid_len > QUIC_CID_MAXLEN || end - buf <= *dcid_len)
+ goto err;
+
+ *dcid = buf;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT);
+ return ret;
+
+ err:
+ TRACE_PROTO("wrong datagram", QUIC_EV_CONN_RXPKT);
+ goto leave;
+}
+
+/* Notify the MUX layer if alive about an imminent close of <qc>. */
+void qc_notify_close(struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ if (qc->flags & QUIC_FL_CONN_NOTIFY_CLOSE)
+ goto leave;
+
+ qc->flags |= QUIC_FL_CONN_NOTIFY_CLOSE;
+ /* wake up the MUX */
+ if (qc->mux_state == QC_MUX_READY && qc->conn->mux->wake) {
+ TRACE_STATE("connection closure notidfied to mux",
+ QUIC_FL_CONN_NOTIFY_CLOSE, qc);
+ qc->conn->mux->wake(qc->conn);
+ }
+ else
+ TRACE_STATE("connection closure not notidfied to mux",
+ QUIC_FL_CONN_NOTIFY_CLOSE, qc);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/quic_frame.c b/src/quic_frame.c
new file mode 100644
index 0000000..f98f594
--- /dev/null
+++ b/src/quic_frame.c
@@ -0,0 +1,1168 @@
+/*
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <string.h>
+
+#include <import/eb64tree.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_frame.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_quic
+
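+/* All the frame fields built and parsed below use quic_enc_int() and
+ * quic_dec_int(), which implement the QUIC variable-length integer encoding
+ * (RFC 9000, section 16): the two most significant bits of the first byte
+ * give the encoded length (00 -> 1 byte, 01 -> 2, 10 -> 4, 11 -> 8), the
+ * remaining bits the value. For instance, per RFC 9000 appendix A.1, 37
+ * encodes as the single byte 0x25 and 15293 as the two bytes 0x7b 0xbd.
+ */
+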
+const char *quic_frame_type_string(enum quic_frame_type ft)
+{
+ switch (ft) {
+ case QUIC_FT_PADDING:
+ return "PADDING";
+ case QUIC_FT_PING:
+ return "PING";
+ case QUIC_FT_ACK:
+ return "ACK";
+ case QUIC_FT_ACK_ECN:
+ return "ACK_ECN";
+ case QUIC_FT_RESET_STREAM:
+ return "RESET_STREAM";
+ case QUIC_FT_STOP_SENDING:
+ return "STOP_SENDING";
+ case QUIC_FT_CRYPTO:
+ return "CRYPTO";
+ case QUIC_FT_NEW_TOKEN:
+ return "NEW_TOKEN";
+
+ case QUIC_FT_STREAM_8:
+ return "STREAM_8";
+ case QUIC_FT_STREAM_9:
+ return "STREAM_9";
+ case QUIC_FT_STREAM_A:
+ return "STREAM_A";
+ case QUIC_FT_STREAM_B:
+ return "STREAM_B";
+ case QUIC_FT_STREAM_C:
+ return "STREAM_C";
+ case QUIC_FT_STREAM_D:
+ return "STREAM_D";
+ case QUIC_FT_STREAM_E:
+ return "STREAM_E";
+ case QUIC_FT_STREAM_F:
+ return "STREAM_F";
+
+ case QUIC_FT_MAX_DATA:
+ return "MAX_DATA";
+ case QUIC_FT_MAX_STREAM_DATA:
+ return "MAX_STREAM_DATA";
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ return "MAX_STREAMS_BIDI";
+ case QUIC_FT_MAX_STREAMS_UNI:
+ return "MAX_STREAMS_UNI";
+ case QUIC_FT_DATA_BLOCKED:
+ return "DATA_BLOCKED";
+ case QUIC_FT_STREAM_DATA_BLOCKED:
+ return "STREAM_DATA_BLOCKED";
+ case QUIC_FT_STREAMS_BLOCKED_BIDI:
+ return "STREAMS_BLOCKED_BIDI";
+ case QUIC_FT_STREAMS_BLOCKED_UNI:
+ return "STREAMS_BLOCKED_UNI";
+ case QUIC_FT_NEW_CONNECTION_ID:
+ return "NEW_CONNECTION_ID";
+ case QUIC_FT_RETIRE_CONNECTION_ID:
+ return "RETIRE_CONNECTION_ID";
+ case QUIC_FT_PATH_CHALLENGE:
+ return "PATH_CHALLENGE";
+ case QUIC_FT_PATH_RESPONSE:
+ return "PATH_RESPONSE";
+ case QUIC_FT_CONNECTION_CLOSE:
+ return "CONNECTION_CLOSE";
+ case QUIC_FT_CONNECTION_CLOSE_APP:
+ return "CONNECTION_CLOSE_APP";
+ case QUIC_FT_HANDSHAKE_DONE:
+ return "HANDSHAKE_DONE";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static void chunk_cc_phrase_appendf(struct buffer *buf,
+ const unsigned char *phr, size_t phrlen)
+{
+ chunk_appendf(buf, " reason_phrase: '");
+ while (phrlen--)
+ chunk_appendf(buf, "%c", *phr++);
+ chunk_appendf(buf, "'");
+}
+
+/* Add traces to <buf> depending on <frm> frame type. */
+void chunk_frm_appendf(struct buffer *buf, const struct quic_frame *frm)
+{
+ chunk_appendf(buf, " %s", quic_frame_type_string(frm->type));
+ switch (frm->type) {
+ case QUIC_FT_CRYPTO:
+ {
+ const struct quic_crypto *cf = &frm->crypto;
+ chunk_appendf(buf, " cfoff=%llu cflen=%llu",
+ (ull)cf->offset, (ull)cf->len);
+ break;
+ }
+ case QUIC_FT_RESET_STREAM:
+ {
+ const struct quic_reset_stream *rs = &frm->reset_stream;
+ chunk_appendf(buf, " id=%llu app_error_code=%llu final_size=%llu",
+ (ull)rs->id, (ull)rs->app_error_code, (ull)rs->final_size);
+ break;
+ }
+ case QUIC_FT_STOP_SENDING:
+ {
+ const struct quic_stop_sending *s = &frm->stop_sending;
+ chunk_appendf(&trace_buf, " id=%llu app_error_code=%llu",
+ (ull)s->id, (ull)s->app_error_code);
+ break;
+ }
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ const struct quic_stream *s = &frm->stream;
+ chunk_appendf(&trace_buf, " uni=%d fin=%d id=%llu off=%llu len=%llu",
+ !!(s->id & QUIC_STREAM_FRAME_ID_DIR_BIT),
+ !!(frm->type & QUIC_STREAM_FRAME_TYPE_FIN_BIT),
+ (ull)s->id, (ull)s->offset.key, (ull)s->len);
+ break;
+ }
+ case QUIC_FT_MAX_DATA:
+ {
+ const struct quic_max_data *s = &frm->max_data;
+ chunk_appendf(&trace_buf, " max_data=%llu", (ull)s->max_data);
+ break;
+ }
+ case QUIC_FT_MAX_STREAM_DATA:
+ {
+ const struct quic_max_stream_data *s = &frm->max_stream_data;
+ chunk_appendf(&trace_buf, " id=%llu max_stream_data=%llu",
+ (ull)s->id, (ull)s->max_stream_data);
+ break;
+ }
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ {
+ const struct quic_max_streams *s = &frm->max_streams_bidi;
+ chunk_appendf(&trace_buf, " max_streams=%llu", (ull)s->max_streams);
+ break;
+ }
+ case QUIC_FT_MAX_STREAMS_UNI:
+ {
+ const struct quic_max_streams *s = &frm->max_streams_uni;
+ chunk_appendf(&trace_buf, " max_streams=%llu", (ull)s->max_streams);
+ break;
+ }
+ case QUIC_FT_DATA_BLOCKED:
+ {
+ const struct quic_data_blocked *s = &frm->data_blocked;
+ chunk_appendf(&trace_buf, " limit=%llu", (ull)s->limit);
+ break;
+ }
+ case QUIC_FT_STREAM_DATA_BLOCKED:
+ {
+ const struct quic_stream_data_blocked *s = &frm->stream_data_blocked;
+ chunk_appendf(&trace_buf, " id=%llu limit=%llu",
+ (ull)s->id, (ull)s->limit);
+ break;
+ }
+ case QUIC_FT_STREAMS_BLOCKED_BIDI:
+ {
+ const struct quic_streams_blocked *s = &frm->streams_blocked_bidi;
+ chunk_appendf(&trace_buf, " limit=%llu", (ull)s->limit);
+ break;
+ }
+ case QUIC_FT_STREAMS_BLOCKED_UNI:
+ {
+ const struct quic_streams_blocked *s = &frm->streams_blocked_uni;
+ chunk_appendf(&trace_buf, " limit=%llu", (ull)s->limit);
+ break;
+ }
+ case QUIC_FT_RETIRE_CONNECTION_ID:
+ {
+ const struct quic_retire_connection_id *rci = &frm->retire_connection_id;
+ chunk_appendf(&trace_buf, " seq_num=%llu", (ull)rci->seq_num);
+ break;
+ }
+ case QUIC_FT_CONNECTION_CLOSE:
+ {
+ const struct quic_connection_close *cc = &frm->connection_close;
+ size_t plen = QUIC_MIN((size_t)cc->reason_phrase_len, sizeof cc->reason_phrase);
+ chunk_appendf(buf,
+ " error_code=%llu frame_type=%llu reason_phrase_len=%llu",
+ (ull)cc->error_code, (ull)cc->frame_type,
+ (ull)cc->reason_phrase_len);
+ if (plen)
+ chunk_cc_phrase_appendf(buf, cc->reason_phrase, plen);
+ break;
+ }
+ case QUIC_FT_CONNECTION_CLOSE_APP:
+ {
+ const struct quic_connection_close_app *cc = &frm->connection_close_app;
+ size_t plen = QUIC_MIN((size_t)cc->reason_phrase_len, sizeof cc->reason_phrase);
+ chunk_appendf(buf,
+ " error_code=%llu reason_phrase_len=%llu",
+ (ull)cc->error_code, (ull)cc->reason_phrase_len);
+ if (plen)
+ chunk_cc_phrase_appendf(buf, cc->reason_phrase, plen);
+ break;
+ }
+ }
+}
+
+/* Encode <frm> PADDING frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_padding_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_padding *padding = &frm->padding;
+
+ if (end - *buf < padding->len - 1)
+ return 0;
+
+ memset(*buf, 0, padding->len - 1);
+ *buf += padding->len - 1;
+
+ return 1;
+}
+
+/* Parse a PADDING frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_padding_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ const unsigned char *beg;
+ struct quic_padding *padding = &frm->padding;
+
+ beg = *buf;
+ padding->len = 1;
+ while (*buf < end && !**buf)
+ (*buf)++;
+ padding->len += *buf - beg;
+
+ return 1;
+}
+
+/* Encode a PING frame into <buf> buffer.
+ * Always succeeds.
+ */
+static int quic_build_ping_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ /* No field */
+ return 1;
+}
+
+/* Parse a PING frame from <buf> buffer with <end> as end into <frm> frame.
+ * Always succeeds.
+ */
+static int quic_parse_ping_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ /* No field */
+ return 1;
+}
+
+/* Encode an ACK frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
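+ *
+ * ACK ranges are encoded from the highest range down (RFC 9000, section
+ * 19.3): Largest Acknowledged, ACK Delay, ACK Range Count, First ACK Range,
+ * then one (Gap, ACK Range Length) pair per older range. Worked example
+ * with acknowledged ranges [3..5] and [8..10]: Largest = 10,
+ * First ACK Range = 10 - 8 = 2, then Gap = 8 - 5 - 2 = 1 (packets 6 and 7
+ * are missing) and ACK Range Length = 5 - 3 = 2.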
+ */
+static int quic_build_ack_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *qc)
+{
+ struct quic_tx_ack *tx_ack = &frm->tx_ack;
+ struct eb64_node *ar, *prev_ar;
+ struct quic_arng_node *ar_node, *prev_ar_node;
+
+ ar = eb64_last(&tx_ack->arngs->root);
+ ar_node = eb64_entry(ar, struct quic_arng_node, first);
+ TRACE_DEVEL("ack range", QUIC_EV_CONN_PRSAFRM,
+ qc,, &ar_node->last, &ar_node->first.key);
+ if (!quic_enc_int(buf, end, ar_node->last) ||
+ !quic_enc_int(buf, end, tx_ack->ack_delay) ||
+ !quic_enc_int(buf, end, tx_ack->arngs->sz - 1) ||
+ !quic_enc_int(buf, end, ar_node->last - ar_node->first.key))
+ return 0;
+
+ while ((prev_ar = eb64_prev(ar))) {
+ prev_ar_node = eb64_entry(prev_ar, struct quic_arng_node, first);
+ TRACE_DEVEL("ack range", QUIC_EV_CONN_PRSAFRM, qc,,
+ &prev_ar_node->last, &prev_ar_node->first.key);
+ if (!quic_enc_int(buf, end, ar_node->first.key - prev_ar_node->last - 2) ||
+ !quic_enc_int(buf, end, prev_ar_node->last - prev_ar_node->first.key))
+ return 0;
+
+ ar = prev_ar;
+ ar_node = eb64_entry(ar, struct quic_arng_node, first);
+ }
+
+ return 1;
+}
+
+/* Parse an ACK frame header from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_ack_frame_header(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ int ret;
+ struct quic_ack *ack = &frm->ack;
+
+ ret = quic_dec_int(&ack->largest_ack, buf, end);
+ if (!ret)
+ return 0;
+
+ ret = quic_dec_int(&ack->ack_delay, buf, end);
+ if (!ret)
+ return 0;
+
+ ret = quic_dec_int(&ack->ack_range_num, buf, end);
+ if (!ret)
+ return 0;
+
+ ret = quic_dec_int(&ack->first_ack_range, buf, end);
+ if (!ret)
+ return 0;
+
+ return 1;
+}
+
+/* Encode an ACK_ECN frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_ack_ecn_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_ack *ack = &frm->ack;
+
+ return quic_enc_int(buf, end, ack->largest_ack) &&
+ quic_enc_int(buf, end, ack->ack_delay) &&
+ quic_enc_int(buf, end, ack->first_ack_range) &&
+ quic_enc_int(buf, end, ack->ack_range_num);
+}
+
+/* Parse an ACK_ECN frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_ack_ecn_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_ack *ack = &frm->ack;
+
+ return quic_dec_int(&ack->largest_ack, buf, end) &&
+ quic_dec_int(&ack->ack_delay, buf, end) &&
+ quic_dec_int(&ack->first_ack_range, buf, end) &&
+ quic_dec_int(&ack->ack_range_num, buf, end);
+}
+
+/* Encode a RESET_STREAM frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_reset_stream_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_reset_stream *reset_stream = &frm->reset_stream;
+
+ return quic_enc_int(buf, end, reset_stream->id) &&
+ quic_enc_int(buf, end, reset_stream->app_error_code) &&
+ quic_enc_int(buf, end, reset_stream->final_size);
+}
+
+/* Parse a RESET_STREAM frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_reset_stream_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_reset_stream *reset_stream = &frm->reset_stream;
+
+ return quic_dec_int(&reset_stream->id, buf, end) &&
+ quic_dec_int(&reset_stream->app_error_code, buf, end) &&
+ quic_dec_int(&reset_stream->final_size, buf, end);
+}
+
+/* Encode a STOP_SENDING frame.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_stop_sending_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_stop_sending *stop_sending = &frm->stop_sending;
+
+ return quic_enc_int(buf, end, stop_sending->id) &&
+ quic_enc_int(buf, end, stop_sending->app_error_code);
+}
+
+/* Parse a STOP_SENDING frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_stop_sending_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_stop_sending *stop_sending = &frm->stop_sending;
+
+ return quic_dec_int(&stop_sending->id, buf, end) &&
+ quic_dec_int(&stop_sending->app_error_code, buf, end);
+}
+
+/* Encode a CRYPTO frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_crypto_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_crypto *crypto = &frm->crypto;
+ const struct quic_enc_level *qel = crypto->qel;
+ size_t offset, len;
+
+ if (!quic_enc_int(buf, end, crypto->offset) ||
+ !quic_enc_int(buf, end, crypto->len) || end - *buf < crypto->len)
+ return 0;
+
+ len = crypto->len;
+ offset = crypto->offset;
+ while (len) {
+ int idx;
+ size_t to_copy;
+ const unsigned char *data;
+
+ idx = offset >> QUIC_CRYPTO_BUF_SHIFT;
+ to_copy = qel->tx.crypto.bufs[idx]->sz - (offset & QUIC_CRYPTO_BUF_MASK);
+ if (to_copy > len)
+ to_copy = len;
+ data = qel->tx.crypto.bufs[idx]->data + (offset & QUIC_CRYPTO_BUF_MASK);
+ memcpy(*buf, data, to_copy);
+ *buf += to_copy;
+ offset += to_copy;
+ len -= to_copy;
+ }
+
+ return 1;
+}
+
+/* Parse a CRYPTO frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_crypto_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_crypto *crypto = &frm->crypto;
+
+ if (!quic_dec_int(&crypto->offset, buf, end) ||
+ !quic_dec_int(&crypto->len, buf, end) || end - *buf < crypto->len)
+ return 0;
+
+ crypto->data = *buf;
+ *buf += crypto->len;
+
+ return 1;
+}
+
+/* Encode a NEW_TOKEN frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_new_token_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_new_token *new_token = &frm->new_token;
+
+ if (!quic_enc_int(buf, end, new_token->len) || end - *buf < new_token->len)
+ return 0;
+
+ memcpy(*buf, new_token->data, new_token->len);
+ /* Advance the buffer pointer past the token data. */
+ *buf += new_token->len;
+
+ return 1;
+}
+
+/* Parse a NEW_TOKEN frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_new_token_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_new_token *new_token = &frm->new_token;
+
+ if (!quic_dec_int(&new_token->len, buf, end) || end - *buf < new_token->len)
+ return 0;
+
+ new_token->data = *buf;
+ *buf += new_token->len;
+
+ return 1;
+}
+
+/* Encode a STREAM frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_stream_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_stream *stream = &frm->stream;
+ const unsigned char *wrap;
+
+ if (!quic_enc_int(buf, end, stream->id) ||
+ ((frm->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT) && !quic_enc_int(buf, end, stream->offset.key)) ||
+ ((frm->type & QUIC_STREAM_FRAME_TYPE_LEN_BIT) &&
+ (!quic_enc_int(buf, end, stream->len) || end - *buf < stream->len)))
+ return 0;
+
+ wrap = (const unsigned char *)b_wrap(stream->buf);
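+ /* The stream data may come from a circular buffer: when it crosses the
+ * wrapping point, copy it in two chunks, first up to the end of the
+ * storage area, then the remainder from the buffer origin.
+ */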
+ if (stream->data + stream->len > wrap) {
+ size_t to_copy = wrap - stream->data;
+ memcpy(*buf, stream->data, to_copy);
+ *buf += to_copy;
+
+ to_copy = stream->len - to_copy;
+ memcpy(*buf, b_orig(stream->buf), to_copy);
+ *buf += to_copy;
+ }
+ else {
+ memcpy(*buf, stream->data, stream->len);
+ *buf += stream->len;
+ }
+
+ return 1;
+}
+
+/* Parse a STREAM frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_stream_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_stream *stream = &frm->stream;
+
+ if (!quic_dec_int(&stream->id, buf, end))
+ return 0;
+
+ /* Offset parsing */
+ if (!(frm->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT)) {
+ stream->offset.key = 0;
+ }
+ else if (!quic_dec_int((uint64_t *)&stream->offset.key, buf, end))
+ return 0;
+
+ /* Length parsing */
+ if (!(frm->type & QUIC_STREAM_FRAME_TYPE_LEN_BIT)) {
+ stream->len = end - *buf;
+ }
+ else if (!quic_dec_int(&stream->len, buf, end) || end - *buf < stream->len)
+ return 0;
+
+ stream->fin = (frm->type & QUIC_STREAM_FRAME_TYPE_FIN_BIT);
+
+ stream->data = *buf;
+ *buf += stream->len;
+
+ return 1;
+}
+
+/* Encode a MAX_DATA frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_max_data_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_max_data *max_data = &frm->max_data;
+
+ return quic_enc_int(buf, end, max_data->max_data);
+}
+
+/* Parse a MAX_DATA frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_data_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_max_data *max_data = &frm->max_data;
+
+ return quic_dec_int(&max_data->max_data, buf, end);
+}
+
+/* Encode a MAX_STREAM_DATA frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_max_stream_data_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_max_stream_data *max_stream_data = &frm->max_stream_data;
+
+ return quic_enc_int(buf, end, max_stream_data->id) &&
+ quic_enc_int(buf, end, max_stream_data->max_stream_data);
+}
+
+/* Parse a MAX_STREAM_DATA frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_stream_data_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_max_stream_data *max_stream_data = &frm->max_stream_data;
+
+ return quic_dec_int(&max_stream_data->id, buf, end) &&
+ quic_dec_int(&max_stream_data->max_stream_data, buf, end);
+}
+
+/* Encode a MAX_STREAMS frame for bidirectional streams into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_max_streams_bidi_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_max_streams *max_streams_bidi = &frm->max_streams_bidi;
+
+ return quic_enc_int(buf, end, max_streams_bidi->max_streams);
+}
+
+/* Parse a MAX_STREAMS frame for bidirectional streams from <buf> buffer with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_streams_bidi_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_max_streams *max_streams_bidi = &frm->max_streams_bidi;
+
+ return quic_dec_int(&max_streams_bidi->max_streams, buf, end);
+}
+
+/* Encode a MAX_STREAMS frame for unidirectional streams into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_max_streams_uni_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_max_streams *max_streams_uni = &frm->max_streams_uni;
+
+ return quic_enc_int(buf, end, max_streams_uni->max_streams);
+}
+
+/* Parse a MAX_STREAMS frame for unidirectional streams from <buf> buffer with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_streams_uni_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_max_streams *max_streams_uni = &frm->max_streams_uni;
+
+ return quic_dec_int(&max_streams_uni->max_streams, buf, end);
+}
+
+/* Encode a DATA_BLOCKED frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_data_blocked_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_data_blocked *data_blocked = &frm->data_blocked;
+
+ return quic_enc_int(buf, end, data_blocked->limit);
+}
+
+/* Parse a DATA_BLOCKED frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_data_blocked_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_data_blocked *data_blocked = &frm->data_blocked;
+
+ return quic_dec_int(&data_blocked->limit, buf, end);
+}
+
+/* Encode a STREAM_DATA_BLOCKED into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_stream_data_blocked_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_stream_data_blocked *stream_data_blocked = &frm->stream_data_blocked;
+
+ return quic_enc_int(buf, end, stream_data_blocked->id) &&
+ quic_enc_int(buf, end, stream_data_blocked->limit);
+}
+
+/* Parse a STREAM_DATA_BLOCKED frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_stream_data_blocked_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_stream_data_blocked *stream_data_blocked = &frm->stream_data_blocked;
+
+ return quic_dec_int(&stream_data_blocked->id, buf, end) &&
+ quic_dec_int(&stream_data_blocked->limit, buf, end);
+}
+
+/* Encode a STREAMS_BLOCKED frame for bidirectional streams into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_streams_blocked_bidi_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_streams_blocked *streams_blocked_bidi = &frm->streams_blocked_bidi;
+
+ return quic_enc_int(buf, end, streams_blocked_bidi->limit);
+}
+
+/* Parse a STREAMS_BLOCKED frame for bidirectional streams from <buf> buffer with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_streams_blocked_bidi_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_streams_blocked *streams_blocked_bidi = &frm->streams_blocked_bidi;
+
+ return quic_dec_int(&streams_blocked_bidi->limit, buf, end);
+}
+
+/* Encode a STREAMS_BLOCKED frame for unidirectional streams into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_streams_blocked_uni_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_streams_blocked *streams_blocked_uni = &frm->streams_blocked_uni;
+
+ return quic_enc_int(buf, end, streams_blocked_uni->limit);
+}
+
+/* Parse a STREAMS_BLOCKED frame for unidirectional streams from <buf> buffer with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_streams_blocked_uni_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_streams_blocked *streams_blocked_uni = &frm->streams_blocked_uni;
+
+ return quic_dec_int(&streams_blocked_uni->limit, buf, end);
+}
+
+/* Encode a NEW_CONNECTION_ID frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_new_connection_id_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_new_connection_id *new_cid = &frm->new_connection_id;
+
+ if (!quic_enc_int(buf, end, new_cid->seq_num) ||
+ !quic_enc_int(buf, end, new_cid->retire_prior_to) ||
+ end - *buf < sizeof new_cid->cid.len + new_cid->cid.len + QUIC_STATELESS_RESET_TOKEN_LEN)
+ return 0;
+
+ *(*buf)++ = new_cid->cid.len;
+
+ if (new_cid->cid.len) {
+ memcpy(*buf, new_cid->cid.data, new_cid->cid.len);
+ *buf += new_cid->cid.len;
+ }
+ memcpy(*buf, new_cid->stateless_reset_token, QUIC_STATELESS_RESET_TOKEN_LEN);
+ *buf += QUIC_STATELESS_RESET_TOKEN_LEN;
+
+ return 1;
+}
+
+/* Parse a NEW_CONNECTION_ID frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_new_connection_id_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_new_connection_id *new_cid = &frm->new_connection_id;
+
+ if (!quic_dec_int(&new_cid->seq_num, buf, end) ||
+ !quic_dec_int(&new_cid->retire_prior_to, buf, end) || end <= *buf)
+ return 0;
+
+ new_cid->cid.len = *(*buf)++;
+ if (end - *buf < new_cid->cid.len + QUIC_STATELESS_RESET_TOKEN_LEN)
+ return 0;
+
+ if (new_cid->cid.len) {
+ new_cid->cid.data = *buf;
+ *buf += new_cid->cid.len;
+ }
+ new_cid->stateless_reset_token = *buf;
+ *buf += QUIC_STATELESS_RESET_TOKEN_LEN;
+
+ return 1;
+}
+
+/* Encode a RETIRE_CONNECTION_ID frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_retire_connection_id_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_retire_connection_id *retire_connection_id = &frm->retire_connection_id;
+
+ return quic_enc_int(buf, end, retire_connection_id->seq_num);
+}
+
+/* Parse a RETIRE_CONNECTION_ID frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_retire_connection_id_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_retire_connection_id *retire_connection_id = &frm->retire_connection_id;
+
+ return quic_dec_int(&retire_connection_id->seq_num, buf, end);
+}
+
+/* Encode a PATH_CHALLENGE frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_path_challenge_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_path_challenge *path_challenge = &frm->path_challenge;
+
+ if (end - *buf < sizeof path_challenge->data)
+ return 0;
+
+ memcpy(*buf, path_challenge->data, sizeof path_challenge->data);
+ *buf += sizeof path_challenge->data;
+
+ return 1;
+}
+
+/* Parse a PATH_CHALLENGE frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_path_challenge_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_path_challenge *path_challenge = &frm->path_challenge;
+
+ if (end - *buf < sizeof path_challenge->data)
+ return 0;
+
+ memcpy(path_challenge->data, *buf, sizeof path_challenge->data);
+ *buf += sizeof path_challenge->data;
+
+ return 1;
+}
+
+
+/* Encode a PATH_RESPONSE frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_path_response_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_path_challenge_response *path_challenge_response = &frm->path_challenge_response;
+
+ if (end - *buf < sizeof path_challenge_response->data)
+ return 0;
+
+ memcpy(*buf, path_challenge_response->data, sizeof path_challenge_response->data);
+ *buf += sizeof path_challenge_response->data;
+
+ return 1;
+}
+
+/* Parse a PATH_RESPONSE frame from <buf> buffer with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_path_response_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ struct quic_path_challenge_response *path_challenge_response = &frm->path_challenge_response;
+
+ if (end - *buf < sizeof path_challenge_response->data)
+ return 0;
+
+ memcpy(path_challenge_response->data, *buf, sizeof path_challenge_response->data);
+ *buf += sizeof path_challenge_response->data;
+
+ return 1;
+}
+
+/* Encode a CONNECTION_CLOSE frame at QUIC layer into <buf> buffer.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for the application layer
+ * and another at QUIC layer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_connection_close_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_connection_close *cc = &frm->connection_close;
+
+ if (!quic_enc_int(buf, end, cc->error_code) ||
+ !quic_enc_int(buf, end, cc->frame_type) ||
+ !quic_enc_int(buf, end, cc->reason_phrase_len) ||
+ end - *buf < cc->reason_phrase_len)
+ return 0;
+
+ memcpy(*buf, cc->reason_phrase, cc->reason_phrase_len);
+ *buf += cc->reason_phrase_len;
+
+ return 1;
+}
+
+/* Parse a CONNECTION_CLOSE frame at QUIC layer from <buf> buffer with <end> as end into <frm> frame.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for the application layer
+ * and another at QUIC layer.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_connection_close_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ size_t plen;
+ struct quic_connection_close *cc = &frm->connection_close;
+
+ if (!quic_dec_int(&cc->error_code, buf, end) ||
+ !quic_dec_int(&cc->frame_type, buf, end) ||
+ !quic_dec_int(&cc->reason_phrase_len, buf, end) ||
+ end - *buf < cc->reason_phrase_len)
+ return 0;
+
+ plen = QUIC_MIN((size_t)cc->reason_phrase_len, sizeof cc->reason_phrase);
+ memcpy(cc->reason_phrase, *buf, plen);
+ *buf += cc->reason_phrase_len;
+
+ return 1;
+}
+
+/* Encode a CONNECTION_CLOSE frame at application layer into <buf> buffer.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for application layer
+ * and another at QUIC layer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ */
+static int quic_build_connection_close_app_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct quic_connection_close_app *cc = &frm->connection_close_app;
+
+ if (!quic_enc_int(buf, end, cc->error_code) ||
+ !quic_enc_int(buf, end, cc->reason_phrase_len) ||
+ end - *buf < cc->reason_phrase_len)
+ return 0;
+
+ memcpy(*buf, cc->reason_phrase, cc->reason_phrase_len);
+ *buf += cc->reason_phrase_len;
+
+ return 1;
+}
+
+/* Parse a CONNECTION_CLOSE frame at application layer from <buf> buffer with <end> as end into <frm> frame.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for the application layer
+ * and another at QUIC layer.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_connection_close_app_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ size_t plen;
+ struct quic_connection_close_app *cc = &frm->connection_close_app;
+
+ if (!quic_dec_int(&cc->error_code, buf, end) ||
+ !quic_dec_int(&cc->reason_phrase_len, buf, end) ||
+ end - *buf < cc->reason_phrase_len)
+ return 0;
+
+ plen = QUIC_MIN((size_t)cc->reason_phrase_len, sizeof cc->reason_phrase);
+ memcpy(cc->reason_phrase, *buf, plen);
+ *buf += cc->reason_phrase_len;
+
+ return 1;
+}
+
+/* Encode a HANDSHAKE_DONE frame into <buf> buffer.
+ * Always succeeds.
+ */
+static int quic_build_handshake_done_frame(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ /* No field */
+ return 1;
+}
+
+/* Parse a HANDSHAKE_DONE frame at QUIC layer from <buf> buffer with <end> as end into <frm> frame.
+ * Always succeeds.
+ */
+static int quic_parse_handshake_done_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end)
+{
+ /* No field */
+ return 1;
+}
+
+struct quic_frame_builder {
+ int (*func)(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn);
+ uint32_t mask;
+ unsigned char flags;
+};
+
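+/* The frame builders and parsers below are indexed by frame type. The <mask>
+ * bitmask tells in which packet types a frame may appear, one bit per
+ * QUIC_PACKET_TYPE_* value: in the IH01 naming, I, H, 0 and 1 stand for
+ * Initial, Handshake, 0-RTT and 1-RTT packets, an underscore for a packet
+ * type in which the frame is not allowed (RFC 9000, section 12.4).
+ */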
+const struct quic_frame_builder quic_frame_builders[] = {
+ [QUIC_FT_PADDING] = { .func = quic_build_padding_frame, .flags = QUIC_FL_TX_PACKET_PADDING, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_PING] = { .func = quic_build_ping_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_ACK] = { .func = quic_build_ack_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_ACK_ECN] = { .func = quic_build_ack_ecn_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_RESET_STREAM] = { .func = quic_build_reset_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STOP_SENDING] = { .func = quic_build_stop_sending_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CRYPTO] = { .func = quic_build_crypto_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_NEW_TOKEN] = { .func = quic_build_new_token_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+ [QUIC_FT_STREAM_8] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_9] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_A] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_B] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_C] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_D] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_E] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_F] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_DATA] = { .func = quic_build_max_data_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAM_DATA] = { .func = quic_build_max_stream_data_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_BIDI] = { .func = quic_build_max_streams_bidi_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_UNI] = { .func = quic_build_max_streams_uni_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_DATA_BLOCKED] = { .func = quic_build_data_blocked_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_DATA_BLOCKED] = { .func = quic_build_stream_data_blocked_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_BIDI] = { .func = quic_build_streams_blocked_bidi_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_UNI] = { .func = quic_build_streams_blocked_uni_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_NEW_CONNECTION_ID] = { .func = quic_build_new_connection_id_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_RETIRE_CONNECTION_ID] = { .func = quic_build_retire_connection_id_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_CHALLENGE] = { .func = quic_build_path_challenge_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_RESPONSE] = { .func = quic_build_path_response_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE] = { .func = quic_build_connection_close_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE_APP] = { .func = quic_build_connection_close_app_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_HANDSHAKE_DONE] = { .func = quic_build_handshake_done_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+};
+
+struct quic_frame_parser {
+ int (*func)(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **buf, const unsigned char *end);
+ uint32_t mask;
+ unsigned char flags;
+};
+
+const struct quic_frame_parser quic_frame_parsers[] = {
+ [QUIC_FT_PADDING] = { .func = quic_parse_padding_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_PING] = { .func = quic_parse_ping_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_ACK] = { .func = quic_parse_ack_frame_header, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_ACK_ECN] = { .func = quic_parse_ack_ecn_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_RESET_STREAM] = { .func = quic_parse_reset_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STOP_SENDING] = { .func = quic_parse_stop_sending_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CRYPTO] = { .func = quic_parse_crypto_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_NEW_TOKEN] = { .func = quic_parse_new_token_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+ [QUIC_FT_STREAM_8] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_9] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_A] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_B] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_C] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_D] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_E] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_F] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_DATA] = { .func = quic_parse_max_data_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAM_DATA] = { .func = quic_parse_max_stream_data_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_BIDI] = { .func = quic_parse_max_streams_bidi_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_UNI] = { .func = quic_parse_max_streams_uni_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_DATA_BLOCKED] = { .func = quic_parse_data_blocked_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_DATA_BLOCKED] = { .func = quic_parse_stream_data_blocked_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_BIDI] = { .func = quic_parse_streams_blocked_bidi_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_UNI] = { .func = quic_parse_streams_blocked_uni_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_NEW_CONNECTION_ID] = { .func = quic_parse_new_connection_id_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_RETIRE_CONNECTION_ID] = { .func = quic_parse_retire_connection_id_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_CHALLENGE] = { .func = quic_parse_path_challenge_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_RESPONSE] = { .func = quic_parse_path_response_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE] = { .func = quic_parse_connection_close_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE_APP] = { .func = quic_parse_connection_close_app_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_HANDSHAKE_DONE] = { .func = quic_parse_handshake_done_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+};
+
+/* Decode a QUIC frame from <buf> buffer into <frm> frame.
+ * Returns 1 if succeeded (enough data to parse the frame), 0 if not.
+ */
+int qc_parse_frm(struct quic_frame *frm, struct quic_rx_packet *pkt,
+ const unsigned char **buf, const unsigned char *end,
+ struct quic_conn *qc)
+{
+ int ret = 0;
+ const struct quic_frame_parser *parser;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSFRM, qc);
+ if (end <= *buf) {
+ TRACE_DEVEL("wrong frame", QUIC_EV_CONN_PRSFRM, qc);
+ goto leave;
+ }
+
+ frm->type = *(*buf)++;
+ if (frm->type >= QUIC_FT_MAX) {
+ TRACE_DEVEL("wrong frame type", QUIC_EV_CONN_PRSFRM, qc, frm);
+ goto leave;
+ }
+
+ parser = &quic_frame_parsers[frm->type];
+ if (!(parser->mask & (1U << pkt->type))) {
+ TRACE_DEVEL("unauthorized frame", QUIC_EV_CONN_PRSFRM, qc, frm);
+ goto leave;
+ }
+
+ TRACE_PROTO("frame", QUIC_EV_CONN_PRSFRM, qc, frm);
+ if (!parser->func(frm, qc, buf, end)) {
+ TRACE_DEVEL("parsing error", QUIC_EV_CONN_PRSFRM, qc, frm);
+ goto leave;
+ }
+
+ pkt->flags |= parser->flags;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSFRM, qc);
+ return ret;
+}
+
+/* Encode <frm> QUIC frame into <buf> buffer.
+ * Returns 1 if succeeded (enough room in <buf> to encode the frame), 0 if not.
+ * The buffer is updated to point to one byte past the end of the built frame
+ * only on success.
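+ *
+ * Minimal usage sketch, mirroring what the TX path does when a PING probe
+ * frame must be sent:
+ *     struct quic_frame frm = { .type = QUIC_FT_PING, };
+ *     if (!qc_build_frm(&pos, end, &frm, pkt, qc))
+ *         goto no_room;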
+ */
+int qc_build_frm(unsigned char **buf, const unsigned char *end,
+ struct quic_frame *frm, struct quic_tx_packet *pkt,
+ struct quic_conn *qc)
+{
+ int ret = 0;
+ const struct quic_frame_builder *builder;
+ unsigned char *pos = *buf;
+
+ TRACE_ENTER(QUIC_EV_CONN_BFRM, qc);
+ builder = &quic_frame_builders[frm->type];
+ if (!(builder->mask & (1U << pkt->type))) {
+ /* XXX It is a bug to send an unauthorized frame with such a packet type XXX */
+ TRACE_ERROR("unauthorized frame", QUIC_EV_CONN_BFRM, qc, frm);
+ BUG_ON(!(builder->mask & (1U << pkt->type)));
+ }
+
+ if (end <= pos) {
+ TRACE_DEVEL("not enough room", QUIC_EV_CONN_BFRM, qc, frm);
+ goto leave;
+ }
+
+ TRACE_PROTO("frame", QUIC_EV_CONN_BFRM, qc, frm);
+ *pos++ = frm->type;
+ if (!quic_frame_builders[frm->type].func(&pos, end, frm, qc)) {
+ TRACE_DEVEL("frame building error", QUIC_EV_CONN_BFRM, qc, frm);
+ goto leave;
+ }
+
+ pkt->flags |= builder->flags;
+ *buf = pos;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_BFRM, qc);
+ return ret;
+}
+
diff --git a/src/quic_loss.c b/src/quic_loss.c
new file mode 100644
index 0000000..a92b699
--- /dev/null
+++ b/src/quic_loss.c
@@ -0,0 +1,205 @@
+#include <import/eb64tree.h>
+
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_loss.h>
+#include <haproxy/quic_tls.h>
+
+#include <haproxy/atomic.h>
+#include <haproxy/list.h>
+#include <haproxy/ticks.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_quic
+
+/* Update <ql> QUIC loss information with a new <rtt> measurement and
+ * <ack_delay>, received on ACK frame receipt. <ack_delay> MUST be
+ * min(ack->ack_delay, max_ack_delay) before the handshake is confirmed.
+ */
+void quic_loss_srtt_update(struct quic_loss *ql,
+ unsigned int rtt, unsigned int ack_delay,
+ struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_RTTUPDT, qc);
+ TRACE_DEVEL("Loss info update", QUIC_EV_CONN_RTTUPDT, qc, &rtt, &ack_delay, ql);
+
+ ql->latest_rtt = rtt;
+ if (!ql->rtt_min) {
+ /* No previous measurement. */
+ ql->srtt = rtt << 3;
+ /* rttval <- rtt / 2 or 4*rttval <- 2*rtt. */
+ ql->rtt_var = rtt << 1;
+ ql->rtt_min = rtt;
+ }
+ else {
+ int diff;
+
+ ql->rtt_min = QUIC_MIN(rtt, ql->rtt_min);
+ /* Specific to QUIC (RTT adjustment). */
+ if (ack_delay && rtt > ql->rtt_min + ack_delay)
+ rtt -= ack_delay;
+ diff = ql->srtt - rtt;
+ if (diff < 0)
+ diff = -diff;
+ /* 4*rttvar = 3*rttvar + |diff| */
+ ql->rtt_var += diff - (ql->rtt_var >> 2);
+ /* 8*srtt = 7*srtt + rtt */
+ ql->srtt += rtt - (ql->srtt >> 3);
+ }
+
+ TRACE_DEVEL("Loss info update", QUIC_EV_CONN_RTTUPDT, qc,,, ql);
+ TRACE_LEAVE(QUIC_EV_CONN_RTTUPDT, qc);
+}
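+
+/* For reference, the updates above implement the RFC 9002 5.3 estimators:
+ *
+ *   smoothed_rtt = 7/8 * smoothed_rtt + 1/8 * adjusted_rtt
+ *   rttvar       = 3/4 * rttvar + 1/4 * rttvar_sample
+ *
+ * in fixed point, with <srtt> stored as 8*smoothed_rtt and <rtt_var> stored
+ * as 4*rttvar, so that both can be maintained with shifts and additions and
+ * 4*rttvar can be used directly when computing the PTO.
+ */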
+
+/* Returns for <qc> QUIC connection the first packet number space which
+ * experienced packet loss, if any, or a packet number space with
+ * TICK_ETERNITY as packet loss time if none did.
+ */
+struct quic_pktns *quic_loss_pktns(struct quic_conn *qc)
+{
+ enum quic_tls_pktns i;
+ struct quic_pktns *pktns;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPTO, qc);
+
+ pktns = &qc->pktns[QUIC_TLS_PKTNS_INITIAL];
+ TRACE_DEVEL("pktns", QUIC_EV_CONN_SPTO, qc, pktns);
+ for (i = QUIC_TLS_PKTNS_HANDSHAKE; i < QUIC_TLS_PKTNS_MAX; i++) {
+ TRACE_DEVEL("pktns", QUIC_EV_CONN_SPTO, qc, &qc->pktns[i]);
+ if (!tick_isset(pktns->tx.loss_time) ||
+ qc->pktns[i].tx.loss_time < pktns->tx.loss_time)
+ pktns = &qc->pktns[i];
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc);
+
+ return pktns;
+}
+
+/* Returns for <qc> QUIC connection the first packet number space for which
+ * the PTO must be armed, if any, or a packet number space with TICK_ETERNITY
+ * as PTO value if none qualifies.
+ */
+struct quic_pktns *quic_pto_pktns(struct quic_conn *qc,
+ int handshake_confirmed,
+ unsigned int *pto)
+{
+ int i;
+ unsigned int duration, lpto;
+ struct quic_loss *ql = &qc->path->loss;
+ struct quic_pktns *pktns, *p;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPTO, qc);
+ duration =
+ (ql->srtt >> 3) +
+ (QUIC_MAX(ql->rtt_var, QUIC_TIMER_GRANULARITY) << ql->pto_count);
+
+ if (!qc->path->in_flight) {
+ struct quic_enc_level *hel;
+
+ hel = &qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE];
+ if (quic_tls_has_tx_sec(hel)) {
+ pktns = &qc->pktns[QUIC_TLS_PKTNS_HANDSHAKE];
+ }
+ else {
+ pktns = &qc->pktns[QUIC_TLS_PKTNS_INITIAL];
+ }
+ lpto = tick_add(now_ms, duration);
+ goto out;
+ }
+
+ lpto = TICK_ETERNITY;
+ pktns = p = &qc->pktns[QUIC_TLS_PKTNS_INITIAL];
+
+ for (i = QUIC_TLS_PKTNS_INITIAL; i < QUIC_TLS_PKTNS_MAX; i++) {
+ unsigned int tmp_pto;
+
+ if (!qc->pktns[i].tx.in_flight)
+ continue;
+
+ if (i == QUIC_TLS_PKTNS_01RTT) {
+ if (!handshake_confirmed) {
+ TRACE_STATE("handshake not already completed", QUIC_EV_CONN_SPTO, qc);
+ pktns = p;
+ goto out;
+ }
+
+ duration += qc->max_ack_delay << ql->pto_count;
+ }
+
+ p = &qc->pktns[i];
+ tmp_pto = tick_add(p->tx.time_of_last_eliciting, duration);
+ if (!tick_isset(lpto) || tmp_pto < lpto) {
+ lpto = tmp_pto;
+ pktns = p;
+ }
+ TRACE_DEVEL("pktns", QUIC_EV_CONN_SPTO, qc, p);
+ }
+
+ out:
+ if (pto)
+ *pto = lpto;
+ TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc, pktns, &duration);
+
+ return pktns;
+}
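+
+/* Illustration: the <duration> computed above is a variant of RFC 9002 6.2,
+ * with the exponential backoff applied to the variance term:
+ *
+ *   PTO = smoothed_rtt + (max(4*rttvar, kGranularity) << pto_count)
+ *
+ * plus max_ack_delay for the 01-RTT packet number space once the handshake
+ * is confirmed. For instance with smoothed_rtt=100ms, rttvar=25ms and two
+ * consecutive expirations (pto_count=2): PTO = 100 + (4*25 << 2) = 500ms.
+ */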
+
+/* Look for packet loss among the packets sent in <pktns> packet number space
+ * of <qc> connection. Packets deemed lost are removed from their tree and
+ * appended to <lost_pkts> list; for the others, the <loss_time> of the packet
+ * number space is set to the time at which the oldest unacknowledged packet
+ * will be deemed lost.
+ * Should be called after having received an ACK frame with newly acknowledged
+ * packets or when the loss detection timer has expired.
+ * Always succeeds.
+ */
+void qc_packet_loss_lookup(struct quic_pktns *pktns, struct quic_conn *qc,
+ struct list *lost_pkts)
+{
+ struct eb_root *pkts;
+ struct eb64_node *node;
+ struct quic_loss *ql;
+ unsigned int loss_delay;
+
+ TRACE_ENTER(QUIC_EV_CONN_PKTLOSS, qc, pktns);
+ pkts = &pktns->tx.pkts;
+ pktns->tx.loss_time = TICK_ETERNITY;
+ if (eb_is_empty(pkts))
+ goto out;
+
+ ql = &qc->path->loss;
+ loss_delay = QUIC_MAX(ql->latest_rtt, ql->srtt >> 3);
+ loss_delay = QUIC_MAX(loss_delay, MS_TO_TICKS(QUIC_TIMER_GRANULARITY)) *
+ QUIC_LOSS_TIME_THRESHOLD_MULTIPLICAND / QUIC_LOSS_TIME_THRESHOLD_DIVISOR;
+
+ node = eb64_first(pkts);
+ while (node) {
+ struct quic_tx_packet *pkt;
+ int64_t largest_acked_pn;
+ unsigned int loss_time_limit, time_sent;
+
+ pkt = eb64_entry(&node->node, struct quic_tx_packet, pn_node);
+ largest_acked_pn = pktns->rx.largest_acked_pn;
+ node = eb64_next(node);
+ if ((int64_t)pkt->pn_node.key > largest_acked_pn)
+ break;
+
+ time_sent = pkt->time_sent;
+ loss_time_limit = tick_add(time_sent, loss_delay);
+ if (tick_is_le(loss_time_limit, now_ms) ||
+ (int64_t)largest_acked_pn >= pkt->pn_node.key + QUIC_LOSS_PACKET_THRESHOLD) {
+ eb64_delete(&pkt->pn_node);
+ LIST_APPEND(lost_pkts, &pkt->list);
+ HA_ATOMIC_INC(&qc->prx_counters->lost_pkt);
+ }
+ else {
+ if (tick_isset(pktns->tx.loss_time))
+ pktns->tx.loss_time = tick_first(pktns->tx.loss_time, loss_time_limit);
+ else
+ pktns->tx.loss_time = loss_time_limit;
+ break;
+ }
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_PKTLOSS, qc, pktns, lost_pkts);
+}
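+
+/* Illustration: the two loss conditions applied above are those of RFC 9002
+ * 6.1, assuming the recommended constants (time threshold 9/8, packet
+ * threshold 3): a packet is deemed lost when it was sent more than
+ * 9/8 * max(latest_rtt, smoothed_rtt) ago, or when its packet number is lower
+ * than or equal to largest_acked - 3.
+ */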
+
diff --git a/src/quic_sock.c b/src/quic_sock.c
new file mode 100644
index 0000000..11008fb
--- /dev/null
+++ b/src/quic_sock.c
@@ -0,0 +1,595 @@
+/*
+ * QUIC socket management.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE /* required for struct in6_pktinfo */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/connection.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global-t.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/pool.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/session.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+/* Retrieve a connection's source address. Returns -1 on failure. */
+int quic_sock_get_src(struct connection *conn, struct sockaddr *addr, socklen_t len)
+{
+ struct quic_conn *qc;
+
+ if (!conn || !conn->handle.qc)
+ return -1;
+
+ qc = conn->handle.qc;
+ if (conn_is_back(conn)) {
+ /* no source address defined for outgoing connections for now */
+ return -1;
+ } else {
+ /* front connection, return the peer's address */
+ if (len > sizeof(qc->peer_addr))
+ len = sizeof(qc->peer_addr);
+ memcpy(addr, &qc->peer_addr, len);
+ return 0;
+ }
+}
+
+/* Retrieve a connection's destination address. Returns -1 on failure. */
+int quic_sock_get_dst(struct connection *conn, struct sockaddr *addr, socklen_t len)
+{
+ struct quic_conn *qc;
+
+ if (!conn || !conn->handle.qc)
+ return -1;
+
+ qc = conn->handle.qc;
+ if (conn_is_back(conn)) {
+ /* back connection, return the peer's address */
+ if (len > sizeof(qc->peer_addr))
+ len = sizeof(qc->peer_addr);
+ memcpy(addr, &qc->peer_addr, len);
+ } else {
+ struct sockaddr_storage *from;
+
+ /* Return listener address if IP_PKTINFO or friends are not
+ * supported by the socket.
+ */
+ BUG_ON(!qc->li);
+ from = is_addr(&qc->local_addr) ? &qc->local_addr :
+ &qc->li->rx.addr;
+ if (len > sizeof(*from))
+ len = sizeof(*from);
+ memcpy(addr, from, len);
+ }
+ return 0;
+}
+
+/*
+ * Inspired by session_accept_fd().
+ * Instantiate a new connection (connection struct) to be attached to <qc>
+ * QUIC connection of <l> listener.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int new_quic_cli_conn(struct quic_conn *qc, struct listener *l,
+ struct sockaddr_storage *saddr)
+{
+ struct connection *cli_conn;
+
+ if (unlikely((cli_conn = conn_new(&l->obj_type)) == NULL))
+ goto out;
+
+ if (!sockaddr_alloc(&cli_conn->src, saddr, sizeof *saddr))
+ goto out_free_conn;
+
+ cli_conn->flags |= CO_FL_FDLESS;
+ qc->conn = cli_conn;
+ cli_conn->handle.qc = qc;
+
+ cli_conn->target = &l->obj_type;
+
+ return 1;
+
+ out_free_conn:
+ qc->conn = NULL;
+ conn_stop_tracking(cli_conn);
+ conn_xprt_close(cli_conn);
+ conn_free(cli_conn);
+ out:
+
+ return 0;
+}
+
+/* Tests if the receiver supports accepting connections. Returns positive on
+ * success, 0 if not possible
+ */
+int quic_sock_accepting_conn(const struct receiver *rx)
+{
+ return 1;
+}
+
+/* Accept an incoming connection from listener <l>, and return it, as well as
+ * a CO_AC_* status code into <status> if not null. Null is returned on error.
+ * <l> must be a valid listener with a valid frontend.
+ */
+struct connection *quic_sock_accept_conn(struct listener *l, int *status)
+{
+ struct quic_conn *qc;
+ struct li_per_thread *lthr = &l->per_thr[tid];
+
+ qc = MT_LIST_POP(&lthr->quic_accept.conns, struct quic_conn *, accept_list);
+ if (!qc)
+ goto done;
+
+ if (!new_quic_cli_conn(qc, l, &qc->peer_addr))
+ goto err;
+
+ done:
+ *status = CO_AC_DONE;
+ return qc ? qc->conn : NULL;
+
+ err:
+ /* in case of error reinsert the element to process it later. */
+ MT_LIST_INSERT(&lthr->quic_accept.conns, &qc->accept_list);
+
+ *status = CO_AC_PAUSE;
+ return NULL;
+}
+
+/* Retrieve the DCID from the datagram found in <buf> and deliver it to the
+ * correct datagram handler.
+ * Return 1 if a correct datagram could be found, 0 if not.
+ */
+static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner,
+ struct sockaddr_storage *saddr,
+ struct sockaddr_storage *daddr,
+ struct quic_dgram *new_dgram, struct list *dgrams)
+{
+ struct quic_dgram *dgram;
+ unsigned char *dcid;
+ size_t dcid_len;
+ int cid_tid;
+
+ if (!len || !quic_get_dgram_dcid(buf, buf + len, &dcid, &dcid_len))
+ goto err;
+
+ dgram = new_dgram ? new_dgram : pool_alloc(pool_head_quic_dgram);
+ if (!dgram)
+ goto err;
+
+ cid_tid = quic_get_cid_tid(dcid);
+
+ /* All the members must be initialized! */
+ dgram->owner = owner;
+ dgram->buf = buf;
+ dgram->len = len;
+ dgram->dcid = dcid;
+ dgram->dcid_len = dcid_len;
+ dgram->saddr = *saddr;
+ dgram->daddr = *daddr;
+ dgram->qc = NULL;
+
+ /* Attach the datagram to its quic_receiver_buf and quic_dghdlrs. */
+ LIST_APPEND(dgrams, &dgram->recv_list);
+ MT_LIST_APPEND(&quic_dghdlrs[cid_tid].dgrams, &dgram->handler_list);
+
+ /* typically quic_lstnr_dghdlr() */
+ tasklet_wakeup(quic_dghdlrs[cid_tid].task);
+
+ return 1;
+
+ err:
+ pool_free(pool_head_quic_dgram, new_dgram);
+ return 0;
+}
+
+/* This function is responsible for removing the unused datagrams attached in
+ * front of <buf>. Each instance is freed until a not yet consumed datagram is
+ * found or the end of the list is hit. The last unused datagram found is not
+ * freed and is instead returned so that the caller can reuse it if needed.
+ *
+ * Returns the last unused datagram or NULL if none was found.
+ */
+static struct quic_dgram *quic_rxbuf_purge_dgrams(struct quic_receiver_buf *buf)
+{
+ struct quic_dgram *cur, *prev = NULL;
+
+ while (!LIST_ISEMPTY(&buf->dgram_list)) {
+ cur = LIST_ELEM(buf->dgram_list.n, struct quic_dgram *, recv_list);
+
+ /* Loop until a not yet consumed datagram is found. */
+ if (HA_ATOMIC_LOAD(&cur->buf))
+ break;
+
+ /* Clear buffer of current unused datagram. */
+ LIST_DELETE(&cur->recv_list);
+ b_del(&buf->buf, cur->len);
+
+ /* Free last found unused datagram. */
+ if (prev)
+ pool_free(pool_head_quic_dgram, prev);
+ prev = cur;
+ }
+
+ /* Return last unused datagram found. */
+ return prev;
+}
+
+/* Receive data from datagram socket <fd>. Data are placed in <out> buffer of
+ * length <len>.
+ *
+ * Datagram addresses will be returned via the next arguments. <from> will be
+ * the peer address and <to> the reception one. Note that <to> can only be
+ * retrieved if the socket supports IP_PKTINFO or affiliated options. If not,
+ * <to> will be left with the AF_UNSPEC family. The caller must specify <dst_port> to ensure
+ * that <to> address is completely filled.
+ *
+ * Returns value from recvmsg syscall.
+ */
+static ssize_t quic_recv(int fd, void *out, size_t len,
+ struct sockaddr *from, socklen_t from_len,
+ struct sockaddr *to, socklen_t to_len,
+ uint16_t dst_port)
+{
+ union pktinfo {
+#ifdef IP_PKTINFO
+ struct in_pktinfo in;
+#else /* !IP_PKTINFO */
+ struct in_addr addr;
+#endif
+#ifdef IPV6_RECVPKTINFO
+ struct in6_pktinfo in6;
+#endif
+ };
+ char cdata[CMSG_SPACE(sizeof(union pktinfo))];
+ struct msghdr msg;
+ struct iovec vec;
+ struct cmsghdr *cmsg;
+ ssize_t ret;
+
+ vec.iov_base = out;
+ vec.iov_len = len;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_name = from;
+ msg.msg_namelen = from_len;
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &cdata;
+ msg.msg_controllen = sizeof(cdata);
+
+ clear_addr((struct sockaddr_storage *)to);
+
+ do {
+ ret = recvmsg(fd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ /* TODO handle errno. On EAGAIN/EWOULDBLOCK use fd_cant_recv() if
+ * using dedicated connection socket.
+ */
+
+ if (ret < 0)
+ goto end;
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ switch (cmsg->cmsg_level) {
+ case IPPROTO_IP:
+#if defined(IP_PKTINFO)
+ if (cmsg->cmsg_type == IP_PKTINFO) {
+ struct sockaddr_in *in = (struct sockaddr_in *)to;
+ struct in_pktinfo *info = (struct in_pktinfo *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in)) {
+ in->sin_family = AF_INET;
+ in->sin_addr = info->ipi_addr;
+ in->sin_port = dst_port;
+ }
+ }
+#elif defined(IP_RECVDSTADDR)
+ if (cmsg->cmsg_type == IP_RECVDSTADDR) {
+ struct sockaddr_in *in = (struct sockaddr_in *)to;
+ struct in_addr *info = (struct in_addr *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in)) {
+ in->sin_family = AF_INET;
+ in->sin_addr.s_addr = info->s_addr;
+ in->sin_port = dst_port;
+ }
+ }
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+
+ case IPPROTO_IPV6:
+#ifdef IPV6_RECVPKTINFO
+ if (cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)to;
+ struct in6_pktinfo *info6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in6)) {
+ in6->sin6_family = AF_INET6;
+ memcpy(&in6->sin6_addr, &info6->ipi6_addr, sizeof(in6->sin6_addr));
+ in6->sin6_port = dst_port;
+ }
+ }
+#endif
+ break;
+ }
+ }
+
+ end:
+ return ret;
+}
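+
+/* Illustrative sketch (not part of this patch): for the cmsg parsing above to
+ * ever see the destination address, the listening socket must have been
+ * configured to report it. The actual setup is done elsewhere (in the
+ * protocol layer), but it boils down to something like:
+ */
+#if 0
+static void example_enable_pktinfo(int fd)
+{
+	int one = 1;
+
+	/* Ask the kernel to attach the datagram destination address as
+	 * ancillary data to each received datagram.
+	 */
+	setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));         /* IPv4 */
+	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); /* IPv6 */
+}
+#endif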
+
+/* Function called on a read event from a listening socket. It tries
+ * to handle as many datagrams as possible.
+ */
+void quic_sock_fd_iocb(int fd)
+{
+ ssize_t ret;
+ struct quic_receiver_buf *rxbuf;
+ struct buffer *buf;
+ struct listener *l = objt_listener(fdtab[fd].owner);
+ struct quic_transport_params *params;
+ /* Source and destination addresses */
+ struct sockaddr_storage saddr = {0}, daddr = {0};
+ size_t max_sz, cspace;
+ struct quic_dgram *new_dgram;
+ unsigned char *dgram_buf;
+ int max_dgrams;
+
+ BUG_ON(!l);
+
+ new_dgram = NULL;
+ if (!l)
+ return;
+
+ if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd))
+ return;
+
+ rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), rxbuf_el);
+ if (!rxbuf)
+ goto out;
+
+ buf = &rxbuf->buf;
+
+ max_dgrams = global.tune.maxpollevents;
+ start:
+ /* Try to reuse an existing dgram. Note that there is always at
+ * least one datagram to pick, except the first time we enter
+ * this function for this <rxbuf> buffer.
+ */
+ new_dgram = quic_rxbuf_purge_dgrams(rxbuf);
+
+ params = &l->bind_conf->quic_params;
+ max_sz = params->max_udp_payload_size;
+ cspace = b_contig_space(buf);
+ if (cspace < max_sz) {
+ struct quic_dgram *dgram;
+
+		/* Do not mark <buf> as full, and do not try to consume it
+		 * if the contiguous remaining space is not at the end.
+ */
+ if (b_tail(buf) + cspace < b_wrap(buf))
+ goto out;
+
+		/* Allocate a fake datagram, without data, used to locate
+		 * the end of the RX buffer (required during purging).
+ */
+ dgram = pool_alloc(pool_head_quic_dgram);
+ if (!dgram)
+ goto out;
+
+ /* Initialize only the useful members of this fake datagram. */
+ dgram->buf = NULL;
+ dgram->len = cspace;
+ /* Append this datagram only to the RX buffer list. It will
+ * not be treated by any datagram handler.
+ */
+ LIST_APPEND(&rxbuf->dgram_list, &dgram->recv_list);
+
+ /* Consume the remaining space */
+ b_add(buf, cspace);
+ if (b_contig_space(buf) < max_sz)
+ goto out;
+ }
+
+ dgram_buf = (unsigned char *)b_tail(buf);
+ ret = quic_recv(fd, dgram_buf, max_sz,
+ (struct sockaddr *)&saddr, sizeof(saddr),
+ (struct sockaddr *)&daddr, sizeof(daddr),
+ get_net_port(&l->rx.addr));
+ if (ret <= 0)
+ goto out;
+
+ b_add(buf, ret);
+ if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr, &daddr,
+ new_dgram, &rxbuf->dgram_list)) {
+ /* If wrong, consume this datagram */
+ b_sub(buf, ret);
+ }
+ new_dgram = NULL;
+ if (--max_dgrams > 0)
+ goto start;
+ out:
+ pool_free(pool_head_quic_dgram, new_dgram);
+ MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
+}
+
+/* Send a datagram stored into <buf> buffer with <sz> as size.
+ * The caller must ensure there is at least <sz> bytes in this buffer.
+ *
+ * Returns 0 on success else non-zero.
+ *
+ * TODO standardize this function for a generic UDP sendto wrapper. This can be
+ * done by removing the <qc> arg and replace it with address/port.
+ */
+int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t sz,
+ int flags)
+{
+ ssize_t ret;
+
+ do {
+ ret = sendto(qc->li->rx.fd, b_peek(buf, b_head_ofs(buf)), sz,
+ MSG_DONTWAIT | MSG_NOSIGNAL,
+ (struct sockaddr *)&qc->peer_addr, get_addr_len(&qc->peer_addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ struct proxy *prx = qc->li->bind_conf->frontend;
+ struct quic_counters *prx_counters =
+ EXTRA_COUNTERS_GET(prx->extra_counters_fe,
+ &quic_stats_module);
+
+ /* TODO adjust errno for UDP context. */
+ if (errno == EAGAIN || errno == EWOULDBLOCK ||
+ errno == ENOTCONN || errno == EINPROGRESS || errno == EBADF) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ HA_ATOMIC_INC(&prx_counters->socket_full);
+ else
+ HA_ATOMIC_INC(&prx_counters->sendto_err);
+ }
+ else if (errno) {
+ /* TODO unlisted errno : handle it explicitly. */
+ HA_ATOMIC_INC(&prx_counters->sendto_err_unknown);
+ }
+
+ return 1;
+ }
+
+ if (ret != sz)
+ return 1;
+
+ /* we count the total bytes sent, and the send rate for 32-byte blocks.
+ * The reason for the latter is that freq_ctr are limited to 4GB and
+ * that it's not enough per second.
+ */
+ _HA_ATOMIC_ADD(&global.out_bytes, ret);
+ update_freq_ctr(&global.out_32bps, (ret + 16) / 32);
+
+ return 0;
+}
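+
+/* Example: a freq_ctr period counter wraps at 2^32 events, i.e. ~4 GB/s when
+ * counting single bytes. Counting rounded 32-byte blocks, (ret + 16) / 32,
+ * keeps a half-block rounding error at worst while pushing the measurable
+ * rate up to about 128 GB/s.
+ */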
+
+
+/*********************** QUIC accept queue management ***********************/
+/* per-thread accept queues */
+struct quic_accept_queue *quic_accept_queues;
+
+/* Install <qc> on the queue ready to be accepted. The queue task is then woken
+ * up. If <qc> accept is already scheduled or done, nothing is done.
+ */
+void quic_accept_push_qc(struct quic_conn *qc)
+{
+ struct quic_accept_queue *queue = &quic_accept_queues[qc->tid];
+ struct li_per_thread *lthr = &qc->li->per_thr[qc->tid];
+
+ /* early return if accept is already in progress/done for this
+ * connection
+ */
+ if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)
+ return;
+
+ BUG_ON(MT_LIST_INLIST(&qc->accept_list));
+
+ qc->flags |= QUIC_FL_CONN_ACCEPT_REGISTERED;
+ /* 1. insert the listener in the accept queue
+ *
+ * Use TRY_APPEND as there is a possible race even with INLIST if
+ * multiple threads try to add the same listener instance from several
+ * quic_conn.
+ */
+ if (!MT_LIST_INLIST(&(lthr->quic_accept.list)))
+ MT_LIST_TRY_APPEND(&queue->listeners, &(lthr->quic_accept.list));
+
+ /* 2. insert the quic_conn in the listener per-thread queue. */
+ MT_LIST_APPEND(&lthr->quic_accept.conns, &qc->accept_list);
+
+ /* 3. wake up the queue tasklet */
+ tasklet_wakeup(quic_accept_queues[qc->tid].tasklet);
+}
+
+/* Tasklet handler to accept QUIC connections. Call listener_accept on every
+ * listener instances registered in the accept queue.
+ */
+static struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i)
+{
+ struct li_per_thread *lthr;
+ struct mt_list *elt1, elt2;
+ struct quic_accept_queue *queue = &quic_accept_queues[tid];
+
+ mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) {
+ listener_accept(lthr->li);
+ if (!MT_LIST_ISEMPTY(&lthr->quic_accept.conns))
+ tasklet_wakeup((struct tasklet*)t);
+ else
+ MT_LIST_DELETE_SAFE(elt1);
+ }
+
+ return NULL;
+}
+
+static int quic_alloc_accept_queues(void)
+{
+ int i;
+
+ quic_accept_queues = calloc(global.nbthread,
+ sizeof(*quic_accept_queues));
+ if (!quic_accept_queues) {
+ ha_alert("Failed to allocate the quic accept queues.\n");
+ return 0;
+ }
+
+ for (i = 0; i < global.nbthread; ++i) {
+ struct tasklet *task;
+ if (!(task = tasklet_new())) {
+ ha_alert("Failed to allocate the quic accept queue on thread %d.\n", i);
+ return 0;
+ }
+
+ tasklet_set_tid(task, i);
+ task->process = quic_accept_run;
+ quic_accept_queues[i].tasklet = task;
+
+ MT_LIST_INIT(&quic_accept_queues[i].listeners);
+ }
+
+ return 1;
+}
+REGISTER_POST_CHECK(quic_alloc_accept_queues);
+
+static int quic_deallocate_accept_queues(void)
+{
+ int i;
+
+ if (quic_accept_queues) {
+ for (i = 0; i < global.nbthread; ++i)
+ tasklet_free(quic_accept_queues[i].tasklet);
+ free(quic_accept_queues);
+ }
+
+ return 1;
+}
+REGISTER_POST_DEINIT(quic_deallocate_accept_queues);
diff --git a/src/quic_stats.c b/src/quic_stats.c
new file mode 100644
index 0000000..73b03e4
--- /dev/null
+++ b/src/quic_stats.c
@@ -0,0 +1,204 @@
+#include <haproxy/quic_frame-t.h>
+#include <haproxy/quic_stats-t.h>
+#include <haproxy/stats.h>
+
+static struct name_desc quic_stats[] = {
+ [QUIC_ST_DROPPED_PACKET] = { .name = "quic_dropped_pkt",
+ .desc = "Total number of dropped packets" },
+ [QUIC_ST_DROPPED_PACKET_BUFOVERRUN] = { .name = "quic_dropped_pkt_bufoverrun",
+ .desc = "Total number of dropped packets because of buffer overrun" },
+ [QUIC_ST_DROPPED_PARSING] = { .name = "quic_dropped_parsing_pkt",
+ .desc = "Total number of dropped packets upon parsing error" },
+ [QUIC_ST_SOCKET_FULL] = { .name = "quic_socket_full",
+ .desc = "Total number of EAGAIN error on sendto() calls" },
+ [QUIC_ST_SENDTO_ERR] = { .name = "quic_sendto_err",
+ .desc = "Total number of error on sendto() calls, EAGAIN excepted" },
+ [QUIC_ST_SENDTO_ERR_UNKNWN] = { .name = "quic_sendto_err_unknwn",
+ .desc = "Total number of error on sendto() calls not explicitely listed" },
+ [QUIC_ST_LOST_PACKET] = { .name = "quic_lost_pkt",
+ .desc = "Total number of lost sent packets" },
+ [QUIC_ST_TOO_SHORT_INITIAL_DGRAM] = { .name = "quic_too_short_dgram",
+ .desc = "Total number of too short dgrams with Initial packets" },
+ [QUIC_ST_RETRY_SENT] = { .name = "quic_retry_sent",
+ .desc = "Total number of Retry sent" },
+ [QUIC_ST_RETRY_VALIDATED] = { .name = "quic_retry_validated",
+ .desc = "Total number of validated Retry tokens" },
+ [QUIC_ST_RETRY_ERRORS] = { .name = "quic_retry_error",
+ .desc = "Total number of Retry tokens errors" },
+ [QUIC_ST_HALF_OPEN_CONN] = { .name = "quic_half_open_conn",
+ .desc = "Total number of half open connections" },
+ [QUIC_ST_HDSHK_FAIL] = { .name = "quic_hdshk_fail",
+ .desc = "Total number of handshake failures" },
+ [QUIC_ST_STATELESS_RESET_SENT] = { .name = "quic_stless_rst_sent",
+ .desc = "Total number of stateless reset packet sent" },
+ /* Transport errors */
+ [QUIC_ST_TRANSP_ERR_NO_ERROR] = { .name = "quic_transp_err_no_error",
+ .desc = "Total number of NO_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_INTERNAL_ERROR] = { .name = "quic_transp_err_internal_error",
+ .desc = "Total number of INTERNAL_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED] = { .name = "quic_transp_err_connection_refused",
+ .desc = "Total number of CONNECTION_REFUSED errors received" },
+ [QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR] = { .name = "quic_transp_err_flow_control_error",
+ .desc = "Total number of FLOW_CONTROL_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR] = { .name = "quic_transp_err_stream_limit_error",
+ .desc = "Total number of STREAM_LIMIT_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR] = { .name = "quic_transp_err_stream_state_error",
+ .desc = "Total number of STREAM_STATE_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR] = { .name = "quic_transp_err_final_size_error",
+ .desc = "Total number of FINAL_SIZE_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR] = { .name = "quic_transp_err_frame_encoding_error",
+ .desc = "Total number of FRAME_ENCODING_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR] = { .name = "quic_transp_err_transport_parameter_error",
+ .desc = "Total number of TRANSPORT_PARAMETER_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR] = { .name = "quic_transp_err_connection_id_limit",
+ .desc = "Total number of CONNECTION_ID_LIMIT_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION] = { .name = "quic_transp_err_protocol_violation_error",
+ .desc = "Total number of PROTOCOL_VIOLATION errors received" },
+ [QUIC_ST_TRANSP_ERR_INVALID_TOKEN] = { .name = "quic_transp_err_invalid_token",
+ .desc = "Total number of INVALID_TOKEN errors received" },
+ [QUIC_ST_TRANSP_ERR_APPLICATION_ERROR] = { .name = "quic_transp_err_application_error",
+ .desc = "Total number of APPLICATION_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED] = { .name = "quic_transp_err_crypto_buffer_exceeded",
+ .desc = "Total number of CRYPTO_BUFFER_EXCEEDED errors received" },
+ [QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR] = { .name = "quic_transp_err_key_update_error",
+ .desc = "Total number of KEY_UPDATE_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED] = { .name = "quic_transp_err_aead_limit_reached",
+ .desc = "Total number of AEAD_LIMIT_REACHED errors received" },
+ [QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH] = { .name = "quic_transp_err_no_viable_path",
+ .desc = "Total number of NO_VIABLE_PATH errors received" },
+ [QUIC_ST_TRANSP_ERR_CRYPTO_ERROR] = { .name = "quic_transp_err_crypto_error",
+ .desc = "Total number of CRYPTO_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR] = { .name = "quic_transp_err_unknown_error",
+ .desc = "Total number of UNKNOWN_ERROR errors received" },
+ /* Streams related counters */
+ [QUIC_ST_DATA_BLOCKED] = { .name = "quic_data_blocked",
+ .desc = "Total number of received DATA_BLOCKED frames" },
+ [QUIC_ST_STREAM_DATA_BLOCKED] = { .name = "quic_stream_data_blocked",
+ .desc = "Total number of received STREAMS_BLOCKED frames" },
+ [QUIC_ST_STREAMS_DATA_BLOCKED_BIDI] = { .name = "quic_streams_data_blocked_bidi",
+ .desc = "Total number of received STREAM_DATA_BLOCKED_BIDI frames" },
+ [QUIC_ST_STREAMS_DATA_BLOCKED_UNI] = { .name = "quic_streams_data_blocked_uni",
+ .desc = "Total number of received STREAM_DATA_BLOCKED_UNI frames" },
+};
+
+struct quic_counters quic_counters;
+
+static void quic_fill_stats(void *data, struct field *stats)
+{
+ struct quic_counters *counters = data;
+
+ stats[QUIC_ST_DROPPED_PACKET] = mkf_u64(FN_COUNTER, counters->dropped_pkt);
+ stats[QUIC_ST_DROPPED_PACKET_BUFOVERRUN] = mkf_u64(FN_COUNTER, counters->dropped_pkt_bufoverrun);
+ stats[QUIC_ST_DROPPED_PARSING] = mkf_u64(FN_COUNTER, counters->dropped_parsing);
+ stats[QUIC_ST_SOCKET_FULL] = mkf_u64(FN_COUNTER, counters->socket_full);
+ stats[QUIC_ST_SENDTO_ERR] = mkf_u64(FN_COUNTER, counters->sendto_err);
+ stats[QUIC_ST_SENDTO_ERR_UNKNWN] = mkf_u64(FN_COUNTER, counters->sendto_err_unknown);
+ stats[QUIC_ST_LOST_PACKET] = mkf_u64(FN_COUNTER, counters->lost_pkt);
+ stats[QUIC_ST_TOO_SHORT_INITIAL_DGRAM] = mkf_u64(FN_COUNTER, counters->too_short_initial_dgram);
+ stats[QUIC_ST_RETRY_SENT] = mkf_u64(FN_COUNTER, counters->retry_sent);
+ stats[QUIC_ST_RETRY_VALIDATED] = mkf_u64(FN_COUNTER, counters->retry_validated);
+ stats[QUIC_ST_RETRY_ERRORS] = mkf_u64(FN_COUNTER, counters->retry_error);
+ stats[QUIC_ST_HALF_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->half_open_conn);
+ stats[QUIC_ST_HDSHK_FAIL] = mkf_u64(FN_COUNTER, counters->hdshk_fail);
+ stats[QUIC_ST_STATELESS_RESET_SENT] = mkf_u64(FN_COUNTER, counters->stateless_reset_sent);
+ /* Transport errors */
+ stats[QUIC_ST_TRANSP_ERR_NO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_error);
+ stats[QUIC_ST_TRANSP_ERR_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_internal_error);
+ stats[QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_refused);
+ stats[QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_flow_control_error);
+ stats[QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_limit_error);
+ stats[QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_state_error);
+ stats[QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_final_size_error);
+ stats[QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_frame_encoding_error);
+ stats[QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_transport_parameter_error);
+ stats[QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_id_limit);
+ stats[QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION] = mkf_u64(FN_COUNTER, counters->quic_transp_err_protocol_violation);
+ stats[QUIC_ST_TRANSP_ERR_INVALID_TOKEN] = mkf_u64(FN_COUNTER, counters->quic_transp_err_invalid_token);
+ stats[QUIC_ST_TRANSP_ERR_APPLICATION_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_application_error);
+ stats[QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_buffer_exceeded);
+ stats[QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_key_update_error);
+ stats[QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_aead_limit_reached);
+ stats[QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_viable_path);
+ stats[QUIC_ST_TRANSP_ERR_CRYPTO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_error);
+ stats[QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_unknown_error);
+ /* Streams related counters */
+ stats[QUIC_ST_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->data_blocked);
+ stats[QUIC_ST_STREAM_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->stream_data_blocked);
+ stats[QUIC_ST_STREAMS_DATA_BLOCKED_BIDI] = mkf_u64(FN_COUNTER, counters->streams_data_blocked_bidi);
+ stats[QUIC_ST_STREAMS_DATA_BLOCKED_UNI] = mkf_u64(FN_COUNTER, counters->streams_data_blocked_uni);
+}
+
+struct stats_module quic_stats_module = {
+ .name = "quic",
+ .fill_stats = quic_fill_stats,
+ .stats = quic_stats,
+ .stats_count = QUIC_STATS_COUNT,
+ .counters = &quic_counters,
+ .counters_size = sizeof(quic_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &quic_stats_module);
+
+void quic_stats_transp_err_count_inc(struct quic_counters *ctrs, int error_code)
+{
+ switch (error_code) {
+ case QC_ERR_NO_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_no_error);
+ break;
+ case QC_ERR_INTERNAL_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_internal_error);
+ break;
+ case QC_ERR_CONNECTION_REFUSED:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_connection_refused);
+ break;
+ case QC_ERR_FLOW_CONTROL_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_flow_control_error);
+ break;
+ case QC_ERR_STREAM_LIMIT_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_stream_limit_error);
+ break;
+ case QC_ERR_STREAM_STATE_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_stream_state_error);
+ break;
+ case QC_ERR_FINAL_SIZE_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_final_size_error);
+ break;
+ case QC_ERR_FRAME_ENCODING_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_frame_encoding_error);
+ break;
+ case QC_ERR_TRANSPORT_PARAMETER_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_transport_parameter_error);
+ break;
+ case QC_ERR_CONNECTION_ID_LIMIT_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_connection_id_limit);
+ break;
+ case QC_ERR_PROTOCOL_VIOLATION:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_protocol_violation);
+ break;
+ case QC_ERR_INVALID_TOKEN:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_invalid_token);
+ break;
+ case QC_ERR_APPLICATION_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_application_error);
+ break;
+ case QC_ERR_CRYPTO_BUFFER_EXCEEDED:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_crypto_buffer_exceeded);
+ break;
+ case QC_ERR_KEY_UPDATE_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_key_update_error);
+ break;
+ case QC_ERR_AEAD_LIMIT_REACHED:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_aead_limit_reached);
+ break;
+ case QC_ERR_NO_VIABLE_PATH:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_no_viable_path);
+ break;
+ default:
+ if (error_code >= 0x100 && error_code <= 0x1ff)
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_crypto_error);
+ else
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_unknown_error);
+ }
+}
diff --git a/src/quic_stream.c b/src/quic_stream.c
new file mode 100644
index 0000000..692f4d5
--- /dev/null
+++ b/src/quic_stream.c
@@ -0,0 +1,267 @@
+#include <haproxy/quic_stream.h>
+
+#include <import/eb64tree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/mux_quic-t.h>
+#include <haproxy/pool.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/task.h>
+
+DECLARE_STATIC_POOL(pool_head_quic_stream_desc, "qc_stream_desc",
+ sizeof(struct qc_stream_desc));
+DECLARE_STATIC_POOL(pool_head_quic_stream_buf, "qc_stream_buf",
+ sizeof(struct qc_stream_buf));
+
+
+/* Allocate a new stream descriptor with id <id>. The caller is responsible
+ * for storing the stream in the appropriate tree. The special value -1 must
+ * be used for a CRYPTO data stream, in which case the type is ignored.
+ *
+ * Returns the newly allocated instance on success or else NULL.
+ */
+struct qc_stream_desc *qc_stream_desc_new(uint64_t id, enum qcs_type type, void *ctx,
+ struct quic_conn *qc)
+{
+ struct qc_stream_desc *stream;
+
+ stream = pool_alloc(pool_head_quic_stream_desc);
+ if (!stream)
+ return NULL;
+
+ if (id == (uint64_t)-1) {
+ stream->by_id.key = (uint64_t)-1;
+ }
+ else {
+ stream->by_id.key = id;
+ eb64_insert(&qc->streams_by_id, &stream->by_id);
+ qc->rx.strms[type].nb_streams++;
+ }
+ stream->qc = qc;
+
+ stream->buf = NULL;
+ LIST_INIT(&stream->buf_list);
+ stream->buf_offset = 0;
+
+ stream->acked_frms = EB_ROOT;
+ stream->ack_offset = 0;
+ stream->release = 0;
+ stream->ctx = ctx;
+
+ return stream;
+}
+
+/* Mark the stream descriptor <stream> as released. It will be freed as soon as
+ * all its buffered data are acknowledged. Does nothing if <stream> is NULL.
+ */
+void qc_stream_desc_release(struct qc_stream_desc *stream)
+{
+ if (!stream)
+ return;
+
+ /* A stream can be released only one time. */
+ BUG_ON(stream->release);
+
+ stream->release = 1;
+ stream->ctx = NULL;
+
+ if (LIST_ISEMPTY(&stream->buf_list)) {
+ /* if no buffer left we can free the stream. */
+ qc_stream_desc_free(stream, 0);
+ }
+ else {
+ /* A released stream does not use <stream.buf>. */
+ stream->buf = NULL;
+ }
+}
+
+/* Acknowledge data at <offset> of length <len> for <stream>. It is handled
+ * only if the acknowledged range covers stream.ack_offset. After data
+ * removal, if the stream no longer contains data and is already
+ * released, the stream instance is freed. <stream> is set to NULL to indicate
+ * this.
+ *
+ * Returns the count of bytes removed from the stream. Do not forget to check if
+ * <stream> is NULL after invocation.
+ */
+int qc_stream_desc_ack(struct qc_stream_desc **stream, size_t offset, size_t len)
+{
+ struct qc_stream_desc *s = *stream;
+ struct qc_stream_buf *stream_buf;
+ struct quic_conn *qc = s->qc;
+ struct buffer *buf;
+ size_t diff;
+
+ if (offset + len <= s->ack_offset || offset > s->ack_offset)
+ return 0;
+
+ /* There must be at least one buffer, otherwise no ACK should be reported. */
+ BUG_ON(LIST_ISEMPTY(&s->buf_list));
+
+ /* get oldest buffer from buf_list */
+ stream_buf = LIST_NEXT(&s->buf_list, struct qc_stream_buf *, list);
+ buf = &stream_buf->buf;
+
+ diff = offset + len - s->ack_offset;
+ s->ack_offset += diff;
+ b_del(buf, diff);
+
+ /* nothing more to do if buf still not empty. */
+ if (b_data(buf))
+ return diff;
+
+ /* buf is empty and can now be freed. Do not forget to reset current
+ * buf ptr if we were working on it.
+ */
+ LIST_DELETE(&stream_buf->list);
+ if (stream_buf == s->buf) {
+ /* current buf must always be last entry in buflist */
+ BUG_ON(!LIST_ISEMPTY(&s->buf_list));
+ s->buf = NULL;
+ }
+
+ b_free(buf);
+ pool_free(pool_head_quic_stream_buf, stream_buf);
+ offer_buffers(NULL, 1);
+
+ /* notify MUX about available buffers. */
+ --qc->stream_buf_count;
+ if (qc->mux_state == QC_MUX_READY) {
+ if (qc->qcc->flags & QC_CF_CONN_FULL) {
+ qc->qcc->flags &= ~QC_CF_CONN_FULL;
+ tasklet_wakeup(qc->qcc->wait_event.tasklet);
+ }
+ }
+
+ /* Free stream instance if already released and no buffers left. */
+ if (s->release && LIST_ISEMPTY(&s->buf_list)) {
+ qc_stream_desc_free(s, 0);
+ *stream = NULL;
+ }
+
+ return diff;
+}
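+
+/* Example: with ack_offset=1000, an ACK for range [500, 900) is ignored (it
+ * is fully covered), an ACK for [1200, 1300) is ignored too (out of order, a
+ * gap remains), while an ACK for [800, 1100) advances ack_offset to 1100 and
+ * releases 100 bytes from the oldest buffer.
+ */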
+
+/* Free the stream descriptor <stream> content. This function should be used
+ * when all its data have been acknowledged, or upon connection closure if the
+ * <closing> boolean is set to 1. It must only be called after the stream is released.
+ */
+void qc_stream_desc_free(struct qc_stream_desc *stream, int closing)
+{
+ struct qc_stream_buf *buf, *buf_back;
+ struct quic_conn *qc = stream->qc;
+ struct eb64_node *frm_node;
+ unsigned int free_count = 0;
+
+ /* This function only deals with released streams. */
+ BUG_ON(!stream->release);
+
+ /* free remaining stream buffers */
+ list_for_each_entry_safe(buf, buf_back, &stream->buf_list, list) {
+ if (!(b_data(&buf->buf)) || closing) {
+ b_free(&buf->buf);
+ LIST_DELETE(&buf->list);
+ pool_free(pool_head_quic_stream_buf, buf);
+
+ ++free_count;
+ }
+ }
+
+ if (free_count) {
+ offer_buffers(NULL, free_count);
+
+ qc->stream_buf_count -= free_count;
+ if (qc->mux_state == QC_MUX_READY) {
+ /* notify MUX about available buffers. */
+ if (qc->qcc->flags & QC_CF_CONN_FULL) {
+ qc->qcc->flags &= ~QC_CF_CONN_FULL;
+ tasklet_wakeup(qc->qcc->wait_event.tasklet);
+ }
+ }
+ }
+
+ /* qc_stream_desc might be freed before having received all its ACKs.
+ * This is the case if some frames were retransmitted.
+ */
+ frm_node = eb64_first(&stream->acked_frms);
+ while (frm_node) {
+ struct quic_stream *strm;
+ struct quic_frame *frm;
+
+ strm = eb64_entry(frm_node, struct quic_stream, offset);
+
+ frm_node = eb64_next(frm_node);
+ eb64_delete(&strm->offset);
+
+ frm = container_of(strm, struct quic_frame, stream);
+ qc_release_frm(qc, frm);
+ }
+
+ if (stream->by_id.key != (uint64_t)-1)
+ eb64_delete(&stream->by_id);
+ pool_free(pool_head_quic_stream_desc, stream);
+}
+
+/* Return the current buffer of <stream>. May be NULL if not allocated. */
+struct buffer *qc_stream_buf_get(struct qc_stream_desc *stream)
+{
+ if (!stream->buf)
+ return NULL;
+
+ return &stream->buf->buf;
+}
+
+/* Check if a new stream buffer can be allocated for the connection <qc>.
+ * Returns a boolean.
+ */
+static int qc_stream_buf_avail(struct quic_conn *qc)
+{
+ return qc->stream_buf_count < global.tune.quic_streams_buf;
+}
+
+/* Allocate a new current buffer for <stream>. The buffer limit count for the
+ * connection is checked first. This function must not be called if the
+ * current buffer is not NULL prior to this call. The new buffer represents
+ * the stream payload at offset <offset>.
+ *
+ * Returns the buffer or NULL.
+ */
+struct buffer *qc_stream_buf_alloc(struct qc_stream_desc *stream,
+ uint64_t offset)
+{
+ struct quic_conn *qc = stream->qc;
+
+ /* The current buffer must be released before allocating a new one. */
+ BUG_ON(stream->buf);
+
+ if (!qc_stream_buf_avail(qc))
+ return NULL;
+
+ ++qc->stream_buf_count;
+
+ stream->buf_offset = offset;
+ stream->buf = pool_alloc(pool_head_quic_stream_buf);
+ if (!stream->buf)
+ return NULL;
+
+ stream->buf->buf = BUF_NULL;
+ LIST_APPEND(&stream->buf_list, &stream->buf->list);
+
+ return &stream->buf->buf;
+}
+
+/* Release the current buffer of <stream>. It will be kept internally by
+ * the <stream>. The current buffer cannot be NULL.
+ */
+void qc_stream_buf_release(struct qc_stream_desc *stream)
+{
+ /* current buffer already released */
+ BUG_ON(!stream->buf);
+
+ stream->buf = NULL;
+ stream->buf_offset = 0;
+}
diff --git a/src/quic_tls.c b/src/quic_tls.c
new file mode 100644
index 0000000..7d2d2c1
--- /dev/null
+++ b/src/quic_tls.c
@@ -0,0 +1,672 @@
+#include <haproxy/quic_tls.h>
+
+#include <string.h>
+
+#include <openssl/evp.h>
+#include <openssl/kdf.h>
+#include <openssl/ssl.h>
+
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/pool.h>
+#include <haproxy/quic_conn-t.h>
+
+
+DECLARE_POOL(pool_head_quic_tls_secret, "quic_tls_secret", QUIC_TLS_SECRET_LEN);
+DECLARE_POOL(pool_head_quic_tls_iv, "quic_tls_iv", QUIC_TLS_IV_LEN);
+DECLARE_POOL(pool_head_quic_tls_key, "quic_tls_key", QUIC_TLS_KEY_LEN);
+
+/* Initial salts, depending on the QUIC version, used to derive the
+ * client/server initial secrets. This one is for the draft-29 QUIC version.
+ */
+const unsigned char initial_salt_draft_29[20] = {
+ 0xaf, 0xbf, 0xec, 0x28, 0x99, 0x93, 0xd2, 0x4c,
+ 0x9e, 0x97, 0x86, 0xf1, 0x9c, 0x61, 0x11, 0xe0,
+ 0x43, 0x90, 0xa8, 0x99
+};
+
+const unsigned char initial_salt_v1[20] = {
+ 0x38, 0x76, 0x2c, 0xf7, 0xf5, 0x59, 0x34, 0xb3,
+ 0x4d, 0x17, 0x9a, 0xe6, 0xa4, 0xc8, 0x0c, 0xad,
+ 0xcc, 0xbb, 0x7f, 0x0a
+};
+
+const unsigned char initial_salt_v2_draft[20] = {
+ 0xa7, 0x07, 0xc2, 0x03, 0xa5, 0x9b, 0x47, 0x18,
+ 0x4a, 0x1d, 0x62, 0xca, 0x57, 0x04, 0x06, 0xea,
+ 0x7a, 0xe3, 0xe5, 0xd3
+};
+
+/* Dump the RX/TX secrets of <secs> QUIC TLS secrets. */
+void quic_tls_keys_hexdump(struct buffer *buf,
+ const struct quic_tls_secrets *secs)
+{
+ int i;
+ size_t aead_keylen = (size_t)EVP_CIPHER_key_length(secs->aead);
+ size_t aead_ivlen = (size_t)EVP_CIPHER_iv_length(secs->aead);
+ size_t hp_len = (size_t)EVP_CIPHER_key_length(secs->hp);
+
+ chunk_appendf(buf, "\n key=");
+ for (i = 0; i < aead_keylen; i++)
+ chunk_appendf(buf, "%02x", secs->key[i]);
+ chunk_appendf(buf, "\n iv=");
+ for (i = 0; i < aead_ivlen; i++)
+ chunk_appendf(buf, "%02x", secs->iv[i]);
+ chunk_appendf(buf, "\n hp=");
+ for (i = 0; i < hp_len; i++)
+ chunk_appendf(buf, "%02x", secs->hp_key[i]);
+}
+
+/* Dump <secret> TLS secret. */
+void quic_tls_secret_hexdump(struct buffer *buf,
+ const unsigned char *secret, size_t secret_len)
+{
+ int i;
+
+ chunk_appendf(buf, " secret=");
+ for (i = 0; i < secret_len; i++)
+ chunk_appendf(buf, "%02x", secret[i]);
+}
+
+int quic_hkdf_extract(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *salt, size_t saltlen)
+{
+ EVP_PKEY_CTX *ctx;
+
+ ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
+ if (!ctx)
+ return 0;
+
+ if (EVP_PKEY_derive_init(ctx) <= 0 ||
+ EVP_PKEY_CTX_hkdf_mode(ctx, EVP_PKEY_HKDEF_MODE_EXTRACT_ONLY) <= 0 ||
+ EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_salt(ctx, salt, saltlen) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_key(ctx, key, keylen) <= 0 ||
+ EVP_PKEY_derive(ctx, buf, &buflen) <= 0)
+ goto err;
+
+ EVP_PKEY_CTX_free(ctx);
+ return 1;
+
+ err:
+ EVP_PKEY_CTX_free(ctx);
+ return 0;
+}
+
+int quic_hkdf_expand(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *label, size_t labellen)
+{
+ EVP_PKEY_CTX *ctx;
+
+ ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
+ if (!ctx)
+ return 0;
+
+ if (EVP_PKEY_derive_init(ctx) <= 0 ||
+ EVP_PKEY_CTX_hkdf_mode(ctx, EVP_PKEY_HKDEF_MODE_EXPAND_ONLY) <= 0 ||
+ EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_key(ctx, key, keylen) <= 0 ||
+ EVP_PKEY_CTX_add1_hkdf_info(ctx, label, labellen) <= 0 ||
+ EVP_PKEY_derive(ctx, buf, &buflen) <= 0)
+ goto err;
+
+ EVP_PKEY_CTX_free(ctx);
+ return 1;
+
+ err:
+ EVP_PKEY_CTX_free(ctx);
+ return 0;
+}
+
+/* Extracts a pseudo-random secret from <key>, which is possibly not
+ * pseudo-random, and expands it to a new pseudo-random key into
+ * <buf> with <buflen> as key length according to the HKDF specification
+ * (https://datatracker.ietf.org/doc/html/rfc5869).
+ * According to this specification it is highly recommended to use
+ * a salt, even though it is optional (NULL value).
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_hkdf_extract_and_expand(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *salt, size_t saltlen,
+ const unsigned char *label, size_t labellen)
+{
+ EVP_PKEY_CTX *ctx;
+
+ ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
+ if (!ctx)
+ return 0;
+
+ if (EVP_PKEY_derive_init(ctx) <= 0 ||
+ EVP_PKEY_CTX_hkdf_mode(ctx, EVP_PKEY_HKDEF_MODE_EXTRACT_AND_EXPAND) <= 0 ||
+ EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_salt(ctx, salt, saltlen) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_key(ctx, key, keylen) <= 0 ||
+ EVP_PKEY_CTX_add1_hkdf_info(ctx, label, labellen) <= 0 ||
+ EVP_PKEY_derive(ctx, buf, &buflen) <= 0)
+ goto err;
+
+ EVP_PKEY_CTX_free(ctx);
+ return 1;
+
+ err:
+ EVP_PKEY_CTX_free(ctx);
+ return 0;
+}
+
+/* https://quicwg.org/base-drafts/draft-ietf-quic-tls.html#protection-keys
+ * refers to:
+ *
+ * https://tools.ietf.org/html/rfc8446#section-7.1:
+ * 7.1. Key Schedule
+ *
+ * The key derivation process makes use of the HKDF-Extract and
+ * HKDF-Expand functions as defined for HKDF [RFC5869], as well as the
+ * functions defined below:
+ *
+ * HKDF-Expand-Label(Secret, Label, Context, Length) =
+ * HKDF-Expand(Secret, HkdfLabel, Length)
+ *
+ * Where HkdfLabel is specified as:
+ *
+ * struct {
+ * uint16 length = Length;
+ * opaque label<7..255> = "tls13 " + Label;
+ * opaque context<0..255> = Context;
+ * } HkdfLabel;
+ *
+ * Derive-Secret(Secret, Label, Messages) =
+ * HKDF-Expand-Label(Secret, Label,
+ * Transcript-Hash(Messages), Hash.length)
+ *
+ */
+int quic_hkdf_expand_label(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *label, size_t labellen)
+{
+ unsigned char hdkf_label[256], *pos;
+ const unsigned char hdkf_label_label[] = "tls13 ";
+ size_t hdkf_label_label_sz = sizeof hdkf_label_label - 1;
+
+ pos = hdkf_label;
+ *pos++ = buflen >> 8;
+ *pos++ = buflen & 0xff;
+ *pos++ = hdkf_label_label_sz + labellen;
+ memcpy(pos, hdkf_label_label, hdkf_label_label_sz);
+ pos += hdkf_label_label_sz;
+ memcpy(pos, label, labellen);
+ pos += labellen;
+ *pos++ = '\0';
+
+ return quic_hkdf_expand(md, buf, buflen,
+ key, keylen, hdkf_label, pos - hdkf_label);
+}
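+
+/* Worked example: expanding a 32-byte "client in" secret builds the following
+ * HkdfLabel, fed as info to HKDF-Expand:
+ *   00 20                  length = 32
+ *   0f                     label length = len("tls13 ") + len("client in")
+ *   "tls13 client in"      label
+ *   00                     empty context
+ */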
+
+/*
+ * This function derives the QUIC TLS keying material from <secret>:
+ * <key> is the key used to encrypt/decrypt the packet payloads.
+ * <iv> is the initialization vector to be used with <key>.
+ * <hp_key> is the key used for header protection.
+ * Their sizes depend on the AEAD and header protection ciphers in use.
+ */
+int quic_tls_derive_keys(const EVP_CIPHER *aead, const EVP_CIPHER *hp,
+ const EVP_MD *md, const struct quic_version *qv,
+ unsigned char *key, size_t keylen,
+ unsigned char *iv, size_t ivlen,
+ unsigned char *hp_key, size_t hp_keylen,
+ const unsigned char *secret, size_t secretlen)
+{
+ size_t aead_keylen = (size_t)EVP_CIPHER_key_length(aead);
+ size_t aead_ivlen = (size_t)EVP_CIPHER_iv_length(aead);
+ size_t hp_len = hp ? (size_t)EVP_CIPHER_key_length(hp) : 0;
+
+ if (aead_keylen > keylen || aead_ivlen > ivlen || hp_len > hp_keylen)
+ return 0;
+
+ if (!quic_hkdf_expand_label(md, key, aead_keylen, secret, secretlen,
+ qv->key_label,qv->key_label_len) ||
+ !quic_hkdf_expand_label(md, iv, aead_ivlen, secret, secretlen,
+ qv->iv_label, qv->iv_label_len) ||
+ (hp_key && !quic_hkdf_expand_label(md, hp_key, hp_len, secret, secretlen,
+ qv->hp_label, qv->hp_label_len)))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Derive the initial secret from <secret> and QUIC version dependent salt.
+ * Returns 1 if succeeded, 0 if not.
+ */
+int quic_derive_initial_secret(const EVP_MD *md,
+ const unsigned char *initial_salt, size_t initial_salt_sz,
+ unsigned char *initial_secret, size_t initial_secret_sz,
+ const unsigned char *secret, size_t secret_sz)
+{
+ if (!quic_hkdf_extract(md, initial_secret, initial_secret_sz, secret, secret_sz,
+ initial_salt, initial_salt_sz))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Derive the client and server initial secrets from the initial secret.
+ * Returns 1 if succeeded, 0 if not.
+ */
+int quic_tls_derive_initial_secrets(const EVP_MD *md,
+ unsigned char *rx, size_t rx_sz,
+ unsigned char *tx, size_t tx_sz,
+ const unsigned char *secret, size_t secret_sz,
+ int server)
+{
+ const unsigned char client_label[] = "client in";
+ const unsigned char server_label[] = "server in";
+ const unsigned char *tx_label, *rx_label;
+ size_t rx_label_sz, tx_label_sz;
+
+ if (server) {
+ rx_label = client_label;
+ rx_label_sz = sizeof client_label;
+ tx_label = server_label;
+ tx_label_sz = sizeof server_label;
+ }
+ else {
+ rx_label = server_label;
+ rx_label_sz = sizeof server_label;
+ tx_label = client_label;
+ tx_label_sz = sizeof client_label;
+ }
+
+ if (!quic_hkdf_expand_label(md, rx, rx_sz, secret, secret_sz,
+ rx_label, rx_label_sz - 1) ||
+ !quic_hkdf_expand_label(md, tx, tx_sz, secret, secret_sz,
+ tx_label, tx_label_sz - 1))
+ return 0;
+
+ return 1;
+}
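+
+/* Illustration: per RFC 9001 5.2 the derivation chain on the server side is:
+ *   initial_secret        = HKDF-Extract(initial_salt, client_dst_conn_id)
+ *   client_initial_secret = HKDF-Expand-Label(initial_secret, "client in", "", 32)  -> RX
+ *   server_initial_secret = HKDF-Expand-Label(initial_secret, "server in", "", 32)  -> TX
+ */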
+
+/* Update <sec> secret key into <new_sec> according to RFC 9001 6.1.
+ * Returns 1 if succeeded, 0 if not.
+ */
+int quic_tls_sec_update(const EVP_MD *md, const struct quic_version *qv,
+ unsigned char *new_sec, size_t new_seclen,
+ const unsigned char *sec, size_t seclen)
+{
+ return quic_hkdf_expand_label(md, new_sec, new_seclen, sec, seclen,
+ qv->ku_label, qv->ku_label_len);
+}
+
+/*
+ * Build an IV into <iv> buffer with <ivlen> as size from <aead_iv> with
+ * <aead_ivlen> as size, depending on <pn> packet number.
+ * This is the function which must be called to build an AEAD IV for the AEAD
+ * cryptographic algorithm used to encrypt/decrypt the QUIC packet payloads.
+ * This function fails and returns 0 only if the two buffer lengths differ,
+ * and returns 1 otherwise.
+ */
+int quic_aead_iv_build(unsigned char *iv, size_t ivlen,
+ unsigned char *aead_iv, size_t aead_ivlen, uint64_t pn)
+{
+ int i;
+ unsigned int shift;
+ unsigned char *pos = iv;
+
+ if (ivlen != aead_ivlen)
+ return 0;
+
+ for (i = 0; i < ivlen - sizeof pn; i++)
+ *pos++ = *aead_iv++;
+
+ /* Only the remaining (sizeof pn) bytes are XOR'ed. */
+ shift = 56;
+ for (i = aead_ivlen - sizeof pn; i < aead_ivlen ; i++, shift -= 8)
+ *pos++ = *aead_iv++ ^ (pn >> shift);
+
+ return 1;
+}
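+
+/* Worked example: with a 12-byte IV and pn = 0x2a, the first 4 IV bytes are
+ * copied verbatim and the last 8 bytes are XORed with the packet number in
+ * network byte order; since the upper 7 bytes of pn are zero, only the last
+ * byte changes: iv[11] = aead_iv[11] ^ 0x2a. This is the AEAD nonce
+ * construction of RFC 9001 5.3.
+ */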
+
+/* Initialize the cipher context for RX part of <tls_ctx> QUIC TLS context.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_rx_ctx_init(EVP_CIPHER_CTX **rx_ctx,
+ const EVP_CIPHER *aead, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_DecryptInit_ex(ctx, aead, NULL, NULL, NULL) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, QUIC_TLS_IV_LEN, NULL) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN, NULL)) ||
+ !EVP_DecryptInit_ex(ctx, NULL, NULL, key, NULL))
+ goto err;
+
+ *rx_ctx = ctx;
+
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/* Initialize <*aes_ctx> AES cipher context with <key> as key for encryption */
+int quic_tls_enc_aes_ctx_init(EVP_CIPHER_CTX **aes_ctx,
+ const EVP_CIPHER *aes, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_EncryptInit_ex(ctx, aes, NULL, key, NULL))
+ goto err;
+
+ *aes_ctx = ctx;
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/* Encrypt <inlen> bytes from <in> buffer into <out> with <ctx> as AES
+ * cipher context. It is the caller's responsibility to check there
+ * is at least <inlen> bytes of available space in <out> buffer.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_aes_encrypt(unsigned char *out,
+ const unsigned char *in, size_t inlen,
+ EVP_CIPHER_CTX *ctx)
+{
+ int ret = 0;
+
+ if (!EVP_EncryptInit_ex(ctx, NULL, NULL, NULL, in) ||
+ !EVP_EncryptUpdate(ctx, out, &ret, out, inlen) ||
+ !EVP_EncryptFinal_ex(ctx, out, &ret))
+ return 0;
+
+ return 1;
+}
+
+/* Initialize <*aes_ctx> AES cipher context with <key> as key for decryption */
+int quic_tls_dec_aes_ctx_init(EVP_CIPHER_CTX **aes_ctx,
+ const EVP_CIPHER *aes, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_DecryptInit_ex(ctx, aes, NULL, key, NULL))
+ goto err;
+
+ *aes_ctx = ctx;
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/* Decrypt <in> data into <out> with <ctx> as AES cipher context.
+ * It is the caller's responsibility to check there is at least
+ * <inlen> bytes into <in> buffer.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_aes_decrypt(unsigned char *out,
+ const unsigned char *in, size_t inlen,
+ EVP_CIPHER_CTX *ctx)
+{
+ int ret = 0;
+
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, in) ||
+ !EVP_DecryptUpdate(ctx, out, &ret, out, inlen) ||
+ !EVP_DecryptFinal_ex(ctx, out, &ret))
+ return 0;
+
+ return 1;
+}
+
+/* Initialize the cipher context for TX part of <tls_ctx> QUIC TLS context.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_tx_ctx_init(EVP_CIPHER_CTX **tx_ctx,
+ const EVP_CIPHER *aead, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_EncryptInit_ex(ctx, aead, NULL, NULL, NULL) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, QUIC_TLS_IV_LEN, NULL) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN, NULL)) ||
+ !EVP_EncryptInit_ex(ctx, NULL, NULL, key, NULL))
+ goto err;
+
+ *tx_ctx = ctx;
+
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/*
+ * https://quicwg.org/base-drafts/draft-ietf-quic-tls.html#aead
+ *
+ * 5.3. AEAD Usage
+ *
+ * Packets are protected prior to applying header protection (Section 5.4).
+ * The unprotected packet header is part of the associated data (A). When removing
+ * packet protection, an endpoint first removes the header protection.
+ * (...)
+ * These ciphersuites have a 16-byte authentication tag and produce an output 16
+ * bytes larger than their input.
+ * The key and IV for the packet are computed as described in Section 5.1. The nonce,
+ * N, is formed by combining the packet protection IV with the packet number. The 62
+ * bits of the reconstructed QUIC packet number in network byte order are left-padded
+ * with zeros to the size of the IV. The exclusive OR of the padded packet number and
+ * the IV forms the AEAD nonce.
+ *
+ * The associated data, A, for the AEAD is the contents of the QUIC header, starting
+ * from the flags byte in either the short or long header, up to and including the
+ * unprotected packet number.
+ *
+ * The input plaintext, P, for the AEAD is the payload of the QUIC packet, as described
+ * in [QUIC-TRANSPORT].
+ *
+ * The output ciphertext, C, of the AEAD is transmitted in place of P.
+ *
+ * Some AEAD functions have limits for how many packets can be encrypted under the same
+ * key and IV (see for example [AEBounds]). This might be lower than the packet number limit.
+ * An endpoint MUST initiate a key update (Section 6) prior to exceeding any limit set for
+ * the AEAD that is in use.
+ */
+
+/* Encrypt in place the <buf> plaintext with <len> as length, leaving
+ * QUIC_TLS_TAG_LEN trailing bytes for the tag.
+ * Note that for CCM mode, we must set the ciphertext length if AAD data
+ * are provided from <aad> buffer with <aad_len> as length. This is always the
+ * case here. So the caller of this function must provide <aad>.
+ *
+ * https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption
+ */
+int quic_tls_encrypt(unsigned char *buf, size_t len,
+ const unsigned char *aad, size_t aad_len,
+ EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead,
+ const unsigned char *key, const unsigned char *iv)
+{
+ int outlen;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ if (!EVP_EncryptInit_ex(ctx, NULL, NULL, NULL, iv) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, NULL, len)) ||
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, aad, aad_len) ||
+ !EVP_EncryptUpdate(ctx, buf, &outlen, buf, len) ||
+ !EVP_EncryptFinal_ex(ctx, buf + outlen, &outlen) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, QUIC_TLS_TAG_LEN, buf + len))
+ return 0;
+
+ return 1;
+}
+
+/* Decrypt in place the <buf> ciphertext of <len> bytes, <len> including the
+ * QUIC_TLS_TAG_LEN trailing bytes of the tag.
+ * Note that for CCM mode, we must set the ciphertext length when AAD data
+ * are provided from the <aad> buffer with <aad_len> as length. This is always
+ * the case here, so the caller of this function must provide <aad>. Also note
+ * that there is no need to call EVP_DecryptFinal_ex() for CCM mode.
+ *
+ * https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption
+ */
+int quic_tls_decrypt(unsigned char *buf, size_t len,
+ unsigned char *aad, size_t aad_len,
+ EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead,
+ const unsigned char *key, const unsigned char *iv)
+{
+ int outlen;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, iv) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN,
+ buf + len - QUIC_TLS_TAG_LEN) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, NULL, len - QUIC_TLS_TAG_LEN)) ||
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, aad, aad_len) ||
+ !EVP_DecryptUpdate(ctx, buf, &outlen, buf, len - QUIC_TLS_TAG_LEN) ||
+ (aead_nid != NID_aes_128_ccm &&
+ !EVP_DecryptFinal_ex(ctx, buf + outlen, &outlen)))
+ return 0;
+
+ return 1;
+}
+
+/* Similar to quic_tls_decrypt(), except that this function does not decrypt
+ * in place when the <out> output buffer differs from the <in> input buffer
+ * of <len> bytes. It is the responsibility of the caller to check that the
+ * output buffer is at least as large as the input buffer.
+ * Note that for CCM mode, we must set the ciphertext length when AAD data
+ * are provided from the <aad> buffer with <aad_len> as length. This is always
+ * the case here, so the caller of this function must provide <aad>. Also note
+ * that there is no need to call EVP_DecryptFinal_ex() for CCM mode.
+ *
+ * https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption
+ *
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_decrypt2(unsigned char *out,
+ unsigned char *in, size_t len,
+ unsigned char *aad, size_t aad_len,
+ EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead,
+ const unsigned char *key, const unsigned char *iv)
+{
+ int outlen;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ len -= QUIC_TLS_TAG_LEN;
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, iv) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN, in + len) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, NULL, len)) ||
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, aad, aad_len) ||
+ !EVP_DecryptUpdate(ctx, out, &outlen, in, len) ||
+ (aead_nid != NID_aes_128_ccm &&
+ !EVP_DecryptFinal_ex(ctx, out + outlen, &outlen)))
+ return 0;
+
+ return 1;
+}
+
+/* Derive the key and IV into <key> and <iv> to be used to encrypt a retry
+ * token, from <secret> which is not pseudo-random.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_derive_retry_token_secret(const EVP_MD *md,
+ unsigned char *key, size_t keylen,
+ unsigned char *iv, size_t ivlen,
+ const unsigned char *salt, size_t saltlen,
+ const unsigned char *secret, size_t secretlen)
+{
+ unsigned char tmpkey[QUIC_TLS_KEY_LEN];
+ const unsigned char tmpkey_label[] = "retry token";
+ const unsigned char key_label[] = "retry token key";
+ const unsigned char iv_label[] = "retry token iv";
+
+ if (!quic_hkdf_extract_and_expand(md, tmpkey, sizeof tmpkey,
+ secret, secretlen, salt, saltlen,
+ tmpkey_label, sizeof tmpkey_label - 1) ||
+ !quic_hkdf_expand(md, key, keylen, tmpkey, sizeof tmpkey,
+ key_label, sizeof key_label - 1) ||
+ !quic_hkdf_expand(md, iv, ivlen, secret, secretlen,
+ iv_label, sizeof iv_label - 1))
+ return 0;
+
+ return 1;
+}
+
+/* Generate the AEAD tag for the Retry packet <pkt> of <pkt_len> bytes and
+ * write it just after the <pkt> area, which must thus provide at least 16
+ * spare bytes to store it. <odcid> is the CID of the Initial packet
+ * received which triggers the Retry.
+ *
+ * Returns non-zero on success else zero.
+ */
+int quic_tls_generate_retry_integrity_tag(unsigned char *odcid, unsigned char odcid_len,
+ unsigned char *pkt, size_t pkt_len,
+ const struct quic_version *qv)
+{
+ const EVP_CIPHER *evp = EVP_aes_128_gcm();
+ EVP_CIPHER_CTX *ctx;
+
+ /* encryption buffer - not used as only the AEAD tag generation is performed */
+ unsigned char *out = NULL;
+ /* address to store the AEAD tag */
+ unsigned char *tag = pkt + pkt_len;
+ int outlen, ret = 0;
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ /* rfc9001 5.8. Retry Packet Integrity
+ *
+ * The AEAD is computed over a pseudo-Retry packet used as AAD. It contains
+ * the ODCID len + data and the Retry packet itself.
+ */
+ if (!EVP_EncryptInit_ex(ctx, evp, NULL, qv->retry_tag_key, qv->retry_tag_nonce) ||
+ /* specify pseudo-Retry as AAD */
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, &odcid_len, sizeof(odcid_len)) ||
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, odcid, odcid_len) ||
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, pkt, pkt_len) ||
+ /* finalize */
+ !EVP_EncryptFinal_ex(ctx, out, &outlen) ||
+ /* store the tag */
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, QUIC_TLS_TAG_LEN, tag)) {
+ goto out;
+ }
+ ret = 1;
+
+ out:
+ EVP_CIPHER_CTX_free(ctx);
+ return ret;
+}
diff --git a/src/quic_tp.c b/src/quic_tp.c
new file mode 100644
index 0000000..8eeb455
--- /dev/null
+++ b/src/quic_tp.c
@@ -0,0 +1,711 @@
+#include <arpa/inet.h>
+#include <string.h>
+
+#include <haproxy/global.h>
+#include <haproxy/ncbuf-t.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/trace.h>
+
+#define QUIC_MAX_UDP_PAYLOAD_SIZE 2048
+
+#define TRACE_SOURCE &trace_quic
+
+/* These are the values of some QUIC transport parameters when absent.
+ * Should be used to initialize any transport parameters (local or remote)
+ * before updating them with customized values.
+ */
+struct quic_transport_params quic_dflt_transport_params = {
+ .max_udp_payload_size = QUIC_TP_DFLT_MAX_UDP_PAYLOAD_SIZE,
+ .ack_delay_exponent = QUIC_TP_DFLT_ACK_DELAY_COMPONENT,
+ .max_ack_delay = QUIC_TP_DFLT_MAX_ACK_DELAY,
+ .active_connection_id_limit = QUIC_TP_DFLT_ACTIVE_CONNECTION_ID_LIMIT,
+};
+
+/* Initialize <dst> transport parameters with default values (when absent)
+ * from <quic_dflt_transport_params>.
+ * Never fails.
+ */
+static void quic_dflt_transport_params_cpy(struct quic_transport_params *dst)
+{
+ dst->max_udp_payload_size = quic_dflt_transport_params.max_udp_payload_size;
+ dst->ack_delay_exponent = quic_dflt_transport_params.ack_delay_exponent;
+ dst->max_ack_delay = quic_dflt_transport_params.max_ack_delay;
+ dst->active_connection_id_limit = quic_dflt_transport_params.active_connection_id_limit;
+}
+
+/* Initialize <p> transport parameters. <server> is a boolean, set if TPs are
+ * used by a server (haproxy frontend) else this is for a client (haproxy
+ * backend).
+ *
+ * This must only be used for haproxy local parameters. To initialize peer
+ * parameters, see quic_dflt_transport_params_cpy().
+ *
+ * Never fails.
+ */
+void quic_transport_params_init(struct quic_transport_params *p, int server)
+{
+ const uint64_t ncb_size = global.tune.bufsize - NCB_RESERVED_SZ;
+ const int max_streams_bidi = global.tune.quic_frontend_max_streams_bidi;
+ const int max_streams_uni = 3;
+
+ /* Set RFC default values for unspecified parameters. */
+ quic_dflt_transport_params_cpy(p);
+
+ /* Set the max_udp_payload_size value. If left unset, it would default
+ * to QUIC_TP_DFLT_MAX_UDP_PAYLOAD_SIZE.
+ */
+ p->max_udp_payload_size = QUIC_MAX_UDP_PAYLOAD_SIZE;
+ if (server)
+ p->max_idle_timeout = global.tune.quic_frontend_max_idle_timeout;
+ else
+ p->max_idle_timeout = global.tune.quic_backend_max_idle_timeout;
+
+ p->initial_max_streams_bidi = max_streams_bidi;
+ p->initial_max_streams_uni = max_streams_uni;
+ p->initial_max_stream_data_bidi_local = ncb_size;
+ p->initial_max_stream_data_bidi_remote = ncb_size;
+ p->initial_max_stream_data_uni = ncb_size;
+ p->initial_max_data = (max_streams_bidi + max_streams_uni) * ncb_size;
+
+ if (server)
+ p->with_stateless_reset_token = 1;
+
+ p->active_connection_id_limit = 8;
+
+ p->retry_source_connection_id.len = 0;
+}
+
+/* Encode <addr> preferred address transport parameter in <buf> without its
+ * "type+len" prefix. Note that the IP addresses must be encoded in network byte
+ * order.
+ * So ->ipv4_addr and ->ipv6_addr, which are buffers, must contain values
+ * already encoded in network byte order.
+ * It is the responsibility of the caller to check there is enough room in
+ * <buf> to encode this address.
+ * Never fails.
+ */
+static void quic_transport_param_enc_pref_addr_val(unsigned char **buf,
+ const unsigned char *end,
+ struct tp_preferred_address *addr)
+{
+ write_n16(*buf, addr->ipv4_port);
+ *buf += sizeof addr->ipv4_port;
+
+ memcpy(*buf, addr->ipv4_addr, sizeof addr->ipv4_addr);
+ *buf += sizeof addr->ipv4_addr;
+
+ write_n16(*buf, addr->ipv6_port);
+ *buf += sizeof addr->ipv6_port;
+
+ memcpy(*buf, addr->ipv6_addr, sizeof addr->ipv6_addr);
+ *buf += sizeof addr->ipv6_addr;
+
+ *(*buf)++ = addr->cid.len;
+ if (addr->cid.len) {
+ memcpy(*buf, addr->cid.data, addr->cid.len);
+ *buf += addr->cid.len;
+ }
+
+ memcpy(*buf, addr->stateless_reset_token, sizeof addr->stateless_reset_token);
+ *buf += sizeof addr->stateless_reset_token;
+}
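+
+/* Descriptive note, not part of this patch: the wire layout produced above,
+ * and parsed back by the decoder below, simply follows the order in which the
+ * fields are written:
+ * ipv4 port | ipv4 addr | ipv6 port | ipv6 addr | cid len | cid | token
+ */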
+
+/* Decode into <addr> the preferred_address transport parameter found in the <*buf> buffer.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_dec_pref_addr(struct tp_preferred_address *addr,
+ const unsigned char **buf,
+ const unsigned char *end)
+{
+ ssize_t addr_len;
+
+ addr_len = sizeof addr->ipv4_port + sizeof addr->ipv4_addr;
+ addr_len += sizeof addr->ipv6_port + sizeof addr->ipv6_addr;
+ addr_len += sizeof addr->cid.len;
+
+ if (end - *buf < addr_len)
+ return 0;
+
+ addr->ipv4_port = read_n16(*buf);
+ *buf += sizeof addr->ipv4_port;
+
+ memcpy(addr->ipv4_addr, *buf, sizeof addr->ipv4_addr);
+ *buf += sizeof addr->ipv4_addr;
+
+ addr->ipv6_port = read_n16(*buf);
+ *buf += sizeof addr->ipv6_port;
+
+ memcpy(addr->ipv6_addr, *buf, sizeof addr->ipv6_addr);
+ *buf += sizeof addr->ipv6_addr;
+
+ addr->cid.len = *(*buf)++;
+ if (addr->cid.len) {
+ if (end - *buf < addr->cid.len || addr->cid.len > sizeof addr->cid.data)
+ return 0;
+ memcpy(addr->cid.data, *buf, addr->cid.len);
+ *buf += addr->cid.len;
+ }
+
+ if (end - *buf != sizeof addr->stateless_reset_token)
+ return 0;
+
+ memcpy(addr->stateless_reset_token, *buf, end - *buf);
+ *buf += sizeof addr->stateless_reset_token;
+
+ return *buf == end;
+}
+
+/* Decode into <tp> the version_information transport parameter found in <*buf>
+ * buffer. <server> must be set to 1 for QUIC clients which receive server
+ * transport parameters, and 0 for QUIC servers which receive client transport
+ * parameters.
+ * Also set the QUIC negotiated version into <tp>.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_dec_version_info(struct tp_version_information *tp,
+ const unsigned char **buf,
+ const unsigned char *end, int server)
+{
+ size_t tp_len = end - *buf;
+ const uint32_t *ver;
+
+ /* <tp_len> must be a multiple of sizeof(uint32_t) */
+ if (tp_len < sizeof tp->choosen || (tp_len & 0x3))
+ return 0;
+
+ tp->choosen = ntohl(*(uint32_t *)*buf);
+ /* Must not be null */
+ if (!tp->choosen)
+ return 0;
+
+ *buf += sizeof tp->choosen;
+ tp->others = (const uint32_t *)*buf;
+
+ /* Other versions must not be null */
+ for (ver = tp->others; ver < (const uint32_t *)end; ver++) {
+ if (!*ver)
+ return 0;
+ }
+
+ if (server)
+ /* TODO: not supported */
+ return 0;
+
+ tp->nb_others = (end - (const unsigned char *)tp->others) / sizeof *tp->others;
+ for (ver = tp->others; ver < (const uint32_t *)end; ver++) {
+ if (!tp->negotiated_version) {
+ int i;
+
+ for (i = 0; i < quic_versions_nb; i++) {
+ if (ntohl(*ver) == quic_versions[i].num) {
+ tp->negotiated_version = &quic_versions[i];
+ break;
+ }
+ }
+ }
+
+ if (preferred_version && ntohl(*ver) == preferred_version->num) {
+ tp->negotiated_version = preferred_version;
+ goto out;
+ }
+ }
+
+ out:
+ *buf = end;
+
+ return 1;
+}
+
+/* Decode into <p> struct a transport parameter found in <*buf> buffer with
+ * <type> as type and <len> as length, depending on <server> boolean value which
+ * must be set to 1 for a server (haproxy listener) or 0 for a client (connection
+ * to a haproxy server). Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_decode(struct quic_transport_params *p,
+ int server, uint64_t type,
+ const unsigned char **buf, size_t len)
+{
+ const unsigned char *end = *buf + len;
+
+ switch (type) {
+ case QUIC_TP_ORIGINAL_DESTINATION_CONNECTION_ID:
+ if (!server || len > sizeof p->original_destination_connection_id.data)
+ return 0;
+
+ if (len)
+ memcpy(p->original_destination_connection_id.data, *buf, len);
+ p->original_destination_connection_id.len = len;
+ *buf += len;
+ p->original_destination_connection_id_present = 1;
+ break;
+ case QUIC_TP_INITIAL_SOURCE_CONNECTION_ID:
+ if (len > sizeof p->initial_source_connection_id.data)
+ return 0;
+
+ if (len)
+ memcpy(p->initial_source_connection_id.data, *buf, len);
+ p->initial_source_connection_id.len = len;
+ *buf += len;
+ p->initial_source_connection_id_present = 1;
+ break;
+ case QUIC_TP_STATELESS_RESET_TOKEN:
+ if (!server || len != sizeof p->stateless_reset_token)
+ return 0;
+ memcpy(p->stateless_reset_token, *buf, len);
+ *buf += len;
+ p->with_stateless_reset_token = 1;
+ break;
+ case QUIC_TP_PREFERRED_ADDRESS:
+ if (!server)
+ return 0;
+ if (!quic_transport_param_dec_pref_addr(&p->preferred_address, buf, *buf + len))
+ return 0;
+ p->with_preferred_address = 1;
+ break;
+ case QUIC_TP_MAX_IDLE_TIMEOUT:
+ if (!quic_dec_int(&p->max_idle_timeout, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_MAX_UDP_PAYLOAD_SIZE:
+ if (!quic_dec_int(&p->max_udp_payload_size, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_DATA:
+ if (!quic_dec_int(&p->initial_max_data, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL:
+ if (!quic_dec_int(&p->initial_max_stream_data_bidi_local, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE:
+ if (!quic_dec_int(&p->initial_max_stream_data_bidi_remote, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAM_DATA_UNI:
+ if (!quic_dec_int(&p->initial_max_stream_data_uni, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAMS_BIDI:
+ if (!quic_dec_int(&p->initial_max_streams_bidi, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAMS_UNI:
+ if (!quic_dec_int(&p->initial_max_streams_uni, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_ACK_DELAY_EXPONENT:
+ if (!quic_dec_int(&p->ack_delay_exponent, buf, end) ||
+ p->ack_delay_exponent > QUIC_TP_ACK_DELAY_EXPONENT_LIMIT)
+ return 0;
+ break;
+ case QUIC_TP_MAX_ACK_DELAY:
+ if (!quic_dec_int(&p->max_ack_delay, buf, end) ||
+ p->max_ack_delay > QUIC_TP_MAX_ACK_DELAY_LIMIT)
+ return 0;
+ break;
+ case QUIC_TP_DISABLE_ACTIVE_MIGRATION:
+ /* Zero-length parameter type. */
+ if (len != 0)
+ return 0;
+ p->disable_active_migration = 1;
+ break;
+ case QUIC_TP_ACTIVE_CONNECTION_ID_LIMIT:
+ if (!quic_dec_int(&p->active_connection_id_limit, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_DRAFT_VERSION_INFORMATION:
+ if (!quic_transport_param_dec_version_info(&p->version_information,
+ buf, *buf + len, server))
+ return 0;
+ break;
+ default:
+ *buf += len;
+ }
+
+ return *buf == end;
+}
+
+/* Encode <type> and <len> variable length values in <buf>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_encode_type_len(unsigned char **buf,
+ const unsigned char *end,
+ uint64_t type, uint64_t len)
+{
+ return quic_enc_int(buf, end, type) && quic_enc_int(buf, end, len);
+}
+
+/* Decode variable length type and length values of a QUIC transport parameter
+ * into <type> and <len> found in <*buf> buffer.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_decode_type_len(uint64_t *type, uint64_t *len,
+ const unsigned char **buf,
+ const unsigned char *end)
+{
+ return quic_dec_int(type, buf, end) && quic_dec_int(len, buf, end);
+}
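+
+/* Illustrative sketch, not part of this patch: quic_enc_int()/quic_dec_int()
+ * used above (defined elsewhere) implement the RFC 9000 variable-length
+ * integer encoding, in which the two most significant bits of the first byte
+ * give the encoded length (1, 2, 4 or 8 bytes). A hypothetical standalone
+ * encoder, assuming <val> < 2^62 and enough room in <buf>:
+ */
+#if 0
+static size_t varint_enc_sketch(unsigned char *buf, uint64_t val)
+{
+ if (val < (1ULL << 6)) {
+ buf[0] = val; /* 00xxxxxx: 1 byte */
+ return 1;
+ }
+ else if (val < (1ULL << 14)) {
+ buf[0] = 0x40 | (val >> 8); /* 01xxxxxx: 2 bytes */
+ buf[1] = val;
+ return 2;
+ }
+ else if (val < (1ULL << 30)) {
+ buf[0] = 0x80 | (val >> 24); /* 10xxxxxx: 4 bytes */
+ buf[1] = val >> 16;
+ buf[2] = val >> 8;
+ buf[3] = val;
+ return 4;
+ }
+ else {
+ int i;
+
+ buf[0] = 0xc0 | (val >> 56); /* 11xxxxxx: 8 bytes */
+ for (i = 1; i < 8; i++)
+ buf[i] = val >> (8 * (7 - i));
+ return 8;
+ }
+}
+#endif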
+
+/* Encode the <param> byte stream with <type> as type and <length> as length into <buf>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_mem(unsigned char **buf, const unsigned char *end,
+ uint64_t type, void *param, uint64_t length)
+{
+ if (!quic_transport_param_encode_type_len(buf, end, type, length))
+ return 0;
+
+ if (end - *buf < length)
+ return 0;
+
+ if (length)
+ memcpy(*buf, param, length);
+ *buf += length;
+
+ return 1;
+}
+
+/* Encode <val>, a 64-bit value, as a variable-length integer into <buf>,
+ * prefixed by its <type> and length.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_int(unsigned char **buf,
+ const unsigned char *end,
+ uint64_t type, uint64_t val)
+{
+ size_t len;
+
+ len = quic_int_getsize(val);
+
+ return len && quic_transport_param_encode_type_len(buf, end, type, len) &&
+ quic_enc_int(buf, end, val);
+}
+
+/* Returns the required length in bytes to encode <cid> QUIC connection ID. */
+static inline size_t sizeof_quic_cid(const struct tp_cid *cid)
+{
+ return sizeof cid->len + cid->len;
+}
+
+/* Encode <addr> preferred address into <buf>.
+ * Note that the IP addresses must be encoded in network byte order.
+ * So ->ipv4_addr and ->ipv6_addr, which are buffers, must contain
+ * values already encoded in network byte order.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_pref_addr(unsigned char **buf,
+ const unsigned char *end,
+ struct tp_preferred_address *addr)
+{
+ uint64_t addr_len = 0;
+
+ addr_len += sizeof addr->ipv4_port + sizeof addr->ipv4_addr;
+ addr_len += sizeof addr->ipv6_port + sizeof addr->ipv6_addr;
+ addr_len += sizeof_quic_cid(&addr->cid);
+ addr_len += sizeof addr->stateless_reset_token;
+
+ if (!quic_transport_param_encode_type_len(buf, end, QUIC_TP_PREFERRED_ADDRESS, addr_len))
+ return 0;
+
+ if (end - *buf < addr_len)
+ return 0;
+
+ quic_transport_param_enc_pref_addr_val(buf, end, addr);
+
+ return 1;
+}
+
+/* Encode the version_information transport parameter with <choosen_version> as
+ * the chosen version.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_version_info(unsigned char **buf,
+ const unsigned char *end,
+ const struct quic_version *choosen_version,
+ int server)
+{
+ int i;
+ uint64_t tp_len;
+ uint32_t ver;
+
+ tp_len = sizeof choosen_version->num + quic_versions_nb * sizeof(uint32_t);
+ if (!quic_transport_param_encode_type_len(buf, end,
+ QUIC_TP_DRAFT_VERSION_INFORMATION,
+ tp_len))
+ return 0;
+
+ if (end - *buf < tp_len)
+ return 0;
+
+ /* First: the chosen version */
+ ver = htonl(choosen_version->num);
+ memcpy(*buf, &ver, sizeof ver);
+ *buf += sizeof ver;
+ /* For servers: all supported versions, the chosen one included */
+ for (i = 0; i < quic_versions_nb; i++) {
+ ver = htonl(quic_versions[i].num);
+ memcpy(*buf, &ver, sizeof ver);
+ *buf += sizeof ver;
+ }
+
+ return 1;
+}
+
+/* Encode <p> transport parameters into <buf> depending on <server> value which
+ * must be set to 1 for a server (haproxy listener) or 0 for a client
+ * (connection to a haproxy server).
+ * Returns the number of bytes written if succeeded, 0 if not.
+ */
+int quic_transport_params_encode(unsigned char *buf,
+ const unsigned char *end,
+ struct quic_transport_params *p,
+ const struct quic_version *choosen_version,
+ int server)
+{
+ unsigned char *head;
+ unsigned char *pos;
+
+ head = pos = buf;
+ if (server) {
+ if (!quic_transport_param_enc_mem(&pos, end,
+ QUIC_TP_ORIGINAL_DESTINATION_CONNECTION_ID,
+ p->original_destination_connection_id.data,
+ p->original_destination_connection_id.len))
+ return 0;
+
+ if (p->retry_source_connection_id.len) {
+ if (!quic_transport_param_enc_mem(&pos, end,
+ QUIC_TP_RETRY_SOURCE_CONNECTION_ID,
+ p->retry_source_connection_id.data,
+ p->retry_source_connection_id.len))
+ return 0;
+ }
+
+ if (p->with_stateless_reset_token &&
+ !quic_transport_param_enc_mem(&pos, end, QUIC_TP_STATELESS_RESET_TOKEN,
+ p->stateless_reset_token,
+ sizeof p->stateless_reset_token))
+ return 0;
+ if (p->with_preferred_address &&
+ !quic_transport_param_enc_pref_addr(&pos, end, &p->preferred_address))
+ return 0;
+ }
+
+ if (!quic_transport_param_enc_mem(&pos, end,
+ QUIC_TP_INITIAL_SOURCE_CONNECTION_ID,
+ p->initial_source_connection_id.data,
+ p->initial_source_connection_id.len))
+ return 0;
+
+ if (p->max_idle_timeout &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_MAX_IDLE_TIMEOUT, p->max_idle_timeout))
+ return 0;
+
+ /*
+ * "max_packet_size" transport parameter must be transmitted only if different
+ * of the default value.
+ */
+ if (p->max_udp_payload_size != QUIC_TP_DFLT_MAX_UDP_PAYLOAD_SIZE &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_MAX_UDP_PAYLOAD_SIZE, p->max_udp_payload_size))
+ return 0;
+
+ if (p->initial_max_data &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_DATA, p->initial_max_data))
+ return 0;
+
+ if (p->initial_max_stream_data_bidi_local &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL,
+ p->initial_max_stream_data_bidi_local))
+ return 0;
+
+ if (p->initial_max_stream_data_bidi_remote &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE,
+ p->initial_max_stream_data_bidi_remote))
+ return 0;
+
+ if (p->initial_max_stream_data_uni &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAM_DATA_UNI,
+ p->initial_max_stream_data_uni))
+ return 0;
+
+ if (p->initial_max_streams_bidi &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAMS_BIDI,
+ p->initial_max_streams_bidi))
+ return 0;
+
+ if (p->initial_max_streams_uni &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAMS_UNI,
+ p->initial_max_streams_uni))
+ return 0;
+
+ /*
+ * "ack_delay_exponent" transport parameter must be transmitted only if different
+ * of the default value.
+ */
+ if (p->ack_delay_exponent != QUIC_TP_DFLT_ACK_DELAY_COMPONENT &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_ACK_DELAY_EXPONENT, p->ack_delay_exponent))
+ return 0;
+
+ /*
+ * "max_ack_delay" transport parameter must be transmitted only if different
+ * of the default value.
+ */
+ if (p->max_ack_delay != QUIC_TP_DFLT_MAX_ACK_DELAY &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_MAX_ACK_DELAY, p->max_ack_delay))
+ return 0;
+
+ /* 0-length value */
+ if (p->disable_active_migration &&
+ !quic_transport_param_encode_type_len(&pos, end, QUIC_TP_DISABLE_ACTIVE_MIGRATION, 0))
+ return 0;
+
+ if (p->active_connection_id_limit &&
+ p->active_connection_id_limit != QUIC_TP_DFLT_ACTIVE_CONNECTION_ID_LIMIT &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_ACTIVE_CONNECTION_ID_LIMIT,
+ p->active_connection_id_limit))
+ return 0;
+
+ if (!quic_transport_param_enc_version_info(&pos, end, choosen_version, server))
+ return 0;
+
+ return pos - head;
+}
+
+/* Decode transport parameters found in <buf> buffer into <p>, depending on
+ * <server> boolean value which must be set to 1 for a server (haproxy listener)
+ * or 0 for a client (connection to a haproxy server).
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_params_decode(struct quic_transport_params *p, int server,
+ const unsigned char *buf,
+ const unsigned char *end)
+{
+ const unsigned char *pos;
+
+ pos = buf;
+
+ while (pos != end) {
+ uint64_t type, len;
+
+ if (!quic_transport_param_decode_type_len(&type, &len, &pos, end))
+ return 0;
+
+ if (end - pos < len)
+ return 0;
+
+ if (!quic_transport_param_decode(p, server, type, &pos, len))
+ return 0;
+ }
+
+ /*
+ * A server MUST send the original_destination_connection_id transport
+ * parameter. initial_source_connection_id must be present for both server
+ * and client.
+ */
+ if ((server && !p->original_destination_connection_id_present) ||
+ !p->initial_source_connection_id_present)
+ return 0;
+
+ return 1;
+}
+
+/* Store transport parameters found in <buf> buffer into <qc> QUIC connection
+ * depending on <server> value which must be 1 for a server (haproxy listener)
+ * or 0 for a client (connection to a haproxy server).
+ * Note that peer transport parameters are stored in the TX part of the connection:
+ * they are used to send packets to the peer with its transport parameters as
+ * limitations.
+ * Returns 1 if succeeded, 0 if not.
+ */
+int quic_transport_params_store(struct quic_conn *qc, int server,
+ const unsigned char *buf,
+ const unsigned char *end)
+{
+ struct quic_transport_params *tx_params = &qc->tx.params;
+ struct quic_transport_params *rx_params = &qc->rx.params;
+ /* Initial source connection ID */
+ struct tp_cid *iscid;
+
+ /* initialize peer TPs to RFC default value */
+ quic_dflt_transport_params_cpy(tx_params);
+
+ if (!quic_transport_params_decode(tx_params, server, buf, end))
+ return 0;
+
+ /* Update the connection from transport parameters received */
+ if (tx_params->version_information.negotiated_version &&
+ tx_params->version_information.negotiated_version != qc->original_version)
+ qc->negotiated_version =
+ qc->tx.params.version_information.negotiated_version;
+
+ if (tx_params->max_ack_delay)
+ qc->max_ack_delay = tx_params->max_ack_delay;
+
+ if (tx_params->max_idle_timeout && rx_params->max_idle_timeout)
+ qc->max_idle_timeout =
+ QUIC_MIN(tx_params->max_idle_timeout, rx_params->max_idle_timeout);
+ else
+ qc->max_idle_timeout =
+ QUIC_MAX(tx_params->max_idle_timeout, rx_params->max_idle_timeout);
+ TRACE_PROTO("\nTX(remote) transp. params.", QUIC_EV_TRANSP_PARAMS, qc, tx_params);
+
+ /* Check that the "initial_source_connection_id" transport parameter matches
+ * the SCID received which is also the DCID of the connection.
+ */
+ iscid = &tx_params->initial_source_connection_id;
+ if (qc->dcid.len != iscid->len ||
+ (qc->dcid.len && memcmp(qc->dcid.data, iscid->data, qc->dcid.len))) {
+ TRACE_PROTO("initial_source_connection_id transport parameter mismatch",
+ QUIC_EV_TRANSP_PARAMS, qc);
+ /* Kill the connection as soon as possible */
+ qc_kill_conn(qc);
+ }
+
+ return 1;
+}
+
+/* QUIC server (or haproxy listener) only function.
+ * Initialize the local transport parameters <rx_params> from <listener_params>
+ * coming from the configuration and from Initial packet information
+ * (destination connection ID, source connection ID, original destination
+ * connection ID from the client token).
+ * Returns 1 if succeeded, 0 if not.
+ */
+int qc_lstnr_params_init(struct quic_conn *qc,
+ const struct quic_transport_params *listener_params,
+ const unsigned char *stateless_reset_token,
+ const unsigned char *dcid, size_t dcidlen,
+ const unsigned char *scid, size_t scidlen,
+ const struct quic_cid *token_odcid)
+{
+ struct quic_transport_params *rx_params = &qc->rx.params;
+ struct tp_cid *odcid_param = &rx_params->original_destination_connection_id;
+
+ /* Copy the transport parameters. */
+ *rx_params = *listener_params;
+ /* Copy the stateless reset token */
+ memcpy(rx_params->stateless_reset_token, stateless_reset_token,
+ sizeof rx_params->stateless_reset_token);
+ /* Copy original_destination_connection_id transport parameter. */
+ if (token_odcid->len) {
+ memcpy(odcid_param->data, token_odcid->data, token_odcid->len);
+ odcid_param->len = token_odcid->len;
+ /* Copy retry_source_connection_id transport parameter. */
+ memcpy(rx_params->retry_source_connection_id.data, dcid, dcidlen);
+ rx_params->retry_source_connection_id.len = dcidlen;
+ }
+ else {
+ memcpy(odcid_param->data, dcid, dcidlen);
+ odcid_param->len = dcidlen;
+ }
+
+ /* Copy the initial source connection ID. */
+ memcpy(rx_params->initial_source_connection_id.data, scid, scidlen);
+ rx_params->initial_source_connection_id.len = scidlen;
+ TRACE_PROTO("\nRX(local) transp. params.", QUIC_EV_TRANSP_PARAMS, qc, rx_params);
+
+ return 1;
+}
+
diff --git a/src/raw_sock.c b/src/raw_sock.c
new file mode 100644
index 0000000..ae85c75
--- /dev/null
+++ b/src/raw_sock.c
@@ -0,0 +1,489 @@
+/*
+ * RAW transport layer over SOCK_STREAM sockets.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <netinet/tcp.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global.h>
+#include <haproxy/pipe.h>
+#include <haproxy/tools.h>
+
+
+#if defined(USE_LINUX_SPLICE)
+
+/* A pipe contains 16 segments max, and it's common to see segments of 1448 bytes
+ * because of timestamps. Use this as a hint for not looping on splice().
+ */
+#define SPLICE_FULL_HINT 16*1448
+
+/* how much data we attempt to splice at once when the buffer is configured for
+ * infinite forwarding */
+#define MAX_SPLICE_AT_ONCE (1<<30)
+
+/* Returns :
+ * -1 if splice() is not supported
+ * >= 0 to report the number of spliced bytes.
+ * connection flags are updated (error, read0, wait_room, wait_data).
+ * The caller must have previously allocated the pipe.
+ */
+int raw_sock_to_pipe(struct connection *conn, void *xprt_ctx, struct pipe *pipe, unsigned int count)
+{
+ int ret;
+ int retval = 0;
+
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ return 0;
+
+ conn->flags &= ~CO_FL_WAIT_ROOM;
+ errno = 0;
+
+ /* Under Linux, if FD_POLL_HUP is set, we have reached the end.
+ * Since older splice() implementations were buggy and returned
+ * EAGAIN on end of read, let's bypass the call to splice() now.
+ */
+ if (unlikely(!(fdtab[conn->handle.fd].state & FD_POLL_IN))) {
+ /* stop here if we reached the end of data */
+ if ((fdtab[conn->handle.fd].state & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP)
+ goto out_read0;
+
+ /* report error on POLL_ERR before connection establishment */
+ if ((fdtab[conn->handle.fd].state & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ errno = 0; /* let the caller do a getsockopt() if it wants it */
+ goto leave;
+ }
+ }
+
+ while (count) {
+ if (count > MAX_SPLICE_AT_ONCE)
+ count = MAX_SPLICE_AT_ONCE;
+
+ ret = splice(conn->handle.fd, NULL, pipe->prod, NULL, count,
+ SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+
+ if (ret <= 0) {
+ if (ret == 0)
+ goto out_read0;
+
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ /* there are two reasons for EAGAIN :
+ * - nothing in the socket buffer (standard)
+ * - pipe is full
+ * The difference between these two situations
+ * is problematic. Since we don't know if the
+ * pipe is full, we'll stop if the pipe is not
+ * empty. Anyway, we will almost always fill or
+ * empty the pipe.
+ */
+ if (pipe->data) {
+ /* always stop reading until the pipe is flushed */
+ conn->flags |= CO_FL_WAIT_ROOM;
+ break;
+ }
+ /* socket buffer exhausted */
+ fd_cant_recv(conn->handle.fd);
+ break;
+ }
+ else if (errno == ENOSYS || errno == EINVAL || errno == EBADF) {
+ /* splice not supported on this end, disable it.
+ * We can safely return -1 since there is no
+ * chance that any data has been piped yet.
+ */
+ retval = -1;
+ goto leave;
+ }
+ else if (errno == EINTR) {
+ /* try again */
+ continue;
+ }
+ /* here we have another error */
+ conn->flags |= CO_FL_ERROR;
+ break;
+ } /* ret <= 0 */
+
+ retval += ret;
+ pipe->data += ret;
+ count -= ret;
+
+ if (pipe->data >= SPLICE_FULL_HINT || ret >= global.tune.recv_enough) {
+ /* We've read enough of it for this time, let's stop before
+ * being asked to poll.
+ */
+ conn->flags |= CO_FL_WAIT_ROOM;
+ break;
+ }
+ } /* while */
+
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && retval)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ leave:
+ if (retval > 0) {
+ /* we count the total bytes sent, and the send rate for 32-byte
+ * blocks. The reason for the latter is that freq_ctr are
+ * limited to 4GB and that it's not enough per second.
+ */
+ _HA_ATOMIC_ADD(&global.out_bytes, retval);
+ _HA_ATOMIC_ADD(&global.spliced_out_bytes, retval);
+ update_freq_ctr(&global.out_32bps, (retval + 16) / 32);
+ }
+ return retval;
+
+ out_read0:
+ conn_sock_read0(conn);
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ goto leave;
+}
+
+/* Send as many bytes as possible from the pipe to the connection's socket.
+ */
+int raw_sock_from_pipe(struct connection *conn, void *xprt_ctx, struct pipe *pipe)
+{
+ int ret, done;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_send_ready(conn->handle.fd))
+ return 0;
+
+ if (conn->flags & CO_FL_SOCK_WR_SH) {
+ /* it's already closed */
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH;
+ errno = EPIPE;
+ return 0;
+ }
+
+ done = 0;
+ while (pipe->data) {
+ ret = splice(pipe->cons, NULL, conn->handle.fd, NULL, pipe->data,
+ SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+
+ if (ret <= 0) {
+ if (ret == 0 || errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_send(conn->handle.fd);
+ break;
+ }
+ else if (errno == EINTR)
+ continue;
+
+ /* here we have another error */
+ conn->flags |= CO_FL_ERROR;
+ break;
+ }
+
+ done += ret;
+ pipe->data -= ret;
+ }
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && done) {
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ return done;
+}
+
+#endif /* USE_LINUX_SPLICE */
+
+
+/* Receive up to <count> bytes from connection <conn>'s socket and store them
+ * into buffer <buf>. Only one call to recv() is performed, unless the
+ * buffer wraps, in which case a second call may be performed. The connection's
+ * flags are updated with whatever special event is detected (error, read0,
+ * empty). The caller is responsible for taking care of those events and
+ * avoiding the call if inappropriate. The function does not call the
+ * connection's polling update function, so the caller is responsible for this.
+ * errno is cleared before starting so that the caller knows that if it spots an
+ * error without errno, it's pending and can be retrieved via getsockopt(SO_ERROR).
+ */
+static size_t raw_sock_to_buf(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags)
+{
+ ssize_t ret;
+ size_t try, done = 0;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ return 0;
+
+ conn->flags &= ~CO_FL_WAIT_ROOM;
+ errno = 0;
+
+ if (unlikely(!(fdtab[conn->handle.fd].state & FD_POLL_IN))) {
+ /* stop here if we reached the end of data */
+ if ((fdtab[conn->handle.fd].state & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP)
+ goto read0;
+
+ /* report error on POLL_ERR before connection establishment */
+ if ((fdtab[conn->handle.fd].state & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto leave;
+ }
+ }
+
+ /* read the largest possible block. For this, we perform only one call
+ * to recv() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again. A new attempt is made on
+ * EINTR too.
+ */
+ while (count > 0) {
+ try = b_contig_space(buf);
+ if (!try)
+ break;
+
+ if (try > count)
+ try = count;
+
+ ret = recv(conn->handle.fd, b_tail(buf), try, 0);
+
+ if (ret > 0) {
+ b_add(buf, ret);
+ done += ret;
+ if (ret < try) {
+ /* socket buffer exhausted */
+ fd_cant_recv(conn->handle.fd);
+
+ /* unfortunately, on level-triggered events, POLL_HUP
+ * is generally delivered AFTER the system buffer is
+ * empty, unless the poller supports POLL_RDHUP. If
+ * we know this is the case, we don't try to read more
+ * as we know there's no more available. Similarly, if
+ * there's no problem with lingering we don't even try
+ * to read an unlikely close from the client since we'll
+ * close first anyway.
+ */
+ if (fdtab[conn->handle.fd].state & FD_POLL_HUP)
+ goto read0;
+
+ if (!(fdtab[conn->handle.fd].state & FD_LINGER_RISK) ||
+ (cur_poller.flags & HAP_POLL_F_RDHUP)) {
+ break;
+ }
+ }
+ count -= ret;
+
+ if (flags & CO_RFL_READ_ONCE)
+ break;
+ }
+ else if (ret == 0) {
+ goto read0;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == ENOTCONN) {
+ /* socket buffer exhausted */
+ fd_cant_recv(conn->handle.fd);
+ break;
+ }
+ else if (errno != EINTR) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ break;
+ }
+ }
+
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && done)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ leave:
+ return done;
+
+ read0:
+ conn_sock_read0(conn);
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ /* Now a final check for a possible asynchronous low-level error
+ * report. This can happen when a connection receives a reset
+ * after a shutdown, both POLL_HUP and POLL_ERR are queued, and
+ * we might have come from there by just checking POLL_HUP instead
+ * of recv()'s return value 0, so we have no way to tell there was
+ * an error without checking.
+ */
+ if (unlikely(!done && fdtab[conn->handle.fd].state & FD_POLL_ERR))
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto leave;
+}
+
+
+/* Send up to <count> pending bytes from buffer <buf> to connection <conn>'s
+ * socket. <flags> may contain some CO_SFL_* flags to hint the system about
+ * other pending data for example, but this flag is ignored at the moment.
+ * Only one call to send() is performed, unless the buffer wraps, in which case
+ * a second call may be performed. The connection's flags are updated with
+ * whatever special event is detected (error, empty). The caller is responsible
+ * for taking care of those events and avoiding the call if inappropriate. The
+ * function does not call the connection's polling update function, so the caller
+ * is responsible for this. It's up to the caller to update the buffer's contents
+ * based on the return value.
+ */
+static size_t raw_sock_from_buf(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags)
+{
+ ssize_t ret;
+ size_t try, done;
+ int send_flag;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_send_ready(conn->handle.fd))
+ return 0;
+
+ if (conn->flags & CO_FL_SOCK_WR_SH) {
+ /* it's already closed */
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH;
+ errno = EPIPE;
+ return 0;
+ }
+
+ done = 0;
+ /* send the largest possible block. For this we perform only one call
+ * to send() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again.
+ */
+ while (count) {
+ try = b_contig_data(buf, done);
+ if (try > count)
+ try = count;
+
+ send_flag = MSG_DONTWAIT | MSG_NOSIGNAL;
+ if (try < count || flags & CO_SFL_MSG_MORE)
+ send_flag |= MSG_MORE;
+
+ ret = send(conn->handle.fd, b_peek(buf, done), try, send_flag);
+
+ if (ret > 0) {
+ count -= ret;
+ done += ret;
+
+ /* if the system buffer is full, don't insist */
+ if (ret < try) {
+ fd_cant_send(conn->handle.fd);
+ break;
+ }
+ if (!count)
+ fd_stop_send(conn->handle.fd);
+ }
+ else if (ret == 0 || errno == EAGAIN || errno == EWOULDBLOCK || errno == ENOTCONN || errno == EINPROGRESS) {
+ /* nothing written, we need to poll for write first */
+ fd_cant_send(conn->handle.fd);
+ break;
+ }
+ else if (errno != EINTR) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ break;
+ }
+ }
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && done) {
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ if (done > 0) {
+ /* we count the total bytes sent, and the send rate for 32-byte
+ * blocks. The reason for the latter is that freq_ctr are
+ * limited to 4GB and that it's not enough per second.
+ */
+ _HA_ATOMIC_ADD(&global.out_bytes, done);
+ update_freq_ctr(&global.out_32bps, (done + 16) / 32);
+ }
+ return done;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int raw_sock_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ return conn_subscribe(conn, xprt_ctx, event_type, es);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int raw_sock_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ return conn_unsubscribe(conn, xprt_ctx, event_type, es);
+}
+
+static void raw_sock_close(struct connection *conn, void *xprt_ctx)
+{
+ if (conn->subs != NULL) {
+ conn_unsubscribe(conn, NULL, conn->subs->events, conn->subs);
+ }
+}
+
+/* We can't have an underlying XPRT, so just return -1 to signify failure */
+static int raw_sock_remove_xprt(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx)
+{
+ /* This is the lowest xprt we can have, so if we get there we didn't
+ * find the xprt we wanted to remove, that's a bug
+ */
+ BUG_ON(1);
+ return -1;
+}
+
+/* transport-layer operations for RAW sockets */
+static struct xprt_ops raw_sock = {
+ .snd_buf = raw_sock_from_buf,
+ .rcv_buf = raw_sock_to_buf,
+ .subscribe = raw_sock_subscribe,
+ .unsubscribe = raw_sock_unsubscribe,
+ .remove_xprt = raw_sock_remove_xprt,
+#if defined(USE_LINUX_SPLICE)
+ .rcv_pipe = raw_sock_to_pipe,
+ .snd_pipe = raw_sock_from_pipe,
+#endif
+ .shutr = NULL,
+ .shutw = NULL,
+ .close = raw_sock_close,
+ .name = "RAW",
+};
+
+
+static void __raw_sock_init(void)
+{
+ xprt_register(XPRT_RAW, &raw_sock);
+}
+
+INITCALL0(STG_REGISTER, __raw_sock_init);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/regex.c b/src/regex.c
new file mode 100644
index 0000000..19c7eda
--- /dev/null
+++ b/src/regex.c
@@ -0,0 +1,459 @@
+/*
+ * Regex and string management functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/regex.h>
+#include <haproxy/tools.h>
+
+/* regex trash buffer used by various regex tests */
+THREAD_LOCAL regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
+
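+/* Write into <dst> (of size <dst_size>) the replacement string <str> applied
+ * to <src>: \0 to \9 insert the capture group designated by <matches>, \xHH
+ * inserts the byte with hex code HH, and any other escaped character is
+ * copied verbatim. Returns the number of bytes written, or -1 if <dst> is
+ * too small or <str> ends on a dangling backslash.
+ */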
+int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
+{
+ char *old_dst = dst;
+ char* dst_end = dst + dst_size;
+
+ while (*str) {
+ if (*str == '\\') {
+ str++;
+ if (!*str)
+ return -1;
+
+ if (isdigit((unsigned char)*str)) {
+ int len, num;
+
+ num = *str - '0';
+ str++;
+
+ if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
+ len = matches[num].rm_eo - matches[num].rm_so;
+
+ if (dst + len >= dst_end)
+ return -1;
+
+ memcpy(dst, src + matches[num].rm_so, len);
+ dst += len;
+ }
+
+ } else if (*str == 'x') {
+ unsigned char hex1, hex2;
+ str++;
+
+ if (!*str)
+ return -1;
+
+ hex1 = toupper((unsigned char)*str++) - '0';
+
+ if (!*str)
+ return -1;
+
+ hex2 = toupper((unsigned char)*str++) - '0';
+
+ if (hex1 > 9) hex1 -= 'A' - '9' - 1;
+ if (hex2 > 9) hex2 -= 'A' - '9' - 1;
+
+ if (dst >= dst_end)
+ return -1;
+
+ *dst++ = (hex1<<4) + hex2;
+ } else {
+ if (dst >= dst_end)
+ return -1;
+
+ *dst++ = *str++;
+ }
+ } else {
+ if (dst >= dst_end)
+ return -1;
+
+ *dst++ = *str++;
+ }
+ }
+ if (dst >= dst_end)
+ return -1;
+
+ *dst = '\0';
+ return dst - old_dst;
+}
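+
+/* Hypothetical usage example (not part of this patch): with a regex such as
+ * "([a-z]+)-([a-z]+)" matched against src = "foo-bar" (offsets stored in
+ * <matches> by regex_exec_match()), calling
+ * exp_replace(dst, sizeof(dst), src, "\\2.\\1", matches) would write
+ * "bar.foo" into <dst> and return 7.
+ */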
+
+/* returns NULL if the replacement string <str> is valid, or the pointer to the first error */
+const char *check_replace_string(const char *str)
+{
+ const char *err = NULL;
+ while (*str) {
+ if (*str == '\\') {
+ err = str; /* in case of a backslash, we return the pointer to it */
+ str++;
+ if (!*str)
+ return err;
+ else if (isdigit((unsigned char)*str))
+ err = NULL;
+ else if (*str == 'x') {
+ str++;
+ if (!ishex(*str))
+ return err;
+ str++;
+ if (!ishex(*str))
+ return err;
+ err = NULL;
+ }
+ else {
+ ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
+ err = NULL;
+ }
+ }
+ str++;
+ }
+ return err;
+}
+
+
+/* This function applies a regex. It takes a const null-terminated string as
+ * input. If the regex doesn't match, it returns false, else it returns true.
+ * When compiled with JIT, this function executes strlen() on the subject.
+ * Currently the only supported flag is REG_NOTBOL.
+ */
+int regex_exec_match(const struct my_regex *preg, const char *subject,
+ size_t nmatch, regmatch_t pmatch[], int flags) {
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ int ret;
+#ifdef USE_PCRE2
+ PCRE2_SIZE *matches;
+ pcre2_match_data *pm;
+#else
+ int matches[MAX_MATCH * 3];
+#endif
+ int enmatch;
+ int i;
+ int options;
+
+ /* Silently limit the number of allowed matches: MAX_MATCH is the
+ * maximum we can store, so higher requested counts are clamped and
+ * the extra entries are simply reported as unmatched.
+ */
+
+ enmatch = nmatch;
+ if (enmatch > MAX_MATCH)
+ enmatch = MAX_MATCH;
+
+ options = 0;
+ if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+ options |= PCRE2_NOTBOL;
+#else
+ options |= PCRE_NOTBOL;
+#endif
+
+ /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
+ * pair that has been set. For example, if two substrings have been captured,
+ * the returned value is 3. If there are no capturing subpatterns, the return
+ * value from a successful match is 1, indicating that just the first pair of
+ * offsets has been set.
+ *
+ * It seems that this function returns 0 if it detects more matches than available
+ * space in the matches array.
+ */
+#ifdef USE_PCRE2
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
+
+ if (ret < 0) {
+ pcre2_match_data_free(pm);
+ return 0;
+ }
+
+ matches = pcre2_get_ovector_pointer(pm);
+#else
+ ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
+
+ if (ret < 0)
+ return 0;
+#endif
+
+ if (ret == 0)
+ ret = enmatch;
+
+ for (i=0; i<nmatch; i++) {
+ /* Copy offset. */
+ if (i < ret) {
+ pmatch[i].rm_so = matches[(i*2)];
+ pmatch[i].rm_eo = matches[(i*2)+1];
+ continue;
+ }
+ /* Set the unmatched flag (-1). */
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+#ifdef USE_PCRE2
+ pcre2_match_data_free(pm);
+#endif
+ return 1;
+#else
+ int match;
+
+ flags &= REG_NOTBOL;
+ match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
+ if (match == REG_NOMATCH)
+ return 0;
+ return 1;
+#endif
+}
+
+/* This function applies a regex. It takes a "char *" and a length as input.
+ * The <subject> can be modified during the processing. If the regex doesn't
+ * match, it returns false, else it returns true.
+ * When compiled with the standard POSIX regex library, this function adds
+ * a temporary null character at the end of the <subject>. The <subject> must
+ * have a real length of <length> + 1. Currently the only supported flag is
+ * REG_NOTBOL.
+ */
+int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
+ size_t nmatch, regmatch_t pmatch[], int flags) {
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ int ret;
+#ifdef USE_PCRE2
+ PCRE2_SIZE *matches;
+ pcre2_match_data *pm;
+#else
+ int matches[MAX_MATCH * 3];
+#endif
+ int enmatch;
+ int i;
+ int options;
+
+ /* Silently limit the number of allowed matches: MAX_MATCH is the
+ * maximum we can store, so higher requested counts are clamped and
+ * the extra entries are simply reported as unmatched.
+ */
+ enmatch = nmatch;
+ if (enmatch > MAX_MATCH)
+ enmatch = MAX_MATCH;
+
+ options = 0;
+ if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+ options |= PCRE2_NOTBOL;
+#else
+ options |= PCRE_NOTBOL;
+#endif
+
+ /* The value returned by pcre_exec()/pcre2_(jit)_match() is one more than the highest numbered
+ * pair that has been set. For example, if two substrings have been captured,
+ * the returned value is 3. If there are no capturing subpatterns, the return
+ * value from a successful match is 1, indicating that just the first pair of
+ * offsets has been set.
+ *
+ * It seems that this function returns 0 if it detects more matches than available
+ * space in the matches array.
+ */
+#ifdef USE_PCRE2
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
+
+ if (ret < 0) {
+ pcre2_match_data_free(pm);
+ return 0;
+ }
+
+ matches = pcre2_get_ovector_pointer(pm);
+#else
+ ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
+ if (ret < 0)
+ return 0;
+#endif
+
+ if (ret == 0)
+ ret = enmatch;
+
+ for (i=0; i<nmatch; i++) {
+ /* Copy offset. */
+ if (i < ret) {
+ pmatch[i].rm_so = matches[(i*2)];
+ pmatch[i].rm_eo = matches[(i*2)+1];
+ continue;
+ }
+ /* Set the unmatched flag (-1). */
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+#ifdef USE_PCRE2
+ pcre2_match_data_free(pm);
+#endif
+ return 1;
+#else
+ char old_char = subject[length];
+ int match;
+
+ flags &= REG_NOTBOL;
+ subject[length] = 0;
+ match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
+ subject[length] = old_char;
+ if (match == REG_NOMATCH)
+ return 0;
+ return 1;
+#endif
+}
+
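+/* Compile the <str> regular expression into a newly allocated struct my_regex.
+ * <cs> enables case-sensitive matching and <cap> enables capture groups. On
+ * failure, fills <err> with an error message and returns NULL.
+ */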
+struct my_regex *regex_comp(const char *str, int cs, int cap, char **err)
+{
+ struct my_regex *regex = NULL;
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+ int flags = 0;
+ const char *error;
+ int erroffset;
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ int flags = 0;
+ int errn;
+#if defined(USE_PCRE2_JIT)
+ int jit;
+#endif
+ PCRE2_UCHAR error[256];
+ PCRE2_SIZE erroffset;
+#else
+ int flags = REG_EXTENDED;
+#endif
+
+ regex = calloc(1, sizeof(*regex));
+ if (!regex) {
+ memprintf(err, "not enough memory to build regex");
+ goto out_fail_alloc;
+ }
+
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+ if (!cs)
+ flags |= PCRE_CASELESS;
+ if (!cap)
+ flags |= PCRE_NO_AUTO_CAPTURE;
+
+ regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
+ if (!regex->reg) {
+ memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
+ goto out_fail_alloc;
+ }
+
+ regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
+ if (!regex->extra && error != NULL) {
+ pcre_free(regex->reg);
+ memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
+ goto out_fail_alloc;
+ }
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ if (!cs)
+ flags |= PCRE2_CASELESS;
+ if (!cap)
+ flags |= PCRE2_NO_AUTO_CAPTURE;
+
+ regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
+ if (!regex->reg) {
+ pcre2_get_error_message(errn, error, sizeof(error));
+ memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
+ goto out_fail_alloc;
+ }
+
+ regex->mfn = &pcre2_match;
+#if defined(USE_PCRE2_JIT)
+ jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
+ /*
+ * We only fail on an error not related to a lack of JIT support;
+ * when JIT support is missing, pcre2_jit_compile() is a "no-op".
+ */
+ if (!jit)
+ regex->mfn = &pcre2_jit_match;
+ else {
+ if (jit != PCRE2_ERROR_JIT_BADOPTION) {
+ pcre2_code_free(regex->reg);
+ memprintf(err, "regex '%s' jit compilation failed", str);
+ goto out_fail_alloc;
+ }
+ else
+ regex->mfn = &pcre2_match;
+ }
+#endif
+
+#else
+ if (!cs)
+ flags |= REG_ICASE;
+ if (!cap)
+ flags |= REG_NOSUB;
+
+ if (regcomp(&regex->regex, str, flags) != 0) {
+ memprintf(err, "regex '%s' is invalid", str);
+ goto out_fail_alloc;
+ }
+#endif
+ return regex;
+
+ out_fail_alloc:
+ free(regex);
+ return NULL;
+}
+
+static void regex_register_build_options(void)
+{
+ char *ptr = NULL;
+
+#ifdef USE_PCRE
+ memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
+ HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
+ HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE));
+ memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version());
+
+ memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr,
+#ifdef USE_PCRE_JIT
+ ({
+ int r;
+ pcre_config(PCRE_CONFIG_JIT, &r);
+ r ? "yes" : "no (libpcre build without JIT?)";
+ })
+#else
+ "no (USE_PCRE_JIT not set)"
+#endif
+ );
+#endif /* USE_PCRE */
+
+#ifdef USE_PCRE2
+ memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
+ HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
+ HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
+ memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
+#ifdef USE_PCRE2_JIT
+ ({
+ int r;
+ pcre2_config(PCRE2_CONFIG_JIT, &r);
+ r ? "yes" : "no (libpcre2 build without JIT?)";
+ })
+#else
+ "no (USE_PCRE2_JIT not set)"
+#endif
+ );
+#endif /* USE_PCRE2 */
+
+#if !defined(USE_PCRE) && !defined(USE_PCRE2)
+ memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
+#endif
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, regex_register_build_options);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/resolvers.c b/src/resolvers.c
new file mode 100644
index 0000000..a814c24
--- /dev/null
+++ b/src/resolvers.c
@@ -0,0 +1,3801 @@
+/*
+ * Name server resolution
+ *
+ * Copyright 2014 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+
+#include <import/ebistree.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/dns.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/ring.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+
+
+struct list sec_resolvers = LIST_HEAD_INIT(sec_resolvers);
+struct list resolv_srvrq_list = LIST_HEAD_INIT(resolv_srvrq_list);
+
+static THREAD_LOCAL struct list death_row; /* list of deferred resolutions to kill, local validity only */
+static THREAD_LOCAL unsigned int recurse = 0; /* counter to track calls to public functions */
+static THREAD_LOCAL uint64_t resolv_query_id_seed = 0; /* random seed */
+struct resolvers *curr_resolvers = NULL;
+
+DECLARE_STATIC_POOL(resolv_answer_item_pool, "resolv_answer_item", sizeof(struct resolv_answer_item));
+DECLARE_STATIC_POOL(resolv_resolution_pool, "resolv_resolution", sizeof(struct resolv_resolution));
+DECLARE_POOL(resolv_requester_pool, "resolv_requester", sizeof(struct resolv_requester));
+
+static unsigned int resolution_uuid = 1;
+unsigned int resolv_failed_resolutions = 0;
+static struct task *process_resolvers(struct task *t, void *context, unsigned int state);
+static void resolv_free_resolution(struct resolv_resolution *resolution);
+static void _resolv_unlink_resolution(struct resolv_requester *requester);
+static void enter_resolver_code();
+static void leave_resolver_code();
+
+enum {
+ RSLV_STAT_ID,
+ RSLV_STAT_PID,
+ RSLV_STAT_SENT,
+ RSLV_STAT_SND_ERROR,
+ RSLV_STAT_VALID,
+ RSLV_STAT_UPDATE,
+ RSLV_STAT_CNAME,
+ RSLV_STAT_CNAME_ERROR,
+ RSLV_STAT_ANY_ERR,
+ RSLV_STAT_NX,
+ RSLV_STAT_TIMEOUT,
+ RSLV_STAT_REFUSED,
+ RSLV_STAT_OTHER,
+ RSLV_STAT_INVALID,
+ RSLV_STAT_TOO_BIG,
+ RSLV_STAT_TRUNCATED,
+ RSLV_STAT_OUTDATED,
+ RSLV_STAT_END,
+};
+
+static struct name_desc resolv_stats[] = {
+ [RSLV_STAT_ID] = { .name = "id", .desc = "ID" },
+ [RSLV_STAT_PID] = { .name = "pid", .desc = "Parent ID" },
+ [RSLV_STAT_SENT] = { .name = "sent", .desc = "Sent" },
+ [RSLV_STAT_SND_ERROR] = { .name = "send_error", .desc = "Send error" },
+ [RSLV_STAT_VALID] = { .name = "valid", .desc = "Valid" },
+ [RSLV_STAT_UPDATE] = { .name = "update", .desc = "Update" },
+ [RSLV_STAT_CNAME] = { .name = "cname", .desc = "CNAME" },
+ [RSLV_STAT_CNAME_ERROR] = { .name = "cname_error", .desc = "CNAME error" },
+ [RSLV_STAT_ANY_ERR] = { .name = "any_err", .desc = "Any errors" },
+ [RSLV_STAT_NX] = { .name = "nx", .desc = "NX" },
+ [RSLV_STAT_TIMEOUT] = { .name = "timeout", .desc = "Timeout" },
+ [RSLV_STAT_REFUSED] = { .name = "refused", .desc = "Refused" },
+ [RSLV_STAT_OTHER] = { .name = "other", .desc = "Other" },
+ [RSLV_STAT_INVALID] = { .name = "invalid", .desc = "Invalid" },
+ [RSLV_STAT_TOO_BIG] = { .name = "too_big", .desc = "Too big" },
+ [RSLV_STAT_TRUNCATED] = { .name = "truncated", .desc = "Truncated" },
+ [RSLV_STAT_OUTDATED] = { .name = "outdated", .desc = "Outdated" },
+};
+
+static struct dns_counters dns_counters;
+
+static void resolv_fill_stats(void *d, struct field *stats)
+{
+ struct dns_counters *counters = d;
+ stats[RSLV_STAT_ID] = mkf_str(FO_CONFIG, counters->id);
+ stats[RSLV_STAT_PID] = mkf_str(FO_CONFIG, counters->pid);
+ stats[RSLV_STAT_SENT] = mkf_u64(FN_GAUGE, counters->sent);
+ stats[RSLV_STAT_SND_ERROR] = mkf_u64(FN_GAUGE, counters->snd_error);
+ stats[RSLV_STAT_VALID] = mkf_u64(FN_GAUGE, counters->app.resolver.valid);
+ stats[RSLV_STAT_UPDATE] = mkf_u64(FN_GAUGE, counters->app.resolver.update);
+ stats[RSLV_STAT_CNAME] = mkf_u64(FN_GAUGE, counters->app.resolver.cname);
+ stats[RSLV_STAT_CNAME_ERROR] = mkf_u64(FN_GAUGE, counters->app.resolver.cname_error);
+ stats[RSLV_STAT_ANY_ERR] = mkf_u64(FN_GAUGE, counters->app.resolver.any_err);
+ stats[RSLV_STAT_NX] = mkf_u64(FN_GAUGE, counters->app.resolver.nx);
+ stats[RSLV_STAT_TIMEOUT] = mkf_u64(FN_GAUGE, counters->app.resolver.timeout);
+ stats[RSLV_STAT_REFUSED] = mkf_u64(FN_GAUGE, counters->app.resolver.refused);
+ stats[RSLV_STAT_OTHER] = mkf_u64(FN_GAUGE, counters->app.resolver.other);
+ stats[RSLV_STAT_INVALID] = mkf_u64(FN_GAUGE, counters->app.resolver.invalid);
+ stats[RSLV_STAT_TOO_BIG] = mkf_u64(FN_GAUGE, counters->app.resolver.too_big);
+ stats[RSLV_STAT_TRUNCATED] = mkf_u64(FN_GAUGE, counters->app.resolver.truncated);
+ stats[RSLV_STAT_OUTDATED] = mkf_u64(FN_GAUGE, counters->app.resolver.outdated);
+}
+
+static struct stats_module rslv_stats_module = {
+ .name = "resolvers",
+ .domain_flags = STATS_DOMAIN_RESOLVERS << STATS_DOMAIN,
+ .fill_stats = resolv_fill_stats,
+ .stats = resolv_stats,
+ .stats_count = RSLV_STAT_END,
+ .counters = &dns_counters,
+ .counters_size = sizeof(dns_counters),
+ .clearable = 0,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &rslv_stats_module);
+
+/* CLI context used during "show resolvers" */
+struct show_resolvers_ctx {
+ struct resolvers *forced_section;
+ struct resolvers *resolvers;
+ struct dns_nameserver *ns;
+};
+
+/* Returns a pointer to the resolvers matching the id <id>. NULL is returned if
+ * no match is found.
+ */
+struct resolvers *find_resolvers_by_id(const char *id)
+{
+ struct resolvers *res;
+
+ list_for_each_entry(res, &sec_resolvers, list) {
+ if (strcmp(res->id, id) == 0)
+ return res;
+ }
+ return NULL;
+}
+
+/* Returns a pointer to the SRV request matching the name <name> for the proxy
+ * <px>. NULL is returned if no match is found.
+ */
+struct resolv_srvrq *find_srvrq_by_name(const char *name, struct proxy *px)
+{
+ struct resolv_srvrq *srvrq;
+
+ list_for_each_entry(srvrq, &resolv_srvrq_list, list) {
+ if (srvrq->proxy == px && strcmp(srvrq->name, name) == 0)
+ return srvrq;
+ }
+ return NULL;
+}
+
+/* Allocates a new SRVRQ for the given server with the name <fqdn>. It returns
+ * NULL if an error occurred. */
+struct resolv_srvrq *new_resolv_srvrq(struct server *srv, char *fqdn)
+{
+ struct proxy *px = srv->proxy;
+ struct resolv_srvrq *srvrq = NULL;
+ int fqdn_len, hostname_dn_len;
+
+ fqdn_len = strlen(fqdn);
+ hostname_dn_len = resolv_str_to_dn_label(fqdn, fqdn_len, trash.area,
+ trash.size);
+ if (hostname_dn_len == -1) {
+ ha_alert("%s '%s', server '%s': failed to parse FQDN '%s'\n",
+ proxy_type_str(px), px->id, srv->id, fqdn);
+ goto err;
+ }
+
+ if ((srvrq = calloc(1, sizeof(*srvrq))) == NULL) {
+ ha_alert("%s '%s', server '%s': out of memory\n",
+ proxy_type_str(px), px->id, srv->id);
+ goto err;
+ }
+ srvrq->obj_type = OBJ_TYPE_SRVRQ;
+ srvrq->proxy = px;
+ srvrq->name = strdup(fqdn);
+ srvrq->hostname_dn = strdup(trash.area);
+ srvrq->hostname_dn_len = hostname_dn_len;
+ if (!srvrq->name || !srvrq->hostname_dn) {
+ ha_alert("%s '%s', server '%s': out of memory\n",
+ proxy_type_str(px), px->id, srv->id);
+ goto err;
+ }
+ LIST_INIT(&srvrq->attached_servers);
+ srvrq->named_servers = EB_ROOT;
+ LIST_APPEND(&resolv_srvrq_list, &srvrq->list);
+ return srvrq;
+
+ err:
+ if (srvrq) {
+ free(srvrq->name);
+ free(srvrq->hostname_dn);
+ free(srvrq);
+ }
+ return NULL;
+}
+
+
+/* Finds and returns the SRV answer item associated with a requester (whose
+ * type is 'server').
+ *
+ * Returns NULL in case of error or if no matching record is found.
+ */
+struct resolv_answer_item *find_srvrq_answer_record(const struct resolv_requester *requester)
+{
+ struct resolv_resolution *res;
+ struct eb32_node *eb32;
+ struct server *srv;
+
+ if (!requester)
+ return NULL;
+
+ if ((srv = objt_server(requester->owner)) == NULL)
+ return NULL;
+ /* check if the server is managed by a SRV record */
+ if (srv->srvrq == NULL)
+ return NULL;
+
+ res = srv->srvrq->requester->resolution;
+
+ /* search an ANSWER record whose target points to the server's hostname and whose port is
+ * the same as server's svc_port */
+ for (eb32 = eb32_first(&res->response.answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ struct resolv_answer_item *item = eb32_entry(eb32, typeof(*item), link);
+
+ if (memcmp(srv->hostname_dn, item->data.target, srv->hostname_dn_len) == 0 &&
+ (srv->svc_port == item->port))
+ return item;
+ }
+
+ return NULL;
+}
+
+/* 2-byte (16-bit) random generator used to produce DNS query IDs */
+static inline uint16_t resolv_rnd16(void)
+{
+ if (!resolv_query_id_seed)
+ resolv_query_id_seed = now_ms;
+ resolv_query_id_seed ^= resolv_query_id_seed << 13;
+ resolv_query_id_seed ^= resolv_query_id_seed >> 7;
+ resolv_query_id_seed ^= resolv_query_id_seed << 17;
+ return resolv_query_id_seed;
+}
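+
+/* Note: the generator above is the classic 64-bit xorshift (shift triplet
+ * 13/7/17), lazily seeded with the current time and truncated to its low 16
+ * bits on return. Once seeded with a non-zero value, its 64-bit state never
+ * becomes zero. It is fast but not cryptographically strong, which is
+ * acceptable here since query IDs only need basic unpredictability.
+ */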
+
+
+static inline int resolv_resolution_timeout(struct resolv_resolution *res)
+{
+ return res->resolvers->timeout.resolve;
+}
+
+/* Updates a resolvers' task timeout for the next wake up and queues it */
+static void resolv_update_resolvers_timeout(struct resolvers *resolvers)
+{
+ struct resolv_resolution *res;
+ int next;
+
+ next = tick_add(now_ms, resolvers->timeout.resolve);
+ if (!LIST_ISEMPTY(&resolvers->resolutions.curr)) {
+ res = LIST_NEXT(&resolvers->resolutions.curr, struct resolv_resolution *, list);
+ next = tick_first(next, tick_add(res->last_query, resolvers->timeout.retry));
+ }
+
+ list_for_each_entry(res, &resolvers->resolutions.wait, list)
+ next = tick_first(next, tick_add(res->last_resolution, resolv_resolution_timeout(res)));
+
+ resolvers->t->expire = next;
+ task_queue(resolvers->t);
+}
+
+/* Forges a DNS query. It needs the following information from the caller:
+ * - <query_id> : the DNS query id corresponding to this query
+ * - <query_type> : DNS_RTYPE_* request DNS record type (A, AAAA, ANY...)
+ * - <hostname_dn> : hostname in domain name format
+ * - <hostname_dn_len> : length of <hostname_dn>
+ *
+ * To store the query, the caller must pass a buffer <buf> and its size
+ * <bufsize>. It returns the number of bytes written on success, or -1 if
+ * <buf> is too short.
+ */
+static int resolv_build_query(int query_id, int query_type, unsigned int accepted_payload_size,
+ char *hostname_dn, int hostname_dn_len, char *buf, int bufsize)
+{
+ struct dns_header dns_hdr;
+ struct dns_question qinfo;
+ struct dns_additional_record edns;
+ char *p = buf;
+
+ if (sizeof(dns_hdr) + sizeof(qinfo) + sizeof(edns) + hostname_dn_len >= bufsize)
+ return -1;
+
+ memset(buf, 0, bufsize);
+
+ /* Set dns query headers */
+ dns_hdr.id = (unsigned short) htons(query_id);
+ dns_hdr.flags = htons(0x0100); /* qr=0, opcode=0, aa=0, tc=0, rd=1, ra=0, z=0, rcode=0 */
+ dns_hdr.qdcount = htons(1); /* 1 question */
+ dns_hdr.ancount = 0;
+ dns_hdr.nscount = 0;
+ dns_hdr.arcount = htons(1);
+ memcpy(p, &dns_hdr, sizeof(dns_hdr));
+ p += sizeof(dns_hdr);
+
+ /* Set up query hostname */
+ memcpy(p, hostname_dn, hostname_dn_len);
+ p += hostname_dn_len;
+ *p++ = 0;
+
+ /* Set up query info (type and class) */
+ qinfo.qtype = htons(query_type);
+ qinfo.qclass = htons(DNS_RCLASS_IN);
+ memcpy(p, &qinfo, sizeof(qinfo));
+ p += sizeof(qinfo);
+
+ /* Set the DNS extension */
+ edns.name = 0;
+ edns.type = htons(DNS_RTYPE_OPT);
+ edns.udp_payload_size = htons(accepted_payload_size);
+ edns.extension = 0;
+ edns.data_length = 0;
+ memcpy(p, &edns, sizeof(edns));
+ p += sizeof(edns);
+
+ return (p - buf);
+}
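+
+/* As an illustration, a hypothetical A query for "www.example.com" with
+ * query id 0x1234 and a 4096-byte accepted payload size would be laid out
+ * by the function above as:
+ *
+ *   [12-byte header: id=0x1234, flags=0x0100 (rd=1), qdcount=1, arcount=1]
+ *   [qname: \x03www\x07example\x03com\x00]
+ *   [qtype=1 (A)] [qclass=1 (IN)]
+ *   [OPT pseudo-record: name=0, type=41 (OPT), udp payload size=4096]
+ */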
+
+/* Sends a DNS query to the resolvers associated with a resolution. It returns
+ * 0 on success or -1 if the trash buffer is not large enough to build a valid
+ * query.
+ */
+static int resolv_send_query(struct resolv_resolution *resolution)
+{
+ struct resolvers *resolvers = resolution->resolvers;
+ struct dns_nameserver *ns;
+ int len;
+
+ /* Update resolution */
+ resolution->nb_queries = 0;
+ resolution->nb_responses = 0;
+ resolution->last_query = now_ms;
+
+ len = resolv_build_query(resolution->query_id, resolution->query_type,
+ resolvers->accepted_payload_size,
+ resolution->hostname_dn, resolution->hostname_dn_len,
+ trash.area, trash.size);
+ if (len < 0) {
+ send_log(NULL, LOG_NOTICE,
+ "can not build the query message for %s, in resolvers %s.\n",
+ resolution->hostname_dn, resolvers->id);
+ return -1;
+ }
+
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ if (dns_send_nameserver(ns, trash.area, len) >= 0)
+ resolution->nb_queries++;
+ }
+
+ /* Push the resolution at the end of the active list */
+ LIST_DEL_INIT(&resolution->list);
+ LIST_APPEND(&resolvers->resolutions.curr, &resolution->list);
+ return 0;
+}
+
+/* Prepares and sends a DNS resolution. It returns 1 if the query was sent, 0 if
+ * skipped and -1 if an error occurred.
+ */
+static int
+resolv_run_resolution(struct resolv_resolution *resolution)
+{
+ struct resolvers *resolvers = resolution->resolvers;
+ int query_id, i;
+
+ /* Avoid sending requests for resolutions that don't yet have a
+ * hostname, i.e. resolutions linked to servers that do not yet have
+ * an FQDN */
+ if (!resolution->hostname_dn)
+ return 0;
+
+ /* If a resolution has already been started for this server, return
+ * directly to avoid resolutions piling up. */
+ if (resolution->step != RSLV_STEP_NONE)
+ return 0;
+
+ /* Generates a new query id. We try at most 100 times to find a free
+ * query id */
+ for (i = 0; i < 100; ++i) {
+ query_id = resolv_rnd16();
+ if (!eb32_lookup(&resolvers->query_ids, query_id))
+ break;
+ query_id = -1;
+ }
+ if (query_id == -1) {
+ send_log(NULL, LOG_NOTICE,
+ "could not generate a query id for %s, in resolvers %s.\n",
+ resolution->hostname_dn, resolvers->id);
+ return -1;
+ }
+
+ /* Update resolution parameters */
+ resolution->query_id = query_id;
+ resolution->qid.key = query_id;
+ resolution->step = RSLV_STEP_RUNNING;
+ resolution->query_type = resolution->prefered_query_type;
+ resolution->try = resolvers->resolve_retries;
+ eb32_insert(&resolvers->query_ids, &resolution->qid);
+
+ /* Send the DNS query */
+ resolution->try -= 1;
+ resolv_send_query(resolution);
+ return 1;
+}
+
+/* Performs a name resolution for the requester <req> */
+void resolv_trigger_resolution(struct resolv_requester *req)
+{
+ struct resolvers *resolvers;
+ struct resolv_resolution *res;
+ int exp;
+
+ if (!req || !req->resolution)
+ return;
+ res = req->resolution;
+ resolvers = res->resolvers;
+
+ enter_resolver_code();
+
+ /* Use the cached response if it is still valid, otherwise wake the
+ * resolvers task up to trigger the resolution */
+ exp = tick_add(res->last_resolution, resolvers->hold.valid);
+ if (resolvers->t && (res->status != RSLV_STATUS_VALID ||
+ !tick_isset(res->last_resolution) || tick_is_expired(exp, now_ms))) {
+ /* If the resolution is not running and the requester is a
+ * server or an SRV request, reset the resolution timer to
+ * force a quick resolution.
+ */
+ if (res->step == RSLV_STEP_NONE &&
+ (obj_type(req->owner) == OBJ_TYPE_SERVER ||
+ obj_type(req->owner) == OBJ_TYPE_SRVRQ))
+ res->last_resolution = TICK_ETERNITY;
+ task_wakeup(resolvers->t, TASK_WOKEN_OTHER);
+ }
+
+ leave_resolver_code();
+}
+
+
+/* Resets some resolution parameters to initial values and also deletes the
+ * query ID from the resolver's tree.
+ */
+static void resolv_reset_resolution(struct resolv_resolution *resolution)
+{
+ /* update resolution status */
+ resolution->step = RSLV_STEP_NONE;
+ resolution->try = 0;
+ resolution->last_resolution = now_ms;
+ resolution->nb_queries = 0;
+ resolution->nb_responses = 0;
+ resolution->query_type = resolution->prefered_query_type;
+
+ /* clean up query id */
+ eb32_delete(&resolution->qid);
+ resolution->query_id = 0;
+ resolution->qid.key = 0;
+}
+
+/* Returns the query id contained in a DNS response */
+static inline unsigned short resolv_response_get_query_id(unsigned char *resp)
+{
+ return resp[0] * 256 + resp[1];
+}
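+
+/* i.e. a manual big-endian (network order) 16-bit read, equivalent to
+ * applying ntohs() to the first two bytes of the response. */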
+
+
+/* Analyses, rebuilds and copies the name <name> from the DNS response packet
+ * <buffer>. <name> must point to the first length byte of an encoded name,
+ * or to a 'c0' pointer for compressed data. The result is copied into
+ * <destination>, limited to <dest_len> bytes so that it cannot overflow.
+ * Returns the number of bytes copied into <destination>, or 0 if an error
+ * occurred while parsing the name. <offset> is set to the number of bytes
+ * the caller must move forward in the packet.
+ */
+int resolv_read_name(unsigned char *buffer, unsigned char *bufend,
+ unsigned char *name, char *destination, int dest_len,
+ int *offset, unsigned int depth)
+{
+ int nb_bytes = 0, n = 0;
+ int label_len;
+ unsigned char *reader = name;
+ char *dest = destination;
+
+ while (1) {
+ if (reader >= bufend)
+ goto err;
+
+ /* Name compression is in use */
+ if ((*reader & 0xc0) == 0xc0) {
+ if (reader + 1 >= bufend)
+ goto err;
+
+ /* Must point BEFORE current position */
+ if ((buffer + reader[1]) > reader)
+ goto err;
+
+ if (depth++ > 100)
+ goto err;
+
+ n = resolv_read_name(buffer, bufend, buffer + (*reader & 0x3f)*256 + reader[1],
+ dest, dest_len - nb_bytes, offset, depth);
+ if (n == 0)
+ goto err;
+
+ dest += n;
+ nb_bytes += n;
+ goto out;
+ }
+
+ label_len = *reader;
+ if (label_len == 0)
+ goto out;
+
+ /* Check if:
+ * - we won't read outside the buffer
+ * - there is enough room in the destination
+ */
+ if ((reader + label_len >= bufend) || (nb_bytes + label_len >= dest_len))
+ goto err;
+
+ /* +1 to take label len + label string */
+ label_len++;
+
+ memcpy(dest, reader, label_len);
+
+ dest += label_len;
+ nb_bytes += label_len;
+ reader += label_len;
+ }
+
+ out:
+ /* offset computation:
+ * parse from <name> until finding either NULL or a pointer "c0xx"
+ */
+ reader = name;
+ *offset = 0;
+ while (reader < bufend) {
+ if ((reader[0] & 0xc0) == 0xc0) {
+ *offset += 2;
+ break;
+ }
+ else if (*reader == 0) {
+ *offset += 1;
+ break;
+ }
+ *offset += 1;
+ ++reader;
+ }
+ return nb_bytes;
+
+ err:
+ return 0;
+}
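+
+/* Illustration of the compression handling above: assuming the question
+ * "www.example.com" starts at offset 0x0c of the packet, an answer owner
+ * name encoded as the two bytes "\xc0\x0c" is a pointer (top two bits set)
+ * to offset 0x0c. The function then recurses there and copies
+ * "\x03www\x07example\x03com" into <destination>, while <offset> is set to
+ * 2, the size of the pointer itself.
+ */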
+
+/* Reinitializes the list of aborted resolutions (the death_row) before
+ * calling functions that rely on it. The list must then be flushed by
+ * calling leave_resolver_code() once the operations are done.
+ */
+static void enter_resolver_code()
+{
+ if (!recurse)
+ LIST_INIT(&death_row);
+ recurse++;
+}
+
+/* Add a resolution to the death_row. */
+static void abort_resolution(struct resolv_resolution *res)
+{
+ /* Remove the resolution from query_ids tree and from any resolvers list */
+ eb32_delete(&res->qid);
+ res->query_id = 0;
+ res->qid.key = 0;
+
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&death_row, &res->list);
+}
+
+/* This releases any aborted resolution found in the death row. It is mandatory
+ * to call enter_resolver_code() first before the function (or loop) that
+ * needs to defer deletions. Note that some of them are in relation via internal
+ * objects and might cause the deletion of other ones from the same list, so we
+ * must absolutely not use a list_for_each_entry_safe() nor any such thing here,
+ * and solely rely on each call to remove the first remaining list element.
+ */
+static void leave_resolver_code()
+{
+ struct resolv_resolution *res;
+
+ recurse--;
+ if (recurse)
+ return;
+
+ while (!LIST_ISEMPTY(&death_row)) {
+ res = LIST_NEXT(&death_row, struct resolv_resolution *, list);
+ resolv_free_resolution(res);
+ }
+
+ /* make sure nobody tries to add anything without having initialized it */
+ death_row = (struct list){ };
+}
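+
+/* A minimal usage sketch of the two guards above (hypothetical caller):
+ *
+ *     enter_resolver_code();
+ *     HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+ *     ... code that may call abort_resolution() on some resolutions ...
+ *     HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+ *     leave_resolver_code();  // flushes the death_row on the outermost call
+ *
+ * Calls may nest: the <recurse> counter guarantees that only the outermost
+ * leave_resolver_code() actually frees the deferred resolutions.
+ */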
+
+/* Cleans up the fqdn/port and address of a server attached to a SRV
+ * resolution. This happens when an SRV item is purged or when the server
+ * status is considered obsolete.
+ *
+ * Must be called with the DNS lock held, and with the death_row already
+ * initialized via enter_resolver_code().
+ */
+static void resolv_srvrq_cleanup_srv(struct server *srv)
+{
+ _resolv_unlink_resolution(srv->resolv_requester);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srvrq_update_srv_status(srv, 1);
+ ha_free(&srv->hostname);
+ ha_free(&srv->hostname_dn);
+ srv->hostname_dn_len = 0;
+ memset(&srv->addr, 0, sizeof(srv->addr));
+ srv->svc_port = 0;
+ srv->flags |= SRV_F_NO_RESOLUTION;
+
+ ebpt_delete(&srv->host_dn);
+ ha_free(&srv->host_dn.key);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ LIST_DEL_INIT(&srv->srv_rec_item);
+ LIST_APPEND(&srv->srvrq->attached_servers, &srv->srv_rec_item);
+
+ srv->srvrq_check->expire = TICK_ETERNITY;
+}
+
+/* Takes care of cleaning up a server resolution when it is outdated. This
+ * only happens for a server relying on an SRV record.
+ */
+static struct task *resolv_srvrq_expire_task(struct task *t, void *context, unsigned int state)
+{
+ struct server *srv = context;
+
+ if (!tick_is_expired(t->expire, now_ms))
+ goto end;
+
+ enter_resolver_code();
+ HA_SPIN_LOCK(DNS_LOCK, &srv->srvrq->resolvers->lock);
+ resolv_srvrq_cleanup_srv(srv);
+ HA_SPIN_UNLOCK(DNS_LOCK, &srv->srvrq->resolvers->lock);
+ leave_resolver_code();
+
+ end:
+ return t;
+}
+
+/* Checks for any obsolete record, also identifies any SRV request, and tries
+ * to find a corresponding server.
+ */
+static void resolv_check_response(struct resolv_resolution *res)
+{
+ struct resolvers *resolvers = res->resolvers;
+ struct resolv_requester *req;
+ struct eb32_node *eb32, *eb32_back;
+ struct server *srv, *srvback;
+ struct resolv_srvrq *srvrq;
+
+ for (eb32 = eb32_first(&res->response.answer_tree); eb32 && (eb32_back = eb32_next(eb32), 1); eb32 = eb32_back) {
+ struct resolv_answer_item *item = eb32_entry(eb32, typeof(*item), link);
+ struct resolv_answer_item *ar_item = item->ar_item;
+
+ /* clean up obsolete Additional record */
+ if (ar_item && tick_is_lt(tick_add(ar_item->last_seen, resolvers->hold.obsolete), now_ms)) {
+ /* Cleaning up the AR item will trigger an extra DNS resolution, unless the SRV
+ * item is also obsolete.
+ */
+ pool_free(resolv_answer_item_pool, ar_item);
+ item->ar_item = NULL;
+ }
+
+ /* Remove obsolete items */
+ if (tick_is_lt(tick_add(item->last_seen, resolvers->hold.obsolete), now_ms)) {
+ if (item->type == DNS_RTYPE_A || item->type == DNS_RTYPE_AAAA) {
+ /* Remove any associated server */
+ list_for_each_entry_safe(srv, srvback, &item->attached_servers, ip_rec_item) {
+ LIST_DEL_INIT(&srv->ip_rec_item);
+ }
+ }
+ else if (item->type == DNS_RTYPE_SRV) {
+ /* Remove any associated server */
+ list_for_each_entry_safe(srv, srvback, &item->attached_servers, srv_rec_item)
+ resolv_srvrq_cleanup_srv(srv);
+ }
+
+ eb32_delete(&item->link);
+ if (item->ar_item) {
+ pool_free(resolv_answer_item_pool, item->ar_item);
+ item->ar_item = NULL;
+ }
+ pool_free(resolv_answer_item_pool, item);
+ continue;
+ }
+
+ if (item->type != DNS_RTYPE_SRV)
+ continue;
+
+ /* Now process SRV records */
+ list_for_each_entry(req, &res->requesters, list) {
+ struct ebpt_node *node;
+ char target[DNS_MAX_NAME_SIZE+1];
+
+ int i;
+ if ((srvrq = objt_resolv_srvrq(req->owner)) == NULL)
+ continue;
+
+ /* Check if a server already uses that record */
+ srv = NULL;
+ list_for_each_entry(srv, &item->attached_servers, srv_rec_item) {
+ if (srv->srvrq == srvrq) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ goto srv_found;
+ }
+ }
+
+
+ /* If the tree is not empty, we try to match a server
+ * from the server state file tree with the same hostname
+ */
+ if (!eb_is_empty(&srvrq->named_servers)) {
+ srv = NULL;
+
+ /* convert the key to lookup in lower case */
+ for (i = 0 ; item->data.target[i] ; i++)
+ target[i] = tolower(item->data.target[i]);
+ target[i] = 0;
+
+ node = ebis_lookup(&srvrq->named_servers, target);
+ if (node) {
+ srv = ebpt_entry(node, struct server, host_dn);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+ /* an entry was found with the same hostname;
+ * let's check whether this node's port matches,
+ * and try the next node as long as the hostname
+ * is still the same
+ */
+ while (1) {
+ if (srv->svc_port == item->port) {
+ /* server found, we remove it from tree */
+ ebpt_delete(node);
+ ha_free(&srv->host_dn.key);
+ goto srv_found;
+ }
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+
+ node = ebpt_next(node);
+ if (!node)
+ break;
+
+ srv = ebpt_entry(node, struct server, host_dn);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+ if ((item->data_len != srv->hostname_dn_len)
+ || memcmp(srv->hostname_dn, item->data.target, item->data_len) != 0) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ break;
+ }
+ }
+ }
+ }
+
+ /* Pick the first server listed in srvrq (those don't
+ * have a hostname yet and are free to use)
+ */
+ srv = NULL;
+ list_for_each_entry(srv, &srvrq->attached_servers, srv_rec_item) {
+ LIST_DEL_INIT(&srv->srv_rec_item);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ goto srv_found;
+ }
+ srv = NULL;
+
+srv_found:
+ /* And update this server, if found (srv is locked here) */
+ if (srv) {
+ /* re-enable DNS resolution for this server by default */
+ srv->flags &= ~SRV_F_NO_RESOLUTION;
+ srv->srvrq_check->expire = TICK_ETERNITY;
+
+ srv->svc_port = item->port;
+ srv->flags &= ~SRV_F_MAPPORTS;
+
+ /* Check if an Additional Record is associated with this SRV record,
+ * and perform some sanity checks to ensure the record can be used.
+ * If everything is fine, we simply pick the IP address found,
+ * associate it with the server, and disable DNS resolution for
+ * this server.
+ */
+ if ((item->ar_item != NULL) &&
+ (item->ar_item->type == DNS_RTYPE_A || item->ar_item->type == DNS_RTYPE_AAAA))
+ {
+
+ switch (item->ar_item->type) {
+ case DNS_RTYPE_A:
+ srv_update_addr(srv, &item->ar_item->data.in4.sin_addr, AF_INET, "DNS additional record");
+ break;
+ case DNS_RTYPE_AAAA:
+ srv_update_addr(srv, &item->ar_item->data.in6.sin6_addr, AF_INET6, "DNS additional record");
+ break;
+ }
+
+ srv->flags |= SRV_F_NO_RESOLUTION;
+
+ /* Unlink A/AAAA resolution for this server if there is an AR item.
+ * It is useless to perform an extra resolution
+ */
+ _resolv_unlink_resolution(srv->resolv_requester);
+ }
+
+ if (!srv->hostname_dn) {
+ const char *msg = NULL;
+ char hostname[DNS_MAX_NAME_SIZE+1];
+
+ if (resolv_dn_label_to_str(item->data.target, item->data_len,
+ hostname, sizeof(hostname)) == -1) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ continue;
+ }
+ msg = srv_update_fqdn(srv, hostname, "SRV record", 1);
+ if (msg)
+ send_log(srv->proxy, LOG_NOTICE, "%s", msg);
+ }
+
+ if (!LIST_INLIST(&srv->srv_rec_item))
+ LIST_APPEND(&item->attached_servers, &srv->srv_rec_item);
+
+ if (!(srv->flags & SRV_F_NO_RESOLUTION)) {
+ /* If there is no AR item responsible for the FQDN resolution,
+ * trigger a dedicated DNS resolution
+ */
+ if (!srv->resolv_requester || !srv->resolv_requester->resolution)
+ resolv_link_resolution(srv, OBJ_TYPE_SERVER, 1);
+ }
+
+ /* Update the server status */
+ srvrq_update_srv_status(srv, (srv->addr.ss_family != AF_INET && srv->addr.ss_family != AF_INET6));
+
+ if (!srv->resolv_opts.ignore_weight) {
+ char weight[9];
+ int ha_weight;
+
+ /* The DNS weight range is from 0 to 65535 while the
+ * HAProxy weight range is from 0 to 256. The rule below
+ * ensures that weight 0 is well respected while allowing
+ * a "mapping" from the DNS weight into HAProxy's one.
+ */
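+ /* A quick worked example of this mapping, i.e.
+ * ha_weight = (dns_weight + 255) / 256 in integer arithmetic:
+ * DNS weight 0       -> HAProxy weight 0 (weight 0 preserved)
+ * DNS weight 1..256  -> HAProxy weight 1
+ * DNS weight 257..512 -> HAProxy weight 2
+ * DNS weight 65535   -> HAProxy weight 256
+ */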
+ ha_weight = (item->weight + 255) / 256;
+
+ snprintf(weight, sizeof(weight), "%d", ha_weight);
+ server_parse_weight_change_request(srv, weight);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+ }
+ }
+}
+
+/* Checks that the DNS response provided in the buffer <resp> and finishing
+ * before <bufend> is valid from a DNS protocol point of view.
+ *
+ * The result is stored in <resolution>'s response, buf_response,
+ * response_query_records and response_answer_records members.
+ *
+ * This function returns one of the RSLV_RESP_* codes to indicate the type of
+ * error found.
+ */
+static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufend,
+ struct resolv_resolution *resolution, int max_answer_records)
+{
+ unsigned char *reader;
+ char *previous_dname, tmpname[DNS_MAX_NAME_SIZE];
+ int len, flags, offset;
+ int nb_saved_records;
+ struct resolv_query_item *query;
+ struct resolv_answer_item *answer_record, *tmp_record;
+ struct resolv_response *r_res;
+ struct eb32_node *eb32;
+ uint32_t key = 0;
+ int i, found = 0;
+ int cause = RSLV_RESP_ERROR;
+
+ reader = resp;
+ len = 0;
+ previous_dname = NULL;
+ query = NULL;
+ answer_record = NULL;
+
+ /* Initialization of response buffer and structure */
+ r_res = &resolution->response;
+
+ /* query id */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+
+ r_res->header.id = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* Flags and rcode are stored over 2 bytes
+ * First byte contains:
+ * - response flag (1 bit)
+ * - opcode (4 bits)
+ * - authoritative (1 bit)
+ * - truncated (1 bit)
+ * - recursion desired (1 bit)
+ * Second byte contains:
+ * - recursion available (1 bit)
+ * - zero (3 bits)
+ * - response code (4 bits)
+ */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+
+ flags = reader[0] * 256 + reader[1];
+
+ if ((flags & DNS_FLAG_REPLYCODE) != DNS_RCODE_NO_ERROR) {
+ if ((flags & DNS_FLAG_REPLYCODE) == DNS_RCODE_NX_DOMAIN) {
+ cause = RSLV_RESP_NX_DOMAIN;
+ goto return_error;
+ }
+ else if ((flags & DNS_FLAG_REPLYCODE) == DNS_RCODE_REFUSED) {
+ cause = RSLV_RESP_REFUSED;
+ goto return_error;
+ }
+ else {
+ cause = RSLV_RESP_ERROR;
+ goto return_error;
+ }
+ }
+
+ /* Move forward 2 bytes for flags */
+ reader += 2;
+
+ /* 2 bytes for question count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.qdcount = reader[0] * 256 + reader[1];
+ /* (for now) we send one query only, so we expect only one in the
+ * response too */
+ if (r_res->header.qdcount != 1) {
+ cause = RSLV_RESP_QUERY_COUNT_ERROR;
+ goto return_error;
+ }
+
+ if (r_res->header.qdcount > DNS_MAX_QUERY_RECORDS)
+ goto invalid_resp;
+ reader += 2;
+
+ /* 2 bytes for answer count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.ancount = reader[0] * 256 + reader[1];
+ if (r_res->header.ancount == 0) {
+ cause = RSLV_RESP_ANCOUNT_ZERO;
+ goto return_error;
+ }
+
+ /* Check if too many records are announced */
+ if (r_res->header.ancount > max_answer_records)
+ goto invalid_resp;
+ reader += 2;
+
+ /* 2 bytes authority count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.nscount = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 2 bytes additional count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.arcount = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* Parsing dns queries. For now there is only one query and it exists
+ * because (qdcount == 1).
+ */
+ query = &resolution->response_query_records[0];
+
+ /* Name is a NULL terminated string in our case, since we have
+ * one query per response and the first one can't be compressed
+ * (using the 0xc0 pointer format) */
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, query->name, DNS_MAX_NAME_SIZE, &offset, 0);
+
+ if (len == 0)
+ goto invalid_resp;
+
+ /* Now let's check the query's dname corresponds to the one we sent. */
+ if (len != resolution->hostname_dn_len ||
+ memcmp(query->name, resolution->hostname_dn, resolution->hostname_dn_len) != 0) {
+ cause = RSLV_RESP_WRONG_NAME;
+ goto return_error;
+ }
+
+ reader += offset;
+ previous_dname = query->name;
+
+ /* move forward 2 bytes for question type */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ query->type = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* move forward 2 bytes for question class */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ query->class = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* The TRUNCATED flag must be checked once the query type is known,
+ * because a truncated response to an SRV query can still be exploited
+ */
+ if (query->type != DNS_RTYPE_SRV && flags & DNS_FLAG_TRUNCATED) {
+ cause = RSLV_RESP_TRUNCATED;
+ goto return_error;
+ }
+
+ /* now parsing response records */
+ nb_saved_records = 0;
+ for (i = 0; i < r_res->header.ancount; i++) {
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ answer_record = pool_alloc(resolv_answer_item_pool);
+ if (answer_record == NULL)
+ goto invalid_resp;
+
+ /* initialization */
+ answer_record->ar_item = NULL;
+ answer_record->last_seen = TICK_ETERNITY;
+ LIST_INIT(&answer_record->attached_servers);
+ answer_record->link.node.leaf_p = NULL;
+
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+
+ if (len == 0)
+ goto invalid_resp;
+
+ /* Check if the current record dname is valid. previous_dname
+ * points either to queried dname or last CNAME target */
+ if (query->type != DNS_RTYPE_SRV && memcmp(previous_dname, tmpname, len) != 0) {
+ if (i == 0) {
+ /* First record, means a mismatch issue between
+ * queried dname and dname found in the first
+ * record */
+ goto invalid_resp;
+ }
+ else {
+ /* If not the first record, this means we have a
+ * CNAME resolution error.
+ */
+ cause = RSLV_RESP_CNAME_ERROR;
+ goto return_error;
+ }
+
+ }
+
+ memcpy(answer_record->name, tmpname, len);
+ answer_record->name[len] = 0;
+
+ reader += offset;
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ /* 2 bytes for record type (A, AAAA, CNAME, etc...) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->type = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 2 bytes for class (2) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->class = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 4 bytes for ttl (4) */
+ if (reader + 4 > bufend)
+ goto invalid_resp;
+
+ answer_record->ttl = reader[0] * 16777216 + reader[1] * 65536
+ + reader[2] * 256 + reader[3];
+ reader += 4;
+
+ /* Now reading data len */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->data_len = reader[0] * 256 + reader[1];
+
+ /* Move forward 2 bytes for data len */
+ reader += 2;
+
+ if (reader + answer_record->data_len > bufend)
+ goto invalid_resp;
+
+ /* Analyzing record content */
+ switch (answer_record->type) {
+ case DNS_RTYPE_A:
+ /* ipv4 is stored on 4 bytes */
+ if (answer_record->data_len != 4)
+ goto invalid_resp;
+
+ answer_record->data.in4.sin_family = AF_INET;
+ memcpy(&answer_record->data.in4.sin_addr, reader, answer_record->data_len);
+ key = XXH32(reader, answer_record->data_len, answer_record->type);
+ break;
+
+ case DNS_RTYPE_CNAME:
+ /* Check if this is the last record and update the caller about the status:
+ * no IP could be found and the last record was a CNAME. This could be
+ * triggered by a wrong query type.
+ *
+ * "+ 1" because <i> starts at 0 while the
+ * number of answers starts at 1.
+ */
+ if (i + 1 == r_res->header.ancount) {
+ cause = RSLV_RESP_CNAME_ERROR;
+ goto return_error;
+ }
+
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+ if (len == 0)
+ goto invalid_resp;
+
+ memcpy(answer_record->data.target, tmpname, len);
+ answer_record->data.target[len] = 0;
+ key = XXH32(tmpname, len, answer_record->type);
+ previous_dname = answer_record->data.target;
+ break;
+
+
+ case DNS_RTYPE_SRV:
+ /* Answer must contain :
+ * - 2 bytes for the priority
+ * - 2 bytes for the weight
+ * - 2 bytes for the port
+ * - the target hostname
+ */
+ if (answer_record->data_len <= 6)
+ goto invalid_resp;
+
+ answer_record->priority = read_n16(reader);
+ reader += sizeof(uint16_t);
+ answer_record->weight = read_n16(reader);
+ reader += sizeof(uint16_t);
+ answer_record->port = read_n16(reader);
+ reader += sizeof(uint16_t);
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+ if (len == 0)
+ goto invalid_resp;
+
+ answer_record->data_len = len;
+ memcpy(answer_record->data.target, tmpname, len);
+ answer_record->data.target[len] = 0;
+ key = XXH32(tmpname, len, answer_record->type);
+ if (answer_record->ar_item != NULL) {
+ pool_free(resolv_answer_item_pool, answer_record->ar_item);
+ answer_record->ar_item = NULL;
+ }
+ break;
+
+ case DNS_RTYPE_AAAA:
+ /* ipv6 is stored on 16 bytes */
+ if (answer_record->data_len != 16)
+ goto invalid_resp;
+
+ answer_record->data.in6.sin6_family = AF_INET6;
+ memcpy(&answer_record->data.in6.sin6_addr, reader, answer_record->data_len);
+ key = XXH32(reader, answer_record->data_len, answer_record->type);
+ break;
+
+ } /* switch (record type) */
+
+ /* Increment the counter for number of records saved into our
+ * local response */
+ nb_saved_records++;
+
+ /* Move forward answer_record->data_len for analyzing next
+ * record in the response */
+ reader += ((answer_record->type == DNS_RTYPE_SRV)
+ ? offset
+ : answer_record->data_len);
+
+ /* Lookup to see if we already had this entry */
+ found = 0;
+
+ for (eb32 = eb32_lookup(&r_res->answer_tree, key); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ tmp_record = eb32_entry(eb32, typeof(*tmp_record), link);
+ if (tmp_record->type != answer_record->type)
+ continue;
+
+ switch(tmp_record->type) {
+ case DNS_RTYPE_A:
+ if (!memcmp(&answer_record->data.in4.sin_addr,
+ &tmp_record->data.in4.sin_addr,
+ sizeof(answer_record->data.in4.sin_addr)))
+ found = 1;
+ break;
+
+ case DNS_RTYPE_AAAA:
+ if (!memcmp(&answer_record->data.in6.sin6_addr,
+ &tmp_record->data.in6.sin6_addr,
+ sizeof(answer_record->data.in6.sin6_addr)))
+ found = 1;
+ break;
+
+ case DNS_RTYPE_SRV:
+ if (answer_record->data_len == tmp_record->data_len &&
+ memcmp(answer_record->data.target, tmp_record->data.target, answer_record->data_len) == 0 &&
+ answer_record->port == tmp_record->port) {
+ tmp_record->weight = answer_record->weight;
+ found = 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (found == 1)
+ break;
+ }
+
+ if (found == 1) {
+ tmp_record->last_seen = now_ms;
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ }
+ else {
+ answer_record->last_seen = now_ms;
+ answer_record->ar_item = NULL;
+ answer_record->link.key = key;
+ eb32_insert(&r_res->answer_tree, &answer_record->link);
+ answer_record = NULL;
+ }
+ } /* for i 0 to ancount */
+
+ /* Save the number of records we really own */
+ r_res->header.ancount = nb_saved_records;
+
+ /* now parsing additional records for SRV queries only */
+ if (query->type != DNS_RTYPE_SRV)
+ goto skip_parsing_additional_records;
+
+ /* if we find Authority records, just skip them */
+ for (i = 0; i < r_res->header.nscount; i++) {
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE,
+ &offset, 0);
+ if (len == 0)
+ continue;
+
+ if (reader + offset + 10 >= bufend)
+ goto invalid_resp;
+
+ reader += offset;
+ /* skip 2 bytes for class */
+ reader += 2;
+ /* skip 2 bytes for type */
+ reader += 2;
+ /* skip 4 bytes for ttl */
+ reader += 4;
+ /* read data len */
+ len = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ if (reader + len >= bufend)
+ goto invalid_resp;
+
+ reader += len;
+ }
+
+ nb_saved_records = 0;
+ for (i = 0; i < r_res->header.arcount; i++) {
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ answer_record = pool_alloc(resolv_answer_item_pool);
+ if (answer_record == NULL)
+ goto invalid_resp;
+ answer_record->last_seen = TICK_ETERNITY;
+ LIST_INIT(&answer_record->attached_servers);
+
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+
+ if (len == 0) {
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ continue;
+ }
+
+ memcpy(answer_record->name, tmpname, len);
+ answer_record->name[len] = 0;
+
+ reader += offset;
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ /* 2 bytes for record type (A, AAAA, CNAME, etc...) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->type = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 2 bytes for class (2) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->class = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 4 bytes for ttl (4) */
+ if (reader + 4 > bufend)
+ goto invalid_resp;
+
+ answer_record->ttl = reader[0] * 16777216 + reader[1] * 65536
+ + reader[2] * 256 + reader[3];
+ reader += 4;
+
+ /* Now reading data len */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->data_len = reader[0] * 256 + reader[1];
+
+ /* Move forward 2 bytes for data len */
+ reader += 2;
+
+ if (reader + answer_record->data_len > bufend)
+ goto invalid_resp;
+
+ /* Analyzing record content */
+ switch (answer_record->type) {
+ case DNS_RTYPE_A:
+ /* ipv4 is stored on 4 bytes */
+ if (answer_record->data_len != 4)
+ goto invalid_resp;
+
+ answer_record->data.in4.sin_family = AF_INET;
+ memcpy(&answer_record->data.in4.sin_addr, reader, answer_record->data_len);
+ break;
+
+ case DNS_RTYPE_AAAA:
+ /* ipv6 is stored on 16 bytes */
+ if (answer_record->data_len != 16)
+ goto invalid_resp;
+
+ answer_record->data.in6.sin6_family = AF_INET6;
+ memcpy(&answer_record->data.in6.sin6_addr, reader, answer_record->data_len);
+ break;
+
+ default:
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ continue;
+
+ } /* switch (record type) */
+
+ /* Increment the counter for number of records saved into our
+ * local response */
+ nb_saved_records++;
+
+ /* Move forward answer_record->data_len for analyzing next
+ * record in the response */
+ reader += answer_record->data_len;
+
+ /* Lookup to see if we already had this entry */
+ found = 0;
+
+ for (eb32 = eb32_first(&r_res->answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ struct resolv_answer_item *ar_item;
+
+ tmp_record = eb32_entry(eb32, typeof(*tmp_record), link);
+ if (tmp_record->type != DNS_RTYPE_SRV || !tmp_record->ar_item)
+ continue;
+
+ ar_item = tmp_record->ar_item;
+ if (ar_item->type != answer_record->type || ar_item->last_seen == now_ms ||
+ len != tmp_record->data_len ||
+ memcmp(answer_record->name, tmp_record->data.target, tmp_record->data_len) != 0)
+ continue;
+
+ switch(ar_item->type) {
+ case DNS_RTYPE_A:
+ if (!memcmp(&answer_record->data.in4.sin_addr,
+ &ar_item->data.in4.sin_addr,
+ sizeof(answer_record->data.in4.sin_addr)))
+ found = 1;
+ break;
+
+ case DNS_RTYPE_AAAA:
+ if (!memcmp(&answer_record->data.in6.sin6_addr,
+ &ar_item->data.in6.sin6_addr,
+ sizeof(answer_record->data.in6.sin6_addr)))
+ found = 1;
+ break;
+
+ default:
+ break;
+ }
+
+ if (found == 1)
+ break;
+ }
+
+ if (found == 1) {
+ tmp_record->ar_item->last_seen = now_ms;
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ }
+ else {
+ answer_record->last_seen = now_ms;
+ answer_record->ar_item = NULL;
+
+ /* looking for the SRV record in the response list linked to this additional record */
+ for (eb32 = eb32_first(&r_res->answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ tmp_record = eb32_entry(eb32, typeof(*tmp_record), link);
+
+ if (tmp_record->type == DNS_RTYPE_SRV &&
+ tmp_record->ar_item == NULL &&
+ memcmp(tmp_record->data.target, answer_record->name, tmp_record->data_len) == 0) {
+ /* Always use the received additional record to refresh info */
+ if (tmp_record->ar_item)
+ pool_free(resolv_answer_item_pool, tmp_record->ar_item);
+ tmp_record->ar_item = answer_record;
+ answer_record = NULL;
+ break;
+ }
+ }
+ if (answer_record) {
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ }
+ }
+ } /* for i 0 to arcount */
+
+ skip_parsing_additional_records:
+
+ /* Save the number of records we really own */
+ r_res->header.arcount = nb_saved_records;
+ resolv_check_response(resolution);
+ return RSLV_RESP_VALID;
+
+ invalid_resp:
+ cause = RSLV_RESP_INVALID;
+
+ return_error:
+ pool_free(resolv_answer_item_pool, answer_record);
+ return cause;
+}
+
+/* Searches the response <r_res> for an IP matching the dn_name resolution.
+ * If the currently used IP is not found, the first IP matching
+ * <family_priority> is returned, otherwise the first IP found.
+ * It is the caller's responsibility to ensure that <r_res> contains an
+ * error-free DNS response; for this, resolv_validate_dns_response() must
+ * have been called beforehand.
+ * Returns one of the RSLV_UPD_* codes.
+ */
+int resolv_get_ip_from_response(struct resolv_response *r_res,
+ struct resolv_options *resolv_opts, void *currentip,
+ short currentip_sin_family,
+ void **newip, short *newip_sin_family,
+ struct server *owner)
+{
+ struct resolv_answer_item *record, *found_record = NULL;
+ struct eb32_node *eb32;
+ int family_priority;
+ int currentip_found;
+ unsigned char *newip4, *newip6;
+ int currentip_sel;
+ int j;
+ int score, max_score;
+ int allowed_duplicated_ip;
+
+ /* srv is linked to an alive ip record */
+ if (owner && LIST_INLIST(&owner->ip_rec_item))
+ return RSLV_UPD_NO;
+
+ family_priority = resolv_opts->family_prio;
+ allowed_duplicated_ip = resolv_opts->accept_duplicate_ip;
+ *newip = newip4 = newip6 = NULL;
+ currentip_found = 0;
+ *newip_sin_family = AF_UNSPEC;
+ max_score = -1;
+
+ /* Select an IP regarding configuration preference.
+ * Top priority is the preferred network ip version,
+ * second priority is the preferred network.
+ * the last priority is the currently used IP,
+ *
+ * For these three priorities, a score is calculated. The
+ * weights are:
+ * 8 - preferred ip version.
+ * 4 - preferred network.
+ * 2 - if the ip in the record is not assigned to any other
+ * server in the same backend (duplication).
+ * 1 - current ip.
+ * The result with the biggest score is returned.
+ */
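+ /* For example (hypothetical case): with family_prio = AF_INET, an A
+ * record whose address lies inside a preferred network, is not used by
+ * any other server of the same backend, and equals the server's current
+ * IP scores 8+4+2+1 = 15, the maximum, which ends the parsing early.
+ */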
+
+ for (eb32 = eb32_first(&r_res->answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ void *ip;
+ unsigned char ip_type;
+
+ record = eb32_entry(eb32, typeof(*record), link);
+ if (record->type == DNS_RTYPE_A) {
+ ip_type = AF_INET;
+ ip = &record->data.in4.sin_addr;
+ }
+ else if (record->type == DNS_RTYPE_AAAA) {
+ ip_type = AF_INET6;
+ ip = &record->data.in6.sin6_addr;
+ }
+ else
+ continue;
+ score = 0;
+
+ /* Check for preferred ip protocol. */
+ if (ip_type == family_priority)
+ score += 8;
+
+ /* Check for preferred network. */
+ for (j = 0; j < resolv_opts->pref_net_nb; j++) {
+
+ /* Compare only the same addresses class. */
+ if (resolv_opts->pref_net[j].family != ip_type)
+ continue;
+
+ if ((ip_type == AF_INET &&
+ in_net_ipv4(ip,
+ &resolv_opts->pref_net[j].mask.in4,
+ &resolv_opts->pref_net[j].addr.in4)) ||
+ (ip_type == AF_INET6 &&
+ in_net_ipv6(ip,
+ &resolv_opts->pref_net[j].mask.in6,
+ &resolv_opts->pref_net[j].addr.in6))) {
+ score += 4;
+ break;
+ }
+ }
+
+ /* Check if the IP found in the record is already assigned to a
+ * member of the same backend. If not, the score is incremented
+ * by 2. */
+ if (owner) {
+ struct server *srv;
+ int already_used = 0;
+
+ list_for_each_entry(srv, &record->attached_servers, ip_rec_item) {
+ if (srv == owner)
+ continue;
+ if (srv->proxy == owner->proxy) {
+ already_used = 1;
+ break;
+ }
+ }
+ if (already_used) {
+ if (!allowed_duplicated_ip) {
+ continue;
+ }
+ }
+ else {
+ score += 2;
+ }
+ } else {
+ score += 2;
+ }
+
+ /* Check for current ip matching. */
+ if (ip_type == currentip_sin_family &&
+ ((currentip_sin_family == AF_INET &&
+ !memcmp(ip, currentip, 4)) ||
+ (currentip_sin_family == AF_INET6 &&
+ !memcmp(ip, currentip, 16)))) {
+ score++;
+ currentip_sel = 1;
+ }
+ else
+ currentip_sel = 0;
+
+ /* Keep the address if its score is better than the previous
+ * best score. The maximum score is 15; if this value is reached,
+ * we stop parsing, since implicitly when this score is reached
+ * the selected IP is the current one. */
+ if (score > max_score) {
+ if (ip_type == AF_INET)
+ newip4 = ip;
+ else
+ newip6 = ip;
+ found_record = record;
+ currentip_found = currentip_sel;
+ if (score == 15) {
+ /* this was not registered on the current record but it matches;
+ * let's fix it (it may come from the state file) */
+ if (owner)
+ LIST_APPEND(&found_record->attached_servers, &owner->ip_rec_item);
+ return RSLV_UPD_NO;
+ }
+ max_score = score;
+ }
+ } /* list for each record entries */
+
+ /* No IP found in the response */
+ if (!newip4 && !newip6)
+ return RSLV_UPD_NO_IP_FOUND;
+
+ /* Case when the caller looks first for an IPv4 address */
+ if (family_priority == AF_INET) {
+ if (newip4) {
+ *newip = newip4;
+ *newip_sin_family = AF_INET;
+ }
+ else if (newip6) {
+ *newip = newip6;
+ *newip_sin_family = AF_INET6;
+ }
+ }
+ /* Case when the caller looks first for an IPv6 address */
+ else if (family_priority == AF_INET6) {
+ if (newip6) {
+ *newip = newip6;
+ *newip_sin_family = AF_INET6;
+ }
+ else if (newip4) {
+ *newip = newip4;
+ *newip_sin_family = AF_INET;
+ }
+ }
+ /* Case when the caller has no preference (we prefer IPv6) */
+ else if (family_priority == AF_UNSPEC) {
+ if (newip6) {
+ *newip = newip6;
+ *newip_sin_family = AF_INET6;
+ }
+ else if (newip4) {
+ *newip = newip4;
+ *newip_sin_family = AF_INET;
+ }
+ }
+
+ /* the ip of this record was chosen for the server */
+ if (owner && found_record) {
+ LIST_DEL_INIT(&owner->ip_rec_item);
+ LIST_APPEND(&found_record->attached_servers, &owner->ip_rec_item);
+ }
+
+ eb32 = eb32_first(&r_res->answer_tree);
+ if (eb32) {
+ /* Move the first record to the end of the list, for internal
+ * round robin.
+ */
+ eb32_delete(eb32);
+ eb32_insert(&r_res->answer_tree, eb32);
+ }
+
+ return (currentip_found ? RSLV_UPD_NO : RSLV_UPD_SRVIP_NOT_FOUND);
+}
+
+/* Turns a domain name label into a string: 3www7haproxy3org into www.haproxy.org
+ *
+ * <dn> contains the input label of <dn_len> bytes long and does not need to be
+ * null-terminated. <str> must be allocated large enough to contain a full host
+ * name plus the trailing zero, and the allocated size must be passed in
+ * <str_len>.
+ *
+ * In case of error, -1 is returned, otherwise, the number of bytes copied in
+ * <str> (including the terminating null byte).
+ */
+int resolv_dn_label_to_str(const char *dn, int dn_len, char *str, int str_len)
+{
+ char *ptr;
+ int i, sz;
+
+ if (str_len < dn_len)
+ return -1;
+
+ ptr = str;
+ for (i = 0; i < dn_len; ++i) {
+ sz = dn[i];
+ if (i)
+ *ptr++ = '.';
+ /* copy the string at i+1 to lower case */
+ for (; sz > 0; sz--)
+ *(ptr++) = tolower(dn[++i]);
+ }
+ *ptr++ = '\0';
+ return (ptr - str);
+}
+
+/* Turns a string into domain name label: www.haproxy.org into 3www7haproxy3org
+ *
+ * <str> contains the input string that is <str_len> bytes long (trailing zero
+ * not needed). <dn> buffer must be allocated large enough to contain the
+ * encoded string and a trailing zero, so it must be at least str_len+2, and
+ * this allocated buffer size must be passed in <dn_len>.
+ *
+ * In case of error, -1 is returned, otherwise, the number of bytes copied in
+ * <dn> (excluding the terminating null byte).
+ */
+int resolv_str_to_dn_label(const char *str, int str_len, char *dn, int dn_len)
+{
+ int i, offset;
+
+ if (dn_len < str_len + 2)
+ return -1;
+
+ /* First byte of dn will be used to store the length of the first
+ * label */
+ offset = 0;
+ for (i = 0; i < str_len; ++i) {
+ if (str[i] == '.') {
+ /* two or more consecutive dots are invalid */
+ if (i == offset)
+ return -1;
+
+ /* ignore trailing dot */
+ if (i + 1 == str_len)
+ break;
+
+ dn[offset] = (i - offset);
+ offset = i+1;
+ continue;
+ }
+ dn[i+1] = tolower(str[i]);
+ }
+ dn[offset] = i - offset;
+ dn[i+1] = '\0';
+ return i+1;
+}
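+
+/* Round-trip example for the two converters above: the string
+ * "www.example.com" (15 bytes) encodes to the label
+ * "\x03www\x07example\x03com\x00", with resolv_str_to_dn_label() returning
+ * 16 (bytes written, excluding the trailing zero), and
+ * resolv_dn_label_to_str() turns it back into "www.example.com", returning
+ * 16 as well (bytes written, including the trailing zero).
+ */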
+
+/* Validates host name:
+ * - total size
+ * - each label size individually
+ * returns:
+ * 0 in case of error. If <err> is not NULL, an error message is stored there.
+ * 1 when no error. <err> is left unaffected.
+ */
+int resolv_hostname_validation(const char *string, char **err)
+{
+ int i;
+
+ if (strlen(string) > DNS_MAX_NAME_SIZE) {
+ if (err)
+ *err = DNS_TOO_LONG_FQDN;
+ return 0;
+ }
+
+ while (*string) {
+ i = 0;
+ while (*string && *string != '.' && i < DNS_MAX_LABEL_SIZE) {
+ if (!(*string == '-' || *string == '_' ||
+ (*string >= 'a' && *string <= 'z') ||
+ (*string >= 'A' && *string <= 'Z') ||
+ (*string >= '0' && *string <= '9'))) {
+ if (err)
+ *err = DNS_INVALID_CHARACTER;
+ return 0;
+ }
+ i++;
+ string++;
+ }
+
+ if (!(*string))
+ break;
+
+ if (*string != '.' && i >= DNS_MAX_LABEL_SIZE) {
+ if (err)
+ *err = DNS_LABEL_TOO_LONG;
+ return 0;
+ }
+
+ string++;
+ }
+ return 1;
+}
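+
+/* For example, "srv-1_backup.example.com" passes the validation above, while
+ * "srv 1.example.com" (invalid character) or any name with a label longer
+ * than DNS_MAX_LABEL_SIZE characters is rejected. */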
+
+/* Picks up an available resolution from the different resolution lists
+ * associated with a resolvers section, in this order:
+ * 1. check in resolutions.curr for the same hostname and query_type
+ * 2. check in resolutions.wait for the same hostname and query_type
+ * 3. Get a new resolution from resolution pool
+ *
+ * Returns an available resolution, NULL if none found.
+ */
+static struct resolv_resolution *resolv_pick_resolution(struct resolvers *resolvers,
+ char **hostname_dn, int hostname_dn_len,
+ int query_type)
+{
+ struct resolv_resolution *res;
+
+ if (!*hostname_dn)
+ goto from_pool;
+
+ /* Search for same hostname and query type in resolutions.curr */
+ list_for_each_entry(res, &resolvers->resolutions.curr, list) {
+ if (!res->hostname_dn)
+ continue;
+ if ((query_type == res->prefered_query_type) &&
+ hostname_dn_len == res->hostname_dn_len &&
+ memcmp(*hostname_dn, res->hostname_dn, hostname_dn_len) == 0)
+ return res;
+ }
+
+ /* Search for same hostname and query type in resolutions.wait */
+ list_for_each_entry(res, &resolvers->resolutions.wait, list) {
+ if (!res->hostname_dn)
+ continue;
+ if ((query_type == res->prefered_query_type) &&
+ hostname_dn_len == res->hostname_dn_len &&
+ memcmp(*hostname_dn, res->hostname_dn, hostname_dn_len) == 0)
+ return res;
+ }
+
+ from_pool:
+ /* No resolution could be found, so let's allocate a new one */
+ res = pool_zalloc(resolv_resolution_pool);
+ if (res) {
+ res->resolvers = resolvers;
+ res->uuid = resolution_uuid;
+ res->status = RSLV_STATUS_NONE;
+ res->step = RSLV_STEP_NONE;
+ res->last_valid = now_ms;
+
+ LIST_INIT(&res->requesters);
+ res->response.answer_tree = EB_ROOT;
+
+ res->prefered_query_type = query_type;
+ res->query_type = query_type;
+ res->hostname_dn = *hostname_dn;
+ res->hostname_dn_len = hostname_dn_len;
+
+ ++resolution_uuid;
+
+ /* Move the resolution to the resolvers wait queue */
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ }
+ return res;
+}
+
+/* deletes and frees all answer_items from the resolution's answer tree */
+static void resolv_purge_resolution_answer_records(struct resolv_resolution *resolution)
+{
+ struct eb32_node *eb32, *eb32_back;
+ struct resolv_answer_item *item;
+
+ for (eb32 = eb32_first(&resolution->response.answer_tree);
+ eb32 && (eb32_back = eb32_next(eb32), 1);
+ eb32 = eb32_back) {
+ item = eb32_entry(eb32, typeof(*item), link);
+ eb32_delete(&item->link);
+ pool_free(resolv_answer_item_pool, item->ar_item);
+ pool_free(resolv_answer_item_pool, item);
+ }
+}
+
+/* Releases a resolution from its requester(s) and moves it back to the pool */
+static void resolv_free_resolution(struct resolv_resolution *resolution)
+{
+ struct resolv_requester *req, *reqback;
+
+ /* clean up configuration */
+ resolv_reset_resolution(resolution);
+ resolution->hostname_dn = NULL;
+ resolution->hostname_dn_len = 0;
+
+ list_for_each_entry_safe(req, reqback, &resolution->requesters, list) {
+ LIST_DEL_INIT(&req->list);
+ req->resolution = NULL;
+ }
+ resolv_purge_resolution_answer_records(resolution);
+
+ LIST_DEL_INIT(&resolution->list);
+ pool_free(resolv_resolution_pool, resolution);
+}
+
+/* If *<req> is not NULL, returns it, otherwise tries to allocate a requester
+ * and makes it owned by this obj_type, with the proposed callback and error
+ * callback. On success, *req is assigned the allocated requester. Returns
+ * NULL on allocation failure.
+ */
+static struct resolv_requester *
+resolv_get_requester(struct resolv_requester **req, enum obj_type *owner,
+ int (*cb)(struct resolv_requester *, struct dns_counters *),
+ int (*err_cb)(struct resolv_requester *, int))
+{
+ struct resolv_requester *tmp;
+
+ if (*req)
+ return *req;
+
+ tmp = pool_alloc(resolv_requester_pool);
+ if (!tmp)
+ goto end;
+
+ LIST_INIT(&tmp->list);
+ tmp->owner = owner;
+ tmp->resolution = NULL;
+ tmp->requester_cb = cb;
+ tmp->requester_error_cb = err_cb;
+ *req = tmp;
+ end:
+ return tmp;
+}
+
+/* Links a requester (a server, an SRV request or a stream) with a resolution.
+ * It returns 0 on success, -1 otherwise.
+ */
+int resolv_link_resolution(void *requester, int requester_type, int requester_locked)
+{
+ struct resolv_resolution *res = NULL;
+ struct resolv_requester *req;
+ struct resolvers *resolvers;
+ struct server *srv = NULL;
+ struct resolv_srvrq *srvrq = NULL;
+ struct stream *stream = NULL;
+ char **hostname_dn;
+ int hostname_dn_len, query_type;
+
+ enter_resolver_code();
+ switch (requester_type) {
+ case OBJ_TYPE_SERVER:
+ srv = (struct server *)requester;
+
+ if (!requester_locked)
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+ req = resolv_get_requester(&srv->resolv_requester,
+ &srv->obj_type,
+ snr_resolution_cb,
+ snr_resolution_error_cb);
+
+ if (!requester_locked)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+
+ if (!req)
+ goto err;
+
+ hostname_dn = &srv->hostname_dn;
+ hostname_dn_len = srv->hostname_dn_len;
+ resolvers = srv->resolvers;
+ query_type = ((srv->resolv_opts.family_prio == AF_INET)
+ ? DNS_RTYPE_A
+ : DNS_RTYPE_AAAA);
+ break;
+
+ case OBJ_TYPE_SRVRQ:
+ srvrq = (struct resolv_srvrq *)requester;
+
+ req = resolv_get_requester(&srvrq->requester,
+ &srvrq->obj_type,
+ snr_resolution_cb,
+ srvrq_resolution_error_cb);
+ if (!req)
+ goto err;
+
+ hostname_dn = &srvrq->hostname_dn;
+ hostname_dn_len = srvrq->hostname_dn_len;
+ resolvers = srvrq->resolvers;
+ query_type = DNS_RTYPE_SRV;
+ break;
+
+ case OBJ_TYPE_STREAM:
+ stream = (struct stream *)requester;
+
+ req = resolv_get_requester(&stream->resolv_ctx.requester,
+ &stream->obj_type,
+ act_resolution_cb,
+ act_resolution_error_cb);
+ if (!req)
+ goto err;
+
+ hostname_dn = &stream->resolv_ctx.hostname_dn;
+ hostname_dn_len = stream->resolv_ctx.hostname_dn_len;
+ resolvers = stream->resolv_ctx.parent->arg.resolv.resolvers;
+ query_type = ((stream->resolv_ctx.parent->arg.resolv.opts->family_prio == AF_INET)
+ ? DNS_RTYPE_A
+ : DNS_RTYPE_AAAA);
+ break;
+ default:
+ goto err;
+ }
+
+ /* Get a resolution from the resolvers' wait queue or pool */
+ if ((res = resolv_pick_resolution(resolvers, hostname_dn, hostname_dn_len, query_type)) == NULL)
+ goto err;
+
+ req->resolution = res;
+
+ LIST_APPEND(&res->requesters, &req->list);
+ leave_resolver_code();
+ return 0;
+
+ err:
+ if (res && LIST_ISEMPTY(&res->requesters))
+ resolv_free_resolution(res);
+ leave_resolver_code();
+ return -1;
+}
+
+/* This function removes all server/srvrq references on answer items. */
+void resolv_detach_from_resolution_answer_items(struct resolv_resolution *res, struct resolv_requester *req)
+{
+ struct eb32_node *eb32, *eb32_back;
+ struct resolv_answer_item *item;
+ struct server *srv, *srvback;
+ struct resolv_srvrq *srvrq;
+
+ enter_resolver_code();
+ if ((srv = objt_server(req->owner)) != NULL) {
+ LIST_DEL_INIT(&srv->ip_rec_item);
+ }
+ else if ((srvrq = objt_resolv_srvrq(req->owner)) != NULL) {
+ for (eb32 = eb32_first(&res->response.answer_tree);
+ eb32 && (eb32_back = eb32_next(eb32), 1);
+ eb32 = eb32_back) {
+ item = eb32_entry(eb32, typeof(*item), link);
+ if (item->type == DNS_RTYPE_SRV) {
+ list_for_each_entry_safe(srv, srvback, &item->attached_servers, srv_rec_item) {
+ if (srv->srvrq == srvrq)
+ resolv_srvrq_cleanup_srv(srv);
+ }
+ }
+ }
+ }
+ leave_resolver_code();
+}
+
+/* Removes a requester from a DNS resolution and takes care of all the
+ * consequences. It also cleans up some parameters of the requester.
+ */
+static void _resolv_unlink_resolution(struct resolv_requester *requester)
+{
+ struct resolv_resolution *res;
+ struct resolv_requester *req;
+
+ /* Nothing to do */
+ if (!requester || !requester->resolution)
+ return;
+ res = requester->resolution;
+
+ /* Clean up the requester */
+ LIST_DEL_INIT(&requester->list);
+ requester->resolution = NULL;
+
+ /* remove ref from the resolution answer item list to the requester */
+ resolv_detach_from_resolution_answer_items(res, requester);
+
+ /* We need to find another requester linked on this resolution */
+ if (!LIST_ISEMPTY(&res->requesters))
+ req = LIST_NEXT(&res->requesters, struct resolv_requester *, list);
+ else {
+ abort_resolution(res);
+ return;
+ }
+
+ /* Move hostname_dn related pointers to the next requester */
+ switch (obj_type(req->owner)) {
+ case OBJ_TYPE_SERVER:
+ res->hostname_dn = __objt_server(req->owner)->hostname_dn;
+ res->hostname_dn_len = __objt_server(req->owner)->hostname_dn_len;
+ break;
+ case OBJ_TYPE_SRVRQ:
+ res->hostname_dn = __objt_resolv_srvrq(req->owner)->hostname_dn;
+ res->hostname_dn_len = __objt_resolv_srvrq(req->owner)->hostname_dn_len;
+ break;
+ case OBJ_TYPE_STREAM:
+ res->hostname_dn = __objt_stream(req->owner)->resolv_ctx.hostname_dn;
+ res->hostname_dn_len = __objt_stream(req->owner)->resolv_ctx.hostname_dn_len;
+ break;
+ default:
+ res->hostname_dn = NULL;
+ res->hostname_dn_len = 0;
+ break;
+ }
+}
+
+/* The public version of the function above that deals with the death row. */
+void resolv_unlink_resolution(struct resolv_requester *requester)
+{
+ enter_resolver_code();
+ _resolv_unlink_resolution(requester);
+ leave_resolver_code();
+}
+
+/* Called when a network IO is generated on a name server socket for an incoming
+ * packet. It performs the following actions:
+ * - check if the packet requires processing (not outdated resolution)
+ * - ensure the DNS packet received is valid and call requester's callback
+ * - call requester's error callback if invalid response
+ * - check the dn_name in the packet against the one sent
+ */
+static int resolv_process_responses(struct dns_nameserver *ns)
+{
+ struct dns_counters *tmpcounters;
+ struct resolvers *resolvers;
+ struct resolv_resolution *res;
+ unsigned char buf[DNS_MAX_UDP_MESSAGE + 1];
+ unsigned char *bufend;
+ int buflen, dns_resp;
+ int max_answer_records;
+ unsigned short query_id;
+ struct eb32_node *eb;
+ struct resolv_requester *req;
+ int keep_answer_items;
+
+ resolvers = ns->parent;
+ enter_resolver_code();
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+
+ /* process all pending input messages */
+ while (1) {
+ /* read message received */
+ memset(buf, '\0', resolvers->accepted_payload_size + 1);
+ if ((buflen = dns_recv_nameserver(ns, (void *)buf, sizeof(buf))) <= 0) {
+ break;
+ }
+
+ /* message too big */
+ if (buflen > resolvers->accepted_payload_size) {
+ ns->counters->app.resolver.too_big++;
+ continue;
+ }
+
+ /* initializing variables */
+ bufend = buf + buflen; /* pointer to mark the end of the buffer */
+
+ /* read the query id from the packet (16 bits) */
+ if (buf + 2 > bufend) {
+ ns->counters->app.resolver.invalid++;
+ continue;
+ }
+ query_id = resolv_response_get_query_id(buf);
+
+ /* search the query_id in the pending resolution tree */
+ eb = eb32_lookup(&resolvers->query_ids, query_id);
+ if (eb == NULL) {
+ /* unknown query id means an outdated response and can be safely ignored */
+ ns->counters->app.resolver.outdated++;
+ continue;
+ }
+
+ /* known query id means a resolution in progress */
+ res = eb32_entry(eb, struct resolv_resolution, qid);
+ /* number of responses received */
+ res->nb_responses++;
+
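+ /* upper bound of the number of records that can fit in the accepted
+ * payload: a record cannot be smaller than DNS_MIN_RECORD_SIZE */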
+ max_answer_records = (resolvers->accepted_payload_size - DNS_HEADER_SIZE) / DNS_MIN_RECORD_SIZE;
+ dns_resp = resolv_validate_dns_response(buf, bufend, res, max_answer_records);
+
+ switch (dns_resp) {
+ case RSLV_RESP_VALID:
+ break;
+
+ case RSLV_RESP_INVALID:
+ case RSLV_RESP_QUERY_COUNT_ERROR:
+ case RSLV_RESP_WRONG_NAME:
+ res->status = RSLV_STATUS_INVALID;
+ ns->counters->app.resolver.invalid++;
+ break;
+
+ case RSLV_RESP_NX_DOMAIN:
+ res->status = RSLV_STATUS_NX;
+ ns->counters->app.resolver.nx++;
+ break;
+
+ case RSLV_RESP_REFUSED:
+ res->status = RSLV_STATUS_REFUSED;
+ ns->counters->app.resolver.refused++;
+ break;
+
+ case RSLV_RESP_ANCOUNT_ZERO:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.any_err++;
+ break;
+
+ case RSLV_RESP_CNAME_ERROR:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.cname_error++;
+ break;
+
+ case RSLV_RESP_TRUNCATED:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.truncated++;
+ break;
+
+ case RSLV_RESP_NO_EXPECTED_RECORD:
+ case RSLV_RESP_ERROR:
+ case RSLV_RESP_INTERNAL:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.other++;
+ break;
+ }
+
+ /* Wait for all nameservers' responses before handling errors */
+ if (dns_resp != RSLV_RESP_VALID && res->nb_responses < res->nb_queries)
+ continue;
+
+ /* Process error codes */
+ if (dns_resp != RSLV_RESP_VALID) {
+ if (res->prefered_query_type != res->query_type) {
+ /* The fallback on the query type was already performed,
+ * so check the try counter. If it reaches 0, we can
+ * report an error. Otherwise, wait for the next attempt. */
+ if (!res->try)
+ goto report_res_error;
+ }
+ else {
+ /* Fallback from A to AAAA or the opposite and re-send
+ * the resolution immediately. try counter is not
+ * decremented. */
+ if (res->prefered_query_type == DNS_RTYPE_A) {
+ res->query_type = DNS_RTYPE_AAAA;
+ resolv_send_query(res);
+ }
+ else if (res->prefered_query_type == DNS_RTYPE_AAAA) {
+ res->query_type = DNS_RTYPE_A;
+ resolv_send_query(res);
+ }
+ }
+ continue;
+ }
+
+ /* So the resolution succeeded */
+ res->status = RSLV_STATUS_VALID;
+ res->last_valid = now_ms;
+ ns->counters->app.resolver.valid++;
+ goto report_res_success;
+
+ report_res_error:
+ keep_answer_items = 0;
+ list_for_each_entry(req, &res->requesters, list)
+ keep_answer_items |= req->requester_error_cb(req, dns_resp);
+ if (!keep_answer_items)
+ resolv_purge_resolution_answer_records(res);
+ resolv_reset_resolution(res);
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ continue;
+
+ report_res_success:
+ /* Only the first requester is accounted on the name server's
+ * counters, the other ones are considered served from the cache */
+ tmpcounters = ns->counters;
+ list_for_each_entry(req, &res->requesters, list) {
+ struct server *s = objt_server(req->owner);
+
+ if (s)
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ req->requester_cb(req, tmpcounters);
+ if (s)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ tmpcounters = NULL;
+ }
+
+ resolv_reset_resolution(res);
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ continue;
+ }
+ resolv_update_resolvers_timeout(resolvers);
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+ leave_resolver_code();
+ return buflen;
+}
+
+/* Processes DNS resolutions. First, it checks the active list to detect expired
+ * resolutions and retries them when possible; otherwise a timeout is reported.
+ * Then it checks the wait list to trigger new resolutions.
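+ *
+ * Retry policy implemented below: a resolution that expired without any
+ * response, or whose fallback type also failed, is re-sent with its
+ * preferred query type and consumes one retry; otherwise the query type
+ * only falls back between A and AAAA without decrementing <try>.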
+ */
+static struct task *process_resolvers(struct task *t, void *context, unsigned int state)
+{
+ struct resolvers *resolvers = context;
+ struct resolv_resolution *res, *resback;
+ int exp;
+
+ enter_resolver_code();
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+
+ /* Handle all expired resolutions from the active list. Elements that
+ * need to be removed will in fact be moved to the death_row. Other
+ * ones will be handled normally.
+ */
+
+ res = LIST_NEXT(&resolvers->resolutions.curr, struct resolv_resolution *, list);
+ while (&res->list != &resolvers->resolutions.curr) {
+ resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
+
+ if (LIST_ISEMPTY(&res->requesters)) {
+ abort_resolution(res);
+ res = resback;
+ continue;
+ }
+
+ /* When we find the first resolution expiring in the future, we
+ * can stop here */
+ exp = tick_add(res->last_query, resolvers->timeout.retry);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ /* If current resolution has been tried too many times and
+ * finishes in timeout we update its status and remove it from
+ * the list */
+ if (!res->try) {
+ struct resolv_requester *req;
+ int keep_answer_items = 0;
+
+ /* Notify the result to the requesters */
+ if (!res->nb_responses)
+ res->status = RSLV_STATUS_TIMEOUT;
+ list_for_each_entry(req, &res->requesters, list)
+ keep_answer_items |= req->requester_error_cb(req, res->status);
+ if (!keep_answer_items)
+ resolv_purge_resolution_answer_records(res);
+
+ /* Clean up resolution info and remove it from the
+ * current list */
+ resolv_reset_resolution(res);
+
+ /* subsequent entries might have been deleted here */
+ resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ res = resback;
+ }
+ else {
+ /* Otherwise resend the DNS query and requeue the resolution */
+ if (!res->nb_responses || res->prefered_query_type != res->query_type) {
+ /* No response received (a real timeout) or fallback already done */
+ res->query_type = res->prefered_query_type;
+ res->try--;
+ }
+ else {
+ /* Fallback from A to AAAA or the opposite and re-send
+ * the resolution immediately. try counter is not
+ * decremented. */
+ if (res->prefered_query_type == DNS_RTYPE_A)
+ res->query_type = DNS_RTYPE_AAAA;
+ else if (res->prefered_query_type == DNS_RTYPE_AAAA)
+ res->query_type = DNS_RTYPE_A;
+ else
+ res->try--;
+ }
+ resolv_send_query(res);
+ resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
+ res = resback;
+ }
+ }
+
+ /* Handle all resolutions in the wait list */
+ list_for_each_entry_safe(res, resback, &resolvers->resolutions.wait, list) {
+
+ if (unlikely(stopping)) {
+ /* If haproxy is stopping, check the resolution to know whether it must
+ * still be run. If at least one requester is a stream (because of a
+ * do-resolve action) or is attached to a running proxy, the resolution
+ * is performed. Otherwise, it is skipped for now.
+ */
+ struct resolv_requester *req;
+ int must_run = 0;
+
+ list_for_each_entry(req, &res->requesters, list) {
+ struct proxy *px = NULL;
+
+ switch (obj_type(req->owner)) {
+ case OBJ_TYPE_SERVER:
+ px = __objt_server(req->owner)->proxy;
+ break;
+ case OBJ_TYPE_SRVRQ:
+ px = __objt_resolv_srvrq(req->owner)->proxy;
+ break;
+ case OBJ_TYPE_STREAM:
+ /* Always perform the resolution */
+ must_run = 1;
+ break;
+ default:
+ break;
+ }
+ /* Perform the resolution if the proxy is not stopped or disabled */
+ if (px && !(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ must_run = 1;
+
+ if (must_run)
+ break;
+ }
+
+ if (!must_run) {
+ /* Skip the resolution: reset it and wait for the next wakeup */
+ resolv_reset_resolution(res);
+ continue;
+ }
+ }
+
+ if (LIST_ISEMPTY(&res->requesters)) {
+ abort_resolution(res);
+ continue;
+ }
+
+ exp = tick_add(res->last_resolution, resolv_resolution_timeout(res));
+ if (tick_isset(res->last_resolution) && !tick_is_expired(exp, now_ms))
+ continue;
+
+ if (resolv_run_resolution(res) != 1) {
+ res->last_resolution = now_ms;
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ }
+ }
+ resolv_update_resolvers_timeout(resolvers);
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+
+ /* now we can purge all queued deletions */
+ leave_resolver_code();
+ return t;
+}
+
+
+/* Destroys a resolvers section and releases all its resources */
+static void resolvers_destroy(struct resolvers *resolvers)
+{
+ struct dns_nameserver *ns, *nsback;
+ struct resolv_resolution *res, *resback;
+ struct resolv_requester *req, *reqback;
+
+ list_for_each_entry_safe(ns, nsback, &resolvers->nameservers, list) {
+ free(ns->id);
+ free((char *)ns->conf.file);
+ if (ns->dgram) {
+ if (ns->dgram->conn.t.sock.fd != -1) {
+ fd_delete(ns->dgram->conn.t.sock.fd);
+ close(ns->dgram->conn.t.sock.fd);
+ }
+ if (ns->dgram->ring_req)
+ ring_free(ns->dgram->ring_req);
+ free(ns->dgram);
+ }
+ if (ns->stream) {
+ if (ns->stream->ring_req)
+ ring_free(ns->stream->ring_req);
+ if (ns->stream->task_req)
+ task_destroy(ns->stream->task_req);
+ if (ns->stream->task_rsp)
+ task_destroy(ns->stream->task_rsp);
+ free(ns->stream);
+ }
+ LIST_DEL_INIT(&ns->list);
+ EXTRA_COUNTERS_FREE(ns->extra_counters);
+ free(ns);
+ }
+
+ list_for_each_entry_safe(res, resback, &resolvers->resolutions.curr, list) {
+ list_for_each_entry_safe(req, reqback, &res->requesters, list) {
+ LIST_DEL_INIT(&req->list);
+ pool_free(resolv_requester_pool, req);
+ }
+ resolv_free_resolution(res);
+ }
+
+ list_for_each_entry_safe(res, resback, &resolvers->resolutions.wait, list) {
+ list_for_each_entry_safe(req, reqback, &res->requesters, list) {
+ LIST_DEL_INIT(&req->list);
+ pool_free(resolv_requester_pool, req);
+ }
+ resolv_free_resolution(res);
+ }
+
+ free_proxy(resolvers->px);
+ free(resolvers->id);
+ free((char *)resolvers->conf.file);
+ task_destroy(resolvers->t);
+ LIST_DEL_INIT(&resolvers->list);
+ free(resolvers);
+}
+
+/* Releases all memory allocated by the resolvers (sections and SRV requests) */
+static void resolvers_deinit(void)
+{
+ struct resolvers *resolvers, *resolversback;
+ struct resolv_srvrq *srvrq, *srvrqback;
+
+ list_for_each_entry_safe(resolvers, resolversback, &sec_resolvers, list) {
+ resolvers_destroy(resolvers);
+ }
+
+ list_for_each_entry_safe(srvrq, srvrqback, &resolv_srvrq_list, list) {
+ free(srvrq->name);
+ free(srvrq->hostname_dn);
+ LIST_DEL_INIT(&srvrq->list);
+ free(srvrq);
+ }
+}
+
+/* Finalizes the DNS configuration by allocating required resources and checking
+ * live parameters.
+ * Returns 0 on success, 1 on error.
+ */
+static int resolvers_finalize_config(void)
+{
+ struct resolvers *resolvers;
+ struct proxy *px;
+ int err_code = 0;
+
+ enter_resolver_code();
+
+ /* allocate pool of resolution per resolvers */
+ list_for_each_entry(resolvers, &sec_resolvers, list) {
+ struct dns_nameserver *ns;
+ struct task *t;
+
+ /* Check if we can create the socket with nameservers info */
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ int fd;
+
+ if (ns->dgram) {
+ /* Check nameserver info */
+ if ((fd = socket(ns->dgram->conn.addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+ ha_alert("resolvers '%s': can't create socket for nameserver '%s'.\n",
+ resolvers->id, ns->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+ if (connect(fd, (struct sockaddr*)&ns->dgram->conn.addr.to, get_addr_len(&ns->dgram->conn.addr.to)) == -1) {
+ if (!resolvers->conf.implicit) { /* emit a warning only if it was configured manually */
+ ha_warning("resolvers '%s': can't connect socket for nameserver '%s'.\n",
+ resolvers->id, ns->id);
+ }
+ close(fd);
+ err_code |= ERR_WARN;
+ continue;
+ }
+ close(fd);
+ }
+ }
+
+ /* Create the task associated to the resolvers section */
+ if ((t = task_new_anywhere()) == NULL) {
+ ha_alert("resolvers '%s' : out of memory.\n", resolvers->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ goto err;
+ }
+
+ /* Update task's parameters */
+ t->process = process_resolvers;
+ t->context = resolvers;
+ resolvers->t = t;
+ task_wakeup(t, TASK_WOKEN_INIT);
+ }
+
+ for (px = proxies_list; px; px = px->next) {
+ struct server *srv;
+
+ if (px->flags & PR_FL_DISABLED) {
+ /* must not run and will not work anyway since
+ * nothing in the proxy is initialized.
+ */
+ continue;
+ }
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ struct resolvers *resolvers;
+
+ if (!srv->resolvers_id)
+ continue;
+
+ if ((resolvers = find_resolvers_by_id(srv->resolvers_id)) == NULL) {
+ ha_alert("%s '%s', server '%s': unable to find required resolvers '%s'\n",
+ proxy_type_str(px), px->id, srv->id, srv->resolvers_id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+ srv->resolvers = resolvers;
+ srv->srvrq_check = NULL;
+ if (srv->srvrq) {
+ if (!srv->srvrq->resolvers) {
+ srv->srvrq->resolvers = srv->resolvers;
+ if (resolv_link_resolution(srv->srvrq, OBJ_TYPE_SRVRQ, 0) == -1) {
+ ha_alert("%s '%s' : unable to set DNS resolution for server '%s'.\n",
+ proxy_type_str(px), px->id, srv->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+ }
+
+ srv->srvrq_check = task_new_anywhere();
+ if (!srv->srvrq_check) {
+ ha_alert("%s '%s' : unable to create SRVRQ task for server '%s'.\n",
+ proxy_type_str(px), px->id, srv->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ goto err;
+ }
+ srv->srvrq_check->process = resolv_srvrq_expire_task;
+ srv->srvrq_check->context = srv;
+ srv->srvrq_check->expire = TICK_ETERNITY;
+ }
+ else if (resolv_link_resolution(srv, OBJ_TYPE_SERVER, 0) == -1) {
+ ha_alert("%s '%s', unable to set DNS resolution for server '%s'.\n",
+ proxy_type_str(px), px->id, srv->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+
+ srv->flags |= SRV_F_NON_PURGEABLE;
+ }
+ }
+
+ if (err_code & (ERR_ALERT|ERR_ABORT))
+ goto err;
+
+ leave_resolver_code();
+ return 0;
+ err:
+ leave_resolver_code();
+ resolvers_deinit();
+ return 1;
+
+}
+
+static int stats_dump_resolv_to_buffer(struct stconn *sc,
+ struct dns_nameserver *ns,
+ struct field *stats, size_t stats_count,
+ struct list *stat_modules)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct channel *rep = sc_ic(sc);
+ struct stats_module *mod;
+ size_t idx = 0;
+
+ memset(stats, 0, sizeof(struct field) * stats_count);
+
+ list_for_each_entry(mod, stat_modules, list) {
+ struct counters_node *counters = EXTRA_COUNTERS_GET(ns->extra_counters, mod);
+
+ mod->fill_stats(counters, stats + idx);
+ idx += mod->stats_count;
+ }
+
+ if (!stats_dump_one_line(stats, idx, appctx))
+ return 0;
+
+ if (!stats_putchk(rep, NULL))
+ goto full;
+
+ return 1;
+
+ full:
+ sc_need_room(sc);
+ return 0;
+}
+
+/* Uses <appctx.ctx.stats.obj1> as a pointer to the current resolver and <obj2>
+ * as a pointer to the current nameserver.
+ */
+int stats_dump_resolvers(struct stconn *sc,
+ struct field *stats, size_t stats_count,
+ struct list *stat_modules)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *rep = sc_ic(sc);
+ struct resolvers *resolver = ctx->obj1;
+ struct dns_nameserver *ns = ctx->obj2;
+
+ if (!resolver)
+ resolver = LIST_NEXT(&sec_resolvers, struct resolvers *, list);
+
+ /* dump resolvers */
+ list_for_each_entry_from(resolver, &sec_resolvers, list) {
+ ctx->obj1 = resolver;
+
+ ns = ctx->obj2 ?
+ ctx->obj2 :
+ LIST_NEXT(&resolver->nameservers, struct dns_nameserver *, list);
+
+ list_for_each_entry_from(ns, &resolver->nameservers, list) {
+ ctx->obj2 = ns;
+
+ if (buffer_almost_full(&rep->buf))
+ goto full;
+
+ if (!stats_dump_resolv_to_buffer(sc, ns,
+ stats, stats_count,
+ stat_modules)) {
+ return 0;
+ }
+ }
+
+ ctx->obj2 = NULL;
+ }
+
+ return 1;
+
+ full:
+ sc_need_room(sc);
+ return 0;
+}
+
+void resolv_stats_clear_counters(int clrall, struct list *stat_modules)
+{
+ struct resolvers *resolvers;
+ struct dns_nameserver *ns;
+ struct stats_module *mod;
+ void *counters;
+
+ list_for_each_entry(mod, stat_modules, list) {
+ if (!mod->clearable && !clrall)
+ continue;
+
+ list_for_each_entry(resolvers, &sec_resolvers, list) {
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ counters = EXTRA_COUNTERS_GET(ns->extra_counters, mod);
+ memcpy(counters, mod->counters, mod->counters_size);
+ }
+ }
+ }
+
+}
+
+int resolv_allocate_counters(struct list *stat_modules)
+{
+ struct stats_module *mod;
+ struct resolvers *resolvers;
+ struct dns_nameserver *ns;
+
+ list_for_each_entry(resolvers, &sec_resolvers, list) {
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ EXTRA_COUNTERS_REGISTER(&ns->extra_counters, COUNTERS_RSLV,
+ alloc_failed);
+
+ list_for_each_entry(mod, stat_modules, list) {
+ EXTRA_COUNTERS_ADD(mod,
+ ns->extra_counters,
+ mod->counters,
+ mod->counters_size);
+ }
+
+ EXTRA_COUNTERS_ALLOC(ns->extra_counters, alloc_failed);
+
+ list_for_each_entry(mod, stat_modules, list) {
+ memcpy(ns->extra_counters->data + mod->counters_off[ns->extra_counters->type],
+ mod->counters, mod->counters_size);
+
+ /* Store the ns counters pointer */
+ if (strcmp(mod->name, "resolvers") == 0) {
+ ns->counters = (struct dns_counters *)ns->extra_counters->data + mod->counters_off[COUNTERS_RSLV];
+ ns->counters->id = ns->id;
+ ns->counters->pid = resolvers->id;
+ }
+ }
+ }
+ }
+
+ return 1;
+
+alloc_failed:
+ return 0;
+}
+
+/* If an argument is found, it sets the resolvers section pointer in the
+ * show_resolvers_ctx struct pointed to by svcctx; the pointer is left NULL
+ * when all sections must be dumped.
+ */
+static int cli_parse_stat_resolvers(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_resolvers_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct resolvers *presolvers;
+
+ if (*args[2]) {
+ list_for_each_entry(presolvers, &sec_resolvers, list) {
+ if (strcmp(presolvers->id, args[2]) == 0) {
+ ctx->forced_section = presolvers;
+ break;
+ }
+ }
+ if (ctx->forced_section == NULL)
+ return cli_err(appctx, "Can't find that resolvers section\n");
+ }
+ return 0;
+}
+
+/* Dumps counters from all resolvers sections and associated name servers. It
+ * returns 0 if the output buffer is full and it needs to be called again,
+ * otherwise non-zero. It may limit itself to the resolver pointed to by the
+ * <resolvers> field of struct show_resolvers_ctx pointed to by <svcctx> if
+ * it's not null.
+ */
+static int cli_io_handler_dump_resolvers_to_buffer(struct appctx *appctx)
+{
+ struct show_resolvers_ctx *ctx = appctx->svcctx;
+ struct resolvers *resolvers = ctx->resolvers;
+ struct dns_nameserver *ns;
+
+ chunk_reset(&trash);
+
+ if (LIST_ISEMPTY(&sec_resolvers)) {
+ if (applet_putstr(appctx, "No resolvers found\n") == -1)
+ goto full;
+ }
+ else {
+ if (!resolvers)
+ resolvers = LIST_ELEM(sec_resolvers.n, typeof(resolvers), list);
+
+ list_for_each_entry_from(resolvers, &sec_resolvers, list) {
+ if (ctx->forced_section != NULL && ctx->forced_section != resolvers)
+ continue;
+
+ ctx->resolvers = resolvers;
+ ns = ctx->ns;
+
+ if (!ns) {
+ chunk_printf(&trash, "Resolvers section %s\n", resolvers->id);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+
+ ns = LIST_ELEM(resolvers->nameservers.n, typeof(ns), list);
+ ctx->ns = ns;
+ }
+
+ list_for_each_entry_from(ns, &resolvers->nameservers, list) {
+ chunk_reset(&trash);
+ chunk_appendf(&trash, " nameserver %s:\n", ns->id);
+ chunk_appendf(&trash, " sent: %lld\n", ns->counters->sent);
+ chunk_appendf(&trash, " snd_error: %lld\n", ns->counters->snd_error);
+ chunk_appendf(&trash, " valid: %lld\n", ns->counters->app.resolver.valid);
+ chunk_appendf(&trash, " update: %lld\n", ns->counters->app.resolver.update);
+ chunk_appendf(&trash, " cname: %lld\n", ns->counters->app.resolver.cname);
+ chunk_appendf(&trash, " cname_error: %lld\n", ns->counters->app.resolver.cname_error);
+ chunk_appendf(&trash, " any_err: %lld\n", ns->counters->app.resolver.any_err);
+ chunk_appendf(&trash, " nx: %lld\n", ns->counters->app.resolver.nx);
+ chunk_appendf(&trash, " timeout: %lld\n", ns->counters->app.resolver.timeout);
+ chunk_appendf(&trash, " refused: %lld\n", ns->counters->app.resolver.refused);
+ chunk_appendf(&trash, " other: %lld\n", ns->counters->app.resolver.other);
+ chunk_appendf(&trash, " invalid: %lld\n", ns->counters->app.resolver.invalid);
+ chunk_appendf(&trash, " too_big: %lld\n", ns->counters->app.resolver.too_big);
+ chunk_appendf(&trash, " truncated: %lld\n", ns->counters->app.resolver.truncated);
+ chunk_appendf(&trash, " outdated: %lld\n", ns->counters->app.resolver.outdated);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+ ctx->ns = ns;
+ }
+
+ ctx->ns = NULL;
+
+ /* was this the only section to dump ? */
+ if (ctx->forced_section)
+ break;
+ }
+ }
+
+ /* done! */
+ return 1;
+ full:
+ /* the output buffer is full, retry later */
+ return 0;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ }, {
+ { { "show", "resolvers", NULL }, "show resolvers [id] : dumps counters from all resolvers section and associated name servers",
+ cli_parse_stat_resolvers, cli_io_handler_dump_resolvers_to_buffer },
+ {{},}
+ }
+};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/*
+ * Prepares the stream for resolution of the given <hostname>.
+ * Returns -1 in case of any allocation failure, 0 if not.
+ * On error, a global failure counter is also incremented.
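+ *
+ * The hostname is converted to the DNS label format, e.g. "www.example.com"
+ * becomes "\003www\007example\003com\000".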
+ */
+static int action_prepare_for_resolution(struct stream *stream, const char *hostname, int hostname_len)
+{
+ char *hostname_dn;
+ int hostname_dn_len;
+ struct buffer *tmp = get_trash_chunk();
+
+ if (!hostname)
+ return 0;
+
+ hostname_dn = tmp->area;
+ hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
+ hostname_dn, tmp->size);
+ if (hostname_dn_len == -1)
+ goto err;
+
+
+ stream->resolv_ctx.hostname_dn = strdup(hostname_dn);
+ stream->resolv_ctx.hostname_dn_len = hostname_dn_len;
+ if (!stream->resolv_ctx.hostname_dn)
+ goto err;
+
+ return 0;
+
+ err:
+ ha_free(&stream->resolv_ctx.hostname_dn);
+ resolv_failed_resolutions += 1;
+ return -1;
+}
+
+
+/*
+ * Execute the "do-resolution" action. May be called from {tcp,http}request.
+ */
+enum act_return resolv_action_do_resolve(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct resolv_resolution *resolution;
+ struct sample *smp;
+ struct resolv_requester *req;
+ struct resolvers *resolvers;
+ struct resolv_resolution *res;
+ int exp, locked = 0;
+ enum act_return ret = ACT_RET_CONT;
+
+ resolvers = rule->arg.resolv.resolvers;
+
+ enter_resolver_code();
+
+ /* we have a response to our DNS resolution */
+ use_cache:
+ if (s->resolv_ctx.requester && s->resolv_ctx.requester->resolution != NULL) {
+ resolution = s->resolv_ctx.requester->resolution;
+ if (!locked) {
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+ locked = 1;
+ }
+
+ if (resolution->step == RSLV_STEP_RUNNING)
+ goto yield;
+ if (resolution->step == RSLV_STEP_NONE) {
+ /* We update the variable only if we have a valid
+ * response. If the response was not received yet, we
+ * must yield.
+ */
+ if (resolution->status == RSLV_STATUS_NONE)
+ goto yield;
+ if (resolution->status == RSLV_STATUS_VALID) {
+ struct sample smp;
+ short ip_sin_family = 0;
+ void *ip = NULL;
+
+ resolv_get_ip_from_response(&resolution->response, rule->arg.resolv.opts, NULL,
+ 0, &ip, &ip_sin_family, NULL);
+
+ switch (ip_sin_family) {
+ case AF_INET:
+ smp.data.type = SMP_T_IPV4;
+ memcpy(&smp.data.u.ipv4, ip, 4);
+ break;
+ case AF_INET6:
+ smp.data.type = SMP_T_IPV6;
+ memcpy(&smp.data.u.ipv6, ip, 16);
+ break;
+ default:
+ ip = NULL;
+ }
+
+ if (ip) {
+ smp.px = px;
+ smp.sess = sess;
+ smp.strm = s;
+
+ vars_set_by_name(rule->arg.resolv.varname, strlen(rule->arg.resolv.varname), &smp);
+ }
+ }
+ }
+
+ goto release_requester;
+ }
+
+ /* need to configure and start a new DNS resolution */
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.resolv.expr, SMP_T_STR);
+ if (smp == NULL)
+ goto end;
+
+ if (action_prepare_for_resolution(s, smp->data.u.str.area, smp->data.u.str.data) == -1)
+ goto end; /* on error, ignore the action */
+
+ s->resolv_ctx.parent = rule;
+
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+ locked = 1;
+
+ resolv_link_resolution(s, OBJ_TYPE_STREAM, 0);
+
+ /* Check if there is a fresh enough response in the cache of our associated resolution */
+ req = s->resolv_ctx.requester;
+ if (!req || !req->resolution)
+ goto release_requester; /* on error, ignore the action */
+ res = req->resolution;
+
+ exp = tick_add(res->last_resolution, resolvers->hold.valid);
+ if (resolvers->t && res->status == RSLV_STATUS_VALID && tick_isset(res->last_resolution)
+ && !tick_is_expired(exp, now_ms)) {
+ goto use_cache;
+ }
+
+ resolv_trigger_resolution(s->resolv_ctx.requester);
+
+ yield:
+ if (flags & ACT_OPT_FINAL)
+ goto release_requester;
+ ret = ACT_RET_YIELD;
+
+ end:
+ leave_resolver_code();
+ if (locked)
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+ return ret;
+
+ release_requester:
+ ha_free(&s->resolv_ctx.hostname_dn);
+ s->resolv_ctx.hostname_dn_len = 0;
+ if (s->resolv_ctx.requester) {
+ _resolv_unlink_resolution(s->resolv_ctx.requester);
+ pool_free(resolv_requester_pool, s->resolv_ctx.requester);
+ s->resolv_ctx.requester = NULL;
+ }
+ goto end;
+}
+
+static void release_resolv_action(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.resolv.expr);
+ free(rule->arg.resolv.varname);
+ free(rule->arg.resolv.resolvers_id);
+ free(rule->arg.resolv.opts);
+}
+
+
+/* parse "do-resolve" action
+ * This action takes the following arguments:
+ * do-resolve(<varName>,<resolversSectionName>,<resolvePrefer>) <expr>
+ *
+ * - <varName> is the variable name where the result of the DNS resolution will be stored
+ * (mandatory)
+ * - <resolversSectionName> is the name of the resolvers section to use to perform the resolution
+ * (mandatory)
+ * - <resolvePrefer> can be either 'ipv4' or 'ipv6' and is the IP family we would like to resolve first
+ * (optional), defaults to ipv6
+ * - <expr> is an HAProxy expression used to fetch the name to be resolved
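+ *
+ * Illustrative rules (variable and section names are hypothetical):
+ *   http-request do-resolve(txn.dstip,mydns,ipv4) hdr(Host),lower
+ *   http-request set-dst var(txn.dstip)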
+ */
+enum act_parse_ret resolv_parse_do_resolve(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ struct sample_expr *expr;
+ unsigned int where;
+ const char *beg, *end;
+
+ /* orig_arg points to the first argument, but we need to analyse the command itself first */
+ cur_arg = *orig_arg - 1;
+
+ /* locate varName, which is mandatory */
+ beg = strchr(args[cur_arg], '(');
+ if (beg == NULL)
+ goto do_resolve_parse_error;
+ beg = beg + 1; /* beg now points to the first character after the opening parenthesis '(' */
+ end = strchr(beg, ',');
+ if (end == NULL)
+ goto do_resolve_parse_error;
+ rule->arg.resolv.varname = my_strndup(beg, end - beg);
+ if (rule->arg.resolv.varname == NULL)
+ goto do_resolve_parse_error;
+
+
+ /* locate resolversSectionName, which is mandatory.
+ * Since next parameters are optional, the delimiter may be comma ','
+ * or closing parenthesis ')'
+ */
+ beg = end + 1;
+ end = strchr(beg, ',');
+ if (end == NULL)
+ end = strchr(beg, ')');
+ if (end == NULL)
+ goto do_resolve_parse_error;
+ rule->arg.resolv.resolvers_id = my_strndup(beg, end - beg);
+ if (rule->arg.resolv.resolvers_id == NULL)
+ goto do_resolve_parse_error;
+
+
+ rule->arg.resolv.opts = calloc(1, sizeof(*rule->arg.resolv.opts));
+ if (rule->arg.resolv.opts == NULL)
+ goto do_resolve_parse_error;
+
+ /* Default priority is ipv6 */
+ rule->arg.resolv.opts->family_prio = AF_INET6;
+
+ /* optional arguments accepted for now:
+ * ipv4 or ipv6
+ */
+ while (*end != ')') {
+ beg = end + 1;
+ end = strchr(beg, ',');
+ if (end == NULL)
+ end = strchr(beg, ')');
+ if (end == NULL)
+ goto do_resolve_parse_error;
+
+ if (strncmp(beg, "ipv4", end - beg) == 0) {
+ rule->arg.resolv.opts->family_prio = AF_INET;
+ }
+ else if (strncmp(beg, "ipv6", end - beg) == 0) {
+ rule->arg.resolv.opts->family_prio = AF_INET6;
+ }
+ else {
+ goto do_resolve_parse_error;
+ }
+ }
+
+ cur_arg = cur_arg + 1;
+
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ goto do_resolve_parse_error;
+
+
+ where = 0;
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ free(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.resolv.expr = expr;
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = resolv_action_do_resolve;
+ *orig_arg = cur_arg;
+
+ rule->check_ptr = check_action_do_resolve;
+ rule->release_ptr = release_resolv_action;
+
+ return ACT_RET_PRS_OK;
+
+ do_resolve_parse_error:
+ ha_free(&rule->arg.resolv.varname);
+ ha_free(&rule->arg.resolv.resolvers_id);
+ memprintf(err, "Can't parse '%s'. Expects 'do-resolve(<varname>,<resolvers>[,<options>]) <expr>'. Available options are 'ipv4' and 'ipv6'",
+ args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+}
+
+static struct action_kw_list http_req_kws = { { }, {
+ { "do-resolve", resolv_parse_do_resolve, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static struct action_kw_list tcp_req_cont_actions = {ILH, {
+ { "do-resolve", resolv_parse_do_resolve, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions);
+
+/* Check an "http-request do-resolve" action.
+ *
+ * The function returns 1 in success case, otherwise, it returns 0 and err is
+ * filled.
+ */
+int check_action_do_resolve(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct resolvers *resolvers = NULL;
+
+ if (rule->arg.resolv.resolvers_id == NULL) {
+ memprintf(err,"Proxy '%s': %s", px->id, "do-resolve action without resolvers");
+ return 0;
+ }
+
+ resolvers = find_resolvers_by_id(rule->arg.resolv.resolvers_id);
+ if (resolvers == NULL) {
+ memprintf(err,"Can't find resolvers section '%s' for do-resolve action", rule->arg.resolv.resolvers_id);
+ return 0;
+ }
+ rule->arg.resolv.resolvers = resolvers;
+
+ return 1;
+}
+
+void resolvers_setup_proxy(struct proxy *px)
+{
+ px->last_change = now.tv_sec;
+ px->cap = PR_CAP_FE | PR_CAP_BE;
+ px->maxconn = 0;
+ px->conn_retries = 1;
+ px->timeout.server = TICK_ETERNITY;
+ px->timeout.client = TICK_ETERNITY;
+ px->timeout.connect = TICK_ETERNITY;
+ px->accept = NULL;
+ px->options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON;
+}
+
+static int parse_resolve_conf(char **errmsg, char **warnmsg)
+{
+ struct dns_nameserver *newnameserver = NULL;
+ const char *whitespace = "\r\n\t ";
+ char *resolv_line = NULL;
+ int resolv_linenum = 0;
+ FILE *f = NULL;
+ char *address = NULL;
+ struct sockaddr_storage *sk = NULL;
+ struct protocol *proto;
+ int duplicate_name = 0;
+ int err_code = 0;
+
+ if ((resolv_line = malloc(sizeof(*resolv_line) * LINESIZE)) == NULL) {
+ memprintf(errmsg, "out of memory.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ if ((f = fopen("/etc/resolv.conf", "r")) == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "failed to open /etc/resolv.conf.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ sk = calloc(1, sizeof(*sk));
+ if (sk == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ while (fgets(resolv_line, LINESIZE, f) != NULL) {
+ resolv_linenum++;
+ if (strncmp(resolv_line, "nameserver", 10) != 0)
+ continue;
+
+ address = strtok(resolv_line + 10, whitespace);
+ if (address == resolv_line + 10) /* no whitespace after "nameserver": not the keyword */
+ continue;
+
+ if (address == NULL) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sparsing [/etc/resolv.conf:%d] : nameserver line is missing address.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum);
+ err_code |= ERR_WARN;
+ continue;
+ }
+
+ duplicate_name = 0;
+ list_for_each_entry(newnameserver, &curr_resolvers->nameservers, list) {
+ if (strcmp(newnameserver->id, address) == 0) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sParsing [/etc/resolv.conf:%d] : generated name for /etc/resolv.conf nameserver '%s' conflicts with another nameserver (declared at %s:%d), it appears to be a duplicate and will be excluded.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum, address, newnameserver->conf.file, newnameserver->conf.line);
+ err_code |= ERR_WARN;
+ duplicate_name = 1;
+ }
+ }
+
+ if (duplicate_name)
+ continue;
+
+ memset(sk, 0, sizeof(*sk));
+ if (!str2ip2(address, sk, 1)) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sparsing [/etc/resolv.conf:%d] : address '%s' could not be recognized, nameserver will be excluded.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum, address);
+ err_code |= ERR_WARN;
+ continue;
+ }
+
+ set_host_port(sk, 53);
+
+ proto = protocol_lookup(sk->ss_family, PROTO_TYPE_STREAM, 0);
+ if (!proto || !proto->connect) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sparsing [/etc/resolv.conf:%d] : '%s' : connect() not supported for this address family.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum, address);
+ err_code |= ERR_WARN;
+ continue;
+ }
+
+ if ((newnameserver = calloc(1, sizeof(*newnameserver))) == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ if (dns_dgram_init(newnameserver, sk) < 0) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(newnameserver);
+ goto resolv_out;
+ }
+
+ newnameserver->conf.file = strdup("/etc/resolv.conf");
+ if (newnameserver->conf.file == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(newnameserver);
+ goto resolv_out;
+ }
+
+ newnameserver->id = strdup(address);
+ if (newnameserver->id == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free((char *)newnameserver->conf.file);
+ free(newnameserver);
+ goto resolv_out;
+ }
+
+ newnameserver->parent = curr_resolvers;
+ newnameserver->process_responses = resolv_process_responses;
+ newnameserver->conf.line = resolv_linenum;
+ LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list);
+ }
+
+resolv_out:
+ free(sk);
+ free(resolv_line);
+ if (f != NULL)
+ fclose(f);
+
+ return err_code;
+}
+
+static int resolvers_new(struct resolvers **resolvers, const char *id, const char *file, int linenum)
+{
+ struct resolvers *r = NULL;
+ struct proxy *p = NULL;
+ int err_code = 0;
+
+ if ((r = calloc(1, sizeof(*r))) == NULL) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* allocate new proxy to tcp servers */
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ init_new_proxy(p);
+ resolvers_setup_proxy(p);
+ p->parent = r;
+ p->id = strdup(id);
+ p->conf.args.file = p->conf.file = strdup(file);
+ p->conf.args.line = p->conf.line = linenum;
+ r->px = p;
+
+ /* default values */
+ LIST_APPEND(&sec_resolvers, &r->list);
+ r->conf.file = strdup(file);
+ r->conf.line = linenum;
+ r->id = strdup(id);
+ r->query_ids = EB_ROOT;
+ /* default maximum response size */
+ r->accepted_payload_size = 512;
+ /* default hold period for nx, other, refuse and timeout is 30s */
+ r->hold.nx = 30000;
+ r->hold.other = 30000;
+ r->hold.refused = 30000;
+ r->hold.timeout = 30000;
+ r->hold.obsolete = 0;
+ /* default hold period for valid is 10s */
+ r->hold.valid = 10000;
+ r->timeout.resolve = 1000;
+ r->timeout.retry = 1000;
+ r->resolve_retries = 3;
+ LIST_INIT(&r->nameservers);
+ LIST_INIT(&r->resolutions.curr);
+ LIST_INIT(&r->resolutions.wait);
+ HA_SPIN_INIT(&r->lock);
+
+ *resolvers = r;
+
+out:
+ if (err_code & (ERR_FATAL|ERR_ABORT)) {
+ ha_free(&r);
+ ha_free(&p);
+ }
+
+ return err_code;
+}
+
+
+/*
+ * Parse a <resolvers> section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
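+ *
+ * Illustrative section (names and addresses are hypothetical):
+ *   resolvers mydns
+ *     nameserver ns1 192.168.0.53:53
+ *     parse-resolv-conf
+ *     resolve_retries 3
+ *     timeout resolve 1s
+ *     timeout retry   1s
+ *     hold valid      10s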
+ */
+int cfg_parse_resolvers(const char *file, int linenum, char **args, int kwm)
+{
+ const char *err;
+ int err_code = 0;
+ char *errmsg = NULL;
+ char *warnmsg = NULL;
+
+ if (strcmp(args[0], "resolvers") == 0) { /* new resolvers section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for resolvers section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ list_for_each_entry(curr_resolvers, &sec_resolvers, list) {
+ /* Error if two resolvers sections own the same name */
+ if (strcmp(curr_resolvers->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: resolvers '%s' has same name as another resolvers (declared at %s:%d).\n",
+ file, linenum, args[1], curr_resolvers->conf.file, curr_resolvers->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ }
+
+ err_code |= resolvers_new(&curr_resolvers, args[1], file, linenum);
+ if (err_code & ERR_ALERT) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ }
+ else if (strcmp(args[0], "nameserver") == 0) { /* nameserver definition */
+ struct dns_nameserver *newnameserver = NULL;
+ struct sockaddr_storage *sk;
+ int port1, port2;
+ struct protocol *proto;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> and <addr>[:<port>] as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in server name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ list_for_each_entry(newnameserver, &curr_resolvers->nameservers, list) {
+ /* Error if two nameservers own the same name */
+ if (strcmp(newnameserver->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: nameserver '%s' has same name as another nameserver (declared at %s:%d).\n",
+ file, linenum, args[1], newnameserver->conf.file, newnameserver->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ sk = str2sa_range(args[2], NULL, &port1, &port2, NULL, &proto,
+ &errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_DGRAM | PA_O_STREAM | PA_O_DEFAULT_DGRAM);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((newnameserver = calloc(1, sizeof(*newnameserver))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (proto && proto->xprt_type == PROTO_TYPE_STREAM) {
+ err_code |= parse_server(file, linenum, args, curr_resolvers->px, NULL,
+ SRV_PARSE_PARSE_ADDR|SRV_PARSE_INITIAL_RESOLVE);
+ if (err_code & (ERR_FATAL|ERR_ABORT)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (dns_stream_init(newnameserver, curr_resolvers->px->srv) < 0) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT|ERR_ABORT;
+ goto out;
+ }
+ }
+ else if (dns_dgram_init(newnameserver, sk) < 0) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if ((newnameserver->conf.file = strdup(file)) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if ((newnameserver->id = strdup(args[1])) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ newnameserver->parent = curr_resolvers;
+ newnameserver->process_responses = resolv_process_responses;
+ newnameserver->conf.line = linenum;
+ /* keep the nameservers in configuration order */
+ LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list);
+ }
+ else if (strcmp(args[0], "parse-resolv-conf") == 0) {
+ err_code |= parse_resolve_conf(&errmsg, &warnmsg);
+ if (err_code & ERR_WARN) {
+ indent_msg(&warnmsg, 8);
+ ha_warning("parsing [%s:%d]: %s\n", file, linenum, warnmsg);
+ ha_free(&warnmsg);
+ }
+ if (err_code & ERR_ALERT) {
+ indent_msg(&errmsg, 8);
+ ha_alert("parsing [%s:%d]: %s\n", file, linenum, errmsg);
+ ha_free(&errmsg);
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "hold") == 0) { /* hold periods */
+ const char *res;
+ unsigned int time;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an <event> and a <time> as arguments.\n",
+ file, linenum, args[0]);
+ ha_alert("<event> can be either 'valid', 'nx', 'refused', 'timeout', or 'other'\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcmp(args[1], "nx") == 0)
+ curr_resolvers->hold.nx = time;
+ else if (strcmp(args[1], "other") == 0)
+ curr_resolvers->hold.other = time;
+ else if (strcmp(args[1], "refused") == 0)
+ curr_resolvers->hold.refused = time;
+ else if (strcmp(args[1], "timeout") == 0)
+ curr_resolvers->hold.timeout = time;
+ else if (strcmp(args[1], "valid") == 0)
+ curr_resolvers->hold.valid = time;
+ else if (strcmp(args[1], "obsolete") == 0)
+ curr_resolvers->hold.obsolete = time;
+ else {
+ ha_alert("parsing [%s:%d] : '%s' unknown <event>: '%s', expects either 'nx', 'timeout', 'valid', 'obsolete' or 'other'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ }
+ else if (strcmp(args[0], "accepted_payload_size") == 0) {
+ int i = 0;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <nb> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ i = atoi(args[1]);
+ if (i < DNS_HEADER_SIZE || i > DNS_MAX_UDP_MESSAGE) {
+ ha_alert("parsing [%s:%d] : '%s' must be between %d and %d inclusive (was %s).\n",
+ file, linenum, args[0], DNS_HEADER_SIZE, DNS_MAX_UDP_MESSAGE, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curr_resolvers->accepted_payload_size = i;
+ }
+ else if (strcmp(args[0], "resolution_pool_size") == 0) {
+ ha_alert("parsing [%s:%d] : '%s' directive is not supported anymore (it never appeared in a stable release).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "resolve_retries") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <nb> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curr_resolvers->resolve_retries = atoi(args[1]);
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects 'retry' or 'resolve' and <time> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[1], "retry") == 0 ||
+ strcmp(args[1], "resolve") == 0) {
+ const char *res;
+ unsigned int tout;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects <time> as argument.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &tout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s %s>.\n",
+ file, linenum, *res, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (args[1][2] == 't') /* third char differs: "retry" vs "resolve" */
+ curr_resolvers->timeout.retry = tout;
+ else
+ curr_resolvers->timeout.resolve = tout;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'retry' or 'resolve' and <time> as arguments got '%s'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ free(warnmsg);
+ return err_code;
+}
+
+/* try to create a "default" resolvers section which uses "/etc/resolv.conf"
+ *
+ * This function is opportunistic and does not try to display errors or warnings.
+ */
+int resolvers_create_default()
+{
+ int err_code = 0;
+
+ if (global.mode & MODE_MWORKER_WAIT) /* does not create the section if in wait mode */
+ return 0;
+
+ /* if the section already exists, do nothing */
+ if (find_resolvers_by_id("default"))
+ return 0;
+
+ curr_resolvers = NULL;
+ err_code |= resolvers_new(&curr_resolvers, "default", "<internal>", 0);
+ if (err_code & ERR_CODE)
+ goto err;
+
+ curr_resolvers->conf.implicit = 1;
+
+ err_code |= parse_resolve_conf(NULL, NULL);
+ if (err_code & ERR_CODE)
+ goto err;
+ /* check if there was any nameserver in the /etc/resolv.conf file */
+ if (LIST_ISEMPTY(&curr_resolvers->nameservers)) {
+ err_code |= ERR_FATAL;
+ goto err;
+ }
+
+err:
+ if (err_code & ERR_CODE) {
+ resolvers_destroy(curr_resolvers);
+ curr_resolvers = NULL;
+ }
+
+ /* we never return an error here; we only try to create this section
+ * when that's possible */
+ return 0;
+}
+
+int cfg_post_parse_resolvers()
+{
+ int err_code = 0;
+ struct server *srv;
+
+ if (curr_resolvers) {
+
+ /* prepare forward server descriptors */
+ if (curr_resolvers->px) {
+ srv = curr_resolvers->px->srv;
+ while (srv) {
+ /* init ssl if needed */
+ if (srv->use_ssl == 1 && xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv) {
+ if (xprt_get(XPRT_SSL)->prepare_srv(srv)) {
+ ha_alert("unable to prepare SSL for server '%s' in resolvers section '%s'.\n", srv->id, curr_resolvers->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ break;
+ }
+ }
+ srv = srv->next;
+ }
+ }
+ }
+ curr_resolvers = NULL;
+ return err_code;
+}
+
+REGISTER_CONFIG_SECTION("resolvers", cfg_parse_resolvers, cfg_post_parse_resolvers);
+REGISTER_POST_DEINIT(resolvers_deinit);
+REGISTER_CONFIG_POSTPARSER("dns runtime resolver", resolvers_finalize_config);
+REGISTER_PRE_CHECK(resolvers_create_default);
diff --git a/src/ring.c b/src/ring.c
new file mode 100644
index 0000000..e40a068
--- /dev/null
+++ b/src/ring.c
@@ -0,0 +1,444 @@
+/*
+ * Ring buffer management
+ *
+ * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/buf.h>
+#include <haproxy/cli.h>
+#include <haproxy/ring.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/thread.h>
+
+/* context used to dump the contents of a ring via "show events" or "show errors" */
+struct show_ring_ctx {
+ struct ring *ring; /* ring to be dumped */
+ size_t ofs; /* offset to restart from, ~0 = end */
+ uint flags; /* set of RING_WF_* */
+};
+
+/* Initializes a pre-allocated ring <ring> with the buffer area <area> of size
+ * <size> */
+void ring_init(struct ring *ring, void *area, size_t size)
+{
+ HA_RWLOCK_INIT(&ring->lock);
+ LIST_INIT(&ring->waiters);
+ ring->readers_count = 0;
+ ring->ofs = 0;
+ ring->buf = b_make(area, size, 0, 0);
+ /* write the initial RC byte */
+ b_putchr(&ring->buf, 0);
+}
+
+/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on
+ * allocation failure.
+ */
+struct ring *ring_new(size_t size)
+{
+ struct ring *ring = NULL;
+ void *area = NULL;
+
+ if (size < 2)
+ goto fail;
+
+ ring = malloc(sizeof(*ring));
+ if (!ring)
+ goto fail;
+
+ area = malloc(size);
+ if (!area)
+ goto fail;
+
+ ring_init(ring, area, size);
+ return ring;
+ fail:
+ free(area);
+ free(ring);
+ return NULL;
+}
+
+/* Creates a unified ring + storage area at address <area> for <size> bytes.
+ * If <area> is null, then it is allocated to the requested size. The ring
+ * struct is part of the area so the usable area is slightly reduced. However
+ * the ring storage is immediately adjacent to the struct. ring_free() will
+ * ignore such rings, so the caller is responsible for releasing them.
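+ *
+ * Illustrative use (hypothetical, e.g. over a static or mapped region):
+ *   static char area[16384];
+ *   struct ring *r = ring_make_from_area(area, sizeof(area));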
+ */
+struct ring *ring_make_from_area(void *area, size_t size)
+{
+ struct ring *ring = NULL;
+
+ if (size < sizeof(*ring))
+ return NULL;
+
+ if (!area)
+ area = malloc(size);
+ if (!area)
+ return NULL;
+
+ ring = area;
+ area += sizeof(*ring);
+ ring_init(ring, area, size - sizeof(*ring));
+ return ring;
+}
+
+/* Resizes existing ring <ring> to <size>, which must be at least as large as
+ * the current size, otherwise no change is performed; the contents are
+ * preserved. The pointer to the ring is returned on success, or NULL on
+ * allocation failure. This will lock the ring for writes.
+ */
+struct ring *ring_resize(struct ring *ring, size_t size)
+{
+ void *area;
+
+ if (b_size(&ring->buf) >= size)
+ return ring;
+
+ area = malloc(size);
+ if (!area)
+ return NULL;
+
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+
+ /* recheck the buffer's size, it may have changed during the malloc */
+ if (b_size(&ring->buf) < size) {
+ /* copy old contents */
+ b_getblk(&ring->buf, area, ring->buf.data, 0);
+ area = HA_ATOMIC_XCHG(&ring->buf.area, area);
+ ring->buf.size = size;
+ }
+
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+
+ free(area);
+ return ring;
+}
+
+/* destroys and frees ring <ring> */
+void ring_free(struct ring *ring)
+{
+ if (!ring)
+ return;
+
+ /* make sure it was not allocated by ring_make_from_area */
+ if (ring->buf.area == (void *)ring + sizeof(*ring))
+ return;
+
+ free(ring->buf.area);
+ free(ring);
+}
+
+/* Tries to send <npfx> parts from <prefix> followed by <nmsg> parts from <msg>
+ * to ring <ring>. The message is sent atomically. It may be truncated to
+ * <maxlen> bytes if <maxlen> is non-null. There is no distinction between the
+ * two lists, it's just a convenience to help the caller prepend some prefixes
+ * when necessary. It takes the ring's write lock to make sure no other thread
+ * will touch the buffer during the update. Returns the number of bytes sent,
+ * or <=0 on failure.
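+ *
+ * Layout note (as implemented below): the buffer always ends with a one-byte
+ * reader counter, so each message is appended as
+ *   <varint len> <payload> <fresh zero counter byte>
+ * Illustrative call (hypothetical message parts):
+ *   struct ist parts[2] = { ist("hello "), ist("world") };
+ *   ring_write(ring, ~0, NULL, 0, parts, 2);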
+ */
+ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg)
+{
+ struct buffer *buf = &ring->buf;
+ struct appctx *appctx;
+ size_t totlen = 0;
+ size_t lenlen;
+ uint64_t dellen;
+ int dellenlen;
+ ssize_t sent = 0;
+ int i;
+
+ /* we have to find some room to add our message (the buffer is
+ * never empty and at least contains the previous counter) and
+ * to update both the buffer contents and heads at the same
+ * time (it's doable using atomic ops but not worth the
+ * trouble, let's just lock). For this we first need to know
+ * the total message's length. We cannot measure it while
+ * copying due to the varint encoding of the length.
+ */
+ for (i = 0; i < npfx; i++)
+ totlen += pfx[i].len;
+ for (i = 0; i < nmsg; i++)
+ totlen += msg[i].len;
+
+ if (totlen > maxlen)
+ totlen = maxlen;
+
+ lenlen = varint_bytes(totlen);
+
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ if (lenlen + totlen + 1 + 1 > b_size(buf))
+ goto done_buf;
+
+ while (b_room(buf) < lenlen + totlen + 1) {
+ /* we need to delete the oldest message (from the end),
+ * and we have to stop if there's a reader stuck there.
+ * Unless there's corruption in the buffer it's guaranteed
+ * that we have enough data to find 1 counter byte, a
+ * varint-encoded length (1 byte min) and the message
+ * payload (0 bytes min).
+ */
+ if (*b_head(buf))
+ goto done_buf;
+ dellenlen = b_peek_varint(buf, 1, &dellen);
+ if (!dellenlen)
+ goto done_buf;
+ BUG_ON(b_data(buf) < 1 + dellenlen + dellen);
+
+ b_del(buf, 1 + dellenlen + dellen);
+ ring->ofs += 1 + dellenlen + dellen;
+ }
+
+ /* OK now we do have room */
+ __b_put_varint(buf, totlen);
+
+ totlen = 0;
+ for (i = 0; i < npfx; i++) {
+ size_t len = pfx[i].len;
+
+ if (len + totlen > maxlen)
+ len = maxlen - totlen;
+ if (len)
+ __b_putblk(buf, pfx[i].ptr, len);
+ totlen += len;
+ }
+
+ for (i = 0; i < nmsg; i++) {
+ size_t len = msg[i].len;
+
+ if (len + totlen > maxlen)
+ len = maxlen - totlen;
+ if (len)
+ __b_putblk(buf, msg[i].ptr, len);
+ totlen += len;
+ }
+
+ *b_tail(buf) = 0; buf->data++; // new read counter
+ sent = lenlen + totlen + 1;
+
+ /* notify potential readers */
+ list_for_each_entry(appctx, &ring->waiters, wait_entry)
+ appctx_wakeup(appctx);
+
+ done_buf:
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+ return sent;
+}
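+
+/* Worked example of the storage format handled above (illustrative only):
+ * after writing the 5-byte message "hello" into a freshly initialized ring,
+ * the buffer contains, from head to tail:
+ *
+ *     [00]                        initial read counter byte (always present)
+ *     [05]                        varint-encoded message length
+ *     ['h']['e']['l']['l']['o']   message payload
+ *     [00]                        read counter byte for the next message
+ *
+ * Readers atomically increment the counter byte of the record they pause on,
+ * which is what the eviction loop above checks (via *b_head(buf)) before
+ * deleting the oldest message.
+ */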
+
+/* Tries to attach a new reader on ring <ring>. This is meant to be used by
+ * low-level appctx code such as the CLI or ring forwarding. For higher level
+ * functions, please see the relevant parts in appctx or CLI. It returns
+ * non-zero on success, or zero on failure if too many users are already
+ * attached. On success, the caller MUST call ring_detach_appctx() to detach
+ * itself, even if it was never woken up.
+ */
+int ring_attach(struct ring *ring)
+{
+ int users = ring->readers_count;
+
+ do {
+ if (users >= 255)
+ return 0;
+ } while (!_HA_ATOMIC_CAS(&ring->readers_count, &users, users + 1));
+ return 1;
+}
+
+/* Detach appctx <appctx> from ring <ring>. The appctx is expected to be
+ * waiting at offset <ofs>. Nothing is done if <ring> is NULL.
+ */
+void ring_detach_appctx(struct ring *ring, struct appctx *appctx, size_t ofs)
+{
+ if (!ring)
+ return;
+
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ if (ofs != ~0) {
+ /* reader was still attached */
+ ofs -= ring->ofs;
+ BUG_ON(ofs >= b_size(&ring->buf));
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_ATOMIC_DEC(b_peek(&ring->buf, ofs));
+ }
+ HA_ATOMIC_DEC(&ring->readers_count);
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+}
+
+/* Tries to attach CLI handler <appctx> as a new reader on ring <ring>. This is
+ * meant to be used when registering a CLI function to dump a buffer, so it
+ * returns zero on success, or non-zero on failure with a message in the appctx
+ * CLI context. It automatically sets the io_handler and io_release callbacks if
+ * they were not set. The <flags> take a combination of RING_WF_*.
+ */
+int ring_attach_cli(struct ring *ring, struct appctx *appctx, uint flags)
+{
+ struct show_ring_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!ring_attach(ring))
+ return cli_err(appctx,
+ "Sorry, too many watchers (255) on this ring buffer. "
+ "What could it contain that is so interesting to attract so many watchers ?");
+
+ if (!appctx->io_handler)
+ appctx->io_handler = cli_io_handler_show_ring;
+ if (!appctx->io_release)
+ appctx->io_release = cli_io_release_show_ring;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->ring = ring;
+ ctx->ofs = ~0; // start from the oldest event
+ ctx->flags = flags;
+ return 0;
+}
+
+/* This function dumps all events from the ring pointed to by the context's
+ * <ring> field into the appctx's output buffer, seeking from the context's
+ * <ofs> offset into the buffer's history (~0 for the oldest known event). It
+ * looks at the context's <flags> for options: RING_WF_WAIT_MODE means it must
+ * wait for new data or for any key to be pressed; RING_WF_SEEK_NEW means it
+ * must seek directly to the end to wait for new contents. It returns 0 if the
+ * output buffer is full and it needs to be called again, otherwise non-zero.
+ * It is meant to be used with cli_io_release_show_ring() to clean up.
+ */
+int cli_io_handler_show_ring(struct appctx *appctx)
+{
+ struct show_ring_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct ring *ring = ctx->ring;
+ struct buffer *buf = &ring->buf;
+ size_t ofs = ctx->ofs;
+ size_t last_ofs;
+ uint64_t msg_len;
+ size_t len, cnt;
+ int ret;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ return 1;
+
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(LOGSRV_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ofs == ~0)) {
+ ofs = 0;
+
+ /* going to the end means looking at tail-1 */
+ if (ctx->flags & RING_WF_SEEK_NEW)
+ ofs += b_data(buf) - 1;
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs -= ring->ofs;
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ /* in this loop, ofs always points to the counter byte that precedes
+ * the message so that we can take our reference there if we have to
+ * stop before the end (ret=0).
+ */
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ if (unlikely(msg_len + 1 > b_size(&trash))) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ chunk_reset(&trash);
+ len = b_getblk(buf, trash.area, msg_len, ofs + cnt);
+ trash.data += len;
+ trash.area[trash.data++] = '\n';
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ret = 0;
+ break;
+ }
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ last_ofs = ring->ofs;
+ ctx->ofs = ofs;
+ HA_RWLOCK_RDUNLOCK(LOGSRV_LOCK, &ring->lock);
+
+ if (ret && (ctx->flags & RING_WF_WAIT_MODE)) {
+ /* we've drained everything and are configured to wait for more
+ * data or an event (keypress, close)
+ */
+ if (!sc_oc(sc)->output && !(sc_oc(sc)->flags & CF_SHUTW)) {
+ /* let's be woken up once new data arrive */
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ ofs = ring->ofs;
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+ if (ofs != last_ofs) {
+ /* more data was added into the ring between the
+ * unlock and the lock, and the writer might not
+ * have seen us. We need to reschedule a read.
+ */
+ applet_have_more_data(appctx);
+ } else
+ applet_have_no_more_data(appctx);
+ ret = 0;
+ }
+ /* always drain all the request */
+ co_skip(sc_oc(sc), sc_oc(sc)->output);
+ }
+ return ret;
+}
+
+/* must be called after cli_io_handler_show_ring() above */
+void cli_io_release_show_ring(struct appctx *appctx)
+{
+ struct show_ring_ctx *ctx = appctx->svcctx;
+ struct ring *ring = ctx->ring;
+ size_t ofs = ctx->ofs;
+
+ ring_detach_appctx(ring, appctx, ofs);
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/sample.c b/src/sample.c
new file mode 100644
index 0000000..852cd04
--- /dev/null
+++ b/src/sample.c
@@ -0,0 +1,4452 @@
+/*
+ * Sample management functions.
+ *
+ * Copyright 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+
+#include <import/mjson.h>
+#include <import/sha1.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/base64.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/clock.h>
+#include <haproxy/errors.h>
+#include <haproxy/fix.h>
+#include <haproxy/global.h>
+#include <haproxy/hash.h>
+#include <haproxy/http.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/mqtt.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protobuf.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sink.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+#include <haproxy/jwt.h>
+
+/* sample type names */
+const char *smp_to_type[SMP_TYPES] = {
+ [SMP_T_ANY] = "any",
+ [SMP_T_BOOL] = "bool",
+ [SMP_T_SINT] = "sint",
+ [SMP_T_ADDR] = "addr",
+ [SMP_T_IPV4] = "ipv4",
+ [SMP_T_IPV6] = "ipv6",
+ [SMP_T_STR] = "str",
+ [SMP_T_BIN] = "bin",
+ [SMP_T_METH] = "meth",
+};
+
+/* static sample used in sample_process() when <p> is NULL */
+static THREAD_LOCAL struct sample temp_smp;
+
+/* list head of all known sample fetch keywords */
+static struct sample_fetch_kw_list sample_fetches = {
+ .list = LIST_HEAD_INIT(sample_fetches.list)
+};
+
+/* list head of all known sample format conversion keywords */
+static struct sample_conv_kw_list sample_convs = {
+ .list = LIST_HEAD_INIT(sample_convs.list)
+};
+
+const unsigned int fetch_cap[SMP_SRC_ENTRIES] = {
+ [SMP_SRC_CONST] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL_CFG_PARSER |
+ SMP_VAL_CLI_PARSER ),
+
+ [SMP_SRC_INTRN] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL_CLI_PARSER ),
+
+ [SMP_SRC_LISTN] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_FTEND] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L4CLI] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L5CLI] = (SMP_VAL___________ | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_TRACK] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L6REQ] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRQHV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRQHP] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRQBO] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_BKEND] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_SERVR] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L4SRV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L5SRV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L6RES] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL___________ | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRSHV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL___________ | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRSHP] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRSBO] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL___________ | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_RQFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_RSFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_TXFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_SSFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+};
+
+static const char *fetch_src_names[SMP_SRC_ENTRIES] = {
+ [SMP_SRC_INTRN] = "internal state",
+ [SMP_SRC_LISTN] = "listener",
+ [SMP_SRC_FTEND] = "frontend",
+ [SMP_SRC_L4CLI] = "client address",
+ [SMP_SRC_L5CLI] = "client-side connection",
+ [SMP_SRC_TRACK] = "track counters",
+ [SMP_SRC_L6REQ] = "request buffer",
+ [SMP_SRC_HRQHV] = "HTTP request headers",
+ [SMP_SRC_HRQHP] = "HTTP request",
+ [SMP_SRC_HRQBO] = "HTTP request body",
+ [SMP_SRC_BKEND] = "backend",
+ [SMP_SRC_SERVR] = "server",
+ [SMP_SRC_L4SRV] = "server address",
+ [SMP_SRC_L5SRV] = "server-side connection",
+ [SMP_SRC_L6RES] = "response buffer",
+ [SMP_SRC_HRSHV] = "HTTP response headers",
+ [SMP_SRC_HRSHP] = "HTTP response",
+ [SMP_SRC_HRSBO] = "HTTP response body",
+ [SMP_SRC_RQFIN] = "request buffer statistics",
+ [SMP_SRC_RSFIN] = "response buffer statistics",
+ [SMP_SRC_TXFIN] = "transaction statistics",
+ [SMP_SRC_SSFIN] = "session statistics",
+};
+
+static const char *fetch_ckp_names[SMP_CKP_ENTRIES] = {
+ [SMP_CKP_FE_CON_ACC] = "frontend tcp-request connection rule",
+ [SMP_CKP_FE_SES_ACC] = "frontend tcp-request session rule",
+ [SMP_CKP_FE_REQ_CNT] = "frontend tcp-request content rule",
+ [SMP_CKP_FE_HRQ_HDR] = "frontend http-request header rule",
+ [SMP_CKP_FE_HRQ_BDY] = "frontend http-request body rule",
+ [SMP_CKP_FE_SET_BCK] = "frontend use-backend rule",
+ [SMP_CKP_BE_REQ_CNT] = "backend tcp-request content rule",
+ [SMP_CKP_BE_HRQ_HDR] = "backend http-request header rule",
+ [SMP_CKP_BE_HRQ_BDY] = "backend http-request body rule",
+ [SMP_CKP_BE_SET_SRV] = "backend use-server, balance or stick-match rule",
+ [SMP_CKP_BE_SRV_CON] = "server source selection",
+ [SMP_CKP_BE_RES_CNT] = "backend tcp-response content rule",
+ [SMP_CKP_BE_HRS_HDR] = "backend http-response header rule",
+ [SMP_CKP_BE_HRS_BDY] = "backend http-response body rule",
+ [SMP_CKP_BE_STO_RUL] = "backend stick-store rule",
+ [SMP_CKP_FE_RES_CNT] = "frontend tcp-response content rule",
+ [SMP_CKP_FE_HRS_HDR] = "frontend http-response header rule",
+ [SMP_CKP_FE_HRS_BDY] = "frontend http-response body rule",
+ [SMP_CKP_FE_LOG_END] = "logs",
+ [SMP_CKP_BE_CHK_RUL] = "backend tcp-check rule",
+ [SMP_CKP_CFG_PARSER] = "configuration parser",
+ [SMP_CKP_CLI_PARSER] = "CLI parser",
+};
+
+/* This function returns the type of the data returned by the sample_expr.
+ * It assumes that the <expr> and all of its converters are properly
+ * initialized.
+ */
+inline
+int smp_expr_output_type(struct sample_expr *expr)
+{
+ struct sample_conv_expr *smp_expr;
+
+ if (!LIST_ISEMPTY(&expr->conv_exprs)) {
+ smp_expr = LIST_PREV(&expr->conv_exprs, struct sample_conv_expr *, list);
+ return smp_expr->conv->out_type;
+ }
+ return expr->fetch->out_type;
+}
+
+
+/* Fill the trash buffer with a comma-delimited list of source names for the
+ * <use> bit field, which must be composed of a non-null set of SMP_USE_*
+ * flags. The return value is a pointer to the string in the trash buffer.
+ */
+const char *sample_src_names(unsigned int use)
+{
+ int bit;
+
+ trash.data = 0;
+ trash.area[0] = '\0';
+ for (bit = 0; bit < SMP_SRC_ENTRIES; bit++) {
+ if (!(use & ~((1 << bit) - 1)))
+ break; /* no more bits */
+
+ if (!(use & (1 << bit)))
+ continue; /* bit not set */
+
+ trash.data += snprintf(trash.area + trash.data,
+ trash.size - trash.data, "%s%s",
+ (use & ((1 << bit) - 1)) ? "," : "",
+ fetch_src_names[bit]);
+ }
+ return trash.area;
+}
+
+/* Return a pointer to the sample checkpoint name matching the lowest bit set
+ * in <use>, or an error string when the flags are invalid. Higher bits are
+ * ignored if set.
+ */
+const char *sample_ckp_names(unsigned int use)
+{
+ int bit;
+
+ for (bit = 0; bit < SMP_CKP_ENTRIES; bit++)
+ if (use & (1 << bit))
+ return fetch_ckp_names[bit];
+ return "unknown sample check place, please report this bug";
+}
+
+/*
+ * Registers the sample fetch keyword list <kwl> as a list of valid keywords
+ * for next parsing sessions. The fetch keywords' capabilities are also
+ * computed from their ->use field.
+ */
+void sample_register_fetches(struct sample_fetch_kw_list *kwl)
+{
+ struct sample_fetch *sf;
+ int bit;
+
+ for (sf = kwl->kw; sf->kw != NULL; sf++) {
+ for (bit = 0; bit < SMP_SRC_ENTRIES; bit++)
+ if (sf->use & (1 << bit))
+ sf->val |= fetch_cap[bit];
+ }
+ LIST_APPEND(&sample_fetches.list, &kwl->list);
+}
+
+/*
+ * Registers the sample format conversion keyword list <pckl> as a list of
+ * valid keywords for next parsing sessions.
+ */
+void sample_register_convs(struct sample_conv_kw_list *pckl)
+{
+ LIST_APPEND(&sample_convs.list, &pckl->list);
+}
+
+/*
+ * Returns a pointer to the sample fetch keyword structure identified by the
+ * first <len> characters of buffer <kw>, or NULL if not found.
+ */
+struct sample_fetch *find_sample_fetch(const char *kw, int len)
+{
+ int index;
+ struct sample_fetch_kw_list *kwl;
+
+ list_for_each_entry(kwl, &sample_fetches.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strncmp(kwl->kw[index].kw, kw, len) == 0 &&
+ kwl->kw[index].kw[len] == '\0')
+ return &kwl->kw[index];
+ }
+ }
+ return NULL;
+}
+
+/* dump list of registered sample fetch keywords on stdout */
+void smp_dump_fetch_kw(void)
+{
+ struct sample_fetch_kw_list *kwl;
+ struct sample_fetch *kwp, *kw;
+ uint64_t mask;
+ int index;
+ int arg;
+ int bit;
+
+ for (bit = 0; bit <= SMP_CKP_ENTRIES + 1; bit++) {
+ putchar('#');
+ for (index = 0; bit + index <= SMP_CKP_ENTRIES; index++)
+ putchar(' ');
+ for (index = 0; index < bit && index < SMP_CKP_ENTRIES; index++)
+ printf((bit <= SMP_CKP_ENTRIES) ? "/ " : " |");
+ for (index = bit; bit < SMP_CKP_ENTRIES && index < SMP_CKP_ENTRIES + 2; index++)
+ if (index == bit)
+ putchar('_');
+ else if (index == bit + 1)
+ putchar('.');
+ else
+ putchar('-');
+ printf(" %s\n", (bit < SMP_CKP_ENTRIES) ? fetch_ckp_names[bit] : "");
+ }
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &sample_fetches.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL))
+ kw = &kwl->kw[index];
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ printf("[ ");
+ for (bit = 0; bit < SMP_CKP_ENTRIES; bit++)
+ printf("%s", (kw->val & (1 << bit)) ? "Y " : ". ");
+
+ printf("] %s", kw->kw);
+ if (kw->arg_mask) {
+ mask = kw->arg_mask >> ARGM_BITS;
+ printf("(");
+ for (arg = 0;
+ arg < ARGM_NBARGS && ((mask >> (arg * ARGT_BITS)) & ARGT_MASK);
+ arg++) {
+ if (arg == (kw->arg_mask & ARGM_MASK)) {
+ /* now dumping extra args */
+ printf("[");
+ }
+ if (arg)
+ printf(",");
+ printf("%s", arg_type_names[(mask >> (arg * ARGT_BITS)) & ARGT_MASK]);
+ }
+ if (arg > (kw->arg_mask & ARGM_MASK)) {
+ /* extra args were dumped */
+ printf("]");
+ }
+ printf(")");
+ }
+ printf(": %s", smp_to_type[kw->out_type]);
+ printf("\n");
+ }
+}
+
+/* dump list of registered sample converter keywords on stdout */
+void smp_dump_conv_kw(void)
+{
+ struct sample_conv_kw_list *kwl;
+ struct sample_conv *kwp, *kw;
+ uint64_t mask;
+ int index;
+ int arg;
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &sample_convs.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL))
+ kw = &kwl->kw[index];
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ printf("%s", kw->kw);
+ if (kw->arg_mask) {
+ mask = kw->arg_mask >> ARGM_BITS;
+ printf("(");
+ for (arg = 0;
+ arg < ARGM_NBARGS && ((mask >> (arg * ARGT_BITS)) & ARGT_MASK);
+ arg++) {
+ if (arg == (kw->arg_mask & ARGM_MASK)) {
+ /* now dumping extra args */
+ printf("[");
+ }
+ if (arg)
+ printf(",");
+ printf("%s", arg_type_names[(mask >> (arg * ARGT_BITS)) & ARGT_MASK]);
+ }
+ if (arg > (kw->arg_mask & ARGM_MASK)) {
+ /* extra args were dumped */
+ printf("]");
+ }
+ printf(")");
+ }
+ printf(": %s => %s", smp_to_type[kw->in_type], smp_to_type[kw->out_type]);
+ printf("\n");
+ }
+}
+
+/* This function browses the list of available sample fetches. <current> is
+ * the last returned sample fetch; it must be set to NULL on the first call.
+ * <idx> is the index of the next sample fetch entry. It is used as a private
+ * value and does not need to be initialized by the caller.
+ *
+ * It always returns the next sample_fetch entry, or NULL when the end of
+ * the list is reached.
+ */
+struct sample_fetch *sample_fetch_getnext(struct sample_fetch *current, int *idx)
+{
+ struct sample_fetch_kw_list *kwl;
+ struct sample_fetch *base;
+
+ if (!current) {
+ /* Get first kwl entry. */
+ kwl = LIST_NEXT(&sample_fetches.list, struct sample_fetch_kw_list *, list);
+ (*idx) = 0;
+ } else {
+ /* Get the kwl corresponding to the current entry. */
+ base = current + 1 - (*idx);
+ kwl = container_of(base, struct sample_fetch_kw_list, kw);
+ }
+
+ while (1) {
+
+ /* Check if kwl is the last entry. */
+ if (&kwl->list == &sample_fetches.list)
+ return NULL;
+
+ /* idx contains the next keyword. If it is available, return it. */
+ if (kwl->kw[*idx].kw) {
+ (*idx)++;
+ return &kwl->kw[(*idx)-1];
+ }
+
+ /* get next entry in the main list, and return NULL if the end is reached. */
+ kwl = LIST_NEXT(&kwl->list, struct sample_fetch_kw_list *, list);
+
+ /* Reset the index to 0 and loop again. */
+ (*idx) = 0;
+ }
+}
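+
+/* Iteration sketch (illustrative only, not part of the build): walking all
+ * registered sample fetch keywords, e.g. to dump their names:
+ *
+ *     struct sample_fetch *sf = NULL;
+ *     int idx;
+ *
+ *     while ((sf = sample_fetch_getnext(sf, &idx)))
+ *             printf("%s\n", sf->kw);
+ */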
+
+/* This function browses the list of available converters. <current> is
+ * the last returned converter; it must be set to NULL on the first call.
+ * <idx> is the index of the next converter entry. It is used as a private
+ * value and does not need to be initialized by the caller.
+ *
+ * It always returns the next sample_conv entry, or NULL when the end of
+ * the list is reached.
+ */
+struct sample_conv *sample_conv_getnext(struct sample_conv *current, int *idx)
+{
+ struct sample_conv_kw_list *kwl;
+ struct sample_conv *base;
+
+ if (!current) {
+ /* Get first kwl entry. */
+ kwl = LIST_NEXT(&sample_convs.list, struct sample_conv_kw_list *, list);
+ (*idx) = 0;
+ } else {
+ /* Get the kwl corresponding to the current entry. */
+ base = current + 1 - (*idx);
+ kwl = container_of(base, struct sample_conv_kw_list, kw);
+ }
+
+ while (1) {
+ /* Check if kwl is the last entry. */
+ if (&kwl->list == &sample_convs.list)
+ return NULL;
+
+ /* idx contains the next keyword. If it is available, return it. */
+ if (kwl->kw[*idx].kw) {
+ (*idx)++;
+ return &kwl->kw[(*idx)-1];
+ }
+
+ /* get next entry in the main list, and return NULL if the end is reached. */
+ kwl = LIST_NEXT(&kwl->list, struct sample_conv_kw_list *, list);
+
+ /* Reset the index to 0 and loop again. */
+ (*idx) = 0;
+ }
+}
+
+/*
+ * Returns a pointer to the sample format conversion keyword structure
+ * identified by the first <len> characters of buffer <kw>, or NULL if not
+ * found.
+ */
+struct sample_conv *find_sample_conv(const char *kw, int len)
+{
+ int index;
+ struct sample_conv_kw_list *kwl;
+
+ list_for_each_entry(kwl, &sample_convs.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strncmp(kwl->kw[index].kw, kw, len) == 0 &&
+ kwl->kw[index].kw[len] == '\0')
+ return &kwl->kw[index];
+ }
+ }
+ return NULL;
+}
+
+/******************************************************************/
+/* Sample casts functions */
+/******************************************************************/
+
+static int c_ip2int(struct sample *smp)
+{
+ smp->data.u.sint = ntohl(smp->data.u.ipv4.s_addr);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+static int c_ip2str(struct sample *smp)
+{
+ struct buffer *trash = get_trash_chunk();
+
+ if (!inet_ntop(AF_INET, (void *)&smp->data.u.ipv4, trash->area, trash->size))
+ return 0;
+
+ trash->data = strlen(trash->area);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+
+ return 1;
+}
+
+static int c_ip2ipv6(struct sample *smp)
+{
+ v4tov6(&smp->data.u.ipv6, &smp->data.u.ipv4);
+ smp->data.type = SMP_T_IPV6;
+ return 1;
+}
+
+static int c_ipv62ip(struct sample *smp)
+{
+ if (!v6tov4(&smp->data.u.ipv4, &smp->data.u.ipv6))
+ return 0;
+ smp->data.type = SMP_T_IPV4;
+ return 1;
+}
+
+static int c_ipv62str(struct sample *smp)
+{
+ struct buffer *trash = get_trash_chunk();
+
+ if (!inet_ntop(AF_INET6, (void *)&smp->data.u.ipv6, trash->area, trash->size))
+ return 0;
+
+ trash->data = strlen(trash->area);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_int2ip(struct sample *smp)
+{
+ smp->data.u.ipv4.s_addr = htonl((unsigned int)smp->data.u.sint);
+ smp->data.type = SMP_T_IPV4;
+ return 1;
+}
+
+static int c_int2ipv6(struct sample *smp)
+{
+ smp->data.u.ipv4.s_addr = htonl((unsigned int)smp->data.u.sint);
+ v4tov6(&smp->data.u.ipv6, &smp->data.u.ipv4);
+ smp->data.type = SMP_T_IPV6;
+ return 1;
+}
+
+static int c_str2addr(struct sample *smp)
+{
+ if (!buf2ip(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv4)) {
+ if (!buf2ip6(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv6))
+ return 0;
+ smp->data.type = SMP_T_IPV6;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+ }
+ smp->data.type = SMP_T_IPV4;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_str2ip(struct sample *smp)
+{
+ if (!buf2ip(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv4))
+ return 0;
+ smp->data.type = SMP_T_IPV4;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_str2ipv6(struct sample *smp)
+{
+ if (!buf2ip6(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv6))
+ return 0;
+ smp->data.type = SMP_T_IPV6;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/*
+ * A NUL character always marks the end of the string when encountered.
+ * The data is never modified, so we can ignore the CONST case.
+ */
+static int c_bin2str(struct sample *smp)
+{
+ int i;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if (!smp->data.u.str.area[i]) {
+ smp->data.u.str.data = i;
+ break;
+ }
+ }
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
+
+static int c_int2str(struct sample *smp)
+{
+ struct buffer *trash = get_trash_chunk();
+ char *pos;
+
+ pos = lltoa_r(smp->data.u.sint, trash->area, trash->size);
+ if (!pos)
+ return 0;
+
+ trash->size = trash->size - (pos - trash->area);
+ trash->area = pos;
+ trash->data = strlen(pos);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/* This function unconditionally duplicates data and removes the "const" flag.
+ * For strings and binary blocks, it also provides a known allocated size with
+ * a length that is capped to the size, and ensures a trailing zero is always
+ * appended for strings. This is necessary for some operations which may
+ * require to extend the length. It returns 0 if it fails, 1 on success.
+ */
+int smp_dup(struct sample *smp)
+{
+ struct buffer *trash;
+
+ switch (smp->data.type) {
+ case SMP_T_BOOL:
+ case SMP_T_SINT:
+ case SMP_T_ADDR:
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ /* These types are not const. */
+ break;
+
+ case SMP_T_METH:
+ if (smp->data.u.meth.meth != HTTP_METH_OTHER)
+ break;
+ /* Fall through */
+
+ case SMP_T_STR:
+ trash = get_trash_chunk();
+ trash->data = smp->data.type == SMP_T_STR ?
+ smp->data.u.str.data : smp->data.u.meth.str.data;
+ if (trash->data > trash->size - 1)
+ trash->data = trash->size - 1;
+
+ memcpy(trash->area, smp->data.type == SMP_T_STR ?
+ smp->data.u.str.area : smp->data.u.meth.str.area,
+ trash->data);
+ trash->area[trash->data] = 0;
+ smp->data.u.str = *trash;
+ break;
+
+ case SMP_T_BIN:
+ trash = get_trash_chunk();
+ trash->data = smp->data.u.str.data;
+ if (trash->data > trash->size)
+ trash->data = trash->size;
+
+ memcpy(trash->area, smp->data.u.str.area, trash->data);
+ smp->data.u.str = *trash;
+ break;
+
+ default:
+ /* Other cases are unexpected. */
+ return 0;
+ }
+
+ /* remove const flag */
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
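+
+/* Usage sketch (illustrative only, not part of the build): a converter that
+ * needs to modify a string sample in place would first duplicate it so that
+ * const samples are never written to:
+ *
+ *     if (!smp_dup(smp))
+ *             return 0;
+ *     smp->data.u.str.area[0] = 'x';   // safe: copied to a writable trash chunk
+ */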
+
+int c_none(struct sample *smp)
+{
+ return 1;
+}
+
+static int c_str2int(struct sample *smp)
+{
+ const char *str;
+ const char *end;
+
+ if (smp->data.u.str.data == 0)
+ return 0;
+
+ str = smp->data.u.str.area;
+ end = smp->data.u.str.area + smp->data.u.str.data;
+
+ smp->data.u.sint = read_int64(&str, end);
+ smp->data.type = SMP_T_SINT;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_str2meth(struct sample *smp)
+{
+ enum http_meth_t meth;
+ int len;
+
+ meth = find_http_meth(smp->data.u.str.area, smp->data.u.str.data);
+ if (meth == HTTP_METH_OTHER) {
+ len = smp->data.u.str.data;
+ smp->data.u.meth.str.area = smp->data.u.str.area;
+ smp->data.u.meth.str.data = len;
+ }
+ else
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.u.meth.meth = meth;
+ smp->data.type = SMP_T_METH;
+ return 1;
+}
+
+static int c_meth2str(struct sample *smp)
+{
+ int len;
+ enum http_meth_t meth;
+
+ if (smp->data.u.meth.meth == HTTP_METH_OTHER) {
+ /* The method is unknown. Copy the original pointer. */
+ len = smp->data.u.meth.str.data;
+ smp->data.u.str.area = smp->data.u.meth.str.area;
+ smp->data.u.str.data = len;
+ smp->data.type = SMP_T_STR;
+ }
+ else if (smp->data.u.meth.meth < HTTP_METH_OTHER) {
+ /* The method is known, copy the pointer containing the string. */
+ meth = smp->data.u.meth.meth;
+ smp->data.u.str.area = http_known_methods[meth].ptr;
+ smp->data.u.str.data = http_known_methods[meth].len;
+ smp->flags |= SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ }
+ else {
+ /* Unknown method */
+ return 0;
+ }
+ return 1;
+}
+
+static int c_addr2bin(struct sample *smp)
+{
+ struct buffer *chk = get_trash_chunk();
+
+ if (smp->data.type == SMP_T_IPV4) {
+ chk->data = 4;
+ memcpy(chk->area, &smp->data.u.ipv4, chk->data);
+ }
+ else if (smp->data.type == SMP_T_IPV6) {
+ chk->data = 16;
+ memcpy(chk->area, &smp->data.u.ipv6, chk->data);
+ }
+ else
+ return 0;
+
+ smp->data.u.str = *chk;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+static int c_int2bin(struct sample *smp)
+{
+ struct buffer *chk = get_trash_chunk();
+
+ *(unsigned long long int *) chk->area = my_htonll(smp->data.u.sint);
+ chk->data = 8;
+
+ smp->data.u.str = *chk;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+static int c_bool2bin(struct sample *smp)
+{
+ struct buffer *chk = get_trash_chunk();
+
+ *(unsigned long long int *)chk->area = my_htonll(!!smp->data.u.sint);
+ chk->data = 8;
+ smp->data.u.str = *chk;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+
+/*****************************************************************/
+/* Sample casts matrix: */
+/* sample_casts[from type][to type] */
+/* NULL pointer used for impossible sample casts */
+/*****************************************************************/
+
+sample_cast_fct sample_casts[SMP_TYPES][SMP_TYPES] = {
+/*            to:  ANY     BOOL       SINT       ADDR        IPV4       IPV6        STR         BIN         METH */
+/* from:  ANY */ { c_none, c_none,    c_none,    c_none,     c_none,    c_none,     c_none,     c_none,     c_none,     },
+/*       BOOL */ { c_none, c_none,    c_none,    NULL,       NULL,      NULL,       c_int2str,  c_bool2bin, NULL,       },
+/*       SINT */ { c_none, c_none,    c_none,    c_int2ip,   c_int2ip,  c_int2ipv6, c_int2str,  c_int2bin,  NULL,       },
+/*       ADDR */ { c_none, NULL,      NULL,      NULL,       NULL,      NULL,       NULL,       NULL,       NULL,       },
+/*       IPV4 */ { c_none, NULL,      c_ip2int,  c_none,     c_none,    c_ip2ipv6,  c_ip2str,   c_addr2bin, NULL,       },
+/*       IPV6 */ { c_none, NULL,      NULL,      c_none,     c_ipv62ip, c_none,     c_ipv62str, c_addr2bin, NULL,       },
+/*        STR */ { c_none, c_str2int, c_str2int, c_str2addr, c_str2ip,  c_str2ipv6, c_none,     c_none,     c_str2meth, },
+/*        BIN */ { c_none, NULL,      NULL,      NULL,       NULL,      NULL,       c_bin2str,  c_none,     c_str2meth, },
+/*       METH */ { c_none, NULL,      NULL,      NULL,       NULL,      NULL,       c_meth2str, c_meth2str, c_none,     }
+};
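+
+/* Usage sketch of the matrix above (illustrative only, not part of the
+ * build): casting a sample <smp> to SMP_T_STR, exactly as sample_process()
+ * and sample_fetch_as_type() do below:
+ *
+ *     sample_cast_fct cast = sample_casts[smp->data.type][SMP_T_STR];
+ *
+ *     if (!cast)
+ *             return 0;                 // impossible conversion
+ *     if (cast != c_none && !cast(smp))
+ *             return 0;                 // conversion attempted but failed
+ *     // smp->data.u.str now holds the string form
+ */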
+
+/*
+ * Parse a sample expression configuration:
+ * fetch keyword followed by format conversion keywords.
+ * Returns a pointer to an allocated sample expression structure, or NULL on
+ * error.
+ * <al> is an arg_list serving as a list head to report missing dependencies.
+ * It may be NULL if such dependencies are not allowed. Otherwise, the caller
+ * must have set al->ctx if al is set.
+ * If <endptr> is non-NULL, it will be set to the first unparsed character
+ * (which may be the final '\0') on success. If it is NULL, the expression
+ * must be properly terminated by a '\0', otherwise an error is reported.
+ */
+struct sample_expr *sample_parse_expr(char **str, int *idx, const char *file, int line, char **err_msg, struct arg_list *al, char **endptr)
+{
+ const char *begw; /* beginning of word */
+ const char *endw; /* end of word */
+ const char *endt; /* end of term */
+ struct sample_expr *expr = NULL;
+ struct sample_fetch *fetch;
+ struct sample_conv *conv;
+ unsigned long prev_type;
+ char *fkw = NULL;
+ char *ckw = NULL;
+ int err_arg;
+
+ begw = str[*idx];
+ for (endw = begw; is_idchar(*endw); endw++)
+ ;
+
+ if (endw == begw) {
+ memprintf(err_msg, "missing fetch method");
+ goto out_error;
+ }
+
+ /* keep a copy of the current fetch keyword for error reporting */
+ fkw = my_strndup(begw, endw - begw);
+
+ fetch = find_sample_fetch(begw, endw - begw);
+ if (!fetch) {
+ memprintf(err_msg, "unknown fetch method '%s'", fkw);
+ goto out_error;
+ }
+
+ /* At this point, we have :
+ * - begw : beginning of the keyword
+ * - endw : end of the keyword, first character not part of keyword
+ */
+
+ if (fetch->out_type >= SMP_TYPES) {
+ memprintf(err_msg, "return type of fetch method '%s' is unknown", fkw);
+ goto out_error;
+ }
+ prev_type = fetch->out_type;
+
+ expr = calloc(1, sizeof(*expr));
+ if (!expr)
+ goto out_error;
+
+ LIST_INIT(&(expr->conv_exprs));
+ expr->fetch = fetch;
+ expr->arg_p = empty_arg_list;
+
+ /* Note that we call the argument parser even with an empty string,
+ * this allows it to automatically create entries for mandatory
+ * implicit arguments (eg: local proxy name).
+ */
+ if (al) {
+ al->kw = expr->fetch->kw;
+ al->conv = NULL;
+ }
+ if (make_arg_list(endw, -1, fetch->arg_mask, &expr->arg_p, err_msg, &endt, &err_arg, al) < 0) {
+ memprintf(err_msg, "fetch method '%s' : %s", fkw, *err_msg);
+ goto out_error;
+ }
+
+ /* now endt is our first char not part of the arg list, typically the
+ * comma after the sample fetch name or after the closing parenthesis,
+ * or the NUL char.
+ */
+
+ if (!expr->arg_p) {
+ expr->arg_p = empty_arg_list;
+ }
+ else if (fetch->val_args && !fetch->val_args(expr->arg_p, err_msg)) {
+ memprintf(err_msg, "invalid args in fetch method '%s' : %s", fkw, *err_msg);
+ goto out_error;
+ }
+
+ /* Now process the converters if any. We have two supported syntaxes
+ * for the converters, which can be combined :
+ * - comma-delimited list of converters just after the keyword and args ;
+ * - one converter per keyword
+ * The combination allows each keyword to be a comma-delimited series
+ * of converters.
+ *
+ * We want to process the former first, then the latter. For this we start
+ * from the beginning of the supposed place in the existing conv chain, which
+ * starts at the last comma (endt).
+ */
+
+ while (1) {
+ struct sample_conv_expr *conv_expr;
+ int err_arg;
+ int argcnt;
+
+ if (*endt && *endt != ',') {
+ if (endptr) {
+ /* end found, let's stop here */
+ break;
+ }
+ if (ckw)
+ memprintf(err_msg, "missing comma after converter '%s'", ckw);
+ else
+ memprintf(err_msg, "missing comma after fetch keyword '%s'", fkw);
+ goto out_error;
+ }
+
+ /* FIXME: how long should we support such idiocies ? Maybe we
+ * should already warn ?
+ */
+ while (*endt == ',') /* then trailing commas */
+ endt++;
+
+ begw = endt; /* start of converter */
+
+ if (!*begw) {
+ /* none ? skip to next string */
+ (*idx)++;
+ begw = str[*idx];
+ if (!begw || !*begw)
+ break;
+ }
+
+ for (endw = begw; is_idchar(*endw); endw++)
+ ;
+
+ free(ckw);
+ ckw = my_strndup(begw, endw - begw);
+
+ conv = find_sample_conv(begw, endw - begw);
+ if (!conv) {
+ /* we found an isolated keyword that we don't know, it's not ours */
+ if (begw == str[*idx]) {
+ endt = begw;
+ break;
+ }
+ memprintf(err_msg, "unknown converter '%s'", ckw);
+ goto out_error;
+ }
+
+ if (conv->in_type >= SMP_TYPES || conv->out_type >= SMP_TYPES) {
+ memprintf(err_msg, "return type of converter '%s' is unknown", ckw);
+ goto out_error;
+ }
+
+ /* If impossible type conversion */
+ if (!sample_casts[prev_type][conv->in_type]) {
+ memprintf(err_msg, "converter '%s' cannot be applied", ckw);
+ goto out_error;
+ }
+
+ prev_type = conv->out_type;
+ conv_expr = calloc(1, sizeof(*conv_expr));
+ if (!conv_expr)
+ goto out_error;
+
+ LIST_APPEND(&(expr->conv_exprs), &(conv_expr->list));
+ conv_expr->conv = conv;
+
+ if (al) {
+ al->kw = expr->fetch->kw;
+ al->conv = conv_expr->conv->kw;
+ }
+ argcnt = make_arg_list(endw, -1, conv->arg_mask, &conv_expr->arg_p, err_msg, &endt, &err_arg, al);
+ if (argcnt < 0) {
+ memprintf(err_msg, "invalid arg %d in converter '%s' : %s", err_arg+1, ckw, *err_msg);
+ goto out_error;
+ }
+
+ if (argcnt && !conv->arg_mask) {
+ memprintf(err_msg, "converter '%s' does not support any args", ckw);
+ goto out_error;
+ }
+
+ if (!conv_expr->arg_p)
+ conv_expr->arg_p = empty_arg_list;
+
+ if (conv->val_args && !conv->val_args(conv_expr->arg_p, conv, file, line, err_msg)) {
+ memprintf(err_msg, "invalid args in converter '%s' : %s", ckw, *err_msg);
+ goto out_error;
+ }
+ }
+
+ if (endptr) {
+ /* end found, let's stop here */
+ *endptr = (char *)endt;
+ }
+
+ out:
+ free(fkw);
+ free(ckw);
+ return expr;
+
+out_error:
+ release_sample_expr(expr);
+ expr = NULL;
+ goto out;
+}
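+
+/* Call sketch (illustrative only, not part of the build): parsing the
+ * expression "src,ipmask(24)" from a configuration line. <args> is assumed
+ * to be the parser's NULL-terminated word array, <px> the current proxy, and
+ * <file>/<line> the config location:
+ *
+ *     char *err = NULL;
+ *     int idx = 0;   // index of the word holding the expression
+ *     struct sample_expr *expr;
+ *
+ *     expr = sample_parse_expr(args, &idx, file, line, &err,
+ *                              &px->conf.args, NULL);
+ *     if (!expr) {
+ *             // report <err> to the user, then free(err)
+ *     }
+ */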
+
+/*
+ * Process a fetch + format conversion as defined by the sample expression
+ * <expr> on the request or response, considering the <opt> parameter.
+ * Returns a pointer to a typed sample structure containing the result, or
+ * NULL if the sample is not found or when the format conversion failed.
+ * If <p> is not NULL, the function stores the result in the structure
+ * pointed to by <p>; if <p> is NULL, it returns a pointer to a static
+ * sample structure.
+ *
+ * Note: the fetch functions are required to properly set the return type. The
+ * conversion functions must do so too. However the cast functions do not need
+ * to since they're made to cast multiple types according to what is required.
+ *
+ * The caller may indicate in <opt> if it considers the result final or not.
+ * The caller needs to check the SMP_F_MAY_CHANGE flag in p->flags to verify
+ * if the result is stable or not, according to the following table :
+ *
+ * return MAY_CHANGE FINAL Meaning for the sample
+ * NULL 0 * Not present and will never be (eg: header)
+ * NULL 1 0 Not present yet, could change (eg: POST param)
+ * NULL 1 1 Not present yet, will not change anymore
+ * smp 0 * Present and will not change (eg: header)
+ * smp 1 0 Present, may change (eg: request length)
+ * smp 1 1 Present, last known value (eg: request length)
+ */
+struct sample *sample_process(struct proxy *px, struct session *sess,
+ struct stream *strm, unsigned int opt,
+ struct sample_expr *expr, struct sample *p)
+{
+ struct sample_conv_expr *conv_expr;
+
+ if (p == NULL) {
+ p = &temp_smp;
+ memset(p, 0, sizeof(*p));
+ }
+
+ smp_set_owner(p, px, sess, strm, opt);
+ if (!expr->fetch->process(expr->arg_p, p, expr->fetch->kw, expr->fetch->private))
+ return NULL;
+
+ list_for_each_entry(conv_expr, &expr->conv_exprs, list) {
+ /* we want to ensure that p->type can be cast into
+ * conv_expr->conv->in_type. We have 3 possibilities :
+ * - NULL => not castable.
+ * - c_none => nothing to do (let's optimize it)
+ * - other => apply cast and prepare to fail
+ */
+ if (!sample_casts[p->data.type][conv_expr->conv->in_type])
+ return NULL;
+
+ if (sample_casts[p->data.type][conv_expr->conv->in_type] != c_none &&
+ !sample_casts[p->data.type][conv_expr->conv->in_type](p))
+ return NULL;
+
+ /* OK cast succeeded */
+
+ if (!conv_expr->conv->process(conv_expr->arg_p, p, conv_expr->conv->private))
+ return NULL;
+ }
+ return p;
+}
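+
+/* Call sketch (illustrative only, not part of the build): evaluating a
+ * previously parsed expression at request time, letting the function use
+ * its static sample storage by passing a NULL <p>:
+ *
+ *     struct sample *smp;
+ *
+ *     smp = sample_process(px, sess, strm,
+ *                          SMP_OPT_DIR_REQ | SMP_OPT_FINAL, expr, NULL);
+ *     if (!smp)
+ *             return 0;   // not found, or a conversion failed
+ *     // smp->data.type tells which member of smp->data.u to read
+ */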
+
+/*
+ * Resolve all remaining arguments in proxy <p>. Returns the number of
+ * errors or 0 if everything is fine. If at least one error is met, it will
+ * be appended to *err. If *err==NULL it will be allocated first.
+ */
+int smp_resolve_args(struct proxy *p, char **err)
+{
+ struct arg_list *cur, *bak;
+ const char *ctx, *where;
+ const char *conv_ctx, *conv_pre, *conv_pos;
+ struct userlist *ul;
+ struct my_regex *reg;
+ struct arg *arg;
+ int cfgerr = 0;
+ int rflags;
+
+ list_for_each_entry_safe(cur, bak, &p->conf.args.list, list) {
+ struct proxy *px;
+ struct server *srv;
+ struct stktable *t;
+ char *pname, *sname, *stktname;
+ char *err2;
+
+ arg = cur->arg;
+
+ /* prepare output messages */
+ conv_pre = conv_pos = conv_ctx = "";
+ if (cur->conv) {
+ conv_ctx = cur->conv;
+ conv_pre = "conversion keyword '";
+ conv_pos = "' for ";
+ }
+
+ where = "in";
+ ctx = "sample fetch keyword";
+ switch (cur->ctx) {
+ case ARGC_STK: where = "in stick rule in"; break;
+ case ARGC_TRK: where = "in tracking rule in"; break;
+ case ARGC_LOG: where = "in log-format string in"; break;
+ case ARGC_LOGSD: where = "in log-format-sd string in"; break;
+ case ARGC_HRQ: where = "in http-request expression in"; break;
+ case ARGC_HRS: where = "in http-response expression in"; break;
+ case ARGC_UIF: where = "in unique-id-format string in"; break;
+ case ARGC_RDR: where = "in redirect format string in"; break;
+ case ARGC_CAP: where = "in capture rule in"; break;
+ case ARGC_ACL: ctx = "ACL keyword"; break;
+ case ARGC_SRV: where = "in server directive in"; break;
+ case ARGC_SPOE: where = "in spoe-message directive in"; break;
+ case ARGC_UBK: where = "in use_backend expression in"; break;
+ case ARGC_USRV: where = "in use-server or balance expression in"; break;
+ case ARGC_HERR: where = "in http-error directive in"; break;
+ case ARGC_OT: where = "in ot-scope directive in"; break;
+ case ARGC_TCO: where = "in tcp-request connection expression in"; break;
+ case ARGC_TSE: where = "in tcp-request session expression in"; break;
+ case ARGC_TRQ: where = "in tcp-request content expression in"; break;
+ case ARGC_TRS: where = "in tcp-response content expression in"; break;
+ case ARGC_TCK: where = "in tcp-check expression in"; break;
+ case ARGC_CFG: where = "in configuration expression in"; break;
+ case ARGC_CLI: where = "in CLI expression in"; break;
+ }
+
+ /* set a few default settings */
+ px = p;
+ pname = p->id;
+
+ switch (arg->type) {
+ case ARGT_SRV:
+ if (!arg->data.str.data) {
+ memprintf(err, "%sparsing [%s:%d]: missing server name in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ continue;
+ }
+
+ /* we support two formats : "bck/srv" and "srv" */
+ sname = strrchr(arg->data.str.area, '/');
+
+ if (sname) {
+ *sname++ = '\0';
+ pname = arg->data.str.area;
+
+ px = proxy_be_by_name(pname);
+ if (!px) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find proxy '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+ }
+ else {
+ if (px->cap & PR_CAP_DEF) {
+ memprintf(err, "%sparsing [%s:%d]: backend name must be set in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+ sname = arg->data.str.area;
+ }
+
+ srv = findserver(px, sname);
+ if (!srv) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find server '%s' in proxy '%s', referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, sname, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ srv->flags |= SRV_F_NON_PURGEABLE;
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.srv = srv;
+ break;
+
+ case ARGT_FE:
+ if (arg->data.str.data) {
+ pname = arg->data.str.area;
+ px = proxy_fe_by_name(pname);
+ }
+
+ if (!px) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find frontend '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!(px->cap & PR_CAP_FE)) {
+ memprintf(err, "%sparsing [%s:%d]: proxy '%s', referenced in arg %d of %s%s%s%s '%s' %s proxy '%s', has no frontend capability.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.prx = px;
+ break;
+
+ case ARGT_BE:
+ if (arg->data.str.data) {
+ pname = arg->data.str.area;
+ px = proxy_be_by_name(pname);
+ }
+
+ if (!px) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find backend '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!(px->cap & PR_CAP_BE)) {
+ memprintf(err, "%sparsing [%s:%d]: proxy '%s', referenced in arg %d of %s%s%s%s '%s' %s proxy '%s', has no backend capability.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.prx = px;
+ break;
+
+ case ARGT_TAB:
+ if (arg->data.str.data)
+ stktname = arg->data.str.area;
+ else {
+ if (px->cap & PR_CAP_DEF) {
+ memprintf(err, "%sparsing [%s:%d]: table name must be set in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+ stktname = px->id;
+ }
+
+ t = stktable_find_by_name(stktname);
+ if (!t) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find table '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, stktname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!t->size) {
+ memprintf(err, "%sparsing [%s:%d]: no table in proxy '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, stktname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!in_proxies_list(t->proxies_list, p)) {
+ p->next_stkt_ref = t->proxies_list;
+ t->proxies_list = p;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.t = t;
+ break;
+
+ case ARGT_USR:
+ if (!arg->data.str.data) {
+ memprintf(err, "%sparsing [%s:%d]: missing userlist name in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (p->uri_auth && p->uri_auth->userlist &&
+ strcmp(p->uri_auth->userlist->name, arg->data.str.area) == 0)
+ ul = p->uri_auth->userlist;
+ else
+ ul = auth_find_userlist(arg->data.str.area);
+
+ if (!ul) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find userlist '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ arg->data.str.area,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.usr = ul;
+ break;
+
+ case ARGT_REG:
+ if (!arg->data.str.data) {
+ memprintf(err, "%sparsing [%s:%d]: missing regex in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ continue;
+ }
+
+ rflags = 0;
+ rflags |= (arg->type_flags & ARGF_REG_ICASE) ? REG_ICASE : 0;
+ err2 = NULL;
+
+ if (!(reg = regex_comp(arg->data.str.area, !(rflags & REG_ICASE), 1 /* capture substr */, &err2))) {
+ memprintf(err, "%sparsing [%s:%d]: error in regex '%s' in arg %d of %s%s%s%s '%s' %s proxy '%s' : %s.\n",
+ *err ? *err : "", cur->file, cur->line,
+ arg->data.str.area,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id, err2);
+ cfgerr++;
+ continue;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.reg = reg;
+ break;
+
+ }
+
+ LIST_DELETE(&cur->list);
+ free(cur);
+ } /* end of args processing */
+
+ return cfgerr;
+}
+
+/*
+ * Process a fetch + format conversion as defined by the sample expression
+ * <expr> on request or response considering the <opt> parameter. The output is
+ * not explicitly set to <smp_type>, but shall be compatible with it as
+ * specified by the 'sample_casts' table. If a stable sample can be fetched, or an
+ * unstable one when <opt> contains SMP_OPT_FINAL, the sample is converted and
+ * returned without the SMP_F_MAY_CHANGE flag. If an unstable sample is found
+ * and <opt> does not contain SMP_OPT_FINAL, then the sample is returned as-is
+ * with its SMP_F_MAY_CHANGE flag so that the caller can check it and decide to
+ * take actions (eg: wait longer). If a sample could not be found or could not
+ * be converted, NULL is returned. The caller MUST NOT use the sample if the
+ * SMP_F_MAY_CHANGE flag is present, as it is used only as a hint that there is
+ * still hope to get it after waiting longer, and is not converted to string.
+ * The possible output combinations are the following :
+ *
+ * return MAY_CHANGE FINAL Meaning for the sample
+ * NULL * * Not present and will never be (eg: header)
+ * smp 0 * Final value converted (eg: header)
+ * smp 1 0 Not present yet, may appear later (eg: header)
+ * smp 1 1 never happens (either flag is cleared on output)
+ */
+struct sample *sample_fetch_as_type(struct proxy *px, struct session *sess,
+ struct stream *strm, unsigned int opt,
+ struct sample_expr *expr, int smp_type)
+{
+ struct sample *smp = &temp_smp;
+
+ memset(smp, 0, sizeof(*smp));
+
+ if (!sample_process(px, sess, strm, opt, expr, smp)) {
+ if ((smp->flags & SMP_F_MAY_CHANGE) && !(opt & SMP_OPT_FINAL))
+ return smp;
+ return NULL;
+ }
+
+ if (!sample_casts[smp->data.type][smp_type])
+ return NULL;
+
+ if (sample_casts[smp->data.type][smp_type] != c_none &&
+ !sample_casts[smp->data.type][smp_type](smp))
+ return NULL;
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ return smp;
+}
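+
+/* Illustrative caller sketch (the rule structure is hypothetical, shown only
+ * to clarify the contract above):
+ *
+ *     smp = sample_fetch_as_type(px, sess, strm,
+ *                                SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ *                                rule->expr, SMP_T_STR);
+ *     if (!smp)
+ *         return 0; // not present and never will be
+ *
+ * With SMP_OPT_FINAL set, a non-NULL return never carries SMP_F_MAY_CHANGE.
+ */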
+
+static void release_sample_arg(struct arg *p)
+{
+ struct arg *p_back = p;
+
+ if (!p)
+ return;
+
+ while (p->type != ARGT_STOP) {
+ if (p->type == ARGT_STR || p->unresolved) {
+ chunk_destroy(&p->data.str);
+ p->unresolved = 0;
+ }
+ else if (p->type == ARGT_REG) {
+ regex_free(p->data.reg);
+ p->data.reg = NULL;
+ }
+ p++;
+ }
+
+ if (p_back != empty_arg_list)
+ free(p_back);
+}
+
+void release_sample_expr(struct sample_expr *expr)
+{
+ struct sample_conv_expr *conv_expr, *conv_exprb;
+
+ if (!expr)
+ return;
+
+ list_for_each_entry_safe(conv_expr, conv_exprb, &expr->conv_exprs, list) {
+ LIST_DELETE(&conv_expr->list);
+ release_sample_arg(conv_expr->arg_p);
+ free(conv_expr);
+ }
+
+ release_sample_arg(expr->arg_p);
+ free(expr);
+}
+
+/*****************************************************************/
+/* Sample format convert functions */
+/* These functions set the data type on return. */
+/*****************************************************************/
+
+static int sample_conv_debug(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i;
+ struct sample tmp;
+ struct buffer *buf;
+ struct sink *sink;
+ struct ist line;
+ char *pfx;
+
+ buf = alloc_trash_chunk();
+ if (!buf)
+ goto end;
+
+ sink = (struct sink *)arg_p[1].data.ptr;
+ BUG_ON(!sink);
+
+ pfx = arg_p[0].data.str.area;
+ BUG_ON(!pfx);
+
+ chunk_printf(buf, "[debug] %s: type=%s ", pfx, smp_to_type[smp->data.type]);
+ if (!sample_casts[smp->data.type][SMP_T_STR])
+ goto nocast;
+
+ /* Copy the sample fetch. This marks the copy as const; the
+ * cast will copy the data if a transformation is required.
+ */
+ memcpy(&tmp, smp, sizeof(struct sample));
+ tmp.flags = SMP_F_CONST;
+
+ if (!sample_casts[smp->data.type][SMP_T_STR](&tmp))
+ goto nocast;
+
+ /* Display the printable chars; others are replaced with a dot. */
+ b_putchr(buf, '<');
+ for (i = 0; i < tmp.data.u.str.data; i++) {
+ if (isprint((unsigned char)tmp.data.u.str.area[i]))
+ b_putchr(buf, tmp.data.u.str.area[i]);
+ else
+ b_putchr(buf, '.');
+ }
+ b_putchr(buf, '>');
+
+ done:
+ line = ist2(buf->area, buf->data);
+ sink_write(sink, &line, 1, 0, 0, NULL);
+ end:
+ free_trash_chunk(buf);
+ return 1;
+ nocast:
+ chunk_appendf(buf, "(undisplayable)");
+ goto done;
+}
+
+/* This function checks the "debug" converter's arguments. */
+static int smp_check_debug(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ const char *name = "buf0";
+ struct sink *sink = NULL;
+
+ if (args[0].type != ARGT_STR) {
+ /* optional prefix */
+ args[0].data.str.area = "";
+ args[0].data.str.data = 0;
+ }
+
+ if (args[1].type == ARGT_STR)
+ name = args[1].data.str.area;
+
+ sink = sink_find(name);
+ if (!sink) {
+ memprintf(err, "No such sink '%s'", name);
+ return 0;
+ }
+
+ chunk_destroy(&args[1].data.str);
+ args[1].type = ARGT_PTR;
+ args[1].data.ptr = sink;
+ return 1;
+}
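+
+/* Illustrative configuration use (sink names such as "stderr" or the default
+ * "buf0" are assumed to be available):
+ *
+ *     http-request set-var(txn.host) req.hdr(host),debug(hosthdr,stderr)
+ *
+ * traces the intermediate value of the expression to the chosen sink.
+ */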
+
+static int sample_conv_base642bin(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int bin_len;
+
+ trash->data = 0;
+ bin_len = base64dec(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (bin_len < 0)
+ return 0;
+
+ trash->data = bin_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int sample_conv_base64url2bin(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int bin_len;
+
+ trash->data = 0;
+ bin_len = base64urldec(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (bin_len < 0)
+ return 0;
+
+ trash->data = bin_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int sample_conv_bin2base64(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int b64_len;
+
+ trash->data = 0;
+ b64_len = a2base64(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (b64_len < 0)
+ return 0;
+
+ trash->data = b64_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int sample_conv_bin2base64url(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int b64_len;
+
+ trash->data = 0;
+ b64_len = a2base64url(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (b64_len < 0)
+ return 0;
+
+ trash->data = b64_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
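+
+/* Illustrative configuration use of the four base64 converters above
+ * (registered as b64dec/base64 and ub64dec/ub64enc; example only):
+ *
+ *     http-request set-var(txn.plain) str(SGVsbG8=),b64dec   # yields "Hello"
+ */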
+
+/* This function returns a sample struct filled with the conversion of variable
+ * <var> to sample type <type> (SMP_T_*), via a cast to the target type. If the
+ * variable cannot be retrieved or cast, 0 is returned, otherwise 1.
+ *
+ * Keep in mind that the sample content may be written to a pre-allocated
+ * trash chunk as returned by get_trash_chunk().
+ */
+int sample_conv_var2smp(const struct var_desc *var, struct sample *smp, int type)
+{
+ if (!vars_get_by_desc(var, smp, NULL))
+ return 0;
+ if (!sample_casts[smp->data.type][type])
+ return 0;
+ if (!sample_casts[smp->data.type][type](smp))
+ return 0;
+ return 1;
+}
+
+static int sample_conv_sha1(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ blk_SHA_CTX ctx;
+ struct buffer *trash = get_trash_chunk();
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ blk_SHA1_Init(&ctx);
+ blk_SHA1_Update(&ctx, smp->data.u.str.area, smp->data.u.str.data);
+ blk_SHA1_Final((unsigned char *) trash->area, &ctx);
+
+ trash->data = 20;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/* This function returns a sample struct filled with an <arg> content.
+ * If the <arg> contains a string, it is returned in the sample flagged as
+ * SMP_F_CONST. If the <arg> contains a variable descriptor, the sample is
+ * filled with the content of the variable by using vars_get_by_desc().
+ *
+ * Keep in mind that the sample content may be written to a pre-allocated
+ * trash chunk as returned by get_trash_chunk().
+ *
+ * This function returns 0 if an error occurs, otherwise it returns 1.
+ */
+int sample_conv_var2smp_str(const struct arg *arg, struct sample *smp)
+{
+ switch (arg->type) {
+ case ARGT_STR:
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = arg->data.str;
+ smp->flags = SMP_F_CONST;
+ return 1;
+ case ARGT_VAR:
+ return sample_conv_var2smp(&arg->data.var, smp, SMP_T_STR);
+ default:
+ return 0;
+ }
+}
+
+static int sample_conv_be2dec_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (args[1].data.sint <= 0 || args[1].data.sint > sizeof(unsigned long long)) {
+ memprintf(err, "chunk_size out of [1..%u] range (%lld)", (uint)sizeof(unsigned long long), args[1].data.sint);
+ return 0;
+ }
+
+ if (args[2].data.sint != 0 && args[2].data.sint != 1) {
+ memprintf(err, "Unsupported truncate value (%lld)", args[2].data.sint);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Converts big-endian binary input sample to a string containing an unsigned
+ * integer number per <chunk_size> input bytes separated with <separator>.
+ * Optional <truncate> flag indicates if input is truncated at <chunk_size>
+ * boundaries.
+ * Arguments: separator (string), chunk_size (integer), truncate (0,1)
+ */
+static int sample_conv_be2dec(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ const int last = args[2].data.sint ? smp->data.u.str.data - args[1].data.sint + 1 : smp->data.u.str.data;
+ int max_size = trash->size - 2;
+ int i;
+ int start;
+ int ptr = 0;
+ unsigned long long number;
+ char *pos;
+
+ trash->data = 0;
+
+ while (ptr < last && trash->data <= max_size) {
+ start = trash->data;
+ if (ptr) {
+ /* Add separator */
+ memcpy(trash->area + trash->data, args[0].data.str.area, args[0].data.str.data);
+ trash->data += args[0].data.str.data;
+ }
+ else
+ max_size -= args[0].data.str.data;
+
+ /* Add integer */
+ for (number = 0, i = 0; i < args[1].data.sint && ptr < smp->data.u.str.data; i++)
+ number = (number << 8) + (unsigned char)smp->data.u.str.area[ptr++];
+
+ pos = ulltoa(number, trash->area + trash->data, trash->size - trash->data);
+ if (pos)
+ trash->data = pos - trash->area;
+ else {
+ trash->data = start;
+ break;
+ }
+ }
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
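+
+/* Illustrative example: with the input bytes 01 02 03 04,
+ *
+ *     bin(01020304),be2dec(:,2)      yields "258:772"
+ *
+ * since 0x0102 = 258 and 0x0304 = 772.
+ */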
+
+static int sample_conv_be2hex_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (args[1].data.sint <= 0 && (args[0].data.str.data > 0 || args[2].data.sint != 0)) {
+ memprintf(err, "chunk_size needs to be positive (%lld)", args[1].data.sint);
+ return 0;
+ }
+
+ if (args[2].data.sint != 0 && args[2].data.sint != 1) {
+ memprintf(err, "Unsupported truncate value (%lld)", args[2].data.sint);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Converts big-endian binary input sample to a hex string containing two hex
+ * digits per input byte. <separator> is put every <chunk_size> binary input
+ * bytes if specified. Optional <truncate> flag indicates if input is truncated
+ * at <chunk_size> boundaries.
+ * Arguments: separator (string), chunk_size (integer), truncate (0,1)
+ */
+static int sample_conv_be2hex(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int chunk_size = args[1].data.sint;
+ const int last = args[2].data.sint ? smp->data.u.str.data - chunk_size + 1 : smp->data.u.str.data;
+ int i;
+ int max_size;
+ int ptr = 0;
+ unsigned char c;
+
+ trash->data = 0;
+ if (args[0].data.str.data == 0 && args[2].data.sint == 0)
+ chunk_size = smp->data.u.str.data;
+ max_size = trash->size - 2 * chunk_size;
+
+ while (ptr < last && trash->data <= max_size) {
+ if (ptr) {
+ /* Add separator */
+ memcpy(trash->area + trash->data, args[0].data.str.area, args[0].data.str.data);
+ trash->data += args[0].data.str.data;
+ }
+ else
+ max_size -= args[0].data.str.data;
+
+ /* Add hex */
+ for (i = 0; i < chunk_size && ptr < smp->data.u.str.data; i++) {
+ c = smp->data.u.str.area[ptr++];
+ trash->area[trash->data++] = hextab[(c >> 4) & 0xF];
+ trash->area[trash->data++] = hextab[c & 0xF];
+ }
+ }
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
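+
+/* Illustrative examples:
+ *
+ *     bin(01020304),be2hex(:,2)      yields "0102:0304"
+ *     bin(01020304),be2hex(-,1)      yields "01-02-03-04"
+ */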
+
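+/* converts the binary input into a hex string containing two hex digits per
+ * input byte, with no separator.
+ */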
+static int sample_conv_bin2hex(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ unsigned char c;
+ int ptr = 0;
+
+ trash->data = 0;
+ while (ptr < smp->data.u.str.data && trash->data <= trash->size - 2) {
+ c = smp->data.u.str.area[ptr++];
+ trash->area[trash->data++] = hextab[(c >> 4) & 0xF];
+ trash->area[trash->data++] = hextab[c & 0xF];
+ }
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
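+/* converts a hex string into its signed integer value; fails on any
+ * non-hex character.
+ */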
+static int sample_conv_hex2int(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ long long int n = 0;
+ int i, c;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if ((c = hex2i(smp->data.u.str.area[i])) < 0)
+ return 0;
+ n = (n << 4) + c;
+ }
+
+ smp->data.u.sint = n;
+ smp->data.type = SMP_T_SINT;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_djb2(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_djb2(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
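+
+/* Illustrative use of the hash converters (djb2 here; sdbm, wt6 and crc32
+ * below behave the same way), e.g. for bucketing:
+ *
+ *     http-request set-var(txn.bucket) req.hdr(host),djb2(1),mod(16)
+ *
+ * The optional argument set to 1 additionally applies full_hash().
+ */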
+
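+/* returns the length of the input string as a signed integer */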
+static int sample_conv_length(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i = smp->data.u.str.data;
+ smp->data.u.sint = i;
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+
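+/* turns ASCII uppercase letters of the input string to lowercase in place */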
+static int sample_conv_str2lower(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i;
+
+ if (!smp_make_rw(smp))
+ return 0;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if ((smp->data.u.str.area[i] >= 'A') && (smp->data.u.str.area[i] <= 'Z'))
+ smp->data.u.str.area[i] += 'a' - 'A';
+ }
+ return 1;
+}
+
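+/* turns ASCII lowercase letters of the input string to uppercase in place */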
+static int sample_conv_str2upper(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i;
+
+ if (!smp_make_rw(smp))
+ return 0;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if ((smp->data.u.str.area[i] >= 'a') && (smp->data.u.str.area[i] <= 'z'))
+ smp->data.u.str.area[i] += 'A' - 'a';
+ }
+ return 1;
+}
+
+/* takes the IPv4 mask in args[0] and an optional IPv6 mask in args[1] */
+static int sample_conv_ipmask(const struct arg *args, struct sample *smp, void *private)
+{
+ /* Attempt to convert to IPv4 to apply the correct mask. */
+ c_ipv62ip(smp);
+
+ if (smp->data.type == SMP_T_IPV4) {
+ smp->data.u.ipv4.s_addr &= args[0].data.ipv4.s_addr;
+ smp->data.type = SMP_T_IPV4;
+ }
+ else if (smp->data.type == SMP_T_IPV6) {
+ /* IPv6 cannot be converted without an IPv6 mask. */
+ if (args[1].type != ARGT_IPV6)
+ return 0;
+
+ write_u64(&smp->data.u.ipv6.s6_addr[0],
+ read_u64(&smp->data.u.ipv6.s6_addr[0]) & read_u64(&args[1].data.ipv6.s6_addr[0]));
+ write_u64(&smp->data.u.ipv6.s6_addr[8],
+ read_u64(&smp->data.u.ipv6.s6_addr[8]) & read_u64(&args[1].data.ipv6.s6_addr[8]));
+ smp->data.type = SMP_T_IPV6;
+ }
+
+ return 1;
+}
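+
+/* Illustrative example: group clients by /24 (IPv4) or /64 (IPv6):
+ *
+ *     http-request set-header X-Net %[src,ipmask(255.255.255.0,ffff:ffff:ffff:ffff::)]
+ */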
+
+/* Takes a UINT value on input, supposed to represent the time since EPOCH,
+ * adds an optional offset found in args[1] and emits a string representing
+ * the local time in the format specified in args[0] using strftime().
+ */
+static int sample_conv_ltime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ /* With high numbers, the date returned can be negative; the 55-bit mask prevents this. */
+ time_t curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
+ struct tm tm;
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ get_localtime(curr_date, &tm);
+
+ temp = get_trash_chunk();
+ temp->data = strftime(temp->area, temp->size, args[0].data.str.area, &tm);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
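+
+/* Illustrative example:
+ *
+ *     http-request set-header X-Local-Time %[date,ltime(%Y/%m/%d/%H:%M)]
+ *
+ * emits something like "2023/05/13/14:29" in the server's local time.
+ */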
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_sdbm(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_sdbm(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* Takes a UINT value on input, supposed to represent the time since EPOCH,
+ * adds an optional offset found in args[1] and emits a string representing
+ * the UTC date in the format specified in args[0] using strftime().
+ */
+static int sample_conv_utime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ /* With high numbers, the date returned can be negative; the 55-bit mask prevents this. */
+ time_t curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
+ struct tm tm;
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ get_gmtime(curr_date, &tm);
+
+ temp = get_trash_chunk();
+ temp->data = strftime(temp->area, temp->size, args[0].data.str.area, &tm);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
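+
+/* Illustrative example:
+ *
+ *     http-request set-header X-Expires %[date(3600),utime(%Y-%m-%dT%H:%M:%SZ)]
+ *
+ * emits the UTC time one hour from now in ISO 8601 format.
+ */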
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_wt6(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_wt6(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into a 32-bit unsigned int using XXH32.
+ * The seed of the hash defaults to 0 but can be changed in argument 1.
+ */
+static int sample_conv_xxh32(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned int seed;
+
+ if (arg_p->data.sint)
+ seed = arg_p->data.sint;
+ else
+ seed = 0;
+ smp->data.u.sint = XXH32(smp->data.u.str.area, smp->data.u.str.data,
+ seed);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into a 64-bit unsigned int using XXH64.
+ * The hash function returns a 64-bit unsigned integer, but the sample
+ * storage of haproxy only offers a 64-bit signed one, so the value is
+ * cast to signed. This cast doesn't affect the hash distribution.
+ * The seed of the hash defaults to 0 but can be changed in argument 1.
+ */
+static int sample_conv_xxh64(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned long long int seed;
+
+ if (arg_p->data.sint)
+ seed = (unsigned long long int)arg_p->data.sint;
+ else
+ seed = 0;
+ smp->data.u.sint = (long long int)XXH64(smp->data.u.str.area,
+ smp->data.u.str.data, seed);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
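+/* hashes the binary input into a 64-bit unsigned int using XXH3, with the
+ * same signed cast as xxh64 above. The seed defaults to 0 but can be
+ * changed in argument 1.
+ */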
+static int sample_conv_xxh3(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned long long int seed;
+
+ if (arg_p->data.sint)
+ seed = (unsigned long long int)arg_p->data.sint;
+ else
+ seed = 0;
+ smp->data.u.sint = (long long int)XXH3(smp->data.u.str.area,
+ smp->data.u.str.data, seed);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_crc32(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_crc32(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into crc32c (RFC 4960, Appendix B) */
+static int sample_conv_crc32c(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_crc32c(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* This function escapes special JSON characters. The returned string can be
+ * safely placed between two '"' and used as a JSON string. A JSON string is
+ * defined like this:
+ *
+ *    any Unicode character except '"' or '\' or control character
+ *    \", \\, \/, \b, \f, \n, \r, \t, \u + four-hex-digits
+ *
+ * The enum input_type contains all the allowed modes for decoding the input
+ * string.
+ */
+enum input_type {
+ IT_ASCII = 0,
+ IT_UTF8,
+ IT_UTF8S,
+ IT_UTF8P,
+ IT_UTF8PS,
+};
+
+static int sample_conv_json_check(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ enum input_type type;
+
+ if (strcmp(arg->data.str.area, "") == 0)
+ type = IT_ASCII;
+ else if (strcmp(arg->data.str.area, "ascii") == 0)
+ type = IT_ASCII;
+ else if (strcmp(arg->data.str.area, "utf8") == 0)
+ type = IT_UTF8;
+ else if (strcmp(arg->data.str.area, "utf8s") == 0)
+ type = IT_UTF8S;
+ else if (strcmp(arg->data.str.area, "utf8p") == 0)
+ type = IT_UTF8P;
+ else if (strcmp(arg->data.str.area, "utf8ps") == 0)
+ type = IT_UTF8PS;
+ else {
+ memprintf(err, "Unexpected input code type. "
+ "Allowed value are 'ascii', 'utf8', 'utf8s', 'utf8p' and 'utf8ps'");
+ return 0;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->type = ARGT_SINT;
+ arg->data.sint = type;
+ return 1;
+}
+
+static int sample_conv_json(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ char _str[7]; /* \u + 4 hex digits + null char for snprintf. */
+ const char *str;
+ int len;
+ enum input_type input_type = IT_ASCII;
+ unsigned int c;
+ unsigned int ret;
+ char *p;
+
+ input_type = arg_p->data.sint;
+
+ temp = get_trash_chunk();
+ temp->data = 0;
+
+ p = smp->data.u.str.area;
+ while (p < smp->data.u.str.area + smp->data.u.str.data) {
+
+ if (input_type == IT_ASCII) {
+ /* Read input as ASCII. */
+ c = *(unsigned char *)p;
+ p++;
+ }
+ else {
+ /* Read input as UTF8. */
+ ret = utf8_next(p,
+ smp->data.u.str.data - ( p - smp->data.u.str.area),
+ &c);
+ p += utf8_return_length(ret);
+
+ if (input_type == IT_UTF8 && utf8_return_code(ret) != UTF8_CODE_OK)
+ return 0;
+ if (input_type == IT_UTF8S && utf8_return_code(ret) != UTF8_CODE_OK)
+ continue;
+ if (input_type == IT_UTF8P && utf8_return_code(ret) & (UTF8_CODE_INVRANGE|UTF8_CODE_BADSEQ))
+ return 0;
+ if (input_type == IT_UTF8PS && utf8_return_code(ret) & (UTF8_CODE_INVRANGE|UTF8_CODE_BADSEQ))
+ continue;
+
+ /* Check too big values. */
+ if ((unsigned int)c > 0xffff) {
+ if (input_type == IT_UTF8 || input_type == IT_UTF8P)
+ return 0;
+ continue;
+ }
+ }
+
+ /* Convert character. */
+ if (c == '"') {
+ len = 2;
+ str = "\\\"";
+ }
+ else if (c == '\\') {
+ len = 2;
+ str = "\\\\";
+ }
+ else if (c == '/') {
+ len = 2;
+ str = "\\/";
+ }
+ else if (c == '\b') {
+ len = 2;
+ str = "\\b";
+ }
+ else if (c == '\f') {
+ len = 2;
+ str = "\\f";
+ }
+ else if (c == '\r') {
+ len = 2;
+ str = "\\r";
+ }
+ else if (c == '\n') {
+ len = 2;
+ str = "\\n";
+ }
+ else if (c == '\t') {
+ len = 2;
+ str = "\\t";
+ }
+ else if (c > 0xff || !isprint((unsigned char)c)) {
+ /* isprint() can generate a segfault if c is too big: the man page
+ * says that c must have the value of an unsigned char or EOF.
+ */
+ len = 6;
+ _str[0] = '\\';
+ _str[1] = 'u';
+ snprintf(&_str[2], 5, "%04x", (unsigned short)c);
+ str = _str;
+ }
+ else {
+ len = 1;
+ _str[0] = c;
+ str = _str;
+ }
+
+ /* Check length */
+ if (temp->data + len > temp->size)
+ return 0;
+
+ /* Copy string. */
+ memcpy(temp->area + temp->data, str, len);
+ temp->data += len;
+ }
+
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+
+ return 1;
+}
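+
+/* Illustrative example: escape a captured header for a JSON log format
+ * (mirrors the converter's documented use):
+ *
+ *     capture request header Host len 15
+ *     log-format {"host":"%[capture.req.hdr(0),json(utf8s)]"}
+ */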
+
+/* This sample function extracts some bytes from an input buffer.
+ * The first arg is the offset.
+ * The optional second arg is the length to truncate the output to. */
+static int sample_conv_bytes(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ if (smp->data.u.str.data <= arg_p[0].data.sint) {
+ smp->data.u.str.data = 0;
+ return 1;
+ }
+
+ if (smp->data.u.str.size)
+ smp->data.u.str.size -= arg_p[0].data.sint;
+ smp->data.u.str.data -= arg_p[0].data.sint;
+ smp->data.u.str.area += arg_p[0].data.sint;
+
+ if ((arg_p[1].type == ARGT_SINT) && (arg_p[1].data.sint < smp->data.u.str.data))
+ smp->data.u.str.data = arg_p[1].data.sint;
+
+ return 1;
+}
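+
+/* Illustrative example:
+ *
+ *     str(hello),bytes(1,3)      yields "ell" (skip 1 byte, keep 3)
+ */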
+
+static int sample_conv_field_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct arg *arg = args;
+
+ if (arg->type != ARGT_SINT) {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ if (!arg->data.sint) {
+ memprintf(err, "Unexpected value 0 for index");
+ return 0;
+ }
+
+ arg++;
+
+ if (arg->type != ARGT_STR) {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ if (!arg->data.str.data) {
+ memprintf(err, "Empty separators list");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* This sample function returns a selected part of a string (a field).
+ * The first arg is the index of the field (starting at 1).
+ * The second arg is a list of separator characters (type string).
+ */
+static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int field;
+ char *start, *end;
+ int i;
+ int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
+
+ if (!arg_p[0].data.sint)
+ return 0;
+
+ if (arg_p[0].data.sint < 0) {
+ field = -1;
+ end = start = smp->data.u.str.area + smp->data.u.str.data;
+ while (start > smp->data.u.str.area) {
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*(start-1) == arg_p[1].data.str.area[i]) {
+ if (field == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ } else {
+ end = start-1;
+ field--;
+ }
+ break;
+ }
+ }
+ start--;
+ }
+ } else {
+ field = 1;
+ end = start = smp->data.u.str.area;
+ while (end - smp->data.u.str.area < smp->data.u.str.data) {
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*end == arg_p[1].data.str.area[i]) {
+ if (field == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ } else {
+ start = end+1;
+ field++;
+ }
+ break;
+ }
+ }
+ end++;
+ }
+ }
+
+ /* Field not found */
+ if (field != arg_p[0].data.sint) {
+ smp->data.u.str.data = 0;
+ return 0;
+ }
+found:
+ smp->data.u.str.data = end - start;
+ /* If the resulting string is empty, there is no need to
+ * change the pointers or to update the size */
+ if (!smp->data.u.str.data)
+ return 1;
+
+ /* Compute the remaining size if needed.
+ * Note: smp->data.u.str.size cannot be set to 0 */
+ if (smp->data.u.str.size)
+ smp->data.u.str.size -= start - smp->data.u.str.area;
+
+ smp->data.u.str.area = start;
+
+ return 1;
+}
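+
+/* Illustrative examples:
+ *
+ *     str(a.b.c),field(2,.)      yields "b"
+ *     str(a.b.c),field(-1,.)     yields "c" (negative indexes count from the end)
+ */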
+
+/* This sample function returns a word from a string.
+ * The first arg is the index of the word (starting at 1).
+ * The second arg is a list of word separator characters (type string).
+ */
+static int sample_conv_word(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int word;
+ char *start, *end;
+ int i, issep, inword;
+ int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
+
+ if (!arg_p[0].data.sint)
+ return 0;
+
+ word = 0;
+ inword = 0;
+ if (arg_p[0].data.sint < 0) {
+ end = start = smp->data.u.str.area + smp->data.u.str.data;
+ while (start > smp->data.u.str.area) {
+ issep = 0;
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*(start-1) == arg_p[1].data.str.area[i]) {
+ issep = 1;
+ break;
+ }
+ }
+ if (!inword) {
+ if (!issep) {
+ if (word != arg_p[0].data.sint) {
+ word--;
+ end = start;
+ }
+ inword = 1;
+ }
+ }
+ else if (issep) {
+ if (word == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ }
+ inword = 0;
+ }
+ start--;
+ }
+ } else {
+ end = start = smp->data.u.str.area;
+ while (end - smp->data.u.str.area < smp->data.u.str.data) {
+ issep = 0;
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*end == arg_p[1].data.str.area[i]) {
+ issep = 1;
+ break;
+ }
+ }
+ if (!inword) {
+ if (!issep) {
+ if (word != arg_p[0].data.sint) {
+ word++;
+ start = end;
+ }
+ inword = 1;
+ }
+ }
+ else if (issep) {
+ if (word == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ }
+ inword = 0;
+ }
+ end++;
+ }
+ }
+
+ /* Field not found */
+ if (word != arg_p[0].data.sint) {
+ smp->data.u.str.data = 0;
+ return 1;
+ }
+found:
+ smp->data.u.str.data = end - start;
+ /* If the resulting string is empty, there is no need to
+ * change the pointers or to update the size */
+ if (!smp->data.u.str.data)
+ return 1;
+
+ /* Compute the remaining size if needed.
+ * Note: smp->data.u.str.size cannot be set to 0 */
+ if (smp->data.u.str.size)
+ smp->data.u.str.size -= start - smp->data.u.str.area;
+
+ smp->data.u.str.area = start;
+
+ return 1;
+}
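+
+/* Illustrative example: unlike field, consecutive separators are collapsed,
+ * so empty fields are not counted:
+ *
+ *     str(a..b..c),word(2,.)     yields "b"
+ */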
+
+static int sample_conv_regsub_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct arg *arg = args;
+ char *p;
+ int len;
+
+ /* arg0 is a regex, it uses type_flag for ICASE and global match */
+ arg[0].type_flags = 0;
+
+ if (arg[2].type != ARGT_STR)
+ return 1;
+
+ p = arg[2].data.str.area;
+ len = arg[2].data.str.data;
+ while (len) {
+ if (*p == 'i') {
+ arg[0].type_flags |= ARGF_REG_ICASE;
+ }
+ else if (*p == 'g') {
+ arg[0].type_flags |= ARGF_REG_GLOB;
+ }
+ else {
+ memprintf(err, "invalid regex flag '%c', only 'i' and 'g' are supported", *p);
+ return 0;
+ }
+ p++;
+ len--;
+ }
+ return 1;
+}
+
+/* This sample function is designed to do the equivalent of s/match/replace/ on
+ * the input string. It applies a regex and restarts from the last matched
+ * location until nothing matches anymore. First arg is the regex to apply to
+ * the input string, second arg is the replacement expression.
+ */
+static int sample_conv_regsub(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *start, *end;
+ struct my_regex *reg = arg_p[0].data.reg;
+ regmatch_t pmatch[MAX_MATCH];
+ struct buffer *trash = get_trash_chunk();
+ struct buffer *output;
+ int flag, max;
+ int found;
+
+ start = smp->data.u.str.area;
+ end = start + smp->data.u.str.data;
+
+ flag = 0;
+ while (1) {
+ /* check for last round which is used to copy remaining parts
+ * when not running in global replacement mode.
+ */
+ found = 0;
+ if ((arg_p[0].type_flags & ARGF_REG_GLOB) || !(flag & REG_NOTBOL)) {
+ /* Note: we can have start == end on empty strings or at the end */
+ found = regex_exec_match2(reg, start, end - start, MAX_MATCH, pmatch, flag);
+ }
+
+ if (!found)
+ pmatch[0].rm_so = end - start;
+
+ /* copy the heading non-matching part (which may also be the tail if nothing matches) */
+ max = trash->size - trash->data;
+ if (max && pmatch[0].rm_so > 0) {
+ if (max > pmatch[0].rm_so)
+ max = pmatch[0].rm_so;
+ memcpy(trash->area + trash->data, start, max);
+ trash->data += max;
+ }
+
+ if (!found)
+ break;
+
+ output = alloc_trash_chunk();
+ if (!output)
+ break;
+
+ output->data = exp_replace(output->area, output->size, start, arg_p[1].data.str.area, pmatch);
+
+ /* replace the matching part */
+ max = output->size - output->data;
+ if (max) {
+ if (max > output->data)
+ max = output->data;
+ memcpy(trash->area + trash->data,
+ output->area, max);
+ trash->data += max;
+ }
+
+ free_trash_chunk(output);
+
+ /* stop here if we're done with this string */
+ if (start >= end)
+ break;
+
+ /* We have a special case for matches of length 0 (eg: "x*y*").
+ * These ones are considered to match in front of a character,
+ * so we have to copy that character and skip to the next one.
+ */
+ if (!pmatch[0].rm_eo) {
+ if (trash->data < trash->size)
+ trash->area[trash->data++] = start[pmatch[0].rm_eo];
+ pmatch[0].rm_eo++;
+ }
+
+ start += pmatch[0].rm_eo;
+ flag |= REG_NOTBOL;
+ }
+
+ smp->data.u.str = *trash;
+ return 1;
+}
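+
+/* Illustrative example:
+ *
+ *     str(foo_bar_baz),regsub(_,-,g)     yields "foo-bar-baz"
+ *
+ * Without the 'g' flag, only the first underscore would be replaced.
+ */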
+
+/* This function checks an operator entry. It expects a string
+ * which can be an integer or a variable name.
+ */
+static int check_operator(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ const char *str;
+ const char *end;
+ long long int i;
+
+ /* Try to decode a variable. */
+ if (vars_check_arg(&args[0], NULL))
+ return 1;
+
+ /* Try to convert an integer */
+ str = args[0].data.str.area;
+ end = str + strlen(str);
+ i = read_int64(&str, end);
+ if (*str != '\0') {
+ memprintf(err, "expects an integer or a variable name");
+ return 0;
+ }
+
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = i;
+ return 1;
+}
+
+/* This function returns a sample struct filled with an arg content.
+ * If the arg contains an integer, the integer is returned in the
+ * sample. If the arg contains a variable descriptor, it returns the
+ * variable value.
+ *
+ * This function returns 0 if an error occurs, otherwise it returns 1.
+ */
+int sample_conv_var2smp_sint(const struct arg *arg, struct sample *smp)
+{
+ switch (arg->type) {
+ case ARGT_SINT:
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = arg->data.sint;
+ return 1;
+ case ARGT_VAR:
+ return sample_conv_var2smp(&arg->data.var, smp, SMP_T_SINT);
+ default:
+ return 0;
+ }
+}
+
+/* Takes a SINT on input, applies a bitwise (one's) complement and returns the
+ * SINT result.
+ */
+static int sample_conv_binary_cpl(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = ~smp->data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, applies a binary "and" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_binary_and(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint &= tmp.data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, applies a binary "or" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_binary_or(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint |= tmp.data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, applies a binary "xor" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_binary_xor(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint ^= tmp.data.u.sint;
+ return 1;
+}
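+
+/* Illustrative use of the bitwise converters above, which accept either an
+ * immediate integer or a variable name (see check_operator() below):
+ *
+ *     http-request set-var(txn.lowbits) src_port,and(255)
+ */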
+
+static inline long long int arith_add(long long int a, long long int b)
+{
+ /* Prevent overflow and make the calculation saturate.
+ * We must ensure that the overflow checks themselves
+ * don't exceed the signed 64-bit limits.
+ *
+ * +----------+----------+
+ * | a<0 | a>=0 |
+ * +------+----------+----------+
+ * | b<0 | MIN-a>b | no check |
+ * +------+----------+----------+
+ * | b>=0 | no check | MAX-a<b |
+ * +------+----------+----------+
+ */
+ if ((a ^ b) >= 0) {
+ /* signs are the same. */
+ if (a < 0) {
+ if (LLONG_MIN - a > b)
+ return LLONG_MIN;
+ }
+ else if (LLONG_MAX - a < b)
+ return LLONG_MAX;
+ }
+ return a + b;
+}
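+
+/* Illustrative values for the saturating behavior of arith_add():
+ *
+ *     arith_add(LLONG_MAX, 1)  returns LLONG_MAX (capped, no overflow)
+ *     arith_add(LLONG_MIN, -1) returns LLONG_MIN
+ *     arith_add(2, 3)          returns 5
+ */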
+
+/* Takes a SINT on input, applies an arithmetic "add" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_arith_add(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint = arith_add(smp->data.u.sint, tmp.data.u.sint);
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "sub" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_arith_sub(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ /* We cannot represent -LLONG_MIN because abs(LLONG_MIN) is greater
+ * than abs(LLONG_MAX). So, the following code use LLONG_MAX in place
+ * of -LLONG_MIN and correct the result.
+ */
+ if (tmp.data.u.sint == LLONG_MIN) {
+ smp->data.u.sint = arith_add(smp->data.u.sint, LLONG_MAX);
+ if (smp->data.u.sint < LLONG_MAX)
+ smp->data.u.sint++;
+ return 1;
+ }
+
+ /* standard subtraction: we use the "add" function and negate
+ * the second operand.
+ */
+ smp->data.u.sint = arith_add(smp->data.u.sint, -tmp.data.u.sint);
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "mul" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ * If the result makes an overflow, then the largest possible quantity is
+ * returned.
+ */
+static int sample_conv_arith_mul(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+ long long int c;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ /* prevent divide by 0 during the check */
+ if (!smp->data.u.sint || !tmp.data.u.sint) {
+ smp->data.u.sint = 0;
+ return 1;
+ }
+
+ /* The multiply between LLONG_MIN and -1 returns a
+ * "floating point exception".
+ */
+ if (smp->data.u.sint == LLONG_MIN && tmp.data.u.sint == -1) {
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+ }
+
+ /* execute standard multiplication. */
+ c = smp->data.u.sint * tmp.data.u.sint;
+
+ /* check for overflow and makes capped multiply. */
+ if (smp->data.u.sint != c / tmp.data.u.sint) {
+ if ((smp->data.u.sint < 0) == (tmp.data.u.sint < 0)) {
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+ }
+ smp->data.u.sint = LLONG_MIN;
+ return 1;
+ }
+ smp->data.u.sint = c;
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "div" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ * If arg_p makes the result overflow, then the largest possible quantity is
+ * returned.
+ */
+static int sample_conv_arith_div(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ if (tmp.data.u.sint) {
+ /* The divide between LLONG_MIN and -1 returns a
+ * "floating point exception".
+ */
+ if (smp->data.u.sint == LLONG_MIN && tmp.data.u.sint == -1) {
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+ }
+ smp->data.u.sint /= tmp.data.u.sint;
+ return 1;
+ }
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "mod" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ * If arg_p makes the result overflow, then 0 is returned.
+ */
+static int sample_conv_arith_mod(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ if (tmp.data.u.sint) {
+ /* The divide between LLONG_MIN and -1 returns a
+ * "floating point exception".
+ */
+ if (smp->data.u.sint == LLONG_MIN && tmp.data.u.sint == -1) {
+ smp->data.u.sint = 0;
+ return 1;
+ }
+ smp->data.u.sint %= tmp.data.u.sint;
+ return 1;
+ }
+ smp->data.u.sint = 0;
+ return 1;
+}
+
+/* Takes an SINT on input, applies an arithmetic "neg" and returns the SINT
+ * result.
+ */
+static int sample_conv_arith_neg(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ if (smp->data.u.sint == LLONG_MIN)
+ smp->data.u.sint = LLONG_MAX;
+ else
+ smp->data.u.sint = -smp->data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, returns true if the value is non-zero, otherwise
+ * false. The output is a BOOL.
+ */
+static int sample_conv_arith_bool(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = !!smp->data.u.sint;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* Takes a SINT on input, returns false if the value is non-zero, otherwise
+ * true. The output is a BOOL.
+ */
+static int sample_conv_arith_not(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = !smp->data.u.sint;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* Takes a SINT on input, returns true if the value is odd, otherwise false.
+ * The output is a BOOL.
+ */
+static int sample_conv_arith_odd(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = smp->data.u.sint & 1;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* Takes a SINT on input, returns true if the value is even, otherwise false.
+ * The output is a BOOL.
+ */
+static int sample_conv_arith_even(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = !(smp->data.u.sint & 1);
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* appends an optional const string, an optional variable contents and another
+ * optional const string to an existing string.
+ */
+static int sample_conv_concat(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash;
+ struct sample tmp;
+ int max;
+
+ trash = alloc_trash_chunk();
+ if (!trash)
+ return 0;
+
+ trash->data = smp->data.u.str.data;
+ if (trash->data > trash->size - 1)
+ trash->data = trash->size - 1;
+
+ memcpy(trash->area, smp->data.u.str.area, trash->data);
+ trash->area[trash->data] = 0;
+
+ /* append first string */
+ max = arg_p[0].data.str.data;
+ if (max > trash->size - 1 - trash->data)
+ max = trash->size - 1 - trash->data;
+
+ if (max) {
+ memcpy(trash->area + trash->data, arg_p[0].data.str.area, max);
+ trash->data += max;
+ trash->area[trash->data] = 0;
+ }
+
+ /* append second string (variable) if it's found and we can turn it
+ * into a string.
+ */
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[1].type == ARGT_VAR && vars_get_by_desc(&arg_p[1].data.var, &tmp, NULL) &&
+ (sample_casts[tmp.data.type][SMP_T_STR] == c_none ||
+ sample_casts[tmp.data.type][SMP_T_STR](&tmp))) {
+
+ max = tmp.data.u.str.data;
+ if (max > trash->size - 1 - trash->data)
+ max = trash->size - 1 - trash->data;
+
+ if (max) {
+ memcpy(trash->area + trash->data, tmp.data.u.str.area,
+ max);
+ trash->data += max;
+ trash->area[trash->data] = 0;
+ }
+ }
+
+ /* append third string */
+ max = arg_p[2].data.str.data;
+ if (max > trash->size - 1 - trash->data)
+ max = trash->size - 1 - trash->data;
+
+ if (max) {
+ memcpy(trash->area + trash->data, arg_p[2].data.str.area, max);
+ trash->data += max;
+ trash->area[trash->data] = 0;
+ }
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp_dup(smp);
+ free_trash_chunk(trash);
+ return 1;
+}
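+
+/* Illustrative example (the variable name is arbitrary): assuming txn.path
+ * was set earlier,
+ *
+ *     str(begin),concat(_,txn.path,_end)
+ *
+ * yields "begin_<path>_end".
+ */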
+
+/* This function checks the "concat" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_concat(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ /* Try to decode a variable. */
+ if (args[1].data.str.data > 0 && !vars_check_arg(&args[1], NULL)) {
+ memprintf(err, "failed to register variable name '%s'",
+ args[1].data.str.area);
+ return 0;
+ }
+ return 1;
+}
+
+/* Appends a delimiter (only to a non-empty input) followed by the optional
+ * variable contents concatenated with the optional suffix.
+ */
+static int sample_conv_add_item(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *tmpbuf;
+ struct sample tmp;
+ size_t max;
+ int var_available;
+
+ tmpbuf = alloc_trash_chunk();
+ if (!tmpbuf)
+ return 0;
+
+ tmpbuf->data = smp->data.u.str.data;
+ if (tmpbuf->data > tmpbuf->size - 1)
+ tmpbuf->data = tmpbuf->size - 1;
+
+ memcpy(tmpbuf->area, smp->data.u.str.area, tmpbuf->data);
+ tmpbuf->area[tmpbuf->data] = 0;
+
+ /* Check if variable is found and we can turn into a string. */
+ var_available = 0;
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[1].type == ARGT_VAR && vars_get_by_desc(&arg_p[1].data.var, &tmp, NULL) &&
+ (sample_casts[tmp.data.type][SMP_T_STR] == c_none ||
+ sample_casts[tmp.data.type][SMP_T_STR](&tmp)))
+ var_available = 1;
+
+ /* Append delimiter only if input is not empty and either
+ * the variable or the suffix are not empty
+ */
+ if (smp->data.u.str.data && ((var_available && tmp.data.u.str.data) ||
+ arg_p[2].data.str.data)) {
+ max = arg_p[0].data.str.data;
+ if (max > tmpbuf->size - 1 - tmpbuf->data)
+ max = tmpbuf->size - 1 - tmpbuf->data;
+
+ if (max) {
+ memcpy(tmpbuf->area + tmpbuf->data, arg_p[0].data.str.area, max);
+ tmpbuf->data += max;
+ tmpbuf->area[tmpbuf->data] = 0;
+ }
+ }
+
+ /* Append variable contents if variable is found and turned into string. */
+ if (var_available) {
+ max = tmp.data.u.str.data;
+ if (max > tmpbuf->size - 1 - tmpbuf->data)
+ max = tmpbuf->size - 1 - tmpbuf->data;
+
+ if (max) {
+ memcpy(tmpbuf->area + tmpbuf->data, tmp.data.u.str.area, max);
+ tmpbuf->data += max;
+ tmpbuf->area[tmpbuf->data] = 0;
+ }
+ }
+
+ /* Append optional suffix. */
+ max = arg_p[2].data.str.data;
+ if (max > tmpbuf->size - 1 - tmpbuf->data)
+ max = tmpbuf->size - 1 - tmpbuf->data;
+
+ if (max) {
+ memcpy(tmpbuf->area + tmpbuf->data, arg_p[2].data.str.area, max);
+ tmpbuf->data += max;
+ tmpbuf->area[tmpbuf->data] = 0;
+ }
+
+ smp->data.u.str = *tmpbuf;
+ smp->data.type = SMP_T_STR;
+ smp_dup(smp);
+ free_trash_chunk(tmpbuf);
+ return 1;
+}
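+
+/* Illustrative example (variable name and condition are arbitrary): build a
+ * comma separated list across successive rules:
+ *
+ *     http-request set-var(req.tags) var(req.tags),add_item(',',,site1) if site1_cond
+ */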
+
+/* Check the "add_item" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_add_item(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ /* Try to decode a variable. */
+ if (args[1].data.str.data > 0 && !vars_check_arg(&args[1], NULL)) {
+ memprintf(err, "failed to register variable name '%s'",
+ args[1].data.str.area);
+ return 0;
+ }
+
+ if (args[1].data.str.data == 0 && args[2].data.str.data == 0) {
+ memprintf(err, "one of the optional arguments has to be nonempty");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Compares string with a variable containing a string. Return value
+ * is compatible with strcmp(3)'s return value.
+ */
+static int sample_conv_strcmp(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+ int max, result;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[0].type != ARGT_VAR)
+ return 0;
+
+ if (!sample_conv_var2smp(&arg_p[0].data.var, &tmp, SMP_T_STR))
+ return 0;
+
+ max = MIN(smp->data.u.str.data, tmp.data.u.str.data);
+ result = strncmp(smp->data.u.str.area, tmp.data.u.str.area, max);
+ if (result == 0) {
+ if (smp->data.u.str.data != tmp.data.u.str.data) {
+ if (smp->data.u.str.data < tmp.data.u.str.data) {
+ result = -1;
+ }
+ else {
+ result = 1;
+ }
+ }
+ }
+
+ smp->data.u.sint = result;
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
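+
+/* Illustrative example: detect domain fronting by comparing the TLS SNI
+ * against the Host header:
+ *
+ *     http-request set-var(txn.host) req.hdr(host)
+ *     acl sni_host_match ssl_fc_sni,strcmp(txn.host) eq 0
+ */
+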
+/*
+ * This converter takes a Host header value as defined by RFC 9110 section 7.2:
+ * Host = uri-host [ ":" port ] ;
+ * It returns the uri-host value in lowercase with the port stripped.
+ */
+static int sample_conv_host_only(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ /* Working cases: hostname00, hostname00:80, 127.0.0.1, 127.0.0.1:80, [::1], [::1]:80 */
+ char *beg = smp->data.u.str.area;
+ char *end = smp->data.u.str.area + smp->data.u.str.data - 1;
+ char *p;
+
+ for (p = end; p >= beg; p--) {
+ if (*p == ':' || *p == ']')
+ break;
+ }
+
+ if (p >= beg && *p == ':')
+ smp->data.u.str.data = p - beg;
+ /* if no port part was found, the hostname is the whole string */
+
+ smp->data.type = SMP_T_STR;
+
+ return sample_conv_str2lower(arg_p, smp, NULL);
+}
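+
+/* Illustrative examples:
+ *
+ *     "www.Example.com:8080"  ->  host_only yields "www.example.com"
+ *     "[::1]:80"              ->  host_only yields "[::1]"
+ */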
+
+/*
+ * This converter takes a Host header value as defined by RFC 9110 section 7.2:
+ * Host = uri-host [ ":" port ] ;
+ * It returns the port value as an integer.
+ */
+static int sample_conv_port_only(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ /* Working cases: hostname00, hostname00:80, 127.0.0.1, 127.0.0.1:80, [::1], [::1]:80 */
+ char *beg = smp->data.u.str.area;
+ char *end = smp->data.u.str.area + smp->data.u.str.data - 1;
+ char *p;
+
+ for (p = end; p >= beg; p--) {
+ if (*p == ':' || *p == ']')
+ break;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ if (p >= beg && *p == ':' && ++p <= end) {
+ smp->data.u.sint = strl2ui(p, smp->data.u.str.data + smp->data.u.str.area - p);
+ } else {
+ smp->data.u.sint = 0;
+ }
+ return 1;
+}
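+
+/* Illustrative examples:
+ *
+ *     "example.com:8080"  ->  port_only yields 8080
+ *     "example.com"       ->  port_only yields 0 (no port present)
+ */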
+
+
+/* Takes a boolean as input. Returns the first argument if that boolean is true and
+ * the second argument otherwise.
+ */
+static int sample_conv_iif(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+
+ if (smp->data.u.sint) {
+ smp->data.u.str.data = arg_p[0].data.str.data;
+ smp->data.u.str.area = arg_p[0].data.str.area;
+ }
+ else {
+ smp->data.u.str.data = arg_p[1].data.str.data;
+ smp->data.u.str.area = arg_p[1].data.str.area;
+ }
+
+ return 1;
+}
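+
+/* Illustrative example:
+ *
+ *     http-request set-header X-Forwarded-Proto %[ssl_fc,iif(https,http)]
+ */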
+
+#define GRPC_MSG_COMPRESS_FLAG_SZ 1 /* 1 byte */
+#define GRPC_MSG_LENGTH_SZ 4 /* 4 bytes */
+#define GRPC_MSG_HEADER_SZ (GRPC_MSG_COMPRESS_FLAG_SZ + GRPC_MSG_LENGTH_SZ)
+
+/*
+ * Extract the field value of an input binary sample. Takes a mandatory argument:
+ * the protocol buffers field identifier (dotted notation) internally represented
+ * as an array of unsigned integers and its size.
+ * Return 1 if the field was found, 0 if not.
+ */
+static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned char *pos;
+ size_t grpc_left;
+
+ pos = (unsigned char *)smp->data.u.str.area;
+ grpc_left = smp->data.u.str.data;
+
+ while (grpc_left > GRPC_MSG_HEADER_SZ) {
+ size_t grpc_msg_len, left;
+
+ grpc_msg_len = left = ntohl(*(uint32_t *)(pos + GRPC_MSG_COMPRESS_FLAG_SZ));
+
+ pos += GRPC_MSG_HEADER_SZ;
+ grpc_left -= GRPC_MSG_HEADER_SZ;
+
+ if (grpc_left < left)
+ return 0;
+
+ if (protobuf_field_lookup(arg_p, smp, &pos, &left))
+ return 1;
+
+ grpc_left -= grpc_msg_len;
+ }
+
+ return 0;
+}
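+
+/* Illustrative example (field numbers depend on the gRPC service's .proto
+ * definition, so these are assumptions):
+ *
+ *     http-request set-var(txn.field) req.body,ungrpc(1.2,int32)
+ *
+ * extracts field 2 nested inside field 1 of each gRPC message as an int32.
+ */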
+
+static int sample_conv_protobuf(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned char *pos;
+ size_t left;
+
+ pos = (unsigned char *)smp->data.u.str.area;
+ left = smp->data.u.str.data;
+
+ return protobuf_field_lookup(arg_p, smp, &pos, &left);
+}
+
+static int sample_conv_protobuf_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!args[1].type) {
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = PBUF_T_BINARY;
+ }
+ else {
+ int pbuf_type;
+
+ pbuf_type = protobuf_type(args[1].data.str.area);
+ if (pbuf_type == -1) {
+ memprintf(err, "Wrong protocol buffer type '%s'", args[1].data.str.area);
+ return 0;
+ }
+
+ chunk_destroy(&args[1].data.str);
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = pbuf_type;
+ }
+
+ return 1;
+}
+
+/*
+ * Extract the tag value of an input binary sample. Takes a mandatory argument:
+ * the FIX protocol tag identifier.
+ * Return 1 if the tag was found, 0 if not.
+ */
+static int sample_conv_fix_tag_value(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist value;
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ value = fix_tag_value(ist2(smp->data.u.str.area, smp->data.u.str.data),
+ arg_p[0].data.sint);
+ if (!istlen(value)) {
+ if (isttest(value)) {
+ /* value != IST_NULL, need more data */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ }
+ return 0;
+ }
+
+ smp->data.u.str = ist2buf(value);
+ smp->flags |= SMP_F_CONST;
+
+ return 1;
+}
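+
+/* Illustrative example (SenderCompID is standard FIX tag 49):
+ *
+ *     tcp-request content set-var(txn.sender) req.payload(0,0),fix_tag_value(SenderCompID)
+ */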
+
+/* This function checks the "fix_tag_value" converter configuration.
+ * It expects a "known" (by HAProxy) tag name or ID.
+ * Tag string names are converted to their ID counterpart because this is the
+ * format they are sent over the wire.
+ */
+static int sample_conv_fix_value_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct ist str;
+ unsigned int tag;
+
+ str = ist2(args[0].data.str.area, args[0].data.str.data);
+ tag = fix_tagid(str);
+ if (!tag) {
+ memprintf(err, "Unknown FIX tag name '%s'", args[0].data.str.area);
+ return 0;
+ }
+
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = tag;
+
+ return 1;
+}
+
+/*
+ * Checks that a buffer contains a valid FIX message
+ *
+ * Return 1 if the check could be run, 0 if not.
+ * The result of the analysis itself is stored in <smp> as a boolean.
+ */
+static int sample_conv_fix_is_valid(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist msg;
+
+ msg = ist2(smp->data.u.str.area, smp->data.u.str.data);
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ switch (fix_validate_message(msg)) {
+ case FIX_VALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+ case FIX_NEED_MORE_DATA:
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ case FIX_INVALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ return 1;
+ }
+ return 0;
+}
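+
+/* Illustrative example: reject connections that do not carry a valid FIX
+ * message once enough data has been inspected:
+ *
+ *     tcp-request inspect-delay 10s
+ *     tcp-request content reject unless { req.payload(0,0),fix_is_valid }
+ */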
+
+/*
+ * Extract the field value of an input binary sample containing an MQTT packet.
+ * Takes 2 mandatory arguments:
+ * - packet type
+ * - field name
+ *
+ * return 1 if the field was found, 0 if not.
+ */
+static int sample_conv_mqtt_field_value(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist pkt, value;
+ int type, fieldname_id;
+
+ pkt = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ type = arg_p[0].data.sint;
+ fieldname_id = arg_p[1].data.sint;
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ value = mqtt_field_value(pkt, type, fieldname_id);
+ if (!istlen(value)) {
+ if (isttest(value)) {
+ /* value != IST_NULL, need more data */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ }
+ return 0;
+ }
+
+ smp->data.u.str = ist2buf(value);
+ smp->flags |= SMP_F_CONST;
+ return 1;
+}
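+
+/* Illustrative example:
+ *
+ *     tcp-request content set-var(txn.proto) req.payload(0,0),mqtt_field_value(connect,protocol_name)
+ */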
+
+/*
+ * This function checks the "mqtt_field_value" converter configuration.
+ * It expects a known packet type name or ID and a field name, in this order
+ *
+ * Args[0] will be turned into a MQTT_CPT_* value for direct matching when parsing
+ * a packet.
+ */
+static int sample_conv_mqtt_field_value_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ int type, fieldname_id;
+
+ /* check the MQTT packet type is valid */
+ type = mqtt_typeid(ist2(args[0].data.str.area, args[0].data.str.data));
+ if (type == MQTT_CPT_INVALID) {
+ memprintf(err, "Unknown MQTT type '%s'", args[0].data.str.area);
+ return 0;
+ }
+
+ /* check the field name belongs to the MQTT packet type */
+ fieldname_id = mqtt_check_type_fieldname(type, ist2(args[1].data.str.area, args[1].data.str.data));
+ if (fieldname_id == MQTT_FN_INVALID) {
+ memprintf(err, "Unknown MQTT field name '%s' for packet type '%s'", args[1].data.str.area,
+ args[0].data.str.area);
+ return 0;
+ }
+
+ /* save numeric counterparts of type and field name */
+ chunk_destroy(&args[0].data.str);
+ chunk_destroy(&args[1].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = type;
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = fieldname_id;
+
+ return 1;
+}
+
+/*
+ * Checks that <smp> contains a valid MQTT message
+ *
+ * The function returns 1 if the check was run to its end, 0 otherwise.
+ * The result of the analysis itself is stored in <smp> as a boolean.
+ */
+static int sample_conv_mqtt_is_valid(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist msg;
+
+ msg = ist2(smp->data.u.str.area, smp->data.u.str.data);
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ switch (mqtt_validate_message(msg, NULL)) {
+ case FIX_VALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+ case FIX_NEED_MORE_DATA:
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ case FIX_INVALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ return 1;
+ }
+ return 0;
+}
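+
+/* Illustrative example:
+ *
+ *     tcp-request inspect-delay 10s
+ *     tcp-request content reject unless { req.payload(0,0),mqtt_is_valid }
+ */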
+
+/* This function checks the "strcmp" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_strcmp(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!args[0].data.str.data) {
+ memprintf(err, "missing variable name");
+ return 0;
+ }
+
+ /* Try to decode a variable. */
+ if (vars_check_arg(&args[0], NULL))
+ return 1;
+
+ memprintf(err, "failed to register variable name '%s'",
+ args[0].data.str.area);
+ return 0;
+}
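+
+/* Usage sketch (illustrative; the variable name is hypothetical): compare the
+ * TLS SNI against the Host header to detect a mismatch:
+ *   http-request set-var(txn.host) req.hdr(host)
+ *   acl host_matches_sni ssl_fc_sni,strcmp(txn.host) eq 0
+ */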
+
+/* Converts the input integer sample to its 4-byte network byte order (big endian) binary form. */
+static int sample_conv_htonl(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *tmp;
+ uint32_t n;
+
+ n = htonl((uint32_t)smp->data.u.sint);
+ tmp = get_trash_chunk();
+
+ memcpy(b_head(tmp), &n, 4);
+ b_add(tmp, 4);
+
+ smp->data.u.str = *tmp;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* Truncates the input string at the first CR ('\r') or LF ('\n') character. */
+static int sample_conv_cut_crlf(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *p;
+ size_t l;
+
+ p = smp->data.u.str.area;
+ for (l = 0; l < smp->data.u.str.data; l++) {
+ if (*(p+l) == '\r' || *(p+l) == '\n')
+ break;
+ }
+ smp->data.u.str.data = l;
+ return 1;
+}
+
+/* Skips leading characters of the input string as long as they belong to the delimiter set passed in argument. */
+static int sample_conv_ltrim(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *delimiters, *p;
+ size_t dlen, l;
+
+ delimiters = arg_p[0].data.str.area;
+ dlen = arg_p[0].data.str.data;
+
+ l = smp->data.u.str.data;
+ p = smp->data.u.str.area;
+ while (l && memchr(delimiters, *p, dlen) != NULL) {
+ p++;
+ l--;
+ }
+
+ smp->data.u.str.area = p;
+ smp->data.u.str.data = l;
+ return 1;
+}
+
+/* Strips trailing characters of the input string as long as they belong to the delimiter set passed in argument. */
+static int sample_conv_rtrim(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *delimiters, *p;
+ size_t dlen, l;
+
+ delimiters = arg_p[0].data.str.area;
+ dlen = arg_p[0].data.str.data;
+
+ l = smp->data.u.str.data;
+ p = smp->data.u.str.area + l - 1;
+ while (l && memchr(delimiters, *p, dlen) != NULL) {
+ p--;
+ l--;
+ }
+
+ smp->data.u.str.data = l;
+ return 1;
+}
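+
+/* Usage sketch (illustrative; the variable name is hypothetical): strip
+ * leading and trailing slashes from the request path:
+ *   http-request set-var(txn.clean_path) path,ltrim(/),rtrim(/)
+ */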
+
+/* This function checks the "json_query" converter's arguments. */
+static int sample_check_json_query(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (arg[0].data.str.data == 0) {
+ memprintf(err, "json_path must not be empty");
+ return 0;
+ }
+
+ if (arg[1].data.str.data != 0) {
+ if (strcmp(arg[1].data.str.area, "int") != 0) {
+ memprintf(err, "output_type only supports \"int\" as argument");
+ return 0;
+ } else {
+ arg[1].type = ARGT_SINT;
+ arg[1].data.sint = 0;
+ }
+ }
+ return 1;
+}
+
+/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per
+ * the recommendation for interoperable integers in section 6 of RFC 7159.
+ */
+#define JSON_INT_MAX ((1LL << 53) - 1)
+#define JSON_INT_MIN (-JSON_INT_MAX)
+
+/* This sample function gets a value out of the given JSON string.
+ * The mjson library is used to parse the JSON content.
+ */
+static int sample_conv_json_query(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ const char *token; /* holds the temporary string from mjson_find */
+ int token_size; /* holds the length of <token> */
+
+ enum mjson_tok token_type;
+
+ token_type = mjson_find(smp->data.u.str.area, smp->data.u.str.data, args[0].data.str.area, &token, &token_size);
+
+ switch (token_type) {
+ case MJSON_TOK_NUMBER:
+ if (args[1].type == ARGT_SINT) {
+ smp->data.u.sint = strtoll(token, NULL, 0);
+
+ if (smp->data.u.sint < JSON_INT_MIN || smp->data.u.sint > JSON_INT_MAX)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+ } else {
+ double double_val;
+
+ if (mjson_get_number(smp->data.u.str.area, smp->data.u.str.data, args[0].data.str.area, &double_val) == 0)
+ return 0;
+
+ trash->data = snprintf(trash->area, trash->size, "%g", double_val);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+
+ return 1;
+ }
+ case MJSON_TOK_TRUE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+
+ return 1;
+ case MJSON_TOK_FALSE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+
+ return 1;
+ case MJSON_TOK_STRING: {
+ int len;
+
+ len = mjson_get_string(smp->data.u.str.area, smp->data.u.str.data, args[0].data.str.area, trash->area, trash->size);
+
+ if (len == -1) {
+ /* invalid string */
+ return 0;
+ }
+
+ trash->data = len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+
+ return 1;
+ }
+ case MJSON_TOK_NULL:
+ case MJSON_TOK_ARRAY:
+ case MJSON_TOK_OBJECT:
+ /* We cannot handle these. */
+ return 0;
+ case MJSON_TOK_INVALID:
+ /* Nothing matches the query. */
+ return 0;
+ case MJSON_TOK_KEY:
+ /* This is not a valid return value according to the
+ * mjson documentation, but we handle it to benefit
+ * from '-Wswitch'.
+ */
+ return 0;
+ }
+
+ my_unreachable();
+ return 0;
+}
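+
+/* Usage sketch (illustrative; field and variable names are hypothetical):
+ *   # extract 42 from a request body such as {"user":{"id":42}}
+ *   http-request set-var(txn.user_id) req.body,json_query('$.user.id','int')
+ */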
+
+#ifdef USE_OPENSSL
+static int sample_conv_jwt_verify_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ vars_check_arg(&args[0], NULL);
+ vars_check_arg(&args[1], NULL);
+
+ if (args[0].type == ARGT_STR) {
+ enum jwt_alg alg = jwt_parse_alg(args[0].data.str.area, args[0].data.str.data);
+
+ switch(alg) {
+ case JWT_ALG_DEFAULT:
+ memprintf(err, "unknown JWT algorithm: %s", args[0].data.str.area);
+ return 0;
+
+ case JWS_ALG_PS256:
+ case JWS_ALG_PS384:
+ case JWS_ALG_PS512:
+ memprintf(err, "RSASSA-PSS JWS signing not managed yet");
+ return 0;
+
+ default:
+ break;
+ }
+ }
+
+ if (args[1].type == ARGT_STR) {
+ jwt_tree_load_cert(args[1].data.str.area, args[1].data.str.data, err);
+ }
+
+ return 1;
+}
+
+/* Check that a JWT's signature is correct */
+static int sample_conv_jwt_verify(const struct arg *args, struct sample *smp, void *private)
+{
+ struct sample alg_smp, key_smp;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ smp_set_owner(&alg_smp, smp->px, smp->sess, smp->strm, smp->opt);
+ smp_set_owner(&key_smp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&args[0], &alg_smp))
+ return 0;
+ if (!sample_conv_var2smp_str(&args[1], &key_smp))
+ return 0;
+
+ smp->data.u.sint = jwt_verify(&smp->data.u.str, &alg_smp.data.u.str,
+ &key_smp.data.u.str);
+
+ return 1;
+}
+
+
+/*
+ * Returns the decoded header or payload of a JWT if no parameter is given, or
+ * the value of the specified field of the corresponding JWT subpart if a
+ * parameter is given.
+ */
+static int sample_conv_jwt_member_query(const struct arg *args, struct sample *smp,
+ void *private, enum jwt_elt member)
+{
+ struct jwt_item items[JWT_ELT_MAX] = { { 0 } };
+ unsigned int item_num = member + 1; /* no need to tokenize the full JWT */
+ struct buffer *decoded_header = get_trash_chunk();
+ int retval = 0;
+ int ret;
+
+ jwt_tokenize(&smp->data.u.str, items, &item_num);
+
+ if (item_num < member + 1)
+ goto end;
+
+ ret = base64urldec(items[member].start, items[member].length,
+ decoded_header->area, decoded_header->size);
+ if (ret == -1)
+ goto end;
+
+ decoded_header->data = ret;
+ if (args[0].type != ARGT_STR) {
+ smp->data.u.str = *decoded_header;
+ smp->data.type = SMP_T_STR;
+ goto end;
+ }
+
+ /* We look for a specific field of the header or payload part of the JWT */
+ smp->data.u.str = *decoded_header;
+
+ retval = sample_conv_json_query(args, smp, private);
+
+end:
+ return retval;
+}
+
+/* This function checks the "jwt_header_query" and "jwt_payload_query" converters' arguments.
+ * It is based on the "json_query" converter's check with the only difference
+ * being that the jwt converters can take 0 parameters as well.
+ */
+static int sample_conv_jwt_query_check(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (arg[1].data.str.data != 0) {
+ if (strcmp(arg[1].data.str.area, "int") != 0) {
+ memprintf(err, "output_type only supports \"int\" as argument");
+ return 0;
+ } else {
+ arg[1].type = ARGT_SINT;
+ arg[1].data.sint = 0;
+ }
+ }
+ return 1;
+}
+
+/*
+ * If no parameter is given, return the decoded header part of a JWT (the first
+ * base64 encoded part, corresponding to the JOSE header).
+ * If a parameter is given, this converter acts as a "json_query" on this
+ * decoded JSON.
+ */
+static int sample_conv_jwt_header_query(const struct arg *args, struct sample *smp, void *private)
+{
+ return sample_conv_jwt_member_query(args, smp, private, JWT_ELT_JOSE);
+}
+
+/*
+ * If no parameter is given, return the decoded payload part of a JWT (the
+ * second base64 encoded part, which contains all the claims). If a parameter
+ * is given, this converter acts as a "json_query" on this decoded JSON.
+ */
+static int sample_conv_jwt_payload_query(const struct arg *args, struct sample *smp, void *private)
+{
+ return sample_conv_jwt_member_query(args, smp, private, JWT_ELT_CLAIMS);
+}
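+
+/* Usage sketch (illustrative; the key path and variable names are
+ * hypothetical):
+ *   http-request set-var(txn.bearer) http_auth_bearer
+ *   http-request set-var(txn.alg) var(txn.bearer),jwt_header_query('$.alg')
+ *   http-request set-var(txn.iss) var(txn.bearer),jwt_payload_query('$.iss')
+ *   http-request deny unless { var(txn.bearer),jwt_verify(txn.alg,"/etc/haproxy/pubkey.pem") -m int 1 }
+ */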
+
+#endif /* USE_OPENSSL */
+
+/************************************************************************/
+/* All supported sample fetch functions must be declared here */
+/************************************************************************/
+
+/* force TRUE to be returned at the fetch level */
+static int
+smp_fetch_true(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp_make_rw(smp))
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* force FALSE to be returned at the fetch level */
+static int
+smp_fetch_false(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ return 1;
+}
+
+/* retrieve environment variable $1 as a string */
+static int
+smp_fetch_env(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ char *env;
+
+ if (args[0].type != ARGT_STR)
+ return 0;
+
+ env = getenv(args[0].data.str.area);
+ if (!env)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = env;
+ smp->data.u.str.data = strlen(env);
+ return 1;
+}
+
+/* Validates the data unit argument passed to the "date" fetch. Argument 1
+ * supports an optional string representing the unit of the result: "s" for
+ * seconds, "ms" for milliseconds and "us" for microseconds.
+ * Returns 0 on error and non-zero if OK.
+ */
+int smp_check_date_unit(struct arg *args, char **err)
+{
+ if (args[1].type == ARGT_STR) {
+ long long int unit;
+
+ if (strcmp(args[1].data.str.area, "s") == 0) {
+ unit = TIME_UNIT_S;
+ }
+ else if (strcmp(args[1].data.str.area, "ms") == 0) {
+ unit = TIME_UNIT_MS;
+ }
+ else if (strcmp(args[1].data.str.area, "us") == 0) {
+ unit = TIME_UNIT_US;
+ }
+ else {
+ memprintf(err, "expects 's', 'ms' or 'us', got '%s'",
+ args[1].data.str.area);
+ return 0;
+ }
+
+ chunk_destroy(&args[1].data.str);
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = unit;
+ }
+ else if (args[1].type != ARGT_STOP) {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Retrieves the current local date in epoch time, converts it to milliseconds
+ * or microseconds if requested via the optional args[1] unit parameter, and
+ * applies an optional args[0] offset.
+ */
+static int
+smp_fetch_date(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.sint = date.tv_sec;
+
+ /* report in milliseconds */
+ if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) {
+ smp->data.u.sint *= 1000;
+ smp->data.u.sint += date.tv_usec / 1000;
+ }
+ /* report in microseconds */
+ else if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) {
+ smp->data.u.sint *= 1000000;
+ smp->data.u.sint += date.tv_usec;
+ }
+
+ /* add offset */
+ if (args[0].type == ARGT_SINT)
+ smp->data.u.sint += args[0].data.sint;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ return 1;
+}
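+
+/* Usage sketch (illustrative; the header names are hypothetical):
+ *   http-request set-header X-Now-Ms %[date(0,ms)]   # epoch in milliseconds
+ *   http-request set-header X-Expiry %[date(3600)]   # now + 1 hour, in seconds
+ */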
+
+/* retrieve the current microsecond part of the date */
+static int
+smp_fetch_date_us(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.sint = date.tv_usec;
+ smp->data.type = SMP_T_SINT;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+
+/* returns the hostname */
+static int
+smp_fetch_hostname(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = hostname;
+ smp->data.u.str.data = strlen(hostname);
+ return 1;
+}
+
+/* returns the number of processes */
+static int
+smp_fetch_nbproc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* returns the number of the current process (between 1 and nbproc) */
+static int
+smp_fetch_proc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* returns the number of the current thread (between 0 and nbthread-1) */
+static int
+smp_fetch_thread(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = tid;
+ return 1;
+}
+
+/* generate a random 32-bit integer for whatever purpose, with an optional
+ * range specified in argument.
+ */
+static int
+smp_fetch_rand(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.sint = ha_random32();
+
+ /* reduce if needed. Don't do a modulo, use all bits! */
+ if (args[0].type == ARGT_SINT)
+ smp->data.u.sint = ((u64)smp->data.u.sint * (u64)args[0].data.sint) >> 32;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ return 1;
+}
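+
+/* Usage sketch (illustrative; the variable name is hypothetical): pick a
+ * value uniformly distributed in [0, 99]:
+ *   http-request set-var(txn.bucket) rand(100)
+ */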
+
+/* returns true if the current process is stopping */
+static int
+smp_fetch_stopping(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = stopping;
+ return 1;
+}
+
+/* returns the number of calls of the current stream's process_stream() */
+static int
+smp_fetch_cpu_calls(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->calls;
+ return 1;
+}
+
+/* returns the average number of nanoseconds spent processing the stream per call */
+static int
+smp_fetch_cpu_ns_avg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->cpu_time / smp->strm->task->calls : 0;
+ return 1;
+}
+
+/* returns the total number of nanoseconds spent processing the stream */
+static int
+smp_fetch_cpu_ns_tot(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->cpu_time;
+ return 1;
+}
+
+/* returns the average number of nanoseconds per call spent waiting for other tasks to be processed */
+static int
+smp_fetch_lat_ns_avg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->lat_time / smp->strm->task->calls : 0;
+ return 1;
+}
+
+/* returns the total number of nanoseconds per call spent waiting for other tasks to be processed */
+static int
+smp_fetch_lat_ns_tot(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->lat_time;
+ return 1;
+}
+
+static int smp_fetch_const_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags |= SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = args[0].data.str.area;
+ smp->data.u.str.data = args[0].data.str.data;
+ return 1;
+}
+
+static int smp_check_const_bool(struct arg *args, char **err)
+{
+ if (strcasecmp(args[0].data.str.area, "true") == 0 ||
+ strcasecmp(args[0].data.str.area, "1") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = 1;
+ return 1;
+ }
+ if (strcasecmp(args[0].data.str.area, "false") == 0 ||
+ strcasecmp(args[0].data.str.area, "0") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = 0;
+ return 1;
+ }
+ memprintf(err, "Expects 'true', 'false', '0' or '1'");
+ return 0;
+}
+
+static int smp_fetch_const_bool(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = args[0].data.sint;
+ return 1;
+}
+
+static int smp_fetch_const_int(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args[0].data.sint;
+ return 1;
+}
+
+static int smp_fetch_const_ipv4(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_IPV4;
+ smp->data.u.ipv4 = args[0].data.ipv4;
+ return 1;
+}
+
+static int smp_fetch_const_ipv6(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_IPV6;
+ smp->data.u.ipv6 = args[0].data.ipv6;
+ return 1;
+}
+
+static int smp_check_const_bin(struct arg *args, char **err)
+{
+ char *binstr = NULL;
+ int binstrlen;
+
+ if (!parse_binary(args[0].data.str.area, &binstr, &binstrlen, err))
+ return 0;
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_STR;
+ args[0].data.str.area = binstr;
+ args[0].data.str.data = binstrlen;
+ return 1;
+}
+
+static int smp_fetch_const_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags |= SMP_F_CONST;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str.area = args[0].data.str.area;
+ smp->data.u.str.data = args[0].data.str.data;
+ return 1;
+}
+
+static int smp_check_const_meth(struct arg *args, char **err)
+{
+ enum http_meth_t meth;
+ int i;
+
+ meth = find_http_meth(args[0].data.str.area, args[0].data.str.data);
+ if (meth != HTTP_METH_OTHER) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = meth;
+ } else {
+ /* Check method availability. A method is a token defined as:
+ * tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
+ * "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
+ * token = 1*tchar
+ */
+ for (i = 0; i < args[0].data.str.data; i++) {
+ if (!HTTP_IS_TOKEN(args[0].data.str.area[i])) {
+ memprintf(err, "expects valid method.");
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+static int smp_fetch_const_meth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_METH;
+ if (args[0].type == ARGT_SINT) {
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.u.meth.meth = args[0].data.sint;
+ smp->data.u.meth.str.area = "";
+ smp->data.u.meth.str.data = 0;
+ } else {
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.meth.meth = HTTP_METH_OTHER;
+ smp->data.u.meth.str.area = args[0].data.str.area;
+ smp->data.u.meth.str.data = args[0].data.str.data;
+ }
+ return 1;
+}
+
+// This function checks the "uuid" sample fetch's arguments.
+// It won't get called when no parameter is specified (maybe a bug?)
+static int smp_check_uuid(struct arg *args, char **err)
+{
+ if (!args[0].type) {
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = 4;
+ }
+ else if (args[0].data.sint != 4) {
+ memprintf(err, "Unsupported UUID version: '%lld'", args[0].data.sint);
+ return 0;
+ }
+
+ return 1;
+}
+
+// Generate an RFC 4122 UUID (default is v4 = fully random)
+static int smp_fetch_uuid(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (args[0].data.sint == 4 || !args[0].type) {
+ ha_generate_uuid(&trash);
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ smp->data.u.str = trash;
+ return 1;
+ }
+
+ // more implementations of other uuid formats possible here
+ return 0;
+}
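+
+/* Usage sketch (illustrative; the header name is hypothetical):
+ *   http-request set-header X-Request-Id %[uuid()]
+ */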
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types must be declared as the lowest
+ * common denominator, the type that can be cast into all other ones. For
+ * instance IPv4/IPv6 must be declared IPv4.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "always_false", smp_fetch_false, 0, NULL, SMP_T_BOOL, SMP_USE_CONST },
+ { "always_true", smp_fetch_true, 0, NULL, SMP_T_BOOL, SMP_USE_CONST },
+ { "env", smp_fetch_env, ARG1(1,STR), NULL, SMP_T_STR, SMP_USE_CONST },
+ { "date", smp_fetch_date, ARG2(0,SINT,STR), smp_check_date_unit, SMP_T_SINT, SMP_USE_CONST },
+ { "date_us", smp_fetch_date_us, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "hostname", smp_fetch_hostname, 0, NULL, SMP_T_STR, SMP_USE_CONST },
+ { "nbproc", smp_fetch_nbproc,0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "proc", smp_fetch_proc, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "thread", smp_fetch_thread, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "rand", smp_fetch_rand, ARG1(0,SINT), NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "stopping", smp_fetch_stopping, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN },
+ { "uuid", smp_fetch_uuid, ARG1(0, SINT), smp_check_uuid, SMP_T_STR, SMP_USE_CONST },
+
+ { "cpu_calls", smp_fetch_cpu_calls, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "cpu_ns_avg", smp_fetch_cpu_ns_avg, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "cpu_ns_tot", smp_fetch_cpu_ns_tot, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "lat_ns_avg", smp_fetch_lat_ns_avg, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "lat_ns_tot", smp_fetch_lat_ns_tot, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+
+ { "str", smp_fetch_const_str, ARG1(1,STR), NULL , SMP_T_STR, SMP_USE_CONST },
+ { "bool", smp_fetch_const_bool, ARG1(1,STR), smp_check_const_bool, SMP_T_BOOL, SMP_USE_CONST },
+ { "int", smp_fetch_const_int, ARG1(1,SINT), NULL , SMP_T_SINT, SMP_USE_CONST },
+ { "ipv4", smp_fetch_const_ipv4, ARG1(1,IPV4), NULL , SMP_T_IPV4, SMP_USE_CONST },
+ { "ipv6", smp_fetch_const_ipv6, ARG1(1,IPV6), NULL , SMP_T_IPV6, SMP_USE_CONST },
+ { "bin", smp_fetch_const_bin, ARG1(1,STR), smp_check_const_bin , SMP_T_BIN, SMP_USE_CONST },
+ { "meth", smp_fetch_const_meth, ARG1(1,STR), smp_check_const_meth, SMP_T_METH, SMP_USE_CONST },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "add_item",sample_conv_add_item, ARG3(2,STR,STR,STR), smp_check_add_item, SMP_T_STR, SMP_T_STR },
+ { "debug", sample_conv_debug, ARG2(0,STR,STR), smp_check_debug, SMP_T_ANY, SMP_T_ANY },
+ { "b64dec", sample_conv_base642bin, 0, NULL, SMP_T_STR, SMP_T_BIN },
+ { "base64", sample_conv_bin2base64, 0, NULL, SMP_T_BIN, SMP_T_STR },
+ { "concat", sample_conv_concat, ARG3(1,STR,STR,STR), smp_check_concat, SMP_T_STR, SMP_T_STR },
+ { "ub64enc", sample_conv_bin2base64url,0, NULL, SMP_T_BIN, SMP_T_STR },
+ { "ub64dec", sample_conv_base64url2bin,0, NULL, SMP_T_STR, SMP_T_BIN },
+ { "upper", sample_conv_str2upper, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "lower", sample_conv_str2lower, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "length", sample_conv_length, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { "be2dec", sample_conv_be2dec, ARG3(1,STR,SINT,SINT), sample_conv_be2dec_check, SMP_T_BIN, SMP_T_STR },
+ { "be2hex", sample_conv_be2hex, ARG3(1,STR,SINT,SINT), sample_conv_be2hex_check, SMP_T_BIN, SMP_T_STR },
+ { "hex", sample_conv_bin2hex, 0, NULL, SMP_T_BIN, SMP_T_STR },
+ { "hex2i", sample_conv_hex2int, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { "ipmask", sample_conv_ipmask, ARG2(1,MSK4,MSK6), NULL, SMP_T_ADDR, SMP_T_IPV4 },
+ { "ltime", sample_conv_ltime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "utime", sample_conv_utime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "crc32", sample_conv_crc32, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "crc32c", sample_conv_crc32c, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "djb2", sample_conv_djb2, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "sdbm", sample_conv_sdbm, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "wt6", sample_conv_wt6, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "xxh3", sample_conv_xxh3, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "xxh32", sample_conv_xxh32, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "xxh64", sample_conv_xxh64, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "json", sample_conv_json, ARG1(1,STR), sample_conv_json_check, SMP_T_STR, SMP_T_STR },
+ { "bytes", sample_conv_bytes, ARG2(1,SINT,SINT), NULL, SMP_T_BIN, SMP_T_BIN },
+ { "field", sample_conv_field, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
+ { "word", sample_conv_word, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
+ { "regsub", sample_conv_regsub, ARG3(2,REG,STR,STR), sample_conv_regsub_check, SMP_T_STR, SMP_T_STR },
+ { "sha1", sample_conv_sha1, 0, NULL, SMP_T_BIN, SMP_T_BIN },
+ { "strcmp", sample_conv_strcmp, ARG1(1,STR), smp_check_strcmp, SMP_T_STR, SMP_T_SINT },
+ { "host_only", sample_conv_host_only, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "port_only", sample_conv_port_only, 0, NULL, SMP_T_STR, SMP_T_SINT },
+
+ /* gRPC converters. */
+ { "ungrpc", sample_conv_ungrpc, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN },
+ { "protobuf", sample_conv_protobuf, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN },
+
+ /* FIX converters */
+ { "fix_is_valid", sample_conv_fix_is_valid, 0, NULL, SMP_T_BIN, SMP_T_BOOL },
+ { "fix_tag_value", sample_conv_fix_tag_value, ARG1(1,STR), sample_conv_fix_value_check, SMP_T_BIN, SMP_T_BIN },
+
+ /* MQTT converters */
+ { "mqtt_is_valid", sample_conv_mqtt_is_valid, 0, NULL, SMP_T_BIN, SMP_T_BOOL },
+ { "mqtt_field_value", sample_conv_mqtt_field_value, ARG2(2,STR,STR), sample_conv_mqtt_field_value_check, SMP_T_BIN, SMP_T_STR },
+
+ { "iif", sample_conv_iif, ARG2(2, STR, STR), NULL, SMP_T_BOOL, SMP_T_STR },
+
+ { "and", sample_conv_binary_and, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "or", sample_conv_binary_or, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "xor", sample_conv_binary_xor, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "cpl", sample_conv_binary_cpl, 0, NULL, SMP_T_SINT, SMP_T_SINT },
+ { "bool", sample_conv_arith_bool, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "not", sample_conv_arith_not, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "odd", sample_conv_arith_odd, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "even", sample_conv_arith_even, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "add", sample_conv_arith_add, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "sub", sample_conv_arith_sub, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "mul", sample_conv_arith_mul, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "div", sample_conv_arith_div, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "mod", sample_conv_arith_mod, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "neg", sample_conv_arith_neg, 0, NULL, SMP_T_SINT, SMP_T_SINT },
+
+ { "htonl", sample_conv_htonl, 0, NULL, SMP_T_SINT, SMP_T_BIN },
+ { "cut_crlf", sample_conv_cut_crlf, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "ltrim", sample_conv_ltrim, ARG1(1,STR), NULL, SMP_T_STR, SMP_T_STR },
+ { "rtrim", sample_conv_rtrim, ARG1(1,STR), NULL, SMP_T_STR, SMP_T_STR },
+ { "json_query", sample_conv_json_query, ARG2(1,STR,STR), sample_check_json_query , SMP_T_STR, SMP_T_ANY },
+
+#ifdef USE_OPENSSL
+ /* JSON Web Token converters */
+ { "jwt_header_query", sample_conv_jwt_header_query, ARG2(0,STR,STR), sample_conv_jwt_query_check, SMP_T_BIN, SMP_T_ANY },
+ { "jwt_payload_query", sample_conv_jwt_payload_query, ARG2(0,STR,STR), sample_conv_jwt_query_check, SMP_T_BIN, SMP_T_ANY },
+ { "jwt_verify", sample_conv_jwt_verify, ARG2(2,STR,STR), sample_conv_jwt_verify_check, SMP_T_BIN, SMP_T_SINT },
+#endif
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
diff --git a/src/server.c b/src/server.c
new file mode 100644
index 0000000..d701eae
--- /dev/null
+++ b/src/server.c
@@ -0,0 +1,6074 @@
+/*
+ * Server management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2008 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+#include <netinet/tcp.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet-t.h>
+#include <haproxy/backend.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/connection.h>
+#include <haproxy/dict-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/mailers.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+static void srv_update_status(struct server *s);
+static int srv_apply_lastaddr(struct server *srv, int *err_code);
+static void srv_cleanup_connections(struct server *srv);
+
+/* extra keywords used as values for other arguments; they are offered as
+ * suggestions for mistyped words.
+ */
+static const char *extra_kw_list[] = {
+ "ipv4", "ipv6", "legacy", "octet-count",
+ "fail-check", "sudden-death", "mark-down",
+ NULL /* must be last */
+};
+
+/* List head of all known server keywords */
+struct srv_kw_list srv_keywords = {
+ .list = LIST_HEAD_INIT(srv_keywords.list)
+};
+
+__decl_thread(HA_SPINLOCK_T idle_conn_srv_lock);
+struct eb_root idle_conn_srv = EB_ROOT;
+struct task *idle_conn_task __read_mostly = NULL;
+struct list servers_list = LIST_HEAD_INIT(servers_list);
+
+/* The server names dictionary */
+struct dict server_key_dict = {
+ .name = "server keys",
+ .values = EB_ROOT_UNIQUE,
+};
+
+int srv_downtime(const struct server *s)
+{
+ if ((s->cur_state != SRV_ST_STOPPED) || s->last_change >= now.tv_sec) // ignore negative time
+ return s->down_time;
+
+ return now.tv_sec - s->last_change + s->down_time;
+}
+
+int srv_lastsession(const struct server *s)
+{
+ if (s->counters.last_sess)
+ return now.tv_sec - s->counters.last_sess;
+
+ return -1;
+}
+
+int srv_getinter(const struct check *check)
+{
+ const struct server *s = check->server;
+
+ if ((check->state & CHK_ST_CONFIGURED) && (check->health == check->rise + check->fall - 1))
+ return check->inter;
+
+ if ((s->next_state == SRV_ST_STOPPED) && check->health == 0)
+ return (check->downinter)?(check->downinter):(check->inter);
+
+ return (check->fastinter)?(check->fastinter):(check->inter);
+}
+
+/*
+ * Check that we did not get a hash collision.
+ * Unlikely, but it can happen. The server's proxy must be at least
+ * read-locked.
+ */
+static inline void srv_check_for_dup_dyncookie(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ struct server *tmpserv;
+
+ for (tmpserv = p->srv; tmpserv != NULL;
+ tmpserv = tmpserv->next) {
+ if (tmpserv == s)
+ continue;
+ if (tmpserv->next_admin & SRV_ADMF_FMAINT)
+ continue;
+ if (tmpserv->cookie &&
+ strcmp(tmpserv->cookie, s->cookie) == 0) {
+ ha_warning("We generated two equal cookies for two different servers.\n"
+ "Please change the secret key for '%s'.\n",
+ s->proxy->id);
+ }
+ }
+
+}
+
+/*
+ * Must be called with the server lock held, and will read-lock the proxy.
+ */
+void srv_set_dyncookie(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ char *tmpbuf;
+ unsigned long long hash_value;
+ size_t key_len;
+ size_t buffer_len;
+ int addr_len;
+ int port;
+
+ HA_RWLOCK_RDLOCK(PROXY_LOCK, &p->lock);
+
+ if ((s->flags & SRV_F_COOKIESET) ||
+ !(s->proxy->ck_opts & PR_CK_DYNAMIC) ||
+ s->proxy->dyncookie_key == NULL)
+ goto out;
+ key_len = strlen(p->dyncookie_key);
+
+ if (s->addr.ss_family != AF_INET &&
+ s->addr.ss_family != AF_INET6)
+ goto out;
+ /*
+ * Buffer to calculate the cookie value.
+ * The buffer contains the secret key + the server IP address
+ * + the TCP port.
+ */
+ addr_len = (s->addr.ss_family == AF_INET) ? 4 : 16;
+ /*
+ * The TCP port should use only 2 bytes, but is stored in
+ * an unsigned int in struct server, so let's use 4, to be
+ * on the safe side.
+ */
+ buffer_len = key_len + addr_len + 4;
+ tmpbuf = trash.area;
+ memcpy(tmpbuf, p->dyncookie_key, key_len);
+ memcpy(&(tmpbuf[key_len]),
+ s->addr.ss_family == AF_INET ?
+ (void *)&((struct sockaddr_in *)&s->addr)->sin_addr.s_addr :
+ (void *)&(((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr),
+ addr_len);
+ /*
+ * Make sure it's the same across all the load balancers,
+ * no matter their endianness.
+ */
+ port = htonl(s->svc_port);
+ memcpy(&tmpbuf[key_len + addr_len], &port, 4);
+ hash_value = XXH64(tmpbuf, buffer_len, 0);
+ memprintf(&s->cookie, "%016llx", hash_value);
+ if (!s->cookie)
+ goto out;
+ s->cklen = 16;
+
+ /* Don't bother checking if the dyncookie is duplicated if
+ * the server is marked as "disabled", maybe it doesn't have
+ * its real IP yet, but just a placeholder.
+ */
+ if (!(s->next_admin & SRV_ADMF_FMAINT))
+ srv_check_for_dup_dyncookie(s);
+ out:
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &p->lock);
+}
+
+/* Returns true if it's possible to reuse an idle connection from server <srv>
+ * for a websocket stream. This is the case if server is configured to use the
+ * same protocol for both HTTP and websocket streams. This depends on the value
+ * of "proto", "alpn" and "ws" keywords.
+ */
+int srv_check_reuse_ws(struct server *srv)
+{
+ if (srv->mux_proto || srv->use_ssl != 1 || !srv->ssl_ctx.alpn_str) {
+ /* explicit srv->mux_proto, no SSL or no ALPN : srv->mux_proto is
+ * used for mux selection.
+ */
+ const struct ist srv_mux = srv->mux_proto ?
+ srv->mux_proto->token : IST_NULL;
+
+ switch (srv->ws) {
+ /* "auto" means use the same protocol : reuse is possible. */
+ case SRV_WS_AUTO:
+ return 1;
+
+ /* "h2" means use h2 for websocket : reuse is possible if
+ * server mux is h2.
+ */
+ case SRV_WS_H2:
+ if (srv->mux_proto && isteq(srv_mux, ist("h2")))
+ return 1;
+ break;
+
+ /* "h1" means use h1 for websocket : reuse is possible if
+ * server mux is h1.
+ */
+ case SRV_WS_H1:
+ if (!srv->mux_proto || isteq(srv_mux, ist("h1")))
+ return 1;
+ break;
+ }
+ }
+ else {
+ /* ALPN selection.
+ * Based on the assumption that only "h2" and "http/1.1" token
+ * are used on server ALPN.
+ */
+ const struct ist alpn = ist2(srv->ssl_ctx.alpn_str,
+ srv->ssl_ctx.alpn_len);
+
+ switch (srv->ws) {
+ case SRV_WS_AUTO:
+ /* for auto mode, consider reuse as possible if the
+ * server uses a single protocol ALPN
+ */
+ if (!istchr(alpn, ','))
+ return 1;
+ break;
+
+ case SRV_WS_H2:
+ return isteq(alpn, ist("\x02h2"));
+
+ case SRV_WS_H1:
+ return isteq(alpn, ist("\x08http/1.1"));
+ }
+ }
+
+ return 0;
+}
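+
+/* Illustrative configuration (address and names are hypothetical): with the
+ * following server line, websocket streams may reuse idle connections since
+ * "ws h2" matches the single-protocol "h2" ALPN:
+ *   server s1 10.0.0.1:443 ssl alpn h2 ws h2
+ */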
+
+/* Return the proto to use for a websocket stream on <srv> without ALPN. NULL
+ * is a valid value indicating to use the fallback mux.
+ */
+const struct mux_ops *srv_get_ws_proto(struct server *srv)
+{
+ const struct mux_proto_list *mux = NULL;
+
+ switch (srv->ws) {
+ case SRV_WS_AUTO:
+ mux = srv->mux_proto;
+ break;
+
+ case SRV_WS_H1:
+ mux = get_mux_proto(ist("h1"));
+ break;
+
+ case SRV_WS_H2:
+ mux = get_mux_proto(ist("h2"));
+ break;
+ }
+
+ return mux ? mux->mux : NULL;
+}
+
+/*
+ * Must be called with the server lock held. The server is first removed from
+ * the proxy tree if it was already attached. If <reattach> is true, the server
+ * will then be attached in the proxy tree. The proxy lock is held to
+ * manipulate the tree.
+ */
+static void srv_set_addr_desc(struct server *s, int reattach)
+{
+ struct proxy *p = s->proxy;
+ char *key;
+
+ key = sa2str(&s->addr, s->svc_port, s->flags & SRV_F_MAPPORTS);
+
+ if (s->addr_node.key) {
+ if (key && strcmp(key, s->addr_node.key) == 0) {
+ free(key);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+ ebpt_delete(&s->addr_node);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+
+ free(s->addr_node.key);
+ }
+
+ s->addr_node.key = key;
+
+ if (reattach) {
+ if (s->addr_node.key) {
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+ ebis_insert(&p->used_server_addr, &s->addr_node);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ }
+ }
+}
+
+/*
+ * Registers the server keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void srv_register_keywords(struct srv_kw_list *kwl)
+{
+ LIST_APPEND(&srv_keywords.list, &kwl->list);
+}
+
+/* Return a pointer to the server keyword <kw>, or NULL if not found. If the
+ * keyword is found with a NULL ->parse() function, then an attempt is made to
+ * find one with a valid ->parse() function. This way it is possible to declare
+ * platform-dependent, known keywords as NULL, then only declare them as valid
+ * if some options are met. Note that if the requested keyword contains an
+ * opening parenthesis, everything from this point is ignored.
+ */
+struct srv_kw *srv_find_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct srv_kw_list *kwl;
+ struct srv_kw *ret = NULL;
+
+ kwend = strchr(kw, '(');
+ if (!kwend)
+ kwend = kw + strlen(kw);
+
+ list_for_each_entry(kwl, &srv_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0) {
+ if (kwl->kw[index].parse)
+ return &kwl->kw[index]; /* found it !*/
+ else
+ ret = &kwl->kw[index]; /* may be OK */
+ }
+ }
+ }
+ return ret;
+}
+
+/* Dumps all registered "server" keywords to the <out> string pointer. The
+ * unsupported keywords are only dumped if their supported form was not
+ * found.
+ */
+void srv_dump_kws(char **out)
+{
+ struct srv_kw_list *kwl;
+ int index;
+
+ if (!out)
+ return;
+
+ *out = NULL;
+ list_for_each_entry(kwl, &srv_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].parse ||
+ srv_find_kw(kwl->kw[index].kw) == &kwl->kw[index]) {
+ memprintf(out, "%s[%4s] %s%s%s%s\n", *out ? *out : "",
+ kwl->scope,
+ kwl->kw[index].kw,
+ kwl->kw[index].skip ? " <arg>" : "",
+ kwl->kw[index].default_ok ? " [dflt_ok]" : "",
+ kwl->kw[index].parse ? "" : " (not supported)");
+ }
+ }
+ }
+}
+
+/* Try to find in srv_keywords the word that looks closest to <word> by counting
+ * transitions between letters, digits and other characters. The static
+ * extra_kw_list defined above is searched as well. Will return the best
+ * matching word if found, otherwise NULL.
+ */
+static const char *srv_find_best_kw(const char *word)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const struct srv_kw_list *kwl;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ const char **extra;
+ int index;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, &srv_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = kwl->kw[index].kw;
+ }
+ }
+ }
+
+ for (extra = extra_kw_list; *extra; extra++) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = *extra;
+ }
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+
+ return best_ptr;
+}
+
+/* Parse the "backup" server keyword */
+static int srv_parse_backup(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags |= SRV_F_BACKUP;
+ return 0;
+}
+
+
+/* Parse the "cookie" server keyword */
+static int srv_parse_cookie(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->cookie);
+ newsrv->cookie = strdup(arg);
+ newsrv->cklen = strlen(arg);
+ newsrv->flags |= SRV_F_COOKIESET;
+ return 0;
+}
+
+/* Parse the "disabled" server keyword */
+static int srv_parse_disabled(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->next_admin |= SRV_ADMF_CMAINT | SRV_ADMF_FMAINT;
+ newsrv->next_state = SRV_ST_STOPPED;
+ newsrv->check.state |= CHK_ST_PAUSED;
+ newsrv->check.health = 0;
+ return 0;
+}
+
+/* Parse the "enabled" server keyword */
+static int srv_parse_enabled(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->next_admin &= ~SRV_ADMF_CMAINT & ~SRV_ADMF_FMAINT;
+ newsrv->next_state = SRV_ST_RUNNING;
+ newsrv->check.state &= ~CHK_ST_PAUSED;
+ newsrv->check.health = newsrv->check.rise;
+ return 0;
+}
+
+/* Parse the "error-limit" server keyword */
+static int srv_parse_error_limit(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects an integer argument.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->consecutive_errors_limit = atoi(args[*cur_arg + 1]);
+
+ if (newsrv->consecutive_errors_limit <= 0) {
+ memprintf(err, "%s has to be > 0.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "ws" keyword */
+static int srv_parse_ws(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (!args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects 'auto', 'h1' or 'h2' value", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[*cur_arg + 1], "h1") == 0) {
+ newsrv->ws = SRV_WS_H1;
+ }
+ else if (strcmp(args[*cur_arg + 1], "h2") == 0) {
+ newsrv->ws = SRV_WS_H2;
+ }
+ else if (strcmp(args[*cur_arg + 1], "auto") == 0) {
+ newsrv->ws = SRV_WS_AUTO;
+ }
+ else {
+ memprintf(err, "'%s' has to be 'auto', 'h1' or 'h2'", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "init-addr" server keyword */
+static int srv_parse_init_addr(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *p, *end;
+ int done;
+ struct sockaddr_storage sa;
+
+ newsrv->init_addr_methods = 0;
+ memset(&newsrv->init_addr, 0, sizeof(newsrv->init_addr));
+
+ for (p = args[*cur_arg + 1]; *p; p = end) {
+ /* cut on next comma */
+ for (end = p; *end && *end != ','; end++);
+ if (*end)
+ *(end++) = 0;
+
+ memset(&sa, 0, sizeof(sa));
+ if (strcmp(p, "libc") == 0) {
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_LIBC);
+ }
+ else if (strcmp(p, "last") == 0) {
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_LAST);
+ }
+ else if (strcmp(p, "none") == 0) {
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_NONE);
+ }
+ else if (str2ip2(p, &sa, 0)) {
+ if (is_addr(&newsrv->init_addr)) {
+ memprintf(err, "'%s' : initial address already specified, cannot add '%s'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->init_addr = sa;
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_IP);
+ }
+ else {
+ memprintf(err, "'%s' : unknown init-addr method '%s', supported methods are 'libc', 'last', 'none'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (!done) {
+ memprintf(err, "'%s' : too many init-addr methods when trying to add '%s'",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ return 0;
+}
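+
+/* Usage sketch (illustrative; names are hypothetical): try the last known
+ * address from the state file, then a libc resolution, and start without an
+ * address if both fail:
+ *   server app1 app1.example.com:80 check init-addr last,libc,none
+ */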
+
+/* Parse the "log-proto" server keyword */
+static int srv_parse_log_proto(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "legacy") == 0)
+ newsrv->log_proto = SRV_LOG_PROTO_LEGACY;
+ else if (strcmp(args[*cur_arg + 1], "octet-count") == 0)
+ newsrv->log_proto = SRV_LOG_PROTO_OCTET_COUNTING;
+ else {
+ memprintf(err, "'%s' expects one of 'legacy' or 'octet-count' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "maxconn" server keyword */
+static int srv_parse_maxconn(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->maxconn = atol(args[*cur_arg + 1]);
+ return 0;
+}
+
+/* Parse the "maxqueue" server keyword */
+static int srv_parse_maxqueue(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->maxqueue = atol(args[*cur_arg + 1]);
+ return 0;
+}
+
+/* Parse the "minconn" server keyword */
+static int srv_parse_minconn(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->minconn = atol(args[*cur_arg + 1]);
+ return 0;
+}
+
+static int srv_parse_max_reuse(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->max_reuse = atoi(arg);
+
+ return 0;
+}
+
+static int srv_parse_pool_purge_delay(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ const char *res;
+ char *arg;
+ unsigned int time;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ res = parse_time_err(arg, &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n",
+ *res, args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->pool_purge_delay = time;
+
+ return 0;
+}
+
+static int srv_parse_pool_low_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->low_idle_conns = atoi(arg);
+ return 0;
+}
+
+static int srv_parse_pool_max_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->max_idle_conns = atoi(arg);
+ if ((int)newsrv->max_idle_conns < -1) {
+ memprintf(err, "'%s' must be >= -1", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "id" server keyword */
+static int srv_parse_id(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ struct eb32_node *node;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : expects an integer argument", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->puid = atol(args[*cur_arg + 1]);
+ newsrv->conf.id.key = newsrv->puid;
+
+ if (newsrv->puid <= 0) {
+ memprintf(err, "'%s' : custom id has to be > 0", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ node = eb32_lookup(&curproxy->conf.used_server_id, newsrv->puid);
+ if (node) {
+ struct server *target = container_of(node, struct server, conf.id);
+ memprintf(err, "'%s' : custom id %d already used at %s:%d ('server %s')",
+ args[*cur_arg], newsrv->puid, target->conf.file, target->conf.line,
+ target->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->flags |= SRV_F_FORCED_ID;
+ return 0;
+}
+
+/* Parse the "namespace" server keyword */
+static int srv_parse_namespace(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+#ifdef USE_NS
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' : expects <name> as argument", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(arg, "*") == 0) {
+ /* Use the namespace associated with the connection (if present). */
+ newsrv->flags |= SRV_F_USE_NS_FROM_PP;
+ return 0;
+ }
+
+ /*
+ * As this parser may be called several times for the same 'default-server'
+ * object, or for a new 'server' instance deriving from a 'default-server'
+ * one with SRV_F_USE_NS_FROM_PP flag enabled, let's reset it.
+ */
+ newsrv->flags &= ~SRV_F_USE_NS_FROM_PP;
+
+ newsrv->netns = netns_store_lookup(arg, strlen(arg));
+ if (!newsrv->netns)
+ newsrv->netns = netns_store_insert(arg);
+
+ if (!newsrv->netns) {
+ memprintf(err, "Cannot open namespace '%s'", arg);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+#else
+ memprintf(err, "'%s': '%s' option not implemented", args[0], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+/* Parse the "no-backup" server keyword */
+static int srv_parse_no_backup(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags &= ~SRV_F_BACKUP;
+ return 0;
+}
+
+
+/* Disable server PROXY protocol flags. */
+static inline int srv_disable_pp_flags(struct server *srv, unsigned int flags)
+{
+ srv->pp_opts &= ~flags;
+ return 0;
+}
+
+/* Parse the "no-send-proxy" server keyword */
+static int srv_parse_no_send_proxy(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_disable_pp_flags(newsrv, SRV_PP_V1);
+}
+
+/* Parse the "no-send-proxy-v2" server keyword */
+static int srv_parse_no_send_proxy_v2(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_disable_pp_flags(newsrv, SRV_PP_V2);
+}
+
+/* Parse the "no-tfo" server keyword */
+static int srv_parse_no_tfo(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags &= ~SRV_F_FASTOPEN;
+ return 0;
+}
+
+/* Parse the "non-stick" server keyword */
+static int srv_parse_non_stick(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags |= SRV_F_NON_STICK;
+ return 0;
+}
+
+/* Enable server PROXY protocol flags. */
+static inline int srv_enable_pp_flags(struct server *srv, unsigned int flags)
+{
+ srv->pp_opts |= flags;
+ return 0;
+}
+/* parse the "proto" server keyword */
+static int srv_parse_proto(char **args, int *cur_arg,
+ struct proxy *px, struct server *newsrv, char **err)
+{
+ struct ist proto;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ proto = ist(args[*cur_arg + 1]);
+ newsrv->mux_proto = get_mux_proto(proto);
+ if (!newsrv->mux_proto) {
+ memprintf(err, "'%s' : unknown MUX protocol '%s'", args[*cur_arg], args[*cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* parse the "proxy-v2-options" */
+static int srv_parse_proxy_v2_options(char **args, int *cur_arg,
+ struct proxy *px, struct server *newsrv, char **err)
+{
+ char *p, *n;
+ for (p = args[*cur_arg+1]; p; p = n) {
+ n = strchr(p, ',');
+ if (n)
+ *n++ = '\0';
+ if (strcmp(p, "ssl") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ } else if (strcmp(p, "cert-cn") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_CN;
+ } else if (strcmp(p, "cert-key") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_KEY_ALG;
+ } else if (strcmp(p, "cert-sig") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_SIG_ALG;
+ } else if (strcmp(p, "ssl-cipher") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_CIPHER;
+ } else if (strcmp(p, "authority") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_AUTHORITY;
+ } else if (strcmp(p, "crc32c") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_CRC32C;
+ } else if (strcmp(p, "unique-id") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_UNIQUE_ID;
+ } else
+ goto fail;
+ }
+ return 0;
+ fail:
+ if (err)
+ memprintf(err, "'%s' : proxy v2 option not implemented", p);
+ return ERR_ALERT | ERR_FATAL;
+}
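+
+/* Usage sketch (illustrative; the address is hypothetical):
+ *   server s1 10.0.0.1:443 send-proxy-v2 proxy-v2-options ssl,cert-cn,unique-id
+ */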
+
+/* Parse the "observe" server keyword */
+static int srv_parse_observe(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <mode> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(arg, "none") == 0) {
+ newsrv->observe = HANA_OBS_NONE;
+ }
+ else if (strcmp(arg, "layer4") == 0) {
+ newsrv->observe = HANA_OBS_LAYER4;
+ }
+ else if (strcmp(arg, "layer7") == 0) {
+ if (curproxy->mode != PR_MODE_HTTP) {
+ memprintf(err, "'%s' can only be used in http proxies.\n", arg);
+ return ERR_ALERT;
+ }
+ newsrv->observe = HANA_OBS_LAYER7;
+ }
+ else {
+ memprintf(err, "'%s' expects one of 'none', 'layer4', 'layer7' "
+ "but got '%s'\n", args[*cur_arg], arg);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "on-error" server keyword */
+static int srv_parse_on_error(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "fastinter") == 0)
+ newsrv->onerror = HANA_ONERR_FASTINTER;
+ else if (strcmp(args[*cur_arg + 1], "fail-check") == 0)
+ newsrv->onerror = HANA_ONERR_FAILCHK;
+ else if (strcmp(args[*cur_arg + 1], "sudden-death") == 0)
+ newsrv->onerror = HANA_ONERR_SUDDTH;
+ else if (strcmp(args[*cur_arg + 1], "mark-down") == 0)
+ newsrv->onerror = HANA_ONERR_MARKDWN;
+ else {
+ memprintf(err, "'%s' expects one of 'fastinter', "
+ "'fail-check', 'sudden-death' or 'mark-down' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "on-marked-down" server keyword */
+static int srv_parse_on_marked_down(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "shutdown-sessions") == 0)
+ newsrv->onmarkeddown = HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS;
+ else {
+ memprintf(err, "'%s' expects 'shutdown-sessions' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "on-marked-up" server keyword */
+static int srv_parse_on_marked_up(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "shutdown-backup-sessions") == 0)
+ newsrv->onmarkedup = HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS;
+ else {
+ memprintf(err, "'%s' expects 'shutdown-backup-sessions' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "redir" server keyword */
+static int srv_parse_redir(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <prefix> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->rdr_pfx);
+ newsrv->rdr_pfx = strdup(arg);
+ newsrv->rdr_len = strlen(arg);
+
+ return 0;
+}
+
+/* Parse the "resolvers" server keyword */
+static int srv_parse_resolvers(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ free(newsrv->resolvers_id);
+ newsrv->resolvers_id = strdup(args[*cur_arg + 1]);
+ return 0;
+}
+
+/* Parse the "resolve-net" server keyword */
+static int srv_parse_resolve_net(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *p, *e;
+ unsigned char mask;
+ struct resolv_options *opt;
+
+ if (!args[*cur_arg + 1] || args[*cur_arg + 1][0] == '\0') {
+ memprintf(err, "'%s' expects a list of networks.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ opt = &newsrv->resolv_opts;
+
+ /* Split the argument on commas and convert each element from an IPv4
+ * or IPv6 network string into an in_addr or in6_addr.
+ */
+ p = args[*cur_arg + 1];
+ e = p;
+ while (*p != '\0') {
+ /* If no room available, return error. */
+ if (opt->pref_net_nb >= SRV_MAX_PREF_NET) {
+ memprintf(err, "'%s' exceed %d networks.",
+ args[*cur_arg], SRV_MAX_PREF_NET);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ /* look for end or comma. */
+ while (*e != ',' && *e != '\0')
+ e++;
+ if (*e == ',') {
+ *e = '\0';
+ e++;
+ }
+ if (str2net(p, 0, &opt->pref_net[opt->pref_net_nb].addr.in4,
+ &opt->pref_net[opt->pref_net_nb].mask.in4)) {
+ /* Input string parsed as an ipv4 network. */
+ opt->pref_net[opt->pref_net_nb].family = AF_INET;
+ } else if (str62net(p, &opt->pref_net[opt->pref_net_nb].addr.in6,
+ &mask)) {
+ /* Input string parsed as an ipv6 network. */
+ len2mask6(mask, &opt->pref_net[opt->pref_net_nb].mask.in6);
+ opt->pref_net[opt->pref_net_nb].family = AF_INET6;
+ } else {
+ /* All network conversions failed, return an error. */
+ memprintf(err, "'%s' invalid network '%s'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ opt->pref_net_nb++;
+ p = e;
+ }
+
+ return 0;
+}
+
+/* Parse the "resolve-opts" server keyword */
+static int srv_parse_resolve_opts(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *p, *end;
+
+ for (p = args[*cur_arg + 1]; *p; p = end) {
+ /* cut on next comma */
+ for (end = p; *end && *end != ','; end++);
+ if (*end)
+ *(end++) = 0;
+
+ if (strcmp(p, "allow-dup-ip") == 0) {
+ newsrv->resolv_opts.accept_duplicate_ip = 1;
+ }
+ else if (strcmp(p, "ignore-weight") == 0) {
+ newsrv->resolv_opts.ignore_weight = 1;
+ }
+ else if (strcmp(p, "prevent-dup-ip") == 0) {
+ newsrv->resolv_opts.accept_duplicate_ip = 0;
+ }
+ else {
+ memprintf(err, "'%s' : unknown resolve-opts option '%s', supported options are 'allow-dup-ip', 'ignore-weight', and 'prevent-dup-ip'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ return 0;
+}
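+
+/* Illustrative note (not part of the original sources): a hedged example of
+ * the resolver-related keywords parsed above, on a hypothetical server line:
+ *
+ *     server srv1 app.example.com:443 resolvers mydns \
+ *            resolve-net 10.0.0.0/8,fd00::/8 resolve-opts allow-dup-ip
+ *
+ * srv_parse_resolve_net() splits the comma-separated list and fills one
+ * pref_net[] slot per network (here one IPv4 and one IPv6), while
+ * srv_parse_resolve_opts() toggles one resolv_opts flag per token.
+ */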
+
+/* Parse the "resolve-prefer" server keyword */
+static int srv_parse_resolve_prefer(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "ipv4") == 0)
+ newsrv->resolv_opts.family_prio = AF_INET;
+ else if (strcmp(args[*cur_arg + 1], "ipv6") == 0)
+ newsrv->resolv_opts.family_prio = AF_INET6;
+ else {
+ memprintf(err, "'%s' expects either ipv4 or ipv6 as argument.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "send-proxy" server keyword */
+static int srv_parse_send_proxy(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_enable_pp_flags(newsrv, SRV_PP_V1);
+}
+
+/* Parse the "send-proxy-v2" server keyword */
+static int srv_parse_send_proxy_v2(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_enable_pp_flags(newsrv, SRV_PP_V2);
+}
+
+/* Parse the "slowstart" server keyword */
+static int srv_parse_slowstart(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ /* slowstart is stored in seconds */
+ unsigned int val;
+ const char *time_err = parse_time_err(args[*cur_arg + 1], &val, TIME_UNIT_MS);
+
+ if (time_err == PARSE_TIME_OVER) {
+ memprintf(err, "overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], newsrv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (time_err == PARSE_TIME_UNDER) {
+ memprintf(err, "underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], newsrv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (time_err) {
+ memprintf(err, "unexpected character '%c' in 'slowstart' argument of server %s.",
+ *time_err, newsrv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->slowstart = (val + 999) / 1000;
+
+ return 0;
+}
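+
+/* Illustrative note (not part of the original sources): the parser above
+ * reads the value in milliseconds and stores it rounded up to seconds, so
+ * "slowstart 1500" yields (1500 + 999) / 1000 = 2 seconds and
+ * "slowstart 30s" yields (30000 + 999) / 1000 = 30 seconds.
+ */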
+
+/* Parse the "source" server keyword */
+static int srv_parse_source(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *errmsg;
+ int port_low, port_high;
+ struct sockaddr_storage *sk;
+
+ errmsg = NULL;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects <addr>[:<port>[-<port>]], and optionally '%s' <addr>, "
+ "and '%s' <name> as argument.", args[*cur_arg], "usesrc", "interface");
+ goto err;
+ }
+
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(args[*cur_arg + 1], NULL, &port_low, &port_high, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_RANGE | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(err, "'%s %s' : %s", args[*cur_arg], args[*cur_arg + 1], errmsg);
+ goto err;
+ }
+
+ newsrv->conn_src.opts |= CO_SRC_BIND;
+ newsrv->conn_src.source_addr = *sk;
+
+ if (port_low != port_high) {
+ int i;
+
+ newsrv->conn_src.sport_range = port_range_alloc_range(port_high - port_low + 1);
+ if (!newsrv->conn_src.sport_range) {
+ ha_alert("Server '%s': Out of memory (sport_range)\n", args[0]);
+ goto err;
+ }
+ for (i = 0; i < newsrv->conn_src.sport_range->size; i++)
+ newsrv->conn_src.sport_range->ports[i] = port_low + i;
+ }
+
+ *cur_arg += 2;
+ while (*(args[*cur_arg])) {
+ if (strcmp(args[*cur_arg], "usesrc") == 0) { /* address to use outside */
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (!*args[*cur_arg + 1]) {
+ ha_alert("'usesrc' expects <addr>[:<port>], 'client', 'clientip', "
+ "or 'hdr_ip(name,#)' as argument.\n");
+ goto err;
+ }
+ if (strcmp(args[*cur_arg + 1], "client") == 0) {
+ newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_CLI;
+ }
+ else if (strcmp(args[*cur_arg + 1], "clientip") == 0) {
+ newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_CIP;
+ }
+ else if (!strncmp(args[*cur_arg + 1], "hdr_ip(", 7)) {
+ char *name, *end;
+
+ name = args[*cur_arg + 1] + 7;
+ while (isspace((unsigned char)*name))
+ name++;
+
+ end = name;
+ while (*end && !isspace((unsigned char)*end) && *end != ',' && *end != ')')
+ end++;
+
+ newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_DYN;
+ free(newsrv->conn_src.bind_hdr_name);
+ newsrv->conn_src.bind_hdr_name = calloc(1, end - name + 1);
+ if (!newsrv->conn_src.bind_hdr_name) {
+ ha_alert("Server '%s': Out of memory (bind_hdr_name)\n", args[0]);
+ goto err;
+ }
+ newsrv->conn_src.bind_hdr_len = end - name;
+ memcpy(newsrv->conn_src.bind_hdr_name, name, end - name);
+ newsrv->conn_src.bind_hdr_name[end - name] = '\0';
+ newsrv->conn_src.bind_hdr_occ = -1;
+
+ /* now look for an occurrence number */
+ while (isspace((unsigned char)*end))
+ end++;
+ if (*end == ',') {
+ end++;
+ name = end;
+ if (*end == '-')
+ end++;
+ while (isdigit((unsigned char)*end))
+ end++;
+ newsrv->conn_src.bind_hdr_occ = strl2ic(name, end - name);
+ }
+
+ if (newsrv->conn_src.bind_hdr_occ < -MAX_HDR_HISTORY) {
+ ha_alert("usesrc hdr_ip(name,num) does not support occurrence"
+ " values lower than %d.\n", -MAX_HDR_HISTORY);
+ goto err;
+ }
+ }
+ else {
+ struct sockaddr_storage *sk;
+ int port1, port2;
+
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(args[*cur_arg + 1], NULL, &port1, &port2, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("'%s %s' : %s\n", args[*cur_arg], args[*cur_arg + 1], errmsg);
+ goto err;
+ }
+
+ newsrv->conn_src.tproxy_addr = *sk;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_ADDR;
+ }
+ global.last_checks |= LSTCHK_NETADM;
+ *cur_arg += 2;
+ continue;
+#else /* no TPROXY support */
+ ha_alert("'usesrc' not allowed here because support for TPROXY was not compiled in.\n");
+ goto err;
+#endif /* defined(CONFIG_HAP_TRANSPARENT) */
+ } /* "usesrc" */
+
+ if (strcmp(args[*cur_arg], "interface") == 0) { /* specifically bind to this interface */
+#ifdef SO_BINDTODEVICE
+ if (!*args[*cur_arg + 1]) {
+ ha_alert("'%s' : missing interface name.\n", args[0]);
+ goto err;
+ }
+ free(newsrv->conn_src.iface_name);
+ newsrv->conn_src.iface_name = strdup(args[*cur_arg + 1]);
+ newsrv->conn_src.iface_len = strlen(newsrv->conn_src.iface_name);
+ global.last_checks |= LSTCHK_NETADM;
+#else
+ ha_alert("'%s' : '%s' option not implemented.\n", args[0], args[*cur_arg]);
+ goto err;
+#endif
+ *cur_arg += 2;
+ continue;
+ }
+ /* this keyword is not an option of "source" */
+ break;
+ } /* while */
+
+ return 0;
+
+ err:
+ free(errmsg);
+ return ERR_ALERT | ERR_FATAL;
+}
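+
+/* Illustrative note (not part of the original sources): a hedged
+ * configuration example for the "source" parser above, assuming TPROXY and
+ * SO_BINDTODEVICE support are compiled in:
+ *
+ *     source 192.168.1.200:1024-1279 usesrc clientip interface eth1
+ *
+ * This binds outgoing connections to 192.168.1.200 with a 256-port source
+ * range, presents the client's address as the source, and forces traffic
+ * out through eth1. Both "usesrc" and "interface" raise LSTCHK_NETADM since
+ * they require network administration privileges.
+ */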
+
+/* Parse the "stick" server keyword */
+static int srv_parse_stick(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags &= ~SRV_F_NON_STICK;
+ return 0;
+}
+
+/* Parse the "track" server keyword */
+static int srv_parse_track(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'track' expects [<proxy>/]<server> as argument.");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->trackit);
+ newsrv->trackit = strdup(arg);
+
+ return 0;
+}
+
+/* Parse the "socks4" server keyword */
+static int srv_parse_socks4(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *errmsg;
+ int port_low, port_high;
+ struct sockaddr_storage *sk;
+
+ errmsg = NULL;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects <addr>:<port> as argument.", args[*cur_arg]);
+ goto err;
+ }
+
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(args[*cur_arg + 1], NULL, &port_low, &port_high, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(err, "'%s %s' : %s", args[*cur_arg], args[*cur_arg + 1], errmsg);
+ goto err;
+ }
+
+ newsrv->flags |= SRV_F_SOCKS4_PROXY;
+ newsrv->socks4_addr = *sk;
+
+ return 0;
+
+ err:
+ free(errmsg);
+ return ERR_ALERT | ERR_FATAL;
+}
+
+
+/* parse the "tfo" server keyword */
+static int srv_parse_tfo(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->flags |= SRV_F_FASTOPEN;
+ return 0;
+}
+
+/* parse the "usesrc" server keyword */
+static int srv_parse_usesrc(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ memprintf(err, "'%s' only allowed after a '%s' statement.",
+ "usesrc", "source");
+ return ERR_ALERT | ERR_FATAL;
+}
+
+/* parse the "weight" server keyword */
+static int srv_parse_weight(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ int w;
+
+ w = atol(args[*cur_arg + 1]);
+ if (w < 0 || w > SRV_UWGHT_MAX) {
+ memprintf(err, "weight of server %s is not between 0 and %d (%d).",
+ newsrv->id, SRV_UWGHT_MAX, w);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->uweight = newsrv->iweight = w;
+
+ return 0;
+}
+
+/* Shutdown all connections of a server. The caller must pass a termination
+ * code in <why>, which must be one of SF_ERR_* indicating the reason for the
+ * shutdown.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_shutdown_streams(struct server *srv, int why)
+{
+ struct stream *stream;
+ struct mt_list *elt1, elt2;
+ int thr;
+
+ for (thr = 0; thr < global.nbthread; thr++)
+ mt_list_for_each_entry_safe(stream, &srv->per_thr[thr].streams, by_srv, elt1, elt2)
+ if (stream->srv_conn == srv)
+ stream_shutdown(stream, why);
+}
+
+/* Shutdown all connections of all backup servers of a proxy. The caller must
+ * pass a termination code in <why>, which must be one of SF_ERR_* indicating
+ * the reason for the shutdown.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_shutdown_backup_streams(struct proxy *px, int why)
+{
+ struct server *srv;
+
+ for (srv = px->srv; srv != NULL; srv = srv->next)
+ if (srv->flags & SRV_F_BACKUP)
+ srv_shutdown_streams(srv, why);
+}
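+
+/* Illustrative note (not part of the original sources): the two helpers above
+ * back the "on-marked-down shutdown-sessions" and "on-marked-up
+ * shutdown-backup-sessions" keywords parsed earlier: upon a state change, the
+ * per-thread stream lists are walked and each stream attached to the server
+ * is shut down with an SF_ERR_* code (e.g. SF_ERR_DOWN).
+ */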
+
+/* Appends some information to a message string related to a server going UP or
+ * DOWN. If <forced> is null, no reason string is available and the server
+ * tracks another one, a "via" indication is added to show where the status
+ * came from. If <check> is non-null, an entire string describing the check
+ * result will be appended after a comma and a space (eg: to report some
+ * information from the check that changed the state). Otherwise the string is
+ * built from the state change data stored in the struct server, if present.
+ * If <xferred> is non-negative, some information about requeued sessions is
+ * provided.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_append_status(struct buffer *msg, struct server *s,
+ struct check *check, int xferred, int forced)
+{
+ short status = s->op_st_chg.status;
+ short code = s->op_st_chg.code;
+ long duration = s->op_st_chg.duration;
+ char *desc = s->op_st_chg.reason;
+
+ if (check) {
+ status = check->status;
+ code = check->code;
+ duration = check->duration;
+ desc = check->desc;
+ }
+
+ if (status != -1) {
+ chunk_appendf(msg, ", reason: %s", get_check_status_description(status));
+
+ if (status >= HCHK_STATUS_L57DATA)
+ chunk_appendf(msg, ", code: %d", code);
+
+ if (desc && *desc) {
+ struct buffer src;
+
+ chunk_appendf(msg, ", info: \"");
+
+ chunk_initlen(&src, desc, 0, strlen(desc));
+ chunk_asciiencode(msg, &src, '"');
+
+ chunk_appendf(msg, "\"");
+ }
+
+ if (duration >= 0)
+ chunk_appendf(msg, ", check duration: %ldms", duration);
+ }
+ else if (desc && *desc) {
+ chunk_appendf(msg, ", %s", desc);
+ }
+ else if (!forced && s->track) {
+ chunk_appendf(msg, " via %s/%s", s->track->proxy->id, s->track->id);
+ }
+
+ if (xferred >= 0) {
+ if (s->next_state == SRV_ST_STOPPED)
+ chunk_appendf(msg, ". %d active and %d backup servers left.%s"
+ " %d sessions active, %d requeued, %d remaining in queue",
+ s->proxy->srv_act, s->proxy->srv_bck,
+ (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+ s->cur_sess, xferred, s->queue.length);
+ else
+ chunk_appendf(msg, ". %d active and %d backup servers online.%s"
+ " %d sessions requeued, %d total in queue",
+ s->proxy->srv_act, s->proxy->srv_bck,
+ (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+ xferred, s->queue.length);
+ }
+}
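+
+/* Illustrative note (not part of the original sources): a plausible string
+ * appended by srv_append_status() for a server going DOWN after a failed
+ * layer7 check might read:
+ *
+ *   ", reason: Layer7 timeout, check duration: 2001ms. 0 active and 1 backup
+ *   servers left. Running on backup. 2 sessions active, 0 requeued,
+ *   0 remaining in queue"
+ *
+ * The exact wording depends on the check status and the proxy's counters.
+ */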
+
+/* Marks server <s> down, regardless of its checks' statuses. The server is
+ * registered in a list to postpone the counting of the remaining servers on
+ * the proxy and transfers queued streams whenever possible to other servers at
+ * a sync point. Maintenance servers are ignored. It stores the <reason> if
+ * non-null as the reason for going down or the available data from the check
+ * struct to recompute this reason later.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_stopped(struct server *s, const char *reason, struct check *check)
+{
+ struct server *srv;
+
+ if ((s->cur_admin & SRV_ADMF_MAINT) || s->next_state == SRV_ST_STOPPED)
+ return;
+
+ s->next_state = SRV_ST_STOPPED;
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = -1;
+ if (reason) {
+ strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason));
+ }
+ else if (check) {
+ strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason));
+ s->op_st_chg.code = check->code;
+ s->op_st_chg.status = check->status;
+ s->op_st_chg.duration = check->duration;
+ }
+
+ /* propagate changes */
+ srv_update_status(s);
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_stopped(srv, NULL, NULL);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* Marks server <s> up regardless of its checks' statuses and provided it isn't
+ * in maintenance. The server is registered in a list to postpone the counting
+ * of the remaining servers on the proxy and tries to grab requests from the
+ * proxy at a sync point. Maintenance servers are ignored. It stores the
+ * <reason> if non-null as the reason for going up, or the available data
+ * from the check struct to recompute this reason later.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_running(struct server *s, const char *reason, struct check *check)
+{
+ struct server *srv;
+
+ if (s->cur_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (s->next_state == SRV_ST_STARTING || s->next_state == SRV_ST_RUNNING)
+ return;
+
+ s->next_state = SRV_ST_STARTING;
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = -1;
+ if (reason) {
+ strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason));
+ }
+ else if (check) {
+ strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason));
+ s->op_st_chg.code = check->code;
+ s->op_st_chg.status = check->status;
+ s->op_st_chg.duration = check->duration;
+ }
+
+ if (s->slowstart <= 0)
+ s->next_state = SRV_ST_RUNNING;
+
+ /* propagate changes */
+ srv_update_status(s);
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_running(srv, NULL, NULL);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* Marks server <s> stopping regardless of its checks' statuses and provided it
+ * isn't in maintenance. The server is registered in a list to postpone the
+ * counting of the remaining servers on the proxy and tries to grab requests
+ * from the proxy. Maintenance servers are ignored. It stores the
+ * <reason> if non-null as the reason for the state change, or the available
+ * data from the check struct to recompute this reason later. Note that it
+ * makes use of the trash to build the log strings, so <reason> must not be
+ * placed there.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_stopping(struct server *s, const char *reason, struct check *check)
+{
+ struct server *srv;
+
+ if (s->cur_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (s->next_state == SRV_ST_STOPPING)
+ return;
+
+ s->next_state = SRV_ST_STOPPING;
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = -1;
+ if (reason) {
+ strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason));
+ }
+ else if (check) {
+ strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason));
+ s->op_st_chg.code = check->code;
+ s->op_st_chg.status = check->status;
+ s->op_st_chg.duration = check->duration;
+ }
+
+ /* propagate changes */
+ srv_update_status(s);
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_stopping(srv, NULL, NULL);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* Enables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to
+ * enforce either maint mode or drain mode. It is not allowed to set more than
+ * one flag at once. The equivalent "inherited" flag is propagated to all
+ * tracking servers. Maintenance mode disables health checks (but not agent
+ * checks). When either the flag is already set or no flag is passed, nothing
+ * is done. If <cause> is non-null, it will be displayed at the end of the log
+ * lines to justify the state change.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_admin_flag(struct server *s, enum srv_admin mode, const char *cause)
+{
+ struct server *srv;
+
+ if (!mode)
+ return;
+
+ /* stop going down as soon as we meet a server already in the same state */
+ if (s->next_admin & mode)
+ return;
+
+ s->next_admin |= mode;
+ if (cause)
+ strlcpy2(s->adm_st_chg_cause, cause, sizeof(s->adm_st_chg_cause));
+
+ /* propagate changes */
+ srv_update_status(s);
+
+ /* stop going down if the equivalent flag was already present (forced or inherited) */
+ if (((mode & SRV_ADMF_MAINT) && (s->next_admin & ~mode & SRV_ADMF_MAINT)) ||
+ ((mode & SRV_ADMF_DRAIN) && (s->next_admin & ~mode & SRV_ADMF_DRAIN)))
+ return;
+
+ /* compute the inherited flag to propagate */
+ if (mode & SRV_ADMF_MAINT)
+ mode = SRV_ADMF_IMAINT;
+ else if (mode & SRV_ADMF_DRAIN)
+ mode = SRV_ADMF_IDRAIN;
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_admin_flag(srv, mode, cause);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* Disables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to
+ * stop enforcing either maint mode or drain mode. It is not allowed to set more
+ * than one flag at once. The equivalent "inherited" flag is propagated to all
+ * tracking servers. Leaving maintenance mode re-enables health checks. When
+ * either the flag is already cleared or no flag is passed, nothing is done.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_clr_admin_flag(struct server *s, enum srv_admin mode)
+{
+ struct server *srv;
+
+ if (!mode)
+ return;
+
+ /* stop going down as soon as we see the flag is not there anymore */
+ if (!(s->next_admin & mode))
+ return;
+
+ s->next_admin &= ~mode;
+
+ /* propagate changes */
+ srv_update_status(s);
+
+ /* stop going down if the equivalent flag is still present (forced or inherited) */
+ if (((mode & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT)) ||
+ ((mode & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)))
+ return;
+
+ if (mode & SRV_ADMF_MAINT)
+ mode = SRV_ADMF_IMAINT;
+ else if (mode & SRV_ADMF_DRAIN)
+ mode = SRV_ADMF_IDRAIN;
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_clr_admin_flag(srv, mode);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* principle: propagate maint and drain to tracking servers. This is useful
+ * upon startup so that inherited states are correct.
+ */
+static void srv_propagate_admin_state(struct server *srv)
+{
+ struct server *srv2;
+
+ if (!srv->trackers)
+ return;
+
+ for (srv2 = srv->trackers; srv2; srv2 = srv2->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv2->lock);
+ if (srv->next_admin & (SRV_ADMF_MAINT | SRV_ADMF_CMAINT))
+ srv_set_admin_flag(srv2, SRV_ADMF_IMAINT, NULL);
+
+ if (srv->next_admin & SRV_ADMF_DRAIN)
+ srv_set_admin_flag(srv2, SRV_ADMF_IDRAIN, NULL);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv2->lock);
+ }
+}
+
+/* Compute and propagate the admin states for all servers in proxy <px>.
+ * Only servers *not* tracking another one are considered, because other
+ * ones will be handled when the server they track is visited.
+ */
+void srv_compute_all_admin_states(struct proxy *px)
+{
+ struct server *srv;
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ if (srv->track)
+ continue;
+ srv_propagate_admin_state(srv);
+ }
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ * Note: -1 as ->skip value means that the number of arguments is variable.
+ */
+static struct srv_kw_list srv_kws = { "ALL", { }, {
+ { "backup", srv_parse_backup, 0, 1, 1 }, /* Flag as backup server */
+ { "cookie", srv_parse_cookie, 1, 1, 0 }, /* Assign a cookie to the server */
+ { "disabled", srv_parse_disabled, 0, 1, 1 }, /* Start the server in 'disabled' state */
+ { "enabled", srv_parse_enabled, 0, 1, 1 }, /* Start the server in 'enabled' state */
+ { "error-limit", srv_parse_error_limit, 1, 1, 1 }, /* Configure the consecutive count of check failures to consider a server on error */
+ { "id", srv_parse_id, 1, 0, 1 }, /* set id# of server */
+ { "init-addr", srv_parse_init_addr, 1, 1, 0 }, /* Set the methods used to resolve the server address at start-up */
+ { "log-proto", srv_parse_log_proto, 1, 1, 0 }, /* Set the protocol for event messages, only relevant in a ring section */
+ { "maxconn", srv_parse_maxconn, 1, 1, 1 }, /* Set the max number of concurrent connections */
+ { "maxqueue", srv_parse_maxqueue, 1, 1, 1 }, /* Set the max number of connections to put in the queue */
+ { "max-reuse", srv_parse_max_reuse, 1, 1, 0 }, /* Set the max number of requests on a connection, -1 means unlimited */
+ { "minconn", srv_parse_minconn, 1, 1, 1 }, /* Enable a dynamic maxconn limit */
+ { "namespace", srv_parse_namespace, 1, 1, 0 }, /* Namespace the server socket belongs to (if supported) */
+ { "no-backup", srv_parse_no_backup, 0, 1, 1 }, /* Flag as non-backup server */
+ { "no-send-proxy", srv_parse_no_send_proxy, 0, 1, 1 }, /* Disable use of PROXY V1 protocol */
+ { "no-send-proxy-v2", srv_parse_no_send_proxy_v2, 0, 1, 1 }, /* Disable use of PROXY V2 protocol */
+ { "no-tfo", srv_parse_no_tfo, 0, 1, 1 }, /* Disable use of TCP Fast Open */
+ { "non-stick", srv_parse_non_stick, 0, 1, 0 }, /* Disable stick-table persistence */
+ { "observe", srv_parse_observe, 1, 1, 1 }, /* Enables health adjusting based on observing communication with the server */
+ { "on-error", srv_parse_on_error, 1, 1, 1 }, /* Configure the action on check failure */
+ { "on-marked-down", srv_parse_on_marked_down, 1, 1, 1 }, /* Configure the action when a server is marked down */
+ { "on-marked-up", srv_parse_on_marked_up, 1, 1, 1 }, /* Configure the action when a server is marked up */
+ { "pool-low-conn", srv_parse_pool_low_conn, 1, 1, 1 }, /* Set the min number of orphan idle connections before being allowed to pick from other threads */
+ { "pool-max-conn", srv_parse_pool_max_conn, 1, 1, 1 }, /* Set the max number of orphan idle connections, -1 means unlimited */
+ { "pool-purge-delay", srv_parse_pool_purge_delay, 1, 1, 1 }, /* Set the time before we destroy orphan idle connections, defaults to 1s */
+ { "proto", srv_parse_proto, 1, 1, 1 }, /* Set the proto to use for all outgoing connections */
+ { "proxy-v2-options", srv_parse_proxy_v2_options, 1, 1, 1 }, /* options for send-proxy-v2 */
+ { "redir", srv_parse_redir, 1, 1, 0 }, /* Enable redirection mode */
+ { "resolve-net", srv_parse_resolve_net, 1, 1, 0 }, /* Set the preferred network range for name resolution */
+ { "resolve-opts", srv_parse_resolve_opts, 1, 1, 0 }, /* Set options for name resolution */
+ { "resolve-prefer", srv_parse_resolve_prefer, 1, 1, 0 }, /* Set the preferred family for name resolution */
+ { "resolvers", srv_parse_resolvers, 1, 1, 0 }, /* Configure the resolver to use for name resolution */
+ { "send-proxy", srv_parse_send_proxy, 0, 1, 1 }, /* Enforce use of PROXY V1 protocol */
+ { "send-proxy-v2", srv_parse_send_proxy_v2, 0, 1, 1 }, /* Enforce use of PROXY V2 protocol */
+ { "slowstart", srv_parse_slowstart, 1, 1, 1 }, /* Set the warm-up timer for a previously failed server */
+ { "socks4", srv_parse_socks4, 1, 1, 0 }, /* Set the socks4 proxy of the server */
+ { "source", srv_parse_source, -1, 1, 1 }, /* Set the source address to be used to connect to the server */
+ { "stick", srv_parse_stick, 0, 1, 0 }, /* Enable stick-table persistence */
+ { "tfo", srv_parse_tfo, 0, 1, 1 }, /* Enable TCP Fast Open on the server */
+ { "track", srv_parse_track, 1, 1, 1 }, /* Set the current state of the server, tracking another one */
+ { "usesrc", srv_parse_usesrc, 0, 1, 1 }, /* safe-guard against usesrc without preceding <source> keyword */
+ { "weight", srv_parse_weight, 1, 1, 1 }, /* Set the load-balancing weight */
+ { "ws", srv_parse_ws, 1, 1, 1 }, /* websocket protocol */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
+
+/* Recomputes the server's eweight based on its state, uweight, the current time,
+ * and the proxy's algorithm. To be used after updating sv->uweight. The warmup
+ * state is automatically disabled if the time is elapsed. If <must_update> is
+ * not zero, the update will be propagated immediately.
+ *
+ * Must be called with the server lock held.
+ */
+void server_recalc_eweight(struct server *sv, int must_update)
+{
+ struct proxy *px = sv->proxy;
+ unsigned w;
+
+ if (now.tv_sec < sv->last_change || now.tv_sec >= sv->last_change + sv->slowstart) {
+ /* go to full throttle if the slowstart interval is reached */
+ if (sv->next_state == SRV_ST_STARTING)
+ sv->next_state = SRV_ST_RUNNING;
+ }
+
+ /* We must take care not to push the server to full throttle during slow
+ * starts. It must also start immediately, at least at the minimal step,
+ * when leaving maintenance.
+ */
+ if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN))
+ w = (px->lbprm.wdiv * (now.tv_sec - sv->last_change) + sv->slowstart) / sv->slowstart;
+ else
+ w = px->lbprm.wdiv;
+
+ sv->next_eweight = (sv->uweight * w + px->lbprm.wmult - 1) / px->lbprm.wmult;
+
+ /* propagate changes only if needed (i.e. not recursively) */
+ if (must_update)
+ srv_update_status(sv);
+}
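+
+/* Illustrative note (not part of the original sources): a worked example of
+ * the slowstart ramp above, assuming for illustration wdiv = 256, wmult = 1,
+ * uweight = 100 and slowstart = 60s. Fifteen seconds after last_change:
+ *
+ *     w            = (256 * 15 + 60) / 60 = 65
+ *     next_eweight = (100 * 65 + 1 - 1) / 1 = 6500
+ *
+ * versus 100 * 256 = 25600 at full throttle, i.e. the server carries about a
+ * quarter of its nominal weight at that point of the warm-up.
+ */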
+
+/*
+ * Parses weight_str and configures sv accordingly.
+ * Returns NULL on success, error message string otherwise.
+ *
+ * Must be called with the server lock held.
+ */
+const char *server_parse_weight_change_request(struct server *sv,
+ const char *weight_str)
+{
+ struct proxy *px;
+ long int w;
+ char *end;
+
+ px = sv->proxy;
+
+ /* if the weight is terminated with '%', it is set relative to
+ * the initial weight, otherwise it is absolute.
+ */
+ if (!*weight_str)
+ return "Require <weight> or <weight%>.\n";
+
+ w = strtol(weight_str, &end, 10);
+ if (end == weight_str)
+ return "Weight string empty or preceded by garbage";
+ else if (end[0] == '%' && end[1] == '\0') {
+ if (w < 0)
+ return "Relative weight must be positive.\n";
+ /* Avoid integer overflow */
+ if (w > 25600)
+ w = 25600;
+ w = sv->iweight * w / 100;
+ if (w > 256)
+ w = 256;
+ }
+ else if (w < 0 || w > 256)
+ return "Absolute weight can only be between 0 and 256 inclusive.\n";
+ else if (end[0] != '\0')
+ return "Trailing garbage in weight string";
+
+ if (w && w != sv->iweight && !(px->lbprm.algo & BE_LB_PROP_DYN))
+ return "Backend is using a static LB algorithm and only accepts weights '0%' and '100%'.\n";
+
+ sv->uweight = w;
+ server_recalc_eweight(sv, 1);
+
+ return NULL;
+}
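+
+/* Illustrative note (not part of the original sources): examples of inputs
+ * accepted by the weight parser above, assuming iweight = 100:
+ *
+ *     "50%"  -> uweight = 100 * 50 / 100 = 50   (relative to initial weight)
+ *     "120"  -> uweight = 120                   (absolute, range 0..256)
+ *     "300%" -> 100 * 300 / 100 = 300, capped to 256
+ *
+ * Backends using a static LB algorithm only accept "0%" and "100%".
+ */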
+
+/*
+ * Parses <addr_str> and configures <sv> accordingly. <updater> identifies
+ * the source of the change for the associated log message.
+ * Returns:
+ * - error string on error
+ * - NULL on success
+ *
+ * Must be called with the server lock held.
+ */
+const char *server_parse_addr_change_request(struct server *sv,
+ const char *addr_str, const char *updater)
+{
+ unsigned char ip[INET6_ADDRSTRLEN];
+
+ if (inet_pton(AF_INET6, addr_str, ip)) {
+ srv_update_addr(sv, ip, AF_INET6, updater);
+ return NULL;
+ }
+ if (inet_pton(AF_INET, addr_str, ip)) {
+ srv_update_addr(sv, ip, AF_INET, updater);
+ return NULL;
+ }
+
+ return "Could not understand IP address format.\n";
+}
+
+/*
+ * Must be called with the server lock held.
+ */
+const char *server_parse_maxconn_change_request(struct server *sv,
+ const char *maxconn_str)
+{
+ long int v;
+ char *end;
+
+ if (!*maxconn_str)
+ return "Require <maxconn>.\n";
+
+ v = strtol(maxconn_str, &end, 10);
+ if (end == maxconn_str)
+ return "maxconn string empty or preceded by garbage";
+ else if (end[0] != '\0')
+ return "Trailing garbage in maxconn string";
+
+ if (sv->maxconn == sv->minconn) { // static maxconn
+ sv->maxconn = sv->minconn = v;
+ } else { // dynamic maxconn
+ sv->maxconn = v;
+ }
+
+ if (may_dequeue_tasks(sv, sv->proxy))
+ process_srv_queue(sv);
+
+ return NULL;
+}
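+
+/* Illustrative note (not part of the original sources): when maxconn equals
+ * minconn, the limit is considered static and both values move together, so
+ * a CLI request such as "set maxconn server be1/srv1 200" (which ends up
+ * here) sets maxconn = minconn = 200; otherwise only maxconn is changed and
+ * the dynamic limit keeps scaling between minconn and maxconn.
+ */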
+
+static struct sample_expr *srv_sni_sample_parse_expr(struct server *srv, struct proxy *px,
+ const char *file, int linenum, char **err)
+{
+ int idx;
+ const char *args[] = {
+ srv->sni_expr,
+ NULL,
+ };
+
+ idx = 0;
+ px->conf.args.ctx = ARGC_SRV;
+
+ return sample_parse_expr((char **)args, &idx, file, linenum, err, &px->conf.args, NULL);
+}
+
+int server_parse_sni_expr(struct server *newsrv, struct proxy *px, char **err)
+{
+ struct sample_expr *expr;
+
+ expr = srv_sni_sample_parse_expr(newsrv, px, px->conf.file, px->conf.line, err);
+ if (!expr) {
+ memprintf(err, "error detected while parsing sni expression: %s", *err);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!(expr->fetch->val & SMP_VAL_BE_SRV_CON)) {
+ memprintf(err, "error detected while parsing sni expression: "
+ "fetch method '%s' extracts information from '%s', "
+ "none of which is available here.",
+ newsrv->sni_expr, sample_src_names(expr->fetch->use));
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ px->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+ release_sample_expr(newsrv->ssl_ctx.sni);
+ newsrv->ssl_ctx.sni = expr;
+
+ return 0;
+}
+
+static void display_parser_err(const char *file, int linenum, char **args, int cur_arg, int err_code, char **err)
+{
+ char *msg = "error encountered while processing ";
+ char *quote = "'";
+ char *token = args[cur_arg];
+
+ if (err && *err) {
+ indent_msg(err, 2);
+ msg = *err;
+ quote = "";
+ token = "";
+ }
+
+ if (err_code & ERR_WARN && !(err_code & ERR_ALERT))
+ ha_warning("%s%s%s%s.\n", msg, quote, token, quote);
+ else
+ ha_alert("%s%s%s%s.\n", msg, quote, token, quote);
+}
+
+static void srv_conn_src_sport_range_cpy(struct server *srv, const struct server *src)
+{
+ int range_sz;
+
+ range_sz = src->conn_src.sport_range->size;
+ if (range_sz > 0) {
+ srv->conn_src.sport_range = port_range_alloc_range(range_sz);
+ if (srv->conn_src.sport_range != NULL) {
+ int i;
+
+ for (i = 0; i < range_sz; i++) {
+ srv->conn_src.sport_range->ports[i] =
+ src->conn_src.sport_range->ports[i];
+ }
+ }
+ }
+}
+
+/*
+ * Copy <src> server connection source settings to <srv> server, allocating
+ * everything needed.
+ */
+static void srv_conn_src_cpy(struct server *srv, const struct server *src)
+{
+ srv->conn_src.opts = src->conn_src.opts;
+ srv->conn_src.source_addr = src->conn_src.source_addr;
+
+ /* Source port range copy. */
+ if (src->conn_src.sport_range != NULL)
+ srv_conn_src_sport_range_cpy(srv, src);
+
+#ifdef CONFIG_HAP_TRANSPARENT
+ if (src->conn_src.bind_hdr_name != NULL) {
+ srv->conn_src.bind_hdr_name = strdup(src->conn_src.bind_hdr_name);
+ srv->conn_src.bind_hdr_len = strlen(src->conn_src.bind_hdr_name);
+ }
+ srv->conn_src.bind_hdr_occ = src->conn_src.bind_hdr_occ;
+ srv->conn_src.tproxy_addr = src->conn_src.tproxy_addr;
+#endif
+ if (src->conn_src.iface_name != NULL)
+ srv->conn_src.iface_name = strdup(src->conn_src.iface_name);
+}
+
+/*
+ * Copy <src> server SSL settings to <srv> server, allocating
+ * everything needed.
+ */
+#if defined(USE_OPENSSL)
+static void srv_ssl_settings_cpy(struct server *srv, const struct server *src)
+{
+ BUG_ON(src->ssl_ctx.ctx != NULL); /* the SSL_CTX must never be initialized in a default-server */
+
+ /* <src> is the current proxy's default server and SSL is enabled */
+ if (src == &srv->proxy->defsrv && src->use_ssl == 1)
+ srv->flags |= SRV_F_DEFSRV_USE_SSL;
+
+ if (src->ssl_ctx.ca_file != NULL)
+ srv->ssl_ctx.ca_file = strdup(src->ssl_ctx.ca_file);
+ if (src->ssl_ctx.crl_file != NULL)
+ srv->ssl_ctx.crl_file = strdup(src->ssl_ctx.crl_file);
+ if (src->ssl_ctx.client_crt != NULL)
+ srv->ssl_ctx.client_crt = strdup(src->ssl_ctx.client_crt);
+
+ srv->ssl_ctx.verify = src->ssl_ctx.verify;
+
+ if (src->ssl_ctx.verify_host != NULL)
+ srv->ssl_ctx.verify_host = strdup(src->ssl_ctx.verify_host);
+ if (src->ssl_ctx.ciphers != NULL)
+ srv->ssl_ctx.ciphers = strdup(src->ssl_ctx.ciphers);
+ if (src->ssl_ctx.options)
+ srv->ssl_ctx.options = src->ssl_ctx.options;
+ if (src->ssl_ctx.methods.flags)
+ srv->ssl_ctx.methods.flags = src->ssl_ctx.methods.flags;
+ if (src->ssl_ctx.methods.min)
+ srv->ssl_ctx.methods.min = src->ssl_ctx.methods.min;
+ if (src->ssl_ctx.methods.max)
+ srv->ssl_ctx.methods.max = src->ssl_ctx.methods.max;
+
+ if (src->ssl_ctx.ciphersuites != NULL)
+ srv->ssl_ctx.ciphersuites = strdup(src->ssl_ctx.ciphersuites);
+ if (src->sni_expr != NULL)
+ srv->sni_expr = strdup(src->sni_expr);
+
+ if (src->ssl_ctx.alpn_str) {
+ srv->ssl_ctx.alpn_str = malloc(src->ssl_ctx.alpn_len);
+ if (srv->ssl_ctx.alpn_str) {
+ memcpy(srv->ssl_ctx.alpn_str, src->ssl_ctx.alpn_str,
+ src->ssl_ctx.alpn_len);
+ srv->ssl_ctx.alpn_len = src->ssl_ctx.alpn_len;
+ }
+ }
+
+ if (src->ssl_ctx.npn_str) {
+ srv->ssl_ctx.npn_str = malloc(src->ssl_ctx.npn_len);
+ if (srv->ssl_ctx.npn_str) {
+ memcpy(srv->ssl_ctx.npn_str, src->ssl_ctx.npn_str,
+ src->ssl_ctx.npn_len);
+ srv->ssl_ctx.npn_len = src->ssl_ctx.npn_len;
+ }
+ }
+}
+
+/* Activate SSL on server <s>.
+ * Do nothing if there is no change to apply.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_ssl(struct server *s, int use_ssl)
+{
+ if (s->use_ssl == use_ssl)
+ return;
+
+ s->use_ssl = use_ssl;
+ if (s->use_ssl)
+ s->xprt = xprt_get(XPRT_SSL);
+ else
+ s->xprt = xprt_get(XPRT_RAW);
+}
+
+#endif /* USE_OPENSSL */
+
+/*
+ * Prepare <srv> for hostname resolution.
+ * May safely be called with a NULL <hostname> (e.g. for a default server
+ * without any hostname), in which case nothing is done and 0 is returned.
+ * Returns -1 in case of any allocation failure, 0 if not.
+ */
+int srv_prepare_for_resolution(struct server *srv, const char *hostname)
+{
+ char *hostname_dn;
+ int hostname_len, hostname_dn_len;
+
+ if (!hostname)
+ return 0;
+
+ hostname_len = strlen(hostname);
+ hostname_dn = trash.area;
+ hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
+ hostname_dn, trash.size);
+ if (hostname_dn_len == -1)
+ goto err;
+
+ free(srv->hostname);
+ free(srv->hostname_dn);
+ srv->hostname = strdup(hostname);
+ srv->hostname_dn = strdup(hostname_dn);
+ srv->hostname_dn_len = hostname_dn_len;
+ if (!srv->hostname || !srv->hostname_dn)
+ goto err;
+
+ return 0;
+
+ err:
+ ha_free(&srv->hostname);
+ ha_free(&srv->hostname_dn);
+ return -1;
+}
+
+/*
+ * Copy <src> server settings to <srv> server, allocating
+ * everything needed.
+ * This function is not supposed to be called at just any time, but only
+ * during server settings parsing or during server allocations from a server
+ * template, and just after having calloc()'ed a new server.
+ * So, <src> may only be a default server (when parsing server settings)
+ * or a server template (during server allocations from a server template).
+ * <srv_tmpl> distinguishes these two cases (must be 1 if <srv> is a template,
+ * 0 if not).
+ */
+void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl)
+{
+ /* Connection source settings copy */
+ srv_conn_src_cpy(srv, src);
+
+ if (srv_tmpl) {
+ srv->addr = src->addr;
+ srv->svc_port = src->svc_port;
+ }
+
+ srv->pp_opts = src->pp_opts;
+ if (src->rdr_pfx != NULL) {
+ srv->rdr_pfx = strdup(src->rdr_pfx);
+ srv->rdr_len = src->rdr_len;
+ }
+ if (src->cookie != NULL) {
+ srv->cookie = strdup(src->cookie);
+ srv->cklen = src->cklen;
+ }
+ srv->use_ssl = src->use_ssl;
+ srv->check.addr = src->check.addr;
+ srv->agent.addr = src->agent.addr;
+ srv->check.use_ssl = src->check.use_ssl;
+ srv->check.port = src->check.port;
+ srv->check.sni = src->check.sni;
+ srv->check.alpn_str = src->check.alpn_str;
+ srv->check.alpn_len = src->check.alpn_len;
+ /* Note: 'flags' field has potentially been already initialized. */
+ srv->flags |= src->flags;
+ srv->do_check = src->do_check;
+ srv->do_agent = src->do_agent;
+ srv->check.inter = src->check.inter;
+ srv->check.fastinter = src->check.fastinter;
+ srv->check.downinter = src->check.downinter;
+ srv->agent.use_ssl = src->agent.use_ssl;
+ srv->agent.port = src->agent.port;
+
+ if (src->agent.tcpcheck_rules) {
+ srv->agent.tcpcheck_rules = calloc(1, sizeof(*srv->agent.tcpcheck_rules));
+ if (srv->agent.tcpcheck_rules) {
+ srv->agent.tcpcheck_rules->flags = src->agent.tcpcheck_rules->flags;
+ srv->agent.tcpcheck_rules->list = src->agent.tcpcheck_rules->list;
+ LIST_INIT(&srv->agent.tcpcheck_rules->preset_vars);
+ dup_tcpcheck_vars(&srv->agent.tcpcheck_rules->preset_vars,
+ &src->agent.tcpcheck_rules->preset_vars);
+ }
+ }
+
+ srv->agent.inter = src->agent.inter;
+ srv->agent.fastinter = src->agent.fastinter;
+ srv->agent.downinter = src->agent.downinter;
+ srv->maxqueue = src->maxqueue;
+ srv->ws = src->ws;
+ srv->minconn = src->minconn;
+ srv->maxconn = src->maxconn;
+ srv->slowstart = src->slowstart;
+ srv->observe = src->observe;
+ srv->onerror = src->onerror;
+ srv->onmarkeddown = src->onmarkeddown;
+ srv->onmarkedup = src->onmarkedup;
+ if (src->trackit != NULL)
+ srv->trackit = strdup(src->trackit);
+ srv->consecutive_errors_limit = src->consecutive_errors_limit;
+ srv->uweight = srv->iweight = src->iweight;
+
+ srv->check.send_proxy = src->check.send_proxy;
+ /* health: up, but will fall down at first failure */
+ srv->check.rise = srv->check.health = src->check.rise;
+ srv->check.fall = src->check.fall;
+
+ /* Here we check if 'disabled' is the default server state */
+ if (src->next_admin & (SRV_ADMF_CMAINT | SRV_ADMF_FMAINT)) {
+ srv->next_admin |= SRV_ADMF_CMAINT | SRV_ADMF_FMAINT;
+ srv->next_state = SRV_ST_STOPPED;
+ srv->check.state |= CHK_ST_PAUSED;
+ srv->check.health = 0;
+ }
+
+ /* health: up but will fall down at first failure */
+ srv->agent.rise = srv->agent.health = src->agent.rise;
+ srv->agent.fall = src->agent.fall;
+
+ if (src->resolvers_id != NULL)
+ srv->resolvers_id = strdup(src->resolvers_id);
+ srv->resolv_opts.family_prio = src->resolv_opts.family_prio;
+ srv->resolv_opts.accept_duplicate_ip = src->resolv_opts.accept_duplicate_ip;
+ srv->resolv_opts.ignore_weight = src->resolv_opts.ignore_weight;
+ if (srv->resolv_opts.family_prio == AF_UNSPEC)
+ srv->resolv_opts.family_prio = AF_INET6;
+ memcpy(srv->resolv_opts.pref_net,
+ src->resolv_opts.pref_net,
+ sizeof srv->resolv_opts.pref_net);
+ srv->resolv_opts.pref_net_nb = src->resolv_opts.pref_net_nb;
+
+ srv->init_addr_methods = src->init_addr_methods;
+ srv->init_addr = src->init_addr;
+#if defined(USE_OPENSSL)
+ srv_ssl_settings_cpy(srv, src);
+#endif
+#ifdef TCP_USER_TIMEOUT
+ srv->tcp_ut = src->tcp_ut;
+#endif
+ srv->mux_proto = src->mux_proto;
+ srv->pool_purge_delay = src->pool_purge_delay;
+ srv->low_idle_conns = src->low_idle_conns;
+ srv->max_idle_conns = src->max_idle_conns;
+ srv->max_reuse = src->max_reuse;
+
+ if (srv_tmpl)
+ srv->srvrq = src->srvrq;
+
+ srv->check.via_socks4 = src->check.via_socks4;
+ srv->socks4_addr = src->socks4_addr;
+}
+
+/* allocate a server and attach it to the global servers_list. Returns
+ * the server on success, otherwise NULL.
+ */
+struct server *new_server(struct proxy *proxy)
+{
+ struct server *srv;
+
+ srv = calloc(1, sizeof *srv);
+ if (!srv)
+ return NULL;
+
+ srv_take(srv);
+
+ srv->obj_type = OBJ_TYPE_SERVER;
+ srv->proxy = proxy;
+ queue_init(&srv->queue, proxy, srv);
+ LIST_APPEND(&servers_list, &srv->global_list);
+ LIST_INIT(&srv->srv_rec_item);
+ LIST_INIT(&srv->ip_rec_item);
+
+ srv->next_state = SRV_ST_RUNNING; /* early server setup */
+ srv->last_change = now.tv_sec;
+
+ srv->check.obj_type = OBJ_TYPE_CHECK;
+ srv->check.status = HCHK_STATUS_INI;
+ srv->check.server = srv;
+ srv->check.proxy = proxy;
+ srv->check.tcpcheck_rules = &proxy->tcpcheck_rules;
+
+ srv->agent.obj_type = OBJ_TYPE_CHECK;
+ srv->agent.status = HCHK_STATUS_INI;
+ srv->agent.server = srv;
+ srv->agent.proxy = proxy;
+ srv->xprt = srv->check.xprt = srv->agent.xprt = xprt_get(XPRT_RAW);
+#if defined(USE_QUIC)
+ srv->cids = EB_ROOT_UNIQUE;
+#endif
+
+ srv->extra_counters = NULL;
+#ifdef USE_OPENSSL
+ HA_RWLOCK_INIT(&srv->ssl_ctx.lock);
+#endif
+
+ /* please don't put default server settings here, they are set in
+ * proxy_preset_defaults().
+ */
+ return srv;
+}
+
+/* Increment the server refcount. */
+void srv_take(struct server *srv)
+{
+ HA_ATOMIC_INC(&srv->refcount);
+}
+
+/* Deallocate a server <srv> and its members. <srv> must be allocated. For
+ * dynamic servers, its refcount is decremented first. The free operations are
+ * conducted only if the refcount reaches zero, unless the process is stopping.
+ *
+ * As a convenience, <srv->next> is returned if <srv> is not NULL. It may be
+ * useful when calling srv_drop over a list of servers.
+ */
+struct server *srv_drop(struct server *srv)
+{
+ struct server *next = NULL;
+
+ if (!srv)
+ goto end;
+
+ next = srv->next;
+
+ /* For dynamic servers, decrement the reference counter. Only free the
+ * server when reaching zero.
+ */
+ if (likely(!(global.mode & MODE_STOPPING))) {
+ if (HA_ATOMIC_SUB_FETCH(&srv->refcount, 1))
+ goto end;
+ }
+
+ task_destroy(srv->warmup);
+ task_destroy(srv->srvrq_check);
+
+ free(srv->id);
+ free(srv->cookie);
+ free(srv->hostname);
+ free(srv->hostname_dn);
+ free((char*)srv->conf.file);
+ free(srv->per_thr);
+ free(srv->curr_idle_thr);
+ free(srv->resolvers_id);
+ free(srv->addr_node.key);
+ free(srv->lb_nodes);
+
+ if (srv->use_ssl == 1 || srv->check.use_ssl == 1 || (srv->proxy->options & PR_O_TCPCHK_SSL)) {
+ if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->destroy_srv)
+ xprt_get(XPRT_SSL)->destroy_srv(srv);
+ }
+ HA_SPIN_DESTROY(&srv->lock);
+
+ LIST_DELETE(&srv->global_list);
+
+ EXTRA_COUNTERS_FREE(srv->extra_counters);
+
+ ha_free(&srv);
+
+ end:
+ return next;
+}
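+
+/* Illustrative note (not part of the original sources): srv_take() and
+ * srv_drop() form a refcounting pair, and new_server() returns with one
+ * reference already held. A usage sketch:
+ *
+ *     srv_take(srv);     // grab a reference before using the server
+ *     ...                // srv may safely be dereferenced here
+ *     srv_drop(srv);     // release it; the server is freed at refcount 0
+ */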
+
+/* Remove a server <srv> from a tracking list if <srv> is tracking another
+ * server. No special care is taken if <srv> is itself tracked by another one:
+ * this situation should be avoided by the caller.
+ *
+ * Not thread-safe.
+ */
+static void release_server_track(struct server *srv)
+{
+ struct server *strack = srv->track;
+ struct server **base;
+
+ if (!strack)
+ return;
+
+ for (base = &strack->trackers; *base; base = &((*base)->tracknext)) {
+ if (*base == srv) {
+ *base = srv->tracknext;
+ return;
+ }
+ }
+
+ /* srv not found on the tracking list, this should never happen */
+ BUG_ON(!*base);
+}
+
+/*
+ * Parse a range string argument of the form low[-high].
+ * Set <nb_low> and <nb_high> values so that they may be reused by this loop:
+ * for (int i = nb_low; i <= nb_high; i++)... with nb_low >= 1.
+ * Fails if 'low' < 0 or 'high' is present and not higher than 'low'.
+ * Returns 0 if succeeded, -1 if not.
+ */
+static int _srv_parse_tmpl_range(struct server *srv, const char *arg,
+ int *nb_low, int *nb_high)
+{
+ char *nb_high_arg;
+
+ *nb_high = 0;
+ chunk_printf(&trash, "%s", arg);
+ *nb_low = atoi(trash.area);
+
+ if ((nb_high_arg = strchr(trash.area, '-'))) {
+ *nb_high_arg++ = '\0';
+ *nb_high = atoi(nb_high_arg);
+ }
+ else {
+ *nb_high += *nb_low;
+ *nb_low = 1;
+ }
+
+ if (*nb_low < 0 || *nb_high < *nb_low)
+ return -1;
+
+ return 0;
+}
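+
+/* Illustrative note (not part of the original sources): examples of range
+ * strings accepted by the parser above:
+ *
+ *     "3"   -> nb_low = 1, nb_high = 3   (a bare count numbers from 1)
+ *     "1-5" -> nb_low = 1, nb_high = 5
+ *     "5-3" -> rejected because 'high' is lower than 'low'
+ */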
+
+/* Set the ID of server <srv> to "<prefix><nb>", built from the template
+ * prefix <prefix> and the number <nb>.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ */
+static inline void _srv_parse_set_id_from_prefix(struct server *srv,
+ const char *prefix, int nb)
+{
+ chunk_printf(&trash, "%s%d", prefix, nb);
+ free(srv->id);
+ srv->id = strdup(trash.area);
+}
+
+/* Initialize as many servers as possible from the <srv> server template.
+ * Note that a server template is a special server carrying a few extra
+ * parameters, but otherwise parsed mostly the same way as a regular server.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ *
+ * Returns the number of servers successfully allocated,
+ * 'srv' template included.
+ */
+static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px)
+{
+ int i;
+ struct server *newsrv;
+
+ for (i = srv->tmpl_info.nb_low + 1; i <= srv->tmpl_info.nb_high; i++) {
+ newsrv = new_server(px);
+ if (!newsrv)
+ goto err;
+
+ newsrv->conf.file = strdup(srv->conf.file);
+ newsrv->conf.line = srv->conf.line;
+
+ srv_settings_cpy(newsrv, srv, 1);
+ srv_prepare_for_resolution(newsrv, srv->hostname);
+
+ if (newsrv->sni_expr) {
+ newsrv->ssl_ctx.sni = srv_sni_sample_parse_expr(newsrv, px, NULL, 0, NULL);
+ if (!newsrv->ssl_ctx.sni)
+ goto err;
+ }
+
+ /* append to the list of servers available to receive a hostname */
+ if (newsrv->srvrq)
+ LIST_APPEND(&newsrv->srvrq->attached_servers, &newsrv->srv_rec_item);
+
+ /* Set this new server ID. */
+ _srv_parse_set_id_from_prefix(newsrv, srv->tmpl_info.prefix, i);
+
+ /* Linked backwards first. This will be re-established after parsing. */
+ newsrv->next = px->srv;
+ px->srv = newsrv;
+ }
+ _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low);
+
+ return i - srv->tmpl_info.nb_low;
+
+ err:
+ _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low);
+ if (newsrv) {
+ release_sample_expr(newsrv->ssl_ctx.sni);
+ free_check(&newsrv->agent);
+ free_check(&newsrv->check);
+ LIST_DELETE(&newsrv->global_list);
+ }
+ free(newsrv);
+ return i - srv->tmpl_info.nb_low;
+}
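+
+/* Illustrative note (not part of the original sources): given a hypothetical
+ * line
+ *
+ *     server-template srv 1-3 app.example.com:8080 check
+ *
+ * the function above clones the template for numbers 2..3, and
+ * _srv_parse_set_id_from_prefix() names the resulting servers "srv1", "srv2"
+ * and "srv3".
+ */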
+
+/* Allocate a new server pointed by <srv> and try to parse the first arguments
+ * in <args> as an address for a server or an address-range for a template or
+ * nothing for a default-server. <cur_arg> is incremented to the next argument.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ *
+ * A mask of errors is returned. On a parsing error, ERR_FATAL is set. In case
+ * of memory exhaustion, ERR_ABORT is set. If the server cannot be allocated,
+ * <srv> will be set to NULL.
+ */
+static int _srv_parse_init(struct server **srv, char **args, int *cur_arg,
+ struct proxy *curproxy,
+ int parse_flags)
+{
+ struct server *newsrv = NULL;
+ const char *err = NULL;
+ int err_code = 0;
+ char *fqdn = NULL;
+ int tmpl_range_low = 0, tmpl_range_high = 0;
+ char *errmsg = NULL;
+
+ *srv = NULL;
+
+ /* There is no mandatory first arguments for default server. */
+ if (parse_flags & SRV_PARSE_PARSE_ADDR) {
+ if (parse_flags & SRV_PARSE_TEMPLATE) {
+ if (!*args[3]) {
+ /* Check the number of arguments on a 'server-template' line. */
+ ha_alert("'%s' expects <prefix> <nb | range> <addr>[:<port>] as arguments.\n",
+ args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_prefix_char(args[1]);
+ }
+ else {
+ if (!*args[2]) {
+ /* Check the number of arguments on a 'server' line. */
+ ha_alert("'%s' expects <name> and <addr>[:<port>] as arguments.\n",
+ args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ }
+
+ if (err) {
+ ha_alert("character '%c' is not permitted in %s %s '%s'.\n",
+ *err, args[0], !(parse_flags & SRV_PARSE_TEMPLATE) ? "name" : "prefix", args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ *cur_arg = 2;
+ if (parse_flags & SRV_PARSE_TEMPLATE) {
+ /* Parse server-template <nb | range> arg. */
+ if (_srv_parse_tmpl_range(newsrv, args[*cur_arg], &tmpl_range_low, &tmpl_range_high) < 0) {
+ ha_alert("Wrong %s number or range arg '%s'.\n",
+ args[0], args[*cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ (*cur_arg)++;
+ }
+
+ if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER)) {
+ struct sockaddr_storage *sk;
+ int port1, port2, port;
+
+ *srv = newsrv = new_server(curproxy);
+ if (!newsrv) {
+ ha_alert("out of memory.\n");
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ register_parsing_obj(&newsrv->obj_type);
+
+ if (parse_flags & SRV_PARSE_TEMPLATE) {
+ newsrv->tmpl_info.nb_low = tmpl_range_low;
+ newsrv->tmpl_info.nb_high = tmpl_range_high;
+ }
+
+ if (parse_flags & SRV_PARSE_DYNAMIC)
+ newsrv->flags |= SRV_F_DYNAMIC;
+
+ /* Note: for a server template, its id is its prefix.
+ * This is a temporary id which will be used for server allocations to come
+ * after parsing.
+ */
+ if (!(parse_flags & SRV_PARSE_TEMPLATE))
+ newsrv->id = strdup(args[1]);
+ else
+ newsrv->tmpl_info.prefix = strdup(args[1]);
+
+ /* several ways to check the port component :
+ * - IP => port=+0, relative (IPv4 only)
+ * - IP: => port=+0, relative
+ * - IP:N => port=N, absolute
+ * - IP:+N => port=+N, relative
+ * - IP:-N => port=-N, relative
+ */
+ if (!(parse_flags & SRV_PARSE_PARSE_ADDR))
+ goto skip_addr;
+
+ sk = str2sa_range(args[*cur_arg], &port, &port1, &port2, NULL, NULL,
+ &errmsg, NULL, &fqdn,
+ (parse_flags & SRV_PARSE_INITIAL_RESOLVE ? PA_O_RESOLVE : 0) | PA_O_PORT_OK | PA_O_PORT_OFS | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("%s\n", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ ha_free(&errmsg);
+ goto out;
+ }
+
+ if (!port1 || !port2) {
+ /* no port specified, +offset, -offset */
+ newsrv->flags |= SRV_F_MAPPORTS;
+ }
+
+ /* save hostname and create associated name resolution */
+ if (fqdn) {
+ if (fqdn[0] == '_') { /* SRV record */
+ /* Check if a SRV request already exists, and if not, create it */
+ if ((newsrv->srvrq = find_srvrq_by_name(fqdn, curproxy)) == NULL)
+ newsrv->srvrq = new_resolv_srvrq(newsrv, fqdn);
+ if (newsrv->srvrq == NULL) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ LIST_APPEND(&newsrv->srvrq->attached_servers, &newsrv->srv_rec_item);
+ }
+ else if (srv_prepare_for_resolution(newsrv, fqdn) == -1) {
+ ha_alert("Can't create DNS resolution for server '%s'\n",
+ newsrv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ newsrv->addr = *sk;
+ newsrv->svc_port = port;
+ /*
+ * we don't need to lock the server here, because
+ * we are in the process of initializing.
+ *
+ * Note that the server is not attached into the proxy tree if
+ * this is a dynamic server.
+ */
+ srv_set_addr_desc(newsrv, !(parse_flags & SRV_PARSE_DYNAMIC));
+
+ if (!newsrv->srvrq && !newsrv->hostname &&
+ !protocol_lookup(newsrv->addr.ss_family, PROTO_TYPE_STREAM, 0)) {
+ ha_alert("Unknown protocol family %d '%s'\n",
+ newsrv->addr.ss_family, args[*cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ (*cur_arg)++;
+ skip_addr:
+ if (!(parse_flags & SRV_PARSE_DYNAMIC)) {
+ /* Copy default server settings to new server */
+ srv_settings_cpy(newsrv, &curproxy->defsrv, 0);
+ } else {
+ /* Initialize dynamic server weight to 1 */
+ newsrv->uweight = newsrv->iweight = 1;
+
+ /* A dynamic server is disabled on startup */
+ newsrv->next_admin = SRV_ADMF_FMAINT;
+ newsrv->next_state = SRV_ST_STOPPED;
+ server_recalc_eweight(newsrv, 0);
+
+ /* Set default values for checks */
+ newsrv->check.inter = DEF_CHKINTR;
+ newsrv->check.rise = DEF_RISETIME;
+ newsrv->check.fall = DEF_FALLTIME;
+
+ newsrv->agent.inter = DEF_CHKINTR;
+ newsrv->agent.rise = DEF_AGENT_RISETIME;
+ newsrv->agent.fall = DEF_AGENT_FALLTIME;
+ }
+ HA_SPIN_INIT(&newsrv->lock);
+ }
+ else {
+ *srv = newsrv = &curproxy->defsrv;
+ *cur_arg = 1;
+ newsrv->resolv_opts.family_prio = AF_INET6;
+ newsrv->resolv_opts.accept_duplicate_ip = 0;
+ }
+
+ free(fqdn);
+ return 0;
+
+out:
+ free(fqdn);
+ return err_code;
+}
+
+/* Parse the server keyword in <args>.
+ * <cur_arg> is incremented beyond the keyword and its optional value. Note that this
+ * might not be the case if an error is reported.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ *
+ * A mask of errors is returned. ERR_FATAL is set if the parsing should be
+ * interrupted.
+ */
+static int _srv_parse_kw(struct server *srv, char **args, int *cur_arg,
+ struct proxy *curproxy,
+ int parse_flags)
+{
+ int err_code = 0;
+ struct srv_kw *kw;
+ const char *best;
+ char *errmsg = NULL;
+
+ kw = srv_find_kw(args[*cur_arg]);
+ if (!kw) {
+ best = srv_find_best_kw(args[*cur_arg]);
+ if (best)
+ ha_alert("unknown keyword '%s'; did you mean '%s' maybe ?%s\n",
+ args[*cur_arg], best,
+ (parse_flags & SRV_PARSE_PARSE_ADDR) ? "" :
+ " Hint: no address was expected for this server.");
+ else
+ ha_alert("unknown keyword '%s'.%s\n", args[*cur_arg],
+ (parse_flags & SRV_PARSE_PARSE_ADDR) ? "" :
+ " Hint: no address was expected for this server.");
+
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!kw->parse) {
+ ha_alert("'%s' option is not implemented in this version (check build options)\n",
+ args[*cur_arg]);
+ err_code = ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((parse_flags & SRV_PARSE_DEFAULT_SERVER) && !kw->default_ok) {
+ ha_alert("'%s' option is not accepted in default-server sections\n",
+ args[*cur_arg]);
+ err_code = ERR_ALERT;
+ goto out;
+ }
+ else if ((parse_flags & SRV_PARSE_DYNAMIC) && !kw->dynamic_ok) {
+ ha_alert("'%s' option is not accepted for dynamic server\n",
+ args[*cur_arg]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ err_code = kw->parse(args, cur_arg, curproxy, srv, &errmsg);
+ if (err_code) {
+ display_parser_err(NULL, 0, args, *cur_arg, err_code, &errmsg);
+ free(errmsg);
+ }
+
+out:
+ if (kw->skip != -1)
+ *cur_arg += 1 + kw->skip;
+
+ return err_code;
+}
+
+/* This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ */
+static int _srv_parse_sni_expr_init(char **args, int cur_arg,
+ struct server *srv, struct proxy *proxy,
+ char **errmsg)
+{
+ int ret;
+
+ if (!srv->sni_expr)
+ return 0;
+
+ ret = server_parse_sni_expr(srv, proxy, errmsg);
+ if (!ret)
+ return 0;
+
+ return ret;
+}
+
+/* Finalize the server initialization.
+ * Initialize health check, agent check and SNI expression if enabled.
+ * Must not be called for a default server instance.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ */
+static int _srv_parse_finalize(char **args, int cur_arg,
+ struct server *srv, struct proxy *px,
+ int parse_flags)
+{
+ int ret;
+ char *errmsg = NULL;
+
+ if (srv->do_check && srv->trackit) {
+ ha_alert("unable to enable checks and tracking at the same time!\n");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (srv->do_agent && !srv->agent.port) {
+ ha_alert("server %s does not have an agent port, the agent check cannot be enabled.\n",
+ srv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((ret = _srv_parse_sni_expr_init(args, cur_arg, srv, px, &errmsg)) != 0) {
+ if (errmsg) {
+ ha_alert("%s\n", errmsg);
+ free(errmsg);
+ }
+ return ret;
+ }
+
+ /* A dynamic server is disabled on startup. It must not be counted as
+ * an active backend entry.
+ */
+ if (!(parse_flags & SRV_PARSE_DYNAMIC)) {
+ if (srv->flags & SRV_F_BACKUP)
+ px->srv_bck++;
+ else
+ px->srv_act++;
+ }
+
+ srv_lb_commit_status(srv);
+
+ return 0;
+}
+
+int parse_server(const char *file, int linenum, char **args,
+ struct proxy *curproxy, const struct proxy *defproxy,
+ int parse_flags)
+{
+ struct server *newsrv = NULL;
+ int err_code = 0;
+ int cur_arg;
+
+ set_usermsgs_ctx(file, linenum, NULL);
+
+ if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER) && curproxy == defproxy) {
+ ha_alert("'%s' not allowed in 'defaults' section.\n", args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (failifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL)) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((parse_flags & (SRV_PARSE_IN_PEER_SECTION|SRV_PARSE_PARSE_ADDR)) ==
+ (SRV_PARSE_IN_PEER_SECTION|SRV_PARSE_PARSE_ADDR)) {
+ if (!*args[2]) {
+ reset_usermsgs_ctx();
+ return 0;
+ }
+ }
+
+ err_code = _srv_parse_init(&newsrv, args, &cur_arg, curproxy,
+ parse_flags);
+
+ /* the servers are linked backwards first */
+ if (newsrv && !(parse_flags & SRV_PARSE_DEFAULT_SERVER)) {
+ newsrv->next = curproxy->srv;
+ curproxy->srv = newsrv;
+ }
+
+ if (err_code & ERR_CODE)
+ goto out;
+
+ newsrv->conf.file = strdup(file);
+ newsrv->conf.line = linenum;
+
+ while (*args[cur_arg]) {
+ err_code = _srv_parse_kw(newsrv, args, &cur_arg, curproxy,
+ parse_flags);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+
+ if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER)) {
+ err_code |= _srv_parse_finalize(args, cur_arg, newsrv, curproxy, parse_flags);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+
+ if (parse_flags & SRV_PARSE_TEMPLATE)
+ _srv_parse_tmpl_init(newsrv, curproxy);
+
+ /* If the server id is fixed, insert it in the proxy used_id tree.
+ * This is needed to detect a later duplicate id via srv_parse_id.
+ *
+ * If no id is specified, a dynamic one is generated in
+ * check_config_validity.
+ */
+ if (newsrv->flags & SRV_F_FORCED_ID)
+ eb32_insert(&curproxy->conf.used_server_id, &newsrv->conf.id);
+
+ HA_DIAG_WARNING_COND((curproxy->cap & PR_CAP_LB) && !newsrv->uweight,
+ "configured with weight of 0 will never be selected by load balancing algorithms\n");
+
+ reset_usermsgs_ctx();
+ return 0;
+
+ out:
+ reset_usermsgs_ctx();
+ return err_code;
+}
+
+/* Returns a pointer to the first server matching the id <id>.
+ * NULL is returned if no match is found.
+ * The lookup is performed in the backend <bk>.
+ */
+struct server *server_find_by_id(struct proxy *bk, int id)
+{
+ struct eb32_node *eb32;
+ struct server *curserver;
+
+ if (!bk || (id == 0))
+ return NULL;
+
+ /* <bk> has no backend capabilities, so it can't have a server */
+ if (!(bk->cap & PR_CAP_BE))
+ return NULL;
+
+ curserver = NULL;
+
+ eb32 = eb32_lookup(&bk->conf.used_server_id, id);
+ if (eb32)
+ curserver = container_of(eb32, struct server, conf.id);
+
+ return curserver;
+}
+
+/* Returns a pointer to the first server matching either name <name>, or id
+ * if <name> starts with a '#'. NULL is returned if no match is found.
+ * The lookup is performed in the backend <bk>.
+ */
+struct server *server_find_by_name(struct proxy *bk, const char *name)
+{
+ struct server *curserver;
+
+ if (!bk || !name)
+ return NULL;
+
+ /* <bk> has no backend capabilities, so it can't have a server */
+ if (!(bk->cap & PR_CAP_BE))
+ return NULL;
+
+ curserver = NULL;
+ if (*name == '#') {
+ curserver = server_find_by_id(bk, atoi(name + 1));
+ if (curserver)
+ return curserver;
+ }
+ else {
+ curserver = bk->srv;
+
+ while (curserver && (strcmp(curserver->id, name) != 0))
+ curserver = curserver->next;
+
+ if (curserver)
+ return curserver;
+ }
+
+ return NULL;
+}
+
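+/* Returns the server in backend <bk> best matching <name> and/or <id>, or
+ * NULL if none matches. When both are provided and designate different
+ * servers, <diff>, if not NULL, receives a bit mask: bit 0 is set when the
+ * result was chosen by name despite a non-matching id, bit 1 when it was
+ * chosen by id despite a non-matching name.
+ */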
+struct server *server_find_best_match(struct proxy *bk, char *name, int id, int *diff)
+{
+ struct server *byname;
+ struct server *byid;
+
+ if (!name && !id)
+ return NULL;
+
+ if (diff)
+ *diff = 0;
+
+ byname = byid = NULL;
+
+ if (name) {
+ byname = server_find_by_name(bk, name);
+ if (byname && (!id || byname->puid == id))
+ return byname;
+ }
+
+ /* remaining possibilities :
+ * - name not set
+ * - name set but not found
+ * - name found but ID doesn't match
+ */
+ if (id) {
+ byid = server_find_by_id(bk, id);
+ if (byid) {
+ if (byname) {
+ /* use id only if forced by configuration */
+ if (byid->flags & SRV_F_FORCED_ID) {
+ if (diff)
+ *diff |= 2;
+ return byid;
+ }
+ else {
+ if (diff)
+ *diff |= 1;
+ return byname;
+ }
+ }
+
+ /* remaining possibilities:
+ * - name not set
+ * - name set but not found
+ */
+ if (name && diff)
+ *diff |= 2;
+ return byid;
+ }
+
+ /* id not found */
+ if (byname) {
+ if (diff)
+ *diff |= 1;
+ return byname;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * update a server's current IP address.
+ * ip is a pointer to the new IP address, whose address family is ip_sin_family.
+ * ip is in network format.
+ * updater is a string which contains information about the requester of the update.
+ * updater is used if not NULL.
+ *
+ * A log line and a stderr warning message are generated based on the server's backend options.
+ *
+ * Must be called with the server lock held.
+ */
+int srv_update_addr(struct server *s, void *ip, int ip_sin_family, const char *updater)
+{
+ /* save the new IP family & address if necessary */
+ switch (ip_sin_family) {
+ case AF_INET:
+ if (s->addr.ss_family == ip_sin_family &&
+ !memcmp(ip, &((struct sockaddr_in *)&s->addr)->sin_addr.s_addr, 4))
+ return 0;
+ break;
+ case AF_INET6:
+ if (s->addr.ss_family == ip_sin_family &&
+ !memcmp(ip, &((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr, 16))
+ return 0;
+ break;
+ }
+
+ /* generates a log line and a warning on stderr */
+ {
+ /* book enough space for both IPv4 and IPv6 */
+ char oldip[INET6_ADDRSTRLEN];
+ char newip[INET6_ADDRSTRLEN];
+
+ memset(oldip, '\0', INET6_ADDRSTRLEN);
+ memset(newip, '\0', INET6_ADDRSTRLEN);
+
+ /* copy old IP address in a string */
+ switch (s->addr.ss_family) {
+ case AF_INET:
+ inet_ntop(s->addr.ss_family, &((struct sockaddr_in *)&s->addr)->sin_addr, oldip, INET_ADDRSTRLEN);
+ break;
+ case AF_INET6:
+ inet_ntop(s->addr.ss_family, &((struct sockaddr_in6 *)&s->addr)->sin6_addr, oldip, INET6_ADDRSTRLEN);
+ break;
+ default:
+ strcpy(oldip, "(none)");
+ break;
+ }
+
+ /* copy new IP address in a string */
+ switch (ip_sin_family) {
+ case AF_INET:
+ inet_ntop(ip_sin_family, ip, newip, INET_ADDRSTRLEN);
+ break;
+ case AF_INET6:
+ inet_ntop(ip_sin_family, ip, newip, INET6_ADDRSTRLEN);
+ break;
+ }
+
+ /* save log line into a buffer */
+ chunk_printf(&trash, "%s/%s changed its IP from %s to %s by %s",
+ s->proxy->id, s->id, oldip, newip, updater);
+
+ /* write the buffer on stderr */
+ ha_warning("%s.\n", trash.area);
+
+ /* send a log */
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
+ }
+
+ /* save the new IP family */
+ s->addr.ss_family = ip_sin_family;
+ /* save the new IP address */
+ switch (ip_sin_family) {
+ case AF_INET:
+ memcpy(&((struct sockaddr_in *)&s->addr)->sin_addr.s_addr, ip, 4);
+ break;
+ case AF_INET6:
+ memcpy(((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr, ip, 16);
+ break;
+ }
+ srv_set_dyncookie(s);
+ srv_set_addr_desc(s, 1);
+
+ return 0;
+}
+
+/* Update the agent health check address and port.
+ * <addr> can be an IPv4/IPv6 address or a hostname.
+ * If an error occurs, nothing is applied.
+ * Must be called with the server lock held.
+ */
+const char *srv_update_agent_addr_port(struct server *s, const char *addr, const char *port)
+{
+ struct sockaddr_storage sk;
+ struct buffer *msg;
+ int new_port;
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (!(s->agent.state & CHK_ST_ENABLED)) {
+ chunk_strcat(msg, "agent checks are not enabled on this server");
+ goto out;
+ }
+ if (addr) {
+ memset(&sk, 0, sizeof(struct sockaddr_storage));
+ if (str2ip(addr, &sk) == NULL) {
+ chunk_appendf(msg, "invalid addr '%s'", addr);
+ goto out;
+ }
+ }
+ if (port) {
+ if (strl2irc(port, strlen(port), &new_port) != 0) {
+ chunk_appendf(msg, "provided port is not an integer");
+ goto out;
+ }
+ if (new_port < 0 || new_port > 65535) {
+ chunk_appendf(msg, "provided port is invalid");
+ goto out;
+ }
+ }
+out:
+ if (msg->data)
+ return msg->area;
+ else {
+ if (addr)
+ set_srv_agent_addr(s, &sk);
+ if (port)
+ set_srv_agent_port(s, new_port);
+ }
+ return NULL;
+}
+
+/* Update the server health check address and port.
+ * <addr> must be an IPv4 or IPv6 address; it is not resolved.
+ * If an error occurs, nothing is applied.
+ * Must be called with the server lock held.
+ */
+const char *srv_update_check_addr_port(struct server *s, const char *addr, const char *port)
+{
+ struct sockaddr_storage sk;
+ struct buffer *msg;
+ int new_port;
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (!(s->check.state & CHK_ST_ENABLED)) {
+ chunk_strcat(msg, "health checks are not enabled on this server");
+ goto out;
+ }
+ if (addr) {
+ memset(&sk, 0, sizeof(struct sockaddr_storage));
+ if (str2ip2(addr, &sk, 0) == NULL) {
+ chunk_appendf(msg, "invalid addr '%s'", addr);
+ goto out;
+ }
+ }
+ if (port) {
+ if (strl2irc(port, strlen(port), &new_port) != 0) {
+ chunk_appendf(msg, "provided port is not an integer");
+ goto out;
+ }
+ if (new_port < 0 || new_port > 65535) {
+ chunk_appendf(msg, "provided port is invalid");
+ goto out;
+ }
+ /* prevent the update of port to 0 if MAPPORTS are in use */
+ if ((s->flags & SRV_F_MAPPORTS) && new_port == 0) {
+ chunk_appendf(msg, "can't unset 'port' since MAPPORTS is in use");
+ goto out;
+ }
+ }
+out:
+ if (msg->data)
+ return msg->area;
+ else {
+ if (addr)
+ s->check.addr = sk;
+ if (port)
+ s->check.port = new_port;
+ }
+ return NULL;
+}
+
+/*
+ * This function updates a server's addr and port only for AF_INET and AF_INET6 families.
+ *
+ * Caller can pass its name through <updater> to get it integrated in the response message
+ * returned by the function.
+ *
+ * The function first does the following, in that order:
+ * - validates the new addr and/or port
+ * - checks if an update is required (new IP or port is different than current ones)
+ * - checks the update is allowed:
+ * - don't switch from/to a family other than AF_INET and AF_INET6
+ * - allow all changes if no CHECKS are configured
+ * - if CHECK is configured:
+ * - if switching to a port map (SRV_F_MAPPORTS), ensure health checks have their own port
+ * - applies required changes to both ADDR and PORT if both 'required' and 'allowed'
+ * conditions are met
+ *
+ * Must be called with the server lock held.
+ */
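+/* For illustration (hypothetical values): passing <port> as "8080" sets a
+ * fixed port, while "+8080" or "-8080" switches the server to port mapping
+ * (SRV_F_MAPPORTS), where the signed value is applied as an offset to the
+ * incoming connection's port.
+ */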
+const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, char *updater)
+{
+ struct sockaddr_storage sa;
+ int ret, port_change_required;
+ char current_addr[INET6_ADDRSTRLEN];
+ uint16_t current_port, new_port;
+ struct buffer *msg;
+ int changed = 0;
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (addr) {
+ memset(&sa, 0, sizeof(struct sockaddr_storage));
+ if (str2ip2(addr, &sa, 0) == NULL) {
+ chunk_printf(msg, "Invalid addr '%s'", addr);
+ goto out;
+ }
+
+ /* changes are allowed on AF_INET* families only */
+ if ((sa.ss_family != AF_INET) && (sa.ss_family != AF_INET6)) {
+ chunk_printf(msg, "Update to families other than AF_INET and AF_INET6 supported only through configuration file");
+ goto out;
+ }
+
+ /* collecting data currently setup */
+ memset(current_addr, '\0', sizeof(current_addr));
+ ret = addr_to_str(&s->addr, current_addr, sizeof(current_addr));
+ /* changes are allowed on AF_INET* families only */
+ if ((ret != AF_INET) && (ret != AF_INET6)) {
+ chunk_printf(msg, "Update for the current server address family is only supported through configuration file");
+ goto out;
+ }
+
+ /* applying ADDR changes if required and allowed
+ * ipcmp returns 0 when both ADDR are the same
+ */
+ if (ipcmp(&s->addr, &sa) == 0) {
+ chunk_appendf(msg, "no need to change the addr");
+ goto port;
+ }
+ ipcpy(&sa, &s->addr);
+ changed = 1;
+
+ /* update report for caller */
+ chunk_printf(msg, "IP changed from '%s' to '%s'", current_addr, addr);
+ }
+
+ port:
+ if (port) {
+ char sign = '\0';
+ char *endptr;
+
+ if (addr)
+ chunk_appendf(msg, ", ");
+
+ /* collecting data currently setup */
+ current_port = s->svc_port;
+
+ /* check if PORT change is required */
+ port_change_required = 0;
+
+ sign = *port;
+ errno = 0;
+ new_port = strtol(port, &endptr, 10);
+ if ((errno != 0) || (port == endptr)) {
+ chunk_appendf(msg, "problem converting port '%s' to an int", port);
+ goto out;
+ }
+
+ /* check if caller triggers a port mapped or offset */
+ if (sign == '-' || sign == '+') {
+ /* check if server currently uses port map */
+ if (!(s->flags & SRV_F_MAPPORTS)) {
+ /* switching from a fixed port to a port map always triggers
+ * a port change */
+ port_change_required = 1;
+ /* when a check is configured and we're switching from a fixed
+ * port to a SRV_F_MAPPORTS (mapped) port, reject the PORT
+ * change unless the check has its own dedicated port */
+ if (!s->check.port) {
+ chunk_appendf(msg, "can't change <port> to port map because it is incompatible with current health check port configuration (use 'port' statement from the 'server' directive.");
+ goto out;
+ }
+ }
+ /* we're already using port maps */
+ else {
+ port_change_required = current_port != new_port;
+ }
+ }
+ /* fixed port */
+ else {
+ port_change_required = current_port != new_port;
+ }
+
+ /* applying PORT changes if required and update response message */
+ if (port_change_required) {
+ /* apply new port */
+ s->svc_port = new_port;
+ changed = 1;
+
+ /* prepare message */
+ chunk_appendf(msg, "port changed from '");
+ if (s->flags & SRV_F_MAPPORTS)
+ chunk_appendf(msg, "+");
+ chunk_appendf(msg, "%d' to '", current_port);
+
+ if (sign == '-') {
+ s->flags |= SRV_F_MAPPORTS;
+ chunk_appendf(msg, "%c", sign);
+ /* just use for result output */
+ new_port = -new_port;
+ }
+ else if (sign == '+') {
+ s->flags |= SRV_F_MAPPORTS;
+ chunk_appendf(msg, "%c", sign);
+ }
+ else {
+ s->flags &= ~SRV_F_MAPPORTS;
+ }
+
+ chunk_appendf(msg, "%d'", new_port);
+ }
+ else {
+ chunk_appendf(msg, "no need to change the port");
+ }
+ }
+
+out:
+ if (changed) {
+ /* force connection cleanup on the given server */
+ srv_cleanup_connections(s);
+ srv_set_dyncookie(s);
+ srv_set_addr_desc(s, 1);
+ }
+ if (updater)
+ chunk_appendf(msg, " by '%s'", updater);
+ chunk_appendf(msg, "\n");
+ return msg->area;
+}
+
+/*
+ * update server status based on result of SRV resolution
+ * returns:
+ * 0 if server status is updated
+ * 1 if server status has not changed
+ *
+ * Must be called with the server lock held.
+ */
+int srvrq_update_srv_status(struct server *s, int has_no_ip)
+{
+ if (!s->srvrq)
+ return 1;
+
+ /* since this server has an IP, it can go back in production */
+ if (has_no_ip == 0) {
+ srv_clr_admin_flag(s, SRV_ADMF_RMAINT);
+ return 1;
+ }
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, "entry removed from SRV record");
+ return 0;
+}
+
+/*
+ * update server status based on result of name resolution
+ * returns:
+ * 0 if server status is updated
+ * 1 if server status has not changed
+ *
+ * Must be called with the server lock held.
+ */
+int snr_update_srv_status(struct server *s, int has_no_ip)
+{
+ struct resolvers *resolvers = s->resolvers;
+ struct resolv_resolution *resolution = (s->resolv_requester ? s->resolv_requester->resolution : NULL);
+ int exp;
+
+ /* If resolution is NULL we're dealing with the Additional records of an SRV response */
+ if (resolution == NULL)
+ return srvrq_update_srv_status(s, has_no_ip);
+
+ switch (resolution->status) {
+ case RSLV_STATUS_NONE:
+ /* status when HAProxy has just (re)started.
+ * Nothing to do, since the task is already automatically started */
+ break;
+
+ case RSLV_STATUS_VALID:
+ /*
+ * resume health checks
+ * server will be turned back on if health check is safe
+ */
+ if (has_no_ip) {
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT,
+ "No IP for server ");
+ return 0;
+ }
+
+ if (!(s->next_admin & SRV_ADMF_RMAINT))
+ return 1;
+ srv_clr_admin_flag(s, SRV_ADMF_RMAINT);
+ chunk_printf(&trash, "Server %s/%s administratively READY thanks to valid DNS answer",
+ s->proxy->id, s->id);
+
+ ha_warning("%s.\n", trash.area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
+ return 0;
+
+ case RSLV_STATUS_NX:
+ /* stop server if resolution is NX for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.nx);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, "DNS NX status");
+ return 0;
+
+ case RSLV_STATUS_TIMEOUT:
+ /* stop server if resolution is TIMEOUT for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.timeout);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, "DNS timeout status");
+ return 0;
+
+ case RSLV_STATUS_REFUSED:
+ /* stop server if resolution is REFUSED for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.refused);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, "DNS refused status");
+ return 0;
+
+ default:
+ /* stop server if resolution failed for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.other);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, "unspecified DNS error");
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Server Name Resolution valid response callback
+ * It expects:
+ * - <nameserver>: the name server which answered the valid response
+ * - <response>: buffer containing a valid DNS response
+ * - <response_len>: size of <response>
+ * It performs the following actions:
+ * - ignore the response if the current ip is found but the server family is not met
+ * - update with the first new ip found if the family is met and the current IP is not found
+ * returns:
+ * 0 on error
+ * 1 when no error or safe ignore
+ *
+ * Must be called with server lock held
+ */
+int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters)
+{
+ struct server *s = NULL;
+ struct resolv_resolution *resolution = NULL;
+ void *serverip, *firstip;
+ short server_sin_family, firstip_sin_family;
+ int ret;
+ struct buffer *chk = get_trash_chunk();
+ int has_no_ip = 0;
+
+ s = objt_server(requester->owner);
+ if (!s)
+ return 1;
+
+ if (s->srvrq) {
+ /* If DNS resolution is disabled ignore it.
+ * This is the case if the server was associated to
+ * a SRV record and this record is now expired.
+ */
+ if (s->flags & SRV_F_NO_RESOLUTION)
+ return 1;
+ }
+
+ resolution = (s->resolv_requester ? s->resolv_requester->resolution : NULL);
+ if (!resolution)
+ return 1;
+
+ /* initializing variables */
+ firstip = NULL; /* pointer to the first valid response found */
+ /* it will be used as the new IP if a change is required */
+ firstip_sin_family = AF_UNSPEC;
+ serverip = NULL; /* current server IP address */
+
+ /* initializing server IP pointer */
+ server_sin_family = s->addr.ss_family;
+ switch (server_sin_family) {
+ case AF_INET:
+ serverip = &((struct sockaddr_in *)&s->addr)->sin_addr.s_addr;
+ break;
+
+ case AF_INET6:
+ serverip = &((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr;
+ break;
+
+ case AF_UNSPEC:
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ ret = resolv_get_ip_from_response(&resolution->response, &s->resolv_opts,
+ serverip, server_sin_family, &firstip,
+ &firstip_sin_family, s);
+
+ switch (ret) {
+ case RSLV_UPD_NO:
+ goto update_status;
+
+ case RSLV_UPD_SRVIP_NOT_FOUND:
+ goto save_ip;
+
+ case RSLV_UPD_NO_IP_FOUND:
+ has_no_ip = 1;
+ goto update_status;
+
+ case RSLV_UPD_NAME_ERROR:
+ /* update resolution status to OTHER error type */
+ resolution->status = RSLV_STATUS_OTHER;
+ has_no_ip = 1;
+ goto update_status;
+
+ default:
+ has_no_ip = 1;
+ goto invalid;
+
+ }
+
+ save_ip:
+ if (counters) {
+ counters->app.resolver.update++;
+ /* save the first ip we found */
+ chunk_printf(chk, "%s/%s", counters->pid, counters->id);
+ }
+ else
+ chunk_printf(chk, "DNS cache");
+ srv_update_addr(s, firstip, firstip_sin_family, (char *) chk->area);
+
+ update_status:
+ if (!snr_update_srv_status(s, has_no_ip) && has_no_ip)
+ memset(&s->addr, 0, sizeof(s->addr));
+ return 1;
+
+ invalid:
+ if (counters) {
+ counters->app.resolver.invalid++;
+ goto update_status;
+ }
+ if (!snr_update_srv_status(s, has_no_ip) && has_no_ip)
+ memset(&s->addr, 0, sizeof(s->addr));
+ return 0;
+}
+
+/*
+ * SRV record error management callback
+ * returns:
+ * 0 if we can trash answer items.
+ * 1 when safely ignored and we must keep answer items
+ *
+ * Grabs the server's lock.
+ */
+int srvrq_resolution_error_cb(struct resolv_requester *requester, int error_code)
+{
+ struct resolv_srvrq *srvrq;
+ struct resolv_resolution *res;
+ struct resolvers *resolvers;
+ int exp;
+
+ /* SRV records */
+ srvrq = objt_resolv_srvrq(requester->owner);
+ if (!srvrq)
+ return 0;
+
+ resolvers = srvrq->resolvers;
+ res = requester->resolution;
+
+ switch (res->status) {
+
+ case RSLV_STATUS_NX:
+ /* stop server if resolution is NX for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.nx);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ break;
+
+ case RSLV_STATUS_TIMEOUT:
+ /* stop server if resolution is TIMEOUT for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.timeout);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ break;
+
+ case RSLV_STATUS_REFUSED:
+ /* stop server if resolution is REFUSED for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.refused);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ break;
+
+ default:
+ /* stop server if resolution failed for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.other);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ }
+
+ /* Remove any associated server ref */
+ resolv_detach_from_resolution_answer_items(res, requester);
+
+ return 0;
+}
+
+/*
+ * Server Name Resolution error management callback
+ * returns:
+ * 0 if we can trash answer items.
+ * 1 when safely ignored and we must keep answer items
+ *
+ * Grabs the server's lock.
+ */
+int snr_resolution_error_cb(struct resolv_requester *requester, int error_code)
+{
+ struct server *s;
+
+ s = objt_server(requester->owner);
+ if (!s)
+ return 0;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ if (!snr_update_srv_status(s, 1)) {
+ memset(&s->addr, 0, sizeof(s->addr));
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ resolv_detach_from_resolution_answer_items(requester->resolution, requester);
+ return 0;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+
+ return 1;
+}
+
+/*
+ * Function to check if <ip> is already affected to a server in the backend
+ * which owns <srv> and is up.
+ * It returns a pointer to the first server found or NULL if <ip> is not yet
+ * assigned.
+ *
+ * Must be called with server lock held
+ */
+struct server *snr_check_ip_callback(struct server *srv, void *ip, unsigned char *ip_family)
+{
+ struct server *tmpsrv;
+ struct proxy *be;
+
+ if (!srv)
+ return NULL;
+
+ be = srv->proxy;
+ for (tmpsrv = be->srv; tmpsrv; tmpsrv = tmpsrv->next) {
+ /* we found the current server is the same, ignore it */
+ if (srv == tmpsrv)
+ continue;
+
+ /* We want to compare the IP in the record with the IP of the servers in the
+ * same backend, only if:
+ * * DNS resolution is enabled on the server
+ * * the hostname used for the resolution by our server is the same as the
+ * one used for the server found in the backend
+ * * the server found in the backend is not our current server
+ */
+ HA_SPIN_LOCK(SERVER_LOCK, &tmpsrv->lock);
+ if ((tmpsrv->hostname_dn == NULL) ||
+ (srv->hostname_dn_len != tmpsrv->hostname_dn_len) ||
+ (strcasecmp(srv->hostname_dn, tmpsrv->hostname_dn) != 0) ||
+ (srv->puid == tmpsrv->puid)) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ continue;
+ }
+
+ /* If the server has been taken down, don't consider it */
+ if (tmpsrv->next_admin & SRV_ADMF_RMAINT) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ continue;
+ }
+
+ /* At this point, we have 2 different servers using the same DNS hostname
+ * for their respective resolution.
+ */
+ if (*ip_family == tmpsrv->addr.ss_family &&
+ ((tmpsrv->addr.ss_family == AF_INET &&
+ memcmp(ip, &((struct sockaddr_in *)&tmpsrv->addr)->sin_addr, 4) == 0) ||
+ (tmpsrv->addr.ss_family == AF_INET6 &&
+ memcmp(ip, &((struct sockaddr_in6 *)&tmpsrv->addr)->sin6_addr, 16) == 0))) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ return tmpsrv;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ }
+
+ return NULL;
+}
+
+/* Sets the server's address (srv->addr) from srv->hostname using the libc's
+ * resolver. This is suited for initial address configuration. Returns 0 on
+ * success, otherwise a non-zero error code. In case of error, *err_code, if
+ * not NULL, is filled up.
+ */
+int srv_set_addr_via_libc(struct server *srv, int *err_code)
+{
+ if (str2ip2(srv->hostname, &srv->addr, 1) == NULL) {
+ if (err_code)
+ *err_code |= ERR_WARN;
+ return 1;
+ }
+ return 0;
+}
+
+/* Set the server's FQDN (->hostname) from <hostname>.
+ * Returns -1 on failure, 0 on success.
+ *
+ * Must be called with the server lock held.
+ */
+int srv_set_fqdn(struct server *srv, const char *hostname, int resolv_locked)
+{
+ struct resolv_resolution *resolution;
+ char *hostname_dn;
+ int hostname_len, hostname_dn_len;
+
+ /* Note that the server lock is already held. */
+ if (!srv->resolvers)
+ return -1;
+
+ if (!resolv_locked)
+ HA_SPIN_LOCK(DNS_LOCK, &srv->resolvers->lock);
+ /* run time DNS/SRV resolution was not active for this server
+ * and we can't enable it at run time for now.
+ */
+ if (!srv->resolv_requester && !srv->srvrq)
+ goto err;
+
+ chunk_reset(&trash);
+ hostname_len = strlen(hostname);
+ hostname_dn = trash.area;
+ hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
+ hostname_dn, trash.size);
+ if (hostname_dn_len == -1)
+ goto err;
+
+ resolution = (srv->resolv_requester ? srv->resolv_requester->resolution : NULL);
+ if (resolution &&
+ resolution->hostname_dn &&
+ resolution->hostname_dn_len == hostname_dn_len &&
+ strcasecmp(resolution->hostname_dn, hostname_dn) == 0)
+ goto end;
+
+ resolv_unlink_resolution(srv->resolv_requester);
+
+ free(srv->hostname);
+ free(srv->hostname_dn);
+ srv->hostname = strdup(hostname);
+ srv->hostname_dn = strdup(hostname_dn);
+ srv->hostname_dn_len = hostname_dn_len;
+ if (!srv->hostname || !srv->hostname_dn)
+ goto err;
+
+ if (srv->flags & SRV_F_NO_RESOLUTION)
+ goto end;
+
+ if (resolv_link_resolution(srv, OBJ_TYPE_SERVER, 1) == -1)
+ goto err;
+
+ end:
+ if (!resolv_locked)
+ HA_SPIN_UNLOCK(DNS_LOCK, &srv->resolvers->lock);
+ return 0;
+
+ err:
+ if (!resolv_locked)
+ HA_SPIN_UNLOCK(DNS_LOCK, &srv->resolvers->lock);
+ return -1;
+}
+
+/* Sets the server's address (srv->addr) from srv->lastaddr which was filled
+ * from the state file. This is suited for initial address configuration.
+ * Returns 0 on success, otherwise a non-zero error code. In case of error,
+ * *err_code, if not NULL, is filled up.
+ */
+static int srv_apply_lastaddr(struct server *srv, int *err_code)
+{
+ if (!str2ip2(srv->lastaddr, &srv->addr, 0)) {
+ if (err_code)
+ *err_code |= ERR_WARN;
+ return 1;
+ }
+ return 0;
+}
+
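+/* Illustrative configuration (hypothetical names): with
+ *   server app1 app.example.local:80 init-addr last,libc,none
+ * the methods below are tried in this order until one yields an address;
+ * "none" finally starts the server in maintenance mode.
+ */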
+/* returns 0 if no error, otherwise a combination of ERR_* flags */
+static int srv_iterate_initaddr(struct server *srv)
+{
+ char *name = srv->hostname;
+ int return_code = 0;
+ int err_code;
+ unsigned int methods;
+
+ /* If no addr and no hostname set, get the name from the DNS SRV request */
+ if (!name && srv->srvrq)
+ name = srv->srvrq->name;
+
+ methods = srv->init_addr_methods;
+ if (!methods) {
+ /* otherwise default to "last,libc" */
+ srv_append_initaddr(&methods, SRV_IADDR_LAST);
+ srv_append_initaddr(&methods, SRV_IADDR_LIBC);
+ if (srv->resolvers_id) {
+ /* dns resolution is configured, add "none" to not fail on startup */
+ srv_append_initaddr(&methods, SRV_IADDR_NONE);
+ }
+ }
+
+ /* "-dr" : always append "none" so that server addresses resolution
+ * failures are silently ignored, this is convenient to validate some
+ * configs out of their environment.
+ */
+ if (global.tune.options & GTUNE_RESOLVE_DONTFAIL)
+ srv_append_initaddr(&methods, SRV_IADDR_NONE);
+
+ while (methods) {
+ err_code = 0;
+ switch (srv_get_next_initaddr(&methods)) {
+ case SRV_IADDR_LAST:
+ if (!srv->lastaddr)
+ continue;
+ if (srv_apply_lastaddr(srv, &err_code) == 0)
+ goto out;
+ return_code |= err_code;
+ break;
+
+ case SRV_IADDR_LIBC:
+ if (!srv->hostname)
+ continue;
+ if (srv_set_addr_via_libc(srv, &err_code) == 0)
+ goto out;
+ return_code |= err_code;
+ break;
+
+ case SRV_IADDR_NONE:
+ srv_set_admin_flag(srv, SRV_ADMF_RMAINT, NULL);
+ if (return_code) {
+ ha_warning("could not resolve address '%s', disabling server.\n",
+ name);
+ }
+ return return_code;
+
+ case SRV_IADDR_IP:
+ ipcpy(&srv->init_addr, &srv->addr);
+ if (return_code) {
+ ha_warning("could not resolve address '%s', falling back to configured address.\n",
+ name);
+ }
+ goto out;
+
+ default: /* unhandled method */
+ break;
+ }
+ }
+
+ if (!return_code)
+ ha_alert("no method found to resolve address '%s'.\n", name);
+ else
+ ha_alert("could not resolve address '%s'.\n", name);
+
+ return_code |= ERR_ALERT | ERR_FATAL;
+ return return_code;
+out:
+ srv_set_dyncookie(srv);
+ srv_set_addr_desc(srv, 1);
+ return return_code;
+}
+
+/*
+ * This function parses all backends and all servers within each backend
+ * and performs servers' addr resolution based on information provided by:
+ * - configuration file
+ * - server-state file (states provided by an 'old' haproxy process)
+ *
+ * Returns 0 if no error, otherwise, a combination of ERR_ flags.
+ */
+int srv_init_addr(void)
+{
+ struct proxy *curproxy;
+ int return_code = 0;
+
+ curproxy = proxies_list;
+ while (curproxy) {
+ struct server *srv;
+
+ /* servers are in backend only */
+ if (!(curproxy->cap & PR_CAP_BE) || (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ goto srv_init_addr_next;
+
+ for (srv = curproxy->srv; srv; srv = srv->next) {
+ set_usermsgs_ctx(srv->conf.file, srv->conf.line, &srv->obj_type);
+ if (srv->hostname || srv->srvrq)
+ return_code |= srv_iterate_initaddr(srv);
+ reset_usermsgs_ctx();
+ }
+
+ srv_init_addr_next:
+ curproxy = curproxy->next;
+ }
+
+ return return_code;
+}
+
+/*
+ * Must be called with the server lock held.
+ */
+const char *srv_update_fqdn(struct server *server, const char *fqdn, const char *updater, int resolv_locked)
+{
+ struct buffer *msg;
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (server->hostname && strcmp(fqdn, server->hostname) == 0) {
+ chunk_appendf(msg, "no need to change the FDQN");
+ goto out;
+ }
+
+ if (strlen(fqdn) > DNS_MAX_NAME_SIZE || invalid_domainchar(fqdn)) {
+ chunk_appendf(msg, "invalid fqdn '%s'", fqdn);
+ goto out;
+ }
+
+ chunk_appendf(msg, "%s/%s changed its FQDN from %s to %s",
+ server->proxy->id, server->id, server->hostname, fqdn);
+
+ if (srv_set_fqdn(server, fqdn, resolv_locked) < 0) {
+ chunk_reset(msg);
+ chunk_appendf(msg, "could not update %s/%s FQDN",
+ server->proxy->id, server->id);
+ goto out;
+ }
+
+ /* Flag as FQDN set from stats socket. */
+ server->next_admin |= SRV_ADMF_HMAINT;
+
+ out:
+ if (updater)
+ chunk_appendf(msg, " by '%s'", updater);
+ chunk_appendf(msg, "\n");
+
+ return msg->area;
+}
+
+/* Expects to find a backend and a server in <arg> under the form <backend>/<server>,
+ * and returns the pointer to the server. Otherwise, display adequate error messages
+ * on the CLI, sets the CLI's state to CLI_ST_PRINT and returns NULL. This is only
+ * used for CLI commands requiring a server name.
+ * Important: the <arg> is modified to remove the '/'.
+ */
+struct server *cli_find_server(struct appctx *appctx, char *arg)
+{
+ struct proxy *px;
+ struct server *sv;
+ char *line;
+
+ /* split "backend/server" and make <line> point to server */
+ for (line = arg; *line; line++)
+ if (*line == '/') {
+ *line++ = '\0';
+ break;
+ }
+
+ if (!*line || !*arg) {
+ cli_err(appctx, "Require 'backend/server'.\n");
+ return NULL;
+ }
+
+ if (!get_backend_server(arg, line, &px, &sv)) {
+ cli_err(appctx, px ? "No such server.\n" : "No such backend.\n");
+ return NULL;
+ }
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) {
+ cli_err(appctx, "Proxy is disabled.\n");
+ return NULL;
+ }
+
+ return sv;
+}
+
+
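+/* Illustrative CLI usage (hypothetical backend/server names):
+ *   set server be1/srv1 state maint
+ *   set server be1/srv1 addr 192.0.2.10 port 8080
+ *   set server be1/srv1 weight 50
+ */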
+/* grabs the server lock */
+static int cli_parse_set_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+ const char *warning;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ if (strcmp(args[3], "weight") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = server_parse_weight_change_request(sv, args[4]);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_err(appctx, warning);
+ }
+ else if (strcmp(args[3], "state") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (strcmp(args[4], "ready") == 0)
+ srv_adm_set_ready(sv);
+ else if (strcmp(args[4], "drain") == 0)
+ srv_adm_set_drain(sv);
+ else if (strcmp(args[4], "maint") == 0)
+ srv_adm_set_maint(sv);
+ else
+ cli_err(appctx, "'set server <srv> state' expects 'ready', 'drain' and 'maint'.\n");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "health") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->track)
+ cli_err(appctx, "cannot change health on a tracking server.\n");
+ else if (strcmp(args[4], "up") == 0) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_running(sv, "changed from CLI", NULL);
+ }
+ else if (strcmp(args[4], "stopping") == 0) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_stopping(sv, "changed from CLI", NULL);
+ }
+ else if (strcmp(args[4], "down") == 0) {
+ sv->check.health = 0;
+ srv_set_stopped(sv, "changed from CLI", NULL);
+ }
+ else
+ cli_err(appctx, "'set server <srv> health' expects 'up', 'stopping', or 'down'.\n");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "agent") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->agent.state & CHK_ST_ENABLED))
+ cli_err(appctx, "agent checks are not enabled on this server.\n");
+ else if (strcmp(args[4], "up") == 0) {
+ sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
+ srv_set_running(sv, "changed from CLI", NULL);
+ }
+ else if (strcmp(args[4], "down") == 0) {
+ sv->agent.health = 0;
+ srv_set_stopped(sv, "changed from CLI", NULL);
+ }
+ else
+ cli_err(appctx, "'set server <srv> agent' expects 'up' or 'down'.\n");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "agent-addr") == 0) {
+ char *addr = NULL;
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> agent-addr requires"
+ " an address and optionally a port.\n");
+ goto out;
+ }
+ addr = args[4];
+ if (strcmp(args[5], "port") == 0)
+ port = args[6];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_agent_addr_port(sv, addr, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "agent-port") == 0) {
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> agent-port requires"
+ " a port.\n");
+ goto out;
+ }
+ port = args[4];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_agent_addr_port(sv, NULL, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "agent-send") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->agent.state & CHK_ST_ENABLED))
+ cli_err(appctx, "agent checks are not enabled on this server.\n");
+ else {
+ if (!set_srv_agent_send(sv, args[4]))
+ cli_err(appctx, "cannot allocate memory for new string.\n");
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "check-addr") == 0) {
+ char *addr = NULL;
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> check-addr requires"
+ " an address and optionally a port.\n");
+ goto out;
+ }
+ addr = args[4];
+ if (strcmp(args[5], "port") == 0)
+ port = args[6];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_check_addr_port(sv, addr, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "check-port") == 0) {
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> check-port requires"
+ " a port.\n");
+ goto out;
+ }
+ port = args[4];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_check_addr_port(sv, NULL, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "addr") == 0) {
+ char *addr = NULL;
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> addr requires an address and optionally a port.\n");
+ goto out;
+ }
+ else {
+ addr = args[4];
+ }
+ if (strcmp(args[5], "port") == 0) {
+ port = args[6];
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_addr_port(sv, addr, port, "stats socket command");
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ srv_clr_admin_flag(sv, SRV_ADMF_RMAINT);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "fqdn") == 0) {
+ if (!*args[4]) {
+ cli_err(appctx, "set server <b>/<s> fqdn requires a FQDN.\n");
+ goto out;
+ }
+ if (!sv->resolvers) {
+ cli_err(appctx, "set server <b>/<s> fqdn failed because no resolution is configured.\n");
+ goto out;
+ }
+ if (sv->srvrq) {
+ cli_err(appctx, "set server <b>/<s> fqdn failed because SRV resolution is configured.\n");
+ goto out;
+ }
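+ /* take the resolvers lock before the server lock, consistent
+ * with the <resolv_locked> convention of srv_update_fqdn()
+ * below (assumed lock ordering) */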
+ HA_SPIN_LOCK(DNS_LOCK, &sv->resolvers->lock);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ /* ensure runtime resolver will process this new fqdn */
+ if (sv->flags & SRV_F_NO_RESOLUTION) {
+ sv->flags &= ~SRV_F_NO_RESOLUTION;
+ }
+ warning = srv_update_fqdn(sv, args[4], "stats socket command", 1);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ HA_SPIN_UNLOCK(DNS_LOCK, &sv->resolvers->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "ssl") == 0) {
+#ifdef USE_OPENSSL
+ if (sv->flags & SRV_F_DYNAMIC) {
+ cli_err(appctx, "'set server <srv> ssl' not supported on dynamic servers\n");
+ goto out;
+ }
+
+ if (sv->ssl_ctx.ctx == NULL) {
+ cli_err(appctx, "'set server <srv> ssl' cannot be set. "
+ " default-server should define ssl settings\n");
+ goto out;
+ }
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (strcmp(args[4], "on") == 0) {
+ srv_set_ssl(sv, 1);
+ } else if (strcmp(args[4], "off") == 0) {
+ srv_set_ssl(sv, 0);
+ } else {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ cli_err(appctx, "'set server <srv> ssl' expects 'on' or 'off'.\n");
+ goto out;
+ }
+ srv_cleanup_connections(sv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ cli_msg(appctx, LOG_NOTICE, "server ssl setting updated.\n");
+#else
+ cli_msg(appctx, LOG_NOTICE, "server ssl setting not supported.\n");
+#endif
+ } else {
+ cli_err(appctx,
+ "usage: set server <backend>/<server> "
+ "addr | agent | agent-addr | agent-port | agent-send | "
+ "check-addr | check-port | fqdn | health | ssl | "
+ "state | weight\n");
+ }
+ out:
+ return 1;
+}
+
+static int cli_parse_get_weight(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *sv;
+ char *line;
+
+ /* split "backend/server" and make <line> point to server */
+ for (line = args[2]; *line; line++)
+ if (*line == '/') {
+ *line++ = '\0';
+ break;
+ }
+
+ if (!*line)
+ return cli_err(appctx, "Require 'backend/server'.\n");
+
+ if (!get_backend_server(args[2], line, &px, &sv))
+ return cli_err(appctx, px ? "No such server.\n" : "No such backend.\n");
+
+ /* return server's effective weight at the moment */
+ snprintf(trash.area, trash.size, "%d (initial %d)\n", sv->uweight,
+ sv->iweight);
+ if (applet_putstr(appctx, trash.area) == -1)
+ return 0;
+ return 1;
+}
+
+/* Parse a "set weight" command.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_set_weight(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+ const char *warning;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+
+ warning = server_parse_weight_change_request(sv, args[3]);
+ if (warning)
+ cli_err(appctx, warning);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+
+ return 1;
+}
+
+/* parse a "set maxconn server" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_set_maxconn_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+ const char *warning;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[3]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+
+ warning = server_parse_maxconn_change_request(sv, args[4]);
+ if (warning)
+ cli_err(appctx, warning);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+
+ return 1;
+}
+
+/* parse a "disable agent" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_disable_agent(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->agent.state &= ~CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse a "disable health" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_disable_health(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->check.state &= ~CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse a "disable server" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_disable_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ srv_adm_set_maint(sv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse a "enable agent" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_enable_agent(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ if (!(sv->agent.state & CHK_ST_CONFIGURED))
+ return cli_err(appctx, "Agent was not configured on this server, cannot enable.\n");
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->agent.state |= CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse a "enable health" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_enable_health(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ if (!(sv->check.state & CHK_ST_CONFIGURED))
+ return cli_err(appctx, "Health check was not configured on this server, cannot enable.\n");
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->check.state |= CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse a "enable server" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_enable_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ srv_adm_set_ready(sv);
+ if (!(sv->flags & SRV_F_COOKIESET)
+ && (sv->proxy->ck_opts & PR_CK_DYNAMIC) &&
+ sv->cookie)
+ srv_check_for_dup_dyncookie(sv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* Allocates the data structures related to load balancing for the server <sv>. It
+ * is only required for dynamic servers.
+ *
+ * At the moment, the server lock is not used as this function is only called
+ * for a dynamic server not yet registered.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int srv_alloc_lb(struct server *sv, struct proxy *be)
+{
+ int node;
+
+ sv->lb_tree = (sv->flags & SRV_F_BACKUP) ?
+ &be->lbprm.chash.bck : &be->lbprm.chash.act;
+ sv->lb_nodes_tot = sv->uweight * BE_WEIGHT_SCALE;
+ sv->lb_nodes_now = 0;
+
+ if (((be->lbprm.algo & (BE_LB_KIND | BE_LB_PARM)) == (BE_LB_KIND_RR | BE_LB_RR_RANDOM)) ||
+ ((be->lbprm.algo & (BE_LB_KIND | BE_LB_HASH_TYPE)) == (BE_LB_KIND_HI | BE_LB_HASH_CONS))) {
+ sv->lb_nodes = calloc(sv->lb_nodes_tot, sizeof(*sv->lb_nodes));
+
+ if (!sv->lb_nodes)
+ return 0;
+
+ for (node = 0; node < sv->lb_nodes_tot; node++) {
+ sv->lb_nodes[node].server = sv;
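+ /* derive a deterministic hash key from the server's puid so
+ * that its nodes keep stable ring positions */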
+ sv->lb_nodes[node].node.key = full_hash(sv->puid * SRV_EWGHT_RANGE + node);
+ }
+ }
+
+ return 1;
+}
+
+/* updates the server's weight during a warmup stage. Once the final weight is
+ * reached, the task automatically stops. Note that any server status change
+ * must have updated s->last_change accordingly.
+ */
+static struct task *server_warmup(struct task *t, void *context, unsigned int state)
+{
+ struct server *s = context;
+
+ /* by default, plan on stopping the task */
+ t->expire = TICK_ETERNITY;
+ if ((s->next_admin & SRV_ADMF_MAINT) ||
+ (s->next_state != SRV_ST_STARTING))
+ return t;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+
+ /* recalculate the weights and update the state */
+ server_recalc_eweight(s, 1);
+
+ /* probably that we can refill this server with a bit more connections */
+ pendconn_grab_from_px(s);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+
+ /* get back there in 1 second or 1/20th of the slowstart interval,
+ * whichever is greater, resulting in small 5% steps.
+ */
+ if (s->next_state == SRV_ST_STARTING)
+ t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
+ return t;
+}
+
+/* Allocate the slowstart task if the server is configured with a slowstart
+ * timer. If server next_state is SRV_ST_STARTING, the task is scheduled.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int init_srv_slowstart(struct server *srv)
+{
+ struct task *t;
+
+ if (srv->slowstart) {
+ if ((t = task_new_anywhere()) == NULL) {
+ ha_alert("Cannot activate slowstart for server %s/%s: out of memory.\n", srv->proxy->id, srv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ /* We need a warmup task that will be called when the server
+ * state switches from down to up.
+ */
+ srv->warmup = t;
+ t->process = server_warmup;
+ t->context = srv;
+
+ /* the server can already be in the STARTING state here, typically
+ * when it was restored as warming up from a server-state file */
+ if (srv->next_state == SRV_ST_STARTING) {
+ task_schedule(srv->warmup,
+ tick_add(now_ms,
+ MS_TO_TICKS(MAX(1000, (now.tv_sec - srv->last_change)) / 20)));
+ }
+ }
+
+ return ERR_NONE;
+}
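+/* called once per server after configuration validation */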
+REGISTER_POST_SERVER_CHECK(init_srv_slowstart);
+
+/* Memory allocation and initialization of the per_thr field.
+ * Returns 0 if the field has been successfully initialized, -1 on failure.
+ */
+int srv_init_per_thr(struct server *srv)
+{
+ int i;
+
+ srv->per_thr = calloc(global.nbthread, sizeof(*srv->per_thr));
+ if (!srv->per_thr)
+ return -1;
+
+ for (i = 0; i < global.nbthread; i++) {
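+ /* per-thread trees of idle, safe and available connections,
+ * plus the list of streams attached to this server */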
+ srv->per_thr[i].idle_conns = EB_ROOT;
+ srv->per_thr[i].safe_conns = EB_ROOT;
+ srv->per_thr[i].avail_conns = EB_ROOT;
+ MT_LIST_INIT(&srv->per_thr[i].streams);
+ }
+
+ return 0;
+}
+
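+/* Illustrative CLI usage (hypothetical names):
+ *   add server be1/srv2 192.0.2.11:8080
+ * The new server is created disabled; its checks must be enabled separately.
+ */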
+/* Parse a "add server" command
+ * Returns 0 if the server has been successfully initialized, 1 on failure.
+ */
+static int cli_parse_add_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *be;
+ struct server *srv;
+ char *be_name, *sv_name;
+ int errcode, argc;
+ int next_id;
+ const int parse_flags = SRV_PARSE_DYNAMIC|SRV_PARSE_PARSE_ADDR;
+
+ usermsgs_clr("CLI");
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ++args;
+
+ sv_name = be_name = args[1];
+ /* split backend/server arg */
+ while (*sv_name && *(++sv_name)) {
+ if (*sv_name == '/') {
+ *sv_name = '\0';
+ ++sv_name;
+ break;
+ }
+ }
+
+ if (!*sv_name)
+ return cli_err(appctx, "Require 'backend/server'.");
+
+ be = proxy_be_by_name(be_name);
+ if (!be)
+ return cli_err(appctx, "No such backend.");
+
+ if (!(be->lbprm.algo & BE_LB_PROP_DYN)) {
+ cli_err(appctx, "Backend must use a dynamic load balancing to support dynamic servers.");
+ return 1;
+ }
+
+ /* At this point, some operations might not be thread-safe anymore. This
+ * might be the case for parsing handlers which were designed to run
+ * only at the starting stage on single-thread mode.
+ *
+ * Activate thread isolation to ensure thread-safety.
+ */
+ thread_isolate();
+
+ args[1] = sv_name;
+ errcode = _srv_parse_init(&srv, args, &argc, be, parse_flags);
+ if (errcode)
+ goto out;
+
+ while (*args[argc]) {
+ errcode = _srv_parse_kw(srv, args, &argc, be, parse_flags);
+
+ if (errcode)
+ goto out;
+ }
+
+ errcode = _srv_parse_finalize(args, argc, srv, be, parse_flags);
+ if (errcode)
+ goto out;
+
+ /* A dynamic server does not currently support resolution.
+ *
+ * Initialize it explicitly to the "none" method to ensure no
+ * resolution will ever be executed.
+ */
+ srv->init_addr_methods = SRV_IADDR_NONE;
+
+ if (srv->mux_proto) {
+ if (!conn_get_best_mux_entry(srv->mux_proto->token, PROTO_SIDE_BE, be->mode)) {
+ ha_alert("MUX protocol is not usable for server.\n");
+ goto out;
+ }
+ }
+
+ if (srv_init_per_thr(srv) == -1) {
+ ha_alert("failed to allocate per-thread lists for server.\n");
+ goto out;
+ }
+
+ if (srv->max_idle_conns != 0) {
+ srv->curr_idle_thr = calloc(global.nbthread, sizeof(*srv->curr_idle_thr));
+ if (!srv->curr_idle_thr) {
+ ha_alert("failed to allocate counters for server.\n");
+ goto out;
+ }
+ }
+
+ if (!srv_alloc_lb(srv, be)) {
+ ha_alert("Failed to initialize load-balancing data.\n");
+ goto out;
+ }
+
+ if (!stats_allocate_proxy_counters_internal(&srv->extra_counters,
+ COUNTERS_SV,
+ STATS_PX_CAP_SRV)) {
+ ha_alert("failed to allocate extra counters for server.\n");
+ goto out;
+ }
+
+ /* ensure minconn/maxconn consistency */
+ srv_minmax_conn_apply(srv);
+
+ if (srv->use_ssl == 1 || (srv->proxy->options & PR_O_TCPCHK_SSL) ||
+ srv->check.use_ssl == 1) {
+ if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv) {
+ if (xprt_get(XPRT_SSL)->prepare_srv(srv))
+ goto out;
+ }
+ }
+
+ if (srv->trackit) {
+ if (srv_apply_track(srv, be))
+ goto out;
+ }
+
+ /* Init check/agent if configured. The check is manually disabled
+ * because a dynamic server is started in a disable state. It must be
+ * manually activated via a "enable health/agent" command.
+ */
+ if (srv->do_check) {
+ if (init_srv_check(srv))
+ goto out;
+
+ srv->check.state &= ~CHK_ST_ENABLED;
+ }
+
+ if (srv->do_agent) {
+ if (init_srv_agent_check(srv))
+ goto out;
+
+ srv->agent.state &= ~CHK_ST_ENABLED;
+ }
+
+ /* Init slowstart if needed. */
+ if (init_srv_slowstart(srv))
+ goto out;
+
+ /* Attach the server to the end of the proxy linked list. Note that this
+ * operation is not thread-safe so this is executed under thread
+ * isolation.
+ *
+ * If a server with the same name is found, reject the new one.
+ */
+
+ /* TODO use a double-linked list for px->srv */
+ if (be->srv) {
+ struct server *next = be->srv;
+
+ while (1) {
+ /* check for duplicate server */
+ if (strcmp(srv->id, next->id) == 0) {
+ ha_alert("Already exists a server with the same name in backend.\n");
+ goto out;
+ }
+
+ if (!next->next)
+ break;
+
+ next = next->next;
+ }
+
+ next->next = srv;
+ }
+ else {
+ srv->next = be->srv;
+ be->srv = srv;
+ }
+
+ /* generate the server id if not manually specified */
+ if (!srv->puid) {
+ next_id = get_next_id(&be->conf.used_server_id, 1);
+ if (!next_id) {
+ ha_alert("Cannot attach server : no id left in proxy\n");
+ goto out;
+ }
+
+ srv->conf.id.key = srv->puid = next_id;
+ }
+ srv->conf.name.key = srv->id;
+
+ /* insert the server in the backend trees */
+ eb32_insert(&be->conf.used_server_id, &srv->conf.id);
+ ebis_insert(&be->conf.used_server_name, &srv->conf.name);
+ /* addr_node.key could be NULL if FQDN resolution is postponed (ie: add server from cli) */
+ if (srv->addr_node.key)
+ ebis_insert(&be->used_server_addr, &srv->addr_node);
+
+ thread_release();
+
+ /* Start the check task. The server must be fully initialized.
+ *
+ * <srvpos> and <nbcheck> parameters are set to 1 as there should be no
+ * need to randomly spread the task interval for dynamic servers.
+ */
+ if (srv->check.state & CHK_ST_CONFIGURED) {
+ if (!start_check_task(&srv->check, 0, 1, 1))
+ ha_alert("System might be unstable, consider to execute a reload");
+ }
+ if (srv->agent.state & CHK_ST_CONFIGURED) {
+ if (!start_check_task(&srv->agent, 0, 1, 1))
+ ha_alert("System might be unstable, consider to execute a reload");
+ }
+
+ ha_notice("New server registered.\n");
+ cli_msg(appctx, LOG_INFO, usermsgs_str());
+
+ return 0;
+
+out:
+ if (srv) {
+ if (srv->track)
+ release_server_track(srv);
+
+ if (srv->check.state & CHK_ST_CONFIGURED)
+ free_check(&srv->check);
+ if (srv->agent.state & CHK_ST_CONFIGURED)
+ free_check(&srv->agent);
+
+ /* remove the server from the proxy linked list */
+ if (be->srv == srv) {
+ be->srv = srv->next;
+ }
+ else {
+ struct server *prev;
+ for (prev = be->srv; prev && prev->next != srv; prev = prev->next)
+ ;
+ if (prev)
+ prev->next = srv->next;
+ }
+
+ }
+
+ thread_release();
+
+ if (!usermsgs_empty())
+ cli_err(appctx, usermsgs_str());
+
+ if (srv)
+ srv_drop(srv);
+
+ return 1;
+}
+
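+/* Illustrative CLI usage (hypothetical names):
+ *   del server be1/srv2
+ * Only a server in maintenance with no remaining connections can be removed.
+ */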
+/* Parse a "del server" command
+ * Returns 0 if the server has been successfully deleted, 1 on failure.
+ */
+static int cli_parse_delete_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *be;
+ struct server *srv;
+ char *be_name, *sv_name;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ++args;
+
+ sv_name = be_name = args[1];
+ /* split backend/server arg */
+ while (*sv_name && *(++sv_name)) {
+ if (*sv_name == '/') {
+ *sv_name = '\0';
+ ++sv_name;
+ break;
+ }
+ }
+
+ if (!*sv_name)
+ return cli_err(appctx, "Require 'backend/server'.");
+
+ /* The proxy servers list is currently not protected by a lock so this
+ * requires thread isolation. In addition, any place referencing the
+ * server about to be deleted would be unsafe after our operation, so
+ * we must be certain to be alone so that no other thread has even
+ * started to grab a temporary reference to this server.
+ */
+ thread_isolate_full();
+
+ get_backend_server(be_name, sv_name, &be, &srv);
+ if (!be) {
+ cli_err(appctx, "No such backend.");
+ goto out;
+ }
+
+ if (!srv) {
+ cli_err(appctx, "No such server.");
+ goto out;
+ }
+
+ if (srv->flags & SRV_F_NON_PURGEABLE) {
+ cli_err(appctx, "This server cannot be removed at runtime due to other configuration elements pointing to it.");
+ goto out;
+ }
+
+ /* Only servers in maintenance can be deleted. This ensures that the
+ * server is not present anymore in the lb structures (through
+ * lbprm.set_server_status_down).
+ */
+ if (!(srv->cur_admin & SRV_ADMF_MAINT)) {
+ cli_err(appctx, "Only servers in maintenance mode can be deleted.");
+ goto out;
+ }
+
+ /* Ensure that there is no active/idle/pending connection on the server.
+ *
+ * TODO idle connections should not prevent server deletion. A proper
+ * cleanup function should be implemented to be used here.
+ */
+ if (srv->cur_sess || srv->curr_idle_conns ||
+ !eb_is_empty(&srv->queue.head)) {
+ cli_err(appctx, "Server still has connections attached to it, cannot remove it.");
+ goto out;
+ }
+
+ /* remove srv from tracking list */
+ if (srv->track)
+ release_server_track(srv);
+
+ /* stop the check task if running */
+ if (srv->check.state & CHK_ST_CONFIGURED)
+ check_purge(&srv->check);
+ if (srv->agent.state & CHK_ST_CONFIGURED)
+ check_purge(&srv->agent);
+
+ /* detach the server from the proxy linked list
+ * The proxy servers list is currently not protected by a lock, so this
+ * requires thread_isolate/release.
+ */
+
+ /* be->srv cannot be empty since we have already found the server with
+ * get_backend_server */
+ BUG_ON(!be->srv);
+ if (be->srv == srv) {
+ be->srv = srv->next;
+ }
+ else {
+ struct server *next;
+ for (next = be->srv; srv != next->next; next = next->next) {
+			/* <srv> is necessarily found here since we already
+			 * located it with get_backend_server */
+ BUG_ON(!next);
+ }
+
+ next->next = srv->next;
+ }
+
+	/* remove srv from the proxy's id/name trees and from the address tree */
+ eb32_delete(&srv->conf.id);
+ ebpt_delete(&srv->conf.name);
+ if (srv->addr_node.key)
+ ebpt_delete(&srv->addr_node);
+
+ /* remove srv from idle_node tree for idle conn cleanup */
+ eb32_delete(&srv->idle_node);
+
+ thread_release();
+
+ ha_notice("Server deleted.\n");
+ srv_drop(srv);
+
+ cli_msg(appctx, LOG_INFO, "Server deleted.");
+
+ return 0;
+
+out:
+ thread_release();
+
+ return 1;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "disable", "agent", NULL }, "disable agent : disable agent checks", cli_parse_disable_agent, NULL },
+ { { "disable", "health", NULL }, "disable health : disable health checks", cli_parse_disable_health, NULL },
+ { { "disable", "server", NULL }, "disable server (DEPRECATED) : disable a server for maintenance (use 'set server' instead)", cli_parse_disable_server, NULL },
+ { { "enable", "agent", NULL }, "enable agent : enable agent checks", cli_parse_enable_agent, NULL },
+ { { "enable", "health", NULL }, "enable health : enable health checks", cli_parse_enable_health, NULL },
+ { { "enable", "server", NULL }, "enable server (DEPRECATED) : enable a disabled server (use 'set server' instead)", cli_parse_enable_server, NULL },
+ { { "set", "maxconn", "server", NULL }, "set maxconn server <bk>/<srv> : change a server's maxconn setting", cli_parse_set_maxconn_server, NULL },
+ { { "set", "server", NULL }, "set server <bk>/<srv> [opts] : change a server's state, weight, address or ssl", cli_parse_set_server },
+ { { "get", "weight", NULL }, "get weight <bk>/<srv> : report a server's current weight", cli_parse_get_weight },
+ { { "set", "weight", NULL }, "set weight <bk>/<srv> (DEPRECATED) : change a server's weight (use 'set server' instead)", cli_parse_set_weight },
+ { { "add", "server", NULL }, "add server <bk>/<srv> : create a new server", cli_parse_add_server, NULL },
+ { { "del", "server", NULL }, "del server <bk>/<srv> : remove a dynamically added server", cli_parse_delete_server, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
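+
+/* Illustrative CLI session for the keywords registered above (socket path and
+ * backend/server names are examples only):
+ *
+ *   $ echo "add server app/srv2 192.0.2.10:8080" | socat stdio /run/haproxy.sock
+ *   $ echo "set server app/srv2 state maint"     | socat stdio /run/haproxy.sock
+ *   $ echo "del server app/srv2"                 | socat stdio /run/haproxy.sock
+ *
+ * "del server" only succeeds once the server is in maintenance and has no
+ * connection attached anymore, as enforced by cli_parse_delete_server() above.
+ */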
+
+/* Prepare a server <srv> to track check status of another one. <srv>.<trackit>
+ * field is used to retrieve the identifier of the tracked server, either with
+ * the format "proxy/server" or just "server". <curproxy> must point to the
+ * backend owning <srv>; if no proxy is specified in <trackit>, it will be used
+ * to find the tracked server.
+ *
+ * Returns 0 if tracking has been activated, non-zero otherwise.
+ *
+ * Not thread-safe.
+ */
+int srv_apply_track(struct server *srv, struct proxy *curproxy)
+{
+ struct proxy *px;
+ struct server *strack, *loop;
+ char *pname, *sname;
+
+ if (!srv->trackit)
+ return 1;
+
+ pname = srv->trackit;
+ sname = strrchr(pname, '/');
+
+ if (sname) {
+ *sname++ = '\0';
+ }
+ else {
+ sname = pname;
+ pname = NULL;
+ }
+
+ if (pname) {
+ px = proxy_be_by_name(pname);
+ if (!px) {
+ ha_alert("unable to find required proxy '%s' for tracking.\n",
+ pname);
+ return 1;
+ }
+ }
+ else {
+ px = curproxy;
+ }
+
+ strack = findserver(px, sname);
+ if (!strack) {
+ ha_alert("unable to find required server '%s' for tracking.\n",
+ sname);
+ return 1;
+ }
+
+ if (strack->flags & SRV_F_DYNAMIC) {
+ ha_alert("unable to use %s/%s for tracking as it is a dynamic server.\n",
+ px->id, strack->id);
+ return 1;
+ }
+
+ if (!strack->do_check && !strack->do_agent && !strack->track &&
+ !strack->trackit) {
+ ha_alert("unable to use %s/%s for "
+ "tracking as it does not have any check nor agent enabled.\n",
+ px->id, strack->id);
+ return 1;
+ }
+
+ for (loop = strack->track; loop && loop != srv; loop = loop->track)
+ ;
+
+ if (srv == strack || loop) {
+ ha_alert("unable to track %s/%s as it "
+ "belongs to a tracking chain looping back to %s/%s.\n",
+ px->id, strack->id, px->id,
+ srv == strack ? strack->id : loop->id);
+ return 1;
+ }
+
+ if (curproxy != px &&
+ (curproxy->options & PR_O_DISABLE404) != (px->options & PR_O_DISABLE404)) {
+ ha_alert("unable to use %s/%s for"
+ "tracking: disable-on-404 option inconsistency.\n",
+ px->id, strack->id);
+ return 1;
+ }
+
+ srv->track = strack;
+ srv->tracknext = strack->trackers;
+ strack->trackers = srv;
+ strack->flags |= SRV_F_NON_PURGEABLE;
+
+ ha_free(&srv->trackit);
+
+ return 0;
+}
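+
+/* Illustrative configuration consumed by srv_apply_track() (all names are
+ * examples): the "track" keyword stores "be_app/main" in srv->trackit at
+ * parse time and this function later resolves it to the tracked server:
+ *
+ *   backend be_app
+ *       server main  192.0.2.1:80 check
+ *       server spare 192.0.2.2:80 track be_app/main
+ *
+ * With the short form "track main", <curproxy> is used as the backend.
+ */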
+
+/*
+ * This function applies the server's status changes. It is
+ * designed to be called asynchronously.
+ *
+ * Must be called with the server lock held. This may also be called at init
+ * time as the result of parsing the state file, in which case no lock will be
+ * held, and the server's warmup task can be null.
+ */
+static void srv_update_status(struct server *s)
+{
+ struct check *check = &s->check;
+ int xferred;
+ struct proxy *px = s->proxy;
+ int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
+ int srv_was_stopping = (s->cur_state == SRV_ST_STOPPING) || (s->cur_admin & SRV_ADMF_DRAIN);
+ int log_level;
+ struct buffer *tmptrash = NULL;
+
+	/* If maintenance is not currently set, we try to apply pending state changes */
+ if (!(s->cur_admin & SRV_ADMF_MAINT)) {
+ int next_admin;
+
+ /* Backup next admin */
+ next_admin = s->next_admin;
+
+ /* restore current admin state */
+ s->next_admin = s->cur_admin;
+
+ if ((s->cur_state != SRV_ST_STOPPED) && (s->next_state == SRV_ST_STOPPED)) {
+ s->last_change = now.tv_sec;
+ if (s->proxy->lbprm.set_server_status_down)
+ s->proxy->lbprm.set_server_status_down(s);
+
+ if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
+ srv_shutdown_streams(s, SF_ERR_DOWN);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is DOWN", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_status(tmptrash, s, NULL, xferred, 0);
+ ha_warning("%s.\n", tmptrash->area);
+
+ /* we don't send an alert if the server was previously paused */
+ log_level = srv_was_stopping ? LOG_NOTICE : LOG_ALERT;
+ send_log(s->proxy, log_level, "%s.\n",
+ tmptrash->area);
+ send_email_alert(s, log_level, "%s",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+
+ s->counters.down_trans++;
+ }
+ else if ((s->cur_state != SRV_ST_STOPPING) && (s->next_state == SRV_ST_STOPPING)) {
+ s->last_change = now.tv_sec;
+ if (s->proxy->lbprm.set_server_status_down)
+ s->proxy->lbprm.set_server_status_down(s);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is stopping", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_status(tmptrash, s, NULL, xferred, 0);
+
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+ else if (((s->cur_state != SRV_ST_RUNNING) && (s->next_state == SRV_ST_RUNNING))
+ || ((s->cur_state != SRV_ST_STARTING) && (s->next_state == SRV_ST_STARTING))) {
+ if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
+ if (s->proxy->last_change < now.tv_sec) // ignore negative times
+ s->proxy->down_time += now.tv_sec - s->proxy->last_change;
+ s->proxy->last_change = now.tv_sec;
+ }
+
+ if (s->cur_state == SRV_ST_STOPPED && s->last_change < now.tv_sec) // ignore negative times
+ s->down_time += now.tv_sec - s->last_change;
+
+ s->last_change = now.tv_sec;
+ if (s->next_state == SRV_ST_STARTING && s->warmup)
+ task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
+
+ server_recalc_eweight(s, 0);
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+
+ /* If the server is set with "on-marked-up shutdown-backup-sessions",
+ * and it's not a backup server and its effective weight is > 0,
+ * then it can accept new connections, so we shut down all streams
+ * on all backup servers.
+ */
+ if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
+ !(s->flags & SRV_F_BACKUP) && s->next_eweight)
+ srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
+
+ /* check if we can handle some connections queued at the proxy. We
+ * will take as many as we can handle.
+ */
+ xferred = pendconn_grab_from_px(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is UP", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_status(tmptrash, s, NULL, xferred, 0);
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ send_email_alert(s, LOG_NOTICE, "%s",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+ else if (s->cur_eweight != s->next_eweight) {
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+
+ s->next_admin = next_admin;
+ }
+
+ /* reset operational state change */
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = s->op_st_chg.code = -1;
+ s->op_st_chg.duration = 0;
+
+ /* Now we try to apply pending admin changes */
+
+ /* Maintenance must also disable health checks */
+ if (!(s->cur_admin & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT)) {
+ if (s->check.state & CHK_ST_ENABLED) {
+ s->check.state |= CHK_ST_PAUSED;
+ check->health = 0;
+ }
+
+ if (s->cur_state == SRV_ST_STOPPED) { /* server was already down */
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s was DOWN and now enters maintenance%s%s%s",
+ s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id,
+ *(s->adm_st_chg_cause) ? " (" : "", s->adm_st_chg_cause, *(s->adm_st_chg_cause) ? ")" : "");
+
+ srv_append_status(tmptrash, s, NULL, -1, (s->next_admin & SRV_ADMF_FMAINT));
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ }
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ /* commit new admin status */
+
+ s->cur_admin = s->next_admin;
+ }
+ else { /* server was still running */
+ check->health = 0; /* failure */
+ s->last_change = now.tv_sec;
+
+ s->next_state = SRV_ST_STOPPED;
+ if (s->proxy->lbprm.set_server_status_down)
+ s->proxy->lbprm.set_server_status_down(s);
+
+ if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
+ srv_shutdown_streams(s, SF_ERR_DOWN);
+
+ /* force connection cleanup on the given server */
+ srv_cleanup_connections(s);
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is going DOWN for maintenance%s%s%s",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ *(s->adm_st_chg_cause) ? " (" : "", s->adm_st_chg_cause, *(s->adm_st_chg_cause) ? ")" : "");
+
+ srv_append_status(tmptrash, s, NULL, xferred, (s->next_admin & SRV_ADMF_FMAINT));
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, srv_was_stopping ? LOG_NOTICE : LOG_ALERT, "%s.\n",
+ tmptrash->area);
+ }
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+
+ s->counters.down_trans++;
+ }
+ }
+ else if ((s->cur_admin & SRV_ADMF_MAINT) && !(s->next_admin & SRV_ADMF_MAINT)) {
+ /* OK here we're leaving maintenance, we have many things to check,
+		 * because the server might be coming back up depending on
+ * its state. In practice, leaving maintenance means that we should
+ * immediately turn to UP (more or less the slowstart) under the
+ * following conditions :
+ * - server is neither checked nor tracked
+ * - server tracks another server which is not checked
+ * - server tracks another server which is already up
+ * Which sums up as something simpler :
+ * "either the tracking server is up or the server's checks are disabled
+ * or up". Otherwise we only re-enable health checks. There's a special
+ * case associated to the stopping state which can be inherited. Note
+ * that the server might still be in drain mode, which is naturally dealt
+ * with by the lower level functions.
+ */
+
+ if (s->check.state & CHK_ST_ENABLED) {
+ s->check.state &= ~CHK_ST_PAUSED;
+ check->health = check->rise; /* start OK but check immediately */
+ }
+
+ if ((!s->track || s->track->next_state != SRV_ST_STOPPED) &&
+ (!(s->agent.state & CHK_ST_ENABLED) || (s->agent.health >= s->agent.rise)) &&
+ (!(s->check.state & CHK_ST_ENABLED) || (s->check.health >= s->check.rise))) {
+ if (s->track && s->track->next_state == SRV_ST_STOPPING) {
+ s->next_state = SRV_ST_STOPPING;
+ }
+ else {
+ s->last_change = now.tv_sec;
+ s->next_state = SRV_ST_STARTING;
+ if (s->slowstart > 0) {
+ if (s->warmup)
+ task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
+ }
+ else
+ s->next_state = SRV_ST_RUNNING;
+ }
+
+ }
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s/%s (leaving forced maintenance)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s ('%s') is %s/%s (resolves again)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->hostname,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s/%s (leaving maintenance)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ server_recalc_eweight(s, 0);
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ else if (!prev_srv_count && (s->proxy->srv_bck || s->proxy->srv_act))
+ s->proxy->last_change = now.tv_sec;
+
+ /* If the server is set with "on-marked-up shutdown-backup-sessions",
+ * and it's not a backup server and its effective weight is > 0,
+ * then it can accept new connections, so we shut down all streams
+ * on all backup servers.
+ */
+ if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
+ !(s->flags & SRV_F_BACKUP) && s->next_eweight)
+ srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
+
+ /* check if we can handle some connections queued at the proxy. We
+ * will take as many as we can handle.
+ */
+ xferred = pendconn_grab_from_px(s);
+ }
+ else if (s->next_admin & SRV_ADMF_MAINT) {
+ /* remaining in maintenance mode, let's inform precisely about the
+ * situation.
+ */
+ if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is leaving forced maintenance but remains in maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ }
+ if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s ('%s') resolves again but remains in maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->hostname);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ }
+ else if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s remains in forced maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ }
+ /* don't report anything when leaving drain mode and remaining in maintenance */
+
+ s->cur_admin = s->next_admin;
+ }
+
+ if (!(s->next_admin & SRV_ADMF_MAINT)) {
+ if (!(s->cur_admin & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)) {
+ /* drain state is applied only if not yet in maint */
+
+ s->last_change = now.tv_sec;
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash, "%sServer %s/%s enters drain state%s%s%s",
+ s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id,
+ *(s->adm_st_chg_cause) ? " (" : "", s->adm_st_chg_cause, *(s->adm_st_chg_cause) ? ")" : "");
+
+ srv_append_status(tmptrash, s, NULL, xferred, (s->next_admin & SRV_ADMF_FDRAIN));
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ send_email_alert(s, LOG_NOTICE, "%s",
+ tmptrash->area);
+ }
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+ else if ((s->cur_admin & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_DRAIN)) {
+ /* OK completely leaving drain mode */
+ if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
+ if (s->proxy->last_change < now.tv_sec) // ignore negative times
+ s->proxy->down_time += now.tv_sec - s->proxy->last_change;
+ s->proxy->last_change = now.tv_sec;
+ }
+
+ if (s->last_change < now.tv_sec) // ignore negative times
+ s->down_time += now.tv_sec - s->last_change;
+ s->last_change = now.tv_sec;
+ server_recalc_eweight(s, 0);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FDRAIN)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s (leaving forced drain)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
+ }
+ else {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s (leaving drain)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ }
+
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+ }
+ else if ((s->next_admin & SRV_ADMF_DRAIN)) {
+ /* remaining in drain mode after removing one of its flags */
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FDRAIN)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is leaving forced drain but remains in drain mode",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ }
+ else {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s remains in forced drain mode",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+ }
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ /* commit new admin status */
+
+ s->cur_admin = s->next_admin;
+ }
+ }
+
+ /* Re-set log strings to empty */
+ *s->adm_st_chg_cause = 0;
+}
+
+struct task *srv_cleanup_toremove_conns(struct task *task, void *context, unsigned int state)
+{
+ struct connection *conn;
+
+ while ((conn = MT_LIST_POP(&idle_conns[tid].toremove_conns,
+ struct connection *, toremove_list)) != NULL) {
+ conn->mux->destroy(conn->ctx);
+ }
+
+ return task;
+}
+
+/* Move <toremove_nb> connections from <idle_tree> to <toremove_list>; -1 means
+ * moving them all.
+ * Returns the number of connections moved.
+ *
+ * Must be called with idle_conns_lock held.
+ */
+static int srv_migrate_conns_to_remove(struct eb_root *idle_tree, struct mt_list *toremove_list, int toremove_nb)
+{
+ struct eb_node *node, *next;
+ struct conn_hash_node *hash_node;
+ int i = 0;
+
+ node = eb_first(idle_tree);
+ while (node) {
+ next = eb_next(node);
+ if (toremove_nb != -1 && i >= toremove_nb)
+ break;
+
+ hash_node = ebmb_entry(node, struct conn_hash_node, node);
+ eb_delete(node);
+ MT_LIST_APPEND(toremove_list, &hash_node->conn->toremove_list);
+ i++;
+
+ node = next;
+ }
+ return i;
+}
+
+/* Cleanup connections for a given server. This might be useful when entering
+ * forced maintenance or when changing the IP/port at run time.
+ */
+static void srv_cleanup_connections(struct server *srv)
+{
+ int did_remove;
+ int i;
+
+ /* nothing to do if pool-max-conn is null */
+ if (!srv->max_idle_conns)
+ return;
+
+ /* check all threads starting with ours */
+ for (i = tid;;) {
+ did_remove = 0;
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ if (srv_migrate_conns_to_remove(&srv->per_thr[i].idle_conns, &idle_conns[i].toremove_conns, -1) > 0)
+ did_remove = 1;
+ if (srv_migrate_conns_to_remove(&srv->per_thr[i].safe_conns, &idle_conns[i].toremove_conns, -1) > 0)
+ did_remove = 1;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ if (did_remove)
+ task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
+
+ if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid)
+ break;
+ }
+}
+
+/* removes an idle conn after updating the server idle conns counters */
+void srv_release_conn(struct server *srv, struct connection *conn)
+{
+ if (conn->flags & CO_FL_LIST_MASK) {
+ /* The connection is currently in the server's idle list, so tell it
+ * there's one less connection available in that list.
+ */
+ _HA_ATOMIC_DEC(&srv->curr_idle_conns);
+ _HA_ATOMIC_DEC(conn->flags & CO_FL_SAFE_LIST ? &srv->curr_safe_nb : &srv->curr_idle_nb);
+ _HA_ATOMIC_DEC(&srv->curr_idle_thr[tid]);
+ }
+ else {
+ /* The connection is not private and not in any server's idle
+ * list, so decrement the current number of used connections
+ */
+ _HA_ATOMIC_DEC(&srv->curr_used_conns);
+ }
+
+ /* Remove the connection from any tree (safe, idle or available) */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(&conn->hash_node->node);
+ conn->flags &= ~CO_FL_LIST_MASK;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+}
+
+/* retrieve a connection from its <hash> in <tree>
+ * returns NULL if no connection found
+ */
+struct connection *srv_lookup_conn(struct eb_root *tree, uint64_t hash)
+{
+ struct eb64_node *node = NULL;
+ struct connection *conn = NULL;
+ struct conn_hash_node *hash_node = NULL;
+
+ node = eb64_lookup(tree, hash);
+ if (node) {
+ hash_node = ebmb_entry(node, struct conn_hash_node, node);
+ conn = hash_node->conn;
+ }
+
+ return conn;
+}
+
+/* retrieve the next connection sharing the same hash as <conn>
+ * returns NULL if no connection found
+ */
+struct connection *srv_lookup_conn_next(struct connection *conn)
+{
+ struct eb64_node *node = NULL;
+ struct connection *next_conn = NULL;
+ struct conn_hash_node *hash_node = NULL;
+
+ node = eb64_next_dup(&conn->hash_node->node);
+ if (node) {
+ hash_node = eb64_entry(node, struct conn_hash_node, node);
+ next_conn = hash_node->conn;
+ }
+
+ return next_conn;
+}
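+
+/* Minimal sketch of how the two lookups above are meant to be chained to walk
+ * all idle connections sharing the same <hash> on the current thread
+ * (variable names are illustrative):
+ *
+ *   struct connection *conn;
+ *
+ *   for (conn = srv_lookup_conn(&srv->per_thr[tid].idle_conns, hash);
+ *        conn;
+ *        conn = srv_lookup_conn_next(conn)) {
+ *           ... check whether <conn> may be reused ...
+ *   }
+ */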
+
+/* This adds an idle connection to the server's list if the connection is
+ * reusable, not held by any owner anymore, but still has available streams.
+ */
+int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_safe)
+{
+ /* we try to keep the connection in the server's idle list
+ * if we don't have too many FD in use, and if the number of
+ * idle+current conns is lower than what was observed before
+ * last purge, or if we already don't have idle conns for the
+ * current thread and we don't exceed last count by global.nbthread.
+ */
+ if (!(conn->flags & CO_FL_PRIVATE) &&
+ srv && srv->pool_purge_delay > 0 &&
+ ((srv->proxy->options & PR_O_REUSE_MASK) != PR_O_REUSE_NEVR) &&
+ ha_used_fds < global.tune.pool_high_count &&
+ (srv->max_idle_conns == -1 || srv->max_idle_conns > srv->curr_idle_conns) &&
+ ((eb_is_empty(&srv->per_thr[tid].safe_conns) &&
+ (is_safe || eb_is_empty(&srv->per_thr[tid].idle_conns))) ||
+ (ha_used_fds < global.tune.pool_low_count &&
+ (srv->curr_used_conns + srv->curr_idle_conns <=
+ MAX(srv->curr_used_conns, srv->est_need_conns) + srv->low_idle_conns))) &&
+ !conn->mux->used_streams(conn) && conn->mux->avail_streams(conn)) {
+ int retadd;
+
+ retadd = _HA_ATOMIC_ADD_FETCH(&srv->curr_idle_conns, 1);
+ if (retadd > srv->max_idle_conns) {
+ _HA_ATOMIC_DEC(&srv->curr_idle_conns);
+ return 0;
+ }
+ _HA_ATOMIC_DEC(&srv->curr_used_conns);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(&conn->hash_node->node);
+
+ if (is_safe) {
+ conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_SAFE_LIST;
+ eb64_insert(&srv->per_thr[tid].safe_conns, &conn->hash_node->node);
+ _HA_ATOMIC_INC(&srv->curr_safe_nb);
+ } else {
+ conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_IDLE_LIST;
+ eb64_insert(&srv->per_thr[tid].idle_conns, &conn->hash_node->node);
+ _HA_ATOMIC_INC(&srv->curr_idle_nb);
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ _HA_ATOMIC_INC(&srv->curr_idle_thr[tid]);
+
+ __ha_barrier_full();
+ if ((volatile void *)srv->idle_node.node.leaf_p == NULL) {
+ HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
+ if ((volatile void *)srv->idle_node.node.leaf_p == NULL) {
+ srv->idle_node.key = tick_add(srv->pool_purge_delay,
+ now_ms);
+ eb32_insert(&idle_conn_srv, &srv->idle_node);
+				if (!task_in_wq(idle_conn_task) &&
+				    !task_in_rq(idle_conn_task)) {
+ task_schedule(idle_conn_task,
+ srv->idle_node.key);
+ }
+
+ }
+ HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned int state)
+{
+ struct server *srv;
+ struct eb32_node *eb;
+ int i;
+ unsigned int next_wakeup;
+
+ next_wakeup = TICK_ETERNITY;
+ HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
+ while (1) {
+ int exceed_conns;
+ int to_kill;
+ int curr_idle;
+
+ eb = eb32_lookup_ge(&idle_conn_srv, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+
+ eb = eb32_first(&idle_conn_srv);
+ if (likely(!eb))
+ break;
+ }
+ if (tick_is_lt(now_ms, eb->key)) {
+ /* timer not expired yet, revisit it later */
+ next_wakeup = eb->key;
+ break;
+ }
+ srv = eb32_entry(eb, struct server, idle_node);
+
+ /* Calculate how many idle connections we want to kill :
+ * we want to remove half the difference between the total
+ * of established connections (used or idle) and the max
+ * number of used connections.
+ */
+ curr_idle = srv->curr_idle_conns;
+ if (curr_idle == 0)
+ goto remove;
+ exceed_conns = srv->curr_used_conns + curr_idle - MAX(srv->max_used_conns, srv->est_need_conns);
+ exceed_conns = to_kill = exceed_conns / 2 + (exceed_conns & 1);
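+
+		/* Worked example (illustrative figures): with 10 used and 8
+		 * idle connections and MAX(max_used_conns, est_need_conns)
+		 * == 12, we get exceed_conns = 10 + 8 - 12 = 6, hence
+		 * to_kill = 6/2 + 0 = 3 idle connections to purge this round.
+		 */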
+
+ srv->est_need_conns = (srv->est_need_conns + srv->max_used_conns) / 2;
+ if (srv->est_need_conns < srv->max_used_conns)
+ srv->est_need_conns = srv->max_used_conns;
+
+ HA_ATOMIC_STORE(&srv->max_used_conns, srv->curr_used_conns);
+
+ if (exceed_conns <= 0)
+ goto remove;
+
+ /* check all threads starting with ours */
+ for (i = tid;;) {
+ int max_conn;
+ int j;
+ int did_remove = 0;
+
+ max_conn = (exceed_conns * srv->curr_idle_thr[i]) /
+ curr_idle + 1;
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ j = srv_migrate_conns_to_remove(&srv->per_thr[i].idle_conns, &idle_conns[i].toremove_conns, max_conn);
+ if (j > 0)
+ did_remove = 1;
+ if (max_conn - j > 0 &&
+ srv_migrate_conns_to_remove(&srv->per_thr[i].safe_conns, &idle_conns[i].toremove_conns, max_conn - j) > 0)
+ did_remove = 1;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+
+ if (did_remove)
+ task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
+
+ if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid)
+ break;
+ }
+remove:
+ eb32_delete(&srv->idle_node);
+
+ if (srv->curr_idle_conns) {
+ /* There are still more idle connections, add the
+ * server back in the tree.
+ */
+ srv->idle_node.key = tick_add(srv->pool_purge_delay, now_ms);
+ eb32_insert(&idle_conn_srv, &srv->idle_node);
+ next_wakeup = tick_first(next_wakeup, srv->idle_node.key);
+ }
+ }
+ HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
+
+ task->expire = next_wakeup;
+ return task;
+}
+
+/* Close remaining idle connections. This function is designed to be run on
+ * process shutdown. This guarantees a proper socket shutdown to avoid
+ * TIME_WAIT state. For a quick operation, only ctrl is closed, xprt stack is
+ * bypassed.
+ *
+ * This function is not thread-safe so it must only be called via a global
+ * deinit function.
+ */
+static void srv_close_idle_conns(struct server *srv)
+{
+ struct eb_root **cleaned_tree;
+ int i;
+
+ for (i = 0; i < global.nbthread; ++i) {
+ struct eb_root *conn_trees[] = {
+ &srv->per_thr[i].idle_conns,
+ &srv->per_thr[i].safe_conns,
+ &srv->per_thr[i].avail_conns,
+ NULL
+ };
+
+ for (cleaned_tree = conn_trees; *cleaned_tree; ++cleaned_tree) {
+ while (!eb_is_empty(*cleaned_tree)) {
+ struct ebmb_node *node = ebmb_first(*cleaned_tree);
+ struct conn_hash_node *conn_hash_node = ebmb_entry(node, struct conn_hash_node, node);
+ struct connection *conn = conn_hash_node->conn;
+
+ if (conn->ctrl->ctrl_close)
+ conn->ctrl->ctrl_close(conn);
+ ebmb_delete(node);
+ }
+ }
+ }
+}
+
+REGISTER_SERVER_DEINIT(srv_close_idle_conns);
+
+/* config parser for global "tune.idle-pool.shared", accepts "on" or "off" */
+static int cfg_parse_idle_pool_shared(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_IDLE_POOL_SHARED;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_IDLE_POOL_SHARED;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
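+
+/* e.g. in the global section (illustrative):
+ *   tune.idle-pool.shared off
+ * disables the sharing of idle connection pools between threads.
+ */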
+
+/* config parser for global "tune.pool-{low,high}-fd-ratio" */
+static int cfg_parse_pool_fd_ratio(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int arg = -1;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) != 0)
+ arg = atoi(args[1]);
+
+ if (arg < 0 || arg > 100) {
+ memprintf(err, "'%s' expects an integer argument between 0 and 100.", args[0]);
+ return -1;
+ }
+
+ if (args[0][10] == 'h')
+ global.tune.pool_high_ratio = arg;
+ else
+ global.tune.pool_low_ratio = arg;
+ return 0;
+}
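+
+/* e.g. in the global section (illustrative values):
+ *   tune.pool-high-fd-ratio 25
+ *   tune.pool-low-fd-ratio  20
+ * The keyword is dispatched on its 11th character (args[0][10], 'h' or 'l')
+ * since both keywords share the "tune.pool-" prefix.
+ */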
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.idle-pool.shared", cfg_parse_idle_pool_shared },
+ { CFG_GLOBAL, "tune.pool-high-fd-ratio", cfg_parse_pool_fd_ratio },
+ { CFG_GLOBAL, "tune.pool-low-fd-ratio", cfg_parse_pool_fd_ratio },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/server_state.c b/src/server_state.c
new file mode 100644
index 0000000..285d23e
--- /dev/null
+++ b/src/server_state.c
@@ -0,0 +1,931 @@
+/*
+ * Server-state management functions.
+ *
+ * Copyright (C) 2021 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+
+#include <import/eb64tree.h>
+#include <import/ebistree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/port_range.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/server.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+/* Update a server state using the parameters available in the params list.
+ * The caller must provide a supported version
+ * Grabs the server lock during operation.
+ */
+static void srv_state_srv_update(struct server *srv, int version, char **params)
+{
+ char *p;
+ struct buffer *msg;
+ const char *warning;
+
+ /* fields since version 1
+ * and common to all other upcoming versions
+ */
+ enum srv_state srv_op_state;
+ enum srv_admin srv_admin_state;
+ unsigned srv_uweight, srv_iweight;
+ unsigned long srv_last_time_change;
+ short srv_check_status;
+ enum chk_result srv_check_result;
+ int srv_check_health;
+ int srv_check_state, srv_agent_state;
+ int bk_f_forced_id;
+ int srv_f_forced_id;
+ int fqdn_set_by_cli;
+ const char *fqdn;
+ const char *port_st;
+ unsigned int port_svc;
+ char *srvrecord;
+ char *addr;
+ int partial_apply = 0;
+#ifdef USE_OPENSSL
+ int use_ssl;
+#endif
+
+ fqdn = NULL;
+ port_svc = 0;
+ msg = alloc_trash_chunk();
+ if (!msg)
+ goto end;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+ /* Only version 1 supported for now, don't check it. Fields are :
+ * srv_addr: params[0]
+ * srv_op_state: params[1]
+ * srv_admin_state: params[2]
+ * srv_uweight: params[3]
+ * srv_iweight: params[4]
+ * srv_last_time_change: params[5]
+ * srv_check_status: params[6]
+ * srv_check_result: params[7]
+ * srv_check_health: params[8]
+ * srv_check_state: params[9]
+ * srv_agent_state: params[10]
+ * bk_f_forced_id: params[11]
+ * srv_f_forced_id: params[12]
+ * srv_fqdn: params[13]
+ * srv_port: params[14]
+ * srvrecord: params[15]
+ * srv_use_ssl: params[16]
+ * srv_check_port: params[17]
+ * srv_check_addr: params[18]
+ * srv_agent_addr: params[19]
+ * srv_agent_port: params[20]
+ */
+
+ /* validating srv_op_state */
+ p = NULL;
+ errno = 0;
+ srv_op_state = strtol(params[1], &p, 10);
+ if ((p == params[1]) || errno == EINVAL || errno == ERANGE ||
+ (srv_op_state != SRV_ST_STOPPED &&
+ srv_op_state != SRV_ST_STARTING &&
+ srv_op_state != SRV_ST_RUNNING &&
+ srv_op_state != SRV_ST_STOPPING)) {
+ chunk_appendf(msg, ", invalid srv_op_state value '%s'", params[1]);
+ }
+
+ /* validating srv_admin_state */
+ p = NULL;
+ errno = 0;
+ srv_admin_state = strtol(params[2], &p, 10);
+ fqdn_set_by_cli = !!(srv_admin_state & SRV_ADMF_HMAINT);
+
+ /* inherited statuses will be recomputed later.
+ * Also disable SRV_ADMF_HMAINT flag (set from stats socket fqdn).
+ */
+ srv_admin_state &= ~SRV_ADMF_IDRAIN & ~SRV_ADMF_IMAINT & ~SRV_ADMF_HMAINT & ~SRV_ADMF_RMAINT;
+
+ if ((p == params[2]) || errno == EINVAL || errno == ERANGE ||
+ (srv_admin_state != 0 &&
+ srv_admin_state != SRV_ADMF_FMAINT &&
+ srv_admin_state != SRV_ADMF_CMAINT &&
+ srv_admin_state != (SRV_ADMF_CMAINT | SRV_ADMF_FMAINT) &&
+ srv_admin_state != (SRV_ADMF_CMAINT | SRV_ADMF_FDRAIN) &&
+ srv_admin_state != SRV_ADMF_FDRAIN)) {
+ chunk_appendf(msg, ", invalid srv_admin_state value '%s'", params[2]);
+ }
+
+ /* validating srv_uweight */
+ p = NULL;
+ errno = 0;
+ srv_uweight = strtol(params[3], &p, 10);
+ if ((p == params[3]) || errno == EINVAL || errno == ERANGE || (srv_uweight > SRV_UWGHT_MAX))
+ chunk_appendf(msg, ", invalid srv_uweight value '%s'", params[3]);
+
+ /* validating srv_iweight */
+ p = NULL;
+ errno = 0;
+ srv_iweight = strtol(params[4], &p, 10);
+ if ((p == params[4]) || errno == EINVAL || errno == ERANGE || (srv_iweight > SRV_UWGHT_MAX))
+ chunk_appendf(msg, ", invalid srv_iweight value '%s'", params[4]);
+
+ /* validating srv_last_time_change */
+ p = NULL;
+ errno = 0;
+ srv_last_time_change = strtol(params[5], &p, 10);
+ if ((p == params[5]) || errno == EINVAL || errno == ERANGE)
+ chunk_appendf(msg, ", invalid srv_last_time_change value '%s'", params[5]);
+
+ /* validating srv_check_status */
+ p = NULL;
+ errno = 0;
+ srv_check_status = strtol(params[6], &p, 10);
+ if (p == params[6] || errno == EINVAL || errno == ERANGE ||
+ (srv_check_status >= HCHK_STATUS_SIZE))
+ chunk_appendf(msg, ", invalid srv_check_status value '%s'", params[6]);
+
+ /* validating srv_check_result */
+ p = NULL;
+ errno = 0;
+ srv_check_result = strtol(params[7], &p, 10);
+ if ((p == params[7]) || errno == EINVAL || errno == ERANGE ||
+ (srv_check_result != CHK_RES_UNKNOWN &&
+ srv_check_result != CHK_RES_NEUTRAL &&
+ srv_check_result != CHK_RES_FAILED &&
+ srv_check_result != CHK_RES_PASSED &&
+ srv_check_result != CHK_RES_CONDPASS)) {
+ chunk_appendf(msg, ", invalid srv_check_result value '%s'", params[7]);
+ }
+
+ /* validating srv_check_health */
+ p = NULL;
+ errno = 0;
+ srv_check_health = strtol(params[8], &p, 10);
+ if (p == params[8] || errno == EINVAL || errno == ERANGE)
+ chunk_appendf(msg, ", invalid srv_check_health value '%s'", params[8]);
+
+ /* validating srv_check_state */
+ p = NULL;
+ errno = 0;
+ srv_check_state = strtol(params[9], &p, 10);
+ if (p == params[9] || errno == EINVAL || errno == ERANGE ||
+ (srv_check_state & ~(CHK_ST_INPROGRESS | CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_PAUSED | CHK_ST_AGENT)))
+ chunk_appendf(msg, ", invalid srv_check_state value '%s'", params[9]);
+
+ /* validating srv_agent_state */
+ p = NULL;
+ errno = 0;
+ srv_agent_state = strtol(params[10], &p, 10);
+ if (p == params[10] || errno == EINVAL || errno == ERANGE ||
+ (srv_agent_state & ~(CHK_ST_INPROGRESS | CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_PAUSED | CHK_ST_AGENT)))
+ chunk_appendf(msg, ", invalid srv_agent_state value '%s'", params[10]);
+
+ /* validating bk_f_forced_id */
+ p = NULL;
+ errno = 0;
+ bk_f_forced_id = strtol(params[11], &p, 10);
+ if (p == params[11] || errno == EINVAL || errno == ERANGE || !((bk_f_forced_id == 0) || (bk_f_forced_id == 1)))
+ chunk_appendf(msg, ", invalid bk_f_forced_id value '%s'", params[11]);
+
+ /* validating srv_f_forced_id */
+ p = NULL;
+ errno = 0;
+ srv_f_forced_id = strtol(params[12], &p, 10);
+ if (p == params[12] || errno == EINVAL || errno == ERANGE || !((srv_f_forced_id == 0) || (srv_f_forced_id == 1)))
+ chunk_appendf(msg, ", invalid srv_f_forced_id value '%s'", params[12]);
+
+ /* validating srv_fqdn */
+ fqdn = params[13];
+ if (fqdn && *fqdn == '-')
+ fqdn = NULL;
+ if (fqdn && (strlen(fqdn) > DNS_MAX_NAME_SIZE || invalid_domainchar(fqdn))) {
+ chunk_appendf(msg, ", invalid srv_fqdn value '%s'", params[13]);
+ fqdn = NULL;
+ }
+
+ port_st = params[14];
+ if (port_st) {
+ port_svc = strl2uic(port_st, strlen(port_st));
+ if (port_svc > USHRT_MAX) {
+ chunk_appendf(msg, ", invalid srv_port value '%s'", port_st);
+ port_st = NULL;
+ }
+ }
+
+ /* SRV record
+ * NOTE: in HAProxy, SRV records must start with an underscore '_'
+ */
+ srvrecord = params[15];
+ if (srvrecord && *srvrecord != '_')
+ srvrecord = NULL;
+
+ /* don't apply anything if one error has been detected */
+ if (msg->data)
+ goto out;
+ partial_apply = 1;
+
+ /* recover operational state and apply it to this server
+ * and all servers tracking this one */
+ srv->check.health = srv_check_health;
+ switch (srv_op_state) {
+ case SRV_ST_STOPPED:
+ srv->check.health = 0;
+ srv_set_stopped(srv, "changed from server-state after a reload", NULL);
+ break;
+ case SRV_ST_STARTING:
+ /* If rise == 1 there is no STARTING state, let's switch to
+ * RUNNING
+ */
+ if (srv->check.rise == 1) {
+ srv->check.health = srv->check.rise + srv->check.fall - 1;
+ srv_set_running(srv, "", NULL);
+ break;
+ }
+ if (srv->check.health < 1 || srv->check.health >= srv->check.rise)
+ srv->check.health = srv->check.rise - 1;
+ srv->next_state = srv_op_state;
+ break;
+ case SRV_ST_STOPPING:
+ /* If fall == 1 there is no STOPPING state, let's switch to
+ * STOPPED
+ */
+ if (srv->check.fall == 1) {
+ srv->check.health = 0;
+ srv_set_stopped(srv, "changed from server-state after a reload", NULL);
+ break;
+ }
+ if (srv->check.health < srv->check.rise ||
+ srv->check.health > srv->check.rise + srv->check.fall - 2)
+ srv->check.health = srv->check.rise;
+ srv_set_stopping(srv, "changed from server-state after a reload", NULL);
+ break;
+ case SRV_ST_RUNNING:
+ srv->check.health = srv->check.rise + srv->check.fall - 1;
+ srv_set_running(srv, "", NULL);
+ break;
+ }
+
+ /* When applying server state, the following rules apply:
+ * - in case of a configuration change, we apply the setting from the new
+ * configuration, regardless of old running state
+ * - if no configuration change, we apply old running state only if old running
+ * state is different from new configuration state
+ */
+ /* configuration has changed */
+ if ((srv_admin_state & SRV_ADMF_CMAINT) != (srv->next_admin & SRV_ADMF_CMAINT)) {
+ if (srv->next_admin & SRV_ADMF_CMAINT)
+ srv_adm_set_maint(srv);
+ else
+ srv_adm_set_ready(srv);
+ }
+	/* configuration is the same, let's compare the old running state and the new conf state */
+ else {
+ if (srv_admin_state & SRV_ADMF_FMAINT && !(srv->next_admin & SRV_ADMF_CMAINT))
+ srv_adm_set_maint(srv);
+ else if (!(srv_admin_state & SRV_ADMF_FMAINT) && (srv->next_admin & SRV_ADMF_CMAINT))
+ srv_adm_set_ready(srv);
+ }
+ /* apply drain mode if server is currently enabled */
+ if (!(srv->next_admin & SRV_ADMF_FMAINT) && (srv_admin_state & SRV_ADMF_FDRAIN)) {
+ /* The SRV_ADMF_FDRAIN flag is inherited when srv->iweight is 0
+ * (srv->iweight is the weight set up in configuration).
+ * There are two possible reasons for FDRAIN to have been present :
+ * - previous config weight was zero
+ * - "set server b/s drain" was sent to the CLI
+ *
+ * In the first case, we simply want to drop this drain state
+ * if the new weight is not zero anymore, meaning the administrator
+ * has intentionally turned the weight back to a positive value to
+ * enable the server again after an operation. In the second case,
+ * the drain state was forced on the CLI regardless of the config's
+ * weight so we don't want a change to the config weight to lose this
+ * status. What this means is :
+ * - if previous weight was 0 and new one is >0, drop the DRAIN state.
+ * - if the previous weight was >0, keep it.
+ */
+ if (srv_iweight > 0 || srv->iweight == 0)
+ srv_adm_set_drain(srv);
+ }
+
+ srv->last_change = date.tv_sec - srv_last_time_change;
+ srv->check.status = srv_check_status;
+ srv->check.result = srv_check_result;
+
+ /* Only case we want to apply is removing ENABLED flag which could have been
+ * done by the "disable health" command over the stats socket
+ */
+ if ((srv->check.state & CHK_ST_CONFIGURED) &&
+ (srv_check_state & CHK_ST_CONFIGURED) &&
+ !(srv_check_state & CHK_ST_ENABLED))
+ srv->check.state &= ~CHK_ST_ENABLED;
+
+ /* Only case we want to apply is removing ENABLED flag which could have been
+ * done by the "disable agent" command over the stats socket
+ */
+ if ((srv->agent.state & CHK_ST_CONFIGURED) &&
+ (srv_agent_state & CHK_ST_CONFIGURED) &&
+ !(srv_agent_state & CHK_ST_ENABLED))
+ srv->agent.state &= ~CHK_ST_ENABLED;
+
+ /* We want to apply the previous 'running' weight (srv_uweight) only if there
+ * was no change in the configuration: both previous and new iweight are equals
+ *
+ * It means that a configuration file change has precedence over a unix socket change
+ * for server's weight
+ *
+ * by default, HAProxy applies the following weight when parsing the configuration
+ * srv->uweight = srv->iweight
+ */
+ if (srv_iweight == srv->iweight) {
+ srv->uweight = srv_uweight;
+ }
+ server_recalc_eweight(srv, 1);
+
+ /* load server IP address */
+ if (strcmp(params[0], "-") != 0)
+ srv->lastaddr = strdup(params[0]);
+
+ if (fqdn && srv->hostname) {
+ if (strcmp(srv->hostname, fqdn) == 0) {
+ /* Here we reset the 'set from stats socket FQDN' flag
+ * to support such transitions:
+			 * Let's say the initial FQDN value is foo1 (in the configuration file):
+			 * - the FQDN is changed from the stats socket, from foo1 to foo2,
+			 * - the FQDN is changed again from the configuration file (to the
+			 *   same foo2 value previously set from the stats socket),
+			 * - a reload occurs for any reason other than a FQDN modification;
+			 * the configuration file FQDN then matches the server state file value.
+ * So we must reset the 'set from stats socket FQDN' flag to be consistent with
+ * any further FQDN modification.
+ */
+ srv->next_admin &= ~SRV_ADMF_HMAINT;
+ }
+ else {
+			/* If the FQDN has been changed from the stats socket,
+ * apply fqdn state file value (which is the value set
+ * from stats socket).
+ * Also ensure the runtime resolver will process this resolution.
+ */
+ if (fqdn_set_by_cli) {
+ srv_set_fqdn(srv, fqdn, 0);
+ srv->flags &= ~SRV_F_NO_RESOLUTION;
+ srv->next_admin |= SRV_ADMF_HMAINT;
+ }
+ }
+ }
+ /* If all the conditions below are validated, this means
+ * we're evaluating a server managed by SRV resolution
+ */
+ else if (fqdn && !srv->hostname && srvrecord) {
+ int res;
+ int i;
+ char *tmp;
+
+ /* we can't apply previous state if SRV record has changed */
+ if (!srv->srvrq) {
+ chunk_appendf(msg, ", no SRV resolution for server '%s'. Previous state not applied", srv->id);
+ goto out;
+ }
+ if (strcmp(srv->srvrq->name, srvrecord) != 0) {
+ chunk_appendf(msg, ", SRV record mismatch between configuration ('%s') and state file ('%s) for server '%s'. Previous state not applied", srv->srvrq->name, srvrecord, srv->id);
+ goto out;
+ }
+
+ /* prepare DNS resolution for this server */
+ res = srv_prepare_for_resolution(srv, fqdn);
+ if (res == -1) {
+ chunk_appendf(msg, ", can't allocate memory for DNS resolution for server '%s'", srv->id);
+ goto out;
+ }
+
+ /* Remove from available list and insert in tree
+		 * since this server has a hostname
+ */
+ LIST_DEL_INIT(&srv->srv_rec_item);
+ srv->host_dn.key = tmp = strdup(srv->hostname_dn);
+
+ /* convert the key in lowercase because tree
+ * lookup is case sensitive but we don't care
+ */
+ for (i = 0; tmp[i]; i++)
+ tmp[i] = tolower(tmp[i]);
+
+ /* insert in tree and set the srvrq expiration date */
+ ebis_insert(&srv->srvrq->named_servers, &srv->host_dn);
+ task_schedule(srv->srvrq_check, tick_add(now_ms, srv->srvrq->resolvers->hold.timeout));
+
+ /* Unset SRV_F_MAPPORTS for SRV records.
+ * SRV_F_MAPPORTS is unfortunately set by parse_server()
+ * because no ports are provided in the configuration file.
+ * This is because HAProxy will use the port found into the SRV record.
+ */
+ srv->flags &= ~SRV_F_MAPPORTS;
+ }
+
+ if (port_st)
+ srv->svc_port = port_svc;
+
+ if (params[16]) {
+#ifdef USE_OPENSSL
+ use_ssl = strtol(params[16], &p, 10);
+
+ /* configure ssl if connection has been initiated at startup */
+ if (srv->ssl_ctx.ctx != NULL)
+ srv_set_ssl(srv, use_ssl);
+#endif
+ }
+
+ port_st = NULL;
+ if (params[17] && strcmp(params[17], "0") != 0)
+ port_st = params[17];
+ addr = NULL;
+ if (params[18] && strcmp(params[18], "-") != 0)
+ addr = params[18];
+ if (addr || port_st) {
+ warning = srv_update_check_addr_port(srv, addr, port_st);
+ if (warning) {
+ chunk_appendf(msg, ", %s", warning);
+ goto out;
+ }
+ }
+
+ port_st = NULL;
+ if (params[20] && strcmp(params[20], "0") != 0)
+ port_st = params[20];
+ addr = NULL;
+ if (params[19] && strcmp(params[19], "-") != 0)
+ addr = params[19];
+ if (addr || port_st) {
+ warning = srv_update_agent_addr_port(srv, addr, port_st);
+ if (warning) {
+ chunk_appendf(msg, ", %s", warning);
+ goto out;
+ }
+ }
+
+ out:
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (msg->data) {
+ if (partial_apply == 1)
+ ha_warning("server-state partially applied for server '%s/%s'%s\n",
+ srv->proxy->id, srv->id, msg->area);
+ else
+ ha_warning("server-state application failed for server '%s/%s'%s\n",
+ srv->proxy->id, srv->id, msg->area);
+ }
+ end:
+ free_trash_chunk(msg);
+}
+
+/*
+ * Loop on the proxy's servers and try to load its state from <st_tree> using
+ * srv_state_srv_update(). The proxy name and the server name are concatenated
+ * to form the key. If found the entry is removed from the tree.
+ */
+static void srv_state_px_update(const struct proxy *px, int vsn, struct eb_root *st_tree)
+{
+ struct server_state_line *st_line;
+ struct eb64_node *node;
+ struct server *srv;
+ unsigned long key;
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ chunk_printf(&trash, "%s %s", px->id, srv->id);
+ key = XXH3(trash.area, trash.data, 0);
+ node = eb64_lookup(st_tree, key);
+ if (!node)
+ continue; /* next server */
+ st_line = eb64_entry(node, typeof(*st_line), node);
+ srv_state_srv_update(srv, vsn, st_line->params+4);
+
+ /* the node may be released now */
+ eb64_delete(node);
+ free(st_line->line);
+ free(st_line);
+ }
+}
+
+/*
+ * read next line from file <f> and return the server state version if one found.
+ * If no version is found, then 0 is returned
+ * Note that this should be the first read on <f>
+ */
+static int srv_state_get_version(FILE *f) {
+ char mybuf[SRV_STATE_LINE_MAXLEN];
+ char *endptr;
+ long int vsn;
+
+	/* the first line of the file must contain the version of the export */
+ if (fgets(mybuf, SRV_STATE_LINE_MAXLEN, f) == NULL)
+ return 0;
+
+ vsn = strtol(mybuf, &endptr, 10);
+ if (endptr == mybuf || *endptr != '\n') {
+ /* Empty or truncated line */
+ return 0;
+ }
+
+ if (vsn < SRV_STATE_FILE_VERSION_MIN || vsn > SRV_STATE_FILE_VERSION_MAX) {
+ /* Wrong version number */
+ return 0;
+ }
+
+ return vsn;
+}
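+
+/* For reference, a server-state file is thus expected to start with a bare
+ * version number on its own line, e.g. "1\n" for the only version currently
+ * supported, followed by one line per server.
+ */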
+
+
+/*
+ * Parses the server state line stored in <buf>, expected to be in version
+ * <version>, and sets <params> accordingly on success. It returns 1 on
+ * success, 0 if the line must be ignored and -1 on error.
+ * The caller must provide a supported version.
+ */
+static int srv_state_parse_line(char *buf, const int version, char **params)
+{
+ int buflen, arg, ret;
+ char *cur;
+
+ buflen = strlen(buf);
+ cur = buf;
+ ret = 1; /* be optimistic and pretend a success */
+
+ /* we need at least one character and a non-truncated line */
+ if (buflen == 0 || buf[buflen - 1] != '\n') {
+ ret = -1;
+ goto out;
+ }
+
+ /* skip blank characters at the beginning of the line */
+ while (*cur == ' ' || *cur == '\t')
+ ++cur;
+
+ /* ignore empty or commented lines */
+ if (!*cur || *cur == '\n' || *cur == '#') {
+ ret = 0;
+ goto out;
+ }
+
+ /* Removes trailing '\n' to ease parsing */
+ buf[buflen - 1] = '\0';
+
+ /* we're now ready to move the line into <params> */
+ memset(params, 0, SRV_STATE_FILE_MAX_FIELDS * sizeof(*params));
+ arg = 0;
+ while (*cur) {
+ /* first of all, stop if there are too many fields */
+ if (arg >= SRV_STATE_FILE_MAX_FIELDS)
+ break;
+
+ /* then skip leading spaces */
+ while (*cur && (*cur == ' ' || *cur == '\t')) {
+ ++cur;
+ if (!*cur)
+ break;
+ }
+
+ /*
+ * idx:
+ * be_id: params[0]
+ * be_name: params[1]
+ * srv_id: params[2]
+ * srv_name: params[3]
+ * v1
+ * srv_addr: params[4]
+ * srv_op_state: params[5]
+ * srv_admin_state: params[6]
+ * srv_uweight: params[7]
+ * srv_iweight: params[8]
+ * srv_last_time_change: params[9]
+ * srv_check_status: params[10]
+ * srv_check_result: params[11]
+ * srv_check_health: params[12]
+ * srv_check_state: params[13]
+ * srv_agent_state: params[14]
+ * bk_f_forced_id: params[15]
+ * srv_f_forced_id: params[16]
+ * srv_fqdn: params[17]
+ * srv_port: params[18]
+ * srvrecord: params[19]
+ *
+ * srv_use_ssl: params[20] (optional field)
+ * srv_check_port: params[21] (optional field)
+ * srv_check_addr: params[22] (optional field)
+ * srv_agent_addr: params[23] (optional field)
+ * srv_agent_port: params[24] (optional field)
+ *
+ */
+ params[arg++] = cur;
+
+ /* look for the end of the current field */
+ while (*cur && *cur != ' ' && *cur != '\t') {
+ ++cur;
+ if (!*cur)
+ break;
+ }
+
+ /* otherwise, cut the field and move to the next one */
+ *cur++ = '\0';
+ }
+
+ /* if the number of fields does not match the version, then return an error */
+ if (version == 1 &&
+ (arg < SRV_STATE_FILE_MIN_FIELDS_VERSION_1 ||
+ arg > SRV_STATE_FILE_MAX_FIELDS_VERSION_1))
+ ret = -1;
+
+ out:
+ return ret;
+}
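+
+/* Illustrative layout of an input line for the parser above (version 1, all
+ * values are examples): the four generic fields come first, followed by the
+ * version-1 fields listed in the index above:
+ *
+ *   "<be_id> <be_name> <srv_id> <srv_name> <srv_addr> <srv_op_state> ... <srvrecord>\n"
+ *
+ * Blank lines and lines starting with '#' make the function return 0.
+ */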
+
+
+/*
+ * Parses a server state line using srv_state_parse_line() and stores the
+ * result in <st_tree>. If an error occurs during parsing, the line is
+ * ignored. If <px> is defined, it is used to check the backend id/name against
+ * the parsed params and to compute the key of the line.
+ */
+static int srv_state_parse_and_store_line(char *line, int vsn, struct eb_root *st_tree,
+ struct proxy *px)
+{
+ struct server_state_line *st_line;
+ int ret = 0;
+
+ /* store line in tree and duplicate the line */
+ st_line = calloc(1, sizeof(*st_line));
+ if (st_line == NULL)
+ goto skip_line;
+ st_line->line = strdup(line);
+ if (st_line->line == NULL)
+ goto skip_line;
+
+ ret = srv_state_parse_line(st_line->line, vsn, st_line->params);
+ if (ret <= 0)
+ goto skip_line;
+
+ /* Check backend name against params if <px> is defined */
+ if (px) {
+ int check_id = (atoi(st_line->params[0]) == px->uuid);
+ int check_name = (strcmp(px->id, st_line->params[1]) == 0);
+ int bk_f_forced_id = (atoi(st_line->params[15]) & PR_O_FORCED_ID);
+
+ if (!check_id && !check_name) {
+ /* backend does not match at all: skip the line */
+ goto skip_line;
+ }
+ else if (!check_id) {
+ /* Id mismatch: warn but continue */
+ ha_warning("Proxy '%s': backend ID mismatch: from server state file: '%s', from running config '%d'\n",
+ px->id, st_line->params[0], px->uuid);
+ send_log(px, LOG_NOTICE, "backend ID mismatch: from server state file: '%s', from running config '%d'\n",
+ st_line->params[0], px->uuid);
+ }
+ else if (!check_name) {
+ /* Name mismatch: warn and skip the line, except if the backend id was forced
+ * in the previous configuration */
+ ha_warning("Proxy '%s': backend name mismatch: from server state file: '%s', from running config '%s'\n",
+ px->id, st_line->params[1], px->id);
+ send_log(px, LOG_NOTICE, "backend name mismatch: from server state file: '%s', from running config '%s'\n",
+ st_line->params[1], px->id);
+ if (!bk_f_forced_id)
+ goto skip_line;
+ }
+ }
+
+ /*
+ * The key: "be_name srv_name"
+ * if <px> is defined: be_name == px->id
+ * otherwise: be_name == params[1]
+ */
+ chunk_printf(&trash, "%s %s", (px ? px->id : st_line->params[1]), st_line->params[3]);
+ st_line->node.key = XXH3(trash.area, trash.data, 0);
+ if (eb64_insert(st_tree, &st_line->node) != &st_line->node) {
+ /* this is a duplicate key, probably a hand-crafted file, drop it! */
+ goto skip_line;
+ }
+
+ return ret;
+
+ skip_line:
+ /* free up memory in case of error during the processing of the line */
+ if (st_line) {
+ free(st_line->line);
+ free(st_line);
+ }
+ return ret;
+}
+
+/* Helper function to get the server-state file path.
+ * If <filename> starts with a '/', it is considered an absolute path. In
+ * this case, or if <global.server_state_base> is not set, only <filename> is
+ * considered. Otherwise, <global.server_state_base> is concatenated to
+ * <filename> to produce the file path, which is copied to <dst_path>. In both
+ * cases, the result must not exceed <maxpathlen>.
+ *
+ * The length is returned on success or -1 if the path is too long. On error,
+ * the caller must not rely on <dst_path>.
+ */
+static inline int srv_state_get_filepath(char *dst_path, int maxpathlen, const char *filename)
+{
+ char *sep;
+ int len = 0;
+
+ /* build the resulting file path */
+ if (*filename == '/' || !global.server_state_base) {
+ /* absolute path or no base directory provided */
+ len = strlcpy2(dst_path, filename, maxpathlen);
+ }
+ else {
+ /* concat base directory and global server-state file */
+ sep = (global.server_state_base[strlen(global.server_state_base)-1] != '/' ? "/": "");
+ len = snprintf(dst_path, maxpathlen, "%s%s%s", global.server_state_base, sep, filename);
+ }
+ return (len < maxpathlen ? len: -1);
+}
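+
+/* Usage sketch (illustrative only, hence guarded out): with a hypothetical
+ * "server-state-base /etc/haproxy/states" in the global section, a relative
+ * name is resolved against the base while an absolute one is kept as-is.
+ */
+#if 0
+static void example_get_filepath(void)
+{
+ char path[MAXPATHLEN];
+
+ /* would yield "/etc/haproxy/states/bk_app.state" with the base above */
+ if (srv_state_get_filepath(path, MAXPATHLEN, "bk_app.state") >= 0)
+  printf("resolved: %s\n", path);
+
+ /* an absolute name ignores the base entirely */
+ if (srv_state_get_filepath(path, MAXPATHLEN, "/var/state/bk_app.state") >= 0)
+  printf("resolved: %s\n", path);
+}
+#endif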
+
+
+/* This function parses all the proxies and only takes care of the backends
+ * (since we're looking for servers). For each proxy, it does the following:
+ * - opens its server state file (either the global one or a local one)
+ * - reads the whole file, line by line
+ * - analyses each line to check if it matches our current backend:
+ * - backend name matches
+ * - backend id matches if the id is forced and the name doesn't match
+ * - if the server pointed to by the line is found, then the state is applied
+ *
+ * If the running backend's uuid or name differs from the state file, then
+ * HAProxy reports a warning.
+ *
+ * Grabs the server's lock via srv_state_srv_update().
+ */
+void apply_server_state(void)
+{
+ /* tree where global state_file is loaded */
+ struct eb_root global_state_tree = EB_ROOT_UNIQUE;
+ struct proxy *curproxy;
+ struct server_state_line *st_line;
+ struct eb64_node *node, *next_node;
+ FILE *f;
+ char mybuf[SRV_STATE_LINE_MAXLEN];
+ char file[MAXPATHLEN];
+ int local_vsn, global_vsn, len, linenum;
+
+ global_vsn = 0; /* no global file */
+ if (!global.server_state_file)
+ goto no_globalfile;
+ len = srv_state_get_filepath(file, MAXPATHLEN, global.server_state_file);
+ if (len == -1) {
+ ha_warning("config: Can't load global server state file: file too long.\n");
+ goto no_globalfile;
+ }
+
+ /* Load global server state in a tree */
+ errno = 0;
+ f = fopen(file, "r");
+ if (!f) {
+ ha_warning("config: Can't open global server state file '%s': %s\n", file, strerror(errno));
+ goto no_globalfile;
+ }
+
+ global_vsn = srv_state_get_version(f);
+ if (global_vsn == 0) {
+ ha_warning("config: Can't get version of the global server state file '%s'.\n",
+ file);
+ goto close_globalfile;
+ }
+
+ for (linenum = 1; fgets(mybuf, SRV_STATE_LINE_MAXLEN, f); linenum++) {
+ int ret;
+
+ ret = srv_state_parse_and_store_line(mybuf, global_vsn, &global_state_tree, NULL);
+ if (ret == -1) {
+ ha_warning("config: corrupted global server state file '%s' at line %d.\n",
+ file, linenum);
+ global_vsn = 0;
+ break;
+ }
+ }
+
+ close_globalfile:
+ fclose(f);
+
+ no_globalfile:
+ /* parse all proxies and load states from the tree (global file) or from a local file */
+ for (curproxy = proxies_list; curproxy != NULL; curproxy = curproxy->next) {
+ struct eb_root local_state_tree = EB_ROOT_UNIQUE;
+
+ /* Must be an enabled backend with at least a server */
+ if (!(curproxy->cap & PR_CAP_BE) || (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) || !curproxy->srv)
+ continue; /* next proxy */
+
+ /* Mode must be specified */
+ BUG_ON(curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_UNSPEC);
+
+ /* No server-state file for this proxy */
+ if (curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_NONE)
+ continue; /* next proxy */
+
+ if (curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_GLOBAL) {
+ /* when global file is used, we get data from the tree
+ * Note that in such a case we check neither the backend name nor the uuid.
+ * Backend name can't be wrong since it's used as a key to retrieve the server state
+ * line from the tree.
+ */
+ if (global_vsn)
+ srv_state_px_update(curproxy, global_vsn, &global_state_tree);
+ continue; /* next proxy */
+ }
+
+ /*
+ * Here we load a local server state-file
+ */
+
+ /* build the local file path */
+ len = srv_state_get_filepath(file, MAXPATHLEN, curproxy->server_state_file_name);
+ if (len == -1) {
+ ha_warning("Proxy '%s': Can't load local server state file: file too long.\n", curproxy->id);
+ continue; /* next proxy */
+ }
+
+ /* Load local server state in a tree */
+ errno = 0;
+ f = fopen(file, "r");
+ if (!f) {
+ ha_warning("Proxy '%s': Can't open server state file '%s': %s.\n",
+ curproxy->id, file, strerror(errno));
+ continue; /* next proxy */
+ }
+
+ /* the first line of the file must contain the version of the export */
+ local_vsn = srv_state_get_version(f);
+ if (local_vsn == 0) {
+ ha_warning("Proxy '%s': Can't get version of the server state file '%s'.\n",
+ curproxy->id, file);
+ goto close_localfile;
+ }
+
+ /* First, parse the lines of the local server-state file and store them in an eb-tree */
+ for (linenum = 1; fgets(mybuf, SRV_STATE_LINE_MAXLEN, f); linenum++) {
+ int ret;
+
+ ret = srv_state_parse_and_store_line(mybuf, local_vsn, &local_state_tree, curproxy);
+ if (ret == -1) {
+ ha_warning("Proxy '%s': corrupted server state file '%s' at line %d.\n",
+ curproxy->id, file, linenum);
+ local_vsn = 0;
+ break;
+ }
+ }
+
+ if (local_vsn)
+ srv_state_px_update(curproxy, local_vsn, &local_state_tree);
+
+ /* Remove unused server-state lines */
+ node = eb64_first(&local_state_tree);
+ while (node) {
+ st_line = eb64_entry(node, typeof(*st_line), node);
+ next_node = eb64_next(node);
+ eb64_delete(node);
+
+ if (local_vsn) {
+ /* if no server found, then warn */
+ ha_warning("Proxy '%s': can't find server '%s' in backend '%s'\n",
+ curproxy->id, st_line->params[3], curproxy->id);
+ send_log(curproxy, LOG_NOTICE, "can't find server '%s' in backend '%s'\n",
+ st_line->params[3], curproxy->id);
+ }
+
+ free(st_line->line);
+ free(st_line);
+ node = next_node;
+ }
+
+ close_localfile:
+ fclose(f);
+ }
+
+ node = eb64_first(&global_state_tree);
+ while (node) {
+ st_line = eb64_entry(node, typeof(*st_line), node);
+ next_node = eb64_next(node);
+ eb64_delete(node);
+ free(st_line->line);
+ free(st_line);
+ node = next_node;
+ }
+}
diff --git a/src/session.c b/src/session.c
new file mode 100644
index 0000000..66120d7
--- /dev/null
+++ b/src/session.c
@@ -0,0 +1,460 @@
+/*
+ * Session management functions.
+ *
+ * Copyright 2000-2015 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/session.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+
+
+DECLARE_POOL(pool_head_session, "session", sizeof(struct session));
+DECLARE_POOL(pool_head_sess_srv_list, "session server list",
+ sizeof(struct sess_srv_list));
+
+int conn_complete_session(struct connection *conn);
+
+/* Create a new session and assign it to frontend <fe>, listener <li>,
+ * origin <origin>, set the current date and clear the stick counters pointers.
+ * Returns the session upon success or NULL. The session may be released using
+ * session_free(). Note: <li> may be NULL.
+ */
+struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type *origin)
+{
+ struct session *sess;
+
+ sess = pool_alloc(pool_head_session);
+ if (sess) {
+ sess->listener = li;
+ sess->fe = fe;
+ sess->origin = origin;
+ sess->accept_date = date; /* user-visible date for logging */
+ sess->tv_accept = now; /* corrected date for internal use */
+ memset(sess->stkctr, 0, sizeof(sess->stkctr));
+ vars_init_head(&sess->vars, SCOPE_SESS);
+ sess->task = NULL;
+ sess->t_handshake = -1; /* handshake not done yet */
+ sess->t_idle = -1;
+ _HA_ATOMIC_INC(&totalconn);
+ _HA_ATOMIC_INC(&jobs);
+ LIST_INIT(&sess->srv_list);
+ sess->idle_conns = 0;
+ sess->flags = SESS_FL_NONE;
+ sess->src = NULL;
+ sess->dst = NULL;
+ }
+ return sess;
+}
+
+void session_free(struct session *sess)
+{
+ struct connection *conn, *conn_back;
+ struct sess_srv_list *srv_list, *srv_list_back;
+
+ if (sess->listener)
+ listener_release(sess->listener);
+ session_store_counters(sess);
+ vars_prune_per_sess(&sess->vars);
+ conn = objt_conn(sess->origin);
+ if (conn != NULL && conn->mux)
+ conn->mux->destroy(conn->ctx);
+ list_for_each_entry_safe(srv_list, srv_list_back, &sess->srv_list, srv_list) {
+ list_for_each_entry_safe(conn, conn_back, &srv_list->conn_list, session_list) {
+ LIST_DEL_INIT(&conn->session_list);
+ if (conn->mux) {
+ conn->owner = NULL;
+ conn->flags &= ~CO_FL_SESS_IDLE;
+ conn->mux->destroy(conn->ctx);
+ } else {
+ /* We have a connection, but not yet an associated mux.
+ * So destroy it now.
+ */
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ conn_free(conn);
+ }
+ }
+ pool_free(pool_head_sess_srv_list, srv_list);
+ }
+ sockaddr_free(&sess->src);
+ sockaddr_free(&sess->dst);
+ pool_free(pool_head_session, sess);
+ _HA_ATOMIC_DEC(&jobs);
+}
+
+/* callback used from the connection/mux layer to notify that a connection is
+ * going to be released.
+ */
+void conn_session_free(struct connection *conn)
+{
+ session_free(conn->owner);
+ conn->owner = NULL;
+}
+
+/* count a new session to keep frontend, listener and track stats up to date */
+static void session_count_new(struct session *sess)
+{
+ struct stkctr *stkctr;
+ void *ptr;
+ int i;
+
+ proxy_inc_fe_sess_ctr(sess->listener, sess->fe);
+
+ for (i = 0; i < MAX_SESS_STKCTR; i++) {
+ stkctr = &sess->stkctr[i];
+ if (!stkctr_entry(stkctr))
+ continue;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_CNT);
+ if (ptr)
+ HA_ATOMIC_INC(&stktable_data_cast(ptr, std_t_uint));
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_RATE);
+ if (ptr)
+ update_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_SESS_RATE].u, 1);
+ }
+}
+
+/* This function is called from the protocol layer accept() in order to
+ * instantiate a new session on behalf of a given listener and frontend. It
+ * returns a positive value upon success, 0 if the connection can be ignored,
+ * or a negative value upon critical failure. The accepted connection is
+ * closed if we return <= 0. If no handshake is needed, it immediately tries
+ * to instantiate a new stream. The connection must already have been filled
+ * with the incoming connection handle (a fd), a target (the listener) and a
+ * source address.
+ */
+int session_accept_fd(struct connection *cli_conn)
+{
+ struct listener *l = __objt_listener(cli_conn->target);
+ struct proxy *p = l->bind_conf->frontend;
+ int cfd = cli_conn->handle.fd;
+ struct session *sess;
+ int ret;
+
+ ret = -1; /* assume unrecoverable error by default */
+
+ cli_conn->proxy_netns = l->rx.settings->netns;
+
+ if (conn_prepare(cli_conn, l->rx.proto, l->bind_conf->xprt) < 0)
+ goto out_free_conn;
+
+ conn_ctrl_init(cli_conn);
+
+ /* wait for a PROXY protocol header */
+ if (l->options & LI_O_ACC_PROXY)
+ cli_conn->flags |= CO_FL_ACCEPT_PROXY;
+
+ /* wait for a NetScaler client IP insertion protocol header */
+ if (l->options & LI_O_ACC_CIP)
+ cli_conn->flags |= CO_FL_ACCEPT_CIP;
+
+ /* Add the handshake pseudo-XPRT */
+ if (cli_conn->flags & (CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP)) {
+ if (xprt_add_hs(cli_conn) != 0)
+ goto out_free_conn;
+ }
+ sess = session_new(p, l, &cli_conn->obj_type);
+ if (!sess)
+ goto out_free_conn;
+
+ conn_set_owner(cli_conn, sess, NULL);
+
+ /* now evaluate the tcp-request layer4 rules. We only need a session
+ * and no stream for these rules.
+ */
+ if ((l->options & LI_O_TCP_L4_RULES) && !tcp_exec_l4_rules(sess)) {
+ /* let's do a no-linger now to close with a single RST. */
+ if (!(cli_conn->flags & CO_FL_FDLESS))
+ setsockopt(cfd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
+ ret = 0; /* successful termination */
+ goto out_free_sess;
+ }
+ /* TCP rules may flag the connection as needing proxy protocol; now that it's done we can start our xprt */
+ if (conn_xprt_start(cli_conn) < 0)
+ goto out_free_sess;
+
+ /* FIXME/WTA: we should implement the setsockopt() calls at the proto
+ * level instead and let non-inet protocols implement their own equivalent.
+ */
+ if (cli_conn->flags & CO_FL_FDLESS)
+ goto skip_fd_setup;
+
+ /* Adjust some socket options */
+ if (l->rx.addr.ss_family == AF_INET || l->rx.addr.ss_family == AF_INET6) {
+ setsockopt(cfd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one));
+
+ if (p->options & PR_O_TCP_CLI_KA) {
+ setsockopt(cfd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
+
+#ifdef TCP_KEEPCNT
+ if (p->clitcpka_cnt)
+ setsockopt(cfd, IPPROTO_TCP, TCP_KEEPCNT, &p->clitcpka_cnt, sizeof(p->clitcpka_cnt));
+#endif
+
+#ifdef TCP_KEEPIDLE
+ if (p->clitcpka_idle)
+ setsockopt(cfd, IPPROTO_TCP, TCP_KEEPIDLE, &p->clitcpka_idle, sizeof(p->clitcpka_idle));
+#endif
+
+#ifdef TCP_KEEPINTVL
+ if (p->clitcpka_intvl)
+ setsockopt(cfd, IPPROTO_TCP, TCP_KEEPINTVL, &p->clitcpka_intvl, sizeof(p->clitcpka_intvl));
+#endif
+ }
+
+ if (p->options & PR_O_TCP_NOLING)
+ HA_ATOMIC_OR(&fdtab[cfd].state, FD_LINGER_RISK);
+
+#if defined(TCP_MAXSEG)
+ if (l->maxseg < 0) {
+ /* we just want to reduce the current MSS by that value */
+ int mss;
+ socklen_t mss_len = sizeof(mss);
+ if (getsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, &mss_len) == 0) {
+ mss += l->maxseg; /* remember, it's < 0 */
+ setsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss));
+ }
+ }
+#endif
+ }
+
+ if (global.tune.client_sndbuf)
+ setsockopt(cfd, SOL_SOCKET, SO_SNDBUF, &global.tune.client_sndbuf, sizeof(global.tune.client_sndbuf));
+
+ if (global.tune.client_rcvbuf)
+ setsockopt(cfd, SOL_SOCKET, SO_RCVBUF, &global.tune.client_rcvbuf, sizeof(global.tune.client_rcvbuf));
+
+ skip_fd_setup:
+ /* OK, now either we have a pending handshake to execute with and then
+ * we must return to the I/O layer, or we can proceed with the end of
+ * the stream initialization. In case of handshake, we also set the I/O
+ * timeout to the frontend's client timeout and register a task in the
+ * session for this purpose. The connection's owner is left to the
+ * session during this period.
+ *
+ * At this point we set the relation between sess/task/conn this way :
+ *
+ * +----------------- task
+ * | |
+ * orig -- sess <-- context |
+ * | ^ | |
+ * v | | |
+ * conn -- owner ---> task <-----+
+ */
+ if (cli_conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS)) {
+ if (unlikely((sess->task = task_new_here()) == NULL))
+ goto out_free_sess;
+
+ sess->task->context = sess;
+ sess->task->nice = l->nice;
+ sess->task->process = session_expire_embryonic;
+ sess->task->expire = tick_add_ifset(now_ms, p->timeout.client);
+ task_queue(sess->task);
+ return 1;
+ }
+
+ /* OK let's complete stream initialization since there is no handshake */
+ if (conn_complete_session(cli_conn) >= 0)
+ return 1;
+
+ /* if we reach here we have deliberately decided not to keep this
+ * session (e.g. tcp-request rule), so that's not an error we should
+ * try to protect against.
+ */
+ ret = 0;
+
+ /* error unrolling */
+ out_free_sess:
+ /* prevent call to listener_release during session_free. It will be
+ * done below, for all errors. */
+ sess->listener = NULL;
+ session_free(sess);
+
+ out_free_conn:
+ if (ret < 0 && l->bind_conf->xprt == xprt_get(XPRT_RAW) &&
+ p->mode == PR_MODE_HTTP && l->bind_conf->mux_proto == NULL &&
+ !(cli_conn->flags & CO_FL_FDLESS)) {
+ /* critical error, no more memory, try to emit a 500 response */
+ send(cfd, http_err_msgs[HTTP_ERR_500], strlen(http_err_msgs[HTTP_ERR_500]),
+ MSG_DONTWAIT|MSG_NOSIGNAL);
+ }
+
+ conn_stop_tracking(cli_conn);
+ conn_full_close(cli_conn);
+ conn_free(cli_conn);
+ listener_release(l);
+ return ret;
+}
+
+
+/* prepare the trash with a log prefix for session <sess>. It only works with
+ * embryonic sessions based on a real connection. This function requires that
+ * sess->origin points to the incoming connection.
+ */
+static void session_prepare_log_prefix(struct session *sess)
+{
+ const struct sockaddr_storage *src;
+ struct tm tm;
+ char pn[INET6_ADDRSTRLEN];
+ int ret;
+ char *end;
+
+ src = sess_src(sess);
+ ret = (src ? addr_to_str(src, pn, sizeof(pn)) : 0);
+ if (ret <= 0)
+ chunk_printf(&trash, "unknown [");
+ else if (ret == AF_UNIX)
+ chunk_printf(&trash, "%s:%d [", pn, sess->listener->luid);
+ else
+ chunk_printf(&trash, "%s:%d [", pn, get_host_port(src));
+
+ get_localtime(sess->accept_date.tv_sec, &tm);
+ end = date2str_log(trash.area + trash.data, &tm, &(sess->accept_date),
+ trash.size - trash.data);
+ trash.data = end - trash.area;
+ if (sess->listener->name)
+ chunk_appendf(&trash, "] %s/%s", sess->fe->id, sess->listener->name);
+ else
+ chunk_appendf(&trash, "] %s/%d", sess->fe->id, sess->listener->luid);
+}
+
+/* This function kills an existing embryonic session. It stops the connection's
+ * transport layer, releases assigned resources, resumes the listener if it was
+ * disabled and finally kills the file descriptor. This function requires that
+ * sess->origin points to the incoming connection.
+ */
+static void session_kill_embryonic(struct session *sess, unsigned int state)
+{
+ int level = LOG_INFO;
+ struct connection *conn = __objt_conn(sess->origin);
+ struct task *task = sess->task;
+ unsigned int log = sess->fe->to_log;
+ const char *err_msg;
+
+ if (sess->fe->options2 & PR_O2_LOGERRORS)
+ level = LOG_ERR;
+
+ if (log && (sess->fe->options & PR_O_NULLNOLOG)) {
+ /* with "option dontlognull", we don't log connections with no transfer */
+ if (!conn->err_code ||
+ conn->err_code == CO_ER_PRX_EMPTY || conn->err_code == CO_ER_PRX_ABORT ||
+ conn->err_code == CO_ER_CIP_EMPTY || conn->err_code == CO_ER_CIP_ABORT ||
+ conn->err_code == CO_ER_SSL_EMPTY || conn->err_code == CO_ER_SSL_ABORT)
+ log = 0;
+ }
+
+ if (log) {
+ if (!conn->err_code && (state & TASK_WOKEN_TIMER)) {
+ if (conn->flags & CO_FL_ACCEPT_PROXY)
+ conn->err_code = CO_ER_PRX_TIMEOUT;
+ else if (conn->flags & CO_FL_ACCEPT_CIP)
+ conn->err_code = CO_ER_CIP_TIMEOUT;
+ else if (conn->flags & CO_FL_SSL_WAIT_HS)
+ conn->err_code = CO_ER_SSL_TIMEOUT;
+ }
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_error)) {
+ /* Display a log line following the configured error-log-format. */
+ sess_log(sess);
+ }
+ else {
+ session_prepare_log_prefix(sess);
+ err_msg = conn_err_code_str(conn);
+ if (err_msg)
+ send_log(sess->fe, level, "%s: %s\n", trash.area,
+ err_msg);
+ else
+ send_log(sess->fe, level, "%s: unknown connection error (code=%d flags=%08x)\n",
+ trash.area, conn->err_code, conn->flags);
+ }
+ }
+
+ /* kill the connection now */
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ conn_free(conn);
+ sess->origin = NULL;
+
+ task_destroy(task);
+ session_free(sess);
+}
+
+/* Manages the embryonic session timeout. It is only called when the timeout
+ * strikes and performs the required cleanup. It's only exported to make it
+ * resolve in "show tasks".
+ */
+struct task *session_expire_embryonic(struct task *t, void *context, unsigned int state)
+{
+ struct session *sess = context;
+
+ if (!(state & TASK_WOKEN_TIMER))
+ return t;
+
+ session_kill_embryonic(sess, state);
+ return NULL;
+}
+
+/* Finish initializing a session from a connection, or kills it if the
+ * connection shows an error. Returns <0 if the connection was killed. It may
+ * be called either asynchronously when ssl handshake is done with an embryonic
+ * session, or synchronously to finalize the session. The distinction is made
+ * on sess->task which is only set in the embryonic session case.
+ */
+int conn_complete_session(struct connection *conn)
+{
+ struct session *sess = conn->owner;
+
+ sess->t_handshake = tv_ms_elapsed(&sess->tv_accept, &now);
+
+ if (conn->flags & CO_FL_ERROR)
+ goto fail;
+
+ /* if logs require transport layer information, note it on the connection */
+ if (sess->fe->to_log & LW_XPRT)
+ conn->flags |= CO_FL_XPRT_TRACKED;
+
+ /* we may have some tcp-request-session rules */
+ if ((sess->listener->options & LI_O_TCP_L5_RULES) && !tcp_exec_l5_rules(sess))
+ goto fail;
+
+ session_count_new(sess);
+ if (conn_install_mux_fe(conn, NULL) < 0)
+ goto fail;
+
+ /* the embryonic session's task is not needed anymore */
+ task_destroy(sess->task);
+ sess->task = NULL;
+ conn_set_owner(conn, sess, conn_session_free);
+
+ return 0;
+
+ fail:
+ if (sess->task)
+ session_kill_embryonic(sess, 0);
+ return -1;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/sha1.c b/src/sha1.c
new file mode 100644
index 0000000..b7c2d70
--- /dev/null
+++ b/src/sha1.c
@@ -0,0 +1,308 @@
+/*
+ * Based on the git SHA1 Implementation.
+ *
+ * Copyright (C) 2009-2015, Linus Torvalds and others.
+ *
+ * SHA1 routine optimized to do word accesses rather than byte accesses,
+ * and to avoid unnecessary copies into the context array.
+ *
+ * This was initially based on the Mozilla SHA1 implementation, although
+ * none of the original Mozilla code remains.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* this is only to get definitions for memcpy(), ntohl() and htonl() */
+#include <string.h>
+#include <inttypes.h>
+#include <arpa/inet.h>
+
+#include <import/sha1.h>
+
+/*
+ * Performance might be improved if the CPU architecture is OK with
+ * unaligned 32-bit loads and a fast ntohl() is available.
+ * Otherwise fall back to byte loads and shifts which is portable,
+ * and is faster on architectures with memory alignment issues.
+ */
+
+#if defined(__i386__) || defined(__x86_64__) || \
+ defined(__ppc__) || defined(__ppc64__) || \
+ defined(__powerpc__) || defined(__powerpc64__) || \
+ defined(__s390__) || defined(__s390x__)
+
+#define get_be32(p) ntohl(*(unsigned int *)(p))
+#define put_be32(p, v) do { *(unsigned int *)(p) = htonl(v); } while (0)
+
+#else
+
+static inline uint32_t get_be32(const void *ptr)
+{
+ const unsigned char *p = ptr;
+ return (uint32_t)p[0] << 24 |
+ (uint32_t)p[1] << 16 |
+ (uint32_t)p[2] << 8 |
+ (uint32_t)p[3] << 0;
+}
+
+static inline void put_be32(void *ptr, uint32_t value)
+{
+ unsigned char *p = ptr;
+ p[0] = value >> 24;
+ p[1] = value >> 16;
+ p[2] = value >> 8;
+ p[3] = value >> 0;
+}
+
+#endif
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+
+/*
+ * Force usage of rol or ror by selecting the one with the smaller constant.
+ * It _can_ generate slightly smaller code (a constant of 1 is special), but
+ * perhaps more importantly it's possibly faster on any uarch that does a
+ * rotate with a loop.
+ */
+
+#define SHA_ASM(op, x, n) ({ unsigned int __res; __asm__(op " %1,%0":"=r" (__res):"i" (n), "0" (x)); __res; })
+#define SHA_ROL(x,n) SHA_ASM("rol", x, n)
+#define SHA_ROR(x,n) SHA_ASM("ror", x, n)
+
+#else
+
+#define SHA_ROT(X,l,r) (((X) << (l)) | ((X) >> (r)))
+#define SHA_ROL(X,n) SHA_ROT(X,n,32-(n))
+#define SHA_ROR(X,n) SHA_ROT(X,32-(n),n)
+
+#endif
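+
+/* Quick sanity illustration of the rotate macros above: on 32-bit values,
+ * SHA_ROL(0x80000000, 1) == 0x00000001 and
+ * SHA_ROR(0x00000001, 1) == 0x80000000.
+ */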
+
+/*
+ * If you have 32 registers or more, the compiler can (and should)
+ * try to change the array[] accesses into registers. However, on
+ * machines with less than ~25 registers, that won't really work,
+ * and at least gcc will make an unholy mess of it.
+ *
+ * So to avoid that mess which just slows things down, we force
+ * the stores to memory to actually happen (we might be better off
+ * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
+ * suggested by Artur Skawina - that will also make gcc unable to
+ * try to do the silly "optimize away loads" part because it won't
+ * see what the value will be).
+ *
+ * Ben Herrenschmidt reports that on PPC, the C version comes close
+ * to the optimized asm with this (ie on PPC you don't want that
+ * 'volatile', since there are lots of registers).
+ *
+ * On ARM we get the best code generation by forcing a full memory barrier
+ * between each SHA_ROUND, otherwise gcc happily gets wild with spilling and
+ * the stack frame size simply explodes and performance goes down the drain.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+ #define setW(x, val) (*(volatile unsigned int *)&W(x) = (val))
+#elif defined(__GNUC__) && defined(__arm__)
+ #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
+#else
+ #define setW(x, val) (W(x) = (val))
+#endif
+
+/* This "rolls" over the 512-bit array */
+#define W(x) (array[(x)&15])
+
+/*
+ * Where do we get the source from? The first 16 iterations get it from
+ * the input data, the next mix it from the 512-bit array.
+ */
+#define SHA_SRC(t) get_be32((unsigned char *) block + (t)*4)
+#define SHA_MIX(t) SHA_ROL(W((t)+13) ^ W((t)+8) ^ W((t)+2) ^ W(t), 1);
+
+#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
+ unsigned int TEMP = input(t); setW(t, TEMP); \
+ E += TEMP + SHA_ROL(A,5) + (fn) + (constant); \
+ B = SHA_ROR(B, 2); } while (0)
+
+#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
+#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
+#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
+
+static void blk_SHA1_Block(blk_SHA_CTX *ctx, const void *block)
+{
+ unsigned int A,B,C,D,E;
+ unsigned int array[16];
+
+ A = ctx->H[0];
+ B = ctx->H[1];
+ C = ctx->H[2];
+ D = ctx->H[3];
+ E = ctx->H[4];
+
+ /* Round 1 - iterations 0-15 take their input from 'block' */
+ T_0_15( 0, A, B, C, D, E);
+ T_0_15( 1, E, A, B, C, D);
+ T_0_15( 2, D, E, A, B, C);
+ T_0_15( 3, C, D, E, A, B);
+ T_0_15( 4, B, C, D, E, A);
+ T_0_15( 5, A, B, C, D, E);
+ T_0_15( 6, E, A, B, C, D);
+ T_0_15( 7, D, E, A, B, C);
+ T_0_15( 8, C, D, E, A, B);
+ T_0_15( 9, B, C, D, E, A);
+ T_0_15(10, A, B, C, D, E);
+ T_0_15(11, E, A, B, C, D);
+ T_0_15(12, D, E, A, B, C);
+ T_0_15(13, C, D, E, A, B);
+ T_0_15(14, B, C, D, E, A);
+ T_0_15(15, A, B, C, D, E);
+
+ /* Round 1 - tail. Input from 512-bit mixing array */
+ T_16_19(16, E, A, B, C, D);
+ T_16_19(17, D, E, A, B, C);
+ T_16_19(18, C, D, E, A, B);
+ T_16_19(19, B, C, D, E, A);
+
+ /* Round 2 */
+ T_20_39(20, A, B, C, D, E);
+ T_20_39(21, E, A, B, C, D);
+ T_20_39(22, D, E, A, B, C);
+ T_20_39(23, C, D, E, A, B);
+ T_20_39(24, B, C, D, E, A);
+ T_20_39(25, A, B, C, D, E);
+ T_20_39(26, E, A, B, C, D);
+ T_20_39(27, D, E, A, B, C);
+ T_20_39(28, C, D, E, A, B);
+ T_20_39(29, B, C, D, E, A);
+ T_20_39(30, A, B, C, D, E);
+ T_20_39(31, E, A, B, C, D);
+ T_20_39(32, D, E, A, B, C);
+ T_20_39(33, C, D, E, A, B);
+ T_20_39(34, B, C, D, E, A);
+ T_20_39(35, A, B, C, D, E);
+ T_20_39(36, E, A, B, C, D);
+ T_20_39(37, D, E, A, B, C);
+ T_20_39(38, C, D, E, A, B);
+ T_20_39(39, B, C, D, E, A);
+
+ /* Round 3 */
+ T_40_59(40, A, B, C, D, E);
+ T_40_59(41, E, A, B, C, D);
+ T_40_59(42, D, E, A, B, C);
+ T_40_59(43, C, D, E, A, B);
+ T_40_59(44, B, C, D, E, A);
+ T_40_59(45, A, B, C, D, E);
+ T_40_59(46, E, A, B, C, D);
+ T_40_59(47, D, E, A, B, C);
+ T_40_59(48, C, D, E, A, B);
+ T_40_59(49, B, C, D, E, A);
+ T_40_59(50, A, B, C, D, E);
+ T_40_59(51, E, A, B, C, D);
+ T_40_59(52, D, E, A, B, C);
+ T_40_59(53, C, D, E, A, B);
+ T_40_59(54, B, C, D, E, A);
+ T_40_59(55, A, B, C, D, E);
+ T_40_59(56, E, A, B, C, D);
+ T_40_59(57, D, E, A, B, C);
+ T_40_59(58, C, D, E, A, B);
+ T_40_59(59, B, C, D, E, A);
+
+ /* Round 4 */
+ T_60_79(60, A, B, C, D, E);
+ T_60_79(61, E, A, B, C, D);
+ T_60_79(62, D, E, A, B, C);
+ T_60_79(63, C, D, E, A, B);
+ T_60_79(64, B, C, D, E, A);
+ T_60_79(65, A, B, C, D, E);
+ T_60_79(66, E, A, B, C, D);
+ T_60_79(67, D, E, A, B, C);
+ T_60_79(68, C, D, E, A, B);
+ T_60_79(69, B, C, D, E, A);
+ T_60_79(70, A, B, C, D, E);
+ T_60_79(71, E, A, B, C, D);
+ T_60_79(72, D, E, A, B, C);
+ T_60_79(73, C, D, E, A, B);
+ T_60_79(74, B, C, D, E, A);
+ T_60_79(75, A, B, C, D, E);
+ T_60_79(76, E, A, B, C, D);
+ T_60_79(77, D, E, A, B, C);
+ T_60_79(78, C, D, E, A, B);
+ T_60_79(79, B, C, D, E, A);
+
+ ctx->H[0] += A;
+ ctx->H[1] += B;
+ ctx->H[2] += C;
+ ctx->H[3] += D;
+ ctx->H[4] += E;
+}
+
+void blk_SHA1_Init(blk_SHA_CTX *ctx)
+{
+ ctx->size = 0;
+
+ /* Initialize H with the magic constants (see FIPS180 for constants) */
+ ctx->H[0] = 0x67452301;
+ ctx->H[1] = 0xefcdab89;
+ ctx->H[2] = 0x98badcfe;
+ ctx->H[3] = 0x10325476;
+ ctx->H[4] = 0xc3d2e1f0;
+}
+
+void blk_SHA1_Update(blk_SHA_CTX *ctx, const void *data, unsigned long len)
+{
+ unsigned int lenW = ctx->size & 63;
+
+ ctx->size += len;
+
+ /* Read the data into W and process blocks as they get full */
+ if (lenW) {
+ unsigned int left = 64 - lenW;
+ if (len < left)
+ left = len;
+ memcpy(lenW + (char *)ctx->W, data, left);
+ lenW = (lenW + left) & 63;
+ len -= left;
+ data = ((const char *)data + left);
+ if (lenW)
+ return;
+ blk_SHA1_Block(ctx, ctx->W);
+ }
+ while (len >= 64) {
+ blk_SHA1_Block(ctx, data);
+ data = ((const char *)data + 64);
+ len -= 64;
+ }
+ if (len)
+ memcpy(ctx->W, data, len);
+}
+
+void blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx)
+{
+ static const unsigned char pad[64] = { 0x80 };
+ unsigned int padlen[2];
+ int i;
+
+ /* Pad with a binary 1 (ie 0x80), then zeroes, then length */
+ padlen[0] = htonl((uint32_t)(ctx->size >> 29));
+ padlen[1] = htonl((uint32_t)(ctx->size << 3));
+
+ i = ctx->size & 63;
+ blk_SHA1_Update(ctx, pad, 1 + (63 & (55 - i)));
+ blk_SHA1_Update(ctx, padlen, 8);
+
+ /* Output hash */
+ for (i = 0; i < 5; i++)
+ put_be32(hashout + i * 4, ctx->H[i]);
+}
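+
+/* Usage sketch (illustrative only, hence guarded out): the classic
+ * init/update/final sequence of this API. Hashing the three-byte string
+ * "abc" yields the well-known SHA-1 test vector
+ * a9993e364706816aba3e25717850c26c9cd0d89d.
+ */
+#if 0
+static void example_sha1(void)
+{
+ blk_SHA_CTX ctx;
+ unsigned char digest[20];
+ int i;
+
+ blk_SHA1_Init(&ctx);
+ blk_SHA1_Update(&ctx, "abc", 3);
+ blk_SHA1_Final(digest, &ctx);
+
+ for (i = 0; i < 20; i++)
+  printf("%02x", digest[i]);
+ putchar('\n');
+}
+#endif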
diff --git a/src/shctx.c b/src/shctx.c
new file mode 100644
index 0000000..d114d46
--- /dev/null
+++ b/src/shctx.c
@@ -0,0 +1,355 @@
+/*
+ * shctx.c - shared context management functions for SSL
+ *
+ * Copyright (C) 2011-2012 EXCELIANCE
+ *
+ * Author: Emeric Brun - emeric@exceliance.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <sys/mman.h>
+#include <arpa/inet.h>
+#include <import/ebmbtree.h>
+#include <haproxy/list.h>
+#include <haproxy/shctx.h>
+
+int use_shared_mem = 0;
+
+/*
+ * Reserve a new row if <first> is NULL: put it in the hot list and set its
+ * refcount to 1. Otherwise, append new blocks to the row whose first block
+ * is <first>.
+ *
+ * Blocks are reserved from the avail list and put in the hot list.
+ * Returns the first block put in the hot list, or NULL if not enough blocks
+ * are available.
+ */
+struct shared_block *shctx_row_reserve_hot(struct shared_context *shctx,
+ struct shared_block *first, int data_len)
+{
+ struct shared_block *last = NULL, *block, *sblock, *ret = NULL, *next;
+ int enough = 0;
+ int freed = 0;
+ int remain;
+
+ BUG_ON(data_len < 0);
+
+ /* not enough usable blocks */
+ if (data_len > shctx->nbav * shctx->block_size)
+ goto out;
+
+ /* Check the object size limit. */
+ if (shctx->max_obj_size > 0) {
+ if ((first && first->len + data_len > shctx->max_obj_size) ||
+ (!first && data_len > shctx->max_obj_size))
+ goto out;
+ }
+
+ /* Note that <remain> is zero only if <first> is not NULL. */
+ remain = 1;
+ if (first) {
+ /* Check that there is some block to reserve.
+ * In this first block of code we compute the remaining room in the
+ * current list of blocks already reserved for this object.
+ * We return ASAP if there is enough room to copy <data_len> bytes.
+ */
+ last = first->last_reserved;
+ /* Remaining room. */
+ remain = (shctx->block_size * first->block_count - first->len);
+ if (remain) {
+ if (remain > data_len) {
+ return last ? last : first;
+ } else {
+ data_len -= remain;
+ if (data_len <= 0)
+ return last ? last : first;
+ }
+ }
+ }
+
+ while (!enough && !LIST_ISEMPTY(&shctx->avail)) {
+ int count = 0;
+ int first_count = 0, first_len = 0;
+
+ next = block = LIST_NEXT(&shctx->avail, struct shared_block *, list);
+ if (ret == NULL)
+ ret = next;
+
+ first_count = next->block_count;
+ first_len = next->len;
+ /*
+ Should never be set to 0.
+ if (next->block_count == 0)
+ next->block_count = 1;
+ */
+
+ list_for_each_entry_safe_from(block, sblock, &shctx->avail, list) {
+
+ /* release callback */
+ if (first_len && shctx->free_block)
+ shctx->free_block(next, block);
+
+ block->block_count = 1;
+ block->len = 0;
+
+ freed++;
+
+ BUG_ON(data_len < 0);
+ data_len -= shctx->block_size;
+
+ if (data_len > 0 || !enough) {
+ if (last) {
+ shctx_block_append_hot(shctx, &last->list, block);
+ last = block;
+ } else {
+ shctx_block_set_hot(shctx, block);
+ }
+ if (!remain) {
+ first->last_append = block;
+ remain = 1;
+ }
+ if (data_len <= 0) {
+ ret->block_count = freed;
+ ret->refcount = 1;
+ ret->last_reserved = block;
+ enough = 1;
+ break;
+ }
+ }
+ count++;
+ if (count >= first_count)
+ break;
+ }
+ }
+
+ if (first) {
+ first->block_count += ret->block_count;
+ first->last_reserved = ret->last_reserved;
+ /* Reset this block. */
+ ret->last_reserved = NULL;
+ ret->block_count = 1;
+ ret->refcount = 0;
+ /* Return the first block. */
+ ret = first;
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * If the refcount is 0, move the row to the hot list and increment the refcount.
+ */
+void shctx_row_inc_hot(struct shared_context *shctx, struct shared_block *first)
+{
+ struct shared_block *block, *sblock;
+ int count = 0;
+
+ if (first->refcount <= 0) {
+
+ block = first;
+
+ list_for_each_entry_safe_from(block, sblock, &shctx->avail, list) {
+
+ shctx_block_set_hot(shctx, block);
+
+ count++;
+ if (count >= first->block_count)
+ break;
+ }
+ }
+
+ first->refcount++;
+}
+
+/*
+ * Decrement the refcount and move the row to the end of the avail list if it reaches 0.
+ */
+void shctx_row_dec_hot(struct shared_context *shctx, struct shared_block *first)
+{
+ struct shared_block *block, *sblock;
+ int count = 0;
+
+ first->refcount--;
+
+ if (first->refcount <= 0) {
+
+ block = first;
+
+ list_for_each_entry_safe_from(block, sblock, &shctx->hot, list) {
+
+ shctx_block_set_avail(shctx, block);
+
+ count++;
+ if (count >= first->block_count)
+ break;
+ }
+ }
+
+}
+
+
+/*
+ * Append data to the row if there is enough space.
+ * The row should be in the hot list.
+ *
+ * Returns the amount of data left to append if ret >= 0 (0 meaning all the
+ * data was appended), or how much more space is needed to contain the data
+ * if < 0.
+ */
+int shctx_row_data_append(struct shared_context *shctx,
+ struct shared_block *first, struct shared_block *from,
+ unsigned char *data, int len)
+{
+ int remain, start;
+ struct shared_block *block;
+
+ /* not enough room: return the (negative) amount of missing space */
+ if (len > first->block_count * shctx->block_size - first->len)
+ return (first->block_count * shctx->block_size - first->len) - len;
+
+ block = from ? from : first;
+ list_for_each_entry_from(block, &shctx->hot, list) {
+ /* end of copy */
+ if (len <= 0)
+ break;
+
+ /* remaining free room in the current block. */
+ remain = (shctx->block_size * first->block_count - first->len) % shctx->block_size;
+ BUG_ON(remain < 0);
+
+ /* if remain == 0, previous buffers are full, or first->len == 0 */
+ if (!remain) {
+ remain = shctx->block_size;
+ start = 0;
+ }
+ else {
+ /* start must be calculated before remain is modified */
+ start = shctx->block_size - remain;
+ BUG_ON(start < 0);
+ }
+
+ /* must not try to copy more than len */
+ remain = MIN(remain, len);
+
+ memcpy(block->data + start, data, remain);
+
+ data += remain;
+ len -= remain;
+ first->len += remain; /* update len in the head of the row */
+ first->last_append = block;
+ }
+
+ return len;
+}
+
+/*
+ * Copy <len> bytes of data from a row of blocks and return the amount of
+ * remaining data to copy. If 0 is returned, all the data has successfully
+ * been copied.
+ *
+ * The row should be in the hot list
+ */
+int shctx_row_data_get(struct shared_context *shctx, struct shared_block *first,
+ unsigned char *dst, int offset, int len)
+{
+ int count = 0, size = 0, start = -1;
+ struct shared_block *block;
+
+ /* can't copy more */
+ if (len > first->len)
+ len = first->len;
+
+ block = first;
+ count = 0;
+ /* Pass through the blocks to copy them */
+ list_for_each_entry_from(block, &shctx->hot, list) {
+ if (count >= first->block_count || len <= 0)
+ break;
+
+ count++;
+ /* continue until we reach the block
+ corresponding to the offset */
+ if (count < offset / shctx->block_size + 1)
+ continue;
+
+ /* on the first block, data may not begin at offset 0 */
+ if (start == -1)
+ start = offset - (count - 1) * shctx->block_size;
+
+ BUG_ON(start < 0);
+
+ /* size can be lower than a block when copying the last block */
+ size = MIN(shctx->block_size - start, len);
+ BUG_ON(size < 0);
+
+ memcpy(dst, block->data + start, size);
+ dst += size;
+ len -= size;
+ start = 0;
+ }
+ return len;
+}
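+
+/* Usage sketch (illustrative only, hence guarded out): a minimal
+ * reserve/append/read round trip on a row. It assumes <shctx> was obtained
+ * from shctx_init() and that the caller already holds the shctx lock, as
+ * required by the hot/avail list manipulations.
+ */
+#if 0
+static void example_row_round_trip(struct shared_context *shctx)
+{
+ struct shared_block *first;
+ unsigned char in[128] = "hello";
+ unsigned char out[128];
+
+ first = shctx_row_reserve_hot(shctx, NULL, sizeof(in));
+ if (!first)
+  return; /* not enough available blocks */
+
+ if (shctx_row_data_append(shctx, first, NULL, in, sizeof(in)) == 0 &&
+     shctx_row_data_get(shctx, first, out, 0, sizeof(in)) == 0) {
+  /* <out> now holds a copy of <in> */
+ }
+
+ /* drop our reference: the row goes back to the avail list */
+ shctx_row_dec_hot(shctx, first);
+}
+#endif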
+
+/* Allocate shared memory context.
+ * <maxblocks> is the maximum number of blocks.
+ * If <maxblocks> is less than or equal to 0, the SSL cache is disabled.
+ * Returns: -1 on allocation failure, <maxblocks> if it performs the context
+ * allocation, and 0 if the cache is already allocated.
+ */
+int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
+ unsigned int maxobjsz, int extra, int shared)
+{
+ int i;
+ struct shared_context *shctx;
+ int ret;
+ void *cur;
+ int maptype = MAP_PRIVATE;
+
+ if (maxblocks <= 0)
+ return 0;
+
+ /* make sure to align the records on a pointer size */
+ blocksize = (blocksize + sizeof(void *) - 1) & -sizeof(void *);
+ extra = (extra + sizeof(void *) - 1) & -sizeof(void *);
+
+ if (shared) {
+ maptype = MAP_SHARED;
+ use_shared_mem = 1;
+ }
+
+ shctx = (struct shared_context *)mmap(NULL, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)),
+ PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0);
+ if (!shctx || shctx == MAP_FAILED) {
+ shctx = NULL;
+ ret = SHCTX_E_ALLOC_CACHE;
+ goto err;
+ }
+
+ HA_SPIN_INIT(&shctx->lock);
+ shctx->nbav = 0;
+
+ LIST_INIT(&shctx->avail);
+ LIST_INIT(&shctx->hot);
+
+ shctx->block_size = blocksize;
+ shctx->max_obj_size = maxobjsz == (unsigned int)-1 ? 0 : maxobjsz;
+
+ /* init the free blocks after the shared context struct */
+ cur = (void *)shctx + sizeof(struct shared_context) + extra;
+ for (i = 0; i < maxblocks; i++) {
+ struct shared_block *cur_block = (struct shared_block *)cur;
+ cur_block->len = 0;
+ cur_block->refcount = 0;
+ cur_block->block_count = 1;
+ LIST_APPEND(&shctx->avail, &cur_block->list);
+ shctx->nbav++;
+ cur += sizeof(struct shared_block) + blocksize;
+ }
+ ret = maxblocks;
+
+err:
+ *orig_shctx = shctx;
+ return ret;
+}
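+
+/* Usage sketch (illustrative only, hence guarded out): create a small
+ * process-private context of 256 blocks of 1kB, with no per-object size
+ * limit and no extra room after the struct. The sizes are arbitrary
+ * examples.
+ */
+#if 0
+static struct shared_context *example_shctx_create(void)
+{
+ struct shared_context *shctx = NULL;
+
+ if (shctx_init(&shctx, 256, 1024, 0, 0, 0) <= 0)
+  return NULL; /* disabled or allocation failure */
+ return shctx;
+}
+#endif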
+
diff --git a/src/signal.c b/src/signal.c
new file mode 100644
index 0000000..3d7a9a8
--- /dev/null
+++ b/src/signal.c
@@ -0,0 +1,284 @@
+/*
+ * Asynchronous signal delivery functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <signal.h>
+#include <string.h>
+
+#include <haproxy/errors.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+
+/* Principle: we keep an in-order list of the first occurrence of all received
+ * signals. All occurrences of a same signal are grouped though. The signal
+ * queue does not need to be deeper than the number of signals we can handle.
+ * The handlers will be called asynchronously with the signal number. They can
+ * check the number of calls themselves by checking this signal's descriptor.
+ */
+
+int signal_queue_len; /* length of signal queue, <= MAX_SIGNAL (1 entry per signal max) */
+int signal_queue[MAX_SIGNAL]; /* in-order queue of received signals */
+struct signal_descriptor signal_state[MAX_SIGNAL];
+sigset_t blocked_sig;
+int signal_pending = 0; /* non-zero if at least one signal remains unprocessed */
+
+DECLARE_STATIC_POOL(pool_head_sig_handlers, "sig_handlers", sizeof(struct sig_handler));
+
+/* Common signal handler, used by all signals. Received signals are queued.
+ * Signal number zero has a specific status, as it cannot be delivered by the
+ * system; any function may call it to perform asynchronous signal delivery.
+ */
+void signal_handler(int sig)
+{
+ if (sig < 0 || sig >= MAX_SIGNAL) {
+ /* unhandled signal */
+ signal(sig, SIG_IGN);
+ qfprintf(stderr, "Received unhandled signal %d. Signal has been disabled.\n", sig);
+ return;
+ }
+
+ if (!signal_state[sig].count) {
+ /* signal was not queued yet */
+ if (signal_queue_len < MAX_SIGNAL)
+ signal_queue[signal_queue_len++] = sig;
+ else
+ qfprintf(stderr, "Signal %d : signal queue is unexpectedly full.\n", sig);
+ }
+
+ signal_state[sig].count++;
+ if (sig)
+ signal(sig, signal_handler); /* re-arm signal */
+
+ /* If the thread is TH_FL_SLEEPING we need to wake it */
+ wake_thread(tid);
+}
+
+/* Call handlers of all pending signals and clear counts and queue length. The
+ * handlers may unregister themselves by calling signal_register() while they
+ * are called, just like it is done with normal signal handlers.
+ * Note that it is more efficient to call the inline version which checks the
+ * queue length before getting here.
+ */
+void __signal_process_queue()
+{
+ int sig, cur_pos = 0;
+ struct signal_descriptor *desc;
+ sigset_t old_sig;
+
+ /* block signal delivery during processing */
+ ha_sigmask(SIG_SETMASK, &blocked_sig, &old_sig);
+
+ /* It is important that we scan the queue forwards so that we can
+ * catch any signal that would have been queued by another signal
+ * handler. That allows real signal handlers to redistribute signals
+ * to tasks subscribed to signal zero.
+ */
+ for (cur_pos = 0; cur_pos < signal_queue_len; cur_pos++) {
+ sig = signal_queue[cur_pos];
+ desc = &signal_state[sig];
+ if (desc->count) {
+ struct sig_handler *sh, *shb;
+ list_for_each_entry_safe(sh, shb, &desc->handlers, list) {
+ if ((sh->flags & SIG_F_TYPE_FCT) && sh->handler)
+ ((void (*)(struct sig_handler *))sh->handler)(sh);
+ else if ((sh->flags & SIG_F_TYPE_TASK) && sh->handler)
+ task_wakeup(sh->handler, TASK_WOKEN_SIGNAL);
+ }
+ desc->count = 0;
+ }
+ }
+ signal_queue_len = 0;
+
+ /* restore signal delivery */
+ ha_sigmask(SIG_SETMASK, &old_sig, NULL);
+}
+
+/* perform minimal initializations */
+static void signal_init()
+{
+ int sig;
+
+ signal_queue_len = 0;
+ memset(signal_queue, 0, sizeof(signal_queue));
+ memset(signal_state, 0, sizeof(signal_state));
+
+ sigfillset(&blocked_sig);
+ sigdelset(&blocked_sig, SIGPROF);
+ /* man sigprocmask: if SIGBUS, SIGFPE, SIGILL, or SIGSEGV are
+ * generated while they are blocked, the result is undefined, unless
+ * the signal was generated by kill(2), sigqueue(3), or raise(3).
+ * Do not ignore WDTSIG or DEBUGSIG either, or it may deadlock the
+ * watchdog.
+ */
+ sigdelset(&blocked_sig, SIGBUS);
+ sigdelset(&blocked_sig, SIGFPE);
+ sigdelset(&blocked_sig, SIGILL);
+ sigdelset(&blocked_sig, SIGSEGV);
+#ifdef DEBUGSIG
+ sigdelset(&blocked_sig, DEBUGSIG);
+#endif
+#ifdef WDTSIG
+ sigdelset(&blocked_sig, WDTSIG);
+#endif
+ for (sig = 0; sig < MAX_SIGNAL; sig++)
+ LIST_INIT(&signal_state[sig].handlers);
+}
+
+/*
+ * This function should be called to unblock all signals
+ */
+void haproxy_unblock_signals()
+{
+ sigset_t set;
+
+ /* Ensure signals are not blocked. Some shells or service managers may
+ * accidentally block all of our signals unfortunately, causing lots of
+ * zombie processes to remain in the background during reloads.
+ */
+ sigemptyset(&set);
+ ha_sigmask(SIG_SETMASK, &set, NULL);
+}
+
+/* releases all registered signal handlers */
+void deinit_signals()
+{
+ int sig;
+ struct sig_handler *sh, *shb;
+
+ for (sig = 0; sig < MAX_SIGNAL; sig++) {
+ if (sig != SIGPROF)
+ signal(sig, SIG_DFL);
+ list_for_each_entry_safe(sh, shb, &signal_state[sig].handlers, list) {
+ LIST_DELETE(&sh->list);
+ pool_free(pool_head_sig_handlers, sh);
+ }
+ }
+}
+
+/* Register a function and an integer argument on a signal. A pointer to the
+ * newly allocated sig_handler is returned, or NULL in case of any error. The
+ * caller is responsible for unregistering the function when not used anymore.
+ * Note that passing a NULL as the function pointer enables interception of the
+ * signal without processing, which is identical to SIG_IGN. If the signal is
+ * zero (which the system cannot deliver), only internal functions will be able
+ * to notify the registered functions.
+ */
+struct sig_handler *signal_register_fct(int sig, void (*fct)(struct sig_handler *), int arg)
+{
+ struct sig_handler *sh;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return NULL;
+
+ if (sig)
+ signal(sig, fct ? signal_handler : SIG_IGN);
+
+ if (!fct)
+ return NULL;
+
+ sh = pool_alloc(pool_head_sig_handlers);
+ if (!sh)
+ return NULL;
+
+ sh->handler = fct;
+ sh->arg = arg;
+ sh->flags = SIG_F_TYPE_FCT;
+ LIST_APPEND(&signal_state[sig].handlers, &sh->list);
+ return sh;
+}
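+
+/* Usage sketch (illustrative only, hence guarded out): register a
+ * hypothetical function on SIGUSR1; the <arg> value (42 here) is stored in
+ * the sig_handler and can be read back by the handler.
+ */
+#if 0
+static void example_usr1_handler(struct sig_handler *sh)
+{
+ qfprintf(stderr, "SIGUSR1 received (arg=%d)\n", sh->arg);
+}
+
+static void example_register_usr1(void)
+{
+ if (!signal_register_fct(SIGUSR1, example_usr1_handler, 42))
+  qfprintf(stderr, "failed to register SIGUSR1 handler\n");
+}
+#endif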
+
+/* Register a task and a wake-up reason on a signal. A pointer to the newly
+ * allocated sig_handler is returned, or NULL in case of any error. The caller
+ * is responsible for unregistering the task when not used anymore. Note that
+ * passing a NULL as the task pointer enables interception of the signal
+ * without processing, which is identical to SIG_IGN. If the signal is zero
+ * (which the system cannot deliver), only internal functions will be able to
+ * notify the registered functions.
+ */
+struct sig_handler *signal_register_task(int sig, struct task *task, int reason)
+{
+ struct sig_handler *sh;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return NULL;
+
+ if (sig)
+ signal(sig, signal_handler);
+
+ if (!task)
+ return NULL;
+
+ sh = pool_alloc(pool_head_sig_handlers);
+ if (!sh)
+ return NULL;
+
+ sh->handler = task;
+ sh->arg = reason & ~TASK_WOKEN_ANY;
+ sh->flags = SIG_F_TYPE_TASK;
+ LIST_APPEND(&signal_state[sig].handlers, &sh->list);
+ return sh;
+}
+
+/* Immediately unregister a handler so that no further signals may be delivered
+ * to it. The struct is released so the caller may not reference it anymore.
+ */
+void signal_unregister_handler(struct sig_handler *handler)
+{
+ LIST_DELETE(&handler->list);
+ pool_free(pool_head_sig_handlers, handler);
+}
+
+/* Immediately unregister a handler so that no further signals may be delivered
+ * to it. The handler struct does not need to be known, only the function or
+ * task pointer. This method is expensive because it scans the whole list, so it
+ * should only be used for rare cases (eg: exit). The struct is released so the
+ * caller may not reference it anymore.
+ */
+void signal_unregister_target(int sig, void *target)
+{
+ struct sig_handler *sh, *shb;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return;
+
+ if (!target)
+ return;
+
+ list_for_each_entry_safe(sh, shb, &signal_state[sig].handlers, list) {
+ if (sh->handler == target) {
+ LIST_DELETE(&sh->list);
+ pool_free(pool_head_sig_handlers, sh);
+ break;
+ }
+ }
+}
+
+/*
+ * Immediately unregister every handler assigned to a signal <sig>.
+ * Once the handler list is empty, the signal is ignored with SIG_IGN.
+ */
+
+void signal_unregister(int sig)
+{
+ struct sig_handler *sh, *shb;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return;
+
+ list_for_each_entry_safe(sh, shb, &signal_state[sig].handlers, list) {
+ LIST_DELETE(&sh->list);
+ pool_free(pool_head_sig_handlers, sh);
+ }
+
+ signal(sig, SIG_IGN);
+}
+
+INITCALL0(STG_PREPARE, signal_init);
diff --git a/src/sink.c b/src/sink.c
new file mode 100644
index 0000000..6dbda87
--- /dev/null
+++ b/src/sink.c
@@ -0,0 +1,1434 @@
+/*
+ * Event sink management
+ *
+ * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <import/ist.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/ring.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/signal.h>
+#include <haproxy/sink.h>
+#include <haproxy/stconn.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+struct list sink_list = LIST_HEAD_INIT(sink_list);
+
+/* sink proxies list */
+struct proxy *sink_proxies_list;
+
+struct sink *cfg_sink;
+
+struct sink *sink_find(const char *name)
+{
+ struct sink *sink;
+
+ list_for_each_entry(sink, &sink_list, sink_list)
+ if (strcmp(sink->name, name) == 0)
+ return sink;
+ return NULL;
+}
+
+/* creates a new sink and adds it to the list; it's still generic and not fully
+ * initialized. Returns NULL on allocation failure. If another one already
+ * exists with the same name, it will be returned. The caller can detect it as
+ * a newly created one has type SINK_TYPE_NEW.
+ */
+static struct sink *__sink_new(const char *name, const char *desc, int fmt)
+{
+ struct sink *sink;
+
+ sink = sink_find(name);
+ if (sink)
+ goto end;
+
+ sink = calloc(1, sizeof(*sink));
+ if (!sink)
+ goto end;
+
+ sink->name = strdup(name);
+ if (!sink->name)
+ goto err;
+
+ sink->desc = strdup(desc);
+ if (!sink->desc)
+ goto err;
+
+ sink->fmt = fmt;
+ sink->type = SINK_TYPE_NEW;
+ sink->maxlen = BUFSIZE;
+ /* address will be filled by the caller if needed */
+ sink->ctx.fd = -1;
+ sink->ctx.dropped = 0;
+ HA_RWLOCK_INIT(&sink->ctx.lock);
+ LIST_APPEND(&sink_list, &sink->sink_list);
+ end:
+ return sink;
+
+ err:
+ ha_free(&sink->name);
+ ha_free(&sink->desc);
+ ha_free(&sink);
+
+ return NULL;
+}
+
+/* creates a sink called <name> of type FD associated with fd <fd>, format <fmt>,
+ * and description <desc>. Returns NULL on allocation failure or conflict.
+ * Perfect duplicates are merged (same type, fd, and name).
+ */
+struct sink *sink_new_fd(const char *name, const char *desc, enum log_fmt fmt, int fd)
+{
+ struct sink *sink;
+
+ sink = __sink_new(name, desc, fmt);
+ if (!sink || (sink->type == SINK_TYPE_FD && sink->ctx.fd == fd))
+ goto end;
+
+ if (sink->type != SINK_TYPE_NEW) {
+ sink = NULL;
+ goto end;
+ }
+
+ sink->type = SINK_TYPE_FD;
+ sink->ctx.fd = fd;
+ end:
+ return sink;
+}
+
+/* creates a sink called <name> of type BUF of size <size>, format <fmt>,
+ * and description <desc>. Returns NULL on allocation failure or conflict.
+ * Perfect duplicates are merged (same type and name). If sizes differ, the
+ * largest one is kept.
+ */
+struct sink *sink_new_buf(const char *name, const char *desc, enum log_fmt fmt, size_t size)
+{
+ struct sink *sink;
+
+ sink = __sink_new(name, desc, fmt);
+ if (!sink)
+ goto fail;
+
+ if (sink->type == SINK_TYPE_BUFFER) {
+ /* such a buffer already exists, we may have to resize it */
+ if (!ring_resize(sink->ctx.ring, size))
+ goto fail;
+ goto end;
+ }
+
+ if (sink->type != SINK_TYPE_NEW) {
+ /* already exists of another type */
+ goto fail;
+ }
+
+ sink->ctx.ring = ring_new(size);
+ if (!sink->ctx.ring) {
+ LIST_DELETE(&sink->sink_list);
+ free(sink->name);
+ free(sink->desc);
+ free(sink);
+ goto fail;
+ }
+
+ sink->type = SINK_TYPE_BUFFER;
+ end:
+ return sink;
+ fail:
+ return NULL;
+}
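+
+/* Usage sketch (illustrative only, hence guarded out): create a hypothetical
+ * 1MB ring-buffer sink. Calling it again with the same name returns the
+ * existing sink, possibly enlarged as described above.
+ */
+#if 0
+static struct sink *example_make_buf_sink(void)
+{
+ return sink_new_buf("dbg", "example debug ring", LOG_FORMAT_RAW, 1024 * 1024);
+}
+#endif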
+
+/* tries to send <nmsg> message parts (up to 8, ignored above) from message
+ * array <msg> to sink <sink>. Formatting according to the sink's preference is
+ * done here. Lost messages are NOT accounted for. It is preferable to call
+ * sink_write() instead which will also try to emit the number of dropped
+ * messages when there are any. It returns >0 if it could write anything,
+ * <=0 otherwise.
+ */
+ssize_t __sink_write(struct sink *sink, const struct ist msg[], size_t nmsg,
+                     int level, int facility, struct ist *metadata)
+{
+ struct ist *pfx = NULL;
+ size_t npfx = 0;
+
+ if (sink->fmt == LOG_FORMAT_RAW)
+ goto send;
+
+ pfx = build_log_header(sink->fmt, level, facility, metadata, &npfx);
+
+send:
+ if (sink->type == SINK_TYPE_FD) {
+ return fd_write_frag_line(sink->ctx.fd, sink->maxlen, pfx, npfx, msg, nmsg, 1);
+ }
+ else if (sink->type == SINK_TYPE_BUFFER) {
+ return ring_write(sink->ctx.ring, sink->maxlen, pfx, npfx, msg, nmsg);
+ }
+ return 0;
+}
+
+/* Tries to emit a message indicating the number of dropped events. In case of
+ * success, the amount of drops is reduced by as much. It's supposed to be
+ * called under an exclusive lock on the sink to avoid multiple producers doing
+ * the same. On success, >0 is returned, otherwise <=0 on failure.
+ */
+int sink_announce_dropped(struct sink *sink, int facility)
+{
+ static THREAD_LOCAL struct ist metadata[LOG_META_FIELDS];
+ static THREAD_LOCAL pid_t curr_pid;
+ static THREAD_LOCAL char pidstr[16];
+ unsigned int dropped;
+ struct buffer msg;
+ struct ist msgvec[1];
+ char logbuf[64];
+
+ while (unlikely((dropped = sink->ctx.dropped) > 0)) {
+ chunk_init(&msg, logbuf, sizeof(logbuf));
+ chunk_printf(&msg, "%u event%s dropped", dropped, dropped > 1 ? "s" : "");
+ msgvec[0] = ist2(msg.area, msg.data);
+
+ if (!metadata[LOG_META_HOST].len) {
+ if (global.log_send_hostname)
+ metadata[LOG_META_HOST] = ist(global.log_send_hostname);
+ }
+
+ if (!metadata[LOG_META_TAG].len)
+ metadata[LOG_META_TAG] = ist2(global.log_tag.area, global.log_tag.data);
+
+ if (unlikely(curr_pid != getpid()))
+ metadata[LOG_META_PID].len = 0;
+
+ if (!metadata[LOG_META_PID].len) {
+ curr_pid = getpid();
+ ltoa_o(curr_pid, pidstr, sizeof(pidstr));
+ metadata[LOG_META_PID] = ist2(pidstr, strlen(pidstr));
+ }
+
+ if (__sink_write(sink, msgvec, 1, LOG_NOTICE, facility, metadata) <= 0)
+ return 0;
+ /* success! */
+ HA_ATOMIC_SUB(&sink->ctx.dropped, dropped);
+ }
+ return 1;
+}
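+
+/* Illustrative sketch of the intended producer pattern around the function
+ * above (a simplified variant of sink_write(), with the locking assumed to
+ * be handled by the caller): announce accumulated drops first, and count a
+ * new drop whenever a message cannot be written.
+ */
+__attribute__((unused))
+static ssize_t sink_write_sketch(struct sink *sink, const struct ist msg[], size_t nmsg,
+ int level, int facility, struct ist *metadata)
+{
+ ssize_t sent = 0;
+
+ /* the exclusive sink lock is assumed to be held here */
+ if (sink->ctx.dropped > 0 && sink_announce_dropped(sink, facility) <= 0)
+ goto fail;
+
+ sent = __sink_write(sink, msg, nmsg, level, facility, metadata);
+ fail:
+ if (sent <= 0)
+ HA_ATOMIC_INC(&sink->ctx.dropped);
+ return sent;
+}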
+
+/* parse the "show events" command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_show_events(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct sink *sink;
+ uint ring_flags;
+ int arg;
+
+ args++; // make args[1] the 1st arg
+
+ if (!*args[1]) {
+ /* no arg => report the list of supported sinks */
+ chunk_printf(&trash, "Supported events sinks are listed below. Add -w(wait), -n(new). Any key to stop\n");
+ list_for_each_entry(sink, &sink_list, sink_list) {
+ chunk_appendf(&trash, " %-10s : type=%s, %u dropped, %s\n",
+ sink->name,
+ sink->type == SINK_TYPE_NEW ? "init" :
+ sink->type == SINK_TYPE_FD ? "fd" :
+ sink->type == SINK_TYPE_BUFFER ? "buffer" : "?",
+ sink->ctx.dropped, sink->desc);
+ }
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ sink = sink_find(args[1]);
+ if (!sink)
+ return cli_err(appctx, "No such event sink");
+
+ if (sink->type != SINK_TYPE_BUFFER)
+ return cli_msg(appctx, LOG_NOTICE, "Nothing to report for this sink");
+
+ ring_flags = 0;
+ for (arg = 2; *args[arg]; arg++) {
+ if (strcmp(args[arg], "-w") == 0)
+ ring_flags |= RING_WF_WAIT_MODE;
+ else if (strcmp(args[arg], "-n") == 0)
+ ring_flags |= RING_WF_SEEK_NEW;
+ else if (strcmp(args[arg], "-nw") == 0 || strcmp(args[arg], "-wn") == 0)
+ ring_flags |= RING_WF_WAIT_MODE | RING_WF_SEEK_NEW;
+ else
+ return cli_err(appctx, "unknown option");
+ }
+ return ring_attach_cli(sink->ctx.ring, appctx, ring_flags);
+}
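+
+/* Example CLI exchange for the parser above (the socket path and sink
+ * contents are illustrative):
+ *
+ *   $ echo "show events" | socat stdio /var/run/haproxy.sock
+ *   Supported events sinks are listed below. Add -w(wait), -n(new). Any key to stop
+ *     buf0       : type=buffer, 0 dropped, in-memory ring buffer
+ *
+ *   $ echo "show events buf0 -w" | socat stdio /var/run/haproxy.sock
+ */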
+
+/* Pre-configures a ring proxy to emit connections */
+void sink_setup_proxy(struct proxy *px)
+{
+ px->last_change = now.tv_sec;
+ px->cap = PR_CAP_BE;
+ px->maxconn = 0;
+ px->conn_retries = 1;
+ px->timeout.server = TICK_ETERNITY;
+ px->timeout.client = TICK_ETERNITY;
+ px->timeout.connect = TICK_ETERNITY;
+ px->accept = NULL;
+ px->options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
+ px->next = sink_proxies_list;
+ sink_proxies_list = px;
+}
+
+/*
+ * IO Handler to handle message push to syslog tcp server.
+ * It takes its context from appctx->svcctx.
+ */
+static void sink_forward_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct sink *sink = strm_fe(s)->parent;
+ struct sink_forward_target *sft = appctx->svcctx;
+ struct ring *ring = sink->ctx.ring;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs, last_ofs = 0;
+ int ret = 0;
+
+ /* if stopping was requested, close immediately */
+ if (unlikely(stopping))
+ goto close;
+
+ /* force rex to eternity: it seems to get reset to the timeout,
+ * and we don't want to expire in this case since we're talking
+ * to a syslog server
+ */
+ sc_oc(sc)->rex = TICK_ETERNITY;
+ /* rto should not change, but it seems it sometimes does */
+ sc_oc(sc)->rto = TICK_ETERNITY;
+
+ /* an error was detected */
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto close;
+
+ /* con closed by server side */
+ if ((sc_oc(sc)->flags & CF_SHUTW))
+ goto close;
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ return;
+ }
+
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ if (appctx != sft->appctx) {
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ goto close;
+ }
+ ofs = sft->ofs;
+
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(LOGSRV_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ofs == ~0)) {
+ ofs = 0;
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ }
+
+ /* in this loop, ofs always points to the counter byte that precedes
+ * the message so that we can take our reference there if we have to
+ * stop before the end (ret=0).
+ */
+ if (sc_opposite(sc)->state == SC_ST_EST) {
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs -= ring->ofs;
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ if (unlikely(msg_len + 1 > b_size(&trash))) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ chunk_reset(&trash);
+ len = b_getblk(buf, trash.area, msg_len, ofs + cnt);
+ trash.data += len;
+ trash.area[trash.data++] = '\n';
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ret = 0;
+ break;
+ }
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ sft->ofs = ofs;
+ last_ofs = ring->ofs;
+ }
+ HA_RWLOCK_RDUNLOCK(LOGSRV_LOCK, &ring->lock);
+
+ if (ret) {
+ /* let's be woken up once new data arrive */
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ ofs = ring->ofs;
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+ if (ofs != last_ofs) {
+ /* more data was added into the ring between the
+ * unlock and the lock, and the writer might not
+ * have seen us. We need to reschedule a read.
+ */
+ applet_have_more_data(appctx);
+ } else
+ applet_have_no_more_data(appctx);
+ }
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+
+ /* always drain data from server */
+ co_skip(sc_oc(sc), sc_oc(sc)->output);
+ return;
+
+close:
+ sc_shutw(sc);
+ sc_shutr(sc);
+ sc_ic(sc)->flags |= CF_READ_NULL;
+}
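+
+/* The reader above relies on the ring layout sketched below (illustrative):
+ *
+ *   ... [cnt][varint msg_len][msg_len bytes] [cnt][varint msg_len][msg] ...
+ *        ^
+ *        ofs: always on the one-byte reader counter preceding a message.
+ *
+ * HA_ATOMIC_INC()/HA_ATOMIC_DEC() on that byte pin/unpin the position so the
+ * writer cannot recycle it between two invocations, and adding/subtracting
+ * ring->ofs converts between absolute and head-relative offsets across
+ * scheduling gaps.
+ */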
+
+/*
+ * IO Handler to handle message push to syslog tcp server
+ * using octet counting frames
+ * It takes its context from appctx->svcctx.
+ */
+static void sink_forward_oc_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct sink *sink = strm_fe(s)->parent;
+ struct sink_forward_target *sft = appctx->svcctx;
+ struct ring *ring = sink->ctx.ring;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs;
+ int ret = 0;
+ char *p;
+
+ /* if stopping was requested, close immediately */
+ if (unlikely(stopping))
+ goto close;
+
+ /* force rex to eternity: it seems to get reset to the timeout,
+ * and we don't want to expire in this case since we're talking
+ * to a syslog server
+ */
+ sc_oc(sc)->rex = TICK_ETERNITY;
+ /* rto should not change, but it seems it sometimes does */
+ sc_oc(sc)->rto = TICK_ETERNITY;
+
+ /* an error was detected */
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto close;
+
+ /* con closed by server side */
+ if ((sc_oc(sc)->flags & CF_SHUTW))
+ goto close;
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ return;
+ }
+
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ if (appctx != sft->appctx) {
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ goto close;
+ }
+ ofs = sft->ofs;
+
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(LOGSRV_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ofs == ~0)) {
+ ofs = 0;
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ }
+
+ /* in this loop, ofs always points to the counter byte that precedes
+ * the message so that we can take our reference there if we have to
+ * stop before the end (ret=0).
+ */
+ if (sc_opposite(sc)->state == SC_ST_EST) {
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs -= ring->ofs;
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ chunk_reset(&trash);
+ p = ulltoa(msg_len, trash.area, b_size(&trash));
+ if (p) {
+ trash.data = (p - trash.area) + 1;
+ *p = ' ';
+ }
+
+ if (!p || (trash.data + msg_len > b_size(&trash))) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ trash.data += b_getblk(buf, p + 1, msg_len, ofs + cnt);
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ret = 0;
+ break;
+ }
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ofs += ring->ofs;
+ sft->ofs = ofs;
+ }
+ HA_RWLOCK_RDUNLOCK(LOGSRV_LOCK, &ring->lock);
+
+ if (ret) {
+ /* let's be woken up once new data arrive */
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &ring->lock);
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &ring->lock);
+ applet_have_no_more_data(appctx);
+ }
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+
+ /* always drain data from server */
+ co_skip(sc_oc(sc), sc_oc(sc)->output);
+ return;
+
+close:
+ sc_shutw(sc);
+ sc_shutr(sc);
+ sc_ic(sc)->flags |= CF_READ_NULL;
+}
+
+void __sink_forward_session_deinit(struct sink_forward_target *sft)
+{
+ struct stream *s = appctx_strm(sft->appctx);
+ struct sink *sink;
+
+ sink = strm_fe(s)->parent;
+ if (!sink)
+ return;
+
+ HA_RWLOCK_WRLOCK(LOGSRV_LOCK, &sink->ctx.ring->lock);
+ LIST_DEL_INIT(&sft->appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(LOGSRV_LOCK, &sink->ctx.ring->lock);
+
+ sft->appctx = NULL;
+ task_wakeup(sink->forward_task, TASK_WOKEN_MSG);
+}
+
+static int sink_forward_session_init(struct appctx *appctx)
+{
+ struct sink_forward_target *sft = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+
+ if (!sockaddr_alloc(&addr, &sft->srv->addr, sizeof(sft->srv->addr)))
+ goto out_error;
+
+ if (appctx_finalize_startup(appctx, sft->sink->forward_px, &BUF_NULL) == -1)
+ goto out_free_addr;
+
+ s = appctx_strm(appctx);
+ s->scb->dst = addr;
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ s->target = &sft->srv->obj_type;
+ s->flags = SF_ASSIGNED;
+
+ s->do_log = NULL;
+ s->uniq_id = 0;
+
+ s->res.flags |= CF_READ_DONTWAIT;
+ /* for rto and rex to eternity to not expire on idle recv:
+ * We are using a syslog server.
+ */
+ s->res.rto = TICK_ETERNITY;
+ s->res.rex = TICK_ETERNITY;
+ sft->appctx = appctx;
+
+ return 0;
+
+ out_free_addr:
+ sockaddr_free(&addr);
+ out_error:
+ return -1;
+}
+
+static void sink_forward_session_release(struct appctx *appctx)
+{
+ struct sink_forward_target *sft = appctx->svcctx;
+
+ if (!sft)
+ return;
+
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ if (sft->appctx == appctx)
+ __sink_forward_session_deinit(sft);
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+}
+
+static struct applet sink_forward_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SINKFWD>", /* used for logging */
+ .fct = sink_forward_io_handler,
+ .init = sink_forward_session_init,
+ .release = sink_forward_session_release,
+};
+
+static struct applet sink_forward_oc_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SINKFWDOC>", /* used for logging */
+ .fct = sink_forward_oc_io_handler,
+ .init = sink_forward_session_init,
+ .release = sink_forward_session_release,
+};
+
+/*
+ * Create a new sink forward session in assigned state (connect will start automatically).
+ * It sets its context into appctx->svcctx.
+ */
+static struct appctx *sink_forward_session_create(struct sink *sink, struct sink_forward_target *sft)
+{
+ struct appctx *appctx;
+ struct applet *applet = &sink_forward_applet;
+
+ if (sft->srv->log_proto == SRV_LOG_PROTO_OCTET_COUNTING)
+ applet = &sink_forward_oc_applet;
+
+ appctx = appctx_new_here(applet, NULL);
+ if (!appctx)
+ goto out_close;
+ appctx->svcctx = (void *)sft;
+
+ if (appctx_init(appctx) == -1)
+ goto out_free_appctx;
+
+ return appctx;
+
+ /* Error unrolling */
+ out_free_appctx:
+ appctx_free_on_early_error(appctx);
+ out_close:
+ return NULL;
+}
+
+/*
+ * Task to handle connections to forward servers
+ */
+static struct task *process_sink_forward(struct task * task, void *context, unsigned int state)
+{
+ struct sink *sink = (struct sink *)context;
+ struct sink_forward_target *sft = sink->sft;
+
+ task->expire = TICK_ETERNITY;
+
+ if (!stopping) {
+ while (sft) {
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ /* if appctx is NULL, start a new session */
+ if (!sft->appctx)
+ sft->appctx = sink_forward_session_create(sink, sft);
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ sft = sft->next;
+ }
+ }
+ else {
+ while (sft) {
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ /* awake applet to perform a clean close */
+ if (sft->appctx)
+ appctx_wakeup(sft->appctx);
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ sft = sft->next;
+ }
+ }
+
+ return task;
+}
+/*
+ * Init task to manage connections to forward servers
+ *
+ * returns 0 in case of error.
+ */
+int sink_init_forward(struct sink *sink)
+{
+ sink->forward_task = task_new_anywhere();
+ if (!sink->forward_task)
+ return 0;
+
+ sink->forward_task->process = process_sink_forward;
+ sink->forward_task->context = (void *)sink;
+ sink->forward_sighandler = signal_register_task(0, sink->forward_task, 0);
+ task_wakeup(sink->forward_task, TASK_WOKEN_INIT);
+ return 1;
+}
+
+/* This tries to rotate a file-backed ring, but only if it contains contents.
+ * This way empty rings will not cause backups to be overwritten and it's safe
+ * to reload multiple times. This is best-effort only; failures are silently
+ * ignored.
+ */
+void sink_rotate_file_backed_ring(const char *name)
+{
+ struct ring ring;
+ char *oldback;
+ int ret;
+ int fd;
+
+ fd = open(name, O_RDONLY);
+ if (fd < 0)
+ return;
+
+ /* check for contents validity */
+ ret = read(fd, &ring, sizeof(ring));
+ close(fd);
+
+ if (ret != sizeof(ring))
+ goto rotate;
+
+ /* contents are present, we want to keep them => rotate. Note that
+ * an empty ring buffer has one byte (the marker).
+ */
+ if (ring.buf.data > 1)
+ goto rotate;
+
+ /* nothing to keep, let's scratch the file and preserve the backup */
+ return;
+
+ rotate:
+ oldback = NULL;
+ memprintf(&oldback, "%s.bak", name);
+ if (oldback) {
+ /* try to rename any possibly existing ring file to
+ * ".bak" and delete remains of older ones. This will
+ * ensure we don't wipe useful debug info upon restart.
+ */
+ unlink(oldback);
+ if (rename(name, oldback) < 0)
+ unlink(oldback);
+ ha_free(&oldback);
+ }
+}
+
+/*
+ * Parse "ring" section and create corresponding sink buffer.
+ *
+ * The function returns 0 in success case, otherwise, it returns error
+ * flags.
+ */
+int cfg_parse_ring(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = 0;
+ const char *inv;
+ size_t size = BUFSIZE;
+ struct proxy *p;
+
+ if (strcmp(args[0], "ring") == 0) { /* new ring section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing ring name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ inv = invalid_char(args[1]);
+ if (inv) {
+ ha_alert("parsing [%s:%d] : invalid ring name '%s' (character '%c' is not permitted).\n", file, linenum, args[1], *inv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (sink_find(args[1])) {
+ ha_alert("parsing [%s:%d] : sink named '%s' already exists.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ cfg_sink = sink_new_buf(args[1], args[1], LOG_FORMAT_RAW, size);
+ if (!cfg_sink || cfg_sink->type != SINK_TYPE_BUFFER) {
+ ha_alert("parsing [%s:%d] : unable to create a new sink buffer for ring '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* allocate new proxy to handle forwards */
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ init_new_proxy(p);
+ sink_setup_proxy(p);
+ p->parent = cfg_sink;
+ p->id = strdup(args[1]);
+ p->conf.args.file = p->conf.file = strdup(file);
+ p->conf.args.line = p->conf.line = linenum;
+ cfg_sink->forward_px = p;
+ }
+ else if (strcmp(args[0], "size") == 0) {
+ if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) {
+ ha_alert("parsing [%s:%d] : 'size' directive not usable with this type of sink.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ size = atol(args[1]);
+ if (!size) {
+ ha_alert("parsing [%s:%d] : invalid size '%s' for new sink buffer.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (cfg_sink->store) {
+ ha_alert("parsing [%s:%d] : cannot resize an already mapped file, please specify 'size' before 'backing-file'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (size < cfg_sink->ctx.ring->buf.size) {
+ ha_warning("parsing [%s:%d] : ignoring new size '%llu' that is smaller than current size '%llu' for ring '%s'.\n",
+ file, linenum, (ullong)size, (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name);
+ err_code |= ERR_WARN;
+ goto err;
+ }
+
+ if (!ring_resize(cfg_sink->ctx.ring, size)) {
+ ha_alert("parsing [%s:%d] : fail to set sink buffer size '%llu' for ring '%s'.\n", file, linenum,
+ (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else if (strcmp(args[0], "backing-file") == 0) {
+ /* This tries to mmap the backing file for the configured size and
+ * to use it as a backing store for the ring. Existing data are
+ * deleted.
+ */
+ const char *backing = args[1];
+ size_t size;
+ void *area;
+ int fd;
+
+ if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) {
+ ha_alert("parsing [%s:%d] : 'backing-file' only usable with existing rings.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (cfg_sink->store) {
+ ha_alert("parsing [%s:%d] : 'backing-file' already specified for ring '%s' (was '%s').\n", file, linenum, cfg_sink->name, cfg_sink->store);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* let's check if the file exists and is not empty. That's the
+ * only condition under which we'll trigger a rotate, so that
+ * config checks, reloads, or restarts that don't emit anything
+ * do not rotate it again.
+ */
+ sink_rotate_file_backed_ring(backing);
+
+ fd = open(backing, O_RDWR | O_CREAT, 0600);
+ if (fd < 0) {
+ ha_alert("parsing [%s:%d] : cannot open backing-file '%s' for ring '%s': %s.\n", file, linenum, backing, cfg_sink->name, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ size = (cfg_sink->ctx.ring->buf.size + 4095UL) & -4096UL;
+ if (ftruncate(fd, size) != 0) {
+ close(fd);
+ ha_alert("parsing [%s:%d] : could not adjust size of backing-file for ring '%s': %s.\n", file, linenum, cfg_sink->name, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ area = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (area == MAP_FAILED) {
+ close(fd);
+ ha_alert("parsing [%s:%d] : failed to use '%s' as a backing file for ring '%s': %s.\n", file, linenum, backing, cfg_sink->name, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* we don't need the file anymore */
+ close(fd);
+ cfg_sink->store = strdup(backing);
+
+ /* never fails */
+ ring_free(cfg_sink->ctx.ring);
+ cfg_sink->ctx.ring = ring_make_from_area(area, size);
+ }
+ else if (strcmp(args[0],"server") == 0) {
+ if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) {
+ ha_alert("parsing [%s:%d] : unable to create server '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ err_code |= parse_server(file, linenum, args, cfg_sink->forward_px, NULL,
+ SRV_PARSE_PARSE_ADDR|SRV_PARSE_INITIAL_RESOLVE);
+ }
+ else if (strcmp(args[0],"timeout") == 0) {
+ if (!cfg_sink || !cfg_sink->forward_px) {
+ ha_alert("parsing [%s:%d] : unable to set timeout '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (strcmp(args[1], "connect") == 0 ||
+ strcmp(args[1], "server") == 0) {
+ const char *res;
+ unsigned int tout;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects <time> as argument.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ res = parse_time_err(args[2], &tout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s %s>.\n",
+ file, linenum, *res, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ if (args[1][0] == 'c')
+ cfg_sink->forward_px->timeout.connect = tout;
+ else
+ cfg_sink->forward_px->timeout.server = tout;
+ }
+ }
+ else if (strcmp(args[0],"format") == 0) {
+ if (!cfg_sink) {
+ ha_alert("parsing [%s:%d] : unable to set format '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ cfg_sink->fmt = get_log_format(args[1]);
+ if (cfg_sink->fmt == LOG_FORMAT_UNSPEC) {
+ ha_alert("parsing [%s:%d] : unknown format '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else if (strcmp(args[0],"maxlen") == 0) {
+ if (!cfg_sink) {
+ ha_alert("parsing [%s:%d] : unable to set event max length '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ cfg_sink->maxlen = atol(args[1]);
+ if (!cfg_sink->maxlen) {
+ ha_alert("parsing [%s:%d] : invalid size '%s' for new sink buffer.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else if (strcmp(args[0],"description") == 0) {
+ if (!cfg_sink) {
+ ha_alert("parsing [%s:%d] : unable to set description '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing ring description text.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ free(cfg_sink->desc);
+
+ cfg_sink->desc = strdup(args[1]);
+ if (!cfg_sink->desc) {
+ ha_alert("parsing [%s:%d] : fail to set description '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown statement '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+err:
+ return err_code;
+}
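+
+/* Example of a "ring" section accepted by the parser above (names, sizes and
+ * addresses are illustrative):
+ *
+ *   ring myring
+ *       description "log forwarding ring"
+ *       format rfc3164
+ *       maxlen 1200
+ *       size 32768
+ *       backing-file /var/lib/haproxy/myring.bin
+ *       timeout connect 5s
+ *       timeout server 10s
+ *       server srv1 127.0.0.1:6514
+ */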
+
+/* Creates a new sink buffer from a log server.
+ *
+ * It uses the logsrv's address to declare a forward
+ * server for this buffer, and it initializes the
+ * forwarding.
+ *
+ * The function returns a pointer to the allocated
+ * struct sink if allocation and initialization
+ * succeed, otherwise it returns NULL.
+ *
+ * Note: the sink is created using the name
+ * specified in logsrv->ring_name.
+ */
+struct sink *sink_new_from_logsrv(struct logsrv *logsrv)
+{
+ struct proxy *p = NULL;
+ struct sink *sink = NULL;
+ struct server *srv = NULL;
+ struct sink_forward_target *sft = NULL;
+
+ /* allocate new proxy to handle
+ * forward to a stream server
+ */
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ goto error;
+ }
+
+ init_new_proxy(p);
+ sink_setup_proxy(p);
+ p->id = strdup(logsrv->ring_name);
+ p->conf.args.file = p->conf.file = strdup(logsrv->conf.file);
+ p->conf.args.line = p->conf.line = logsrv->conf.line;
+
+ /* Set default connect and server timeout */
+ p->timeout.connect = MS_TO_TICKS(1000);
+ p->timeout.server = MS_TO_TICKS(5000);
+
+ /* allocate a new server to forward messages
+ * from ring buffer
+ */
+ srv = new_server(p);
+ if (!srv)
+ goto error;
+
+ /* init server */
+ srv->id = strdup(logsrv->ring_name);
+ srv->conf.file = strdup(logsrv->conf.file);
+ srv->conf.line = logsrv->conf.line;
+ srv->addr = logsrv->addr;
+ srv->svc_port = get_host_port(&logsrv->addr);
+ HA_SPIN_INIT(&srv->lock);
+
+ /* process per thread init */
+ if (srv_init_per_thr(srv) == -1)
+ goto error;
+
+ /* the servers are linked backwards,
+ * newest first into the proxy
+ */
+ srv->next = p->srv;
+ p->srv = srv;
+
+ /* allocate sink_forward_target descriptor */
+ sft = calloc(1, sizeof(*sft));
+ if (!sft)
+ goto error;
+
+ /* init sink_forward_target offset */
+ sft->srv = srv;
+ sft->appctx = NULL;
+ sft->ofs = ~0;
+ HA_SPIN_INIT(&sft->lock);
+
+ /* prepare description for the sink */
+ chunk_reset(&trash);
+ chunk_printf(&trash, "created from logserver declared into '%s' at line %d", logsrv->conf.file, logsrv->conf.line);
+
+ /* allocate a new sink buffer */
+ sink = sink_new_buf(logsrv->ring_name, trash.area, logsrv->format, BUFSIZE);
+ if (!sink || sink->type != SINK_TYPE_BUFFER) {
+ goto error;
+ }
+
+ /* link sink_forward_target to proxy */
+ sink->forward_px = p;
+ p->parent = sink;
+
+ /* insert into sink_forward_targets
+ * list into sink
+ */
+ sft->sink = sink;
+ sft->next = sink->sft;
+ sink->sft = sft;
+
+ /* mark server as an attached reader to the ring */
+ if (!ring_attach(sink->ctx.ring)) {
+ /* should never fail since there is
+ * only one reader
+ */
+ goto error;
+ }
+
+ /* initialize sink buffer forwarding */
+ if (!sink_init_forward(sink))
+ goto error;
+
+ /* reset family of the logsrv so that the ring buffer target is considered */
+ logsrv->addr.ss_family = AF_UNSPEC;
+
+ return sink;
+error:
+ if (p) {
+ if (p->id)
+ free(p->id);
+ if (p->conf.file)
+ free(p->conf.file);
+
+ free(p);
+ }
+
+ if (srv) {
+ if (srv->id)
+ free(srv->id);
+ if (srv->conf.file)
+ free((void *)srv->conf.file);
+ if (srv->per_thr)
+ free(srv->per_thr);
+ free(srv);
+ }
+
+ if (sft)
+ free(sft);
+
+ if (sink) {
+ if (sink->ctx.ring)
+ ring_free(sink->ctx.ring);
+
+ LIST_DELETE(&sink->sink_list);
+ free(sink->name);
+ free(sink->desc);
+ free(sink);
+ }
+
+ return NULL;
+}
+
+/*
+ * Post parsing "ring" section.
+ *
+ * The function returns 0 in success case, otherwise, it returns error
+ * flags.
+ */
+int cfg_post_parse_ring()
+{
+ int err_code = 0;
+ struct server *srv;
+
+ if (cfg_sink && (cfg_sink->type == SINK_TYPE_BUFFER)) {
+ if (cfg_sink->maxlen > b_size(&cfg_sink->ctx.ring->buf)) {
+ ha_warning("ring '%s' event max length '%u' exceeds size, forced to size '%lu'.\n",
+ cfg_sink->name, cfg_sink->maxlen, (unsigned long)b_size(&cfg_sink->ctx.ring->buf));
+ cfg_sink->maxlen = b_size(&cfg_sink->ctx.ring->buf);
+ err_code |= ERR_ALERT;
+ }
+
+ /* prepare forward server descriptors */
+ if (cfg_sink->forward_px) {
+ srv = cfg_sink->forward_px->srv;
+ while (srv) {
+ struct sink_forward_target *sft;
+
+ /* allocate sink_forward_target descriptor */
+ sft = calloc(1, sizeof(*sft));
+ if (!sft) {
+ ha_alert("memory allocation error initializing server '%s' in ring '%s'.\n",srv->id, cfg_sink->name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ break;
+ }
+ sft->srv = srv;
+ sft->appctx = NULL;
+ sft->ofs = ~0; /* init ring offset */
+ sft->sink = cfg_sink;
+ sft->next = cfg_sink->sft;
+ HA_SPIN_INIT(&sft->lock);
+
+ /* mark server attached to the ring */
+ if (!ring_attach(cfg_sink->ctx.ring)) {
+ ha_alert("server '%s' sets too many watchers > 255 on ring '%s'.\n", srv->id, cfg_sink->name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ cfg_sink->sft = sft;
+ srv = srv->next;
+ }
+ sink_init_forward(cfg_sink);
+ }
+ }
+ cfg_sink = NULL;
+
+ return err_code;
+}
+
+/* resolve sink names at end of config. Returns 0 on success otherwise error
+ * flags.
+*/
+int post_sink_resolve()
+{
+ int err_code = ERR_NONE;
+ struct logsrv *logsrv, *logb;
+ struct sink *sink;
+ struct proxy *px;
+
+ list_for_each_entry_safe(logsrv, logb, &global.logsrvs, list) {
+ if (logsrv->type == LOG_TARGET_BUFFER) {
+ sink = sink_find(logsrv->ring_name);
+ if (!sink) {
+ /* LOG_TARGET_BUFFER with a family other
+ * than AF_UNSPEC means we must allocate
+ * a sink buffer to send messages to this
+ * logsrv
+ */
+ if (logsrv->addr.ss_family != AF_UNSPEC) {
+ sink = sink_new_from_logsrv(logsrv);
+ if (!sink) {
+ ha_alert("global stream log server declared in file '%s' at line %d cannot be initialized'.\n",
+ logsrv->conf.file, logsrv->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ ha_alert("global log server declared in file '%s' at line %d uses unknown ring named '%s'.\n",
+ logsrv->conf.file, logsrv->conf.line, logsrv->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (sink->type != SINK_TYPE_BUFFER) {
+ ha_alert("global log server declared in file '%s' at line %d uses incompatible ring '%s'.\n",
+ logsrv->conf.file, logsrv->conf.line, logsrv->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ logsrv->sink = sink;
+ }
+
+ }
+
+ for (px = proxies_list; px; px = px->next) {
+ list_for_each_entry_safe(logsrv, logb, &px->logsrvs, list) {
+ if (logsrv->type == LOG_TARGET_BUFFER) {
+ sink = sink_find(logsrv->ring_name);
+ if (!sink) {
+ /* LOG_TARGET_BUFFER with a family other
+ * than AF_UNSPEC means we must allocate
+ * a sink buffer to send messages to this
+ * logsrv
+ */
+ if (logsrv->addr.ss_family != AF_UNSPEC) {
+ sink = sink_new_from_logsrv(logsrv);
+ if (!sink) {
+ ha_alert("log server declared in proxy section '%s' file '%s' at line %d cannot be initialized'.\n",
+ px->id, logsrv->conf.file, logsrv->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ ha_alert("log server declared in proxy section '%s' in file '%s' at line %d uses unknown ring named '%s'.\n",
+ px->id, logsrv->conf.file, logsrv->conf.line, logsrv->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (sink->type != SINK_TYPE_BUFFER) {
+ ha_alert("log server declared in proxy section '%s' in file '%s' at line %d uses incomatible ring named '%s'.\n",
+ px->id, logsrv->conf.file, logsrv->conf.line, logsrv->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ logsrv->sink = sink;
+ }
+ }
+ }
+
+ for (px = cfg_log_forward; px; px = px->next) {
+ list_for_each_entry_safe(logsrv, logb, &px->logsrvs, list) {
+ if (logsrv->type == LOG_TARGET_BUFFER) {
+ sink = sink_find(logsrv->ring_name);
+ if (!sink) {
+ /* LOG_TARGET_BUFFER with a family other
+ * than AF_UNSPEC means we must allocate
+ * a sink buffer to send messages to this
+ * logsrv
+ */
+ if (logsrv->addr.ss_family != AF_UNSPEC) {
+ sink = sink_new_from_logsrv(logsrv);
+ if (!sink) {
+ ha_alert("log server declared in log-forward section '%s' file '%s' at line %d cannot be initialized'.\n",
+ px->id, logsrv->conf.file, logsrv->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ ha_alert("log server declared in log-forward section '%s' in file '%s' at line %d uses unknown ring named '%s'.\n",
+ px->id, logsrv->conf.file, logsrv->conf.line, logsrv->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (sink->type != SINK_TYPE_BUFFER) {
+ ha_alert("log server declared in log-forward section '%s' in file '%s' at line %d uses unknown ring named '%s'.\n",
+ px->id, logsrv->conf.file, logsrv->conf.line, logsrv->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ logsrv->sink = sink;
+ }
+ }
+ }
+ return err_code;
+}
+
+
+static void sink_init()
+{
+ sink_new_fd("stdout", "standard output (fd#1)", LOG_FORMAT_RAW, 1);
+ sink_new_fd("stderr", "standard output (fd#2)", LOG_FORMAT_RAW, 2);
+ sink_new_buf("buf0", "in-memory ring buffer", LOG_FORMAT_TIMED, 1048576);
+}
+
+static void sink_deinit()
+{
+ struct sink *sink, *sb;
+
+ list_for_each_entry_safe(sink, sb, &sink_list, sink_list) {
+ if (sink->type == SINK_TYPE_BUFFER) {
+ if (sink->store) {
+ size_t size = (sink->ctx.ring->buf.size + 4095UL) & -4096UL;
+ void *area = (sink->ctx.ring->buf.area - sizeof(*sink->ctx.ring));
+
+ msync(area, size, MS_SYNC);
+ munmap(area, size);
+ ha_free(&sink->store);
+ }
+ else
+ ring_free(sink->ctx.ring);
+ }
+ LIST_DELETE(&sink->sink_list);
+ task_destroy(sink->forward_task);
+ free(sink->name);
+ free(sink->desc);
+ free(sink);
+ }
+}
+
+INITCALL0(STG_REGISTER, sink_init);
+REGISTER_POST_DEINIT(sink_deinit);
+
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "events", NULL }, "show events [<sink>] [-w] [-n] : show event sink state", cli_parse_show_events, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/* config parsers for this section */
+REGISTER_CONFIG_SECTION("ring", cfg_parse_ring, cfg_post_parse_ring);
+REGISTER_POST_CHECK(post_sink_resolve);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/slz.c b/src/slz.c
new file mode 100644
index 0000000..0ca9d27
--- /dev/null
+++ b/src/slz.c
@@ -0,0 +1,1341 @@
+/*
+ * Copyright (C) 2013-2015 Willy Tarreau <w@1wt.eu>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <import/slz.h>
+#include <import/slz-tables.h>
+
+/* First, RFC1951-specific declarations and extracts from the RFC.
+ *
+ * RFC1951 - deflate stream format
+
+
+ * Data elements are packed into bytes in order of
+ increasing bit number within the byte, i.e., starting
+ with the least-significant bit of the byte.
+ * Data elements other than Huffman codes are packed
+ starting with the least-significant bit of the data
+ element.
+ * Huffman codes are packed starting with the most-
+ significant bit of the code.
+
+ 3.2.3. Details of block format
+
+ Each block of compressed data begins with 3 header bits
+ containing the following data:
+
+ first bit BFINAL
+ next 2 bits BTYPE
+
+ Note that the header bits do not necessarily begin on a byte
+ boundary, since a block does not necessarily occupy an integral
+ number of bytes.
+
+ BFINAL is set if and only if this is the last block of the data
+ set.
+
+ BTYPE specifies how the data are compressed, as follows:
+
+ 00 - no compression
+ 01 - compressed with fixed Huffman codes
+ 10 - compressed with dynamic Huffman codes
+ 11 - reserved (error)
+
+ 3.2.4. Non-compressed blocks (BTYPE=00)
+
+ Any bits of input up to the next byte boundary are ignored.
+ The rest of the block consists of the following information:
+
+ 0 1 2 3 4...
+ +---+---+---+---+================================+
+ | LEN | NLEN |... LEN bytes of literal data...|
+ +---+---+---+---+================================+
+
+ LEN is the number of data bytes in the block. NLEN is the
+ one's complement of LEN.
+
+ 3.2.5. Compressed blocks (length and distance codes)
+
+ As noted above, encoded data blocks in the "deflate" format
+ consist of sequences of symbols drawn from three conceptually
+ distinct alphabets: either literal bytes, from the alphabet of
+ byte values (0..255), or <length, backward distance> pairs,
+ where the length is drawn from (3..258) and the distance is
+ drawn from (1..32,768). In fact, the literal and length
+ alphabets are merged into a single alphabet (0..285), where
+ values 0..255 represent literal bytes, the value 256 indicates
+ end-of-block, and values 257..285 represent length codes
+ (possibly in conjunction with extra bits following the symbol
+ code) as follows:
+
+Length encoding :
+ Extra Extra Extra
+ Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
+ ---- ---- ------ ---- ---- ------- ---- ---- -------
+ 257 0 3 267 1 15,16 277 4 67-82
+ 258 0 4 268 1 17,18 278 4 83-98
+ 259 0 5 269 2 19-22 279 4 99-114
+ 260 0 6 270 2 23-26 280 4 115-130
+ 261 0 7 271 2 27-30 281 5 131-162
+ 262 0 8 272 2 31-34 282 5 163-194
+ 263 0 9 273 3 35-42 283 5 195-226
+ 264 0 10 274 3 43-50 284 5 227-257
+ 265 1 11,12 275 3 51-58 285 0 258
+ 266 1 13,14 276 3 59-66
+
+Distance encoding :
+ Extra Extra Extra
+ Code Bits Dist Code Bits Dist Code Bits Distance
+ ---- ---- ---- ---- ---- ------ ---- ---- --------
+ 0 0 1 10 4 33-48 20 9 1025-1536
+ 1 0 2 11 4 49-64 21 9 1537-2048
+ 2 0 3 12 5 65-96 22 10 2049-3072
+ 3 0 4 13 5 97-128 23 10 3073-4096
+ 4 1 5,6 14 6 129-192 24 11 4097-6144
+ 5 1 7,8 15 6 193-256 25 11 6145-8192
+ 6 2 9-12 16 7 257-384 26 12 8193-12288
+ 7 2 13-16 17 7 385-512 27 12 12289-16384
+ 8 3 17-24 18 8 513-768 28 13 16385-24576
+ 9 3 25-32 19 8 769-1024 29 13 24577-32768
+
+ 3.2.6. Compression with fixed Huffman codes (BTYPE=01)
+
+ The Huffman codes for the two alphabets are fixed, and are not
+ represented explicitly in the data. The Huffman code lengths
+ for the literal/length alphabet are:
+
+ Lit Value Bits Codes
+ --------- ---- -----
+ 0 - 143 8 00110000 through
+ 10111111
+ 144 - 255 9 110010000 through
+ 111111111
+ 256 - 279 7 0000000 through
+ 0010111
+ 280 - 287 8 11000000 through
+ 11000111
+
+ The code lengths are sufficient to generate the actual codes,
+ as described above; we show the codes in the table for added
+ clarity. Literal/length values 286-287 will never actually
+ occur in the compressed data, but participate in the code
+ construction.
+
+ Distance codes 0-31 are represented by (fixed-length) 5-bit
+ codes, with possible additional bits as shown in the table
+ shown in Paragraph 3.2.5, above. Note that distance codes 30-
+ 31 will never actually occur in the compressed data.
+
+*/
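+
+/* Illustrative sketch (not used by the encoder below, which relies on the
+ * precomputed tables from slz-tables.h): derives the fixed Huffman code of a
+ * literal/length symbol straight from the code length table quoted above.
+ * Note that the RFC lists codes MSB-first, so the returned value must be
+ * bit-reversed before being enqueued LSB-first.
+ */
+__attribute__((unused))
+static uint32_t fixed_lit_code(uint32_t sym, uint32_t *bits)
+{
+ if (sym <= 143) { /* 8 bits: 00110000..10111111 */
+ *bits = 8;
+ return 0x30 + sym;
+ }
+ if (sym <= 255) { /* 9 bits: 110010000..111111111 */
+ *bits = 9;
+ return 0x190 + (sym - 144);
+ }
+ if (sym <= 279) { /* 7 bits: 0000000..0010111 */
+ *bits = 7;
+ return sym - 256;
+ }
+ *bits = 8; /* 8 bits: 11000000..11000111 */
+ return 0xc0 + (sym - 280);
+}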
+
+/* back references, built in a way that is optimal for 32/64 bits */
+union ref {
+ struct {
+ uint32_t pos;
+ uint32_t word;
+ } by32;
+ uint64_t by64;
+};
+
+#if defined(USE_64BIT_QUEUE) && defined(UNALIGNED_LE_OK)
+
+/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
+ * 32-bit words into output buffer. X must not contain non-zero bits above
+ * xbits.
+ */
+static inline void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
+{
+ uint64_t queue = strm->queue + ((uint64_t)x << strm->qbits);
+ uint32_t qbits = strm->qbits + xbits;
+
+ if (__builtin_expect(qbits >= 32, 1)) {
+ *(uint32_t *)strm->outbuf = queue;
+ queue >>= 32;
+ qbits -= 32;
+ strm->outbuf += 4;
+ }
+
+ strm->queue = queue;
+ strm->qbits = qbits;
+}
+
+#define enqueue8 enqueue24
+
+/* flush the queue and align to next byte */
+static inline void flush_bits(struct slz_stream *strm)
+{
+ if (strm->qbits > 0)
+ *strm->outbuf++ = strm->queue;
+
+ if (strm->qbits > 8)
+ *strm->outbuf++ = strm->queue >> 8;
+
+ if (strm->qbits > 16)
+ *strm->outbuf++ = strm->queue >> 16;
+
+ if (strm->qbits > 24)
+ *strm->outbuf++ = strm->queue >> 24;
+
+ strm->queue = 0;
+ strm->qbits = 0;
+}
+
+#else /* non-64 bit or aligned or big endian */
+
+/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
+ * bytes into out buf. X must not contain non-zero bits above xbits. Prefer
+ * enqueue8() when xbits is known for being 8 or less.
+ */
+static void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
+{
+ uint32_t queue = strm->queue + (x << strm->qbits);
+ uint32_t qbits = strm->qbits + xbits;
+
+ if (qbits >= 16) {
+#ifndef UNALIGNED_LE_OK
+ strm->outbuf[0] = queue;
+ strm->outbuf[1] = queue >> 8;
+#else
+ *(uint16_t *)strm->outbuf = queue;
+#endif
+ strm->outbuf += 2;
+ queue >>= 16;
+ qbits -= 16;
+ }
+
+ if (qbits >= 8) {
+ qbits -= 8;
+ *strm->outbuf++ = queue;
+ queue >>= 8;
+ }
+ strm->qbits = qbits;
+ strm->queue = queue;
+ return;
+}
+
+/* enqueue code x of <xbits> bits (at most 8) and copy complete bytes into
+ * out buf. X must not contain non-zero bits above xbits.
+ */
+static inline void enqueue8(struct slz_stream *strm, uint32_t x, uint32_t xbits)
+{
+ uint32_t queue = strm->queue + (x << strm->qbits);
+ uint32_t qbits = strm->qbits + xbits;
+
+ if (__builtin_expect((signed)(qbits - 8) >= 0, 1)) {
+ qbits -= 8;
+ *strm->outbuf++ = queue;
+ queue >>= 8;
+ }
+
+ strm->qbits = qbits;
+ strm->queue = queue;
+}
+
+/* align to next byte */
+static inline void flush_bits(struct slz_stream *strm)
+{
+ if (strm->qbits > 0)
+ *strm->outbuf++ = strm->queue;
+
+ if (strm->qbits > 8)
+ *strm->outbuf++ = strm->queue >> 8;
+
+ strm->queue = 0;
+ strm->qbits = 0;
+}
+#endif
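+
+/* Worked example for the bit queue above: enqueueing the 3 header bits of a
+ * fixed-Huffman block (BFINAL=0 then BTYPE=01, i.e. value 0x2 over 3 bits)
+ * into an empty queue leaves queue=0x2, qbits=3. Enqueueing the 7-bit EOB
+ * code (all zeroes) then raises qbits to 10, so the complete byte 0x02 is
+ * emitted and qbits drops back to 2. Decoders consume bits starting from the
+ * least significant one, as required by RFC1951.
+ */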
+
+
+/* only valid if buffer is already aligned */
+static inline void copy_8b(struct slz_stream *strm, uint32_t x)
+{
+ *strm->outbuf++ = x;
+}
+
+/* only valid if buffer is already aligned */
+static inline void copy_16b(struct slz_stream *strm, uint32_t x)
+{
+ strm->outbuf[0] = x;
+ strm->outbuf[1] = x >> 8;
+ strm->outbuf += 2;
+}
+
+/* only valid if buffer is already aligned */
+static inline void copy_32b(struct slz_stream *strm, uint32_t x)
+{
+ strm->outbuf[0] = x;
+ strm->outbuf[1] = x >> 8;
+ strm->outbuf[2] = x >> 16;
+ strm->outbuf[3] = x >> 24;
+ strm->outbuf += 4;
+}
+
+static inline void send_huff(struct slz_stream *strm, uint32_t code)
+{
+ uint32_t bits;
+
+ code = fixed_huff[code];
+ bits = code & 15;
+ code >>= 4;
+ enqueue24(strm, code, bits);
+}
+
+static inline void send_eob(struct slz_stream *strm)
+{
+ enqueue8(strm, 0, 7); // direct encoding of 256 = EOB (cf RFC1951)
+}
+
+/* copies <len> literals from <buf>. <more> indicates that there are data past
+ * buf + <len>. <len> must not be null.
+ */
+static void copy_lit(struct slz_stream *strm, const void *buf, uint32_t len, int more)
+{
+ uint32_t len2;
+
+ do {
+ len2 = len;
+ if (__builtin_expect(len2 > 65535, 0))
+ len2 = 65535;
+
+ len -= len2;
+
+ if (strm->state != SLZ_ST_EOB)
+ send_eob(strm);
+
+ strm->state = (more || len) ? SLZ_ST_EOB : SLZ_ST_DONE;
+
+ enqueue8(strm, !(more || len), 3); // BFINAL = !more ; BTYPE = 00
+ flush_bits(strm);
+ copy_16b(strm, len2); // len2
+ copy_16b(strm, ~len2); // nlen2
+ memcpy(strm->outbuf, buf, len2);
+ buf += len2;
+ strm->outbuf += len2;
+ } while (len);
+}
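+
+/* Worked example for copy_lit() above: sending 5 literal bytes in a final
+ * stored block emits the 3 header bits 0b001 (BFINAL=1, BTYPE=00), pads to
+ * the next byte boundary, then writes LEN=0x0005 and NLEN=0xFFFA (its one's
+ * complement) in little-endian order, followed by the 5 raw bytes.
+ */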
+
+/* copies <len> literals from <buf>. <more> indicates that there are data past
+ * buf + <len>. <len> must not be null.
+ */
+static void copy_lit_huff(struct slz_stream *strm, const unsigned char *buf, uint32_t len, int more)
+{
+ uint32_t pos;
+
+ /* This ugly construct limits the amount of tests and optimizes for the
+ * most common case (more > 0).
+ */
+ if (strm->state == SLZ_ST_EOB) {
+ eob:
+ strm->state = more ? SLZ_ST_FIXED : SLZ_ST_LAST;
+ enqueue8(strm, 2 + !more, 3); // BFINAL = !more ; BTYPE = 01
+ }
+ else if (!more) {
+ send_eob(strm);
+ goto eob;
+ }
+
+ pos = 0;
+ do {
+ send_huff(strm, buf[pos++]);
+ } while (pos < len);
+}
+
+/* format:
+ * bit0..31 = word
+ * bit32..63 = last position in buffer of similar content
+ */
+
+/* This hash provides good average results on HTML contents, and is among the
+ * few which provide almost optimal results on various different pages.
+ */
+static inline uint32_t slz_hash(uint32_t a)
+{
+#if defined(__ARM_FEATURE_CRC32)
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(a) : "r"(0));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(a) : "r"(0));
+# endif
+ return a >> (32 - HASH_BITS);
+#else
+ return ((a << 19) + (a << 6) - a) >> (32 - HASH_BITS);
+#endif
+}
+
+/* This function compares buffers <a> and <b> and reads 32 or 64 bits at a time
+ * during the approach. It makes use of unaligned little endian memory accesses
+ * on capable architectures. <max> is the maximum number of bytes that can be
+ * read, so both <a> and <b> must have at least <max> bytes ahead. <max> may
+ * safely be null or negative if that simplifies computations in the caller.
+ */
+static inline long memmatch(const unsigned char *a, const unsigned char *b, long max)
+{
+ long len = 0;
+
+#ifdef UNALIGNED_LE_OK
+ unsigned long xor;
+
+ while (1) {
+ if ((long)(len + 2 * sizeof(long)) > max) {
+ while (len < max) {
+ if (a[len] != b[len])
+ break;
+ len++;
+ }
+ return len;
+ }
+
+ xor = *(long *)&a[len] ^ *(long *)&b[len];
+ if (xor)
+ break;
+ len += sizeof(long);
+
+ xor = *(long *)&a[len] ^ *(long *)&b[len];
+ if (xor)
+ break;
+ len += sizeof(long);
+ }
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+ /* x86 has bsf. We know that xor is non-null here */
+ asm("bsf %1,%0\n" : "=r"(xor) : "0" (xor));
+ return len + xor / 8;
+#else
+ if (sizeof(long) > 4 && !(xor & 0xffffffff)) {
+ /* This code is optimized out on 32-bit archs, but we still
+ * need to shift in two passes to avoid a warning. It is
+ * properly optimized out as a single shift.
+ */
+ xor >>= 16; xor >>= 16;
+ if (xor & 0xffff) {
+ if (xor & 0xff)
+ return len + 4;
+ return len + 5;
+ }
+ if (xor & 0xffffff)
+ return len + 6;
+ return len + 7;
+ }
+
+ if (xor & 0xffff) {
+ if (xor & 0xff)
+ return len;
+ return len + 1;
+ }
+ if (xor & 0xffffff)
+ return len + 2;
+ return len + 3;
+#endif // x86
+
+#else // UNALIGNED_LE_OK
+ /* This is the generic version for big endian or unaligned-incompatible
+ * architectures.
+ */
+ while (len < max) {
+ if (a[len] != b[len])
+ break;
+ len++;
+ }
+ return len;
+
+#endif
+}
+
+/* sets <count> BYTES to -32769 in <refs> so that any uninitialized entry will
+ * verify (pos-last-1 >= 32768) and be ignored. <count> must be a multiple of
+ * 128 bytes and <refs> must cover at least <count> bytes. It's supposed to
+ * be applied to 64-bit aligned data exclusively, which makes it slightly
+ * faster than the regular memset() since no alignment check is performed.
+ */
+static void reset_refs(union ref *refs, long count)
+{
+ /* avoid a shift/mask by casting to void* */
+ union ref *end = (void *)refs + count;
+
+ do {
+ refs[ 0].by64 = -32769;
+ refs[ 1].by64 = -32769;
+ refs[ 2].by64 = -32769;
+ refs[ 3].by64 = -32769;
+ refs[ 4].by64 = -32769;
+ refs[ 5].by64 = -32769;
+ refs[ 6].by64 = -32769;
+ refs[ 7].by64 = -32769;
+ refs[ 8].by64 = -32769;
+ refs[ 9].by64 = -32769;
+ refs[10].by64 = -32769;
+ refs[11].by64 = -32769;
+ refs[12].by64 = -32769;
+ refs[13].by64 = -32769;
+ refs[14].by64 = -32769;
+ refs[15].by64 = -32769;
+ refs += 16;
+ } while (refs < end);
+}
+
+/* Compresses <ilen> bytes from <in> into <out> according to RFC1951. The
+ * output result may be up to 5 bytes larger than the input, to which 2 extra
+ * bytes may be added to send the last chunk due to BFINAL+EOB encoding (10
+ * bits) when <more> is not set. The caller is responsible for ensuring there
+ * is enough room in the output buffer for this. The amount of output bytes is
+ * returned, and no CRC is computed.
+ */
+long slz_rfc1951_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
+{
+ long rem = ilen;
+ unsigned long pos = 0;
+ unsigned long last;
+ uint32_t word = 0;
+ long mlen;
+ uint32_t h;
+ uint64_t ent;
+
+ uint32_t plit = 0;
+ uint32_t bit9 = 0;
+ uint32_t dist, code;
+ union ref refs[1 << HASH_BITS];
+
+ if (!strm->level) {
+ /* force to send as literals (eg to preserve CPU) */
+ strm->outbuf = out;
+ plit = pos = ilen;
+ bit9 = 52; /* force literal dump */
+ goto final_lit_dump;
+ }
+
+ reset_refs(refs, sizeof(refs));
+
+ strm->outbuf = out;
+
+#ifndef UNALIGNED_FASTER
+ word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
+#endif
+ while (rem >= 4) {
+#ifndef UNALIGNED_FASTER
+ word = ((unsigned char)in[pos + 3] << 24) + (word >> 8);
+#else
+ word = *(uint32_t *)&in[pos];
+#endif
+ h = slz_hash(word);
+ asm volatile ("" ::); // prevent gcc from trying to be smart with the prefetch
+
+ if (sizeof(long) >= 8) {
+ ent = refs[h].by64;
+ last = (uint32_t)ent;
+ ent >>= 32;
+ refs[h].by64 = ((uint64_t)pos) + ((uint64_t)word << 32);
+ } else {
+ ent = refs[h].by32.word;
+ last = refs[h].by32.pos;
+ refs[h].by32.pos = pos;
+ refs[h].by32.word = word;
+ }
+
+#ifdef FIND_OPTIMAL_MATCH
+ /* Experimental code to see what could be saved with an ideal
+ * longest match lookup algorithm. This one is very slow but
+ * scans the whole window. In short, here are the savings :
+ * file orig fast(ratio) optimal(ratio)
+ * README 5185 3419 (65.9%) 3165 (61.0%) -7.5%
+ * index.html 76799 35662 (46.4%) 29875 (38.9%) -16.3%
+ * rfc1952.c 29383 13442 (45.7%) 11793 (40.1%) -12.3%
+ *
+ * Thus the savings to expect for large files is at best 16%.
+ *
+ * A non-colliding hash gives 33025 instead of 35662 (-7.4%),
+ * and keeping the last two entries gives 31724 (-11.0%).
+ */
+ unsigned long scan;
+ long len;
+ int saved = 0;
+ int bestpos = 0;
+ int bestlen = 0;
+ int firstlen = 0;
+ int max_lookup = 2; // 0 = no limit
+
+ for (scan = pos - 1; scan < pos && (unsigned long)(pos - scan - 1) < 32768; scan--) {
+ if (*(uint32_t *)(in + scan) != word)
+ continue;
+
+ len = memmatch(in + pos, in + scan, rem);
+ if (!bestlen)
+ firstlen = len;
+
+ if (len > bestlen) {
+ bestlen = len;
+ bestpos = scan;
+ }
+ if (!--max_lookup)
+ break;
+ }
+ if (bestlen) {
+ //printf("pos=%d last=%d bestpos=%d word=%08x ent=%08x len=%d\n",
+ // (int)pos, (int)last, (int)bestpos, (int)word, (int)ent, bestlen);
+ last = bestpos;
+ ent = word;
+ saved += bestlen - firstlen;
+ }
+ //fprintf(stderr, "first=%d best=%d saved_total=%d\n", firstlen, bestlen, saved);
+#endif
+
+ if ((uint32_t)ent != word) {
+ send_as_lit:
+ rem--;
+ plit++;
+ bit9 += ((unsigned char)word >= 144);
+ pos++;
+ continue;
+ }
+
+ /* We reject pos = last and pos > last+32768 */
+ if ((unsigned long)(pos - last - 1) >= 32768)
+ goto send_as_lit;
+
+ /* Note: cannot encode a length larger than 258 bytes */
+ mlen = memmatch(in + pos + 4, in + last + 4, (rem > 258 ? 258 : rem) - 4) + 4;
+
+ /* found a matching entry */
+
+ if (bit9 >= 52 && mlen < 6)
+ goto send_as_lit;
+
+ /* compute the output code, its size and the length's size in
+ * bits to know if the reference is cheaper than literals.
+ */
+ code = len_fh[mlen];
+
+ /* direct mapping of dist->huffman code */
+ dist = fh_dist_table[pos - last - 1];
+
+ /* if encoding the dist+length is more expensive than sending
+ * the equivalent as bytes, lets keep the literals.
+ */
+ if ((dist & 0x1f) + (code >> 16) + 8 >= 8 * mlen + bit9)
+ goto send_as_lit;
+
+ /* first, copy pending literals */
+ if (plit) {
+ /* Huffman encoding requires 9 bits for octets 144..255, so this
+ * is a waste of space for binary data. Switching between Huffman
+ * and no-comp then Huffman consumes 52 bits (7 for EOB + 3 for
+ * block type + 7 for alignment + 32 for LEN+NLEN + 3 for the next
+ * block). Only use plain literals if there are more than 52 bits
+ * to be saved.
+ */
+ if (bit9 >= 52)
+ copy_lit(strm, in + pos - plit, plit, 1);
+ else
+ copy_lit_huff(strm, in + pos - plit, plit, 1);
+
+ plit = 0;
+ }
+
+ /* use mode 01 - fixed huffman */
+ if (strm->state == SLZ_ST_EOB) {
+ strm->state = SLZ_ST_FIXED;
+ enqueue8(strm, 0x02, 3); // BTYPE = 01, BFINAL = 0
+ }
+
+ /* copy the length first */
+ enqueue24(strm, code & 0xFFFF, code >> 16);
+
+ /* in fixed huffman mode, dist is fixed 5 bits */
+ enqueue24(strm, dist >> 5, dist & 0x1f);
+ bit9 = 0;
+ rem -= mlen;
+ pos += mlen;
+
+#ifndef UNALIGNED_FASTER
+#ifdef UNALIGNED_LE_OK
+ word = *(uint32_t *)&in[pos - 1];
+#else
+ word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
+#endif
+#endif
+ }
+
+ if (__builtin_expect(rem, 0)) {
+ /* we're reading the 1..3 last bytes */
+ plit += rem;
+ do {
+ bit9 += ((unsigned char)in[pos++] >= 144);
+ } while (--rem);
+ }
+
+ final_lit_dump:
+ /* now copy remaining literals or mark the end */
+ if (plit) {
+ if (bit9 >= 52)
+ copy_lit(strm, in + pos - plit, plit, more);
+ else
+ copy_lit_huff(strm, in + pos - plit, plit, more);
+
+ plit = 0;
+ }
+
+ strm->ilen += ilen;
+ return strm->outbuf - out;
+}
+
+/* Initializes stream <strm> for use with raw deflate (rfc1951). The CRC is
+ * unused but set to zero. The compression level passed in <level> is set. This
+ * value can only be 0 (no compression) or 1 (compression) and other values
+ * will lead to unpredictable behaviour. The function always returns 0.
+ */
+int slz_rfc1951_init(struct slz_stream *strm, int level)
+{
+ strm->state = SLZ_ST_EOB; // no header
+ strm->level = level;
+ strm->format = SLZ_FMT_DEFLATE;
+ strm->crc32 = 0;
+ strm->ilen = 0;
+ strm->qbits = 0;
+ strm->queue = 0;
+ return 0;
+}
+
+/* Flushes any pending data for stream <strm> into buffer <buf>, then sends
+ * BTYPE=1 and BFINAL=1 if needed. The stream ends in SLZ_ST_DONE. It returns
+ * the number of bytes emitted. The trailer consists of flushing the possibly
+ * pending bits from the queue (up to 7 bits), then possibly EOB (7 bits),
+ * then 3 bits, EOB, and a rounding to the next byte, which amounts to a
+ * total of 4 bytes max, that the caller must ensure are available before
+ * calling the function.
+ */
+int slz_rfc1951_finish(struct slz_stream *strm, unsigned char *buf)
+{
+ strm->outbuf = buf;
+
+ if (strm->state == SLZ_ST_FIXED || strm->state == SLZ_ST_LAST) {
+ strm->state = (strm->state == SLZ_ST_LAST) ? SLZ_ST_DONE : SLZ_ST_EOB;
+ send_eob(strm);
+ }
+
+ if (strm->state != SLZ_ST_DONE) {
+ /* send BTYPE=1, BFINAL=1 */
+ enqueue8(strm, 3, 3);
+ send_eob(strm);
+ strm->state = SLZ_ST_DONE;
+ }
+
+ flush_bits(strm);
+ return strm->outbuf - buf;
+}
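+
+/* Illustrative end-to-end use of the raw deflate API above, assuming <out>
+ * is large enough: per the comments on slz_rfc1951_encode() and
+ * slz_rfc1951_finish(), up to ilen + 5 + 2 bytes may be produced by the
+ * encode step and up to 4 more by the trailer.
+ */
+__attribute__((unused))
+static long deflate_once_example(unsigned char *out, const unsigned char *in, long ilen)
+{
+ struct slz_stream strm;
+ long olen;
+
+ slz_rfc1951_init(&strm, 1); /* level 1 = compress */
+ olen = slz_rfc1951_encode(&strm, out, in, ilen, 0); /* more=0: last chunk */
+ olen += slz_rfc1951_finish(&strm, out + olen); /* flush queue + final EOB */
+ return olen; /* length of the raw RFC1951 stream */
+}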
+
+/* Now RFC1952-specific declarations and extracts from RFC.
+ * From RFC1952 about the GZIP file format :
+
+A gzip file consists of a series of "members" ...
+
+2.3. Member format
+
+ Each member has the following structure:
+
+ +---+---+---+---+---+---+---+---+---+---+
+ |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
+ +---+---+---+---+---+---+---+---+---+---+
+
+ (if FLG.FEXTRA set)
+
+ +---+---+=================================+
+ | XLEN |...XLEN bytes of "extra field"...| (more-->)
+ +---+---+=================================+
+
+ (if FLG.FNAME set)
+
+ +=========================================+
+ |...original file name, zero-terminated...| (more-->)
+ +=========================================+
+
+ (if FLG.FCOMMENT set)
+
+ +===================================+
+ |...file comment, zero-terminated...| (more-->)
+ +===================================+
+
+ (if FLG.FHCRC set)
+
+ +---+---+
+ | CRC16 |
+ +---+---+
+
+ +=======================+
+ |...compressed blocks...| (more-->)
+ +=======================+
+
+ 0 1 2 3 4 5 6 7
+ +---+---+---+---+---+---+---+---+
+ | CRC32 | ISIZE |
+ +---+---+---+---+---+---+---+---+
+
+
+2.3.1. Member header and trailer
+
+ ID1 (IDentification 1)
+ ID2 (IDentification 2)
+ These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
+ (0x8b, \213), to identify the file as being in gzip format.
+
+ CM (Compression Method)
+ This identifies the compression method used in the file. CM
+ = 0-7 are reserved. CM = 8 denotes the "deflate"
+ compression method, which is the one customarily used by
+ gzip and which is documented elsewhere.
+
+ FLG (FLaGs)
+ This flag byte is divided into individual bits as follows:
+
+ bit 0 FTEXT
+ bit 1 FHCRC
+ bit 2 FEXTRA
+ bit 3 FNAME
+ bit 4 FCOMMENT
+ bit 5 reserved
+ bit 6 reserved
+ bit 7 reserved
+
+ Reserved FLG bits must be zero.
+
+ MTIME (Modification TIME)
+ This gives the most recent modification time of the original
+ file being compressed. The time is in Unix format, i.e.,
+ seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this
+ may cause problems for MS-DOS and other systems that use
+ local rather than Universal time.) If the compressed data
+ did not come from a file, MTIME is set to the time at which
+ compression started. MTIME = 0 means no time stamp is
+ available.
+
+ XFL (eXtra FLags)
+ These flags are available for use by specific compression
+ methods. The "deflate" method (CM = 8) sets these flags as
+ follows:
+
+ XFL = 2 - compressor used maximum compression,
+ slowest algorithm
+ XFL = 4 - compressor used fastest algorithm
+
+ OS (Operating System)
+ This identifies the type of file system on which compression
+ took place. This may be useful in determining end-of-line
+ convention for text files. The currently defined values are
+ as follows:
+
+ 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
+ 1 - Amiga
+ 2 - VMS (or OpenVMS)
+ 3 - Unix
+ 4 - VM/CMS
+ 5 - Atari TOS
+ 6 - HPFS filesystem (OS/2, NT)
+ 7 - Macintosh
+ 8 - Z-System
+ 9 - CP/M
+ 10 - TOPS-20
+ 11 - NTFS filesystem (NT)
+ 12 - QDOS
+ 13 - Acorn RISCOS
+ 255 - unknown
+
+ ==> A file compressed using "gzip -1" on Unix-like systems can be :
+
+ 1F 8B 08 00 00 00 00 00 04 03
+ <deflate-compressed stream>
+ crc32 size32
+*/
+
+static const unsigned char gzip_hdr[] = { 0x1F, 0x8B, // ID1, ID2
+ 0x08, 0x00, // Deflate, flags (none)
+ 0x00, 0x00, 0x00, 0x00, // mtime: none
+ 0x04, 0x03 }; // fastest comp, OS=Unix
+
+static inline uint32_t crc32_char(uint32_t crc, uint8_t x)
+{
+#if defined(__ARM_FEATURE_CRC32)
+ crc = ~crc;
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32b %w0,%w0,%w1" : "+r"(crc) : "r"(x));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32b %0,%0,%1" : "+r"(crc) : "r"(x));
+# endif
+ crc = ~crc;
+#else
+ crc = crc32_fast[0][(crc ^ x) & 0xff] ^ (crc >> 8);
+#endif
+ return crc;
+}
+
+static inline uint32_t crc32_uint32(uint32_t data)
+{
+#if defined(__ARM_FEATURE_CRC32)
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(data) : "r"(~0UL));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(data) : "r"(~0UL));
+# endif
+ data = ~data;
+#else
+ data = crc32_fast[3][(data >> 0) & 0xff] ^
+ crc32_fast[2][(data >> 8) & 0xff] ^
+ crc32_fast[1][(data >> 16) & 0xff] ^
+ crc32_fast[0][(data >> 24) & 0xff];
+#endif
+ return data;
+}
+
+/* Modified version originally from RFC1952, working with non-inverting CRCs */
+uint32_t slz_crc32_by1(uint32_t crc, const unsigned char *buf, int len)
+{
+ int n;
+
+ for (n = 0; n < len; n++)
+ crc = crc32_char(crc, buf[n]);
+ return crc;
+}
+
+/* This version computes the crc32 of <buf> over <len> bytes, doing most of it
+ * in 32-bit chunks, with the main loop unrolled to process 16 bytes at once.
+ */
+uint32_t slz_crc32_by4(uint32_t crc, const unsigned char *buf, int len)
+{
+ const unsigned char *end = buf + len;
+
+ while (buf <= end - 16) {
+#ifdef UNALIGNED_LE_OK
+#if defined(__ARM_FEATURE_CRC32)
+ crc = ~crc;
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
+# endif
+ crc = ~crc;
+#else
+ crc ^= *(uint32_t *)buf;
+ crc = crc32_uint32(crc);
+
+ crc ^= *(uint32_t *)(buf + 4);
+ crc = crc32_uint32(crc);
+
+ crc ^= *(uint32_t *)(buf + 8);
+ crc = crc32_uint32(crc);
+
+ crc ^= *(uint32_t *)(buf + 12);
+ crc = crc32_uint32(crc);
+#endif
+#else
+ crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
+
+ crc = crc32_fast[3][(buf[4] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[5] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[6] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[7] ^ (crc >> 24)) & 0xff];
+
+ crc = crc32_fast[3][(buf[8] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[9] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[10] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[11] ^ (crc >> 24)) & 0xff];
+
+ crc = crc32_fast[3][(buf[12] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[13] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[14] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[15] ^ (crc >> 24)) & 0xff];
+#endif
+ buf += 16;
+ }
+
+ while (buf <= end - 4) {
+#ifdef UNALIGNED_LE_OK
+ crc ^= *(uint32_t *)buf;
+ crc = crc32_uint32(crc);
+#else
+ crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
+#endif
+ buf += 4;
+ }
+
+ while (buf < end)
+ crc = crc32_char(crc, *buf++);
+ return crc;
+}
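+
+/* Illustrative equivalence check (not part of the library) : whatever the
+ * platform-specific path taken above, the unrolled variant must return the
+ * same value as the byte-at-a-time reference for any input, e.g. :
+ *
+ *	assert(slz_crc32_by4(0, buf, len) == slz_crc32_by1(0, buf, len));
+ */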
+
+/* uses the most suitable crc32 function to update crc on <buf, len> */
+static inline uint32_t update_crc(uint32_t crc, const void *buf, int len)
+{
+ return slz_crc32_by4(crc, buf, len);
+}
+
+/* Sends the gzip header for stream <strm> into buffer <buf>. When it's done,
+ * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
+ * emitted which is always 10. The caller is responsible for ensuring there's
+ * always enough room in the buffer.
+ */
+int slz_rfc1952_send_header(struct slz_stream *strm, unsigned char *buf)
+{
+ memcpy(buf, gzip_hdr, sizeof(gzip_hdr));
+ strm->state = SLZ_ST_EOB;
+ return sizeof(gzip_hdr);
+}
+
+/* Encodes the block according to rfc1952. This means that the CRC of the input
+ * block is computed according to the CRC32 algorithm. If the header was never
+ * sent, it may be sent first. The number of output bytes is returned.
+ */
+long slz_rfc1952_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
+{
+ long ret = 0;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ ret += slz_rfc1952_send_header(strm, out);
+
+ strm->crc32 = update_crc(strm->crc32, in, ilen);
+ ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
+ return ret;
+}
+
+/* Initializes stream <strm> for use with the gzip format (rfc1952). The
+ * compression level passed in <level> is set. This value can only be 0 (no
+ * compression) or 1 (compression) and other values will lead to unpredictable
+ * behaviour. The function always returns 0.
+ */
+int slz_rfc1952_init(struct slz_stream *strm, int level)
+{
+ strm->state = SLZ_ST_INIT;
+ strm->level = level;
+ strm->format = SLZ_FMT_GZIP;
+ strm->crc32 = 0;
+ strm->ilen = 0;
+ strm->qbits = 0;
+ strm->queue = 0;
+ return 0;
+}
+
+/* Flushes pending bits and sends the gzip trailer for stream <strm> into
+ * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
+ * returns the number of bytes emitted. The trailer consists of flushing the
+ * possibly pending bits from the queue (up to 24 bits), rounding to the next
+ * byte, then 4 bytes for the CRC and another 4 bytes for the input length.
+ * That may amount to 4+4+4 = 12 bytes, which the caller must ensure are
+ * available before calling the function. Note that if the initial header was
+ * never sent, it will be sent first as well (10 extra bytes).
+ */
+int slz_rfc1952_finish(struct slz_stream *strm, unsigned char *buf)
+{
+ strm->outbuf = buf;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ strm->outbuf += slz_rfc1952_send_header(strm, strm->outbuf);
+
+ slz_rfc1951_finish(strm, strm->outbuf);
+ copy_32b(strm, strm->crc32);
+ copy_32b(strm, strm->ilen);
+ strm->state = SLZ_ST_END;
+
+ return strm->outbuf - buf;
+}
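+
+/* Usage sketch (illustrative only) : a whole buffer may be gzipped in one
+ * pass, assuming <out> was sized for the worst case (the 10-byte header,
+ * slightly more than <ilen> for uncompressible data, plus the 12-byte
+ * trailer) :
+ *
+ *	struct slz_stream strm;
+ *	long olen;
+ *
+ *	slz_rfc1952_init(&strm, 1);
+ *	olen  = slz_rfc1952_encode(&strm, out, in, ilen, 0);
+ *	olen += slz_rfc1952_finish(&strm, out + olen);
+ */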
+
+
+/* RFC1950-specific stuff. This is for the Zlib stream format.
+ * From RFC1950 (zlib) :
+ *
+
+ 2.2. Data format
+
+ A zlib stream has the following structure:
+
+ 0 1
+ +---+---+
+ |CMF|FLG| (more-->)
+ +---+---+
+
+
+ (if FLG.FDICT set)
+
+ 0 1 2 3
+ +---+---+---+---+
+ | DICTID | (more-->)
+ +---+---+---+---+
+
+ +=====================+---+---+---+---+
+ |...compressed data...| ADLER32 |
+ +=====================+---+---+---+---+
+
+ Any data which may appear after ADLER32 are not part of the zlib
+ stream.
+
+ CMF (Compression Method and flags)
+ This byte is divided into a 4-bit compression method and a 4-
+ bit information field depending on the compression method.
+
+ bits 0 to 3 CM Compression method
+ bits 4 to 7 CINFO Compression info
+
+ CM (Compression method)
+ This identifies the compression method used in the file. CM = 8
+ denotes the "deflate" compression method with a window size up
+ to 32K. This is the method used by gzip and PNG (see
+ references [1] and [2] in Chapter 3, below, for the reference
+ documents). CM = 15 is reserved. It might be used in a future
+ version of this specification to indicate the presence of an
+ extra field before the compressed data.
+
+ CINFO (Compression info)
+ For CM = 8, CINFO is the base-2 logarithm of the LZ77 window
+ size, minus eight (CINFO=7 indicates a 32K window size). Values
+ of CINFO above 7 are not allowed in this version of the
+ specification. CINFO is not defined in this specification for
+ CM not equal to 8.
+
+ FLG (FLaGs)
+ This flag byte is divided as follows:
+
+ bits 0 to 4 FCHECK (check bits for CMF and FLG)
+ bit 5 FDICT (preset dictionary)
+ bits 6 to 7 FLEVEL (compression level)
+
+ The FCHECK value must be such that CMF and FLG, when viewed as
+ a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG),
+ is a multiple of 31.
+
+
+ FDICT (Preset dictionary)
+ If FDICT is set, a DICT dictionary identifier is present
+ immediately after the FLG byte. The dictionary is a sequence of
+ bytes which are initially fed to the compressor without
+ producing any compressed output. DICT is the Adler-32 checksum
+ of this sequence of bytes (see the definition of ADLER32
+ below). The decompressor can use this identifier to determine
+ which dictionary has been used by the compressor.
+
+ FLEVEL (Compression level)
+ These flags are available for use by specific compression
+ methods. The "deflate" method (CM = 8) sets these flags as
+ follows:
+
+ 0 - compressor used fastest algorithm
+ 1 - compressor used fast algorithm
+ 2 - compressor used default algorithm
+ 3 - compressor used maximum compression, slowest algorithm
+
+ The information in FLEVEL is not needed for decompression; it
+ is there to indicate if recompression might be worthwhile.
+
+ compressed data
+ For compression method 8, the compressed data is stored in the
+ deflate compressed data format as described in the document
+ "DEFLATE Compressed Data Format Specification" by L. Peter
+ Deutsch. (See reference [3] in Chapter 3, below)
+
+ Other compressed data formats are not specified in this version
+ of the zlib specification.
+
+ ADLER32 (Adler-32 checksum)
+ This contains a checksum value of the uncompressed data
+ (excluding any dictionary data) computed according to Adler-32
+ algorithm. This algorithm is a 32-bit extension and improvement
+ of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073
+ standard. See references [4] and [5] in Chapter 3, below)
+
+ Adler-32 is composed of two sums accumulated per byte: s1 is
+ the sum of all bytes, s2 is the sum of all s1 values. Both sums
+ are done modulo 65521. s1 is initialized to 1, s2 to zero. The
+ Adler-32 checksum is stored as s2*65536 + s1 in most-
+ significant-byte first (network) order.
+
+ ==> The stream can start with only 2 bytes :
+ - CM = 0x78 : CMINFO=7 (32kB window), CM=8 (deflate)
+ - FLG = 0x01 : FLEVEL = 0 (fastest), FDICT=0 (no dict), FCHECK=1 so
+ that 0x7801 is a multiple of 31 (30721 = 991 * 31).
+
+ ==> and it ends with only 4 bytes, the Adler-32 checksum in big-endian format.
+
+ */
+
+static const unsigned char zlib_hdr[] = { 0x78, 0x01 }; // 32k win, deflate, chk=1
+
+
+/* Original version from RFC1950, verified and works OK */
+uint32_t slz_adler32_by1(uint32_t crc, const unsigned char *buf, int len)
+{
+ uint32_t s1 = crc & 0xffff;
+ uint32_t s2 = (crc >> 16) & 0xffff;
+ int n;
+
+ for (n = 0; n < len; n++) {
+ s1 = (s1 + buf[n]) % 65521;
+ s2 = (s2 + s1) % 65521;
+ }
+ return (s2 << 16) + s1;
+}
+
+/* Computes the adler32 sum on <buf> for <len> bytes. It avoids the expensive
+ * modulus by retrofitting the number of bytes missed between 65521 and 65536
+ * which is easy to count : For every sum above 65536, the modulus is offset
+ * by (65536-65521) = 15. So for any value, we can count the accumulated extra
+ * values by dividing the sum by 65536 and multiplying this value by
+ * (65536-65521). That's easier with a drawing with boxes and marbles. It gives
+ * this :
+ * x % 65521 = (x % 65536) + (x / 65536) * (65536 - 65521)
+ * = (x & 0xffff) + (x >> 16) * 15.
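+ *
+ * As a worked example, take x = 200000 : x >> 16 = 3 and x & 0xffff = 3392,
+ * so the expression gives 3392 + 3 * 15 = 3437, which is indeed
+ * 200000 % 65521 (200000 - 3 * 65521 = 3437). Strictly speaking this is a
+ * congruence rather than an equality : the result may still be >= 65521,
+ * which is why the code below ends with a conditional subtraction.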
+ */
+uint32_t slz_adler32_block(uint32_t crc, const unsigned char *buf, long len)
+{
+ long s1 = crc & 0xffff;
+ long s2 = (crc >> 16);
+ long blk;
+ long n;
+
+ do {
+ blk = len;
+ /* ensure we never overflow s2 (limit is about 2^((32-8)/2)) */
+ if (blk > (1U << 12))
+ blk = 1U << 12;
+ len -= blk;
+
+ for (n = 0; n < blk; n++) {
+ s1 = (s1 + buf[n]);
+ s2 = (s2 + s1);
+ }
+
+ /* Largest value here is 2^12 * 255 = 1044480 < 2^20. We can
+ * still overflow once, but not twice because the right hand
+ * side is 225 max, so the total is at most 65760. However we also
+ * have to take care of the values between 65521 and 65536.
+ */
+ s1 = (s1 & 0xffff) + 15 * (s1 >> 16);
+ if (s1 >= 65521)
+ s1 -= 65521;
+
+ /* For s2, the largest value is estimated to 2^32-1 for
+ * simplicity, so the right hand side is about 15*65535
+ * = 983025. We can overflow twice at most.
+ */
+ s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
+ s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
+ if (s2 >= 65521)
+ s2 -= 65521;
+
+ buf += blk;
+ } while (len);
+ return (s2 << 16) + s1;
+}
+
+/* Sends the zlib header for stream <strm> into buffer <buf>. When it's done,
+ * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
+ * emitted which is always 2. The caller is responsible for ensuring there's
+ * always enough room in the buffer.
+ */
+int slz_rfc1950_send_header(struct slz_stream *strm, unsigned char *buf)
+{
+ memcpy(buf, zlib_hdr, sizeof(zlib_hdr));
+ strm->state = SLZ_ST_EOB;
+ return sizeof(zlib_hdr);
+}
+
+/* Encodes the block according to rfc1950. This means that the CRC of the input
+ * block is computed according to the ADLER32 algorithm. If the header was never
+ * sent, it may be sent first. The number of output bytes is returned.
+ */
+long slz_rfc1950_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
+{
+ long ret = 0;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ ret += slz_rfc1950_send_header(strm, out);
+
+ strm->crc32 = slz_adler32_block(strm->crc32, in, ilen);
+ ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
+ return ret;
+}
+
+/* Initializes stream <strm> for use with the zlib format (rfc1950). The
+ * compression level passed in <level> is set. This value can only be 0 (no
+ * compression) or 1 (compression) and other values will lead to unpredictable
+ * behaviour. The function always returns 0.
+ */
+int slz_rfc1950_init(struct slz_stream *strm, int level)
+{
+ strm->state = SLZ_ST_INIT;
+ strm->level = level;
+ strm->format = SLZ_FMT_ZLIB;
+ strm->crc32 = 1; // rfc1950/zlib starts with initial crc=1
+ strm->ilen = 0;
+ strm->qbits = 0;
+ strm->queue = 0;
+ return 0;
+}
+
+/* Flushes pending bits and sends the zlib trailer for stream <strm> into
+ * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
+ * returns the number of bytes emitted. The trailer consists of flushing the
+ * possibly pending bits from the queue (up to 24 bits), rounding to the next
+ * byte, then 4 bytes for the Adler-32 checksum. That may amount to 4+4 = 8
+ * bytes, which the caller must ensure are available before calling the
+ * function. Note that if the initial header was never sent, it will be sent
+ * first as well (2 extra bytes).
+ */
+int slz_rfc1950_finish(struct slz_stream *strm, unsigned char *buf)
+{
+ strm->outbuf = buf;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ strm->outbuf += slz_rfc1950_send_header(strm, strm->outbuf);
+
+ slz_rfc1951_finish(strm, strm->outbuf);
+ copy_8b(strm, (strm->crc32 >> 24) & 0xff);
+ copy_8b(strm, (strm->crc32 >> 16) & 0xff);
+ copy_8b(strm, (strm->crc32 >> 8) & 0xff);
+ copy_8b(strm, (strm->crc32 >> 0) & 0xff);
+ strm->state = SLZ_ST_END;
+ return strm->outbuf - buf;
+}
+
+__attribute__((constructor))
+static void __slz_initialize(void)
+{
+#if !defined(__ARM_FEATURE_CRC32)
+ __slz_make_crc_table();
+#endif
+ __slz_prepare_dist_table();
+}
diff --git a/src/sock.c b/src/sock.c
new file mode 100644
index 0000000..602e9c5
--- /dev/null
+++ b/src/sock.c
@@ -0,0 +1,1000 @@
+/*
+ * Generic code for native (BSD-compatible) sockets
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <net/if.h>
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/connection.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proto_sockpair.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/tools.h>
+
+#define SOCK_XFER_OPT_FOREIGN 0x00000001
+#define SOCK_XFER_OPT_V6ONLY 0x00000002
+#define SOCK_XFER_OPT_DGRAM 0x00000004
+
+/* the list of remaining sockets transferred from an older process */
+struct xfer_sock_list {
+ int fd;
+ int options; /* socket options as SOCK_XFER_OPT_* */
+ char *iface;
+ char *namespace;
+ int if_namelen;
+ int ns_namelen;
+ struct xfer_sock_list *prev;
+ struct xfer_sock_list *next;
+ struct sockaddr_storage addr;
+};
+
+static struct xfer_sock_list *xfer_sock_list;
+
+
+/* Accept an incoming connection from listener <l>, and return it, as well as
+ * a CO_AC_* status code into <status> if not null. Null is returned on error.
+ * <l> must be a valid listener with a valid frontend.
+ */
+struct connection *sock_accept_conn(struct listener *l, int *status)
+{
+#ifdef USE_ACCEPT4
+ static int accept4_broken;
+#endif
+ struct proxy *p = l->bind_conf->frontend;
+ struct connection *conn = NULL;
+ struct sockaddr_storage *addr = NULL;
+ socklen_t laddr;
+ int ret;
+ int cfd;
+
+ if (!sockaddr_alloc(&addr, NULL, 0))
+ goto fail_addr;
+
+ /* accept() will mark all accepted FDs O_NONBLOCK and the ones accepted
+ * in the master process as FD_CLOEXEC. It's not done for workers
+ * because 1) workers are not supposed to execute anything so there's
+ * no reason for uselessly slowing down everything, and 2) that would
+ * prevent us from implementing fd passing in the future.
+ */
+#ifdef USE_ACCEPT4
+ laddr = sizeof(*conn->src);
+
+ /* only call accept4() if it's known to be safe, otherwise fallback to
+ * the legacy accept() + fcntl().
+ */
+ if (unlikely(accept4_broken) ||
+ (((cfd = accept4(l->rx.fd, (struct sockaddr*)addr, &laddr,
+ SOCK_NONBLOCK | (master ? SOCK_CLOEXEC : 0))) == -1) &&
+ (errno == ENOSYS || errno == EINVAL || errno == EBADF) &&
+ ((accept4_broken = 1))))
+#endif
+ {
+ laddr = sizeof(*conn->src);
+ if ((cfd = accept(l->rx.fd, (struct sockaddr*)addr, &laddr)) != -1) {
+ fd_set_nonblock(cfd);
+ if (master)
+ fd_set_cloexec(cfd);
+ }
+ }
+
+ if (likely(cfd != -1)) {
+ if (unlikely(cfd >= global.maxsock)) {
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached the configured maximum connection limit. Please check the global 'maxconn' value.\n",
+ p->id);
+ goto fail_conn;
+ }
+
+ /* Perfect, the connection was accepted */
+ conn = conn_new(&l->obj_type);
+ if (!conn)
+ goto fail_conn;
+
+ conn->src = addr;
+ conn->handle.fd = cfd;
+ ret = CO_AC_DONE;
+ goto done;
+ }
+
+ /* error conditions below */
+ sockaddr_free(&addr);
+
+ switch (errno) {
+#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
+ case EWOULDBLOCK:
+#endif
+ case EAGAIN:
+ ret = CO_AC_DONE; /* nothing more to accept */
+ if (fdtab[l->rx.fd].state & (FD_POLL_HUP|FD_POLL_ERR)) {
+ /* the listening socket might have been disabled in a shared
+ * process and we're a collateral victim. We'll just pause for
+ * a while in case it comes back. In the meantime, we need to
+ * clear this sticky flag.
+ */
+ _HA_ATOMIC_AND(&fdtab[l->rx.fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ ret = CO_AC_PAUSE;
+ }
+ fd_cant_recv(l->rx.fd);
+ break;
+
+ case EINVAL:
+ /* might be trying to accept on a shut fd (eg: soft stop) */
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EINTR:
+ case ECONNABORTED:
+ ret = CO_AC_RETRY;
+ break;
+
+ case ENFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EMFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case ENOBUFS:
+ case ENOMEM:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ default:
+ /* unexpected result, let's give up and let other tasks run */
+ ret = CO_AC_YIELD;
+ }
+ done:
+ if (status)
+ *status = ret;
+ return conn;
+
+ fail_conn:
+ sockaddr_free(&addr);
+ /* The accept call already succeeded by the time we try to allocate the connection,
+ * we need to close it in case of failure. */
+ close(cfd);
+ fail_addr:
+ ret = CO_AC_PAUSE;
+ goto done;
+}
+
+/* Create a socket to connect to the server in conn->dst (which MUST be valid),
+ * using the configured namespace if needed, or the one passed by the proxy
+ * protocol if required to do so. It ultimately calls socket() or socketat()
+ * and returns the FD or error code.
+ */
+int sock_create_server_socket(struct connection *conn)
+{
+ const struct netns_entry *ns = NULL;
+
+#ifdef USE_NS
+ if (objt_server(conn->target)) {
+ if (__objt_server(conn->target)->flags & SRV_F_USE_NS_FROM_PP)
+ ns = conn->proxy_netns;
+ else
+ ns = __objt_server(conn->target)->netns;
+ }
+#endif
+ return my_socketat(ns, conn->dst->ss_family, SOCK_STREAM, 0);
+}
+
+/* Enables receiving on receiver <rx> once already bound. */
+void sock_enable(struct receiver *rx)
+{
+ if (rx->flags & RX_F_BOUND)
+ fd_want_recv_safe(rx->fd);
+}
+
+/* Disables receiving on receiver <rx> once already bound. */
+void sock_disable(struct receiver *rx)
+{
+ if (rx->flags & RX_F_BOUND)
+ fd_stop_recv(rx->fd);
+}
+
+/* stops, unbinds and possibly closes the FD associated with receiver rx */
+void sock_unbind(struct receiver *rx)
+{
+ /* There are a number of situations where we prefer to keep the FD and
+ * not to close it (unless we're stopping, of course):
+ * - worker process unbinding from a worker's FD with socket transfer enabled => keep
+ * - master process unbinding from a master's inherited FD => keep
+ * - master process unbinding from a master's FD => close
+ * - master process unbinding from a worker's inherited FD => keep
+ * - master process unbinding from a worker's FD => close
+ * - worker process unbinding from a master's FD => close
+ * - worker process unbinding from a worker's FD => close
+ */
+ if (rx->flags & RX_F_BOUND)
+ rx->proto->rx_disable(rx);
+
+ if (!stopping && !master &&
+ !(rx->flags & RX_F_MWORKER) &&
+ (global.tune.options & GTUNE_SOCKET_TRANSFER))
+ return;
+
+ if (!stopping && master &&
+ rx->flags & RX_F_INHERITED)
+ return;
+
+ rx->flags &= ~RX_F_BOUND;
+ if (rx->fd != -1)
+ fd_delete(rx->fd);
+ rx->fd = -1;
+}
+
+/*
+ * Retrieves the source address for the socket <fd>, with <dir> indicating
+ * if we're a listener (=0) or an initiator (!=0). It returns 0 in case of
+ * success, -1 in case of error. The socket's source address is stored in
+ * <sa> for <salen> bytes.
+ */
+int sock_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir)
+{
+ if (dir)
+ return getsockname(fd, sa, &salen);
+ else
+ return getpeername(fd, sa, &salen);
+}
+
+/*
+ * Retrieves the original destination address for the socket <fd>, with <dir>
+ * indicating if we're a listener (=0) or an initiator (!=0). It returns 0 in
+ * case of success, -1 in case of error. The socket's destination address is
+ * stored in <sa> for <salen> bytes.
+ */
+int sock_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
+{
+ if (dir)
+ return getpeername(fd, sa, &salen);
+ else
+ return getsockname(fd, sa, &salen);
+}
+
+/* Try to retrieve exported sockets from worker at CLI <unixsocket>. These
+ * ones will be placed into the xfer_sock_list for later use by function
+ * sock_find_compatible_fd(). Returns 0 on success, -1 on failure.
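+ *
+ * A note on the wire format, as parsed below : the peer first sends the
+ * number of transferred FDs as an int, then for each FD (passed via
+ * SCM_RIGHTS) a small metadata block made of one length byte followed by the
+ * namespace name, one length byte followed by the interface name, and an int
+ * which used to carry the listener options and is now ignored.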
+ */
+int sock_get_old_sockets(const char *unixsocket)
+{
+ char *cmsgbuf = NULL, *tmpbuf = NULL;
+ int *tmpfd = NULL;
+ struct sockaddr_un addr;
+ struct cmsghdr *cmsg;
+ struct msghdr msghdr;
+ struct iovec iov;
+ struct xfer_sock_list *xfer_sock = NULL;
+ struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
+ int sock = -1;
+ int ret = -1;
+ int ret2 = -1;
+ int fd_nb;
+ int got_fd = 0;
+ int cur_fd = 0;
+ size_t maxoff = 0, curoff = 0;
+
+ if (strncmp("sockpair@", unixsocket, strlen("sockpair@")) == 0) {
+ /* sockpair for master-worker usage */
+ int sv[2];
+ int dst_fd;
+
+ dst_fd = strtoll(unixsocket + strlen("sockpair@"), NULL, 0);
+
+ if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+ ha_warning("socketpair(): Cannot create socketpair. Giving up.\n");
+ goto out;
+ }
+
+ if (send_fd_uxst(dst_fd, sv[0]) == -1) {
+ ha_alert("socketpair: cannot transfer socket.\n");
+ close(sv[0]);
+ close(sv[1]);
+ goto out;
+ }
+
+ close(sv[0]); /* we don't need this side anymore */
+ sock = sv[1];
+
+ } else {
+ /* Unix socket */
+
+ sock = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ ha_warning("Failed to connect to the old process socket '%s'\n", unixsocket);
+ goto out;
+ }
+
+ strncpy(addr.sun_path, unixsocket, sizeof(addr.sun_path) - 1);
+ addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
+ addr.sun_family = PF_UNIX;
+
+ ret = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret < 0) {
+ ha_warning("Failed to connect to the old process socket '%s'\n", unixsocket);
+ goto out;
+ }
+
+ }
+ memset(&msghdr, 0, sizeof(msghdr));
+ cmsgbuf = malloc(CMSG_SPACE(sizeof(int)) * MAX_SEND_FD);
+ if (!cmsgbuf) {
+ ha_warning("Failed to allocate memory to send sockets\n");
+ goto out;
+ }
+
+ setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv));
+ iov.iov_base = &fd_nb;
+ iov.iov_len = sizeof(fd_nb);
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+
+ if (send(sock, "_getsocks\n", strlen("_getsocks\n"), 0) != strlen("_getsocks\n")) {
+ ha_warning("Failed to get the number of sockets to be transferred !\n");
+ goto out;
+ }
+
+ /* First, get the number of file descriptors to be received */
+ if (recvmsg(sock, &msghdr, MSG_WAITALL) != sizeof(fd_nb)) {
+ ha_warning("Failed to get the number of sockets to be transferred !\n");
+ goto out;
+ }
+
+ if (fd_nb == 0) {
+ ret2 = 0;
+ goto out;
+ }
+
+ tmpbuf = malloc(fd_nb * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int)));
+ if (tmpbuf == NULL) {
+ ha_warning("Failed to allocate memory while receiving sockets\n");
+ goto out;
+ }
+
+ tmpfd = malloc(fd_nb * sizeof(int));
+ if (tmpfd == NULL) {
+ ha_warning("Failed to allocate memory while receiving sockets\n");
+ goto out;
+ }
+
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int)) * MAX_SEND_FD;
+ iov.iov_len = MAX_SEND_FD * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int));
+
+ do {
+ int ret3;
+
+ iov.iov_base = tmpbuf + curoff;
+
+ ret = recvmsg(sock, &msghdr, 0);
+
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ if (ret <= 0)
+ break;
+
+ /* Send an ack to let the sender know we got the sockets
+ * and it can send some more
+ */
+ do {
+ ret3 = send(sock, &got_fd, sizeof(got_fd), 0);
+ } while (ret3 == -1 && errno == EINTR);
+
+ for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+ size_t totlen = cmsg->cmsg_len - CMSG_LEN(0);
+
+ if (totlen / sizeof(int) + got_fd > fd_nb) {
+ ha_warning("Got to many sockets !\n");
+ goto out;
+ }
+
+ /*
+ * Be paranoid and use memcpy() to avoid any
+ * potential alignment issue.
+ */
+ memcpy(&tmpfd[got_fd], CMSG_DATA(cmsg), totlen);
+ got_fd += totlen / sizeof(int);
+ }
+ }
+ curoff += ret;
+ } while (got_fd < fd_nb);
+
+ if (got_fd != fd_nb) {
+ ha_warning("We didn't get the expected number of sockets (expecting %d got %d)\n",
+ fd_nb, got_fd);
+ goto out;
+ }
+
+ maxoff = curoff;
+ curoff = 0;
+
+ for (cur_fd = 0; cur_fd < got_fd; cur_fd++) {
+ int fd = tmpfd[cur_fd];
+ socklen_t socklen;
+ int val;
+ int len;
+
+ xfer_sock = calloc(1, sizeof(*xfer_sock));
+ if (!xfer_sock) {
+ ha_warning("Failed to allocate memory in get_old_sockets() !\n");
+ break;
+ }
+ xfer_sock->fd = -1;
+
+ socklen = sizeof(xfer_sock->addr);
+ if (getsockname(fd, (struct sockaddr *)&xfer_sock->addr, &socklen) != 0) {
+ ha_warning("Failed to get socket address\n");
+ ha_free(&xfer_sock);
+ continue;
+ }
+
+ if (curoff >= maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+
+ len = tmpbuf[curoff++];
+ if (len > 0) {
+ /* We have a namespace */
+ if (curoff + len > maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+ xfer_sock->namespace = malloc(len + 1);
+ if (!xfer_sock->namespace) {
+ ha_warning("Failed to allocate memory while transferring sockets\n");
+ goto out;
+ }
+ memcpy(xfer_sock->namespace, &tmpbuf[curoff], len);
+ xfer_sock->namespace[len] = 0;
+ xfer_sock->ns_namelen = len;
+ curoff += len;
+ }
+
+ if (curoff >= maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+
+ len = tmpbuf[curoff++];
+ if (len > 0) {
+ /* We have an interface */
+ if (curoff + len > maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+ xfer_sock->iface = malloc(len + 1);
+ if (!xfer_sock->iface) {
+ ha_warning("Failed to allocate memory while transferring sockets\n");
+ goto out;
+ }
+ memcpy(xfer_sock->iface, &tmpbuf[curoff], len);
+ xfer_sock->iface[len] = 0;
+ xfer_sock->if_namelen = len;
+ curoff += len;
+ }
+
+ if (curoff + sizeof(int) > maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+
+ /* we used to have 32 bits of listener options here but we don't
+ * use them anymore.
+ */
+ curoff += sizeof(int);
+
+ /* determine the foreign status directly from the socket itself */
+ if (sock_inet_is_foreign(fd, xfer_sock->addr.ss_family))
+ xfer_sock->options |= SOCK_XFER_OPT_FOREIGN;
+
+ socklen = sizeof(val);
+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &val, &socklen) == 0 && val == SOCK_DGRAM)
+ xfer_sock->options |= SOCK_XFER_OPT_DGRAM;
+
+#if defined(IPV6_V6ONLY)
+ /* keep only the v6only flag depending on what's currently
+ * active on the socket, and always drop the v4v6 one.
+ */
+ socklen = sizeof(val);
+ if (xfer_sock->addr.ss_family == AF_INET6 &&
+ getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &socklen) == 0 && val > 0)
+ xfer_sock->options |= SOCK_XFER_OPT_V6ONLY;
+#endif
+
+ xfer_sock->fd = fd;
+ if (xfer_sock_list)
+ xfer_sock_list->prev = xfer_sock;
+ xfer_sock->next = xfer_sock_list;
+ xfer_sock->prev = NULL;
+ xfer_sock_list = xfer_sock;
+ xfer_sock = NULL;
+ }
+
+ ret2 = 0;
+out:
+ /* If we failed midway make sure to close the remaining
+ * file descriptors
+ */
+ if (tmpfd != NULL && cur_fd < got_fd) {
+ for (; cur_fd < got_fd; cur_fd++) {
+ close(tmpfd[cur_fd]);
+ }
+ }
+
+ free(tmpbuf);
+ free(tmpfd);
+ free(cmsgbuf);
+
+ if (sock != -1)
+ close(sock);
+
+ if (xfer_sock) {
+ free(xfer_sock->namespace);
+ free(xfer_sock->iface);
+ if (xfer_sock->fd != -1)
+ close(xfer_sock->fd);
+ free(xfer_sock);
+ }
+ return (ret2);
+}
+
+/* When binding the receivers, check if a socket has been sent to us by the
+ * previous process that we could reuse, instead of creating a new one. Note
+ * that some address family-specific options are checked on the listener and
+ * on the socket. Typically for AF_INET and AF_INET6, we check for transparent
+ * mode, and for AF_INET6 we also check for "v4v6" or "v6only". The reused
+ * socket is automatically removed from the list so that it's not proposed
+ * anymore.
+ */
+int sock_find_compatible_fd(const struct receiver *rx)
+{
+ struct xfer_sock_list *xfer_sock = xfer_sock_list;
+ int options = 0;
+ int if_namelen = 0;
+ int ns_namelen = 0;
+ int ret = -1;
+
+ if (!rx->proto->fam->addrcmp)
+ return -1;
+
+ if (rx->proto->proto_type == PROTO_TYPE_DGRAM)
+ options |= SOCK_XFER_OPT_DGRAM;
+
+ if (rx->settings->options & RX_O_FOREIGN)
+ options |= SOCK_XFER_OPT_FOREIGN;
+
+ if (rx->addr.ss_family == AF_INET6) {
+ /* Prepare to match the v6only option against what we really want. Note
+ * that sadly the two options are not exclusive to each other and that
+ * v6only is stronger than v4v6.
+ */
+ if ((rx->settings->options & RX_O_V6ONLY) ||
+ (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
+ options |= SOCK_XFER_OPT_V6ONLY;
+ }
+
+ if (rx->settings->interface)
+ if_namelen = strlen(rx->settings->interface);
+#ifdef USE_NS
+ if (rx->settings->netns)
+ ns_namelen = rx->settings->netns->name_len;
+#endif
+
+ while (xfer_sock) {
+ if ((options == xfer_sock->options) &&
+ (if_namelen == xfer_sock->if_namelen) &&
+ (ns_namelen == xfer_sock->ns_namelen) &&
+ (!if_namelen || strcmp(rx->settings->interface, xfer_sock->iface) == 0) &&
+#ifdef USE_NS
+ (!ns_namelen || strcmp(rx->settings->netns->node.key, xfer_sock->namespace) == 0) &&
+#endif
+ rx->proto->fam->addrcmp(&xfer_sock->addr, &rx->addr) == 0)
+ break;
+ xfer_sock = xfer_sock->next;
+ }
+
+ if (xfer_sock != NULL) {
+ ret = xfer_sock->fd;
+ if (xfer_sock == xfer_sock_list)
+ xfer_sock_list = xfer_sock->next;
+ if (xfer_sock->prev)
+ xfer_sock->prev->next = xfer_sock->next;
+ if (xfer_sock->next)
+ xfer_sock->next->prev = xfer_sock->prev;
+ free(xfer_sock->iface);
+ free(xfer_sock->namespace);
+ free(xfer_sock);
+ }
+ return ret;
+}
+
+/* After all protocols are bound, there may remain some old sockets that have
+ * been removed between the previous config and the new one. These ones must
+ * be dropped, otherwise they will remain open and may prevent a service from
+ * restarting.
+ */
+void sock_drop_unused_old_sockets()
+{
+ while (xfer_sock_list != NULL) {
+ struct xfer_sock_list *tmpxfer = xfer_sock_list->next;
+
+ close(xfer_sock_list->fd);
+ free(xfer_sock_list->iface);
+ free(xfer_sock_list->namespace);
+ free(xfer_sock_list);
+ xfer_sock_list = tmpxfer;
+ }
+}
+
+/* Tests if the receiver supports accepting connections. Returns positive on
+ * success, 0 if not possible, negative if the socket is non-recoverable. The
+ * rationale behind this is that inherited FDs may be broken and that shared
+ * FDs might have been paused by another process.
+ */
+int sock_accepting_conn(const struct receiver *rx)
+{
+ int opt_val = 0;
+ socklen_t opt_len = sizeof(opt_val);
+
+ if (getsockopt(rx->fd, SOL_SOCKET, SO_ACCEPTCONN, &opt_val, &opt_len) == -1)
+ return -1;
+
+ return opt_val;
+}
+
+/* This is the FD handler IO callback for stream sockets configured for
+ * accepting incoming connections. It's a pass-through to listener_accept()
+ * which will iterate over the listener protocol's accept_conn() function.
+ * The FD's owner must be a listener.
+ */
+void sock_accept_iocb(int fd)
+{
+ struct listener *l = fdtab[fd].owner;
+
+ if (!l)
+ return;
+
+ BUG_ON(!!master != !!(l->rx.flags & RX_F_MWORKER));
+ listener_accept(l);
+}
+
+/* This completes the initialization of connection <conn> by inserting its FD
+ * into the fdtab, associating it with the regular connection handler. It will
+ * be bound to the current thread only. This call cannot fail.
+ */
+void sock_conn_ctrl_init(struct connection *conn)
+{
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+ fd_insert(conn->handle.fd, conn, sock_conn_iocb, tid_bit);
+}
+
+/* This completes the release of connection <conn> by removing its FD from the
+ * fdtab and deleting it. The connection must not use the FD anymore past this
+ * point; its FD field is replaced with DEAD_FD_MAGIC.
+ */
+void sock_conn_ctrl_close(struct connection *conn)
+{
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+ fd_delete(conn->handle.fd);
+ conn->handle.fd = DEAD_FD_MAGIC;
+}
+
+/* This is the callback which is set when a connection establishment is pending
+ * and we have nothing to send. It may update the FD polling status to indicate
+ * !READY. It returns 0 if it fails in a fatal way or needs to poll to go
+ * further, otherwise it returns non-zero and removes the CO_FL_WAIT_L4_CONN
+ * flag from the connection's flags. In case of error, it sets CO_FL_ERROR and
+ * leaves the error code in errno.
+ */
+int sock_conn_check(struct connection *conn)
+{
+ struct sockaddr_storage *addr;
+ int fd = conn->handle.fd;
+
+ if (conn->flags & CO_FL_ERROR)
+ return 0;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ if (!(conn->flags & CO_FL_WAIT_L4_CONN))
+ return 1; /* strange we were called while ready */
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_send_ready(fd) && !(fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP)))
+ return 0;
+
+ /* Here we have 2 cases :
+ * - modern pollers, able to report ERR/HUP. If these ones return any
+ * of these flags then it's likely a failure, otherwise it possibly
+ * is a success (i.e. there may have been data received just before
+ * the error was reported).
+ * - select, which doesn't report these and with which it's always
+ * necessary either to try connect() again or to check for SO_ERROR.
+ * In order to simplify everything, we double-check using connect() as
+ * soon as we meet either of these delicate situations. Note that
+ * SO_ERROR would clear the error after reporting it!
+ */
+ if (cur_poller.flags & HAP_POLL_F_ERRHUP) {
+ /* modern poller, able to report ERR/HUP */
+ if ((fdtab[fd].state & (FD_POLL_IN|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_IN)
+ goto done;
+ if ((fdtab[fd].state & (FD_POLL_OUT|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_OUT)
+ goto done;
+ if (!(fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP)))
+ goto wait;
+ /* error present, fall through common error check path */
+ }
+
+ /* Use connect() to check the state of the socket. This has the double
+ * advantage of *not* clearing the error (so that health checks can
+ * still use getsockopt(SO_ERROR)) and giving us the following info :
+ * - error
+ * - connecting (EALREADY, EINPROGRESS)
+ * - connected (EISCONN, 0)
+ */
+ addr = conn->dst;
+ if ((conn->flags & CO_FL_SOCKS4) && obj_type(conn->target) == OBJ_TYPE_SERVER)
+ addr = &objt_server(conn->target)->socks4_addr;
+
+ if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
+ if (errno == EALREADY || errno == EINPROGRESS)
+ goto wait;
+
+ if (errno && errno != EISCONN)
+ goto out_error;
+ }
+
+ done:
+ /* The FD is ready now, we'll mark the connection as complete and
+ * forward the event to the transport layer which will notify the
+ * data layer.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ fd_may_send(fd);
+ fd_cond_recv(fd);
+ errno = 0; // make health checks happy
+ return 1;
+
+ out_error:
+ /* Write error on the file descriptor. Report it to the connection
+ * and disable polling on this FD.
+ */
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK);
+ fd_stop_both(fd);
+ return 0;
+
+ wait:
+ fd_cant_send(fd);
+ fd_want_send(fd);
+ return 0;
+}
+
+/* I/O callback for fd-based connections. It calls the read/write handlers
+ * provided by the connection's sock_ops, which must be valid.
+ */
+void sock_conn_iocb(int fd)
+{
+ struct connection *conn = fdtab[fd].owner;
+ unsigned int flags;
+ int need_wake = 0;
+
+ if (unlikely(!conn)) {
+ activity[tid].conn_dead++;
+ return;
+ }
+
+ flags = conn->flags & ~CO_FL_ERROR; /* ensure to call the wake handler upon error */
+
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) &&
+ ((fd_send_ready(fd) && fd_send_active(fd)) ||
+ (fd_recv_ready(fd) && fd_recv_active(fd)))) {
+ /* Still waiting for a connection to establish and nothing was
+ * attempted yet to probe the connection. This will clear the
+ * CO_FL_WAIT_L4_CONN flag on success.
+ */
+ if (!sock_conn_check(conn))
+ goto leave;
+ need_wake = 1;
+ }
+
+ if (fd_send_ready(fd) && fd_send_active(fd)) {
+ /* force reporting of activity by clearing the previous flags :
+ * we'll have at least ERROR or CONNECTED at the end of an I/O,
+ * both of which will be detected below.
+ */
+ flags = 0;
+ if (conn->subs && conn->subs->events & SUB_RETRY_SEND) {
+ need_wake = 0; // wake will be called after this I/O
+ tasklet_wakeup(conn->subs->tasklet);
+ conn->subs->events &= ~SUB_RETRY_SEND;
+ if (!conn->subs->events)
+ conn->subs = NULL;
+ }
+ fd_stop_send(fd);
+ }
+
+ /* The data transfer starts here and stops on error and handshakes. Note
+ * that we must absolutely test conn->xprt at each step in case it suddenly
+ * changes due to a quick unexpected close().
+ */
+ if (fd_recv_ready(fd) && fd_recv_active(fd)) {
+ /* force reporting of activity by clearing the previous flags :
+ * we'll have at least ERROR or CONNECTED at the end of an I/O,
+ * both of which will be detected below.
+ */
+ flags = 0;
+ if (conn->subs && conn->subs->events & SUB_RETRY_RECV) {
+ need_wake = 0; // wake will be called after this I/O
+ tasklet_wakeup(conn->subs->tasklet);
+ conn->subs->events &= ~SUB_RETRY_RECV;
+ if (!conn->subs->events)
+ conn->subs = NULL;
+ }
+ fd_stop_recv(fd);
+ }
+
+ leave:
+ /* we may have to finish installing a mux or to wake it up based on
+ * what was just done above. It may kill the connection so we have to
+ * be prepared not to use it anymore.
+ */
+ if (conn_notify_mux(conn, flags, need_wake) < 0)
+ return;
+
+ /* commit polling changes in case of error.
+ * WT: it seems that the last case where this could still be relevant
+ * is if a mux wake function above reports a connection error but does
+ * not stop polling. Shouldn't we enforce this into the mux instead of
+ * having to deal with this ?
+ */
+ if (unlikely(conn->flags & CO_FL_ERROR)) {
+ if (conn_ctrl_ready(conn))
+ fd_stop_both(fd);
+ }
+}
+
+/* Drains possibly pending incoming data on the file descriptor attached to the
+ * connection. This is used to know whether we need to disable lingering on
+ * close. Returns non-zero if it is safe to close without disabling lingering,
+ * otherwise zero.
+ */
+int sock_drain(struct connection *conn)
+{
+ int turns = 2;
+ int fd = conn->handle.fd;
+ int len;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP))
+ goto shut;
+
+ if (!(conn->flags & CO_FL_WANT_DRAIN) && !fd_recv_ready(fd))
+ return 0;
+
+ /* no drain function defined, use the generic one */
+
+ while (turns) {
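+ /* When the platform supports it, recv() with MSG_TRUNC and a NULL
+ * buffer discards pending data without copying it ; if the kernel
+ * rejects the NULL buffer with EFAULT, we fall back to a regular
+ * read into the trash buffer.
+ */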
+#ifdef MSG_TRUNC_CLEARS_INPUT
+ len = recv(fd, NULL, INT_MAX, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_TRUNC);
+ if (len == -1 && errno == EFAULT)
+#endif
+ len = recv(fd, trash.area, trash.size, MSG_DONTWAIT | MSG_NOSIGNAL);
+
+ if (len == 0)
+ goto shut;
+
+ if (len < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ /* connection not closed yet */
+ fd_cant_recv(fd);
+ break;
+ }
+ if (errno == EINTR) /* oops, try again */
+ continue;
+ /* other errors indicate a dead connection, fine. */
+ goto shut;
+ }
+ /* OK we read some data, let's try again once */
+ turns--;
+ }
+
+ /* some data are still present, give up */
+ return 0;
+
+ shut:
+ /* we're certain the connection was shut down */
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK);
+ return 1;
+}
+
+/* Checks the connection's FD for readiness of events <event_type>, which may
+ * only be a combination of SUB_RETRY_RECV and SUB_RETRY_SEND. Those which are
+ * ready are returned. The ones that are not ready are enabled. The caller is
+ * expected to do what is needed to handle ready events and to deal with
+ * subsequent wakeups caused by the requested events' readiness.
+ */
+int sock_check_events(struct connection *conn, int event_type)
+{
+ int ret = 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (event_type & SUB_RETRY_RECV) {
+ if (fd_recv_ready(conn->handle.fd))
+ ret |= SUB_RETRY_RECV;
+ else
+ fd_want_recv(conn->handle.fd);
+ }
+
+ if (event_type & SUB_RETRY_SEND) {
+ if (fd_send_ready(conn->handle.fd))
+ ret |= SUB_RETRY_SEND;
+ else
+ fd_want_send(conn->handle.fd);
+ }
+
+ return ret;
+}
+
+/* Ignore readiness events from connection's FD for events of types <event_type>
+ * which may only be a combination of SUB_RETRY_RECV and SUB_RETRY_SEND.
+ */
+void sock_ignore_events(struct connection *conn, int event_type)
+{
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (event_type & SUB_RETRY_RECV)
+ fd_stop_recv(conn->handle.fd);
+
+ if (event_type & SUB_RETRY_SEND)
+ fd_stop_send(conn->handle.fd);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/sock_inet.c b/src/sock_inet.c
new file mode 100644
index 0000000..2bd27df
--- /dev/null
+++ b/src/sock_inet.c
@@ -0,0 +1,488 @@
+/*
+ * AF_INET/AF_INET6 socket management
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/namespace.h>
+#include <haproxy/receiver-t.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/tools.h>
+
+struct proto_fam proto_fam_inet4 = {
+ .name = "inet4",
+ .sock_domain = PF_INET,
+ .sock_family = AF_INET,
+ .sock_addrlen = sizeof(struct sockaddr_in),
+ .l3_addrlen = 32/8,
+ .addrcmp = sock_inet4_addrcmp,
+ .bind = sock_inet_bind_receiver,
+ .get_src = sock_get_src,
+ .get_dst = sock_inet_get_dst,
+ .set_port = sock_inet_set_port,
+};
+
+struct proto_fam proto_fam_inet6 = {
+ .name = "inet6",
+ .sock_domain = PF_INET6,
+ .sock_family = AF_INET6,
+ .sock_addrlen = sizeof(struct sockaddr_in6),
+ .l3_addrlen = 128/8,
+ .addrcmp = sock_inet6_addrcmp,
+ .bind = sock_inet_bind_receiver,
+ .get_src = sock_get_src,
+ .get_dst = sock_get_dst,
+ .set_port = sock_inet_set_port,
+};
+
+/* PLEASE NOTE for the functions below:
+ * - sock_inet4_* is solely for AF_INET (IPv4)
+ * - sock_inet6_* is solely for AF_INET6 (IPv6)
+ * - sock_inet_* is for either
+ *
+ * The address family SHOULD always be checked. In some cases a function will
+ * be used in a situation where the address family is guaranteed (e.g. protocol
+ * definitions), so the test may be avoided. This special case must then be
+ * mentioned in the comment before the function definition.
+ */
+
+/* determine if the operating system uses IPV6_V6ONLY by default. 0=no, 1=yes.
+ * It also remains zero if IPv6 is not enabled/configured.
+ */
+int sock_inet6_v6only_default = 0;
+
+/* Default TCPv4/TCPv6 MSS settings. -1=unknown. */
+int sock_inet_tcp_maxseg_default = -1;
+int sock_inet6_tcp_maxseg_default = -1;
+
+/* Compares two AF_INET sockaddr addresses. Returns 0 if they match or non-zero
+ * if they do not match.
+ */
+int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
+{
+ const struct sockaddr_in *a4 = (const struct sockaddr_in *)a;
+ const struct sockaddr_in *b4 = (const struct sockaddr_in *)b;
+
+ if (a->ss_family != b->ss_family)
+ return -1;
+
+ if (a->ss_family != AF_INET)
+ return -1;
+
+ if (a4->sin_port != b4->sin_port)
+ return -1;
+
+ return memcmp(&a4->sin_addr, &b4->sin_addr, sizeof(a4->sin_addr));
+}
+
+/* Compares two AF_INET6 sockaddr addresses. Returns 0 if they match or
+ * non-zero if they do not match.
+ */
+int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
+{
+ const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a;
+ const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b;
+
+ if (a->ss_family != b->ss_family)
+ return -1;
+
+ if (a->ss_family != AF_INET6)
+ return -1;
+
+ if (a6->sin6_port != b6->sin6_port)
+ return -1;
+
+ return memcmp(&a6->sin6_addr, &b6->sin6_addr, sizeof(a6->sin6_addr));
+}
+
+/* Sets the port <port> on IPv4 or IPv6 address <addr>. The address family is
+ * determined from the sockaddr_storage's address family. Nothing is done for
+ * other families.
+ */
+void sock_inet_set_port(struct sockaddr_storage *addr, int port)
+{
+ if (addr->ss_family == AF_INET)
+ ((struct sockaddr_in *)addr)->sin_port = htons(port);
+ else if (addr->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+}
+
+/*
+ * Retrieves the original destination address for the socket <fd> which must be
+ * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
+ * (=0) or an initiator (!=0). In the case of a listener, if the original
+ * destination address was translated, the original address is retrieved. It
+ * returns 0 in case of success, -1 in case of error. The socket's destination
+ * address is stored in <sa> for <salen> bytes.
+ */
+int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
+{
+ if (dir)
+ return getpeername(fd, sa, &salen);
+ else {
+ int ret = getsockname(fd, sa, &salen);
+
+ if (ret < 0)
+ return ret;
+
+#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
+ /* For TPROXY and Netfilter's NAT, we can retrieve the original
+ * IPv4 address before DNAT/REDIRECT. We must not do that with
+ * other families because v6-mapped IPv4 addresses are still
+ * reported as v4.
+ */
+ if (getsockopt(fd, IPPROTO_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
+ return 0;
+#endif
+ return ret;
+ }
+}
+
+/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
+ * according to the various supported socket options. The socket's address family
+ * must be passed in <family>.
+ */
+int sock_inet_is_foreign(int fd, sa_family_t family)
+{
+ int val __maybe_unused;
+ socklen_t len __maybe_unused;
+
+ switch (family) {
+ case AF_INET:
+#if defined(IP_TRANSPARENT)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IP_FREEBIND)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IP_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(SO_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+ break;
+
+ case AF_INET6:
+#if defined(IPV6_TRANSPARENT)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IP_FREEBIND)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IPV6_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(SO_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+ break;
+ }
+ return 0;
+}
+
+/* Attempt all known socket options to prepare an AF_INET socket to be bound
+ * to a foreign address. The socket must already exist and must not be bound.
+ * 1 is returned on success, 0 on failure. The caller must check the address
+ * family before calling this function.
+ */
+int sock_inet4_make_foreign(int fd)
+{
+ return
+#if defined(IP_TRANSPARENT)
+ setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IP_FREEBIND)
+ setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IP_BINDANY)
+ setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(SO_BINDANY)
+ setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+ 0;
+}
+
+/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
+ * to a foreign address. The socket must already exist and must not be bound.
+ * 1 is returned on success, 0 on failure. The caller must check the address
+ * family before calling this function.
+ */
+int sock_inet6_make_foreign(int fd)
+{
+ return
+#if defined(IPV6_TRANSPARENT)
+ setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IP_FREEBIND)
+ setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IPV6_BINDANY)
+ setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(SO_BINDANY)
+ setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+ 0;
+}
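+
+/* A note on the pattern used by the two functions above : the "||" chain
+ * tries each compiled-in option in turn and stops at the first setsockopt()
+ * call that succeeds, so the function returns 1 as soon as one of them works
+ * and 0 only when all of them failed (or none was available at build time).
+ */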
+
+/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
+ * context, respectively. Returns an error code made of ERR_* bits on failure
+ * or ERR_NONE on success. On failure, an error message may be passed into
+ * <errmsg>.
+ */
+int sock_inet_bind_receiver(struct receiver *rx, char **errmsg)
+{
+ int fd, err, ext;
+ /* copy listener addr because sometimes we need to switch family */
+ struct sockaddr_storage addr_inet = rx->addr;
+
+ /* force to classic sock family, not AF_CUST_* */
+ addr_inet.ss_family = rx->proto->fam->sock_family;
+
+ /* ensure we never return garbage */
+ if (errmsg)
+ *errmsg = 0;
+
+ err = ERR_NONE;
+
+ if (rx->flags & RX_F_BOUND)
+ return ERR_NONE;
+
+ /* if no FD was assigned yet, we'll have to either find a compatible
+ * one or create a new one.
+ */
+ if (rx->fd == -1)
+ rx->fd = sock_find_compatible_fd(rx);
+
+ /* if the receiver now has an fd assigned, then we were offered the fd
+ * by an external process (most likely the parent), and we don't want
+ * to create a new socket. However we still want to set a few flags on
+ * the socket.
+ */
+ fd = rx->fd;
+ ext = (fd >= 0);
+
+ if (!ext) {
+ fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
+ rx->proto->sock_type, rx->proto->sock_prot);
+ if (fd == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+ }
+
+ if (ext && fd < global.maxsock && fdtab[fd].owner) {
+ /* This FD was already bound so this means that it was already
+ * known and registered before parsing, hence it's an inherited
+ * FD. The only reason why it's already known here is that it
+ * has been registered multiple times (multiple listeners on the
+ * same, or a "shards" directive on the line). There cannot be
+ * multiple listeners on one FD but at least we can create a
+ * new one from the original one. We won't reconfigure it,
+ * however, as this was already done for the first one.
+ */
+ fd = dup(fd);
+ if (fd == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+ }
+
+ if (fd >= global.maxsock) {
+ err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
+ memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
+ goto bind_close_return;
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot make socket non-blocking");
+ goto bind_close_return;
+ }
+
+ if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
+ /* not fatal but should be reported */
+ memprintf(errmsg, "cannot do so_reuseaddr");
+ err |= ERR_ALERT;
+ }
+
+#ifdef SO_REUSEPORT
+ /* OpenBSD and Linux 3.9 support this. Since the macro may be defined in
+ * libcs built for older kernels, the call may still fail at runtime, in
+ * which case the error is silently ignored.
+ */
+ if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+#endif
+
+ if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
+ switch (addr_inet.ss_family) {
+ case AF_INET:
+ if (!sock_inet4_make_foreign(fd)) {
+ memprintf(errmsg, "cannot make receiving socket transparent");
+ err |= ERR_ALERT;
+ }
+ break;
+ case AF_INET6:
+ if (!sock_inet6_make_foreign(fd)) {
+ memprintf(errmsg, "cannot make receiving socket transparent");
+ err |= ERR_ALERT;
+ }
+ break;
+ }
+ }
+
+#ifdef SO_BINDTODEVICE
+ /* Note: this might fail if the process lacks CAP_NET_RAW */
+ if (!ext && rx->settings->interface) {
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ rx->settings->interface,
+ strlen(rx->settings->interface) + 1) == -1) {
+ memprintf(errmsg, "cannot bind receiver to device '%s' (%s)", rx->settings->interface, strerror(errno));
+ err |= ERR_WARN;
+ }
+ }
+#endif
+
+#if defined(IPV6_V6ONLY)
+ if (addr_inet.ss_family == AF_INET6 && !ext) {
+ /* Prepare to match the v6only option against what we really want. Note
+ * that sadly the two options are not exclusive to each other and that
+ * v6only is stronger than v4v6.
+ */
+ if ((rx->settings->options & RX_O_V6ONLY) ||
+ (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
+ setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
+ else
+ setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
+ }
+#endif
+
+ if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));
+ goto bind_close_return;
+ }
+
+ rx->fd = fd;
+ rx->flags |= RX_F_BOUND;
+
+ fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->bind_thread) & all_threads_mask);
+
+ /* for now, all regularly bound TCP listeners are exportable */
+ if (!(rx->flags & RX_F_INHERITED))
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
+
+ bind_return:
+ if (errmsg && *errmsg) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&addr_inet, pn, sizeof(pn));
+ memprintf(errmsg, "%s for [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
+ }
+ return err;
+
+ bind_close_return:
+ close(fd);
+ goto bind_return;
+}
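+
+/* Usage sketch, illustrative only and not part of the original patch: a
+ * typical caller checks the returned ERR_* bits and owns the error string
+ * allocated by memprintf(). The function name below is hypothetical, and
+ * ha_alert() is assumed to be available in this file.
+ */
+static inline int sock_inet_bind_example(struct receiver *rx)
+{
+ char *errmsg = NULL;
+ int err;
+
+ err = sock_inet_bind_receiver(rx, &errmsg);
+ if (errmsg) {
+ /* only set on error; already includes the address and port */
+ ha_alert("binding receiver: %s\n", errmsg);
+ free(errmsg);
+ }
+ return err;
+}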
+
+static void sock_inet_prepare()
+{
+ int fd, val;
+ socklen_t len;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd >= 0) {
+#ifdef TCP_MAXSEG
+ /* retrieve the OS' default mss for TCPv4 */
+ len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
+ sock_inet_tcp_maxseg_default = val;
+#endif
+ close(fd);
+ }
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd >= 0) {
+#if defined(IPV6_V6ONLY)
+ /* retrieve the OS' bindv6only value */
+ len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
+ sock_inet6_v6only_default = 1;
+#endif
+
+#ifdef TCP_MAXSEG
+ /* retrieve the OS' default mss for TCPv6 */
+ len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
+ sock_inet6_tcp_maxseg_default = val;
+#endif
+ close(fd);
+ }
+}
+
+INITCALL0(STG_PREPARE, sock_inet_prepare);
+
+
+REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
+#if defined(IP_TRANSPARENT)
+ " IP_TRANSPARENT"
+#endif
+#if defined(IPV6_TRANSPARENT)
+ " IPV6_TRANSPARENT"
+#endif
+#if defined(IP_FREEBIND)
+ " IP_FREEBIND"
+#endif
+#if defined(IP_BINDANY)
+ " IP_BINDANY"
+#endif
+#if defined(IPV6_BINDANY)
+ " IPV6_BINDANY"
+#endif
+#if defined(SO_BINDANY)
+ " SO_BINDANY"
+#endif
+ "");
diff --git a/src/sock_unix.c b/src/sock_unix.c
new file mode 100644
index 0000000..1c3dbd7
--- /dev/null
+++ b/src/sock_unix.c
@@ -0,0 +1,350 @@
+/*
+ * SOCK_UNIX socket management
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/listener.h>
+#include <haproxy/receiver-t.h>
+#include <haproxy/namespace.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_unix.h>
+#include <haproxy/tools.h>
+
+
+struct proto_fam proto_fam_unix = {
+ .name = "unix",
+ .sock_domain = PF_UNIX,
+ .sock_family = AF_UNIX,
+ .sock_addrlen = sizeof(struct sockaddr_un),
+ .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
+ .addrcmp = sock_unix_addrcmp,
+ .bind = sock_unix_bind_receiver,
+ .get_src = sock_get_src,
+ .get_dst = sock_get_dst,
+};
+
+/* PLEASE NOTE for functions below:
+ *
+ * The address family SHOULD always be checked. In some cases a function will
+ * be used in a situation where the address family is guaranteed (e.g. protocol
+ * definitions), so the test may be avoided. This special case must then be
+ * mentioned in the comment before the function definition.
+ */
+
+
+/* Compares two AF_UNIX sockaddr addresses. Returns 0 if they match or non-zero
+ * if they do not match. It also supports ABNS socket addresses (those starting
+ * with \0). For regular UNIX sockets however, it explicitly supports matching
+ * names ending exactly with .XXXXX.tmp, which denote newly bound sockets
+ * about to be replaced; this suffix is then ignored. Note that our UNIX socket
+ * paths are always zero-terminated.
+ */
+int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
+{
+ const struct sockaddr_un *au = (const struct sockaddr_un *)a;
+ const struct sockaddr_un *bu = (const struct sockaddr_un *)b;
+ int idx, dot, idx2;
+
+ if (a->ss_family != b->ss_family)
+ return -1;
+
+ if (a->ss_family != AF_UNIX)
+ return -1;
+
+ if (au->sun_path[0] != bu->sun_path[0])
+ return -1;
+
+ if (au->sun_path[0] == 0)
+ return memcmp(au->sun_path, bu->sun_path, sizeof(au->sun_path));
+
+ idx = 1; dot = 0;
+ while (au->sun_path[idx] == bu->sun_path[idx]) {
+ if (au->sun_path[idx] == 0)
+ return 0;
+ if (au->sun_path[idx] == '.')
+ dot = idx;
+ idx++;
+ }
+
+ /* Now we have a difference. It's OK if they are within or after a
+ * sequence of digits following a dot, and are followed by ".tmp".
+ *
+ * make sure to perform the check against tempname if the compared
+ * string is in "final" format (does not end with ".XXXX.tmp").
+ *
+ * Examples:
+ * /tmp/test matches with /tmp/test.1822.tmp
+ * /tmp/test.1822.tmp matches with /tmp/test.XXXX.tmp
+ */
+ if (au->sun_path[idx] == 0 || bu->sun_path[idx] == 0) {
+ if (au->sun_path[idx] == '.' || bu->sun_path[idx] == '.')
+ dot = idx; /* try to match against temp path */
+ else
+ return -1; /* invalid temp path */
+ }
+
+ if (!dot)
+ return -1;
+
+ /* First, check in path "a" */
+ if (au->sun_path[idx] != 0) {
+ for (idx2 = dot + 1; idx2 && isdigit((unsigned char)au->sun_path[idx2]); idx2++)
+ ;
+ if (strcmp(au->sun_path + idx2, ".tmp") != 0)
+ return -1;
+ }
+
+ /* Then check in path "b" */
+ if (bu->sun_path[idx] != 0) {
+ for (idx2 = dot + 1; idx2 && isdigit((unsigned char)bu->sun_path[idx2]); idx2++)
+ ;
+ if (strcmp(bu->sun_path + idx2, ".tmp") != 0)
+ return -1;
+ }
+
+ /* OK that's a match */
+ return 0;
+}
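+
+/* Illustrative wrapper, not part of the original patch, exercising the
+ * matching rules above; the name is hypothetical:
+ *   "/tmp/test" vs "/tmp/test.1822.tmp" -> 1 (match)
+ *   "/tmp/test.1822.tmp" vs "/tmp/test.4711.tmp" -> 1 (match)
+ *   "/tmp/test" vs "/tmp/other.1822.tmp" -> 0 (no match)
+ */
+static inline int sock_unix_paths_match_example(const char *p1, const char *p2)
+{
+ struct sockaddr_storage a = { 0 }, b = { 0 };
+
+ a.ss_family = b.ss_family = AF_UNIX;
+ strncpy(((struct sockaddr_un *)&a)->sun_path, p1,
+ sizeof(((struct sockaddr_un *)&a)->sun_path) - 1);
+ strncpy(((struct sockaddr_un *)&b)->sun_path, p2,
+ sizeof(((struct sockaddr_un *)&b)->sun_path) - 1);
+ return sock_unix_addrcmp(&a, &b) == 0;
+}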
+
+/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
+ * context, respectively, with ->bind_thread as the thread mask. Returns an
+ * error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
+ * an error message may be passed into <errmsg>.
+ */
+int sock_unix_bind_receiver(struct receiver *rx, char **errmsg)
+{
+ char tempname[MAXPATHLEN];
+ char backname[MAXPATHLEN];
+ struct sockaddr_un addr;
+ const char *path;
+ int maxpathlen;
+ int fd, err, ext, ret;
+
+ /* ensure we never return garbage */
+ if (errmsg)
+ *errmsg = 0;
+
+ err = ERR_NONE;
+
+ if (rx->flags & RX_F_BOUND)
+ return ERR_NONE;
+
+ /* if no FD was assigned yet, we'll have to either find a compatible
+ * one or create a new one.
+ */
+ if (rx->fd == -1)
+ rx->fd = sock_find_compatible_fd(rx);
+
+ path = ((struct sockaddr_un *)&rx->addr)->sun_path;
+ maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path));
+
+ /* if the listener already has an fd assigned, then we were offered the
+ * fd by an external process (most likely the parent), and we don't want
+ * to create a new socket. However we still want to set a few flags on
+ * the socket.
+ */
+ fd = rx->fd;
+ ext = (fd >= 0);
+ if (ext)
+ goto fd_ready;
+
+ if (path[0]) {
+ ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid);
+ if (ret < 0 || ret >= maxpathlen) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
+ goto bind_return;
+ }
+
+ ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid);
+ if (ret < 0 || ret >= maxpathlen) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
+ goto bind_return;
+ }
+
+ /* clean existing orphaned entries */
+ if (unlink(tempname) < 0 && errno != ENOENT) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ if (unlink(backname) < 0 && errno != ENOENT) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ /* backup the existing socket */
+ if (link(path, backname) < 0 && errno != ENOENT) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ /* Note: this test is redundant with the snprintf one above and
+ * will never trigger, it's just added as the only way to shut
+ * gcc's painfully dumb warning about possibly truncated output
+ * during strncpy(). Don't move it above or smart gcc will not
+ * see it!
+ */
+ if (strlen(tempname) >= sizeof(addr.sun_path)) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
+ goto bind_return;
+ }
+
+ strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1);
+ addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
+ }
+ else {
+ /* first char is zero, it's an abstract socket whose address
+ * is defined by all the bytes past this zero.
+ */
+ memcpy(addr.sun_path, path, sizeof(addr.sun_path));
+ }
+ addr.sun_family = AF_UNIX;
+
+ /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */
+ fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot);
+ if (fd < 0) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ fd_ready:
+ if (ext && fd < global.maxsock && fdtab[fd].owner) {
+ /* This FD was already bound so this means that it was already
+ * known and registered before parsing, hence it's an inherited
+ * FD. The only reason why it's already known here is that it
+ * has been registered multiple times (multiple listeners on the
+ * same address, or a "shards" directive on the line). There cannot be
+ * multiple listeners on one FD but at least we can create a
+ * new one from the original one. We won't reconfigure it,
+ * however, as this was already done for the first one.
+ */
+ fd = dup(fd);
+ if (fd == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+ }
+
+ if (fd >= global.maxsock) {
+ err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
+ memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
+ goto bind_close_return;
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot make socket non-blocking");
+ goto bind_close_return;
+ }
+
+ if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ /* note that bind() creates the socket <tempname> on the file system */
+ if (errno == EADDRINUSE) {
+ /* the old process might still own it, let's retry */
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot bind UNIX socket (already in use)");
+ goto bind_close_return;
+ }
+ else {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno));
+ goto bind_close_return;
+ }
+ }
+
+ /* <uid> and <gid> other than -1 will be used to change the socket owner.
+ * If <mode> is not 0, it will be used to restrict access to the socket.
+ * While it is known not to be portable on every OS, it's still useful
+ * where it works. We also don't change permissions on abstract sockets.
+ */
+ if (!ext && path[0] &&
+ (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) &&
+ (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) ||
+ (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno));
+ goto err_unlink_temp;
+ }
+
+ /* Point of no return: we are ready, we'll switch the sockets. We don't
+ * fear losing the socket <path> because we have a copy of it in
+ * backname. Abstract sockets are not renamed.
+ */
+ if (!ext && path[0] && rename(tempname, path) < 0) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno));
+ goto err_rename;
+ }
+
+ /* Cleanup: only unlink if we didn't inherit the fd from the parent */
+ if (!ext && path[0])
+ unlink(backname);
+
+ rx->fd = fd;
+ rx->flags |= RX_F_BOUND;
+
+ fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->bind_thread) & all_threads_mask);
+
+ /* for now, all regularly bound listeners are exportable */
+ if (!(rx->flags & RX_F_INHERITED))
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
+
+ return err;
+
+ err_rename:
+ ret = rename(backname, path);
+ if (ret < 0 && errno == ENOENT)
+ unlink(path);
+ err_unlink_temp:
+ if (!ext && path[0])
+ unlink(tempname);
+ close(fd);
+ err_unlink_back:
+ if (!ext && path[0])
+ unlink(backname);
+ bind_return:
+ if (errmsg && *errmsg) {
+ if (!ext)
+ memprintf(errmsg, "%s [%s]", *errmsg, path);
+ else
+ memprintf(errmsg, "%s [fd %d]", *errmsg, fd);
+ }
+ return err;
+
+ bind_close_return:
+ close(fd);
+ goto bind_return;
+}
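+
+/* The function above relies on a generic backup-and-swap pattern: link the
+ * current file to a backup, bind on a temporary name, rename it over the
+ * final name, then drop the backup. Minimal standalone sketch of that idea,
+ * with hypothetical names, illustrative only:
+ */
+static inline int file_swap_example(const char *tmp, const char *final, const char *bak)
+{
+ if (link(final, bak) < 0 && errno != ENOENT)
+ return -1; /* cannot preserve the current file */
+ if (rename(tmp, final) < 0) { /* atomic switch on success */
+ if (rename(bak, final) < 0 && errno == ENOENT)
+ unlink(final); /* no backup: nothing to restore */
+ return -1;
+ }
+ unlink(bak); /* committed: the backup is no longer needed */
+ return 0;
+}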
diff --git a/src/ssl_ckch.c b/src/ssl_ckch.c
new file mode 100644
index 0000000..19980c3
--- /dev/null
+++ b/src/ssl_ckch.c
@@ -0,0 +1,3938 @@
+/*
+ *
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+
+#include <haproxy/applet.h>
+#include <haproxy/base64.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+/* Uncommitted CKCH transaction */
+
+static struct {
+ struct ckch_store *new_ckchs;
+ struct ckch_store *old_ckchs;
+ char *path;
+} ckchs_transaction;
+
+/* Uncommitted CA file transaction */
+
+static struct {
+ struct cafile_entry *old_cafile_entry;
+ struct cafile_entry *new_cafile_entry;
+ char *path;
+} cafile_transaction;
+
+/* Uncommitted CRL file transaction */
+
+static struct {
+ struct cafile_entry *old_crlfile_entry;
+ struct cafile_entry *new_crlfile_entry;
+ char *path;
+} crlfile_transaction;
+
+/* CLI context used by "show cafile" */
+struct show_cafile_ctx {
+ struct cafile_entry *cur_cafile_entry;
+ struct cafile_entry *old_cafile_entry;
+ int ca_index;
+ int show_all;
+};
+
+/* CLI context used by "show crlfile" */
+struct show_crlfile_ctx {
+ struct cafile_entry *cafile_entry;
+ struct cafile_entry *old_crlfile_entry;
+ int index;
+};
+
+/* CLI context used by "show cert" */
+struct show_cert_ctx {
+ struct ckch_store *old_ckchs;
+ struct ckch_store *cur_ckchs;
+ int transaction;
+};
+
+/* CLI context used by "commit cert" */
+struct commit_cert_ctx {
+ struct ckch_store *old_ckchs;
+ struct ckch_store *new_ckchs;
+ struct ckch_inst *next_ckchi;
+ char *err;
+ enum {
+ CERT_ST_INIT = 0,
+ CERT_ST_GEN,
+ CERT_ST_INSERT,
+ CERT_ST_SUCCESS,
+ CERT_ST_FIN,
+ CERT_ST_ERROR,
+ } state;
+};
+
+/* CLI context used by "set cert" */
+struct set_cert_ctx {
+ struct ckch_store *old_ckchs;
+ struct ckch_store *new_ckchs;
+};
+
+/* CLI context used by "set ca-file" */
+struct set_cafile_ctx {
+ struct cafile_entry *old_cafile_entry;
+ struct cafile_entry *new_cafile_entry;
+};
+
+/* CLI context used by "set crl-file" */
+struct set_crlfile_ctx {
+ struct cafile_entry *old_crlfile_entry;
+ struct cafile_entry *new_crlfile_entry;
+};
+
+/* CLI context used by "commit cafile" and "commit crlfile" */
+struct commit_cacrlfile_ctx {
+ struct cafile_entry *old_cafile_entry;
+ struct cafile_entry *new_cafile_entry;
+ struct cafile_entry *old_crlfile_entry;
+ struct cafile_entry *new_crlfile_entry;
+ struct ckch_inst_link *next_ckchi_link;
+ struct ckch_inst *next_ckchi;
+ int cafile_type; /* either CA or CRL, depending on the current command */
+ char *err;
+ enum {
+ CACRL_ST_INIT = 0,
+ CACRL_ST_GEN,
+ CACRL_ST_INSERT,
+ CACRL_ST_SUCCESS,
+ CACRL_ST_FIN,
+ CACRL_ST_ERROR,
+ } state;
+};
+
+
+/******************** cert_key_and_chain functions *************************
+ * These are the functions that fill a cert_key_and_chain structure. For the
+ * functions filling an SSL_CTX from a cert_key_and_chain, see ssl_sock.c.
+ */
+
+/*
+ * Try to parse a Signed Certificate Timestamp List structure. This function
+ * only performs a basic syntactic check that the data looks like an SCTL; no
+ * signature validation is performed.
+ */
+static int ssl_sock_parse_sctl(struct buffer *sctl)
+{
+ int ret = 1;
+ int len, pos, sct_len;
+ unsigned char *data;
+
+ if (sctl->data < 2)
+ goto out;
+
+ data = (unsigned char *) sctl->area;
+ len = (data[0] << 8) | data[1];
+
+ if (len + 2 != sctl->data)
+ goto out;
+
+ data = data + 2;
+ pos = 0;
+ while (pos < len) {
+ if (len - pos < 2)
+ goto out;
+
+ sct_len = (data[pos] << 8) | data[pos + 1];
+ if (pos + sct_len + 2 > len)
+ goto out;
+
+ pos += sct_len + 2;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
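+
+/* The framing checked above is the TLS-style encoding of RFC 6962's
+ * SignedCertificateTimestampList: a 2-byte big-endian list length followed
+ * by SCTs, each prefixed with its own 2-byte length. Illustrative encoder
+ * for a single SCT, hypothetical and not part of the original patch:
+ */
+static inline size_t sctl_wrap_one_sct_example(const unsigned char *sct, size_t sct_len,
+ unsigned char *out, size_t out_size)
+{
+ if (out_size < sct_len + 4 || sct_len + 2 > 0xffff)
+ return 0;
+ out[0] = (sct_len + 2) >> 8; /* total list length */
+ out[1] = (sct_len + 2) & 0xff;
+ out[2] = sct_len >> 8; /* length of this single SCT */
+ out[3] = sct_len & 0xff;
+ memcpy(out + 4, sct, sct_len);
+ return sct_len + 4; /* such a buffer passes ssl_sock_parse_sctl() */
+}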
+
+/* Try to load an SCTL from a buffer <buf> if not NULL, or else read the file
+ * <sctl_path>. On success it fills the ckch->sctl buffer.
+ * Returns 0 on success or != 0 on failure. */
+int ssl_sock_load_sctl_from_file(const char *sctl_path, char *buf, struct cert_key_and_chain *ckch, char **err)
+{
+ int fd = -1;
+ int r = 0;
+ int ret = 1;
+ struct buffer tmp;
+ struct buffer *src;
+ struct buffer *sctl;
+
+ if (buf) {
+ chunk_initstr(&tmp, buf);
+ src = &tmp;
+ } else {
+ fd = open(sctl_path, O_RDONLY);
+ if (fd == -1)
+ goto end;
+
+ trash.data = 0;
+ while (trash.data < trash.size) {
+ r = read(fd, trash.area + trash.data, trash.size - trash.data);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+ goto end;
+ }
+ else if (r == 0) {
+ break;
+ }
+ trash.data += r;
+ }
+ src = &trash;
+ }
+
+ ret = ssl_sock_parse_sctl(src);
+ if (ret)
+ goto end;
+
+ sctl = calloc(1, sizeof(*sctl));
+ if (!chunk_dup(sctl, src)) {
+ ha_free(&sctl);
+ goto end;
+ }
+ /* no error, fill ckch with the new context; the old one must be freed */
+ if (ckch->sctl) {
+ ha_free(&ckch->sctl->area);
+ free(ckch->sctl);
+ }
+ ckch->sctl = sctl;
+ ret = 0;
+end:
+ if (fd != -1)
+ close(fd);
+
+ return ret;
+}
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) || defined OPENSSL_IS_BORINGSSL)
+/*
+ * This function loads the OCSP Response, either in DER format from the file
+ * at path 'ocsp_path', or base64-encoded from the buffer <buf>.
+ *
+ * Returns 0 on success, 1 on error.
+ */
+int ssl_sock_load_ocsp_response_from_file(const char *ocsp_path, char *buf, struct cert_key_and_chain *ckch, char **err)
+{
+ int fd = -1;
+ int r = 0;
+ int ret = 1;
+ struct buffer *ocsp_response;
+ struct buffer *src = NULL;
+
+ if (buf) {
+ int i, j;
+ /* if it's from a buffer it will be base64 */
+
+ /* remove \r and \n from the payload */
+ for (i = 0, j = 0; buf[i]; i++) {
+ if (buf[i] == '\r' || buf[i] == '\n')
+ continue;
+ buf[j++] = buf[i];
+ }
+ buf[j] = 0;
+
+ ret = base64dec(buf, j, trash.area, trash.size);
+ if (ret < 0) {
+ memprintf(err, "Error reading OCSP response in base64 format");
+ goto end;
+ }
+ trash.data = ret;
+ src = &trash;
+ } else {
+ fd = open(ocsp_path, O_RDONLY);
+ if (fd == -1) {
+ memprintf(err, "Error opening OCSP response file");
+ goto end;
+ }
+
+ trash.data = 0;
+ while (trash.data < trash.size) {
+ r = read(fd, trash.area + trash.data, trash.size - trash.data);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+
+ memprintf(err, "Error reading OCSP response from file");
+ goto end;
+ }
+ else if (r == 0) {
+ break;
+ }
+ trash.data += r;
+ }
+ close(fd);
+ fd = -1;
+ src = &trash;
+ }
+
+ ocsp_response = calloc(1, sizeof(*ocsp_response));
+ if (!chunk_dup(ocsp_response, src)) {
+ ha_free(&ocsp_response);
+ goto end;
+ }
+ /* no error, fill ckch with the new context; the old one must be freed */
+ if (ckch->ocsp_response) {
+ ha_free(&ckch->ocsp_response->area);
+ free(ckch->ocsp_response);
+ }
+ ckch->ocsp_response = ocsp_response;
+ ret = 0;
+end:
+ if (fd != -1)
+ close(fd);
+
+ return ret;
+}
+#endif
+
+/*
+ * Try to load into a ckch every file related to it
+ * (PEM, sctl, ocsp, issuer etc.)
+ *
+ * This function is only used to load files during the configuration parsing,
+ * it is not used with the CLI.
+ *
+ * This allows us to carry the contents of the file without having to read the
+ * file multiple times. The caller must call
+ * ssl_sock_free_cert_key_and_chain_contents.
+ *
+ * returns:
+ * 0 on success
+ * 1 on SSL failure
+ */
+int ssl_sock_load_files_into_ckch(const char *path, struct cert_key_and_chain *ckch, char **err)
+{
+ struct buffer *fp = NULL;
+ int ret = 1;
+ struct stat st;
+
+ /* try to load the PEM */
+ if (ssl_sock_load_pem_into_ckch(path, NULL, ckch , err) != 0) {
+ goto end;
+ }
+
+ fp = alloc_trash_chunk();
+ if (!fp) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (!chunk_strcpy(fp, path) || (b_data(fp) > MAXPATHLEN)) {
+ memprintf(err, "%s '%s' filename too long.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ /* remove the ".crt" extension */
+ if (global_ssl.extra_files_noext) {
+ char *ext;
+
+ /* look for the extension */
+ if ((ext = strrchr(fp->area, '.'))) {
+
+ if (strcmp(ext, ".crt") == 0) {
+ *ext = '\0';
+ fp->data = strlen(fp->area);
+ }
+ }
+
+ }
+
+ if (ckch->key == NULL) {
+ /* If no private key was found yet and we cannot look for it in extra
+ * files, raise an error.
+ */
+ if (!(global_ssl.extra_files & SSL_GF_KEY)) {
+ memprintf(err, "%sNo Private Key found in '%s'.\n", err && *err ? *err : "", fp->area);
+ goto end;
+ }
+
+ /* try to load an external private key if it wasn't in the PEM */
+ if (!chunk_strcat(fp, ".key") || (b_data(fp) > MAXPATHLEN)) {
+ memprintf(err, "%s '%s' filename too long.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_key_into_ckch(fp->area, NULL, ckch, err)) {
+ memprintf(err, "%s '%s' is present but cannot be read or parsed.\n",
+ err && *err ? *err : "", fp->area);
+ goto end;
+ }
+ }
+
+ if (ckch->key == NULL) {
+ memprintf(err, "%sNo Private Key found in '%s'.\n", err && *err ? *err : "", fp->area);
+ goto end;
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".key")) = '\0';
+ b_sub(fp, strlen(".key"));
+ }
+
+
+ if (!X509_check_private_key(ckch->cert, ckch->key)) {
+ memprintf(err, "%sinconsistencies between private key and certificate loaded '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+#ifdef HAVE_SSL_SCTL
+ /* try to load the sctl file */
+ if (global_ssl.extra_files & SSL_GF_SCTL) {
+ struct stat st;
+
+ if (!chunk_strcat(fp, ".sctl") || b_data(fp) > MAXPATHLEN) {
+ memprintf(err, "%s '%s' filename too long.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_sctl_from_file(fp->area, NULL, ckch, err)) {
+ memprintf(err, "%s '%s.sctl' is present but cannot be read or parsed.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".sctl")) = '\0';
+ b_sub(fp, strlen(".sctl"));
+ }
+#endif
+
+ /* try to load an ocsp response file */
+ if (global_ssl.extra_files & SSL_GF_OCSP) {
+ struct stat st;
+
+ if (!chunk_strcat(fp, ".ocsp") || b_data(fp) > MAXPATHLEN) {
+ memprintf(err, "%s '%s' filename too long.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_ocsp_response_from_file(fp->area, NULL, ckch, err)) {
+ ret = 1;
+ goto end;
+ }
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".ocsp")) = '\0';
+ b_sub(fp, strlen(".ocsp"));
+ }
+
+#ifndef OPENSSL_IS_BORINGSSL /* Useless for BoringSSL */
+ if (ckch->ocsp_response && (global_ssl.extra_files & SSL_GF_OCSP_ISSUER)) {
+ /* if no issuer was found, try to load an issuer from the .issuer */
+ if (!ckch->ocsp_issuer) {
+ struct stat st;
+
+ if (!chunk_strcat(fp, ".issuer") || b_data(fp) > MAXPATHLEN) {
+ memprintf(err, "%s '%s' filename too long.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_issuer_file_into_ckch(fp->area, NULL, ckch, err)) {
+ ret = 1;
+ goto end;
+ }
+
+ if (X509_check_issued(ckch->ocsp_issuer, ckch->cert) != X509_V_OK) {
+ memprintf(err, "%s '%s' is not an issuer.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".issuer")) = '\0';
+ b_sub(fp, strlen(".issuer"));
+ }
+ }
+#endif
+
+ ret = 0;
+
+end:
+
+ ERR_clear_error();
+
+ /* Something went wrong in one of the reads */
+ if (ret != 0)
+ ssl_sock_free_cert_key_and_chain_contents(ckch);
+
+ free_trash_chunk(fp);
+
+ return ret;
+}
+
+/*
+ * Try to load a private key file from a <path> or a buffer <buf>
+ *
+ * If it fails, do not attempt to use the ckch; free it instead.
+ *
+ * Return 0 on success or != 0 on failure
+ */
+int ssl_sock_load_key_into_ckch(const char *path, char *buf, struct cert_key_and_chain *ckch , char **err)
+{
+ BIO *in = NULL;
+ int ret = 1;
+ EVP_PKEY *key = NULL;
+
+ if (buf) {
+ /* reading from a buffer */
+ in = BIO_new_mem_buf(buf, -1);
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ } else {
+ /* reading from a file */
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto end;
+
+ if (BIO_read_filename(in, path) <= 0)
+ goto end;
+ }
+
+ /* Read Private Key */
+ key = PEM_read_bio_PrivateKey(in, NULL, NULL, NULL);
+ if (key == NULL) {
+ memprintf(err, "%sunable to load private key from file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ ret = 0;
+
+ SWAP(ckch->key, key);
+
+end:
+
+ ERR_clear_error();
+ if (in)
+ BIO_free(in);
+ if (key)
+ EVP_PKEY_free(key);
+
+ return ret;
+}
+
+/*
+ * Try to load a PEM file from a <path> or a buffer <buf>
+ * The PEM must contain at least a certificate. It may also contain DH
+ * parameters, a certificate chain and a private key.
+ *
+ * If it fails, do not attempt to use the ckch; free it instead.
+ *
+ * Return 0 on success or != 0 on failure
+ */
+int ssl_sock_load_pem_into_ckch(const char *path, char *buf, struct cert_key_and_chain *ckch , char **err)
+{
+ BIO *in = NULL;
+ int ret = 1;
+ X509 *ca;
+ X509 *cert = NULL;
+ EVP_PKEY *key = NULL;
+ HASSL_DH *dh = NULL;
+ STACK_OF(X509) *chain = NULL;
+
+ if (buf) {
+ /* reading from a buffer */
+ in = BIO_new_mem_buf(buf, -1);
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ } else {
+ /* reading from a file */
+ in = BIO_new(BIO_s_file());
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (BIO_read_filename(in, path) <= 0) {
+ memprintf(err, "%scannot open the file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+ }
+
+ /* Read Private Key */
+ key = PEM_read_bio_PrivateKey(in, NULL, NULL, NULL);
+ /* no need to check for errors here, because the private key could be loaded later */
+
+#ifndef OPENSSL_NO_DH
+ /* Seek back to beginning of file */
+ if (BIO_reset(in) == -1) {
+ memprintf(err, "%san error occurred while reading the file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ dh = ssl_sock_get_dh_from_bio(in);
+ ERR_clear_error();
+ /* no need to return an error there, dh is not mandatory */
+#endif
+
+ /* Seek back to beginning of file */
+ if (BIO_reset(in) == -1) {
+ memprintf(err, "%san error occurred while reading the file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ /* Read Certificate */
+ cert = PEM_read_bio_X509_AUX(in, NULL, NULL, NULL);
+ if (cert == NULL) {
+ memprintf(err, "%sunable to load certificate from file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ /* Look for a Certificate Chain */
+ while ((ca = PEM_read_bio_X509(in, NULL, NULL, NULL))) {
+ if (chain == NULL)
+ chain = sk_X509_new_null();
+ if (!sk_X509_push(chain, ca)) {
+ X509_free(ca);
+ goto end;
+ }
+ }
+
+ ret = ERR_get_error();
+ if (ret && (ERR_GET_LIB(ret) != ERR_LIB_PEM && ERR_GET_REASON(ret) != PEM_R_NO_START_LINE)) {
+ memprintf(err, "%sunable to load certificate chain from file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ /* once the PEM is loaded, remove everything else previously stored in the ckch */
+ if (ckch->ocsp_response) {
+ ha_free(&ckch->ocsp_response->area);
+ ha_free(&ckch->ocsp_response);
+ }
+
+ if (ckch->sctl) {
+ ha_free(&ckch->sctl->area);
+ ha_free(&ckch->sctl);
+ }
+
+ if (ckch->ocsp_issuer) {
+ X509_free(ckch->ocsp_issuer);
+ ckch->ocsp_issuer = NULL;
+ }
+
+ /* no error, fill ckch with the new context; the old one will be freed at end: */
+ SWAP(ckch->key, key);
+ SWAP(ckch->dh, dh);
+ SWAP(ckch->cert, cert);
+ SWAP(ckch->chain, chain);
+
+ ret = 0;
+
+end:
+
+ ERR_clear_error();
+ if (in)
+ BIO_free(in);
+ if (key)
+ EVP_PKEY_free(key);
+ if (dh)
+ HASSL_DH_free(dh);
+ if (cert)
+ X509_free(cert);
+ if (chain)
+ sk_X509_pop_free(chain, X509_free);
+
+ return ret;
+}
+
+/* Frees the contents of a cert_key_and_chain
+ */
+void ssl_sock_free_cert_key_and_chain_contents(struct cert_key_and_chain *ckch)
+{
+ if (!ckch)
+ return;
+
+ /* Free the certificate and set pointer to NULL */
+ if (ckch->cert)
+ X509_free(ckch->cert);
+ ckch->cert = NULL;
+
+ /* Free the key and set pointer to NULL */
+ if (ckch->key)
+ EVP_PKEY_free(ckch->key);
+ ckch->key = NULL;
+
+ /* Free each certificate in the chain */
+ if (ckch->chain)
+ sk_X509_pop_free(ckch->chain, X509_free);
+ ckch->chain = NULL;
+
+ if (ckch->dh)
+ HASSL_DH_free(ckch->dh);
+ ckch->dh = NULL;
+
+ if (ckch->sctl) {
+ ha_free(&ckch->sctl->area);
+ ha_free(&ckch->sctl);
+ }
+
+ if (ckch->ocsp_response) {
+ ha_free(&ckch->ocsp_response->area);
+ ha_free(&ckch->ocsp_response);
+ }
+
+ if (ckch->ocsp_issuer)
+ X509_free(ckch->ocsp_issuer);
+ ckch->ocsp_issuer = NULL;
+}
+
+/*
+ * This function copies a cert_key_and_chain in memory.
+ *
+ * It's used to try to apply changes on a ckch before committing them, because
+ * most of the time it's not possible to revert those changes.
+ *
+ * Returns <dst> on success or NULL on failure.
+ */
+struct cert_key_and_chain *ssl_sock_copy_cert_key_and_chain(struct cert_key_and_chain *src,
+ struct cert_key_and_chain *dst)
+{
+ if (!src || !dst)
+ return NULL;
+
+ if (src->cert) {
+ dst->cert = src->cert;
+ X509_up_ref(src->cert);
+ }
+
+ if (src->key) {
+ dst->key = src->key;
+ EVP_PKEY_up_ref(src->key);
+ }
+
+ if (src->chain) {
+ dst->chain = X509_chain_up_ref(src->chain);
+ }
+
+ if (src->dh) {
+ HASSL_DH_up_ref(src->dh);
+ dst->dh = src->dh;
+ }
+
+ if (src->sctl) {
+ struct buffer *sctl;
+
+ sctl = calloc(1, sizeof(*sctl));
+ if (!chunk_dup(sctl, src->sctl)) {
+ ha_free(&sctl);
+ goto error;
+ }
+ dst->sctl = sctl;
+ }
+
+ if (src->ocsp_response) {
+ struct buffer *ocsp_response;
+
+ ocsp_response = calloc(1, sizeof(*ocsp_response));
+ if (!chunk_dup(ocsp_response, src->ocsp_response)) {
+ ha_free(&ocsp_response);
+ goto error;
+ }
+ dst->ocsp_response = ocsp_response;
+ }
+
+ if (src->ocsp_issuer) {
+ X509_up_ref(src->ocsp_issuer);
+ dst->ocsp_issuer = src->ocsp_issuer;
+ }
+
+ return dst;
+
+error:
+
+ /* free everything */
+ ssl_sock_free_cert_key_and_chain_contents(dst);
+
+ return NULL;
+}
+
+/*
+ * Try to load the OCSP issuer certificate from a <path> or a buffer <buf>.
+ * Returns 0 on success or != 0 on failure.
+ */
+int ssl_sock_load_issuer_file_into_ckch(const char *path, char *buf, struct cert_key_and_chain *ckch, char **err)
+{
+ int ret = 1;
+ BIO *in = NULL;
+ X509 *issuer;
+
+ if (buf) {
+ /* reading from a buffer */
+ in = BIO_new_mem_buf(buf, -1);
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ } else {
+ /* reading from a file */
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto end;
+
+ if (BIO_read_filename(in, path) <= 0)
+ goto end;
+ }
+
+ issuer = PEM_read_bio_X509_AUX(in, NULL, NULL, NULL);
+ if (!issuer) {
+ memprintf(err, "%s'%s' cannot be read or parsed'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+ /* no error, fill ckch with the new context; the old one must be freed */
+ if (ckch->ocsp_issuer)
+ X509_free(ckch->ocsp_issuer);
+ ckch->ocsp_issuer = issuer;
+ ret = 0;
+
+end:
+
+ ERR_clear_error();
+ if (in)
+ BIO_free(in);
+
+ return ret;
+}
+
+/******************** ckch_store functions ***********************************
+ * The ckch_store is a structure used to cache and index the SSL files used in
+ * configuration
+ */
+
+/*
+ * Free a ckch_store, its ckch, its instances and remove it from the ebtree
+ */
+void ckch_store_free(struct ckch_store *store)
+{
+ struct ckch_inst *inst, *inst_s;
+
+ if (!store)
+ return;
+
+ ssl_sock_free_cert_key_and_chain_contents(store->ckch);
+
+ ha_free(&store->ckch);
+
+ list_for_each_entry_safe(inst, inst_s, &store->ckch_inst, by_ckchs) {
+ ckch_inst_free(inst);
+ }
+ ebmb_delete(&store->node);
+ free(store);
+}
+
+/*
+ * Create and initialize a ckch_store.
+ * <filename> is the tree key (the certificate path)
+ *
+ * Returns a ckch_store or NULL upon failure.
+ */
+struct ckch_store *ckch_store_new(const char *filename)
+{
+ struct ckch_store *store;
+ int pathlen;
+
+ pathlen = strlen(filename);
+ store = calloc(1, sizeof(*store) + pathlen + 1);
+ if (!store)
+ return NULL;
+
+ memcpy(store->path, filename, pathlen + 1);
+
+ LIST_INIT(&store->ckch_inst);
+ LIST_INIT(&store->crtlist_entry);
+
+ store->ckch = calloc(1, sizeof(*store->ckch));
+ if (!store->ckch)
+ goto error;
+
+ return store;
+error:
+ ckch_store_free(store);
+ return NULL;
+}
+
+/* allocate and duplicate a ckch_store
+ * Return a new ckch_store or NULL */
+struct ckch_store *ckchs_dup(const struct ckch_store *src)
+{
+ struct ckch_store *dst;
+
+ if (!src)
+ return NULL;
+
+ dst = ckch_store_new(src->path);
+ if (!dst)
+ return NULL;
+
+ if (!ssl_sock_copy_cert_key_and_chain(src->ckch, dst->ckch))
+ goto error;
+
+ return dst;
+
+error:
+ ckch_store_free(dst);
+
+ return NULL;
+}
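+
+/* Usage sketch, illustrative only: a CLI certificate update works on a
+ * duplicate so that a failed load never corrupts the running store. The
+ * function name is hypothetical and error handling is reduced to the minimum.
+ */
+static inline struct ckch_store *cert_update_prepare_example(struct ckch_store *old_ckchs,
+ char *pem_buf, char **err)
+{
+ struct ckch_store *new_ckchs = ckchs_dup(old_ckchs);
+
+ if (!new_ckchs)
+ return NULL;
+ /* load the new PEM into the copy only; the original store is untouched */
+ if (ssl_sock_load_pem_into_ckch(new_ckchs->path, pem_buf, new_ckchs->ckch, err) != 0) {
+ ckch_store_free(new_ckchs);
+ return NULL;
+ }
+ return new_ckchs;
+}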
+
+/*
+ * lookup a path into the ckchs tree.
+ */
+struct ckch_store *ckchs_lookup(char *path)
+{
+ struct ebmb_node *eb;
+
+ eb = ebst_lookup(&ckchs_tree, path);
+ if (!eb)
+ return NULL;
+
+ return ebmb_entry(eb, struct ckch_store, node);
+}
+
+/*
+ * This function allocates a ckch_store and populates it with certificates loaded from files.
+ */
+struct ckch_store *ckchs_load_cert_file(char *path, char **err)
+{
+ struct ckch_store *ckchs;
+
+ ckchs = ckch_store_new(path);
+ if (!ckchs) {
+ memprintf(err, "%sunable to allocate memory.\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (ssl_sock_load_files_into_ckch(path, ckchs->ckch, err) == 1)
+ goto end;
+
+ /* insert into the ckchs tree */
+ memcpy(ckchs->path, path, strlen(path) + 1);
+ ebst_insert(&ckchs_tree, &ckchs->node);
+ return ckchs;
+
+end:
+ ckch_store_free(ckchs);
+
+ return NULL;
+}
+
+
+/******************** ckch_inst functions ******************************/
+
+/* Unlink a ckch_inst, free all its SNIs, then free the ckch_inst itself.
+ * The caller must hold the bind_conf's lock if the SNIs were inserted.
+ */
+void ckch_inst_free(struct ckch_inst *inst)
+{
+ struct sni_ctx *sni, *sni_s;
+ struct ckch_inst_link_ref *link_ref, *link_ref_s;
+
+ if (inst == NULL)
+ return;
+
+ list_for_each_entry_safe(sni, sni_s, &inst->sni_ctx, by_ckch_inst) {
+ SSL_CTX_free(sni->ctx);
+ LIST_DELETE(&sni->by_ckch_inst);
+ ebmb_delete(&sni->name);
+ free(sni);
+ }
+ SSL_CTX_free(inst->ctx);
+ inst->ctx = NULL;
+ LIST_DELETE(&inst->by_ckchs);
+ LIST_DELETE(&inst->by_crtlist_entry);
+
+ /* Free the cafile_link_refs list */
+ list_for_each_entry_safe(link_ref, link_ref_s, &inst->cafile_link_refs, list) {
+ if (link_ref->link && LIST_INLIST(&link_ref->link->list)) {
+ /* Try to detach and free the ckch_inst_link only if it
+ * was attached, this way it can be used to loop from
+ * the caller */
+ LIST_DEL_INIT(&link_ref->link->list);
+ ha_free(&link_ref->link);
+ }
+ LIST_DELETE(&link_ref->list);
+ free(link_ref);
+ }
+
+ free(inst);
+}
+
+/* Alloc and init a ckch_inst */
+struct ckch_inst *ckch_inst_new()
+{
+ struct ckch_inst *ckch_inst;
+
+ ckch_inst = calloc(1, sizeof *ckch_inst);
+ if (!ckch_inst)
+ return NULL;
+
+ LIST_INIT(&ckch_inst->sni_ctx);
+ LIST_INIT(&ckch_inst->by_ckchs);
+ LIST_INIT(&ckch_inst->by_crtlist_entry);
+ LIST_INIT(&ckch_inst->cafile_link_refs);
+
+ return ckch_inst;
+}
+
+
+/******************** ssl_store functions ******************************/
+struct eb_root cafile_tree = EB_ROOT;
+
+/*
+ * Returns the cafile_entry found in the cafile_tree indexed by the path 'path'.
+ * If 'oldest_entry' is 1, returns the "original" cafile_entry (since
+ * during a set cafile/commit cafile cycle there might be two entries for any
+ * given path, the original one and the new one set via the CLI but not
+ * committed yet).
+ */
+struct cafile_entry *ssl_store_get_cafile_entry(char *path, int oldest_entry)
+{
+ struct cafile_entry *ca_e = NULL;
+ struct ebmb_node *eb;
+
+ eb = ebst_lookup(&cafile_tree, path);
+ while (eb) {
+ ca_e = ebmb_entry(eb, struct cafile_entry, node);
+ /* The ebst_lookup in a tree that has duplicates returns the
+ * oldest entry first. If we want the latest entry, we need to
+ * iterate over all the duplicates until we find the last one
+ * (in our case there should never be more than two entries for
+ * any given path). */
+ if (oldest_entry)
+ return ca_e;
+ eb = ebmb_next_dup(eb);
+ }
+ return ca_e;
+}
+
+int ssl_store_add_uncommitted_cafile_entry(struct cafile_entry *entry)
+{
+ return (ebst_insert(&cafile_tree, &entry->node) != &entry->node);
+}
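+
+/* Illustrative helper, not part of the original patch: since the tree accepts
+ * duplicates, an uncommitted update shows up as a second entry for the same
+ * path, so comparing the oldest and latest lookups tells whether a
+ * transaction is pending. The name below is hypothetical.
+ */
+static inline int cafile_update_pending_example(char *path)
+{
+ return ssl_store_get_cafile_entry(path, 0) !=
+ ssl_store_get_cafile_entry(path, 1);
+}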
+
+X509_STORE* ssl_store_get0_locations_file(char *path)
+{
+ struct cafile_entry *ca_e = ssl_store_get_cafile_entry(path, 0);
+
+ if (ca_e)
+ return ca_e->ca_store;
+
+ return NULL;
+}
+
+/* Create a cafile_entry object, without adding it to the cafile_tree. */
+struct cafile_entry *ssl_store_create_cafile_entry(char *path, X509_STORE *store, enum cafile_type type)
+{
+ struct cafile_entry *ca_e;
+ int pathlen;
+
+ pathlen = strlen(path);
+
+ ca_e = calloc(1, sizeof(*ca_e) + pathlen + 1);
+ if (ca_e) {
+ memcpy(ca_e->path, path, pathlen + 1);
+ ca_e->ca_store = store;
+ ca_e->type = type;
+ LIST_INIT(&ca_e->ckch_inst_link);
+ }
+ return ca_e;
+}
+
+/* Delete a cafile_entry. The caller is responsible for removing this entry
+ * from the cafile_tree first if it was previously added into it. */
+void ssl_store_delete_cafile_entry(struct cafile_entry *ca_e)
+{
+ struct ckch_inst_link *link, *link_s;
+ if (!ca_e)
+ return;
+
+ X509_STORE_free(ca_e->ca_store);
+
+ list_for_each_entry_safe(link, link_s, &ca_e->ckch_inst_link, list) {
+ struct ckch_inst *inst = link->ckch_inst;
+ struct ckch_inst_link_ref *link_ref, *link_ref_s;
+ list_for_each_entry_safe(link_ref, link_ref_s, &inst->cafile_link_refs, list) {
+ if (link_ref->link == link) {
+ LIST_DELETE(&link_ref->list);
+ free(link_ref);
+ break;
+ }
+ }
+ LIST_DELETE(&link->list);
+ free(link);
+ }
+
+ free(ca_e);
+}
+
+/*
+ * Build a cafile_entry out of a buffer instead of out of a file.
+ * It is used when the "commit ssl ca-file" CLI command is invoked.
+ * It can parse CERTIFICATE sections as well as CRL ones.
+ * Returns 0 in case of success, 1 otherwise.
+ */
+int ssl_store_load_ca_from_buf(struct cafile_entry *ca_e, char *cert_buf)
+{
+ int retval = 0;
+
+ if (!ca_e)
+ return 1;
+
+ if (!ca_e->ca_store) {
+ ca_e->ca_store = X509_STORE_new();
+ if (ca_e->ca_store) {
+ BIO *bio = BIO_new_mem_buf(cert_buf, strlen(cert_buf));
+ if (bio) {
+ X509_INFO *info;
+ int i;
+ STACK_OF(X509_INFO) *infos = PEM_X509_INFO_read_bio(bio, NULL, NULL, NULL);
+ if (!infos) {
+ BIO_free(bio);
+ return 1;
+ }
+
+ for (i = 0; i < sk_X509_INFO_num(infos) && !retval; i++) {
+ info = sk_X509_INFO_value(infos, i);
+ /* X509_STORE_add_cert and X509_STORE_add_crl return 1 on success */
+ if (info->x509) {
+ retval = !X509_STORE_add_cert(ca_e->ca_store, info->x509);
+ }
+ if (!retval && info->crl) {
+ retval = !X509_STORE_add_crl(ca_e->ca_store, info->crl);
+ }
+ }
+ /* return an error if we didn't compute all the X509_INFO or if there was none */
+ retval = retval || (i != sk_X509_INFO_num(infos)) || (sk_X509_INFO_num(infos) == 0);
+
+ /* Cleanup */
+ sk_X509_INFO_pop_free(infos, X509_INFO_free);
+ BIO_free(bio);
+ }
+ }
+ }
+
+ return retval;
+}
+
+/*
+ * Try to load a ca-file from disk into the ca-file cache.
+ * <shuterror> allows the caller to silence error reporting.
+ * Returns 0 upon error.
+ */
+int __ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_type type, int shuterror)
+{
+ X509_STORE *store = ssl_store_get0_locations_file(path);
+
+ /* If this function is called by the CLI, we should not call the
+ * X509_STORE_load_locations function because it performs forbidden disk
+ * accesses. */
+ if (!store && create_if_none) {
+ STACK_OF(X509_OBJECT) *objs;
+ int cert_count = 0;
+ struct stat buf;
+ struct cafile_entry *ca_e;
+ const char *file = NULL;
+ const char *dir = NULL;
+ unsigned long e;
+
+ store = X509_STORE_new();
+ if (!store) {
+ if (!shuterror)
+ ha_alert("Cannot allocate memory!\n");
+ goto err;
+ }
+
+ if (strcmp(path, "@system-ca") == 0) {
+ dir = X509_get_default_cert_dir();
+ if (!dir) {
+ if (!shuterror)
+ ha_alert("Couldn't get the system CA directory from X509_get_default_cert_dir().\n");
+ goto err;
+ }
+
+ } else {
+
+ if (stat(path, &buf) == -1) {
+ if (!shuterror)
+ ha_alert("Couldn't open the ca-file '%s' (%s).\n", path, strerror(errno));
+ goto err;
+ }
+
+ if (S_ISDIR(buf.st_mode))
+ dir = path;
+ else
+ file = path;
+ }
+
+ if (file) {
+ if (!X509_STORE_load_locations(store, file, NULL)) {
+ e = ERR_get_error();
+ if (!shuterror)
+ ha_alert("Couldn't open the ca-file '%s' (%s).\n", path, ERR_reason_error_string(e));
+ goto err;
+ }
+ } else if (dir) {
+ int n, i;
+ struct dirent **de_list;
+
+ n = scandir(dir, &de_list, 0, alphasort);
+ if (n < 0)
+ goto err;
+
+ for (i = 0; i < n; i++) {
+ char *end;
+ struct dirent *de = de_list[i];
+ BIO *in = NULL;
+ X509 *ca = NULL;
+
+ ERR_clear_error();
+
+ /* we try to load the files that would have
+ * been loaded from a hashed directory by
+ * X509_LOOKUP_hash_dir, so according to "man 1
+ * c_rehash", we should load ".pem", ".crt",
+ * ".cer", or ".crl". Files starting with a dot
+ * are ignored.
+ */
+ end = strrchr(de->d_name, '.');
+ if (!end || de->d_name[0] == '.' ||
+ (strcmp(end, ".pem") != 0 &&
+ strcmp(end, ".crt") != 0 &&
+ strcmp(end, ".cer") != 0 &&
+ strcmp(end, ".crl") != 0)) {
+ free(de);
+ continue;
+ }
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto scandir_err;
+
+ chunk_printf(&trash, "%s/%s", dir, de->d_name);
+
+ if (BIO_read_filename(in, trash.area) == 0)
+ goto scandir_err;
+
+ if (PEM_read_bio_X509_AUX(in, &ca, NULL, NULL) == NULL)
+ goto scandir_err;
+
+ if (X509_STORE_add_cert(store, ca) == 0) {
+ /* only fail if the error is not about duplicate certificates */
+ if (!(ERR_GET_REASON(ERR_get_error()) == X509_R_CERT_ALREADY_IN_HASH_TABLE)) {
+ goto scandir_err;
+ }
+ }
+
+ X509_free(ca);
+ BIO_free(in);
+ free(de);
+ continue;
+
+scandir_err:
+ e = ERR_get_error();
+ X509_free(ca);
+ BIO_free(in);
+ free(de);
+ /* warn if it cannot load one of the files, but don't abort */
+ if (!shuterror)
+ ha_warning("ca-file: '%s' couldn't load '%s' (%s)\n", path, trash.area, ERR_reason_error_string(e));
+
+ }
+ free(de_list);
+ } else {
+ if (!shuterror)
+ ha_alert("ca-file: couldn't load '%s'\n", path);
+ goto err;
+ }
+
+ objs = X509_STORE_get0_objects(store);
+ cert_count = sk_X509_OBJECT_num(objs);
+ if (cert_count == 0) {
+ if (!shuterror)
+ ha_warning("ca-file: 0 CAs were loaded from '%s'\n", path);
+ }
+ ca_e = ssl_store_create_cafile_entry(path, store, type);
+ if (!ca_e) {
+ if (!shuterror)
+ ha_alert("Cannot allocate memory!\n");
+ goto err;
+ }
+ ebst_insert(&cafile_tree, &ca_e->node);
+ }
+ return (store != NULL);
+
+err:
+ X509_STORE_free(store);
+ store = NULL;
+ return 0;
+
+}
+
+int ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_type type)
+{
+ return __ssl_store_load_locations_file(path, create_if_none, type, 0);
+}
+
+/*************************** CLI commands ***********************/
+
+/* Type of SSL payloads that can be updated over the CLI */
+
+struct cert_exts cert_exts[] = {
+ { "", CERT_TYPE_PEM, &ssl_sock_load_pem_into_ckch }, /* default mode, no extensions */
+ { "crt", CERT_TYPE_CRT, &ssl_sock_load_pem_into_ckch },
+ { "key", CERT_TYPE_KEY, &ssl_sock_load_key_into_ckch },
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) || defined OPENSSL_IS_BORINGSSL)
+ { "ocsp", CERT_TYPE_OCSP, &ssl_sock_load_ocsp_response_from_file },
+#endif
+#ifdef HAVE_SSL_SCTL
+ { "sctl", CERT_TYPE_SCTL, &ssl_sock_load_sctl_from_file },
+#endif
+ { "issuer", CERT_TYPE_ISSUER, &ssl_sock_load_issuer_file_into_ckch },
+ { NULL, CERT_TYPE_MAX, NULL },
+};
+
+
+/* release function of the `show ssl cert' command */
+static void cli_release_show_cert(struct appctx *appctx)
+{
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+}
+
+/* IO handler of "show ssl cert <filename>".
+ * It makes use of a show_cert_ctx context, and ckchs_transaction in read-only.
+ */
+static int cli_io_handler_show_cert(struct appctx *appctx)
+{
+ struct show_cert_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *node;
+ struct ckch_store *ckchs = NULL;
+
+ if (trash == NULL)
+ return 1;
+
+ if (!ctx->old_ckchs && ckchs_transaction.old_ckchs) {
+ ckchs = ckchs_transaction.old_ckchs;
+ chunk_appendf(trash, "# transaction\n");
+ chunk_appendf(trash, "*%s\n", ckchs->path);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ ctx->old_ckchs = ckchs_transaction.old_ckchs;
+ }
+
+ if (!ctx->cur_ckchs) {
+ chunk_appendf(trash, "# filename\n");
+ node = ebmb_first(&ckchs_tree);
+ } else {
+ node = &ctx->cur_ckchs->node;
+ }
+ while (node) {
+ ckchs = ebmb_entry(node, struct ckch_store, node);
+ chunk_appendf(trash, "%s\n", ckchs->path);
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ ctx->cur_ckchs = NULL;
+ free_trash_chunk(trash);
+ return 1;
+yield:
+
+ free_trash_chunk(trash);
+ ctx->cur_ckchs = ckchs;
+ return 0; /* should come back */
+}
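+
+/* The handler above follows the generic applet yield protocol: when
+ * applet_putchk() returns -1 the output channel is full, so the handler
+ * saves its cursor in the context and returns 0 to be called again later,
+ * and only returns 1 once everything was dumped. Bare skeleton of that
+ * pattern, illustrative only:
+ */
+static inline int cli_io_yield_skeleton_example(struct appctx *appctx, struct buffer *chunk)
+{
+ if (applet_putchk(appctx, chunk) == -1)
+ return 0; /* full: keep state in appctx->svcctx and come back */
+ return 1; /* done */
+}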
+
+/*
+ * Extract and format the DNS SAN extensions and copy the result into a chunk.
+ * Always returns 0.
+ */
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+static int ssl_sock_get_san_oneline(X509 *cert, struct buffer *out)
+{
+ int i;
+ char *str;
+ STACK_OF(GENERAL_NAME) *names = NULL;
+
+ names = X509_get_ext_d2i(cert, NID_subject_alt_name, NULL, NULL);
+ if (names) {
+ for (i = 0; i < sk_GENERAL_NAME_num(names); i++) {
+ GENERAL_NAME *name = sk_GENERAL_NAME_value(names, i);
+ if (i > 0)
+ chunk_appendf(out, ", ");
+ if (name->type == GEN_DNS) {
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.dNSName) >= 0) {
+ chunk_appendf(out, "DNS:%s", str);
+ OPENSSL_free(str);
+ }
+ }
+ }
+ sk_GENERAL_NAME_pop_free(names, GENERAL_NAME_free);
+ }
+ return 0;
+}
+#endif
+
+/*
+ * Build the ckch_inst_link that will be chained in the CA file entry and the
+ * corresponding ckch_inst_link_ref that will be chained in the ckch instance.
+ * Return 0 in case of success.
+ */
+static int do_chain_inst_and_cafile(struct cafile_entry *cafile_entry, struct ckch_inst *ckch_inst)
+{
+ struct ckch_inst_link *new_link;
+ if (!LIST_ISEMPTY(&cafile_entry->ckch_inst_link)) {
+ struct ckch_inst_link *link = LIST_ELEM(cafile_entry->ckch_inst_link.n,
+ typeof(link), list);
+ /* Do not add multiple references to the same
+ * instance in a cafile_entry */
+ if (link->ckch_inst == ckch_inst) {
+ return 1;
+ }
+ }
+
+ new_link = calloc(1, sizeof(*new_link));
+ if (new_link) {
+ struct ckch_inst_link_ref *new_link_ref = calloc(1, sizeof(*new_link_ref));
+ if (!new_link_ref) {
+ free(new_link);
+ return 1;
+ }
+
+ new_link->ckch_inst = ckch_inst;
+ new_link_ref->link = new_link;
+ LIST_INIT(&new_link->list);
+ LIST_INIT(&new_link_ref->list);
+
+ LIST_APPEND(&cafile_entry->ckch_inst_link, &new_link->list);
+ LIST_APPEND(&ckch_inst->cafile_link_refs, &new_link_ref->list);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Link a CA file tree entry to the ckch instance that uses it.
+ * To determine if and which CA file tree entries need to be linked to the
+ * instance, we follow the same logic performed in ssl_sock_prepare_ctx when
+ * processing the verify option.
+ * This function works for a frontend as well as for a backend, depending on the
+ * configuration parameters given (bind_conf or server).
+ */
+void ckch_inst_add_cafile_link(struct ckch_inst *ckch_inst, struct bind_conf *bind_conf,
+ struct ssl_bind_conf *ssl_conf, const struct server *srv)
+{
+ int verify = SSL_VERIFY_NONE;
+
+ if (srv) {
+
+ if (global.ssl_server_verify == SSL_SERVER_VERIFY_REQUIRED)
+ verify = SSL_VERIFY_PEER;
+ switch (srv->ssl_ctx.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER;
+ break;
+ }
+ }
+ else {
+ switch ((ssl_conf && ssl_conf->verify) ? ssl_conf->verify : bind_conf->ssl_conf.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_OPTIONAL:
+ verify = SSL_VERIFY_PEER;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT;
+ break;
+ }
+ }
+
+ if (verify & SSL_VERIFY_PEER) {
+ struct cafile_entry *ca_file_entry = NULL;
+ struct cafile_entry *ca_verify_file_entry = NULL;
+ struct cafile_entry *crl_file_entry = NULL;
+ if (srv) {
+ if (srv->ssl_ctx.ca_file) {
+ ca_file_entry = ssl_store_get_cafile_entry(srv->ssl_ctx.ca_file, 0);
+
+ }
+ if (srv->ssl_ctx.crl_file) {
+ crl_file_entry = ssl_store_get_cafile_entry(srv->ssl_ctx.crl_file, 0);
+ }
+ }
+ else {
+ char *ca_file = (ssl_conf && ssl_conf->ca_file) ? ssl_conf->ca_file : bind_conf->ssl_conf.ca_file;
+ char *ca_verify_file = (ssl_conf && ssl_conf->ca_verify_file) ? ssl_conf->ca_verify_file : bind_conf->ssl_conf.ca_verify_file;
+ char *crl_file = (ssl_conf && ssl_conf->crl_file) ? ssl_conf->crl_file : bind_conf->ssl_conf.crl_file;
+
+ if (ca_file)
+ ca_file_entry = ssl_store_get_cafile_entry(ca_file, 0);
+ if (ca_verify_file)
+ ca_verify_file_entry = ssl_store_get_cafile_entry(ca_verify_file, 0);
+ if (crl_file)
+ crl_file_entry = ssl_store_get_cafile_entry(crl_file, 0);
+ }
+
+ if (ca_file_entry) {
+ /* If we have a ckch instance that is not already in the
+ * cafile_entry's list, add it to it. */
+ if (do_chain_inst_and_cafile(ca_file_entry, ckch_inst))
+ return;
+
+ }
+ if (ca_verify_file_entry && (ca_file_entry != ca_verify_file_entry)) {
+ /* If we have a ckch instance that is not already in the
+ * cafile_entry's list, add it to it. */
+ if (do_chain_inst_and_cafile(ca_verify_file_entry, ckch_inst))
+ return;
+ }
+ if (crl_file_entry) {
+ /* If we have a ckch instance that is not already in the
+ * cafile_entry's list, add it to it. */
+ if (do_chain_inst_and_cafile(crl_file_entry, ckch_inst))
+ return;
+ }
+ }
+}
+
+
+
+static int show_cert_detail(X509 *cert, STACK_OF(X509) *chain, struct buffer *out)
+{
+ BIO *bio = NULL;
+ struct buffer *tmp = alloc_trash_chunk();
+ int i;
+ int write = -1;
+ unsigned int len = 0;
+ X509_NAME *name = NULL;
+
+ if (!tmp)
+ return -1;
+
+ if (!cert)
+ goto end;
+
+ if (chain == NULL) {
+ struct issuer_chain *issuer;
+ issuer = ssl_get0_issuer_chain(cert);
+ if (issuer) {
+ chain = issuer->chain;
+ chunk_appendf(out, "Chain Filename: ");
+ chunk_appendf(out, "%s\n", issuer->path);
+ }
+ }
+ chunk_appendf(out, "Serial: ");
+ if (ssl_sock_get_serial(cert, tmp) == -1)
+ goto end;
+ dump_binary(out, tmp->area, tmp->data);
+ chunk_appendf(out, "\n");
+
+ chunk_appendf(out, "notBefore: ");
+ chunk_reset(tmp);
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_getm_notBefore(cert)) == 0)
+ goto end;
+ write = BIO_read(bio, tmp->area, tmp->size-1);
+ tmp->area[write] = '\0';
+ BIO_free(bio);
+ bio = NULL;
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_appendf(out, "notAfter: ");
+ chunk_reset(tmp);
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_getm_notAfter(cert)) == 0)
+ goto end;
+ if ((write = BIO_read(bio, tmp->area, tmp->size-1)) <= 0)
+ goto end;
+ tmp->area[write] = '\0';
+ BIO_free(bio);
+ bio = NULL;
+ chunk_appendf(out, "%s\n", tmp->area);
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ chunk_appendf(out, "Subject Alternative Name: ");
+ if (ssl_sock_get_san_oneline(cert, out) == -1)
+ goto end;
+ *(out->area + out->data) = '\0';
+ chunk_appendf(out, "\n");
+#endif
+ chunk_reset(tmp);
+ chunk_appendf(out, "Algorithm: ");
+ if (cert_get_pkey_algo(cert, tmp) == 0)
+ goto end;
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_reset(tmp);
+ chunk_appendf(out, "SHA1 FingerPrint: ");
+ if (X509_digest(cert, EVP_sha1(), (unsigned char *) tmp->area, &len) == 0)
+ goto end;
+ tmp->data = len;
+ dump_binary(out, tmp->area, tmp->data);
+ chunk_appendf(out, "\n");
+
+ chunk_appendf(out, "Subject: ");
+ if ((name = X509_get_subject_name(cert)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_appendf(out, "Issuer: ");
+ if ((name = X509_get_issuer_name(cert)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ /* Displays subject of each certificate in the chain */
+ for (i = 0; i < sk_X509_num(chain); i++) {
+ X509 *ca = sk_X509_value(chain, i);
+
+ chunk_appendf(out, "Chain Subject: ");
+ if ((name = X509_get_subject_name(ca)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_appendf(out, "Chain Issuer: ");
+ if ((name = X509_get_issuer_name(ca)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+ }
+
+end:
+ if (bio)
+ BIO_free(bio);
+ free_trash_chunk(tmp);
+
+ return 0;
+}
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+/*
+ * Build the OCSP tree entry's key for a given ckch_store.
+ * Returns a negative value on error, 0 when there is no (or no parsable)
+ * OCSP response, and a positive value on success.
+ */
+static int ckch_store_build_certid(struct ckch_store *ckch_store, unsigned char certid[128], unsigned int *key_length)
+{
+ OCSP_RESPONSE *resp;
+ OCSP_BASICRESP *bs = NULL;
+ OCSP_SINGLERESP *sr;
+ OCSP_CERTID *id;
+ unsigned char *p = NULL;
+
+ if (!key_length)
+ return -1;
+
+ *key_length = 0;
+
+ if (!ckch_store->ckch->ocsp_response)
+ return 0;
+
+ p = (unsigned char *) ckch_store->ckch->ocsp_response->area;
+
+ resp = d2i_OCSP_RESPONSE(NULL, (const unsigned char **)&p,
+ ckch_store->ckch->ocsp_response->data);
+ if (!resp) {
+ goto end;
+ }
+
+ bs = OCSP_response_get1_basic(resp);
+ if (!bs) {
+ goto end;
+ }
+
+ sr = OCSP_resp_get0(bs, 0);
+ if (!sr) {
+ goto end;
+ }
+
+ id = (OCSP_CERTID*)OCSP_SINGLERESP_get0_id(sr);
+
+ p = certid;
+ *key_length = i2d_OCSP_CERTID(id, &p);
+
+end:
+ return *key_length > 0;
+}
+#endif
+
+/*
+ * Dump the OCSP certificate key (if it exists) of certificate <ckch> into
+ * buffer <out>.
+ * Returns 0 in case of success.
+ */
+static int ckch_store_show_ocsp_certid(struct ckch_store *ckch_store, struct buffer *out)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ unsigned int key_length = 0;
+ int i;
+
+ if (ckch_store_build_certid(ckch_store, (unsigned char*)key, &key_length) >= 0) {
+ /* Dump the CERTID info */
+ chunk_appendf(out, "OCSP Response Key: ");
+ for (i = 0; i < key_length; ++i) {
+ chunk_appendf(out, "%02x", key[i]);
+ }
+ chunk_appendf(out, "\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+/* IO handler for the details of "show ssl cert <filename>".
+ * It uses a struct show_cert_ctx and ckchs_transaction in read-only.
+ */
+static int cli_io_handler_show_cert_detail(struct appctx *appctx)
+{
+ struct show_cert_ctx *ctx = appctx->svcctx;
+ struct ckch_store *ckchs = ctx->cur_ckchs;
+ struct buffer *out = alloc_trash_chunk();
+ int retval = 0;
+
+ if (!out)
+ goto end_no_putchk;
+
+ chunk_appendf(out, "Filename: ");
+ if (ckchs == ckchs_transaction.new_ckchs)
+ chunk_appendf(out, "*");
+ chunk_appendf(out, "%s\n", ckchs->path);
+
+ chunk_appendf(out, "Status: ");
+ if (ckchs->ckch->cert == NULL)
+ chunk_appendf(out, "Empty\n");
+ else if (LIST_ISEMPTY(&ckchs->ckch_inst))
+ chunk_appendf(out, "Unused\n");
+ else
+ chunk_appendf(out, "Used\n");
+
+ retval = show_cert_detail(ckchs->ckch->cert, ckchs->ckch->chain, out);
+ if (retval < 0)
+ goto end_no_putchk;
+ else if (retval)
+ goto end;
+
+ ckch_store_show_ocsp_certid(ckchs, out);
+
+end:
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ free_trash_chunk(out);
+ return 0; /* should come back */
+}
+
+
+/* IO handler for the details of "show ssl cert <filename.ocsp>".
+ * It uses a show_cert_ctx.
+ */
+static int cli_io_handler_show_cert_ocsp_detail(struct appctx *appctx)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ struct show_cert_ctx *ctx = appctx->svcctx;
+ struct ckch_store *ckchs = ctx->cur_ckchs;
+ struct buffer *out = alloc_trash_chunk();
+ int from_transaction = ctx->transaction;
+
+ if (!out)
+ goto end_no_putchk;
+
+ /* If we try to display an ongoing transaction's OCSP response, we
+ * need to dump the ckch's ocsp_response buffer directly.
+ * Otherwise, we must rebuild the certificate's certid in order to
+ * look for the current OCSP response in the tree. */
+ if (from_transaction && ckchs->ckch->ocsp_response) {
+ if (ssl_ocsp_response_print(ckchs->ckch->ocsp_response, out))
+ goto end_no_putchk;
+ }
+ else {
+ unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ unsigned int key_length = 0;
+
+ if (ckch_store_build_certid(ckchs, (unsigned char*)key, &key_length) < 0)
+ goto end_no_putchk;
+
+ if (ssl_get_ocspresponse_detail(key, out))
+ goto end_no_putchk;
+ }
+
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ free_trash_chunk(out);
+ return 0; /* should come back */
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+}
+
+/* parsing function for 'show ssl cert [certfile]' */
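+/* Assumed invocations, per the cases handled below:
+ *   "show ssl cert <file>"       details of one certificate
+ *   "show ssl cert <file>.ocsp"  details of its OCSP response
+ *   "show ssl cert *<file>"      the uncommitted transaction version
+ */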
+static int cli_parse_show_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_cert_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct ckch_store *ckchs;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return cli_err(appctx, "Can't allocate memory!\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't show!\nOperations on certificates are currently locked!\n");
+
+ /* check if there is a certificate to lookup */
+ if (*args[3]) {
+ int show_ocsp_detail = 0;
+ int from_transaction = 0;
+ char *end;
+
+ /* We manage the special case "certname.ocsp" through which we
+ * can show the details of an OCSP response. */
+ end = strrchr(args[3], '.');
+ if (end && strcmp(end+1, "ocsp") == 0) {
+ *end = '\0';
+ show_ocsp_detail = 1;
+ }
+
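+ /* a leading '*' designates the new, uncommitted certificate of the
+ * ongoing transaction */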
+ if (*args[3] == '*') {
+ from_transaction = 1;
+ if (!ckchs_transaction.new_ckchs)
+ goto error;
+
+ ckchs = ckchs_transaction.new_ckchs;
+
+ if (strcmp(args[3] + 1, ckchs->path) != 0)
+ goto error;
+
+ } else {
+ if ((ckchs = ckchs_lookup(args[3])) == NULL)
+ goto error;
+
+ }
+
+ ctx->cur_ckchs = ckchs;
+ /* use the IO handler that shows details */
+ if (show_ocsp_detail) {
+ ctx->transaction = from_transaction;
+ appctx->io_handler = cli_io_handler_show_cert_ocsp_detail;
+ }
+ else
+ appctx->io_handler = cli_io_handler_show_cert_detail;
+ }
+
+ return 0;
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_err(appctx, "Can't display the certificate: Not found or the certificate is a bundle!\n");
+}
+
+/* release function of the `commit ssl cert' command, frees things and unlocks the spinlock */
+static void cli_release_commit_cert(struct appctx *appctx)
+{
+ struct commit_cert_ctx *ctx = appctx->svcctx;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ /* free every new sni_ctx and the new store, which are not in the trees so no spinlock there */
+ if (ctx->new_ckchs)
+ ckch_store_free(ctx->new_ckchs);
+ ha_free(&ctx->err);
+}
+
+
+/*
+ * Rebuild a new instance 'new_inst' based on an old instance 'ckchi' and a
+ * specific ckch_store.
+ * Returns 0 in case of success, 1 otherwise.
+ */
+int ckch_inst_rebuild(struct ckch_store *ckch_store, struct ckch_inst *ckchi,
+ struct ckch_inst **new_inst, char **err)
+{
+ int retval = 0;
+ int errcode = 0;
+ struct sni_ctx *sc0, *sc0s;
+ char **sni_filter = NULL;
+ int fcount = 0;
+
+ if (ckchi->crtlist_entry) {
+ sni_filter = ckchi->crtlist_entry->filters;
+ fcount = ckchi->crtlist_entry->fcount;
+ }
+
+ if (ckchi->is_server_instance)
+ errcode |= ckch_inst_new_load_srv_store(ckch_store->path, ckch_store, new_inst, err);
+ else
+ errcode |= ckch_inst_new_load_store(ckch_store->path, ckch_store, ckchi->bind_conf, ckchi->ssl_conf, sni_filter, fcount, new_inst, err);
+
+ if (errcode & ERR_CODE)
+ return 1;
+
+ /* if the previous ckchi was used as the default */
+ if (ckchi->is_default)
+ (*new_inst)->is_default = 1;
+
+ (*new_inst)->is_server_instance = ckchi->is_server_instance;
+ (*new_inst)->server = ckchi->server;
+ /* Create a new SSL_CTX and link it to the new instance. */
+ if ((*new_inst)->is_server_instance) {
+ retval = ssl_sock_prep_srv_ctx_and_inst(ckchi->server, (*new_inst)->ctx, (*new_inst));
+ if (retval)
+ return 1;
+ }
+
+ /* create the link to the crtlist_entry */
+ (*new_inst)->crtlist_entry = ckchi->crtlist_entry;
+
+ /* we need to initialize the SSL_CTXs generated */
+ /* this iterates over the newly generated SNIs in the new instance to prepare their SSL_CTX */
+ list_for_each_entry_safe(sc0, sc0s, &(*new_inst)->sni_ctx, by_ckch_inst) {
+ if (!sc0->order) { /* we only initialize the first SSL_CTX because the other sni_ctx's share it */
+ errcode |= ssl_sock_prep_ctx_and_inst(ckchi->bind_conf, ckchi->ssl_conf, sc0->ctx, *new_inst, err);
+ if (errcode & ERR_CODE)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Load all the new SNIs of a newly built ckch instance in the trees, or replace
+ * a server's main ckch instance.
+ */
+static void __ssl_sock_load_new_ckch_instance(struct ckch_inst *ckchi)
+{
+ /* The bind_conf will be null on server ckch_instances. */
+ if (ckchi->is_server_instance) {
+ int i;
+ /* a lock is needed here since we have to free the SSL cache */
+ HA_RWLOCK_WRLOCK(SSL_SERVER_LOCK, &ckchi->server->ssl_ctx.lock);
+ /* free the server current SSL_CTX */
+ SSL_CTX_free(ckchi->server->ssl_ctx.ctx);
+ /* Actual ssl context update */
+ SSL_CTX_up_ref(ckchi->ctx);
+ ckchi->server->ssl_ctx.ctx = ckchi->ctx;
+ ckchi->server->ssl_ctx.inst = ckchi;
+
+ /* flush the session cache of the server */
+ for (i = 0; i < global.nbthread; i++) {
+ ha_free(&ckchi->server->ssl_ctx.reused_sess[i].sni);
+ ha_free(&ckchi->server->ssl_ctx.reused_sess[i].ptr);
+ }
+ HA_RWLOCK_WRUNLOCK(SSL_SERVER_LOCK, &ckchi->server->ssl_ctx.lock);
+
+ } else {
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &ckchi->bind_conf->sni_lock);
+ ssl_sock_load_cert_sni(ckchi, ckchi->bind_conf);
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &ckchi->bind_conf->sni_lock);
+ }
+}
+
+/*
+ * Delete a ckch instance that was replaced after a CLI command.
+ */
+static void __ckch_inst_free_locked(struct ckch_inst *ckchi)
+{
+ if (ckchi->is_server_instance) {
+ /* no lock for servers */
+ ckch_inst_free(ckchi);
+ } else {
+ struct bind_conf __maybe_unused *bind_conf = ckchi->bind_conf;
+
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &bind_conf->sni_lock);
+ ckch_inst_free(ckchi);
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &bind_conf->sni_lock);
+ }
+}
+
+/* Replace a ckch_store in the ckch tree and insert all of its dependencies,
+* then free the previous dependencies and store.
+* Used in the case of a certificate update.
+*
+* Every dependency must be allocated before using this function.
+*
+* This function can't fail as it only updates pointers, and does not allocate
+* anything.
+*
+* /!\ This function must be used under the ckch lock. /!\
+*
+* - Insert every dependency (SNI, crtlist_entry, ckch_inst, etc.)
+* - Delete the old ckch_store from the tree
+* - Insert the new ckch_store
+* - Free the old dependencies and the old ckch_store
+*/
+void ckch_store_replace(struct ckch_store *old_ckchs, struct ckch_store *new_ckchs)
+{
+ struct crtlist_entry *entry;
+ struct ckch_inst *ckchi, *ckchis;
+
+ LIST_SPLICE(&new_ckchs->crtlist_entry, &old_ckchs->crtlist_entry);
+ list_for_each_entry(entry, &new_ckchs->crtlist_entry, by_ckch_store) {
+ ebpt_delete(&entry->node);
+ /* change the ptr and reinsert the node */
+ entry->node.key = new_ckchs;
+ ebpt_insert(&entry->crtlist->entries, &entry->node);
+ }
+ /* insert the new ckch_insts in the crtlist_entry */
+ list_for_each_entry(ckchi, &new_ckchs->ckch_inst, by_ckchs) {
+ if (ckchi->crtlist_entry)
+ LIST_INSERT(&ckchi->crtlist_entry->ckch_inst, &ckchi->by_crtlist_entry);
+ }
+ /* First, we insert every new SNIs in the trees, also replace the default_ctx */
+ list_for_each_entry_safe(ckchi, ckchis, &new_ckchs->ckch_inst, by_ckchs) {
+ __ssl_sock_load_new_ckch_instance(ckchi);
+ }
+ /* delete the old sni_ctx, the old ckch_insts and the ckch_store */
+ list_for_each_entry_safe(ckchi, ckchis, &old_ckchs->ckch_inst, by_ckchs) {
+ __ckch_inst_free_locked(ckchi);
+ }
+
+ ckch_store_free(old_ckchs);
+ ebst_insert(&ckchs_tree, &new_ckchs->node);
+}
+
+
+/*
+ * This function tries to create the new ckch_inst and their SNIs
+ *
+ * /!\ don't forget to update __hlua_ckch_commit() if you change things here. /!\
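+ *
+ * The handler is a small state machine: CERT_ST_INIT prints the commit
+ * message, CERT_ST_GEN rebuilds the ckch instances (yielding every 10),
+ * CERT_ST_INSERT swaps the old store for the new one, and
+ * CERT_ST_SUCCESS/CERT_ST_FIN (or CERT_ST_ERROR) finish the dump.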
+ */
+static int cli_io_handler_commit_cert(struct appctx *appctx)
+{
+ struct commit_cert_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ int y = 0;
+ struct ckch_store *old_ckchs, *new_ckchs = NULL;
+ struct ckch_inst *ckchi;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto end;
+
+ while (1) {
+ switch (ctx->state) {
+ case CERT_ST_INIT:
+ /* This state just prints the update message */
+ chunk_printf(&trash, "Committing %s", ckchs_transaction.path);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+
+ ctx->state = CERT_ST_GEN;
+ /* fallthrough */
+ case CERT_ST_GEN:
+ /*
+ * This state generates the ckch instances with their
+ * sni_ctxs and SSL_CTX.
+ *
+ * Since the SSL_CTX generation can be CPU intensive, we
+ * yield every 10 instances.
+ */
+
+ old_ckchs = ctx->old_ckchs;
+ new_ckchs = ctx->new_ckchs;
+
+ /* get the next ckchi to regenerate */
+ ckchi = ctx->next_ckchi;
+ /* we didn't start yet, set it to the first elem */
+ if (ckchi == NULL)
+ ckchi = LIST_ELEM(old_ckchs->ckch_inst.n, typeof(ckchi), by_ckchs);
+
+ /* walk through the old ckch_insts and create new ones using the updated ckchs */
+ list_for_each_entry_from(ckchi, &old_ckchs->ckch_inst, by_ckchs) {
+ struct ckch_inst *new_inst;
+
+ /* save the next ckchi to compute in case of yield */
+ ctx->next_ckchi = ckchi;
+
+ /* it takes a lot of CPU to create SSL_CTXs, so we yield every 10 CKCH instances */
+ if (y >= 10) {
+ applet_have_more_data(appctx); /* let's come back later */
+ goto yield;
+ }
+
+ /* display one dot per new instance */
+ if (applet_putstr(appctx, ".") == -1)
+ goto yield;
+
+ ctx->err = NULL;
+ if (ckch_inst_rebuild(new_ckchs, ckchi, &new_inst, &ctx->err)) {
+ ctx->state = CERT_ST_ERROR;
+ goto error;
+ }
+
+ /* link the new ckch_inst to the duplicate */
+ LIST_APPEND(&new_ckchs->ckch_inst, &new_inst->by_ckchs);
+ y++;
+ }
+ ctx->state = CERT_ST_INSERT;
+ /* fallthrough */
+ case CERT_ST_INSERT:
+ /* The generation is finished, we can insert everything */
+
+ old_ckchs = ctx->old_ckchs;
+ new_ckchs = ctx->new_ckchs;
+
+ /* insert everything and remove the previous objects */
+ ckch_store_replace(old_ckchs, new_ckchs);
+ ctx->new_ckchs = ctx->old_ckchs = NULL;
+ ctx->state = CERT_ST_SUCCESS;
+ /* fallthrough */
+ case CERT_ST_SUCCESS:
+ if (applet_putstr(appctx, "\nSuccess!\n") == -1)
+ goto yield;
+ ctx->state = CERT_ST_FIN;
+ /* fallthrough */
+ case CERT_ST_FIN:
+ /* the transaction is completed, we can set everything to NULL */
+ ckchs_transaction.new_ckchs = NULL;
+ ckchs_transaction.old_ckchs = NULL;
+ ckchs_transaction.path = NULL;
+ goto end;
+
+ case CERT_ST_ERROR:
+ error:
+ chunk_printf(&trash, "\n%sFailed!\n", ctx->err);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = CERT_ST_FIN;
+ break;
+ }
+ }
+end:
+ /* success: call the release function and don't come back */
+ return 1;
+
+yield:
+ return 0; /* should come back */
+}
+
+/*
+ * Parsing function of 'commit ssl cert'
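+ * Example (assumed): "commit ssl cert certificate.pem" applies the pending
+ * "set ssl cert" transaction for that file.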
+ */
+static int cli_parse_commit_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct commit_cert_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'commit ssl cert expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't commit the certificate!\nOperations on certificates are currently locked!\n");
+
+ if (!ckchs_transaction.path) {
+ memprintf(&err, "No ongoing transaction! !\n");
+ goto error;
+ }
+
+ if (strcmp(ckchs_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", ckchs_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* if a certificate is here, a private key must be here too */
+ if (ckchs_transaction.new_ckchs->ckch->cert && !ckchs_transaction.new_ckchs->ckch->key) {
+ memprintf(&err, "The transaction must contain at least a certificate and a private key!\n");
+ goto error;
+ }
+
+ if (!X509_check_private_key(ckchs_transaction.new_ckchs->ckch->cert, ckchs_transaction.new_ckchs->ckch->key)) {
+ memprintf(&err, "inconsistencies between private key and certificate loaded '%s'.\n", ckchs_transaction.path);
+ goto error;
+ }
+
+ /* init the appctx structure */
+ ctx->state = CERT_ST_INIT;
+ ctx->next_ckchi = NULL;
+ ctx->new_ckchs = ckchs_transaction.new_ckchs;
+ ctx->old_ckchs = ckchs_transaction.old_ckchs;
+
+ /* we don't unlock here, the lock will be released after the IO handler, in the release handler */
+ return 0;
+
+error:
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "%sCan't commit %s!\n", err ? err : "", args[3]);
+
+ return cli_dynerr(appctx, err);
+}
+
+
+
+
+/*
+ * Parsing function of `set ssl cert`, it updates or creates a temporary ckch.
+ * It uses a set_cert_ctx context, and ckchs_transaction under a lock.
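+ *
+ * A typical update sequence on the CLI (payload syntax per the management
+ * guide; the socket path is only an example):
+ *   echo -e "set ssl cert certificate.pem <<\n$(cat certificate.pem)\n" | \
+ *     socat /var/run/haproxy.sock -
+ *   echo "commit ssl cert certificate.pem" | socat /var/run/haproxy.sock -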
+ */
+static int cli_parse_set_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct set_cert_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct ckch_store *new_ckchs = NULL;
+ struct ckch_store *old_ckchs = NULL;
+ char *err = NULL;
+ int i;
+ int errcode = 0;
+ char *end;
+ struct cert_exts *cert_ext = &cert_exts[0]; /* default one, PEM */
+ struct cert_key_and_chain *ckch;
+ struct buffer *buf;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || !payload)
+ return cli_err(appctx, "'set ssl cert expects a filename and a certificate as a payload\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't update the certificate!\nOperations on certificates are currently locked!\n");
+
+ if ((buf = alloc_trash_chunk()) == NULL) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!chunk_strcpy(buf, args[3])) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* check which type of file we want to update */
+ for (i = 0; cert_exts[i].ext != NULL; i++) {
+ end = strrchr(buf->area, '.');
+ if (end && *cert_exts[i].ext && (strcmp(end + 1, cert_exts[i].ext) == 0)) {
+ *end = '\0';
+ buf->data = strlen(buf->area);
+ cert_ext = &cert_exts[i];
+ break;
+ }
+ }
+
+ ctx->old_ckchs = NULL;
+ ctx->new_ckchs = NULL;
+
+ /* if there is an ongoing transaction */
+ if (ckchs_transaction.path) {
+ /* if there is an ongoing transaction, check if this is the same file */
+ if (strcmp(ckchs_transaction.path, buf->area) != 0) {
+ /* we didn't find the transaction, must try more cases below */
+
+ /* if the del-ext option is activated we should try to take a look at a ".crt" too. */
+ if (cert_ext->type != CERT_TYPE_PEM && global_ssl.extra_files_noext) {
+ if (!chunk_strcat(buf, ".crt")) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (strcmp(ckchs_transaction.path, buf->area) != 0) {
+ /* strip the ".crt" we just appended before building the error message */
+ *(b_orig(buf) + b_data(buf) - strlen(".crt")) = '\0';
+ b_sub(buf, strlen(".crt"));
+
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", ckchs_transaction.path, buf->area);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+ }
+
+ ctx->old_ckchs = ckchs_transaction.new_ckchs;
+
+ } else {
+
+ /* lookup for the certificate in the tree */
+ ctx->old_ckchs = ckchs_lookup(buf->area);
+
+ if (!ctx->old_ckchs) {
+ /* if the del-ext option is activated we should try to take a look at a ".crt" too. */
+ if (cert_ext->type != CERT_TYPE_PEM && global_ssl.extra_files_noext) {
+ if (!chunk_strcat(buf, ".crt")) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ ctx->old_ckchs = ckchs_lookup(buf->area);
+ }
+ }
+ }
+
+ if (!ctx->old_ckchs) {
+ memprintf(&err, "%sCan't replace a certificate which is not referenced by the configuration!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ old_ckchs = ctx->old_ckchs;
+
+ /* duplicate the ckch store */
+ new_ckchs = ckchs_dup(old_ckchs);
+ if (!new_ckchs) {
+ memprintf(&err, "%sCannot allocate memory!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ ckch = new_ckchs->ckch;
+
+ /* apply the change on the duplicate */
+ if (cert_ext->load(buf->area, payload, ckch, &err) != 0) {
+ memprintf(&err, "%sCan't load the payload\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ ctx->new_ckchs = new_ckchs;
+
+ /* we succeed, we can save the ckchs in the transaction */
+
+ /* if there wasn't a transaction, update the old ckchs */
+ if (!ckchs_transaction.old_ckchs) {
+ ckchs_transaction.old_ckchs = ctx->old_ckchs;
+ ckchs_transaction.path = ctx->old_ckchs->path;
+ err = memprintf(&err, "Transaction created for certificate %s!\n", ckchs_transaction.path);
+ } else {
+ err = memprintf(&err, "Transaction updated for certificate %s!\n", ckchs_transaction.path);
+
+ }
+
+ /* free the previous ckchs if there was a transaction */
+ ckch_store_free(ckchs_transaction.new_ckchs);
+
+ ckchs_transaction.new_ckchs = ctx->new_ckchs;
+
+
+ /* the SNI ctxs will be created later, by the commit IO handler */
+
+end:
+ free_trash_chunk(buf);
+
+ if (errcode & ERR_CODE) {
+ ckch_store_free(ctx->new_ckchs);
+ ctx->new_ckchs = NULL;
+ ctx->old_ckchs = NULL;
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, memprintf(&err, "%sCan't update %s!\n", err ? err : "", args[3]));
+ } else {
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+ }
+ /* TODO: handle the ERR_WARN codes, which are not handled because of the io_handler */
+}
+
+/* parsing function of 'abort ssl cert' */
+static int cli_parse_abort_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'abort ssl cert' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't abort!\nOperations on certificates are currently locked!\n");
+
+ if (!ckchs_transaction.path) {
+ memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(ckchs_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to abort a transaction for '%s'\n", ckchs_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* Only free the ckchs there, because the SNI and instances were not generated yet */
+ ckch_store_free(ckchs_transaction.new_ckchs);
+ ckchs_transaction.new_ckchs = NULL;
+ ckchs_transaction.old_ckchs = NULL;
+ ckchs_transaction.path = NULL;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ err = memprintf(&err, "Transaction aborted for certificate '%s'!\n", args[3]);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return cli_dynerr(appctx, err);
+}
+
+/* parsing function of 'new ssl cert' */
+static int cli_parse_new_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct ckch_store *store;
+ char *err = NULL;
+ char *path;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'new ssl cert' expects a filename\n");
+
+ path = args[3];
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't create a certificate!\nOperations on certificates are currently locked!\n");
+
+ store = ckchs_lookup(path);
+ if (store != NULL) {
+ memprintf(&err, "Certificate '%s' already exists!\n", path);
+ store = NULL; /* we don't want to free it */
+ goto error;
+ }
+ /* we won't support multi-certificate bundles here */
+ store = ckch_store_new(path);
+ if (!store) {
+ memprintf(&err, "unable to allocate memory.\n");
+ goto error;
+ }
+
+ /* insert into the ckchs tree */
+ ebst_insert(&ckchs_tree, &store->node);
+ memprintf(&err, "New empty certificate store '%s'!\n", args[3]);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+error:
+ free(store);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/* parsing function of 'del ssl cert' */
+static int cli_parse_del_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct ckch_store *store;
+ char *err = NULL;
+ char *filename;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'del ssl cert' expects a certificate name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete the certificate!\nOperations on certificates are currently locked!\n");
+
+ filename = args[3];
+
+ if (ckchs_transaction.path && strcmp(ckchs_transaction.path, filename) == 0) {
+ memprintf(&err, "ongoing transaction for the certificate '%s'", filename);
+ goto error;
+ }
+
+ store = ckchs_lookup(filename);
+ if (store == NULL) {
+ memprintf(&err, "certificate '%s' doesn't exist!\n", filename);
+ goto error;
+ }
+ if (!LIST_ISEMPTY(&store->ckch_inst)) {
+ memprintf(&err, "certificate '%s' in use, can't be deleted!\n", filename);
+ goto error;
+ }
+
+ ebmb_delete(&store->node);
+ ckch_store_free(store);
+
+ memprintf(&err, "Certificate '%s' deleted!\n", filename);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ memprintf(&err, "Can't remove the certificate: %s\n", err ? err : "");
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+
+
+/* parsing function of 'new ssl ca-file' */
+static int cli_parse_new_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *path;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'new ssl ca-file' expects a filename\n");
+
+ path = args[3];
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't create a CA file!\nOperations on certificates are currently locked!\n");
+
+ cafile_entry = ssl_store_get_cafile_entry(path, 0);
+ if (cafile_entry) {
+ memprintf(&err, "CA file '%s' already exists!\n", path);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_create_cafile_entry(path, NULL, CAFILE_CERT);
+ if (!cafile_entry) {
+ memprintf(&err, "%sCannot allocate memory!\n",
+ err ? err : "");
+ goto error;
+ }
+
+ /* Add the newly created cafile_entry to the tree so that
+ * any new ckch instance created from now on can use it. */
+ if (ssl_store_add_uncommitted_cafile_entry(cafile_entry))
+ goto error;
+
+ memprintf(&err, "New CA file created '%s'!\n", path);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/*
+ * Parsing function of `set ssl ca-file`
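+ * The workflow mirrors "set ssl cert": the payload replaces the CA content in
+ * a temporary cafile_entry which only takes effect after
+ * "commit ssl ca-file <file>".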
+ */
+static int cli_parse_set_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct set_cafile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+ int errcode = 0;
+ struct buffer *buf;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || !payload)
+ return cli_err(appctx, "'set ssl ca-file expects a filename and CAs as a payload\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't update the CA file!\nOperations on certificates are currently locked!\n");
+
+ if ((buf = alloc_trash_chunk()) == NULL) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!chunk_strcpy(buf, args[3])) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ ctx->old_cafile_entry = NULL;
+ ctx->new_cafile_entry = NULL;
+
+ /* if there is an ongoing transaction */
+ if (cafile_transaction.path) {
+ /* if there is an ongoing transaction, check if this is the same file */
+ if (strcmp(cafile_transaction.path, buf->area) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", cafile_transaction.path, buf->area);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ ctx->old_cafile_entry = cafile_transaction.old_cafile_entry;
+ }
+ else {
+ /* lookup for the certificate in the tree */
+ ctx->old_cafile_entry = ssl_store_get_cafile_entry(buf->area, 0);
+ }
+
+ if (!ctx->old_cafile_entry) {
+ memprintf(&err, "%sCan't replace a CA file which is not referenced by the configuration!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (ctx->new_cafile_entry)
+ ssl_store_delete_cafile_entry(ctx->new_cafile_entry);
+
+ /* Create a new cafile_entry without adding it to the cafile tree. */
+ ctx->new_cafile_entry = ssl_store_create_cafile_entry(ctx->old_cafile_entry->path, NULL, CAFILE_CERT);
+ if (!ctx->new_cafile_entry) {
+ memprintf(&err, "%sCannot allocate memory!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* Fill the new entry with the new CAs. */
+ if (ssl_store_load_ca_from_buf(ctx->new_cafile_entry, payload)) {
+ memprintf(&err, "%sInvalid payload\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* we succeed, we can save the ca in the transaction */
+
+ /* if there wasn't a transaction, update the old CA */
+ if (!cafile_transaction.old_cafile_entry) {
+ cafile_transaction.old_cafile_entry = ctx->old_cafile_entry;
+ cafile_transaction.path = ctx->old_cafile_entry->path;
+ err = memprintf(&err, "transaction created for CA %s!\n", cafile_transaction.path);
+ } else {
+ err = memprintf(&err, "transaction updated for CA %s!\n", cafile_transaction.path);
+ }
+
+ /* free the previous CA if there was a transaction */
+ ssl_store_delete_cafile_entry(cafile_transaction.new_cafile_entry);
+
+ cafile_transaction.new_cafile_entry = ctx->new_cafile_entry;
+
+ /* the SNI ctxs will be created later, by the commit IO handler */
+
+end:
+ free_trash_chunk(buf);
+
+ if (errcode & ERR_CODE) {
+ ssl_store_delete_cafile_entry(ctx->new_cafile_entry);
+ ctx->new_cafile_entry = NULL;
+ ctx->old_cafile_entry = NULL;
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, memprintf(&err, "%sCan't update %s!\n", err ? err : "", args[3]));
+ } else {
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+ }
+}
+
+
+/*
+ * Parsing function of 'commit ssl ca-file'.
+ * It uses a commit_cacrlfile_ctx that's also shared with "commit ssl crl-file".
+ */
+static int cli_parse_commit_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct commit_cacrlfile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'commit ssl ca-file expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't commit the CA file!\nOperations on certificates are currently locked!\n");
+
+ if (!cafile_transaction.path) {
+ memprintf(&err, "No ongoing transaction! !\n");
+ goto error;
+ }
+
+ if (strcmp(cafile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", cafile_transaction.path, args[3]);
+ goto error;
+ }
+ /* init the appctx structure */
+ ctx->state = CACRL_ST_INIT;
+ ctx->next_ckchi_link = NULL;
+ ctx->old_cafile_entry = cafile_transaction.old_cafile_entry;
+ ctx->new_cafile_entry = cafile_transaction.new_cafile_entry;
+ ctx->cafile_type = CAFILE_CERT;
+
+ return 0;
+
+error:
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "%sCan't commit %s!\n", err ? err : "", args[3]);
+
+ return cli_dynerr(appctx, err);
+}
+
+/*
+ * This function tries to create new ckch instances and their SNIs using a newly
+ * set certificate authority (CA file) or a newly set Certificate Revocation
+ * List (CRL), depending on the command being called.
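+ *
+ * It runs the same CACRL_ST_INIT -> CACRL_ST_GEN -> CACRL_ST_INSERT ->
+ * CACRL_ST_SUCCESS -> CACRL_ST_FIN state machine as the certificate commit
+ * handler, with CACRL_ST_ERROR as the failure path.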
+ */
+static int cli_io_handler_commit_cafile_crlfile(struct appctx *appctx)
+{
+ struct commit_cacrlfile_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ int y = 0;
+ struct cafile_entry *old_cafile_entry = NULL, *new_cafile_entry = NULL;
+ struct ckch_inst_link *ckchi_link;
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto end;
+
+ while (1) {
+ switch (ctx->state) {
+ case CACRL_ST_INIT:
+ /* This state just prints the update message */
+ switch (ctx->cafile_type) {
+ case CAFILE_CERT:
+ chunk_printf(&trash, "Committing %s", cafile_transaction.path);
+ break;
+ case CAFILE_CRL:
+ chunk_printf(&trash, "Committing %s", crlfile_transaction.path);
+ break;
+ }
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+
+ ctx->state = CACRL_ST_GEN;
+ /* fallthrough */
+ case CACRL_ST_GEN:
+ /*
+ * This state generates the ckch instances with their
+ * sni_ctxs and SSL_CTX.
+ *
+ * Since the SSL_CTX generation can be CPU intensive, we
+ * yield every 10 instances.
+ */
+ switch (ctx->cafile_type) {
+ case CAFILE_CERT:
+ old_cafile_entry = ctx->old_cafile_entry;
+ new_cafile_entry = ctx->new_cafile_entry;
+ break;
+ case CAFILE_CRL:
+ old_cafile_entry = ctx->old_crlfile_entry;
+ new_cafile_entry = ctx->new_crlfile_entry;
+ break;
+ }
+
+ /* get the next ckchi to regenerate */
+ ckchi_link = ctx->next_ckchi_link;
+
+ /* we didn't start yet, set it to the first elem */
+ if (ckchi_link == NULL) {
+ ckchi_link = LIST_ELEM(old_cafile_entry->ckch_inst_link.n, typeof(ckchi_link), list);
+ /* Add the newly created cafile_entry to the tree so that
+ * any new ckch instance created from now on can use it. */
+ if (ssl_store_add_uncommitted_cafile_entry(new_cafile_entry)) {
+ ctx->state = CACRL_ST_ERROR;
+ goto error;
+ }
+ }
+
+ list_for_each_entry_from(ckchi_link, &old_cafile_entry->ckch_inst_link, list) {
+ struct ckch_inst *new_inst;
+
+ /* save the next ckchi to compute */
+ ctx->next_ckchi_link = ckchi_link;
+
+ /* it takes a lot of CPU to create SSL_CTXs, so we yield every 10 CKCH instances */
+ if (y >= 10) {
+ applet_have_more_data(appctx); /* let's come back later */
+ goto yield;
+ }
+
+ /* display one dot per new instance */
+ if (applet_putstr(appctx, ".") == -1)
+ goto yield;
+
+ /* Rebuild a new ckch instance that uses the same ckch_store
+ * as the reference ckch instance but will use a new CA file. */
+ ctx->err = NULL;
+ if (ckch_inst_rebuild(ckchi_link->ckch_inst->ckch_store, ckchi_link->ckch_inst, &new_inst, &ctx->err)) {
+ ctx->state = CACRL_ST_ERROR;
+ goto error;
+ }
+
+ y++;
+ }
+
+ ctx->state = CACRL_ST_INSERT;
+ /* fallthrough */
+ case CACRL_ST_INSERT:
+ /* The generation is finished, we can insert everything */
+ switch (ctx->cafile_type) {
+ case CAFILE_CERT:
+ old_cafile_entry = ctx->old_cafile_entry;
+ new_cafile_entry = ctx->new_cafile_entry;
+ break;
+ case CAFILE_CRL:
+ old_cafile_entry = ctx->old_crlfile_entry;
+ new_cafile_entry = ctx->new_crlfile_entry;
+ break;
+ }
+ if (!new_cafile_entry)
+ continue;
+
+ /* insert the new ckch_insts in the crtlist_entry */
+ list_for_each_entry(ckchi_link, &new_cafile_entry->ckch_inst_link, list) {
+ if (ckchi_link->ckch_inst->crtlist_entry)
+ LIST_INSERT(&ckchi_link->ckch_inst->crtlist_entry->ckch_inst,
+ &ckchi_link->ckch_inst->by_crtlist_entry);
+ }
+
+ /* First, we insert every new SNIs in the trees, also replace the default_ctx */
+ list_for_each_entry(ckchi_link, &new_cafile_entry->ckch_inst_link, list) {
+ __ssl_sock_load_new_ckch_instance(ckchi_link->ckch_inst);
+ }
+
+ /* delete the old sni_ctx, the old ckch_insts
+ * and the ckch_store. ckch_inst_free() also
+ * manipulates the list so it's cleaner to loop
+ * until it's empty */
+ while (!LIST_ISEMPTY(&old_cafile_entry->ckch_inst_link)) {
+ ckchi_link = LIST_ELEM(old_cafile_entry->ckch_inst_link.n, typeof(ckchi_link), list);
+
+ LIST_DEL_INIT(&ckchi_link->list); /* must reinit because ckch_inst checks the list */
+ __ckch_inst_free_locked(ckchi_link->ckch_inst);
+ free(ckchi_link);
+ }
+
+ /* Remove the old cafile entry from the tree */
+ ebmb_delete(&old_cafile_entry->node);
+ ssl_store_delete_cafile_entry(old_cafile_entry);
+
+ switch (ctx->cafile_type) {
+ case CAFILE_CERT:
+ ctx->old_cafile_entry = ctx->new_cafile_entry = NULL;
+ break;
+ case CAFILE_CRL:
+ ctx->old_crlfile_entry = ctx->new_crlfile_entry = NULL;
+ break;
+ }
+ ctx->state = CACRL_ST_SUCCESS;
+ /* fallthrough */
+ case CACRL_ST_SUCCESS:
+ if (applet_putstr(appctx, "\nSuccess!\n") == -1)
+ goto yield;
+ ctx->state = CACRL_ST_FIN;
+ /* fallthrough */
+ case CACRL_ST_FIN:
+ /* the transaction is completed, we can set everything to NULL */
+ switch (ctx->cafile_type) {
+ case CAFILE_CERT:
+ cafile_transaction.old_cafile_entry = NULL;
+ cafile_transaction.new_cafile_entry = NULL;
+ cafile_transaction.path = NULL;
+ break;
+ case CAFILE_CRL:
+ crlfile_transaction.old_crlfile_entry = NULL;
+ crlfile_transaction.new_crlfile_entry = NULL;
+ crlfile_transaction.path = NULL;
+ break;
+ }
+ goto end;
+
+ case CACRL_ST_ERROR:
+ error:
+ chunk_printf(&trash, "\n%sFailed!\n", ctx->err);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = CACRL_ST_FIN;
+ break;
+ }
+ }
+end:
+ /* success: call the release function and don't come back */
+ return 1;
+yield:
+ return 0; /* should come back */
+}
+
+
+/* parsing function of 'abort ssl ca-file' */
+static int cli_parse_abort_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'abort ssl ca-file' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't abort!\nOperations on certificates are currently locked!\n");
+
+ if (!cafile_transaction.path) {
+ memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(cafile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to abort a transaction for '%s'\n", cafile_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* Only free the uncommitted cafile_entry here, because the SNI and instances were not generated yet */
+ ssl_store_delete_cafile_entry(cafile_transaction.new_cafile_entry);
+ cafile_transaction.new_cafile_entry = NULL;
+ cafile_transaction.old_cafile_entry = NULL;
+ cafile_transaction.path = NULL;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ err = memprintf(&err, "Transaction aborted for certificate '%s'!\n", args[3]);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return cli_dynerr(appctx, err);
+}
+
+/* release function of the `commit ssl ca-file' command, frees things and unlocks the spinlock.
+ * It uses a commit_cacrlfile_ctx context.
+ */
+static void cli_release_commit_cafile(struct appctx *appctx)
+{
+ struct commit_cacrlfile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *new_cafile_entry = ctx->new_cafile_entry;
+
+ /* Remove the uncommitted cafile_entry from the tree. */
+ if (new_cafile_entry) {
+ ebmb_delete(&new_cafile_entry->node);
+ ssl_store_delete_cafile_entry(new_cafile_entry);
+ }
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ ha_free(&ctx->err);
+}
+
+
+/* IO handler of details "show ssl ca-file <filename[:index]>".
+ * It uses a show_cafile_ctx context, and the global
+ * cafile_transaction.new_cafile_entry in read-only.
+ */
+static int cli_io_handler_show_cafile_detail(struct appctx *appctx)
+{
+ struct show_cafile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *cafile_entry = ctx->cur_cafile_entry;
+ struct buffer *out = alloc_trash_chunk();
+ int i = 0;
+ X509 *cert;
+ STACK_OF(X509_OBJECT) *objs;
+ int retval = 0;
+ int ca_index = ctx->ca_index;
+ int show_all = ctx->show_all;
+
+ if (!out)
+ goto end_no_putchk;
+
+ chunk_appendf(out, "Filename: ");
+ if (cafile_entry == cafile_transaction.new_cafile_entry)
+ chunk_appendf(out, "*");
+ chunk_appendf(out, "%s\n", cafile_entry->path);
+
+ chunk_appendf(out, "Status: ");
+ if (!cafile_entry->ca_store)
+ chunk_appendf(out, "Empty\n");
+ else if (LIST_ISEMPTY(&cafile_entry->ckch_inst_link))
+ chunk_appendf(out, "Unused\n");
+ else
+ chunk_appendf(out, "Used\n");
+
+ if (!cafile_entry->ca_store)
+ goto end;
+
+ objs = X509_STORE_get0_objects(cafile_entry->ca_store);
+ for (i = ca_index; i < sk_X509_OBJECT_num(objs); i++) {
+
+ cert = X509_OBJECT_get0_X509(sk_X509_OBJECT_value(objs, i));
+ if (!cert)
+ continue;
+
+ /* certificate indexes start at 1 on the CLI */
+ chunk_appendf(out, " \nCertificate #%d:\n", i+1);
+ retval = show_cert_detail(cert, NULL, out);
+ if (retval < 0)
+ goto end_no_putchk;
+ else if (retval)
+ goto yield;
+
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+ if (!show_all) /* only need to dump one certificate */
+ goto end;
+ }
+
+end:
+ free_trash_chunk(out);
+ return 1; /* end, don't come back */
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ /* save the current state */
+ ctx->ca_index = i;
+ free_trash_chunk(out);
+ return 0; /* should come back */
+}
+
+
+/* parsing function for 'show ssl ca-file [cafile[:index]]'.
+ * It prepares a show_cafile_ctx context, and checks the global
+ * cafile_transaction under the ckch_lock (read only).
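+ * E.g. "show ssl ca-file myca.pem:2" only dumps the second certificate of
+ * that file (indexes start at 1 on the CLI).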
+ */
+static int cli_parse_show_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_cafile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct cafile_entry *cafile_entry;
+ int ca_index = 0;
+ char *colons;
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return cli_err(appctx, "Can't allocate memory!\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't show!\nOperations on certificates are currently locked!\n");
+
+ ctx->show_all = 1; /* show all certificates */
+ ctx->ca_index = 0;
+ /* check if there is a certificate to lookup */
+ if (*args[3]) {
+
+ /* Look for an optional CA index after the CA file name */
+ colons = strchr(args[3], ':');
+ if (colons) {
+ char *endptr;
+
+ ca_index = strtol(colons + 1, &endptr, 10);
+ /* Indexes start at 1 */
+ if (colons + 1 == endptr || *endptr != '\0' || ca_index <= 0) {
+ memprintf(&err, "wrong CA index after colons in '%s'!", args[3]);
+ goto error;
+ }
+ *colons = '\0';
+ ctx->ca_index = ca_index - 1; /* we start counting at 0 in the ca_store, but at 1 on the CLI */
+ ctx->show_all = 0; /* show only one certificate */
+ }
+
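+ /* a leading '*' designates the uncommitted CA file of the ongoing
+ * transaction */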
+ if (*args[3] == '*') {
+ if (!cafile_transaction.new_cafile_entry)
+ goto error;
+
+ cafile_entry = cafile_transaction.new_cafile_entry;
+
+ if (strcmp(args[3] + 1, cafile_entry->path) != 0)
+ goto error;
+
+ } else {
+ /* Get the "original" cafile_entry and not the
+ * uncommitted one if it exists. */
+ if ((cafile_entry = ssl_store_get_cafile_entry(args[3], 1)) == NULL || cafile_entry->type != CAFILE_CERT)
+ goto error;
+ }
+
+ ctx->cur_cafile_entry = cafile_entry;
+ /* use the IO handler that shows details */
+ appctx->io_handler = cli_io_handler_show_cafile_detail;
+ }
+
+ return 0;
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ if (err)
+ return cli_dynerr(appctx, err);
+ return cli_err(appctx, "Can't display the CA file : Not found!\n");
+}
+
+
+/* release function of the 'show ssl ca-file' command */
+static void cli_release_show_cafile(struct appctx *appctx)
+{
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+}
+
+
+/* This function returns the number of certificates in a cafile_entry. */
+static int get_certificate_count(struct cafile_entry *cafile_entry)
+{
+ int cert_count = 0;
+ STACK_OF(X509_OBJECT) *objs;
+
+ if (cafile_entry && cafile_entry->ca_store) {
+ objs = X509_STORE_get0_objects(cafile_entry->ca_store);
+ if (objs)
+ cert_count = sk_X509_OBJECT_num(objs);
+ }
+ return cert_count;
+}
+
+/* IO handler of "show ssl ca-file". The command taking a specific CA file name
+ * is managed in cli_io_handler_show_cafile_detail.
+ * It uses a show_cafile_ctx and the global cafile_transaction.new_cafile_entry
+ * in read-only.
+ */
+static int cli_io_handler_show_cafile(struct appctx *appctx)
+{
+ struct show_cafile_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *node;
+ struct cafile_entry *cafile_entry = NULL;
+
+ if (trash == NULL)
+ return 1;
+
+ if (!ctx->old_cafile_entry && cafile_transaction.old_cafile_entry) {
+ chunk_appendf(trash, "# transaction\n");
+ chunk_appendf(trash, "*%s", cafile_transaction.old_cafile_entry->path);
+ chunk_appendf(trash, " - %d certificate(s)\n", get_certificate_count(cafile_transaction.new_cafile_entry));
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ ctx->old_cafile_entry = cafile_transaction.new_cafile_entry;
+ }
+
+ /* First time in this io_handler. */
+ if (!ctx->cur_cafile_entry) {
+ chunk_appendf(trash, "# filename\n");
+ node = ebmb_first(&cafile_tree);
+ } else {
+ /* We yielded during a previous call. */
+ node = &ctx->cur_cafile_entry->node;
+ }
+
+ while (node) {
+ cafile_entry = ebmb_entry(node, struct cafile_entry, node);
+ if (cafile_entry->type == CAFILE_CERT) {
+ chunk_appendf(trash, "%s", cafile_entry->path);
+
+ chunk_appendf(trash, " - %d certificate(s)\n", get_certificate_count(cafile_entry));
+ }
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ ctx->cur_cafile_entry = NULL;
+ free_trash_chunk(trash);
+ return 1;
+yield:
+
+ free_trash_chunk(trash);
+ ctx->cur_cafile_entry = cafile_entry;
+ return 0; /* should come back */
+}
+
+/* parsing function of 'del ssl ca-file' */
+static int cli_parse_del_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *filename;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'del ssl ca-file' expects a CA file name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete the CA file!\nOperations on certificates are currently locked!\n");
+
+ filename = args[3];
+
+ if (cafile_transaction.path && strcmp(cafile_transaction.path, filename) == 0) {
+ memprintf(&err, "ongoing transaction for the CA file '%s'", filename);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_get_cafile_entry(filename, 0);
+ if (!cafile_entry) {
+ memprintf(&err, "CA file '%s' doesn't exist!\n", filename);
+ goto error;
+ }
+
+ if (!LIST_ISEMPTY(&cafile_entry->ckch_inst_link)) {
+ memprintf(&err, "CA file '%s' in use, can't be deleted!\n", filename);
+ goto error;
+ }
+
+ /* Remove the cafile_entry from the tree */
+ ebmb_delete(&cafile_entry->node);
+ ssl_store_delete_cafile_entry(cafile_entry);
+
+ memprintf(&err, "CA file '%s' deleted!\n", filename);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ memprintf(&err, "Can't remove the CA file: %s\n", err ? err : "");
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/* parsing function of 'new ssl crl-file' */
+static int cli_parse_new_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *path;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'new ssl crl-file' expects a filename\n");
+
+ path = args[3];
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't create a CRL file!\nOperations on certificates are currently locked!\n");
+
+ cafile_entry = ssl_store_get_cafile_entry(path, 0);
+ if (cafile_entry) {
+ memprintf(&err, "CRL file '%s' already exists!\n", path);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_create_cafile_entry(path, NULL, CAFILE_CRL);
+ if (!cafile_entry) {
+ memprintf(&err, "%sCannot allocate memory!\n", err ? err : "");
+ goto error;
+ }
+
+ /* Add the newly created cafile_entry to the tree so that
+ * any new ckch instance created from now on can use it. */
+ if (ssl_store_add_uncommitted_cafile_entry(cafile_entry))
+ goto error;
+
+ memprintf(&err, "New CRL file created '%s'!\n", path);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/* Parsing function of `set ssl crl-file` */
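+/* Like "set ssl ca-file", the new CRL content is staged in a temporary
+ * cafile_entry (of type CAFILE_CRL) and only takes effect after
+ * "commit ssl crl-file <file>".
+ */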
+static int cli_parse_set_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct set_crlfile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+ int errcode = 0;
+ struct buffer *buf;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || !payload)
+ return cli_err(appctx, "'set ssl crl-file expects a filename and CRLs as a payload\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't update the CRL file!\nOperations on certificates are currently locked!\n");
+
+ if ((buf = alloc_trash_chunk()) == NULL) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!chunk_strcpy(buf, args[3])) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ ctx->old_crlfile_entry = NULL;
+ ctx->new_crlfile_entry = NULL;
+
+ /* if there is an ongoing transaction */
+ if (crlfile_transaction.path) {
+ /* if there is an ongoing transaction, check if this is the same file */
+ if (strcmp(crlfile_transaction.path, buf->area) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", crlfile_transaction.path, buf->area);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ ctx->old_crlfile_entry = crlfile_transaction.old_crlfile_entry;
+ }
+ else {
+ /* lookup for the certificate in the tree */
+ ctx->old_crlfile_entry = ssl_store_get_cafile_entry(buf->area, 0);
+ }
+
+ if (!ctx->old_crlfile_entry) {
+ memprintf(&err, "%sCan't replace a CRL file which is not referenced by the configuration!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (ctx->new_crlfile_entry)
+ ssl_store_delete_cafile_entry(ctx->new_crlfile_entry);
+
+ /* Create a new cafile_entry without adding it to the cafile tree. */
+ ctx->new_crlfile_entry = ssl_store_create_cafile_entry(ctx->old_crlfile_entry->path, NULL, CAFILE_CRL);
+ if (!ctx->new_crlfile_entry) {
+ memprintf(&err, "%sCannot allocate memory!\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* Fill the new entry with the new CRL. */
+ if (ssl_store_load_ca_from_buf(ctx->new_crlfile_entry, payload)) {
+ memprintf(&err, "%sInvalid payload\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* we succeed, we can save the crl in the transaction */
+
+ /* if there wasn't a transaction, update the old CRL */
+ if (!crlfile_transaction.old_crlfile_entry) {
+ crlfile_transaction.old_crlfile_entry = ctx->old_crlfile_entry;
+ crlfile_transaction.path = ctx->old_crlfile_entry->path;
+ err = memprintf(&err, "transaction created for CRL %s!\n", crlfile_transaction.path);
+ } else {
+ err = memprintf(&err, "transaction updated for CRL %s!\n", crlfile_transaction.path);
+ }
+
+ /* free the previous CRL file if there was a transaction */
+ ssl_store_delete_cafile_entry(crlfile_transaction.new_crlfile_entry);
+
+ crlfile_transaction.new_crlfile_entry = ctx->new_crlfile_entry;
+
+ /* the SNI ctxs will be created later, by the commit IO handler */
+
+end:
+ free_trash_chunk(buf);
+
+ if (errcode & ERR_CODE) {
+ ssl_store_delete_cafile_entry(ctx->new_crlfile_entry);
+ ctx->new_crlfile_entry = NULL;
+ ctx->old_crlfile_entry = NULL;
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, memprintf(&err, "%sCan't update %s!\n", err ? err : "", args[3]));
+ } else {
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+ }
+}
+
+/* Parsing function of 'commit ssl crl-file'.
+ * It uses a commit_cacrlfile_ctx that's also shared with "commit ssl ca-file".
+ */
+static int cli_parse_commit_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct commit_cacrlfile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'commit ssl ca-file expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't commit the CRL file!\nOperations on certificates are currently locked!\n");
+
+ if (!crlfile_transaction.path) {
+ memprintf(&err, "No ongoing transaction! !\n");
+ goto error;
+ }
+
+ if (strcmp(crlfile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", crlfile_transaction.path, args[3]);
+ goto error;
+ }
+ /* init the appctx structure */
+ ctx->state = CACRL_ST_INIT;
+ ctx->next_ckchi_link = NULL;
+ ctx->old_crlfile_entry = crlfile_transaction.old_crlfile_entry;
+ ctx->new_crlfile_entry = crlfile_transaction.new_crlfile_entry;
+ ctx->cafile_type = CAFILE_CRL;
+
+ return 0;
+
+error:
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "%sCan't commit %s!\n", err ? err : "", args[3]);
+
+ return cli_dynerr(appctx, err);
+}
+
+
+/* release function of the `commit ssl crl-file' command, frees things and unlocks the spinlock.
+ * It uses a commit_cacrlfile_ctx that's the same as for "commit ssl ca-file".
+ */
+static void cli_release_commit_crlfile(struct appctx *appctx)
+{
+ struct commit_cacrlfile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *new_crlfile_entry = ctx->new_crlfile_entry;
+
+ /* Remove the uncommitted cafile_entry from the tree. */
+ if (new_crlfile_entry) {
+ ebmb_delete(&new_crlfile_entry->node);
+ ssl_store_delete_cafile_entry(new_crlfile_entry);
+ }
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ ha_free(&ctx->err);
+}
+
+/* parsing function of 'del ssl crl-file' */
+static int cli_parse_del_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *filename;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'del ssl crl-file' expects a CRL file name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete the CRL file!\nOperations on certificates are currently locked!\n");
+
+ filename = args[3];
+
+ if (crlfile_transaction.path && strcmp(crlfile_transaction.path, filename) == 0) {
+ memprintf(&err, "ongoing transaction for the CRL file '%s'", filename);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_get_cafile_entry(filename, 0);
+ if (!cafile_entry) {
+ memprintf(&err, "CRL file '%s' doesn't exist!\n", filename);
+ goto error;
+ }
+ if (cafile_entry->type != CAFILE_CRL) {
+ memprintf(&err, "'del ssl crl-file' does not work on CA files!\n");
+ goto error;
+ }
+
+ if (!LIST_ISEMPTY(&cafile_entry->ckch_inst_link)) {
+ memprintf(&err, "CRL file '%s' in use, can't be deleted!\n", filename);
+ goto error;
+ }
+
+ /* Remove the cafile_entry from the tree */
+ ebmb_delete(&cafile_entry->node);
+ ssl_store_delete_cafile_entry(cafile_entry);
+
+ memprintf(&err, "CRL file '%s' deleted!\n", filename);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ memprintf(&err, "Can't remove the CRL file: %s\n", err ? err : "");
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/* parsing function of 'abort ssl crl-file' */
+static int cli_parse_abort_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'abort ssl crl-file' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't abort!\nOperations on certificates are currently locked!\n");
+
+ if (!crlfile_transaction.path) {
+ memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(crlfile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to abort a transaction for '%s'\n", crlfile_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* Only free the uncommitted cafile_entry here, because the SNI and instances were not generated yet */
+ ssl_store_delete_cafile_entry(crlfile_transaction.new_crlfile_entry);
+ crlfile_transaction.new_crlfile_entry = NULL;
+ crlfile_transaction.old_crlfile_entry = NULL;
+ crlfile_transaction.path = NULL;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ err = memprintf(&err, "Transaction aborted for certificate '%s'!\n", args[3]);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return cli_dynerr(appctx, err);
+}
+
+
+/*
+ * Display a Certificate Revocation List's information.
+ * The information displayed is inspired by the output of 'openssl crl -in
+ * crl.pem -text'.
+ * Returns 0 in case of success.
+ */
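+/* For reference, illustrative output of the function below (all values are
+ * examples, not taken from a real CRL):
+ *   Version 1
+ *   Signature Algorithm: sha256WithRSAEncryption
+ *   Issuer: /C=FR/O=Example Corp/CN=Example CA
+ *   Last Update: Apr 23 14:45:39 2021 GMT
+ *   Next Update: Sep  8 14:45:39 2048 GMT
+ *   Revoked Certificates:
+ *     Serial Number: 1008
+ *     Revocation Date: Apr 23 14:45:36 2021 GMT
+ */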
+static int show_crl_detail(X509_CRL *crl, struct buffer *out)
+{
+ BIO *bio = NULL;
+ struct buffer *tmp = alloc_trash_chunk();
+ long version;
+ X509_NAME *issuer;
+ int write = -1;
+ STACK_OF(X509_REVOKED) *rev = NULL;
+ X509_REVOKED *rev_entry = NULL;
+ int i;
+
+ if (!tmp)
+ return -1;
+
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+
+ /* Version (as displayed by 'openssl crl') */
+ version = X509_CRL_get_version(crl);
+ chunk_appendf(out, "Version %ld\n", version + 1);
+
+ /* Signature Algorithm */
+ chunk_appendf(out, "Signature Algorithm: %s\n", OBJ_nid2ln(X509_CRL_get_signature_nid(crl)));
+
+ /* Issuer */
+ chunk_appendf(out, "Issuer: ");
+ if ((issuer = X509_CRL_get_issuer(crl)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(issuer, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ /* Last Update */
+ chunk_appendf(out, "Last Update: ");
+ chunk_reset(tmp);
+ if (BIO_reset(bio) == -1)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_CRL_get0_lastUpdate(crl)) == 0)
+ goto end;
+ write = BIO_read(bio, tmp->area, tmp->size-1);
+ if (write < 0)
+ goto end;
+ tmp->area[write] = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+
+ /* Next Update */
+ chunk_appendf(out, "Next Update: ");
+ chunk_reset(tmp);
+ if (BIO_reset(bio) == -1)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_CRL_get0_nextUpdate(crl)) == 0)
+ goto end;
+ write = BIO_read(bio, tmp->area, tmp->size-1);
+ if (write < 0)
+ goto end;
+ tmp->area[write] = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+
+ /* Revoked Certificates */
+ rev = X509_CRL_get_REVOKED(crl);
+ if (sk_X509_REVOKED_num(rev) > 0)
+ chunk_appendf(out, "Revoked Certificates:\n");
+ else
+ chunk_appendf(out, "No Revoked Certificates.\n");
+
+ for (i = 0; i < sk_X509_REVOKED_num(rev); i++) {
+ rev_entry = sk_X509_REVOKED_value(rev, i);
+
+ /* Serial Number and Revocation Date */
+ if (BIO_reset(bio) == -1)
+ goto end;
+ BIO_printf(bio , " Serial Number: ");
+ i2a_ASN1_INTEGER(bio, (ASN1_INTEGER*)X509_REVOKED_get0_serialNumber(rev_entry));
+ BIO_printf(bio, "\n Revocation Date: ");
+ if (ASN1_TIME_print(bio, X509_REVOKED_get0_revocationDate(rev_entry)) == 0)
+ goto end;
+ BIO_printf(bio, "\n");
+
+ write = BIO_read(bio, tmp->area, tmp->size-1);
+ if (write < 0)
+ goto end;
+ tmp->area[write] = '\0';
+ chunk_appendf(out, "%s", tmp->area);
+ }
+
+end:
+ free_trash_chunk(tmp);
+ if (bio)
+ BIO_free(bio);
+
+ return 0;
+}
+
+/* IO handler for the details of "show ssl crl-file <filename[:index]>".
+ * It uses a show_crlfile_ctx and the global
+ * crlfile_transaction.new_crlfile_entry in read-only.
+ */
+static int cli_io_handler_show_crlfile_detail(struct appctx *appctx)
+{
+ struct show_crlfile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *cafile_entry = ctx->cafile_entry;
+ struct buffer *out = alloc_trash_chunk();
+ int i;
+ X509_CRL *crl;
+ STACK_OF(X509_OBJECT) *objs;
+ int retval = 0;
+ int index = ctx->index;
+
+ if (!out)
+ goto end_no_putchk;
+
+ chunk_appendf(out, "Filename: ");
+ if (cafile_entry == crlfile_transaction.new_crlfile_entry)
+ chunk_appendf(out, "*");
+ chunk_appendf(out, "%s\n", cafile_entry->path);
+
+ chunk_appendf(out, "Status: ");
+ if (!cafile_entry->ca_store)
+ chunk_appendf(out, "Empty\n");
+ else if (LIST_ISEMPTY(&cafile_entry->ckch_inst_link))
+ chunk_appendf(out, "Unused\n");
+ else
+ chunk_appendf(out, "Used\n");
+
+ if (!cafile_entry->ca_store)
+ goto end;
+
+ objs = X509_STORE_get0_objects(cafile_entry->ca_store);
+ for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
+ crl = X509_OBJECT_get0_X509_CRL(sk_X509_OBJECT_value(objs, i));
+ if (!crl)
+ continue;
+
+ /* CRL indexes start at 1 on the CLI output. */
+ if (index && index-1 != i)
+ continue;
+
+ chunk_appendf(out, " \nCertificate Revocation List #%d:\n", i+1);
+ retval = show_crl_detail(crl, out);
+ if (retval < 0)
+ goto end_no_putchk;
+ else if (retval || index)
+ goto end;
+ }
+
+end:
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ free_trash_chunk(out);
+ return 0; /* should come back */
+}
+
+/* parsing function for 'show ssl crl-file [crlfile[:index]]'.
+ * It sets the context to a show_crlfile_ctx, and reads the global
+ * crlfile_transaction.new_crlfile_entry under the ckch_lock.
+ */
+static int cli_parse_show_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_crlfile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct cafile_entry *cafile_entry;
+ long index = 0;
+ char *colons;
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't show!\nOperations on certificates are currently locked!\n");
+
+ /* check if there is a certificate to lookup */
+ if (*args[3]) {
+
+ /* Look for an optional index after the CRL file name */
+ colons = strchr(args[3], ':');
+ if (colons) {
+ char *endptr;
+
+ index = strtol(colons + 1, &endptr, 10);
+ /* Indexes start at 1 */
+ if (colons + 1 == endptr || *endptr != '\0' || index <= 0) {
+ memprintf(&err, "wrong CRL index after colons in '%s'!", args[3]);
+ goto error;
+ }
+ *colons = '\0';
+ }
+
+ if (*args[3] == '*') {
+ if (!crlfile_transaction.new_crlfile_entry)
+ goto error;
+
+ cafile_entry = crlfile_transaction.new_crlfile_entry;
+
+ if (strcmp(args[3] + 1, cafile_entry->path) != 0)
+ goto error;
+
+ } else {
+ /* Get the "original" cafile_entry and not the
+ * uncommitted one if it exists. */
+ if ((cafile_entry = ssl_store_get_cafile_entry(args[3], 1)) == NULL || cafile_entry->type != CAFILE_CRL)
+ goto error;
+ }
+
+ ctx->cafile_entry = cafile_entry;
+ ctx->index = index;
+ /* use the IO handler that shows details */
+ appctx->io_handler = cli_io_handler_show_crlfile_detail;
+ }
+
+ return 0;
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ if (err)
+ return cli_dynerr(appctx, err);
+ return cli_err(appctx, "Can't display the CRL file : Not found!\n");
+}
+
+/* IO handler of "show ssl crl-file". The command taking a specific CRL file name
+ * is managed in cli_io_handler_show_crlfile_detail. */
+static int cli_io_handler_show_crlfile(struct appctx *appctx)
+{
+ struct show_crlfile_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *node;
+ struct cafile_entry *cafile_entry = NULL;
+
+ if (trash == NULL)
+ return 1;
+
+ if (!ctx->old_crlfile_entry && crlfile_transaction.old_crlfile_entry) {
+ chunk_appendf(trash, "# transaction\n");
+ chunk_appendf(trash, "*%s\n", crlfile_transaction.old_crlfile_entry->path);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ ctx->old_crlfile_entry = crlfile_transaction.old_crlfile_entry;
+ }
+
+ /* First time in this io_handler. */
+ if (!ctx->cafile_entry) {
+ chunk_appendf(trash, "# filename\n");
+ node = ebmb_first(&cafile_tree);
+ } else {
+ /* We yielded during a previous call. */
+ node = &ctx->cafile_entry->node;
+ }
+
+ while (node) {
+ cafile_entry = ebmb_entry(node, struct cafile_entry, node);
+ if (cafile_entry->type == CAFILE_CRL) {
+ chunk_appendf(trash, "%s\n", cafile_entry->path);
+ }
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ ctx->cafile_entry = NULL;
+ free_trash_chunk(trash);
+ return 1;
+yield:
+
+ free_trash_chunk(trash);
+ ctx->cafile_entry = cafile_entry;
+ return 0; /* should come back */
+}
+
+
+/* release function of the 'show ssl crl-file' command */
+static void cli_release_show_crlfile(struct appctx *appctx)
+{
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+}
+
+
+void ckch_deinit()
+{
+ struct eb_node *node, *next;
+ struct ckch_store *store;
+ struct ebmb_node *canode;
+
+ /* deinit the ckch stores */
+ node = eb_first(&ckchs_tree);
+ while (node) {
+ next = eb_next(node);
+ store = ebmb_entry(node, struct ckch_store, node);
+ ckch_store_free(store);
+ node = next;
+ }
+
+ /* deinit the ca-file store */
+ canode = ebmb_first(&cafile_tree);
+ while (canode) {
+ struct cafile_entry *entry = NULL;
+
+ entry = ebmb_entry(canode, struct cafile_entry, node);
+ canode = ebmb_next(canode);
+ ebmb_delete(&entry->node);
+ ssl_store_delete_cafile_entry(entry);
+ }
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "new", "ssl", "cert", NULL }, "new ssl cert <certfile> : create a new certificate file to be used in a crt-list or a directory", cli_parse_new_cert, NULL, NULL },
+ { { "set", "ssl", "cert", NULL }, "set ssl cert <certfile> <payload> : replace a certificate file", cli_parse_set_cert, NULL, NULL },
+ { { "commit", "ssl", "cert", NULL }, "commit ssl cert <certfile> : commit a certificate file", cli_parse_commit_cert, cli_io_handler_commit_cert, cli_release_commit_cert },
+ { { "abort", "ssl", "cert", NULL }, "abort ssl cert <certfile> : abort a transaction for a certificate file", cli_parse_abort_cert, NULL, NULL },
+ { { "del", "ssl", "cert", NULL }, "del ssl cert <certfile> : delete an unused certificate file", cli_parse_del_cert, NULL, NULL },
+ { { "show", "ssl", "cert", NULL }, "show ssl cert [<certfile>] : display the SSL certificates used in memory, or the details of a file", cli_parse_show_cert, cli_io_handler_show_cert, cli_release_show_cert },
+
+ { { "new", "ssl", "ca-file", NULL }, "new ssl ca-file <cafile> : create a new CA file to be used in a crt-list", cli_parse_new_cafile, NULL, NULL },
+ { { "set", "ssl", "ca-file", NULL }, "set ssl ca-file <cafile> <payload> : replace a CA file", cli_parse_set_cafile, NULL, NULL },
+ { { "commit", "ssl", "ca-file", NULL }, "commit ssl ca-file <cafile> : commit a CA file", cli_parse_commit_cafile, cli_io_handler_commit_cafile_crlfile, cli_release_commit_cafile },
+ { { "abort", "ssl", "ca-file", NULL }, "abort ssl ca-file <cafile> : abort a transaction for a CA file", cli_parse_abort_cafile, NULL, NULL },
+ { { "del", "ssl", "ca-file", NULL }, "del ssl ca-file <cafile> : delete an unused CA file", cli_parse_del_cafile, NULL, NULL },
+ { { "show", "ssl", "ca-file", NULL }, "show ssl ca-file [<cafile>[:<index>]] : display the SSL CA files used in memory, or the details of a <cafile>, or a single certificate of index <index> of a CA file <cafile>", cli_parse_show_cafile, cli_io_handler_show_cafile, cli_release_show_cafile },
+
+ { { "new", "ssl", "crl-file", NULL }, "new ssl crlfile <crlfile> : create a new CRL file to be used in a crt-list", cli_parse_new_crlfile, NULL, NULL },
+ { { "set", "ssl", "crl-file", NULL }, "set ssl crl-file <crlfile> <payload> : replace a CRL file", cli_parse_set_crlfile, NULL, NULL },
+ { { "commit", "ssl", "crl-file", NULL },"commit ssl crl-file <crlfile> : commit a CRL file", cli_parse_commit_crlfile, cli_io_handler_commit_cafile_crlfile, cli_release_commit_crlfile },
+ { { "abort", "ssl", "crl-file", NULL }, "abort ssl crl-file <crlfile> : abort a transaction for a CRL file", cli_parse_abort_crlfile, NULL, NULL },
+ { { "del", "ssl", "crl-file", NULL }, "del ssl crl-file <crlfile> : delete an unused CRL file", cli_parse_del_crlfile, NULL, NULL },
+ { { "show", "ssl", "crl-file", NULL }, "show ssl crl-file [<crlfile[:<index>>]] : display the SSL CRL files used in memory, or the details of a <crlfile>, or a single CRL of index <index> of CRL file <crlfile>", cli_parse_show_crlfile, cli_io_handler_show_crlfile, cli_release_show_crlfile },
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
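+
+/* Illustrative runtime workflow over the stats socket (the file names and
+ * the socket path are examples only):
+ *   $ echo -e "set ssl crl-file crl.pem <<\n$(cat new-crl.pem)\n" | \
+ *         socat /var/run/haproxy.sock -
+ *   $ echo "commit ssl crl-file crl.pem" | socat /var/run/haproxy.sock -
+ *   # or roll the pending change back instead:
+ *   $ echo "abort ssl crl-file crl.pem" | socat /var/run/haproxy.sock -
+ */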
+
diff --git a/src/ssl_crtlist.c b/src/ssl_crtlist.c
new file mode 100644
index 0000000..ab6b262
--- /dev/null
+++ b/src/ssl_crtlist.c
@@ -0,0 +1,1510 @@
+/*
+ *
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+
+#include <haproxy/applet.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_crtlist.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+/* CLI context for "show ssl crt-list" or "dump ssl crt-list" */
+struct show_crtlist_ctx {
+ struct ebmb_node *crtlist_node; /* ebmb_node for the current crtlist */
+ struct crtlist_entry *entry; /* current entry */
+ int mode; /* 'd' for dump, 's' for show */
+};
+
+/* CLI context for "add ssl crt-list" */
+struct add_crtlist_ctx {
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry;
+ struct bind_conf_list *bind_conf_node;
+ char *err;
+ enum {
+ ADDCRT_ST_INIT = 0,
+ ADDCRT_ST_GEN,
+ ADDCRT_ST_INSERT,
+ ADDCRT_ST_SUCCESS,
+ ADDCRT_ST_ERROR,
+ ADDCRT_ST_FIN,
+ } state;
+};
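+
+/* Note: the expected progression, as driven by the IO handler further below,
+ * is INIT -> GEN -> INSERT -> SUCCESS -> FIN, with ERROR entered from GEN
+ * when an instance cannot be generated or prepared.
+ */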
+
+/* release ssl bind conf */
+void ssl_sock_free_ssl_conf(struct ssl_bind_conf *conf)
+{
+ if (conf) {
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ ha_free(&conf->npn_str);
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ ha_free(&conf->alpn_str);
+#endif
+ ha_free(&conf->ca_file);
+ ha_free(&conf->ca_verify_file);
+ ha_free(&conf->crl_file);
+ ha_free(&conf->ciphers);
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ ha_free(&conf->ciphersuites);
+#endif
+ ha_free(&conf->curves);
+ ha_free(&conf->ecdhe);
+ }
+}
+
+/*
+ * Allocate and copy a ssl_bind_conf structure
+ */
+struct ssl_bind_conf *crtlist_dup_ssl_conf(struct ssl_bind_conf *src)
+{
+ struct ssl_bind_conf *dst;
+
+ if (!src)
+ return NULL;
+
+ dst = calloc(1, sizeof(*dst));
+ if (!dst)
+ return NULL;
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ if (src->npn_str) {
+ dst->npn_str = strdup(src->npn_str);
+ if (!dst->npn_str)
+ goto error;
+ }
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ if (src->alpn_str) {
+ dst->alpn_str = strdup(src->alpn_str);
+ if (!dst->alpn_str)
+ goto error;
+ }
+#endif
+ if (src->ca_file) {
+ dst->ca_file = strdup(src->ca_file);
+ if (!dst->ca_file)
+ goto error;
+ }
+ if (src->ca_verify_file) {
+ dst->ca_verify_file = strdup(src->ca_verify_file);
+ if (!dst->ca_verify_file)
+ goto error;
+ }
+ if (src->crl_file) {
+ dst->crl_file = strdup(src->crl_file);
+ if (!dst->crl_file)
+ goto error;
+ }
+ if (src->ciphers) {
+ dst->ciphers = strdup(src->ciphers);
+ if (!dst->ciphers)
+ goto error;
+ }
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (src->ciphersuites) {
+ dst->ciphersuites = strdup(src->ciphersuites);
+ if (!dst->ciphersuites)
+ goto error;
+ }
+#endif
+ if (src->curves) {
+ dst->curves = strdup(src->curves);
+ if (!dst->curves)
+ goto error;
+ }
+ if (src->ecdhe) {
+ dst->ecdhe = strdup(src->ecdhe);
+ if (!dst->ecdhe)
+ goto error;
+ }
+ return dst;
+
+error:
+ ssl_sock_free_ssl_conf(dst);
+ free(dst);
+
+ return NULL;
+}
+
+/* free sni filters */
+void crtlist_free_filters(char **args)
+{
+ int i;
+
+ if (!args)
+ return;
+
+ for (i = 0; args[i]; i++)
+ free(args[i]);
+
+ free(args);
+}
+
+/* Alloc and duplicate a char ** array */
+char **crtlist_dup_filters(char **args, int fcount)
+{
+ char **dst;
+ int i;
+
+ if (fcount == 0)
+ return NULL;
+
+ dst = calloc(fcount + 1, sizeof(*dst));
+ if (!dst)
+ return NULL;
+
+ for (i = 0; i < fcount; i++) {
+ dst[i] = strdup(args[i]);
+ if (!dst[i])
+ goto error;
+ }
+ return dst;
+
+error:
+ crtlist_free_filters(dst);
+ return NULL;
+}
+
+/*
+ * Detach and free a crtlist_entry.
+ * Free the filters, the ssl_conf and call ckch_inst_free() for each ckch_inst
+ */
+void crtlist_entry_free(struct crtlist_entry *entry)
+{
+ struct ckch_inst *inst, *inst_s;
+
+ if (entry == NULL)
+ return;
+
+ ebpt_delete(&entry->node);
+ LIST_DELETE(&entry->by_crtlist);
+ LIST_DELETE(&entry->by_ckch_store);
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_crtlist_entry) {
+ ckch_inst_free(inst);
+ }
+ free(entry);
+}
+/*
+ * Duplicate a crt_list entry and its content (ssl_conf, filters/fcount)
+ * Return a pointer to the new entry
+ */
+struct crtlist_entry *crtlist_entry_dup(struct crtlist_entry *src)
+{
+ struct crtlist_entry *entry;
+
+ if (src == NULL)
+ return NULL;
+
+ entry = crtlist_entry_new();
+ if (entry == NULL)
+ return NULL;
+
+ if (src->filters) {
+ entry->filters = crtlist_dup_filters(src->filters, src->fcount);
+ if (!entry->filters)
+ goto error;
+ }
+ entry->fcount = src->fcount;
+ if (src->ssl_conf) {
+ entry->ssl_conf = crtlist_dup_ssl_conf(src->ssl_conf);
+ if (!entry->ssl_conf)
+ goto error;
+ }
+ entry->crtlist = src->crtlist;
+
+ return entry;
+
+error:
+
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ free(entry);
+
+ return NULL;
+}
+
+/*
+ * Allocate and initialize a crtlist_entry
+ */
+struct crtlist_entry *crtlist_entry_new()
+{
+ struct crtlist_entry *entry;
+
+ entry = calloc(1, sizeof(*entry));
+ if (entry == NULL)
+ return NULL;
+
+ LIST_INIT(&entry->ckch_inst);
+
+ /* initialize the nodes so we can LIST_DELETE in any cases */
+ LIST_INIT(&entry->by_crtlist);
+ LIST_INIT(&entry->by_ckch_store);
+
+ return entry;
+}
+
+/* Free a crtlist, from the crt_entry to the content of the ssl_conf */
+void crtlist_free(struct crtlist *crtlist)
+{
+ struct crtlist_entry *entry, *s_entry;
+ struct bind_conf_list *bind_conf_node;
+
+ if (crtlist == NULL)
+ return;
+
+ bind_conf_node = crtlist->bind_conf;
+ while (bind_conf_node) {
+ struct bind_conf_list *next = bind_conf_node->next;
+ free(bind_conf_node);
+ bind_conf_node = next;
+ }
+
+ list_for_each_entry_safe(entry, s_entry, &crtlist->ord_entries, by_crtlist) {
+ crtlist_entry_free(entry);
+ }
+ ebmb_delete(&crtlist->node);
+ free(crtlist);
+}
+
+/* Alloc and initialize a struct crtlist
+ * <filename> is the key of the ebmb_node
+ * <unique> initialize the list of entries to be unique (1) or not (0)
+ */
+struct crtlist *crtlist_new(const char *filename, int unique)
+{
+ struct crtlist *newlist;
+
+ newlist = calloc(1, sizeof(*newlist) + strlen(filename) + 1);
+ if (newlist == NULL)
+ return NULL;
+
+ memcpy(newlist->node.key, filename, strlen(filename) + 1);
+ if (unique)
+ newlist->entries = EB_ROOT_UNIQUE;
+ else
+ newlist->entries = EB_ROOT;
+
+ LIST_INIT(&newlist->ord_entries);
+
+ return newlist;
+}
+
+/*
+ * Read a single crt-list line. /!\ This alters the <line> string.
+ * Fills <crt_path> and <crtlist_entry>.
+ * <crtlist_entry> must be allocated and freed by the caller.
+ * <crtlist_entry->ssl_conf> is allocated by the function.
+ * <crtlist_entry->filters> is allocated by the function.
+ * <crt_path> is a pointer inside <line>.
+ * Returns an error code.
+ */
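+/* A few illustrative crt-list lines accepted by this parser (file and host
+ * names are examples only):
+ *   cert1.pem
+ *   cert2.pem [alpn h2,http/1.1]
+ *   cert3.pem [ssl-min-ver TLSv1.2 verify none] *.example.org !www.example.org
+ */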
+int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, const char *file, int linenum, int from_cli, char **err)
+{
+ int cfgerr = 0;
+ int arg, newarg, cur_arg, i, ssl_b = 0, ssl_e = 0;
+ char *end;
+ char *args[MAX_CRT_ARGS + 1];
+ struct ssl_bind_conf *ssl_conf = NULL;
+
+ if (!line || !crt_path || !entry)
+ return ERR_ALERT | ERR_FATAL;
+
+ end = line + strlen(line);
+ if (end-line >= CRT_LINESIZE-1 && *(end-1) != '\n') {
+ /* Check if we reached the limit and the last char is not \n.
+ * Watch out for the last line without the terminating '\n'!
+ */
+ memprintf(err, "parsing [%s:%d]: line too long, limit is %d characters",
+ file, linenum, CRT_LINESIZE-1);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ arg = 0;
+ newarg = 1;
+ while (*line) {
+ if (isspace((unsigned char)*line)) {
+ newarg = 1;
+ *line = 0;
+ } else if (*line == '[') {
+ if (ssl_b) {
+ memprintf(err, "parsing [%s:%d]: too many '['", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ if (!arg) {
+ memprintf(err, "parsing [%s:%d]: file must start with a cert", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ ssl_b = arg;
+ newarg = 1;
+ *line = 0;
+ } else if (*line == ']') {
+ if (ssl_e) {
+ memprintf(err, "parsing [%s:%d]: too many ']'", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ if (!ssl_b) {
+ memprintf(err, "parsing [%s:%d]: missing '['", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ ssl_e = arg;
+ newarg = 1;
+ *line = 0;
+ } else if (newarg) {
+ if (arg == MAX_CRT_ARGS) {
+ memprintf(err, "parsing [%s:%d]: too many args ", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ newarg = 0;
+ args[arg++] = line;
+ }
+ line++;
+ }
+ args[arg++] = line;
+
+ /* empty line */
+ if (!*args[0]) {
+ cfgerr |= ERR_NONE;
+ goto error;
+ }
+
+ *crt_path = args[0];
+
+ if (ssl_b) {
+ if (ssl_b > 1) {
+ memprintf(err, "parsing [%s:%d]: malformated line, filters can't be between filename and options!", file, linenum);
+ cfgerr |= ERR_WARN;
+ }
+
+ ssl_conf = calloc(1, sizeof *ssl_conf);
+ if (!ssl_conf) {
+ memprintf(err, "not enough memory!");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+
+ cur_arg = ssl_b ? ssl_b : 1;
+ while (cur_arg < ssl_e) {
+ newarg = 0;
+ for (i = 0; ssl_crtlist_kws[i].kw != NULL; i++) {
+ if (strcmp(ssl_crtlist_kws[i].kw, args[cur_arg]) == 0) {
+ newarg = 1;
+ cfgerr |= ssl_crtlist_kws[i].parse(args, cur_arg, NULL, ssl_conf, from_cli, err);
+ if (cur_arg + 1 + ssl_crtlist_kws[i].skip > ssl_e) {
+ memprintf(err, "parsing [%s:%d]: ssl args out of '[]' for %s",
+ file, linenum, args[cur_arg]);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ cur_arg += 1 + ssl_crtlist_kws[i].skip;
+ break;
+ }
+ }
+ if (!cfgerr && !newarg) {
+ memprintf(err, "parsing [%s:%d]: unknown ssl keyword %s",
+ file, linenum, args[cur_arg]);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ entry->linenum = linenum;
+ entry->ssl_conf = ssl_conf;
+ entry->filters = crtlist_dup_filters(&args[cur_arg], arg - cur_arg - 1);
+ entry->fcount = arg - cur_arg - 1;
+
+ return cfgerr;
+
+error:
+ crtlist_free_filters(entry->filters);
+ entry->filters = NULL;
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ ha_free(&entry->ssl_conf);
+ return cfgerr;
+}
+
+
+
+/* This function parses a crt-list file and stores it in a struct crtlist; each line is stored in a crtlist_entry structure.
+ * It fills the <crtlist> argument with a pointer to a new crtlist struct.
+ *
+ * This function tries to open and store certificate files.
+ */
+int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *curproxy, struct crtlist **crtlist, char **err)
+{
+ struct crtlist *newlist;
+ struct crtlist_entry *entry = NULL;
+ char thisline[CRT_LINESIZE];
+ FILE *f;
+ struct stat buf;
+ int linenum = 0;
+ int cfgerr = 0;
+ int missing_lf = -1;
+
+ if ((f = fopen(file, "r")) == NULL) {
+ memprintf(err, "cannot open file '%s' : %s", file, strerror(errno));
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newlist = crtlist_new(file, 0);
+ if (newlist == NULL) {
+ memprintf(err, "Not enough memory!");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ while (fgets(thisline, sizeof(thisline), f) != NULL) {
+ char *end;
+ char *line = thisline;
+ char *crt_path;
+ char path[MAXPATHLEN+1];
+ struct ckch_store *ckchs;
+ int found = 0;
+
+ if (missing_lf != -1) {
+ memprintf(err, "parsing [%s:%d]: Stray NUL character at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ missing_lf = -1;
+ break;
+ }
+
+ linenum++;
+ end = line + strlen(line);
+ if (end-line == sizeof(thisline)-1 && *(end-1) != '\n') {
+ /* Check if we reached the limit and the last char is not \n.
+ * Watch out for the last line without the terminating '\n'!
+ */
+ memprintf(err, "parsing [%s:%d]: line too long, limit is %d characters",
+ file, linenum, (int)sizeof(thisline)-1);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ break;
+ }
+
+ if (*line == '#' || *line == '\n' || *line == '\r')
+ continue;
+
+ if (end > line && *(end-1) == '\n') {
+ /* kill trailing LF */
+ *(end - 1) = 0;
+ }
+ else {
+ /* mark this line as truncated */
+ missing_lf = end - line;
+ }
+
+ entry = crtlist_entry_new();
+ if (entry == NULL) {
+ memprintf(err, "Not enough memory!");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ cfgerr |= crtlist_parse_line(thisline, &crt_path, entry, file, linenum, 0, err);
+ if (cfgerr & ERR_CODE)
+ goto error;
+
+ /* empty line */
+ if (!crt_path || !*crt_path) {
+ crtlist_entry_free(entry);
+ entry = NULL;
+ continue;
+ }
+
+ if (*crt_path != '/' && global_ssl.crt_base) {
+ if ((strlen(global_ssl.crt_base) + 1 + strlen(crt_path)) > sizeof(path) ||
+ snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, crt_path) >= sizeof(path)) {
+ memprintf(err, "parsing [%s:%d]: '%s': path too long",
+ file, linenum, crt_path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ crt_path = path;
+ }
+
+ /* Look for a ckch_store or create one */
+ ckchs = ckchs_lookup(crt_path);
+ if (ckchs == NULL) {
+ if (stat(crt_path, &buf) == 0) {
+ found++;
+
+ ckchs = ckchs_load_cert_file(crt_path, err);
+ if (ckchs == NULL) {
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ entry->node.key = ckchs;
+ entry->crtlist = newlist;
+ ebpt_insert(&newlist->entries, &entry->node);
+ LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist);
+ LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store);
+
+ } else if (global_ssl.extra_files & SSL_GF_BUNDLE) {
+ /* If we didn't find the file, this could be a
+ bundle, since 2.3 we don't support multiple
+ certificates in the same OpenSSL store, so we
+ emulate it by loading each file separately. To
+ do so we need to duplicate the entry in the
+ crt-list because it becomes independent */
+ char fp[MAXPATHLEN+1] = {0};
+ int n = 0;
+ struct crtlist_entry *entry_dup = entry; /* use the previous created entry */
+ for (n = 0; n < SSL_SOCK_NUM_KEYTYPES; n++) {
+ struct stat buf;
+ int ret;
+
+ ret = snprintf(fp, sizeof(fp), "%s.%s", crt_path, SSL_SOCK_KEYTYPE_NAMES[n]);
+ if (ret > sizeof(fp))
+ continue;
+
+ ckchs = ckchs_lookup(fp);
+ if (!ckchs) {
+ if (stat(fp, &buf) == 0) {
+ ckchs = ckchs_load_cert_file(fp, err);
+ if (!ckchs) {
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ } else {
+ continue; /* didn't find this extension, skip */
+ }
+ }
+ found++;
+ linenum++; /* we duplicate the line for this entry in the bundle */
+ if (!entry_dup) { /* if the entry was used, duplicate one */
+ linenum++;
+ entry_dup = crtlist_entry_dup(entry);
+ if (!entry_dup) {
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ entry_dup->linenum = linenum;
+ }
+
+ entry_dup->node.key = ckchs;
+ entry_dup->crtlist = newlist;
+ ebpt_insert(&newlist->entries, &entry_dup->node);
+ LIST_APPEND(&newlist->ord_entries, &entry_dup->by_crtlist);
+ LIST_APPEND(&ckchs->crtlist_entry, &entry_dup->by_ckch_store);
+
+ entry_dup = NULL; /* the entry was used, we need a new one next round */
+ }
+#if HA_OPENSSL_VERSION_NUMBER < 0x10101000L
+ if (found) {
+ memprintf(err, "%sCan't load '%s'. Loading a multi certificates bundle requires OpenSSL >= 1.1.1\n",
+ err && *err ? *err : "", crt_path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif
+ }
+ if (!found) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", crt_path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+ } else {
+ entry->node.key = ckchs;
+ entry->crtlist = newlist;
+ ebpt_insert(&newlist->entries, &entry->node);
+ LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist);
+ LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store);
+ found++;
+ }
+ entry = NULL;
+ }
+
+ if (missing_lf != -1) {
+ memprintf(err, "parsing [%s:%d]: Missing LF on last line, file might have been truncated at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if (cfgerr & ERR_CODE)
+ goto error;
+
+ newlist->linecount = linenum;
+
+ fclose(f);
+ *crtlist = newlist;
+
+ return cfgerr;
+error:
+ crtlist_entry_free(entry);
+
+ fclose(f);
+ crtlist_free(newlist);
+ return cfgerr;
+}
+
+/* This function reads a directory and stores it in a struct crtlist; each file becomes a crtlist_entry structure
+ * Fill the <crtlist> argument with a pointer to a new crtlist struct
+ *
+ * This function tries to open and store certificate files.
+ */
+int crtlist_load_cert_dir(char *path, struct bind_conf *bind_conf, struct crtlist **crtlist, char **err)
+{
+ struct crtlist *dir;
+ struct dirent **de_list;
+ int i, n;
+ struct stat buf;
+ char *end;
+ char fp[MAXPATHLEN+1];
+ int cfgerr = 0;
+ struct ckch_store *ckchs;
+
+ dir = crtlist_new(path, 1);
+ if (dir == NULL) {
+ memprintf(err, "not enough memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ n = scandir(path, &de_list, 0, alphasort);
+ if (n < 0) {
+ memprintf(err, "%sunable to scan directory '%s' : %s.\n",
+ err && *err ? *err : "", path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ else {
+ for (i = 0; i < n; i++) {
+ struct crtlist_entry *entry;
+ struct dirent *de = de_list[i];
+
+ end = strrchr(de->d_name, '.');
+ if (end && (de->d_name[0] == '.' ||
+ strcmp(end, ".issuer") == 0 || strcmp(end, ".ocsp") == 0 ||
+ strcmp(end, ".sctl") == 0 || strcmp(end, ".key") == 0))
+ goto ignore_entry;
+
+ snprintf(fp, sizeof(fp), "%s/%s", path, de->d_name);
+ if (stat(fp, &buf) != 0) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", fp, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto ignore_entry;
+ }
+ if (!S_ISREG(buf.st_mode))
+ goto ignore_entry;
+
+ entry = crtlist_entry_new();
+ if (entry == NULL) {
+ memprintf(err, "not enough memory '%s'", fp);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto ignore_entry;
+ }
+
+ ckchs = ckchs_lookup(fp);
+ if (ckchs == NULL)
+ ckchs = ckchs_load_cert_file(fp, err);
+ if (ckchs == NULL) {
+ free(de);
+ free(entry);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ entry->node.key = ckchs;
+ entry->crtlist = dir;
+ LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store);
+ LIST_APPEND(&dir->ord_entries, &entry->by_crtlist);
+ ebpt_insert(&dir->entries, &entry->node);
+
+ignore_entry:
+ free(de);
+ }
+end:
+ free(de_list);
+ }
+
+ if (cfgerr & ERR_CODE) {
+ /* free the dir and entries on error */
+ crtlist_free(dir);
+ } else {
+ *crtlist = dir;
+ }
+ return cfgerr;
+
+}
+
+/*
+ * Take an ssl_bind_conf structure and append the configuration line used to
+ * create it in the buffer
+ */
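+/* For instance, an entry configured with "alpn h2 verify required ca-file
+ * ca.pem" would (illustratively) get the following string appended:
+ *   " [alpn h2 verify required ca-file ca.pem]"
+ */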
+static void dump_crtlist_sslconf(struct buffer *buf, const struct ssl_bind_conf *conf)
+{
+ int space = 0;
+
+ if (conf == NULL)
+ return;
+
+ chunk_appendf(buf, " [");
+#ifdef OPENSSL_NPN_NEGOTIATED
+ if (conf->npn_str) {
+ int len = conf->npn_len;
+ char *ptr = conf->npn_str;
+ int comma = 0;
+
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "npn ");
+ while (len) {
+ unsigned short size;
+
+ size = *ptr;
+ ptr++;
+ if (comma)
+ chunk_memcat(buf, ",", 1);
+ chunk_memcat(buf, ptr, size);
+ ptr += size;
+ len -= size + 1;
+ comma = 1;
+ }
+ chunk_memcat(buf, "", 1); /* finish with a \0 */
+ space++;
+ }
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ if (conf->alpn_str) {
+ int len = conf->alpn_len;
+ char *ptr = conf->alpn_str;
+ int comma = 0;
+
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "alpn ");
+ while (len) {
+ unsigned short size;
+
+ size = *ptr;
+ ptr++;
+ if (comma)
+ chunk_memcat(buf, ",", 1);
+ chunk_memcat(buf, ptr, size);
+ ptr += size;
+ len -= size + 1;
+ comma = 1;
+ }
+ chunk_memcat(buf, "", 1); /* finish with a \0 */
+ space++;
+ }
+#endif
+ /* verify */
+ {
+ if (conf->verify == SSL_SOCK_VERIFY_NONE) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "verify none");
+ space++;
+ } else if (conf->verify == SSL_SOCK_VERIFY_OPTIONAL) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "verify optional");
+ space++;
+ } else if (conf->verify == SSL_SOCK_VERIFY_REQUIRED) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "verify required");
+ space++;
+ }
+ }
+
+ if (conf->no_ca_names) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "no-ca-names");
+ space++;
+ }
+
+ if (conf->early_data) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "allow-0rtt");
+ space++;
+ }
+ if (conf->ca_file) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ca-file %s", conf->ca_file);
+ space++;
+ }
+ if (conf->crl_file) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "crl-file %s", conf->crl_file);
+ space++;
+ }
+ if (conf->ciphers) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ciphers %s", conf->ciphers);
+ space++;
+ }
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (conf->ciphersuites) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ciphersuites %s", conf->ciphersuites);
+ space++;
+ }
+#endif
+ if (conf->curves) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "curves %s", conf->curves);
+ space++;
+ }
+ if (conf->ecdhe) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ecdhe %s", conf->ecdhe);
+ space++;
+ }
+
+ /* the crt-lists only support ssl-min-ver and ssl-max-ver */
+ if (conf->ssl_methods_cfg.min) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ssl-min-ver %s", methodVersions[conf->ssl_methods_cfg.min].name);
+ space++;
+ }
+
+ if (conf->ssl_methods_cfg.max) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ssl-max-ver %s", methodVersions[conf->ssl_methods_cfg.max].name);
+ space++;
+ }
+
+ chunk_appendf(buf, "]");
+
+ return;
+}
+
+/* dump a list of filters */
+static void dump_crtlist_filters(struct buffer *buf, struct crtlist_entry *entry)
+{
+ int i;
+
+ if (!entry->fcount)
+ return;
+
+ for (i = 0; i < entry->fcount; i++) {
+ chunk_appendf(buf, " %s", entry->filters[i]);
+ }
+ return;
+}
+
+/************************** CLI functions ****************************/
+
+
+/* CLI IO handler for '(show|dump) ssl crt-list'.
+ * It uses show_crtlist_ctx for the context.
+ */
+static int cli_io_handler_dump_crtlist(struct appctx *appctx)
+{
+ struct show_crtlist_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *lnode;
+
+ if (trash == NULL)
+ return 1;
+
+ /* dump the list of crt-lists */
+ lnode = ctx->crtlist_node;
+ if (lnode == NULL)
+ lnode = ebmb_first(&crtlists_tree);
+ while (lnode) {
+ chunk_appendf(trash, "%s\n", lnode->key);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ lnode = ebmb_next(lnode);
+ }
+ free_trash_chunk(trash);
+ return 1;
+yield:
+ ctx->crtlist_node = lnode;
+ free_trash_chunk(trash);
+ return 0;
+}
+
+/* CLI IO handler for '(show|dump) ssl crt-list <filename>' */
+static int cli_io_handler_dump_crtlist_entries(struct appctx *appctx)
+{
+ struct show_crtlist_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry;
+
+ if (trash == NULL)
+ return 1;
+
+ crtlist = ebmb_entry(ctx->crtlist_node, struct crtlist, node);
+
+ entry = ctx->entry;
+ if (entry == NULL) {
+ entry = LIST_ELEM((crtlist->ord_entries).n, typeof(entry), by_crtlist);
+ chunk_appendf(trash, "# %s\n", crtlist->node.key);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ list_for_each_entry_from(entry, &crtlist->ord_entries, by_crtlist) {
+ struct ckch_store *store;
+ const char *filename;
+
+ store = entry->node.key;
+ filename = store->path;
+ chunk_appendf(trash, "%s", filename);
+ if (ctx->mode == 's') /* show */
+ chunk_appendf(trash, ":%d", entry->linenum);
+ dump_crtlist_sslconf(trash, entry->ssl_conf);
+ dump_crtlist_filters(trash, entry);
+ chunk_appendf(trash, "\n");
+
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+ free_trash_chunk(trash);
+ return 1;
+yield:
+ ctx->entry = entry;
+ free_trash_chunk(trash);
+ return 0;
+}
+
+/* CLI argument parser for '(show|dump) ssl crt-list' */
+static int cli_parse_dump_crtlist(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_crtlist_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct ebmb_node *lnode;
+ char *filename = NULL;
+ int mode;
+ char *end;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (*args[3] && strcmp(args[3], "-n") == 0) {
+ mode = 's';
+ filename = args[4];
+ } else {
+ mode = 'd';
+ filename = args[3];
+ }
+
+ if (mode == 's' && !*args[4])
+ return cli_err(appctx, "'show ssl crt-list -n' expects a filename or a directory\n");
+
+ if (filename && *filename) {
+
+
+ /* strip trailing slashes, including first one */
+ for (end = filename + strlen(filename) - 1; end >= filename && *end == '/'; end--)
+ *end = 0;
+
+ lnode = ebst_lookup(&crtlists_tree, filename);
+ if (lnode == NULL)
+ return cli_err(appctx, "didn't find the specified filename\n");
+
+ ctx->crtlist_node = lnode;
+ appctx->io_handler = cli_io_handler_dump_crtlist_entries;
+ }
+ ctx->mode = mode;
+
+ return 0;
+}
+
+/* release function of the "add ssl crt-list' command, free things and unlock
+ * the spinlock. It uses the add_crtlist_ctx.
+ */
+static void cli_release_add_crtlist(struct appctx *appctx)
+{
+ struct add_crtlist_ctx *ctx = appctx->svcctx;
+ struct crtlist_entry *entry = ctx->entry;
+
+ if (entry) {
+ struct ckch_inst *inst, *inst_s;
+
+ /* upon error free the ckch_inst and everything inside */
+ ebpt_delete(&entry->node);
+ LIST_DELETE(&entry->by_crtlist);
+ LIST_DELETE(&entry->by_ckch_store);
+
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_ckchs) {
+ ckch_inst_free(inst);
+ }
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ free(entry);
+ }
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ ha_free(&ctx->err);
+}
+
+
+/* IO Handler for the "add ssl crt-list" command It adds a new entry in the
+ * crt-list and generates the ckch_insts for each bind_conf that uses this crt-list
+ *
+ * The logic is the same as the "commit ssl cert" command but without the
+ * freeing of the old structures, because there are none.
+ *
+ * It uses the add_crtlist_ctx for the context.
+ */
+static int cli_io_handler_add_crtlist(struct appctx *appctx)
+{
+ struct add_crtlist_ctx *ctx = appctx->svcctx;
+ struct bind_conf_list *bind_conf_node;
+ struct stconn *sc = appctx_sc(appctx);
+ struct crtlist *crtlist = ctx->crtlist;
+ struct crtlist_entry *entry = ctx->entry;
+ struct ckch_store *store = entry->node.key;
+ struct ckch_inst *new_inst;
+ int i = 0;
+ int errcode = 0;
+
+ /* for each bind_conf which use the crt-list, a new ckch_inst must be
+ * created.
+ */
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+ goto end;
+
+ switch (ctx->state) {
+ case ADDCRT_ST_INIT:
+ /* This state just prints the update message */
+ chunk_printf(&trash, "Inserting certificate '%s' in crt-list '%s'", store->path, crtlist->node.key);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = ADDCRT_ST_GEN;
+ /* fallthrough */
+ case ADDCRT_ST_GEN:
+ bind_conf_node = ctx->bind_conf_node; /* get the previous ptr from the yield */
+ if (bind_conf_node == NULL)
+ bind_conf_node = crtlist->bind_conf;
+ for (; bind_conf_node; bind_conf_node = bind_conf_node->next) {
+ struct bind_conf *bind_conf = bind_conf_node->bind_conf;
+ struct sni_ctx *sni;
+
+ ctx->bind_conf_node = bind_conf_node;
+
+ /* yield every 10 generations */
+ if (i > 10) {
+ applet_have_more_data(appctx); /* let's come back later */
+ goto yield;
+ }
+
+ /* display one dot for each new instance */
+ if (applet_putstr(appctx, ".") == -1)
+ goto yield;
+
+ /* we don't support multi-cert bundles, only simple ones */
+ ctx->err = NULL;
+ errcode |= ckch_inst_new_load_store(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &new_inst, &ctx->err);
+ if (errcode & ERR_CODE) {
+ ctx->state = ADDCRT_ST_ERROR;
+ goto error;
+ }
+
+ /* we need to initialize the SSL_CTX generated */
+ /* this iterate on the newly generated SNIs in the new instance to prepare their SSL_CTX */
+ list_for_each_entry(sni, &new_inst->sni_ctx, by_ckch_inst) {
+ if (!sni->order) { /* we initialized only the first SSL_CTX because it's the same in the other sni_ctx's */
+ ctx->err = NULL;
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, new_inst->ssl_conf, sni->ctx, sni->ckch_inst, &ctx->err);
+ if (errcode & ERR_CODE) {
+ ctx->state = ADDCRT_ST_ERROR;
+ goto error;
+ }
+ }
+ }
+
+ i++;
+ LIST_APPEND(&store->ckch_inst, &new_inst->by_ckchs);
+ LIST_APPEND(&entry->ckch_inst, &new_inst->by_crtlist_entry);
+ new_inst->crtlist_entry = entry;
+ }
+ ctx->state = ADDCRT_ST_INSERT;
+ /* fallthrough */
+ case ADDCRT_ST_INSERT:
+ /* the insertion is called for every instance of the store, not
+ * only the one we generated.
+ * But ssl_sock_load_cert_sni() skips the SNIs already
+ * inserted. Not every instance has a bind_conf, it could be
+ * the store of a server, so we should be careful */
+
+ list_for_each_entry(new_inst, &store->ckch_inst, by_ckchs) {
+ if (!new_inst->bind_conf) /* this is a server instance */
+ continue;
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &new_inst->bind_conf->sni_lock);
+ ssl_sock_load_cert_sni(new_inst, new_inst->bind_conf);
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &new_inst->bind_conf->sni_lock);
+ }
+ entry->linenum = ++crtlist->linecount;
+ ctx->entry = NULL;
+ ctx->state = ADDCRT_ST_SUCCESS;
+ /* fallthrough */
+ case ADDCRT_ST_SUCCESS:
+ chunk_reset(&trash);
+ chunk_appendf(&trash, "\n");
+ if (ctx->err)
+ chunk_appendf(&trash, "%s", ctx->err);
+ chunk_appendf(&trash, "Success!\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = ADDCRT_ST_FIN;
+ break;
+
+ case ADDCRT_ST_ERROR:
+ error:
+ chunk_printf(&trash, "\n%sFailed!\n", ctx->err);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ break;
+
+ default:
+ break;
+ }
+
+end:
+ /* success: call the release function and don't come back */
+ return 1;
+yield:
+ return 0; /* should come back */
+}
+
+
+/*
+ * Parse a "add ssl crt-list <crt-list> <certfile>" line.
+ * Filters and options must be passed through the payload.
+ * It sets a struct add_crtlist_ctx.
+ */
+static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct add_crtlist_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int cfgerr = 0;
+ struct ckch_store *store;
+ char *err = NULL;
+ char path[MAXPATHLEN+1];
+ char *crtlist_path;
+ char *cert_path = NULL;
+ struct ebmb_node *eb;
+ struct ebpt_node *inserted;
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry = NULL;
+ char *end;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || (!payload && !*args[4]))
+ return cli_err(appctx, "'add ssl crtlist' expects a filename and a certificate name\n");
+
+ crtlist_path = args[3];
+
+ /* strip trailing slashes, including first one */
+ for (end = crtlist_path + strlen(crtlist_path) - 1; end >= crtlist_path && *end == '/'; end--)
+ *end = 0;
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Operations on certificates are currently locked!\n");
+
+ eb = ebst_lookup(&crtlists_tree, crtlist_path);
+ if (!eb) {
+ memprintf(&err, "crt-list '%s' does not exist!", crtlist_path);
+ goto error;
+ }
+ crtlist = ebmb_entry(eb, struct crtlist, node);
+
+ entry = crtlist_entry_new();
+ if (entry == NULL) {
+ memprintf(&err, "Not enough memory!");
+ goto error;
+ }
+
+ if (payload) {
+ char *lf;
+
+ lf = strrchr(payload, '\n');
+ if (lf) {
+ memprintf(&err, "only one line of payload is supported!");
+ goto error;
+ }
+ /* cert_path is filled here */
+ cfgerr |= crtlist_parse_line(payload, &cert_path, entry, "CLI", 1, 1, &err);
+ if (cfgerr & ERR_CODE)
+ goto error;
+ } else {
+ cert_path = args[4];
+ }
+
+ if (!cert_path) {
+ memprintf(&err, "'add ssl crtlist' should contain the certificate name in the payload");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (eb_gettag(crtlist->entries.b[EB_RGHT])) {
+ char *slash;
+
+ slash = strrchr(cert_path, '/');
+ if (!slash) {
+ memprintf(&err, "'%s' is a directory, certificate path '%s' must contain the directory path", (char *)crtlist->node.key, cert_path);
+ goto error;
+ }
+ /* temporarily replace the '/' with a NUL byte to do a strcmp */
+ *slash = '\0';
+ if (strcmp(cert_path, (char*)crtlist->node.key) != 0) {
+ *slash = '/';
+ memprintf(&err, "'%s' is a directory, certificate path '%s' must contain the directory path", (char *)crtlist->node.key, cert_path);
+ goto error;
+ }
+ *slash = '/';
+ }
+
+ if (*cert_path != '/' && global_ssl.crt_base) {
+ if ((strlen(global_ssl.crt_base) + 1 + strlen(cert_path)) > sizeof(path) ||
+ snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, cert_path) >= sizeof(path)) {
+ memprintf(&err, "'%s': path too long", cert_path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ cert_path = path;
+ }
+
+ store = ckchs_lookup(cert_path);
+ if (store == NULL) {
+ memprintf(&err, "certificate '%s' does not exist!", cert_path);
+ goto error;
+ }
+ if (store->ckch == NULL || store->ckch->cert == NULL) {
+ memprintf(&err, "certificate '%s' is empty!", cert_path);
+ goto error;
+ }
+
+ /* check if it's possible to insert this new crtlist_entry */
+ entry->node.key = store;
+ inserted = ebpt_insert(&crtlist->entries, &entry->node);
+ if (inserted != &entry->node) {
+ memprintf(&err, "file already exists in this directory!");
+ goto error;
+ }
+
+ /* this is supposed to be a directory (EB_ROOT_UNIQUE), so no ssl_conf or filters are allowed */
+ if ((entry->ssl_conf || entry->filters) && eb_gettag(crtlist->entries.b[EB_RGHT])) {
+ memprintf(&err, "this is a directory, SSL configuration and filters are not allowed");
+ goto error;
+ }
+
+ LIST_APPEND(&crtlist->ord_entries, &entry->by_crtlist);
+ entry->crtlist = crtlist;
+ LIST_APPEND(&store->crtlist_entry, &entry->by_ckch_store);
+
+ ctx->state = ADDCRT_ST_INIT;
+ ctx->crtlist = crtlist;
+ ctx->entry = entry;
+
+ /* unlock is done in the release handler */
+ return 0;
+
+error:
+ crtlist_entry_free(entry);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "Can't edit the crt-list: %s\n", err ? err : "");
+ return cli_dynerr(appctx, err);
+}
+
+/* Parse a "del ssl crt-list <crt-list> <certfile>" line. */
+static int cli_parse_del_crtlist(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct ckch_store *store;
+ char *err = NULL;
+ char *crtlist_path, *cert_path;
+ struct ebmb_node *ebmb;
+ struct ebpt_node *ebpt;
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry = NULL;
+ struct ckch_inst *inst, *inst_s;
+ int linenum = 0;
+ char *colons;
+ char *end;
+ int error_message_dumped = 0;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || !*args[4])
+ return cli_err(appctx, "'del ssl crtlist' expects a filename and a certificate name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete!\nOperations on certificates are currently locked!\n");
+
+ crtlist_path = args[3];
+ cert_path = args[4];
+
+ colons = strchr(cert_path, ':');
+ if (colons) {
+ char *endptr;
+
+ linenum = strtol(colons + 1, &endptr, 10);
+ if (colons + 1 == endptr || *endptr != '\0') {
+ memprintf(&err, "wrong line number after colons in '%s'!", cert_path);
+ goto error;
+ }
+ *colons = '\0';
+ }
+
+ /* strip trailing slashes, including first one */
+ for (end = crtlist_path + strlen(crtlist_path) - 1; end >= crtlist_path && *end == '/'; end--)
+ *end = 0;
+
+ /* look for crtlist */
+ ebmb = ebst_lookup(&crtlists_tree, crtlist_path);
+ if (!ebmb) {
+ memprintf(&err, "crt-list '%s' does not exist!", crtlist_path);
+ goto error;
+ }
+ crtlist = ebmb_entry(ebmb, struct crtlist, node);
+
+ /* look for store */
+ store = ckchs_lookup(cert_path);
+ if (store == NULL) {
+ memprintf(&err, "certificate '%s' does not exist!", cert_path);
+ goto error;
+ }
+ if (store->ckch == NULL || store->ckch->cert == NULL) {
+ memprintf(&err, "certificate '%s' is empty!", cert_path);
+ goto error;
+ }
+
+ ebpt = ebpt_lookup(&crtlist->entries, store);
+ if (!ebpt) {
+ memprintf(&err, "certificate '%s' can't be found in crt-list '%s'!", cert_path, crtlist_path);
+ goto error;
+ }
+
+ /* list the line numbers of the entries in err for error reporting, and select the right ebpt */
+ for (; ebpt; ebpt = ebpt_next_dup(ebpt)) {
+ struct crtlist_entry *tmp;
+
+ tmp = ebpt_entry(ebpt, struct crtlist_entry, node);
+ memprintf(&err, "%s%s%d", err ? err : "", err ? ", " : "", tmp->linenum);
+
+ /* select the entry we wanted */
+ if (linenum == 0 || tmp->linenum == linenum) {
+ if (!entry)
+ entry = tmp;
+ }
+ }
+
+ /* we didn't find the specified entry */
+ if (!entry) {
+ memprintf(&err, "found a certificate '%s' but the line number is incorrect, please specify a correct line number preceded by colons (%s)!", cert_path, err ? err : NULL);
+ goto error;
+ }
+
+ /* we didn't specify a line number but there were several entries */
+ if (linenum == 0 && ebpt_next_dup(&entry->node)) {
+ memprintf(&err, "found the certificate '%s' in several entries, please specify a line number preceded by colons (%s)!", cert_path, err ? err : NULL);
+ goto error;
+ }
+
+ /* Iterate over all the instances in order to see if any of them is a
+ * default instance. If this is the case, the entry won't be suppressed. */
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_crtlist_entry) {
+ if (inst->is_default && !inst->bind_conf->strict_sni) {
+ if (!error_message_dumped) {
+ memprintf(&err, "certificate '%s' cannot be deleted, it is used as default certificate by the following frontends:\n", cert_path);
+ error_message_dumped = 1;
+ }
+ memprintf(&err, "%s\t- %s:%d\n", err, inst->bind_conf->file, inst->bind_conf->line);
+ }
+ }
+ if (error_message_dumped)
+ goto error;
+
+ /* detach the entry, then free the ckch_inst and everything inside */
+
+ ebpt_delete(&entry->node);
+ LIST_DELETE(&entry->by_crtlist);
+ LIST_DELETE(&entry->by_ckch_store);
+
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_crtlist_entry) {
+ struct sni_ctx *sni, *sni_s;
+ struct ckch_inst_link_ref *link_ref, *link_ref_s;
+
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &inst->bind_conf->sni_lock);
+ list_for_each_entry_safe(sni, sni_s, &inst->sni_ctx, by_ckch_inst) {
+ ebmb_delete(&sni->name);
+ LIST_DELETE(&sni->by_ckch_inst);
+ SSL_CTX_free(sni->ctx);
+ free(sni);
+ }
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &inst->bind_conf->sni_lock);
+ LIST_DELETE(&inst->by_ckchs);
+ list_for_each_entry_safe(link_ref, link_ref_s, &inst->cafile_link_refs, list) {
+ LIST_DELETE(&link_ref->link->list);
+ LIST_DELETE(&link_ref->list);
+ free(link_ref);
+ }
+ free(inst);
+ }
+
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ free(entry);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "Entry '%s' deleted in crtlist '%s'!\n", cert_path, crtlist_path);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "Can't delete the entry: %s\n", err ? err : "");
+ return cli_dynerr(appctx, err);
+}
+
+
+/* unlink and free all crt-list and crt-list entries */
+void crtlist_deinit()
+{
+ struct eb_node *node, *next;
+ struct crtlist *crtlist;
+
+ node = eb_first(&crtlists_tree);
+ while (node) {
+ next = eb_next(node);
+ crtlist = ebmb_entry(node, struct crtlist, node);
+ crtlist_free(crtlist);
+ node = next;
+ }
+}
+
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "add", "ssl", "crt-list", NULL }, "add ssl crt-list <list> <cert> [opts]* : add to crt-list file <list> a line <cert> or a payload", cli_parse_add_crtlist, cli_io_handler_add_crtlist, cli_release_add_crtlist },
+ { { "del", "ssl", "crt-list", NULL }, "del ssl crt-list <list> <cert[:line]> : delete a line <cert> from crt-list file <list>", cli_parse_del_crtlist, NULL, NULL },
+ { { "show", "ssl", "crt-list", NULL }, "show ssl crt-list [-n] [<list>] : show the list of crt-lists or the content of a crt-list file <list>", cli_parse_dump_crtlist, cli_io_handler_dump_crtlist, NULL },
+ { { NULL }, NULL, NULL, NULL } }
+};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
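+
+/* Illustrative runtime usage over the stats socket (paths are examples only):
+ *   $ echo "add ssl crt-list site.crtlist certs/new.pem" | socat /var/run/haproxy.sock -
+ *   $ echo "show ssl crt-list -n site.crtlist" | socat /var/run/haproxy.sock -
+ *   $ echo "del ssl crt-list site.crtlist certs/new.pem:42" | socat /var/run/haproxy.sock -
+ * For "add", ssl options and filters may be passed on a single payload line,
+ * using the same syntax as a crt-list file line.
+ */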
+
diff --git a/src/ssl_sample.c b/src/ssl_sample.c
new file mode 100644
index 0000000..8b367a1
--- /dev/null
+++ b/src/ssl_sample.c
@@ -0,0 +1,2225 @@
+/*
+ * This file contains the sample fetches related to SSL
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/base64.h>
+#include <haproxy/buf-t.h>
+#include <haproxy/connection.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/sample.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+
+
+/***** Below are some sample fetching functions for ACL/patterns *****/
+
+#if defined(HAVE_CRYPTO_memcmp)
+/* Compares bytestring with a variable containing a bytestring. Return value
+ * is `true` if both bytestrings are bytewise identical and `false` otherwise.
+ *
+ * Comparison will be performed in constant time if both bytestrings are of
+ * the same length. If the lengths differ execution time will not be constant.
+ */
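+/* Illustrative configuration usage (variable and header names are examples);
+ * hashing both operands first gives them equal length, which keeps the
+ * comparison constant-time as described above:
+ *   http-request set-var(txn.ref) str(s3cr3t-value),sha2(256)
+ *   http-request deny unless { req.hdr(x-token),sha2(256),secure_memcmp(txn.ref) }
+ */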
+static int sample_conv_secure_memcmp(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+ int result;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[0].type != ARGT_VAR)
+ return 0;
+
+ if (!sample_conv_var2smp(&arg_p[0].data.var, &tmp, SMP_T_BIN))
+ return 0;
+
+ if (smp->data.u.str.data != tmp.data.u.str.data) {
+ smp->data.u.sint = 0;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+ }
+
+ /* The following comparison is performed in constant time. */
+ result = CRYPTO_memcmp(smp->data.u.str.area, tmp.data.u.str.area, smp->data.u.str.data);
+
+ smp->data.u.sint = result == 0;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* This function checks the "secure_memcmp" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_secure_memcmp(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!args[0].data.str.data) {
+ memprintf(err, "missing variable name");
+ return 0;
+ }
+
+ /* Try to decode a variable. */
+ if (vars_check_arg(&args[0], NULL))
+ return 1;
+
+ memprintf(err, "failed to register variable name '%s'",
+ args[0].data.str.area);
+ return 0;
+}
+#endif /* HAVE_CRYPTO_memcmp */
+
+static int smp_check_sha2(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (args[0].type == ARGT_STOP)
+ return 1;
+ if (args[0].type != ARGT_SINT) {
+ memprintf(err, "Invalid type '%s'", arg_type_names[args[0].type]);
+ return 0;
+ }
+
+ switch (args[0].data.sint) {
+ case 224:
+ case 256:
+ case 384:
+ case 512:
+ /* this is okay */
+ return 1;
+ default:
+ memprintf(err, "Unsupported number of bits: '%lld'", args[0].data.sint);
+ return 0;
+ }
+}
+
+static int sample_conv_sha2(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int bits = 256;
+ EVP_MD_CTX *mdctx;
+ const EVP_MD *evp = NULL;
+ unsigned int digest_length = 0;
+ if (arg_p->data.sint)
+ bits = arg_p->data.sint;
+
+ switch (bits) {
+ case 224:
+ evp = EVP_sha224();
+ break;
+ case 256:
+ evp = EVP_sha256();
+ break;
+ case 384:
+ evp = EVP_sha384();
+ break;
+ case 512:
+ evp = EVP_sha512();
+ break;
+ default:
+ return 0;
+ }
+
+ mdctx = EVP_MD_CTX_new();
+ if (!mdctx)
+ return 0;
+ EVP_DigestInit_ex(mdctx, evp, NULL);
+ EVP_DigestUpdate(mdctx, smp->data.u.str.area, smp->data.u.str.data);
+ EVP_DigestFinal_ex(mdctx, (unsigned char*)trash->area, &digest_length);
+ trash->data = digest_length;
+
+ EVP_MD_CTX_free(mdctx);
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
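+
+/* Usage sketch (hypothetical config): hash a sample and render it as hex,
+ * e.g. to pseudonymize the source address in a header:
+ *
+ *   http-request set-header x-src-hash %[src,sha2(256),hex]
+ */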
+
+/* This function checks an <arg> and fills it with a variable type if the
+ * <arg> string contains a valid variable name. If that fails, the function
+ * instead tries to perform a base64 decode operation on the same string and
+ * fills the <arg> with the decoded content.
+ *
+ * Validation is skipped if the <arg> string is empty.
+ *
+ * This function returns 0 if the variable lookup fails and the specified
+ * <arg> string is not a valid base64 encoded string, as well as when an
+ * unexpected argument type is specified or a memory allocation error
+ * occurs. Otherwise it returns 1.
+ */
+static inline int sample_check_arg_base64(struct arg *arg, char **err)
+{
+ char *dec = NULL;
+ int dec_size;
+
+ if (arg->type != ARGT_STR) {
+ memprintf(err, "unexpected argument type");
+ return 0;
+ }
+
+ if (arg->data.str.data == 0) /* empty */
+ return 1;
+
+ if (vars_check_arg(arg, NULL))
+ return 1;
+
+ if (arg->data.str.data % 4) {
+ memprintf(err, "argument needs to be base64 encoded, and "
+ "can either be a string or a variable");
+ return 0;
+ }
+
+ dec_size = (arg->data.str.data / 4 * 3)
+ - (arg->data.str.area[arg->data.str.data-1] == '=' ? 1 : 0)
+ - (arg->data.str.area[arg->data.str.data-2] == '=' ? 1 : 0);
+
+ if ((dec = malloc(dec_size)) == NULL) {
+ memprintf(err, "memory allocation error");
+ return 0;
+ }
+
+ dec_size = base64dec(arg->data.str.area, arg->data.str.data, dec, dec_size);
+ if (dec_size < 0) {
+ memprintf(err, "argument needs to be base64 encoded, and "
+ "can either be a string or a variable");
+ free(dec);
+ return 0;
+ }
+
+ /* base64 decoded */
+ chunk_destroy(&arg->data.str);
+ arg->data.str.area = dec;
+ arg->data.str.data = dec_size;
+ return 1;
+}
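+
+/* Worked example of the size computation above: "Zm9vYg==" is 8 chars, so
+ * 8/4*3 = 6, minus 2 for the two '=' padding chars, giving the 4 bytes of
+ * the decoded string "foob".
+ */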
+
+#ifdef EVP_CIPH_GCM_MODE
+static int check_aes_gcm(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ switch(args[0].data.sint) {
+ case 128:
+ case 192:
+ case 256:
+ break;
+ default:
+ memprintf(err, "key size must be 128, 192 or 256 (bits).");
+ return 0;
+ }
+
+ /* Try to decode variables. */
+ if (!sample_check_arg_base64(&args[1], err)) {
+ memprintf(err, "failed to parse nonce : %s", *err);
+ return 0;
+ }
+ if (!sample_check_arg_base64(&args[2], err)) {
+ memprintf(err, "failed to parse key : %s", *err);
+ return 0;
+ }
+ if (!sample_check_arg_base64(&args[3], err)) {
+ memprintf(err, "failed to parse aead_tag : %s", *err);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Arguments: AES size in bits, nonce, key, tag. The last three arguments are base64 encoded */
+static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample nonce, key, aead_tag;
+ struct buffer *smp_trash = NULL, *smp_trash_alloc = NULL;
+ EVP_CIPHER_CTX *ctx;
+ int dec_size, ret;
+
+ smp_trash_alloc = alloc_trash_chunk();
+ if (!smp_trash_alloc)
+ return 0;
+
+ /* smp copy */
+ smp_trash_alloc->data = smp->data.u.str.data;
+ if (unlikely(smp_trash_alloc->data > smp_trash_alloc->size))
+ smp_trash_alloc->data = smp_trash_alloc->size;
+ memcpy(smp_trash_alloc->area, smp->data.u.str.area, smp_trash_alloc->data);
+
+ ctx = EVP_CIPHER_CTX_new();
+
+ if (!ctx)
+ goto err;
+
+ smp_trash = alloc_trash_chunk();
+ if (!smp_trash)
+ goto err;
+
+ smp_set_owner(&nonce, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&arg_p[1], &nonce))
+ goto err;
+
+ if (arg_p[1].type == ARGT_VAR) {
+ dec_size = base64dec(nonce.data.u.str.area, nonce.data.u.str.data, smp_trash->area, smp_trash->size);
+ if (dec_size < 0)
+ goto err;
+ smp_trash->data = dec_size;
+ nonce.data.u.str = *smp_trash;
+ }
+
+ /* Set cipher type and mode */
+ switch(arg_p[0].data.sint) {
+ case 128:
+ EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL);
+ break;
+ case 192:
+ EVP_DecryptInit_ex(ctx, EVP_aes_192_gcm(), NULL, NULL, NULL);
+ break;
+ case 256:
+ EVP_DecryptInit_ex(ctx, EVP_aes_256_gcm(), NULL, NULL, NULL);
+ break;
+ }
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, nonce.data.u.str.data, NULL);
+
+ /* Initialise IV */
+ if(!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, (unsigned char *) nonce.data.u.str.area))
+ goto err;
+
+ smp_set_owner(&key, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&arg_p[2], &key))
+ goto err;
+
+ if (arg_p[2].type == ARGT_VAR) {
+ dec_size = base64dec(key.data.u.str.area, key.data.u.str.data, smp_trash->area, smp_trash->size);
+ if (dec_size < 0)
+ goto err;
+ smp_trash->data = dec_size;
+ key.data.u.str = *smp_trash;
+ }
+
+ /* Initialise key */
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, (unsigned char *) key.data.u.str.area, NULL))
+ goto err;
+
+ if (!EVP_DecryptUpdate(ctx, (unsigned char *) smp_trash->area, (int *) &smp_trash->data,
+ (unsigned char *) smp_trash_alloc->area, (int) smp_trash_alloc->data))
+ goto err;
+
+ smp_set_owner(&aead_tag, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&arg_p[3], &aead_tag))
+ goto err;
+
+ if (arg_p[3].type == ARGT_VAR) {
+ dec_size = base64dec(aead_tag.data.u.str.area, aead_tag.data.u.str.data, smp_trash_alloc->area, smp_trash_alloc->size);
+ if (dec_size < 0)
+ goto err;
+ smp_trash_alloc->data = dec_size;
+ aead_tag.data.u.str = *smp_trash_alloc;
+ }
+
+ dec_size = smp_trash->data;
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, aead_tag.data.u.str.data, (void *) aead_tag.data.u.str.area);
+ ret = EVP_DecryptFinal_ex(ctx, (unsigned char *) smp_trash->area + smp_trash->data, (int *) &smp_trash->data);
+
+ if (ret <= 0)
+ goto err;
+
+ smp->data.u.str.data = dec_size + smp_trash->data;
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.type = SMP_T_BIN;
+ smp_dup(smp);
+ free_trash_chunk(smp_trash_alloc);
+ free_trash_chunk(smp_trash);
+ return 1;
+
+err:
+ free_trash_chunk(smp_trash_alloc);
+ free_trash_chunk(smp_trash);
+ return 0;
+}
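+
+/* Usage sketch (hypothetical config, assuming txn.nonce, txn.key and
+ * txn.tag were previously set to base64-encoded values):
+ *
+ *   http-request set-var(txn.dec) req.hdr(x-enc),b64dec,aes_gcm_dec(128,txn.nonce,txn.key,txn.tag)
+ */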
+#endif
+
+static int check_crypto_digest(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ const EVP_MD *evp = EVP_get_digestbyname(args[0].data.str.area);
+
+ if (evp)
+ return 1;
+
+ memprintf(err, "algorithm must be a valid OpenSSL message digest name.");
+ return 0;
+}
+
+static int sample_conv_crypto_digest(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ unsigned char *md = (unsigned char*) trash->area;
+ unsigned int md_len = trash->size;
+ EVP_MD_CTX *ctx = EVP_MD_CTX_new();
+ const EVP_MD *evp = EVP_get_digestbyname(args[0].data.str.area);
+
+ if (!ctx)
+ return 0;
+
+ if (!EVP_DigestInit_ex(ctx, evp, NULL) ||
+ !EVP_DigestUpdate(ctx, smp->data.u.str.area, smp->data.u.str.data) ||
+ !EVP_DigestFinal_ex(ctx, md, &md_len)) {
+ EVP_MD_CTX_free(ctx);
+ return 0;
+ }
+
+ EVP_MD_CTX_free(ctx);
+
+ trash->data = md_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
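+
+/* Usage sketch (hypothetical config): any digest name known to OpenSSL is
+ * accepted, e.g.:
+ *
+ *   http-request set-header x-path-digest %[path,digest(sha256),hex]
+ */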
+
+static int check_crypto_hmac(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!check_crypto_digest(args, conv, file, line, err))
+ return 0;
+
+ if (!sample_check_arg_base64(&args[1], err)) {
+ memprintf(err, "failed to parse key : %s", *err);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int sample_conv_crypto_hmac(const struct arg *args, struct sample *smp, void *private)
+{
+ struct sample key;
+ struct buffer *trash = NULL, *key_trash = NULL;
+ unsigned char *md;
+ unsigned int md_len;
+ const EVP_MD *evp = EVP_get_digestbyname(args[0].data.str.area);
+ int dec_size;
+
+ smp_set_owner(&key, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&args[1], &key))
+ return 0;
+
+ if (args[1].type == ARGT_VAR) {
+ key_trash = alloc_trash_chunk();
+ if (!key_trash)
+ goto err;
+
+ dec_size = base64dec(key.data.u.str.area, key.data.u.str.data, key_trash->area, key_trash->size);
+ if (dec_size < 0)
+ goto err;
+ key_trash->data = dec_size;
+ key.data.u.str = *key_trash;
+ }
+
+ trash = alloc_trash_chunk();
+ if (!trash)
+ goto err;
+
+ md = (unsigned char*) trash->area;
+ md_len = trash->size;
+ if (!HMAC(evp, key.data.u.str.area, key.data.u.str.data, (const unsigned char*) smp->data.u.str.area,
+ smp->data.u.str.data, md, &md_len))
+ goto err;
+
+ free_trash_chunk(key_trash);
+
+ trash->data = md_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp_dup(smp);
+ free_trash_chunk(trash);
+ return 1;
+
+err:
+ free_trash_chunk(key_trash);
+ free_trash_chunk(trash);
+ return 0;
+}
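+
+/* Usage sketch (hypothetical config; "c2VjcmV0" stands for a base64-encoded
+ * key, as required by check_crypto_hmac above):
+ *
+ *   http-request set-header x-url-sig %[url,hmac(sha256,c2VjcmV0),hex]
+ */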
+
+static int
+smp_fetch_ssl_fc_has_early(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ SSL *ssl;
+ struct connection *conn;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_BOOL;
+#ifdef OPENSSL_IS_BORINGSSL
+ {
+ smp->data.u.sint = (SSL_in_early_data(ssl) &&
+ SSL_early_data_accepted(ssl));
+ }
+#else
+ smp->data.u.sint = ((conn->flags & CO_FL_EARLY_DATA) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_SSL_WAIT_HS))) ? 1 : 0;
+#endif
+ return 1;
+}
+
+/* boolean, returns true if client cert was present */
+static int
+smp_fetch_ssl_fc_has_crt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = SSL_SOCK_ST_FL_VERIFY_DONE & ctx->xprt_st ? 1 : 0;
+
+ return 1;
+}
+
+/* binary, returns a certificate in a binary chunk (der/raw).
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_der(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_crt2der(crt, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
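+
+/* Usage sketch (hypothetical config): forward the client certificate to a
+ * backend, DER re-encoded as base64:
+ *
+ *   http-request set-header x-client-cert %[ssl_c_der,base64]
+ */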
+
+/* binary, returns the certificate chain in a binary chunk (der/raw).
+ * The 5th keyword char selects the peer cert; only the peer chain is supported.
+ */
+static int
+smp_fetch_ssl_x_chain_der(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ struct buffer *smp_trash;
+ struct buffer *tmp_trash = NULL;
+ struct connection *conn;
+ STACK_OF(X509) *certs = NULL;
+ X509 *crt = NULL;
+ SSL *ssl;
+ int ret = 0;
+ int num_certs;
+ int i;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ if (!conn)
+ return 0;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!cert_peer)
+ return 0;
+
+ certs = SSL_get_peer_cert_chain(ssl);
+ if (!certs)
+ return 0;
+
+ num_certs = sk_X509_num(certs);
+ if (!num_certs)
+ goto out;
+ smp_trash = get_trash_chunk();
+ tmp_trash = alloc_trash_chunk();
+ if (!tmp_trash)
+ goto out;
+ for (i = 0; i < num_certs; i++) {
+ crt = sk_X509_value(certs, i);
+ if (ssl_sock_crt2der(crt, tmp_trash) <= 0)
+ goto out;
+ chunk_cat(smp_trash, tmp_trash);
+ }
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ if (tmp_trash)
+ free_trash_chunk(tmp_trash);
+ return ret;
+}
+
+/* binary, returns the serial of a certificate in a binary chunk.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_serial(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_get_serial(crt, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* binary, returns the client certificate's SHA-1 fingerprint (SHA-1 hash of DER-encoded certificate) in a binary chunk.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_sha1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ const EVP_MD *digest;
+ int ret = 0;
+ unsigned int len = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ digest = EVP_sha1();
+ X509_digest(crt, digest, (unsigned char *) smp_trash->area, &len);
+ smp_trash->data = len;
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* string, returns the certificate's notafter date in ASN1_UTCTIME format.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_notafter(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_get_time(X509_getm_notAfter(crt), smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_STR;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* string, returns the formatted full DN \C=..\O=..\OU=..\CN=.. of the certificate's issuer.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_i_dn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ X509_NAME *name;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ name = X509_get_issuer_name(crt);
+ if (!name)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (args[0].type == ARGT_STR && args[0].data.str.data > 0) {
+ int pos = 1;
+
+ if (args[1].type == ARGT_SINT)
+ pos = args[1].data.sint;
+
+ if (ssl_sock_get_dn_entry(name, &args[0].data.str, pos, smp_trash) <= 0)
+ goto out;
+ }
+ else if (args[2].type == ARGT_STR && args[2].data.str.data > 0) {
+ if (ssl_sock_get_dn_formatted(name, &args[2].data.str, smp_trash) <= 0)
+ goto out;
+ }
+ else if (ssl_sock_get_dn_oneline(name, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *smp_trash;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
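+
+/* Usage sketch (hypothetical config and CA name): the optional arguments
+ * select a single DN entry plus occurrence, or the rfc2253 output format:
+ *
+ *   acl from_my_ca ssl_c_i_dn(CN) -m str "Example CA"
+ *   http-request set-header x-issuer-dn %[ssl_c_i_dn(,0,rfc2253)]
+ */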
+
+/* string, returns the notbefore date in ASN1_UTCTIME format.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_notbefore(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_get_time(X509_getm_notBefore(crt), smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_STR;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* string, returns the formatted full DN \C=..\O=..\OU=..\CN=.. of the certificate's subject.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_s_dn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ X509_NAME *name;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ name = X509_get_subject_name(crt);
+ if (!name)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (args[0].type == ARGT_STR && args[0].data.str.data > 0) {
+ int pos = 1;
+
+ if (args[1].type == ARGT_SINT)
+ pos = args[1].data.sint;
+
+ if (ssl_sock_get_dn_entry(name, &args[0].data.str, pos, smp_trash) <= 0)
+ goto out;
+ }
+ else if (args[2].type == ARGT_STR && args[2].data.str.data > 0) {
+ if (ssl_sock_get_dn_formatted(name, &args[2].data.str, smp_trash) <= 0)
+ goto out;
+ }
+ else if (ssl_sock_get_dn_oneline(name, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *smp_trash;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* boolean, returns true if the current session uses a client certificate */
+static int
+smp_fetch_ssl_c_used(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ X509 *crt;
+ struct connection *conn;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ /* SSL_get_peer_certificate returns a pointer to an allocated X509 struct */
+ crt = ssl_sock_get_peer_certificate(ssl);
+ if (crt) {
+ X509_free(crt);
+ }
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (crt != NULL);
+ return 1;
+}
+
+/* integer, returns the certificate version
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_version(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+
+ X509 *crt;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.sint = (unsigned int)(1 + X509_get_version(crt));
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+}
+
+/* string, returns the certificate's signature algorithm.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_sig_alg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt;
+ __OPENSSL_110_CONST__ ASN1_OBJECT *algorithm;
+ int nid;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ return 0;
+
+ X509_ALGOR_get0(&algorithm, NULL, NULL, X509_get0_tbs_sigalg(crt));
+ nid = OBJ_obj2nid(algorithm);
+
+ smp->data.u.str.area = (char *)OBJ_nid2sn(nid);
+ if (!smp->data.u.str.area) {
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+ return 0;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+
+ return 1;
+}
+
+/* string, returns the certificate's key algorithm.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_key_alg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt;
+ ASN1_OBJECT *algorithm;
+ int nid;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ return 0;
+
+ X509_PUBKEY_get0_param(&algorithm, NULL, NULL, NULL, X509_get_X509_PUBKEY(crt));
+ nid = OBJ_obj2nid(algorithm);
+
+ smp->data.u.str.area = (char *)OBJ_nid2sn(nid);
+ if (!smp->data.u.str.area) {
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+ return 0;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ if (cert_peer)
+ X509_free(crt);
+
+ return 1;
+}
+
+/* boolean, returns true if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword's 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = conn_is_ssl(conn);
+ return 1;
+}
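+
+/* Usage sketch: the classic redirect of plain-text traffic to HTTPS:
+ *
+ *   http-request redirect scheme https unless { ssl_fc }
+ */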
+
+/* boolean, returns true if the client presented an SNI */
+static int
+smp_fetch_ssl_fc_has_sni(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct connection *conn = objt_conn(smp->sess->origin);
+ SSL *ssl = ssl_sock_get_ssl_object(conn);
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = ssl && SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name) != NULL;
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+/* boolean, returns true if the client session has been resumed.
+ * This function is also usable on backend conn if the fetch keyword's 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_is_resumed(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = ssl && SSL_session_reused(ssl);
+ return 1;
+}
+
+/* string, returns the used cipher if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword's 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_cipher(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_get_cipher_name(ssl);
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* integer, returns the algorithm's keysize if front conn. transport layer
+ * is SSL.
+ * This function is also usable on backend conn if the fetch keyword's 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_alg_keysize(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+ int sint;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (!SSL_get_cipher_bits(ssl, &sint))
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.sint = sint;
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+}
+
+/* integer, returns the used keysize if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword's 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_use_keysize(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.sint = (unsigned int)SSL_get_cipher_bits(ssl, NULL);
+ if (!smp->data.u.sint)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+}
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
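+/* string, returns the protocol negotiated through NPN, if any. Also usable
+ * on the backend side when the fetch keyword's 5th char is 'b'.
+ */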
+static int
+smp_fetch_ssl_fc_npn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+ unsigned int len = 0;
+
+ smp->flags = SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str.area = NULL;
+ SSL_get0_next_proto_negotiated(ssl,
+ (const unsigned char **)&smp->data.u.str.area,
+ &len);
+
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.u.str.data = len;
+ return 1;
+}
+#endif
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
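+/* string, returns the protocol negotiated through ALPN, if any. Also usable
+ * on the backend side when the fetch keyword's 5th char is 'b'.
+ */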
+static int
+smp_fetch_ssl_fc_alpn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+ unsigned int len = 0;
+
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = NULL;
+ SSL_get0_alpn_selected(ssl,
+ (const unsigned char **)&smp->data.u.str.area,
+ &len);
+
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.u.str.data = len;
+ return 1;
+}
+#endif
+
+/* string, returns the used protocol if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword's 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_protocol(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_get_version(ssl);
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* binary, returns the SSL session id if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword's 5th
+ * char is 'b'.
+ */
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+static int
+smp_fetch_ssl_fc_session_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL_SESSION *ssl_sess;
+ SSL *ssl;
+ unsigned int len = 0;
+
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.type = SMP_T_BIN;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ ssl_sess = SSL_get_session(ssl);
+ if (!ssl_sess)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_SESSION_get_id(ssl_sess, &len);
+ if (!smp->data.u.str.area || !len)
+ return 0;
+
+ smp->data.u.str.data = len;
+ return 1;
+}
+#endif
+
+
+#ifdef HAVE_SSL_EXTRACT_RANDOM
+static int
+smp_fetch_ssl_fc_random(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct buffer *data;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ data = get_trash_chunk();
+ if (kw[7] == 'c')
+ data->data = SSL_get_client_random(ssl,
+ (unsigned char *) data->area,
+ data->size);
+ else
+ data->data = SSL_get_server_random(ssl,
+ (unsigned char *) data->area,
+ data->size);
+ if (!data->data)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *data;
+
+ return 1;
+}
+
+static int
+smp_fetch_ssl_fc_session_key(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL_SESSION *ssl_sess;
+ struct buffer *data;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ ssl_sess = SSL_get_session(ssl);
+ if (!ssl_sess)
+ return 0;
+
+ data = get_trash_chunk();
+ data->data = SSL_SESSION_get_master_key(ssl_sess,
+ (unsigned char *) data->area,
+ data->size);
+ if (!data->data)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *data;
+
+ return 1;
+}
+#endif
+
+static int
+smp_fetch_ssl_fc_sni(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct connection *conn;
+ SSL *ssl;
+
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name);
+ if (!smp->data.u.str.area) {
+ /* We might have stored the SNI ourselves, look for it in the
+ * context's ex_data.
+ */
+ smp->data.u.str.area = SSL_get_ex_data(ssl, ssl_client_sni_index);
+
+ if (!smp->data.u.str.area)
+ return 0;
+ }
+
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+#else
+ /* SNI not supported */
+ return 0;
+#endif
+}
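+
+/* Usage sketch (hypothetical backend name): route on the SNI sent by the
+ * client:
+ *
+ *   use_backend bk_example if { ssl_fc_sni -i example.org }
+ */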
+
+/* binary, returns the TLS client hello cipher list.
+ * Arguments: filter_option (0,1)
+ */
+static int
+smp_fetch_ssl_fc_cl_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *smp_trash;
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ if (args[0].data.sint) {
+ smp_trash = get_trash_chunk();
+ exclude_tls_grease(capture->data + capture->ciphersuite_offset, capture->ciphersuite_len, smp_trash);
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.u.str.data = smp_trash->data;
+ smp->flags = SMP_F_VOL_SESS;
+ }
+ else {
+ smp->data.u.str.area = capture->data + capture->ciphersuite_offset;
+ smp->data.u.str.data = capture->ciphersuite_len;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ }
+
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* binary, returns the TLS client hello cipher list as a hexadecimal string.
+ * Arguments: filter_option (0,1)
+ */
+static int
+smp_fetch_ssl_fc_cl_hex(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *data;
+
+ if (!smp_fetch_ssl_fc_cl_bin(args, smp, kw, private))
+ return 0;
+
+ data = get_trash_chunk();
+ dump_binary(data, smp->data.u.str.area, smp->data.u.str.data);
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *data;
+ return 1;
+}
+
+/* integer, returns the xxh64 hash of the TLS client hello cipher list. */
+static int
+smp_fetch_ssl_fc_cl_xxh64(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = capture->xxh64;
+ return 1;
+}
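+
+/* Usage sketch (hypothetical header name): a cheap client fingerprint for
+ * logging or rate limiting:
+ *
+ *   http-request set-header x-ssl-fp %[ssl_fc_cipherlist_xxh]
+ */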
+
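+/* integer, returns the last SSL error code recorded on the connection, if
+ * any. Also usable on the backend side when the fetch keyword's 5th char
+ * is 'b'.
+ */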
+static int
+smp_fetch_ssl_fc_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_sock_ctx *ctx;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ ctx = conn_get_ssl_sock_ctx(conn);
+ if (!ctx)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ctx->error_code;
+ return 1;
+}
+
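+/* integer, returns the protocol version announced in the TLS client hello,
+ * as recorded by the SSL capture.
+ */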
+static int
+smp_fetch_ssl_fc_protocol_hello_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = capture->protocol_version;
+ return 1;
+}
+
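+/* string, returns the OpenSSL error string matching the last SSL error
+ * code recorded on the connection.
+ */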
+static int
+smp_fetch_ssl_fc_err_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_sock_ctx *ctx;
+ const char *err_code_str;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ ctx = conn_get_ssl_sock_ctx(conn);
+ if (!ctx || !ctx->error_code)
+ return 0;
+
+ err_code_str = ERR_error_string(ctx->error_code, NULL);
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char*)err_code_str;
+ smp->data.u.str.data = strlen(err_code_str);
+
+ return 1;
+}
+
+/* binary, returns the TLS client hello extensions list.
+ * Arguments: filter_option (0,1)
+ */
+static int
+smp_fetch_ssl_fc_ext_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *smp_trash;
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ if (args[0].data.sint) {
+ smp_trash = get_trash_chunk();
+ exclude_tls_grease(capture->data + capture->extensions_offset, capture->extensions_len, smp_trash);
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.u.str.data = smp_trash->data;
+ smp->flags = SMP_F_VOL_SESS;
+ }
+ else {
+ smp->data.u.str.area = capture->data + capture->extensions_offset;
+ smp->data.u.str.data = capture->extensions_len;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ }
+
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* binary, returns the elliptic curves supported in the TLS client hello.
+ * Arguments: filter_option (0,1)
+ */
+static int
+smp_fetch_ssl_fc_ecl_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *smp_trash;
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ if (args[0].data.sint) {
+ smp_trash = get_trash_chunk();
+ exclude_tls_grease(capture->data + capture->ec_offset, capture->ec_len, smp_trash);
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.u.str.data = smp_trash->data;
+ smp->flags = SMP_F_VOL_SESS;
+ }
+ else {
+ smp->data.u.str.area = capture->data + capture->ec_offset;
+ smp->data.u.str.data = capture->ec_len;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ }
+
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* binary, returns the elliptic curve point formats supported in the TLS client hello */
+static int
+smp_fetch_ssl_fc_ecf_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str.area = capture->data + capture->ec_formats_offset;
+ smp->data.u.str.data = capture->ec_formats_len;
+ return 1;
+}
+
+/* Dump the SSL keylog; it only works with "tune.ssl.keylog 1" */
+#ifdef HAVE_SSL_KEYLOG
+static int smp_fetch_ssl_x_keylog(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_keylog *keylog;
+ SSL *ssl;
+ char *src = NULL;
+ const char *sfx;
+
+ if (global_ssl.keylog <= 0)
+ return 0;
+
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ keylog = SSL_get_ex_data(ssl, ssl_keylog_index);
+ if (!keylog)
+ return 0;
+
+ sfx = kw + strlen("ssl_xx_");
+
+ if (strcmp(sfx, "client_early_traffic_secret") == 0) {
+ src = keylog->client_early_traffic_secret;
+ } else if (strcmp(sfx, "client_handshake_traffic_secret") == 0) {
+ src = keylog->client_handshake_traffic_secret;
+ } else if (strcmp(sfx, "server_handshake_traffic_secret") == 0) {
+ src = keylog->server_handshake_traffic_secret;
+ } else if (strcmp(sfx, "client_traffic_secret_0") == 0) {
+ src = keylog->client_traffic_secret_0;
+ } else if (strcmp(sfx, "server_traffic_secret_0") == 0) {
+ src = keylog->server_traffic_secret_0;
+ } else if (strcmp(sfx, "exporter_secret") == 0) {
+ src = keylog->exporter_secret;
+ } else if (strcmp(sfx, "early_exporter_secret") == 0) {
+ src = keylog->early_exporter_secret;
+ }
+
+ if (!src || !*src)
+ return 0;
+
+ smp->data.u.str.area = src;
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+#endif
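+
+/* Usage sketch (assumes the "tune.ssl.keylog" global tunable is enabled):
+ * emit an SSLKEYLOGFILE-style line so that TLS 1.3 captures can be
+ * decrypted:
+ *
+ *   log-format "CLIENT_HANDSHAKE_TRAFFIC_SECRET %[ssl_fc_client_random,hex] %[ssl_fc_client_handshake_traffic_secret]"
+ */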
+
+static int
+smp_fetch_ssl_fc_cl_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+#if defined(OPENSSL_IS_BORINGSSL) || defined(SSL_CTRL_GET_RAW_CIPHERLIST)
+ struct buffer *data;
+ int i;
+
+ if (!smp_fetch_ssl_fc_cl_bin(args, smp, kw, private))
+ return 0;
+
+ data = get_trash_chunk();
+ for (i = 0; i + 1 < smp->data.u.str.data; i += 2) {
+ const char *str;
+ const SSL_CIPHER *cipher;
+ const unsigned char *bin = (const unsigned char *) smp->data.u.str.area + i;
+ uint16_t id = (bin[0] << 8) | bin[1];
+#if defined(OPENSSL_IS_BORINGSSL)
+ cipher = SSL_get_cipher_by_value(id);
+#else
+ struct connection *conn = __objt_conn(smp->sess->origin);
+ SSL *ssl = ssl_sock_get_ssl_object(conn);
+ cipher = SSL_CIPHER_find(ssl, bin);
+#endif
+ str = SSL_CIPHER_get_name(cipher);
+ if (!str || strcmp(str, "(NONE)") == 0)
+ chunk_appendf(data, "%sUNKNOWN(%04x)", i == 0 ? "" : ",", id);
+ else
+ chunk_appendf(data, "%s%s", i == 0 ? "" : ",", str);
+ }
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *data;
+ return 1;
+#else
+ return smp_fetch_ssl_fc_cl_xxh64(args, smp, kw, private);
+#endif
+}
+
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+static int
+smp_fetch_ssl_fc_unique_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ int finished_len;
+ struct buffer *finished_trash;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ finished_trash = get_trash_chunk();
+ if (!SSL_session_reused(ssl))
+ finished_len = SSL_get_peer_finished(ssl,
+ finished_trash->area,
+ finished_trash->size);
+ else
+ finished_len = SSL_get_finished(ssl,
+ finished_trash->area,
+ finished_trash->size);
+
+ if (!finished_len)
+ return 0;
+
+ finished_trash->data = finished_len;
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *finished_trash;
+ smp->data.type = SMP_T_BIN;
+
+ return 1;
+}
+#endif
+
+/* integer, returns the first verify error found in the CA chain of the client certificate. */
+static int
+smp_fetch_ssl_c_ca_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (conn && conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!ctx)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (unsigned long long int)SSL_SOCK_ST_TO_CA_ERROR(ctx->xprt_st);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
+
+/* integer, returns the depth of the first verify error found in the CA chain of the client certificate. */
+static int
+smp_fetch_ssl_c_ca_err_depth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (conn && conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!ctx)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (long long int)SSL_SOCK_ST_TO_CAEDEPTH(ctx->xprt_st);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
+
+/* integer, returns the first verify error on the client certificate */
+static int
+smp_fetch_ssl_c_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (conn && conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!ctx)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (long long int)SSL_SOCK_ST_TO_CRTERROR(ctx->xprt_st);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
+
+/* integer, returns the verify result on the client certificate */
+static int
+smp_fetch_ssl_c_verify(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (long long int)SSL_get_verify_result(ssl);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
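+
+/* Usage sketch: with "verify optional" on the bind line, reject clients
+ * whose certificate failed verification (0 is X509_V_OK):
+ *
+ *   http-request deny unless { ssl_c_verify 0 }
+ */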
+
+/* Argument validation functions */
+
+/* This function is used to validate the arguments passed to any "x_dn" ssl
+ * keywords. These keywords support specifying a third parameter that must be
+ * either empty or the value "rfc2253". Returns 0 on error, non-zero if OK.
+ */
+int val_dnfmt(struct arg *arg, char **err_msg)
+{
+ if (arg && arg[2].type == ARGT_STR && arg[2].data.str.data > 0 && (strcmp(arg[2].data.str.area, "rfc2253") != 0)) {
+ memprintf(err_msg, "only rfc2253 or a blank value are currently supported as the format argument.");
+ return 0;
+ }
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "ssl_bc", smp_fetch_ssl_fc, 0, NULL, SMP_T_BOOL, SMP_USE_L5SRV },
+ { "ssl_bc_alg_keysize", smp_fetch_ssl_fc_alg_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ { "ssl_bc_alpn", smp_fetch_ssl_fc_alpn, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+#endif
+ { "ssl_bc_cipher", smp_fetch_ssl_fc_cipher, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ { "ssl_bc_npn", smp_fetch_ssl_fc_npn, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+#endif
+ { "ssl_bc_is_resumed", smp_fetch_ssl_fc_is_resumed, 0, NULL, SMP_T_BOOL, SMP_USE_L5SRV },
+ { "ssl_bc_protocol", smp_fetch_ssl_fc_protocol, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+ { "ssl_bc_unique_id", smp_fetch_ssl_fc_unique_id, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+ { "ssl_bc_use_keysize", smp_fetch_ssl_fc_use_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+ { "ssl_bc_session_id", smp_fetch_ssl_fc_session_id, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+#endif
+#ifdef HAVE_SSL_EXTRACT_RANDOM
+ { "ssl_bc_client_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+ { "ssl_bc_server_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+ { "ssl_bc_session_key", smp_fetch_ssl_fc_session_key, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+#endif
+ { "ssl_bc_err", smp_fetch_ssl_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+ { "ssl_bc_err_str", smp_fetch_ssl_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+ { "ssl_c_ca_err", smp_fetch_ssl_c_ca_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_ca_err_depth", smp_fetch_ssl_c_ca_err_depth, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_der", smp_fetch_ssl_x_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_chain_der", smp_fetch_ssl_x_chain_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_err", smp_fetch_ssl_c_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_i_dn", smp_fetch_ssl_x_i_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_key_alg", smp_fetch_ssl_x_key_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_notafter", smp_fetch_ssl_x_notafter, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_notbefore", smp_fetch_ssl_x_notbefore, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_sig_alg", smp_fetch_ssl_x_sig_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_s_dn", smp_fetch_ssl_x_s_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_serial", smp_fetch_ssl_x_serial, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_sha1", smp_fetch_ssl_x_sha1, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_used", smp_fetch_ssl_c_used, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_c_verify", smp_fetch_ssl_c_verify, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_version", smp_fetch_ssl_x_version, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_f_der", smp_fetch_ssl_x_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_f_i_dn", smp_fetch_ssl_x_i_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_key_alg", smp_fetch_ssl_x_key_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_notafter", smp_fetch_ssl_x_notafter, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_notbefore", smp_fetch_ssl_x_notbefore, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_sig_alg", smp_fetch_ssl_x_sig_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_s_dn", smp_fetch_ssl_x_s_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_serial", smp_fetch_ssl_x_serial, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_f_sha1", smp_fetch_ssl_x_sha1, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_f_version", smp_fetch_ssl_x_version, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc", smp_fetch_ssl_fc, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_alg_keysize", smp_fetch_ssl_fc_alg_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_cipher", smp_fetch_ssl_fc_cipher, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_has_crt", smp_fetch_ssl_fc_has_crt, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_has_early", smp_fetch_ssl_fc_has_early, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_has_sni", smp_fetch_ssl_fc_has_sni, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_is_resumed", smp_fetch_ssl_fc_is_resumed, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ { "ssl_fc_npn", smp_fetch_ssl_fc_npn, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ { "ssl_fc_alpn", smp_fetch_ssl_fc_alpn, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+ { "ssl_fc_protocol", smp_fetch_ssl_fc_protocol, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+ { "ssl_fc_unique_id", smp_fetch_ssl_fc_unique_id, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+#endif
+ { "ssl_fc_use_keysize", smp_fetch_ssl_fc_use_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+ { "ssl_fc_session_id", smp_fetch_ssl_fc_session_id, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+#endif
+#ifdef HAVE_SSL_EXTRACT_RANDOM
+ { "ssl_fc_client_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_fc_server_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_fc_session_key", smp_fetch_ssl_fc_session_key, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+#endif
+
+#ifdef HAVE_SSL_KEYLOG
+ { "ssl_fc_client_early_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_client_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_server_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_client_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_server_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_early_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+
+ { "ssl_fc_sni", smp_fetch_ssl_fc_sni, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_bin", smp_fetch_ssl_fc_cl_bin, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_hex", smp_fetch_ssl_fc_cl_hex, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_str", smp_fetch_ssl_fc_cl_str, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_xxh", smp_fetch_ssl_fc_cl_xxh64, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_err", smp_fetch_ssl_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_err_str", smp_fetch_ssl_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_protocol_hello_id",smp_fetch_ssl_fc_protocol_hello_id,0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_extlist_bin", smp_fetch_ssl_fc_ext_bin, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_eclist_bin", smp_fetch_ssl_fc_ecl_bin, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_ecformats_bin", smp_fetch_ssl_fc_ecf_bin, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+
+/* SSL server certificate fetches */
+ { "ssl_s_der", smp_fetch_ssl_x_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_chain_der", smp_fetch_ssl_x_chain_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_key_alg", smp_fetch_ssl_x_key_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_notafter", smp_fetch_ssl_x_notafter, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_notbefore", smp_fetch_ssl_x_notbefore, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_sig_alg", smp_fetch_ssl_x_sig_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_s_dn", smp_fetch_ssl_x_s_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_i_dn", smp_fetch_ssl_x_i_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_serial", smp_fetch_ssl_x_serial, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_sha1", smp_fetch_ssl_x_sha1, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_version", smp_fetch_ssl_x_version, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "sha2", sample_conv_sha2, ARG1(0, SINT), smp_check_sha2, SMP_T_BIN, SMP_T_BIN },
+#ifdef EVP_CIPH_GCM_MODE
+ { "aes_gcm_dec", sample_conv_aes_gcm_dec, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN },
+#endif
+ { "digest", sample_conv_crypto_digest, ARG1(1,STR), check_crypto_digest, SMP_T_BIN, SMP_T_BIN },
+ { "hmac", sample_conv_crypto_hmac, ARG2(2,STR,STR), check_crypto_hmac, SMP_T_BIN, SMP_T_BIN },
+#if defined(HAVE_CRYPTO_memcmp)
+ { "secure_memcmp", sample_conv_secure_memcmp, ARG1(1,STR), smp_check_secure_memcmp, SMP_T_BIN, SMP_T_BOOL },
+#endif
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { "ssl_fc_sni_end", "ssl_fc_sni", PAT_MATCH_END },
+ { "ssl_fc_sni_reg", "ssl_fc_sni", PAT_MATCH_REG },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
diff --git a/src/ssl_sock.c b/src/ssl_sock.c
new file mode 100644
index 0000000..b2f9374
--- /dev/null
+++ b/src/ssl_sock.c
@@ -0,0 +1,8319 @@
+
+/*
+ * SSL/TLS transport layer over SOCK_STREAM sockets
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Acknowledgement:
+ * We'd like to specially thank the Stud project authors for a very clean
+ * and well documented code which helped us understand how the OpenSSL API
+ * ought to be used in non-blocking mode. This is one difficult part which
+ * is not easy to get from the OpenSSL doc, and reading the Stud code made
+ * it much more obvious than the examples in the OpenSSL package. Keep up
+ * the good work, guys!
+ *
+ * Stud is an extremely efficient and scalable SSL/TLS proxy which combines
+ * particularly well with haproxy. For more info about this project, visit :
+ * https://github.com/bumptech/stud
+ *
+ */
+
+/* Note: do NOT include openssl/xxx.h here, do it in openssl-compat.h */
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <netdb.h>
+#include <netinet/tcp.h>
+
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+#include <import/lru.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/base64.h>
+#include <haproxy/channel.h>
+#include <haproxy/chunk.h>
+#include <haproxy/cli.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/server.h>
+#include <haproxy/shctx.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_crtlist.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+#include <haproxy/istbuf.h>
+
+
+/* ***** READ THIS before adding code here! *****
+ *
+ * Due to API incompatibilities between multiple OpenSSL versions and their
+ * derivatives, it's often tempting to add macros to (re-)define certain
+ * symbols. Please do not do this here, and do it in common/openssl-compat.h
+ * exclusively so that the whole code consistently uses the same macros.
+ *
+ * Whenever possible if a macro is missing in certain versions, it's better
+ * to conditionally define it in openssl-compat.h than using lots of ifdefs.
+ */
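+
+/* Illustrative sketch only (not part of this file): a version-dependent
+ * symbol would be mapped once in openssl-compat.h, e.g.:
+ *
+ *   #if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+ *   #define X509_getm_notBefore(x)  X509_get_notBefore(x)
+ *   #endif
+ *
+ * so that this file can use the modern name unconditionally.
+ */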
+
+int nb_engines = 0;
+
+static struct eb_root cert_issuer_tree = EB_ROOT; /* issuers tree from "issuers-chain-path" */
+
+struct global_ssl global_ssl = {
+#ifdef LISTEN_DEFAULT_CIPHERS
+ .listen_default_ciphers = LISTEN_DEFAULT_CIPHERS,
+#endif
+#ifdef CONNECT_DEFAULT_CIPHERS
+ .connect_default_ciphers = CONNECT_DEFAULT_CIPHERS,
+#endif
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ .listen_default_ciphersuites = LISTEN_DEFAULT_CIPHERSUITES,
+ .connect_default_ciphersuites = CONNECT_DEFAULT_CIPHERSUITES,
+#endif
+ .listen_default_ssloptions = BC_SSL_O_NONE,
+ .connect_default_ssloptions = SRV_SSL_O_NONE,
+
+ .listen_default_sslmethods.flags = MC_SSL_O_ALL,
+ .listen_default_sslmethods.min = CONF_TLSV_NONE,
+ .listen_default_sslmethods.max = CONF_TLSV_NONE,
+ .connect_default_sslmethods.flags = MC_SSL_O_ALL,
+ .connect_default_sslmethods.min = CONF_TLSV_NONE,
+ .connect_default_sslmethods.max = CONF_TLSV_NONE,
+
+#ifdef DEFAULT_SSL_MAX_RECORD
+ .max_record = DEFAULT_SSL_MAX_RECORD,
+#endif
+ .hard_max_record = 0,
+ .default_dh_param = SSL_DEFAULT_DH_PARAM,
+ .ctx_cache = DEFAULT_SSL_CTX_CACHE,
+ .capture_buffer_size = 0,
+ .extra_files = SSL_GF_ALL,
+ .extra_files_noext = 0,
+#ifdef HAVE_SSL_KEYLOG
+ .keylog = 0
+#endif
+};
+
+static BIO_METHOD *ha_meth;
+
+DECLARE_STATIC_POOL(ssl_sock_ctx_pool, "ssl_sock_ctx_pool", sizeof(struct ssl_sock_ctx));
+
+DECLARE_STATIC_POOL(ssl_sock_client_sni_pool, "ssl_sock_client_sni_pool", TLSEXT_MAXLEN_host_name + 1);
+
+/* ssl stats module */
+enum {
+ SSL_ST_SESS,
+ SSL_ST_REUSED_SESS,
+ SSL_ST_FAILED_HANDSHAKE,
+
+ SSL_ST_STATS_COUNT /* must be the last member of the enum */
+};
+
+static struct name_desc ssl_stats[] = {
+ [SSL_ST_SESS] = { .name = "ssl_sess",
+ .desc = "Total number of ssl sessions established" },
+ [SSL_ST_REUSED_SESS] = { .name = "ssl_reused_sess",
+ .desc = "Total number of ssl sessions reused" },
+ [SSL_ST_FAILED_HANDSHAKE] = { .name = "ssl_failed_handshake",
+ .desc = "Total number of failed handshake" },
+};
+
+static struct ssl_counters {
+ long long sess;
+ long long reused_sess;
+ long long failed_handshake;
+} ssl_counters;
+
+static void ssl_fill_stats(void *data, struct field *stats)
+{
+ struct ssl_counters *counters = data;
+
+ stats[SSL_ST_SESS] = mkf_u64(FN_COUNTER, counters->sess);
+ stats[SSL_ST_REUSED_SESS] = mkf_u64(FN_COUNTER, counters->reused_sess);
+ stats[SSL_ST_FAILED_HANDSHAKE] = mkf_u64(FN_COUNTER, counters->failed_handshake);
+}
+
+static struct stats_module ssl_stats_module = {
+ .name = "ssl",
+ .fill_stats = ssl_fill_stats,
+ .stats = ssl_stats,
+ .stats_count = SSL_ST_STATS_COUNT,
+ .counters = &ssl_counters,
+ .counters_size = sizeof(ssl_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE|STATS_PX_CAP_LI|STATS_PX_CAP_BE|STATS_PX_CAP_SRV),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &ssl_stats_module);
+
+/* CLI context for "show tls-keys" */
+struct show_keys_ctx {
+ struct tls_keys_ref *next_ref; /* next reference to be dumped */
+ int names_only; /* non-zero = only show file names */
+ int next_index; /* next index to be dumped */
+ int dump_entries; /* dump entries also */
+ enum {
+ SHOW_KEYS_INIT = 0,
+ SHOW_KEYS_LIST,
+ SHOW_KEYS_DONE,
+ } state; /* phase of the current dump */
+};
+
+/* ssl_sock_io_cb is exported to see it resolved in "show fd" */
+struct task *ssl_sock_io_cb(struct task *, void *, unsigned int);
+static int ssl_sock_handshake(struct connection *conn, unsigned int flag);
+
+/* Methods to implement OpenSSL BIO */
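+/* These callbacks bridge OpenSSL's I/O onto haproxy's transport layer
+ * (ctx->xprt). The convention below: returning -1 with the retry flag set
+ * tells OpenSSL to try again later (the non-blocking "EAGAIN" case), while
+ * returning 0 with the retry flags cleared means the transport is closed.
+ */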
+static int ha_ssl_write(BIO *h, const char *buf, int num)
+{
+ struct buffer tmpbuf;
+ struct ssl_sock_ctx *ctx;
+ int ret;
+
+ ctx = BIO_get_data(h);
+ tmpbuf.size = num;
+ tmpbuf.area = (void *)(uintptr_t)buf;
+ tmpbuf.data = num;
+ tmpbuf.head = 0;
+ ret = ctx->xprt->snd_buf(ctx->conn, ctx->xprt_ctx, &tmpbuf, num, 0);
+ if (ret == 0 && !(ctx->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_WR_SH))) {
+ BIO_set_retry_write(h);
+ ret = -1;
+ } else if (ret == 0)
+ BIO_clear_retry_flags(h);
+ return ret;
+}
+
+static int ha_ssl_gets(BIO *h, char *buf, int size)
+{
+
+ return 0;
+}
+
+static int ha_ssl_puts(BIO *h, const char *str)
+{
+
+ return ha_ssl_write(h, str, strlen(str));
+}
+
+static int ha_ssl_read(BIO *h, char *buf, int size)
+{
+ struct buffer tmpbuf;
+ struct ssl_sock_ctx *ctx;
+ int ret;
+
+ ctx = BIO_get_data(h);
+ tmpbuf.size = size;
+ tmpbuf.area = buf;
+ tmpbuf.data = 0;
+ tmpbuf.head = 0;
+ ret = ctx->xprt->rcv_buf(ctx->conn, ctx->xprt_ctx, &tmpbuf, size, 0);
+ if (ret == 0 && !(ctx->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))) {
+ BIO_set_retry_read(h);
+ ret = -1;
+ } else if (ret == 0)
+ BIO_clear_retry_flags(h);
+
+ return ret;
+}
+
+static long ha_ssl_ctrl(BIO *h, int cmd, long arg1, void *arg2)
+{
+ int ret = 0;
+ switch (cmd) {
+ case BIO_CTRL_DUP:
+ case BIO_CTRL_FLUSH:
+ ret = 1;
+ break;
+ }
+ return ret;
+}
+
+static int ha_ssl_new(BIO *h)
+{
+ BIO_set_init(h, 1);
+ BIO_set_data(h, NULL);
+ BIO_clear_flags(h, ~0);
+ return 1;
+}
+
+static int ha_ssl_free(BIO *data)
+{
+
+ return 1;
+}
+
+
+#if defined(USE_THREAD) && (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+
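+/* OpenSSL < 1.1.0 is not thread-safe by itself: the application must supply
+ * a thread id callback and a locking callback covering CRYPTO_num_locks()
+ * static locks. The handlers below map those locks onto haproxy's rwlocks.
+ */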
+static HA_RWLOCK_T *ssl_rwlocks;
+
+
+unsigned long ssl_id_function(void)
+{
+ return (unsigned long)tid;
+}
+
+void ssl_locking_function(int mode, int n, const char * file, int line)
+{
+ if (mode & CRYPTO_LOCK) {
+ if (mode & CRYPTO_READ)
+ HA_RWLOCK_RDLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ else
+ HA_RWLOCK_WRLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ }
+ else {
+ if (mode & CRYPTO_READ)
+ HA_RWLOCK_RDUNLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ else
+ HA_RWLOCK_WRUNLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ }
+}
+
+static int ssl_locking_init(void)
+{
+ int i;
+
+ ssl_rwlocks = malloc(sizeof(HA_RWLOCK_T)*CRYPTO_num_locks());
+ if (!ssl_rwlocks)
+ return -1;
+
+ for (i = 0 ; i < CRYPTO_num_locks() ; i++)
+ HA_RWLOCK_INIT(&ssl_rwlocks[i]);
+
+ CRYPTO_set_id_callback(ssl_id_function);
+ CRYPTO_set_locking_callback(ssl_locking_function);
+
+ return 0;
+}
+
+#endif
+
+__decl_thread(HA_SPINLOCK_T ckch_lock);
+
+
+
+/* mimic what X509_STORE_load_locations does with store_ctx */
+static int ssl_set_cert_crl_file(X509_STORE *store_ctx, char *path)
+{
+ X509_STORE *store = NULL;
+ struct cafile_entry *ca_e = ssl_store_get_cafile_entry(path, 0);
+ if (ca_e)
+ store = ca_e->ca_store;
+ if (store_ctx && store) {
+ int i;
+ X509_OBJECT *obj;
+ STACK_OF(X509_OBJECT) *objs = X509_STORE_get0_objects(store);
+ for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
+ obj = sk_X509_OBJECT_value(objs, i);
+ switch (X509_OBJECT_get_type(obj)) {
+ case X509_LU_X509:
+ X509_STORE_add_cert(store_ctx, X509_OBJECT_get0_X509(obj));
+ break;
+ case X509_LU_CRL:
+ X509_STORE_add_crl(store_ctx, X509_OBJECT_get0_X509_CRL(obj));
+ break;
+ default:
+ break;
+ }
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/* SSL_CTX_load_verify_locations substitute, internally calls X509_STORE_load_locations */
+static int ssl_set_verify_locations_file(SSL_CTX *ctx, char *path)
+{
+ X509_STORE *store_ctx = SSL_CTX_get_cert_store(ctx);
+ return ssl_set_cert_crl_file(store_ctx, path);
+}
+
+/*
+  Extract the CA_list from a CA_file already present in the tree.
+  Duplicate ca_names are tracked with an ebtree; this simplifies OpenSSL compatibility.
+  Returns a shared ca_list: SSL_dup_CA_list must be used before setting it on an SSL_CTX.
+*/
+static STACK_OF(X509_NAME)* ssl_get_client_ca_file(char *path)
+{
+ struct ebmb_node *eb;
+ struct cafile_entry *ca_e;
+
+ eb = ebst_lookup(&cafile_tree, path);
+ if (!eb)
+ return NULL;
+ ca_e = ebmb_entry(eb, struct cafile_entry, node);
+
+ if (ca_e->ca_list == NULL) {
+ int i;
+ unsigned long key;
+ struct eb_root ca_name_tree = EB_ROOT;
+ struct eb64_node *node, *back;
+ struct {
+ struct eb64_node node;
+ X509_NAME *xname;
+ } *ca_name;
+ STACK_OF(X509_OBJECT) *objs;
+ STACK_OF(X509_NAME) *skn;
+ X509 *x;
+ X509_NAME *xn;
+
+ skn = sk_X509_NAME_new_null();
+ /* take x509 from cafile_tree */
+ objs = X509_STORE_get0_objects(ca_e->ca_store);
+ for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
+ x = X509_OBJECT_get0_X509(sk_X509_OBJECT_value(objs, i));
+ if (!x)
+ continue;
+ xn = X509_get_subject_name(x);
+ if (!xn)
+ continue;
+ /* Check for duplicates. */
+ key = X509_NAME_hash(xn);
+ for (node = eb64_lookup(&ca_name_tree, key), ca_name = NULL;
+ node && ca_name == NULL;
+ node = eb64_next(node)) {
+ ca_name = container_of(node, typeof(*ca_name), node);
+ if (X509_NAME_cmp(xn, ca_name->xname) != 0)
+ ca_name = NULL;
+ }
+			/* a duplicate was found, skip it */
+ if (ca_name)
+ continue;
+ ca_name = calloc(1, sizeof *ca_name);
+ xn = X509_NAME_dup(xn);
+ if (!ca_name ||
+ !xn ||
+ !sk_X509_NAME_push(skn, xn)) {
+ free(ca_name);
+ X509_NAME_free(xn);
+ sk_X509_NAME_pop_free(skn, X509_NAME_free);
+ sk_X509_NAME_free(skn);
+ skn = NULL;
+ break;
+ }
+ ca_name->node.key = key;
+ ca_name->xname = xn;
+ eb64_insert(&ca_name_tree, &ca_name->node);
+ }
+ ca_e->ca_list = skn;
+ /* remove temporary ca_name tree */
+ node = eb64_first(&ca_name_tree);
+ while (node) {
+ ca_name = container_of(node, typeof(*ca_name), node);
+ back = eb64_next(node);
+ eb64_delete(node);
+ free(ca_name);
+ node = back;
+ }
+ }
+ return ca_e->ca_list;
+}
+
+struct pool_head *pool_head_ssl_capture __read_mostly = NULL;
+int ssl_capture_ptr_index = -1;
+int ssl_app_data_index = -1;
+#ifdef USE_QUIC
+int ssl_qc_app_data_index = -1;
+#endif /* USE_QUIC */
+
+#ifdef HAVE_SSL_KEYLOG
+int ssl_keylog_index = -1;
+struct pool_head *pool_head_ssl_keylog __read_mostly = NULL;
+struct pool_head *pool_head_ssl_keylog_str __read_mostly = NULL;
+#endif
+
+int ssl_client_crt_ref_index = -1;
+
+/* Used to store the client's SNI in case of ClientHello callback error */
+int ssl_client_sni_index = -1;
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+struct list tlskeys_reference = LIST_HEAD_INIT(tlskeys_reference);
+#endif
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+unsigned int openssl_engines_initialized;
+struct list openssl_engines = LIST_HEAD_INIT(openssl_engines);
+struct ssl_engine_list {
+ struct list list;
+ ENGINE *e;
+};
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+struct list openssl_providers = LIST_HEAD_INIT(openssl_providers);
+struct ssl_provider_list {
+ struct list list;
+ OSSL_PROVIDER *provider;
+};
+#endif
+
+#ifndef OPENSSL_NO_DH
+static int ssl_dh_ptr_index = -1;
+static HASSL_DH *global_dh = NULL;
+static HASSL_DH *local_dh_1024 = NULL;
+static HASSL_DH *local_dh_2048 = NULL;
+static HASSL_DH *local_dh_4096 = NULL;
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+static DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen);
+#else
+static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey);
+#endif
+#endif /* OPENSSL_NO_DH */
+
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+/* X509V3 Extensions that will be added on generated certificates */
+#define X509V3_EXT_SIZE 5
+static char *x509v3_ext_names[X509V3_EXT_SIZE] = {
+ "basicConstraints",
+ "nsComment",
+ "subjectKeyIdentifier",
+ "authorityKeyIdentifier",
+ "keyUsage",
+};
+static char *x509v3_ext_values[X509V3_EXT_SIZE] = {
+ "CA:FALSE",
+ "\"OpenSSL Generated Certificate\"",
+ "hash",
+ "keyid,issuer:always",
+ "nonRepudiation,digitalSignature,keyEncipherment"
+};
+/* LRU cache to store generated certificate */
+static struct lru64_head *ssl_ctx_lru_tree = NULL;
+static unsigned int ssl_ctx_lru_seed = 0;
+static unsigned int ssl_ctx_serial;
+__decl_rwlock(ssl_ctx_lru_rwlock);
+
+#endif // SSL_CTRL_SET_TLSEXT_HOSTNAME
+
+/* The order here matters for picking a default context,
+ * keep the most common keytype at the bottom of the list
+ */
+const char *SSL_SOCK_KEYTYPE_NAMES[] = {
+ "dsa",
+ "ecdsa",
+ "rsa"
+};
+
+static struct shared_context *ssl_shctx = NULL; /* ssl shared session cache */
+static struct eb_root *sh_ssl_sess_tree; /* ssl shared session tree */
+
+/* Dedicated callback functions for heartbeat and clienthello.
+ */
+#ifdef TLS1_RT_HEARTBEAT
+static void ssl_sock_parse_heartbeat(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl);
+#endif
+static void ssl_sock_parse_clienthello(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl);
+
+#ifdef HAVE_SSL_KEYLOG
+static void ssl_init_keylog(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl);
+#endif
+
+/* List head of all registered SSL/TLS protocol message callbacks. */
+struct list ssl_sock_msg_callbacks = LIST_HEAD_INIT(ssl_sock_msg_callbacks);
+
+/* Registers the function <func> in order to be called on SSL/TLS protocol
+ * message processing. It will return 0 if the function <func> is not set
+ * or if it fails to allocate memory.
+ */
+int ssl_sock_register_msg_callback(ssl_sock_msg_callback_func func)
+{
+ struct ssl_sock_msg_callback *cbk;
+
+ if (!func)
+ return 0;
+
+ cbk = calloc(1, sizeof(*cbk));
+ if (!cbk) {
+ ha_alert("out of memory in ssl_sock_register_msg_callback().\n");
+ return 0;
+ }
+
+ cbk->func = func;
+
+ LIST_APPEND(&ssl_sock_msg_callbacks, &cbk->list);
+
+ return 1;
+}
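+
+/* Usage sketch (hypothetical callback, for illustration): a module wishing
+ * to observe TLS records would typically do, at init time:
+ *
+ *   static void my_msg_cb(struct connection *conn, int write_p, int version,
+ *                         int content_type, const void *buf, size_t len,
+ *                         SSL *ssl)
+ *   {
+ *           // inspect <buf>/<len> here
+ *   }
+ *   ...
+ *   ssl_sock_register_msg_callback(my_msg_cb);
+ */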
+
+/* Used to register dedicated SSL/TLS protocol message callbacks.
+ */
+static int ssl_sock_register_msg_callbacks(void)
+{
+#ifdef TLS1_RT_HEARTBEAT
+ if (!ssl_sock_register_msg_callback(ssl_sock_parse_heartbeat))
+ return ERR_ABORT;
+#endif
+ if (global_ssl.capture_buffer_size > 0) {
+ if (!ssl_sock_register_msg_callback(ssl_sock_parse_clienthello))
+ return ERR_ABORT;
+ }
+#ifdef HAVE_SSL_KEYLOG
+ if (global_ssl.keylog > 0) {
+ if (!ssl_sock_register_msg_callback(ssl_init_keylog))
+ return ERR_ABORT;
+ }
+#endif
+
+ return ERR_NONE;
+}
+
+/* Used to free all SSL/TLS protocol message callbacks that were
+ * registered by using ssl_sock_register_msg_callback().
+ */
+static void ssl_sock_unregister_msg_callbacks(void)
+{
+ struct ssl_sock_msg_callback *cbk, *cbkback;
+
+ list_for_each_entry_safe(cbk, cbkback, &ssl_sock_msg_callbacks, list) {
+ LIST_DELETE(&cbk->list);
+ free(cbk);
+ }
+}
+
+static struct ssl_sock_ctx *ssl_sock_get_ctx(struct connection *conn)
+{
+ if (!conn || conn->xprt != xprt_get(XPRT_SSL) || !conn->xprt_ctx)
+ return NULL;
+
+ return (struct ssl_sock_ctx *)conn->xprt_ctx;
+}
+
+SSL *ssl_sock_get_ssl_object(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ return ctx ? ctx->ssl : NULL;
+}
+/*
+ * This function gives the details of SSL errors. It is used only
+ * if debug mode and verbose mode are activated. It dumps all the
+ * SSL errors until the stack is empty.
+ */
+static forceinline void ssl_sock_dump_errors(struct connection *conn,
+ struct quic_conn *qc)
+{
+ unsigned long ret;
+
+ if (unlikely(global.mode & MODE_DEBUG)) {
+ while(1) {
+ const char *func = NULL;
+ ERR_peek_error_func(&func);
+
+ ret = ERR_get_error();
+ if (ret == 0)
+ return;
+ if (conn) {
+ fprintf(stderr, "fd[%#x] OpenSSL error[0x%lx] %s: %s\n",
+ conn_fd(conn), ret,
+ func, ERR_reason_error_string(ret));
+ }
+#ifdef USE_QUIC
+ else {
+ /* TODO: we are not sure <conn> is always initialized for QUIC connections */
+ fprintf(stderr, "qc @%p OpenSSL error[0x%lx] %s: %s\n", qc, ret,
+ func, ERR_reason_error_string(ret));
+ }
+#endif
+ }
+ }
+}
+
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+int ssl_init_single_engine(const char *engine_id, const char *def_algorithms)
+{
+ int err_code = ERR_ABORT;
+ ENGINE *engine;
+ struct ssl_engine_list *el;
+
+ /* grab the structural reference to the engine */
+ engine = ENGINE_by_id(engine_id);
+ if (engine == NULL) {
+ ha_alert("ssl-engine %s: failed to get structural reference\n", engine_id);
+ goto fail_get;
+ }
+
+ if (!ENGINE_init(engine)) {
+ /* the engine couldn't initialise, release it */
+ ha_alert("ssl-engine %s: failed to initialize\n", engine_id);
+ goto fail_init;
+ }
+
+ if (ENGINE_set_default_string(engine, def_algorithms) == 0) {
+ ha_alert("ssl-engine %s: failed on ENGINE_set_default_string\n", engine_id);
+ goto fail_set_method;
+ }
+
+ el = calloc(1, sizeof(*el));
+ if (!el)
+ goto fail_alloc;
+ el->e = engine;
+ LIST_INSERT(&openssl_engines, &el->list);
+ nb_engines++;
+ if (global_ssl.async)
+ global.ssl_used_async_engines = nb_engines;
+ return 0;
+
+fail_alloc:
+fail_set_method:
+ /* release the functional reference from ENGINE_init() */
+ ENGINE_finish(engine);
+
+fail_init:
+ /* release the structural reference from ENGINE_by_id() */
+ ENGINE_free(engine);
+
+fail_get:
+ return err_code;
+}
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+int ssl_init_provider(const char *provider_name)
+{
+ int err_code = ERR_ABORT;
+ struct ssl_provider_list *prov = NULL;
+
+ prov = calloc(1, sizeof(*prov));
+ if (!prov) {
+ ha_alert("ssl-provider %s: memory allocation failure\n", provider_name);
+ goto error;
+ }
+
+ if ((prov->provider = OSSL_PROVIDER_load(NULL, provider_name)) == NULL) {
+ ha_alert("ssl-provider %s: unknown provider\n", provider_name);
+ goto error;
+ }
+
+ LIST_INSERT(&openssl_providers, &prov->list);
+
+ return 0;
+
+error:
+ ha_free(&prov);
+ return err_code;
+}
+#endif /* HAVE_SSL_PROVIDERS */
+
+#ifdef SSL_MODE_ASYNC
+/*
+ * openssl async fd handler
+ */
+void ssl_async_fd_handler(int fd)
+{
+ struct ssl_sock_ctx *ctx = fdtab[fd].owner;
+
+	/* fd is an async engine fd, we must stop
+	 * polling this fd until it is requested again
+ */
+ fd_stop_recv(fd);
+ fd_cant_recv(fd);
+
+ /* crypto engine is available, let's notify the associated
+ * connection that it can pursue its processing.
+ */
+ tasklet_wakeup(ctx->wait_event.tasklet);
+}
+
+/*
+ * openssl async delayed SSL_free handler
+ */
+void ssl_async_fd_free(int fd)
+{
+ SSL *ssl = fdtab[fd].owner;
+ OSSL_ASYNC_FD all_fd[32];
+ size_t num_all_fds = 0;
+ int i;
+
+	/* We assume that async jobs for a same SSL *
+	 * are serialized. So if we are awake it is
+	 * because the running job has just finished
+	 * and we can safely remove all async fds.
+ */
+ SSL_get_all_async_fds(ssl, NULL, &num_all_fds);
+ if (num_all_fds > 32) {
+ send_log(NULL, LOG_EMERG, "haproxy: openssl returns too many async fds. It seems a bug. Process may crash\n");
+ return;
+ }
+
+ SSL_get_all_async_fds(ssl, all_fd, &num_all_fds);
+ for (i=0 ; i < num_all_fds ; i++) {
+ /* We want to remove the fd from the fdtab
+ * but we flag it to disown because the
+ * close is performed by the engine itself
+ */
+ fdtab[all_fd[i]].state |= FD_DISOWN;
+ fd_delete(all_fd[i]);
+ }
+
+ /* Now we can safely call SSL_free, no more pending job in engines */
+ SSL_free(ssl);
+ _HA_ATOMIC_DEC(&global.sslconns);
+ _HA_ATOMIC_DEC(&jobs);
+}
+/*
+ * function used to manage a returned SSL_ERROR_WANT_ASYNC
+ * and enable/disable polling for async fds
+ */
+static inline void ssl_async_process_fds(struct ssl_sock_ctx *ctx)
+{
+ OSSL_ASYNC_FD add_fd[32];
+ OSSL_ASYNC_FD del_fd[32];
+ SSL *ssl = ctx->ssl;
+ size_t num_add_fds = 0;
+ size_t num_del_fds = 0;
+ int i;
+
+ SSL_get_changed_async_fds(ssl, NULL, &num_add_fds, NULL,
+ &num_del_fds);
+ if (num_add_fds > 32 || num_del_fds > 32) {
+ send_log(NULL, LOG_EMERG, "haproxy: openssl returns too many async fds. It seems a bug. Process may crash\n");
+ return;
+ }
+
+ SSL_get_changed_async_fds(ssl, add_fd, &num_add_fds, del_fd, &num_del_fds);
+
+ /* We remove unused fds from the fdtab */
+ for (i=0 ; i < num_del_fds ; i++) {
+ /* We want to remove the fd from the fdtab
+ * but we flag it to disown because the
+ * close is performed by the engine itself
+ */
+ fdtab[del_fd[i]].state |= FD_DISOWN;
+ fd_delete(del_fd[i]);
+ }
+
+ /* We add new fds to the fdtab */
+ for (i=0 ; i < num_add_fds ; i++) {
+ fd_insert(add_fd[i], ctx, ssl_async_fd_handler, tid_bit);
+ }
+
+ num_add_fds = 0;
+ SSL_get_all_async_fds(ssl, NULL, &num_add_fds);
+ if (num_add_fds > 32) {
+ send_log(NULL, LOG_EMERG, "haproxy: openssl returns too many async fds. It seems a bug. Process may crash\n");
+ return;
+ }
+
+ /* We activate the polling for all known async fds */
+ SSL_get_all_async_fds(ssl, add_fd, &num_add_fds);
+ for (i=0 ; i < num_add_fds ; i++) {
+ fd_want_recv(add_fd[i]);
+		/* To ensure that the fd cache won't be used,
+		 * we prefer to catch a real RD event,
+		 * because handling an EAGAIN on this fd would
+		 * result in a context switch, and some
+		 * engines use the fd in blocking mode.
+ */
+ fd_cant_recv(add_fd[i]);
+ }
+
+}
+#endif
+
+#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP && !defined HAVE_ASN1_TIME_TO_TM)
+/*
+ * This function returns the number of seconds elapsed
+ * between the Epoch, 1970-01-01 00:00:00 +0000 (UTC), and the
+ * date presented in ASN1_GENERALIZEDTIME format.
+ *
+ * In case of a parsing error, it returns -1.
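+ *
+ * Worked example (illustrative): "20230101000000Z" parses as
+ * 2023-01-01 00:00:00 UTC, i.e. 1672531200 seconds since the Epoch.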
+ */
+static long asn1_generalizedtime_to_epoch(ASN1_GENERALIZEDTIME *d)
+{
+ long epoch;
+ char *p, *end;
+ const unsigned short month_offset[12] = {
+ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
+ };
+ unsigned long year, month;
+
+ if (!d || (d->type != V_ASN1_GENERALIZEDTIME)) return -1;
+
+ p = (char *)d->data;
+ end = p + d->length;
+
+ if (end - p < 4) return -1;
+ year = 1000 * (p[0] - '0') + 100 * (p[1] - '0') + 10 * (p[2] - '0') + p[3] - '0';
+ p += 4;
+ if (end - p < 2) return -1;
+ month = 10 * (p[0] - '0') + p[1] - '0';
+ if (month < 1 || month > 12) return -1;
+	/* Compute the number of seconds between 1 Jan 1970 and the beginning of the
+	   current month. We account for leap years and for whether the current month
+	   falls before March. */
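+	/* (The middle term counts leap days: a year contributes its Feb 29th only
+	 * once March is reached, hence the "(month < 3)" adjustment. Illustrative
+	 * check for March 1972: (1972/4 - 1972/100 + 1972/400)
+	 * - (1969/4 - 1969/100 + 1969/400) = 478 - 477 = 1 leap day, 1972-02-29.) */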
+ epoch = ( ((year - 1970) * 365)
+ + ((year - (month < 3)) / 4 - (year - (month < 3)) / 100 + (year - (month < 3)) / 400)
+ - ((1970 - 1) / 4 - (1970 - 1) / 100 + (1970 - 1) / 400)
+ + month_offset[month-1]
+ ) * 24 * 60 * 60;
+ p += 2;
+ if (end - p < 2) return -1;
+ /* Add the number of seconds of completed days of current month */
+ epoch += (10 * (p[0] - '0') + p[1] - '0' - 1) * 24 * 60 * 60;
+ p += 2;
+ if (end - p < 2) return -1;
+ /* Add the completed hours of the current day */
+ epoch += (10 * (p[0] - '0') + p[1] - '0') * 60 * 60;
+ p += 2;
+ if (end - p < 2) return -1;
+ /* Add the completed minutes of the current hour */
+ epoch += (10 * (p[0] - '0') + p[1] - '0') * 60;
+ p += 2;
+ if (p == end) return -1;
+	/* Check whether optional seconds are present */
+ if (p[0] < '0' || p[0] > '9')
+ goto nosec;
+ if (end - p < 2) return -1;
+ /* Add the seconds of the current minute */
+ epoch += 10 * (p[0] - '0') + p[1] - '0';
+ p += 2;
+ if (p == end) return -1;
+	/* Ignore the fractional part of the seconds if present */
+ if (p[0] == '.') {
+ do {
+ if (++p == end) return -1;
+ } while (p[0] >= '0' && p[0] <= '9');
+ }
+
+nosec:
+ if (p[0] == 'Z') {
+ if (end - p != 1) return -1;
+ return epoch;
+ }
+ else if (p[0] == '+') {
+ if (end - p != 5) return -1;
+ /* Apply timezone offset */
+ return epoch - ((10 * (p[1] - '0') + p[2] - '0') * 60 * 60 + (10 * (p[3] - '0') + p[4] - '0')) * 60;
+ }
+ else if (p[0] == '-') {
+ if (end - p != 5) return -1;
+ /* Apply timezone offset */
+ return epoch + ((10 * (p[1] - '0') + p[2] - '0') * 60 * 60 + (10 * (p[3] - '0') + p[4] - '0')) * 60;
+ }
+
+ return -1;
+}
+#endif
+
+#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP)
+/*
+ * The struct alignment works here such that key.key coincides with key_data.
+ * Do not change the placement of key_data.
+ */
+struct certificate_ocsp {
+ struct ebmb_node key;
+ unsigned char key_data[OCSP_MAX_CERTID_ASN1_LENGTH];
+ unsigned int key_length;
+ struct buffer response;
+ int refcount;
+ long expire;
+};
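+
+/* Note: an ebmb_node stores its key in the bytes that immediately follow it,
+ * which is why <key_data> must come right after <key>: lookups and inserts on
+ * OCSP_MAX_CERTID_ASN1_LENGTH bytes then read the certificate ID in place. */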
+
+struct ocsp_cbk_arg {
+ int is_single;
+ int single_kt;
+ union {
+ struct certificate_ocsp *s_ocsp;
+ /*
+ * m_ocsp will have multiple entries dependent on key type
+ * Entry 0 - DSA
+ * Entry 1 - ECDSA
+ * Entry 2 - RSA
+ */
+ struct certificate_ocsp *m_ocsp[SSL_SOCK_NUM_KEYTYPES];
+ };
+};
+
+static struct eb_root cert_ocsp_tree = EB_ROOT_UNIQUE;
+
+/* This function checks whether the OCSP response (in DER format) contained
+ * in chunk 'ocsp_response' is valid (otherwise it returns an error).
+ * If 'cid' is not NULL, it is compared to the OCSP certificate ID
+ * contained in the OCSP response, and an error is returned if they do not
+ * match.
+ * If it's a valid OCSP response:
+ *   If 'ocsp' is not NULL, the chunk is copied into the OCSP response
+ *   container pointed to by 'ocsp'.
+ *   If 'ocsp' is NULL, the function looks up the OCSP response containers
+ *   tree (using the ASN1 form of the OCSP certificate ID extracted from the
+ *   response as index) and returns an error if not found. Finally, if an
+ *   OCSP response is already present in the container, it is overwritten.
+ *
+ * Note: an OCSP response containing more than one OCSP single response is
+ * not considered valid.
+ *
+ * Returns 0 on success, 1 in error case.
+ */
+ */
+static int ssl_sock_load_ocsp_response(struct buffer *ocsp_response,
+ struct certificate_ocsp *ocsp,
+ OCSP_CERTID *cid, char **err)
+{
+ OCSP_RESPONSE *resp;
+ OCSP_BASICRESP *bs = NULL;
+ OCSP_SINGLERESP *sr;
+ OCSP_CERTID *id;
+ unsigned char *p = (unsigned char *) ocsp_response->area;
+ int rc , count_sr;
+ ASN1_GENERALIZEDTIME *revtime, *thisupd, *nextupd = NULL;
+ int reason;
+ int ret = 1;
+#ifdef HAVE_ASN1_TIME_TO_TM
+ struct tm nextupd_tm = {0};
+#endif
+
+ resp = d2i_OCSP_RESPONSE(NULL, (const unsigned char **)&p,
+ ocsp_response->data);
+ if (!resp) {
+ memprintf(err, "Unable to parse OCSP response");
+ goto out;
+ }
+
+ rc = OCSP_response_status(resp);
+ if (rc != OCSP_RESPONSE_STATUS_SUCCESSFUL) {
+ memprintf(err, "OCSP response status not successful");
+ goto out;
+ }
+
+ bs = OCSP_response_get1_basic(resp);
+ if (!bs) {
+ memprintf(err, "Failed to get basic response from OCSP Response");
+ goto out;
+ }
+
+ count_sr = OCSP_resp_count(bs);
+ if (count_sr > 1) {
+ memprintf(err, "OCSP response ignored because contains multiple single responses (%d)", count_sr);
+ goto out;
+ }
+
+ sr = OCSP_resp_get0(bs, 0);
+ if (!sr) {
+ memprintf(err, "Failed to get OCSP single response");
+ goto out;
+ }
+
+ id = (OCSP_CERTID*)OCSP_SINGLERESP_get0_id(sr);
+
+ rc = OCSP_single_get0_status(sr, &reason, &revtime, &thisupd, &nextupd);
+ if (rc != V_OCSP_CERTSTATUS_GOOD && rc != V_OCSP_CERTSTATUS_REVOKED) {
+ memprintf(err, "OCSP single response: certificate status is unknown");
+ goto out;
+ }
+
+ if (!nextupd) {
+ memprintf(err, "OCSP single response: missing nextupdate");
+ goto out;
+ }
+
+ rc = OCSP_check_validity(thisupd, nextupd, OCSP_MAX_RESPONSE_TIME_SKEW, -1);
+ if (!rc) {
+ memprintf(err, "OCSP single response: no longer valid.");
+ goto out;
+ }
+
+ if (cid) {
+ if (OCSP_id_cmp(id, cid)) {
+ memprintf(err, "OCSP single response: Certificate ID does not match certificate and issuer");
+ goto out;
+ }
+ }
+
+ if (!ocsp) {
+ unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH];
+ unsigned char *p;
+
+ rc = i2d_OCSP_CERTID(id, NULL);
+ if (!rc) {
+ memprintf(err, "OCSP single response: Unable to encode Certificate ID");
+ goto out;
+ }
+
+ if (rc > OCSP_MAX_CERTID_ASN1_LENGTH) {
+ memprintf(err, "OCSP single response: Certificate ID too long");
+ goto out;
+ }
+
+ p = key;
+ memset(key, 0, OCSP_MAX_CERTID_ASN1_LENGTH);
+ i2d_OCSP_CERTID(id, &p);
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, key, OCSP_MAX_CERTID_ASN1_LENGTH);
+ if (!ocsp) {
+ memprintf(err, "OCSP single response: Certificate ID does not match any certificate or issuer");
+ goto out;
+ }
+ }
+
+ /* According to comments on "chunk_dup", the
+ previous chunk buffer will be freed */
+ if (!chunk_dup(&ocsp->response, ocsp_response)) {
+ memprintf(err, "OCSP response: Memory allocation error");
+ goto out;
+ }
+
+#ifdef HAVE_ASN1_TIME_TO_TM
+ if (ASN1_TIME_to_tm(nextupd, &nextupd_tm) == 0) {
+ memprintf(err, "OCSP single response: Invalid \"Next Update\" time");
+ goto out;
+ }
+ ocsp->expire = my_timegm(&nextupd_tm) - OCSP_MAX_RESPONSE_TIME_SKEW;
+#else
+ ocsp->expire = asn1_generalizedtime_to_epoch(nextupd) - OCSP_MAX_RESPONSE_TIME_SKEW;
+ if (ocsp->expire < 0) {
+ memprintf(err, "OCSP single response: Invalid \"Next Update\" time");
+ goto out;
+ }
+#endif
+
+ ret = 0;
+out:
+ ERR_clear_error();
+
+ if (bs)
+ OCSP_BASICRESP_free(bs);
+
+ if (resp)
+ OCSP_RESPONSE_free(resp);
+
+ return ret;
+}
+/*
+ * External function used to update the OCSP response in the OCSP response's
+ * containers tree. The chunk 'ocsp_response' must contain the OCSP response
+ * to update in DER format.
+ *
+ * Returns 0 on success, 1 in error case.
+ */
+int ssl_sock_update_ocsp_response(struct buffer *ocsp_response, char **err)
+{
+ return ssl_sock_load_ocsp_response(ocsp_response, NULL, NULL, err);
+}
+
+#endif
+
+
+/*
+ * Initialize an HMAC context <hctx> using the <key> and <md> parameters.
+ * Returns -1 in case of error, 1 otherwise.
+ */
+static int ssl_hmac_init(MAC_CTX *hctx, unsigned char *key, int key_len, const EVP_MD *md)
+{
+#ifdef HAVE_OSSL_PARAM
+ OSSL_PARAM params[3];
+
+ params[0] = OSSL_PARAM_construct_octet_string(OSSL_MAC_PARAM_KEY, key, key_len);
+ params[1] = OSSL_PARAM_construct_utf8_string(OSSL_MAC_PARAM_DIGEST, (char*)EVP_MD_name(md), 0);
+ params[2] = OSSL_PARAM_construct_end();
+ if (EVP_MAC_CTX_set_params(hctx, params) == 0)
+ return -1; /* error in mac initialisation */
+
+#else
+ HMAC_Init_ex(hctx, key, key_len, md, NULL);
+#endif
+ return 1;
+}
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+
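+/* OpenSSL session ticket key callback. On encryption (enc == 1), the current
+ * key (slot <head>) is used and its name copied into the ticket; on
+ * decryption, the key named in the ticket is looked up among the
+ * TLS_TICKETS_NO rotated slots. Per the OpenSSL API, returning 2 accepts the
+ * ticket but asks for it to be re-issued with the current key, 1 accepts it
+ * as-is, 0 means the key is unknown (full handshake) and -1 reports an error.
+ */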
+static int ssl_tlsext_ticket_key_cb(SSL *s, unsigned char key_name[16], unsigned char *iv, EVP_CIPHER_CTX *ectx, MAC_CTX *hctx, int enc)
+{
+ struct tls_keys_ref *ref = NULL;
+ union tls_sess_key *keys;
+ int head;
+ int i;
+ int ret = -1; /* error by default */
+ struct connection *conn = SSL_get_ex_data(s, ssl_app_data_index);
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(s, ssl_qc_app_data_index);
+#endif
+
+ if (conn)
+ ref = __objt_listener(conn->target)->bind_conf->keys_ref;
+#ifdef USE_QUIC
+ else if (qc)
+ ref = qc->li->bind_conf->keys_ref;
+#endif
+
+ if (!ref) {
+ /* must never happen */
+ ABORT_NOW();
+ }
+
+ HA_RWLOCK_RDLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+
+ keys = ref->tlskeys;
+ head = ref->tls_ticket_enc_index;
+
+ if (enc) {
+ memcpy(key_name, keys[head].name, 16);
+
+ if(!RAND_pseudo_bytes(iv, EVP_MAX_IV_LENGTH))
+ goto end;
+
+ if (ref->key_size_bits == 128) {
+
+ if(!EVP_EncryptInit_ex(ectx, EVP_aes_128_cbc(), NULL, keys[head].key_128.aes_key, iv))
+ goto end;
+
+ if (ssl_hmac_init(hctx, keys[head].key_128.hmac_key, 16, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ ret = 1;
+ }
+ else if (ref->key_size_bits == 256 ) {
+
+ if(!EVP_EncryptInit_ex(ectx, EVP_aes_256_cbc(), NULL, keys[head].key_256.aes_key, iv))
+ goto end;
+
+ if (ssl_hmac_init(hctx, keys[head].key_256.hmac_key, 32, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ ret = 1;
+ }
+ } else {
+ for (i = 0; i < TLS_TICKETS_NO; i++) {
+ if (!memcmp(key_name, keys[(head + i) % TLS_TICKETS_NO].name, 16))
+ goto found;
+ }
+ ret = 0;
+ goto end;
+
+ found:
+ if (ref->key_size_bits == 128) {
+ if (ssl_hmac_init(hctx, keys[(head + i) % TLS_TICKETS_NO].key_128.hmac_key, 16, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ if(!EVP_DecryptInit_ex(ectx, EVP_aes_128_cbc(), NULL, keys[(head + i) % TLS_TICKETS_NO].key_128.aes_key, iv))
+ goto end;
+ /* 2 for key renewal, 1 if current key is still valid */
+ ret = i ? 2 : 1;
+ }
+ else if (ref->key_size_bits == 256) {
+ if (ssl_hmac_init(hctx, keys[(head + i) % TLS_TICKETS_NO].key_256.hmac_key, 32, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ if(!EVP_DecryptInit_ex(ectx, EVP_aes_256_cbc(), NULL, keys[(head + i) % TLS_TICKETS_NO].key_256.aes_key, iv))
+ goto end;
+ /* 2 for key renewal, 1 if current key is still valid */
+ ret = i ? 2 : 1;
+ }
+ }
+
+ end:
+ HA_RWLOCK_RDUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ return ret;
+}
+
+struct tls_keys_ref *tlskeys_ref_lookup(const char *filename)
+{
+ struct tls_keys_ref *ref;
+
+ list_for_each_entry(ref, &tlskeys_reference, list)
+ if (ref->filename && strcmp(filename, ref->filename) == 0)
+ return ref;
+ return NULL;
+}
+
+struct tls_keys_ref *tlskeys_ref_lookupid(int unique_id)
+{
+ struct tls_keys_ref *ref;
+
+ list_for_each_entry(ref, &tlskeys_reference, list)
+ if (ref->unique_id == unique_id)
+ return ref;
+ return NULL;
+}
+
+/* Update the key in <ref>: if the key size doesn't
+ * match the existing ones, this function returns -1,
+ * else it returns 0 on success.
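+ *
+ * Rotation sketch (assuming the usual TLS_TICKETS_NO == 3): the new key is
+ * written into slot (enc_index + 2) % 3, i.e. the oldest slot, then
+ * enc_index advances by one, so the previously queued "next" key becomes the
+ * one encrypting new tickets and the freshly added key takes over on the
+ * following update.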
+ */
+int ssl_sock_update_tlskey_ref(struct tls_keys_ref *ref,
+ struct buffer *tlskey)
+{
+ if (ref->key_size_bits == 128) {
+ if (tlskey->data != sizeof(struct tls_sess_key_128))
+ return -1;
+ }
+ else if (ref->key_size_bits == 256) {
+ if (tlskey->data != sizeof(struct tls_sess_key_256))
+ return -1;
+ }
+ else
+ return -1;
+
+ HA_RWLOCK_WRLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ memcpy((char *) (ref->tlskeys + ((ref->tls_ticket_enc_index + 2) % TLS_TICKETS_NO)),
+ tlskey->area, tlskey->data);
+ ref->tls_ticket_enc_index = (ref->tls_ticket_enc_index + 1) % TLS_TICKETS_NO;
+ HA_RWLOCK_WRUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+
+ return 0;
+}
+
+int ssl_sock_update_tlskey(char *filename, struct buffer *tlskey, char **err)
+{
+ struct tls_keys_ref *ref = tlskeys_ref_lookup(filename);
+
+ if(!ref) {
+ memprintf(err, "Unable to locate the referenced filename: %s", filename);
+ return 1;
+ }
+ if (ssl_sock_update_tlskey_ref(ref, tlskey) < 0) {
+ memprintf(err, "Invalid key size");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* This function finalizes the configuration parsing. It sets all the
+ * automatic ids. It's called just after the basic checks. It returns
+ * 0 on success, otherwise ERR_*.
+ */
+static int tlskeys_finalize_config(void)
+{
+ int i = 0;
+ struct tls_keys_ref *ref, *ref2, *ref3;
+ struct list tkr = LIST_HEAD_INIT(tkr);
+
+ list_for_each_entry(ref, &tlskeys_reference, list) {
+ if (ref->unique_id == -1) {
+ /* Look for the first free id. */
+ while (1) {
+ list_for_each_entry(ref2, &tlskeys_reference, list) {
+ if (ref2->unique_id == i) {
+ i++;
+ break;
+ }
+ }
+ if (&ref2->list == &tlskeys_reference)
+ break;
+ }
+
+			/* Use this unique id and increment it for the next entry. */
+ ref->unique_id = i;
+ i++;
+ }
+ }
+
+	/* Sort the reference list by id. */
+ list_for_each_entry_safe(ref, ref2, &tlskeys_reference, list) {
+ LIST_DELETE(&ref->list);
+ list_for_each_entry(ref3, &tkr, list) {
+ if (ref->unique_id < ref3->unique_id) {
+ LIST_APPEND(&ref3->list, &ref->list);
+ break;
+ }
+ }
+ if (&ref3->list == &tkr)
+ LIST_APPEND(&tkr, &ref->list);
+ }
+
+ /* swap root */
+ LIST_INSERT(&tkr, &tlskeys_reference);
+ LIST_DELETE(&tkr);
+ return ERR_NONE;
+}
+#endif /* SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB */
+
+#ifndef OPENSSL_NO_OCSP
+int ocsp_ex_index = -1;
+
+int ssl_sock_get_ocsp_arg_kt_index(int evp_keytype)
+{
+ switch (evp_keytype) {
+ case EVP_PKEY_RSA:
+ return 2;
+ case EVP_PKEY_DSA:
+ return 0;
+ case EVP_PKEY_EC:
+ return 1;
+ }
+
+ return -1;
+}
+
+/*
+ * Callback used to set OCSP status extension content in server hello.
+ */
+int ssl_sock_ocsp_stapling_cbk(SSL *ssl, void *arg)
+{
+ struct certificate_ocsp *ocsp;
+ struct ocsp_cbk_arg *ocsp_arg;
+ char *ssl_buf;
+ SSL_CTX *ctx;
+ EVP_PKEY *ssl_pkey;
+ int key_type;
+ int index;
+
+ ctx = SSL_get_SSL_CTX(ssl);
+ if (!ctx)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ ocsp_arg = SSL_CTX_get_ex_data(ctx, ocsp_ex_index);
+ if (!ocsp_arg)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ ssl_pkey = SSL_get_privatekey(ssl);
+ if (!ssl_pkey)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ key_type = EVP_PKEY_base_id(ssl_pkey);
+
+ if (ocsp_arg->is_single && ocsp_arg->single_kt == key_type)
+ ocsp = ocsp_arg->s_ocsp;
+ else {
+ /* For multiple certs per context, we have to find the correct OCSP response based on
+ * the certificate type
+ */
+ index = ssl_sock_get_ocsp_arg_kt_index(key_type);
+
+ if (index < 0)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ ocsp = ocsp_arg->m_ocsp[index];
+
+ }
+
+ if (!ocsp ||
+ !ocsp->response.area ||
+ !ocsp->response.data ||
+ (ocsp->expire < date.tv_sec))
+ return SSL_TLSEXT_ERR_NOACK;
+
+ ssl_buf = OPENSSL_malloc(ocsp->response.data);
+ if (!ssl_buf)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ memcpy(ssl_buf, ocsp->response.area, ocsp->response.data);
+ SSL_set_tlsext_status_ocsp_resp(ssl, ssl_buf, ocsp->response.data);
+
+ return SSL_TLSEXT_ERR_OK;
+}
+
+#endif
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+
+
+/*
+ * Decreases the refcount of the struct ocsp_response and frees it when it is
+ * no longer used. Also removes it from the tree when freed.
+ */
+static void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp)
+{
+ if (!ocsp)
+ return;
+
+ ocsp->refcount--;
+ if (ocsp->refcount <= 0) {
+ ebmb_delete(&ocsp->key);
+ chunk_destroy(&ocsp->response);
+ free(ocsp);
+ }
+}
+
+
+/*
+ * This function enables the handling of the OCSP status extension on 'ctx'
+ * if an ocsp_response buffer was found in the cert_key_and_chain. To enable
+ * the OCSP status extension, the issuer's certificate is mandatory. It
+ * should be present in ckch->ocsp_issuer.
+ *
+ * In addition, the ckch->ocsp_response buffer is loaded as the DER form of
+ * an OCSP response. If the file is empty or its content is not a valid OCSP
+ * response, the OCSP status extension is enabled but the OCSP response is
+ * ignored (a warning is displayed).
+ *
+ * Returns 1 if no ".ocsp" file was found, 0 if the OCSP status extension was
+ * successfully enabled, or -1 in any other error case.
+ */
+static int ssl_sock_load_ocsp(SSL_CTX *ctx, const struct cert_key_and_chain *ckch, STACK_OF(X509) *chain)
+{
+ X509 *x, *issuer;
+ OCSP_CERTID *cid = NULL;
+ int i, ret = -1;
+ struct certificate_ocsp *ocsp = NULL, *iocsp;
+ char *warn = NULL;
+ unsigned char *p;
+ void (*callback) (void);
+
+
+ x = ckch->cert;
+ if (!x)
+ goto out;
+
+ issuer = ckch->ocsp_issuer;
+	/* preferring the issuer from the chain over ocsp_issuer is what has historically been done */
+ if (chain) {
+		/* check if one of the certificates of the chain is the issuer */
+ for (i = 0; i < sk_X509_num(chain); i++) {
+ X509 *ti = sk_X509_value(chain, i);
+ if (X509_check_issued(ti, x) == X509_V_OK) {
+ issuer = ti;
+ break;
+ }
+ }
+ }
+ if (!issuer)
+ goto out;
+
+ cid = OCSP_cert_to_id(0, x, issuer);
+ if (!cid)
+ goto out;
+
+ i = i2d_OCSP_CERTID(cid, NULL);
+ if (!i || (i > OCSP_MAX_CERTID_ASN1_LENGTH))
+ goto out;
+
+ ocsp = calloc(1, sizeof(*ocsp));
+ if (!ocsp)
+ goto out;
+
+ p = ocsp->key_data;
+ ocsp->key_length = i2d_OCSP_CERTID(cid, &p);
+
+ iocsp = (struct certificate_ocsp *)ebmb_insert(&cert_ocsp_tree, &ocsp->key, OCSP_MAX_CERTID_ASN1_LENGTH);
+ if (iocsp == ocsp)
+ ocsp = NULL;
+
+#ifndef SSL_CTX_get_tlsext_status_cb
+# define SSL_CTX_get_tlsext_status_cb(ctx, cb) \
+ *cb = (void (*) (void))ctx->tlsext_status_cb;
+#endif
+ SSL_CTX_get_tlsext_status_cb(ctx, &callback);
+
+ if (!callback) {
+ struct ocsp_cbk_arg *cb_arg;
+ EVP_PKEY *pkey;
+
+ cb_arg = calloc(1, sizeof(*cb_arg));
+ if (!cb_arg)
+ goto out;
+
+ cb_arg->is_single = 1;
+ cb_arg->s_ocsp = iocsp;
+ iocsp->refcount++;
+
+ pkey = X509_get_pubkey(x);
+ cb_arg->single_kt = EVP_PKEY_base_id(pkey);
+ EVP_PKEY_free(pkey);
+
+ SSL_CTX_set_tlsext_status_cb(ctx, ssl_sock_ocsp_stapling_cbk);
+ SSL_CTX_set_ex_data(ctx, ocsp_ex_index, cb_arg); /* we use the ex_data instead of the cb_arg function here, so we can use the cleanup callback to free */
+
+ } else {
+ /*
+ * If the ctx has a status CB, then we have previously set an OCSP staple for this ctx
+ * Update that cb_arg with the new cert's staple
+ */
+ struct ocsp_cbk_arg *cb_arg;
+ struct certificate_ocsp *tmp_ocsp;
+ int index;
+ int key_type;
+ EVP_PKEY *pkey;
+
+ cb_arg = SSL_CTX_get_ex_data(ctx, ocsp_ex_index);
+
+ /*
+		 * The following few lines convert cb_arg from a single ocsp to a multi ocsp.
+		 * The order of operations below matters, take care when changing it.
+ */
+ tmp_ocsp = cb_arg->s_ocsp;
+ index = ssl_sock_get_ocsp_arg_kt_index(cb_arg->single_kt);
+ cb_arg->s_ocsp = NULL;
+ cb_arg->m_ocsp[index] = tmp_ocsp;
+ cb_arg->is_single = 0;
+ cb_arg->single_kt = 0;
+
+ pkey = X509_get_pubkey(x);
+ key_type = EVP_PKEY_base_id(pkey);
+ EVP_PKEY_free(pkey);
+
+ index = ssl_sock_get_ocsp_arg_kt_index(key_type);
+ if (index >= 0 && !cb_arg->m_ocsp[index]) {
+ cb_arg->m_ocsp[index] = iocsp;
+ iocsp->refcount++;
+ }
+ }
+
+ ret = 0;
+
+ warn = NULL;
+ if (ssl_sock_load_ocsp_response(ckch->ocsp_response, iocsp, cid, &warn)) {
+ memprintf(&warn, "Loading: %s. Content will be ignored", warn ? warn : "failure");
+ ha_warning("%s.\n", warn);
+ }
+
+out:
+ if (cid)
+ OCSP_CERTID_free(cid);
+
+ if (ocsp)
+ ssl_sock_free_ocsp(ocsp);
+
+ if (warn)
+ free(warn);
+
+ return ret;
+}
+#endif
+
+#ifdef OPENSSL_IS_BORINGSSL
+static int ssl_sock_load_ocsp(SSL_CTX *ctx, const struct cert_key_and_chain *ckch, STACK_OF(X509) *chain)
+{
+ return SSL_CTX_set_ocsp_response(ctx, (const uint8_t *)ckch->ocsp_response->area, ckch->ocsp_response->data);
+}
+#endif
+
+
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+
+#define CT_EXTENSION_TYPE 18
+
+int sctl_ex_index = -1;
+
+int ssl_sock_sctl_add_cbk(SSL *ssl, unsigned ext_type, const unsigned char **out, size_t *outlen, int *al, void *add_arg)
+{
+ struct buffer *sctl = add_arg;
+
+ *out = (unsigned char *) sctl->area;
+ *outlen = sctl->data;
+
+ return 1;
+}
+
+int ssl_sock_sctl_parse_cbk(SSL *s, unsigned int ext_type, const unsigned char *in, size_t inlen, int *al, void *parse_arg)
+{
+ return 1;
+}
+
+static int ssl_sock_load_sctl(SSL_CTX *ctx, struct buffer *sctl)
+{
+ int ret = -1;
+
+ if (!SSL_CTX_add_server_custom_ext(ctx, CT_EXTENSION_TYPE, ssl_sock_sctl_add_cbk, NULL, sctl, ssl_sock_sctl_parse_cbk, NULL))
+ goto out;
+
+ SSL_CTX_set_ex_data(ctx, sctl_ex_index, sctl);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+#endif
+
+void ssl_sock_infocbk(const SSL *ssl, int where, int ret)
+{
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+#endif /* USE_QUIC */
+ struct ssl_sock_ctx *ctx = NULL;
+
+ BIO *write_bio;
+ (void)ret; /* shut gcc stupid warning */
+
+ if (conn)
+ ctx = conn_get_ssl_sock_ctx(conn);
+#ifdef USE_QUIC
+ else if (qc)
+ ctx = qc->xprt_ctx;
+#endif /* USE_QUIC */
+
+ if (!ctx) {
+ /* must never happen */
+ ABORT_NOW();
+ return;
+ }
+
+#ifndef SSL_OP_NO_RENEGOTIATION
+ /* Please note that BoringSSL defines this macro to zero so don't
+ * change this to #if and do not assign a default value to this macro!
+ */
+ if (where & SSL_CB_HANDSHAKE_START) {
+ /* Disable renegotiation (CVE-2009-3555) */
+ if (conn && (conn->flags & (CO_FL_WAIT_L6_CONN | CO_FL_EARLY_SSL_HS | CO_FL_EARLY_DATA)) == 0) {
+ conn->flags |= CO_FL_ERROR;
+ conn->err_code = CO_ER_SSL_RENEG;
+ }
+ }
+#endif
+
+ if ((where & SSL_CB_ACCEPT_LOOP) == SSL_CB_ACCEPT_LOOP) {
+ if (!(ctx->xprt_st & SSL_SOCK_ST_FL_16K_WBFSIZE)) {
+			/* Long certificate chains optimization:
+			   if the write and read BIOs are different, we
+			   consider that buffering was activated, so we
+			   raise the output buffer size from 4k to 16k */
+ write_bio = SSL_get_wbio(ssl);
+ if (write_bio != SSL_get_rbio(ssl)) {
+ BIO_set_write_buffer_size(write_bio, 16384);
+ ctx->xprt_st |= SSL_SOCK_ST_FL_16K_WBFSIZE;
+ }
+ }
+ }
+}
+
+/* This callback is called for each certificate of the chain during verification.
+   <ok> is set to 1 if preverify detected no error on the current certificate.
+   Returns 0 to break the handshake, 1 otherwise. */
+int ssl_sock_bind_verifycbk(int ok, X509_STORE_CTX *x_store)
+{
+ SSL *ssl;
+ struct connection *conn;
+ struct ssl_sock_ctx *ctx = NULL;
+ int err, depth;
+ X509 *client_crt;
+ STACK_OF(X509) *certs;
+ struct bind_conf *bind_conf = NULL;
+ struct quic_conn *qc = NULL;
+
+ ssl = X509_STORE_CTX_get_ex_data(x_store, SSL_get_ex_data_X509_STORE_CTX_idx());
+ conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ client_crt = SSL_get_ex_data(ssl, ssl_client_crt_ref_index);
+
+ if (conn) {
+ bind_conf = __objt_listener(conn->target)->bind_conf;
+ ctx = __conn_get_ssl_sock_ctx(conn);
+ }
+#ifdef USE_QUIC
+ else {
+ qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ BUG_ON(!qc); /* Must never happen */
+ bind_conf = qc->li->bind_conf;
+ ctx = qc->xprt_ctx;
+ }
+#endif
+
+ BUG_ON(!ctx || !bind_conf);
+ ALREADY_CHECKED(ctx);
+ ALREADY_CHECKED(bind_conf);
+
+ ctx->xprt_st |= SSL_SOCK_ST_FL_VERIFY_DONE;
+
+ depth = X509_STORE_CTX_get_error_depth(x_store);
+ err = X509_STORE_CTX_get_error(x_store);
+
+ if (ok) /* no errors */
+ return ok;
+
+ /* Keep a reference to the client's certificate in order to be able to
+	 * dump some fetch values in a log even when the verification process
+ * fails. */
+ if (depth == 0) {
+ X509_free(client_crt);
+ client_crt = X509_STORE_CTX_get0_cert(x_store);
+ if (client_crt) {
+ X509_up_ref(client_crt);
+ SSL_set_ex_data(ssl, ssl_client_crt_ref_index, client_crt);
+ }
+ }
+ else {
+ /* An error occurred on a CA certificate of the certificate
+ * chain, we might never call this verify callback on the client
+ * certificate's depth (which is 0) so we try to store the
+ * reference right now. */
+ certs = X509_STORE_CTX_get1_chain(x_store);
+ if (certs) {
+ client_crt = sk_X509_value(certs, 0);
+ if (client_crt) {
+ X509_up_ref(client_crt);
+ SSL_set_ex_data(ssl, ssl_client_crt_ref_index, client_crt);
+ }
+ sk_X509_pop_free(certs, X509_free);
+ }
+ }
+
+ /* check if CA error needs to be ignored */
+ if (depth > 0) {
+ if (!SSL_SOCK_ST_TO_CA_ERROR(ctx->xprt_st)) {
+ ctx->xprt_st |= SSL_SOCK_CA_ERROR_TO_ST(err);
+ ctx->xprt_st |= SSL_SOCK_CAEDEPTH_TO_ST(depth);
+ }
+
+ if (err <= SSL_MAX_VFY_ERROR_CODE &&
+ cert_ignerr_bitfield_get(__objt_listener(conn->target)->bind_conf->ca_ignerr_bitfield, err))
+ goto err_ignored;
+
+ /* TODO: for QUIC connection, this error code is lost */
+ if (conn)
+ conn->err_code = CO_ER_SSL_CA_FAIL;
+ return 0;
+ }
+
+ if (!SSL_SOCK_ST_TO_CRTERROR(ctx->xprt_st))
+ ctx->xprt_st |= SSL_SOCK_CRTERROR_TO_ST(err);
+
+ /* check if certificate error needs to be ignored */
+ if (err <= SSL_MAX_VFY_ERROR_CODE &&
+ cert_ignerr_bitfield_get(__objt_listener(conn->target)->bind_conf->crt_ignerr_bitfield, err))
+ goto err_ignored;
+
+ /* TODO: for QUIC connection, this error code is lost */
+ if (conn)
+ conn->err_code = CO_ER_SSL_CRT_FAIL;
+ return 0;
+
+ err_ignored:
+ ssl_sock_dump_errors(conn, qc);
+ ERR_clear_error();
+ return 1;
+}
+
+#ifdef TLS1_RT_HEARTBEAT
+static void ssl_sock_parse_heartbeat(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl)
+{
+ /* test heartbeat received (write_p is set to 0
+ for a received record) */
+ if ((content_type == TLS1_RT_HEARTBEAT) && (write_p == 0)) {
+ struct ssl_sock_ctx *ctx = __conn_get_ssl_sock_ctx(conn);
+ const unsigned char *p = buf;
+ unsigned int payload;
+
+ ctx->xprt_st |= SSL_SOCK_RECV_HEARTBEAT;
+
+ /* Check if this is a CVE-2014-0160 exploitation attempt. */
+ if (*p != TLS1_HB_REQUEST)
+ return;
+
+ if (len < 1 + 2 + 16) /* 1 type + 2 size + 0 payload + 16 padding */
+ goto kill_it;
+
+ payload = (p[1] * 256) + p[2];
+ if (3 + payload + 16 <= len)
+ return; /* OK no problem */
+ kill_it:
+ /* We have a clear heartbleed attack (CVE-2014-0160), the
+ * advertised payload is larger than the advertised packet
+ * length, so we have garbage in the buffer between the
+ * payload and the end of the buffer (p+len). We can't know
+ * if the SSL stack is patched, and we don't know if we can
+ * safely wipe out the area between p+3+len and payload.
+ * So instead, we prevent the response from being sent by
+ * setting the max_send_fragment to 0 and we report an SSL
+ * error, which will kill this connection. It will be reported
+		 * above as SSL_ERROR_SSL while another handshake failure with
+ * a heartbeat message will be reported as SSL_ERROR_SYSCALL.
+ */
+ ssl->max_send_fragment = 0;
+ SSLerr(SSL_F_TLS1_HEARTBEAT, SSL_R_SSL_HANDSHAKE_FAILURE);
+ }
+}
+#endif
+
+static void ssl_sock_parse_clienthello(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl)
+{
+ struct ssl_capture *capture;
+ uchar *msg;
+ uchar *end;
+ uchar *extensions_end;
+ uchar *ec_start = NULL;
+ uchar *ec_formats_start = NULL;
+ uchar *list_end;
+ ushort protocol_version;
+ ushort extension_id;
+ ushort ec_len = 0;
+ uchar ec_formats_len = 0;
+ int offset = 0;
+ int rec_len;
+
+ /* This function is called for "from client" and "to server"
+ * connections. The combination of write_p == 0 and content_type == 22
+	 * only occurs on "from client" connections.
+ */
+
+ /* "write_p" is set to 0 is the bytes are received messages,
+ * otherwise it is set to 1.
+ */
+ if (write_p != 0)
+ return;
+
+	/* content_type contains the type of message received or sent
+	 * according to the SSL/TLS protocol spec. This message is
+	 * encoded with one byte. The value 256 (two bytes) is used
+	 * to designate the SSL/TLS record layer. According to
+	 * RFC 6101, the expected messages (other than 256) are:
+	 * - change_cipher_spec(20)
+	 * - alert(21)
+	 * - handshake(22)
+	 * - application_data(23)
+	 * - (255)
+	 * We are interested in the handshake and especially the
+	 * client hello.
+ */
+ if (content_type != 22)
+ return;
+
+ /* The message length is at least 4 bytes, containing the
+ * message type and the message length.
+ */
+ if (len < 4)
+ return;
+
+	/* The first byte of the handshake message is the type of
+	 * message. The known types are:
+	 * - hello_request(0)
+	 * - client_hello(1)
+	 * - server_hello(2)
+	 * - certificate(11)
+	 * - server_key_exchange (12)
+	 * - certificate_request(13)
+	 * - server_hello_done(14)
+	 * We are interested in the client hello.
+ */
+ msg = (unsigned char *)buf;
+ if (msg[0] != 1)
+ return;
+
+	/* The next three bytes are the length of the message. The total
+	 * length must be this decoded length + 4. If the length given as
+	 * argument is smaller, we abort the protocol dissector.
+ */
+ rec_len = (msg[1] << 16) + (msg[2] << 8) + msg[3];
+ if (len < rec_len + 4)
+ return;
+ msg += 4;
+ end = msg + rec_len;
+ if (end < msg)
+ return;
+
+ /* Expect 2 bytes for protocol version
+ * (1 byte for major and 1 byte for minor)
+ */
+ if (msg + 2 > end)
+ return;
+ protocol_version = (msg[0] << 8) + msg[1];
+ msg += 2;
+
+	/* Expect the random, composed of 4 bytes for the unix time and
+	 * 28 bytes of random payload. So we skip 4 + 28 bytes.
+ */
+ msg += 4 + 28;
+ if (msg > end)
+ return;
+
+	/* Next is the session id:
+	 * if present, we have to jump by its length + 1 for the size byte,
+	 * if not present, we have to jump by 1 only.
+ */
+ if (msg[0] > 0)
+ msg += msg[0];
+ msg += 1;
+ if (msg > end)
+ return;
+
+ /* Next two bytes are the ciphersuite length. */
+ if (msg + 2 > end)
+ return;
+ rec_len = (msg[0] << 8) + msg[1];
+ msg += 2;
+ if (msg + rec_len > end || msg + rec_len < msg)
+ return;
+
+ capture = pool_zalloc(pool_head_ssl_capture);
+ if (!capture)
+ return;
+ /* Compute the xxh64 of the ciphersuite. */
+ capture->xxh64 = XXH64(msg, rec_len, 0);
+
+ /* Capture the ciphersuite. */
+ capture->ciphersuite_len = MIN(global_ssl.capture_buffer_size, rec_len);
+ capture->ciphersuite_offset = 0;
+ memcpy(capture->data, msg, capture->ciphersuite_len);
+ msg += rec_len;
+ offset += capture->ciphersuite_len;
+
+ /* Initialize other data */
+ capture->protocol_version = protocol_version;
+
+	/* Next, the compression methods:
+	 * if present, we have to jump by their length + 1 for the size byte,
+	 * if not present, we have to jump by 1 only.
+ */
+ if (msg[0] > 0)
+ msg += msg[0];
+ msg += 1;
+ if (msg > end)
+ goto store_capture;
+
+ /* We reached extensions */
+ if (msg + 2 > end)
+ goto store_capture;
+ rec_len = (msg[0] << 8) + msg[1];
+ msg += 2;
+ if (msg + rec_len > end || msg + rec_len < msg)
+ goto store_capture;
+ extensions_end = msg + rec_len;
+ capture->extensions_offset = offset;
+
+ /* Parse each extension */
+ while (msg + 4 < extensions_end) {
+ /* Add 2 bytes of extension_id */
+ if (global_ssl.capture_buffer_size >= offset + 2) {
+ capture->data[offset++] = msg[0];
+ capture->data[offset++] = msg[1];
+ capture->extensions_len += 2;
+ }
+ else
+ break;
+ extension_id = (msg[0] << 8) + msg[1];
+ /* Length of the extension */
+ rec_len = (msg[2] << 8) + msg[3];
+
+ /* Expect 2 bytes extension id + 2 bytes extension size */
+ msg += 2 + 2;
+ if (msg + rec_len > extensions_end || msg + rec_len < msg)
+ goto store_capture;
+ /* TLS Extensions
+ * https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml */
+ if (extension_id == 0x000a) {
+ /* Elliptic Curves:
+ * https://www.rfc-editor.org/rfc/rfc8422.html
+ * https://www.rfc-editor.org/rfc/rfc7919.html */
+ list_end = msg + rec_len;
+ if (msg + 2 > list_end)
+ goto store_capture;
+ rec_len = (msg[0] << 8) + msg[1];
+ msg += 2;
+
+ if (msg + rec_len > list_end || msg + rec_len < msg)
+ goto store_capture;
+ /* Store location/size of the list */
+ ec_start = msg;
+ ec_len = rec_len;
+ }
+ else if (extension_id == 0x000b) {
+ /* Elliptic Curves Point Formats:
+ * https://www.rfc-editor.org/rfc/rfc8422.html */
+ list_end = msg + rec_len;
+ if (msg + 1 > list_end)
+ goto store_capture;
+ rec_len = msg[0];
+ msg += 1;
+
+ if (msg + rec_len > list_end || msg + rec_len < msg)
+ goto store_capture;
+ /* Store location/size of the list */
+ ec_formats_start = msg;
+ ec_formats_len = rec_len;
+ }
+ msg += rec_len;
+ }
+
+ if (ec_start) {
+ rec_len = ec_len;
+ if (offset + rec_len > global_ssl.capture_buffer_size)
+ rec_len = global_ssl.capture_buffer_size - offset;
+ memcpy(capture->data + offset, ec_start, rec_len);
+ capture->ec_offset = offset;
+ capture->ec_len = rec_len;
+ offset += rec_len;
+ }
+ if (ec_formats_start) {
+ rec_len = ec_formats_len;
+ if (offset + rec_len > global_ssl.capture_buffer_size)
+ rec_len = global_ssl.capture_buffer_size - offset;
+ memcpy(capture->data + offset, ec_formats_start, rec_len);
+ capture->ec_formats_offset = offset;
+ capture->ec_formats_len = rec_len;
+ offset += rec_len;
+ }
+
+ store_capture:
+ SSL_set_ex_data(ssl, ssl_capture_ptr_index, capture);
+}
+
+
+#ifdef HAVE_SSL_KEYLOG
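+/* Attaches an empty ssl_keylog storage to the SSL session the first time
+ * a TLS message is seen on it. The secrets themselves are filled in later
+ * by the keylog callback; this only reserves the per-session structure.
+ * This path is typically enabled by a global keylog tuning option
+ * (tune.ssl.keylog - name given for illustration).
+ */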
+static void ssl_init_keylog(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl)
+{
+ struct ssl_keylog *keylog;
+
+ if (SSL_get_ex_data(ssl, ssl_keylog_index))
+ return;
+
+ keylog = pool_zalloc(pool_head_ssl_keylog);
+ if (!keylog)
+ return;
+
+ if (!SSL_set_ex_data(ssl, ssl_keylog_index, keylog)) {
+ pool_free(pool_head_ssl_keylog, keylog);
+ return;
+ }
+}
+#endif
+
+/* Callback called for SSL protocol analysis */
+void ssl_sock_msgcbk(int write_p, int version, int content_type, const void *buf, size_t len, SSL *ssl, void *arg)
+{
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ struct ssl_sock_msg_callback *cbk;
+
+ /* Try to call all callback functions that were registered by using
+ * ssl_sock_register_msg_callback().
+ */
+ list_for_each_entry(cbk, &ssl_sock_msg_callbacks, list) {
+ cbk->func(conn, write_p, version, content_type, buf, len, ssl);
+ }
+}
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+static int ssl_sock_srv_select_protos(SSL *s, unsigned char **out, unsigned char *outlen,
+ const unsigned char *in, unsigned int inlen,
+ void *arg)
+{
+ struct server *srv = arg;
+
+ if (SSL_select_next_proto(out, outlen, in, inlen, (unsigned char *)srv->ssl_ctx.npn_str,
+ srv->ssl_ctx.npn_len) == OPENSSL_NPN_NEGOTIATED)
+ return SSL_TLSEXT_ERR_OK;
+ return SSL_TLSEXT_ERR_NOACK;
+}
+#endif
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+/* This callback is used so that the server advertises the list of
+ * negotiable protocols for NPN.
+ */
+static int ssl_sock_advertise_npn_protos(SSL *s, const unsigned char **data,
+ unsigned int *len, void *arg)
+{
+ struct ssl_bind_conf *conf = arg;
+
+ *data = (const unsigned char *)conf->npn_str;
+ *len = conf->npn_len;
+ return SSL_TLSEXT_ERR_OK;
+}
+#endif
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+/* This callback is used so that the server advertises the list of
+ * negotiable protocols for ALPN.
+ */
+static int ssl_sock_advertise_alpn_protos(SSL *s, const unsigned char **out,
+ unsigned char *outlen,
+ const unsigned char *server,
+ unsigned int server_len, void *arg)
+{
+ struct ssl_bind_conf *conf = arg;
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(s, ssl_qc_app_data_index);
+#endif
+
+ if (SSL_select_next_proto((unsigned char**) out, outlen, (const unsigned char *)conf->alpn_str,
+ conf->alpn_len, server, server_len) != OPENSSL_NPN_NEGOTIATED) {
+#ifdef USE_QUIC
+ if (qc)
+ quic_set_tls_alert(qc, SSL_AD_NO_APPLICATION_PROTOCOL);
+#endif
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+
+#ifdef USE_QUIC
+ if (qc && !quic_set_app_ops(qc, *out, *outlen)) {
+ quic_set_tls_alert(qc, SSL_AD_NO_APPLICATION_PROTOCOL);
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+#endif
+
+ return SSL_TLSEXT_ERR_OK;
+}
+#endif
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+#ifndef SSL_NO_GENERATE_CERTIFICATES
+
+/* Configure a DNS SAN extension on a certificate. */
+int ssl_sock_add_san_ext(X509V3_CTX* ctx, X509* cert, const char *servername) {
+ int failure = 0;
+ X509_EXTENSION *san_ext = NULL;
+ CONF *conf = NULL;
+ struct buffer *san_name = get_trash_chunk();
+
+ conf = NCONF_new(NULL);
+ if (!conf) {
+ failure = 1;
+ goto cleanup;
+ }
+
+ /* Build an extension based on the DNS entry above */
+ chunk_appendf(san_name, "DNS:%s", servername);
+ san_ext = X509V3_EXT_nconf_nid(conf, ctx, NID_subject_alt_name, san_name->area);
+ if (!san_ext) {
+ failure = 1;
+ goto cleanup;
+ }
+
+ /* Add the extension */
+ if (!X509_add_ext(cert, san_ext, -1 /* Add to end */)) {
+ failure = 1;
+ goto cleanup;
+ }
+
+ /* Success */
+ failure = 0;
+
+cleanup:
+ if (NULL != san_ext) X509_EXTENSION_free(san_ext);
+ if (NULL != conf) NCONF_free(conf);
+
+ return failure;
+}
+
+/* Create an X509 certificate with the specified servername and an
+ * automatically incremented serial number. This function returns an SSL_CTX
+ * object or NULL if an error occurs. */
+static SSL_CTX *
+ssl_sock_do_create_cert(const char *servername, struct bind_conf *bind_conf, SSL *ssl)
+{
+ X509 *cacert = bind_conf->ca_sign_ckch->cert;
+ EVP_PKEY *capkey = bind_conf->ca_sign_ckch->key;
+ SSL_CTX *ssl_ctx = NULL;
+ X509 *newcrt = NULL;
+ EVP_PKEY *pkey = NULL;
+ SSL *tmp_ssl = NULL;
+ CONF *ctmp = NULL;
+ X509_NAME *name;
+ const EVP_MD *digest;
+ X509V3_CTX ctx;
+ unsigned int i;
+ int key_type;
+
+ /* Get the private key of the default certificate and use it */
+#ifdef HAVE_SSL_CTX_get0_privatekey
+ pkey = SSL_CTX_get0_privatekey(bind_conf->default_ctx);
+#else
+ tmp_ssl = SSL_new(bind_conf->default_ctx);
+ if (tmp_ssl)
+ pkey = SSL_get_privatekey(tmp_ssl);
+#endif
+ if (!pkey)
+ goto mkcert_error;
+
+ /* Create the certificate */
+ if (!(newcrt = X509_new()))
+ goto mkcert_error;
+
+ /* Set version number for the certificate (X509v3) and the serial
+ * number */
+ if (X509_set_version(newcrt, 2L) != 1)
+ goto mkcert_error;
+ ASN1_INTEGER_set(X509_get_serialNumber(newcrt), _HA_ATOMIC_ADD_FETCH(&ssl_ctx_serial, 1));
+
+ /* Set duration for the certificate */
+ if (!X509_gmtime_adj(X509_getm_notBefore(newcrt), (long)-60*60*24) ||
+ !X509_gmtime_adj(X509_getm_notAfter(newcrt),(long)60*60*24*365))
+ goto mkcert_error;
+
+ /* set public key in the certificate */
+ if (X509_set_pubkey(newcrt, pkey) != 1)
+ goto mkcert_error;
+
+ /* Set issuer name from the CA */
+ if (!(name = X509_get_subject_name(cacert)))
+ goto mkcert_error;
+ if (X509_set_issuer_name(newcrt, name) != 1)
+ goto mkcert_error;
+
+	/* Set the subject name using the same name as the issuer, plus the server's CN */
+ name = X509_NAME_dup(name);
+ if (X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC,
+ (const unsigned char *)servername,
+ -1, -1, 0) != 1) {
+ X509_NAME_free(name);
+ goto mkcert_error;
+ }
+ if (X509_set_subject_name(newcrt, name) != 1) {
+ X509_NAME_free(name);
+ goto mkcert_error;
+ }
+ X509_NAME_free(name);
+
+ /* Add x509v3 extensions as specified */
+ ctmp = NCONF_new(NULL);
+ X509V3_set_ctx(&ctx, cacert, newcrt, NULL, NULL, 0);
+ for (i = 0; i < X509V3_EXT_SIZE; i++) {
+ X509_EXTENSION *ext;
+
+ if (!(ext = X509V3_EXT_nconf(ctmp, &ctx, x509v3_ext_names[i], x509v3_ext_values[i])))
+ goto mkcert_error;
+ if (!X509_add_ext(newcrt, ext, -1)) {
+ X509_EXTENSION_free(ext);
+ goto mkcert_error;
+ }
+ X509_EXTENSION_free(ext);
+ }
+
+ /* Add SAN extension */
+ if (ssl_sock_add_san_ext(&ctx, newcrt, servername)) {
+ goto mkcert_error;
+ }
+
+ /* Sign the certificate with the CA private key */
+
+ key_type = EVP_PKEY_base_id(capkey);
+
+ if (key_type == EVP_PKEY_DSA)
+ digest = EVP_sha1();
+ else if (key_type == EVP_PKEY_RSA)
+ digest = EVP_sha256();
+ else if (key_type == EVP_PKEY_EC)
+ digest = EVP_sha256();
+ else {
+#ifdef ASN1_PKEY_CTRL_DEFAULT_MD_NID
+ int nid;
+
+ if (EVP_PKEY_get_default_digest_nid(capkey, &nid) <= 0)
+ goto mkcert_error;
+ if (!(digest = EVP_get_digestbynid(nid)))
+ goto mkcert_error;
+#else
+ goto mkcert_error;
+#endif
+ }
+
+ if (!(X509_sign(newcrt, capkey, digest)))
+ goto mkcert_error;
+
+ /* Create and set the new SSL_CTX */
+ if (!(ssl_ctx = SSL_CTX_new(SSLv23_server_method())))
+ goto mkcert_error;
+ if (!SSL_CTX_use_PrivateKey(ssl_ctx, pkey))
+ goto mkcert_error;
+ if (!SSL_CTX_use_certificate(ssl_ctx, newcrt))
+ goto mkcert_error;
+ if (!SSL_CTX_check_private_key(ssl_ctx))
+ goto mkcert_error;
+
+	/* Build the chain with the CA cert and the rest of the chain; keep this order */
+#if defined(SSL_CTX_add1_chain_cert)
+ if (!SSL_CTX_add1_chain_cert(ssl_ctx, bind_conf->ca_sign_ckch->cert)) {
+ goto mkcert_error;
+ }
+
+ if (bind_conf->ca_sign_ckch->chain) {
+ for (i = 0; i < sk_X509_num(bind_conf->ca_sign_ckch->chain); i++) {
+ X509 *chain_cert = sk_X509_value(bind_conf->ca_sign_ckch->chain, i);
+ if (!SSL_CTX_add1_chain_cert(ssl_ctx, chain_cert)) {
+ goto mkcert_error;
+ }
+ }
+ }
+#endif
+
+ if (newcrt) X509_free(newcrt);
+
+#ifndef OPENSSL_NO_DH
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+ SSL_CTX_set_tmp_dh_callback(ssl_ctx, ssl_get_tmp_dh_cbk);
+#else
+ ssl_sock_set_tmp_dh_from_pkey(ssl_ctx, pkey);
+#endif
+#endif
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+#if defined(SSL_CTX_set1_curves_list)
+ {
+ const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE);
+ if (!SSL_CTX_set1_curves_list(ssl_ctx, ecdhe))
+ goto end;
+ }
+#endif
+#else
+#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH)
+ {
+ const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE);
+ EC_KEY *ecc;
+ int nid;
+
+ if ((nid = OBJ_sn2nid(ecdhe)) == NID_undef)
+ goto end;
+ if (!(ecc = EC_KEY_new_by_curve_name(nid)))
+ goto end;
+ SSL_CTX_set_tmp_ecdh(ssl_ctx, ecc);
+ EC_KEY_free(ecc);
+ }
+#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */
+#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */
+ end:
+ return ssl_ctx;
+
+ mkcert_error:
+ if (ctmp) NCONF_free(ctmp);
+ if (tmp_ssl) SSL_free(tmp_ssl);
+ if (ssl_ctx) SSL_CTX_free(ssl_ctx);
+ if (newcrt) X509_free(newcrt);
+ return NULL;
+}
+
+
+/* Do a lookup for a certificate in the LRU cache used to store generated
+ * certificates and immediately assign it to the SSL session if not null. */
+SSL_CTX *
+ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl)
+{
+ struct lru64 *lru = NULL;
+
+ if (ssl_ctx_lru_tree) {
+ HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ lru = lru64_lookup(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0);
+ if (lru && lru->domain) {
+ if (ssl)
+ SSL_set_SSL_CTX(ssl, (SSL_CTX *)lru->data);
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return (SSL_CTX *)lru->data;
+ }
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ }
+ return NULL;
+}
+
+/* Same as <ssl_sock_assign_generated_cert> but without an SSL session. This
+ * function is not thread-safe, it should only be used to check whether a
+ * certificate exists in the LRU cache (with no guarantee that it will not be
+ * removed by another thread). It is kept for backward compatibility. */
+SSL_CTX *
+ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf)
+{
+ return ssl_sock_assign_generated_cert(key, bind_conf, NULL);
+}
+
+/* Set a certificate in the LRU cache used to store generated
+ * certificates. Returns 0 on success, otherwise -1 */
+int
+ssl_sock_set_generated_cert(SSL_CTX *ssl_ctx, unsigned int key, struct bind_conf *bind_conf)
+{
+ struct lru64 *lru = NULL;
+
+ if (ssl_ctx_lru_tree) {
+ HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ lru = lru64_get(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0);
+ if (!lru) {
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return -1;
+ }
+ if (lru->domain && lru->data)
+ lru->free((SSL_CTX *)lru->data);
+ lru64_commit(lru, ssl_ctx, bind_conf->ca_sign_ckch->cert, 0, (void (*)(void *))SSL_CTX_free);
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return 0;
+ }
+ return -1;
+}
+
+/* Compute the key of the certificate. */
+unsigned int
+ssl_sock_generated_cert_key(const void *data, size_t len)
+{
+ return XXH32(data, len, ssl_ctx_lru_seed);
+}
+
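+/* Note: on-the-fly certificate generation is driven by the bind line, e.g.
+ * (illustrative configuration, keyword names may vary by version):
+ *   bind :443 ssl crt default.pem ca-sign-file ca.pem generate-certificates
+ * where "ca-sign-file" provides the signing CA loaded into ca_sign_ckch.
+ */
+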
+/* Generate a cert and immediately assign it to the SSL session so that the cert's
+ * refcount is maintained regardless of the cert's presence in the LRU cache.
+ */
+static int
+ssl_sock_generate_certificate(const char *servername, struct bind_conf *bind_conf, SSL *ssl)
+{
+ X509 *cacert = bind_conf->ca_sign_ckch->cert;
+ SSL_CTX *ssl_ctx = NULL;
+ struct lru64 *lru = NULL;
+ unsigned int key;
+
+ key = ssl_sock_generated_cert_key(servername, strlen(servername));
+ if (ssl_ctx_lru_tree) {
+ HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ lru = lru64_get(key, ssl_ctx_lru_tree, cacert, 0);
+ if (lru && lru->domain)
+ ssl_ctx = (SSL_CTX *)lru->data;
+ if (!ssl_ctx && lru) {
+ ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl);
+ lru64_commit(lru, ssl_ctx, cacert, 0, (void (*)(void *))SSL_CTX_free);
+ }
+ SSL_set_SSL_CTX(ssl, ssl_ctx);
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return 1;
+ }
+ else {
+ ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl);
+ SSL_set_SSL_CTX(ssl, ssl_ctx);
+ /* No LRU cache, this CTX will be released as soon as the session dies */
+ SSL_CTX_free(ssl_ctx);
+ return 1;
+ }
+ return 0;
+}
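+
+/* Variant used when no SNI is available: derives the LRU cache key from
+ * the connection's destination address instead of a server name, so that
+ * connections without SNI can still reuse generated certificates. */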
+static int
+ssl_sock_generate_certificate_from_conn(struct bind_conf *bind_conf, SSL *ssl)
+{
+ unsigned int key;
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+
+ if (conn_get_dst(conn)) {
+ key = ssl_sock_generated_cert_key(conn->dst, get_addr_len(conn->dst));
+ if (ssl_sock_assign_generated_cert(key, bind_conf, ssl))
+ return 1;
+ }
+ return 0;
+}
+#endif /* !defined SSL_NO_GENERATE_CERTIFICATES */
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+
+static void ctx_set_SSLv3_func(SSL_CTX *ctx, set_context_func c)
+{
+#if SSL_OP_NO_SSLv3
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, SSLv3_server_method())
+ : SSL_CTX_set_ssl_version(ctx, SSLv3_client_method());
+#endif
+}
+static void ctx_set_TLSv10_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, TLSv1_server_method())
+ : SSL_CTX_set_ssl_version(ctx, TLSv1_client_method());
+}
+static void ctx_set_TLSv11_func(SSL_CTX *ctx, set_context_func c) {
+#if SSL_OP_NO_TLSv1_1
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, TLSv1_1_server_method())
+ : SSL_CTX_set_ssl_version(ctx, TLSv1_1_client_method());
+#endif
+}
+static void ctx_set_TLSv12_func(SSL_CTX *ctx, set_context_func c) {
+#if SSL_OP_NO_TLSv1_2
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, TLSv1_2_server_method())
+ : SSL_CTX_set_ssl_version(ctx, TLSv1_2_client_method());
+#endif
+}
+/* TLSv1.2 is the last supported version in this context. */
+static void ctx_set_TLSv13_func(SSL_CTX *ctx, set_context_func c) {}
+/* Unusable in this context. */
+static void ssl_set_SSLv3_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv10_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv11_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv12_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv13_func(SSL *ssl, set_context_func c) {}
+#else /* openssl >= 1.1.0 */
+
+static void ctx_set_SSLv3_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, SSL3_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, SSL3_VERSION);
+}
+static void ssl_set_SSLv3_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, SSL3_VERSION)
+ : SSL_set_min_proto_version(ssl, SSL3_VERSION);
+}
+static void ctx_set_TLSv10_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_VERSION);
+}
+static void ssl_set_TLSv10_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_VERSION);
+}
+static void ctx_set_TLSv11_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_1_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_1_VERSION);
+}
+static void ssl_set_TLSv11_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_1_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_1_VERSION);
+}
+static void ctx_set_TLSv12_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_2_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION);
+}
+static void ssl_set_TLSv12_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_2_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_2_VERSION);
+}
+static void ctx_set_TLSv13_func(SSL_CTX *ctx, set_context_func c) {
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
+#endif
+}
+static void ssl_set_TLSv13_func(SSL *ssl, set_context_func c) {
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_3_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_3_VERSION);
+#endif
+}
+#endif
+static void ctx_set_None_func(SSL_CTX *ctx, set_context_func c) { }
+static void ssl_set_None_func(SSL *ssl, set_context_func c) { }
+
+struct methodVersions methodVersions[] = {
+ {0, 0, ctx_set_None_func, ssl_set_None_func, "NONE"}, /* CONF_TLSV_NONE */
+ {SSL_OP_NO_SSLv3, MC_SSL_O_NO_SSLV3, ctx_set_SSLv3_func, ssl_set_SSLv3_func, "SSLv3"}, /* CONF_SSLV3 */
+ {SSL_OP_NO_TLSv1, MC_SSL_O_NO_TLSV10, ctx_set_TLSv10_func, ssl_set_TLSv10_func, "TLSv1.0"}, /* CONF_TLSV10 */
+ {SSL_OP_NO_TLSv1_1, MC_SSL_O_NO_TLSV11, ctx_set_TLSv11_func, ssl_set_TLSv11_func, "TLSv1.1"}, /* CONF_TLSV11 */
+ {SSL_OP_NO_TLSv1_2, MC_SSL_O_NO_TLSV12, ctx_set_TLSv12_func, ssl_set_TLSv12_func, "TLSv1.2"}, /* CONF_TLSV12 */
+ {SSL_OP_NO_TLSv1_3, MC_SSL_O_NO_TLSV13, ctx_set_TLSv13_func, ssl_set_TLSv13_func, "TLSv1.3"}, /* CONF_TLSV13 */
+};
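+
+/* Usage sketch (field names as used elsewhere in this file): to pin an SSL
+ * object to exactly TLSv1.2, one would call
+ *   methodVersions[CONF_TLSV12].ssl_set_version(ssl, SET_MIN);
+ *   methodVersions[CONF_TLSV12].ssl_set_version(ssl, SET_MAX);
+ * which maps to SSL_set_min/max_proto_version() on OpenSSL >= 1.1.0.
+ */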
+
+static void ssl_sock_switchctx_set(SSL *ssl, SSL_CTX *ctx)
+{
+ SSL_set_verify(ssl, SSL_CTX_get_verify_mode(ctx), ssl_sock_bind_verifycbk);
+ SSL_set_client_CA_list(ssl, SSL_dup_CA_list(SSL_CTX_get_client_CA_list(ctx)));
+ SSL_set_SSL_CTX(ssl, ctx);
+}
+
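+/* The client hello callback below picks the SSL_CTX for the handshake from
+ * the bind_conf's SNI trees: exact names are tried first, then wildcards,
+ * and among the matches an ECDSA certificate is preferred when the client
+ * advertises ECDSA support, then RSA, then DSA/anonymous ones.
+ */
+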
+#ifdef HAVE_SSL_CLIENT_HELLO_CB
+
+int ssl_sock_switchctx_err_cbk(SSL *ssl, int *al, void *priv)
+{
+ struct bind_conf *s = priv;
+ (void)al; /* shut gcc stupid warning */
+
+ if (SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name) || (s->options & BC_O_GENERATE_CERTS))
+ return SSL_TLSEXT_ERR_OK;
+ return SSL_TLSEXT_ERR_NOACK;
+}
+
+#ifdef OPENSSL_IS_BORINGSSL
+int ssl_sock_switchctx_cbk(const struct ssl_early_callback_ctx *ctx)
+{
+ SSL *ssl = ctx->ssl;
+#else
+int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg)
+{
+#endif
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+#endif /* USE_QUIC */
+ struct bind_conf *s = NULL;
+ const uint8_t *extension_data;
+ size_t extension_len;
+ int has_rsa_sig = 0, has_ecdsa_sig = 0;
+
+ char *wildp = NULL;
+ const uint8_t *servername;
+ size_t servername_len;
+ struct ebmb_node *node, *n, *node_ecdsa = NULL, *node_rsa = NULL, *node_anonymous = NULL;
+ int allow_early = 0;
+ int i;
+
+ if (conn)
+ s = __objt_listener(conn->target)->bind_conf;
+#ifdef USE_QUIC
+ else if (qc)
+ s = qc->li->bind_conf;
+#endif /* USE_QUIC */
+
+ if (!s) {
+ /* must never happen */
+ ABORT_NOW();
+ return 0;
+ }
+
+#ifdef USE_QUIC
+ if (qc) {
+ /* Look for the QUIC transport parameters. */
+#ifdef OPENSSL_IS_BORINGSSL
+ if (!SSL_early_callback_ctx_extension_get(ctx, qc->tps_tls_ext,
+ &extension_data, &extension_len))
+#else
+ if (!SSL_client_hello_get0_ext(ssl, qc->tps_tls_ext,
+ &extension_data, &extension_len))
+#endif
+ {
+			/* This is not redundant. If we only return 0 without setting
+			 * <*al>, the side effect is that another TLS alert gets
+			 * generated and set after the call to quic_set_tls_alert().
+ */
+ *al = SSL_AD_MISSING_EXTENSION;
+ quic_set_tls_alert(qc, SSL_AD_MISSING_EXTENSION);
+ return 0;
+ }
+
+ if (!quic_transport_params_store(qc, 0, extension_data,
+ extension_data + extension_len))
+ goto abort;
+
+ qc->flags |= QUIC_FL_CONN_TX_TP_RECEIVED;
+ }
+#endif /* USE_QUIC */
+
+ if (s->ssl_conf.early_data)
+ allow_early = 1;
+#ifdef OPENSSL_IS_BORINGSSL
+ if (SSL_early_callback_ctx_extension_get(ctx, TLSEXT_TYPE_server_name,
+ &extension_data, &extension_len)) {
+#else
+ if (SSL_client_hello_get0_ext(ssl, TLSEXT_TYPE_server_name, &extension_data, &extension_len)) {
+#endif
+ /*
+ * The server_name extension was given too much extensibility when it
+ * was written, so parsing the normal case is a bit complex.
+ */
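+		/* server_name extension layout (RFC 6066):
+		 *   2 bytes  server_name_list length
+		 *   1 byte   name_type (0 = host_name)
+		 *   2 bytes  host_name length
+		 *   N bytes  host_name (not NUL-terminated)
+		 */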
+ size_t len;
+ if (extension_len <= 2)
+ goto abort;
+ /* Extract the length of the supplied list of names. */
+ len = (*extension_data++) << 8;
+ len |= *extension_data++;
+ if (len + 2 != extension_len)
+ goto abort;
+ /*
+ * The list in practice only has a single element, so we only consider
+ * the first one.
+ */
+ if (len == 0 || *extension_data++ != TLSEXT_NAMETYPE_host_name)
+ goto abort;
+ extension_len = len - 1;
+ /* Now we can finally pull out the byte array with the actual hostname. */
+ if (extension_len <= 2)
+ goto abort;
+ len = (*extension_data++) << 8;
+ len |= *extension_data++;
+ if (len == 0 || len + 2 > extension_len || len > TLSEXT_MAXLEN_host_name
+ || memchr(extension_data, 0, len) != NULL)
+ goto abort;
+ servername = extension_data;
+ servername_len = len;
+ } else {
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate_from_conn(s, ssl)) {
+ goto allow_early;
+ }
+#endif
+		/* without the SNI extension, use the default_ctx (needs SSL_TLSEXT_ERR_NOACK) */
+ if (!s->strict_sni) {
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+ goto abort;
+ }
+
+ /* extract/check clientHello information */
+#ifdef OPENSSL_IS_BORINGSSL
+ if (SSL_early_callback_ctx_extension_get(ctx, TLSEXT_TYPE_signature_algorithms, &extension_data, &extension_len)) {
+#else
+ if (SSL_client_hello_get0_ext(ssl, TLSEXT_TYPE_signature_algorithms, &extension_data, &extension_len)) {
+#endif
+ uint8_t sign;
+ size_t len;
+ if (extension_len < 2)
+ goto abort;
+ len = (*extension_data++) << 8;
+ len |= *extension_data++;
+ if (len + 2 != extension_len)
+ goto abort;
+ if (len % 2 != 0)
+ goto abort;
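+		/* each entry is a 2-byte (hash, signature) pair as defined
+		 * by TLSv1.2 (RFC 5246, SignatureAndHashAlgorithm) */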
+ for (; len > 0; len -= 2) {
+ extension_data++; /* hash */
+ sign = *extension_data++;
+ switch (sign) {
+ case TLSEXT_signature_rsa:
+ has_rsa_sig = 1;
+ break;
+ case TLSEXT_signature_ecdsa:
+ has_ecdsa_sig = 1;
+ break;
+ default:
+ continue;
+ }
+ if (has_ecdsa_sig && has_rsa_sig)
+ break;
+ }
+ } else {
+ /* without TLSEXT_TYPE_signature_algorithms extension (< TLSv1.2) */
+ has_rsa_sig = 1;
+ }
+	if (has_ecdsa_sig) { /* very rare case: ecdsa signature advertised but no ECDSA cipher */
+ const SSL_CIPHER *cipher;
+ size_t len;
+ const uint8_t *cipher_suites;
+ has_ecdsa_sig = 0;
+#ifdef OPENSSL_IS_BORINGSSL
+ len = ctx->cipher_suites_len;
+ cipher_suites = ctx->cipher_suites;
+#else
+ len = SSL_client_hello_get0_ciphers(ssl, &cipher_suites);
+#endif
+ if (len % 2 != 0)
+ goto abort;
+ for (; len != 0; len -= 2, cipher_suites += 2) {
+#ifdef OPENSSL_IS_BORINGSSL
+ uint16_t cipher_suite = (cipher_suites[0] << 8) | cipher_suites[1];
+ cipher = SSL_get_cipher_by_value(cipher_suite);
+#else
+ cipher = SSL_CIPHER_find(ssl, cipher_suites);
+#endif
+ if (cipher && SSL_CIPHER_get_auth_nid(cipher) == NID_auth_ecdsa) {
+ has_ecdsa_sig = 1;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < trash.size && i < servername_len; i++) {
+ trash.area[i] = tolower(servername[i]);
+ if (!wildp && (trash.area[i] == '.'))
+ wildp = &trash.area[i];
+ }
+ trash.area[i] = 0;
+
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+
+	/* Look for an ECDSA, RSA or DSA certificate, first under the fully
+	 * qualified name and, if not found, under the wildcard */
+ for (i = 0; i < 2; i++) {
+ if (i == 0) /* lookup in full qualified names */
+ node = ebst_lookup(&s->sni_ctx, trash.area);
+ else if (i == 1 && wildp) /* lookup in wildcards names */
+ node = ebst_lookup(&s->sni_w_ctx, wildp);
+ else
+ break;
+
+ for (n = node; n; n = ebmb_next_dup(n)) {
+
+			/* look up a non-negated filter */
+ if (!container_of(n, struct sni_ctx, name)->neg) {
+ struct sni_ctx *sni, *sni_tmp;
+ int skip = 0;
+
+ if (i == 1 && wildp) { /* wildcard */
+ /* If this is a wildcard, look for an exclusion on the same crt-list line */
+ sni = container_of(n, struct sni_ctx, name);
+ list_for_each_entry(sni_tmp, &sni->ckch_inst->sni_ctx, by_ckch_inst) {
+ if (sni_tmp->neg && (strcmp((const char *)sni_tmp->name.key, trash.area) == 0)) {
+ skip = 1;
+ break;
+ }
+ }
+ if (skip)
+ continue;
+ }
+
+ switch(container_of(n, struct sni_ctx, name)->kinfo.sig) {
+ case TLSEXT_signature_ecdsa:
+ if (!node_ecdsa)
+ node_ecdsa = n;
+ break;
+ case TLSEXT_signature_rsa:
+ if (!node_rsa)
+ node_rsa = n;
+ break;
+ default: /* TLSEXT_signature_anonymous|dsa */
+ if (!node_anonymous)
+ node_anonymous = n;
+ break;
+ }
+ }
+ }
+ }
+	/* Once the certificates are found, select one depending on what the
+	 * client supports, by key signature priority order: ECDSA >
+	 * RSA > DSA */
+ if (has_ecdsa_sig && node_ecdsa)
+ node = node_ecdsa;
+ else if (has_rsa_sig && node_rsa)
+ node = node_rsa;
+ else if (node_anonymous)
+ node = node_anonymous;
+ else if (node_ecdsa)
+ node = node_ecdsa; /* no ecdsa signature case (< TLSv1.2) */
+ else
+ node = node_rsa; /* no rsa signature case (far far away) */
+
+ if (node) {
+ /* switch ctx */
+ struct ssl_bind_conf *conf = container_of(node, struct sni_ctx, name)->conf;
+ ssl_sock_switchctx_set(ssl, container_of(node, struct sni_ctx, name)->ctx);
+ if (conf) {
+ methodVersions[conf->ssl_methods.min].ssl_set_version(ssl, SET_MIN);
+ methodVersions[conf->ssl_methods.max].ssl_set_version(ssl, SET_MAX);
+ if (conf->early_data)
+ allow_early = 1;
+ }
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(trash.area, s, ssl)) {
+ /* switch ctx done in ssl_sock_generate_certificate */
+ goto allow_early;
+ }
+#endif
+ if (!s->strict_sni) {
+ /* no certificate match, is the default_ctx */
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+
+	/* We are about to raise a handshake error so the servername extension
+ * callback will never be called and the SNI will never be stored in the
+ * SSL context. In order for the ssl_fc_sni sample fetch to still work
+ * in such a case, we store the SNI ourselves as an ex_data information
+ * in the SSL context.
+ */
+ {
+ char *client_sni = pool_alloc(ssl_sock_client_sni_pool);
+ if (client_sni) {
+ strncpy(client_sni, trash.area, TLSEXT_MAXLEN_host_name);
+ client_sni[TLSEXT_MAXLEN_host_name] = '\0';
+ SSL_set_ex_data(ssl, ssl_client_sni_index, client_sni);
+ }
+ }
+
+	/* other cases fall back on abort, if strict-sni is set but no node was found */
+
+ abort:
+ /* abort handshake (was SSL_TLSEXT_ERR_ALERT_FATAL) */
+ if (conn)
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+#ifdef OPENSSL_IS_BORINGSSL
+ return ssl_select_cert_error;
+#else
+ *al = SSL_AD_UNRECOGNIZED_NAME;
+ return 0;
+#endif
+
+allow_early:
+#ifdef OPENSSL_IS_BORINGSSL
+ if (allow_early)
+ SSL_set_early_data_enabled(ssl, 1);
+#else
+ if (!allow_early)
+ SSL_set_max_early_data(ssl, 0);
+#endif
+ return 1;
+}
+
+#else /* ! HAVE_SSL_CLIENT_HELLO_CB */
+
+/* Sets the SSL ctx of <ssl> to match the advertised server name. Returns a
+ * warning when no match is found, which implies the default (first) cert
+ * will keep being used.
+ */
+int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv)
+{
+ const char *servername;
+ const char *wildp = NULL;
+ struct ebmb_node *node, *n;
+ struct bind_conf *s = priv;
+#ifdef USE_QUIC
+ const uint8_t *extension_data;
+ size_t extension_len;
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+#endif /* USE_QUIC */
+ int i;
+ (void)al; /* shut gcc stupid warning */
+
+#ifdef USE_QUIC
+ if (qc) {
+
+ /* Look for the QUIC transport parameters. */
+ SSL_get_peer_quic_transport_params(ssl, &extension_data, &extension_len);
+ if (extension_len == 0) {
+			/* This is not redundant. If we only return 0 without setting
+			 * <*al>, the side effect is that another TLS alert gets
+			 * generated and set after the call to quic_set_tls_alert().
+ */
+ *al = SSL_AD_MISSING_EXTENSION;
+ quic_set_tls_alert(qc, SSL_AD_MISSING_EXTENSION);
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+
+ if (!quic_transport_params_store(qc, 0, extension_data,
+ extension_data + extension_len))
+ return SSL_TLSEXT_ERR_NOACK;
+
+ qc->flags |= QUIC_FL_CONN_TX_TP_RECEIVED;
+ }
+#endif /* USE_QUIC */
+
+ servername = SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name);
+ if (!servername) {
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate_from_conn(s, ssl))
+ return SSL_TLSEXT_ERR_OK;
+#endif
+ if (s->strict_sni)
+ return SSL_TLSEXT_ERR_ALERT_FATAL;
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+
+ for (i = 0; i < trash.size; i++) {
+ if (!servername[i])
+ break;
+ trash.area[i] = tolower((unsigned char)servername[i]);
+ if (!wildp && (trash.area[i] == '.'))
+ wildp = &trash.area[i];
+ }
+ trash.area[i] = 0;
+
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ node = NULL;
+ /* lookup in full qualified names */
+ for (n = ebst_lookup(&s->sni_ctx, trash.area); n; n = ebmb_next_dup(n)) {
+		/* look up a non-negated filter */
+ if (!container_of(n, struct sni_ctx, name)->neg) {
+ node = n;
+ break;
+ }
+ }
+ if (!node && wildp) {
+ /* lookup in wildcards names */
+ for (n = ebst_lookup(&s->sni_w_ctx, wildp); n; n = ebmb_next_dup(n)) {
+			/* look up a non-negated filter */
+ if (!container_of(n, struct sni_ctx, name)->neg) {
+ node = n;
+ break;
+ }
+ }
+ }
+ if (!node) {
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(servername, s, ssl)) {
+ /* switch ctx done in ssl_sock_generate_certificate */
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_OK;
+ }
+#endif
+ if (s->strict_sni) {
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_ALERT_FATAL;
+ }
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_OK;
+ }
+
+ /* switch ctx */
+ ssl_sock_switchctx_set(ssl, container_of(node, struct sni_ctx, name)->ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_OK;
+}
+#endif /* HAVE_SSL_CLIENT_HELLO_CB */
+#endif /* SSL_CTRL_SET_TLSEXT_HOSTNAME */
+
+#ifndef OPENSSL_NO_DH
+
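+/* Builds a set of DH parameters from <p> and <g>. With OpenSSL >= 3.0 this
+ * yields an EVP_PKEY built through the params-from-data API, otherwise a
+ * plain DH object; HASSL_DH abstracts the two. Ownership of <p> and <g> is
+ * taken in both cases.
+ */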
+static inline HASSL_DH *ssl_new_dh_fromdata(BIGNUM *p, BIGNUM *g)
+{
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ OSSL_PARAM_BLD *tmpl = NULL;
+ OSSL_PARAM *params = NULL;
+ EVP_PKEY_CTX *ctx = NULL;
+ EVP_PKEY *pkey = NULL;
+
+ if ((tmpl = OSSL_PARAM_BLD_new()) == NULL
+ || !OSSL_PARAM_BLD_push_BN(tmpl, OSSL_PKEY_PARAM_FFC_P, p)
+ || !OSSL_PARAM_BLD_push_BN(tmpl, OSSL_PKEY_PARAM_FFC_G, g)
+ || (params = OSSL_PARAM_BLD_to_param(tmpl)) == NULL) {
+ goto end;
+ }
+ ctx = EVP_PKEY_CTX_new_from_name(NULL, "DH", NULL);
+ if (ctx == NULL
+ || !EVP_PKEY_fromdata_init(ctx)
+ || !EVP_PKEY_fromdata(ctx, &pkey, EVP_PKEY_KEY_PARAMETERS, params)) {
+ goto end;
+ }
+
+end:
+ EVP_PKEY_CTX_free(ctx);
+ OSSL_PARAM_free(params);
+ OSSL_PARAM_BLD_free(tmpl);
+ BN_free(p);
+ BN_free(g);
+ return pkey;
+#else
+
+ HASSL_DH *dh = DH_new();
+
+ if (!dh)
+ return NULL;
+
+ DH_set0_pqg(dh, p, NULL, g);
+
+ return dh;
+#endif
+}
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+static inline HASSL_DH *ssl_get_dh_by_nid(int nid)
+{
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ OSSL_PARAM params[2];
+ EVP_PKEY *pkey = NULL;
+ EVP_PKEY_CTX *pctx = EVP_PKEY_CTX_new_from_name(NULL, "DH", NULL);
+ const char *named_group = NULL;
+
+ if (!pctx)
+ goto end;
+
+ named_group = OBJ_nid2ln(nid);
+
+ if (!named_group)
+ goto end;
+
+ params[0] = OSSL_PARAM_construct_utf8_string("group", (char*)named_group, 0);
+ params[1] = OSSL_PARAM_construct_end();
+
+ if (EVP_PKEY_keygen_init(pctx) && EVP_PKEY_CTX_set_params(pctx, params))
+ EVP_PKEY_generate(pctx, &pkey);
+
+end:
+ EVP_PKEY_CTX_free(pctx);
+ return pkey;
+#else
+
+ HASSL_DH *dh = NULL;
+ dh = DH_new_by_nid(nid);
+ return dh;
+#endif
+}
+#endif
+
+
+static HASSL_DH * ssl_get_dh_1024(void)
+{
+ static unsigned char dh1024_p[]={
+ 0xFA,0xF9,0x2A,0x22,0x2A,0xA7,0x7F,0xE1,0x67,0x4E,0x53,0xF7,
+ 0x56,0x13,0xC3,0xB1,0xE3,0x29,0x6B,0x66,0x31,0x6A,0x7F,0xB3,
+ 0xC2,0x68,0x6B,0xCB,0x1D,0x57,0x39,0x1D,0x1F,0xFF,0x1C,0xC9,
+ 0xA6,0xA4,0x98,0x82,0x31,0x5D,0x25,0xFF,0x8A,0xE0,0x73,0x96,
+ 0x81,0xC8,0x83,0x79,0xC1,0x5A,0x04,0xF8,0x37,0x0D,0xA8,0x3D,
+ 0xAE,0x74,0xBC,0xDB,0xB6,0xA4,0x75,0xD9,0x71,0x8A,0xA0,0x17,
+ 0x9E,0x2D,0xC8,0xA8,0xDF,0x2C,0x5F,0x82,0x95,0xF8,0x92,0x9B,
+ 0xA7,0x33,0x5F,0x89,0x71,0xC8,0x2D,0x6B,0x18,0x86,0xC4,0x94,
+ 0x22,0xA5,0x52,0x8D,0xF6,0xF6,0xD2,0x37,0x92,0x0F,0xA5,0xCC,
+ 0xDB,0x7B,0x1D,0x3D,0xA1,0x31,0xB7,0x80,0x8F,0x0B,0x67,0x5E,
+ 0x36,0xA5,0x60,0x0C,0xF1,0x95,0x33,0x8B,
+ };
+ static unsigned char dh1024_g[]={
+ 0x02,
+ };
+
+ BIGNUM *p;
+ BIGNUM *g;
+
+ HASSL_DH *dh = NULL;
+
+ p = BN_bin2bn(dh1024_p, sizeof dh1024_p, NULL);
+ g = BN_bin2bn(dh1024_g, sizeof dh1024_g, NULL);
+
+ if (p && g)
+ dh = ssl_new_dh_fromdata(p, g);
+
+ return dh;
+}
+
+static HASSL_DH *ssl_get_dh_2048(void)
+{
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10101000L)
+ static unsigned char dh2048_p[]={
+ 0xEC,0x86,0xF8,0x70,0xA0,0x33,0x16,0xEC,0x05,0x1A,0x73,0x59,
+ 0xCD,0x1F,0x8B,0xF8,0x29,0xE4,0xD2,0xCF,0x52,0xDD,0xC2,0x24,
+ 0x8D,0xB5,0x38,0x9A,0xFB,0x5C,0xA4,0xE4,0xB2,0xDA,0xCE,0x66,
+ 0x50,0x74,0xA6,0x85,0x4D,0x4B,0x1D,0x30,0xB8,0x2B,0xF3,0x10,
+ 0xE9,0xA7,0x2D,0x05,0x71,0xE7,0x81,0xDF,0x8B,0x59,0x52,0x3B,
+ 0x5F,0x43,0x0B,0x68,0xF1,0xDB,0x07,0xBE,0x08,0x6B,0x1B,0x23,
+ 0xEE,0x4D,0xCC,0x9E,0x0E,0x43,0xA0,0x1E,0xDF,0x43,0x8C,0xEC,
+ 0xBE,0xBE,0x90,0xB4,0x51,0x54,0xB9,0x2F,0x7B,0x64,0x76,0x4E,
+ 0x5D,0xD4,0x2E,0xAE,0xC2,0x9E,0xAE,0x51,0x43,0x59,0xC7,0x77,
+ 0x9C,0x50,0x3C,0x0E,0xED,0x73,0x04,0x5F,0xF1,0x4C,0x76,0x2A,
+ 0xD8,0xF8,0xCF,0xFC,0x34,0x40,0xD1,0xB4,0x42,0x61,0x84,0x66,
+ 0x42,0x39,0x04,0xF8,0x68,0xB2,0x62,0xD7,0x55,0xED,0x1B,0x74,
+ 0x75,0x91,0xE0,0xC5,0x69,0xC1,0x31,0x5C,0xDB,0x7B,0x44,0x2E,
+ 0xCE,0x84,0x58,0x0D,0x1E,0x66,0x0C,0xC8,0x44,0x9E,0xFD,0x40,
+ 0x08,0x67,0x5D,0xFB,0xA7,0x76,0x8F,0x00,0x11,0x87,0xE9,0x93,
+ 0xF9,0x7D,0xC4,0xBC,0x74,0x55,0x20,0xD4,0x4A,0x41,0x2F,0x43,
+ 0x42,0x1A,0xC1,0xF2,0x97,0x17,0x49,0x27,0x37,0x6B,0x2F,0x88,
+ 0x7E,0x1C,0xA0,0xA1,0x89,0x92,0x27,0xD9,0x56,0x5A,0x71,0xC1,
+ 0x56,0x37,0x7E,0x3A,0x9D,0x05,0xE7,0xEE,0x5D,0x8F,0x82,0x17,
+ 0xBC,0xE9,0xC2,0x93,0x30,0x82,0xF9,0xF4,0xC9,0xAE,0x49,0xDB,
+ 0xD0,0x54,0xB4,0xD9,0x75,0x4D,0xFA,0x06,0xB8,0xD6,0x38,0x41,
+ 0xB7,0x1F,0x77,0xF3,
+ };
+ static unsigned char dh2048_g[]={
+ 0x02,
+ };
+
+ BIGNUM *p;
+ BIGNUM *g;
+
+ HASSL_DH *dh = NULL;
+
+ p = BN_bin2bn(dh2048_p, sizeof dh2048_p, NULL);
+ g = BN_bin2bn(dh2048_g, sizeof dh2048_g, NULL);
+
+ if (p && g)
+ dh = ssl_new_dh_fromdata(p, g);
+
+ return dh;
+#else
+ return ssl_get_dh_by_nid(NID_ffdhe2048);
+#endif
+}
+
+static HASSL_DH *ssl_get_dh_4096(void)
+{
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10101000L)
+ static unsigned char dh4096_p[]={
+ 0xDE,0x16,0x94,0xCD,0x99,0x58,0x07,0xF1,0xF7,0x32,0x96,0x11,
+ 0x04,0x82,0xD4,0x84,0x72,0x80,0x99,0x06,0xCA,0xF0,0xA3,0x68,
+ 0x07,0xCE,0x64,0x50,0xE7,0x74,0x45,0x20,0x80,0x5E,0x4D,0xAD,
+ 0xA5,0xB6,0xED,0xFA,0x80,0x6C,0x3B,0x35,0xC4,0x9A,0x14,0x6B,
+ 0x32,0xBB,0xFD,0x1F,0x17,0x8E,0xB7,0x1F,0xD6,0xFA,0x3F,0x7B,
+ 0xEE,0x16,0xA5,0x62,0x33,0x0D,0xED,0xBC,0x4E,0x58,0xE5,0x47,
+ 0x4D,0xE9,0xAB,0x8E,0x38,0xD3,0x6E,0x90,0x57,0xE3,0x22,0x15,
+ 0x33,0xBD,0xF6,0x43,0x45,0xB5,0x10,0x0A,0xBE,0x2C,0xB4,0x35,
+ 0xB8,0x53,0x8D,0xAD,0xFB,0xA7,0x1F,0x85,0x58,0x41,0x7A,0x79,
+ 0x20,0x68,0xB3,0xE1,0x3D,0x08,0x76,0xBF,0x86,0x0D,0x49,0xE3,
+ 0x82,0x71,0x8C,0xB4,0x8D,0x81,0x84,0xD4,0xE7,0xBE,0x91,0xDC,
+ 0x26,0x39,0x48,0x0F,0x35,0xC4,0xCA,0x65,0xE3,0x40,0x93,0x52,
+ 0x76,0x58,0x7D,0xDD,0x51,0x75,0xDC,0x69,0x61,0xBF,0x47,0x2C,
+ 0x16,0x68,0x2D,0xC9,0x29,0xD3,0xE6,0xC0,0x99,0x48,0xA0,0x9A,
+ 0xC8,0x78,0xC0,0x6D,0x81,0x67,0x12,0x61,0x3F,0x71,0xBA,0x41,
+ 0x1F,0x6C,0x89,0x44,0x03,0xBA,0x3B,0x39,0x60,0xAA,0x28,0x55,
+ 0x59,0xAE,0xB8,0xFA,0xCB,0x6F,0xA5,0x1A,0xF7,0x2B,0xDD,0x52,
+ 0x8A,0x8B,0xE2,0x71,0xA6,0x5E,0x7E,0xD8,0x2E,0x18,0xE0,0x66,
+ 0xDF,0xDD,0x22,0x21,0x99,0x52,0x73,0xA6,0x33,0x20,0x65,0x0E,
+ 0x53,0xE7,0x6B,0x9B,0xC5,0xA3,0x2F,0x97,0x65,0x76,0xD3,0x47,
+ 0x23,0x77,0x12,0xB6,0x11,0x7B,0x24,0xED,0xF1,0xEF,0xC0,0xE2,
+ 0xA3,0x7E,0x67,0x05,0x3E,0x96,0x4D,0x45,0xC2,0x18,0xD1,0x73,
+ 0x9E,0x07,0xF3,0x81,0x6E,0x52,0x63,0xF6,0x20,0x76,0xB9,0x13,
+ 0xD2,0x65,0x30,0x18,0x16,0x09,0x16,0x9E,0x8F,0xF1,0xD2,0x10,
+ 0x5A,0xD3,0xD4,0xAF,0x16,0x61,0xDA,0x55,0x2E,0x18,0x5E,0x14,
+ 0x08,0x54,0x2E,0x2A,0x25,0xA2,0x1A,0x9B,0x8B,0x32,0xA9,0xFD,
+ 0xC2,0x48,0x96,0xE1,0x80,0xCA,0xE9,0x22,0x17,0xBB,0xCE,0x3E,
+ 0x9E,0xED,0xC7,0xF1,0x1F,0xEC,0x17,0x21,0xDC,0x7B,0x82,0x48,
+ 0x8E,0xBB,0x4B,0x9D,0x5B,0x04,0x04,0xDA,0xDB,0x39,0xDF,0x01,
+ 0x40,0xC3,0xAA,0x26,0x23,0x89,0x75,0xC6,0x0B,0xD0,0xA2,0x60,
+ 0x6A,0xF1,0xCC,0x65,0x18,0x98,0x1B,0x52,0xD2,0x74,0x61,0xCC,
+ 0xBD,0x60,0xAE,0xA3,0xA0,0x66,0x6A,0x16,0x34,0x92,0x3F,0x41,
+ 0x40,0x31,0x29,0xC0,0x2C,0x63,0xB2,0x07,0x8D,0xEB,0x94,0xB8,
+ 0xE8,0x47,0x92,0x52,0x93,0x6A,0x1B,0x7E,0x1A,0x61,0xB3,0x1B,
+ 0xF0,0xD6,0x72,0x9B,0xF1,0xB0,0xAF,0xBF,0x3E,0x65,0xEF,0x23,
+ 0x1D,0x6F,0xFF,0x70,0xCD,0x8A,0x4C,0x8A,0xA0,0x72,0x9D,0xBE,
+ 0xD4,0xBB,0x24,0x47,0x4A,0x68,0xB5,0xF5,0xC6,0xD5,0x7A,0xCD,
+ 0xCA,0x06,0x41,0x07,0xAD,0xC2,0x1E,0xE6,0x54,0xA7,0xAD,0x03,
+ 0xD9,0x12,0xC1,0x9C,0x13,0xB1,0xC9,0x0A,0x43,0x8E,0x1E,0x08,
+ 0xCE,0x50,0x82,0x73,0x5F,0xA7,0x55,0x1D,0xD9,0x59,0xAC,0xB5,
+ 0xEA,0x02,0x7F,0x6C,0x5B,0x74,0x96,0x98,0x67,0x24,0xA3,0x0F,
+ 0x15,0xFC,0xA9,0x7D,0x3E,0x67,0xD1,0x70,0xF8,0x97,0xF3,0x67,
+ 0xC5,0x8C,0x88,0x44,0x08,0x02,0xC7,0x2B,
+ };
+ static unsigned char dh4096_g[]={
+ 0x02,
+ };
+
+ BIGNUM *p;
+ BIGNUM *g;
+
+ HASSL_DH *dh = NULL;
+
+ p = BN_bin2bn(dh4096_p, sizeof dh4096_p, NULL);
+ g = BN_bin2bn(dh4096_g, sizeof dh4096_g, NULL);
+
+ if (p && g)
+ dh = ssl_new_dh_fromdata(p, g);
+
+ return dh;
+#else
+ return ssl_get_dh_by_nid(NID_ffdhe4096);
+#endif
+}
+
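+/* Returns the built-in DH parameters whose size best matches the server
+ * key, capped by the tune.ssl.default-dh-param global setting
+ * (global_ssl.default_dh_param): >= 4096 bits selects the 4096-bit group,
+ * >= 2048 the 2048-bit one, anything else the 1024-bit one. The returned
+ * object is cached in a local_dh_* static and must not be freed by the
+ * caller.
+ */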
+static HASSL_DH *ssl_get_tmp_dh(EVP_PKEY *pkey)
+{
+ HASSL_DH *dh = NULL;
+ int type;
+ int keylen = 0;
+
+ type = pkey ? EVP_PKEY_base_id(pkey) : EVP_PKEY_NONE;
+
+ /* The keylen supplied by OpenSSL can only be 512 or 1024.
+ See ssl3_send_server_key_exchange() in ssl/s3_srvr.c
+ */
+ if (type == EVP_PKEY_RSA || type == EVP_PKEY_DSA) {
+ keylen = EVP_PKEY_bits(pkey);
+ }
+
+ if (keylen > global_ssl.default_dh_param) {
+ keylen = global_ssl.default_dh_param;
+ }
+
+ if (keylen >= 4096) {
+ if (!local_dh_4096)
+ local_dh_4096 = ssl_get_dh_4096();
+ dh = local_dh_4096;
+ }
+ else if (keylen >= 2048) {
+ if (!local_dh_2048)
+ local_dh_2048 = ssl_get_dh_2048();
+ dh = local_dh_2048;
+ }
+ else {
+ if (!local_dh_1024)
+ local_dh_1024 = ssl_get_dh_1024();
+ dh = local_dh_1024;
+ }
+
+ return dh;
+}
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+/* Returns Diffie-Hellman parameters matching the private key length
+ but not exceeding global_ssl.default_dh_param */
+static HASSL_DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen)
+{
+ EVP_PKEY *pkey = SSL_get_privatekey(ssl);
+
+ return ssl_get_tmp_dh(pkey);
+}
+#endif
+
+static int ssl_sock_set_tmp_dh(SSL_CTX *ctx, HASSL_DH *dh)
+{
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+ return SSL_CTX_set_tmp_dh(ctx, dh);
+#else
+ int retval = 0;
+ HASSL_DH_up_ref(dh);
+
+ retval = SSL_CTX_set0_tmp_dh_pkey(ctx, dh);
+
+ if (!retval)
+ HASSL_DH_free(dh);
+
+ return retval;
+#endif
+}
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey)
+{
+ HASSL_DH *dh = NULL;
+ if (pkey && (dh = ssl_get_tmp_dh(pkey))) {
+ HASSL_DH_up_ref(dh);
+ if (!SSL_CTX_set0_tmp_dh_pkey(ctx, dh))
+ HASSL_DH_free(dh);
+ }
+}
+#endif
+
+HASSL_DH *ssl_sock_get_dh_from_bio(BIO *bio)
+{
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ HASSL_DH *dh = NULL;
+ OSSL_DECODER_CTX *dctx = NULL;
+ const char *format = "PEM";
+ const char *keytype = "DH";
+
+ dctx = OSSL_DECODER_CTX_new_for_pkey(&dh, format, NULL, keytype,
+ OSSL_KEYMGMT_SELECT_DOMAIN_PARAMETERS,
+ NULL, NULL);
+
+ if (dctx == NULL || OSSL_DECODER_CTX_get_num_decoders(dctx) == 0)
+ goto end;
+
+ /* The DH parameters might not be the first section found in the PEM
+ * file so we need to iterate over all of them until we find the right
+ * one.
+ */
+ while (!BIO_eof(bio) && !dh)
+ OSSL_DECODER_from_bio(dctx, bio);
+
+end:
+ OSSL_DECODER_CTX_free(dctx);
+ return dh;
+#else
+ HASSL_DH *dh = NULL;
+
+ dh = PEM_read_bio_DHparams(bio, NULL, NULL, NULL);
+
+ return dh;
+#endif
+}
+
+static HASSL_DH * ssl_sock_get_dh_from_file(const char *filename)
+{
+ HASSL_DH *dh = NULL;
+ BIO *in = BIO_new(BIO_s_file());
+
+ if (in == NULL)
+ goto end;
+
+ if (BIO_read_filename(in, filename) <= 0)
+ goto end;
+
+ dh = ssl_sock_get_dh_from_bio(in);
+
+end:
+ if (in)
+ BIO_free(in);
+
+ ERR_clear_error();
+
+ return dh;
+}
+
+int ssl_sock_load_global_dh_param_from_file(const char *filename)
+{
+ global_dh = ssl_sock_get_dh_from_file(filename);
+
+ if (global_dh) {
+ return 0;
+ }
+
+ return -1;
+}
+#endif
+
+/* This function allocates a sni_ctx and adds it to the ckch_inst */
+static int ckch_inst_add_cert_sni(SSL_CTX *ctx, struct ckch_inst *ckch_inst,
+ struct bind_conf *s, struct ssl_bind_conf *conf,
+ struct pkey_info kinfo, char *name, int order)
+{
+ struct sni_ctx *sc;
+ int wild = 0, neg = 0;
+
+ if (*name == '!') {
+ neg = 1;
+ name++;
+ }
+ if (*name == '*') {
+ wild = 1;
+ name++;
+ }
+ /* !* filter is a nop */
+ if (neg && wild)
+ return order;
+ if (*name) {
+ int j, len;
+ len = strlen(name);
+ for (j = 0; j < len && j < trash.size; j++)
+ trash.area[j] = tolower((unsigned char)name[j]);
+ if (j >= trash.size)
+ return -1;
+ trash.area[j] = 0;
+
+ sc = malloc(sizeof(struct sni_ctx) + len + 1);
+ if (!sc)
+ return -1;
+ memcpy(sc->name.key, trash.area, len + 1);
+ SSL_CTX_up_ref(ctx);
+ sc->ctx = ctx;
+ sc->conf = conf;
+ sc->kinfo = kinfo;
+ sc->order = order++;
+ sc->neg = neg;
+ sc->wild = wild;
+ sc->name.node.leaf_p = NULL;
+ sc->ckch_inst = ckch_inst;
+ LIST_APPEND(&ckch_inst->sni_ctx, &sc->by_ckch_inst);
+ }
+ return order;
+}
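+
+/* For example, crt-list filters such as "*.example.com !static.example.com"
+ * on a certificate line produce one wildcard sni_ctx and one negated one;
+ * the negated entry is honoured during the SNI lookup performed in
+ * ssl_sock_switchctx_cbk() above (hostnames here are illustrative).
+ */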
+
+/*
+ * Insert the sni_ctxs listed in the ckch_inst into the bind_conf's sni_ctx tree.
+ * This function can't return an error.
+ *
+ * *CAUTION*: The caller must lock the sni tree if called in multithreading mode
+ */
+void ssl_sock_load_cert_sni(struct ckch_inst *ckch_inst, struct bind_conf *bind_conf)
+{
+
+ struct sni_ctx *sc0, *sc0b, *sc1;
+ struct ebmb_node *node;
+
+ list_for_each_entry_safe(sc0, sc0b, &ckch_inst->sni_ctx, by_ckch_inst) {
+
+ /* ignore if sc0 was already inserted in a tree */
+ if (sc0->name.node.leaf_p)
+ continue;
+
+ /* Check for duplicates. */
+ if (sc0->wild)
+ node = ebst_lookup(&bind_conf->sni_w_ctx, (char *)sc0->name.key);
+ else
+ node = ebst_lookup(&bind_conf->sni_ctx, (char *)sc0->name.key);
+
+ for (; node; node = ebmb_next_dup(node)) {
+ sc1 = ebmb_entry(node, struct sni_ctx, name);
+ if (sc1->ctx == sc0->ctx && sc1->conf == sc0->conf
+ && sc1->neg == sc0->neg && sc1->wild == sc0->wild) {
+ /* it's a duplicate, we should remove and free it */
+ LIST_DELETE(&sc0->by_ckch_inst);
+ SSL_CTX_free(sc0->ctx);
+ ha_free(&sc0);
+ break;
+ }
+ }
+
+ /* if duplicate, ignore the insertion */
+ if (!sc0)
+ continue;
+
+ if (sc0->wild)
+ ebst_insert(&bind_conf->sni_w_ctx, &sc0->name);
+ else
+ ebst_insert(&bind_conf->sni_ctx, &sc0->name);
+ }
+
+ /* replace the default_ctx if required with the instance's ctx. */
+ if (ckch_inst->is_default) {
+ SSL_CTX_free(bind_conf->default_ctx);
+ SSL_CTX_up_ref(ckch_inst->ctx);
+ bind_conf->default_ctx = ckch_inst->ctx;
+ bind_conf->default_inst = ckch_inst;
+ }
+}
+
+/*
+ * tree used to store the ckchs ordered by filename/bundle name
+ */
+struct eb_root ckchs_tree = EB_ROOT_UNIQUE;
+
+/* tree of crtlist (crt-list/directory) */
+struct eb_root crtlists_tree = EB_ROOT_UNIQUE;
+
+/* Loads Diffie-Hellman parameter from a ckchs to an SSL_CTX.
+ * If there is no DH parameter available in the ckchs, the global
+ * DH parameter is loaded into the SSL_CTX and if there is no
+ * DH parameter available in ckchs nor in global, the default
+ * DH parameters are applied on the SSL_CTX.
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+#ifndef OPENSSL_NO_DH
+static int ssl_sock_load_dh_params(SSL_CTX *ctx, const struct cert_key_and_chain *ckch,
+ const char *path, char **err)
+{
+ int ret = 0;
+ HASSL_DH *dh = NULL;
+
+ if (ckch && ckch->dh) {
+ dh = ckch->dh;
+ if (!ssl_sock_set_tmp_dh(ctx, dh)) {
+ memprintf(err, "%sunable to load the DH parameter specified in '%s'",
+ err && *err ? *err : "", path);
+ memprintf(err, "%s, DH ciphers won't be available.\n",
+ err && *err ? *err : "");
+ ret |= ERR_WARN;
+ goto end;
+ }
+
+ if (ssl_dh_ptr_index >= 0) {
+ /* store a pointer to the DH params to avoid complaining about
+ ssl-default-dh-param not being set for this SSL_CTX */
+ SSL_CTX_set_ex_data(ctx, ssl_dh_ptr_index, dh);
+ }
+ }
+ else if (global_dh) {
+ if (!ssl_sock_set_tmp_dh(ctx, global_dh)) {
+ memprintf(err, "%sunable to use the global DH parameter for certificate '%s'",
+ err && *err ? *err : "", path);
+ memprintf(err, "%s, DH ciphers won't be available.\n",
+ err && *err ? *err : "");
+ ret |= ERR_WARN;
+ goto end;
+ }
+ }
+ else {
+ /* Clear openssl global errors stack */
+ ERR_clear_error();
+
+ /* We do not want DHE ciphers to be added to the cipher list
+ * unless there is an explicit global dh option in the conf.
+ */
+ if (global_ssl.default_dh_param) {
+ if (global_ssl.default_dh_param <= 1024) {
+ /* we are limited to DH parameter of 1024 bits anyway */
+ if (local_dh_1024 == NULL)
+ local_dh_1024 = ssl_get_dh_1024();
+
+ if (local_dh_1024 == NULL) {
+ memprintf(err, "%sunable to load default 1024 bits DH parameter for certificate '%s'.\n",
+ err && *err ? *err : "", path);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!ssl_sock_set_tmp_dh(ctx, local_dh_1024)) {
+ memprintf(err, "%sunable to load default 1024 bits DH parameter for certificate '%s'.\n",
+ err && *err ? *err : "", path);
+ memprintf(err, "%s, DH ciphers won't be available.\n",
+ err && *err ? *err : "");
+ ret |= ERR_WARN;
+ goto end;
+ }
+ }
+ else {
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+ SSL_CTX_set_tmp_dh_callback(ctx, ssl_get_tmp_dh_cbk);
+#else
+ ssl_sock_set_tmp_dh_from_pkey(ctx, ckch ? ckch->key : NULL);
+#endif
+ }
+ }
+ }
+
+end:
+ ERR_clear_error();
+ return ret;
+}
+#endif
+
+
+/* Load a certificate chain into an SSL context.
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The caller is responsible for freeing the newly built or newly refcounted
+ * find_chain element.
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+static int ssl_sock_load_cert_chain(const char *path, const struct cert_key_and_chain *ckch,
+ SSL_CTX *ctx, STACK_OF(X509) **find_chain, char **err)
+{
+ int errcode = 0;
+
+ if (find_chain == NULL) {
+ errcode |= ERR_FATAL;
+ goto end;
+ }
+
+ if (!SSL_CTX_use_certificate(ctx, ckch->cert)) {
+ memprintf(err, "%sunable to load SSL certificate into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (ckch->chain) {
+ *find_chain = X509_chain_up_ref(ckch->chain);
+ } else {
+ /* Find Certificate Chain in global */
+ struct issuer_chain *issuer;
+ issuer = ssl_get0_issuer_chain(ckch->cert);
+ if (issuer)
+ *find_chain = X509_chain_up_ref(issuer->chain);
+ }
+
+ if (!*find_chain) {
+ /* always put a null chain stack in the SSL_CTX so it does not
+ * try to build the chain from the verify store */
+ *find_chain = sk_X509_new_null();
+ }
+
+ /* Load all certs in the ckch into the ctx_chain for the ssl_ctx */
+#ifdef SSL_CTX_set1_chain
+ if (!SSL_CTX_set1_chain(ctx, *find_chain)) {
+ memprintf(err, "%sunable to load chain certificate into SSL Context '%s'. Make sure you are linking against Openssl >= 1.0.2.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+#else
+ { /* legacy compat (< openssl 1.0.2) */
+ X509 *ca;
+ while ((ca = sk_X509_shift(*find_chain)))
+ if (!SSL_CTX_add_extra_chain_cert(ctx, ca)) {
+ memprintf(err, "%sunable to load chain certificate into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ X509_free(ca);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+#endif
+
+#ifdef SSL_CTX_build_cert_chain
+ /* remove the Root CA from the SSL_CTX if the option is activated */
+ if (global_ssl.skip_self_issued_ca) {
+ if (!SSL_CTX_build_cert_chain(ctx, SSL_BUILD_CHAIN_FLAG_NO_ROOT|SSL_BUILD_CHAIN_FLAG_UNTRUSTED|SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR)) {
+ memprintf(err, "%sunable to load chain certificate into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+#endif
+
+end:
+ return errcode;
+}
+
+
+/* Loads the info in ckch into ctx
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+static int ssl_sock_put_ckch_into_ctx(const char *path, const struct cert_key_and_chain *ckch, SSL_CTX *ctx, char **err)
+{
+ int errcode = 0;
+ STACK_OF(X509) *find_chain = NULL;
+
+ if (SSL_CTX_use_PrivateKey(ctx, ckch->key) <= 0) {
+ memprintf(err, "%sunable to load SSL private key into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ return errcode;
+ }
+
+ /* Load certificate chain */
+ errcode |= ssl_sock_load_cert_chain(path, ckch, ctx, &find_chain, err);
+ if (errcode & ERR_CODE)
+ goto end;
+
+#ifndef OPENSSL_NO_DH
+ /* store a NULL pointer to indicate we have not yet loaded
+ a custom DH param file */
+ if (ssl_dh_ptr_index >= 0) {
+ SSL_CTX_set_ex_data(ctx, ssl_dh_ptr_index, NULL);
+ }
+
+ errcode |= ssl_sock_load_dh_params(ctx, ckch, path, err);
+ if (errcode & ERR_CODE) {
+ memprintf(err, "%sunable to load DH parameters from file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+#endif
+
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+ if (sctl_ex_index >= 0 && ckch->sctl) {
+ if (ssl_sock_load_sctl(ctx, ckch->sctl) < 0) {
+ memprintf(err, "%s '%s.sctl' is present but cannot be read or parsed'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+#endif
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) || defined OPENSSL_IS_BORINGSSL)
+ /* Load OCSP Info into context */
+ if (ckch->ocsp_response) {
+ if (ssl_sock_load_ocsp(ctx, ckch, find_chain) < 0) {
+ memprintf(err, "%s '%s.ocsp' is present and activates OCSP but it is impossible to compute the OCSP certificate ID (maybe the issuer could not be found)'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+#endif
+
+ end:
+ sk_X509_pop_free(find_chain, X509_free);
+ return errcode;
+}
+
+
+/* Loads the info of a ckch built out of a backend certificate into an SSL ctx
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+static int ssl_sock_put_srv_ckch_into_ctx(const char *path, const struct cert_key_and_chain *ckch,
+ SSL_CTX *ctx, char **err)
+{
+ int errcode = 0;
+ STACK_OF(X509) *find_chain = NULL;
+
+ /* Load the private key */
+ if (SSL_CTX_use_PrivateKey(ctx, ckch->key) <= 0) {
+ memprintf(err, "%sunable to load SSL private key into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ }
+
+ /* Load certificate chain */
+ errcode |= ssl_sock_load_cert_chain(path, ckch, ctx, &find_chain, err);
+ if (errcode & ERR_CODE)
+ goto end;
+
+ if (SSL_CTX_check_private_key(ctx) <= 0) {
+ memprintf(err, "%sinconsistencies between private key and certificate loaded from PEM file '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ }
+
+end:
+ sk_X509_pop_free(find_chain, X509_free);
+ return errcode;
+}
+
+
+/*
+ * This function allocates a ckch_inst and creates its SNIs
+ *
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ */
+int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf,
+ struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, struct ckch_inst **ckchi, char **err)
+{
+ SSL_CTX *ctx;
+ int i;
+ int order = 0;
+ X509_NAME *xname;
+ char *str;
+ EVP_PKEY *pkey;
+ struct pkey_info kinfo = { .sig = TLSEXT_signature_anonymous, .bits = 0 };
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ STACK_OF(GENERAL_NAME) *names;
+#endif
+ struct cert_key_and_chain *ckch;
+ struct ckch_inst *ckch_inst = NULL;
+ int errcode = 0;
+
+ *ckchi = NULL;
+
+ if (!ckchs || !ckchs->ckch)
+ return ERR_FATAL;
+
+ ckch = ckchs->ckch;
+
+ ctx = SSL_CTX_new(SSLv23_server_method());
+ if (!ctx) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ errcode |= ssl_sock_put_ckch_into_ctx(path, ckch, ctx, err);
+ if (errcode & ERR_CODE)
+ goto error;
+
+ ckch_inst = ckch_inst_new();
+ if (!ckch_inst) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ pkey = X509_get_pubkey(ckch->cert);
+ if (pkey) {
+ kinfo.bits = EVP_PKEY_bits(pkey);
+ switch(EVP_PKEY_base_id(pkey)) {
+ case EVP_PKEY_RSA:
+ kinfo.sig = TLSEXT_signature_rsa;
+ break;
+ case EVP_PKEY_EC:
+ kinfo.sig = TLSEXT_signature_ecdsa;
+ break;
+ case EVP_PKEY_DSA:
+ kinfo.sig = TLSEXT_signature_dsa;
+ break;
+ }
+ EVP_PKEY_free(pkey);
+ }
+
+ if (fcount) {
+ while (fcount--) {
+ order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, sni_filter[fcount], order);
+ if (order < 0) {
+ memprintf(err, "%sunable to create a sni context.\n", err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ }
+ else {
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ names = X509_get_ext_d2i(ckch->cert, NID_subject_alt_name, NULL, NULL);
+ if (names) {
+ for (i = 0; i < sk_GENERAL_NAME_num(names); i++) {
+ GENERAL_NAME *name = sk_GENERAL_NAME_value(names, i);
+ if (name->type == GEN_DNS) {
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.dNSName) >= 0) {
+ order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, str, order);
+ OPENSSL_free(str);
+ if (order < 0) {
+ memprintf(err, "%sunable to create a sni context.\n", err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ }
+ }
+ sk_GENERAL_NAME_pop_free(names, GENERAL_NAME_free);
+ }
+#endif /* SSL_CTRL_SET_TLSEXT_HOSTNAME */
+ xname = X509_get_subject_name(ckch->cert);
+ i = -1;
+ while ((i = X509_NAME_get_index_by_NID(xname, NID_commonName, i)) != -1) {
+ X509_NAME_ENTRY *entry = X509_NAME_get_entry(xname, i);
+ ASN1_STRING *value;
+
+ value = X509_NAME_ENTRY_get_data(entry);
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, value) >= 0) {
+ order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, str, order);
+ OPENSSL_free(str);
+ if (order < 0) {
+ memprintf(err, "%sunable to create a sni context.\n", err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ }
+ }
+ /* we must not free the SSL_CTX anymore below, since it's already in
+ * the tree, so it will be discovered and cleaned in time.
+ */
+
+#ifndef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ if (bind_conf->default_ctx) {
+ memprintf(err, "%sthis version of openssl cannot load multiple SSL certificates.\n",
+ err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+#endif
+ if (!bind_conf->default_ctx) {
+ bind_conf->default_ctx = ctx;
+ bind_conf->default_ssl_conf = ssl_conf;
+ ckch_inst->is_default = 1;
+ SSL_CTX_up_ref(ctx);
+ bind_conf->default_inst = ckch_inst;
+ }
+
+ /* Always keep a reference to the newly constructed SSL_CTX in the
+ * instance. This way if the instance has no SNIs, the SSL_CTX will
+ * still be linked. */
+ SSL_CTX_up_ref(ctx);
+ ckch_inst->ctx = ctx;
+
+ /* everything succeeded, the ckch instance can be used */
+ ckch_inst->bind_conf = bind_conf;
+ ckch_inst->ssl_conf = ssl_conf;
+ ckch_inst->ckch_store = ckchs;
+
+ SSL_CTX_free(ctx); /* we need to free the ctx since we incremented the refcount where it's used */
+
+ *ckchi = ckch_inst;
+ return errcode;
+
+error:
+ /* free the allocated sni_ctxs */
+ if (ckch_inst) {
+ if (ckch_inst->is_default)
+ SSL_CTX_free(ctx);
+
+ ckch_inst_free(ckch_inst);
+ ckch_inst = NULL;
+ }
+ SSL_CTX_free(ctx);
+
+ return errcode;
+}
+
+
+/*
+ * This function allocates a ckch_inst that will be used on the backend side
+ * (server line)
+ *
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ */
+int ckch_inst_new_load_srv_store(const char *path, struct ckch_store *ckchs,
+ struct ckch_inst **ckchi, char **err)
+{
+ SSL_CTX *ctx;
+ struct cert_key_and_chain *ckch;
+ struct ckch_inst *ckch_inst = NULL;
+ int errcode = 0;
+
+ *ckchi = NULL;
+
+ if (!ckchs || !ckchs->ckch)
+ return ERR_FATAL;
+
+ ckch = ckchs->ckch;
+
+ ctx = SSL_CTX_new(SSLv23_client_method());
+ if (!ctx) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ errcode |= ssl_sock_put_srv_ckch_into_ctx(path, ckch, ctx, err);
+ if (errcode & ERR_CODE)
+ goto error;
+
+ ckch_inst = ckch_inst_new();
+ if (!ckch_inst) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ /* everything succeeded, the ckch instance can be used */
+ ckch_inst->bind_conf = NULL;
+ ckch_inst->ssl_conf = NULL;
+ ckch_inst->ckch_store = ckchs;
+ ckch_inst->ctx = ctx;
+ ckch_inst->is_server_instance = 1;
+
+ *ckchi = ckch_inst;
+ return errcode;
+
+error:
+ SSL_CTX_free(ctx);
+
+ return errcode;
+}
+
+/* Returns a set of ERR_* flags possibly with an error in <err>. */
+static int ssl_sock_load_ckchs(const char *path, struct ckch_store *ckchs,
+ struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf,
+ char **sni_filter, int fcount, struct ckch_inst **ckch_inst, char **err)
+{
+ int errcode = 0;
+
+ /* we found the ckchs in the tree, we can use it directly */
+ errcode |= ckch_inst_new_load_store(path, ckchs, bind_conf, ssl_conf, sni_filter, fcount, ckch_inst, err);
+
+ if (errcode & ERR_CODE)
+ return errcode;
+
+ ssl_sock_load_cert_sni(*ckch_inst, bind_conf);
+
+ /* success: add the instance to the ckch_store's list of instances */
+ LIST_APPEND(&ckchs->ckch_inst, &((*ckch_inst)->by_ckchs));
+ return errcode;
+}
+
+/* This function generates a <struct ckch_inst *> for a <struct server *>, and
+ * fills the SSL_CTX of the server.
+ *
+ * Returns a set of ERR_* flags possibly with an error in <err>. */
+static int ssl_sock_load_srv_ckchs(const char *path, struct ckch_store *ckchs,
+ struct server *server, struct ckch_inst **ckch_inst, char **err)
+{
+ int errcode = 0;
+
+ /* we found the ckchs in the tree, we can use it directly */
+ errcode |= ckch_inst_new_load_srv_store(path, ckchs, ckch_inst, err);
+
+ if (errcode & ERR_CODE)
+ return errcode;
+
+ (*ckch_inst)->server = server;
+ /* Keep the reference to the SSL_CTX in the server. */
+ SSL_CTX_up_ref((*ckch_inst)->ctx);
+ server->ssl_ctx.ctx = (*ckch_inst)->ctx;
+ /* succeed, add the instance to the ckch_store's list of instance */
+ LIST_APPEND(&ckchs->ckch_inst, &((*ckch_inst)->by_ckchs));
+ return errcode;
+}
+
+
+
+
+/* Make sure openssl opens /dev/urandom before the chroot. The work is only
+ * done once. Zero is returned if the operation fails. No error is returned
+ * if the random generator is reported as not implemented, because we expect
+ * that openssl will fall back to another method when needed.
+ */
+int ssl_initialize_random(void)
+{
+ unsigned char random;
+ static int random_initialized = 0;
+
+ if (!random_initialized && RAND_bytes(&random, 1) != 0)
+ random_initialized = 1;
+
+ return random_initialized;
+}
+
+/* Load a crt-list file, this is done in 2 parts:
+ * - store the content of the file in a crtlist structure with crtlist_entry structures
+ * - generate the instances by iterating on entries in the crtlist struct
+ *
+ * Nothing is locked here; this function is used in the configuration parser.
+ *
+ * Returns a set of ERR_* flags possibly with an error in <err>.
+ */
+int ssl_sock_load_cert_list_file(char *file, int dir, struct bind_conf *bind_conf, struct proxy *curproxy, char **err)
+{
+ struct crtlist *crtlist = NULL;
+ struct ebmb_node *eb;
+ struct crtlist_entry *entry = NULL;
+ struct bind_conf_list *bind_conf_node = NULL;
+ int cfgerr = 0;
+ char *end;
+
+ bind_conf_node = malloc(sizeof(*bind_conf_node));
+ if (!bind_conf_node) {
+ memprintf(err, "%sCan't alloc memory!\n", err && *err ? *err : "");
+ cfgerr |= ERR_FATAL | ERR_ALERT;
+ goto error;
+ }
+ bind_conf_node->next = NULL;
+ bind_conf_node->bind_conf = bind_conf;
+
+ /* strip trailing slashes, including first one */
+ for (end = file + strlen(file) - 1; end >= file && *end == '/'; end--)
+ *end = 0;
+
+ /* look for an existing crtlist or create one */
+ eb = ebst_lookup(&crtlists_tree, file);
+ if (eb) {
+ crtlist = ebmb_entry(eb, struct crtlist, node);
+ } else {
+ /* load a crt-list OR a directory */
+ if (dir)
+ cfgerr |= crtlist_load_cert_dir(file, bind_conf, &crtlist, err);
+ else
+ cfgerr |= crtlist_parse_file(file, bind_conf, curproxy, &crtlist, err);
+
+ if (!(cfgerr & ERR_CODE))
+ ebst_insert(&crtlists_tree, &crtlist->node);
+ }
+
+ if (cfgerr & ERR_CODE) {
+ cfgerr |= ERR_FATAL | ERR_ALERT;
+ goto error;
+ }
+
+ /* generate a ckch instance from each crtlist_entry */
+ list_for_each_entry(entry, &crtlist->ord_entries, by_crtlist) {
+ struct ckch_store *store;
+ struct ckch_inst *ckch_inst = NULL;
+
+ store = entry->node.key;
+ cfgerr |= ssl_sock_load_ckchs(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &ckch_inst, err);
+ if (cfgerr & ERR_CODE) {
+ memprintf(err, "error processing line %d in file '%s' : %s", entry->linenum, file, *err);
+ goto error;
+ }
+ LIST_APPEND(&entry->ckch_inst, &ckch_inst->by_crtlist_entry);
+ ckch_inst->crtlist_entry = entry;
+ }
+
+ /* add the bind_conf to the list */
+ bind_conf_node->next = crtlist->bind_conf;
+ crtlist->bind_conf = bind_conf_node;
+
+ return cfgerr;
+error:
+ {
+ struct crtlist_entry *lastentry;
+ struct ckch_inst *inst, *s_inst;
+
+ lastentry = entry; /* which entry we tried to generate last */
+ if (lastentry) {
+ list_for_each_entry(entry, &crtlist->ord_entries, by_crtlist) {
+ if (entry == lastentry) /* last entry we tried to generate, no need to go further */
+ break;
+
+ list_for_each_entry_safe(inst, s_inst, &entry->ckch_inst, by_crtlist_entry) {
+
+ /* this was not generated for this bind_conf, skip */
+ if (inst->bind_conf != bind_conf)
+ continue;
+
+ /* free the sni_ctx and instance */
+ ckch_inst_free(inst);
+ }
+ }
+ }
+ free(bind_conf_node);
+ }
+ return cfgerr;
+}
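+
+/* Informal crt-list sketch (file names and options below are hypothetical;
+ * see the HAProxy documentation for the complete syntax):
+ *
+ *   common.pem
+ *   site1.pem [ciphers ECDHE-RSA-AES256-GCM-SHA384] www.site1.com
+ *   site2.pem *.site2.com !static.site2.com
+ *
+ * Each line becomes a crtlist_entry carrying an optional ssl_conf (the
+ * bracketed options) and SNI filters, which the loop above turns into
+ * ckch instances bound to this bind_conf. */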
+
+/* Returns a set of ERR_* flags possibly with an error in <err>. */
+int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err)
+{
+ struct stat buf;
+ int cfgerr = 0;
+ struct ckch_store *ckchs;
+ struct ckch_inst *ckch_inst = NULL;
+ int found = 0; /* did we find a file to load? */
+
+ if ((ckchs = ckchs_lookup(path))) {
+ /* we found the ckchs in the tree, we can use it directly */
+ cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ found++;
+ } else if (stat(path, &buf) == 0) {
+ found++;
+ if (S_ISDIR(buf.st_mode) == 0) {
+ ckchs = ckchs_load_cert_file(path, err);
+ if (!ckchs)
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ } else {
+ cfgerr |= ssl_sock_load_cert_list_file(path, 1, bind_conf, bind_conf->frontend, err);
+ }
+ } else {
+ /* stat failed, could be a bundle */
+ if (global_ssl.extra_files & SSL_GF_BUNDLE) {
+ char fp[MAXPATHLEN+1] = {0};
+ int n = 0;
+
+ /* Load all possible certs and keys in separate ckch_store */
+ for (n = 0; n < SSL_SOCK_NUM_KEYTYPES; n++) {
+ struct stat buf;
+ int ret;
+
+ ret = snprintf(fp, sizeof(fp), "%s.%s", path, SSL_SOCK_KEYTYPE_NAMES[n]);
+ if (ret < 0 || ret >= sizeof(fp))
+ continue; /* snprintf error or truncated output: skip this keytype */
+
+ if ((ckchs = ckchs_lookup(fp))) {
+ cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ found++;
+ } else {
+ if (stat(fp, &buf) == 0) {
+ found++;
+ ckchs = ckchs_load_cert_file(fp, err);
+ if (!ckchs)
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ }
+ }
+ }
+#if HA_OPENSSL_VERSION_NUMBER < 0x10101000L
+ if (found) {
+ memprintf(err, "%sCan't load '%s'. Loading a multi certificates bundle requires OpenSSL >= 1.1.1\n",
+ err && *err ? *err : "", path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif
+ }
+ }
+ if (!found) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+ return cfgerr;
+}
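+
+/* Bundle sketch: with "crt site.pem" on a bind line and no such file on
+ * disk, the loop above probes "site.pem.<keytype>" for each entry of
+ * SSL_SOCK_KEYTYPE_NAMES (e.g. "rsa", "ecdsa", "dsa") and loads every file
+ * found into its own ckch_store. The file name is a hypothetical example. */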
+
+
+/* Create a full ssl context and ckch instance that will be used for a specific
+ * backend server (server configuration line).
+ * Returns a set of ERR_* flags possibly with an error in <err>.
+ */
+int ssl_sock_load_srv_cert(char *path, struct server *server, int create_if_none, char **err)
+{
+ struct stat buf;
+ int cfgerr = 0;
+ struct ckch_store *ckchs;
+ int found = 0; /* did we find a file to load? */
+
+ if ((ckchs = ckchs_lookup(path))) {
+ /* we found the ckchs in the tree, we can use it directly */
+ cfgerr |= ssl_sock_load_srv_ckchs(path, ckchs, server, &server->ssl_ctx.inst, err);
+ found++;
+ } else {
+ if (!create_if_none) {
+ memprintf(err, "%sunable to stat SSL certificate '%s'.\n",
+ err && *err ? *err : "", path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (stat(path, &buf) == 0) {
+ /* We do not manage directories on the backend side. */
+ if (S_ISDIR(buf.st_mode) == 0) {
+ ++found;
+ ckchs = ckchs_load_cert_file(path, err);
+ if (!ckchs)
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ cfgerr |= ssl_sock_load_srv_ckchs(path, ckchs, server, &server->ssl_ctx.inst, err);
+ }
+ }
+ }
+ if (!found) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+out:
+ return cfgerr;
+}
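+
+/* Configuration sketch (hypothetical address and file names) reaching the
+ * function above:
+ *
+ *   server s1 192.0.2.10:443 ssl crt client.pem verify required ca-file ca.pem
+ *
+ * The "crt" keyword on the server line is what makes
+ * ssl_sock_prepare_srv_ctx() call ssl_sock_load_srv_cert(). */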
+
+/* Create an initial CTX used to start the SSL connection before switchctx */
+static int
+ssl_sock_initial_ctx(struct bind_conf *bind_conf)
+{
+ SSL_CTX *ctx = NULL;
+ long options =
+ SSL_OP_ALL | /* all known workarounds for bugs */
+ SSL_OP_NO_SSLv2 |
+ SSL_OP_NO_COMPRESSION |
+ SSL_OP_SINGLE_DH_USE |
+ SSL_OP_SINGLE_ECDH_USE |
+ SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION |
+ SSL_OP_PRIORITIZE_CHACHA |
+ SSL_OP_CIPHER_SERVER_PREFERENCE;
+ long mode =
+ SSL_MODE_ENABLE_PARTIAL_WRITE |
+ SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER |
+ SSL_MODE_RELEASE_BUFFERS |
+ SSL_MODE_SMALL_BUFFERS;
+ struct tls_version_filter *conf_ssl_methods = &bind_conf->ssl_conf.ssl_methods;
+ int i, min, max, hole;
+ int flags = MC_SSL_O_ALL;
+ int cfgerr = 0;
+ const int default_min_ver = CONF_TLSV12;
+
+ ctx = SSL_CTX_new(SSLv23_server_method());
+ bind_conf->initial_ctx = ctx;
+
+ if (conf_ssl_methods->flags && (conf_ssl_methods->min || conf_ssl_methods->max))
+ ha_warning("Proxy '%s': no-sslv3/no-tlsv1x are ignored for bind '%s' at [%s:%d]. "
+ "Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ else
+ flags = conf_ssl_methods->flags;
+
+ min = conf_ssl_methods->min;
+ max = conf_ssl_methods->max;
+
+ /* the default minimum is TLSv1.2 */
+ if (!min) {
+ if (!max || (max >= default_min_ver)) {
+ min = default_min_ver;
+ } else {
+ ha_warning("Proxy '%s': Ambiguous configuration for bind '%s' at [%s:%d]: the ssl-min-ver value is not configured and the ssl-max-ver value is lower than the default ssl-min-ver value (%s). "
+ "Setting the ssl-min-ver to %s. Use 'ssl-min-ver' to fix this.\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line, methodVersions[default_min_ver].name, methodVersions[max].name);
+ min = max;
+ }
+ }
+ /* The real min and max are determined from the configuration and openssl's capabilities */
+ if (min)
+ flags |= (methodVersions[min].flag - 1);
+ if (max)
+ flags |= ~((methodVersions[max].flag << 1) - 1);
+ /* find min, max and holes */
+ min = max = CONF_TLSV_NONE;
+ hole = 0;
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ /* version is in openssl && version not disabled in the configuration */
+ if (methodVersions[i].option && !(flags & methodVersions[i].flag)) {
+ if (min) {
+ if (hole) {
+ ha_warning("Proxy '%s': SSL/TLS versions range not contiguous for bind '%s' at [%s:%d]. "
+ "Hole find for %s. Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line,
+ methodVersions[hole].name);
+ hole = 0;
+ }
+ max = i;
+ }
+ else {
+ min = max = i;
+ }
+ }
+ else {
+ if (min)
+ hole = i;
+ }
+ if (!min) {
+ ha_alert("Proxy '%s': all SSL/TLS versions are disabled for bind '%s' at [%s:%d].\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr += 1;
+ }
+ /* save real min/max in bind_conf */
+ conf_ssl_methods->min = min;
+ conf_ssl_methods->max = max;
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+ /* Keep force-xxx implementation as it is in older haproxy. It's a
+ precautionary measure to avoid any surprise with older openssl versions. */
+ if (min == max)
+ methodVersions[min].ctx_set_version(ctx, SET_SERVER);
+ else
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++) {
+ /* clear every version flags in case SSL_CTX_new()
+ * returns an SSL_CTX with disabled versions */
+ SSL_CTX_clear_options(ctx, methodVersions[i].option);
+
+ if (flags & methodVersions[i].flag)
+ options |= methodVersions[i].option;
+
+ }
+#else /* openssl >= 1.1.0 */
+ /* setting the max_version is required to cap the TLS version or to enable newer ones (TLSv1.3) */
+ methodVersions[min].ctx_set_version(ctx, SET_MIN);
+ methodVersions[max].ctx_set_version(ctx, SET_MAX);
+#endif
+
+ if (bind_conf->ssl_options & BC_SSL_O_NO_TLS_TICKETS)
+ options |= SSL_OP_NO_TICKET;
+ if (bind_conf->ssl_options & BC_SSL_O_PREF_CLIE_CIPH)
+ options &= ~SSL_OP_CIPHER_SERVER_PREFERENCE;
+
+#ifdef SSL_OP_NO_RENEGOTIATION
+ options |= SSL_OP_NO_RENEGOTIATION;
+#endif
+
+ SSL_CTX_set_options(ctx, options);
+
+#ifdef SSL_MODE_ASYNC
+ if (global_ssl.async)
+ mode |= SSL_MODE_ASYNC;
+#endif
+ SSL_CTX_set_mode(ctx, mode);
+ if (global_ssl.life_time)
+ SSL_CTX_set_timeout(ctx, global_ssl.life_time);
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+# ifdef OPENSSL_IS_BORINGSSL
+ SSL_CTX_set_select_certificate_cb(ctx, ssl_sock_switchctx_cbk);
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk);
+# elif defined(HAVE_SSL_CLIENT_HELLO_CB)
+# if defined(SSL_OP_NO_ANTI_REPLAY)
+ if (bind_conf->ssl_conf.early_data)
+ SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY);
+# endif /* ! SSL_OP_NO_ANTI_REPLAY */
+ SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL);
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk);
+# else /* ! OPENSSL_IS_BORINGSSL && ! HAVE_SSL_CLIENT_HELLO_CB */
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_cbk);
+# endif
+ SSL_CTX_set_tlsext_servername_arg(ctx, bind_conf);
+#endif /* ! SSL_CTRL_SET_TLSEXT_HOSTNAME */
+ return cfgerr;
+}
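+
+/* Version-selection sketch matching the logic above (hypothetical bind line):
+ *
+ *   bind :443 ssl crt site.pem ssl-min-ver TLSv1.2 ssl-max-ver TLSv1.3
+ *
+ * Without an explicit ssl-min-ver, the default minimum computed above is
+ * TLSv1.2 (default_min_ver), possibly lowered to ssl-max-ver with a warning. */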
+
+
+static inline void sh_ssl_sess_free_blocks(struct shared_block *first, struct shared_block *block)
+{
+ if (first == block) {
+ struct sh_ssl_sess_hdr *sh_ssl_sess = (struct sh_ssl_sess_hdr *)first->data;
+ if (first->len > 0)
+ sh_ssl_sess_tree_delete(sh_ssl_sess);
+ }
+}
+
+/* return first block from sh_ssl_sess */
+static inline struct shared_block *sh_ssl_sess_first_block(struct sh_ssl_sess_hdr *sh_ssl_sess)
+{
+ return (struct shared_block *)((unsigned char *)sh_ssl_sess - ((struct shared_block *)NULL)->data);
+
+}
+
+/* store a session into the cache
+ * s_id : session id padded with zero to SSL_MAX_SSL_SESSION_ID_LENGTH
+ * data: asn1 encoded session
+ * data_len: asn1 encoded session length
+ * Returns 1 if the session was stored (0 otherwise)
+ */
+static int sh_ssl_sess_store(unsigned char *s_id, unsigned char *data, int data_len)
+{
+ struct shared_block *first;
+ struct sh_ssl_sess_hdr *sh_ssl_sess, *oldsh_ssl_sess;
+
+ first = shctx_row_reserve_hot(ssl_shctx, NULL, data_len + sizeof(struct sh_ssl_sess_hdr));
+ if (!first) {
+ /* Could not retrieve enough free blocks to store that session */
+ return 0;
+ }
+
+ /* STORE the key in the first elem */
+ sh_ssl_sess = (struct sh_ssl_sess_hdr *)first->data;
+ memcpy(sh_ssl_sess->key_data, s_id, SSL_MAX_SSL_SESSION_ID_LENGTH);
+ first->len = sizeof(struct sh_ssl_sess_hdr);
+
+ /* the insert returns the already existing node, or the
+ current node if none exists; it never returns NULL */
+ oldsh_ssl_sess = sh_ssl_sess_tree_insert(sh_ssl_sess);
+ if (oldsh_ssl_sess != sh_ssl_sess) {
+ /* NOTE: the row cannot be in use here because the read & write functions are locked */
+ /* release the reserved row */
+ first->len = 0; /* the len must be reset to zero so that the release callback is not called on it */
+ shctx_row_dec_hot(ssl_shctx, first);
+ /* replace the previous session already in the tree */
+ sh_ssl_sess = oldsh_ssl_sess;
+ /* ignore the previous session data, only use the header */
+ first = sh_ssl_sess_first_block(sh_ssl_sess);
+ shctx_row_inc_hot(ssl_shctx, first);
+ first->len = sizeof(struct sh_ssl_sess_hdr);
+ }
+
+ if (shctx_row_data_append(ssl_shctx, first, NULL, data, data_len) < 0) {
+ shctx_row_dec_hot(ssl_shctx, first);
+ return 0;
+ }
+
+ shctx_row_dec_hot(ssl_shctx, first);
+
+ return 1;
+}
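+
+/* Layout note for the function above: the first block of the row carries the
+ * sh_ssl_sess_hdr whose key_data holds the zero-padded session id (the tree
+ * key), and the ASN1-encoded session is appended to the same row through
+ * shctx_row_data_append(). */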
+
+/* SSL callback used when a new session is created while connecting to a server */
+static int ssl_sess_new_srv_cb(SSL *ssl, SSL_SESSION *sess)
+{
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ struct server *s;
+
+ s = __objt_server(conn->target);
+
+ /* RWLOCK: only read-lock the SSL cache even when writing to it, because there
+ * is one cache per thread; the lock only prevents another thread from
+ * flushing it from the CLI */
+
+ if (!(s->ssl_ctx.options & SRV_SSL_O_NO_REUSE)) {
+ int len;
+ unsigned char *ptr;
+ const char *sni;
+
+ len = i2d_SSL_SESSION(sess, NULL);
+ sni = SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name);
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ if (s->ssl_ctx.reused_sess[tid].ptr && s->ssl_ctx.reused_sess[tid].allocated_size >= len) {
+ ptr = s->ssl_ctx.reused_sess[tid].ptr;
+ } else {
+ ptr = realloc(s->ssl_ctx.reused_sess[tid].ptr, len);
+ s->ssl_ctx.reused_sess[tid].ptr = ptr;
+ s->ssl_ctx.reused_sess[tid].allocated_size = len;
+ }
+ if (s->ssl_ctx.reused_sess[tid].ptr) {
+ s->ssl_ctx.reused_sess[tid].size = i2d_SSL_SESSION(sess,
+ &ptr);
+ }
+
+ if (s->ssl_ctx.reused_sess[tid].sni) {
+ /* if the new sni is empty or isn't the same as the old one */
+ if ((!sni) || strcmp(s->ssl_ctx.reused_sess[tid].sni, sni) != 0) {
+ ha_free(&s->ssl_ctx.reused_sess[tid].sni);
+ if (sni)
+ s->ssl_ctx.reused_sess[tid].sni = strdup(sni);
+ }
+ } else if (sni) {
+ /* if there wasn't an old sni but there is a new one */
+ s->ssl_ctx.reused_sess[tid].sni = strdup(sni);
+ }
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ } else {
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ ha_free(&s->ssl_ctx.reused_sess[tid].ptr);
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ }
+
+ return 0;
+}
+
+
+/* SSL callback used on new session creation */
+int sh_ssl_sess_new_cb(SSL *ssl, SSL_SESSION *sess)
+{
+ unsigned char encsess[SHSESS_MAX_DATA_LEN]; /* encoded session */
+ unsigned char encid[SSL_MAX_SSL_SESSION_ID_LENGTH]; /* encoded id */
+ unsigned char *p;
+ int data_len;
+ unsigned int sid_length;
+ const unsigned char *sid_data;
+
+ /* The session id is already stored in the key and is known,
+ * so we don't store it again in order to save space.
+ * note: SSL_SESSION_set1_id is using
+ * a memcpy so we need to use a different pointer
+ * than sid_data or sid_ctx_data to avoid valgrind
+ * complaining.
+ */
+
+ sid_data = SSL_SESSION_get_id(sess, &sid_length);
+
+ /* copy the value into another buffer */
+ memcpy(encid, sid_data, sid_length);
+
+ /* pad with 0 */
+ if (sid_length < SSL_MAX_SSL_SESSION_ID_LENGTH)
+ memset(encid + sid_length, 0, SSL_MAX_SSL_SESSION_ID_LENGTH-sid_length);
+
+ /* force length to zero to avoid ASN1 encoding */
+ SSL_SESSION_set1_id(sess, encid, 0);
+
+ /* force length to zero to avoid ASN1 encoding */
+ SSL_SESSION_set1_id_context(sess, (const unsigned char *)SHCTX_APPNAME, 0);
+
+ /* check if buffer is large enough for the ASN1 encoded session */
+ data_len = i2d_SSL_SESSION(sess, NULL);
+ if (data_len > SHSESS_MAX_DATA_LEN)
+ goto err;
+
+ p = encsess;
+
+ /* process ASN1 session encoding before the lock */
+ i2d_SSL_SESSION(sess, &p);
+
+
+ shctx_lock(ssl_shctx);
+ /* store to cache */
+ sh_ssl_sess_store(encid, encsess, data_len);
+ shctx_unlock(ssl_shctx);
+err:
+ /* reset original length values */
+ SSL_SESSION_set1_id(sess, encid, sid_length);
+ SSL_SESSION_set1_id_context(sess, (const unsigned char *)SHCTX_APPNAME, strlen(SHCTX_APPNAME));
+
+ return 0; /* do not increment session reference count */
+}
+
+/* SSL callback used to look up an existing session because none was found in the internal cache */
+SSL_SESSION *sh_ssl_sess_get_cb(SSL *ssl, __OPENSSL_110_CONST__ unsigned char *key, int key_len, int *do_copy)
+{
+ struct sh_ssl_sess_hdr *sh_ssl_sess;
+ unsigned char data[SHSESS_MAX_DATA_LEN], *p;
+ unsigned char tmpkey[SSL_MAX_SSL_SESSION_ID_LENGTH];
+ SSL_SESSION *sess;
+ struct shared_block *first;
+
+ _HA_ATOMIC_INC(&global.shctx_lookups);
+
+ /* allow the session to be freed automatically by openssl */
+ *do_copy = 0;
+
+ /* the tree key is the zero-padded session id */
+ if (key_len < SSL_MAX_SSL_SESSION_ID_LENGTH) {
+ memcpy(tmpkey, key, key_len);
+ memset(tmpkey + key_len, 0, SSL_MAX_SSL_SESSION_ID_LENGTH - key_len);
+ key = tmpkey;
+ }
+
+ /* lock cache */
+ shctx_lock(ssl_shctx);
+
+ /* look up the session */
+ sh_ssl_sess = sh_ssl_sess_tree_lookup(key);
+ if (!sh_ssl_sess) {
+ /* no session found: unlock cache and exit */
+ shctx_unlock(ssl_shctx);
+ _HA_ATOMIC_INC(&global.shctx_misses);
+ return NULL;
+ }
+
+ /* sh_ssl_sess (shared_block->data) is at the end of shared_block */
+ first = sh_ssl_sess_first_block(sh_ssl_sess);
+
+ shctx_row_data_get(ssl_shctx, first, data, sizeof(struct sh_ssl_sess_hdr), first->len-sizeof(struct sh_ssl_sess_hdr));
+
+ shctx_unlock(ssl_shctx);
+
+ /* decode ASN1 session */
+ p = data;
+ sess = d2i_SSL_SESSION(NULL, (const unsigned char **)&p, first->len-sizeof(struct sh_ssl_sess_hdr));
+ /* Reset the session id and session id context */
+ if (sess) {
+ SSL_SESSION_set1_id(sess, key, key_len);
+ SSL_SESSION_set1_id_context(sess, (const unsigned char *)SHCTX_APPNAME, strlen(SHCTX_APPNAME));
+ }
+
+ return sess;
+}
+
+
+/* SSL callback used to signal that a session is no longer used in the internal cache */
+void sh_ssl_sess_remove_cb(SSL_CTX *ctx, SSL_SESSION *sess)
+{
+ struct sh_ssl_sess_hdr *sh_ssl_sess;
+ unsigned char tmpkey[SSL_MAX_SSL_SESSION_ID_LENGTH];
+ unsigned int sid_length;
+ const unsigned char *sid_data;
+ (void)ctx;
+
+ sid_data = SSL_SESSION_get_id(sess, &sid_length);
+ /* the tree key is the zero-padded session id */
+ if (sid_length < SSL_MAX_SSL_SESSION_ID_LENGTH) {
+ memcpy(tmpkey, sid_data, sid_length);
+ memset(tmpkey+sid_length, 0, SSL_MAX_SSL_SESSION_ID_LENGTH - sid_length);
+ sid_data = tmpkey;
+ }
+
+ shctx_lock(ssl_shctx);
+
+ /* look up the session */
+ sh_ssl_sess = sh_ssl_sess_tree_lookup(sid_data);
+ if (sh_ssl_sess) {
+ /* free session */
+ sh_ssl_sess_tree_delete(sh_ssl_sess);
+ }
+
+ /* unlock cache */
+ shctx_unlock(ssl_shctx);
+}
+
+/* Set session cache mode to server and disable openssl internal cache.
+ * Set shared cache callbacks on an ssl context.
+ * The shared context MUST be initialized first. */
+void ssl_set_shctx(SSL_CTX *ctx)
+{
+ SSL_CTX_set_session_id_context(ctx, (const unsigned char *)SHCTX_APPNAME, strlen(SHCTX_APPNAME));
+
+ if (!ssl_shctx) {
+ SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_OFF);
+ return;
+ }
+
+ SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_SERVER |
+ SSL_SESS_CACHE_NO_INTERNAL |
+ SSL_SESS_CACHE_NO_AUTO_CLEAR);
+
+ /* Set callbacks */
+ SSL_CTX_sess_set_new_cb(ctx, sh_ssl_sess_new_cb);
+ SSL_CTX_sess_set_get_cb(ctx, sh_ssl_sess_get_cb);
+ SSL_CTX_sess_set_remove_cb(ctx, sh_ssl_sess_remove_cb);
+}
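+
+/* Usage note: the shared cache behind ssl_shctx is sized by the global
+ * "tune.ssl.cachesize" directive (see ssl_sock_prepare_bind_conf() below);
+ * when it is 0, no shared context is allocated and the fallback above
+ * disables session caching entirely with SSL_SESS_CACHE_OFF. */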
+
+/*
+ * https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS/Key_Log_Format
+ *
+ * The format is:
+ * * <Label> <space> <ClientRandom> <space> <Secret>
+ * We only need to copy the secret as there is a sample fetch for the ClientRandom
+ */
+
+#ifdef HAVE_SSL_KEYLOG
+void SSL_CTX_keylog(const SSL *ssl, const char *line)
+{
+ struct ssl_keylog *keylog;
+ char *lastarg = NULL;
+ char *dst = NULL;
+
+ keylog = SSL_get_ex_data(ssl, ssl_keylog_index);
+ if (!keylog)
+ return;
+
+ lastarg = strrchr(line, ' ');
+ if (lastarg == NULL || ++lastarg == NULL)
+ return;
+
+ dst = pool_alloc(pool_head_ssl_keylog_str);
+ if (!dst)
+ return;
+
+ strncpy(dst, lastarg, SSL_KEYLOG_MAX_SECRET_SIZE-1);
+ dst[SSL_KEYLOG_MAX_SECRET_SIZE-1] = '\0';
+
+ if (strncmp(line, "CLIENT_RANDOM ", strlen("CLIENT RANDOM ")) == 0) {
+ if (keylog->client_random)
+ goto error;
+ keylog->client_random = dst;
+
+ } else if (strncmp(line, "CLIENT_EARLY_TRAFFIC_SECRET ", strlen("CLIENT_EARLY_TRAFFIC_SECRET ")) == 0) {
+ if (keylog->client_early_traffic_secret)
+ goto error;
+ keylog->client_early_traffic_secret = dst;
+
+ } else if (strncmp(line, "CLIENT_HANDSHAKE_TRAFFIC_SECRET ", strlen("CLIENT_HANDSHAKE_TRAFFIC_SECRET ")) == 0) {
+ if(keylog->client_handshake_traffic_secret)
+ goto error;
+ keylog->client_handshake_traffic_secret = dst;
+
+ } else if (strncmp(line, "SERVER_HANDSHAKE_TRAFFIC_SECRET ", strlen("SERVER_HANDSHAKE_TRAFFIC_SECRET ")) == 0) {
+ if (keylog->server_handshake_traffic_secret)
+ goto error;
+ keylog->server_handshake_traffic_secret = dst;
+
+ } else if (strncmp(line, "CLIENT_TRAFFIC_SECRET_0 ", strlen("CLIENT_TRAFFIC_SECRET_0 ")) == 0) {
+ if (keylog->client_traffic_secret_0)
+ goto error;
+ keylog->client_traffic_secret_0 = dst;
+
+ } else if (strncmp(line, "SERVER_TRAFFIC_SECRET_0 ", strlen("SERVER_TRAFFIC_SECRET_0 ")) == 0) {
+ if (keylog->server_traffic_secret_0)
+ goto error;
+ keylog->server_traffic_secret_0 = dst;
+
+ } else if (strncmp(line, "EARLY_EXPORTER_SECRET ", strlen("EARLY_EXPORTER_SECRET ")) == 0) {
+ if (keylog->early_exporter_secret)
+ goto error;
+ keylog->early_exporter_secret = dst;
+
+ } else if (strncmp(line, "EXPORTER_SECRET ", strlen("EXPORTER_SECRET ")) == 0) {
+ if (keylog->exporter_secret)
+ goto error;
+ keylog->exporter_secret = dst;
+ } else {
+ goto error;
+ }
+
+ return;
+
+error:
+ pool_free(pool_head_ssl_keylog_str, dst);
+
+ return;
+}
+#endif
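+
+/* Informal example of the lines received by the keylog callback above (NSS
+ * key log format, hexadecimal values shortened):
+ *
+ *   CLIENT_HANDSHAKE_TRAFFIC_SECRET 1a2b... 9f8e...
+ *   SERVER_TRAFFIC_SECRET_0 1a2b... 77aa...
+ *
+ * Only the last space-separated token (the secret) is copied; the
+ * ClientRandom is available through a sample fetch as noted earlier. */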
+
+/*
+ * This function applies the SSL configuration on an SSL_CTX.
+ * It returns an error code and fills the <err> buffer.
+ */
+static int ssl_sock_prepare_ctx(struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf, SSL_CTX *ctx, char **err)
+{
+ struct proxy *curproxy = bind_conf->frontend;
+ int cfgerr = 0;
+ int verify = SSL_VERIFY_NONE;
+ struct ssl_bind_conf __maybe_unused *ssl_conf_cur;
+ const char *conf_ciphers;
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ const char *conf_ciphersuites;
+#endif
+ const char *conf_curves = NULL;
+
+ if (ssl_conf) {
+ struct tls_version_filter *conf_ssl_methods = &ssl_conf->ssl_methods;
+ int i, min, max;
+ int flags = MC_SSL_O_ALL;
+
+ /* The real min and max are determined from the configuration and openssl's capabilities */
+ min = conf_ssl_methods->min ? conf_ssl_methods->min : bind_conf->ssl_conf.ssl_methods.min;
+ max = conf_ssl_methods->max ? conf_ssl_methods->max : bind_conf->ssl_conf.ssl_methods.max;
+ if (min)
+ flags |= (methodVersions[min].flag - 1);
+ if (max)
+ flags |= ~((methodVersions[max].flag << 1) - 1);
+ min = max = CONF_TLSV_NONE;
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (methodVersions[i].option && !(flags & methodVersions[i].flag)) {
+ if (min)
+ max = i;
+ else
+ min = max = i;
+ }
+ /* save real min/max */
+ conf_ssl_methods->min = min;
+ conf_ssl_methods->max = max;
+ if (!min) {
+ memprintf(err, "%sProxy '%s': all SSL/TLS versions are disabled for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ switch ((ssl_conf && ssl_conf->verify) ? ssl_conf->verify : bind_conf->ssl_conf.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_OPTIONAL:
+ verify = SSL_VERIFY_PEER;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT;
+ break;
+ }
+ SSL_CTX_set_verify(ctx, verify, ssl_sock_bind_verifycbk);
+ if (verify & SSL_VERIFY_PEER) {
+ char *ca_file = (ssl_conf && ssl_conf->ca_file) ? ssl_conf->ca_file : bind_conf->ssl_conf.ca_file;
+ char *ca_verify_file = (ssl_conf && ssl_conf->ca_verify_file) ? ssl_conf->ca_verify_file : bind_conf->ssl_conf.ca_verify_file;
+ char *crl_file = (ssl_conf && ssl_conf->crl_file) ? ssl_conf->crl_file : bind_conf->ssl_conf.crl_file;
+ if (ca_file || ca_verify_file) {
+ /* set CAfile to verify */
+ if (ca_file && !ssl_set_verify_locations_file(ctx, ca_file)) {
+ memprintf(err, "%sProxy '%s': unable to set CA file '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ca_file, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ if (ca_verify_file && !ssl_set_verify_locations_file(ctx, ca_verify_file)) {
+ memprintf(err, "%sProxy '%s': unable to set CA-no-names file '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ca_verify_file, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ if (ca_file && !((ssl_conf && ssl_conf->no_ca_names) || bind_conf->ssl_conf.no_ca_names)) {
+ /* set CA names for client cert request, function returns void */
+ SSL_CTX_set_client_CA_list(ctx, SSL_dup_CA_list(ssl_get_client_ca_file(ca_file)));
+ }
+ }
+ else {
+ memprintf(err, "%sProxy '%s': verify is enabled but no CA file specified for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#ifdef X509_V_FLAG_CRL_CHECK
+ if (crl_file) {
+ X509_STORE *store = SSL_CTX_get_cert_store(ctx);
+
+ if (!ssl_set_cert_crl_file(store, crl_file)) {
+ memprintf(err, "%sProxy '%s': unable to configure CRL file '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, crl_file, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ else {
+ X509_STORE_set_flags(store, X509_V_FLAG_CRL_CHECK|X509_V_FLAG_CRL_CHECK_ALL);
+ }
+ }
+#endif
+ ERR_clear_error();
+ }
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ if(bind_conf->keys_ref) {
+ if (!SSL_CTX_set_tlsext_ticket_key_evp_cb(ctx, ssl_tlsext_ticket_key_cb)) {
+ memprintf(err, "%sProxy '%s': unable to set callback for TLS ticket validation for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+#endif
+
+ ssl_set_shctx(ctx);
+ conf_ciphers = (ssl_conf && ssl_conf->ciphers) ? ssl_conf->ciphers : bind_conf->ssl_conf.ciphers;
+ if (conf_ciphers &&
+ !SSL_CTX_set_cipher_list(ctx, conf_ciphers)) {
+ memprintf(err, "%sProxy '%s': unable to set SSL cipher list to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_ciphers, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ conf_ciphersuites = (ssl_conf && ssl_conf->ciphersuites) ? ssl_conf->ciphersuites : bind_conf->ssl_conf.ciphersuites;
+ if (conf_ciphersuites &&
+ !SSL_CTX_set_ciphersuites(ctx, conf_ciphersuites)) {
+ memprintf(err, "%sProxy '%s': unable to set TLS 1.3 cipher suites to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_ciphersuites, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif
+
+#ifndef OPENSSL_NO_DH
+ if (!local_dh_1024)
+ local_dh_1024 = ssl_get_dh_1024();
+ if (!local_dh_2048)
+ local_dh_2048 = ssl_get_dh_2048();
+ if (!local_dh_4096)
+ local_dh_4096 = ssl_get_dh_4096();
+#endif /* OPENSSL_NO_DH */
+
+ SSL_CTX_set_info_callback(ctx, ssl_sock_infocbk);
+#ifdef SSL_CTRL_SET_MSG_CALLBACK
+ SSL_CTX_set_msg_callback(ctx, ssl_sock_msgcbk);
+#endif
+#ifdef HAVE_SSL_KEYLOG
+ /* only activate the keylog callback if required, to avoid a performance penalty */
+ if (global_ssl.keylog > 0)
+ SSL_CTX_set_keylog_callback(ctx, SSL_CTX_keylog);
+#endif
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ ssl_conf_cur = NULL;
+ if (ssl_conf && ssl_conf->npn_str)
+ ssl_conf_cur = ssl_conf;
+ else if (bind_conf->ssl_conf.npn_str)
+ ssl_conf_cur = &bind_conf->ssl_conf;
+ if (ssl_conf_cur)
+ SSL_CTX_set_next_protos_advertised_cb(ctx, ssl_sock_advertise_npn_protos, ssl_conf_cur);
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ ssl_conf_cur = NULL;
+ if (ssl_conf && ssl_conf->alpn_str)
+ ssl_conf_cur = ssl_conf;
+ else if (bind_conf->ssl_conf.alpn_str)
+ ssl_conf_cur = &bind_conf->ssl_conf;
+ if (ssl_conf_cur)
+ SSL_CTX_set_alpn_select_cb(ctx, ssl_sock_advertise_alpn_protos, ssl_conf_cur);
+#endif
+#if defined(SSL_CTX_set1_curves_list)
+ conf_curves = (ssl_conf && ssl_conf->curves) ? ssl_conf->curves : bind_conf->ssl_conf.curves;
+ if (conf_curves) {
+ if (!SSL_CTX_set1_curves_list(ctx, conf_curves)) {
+ memprintf(err, "%sProxy '%s': unable to set SSL curves list to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_curves, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ (void)SSL_CTX_set_ecdh_auto(ctx, 1);
+ }
+#endif /* defined(SSL_CTX_set1_curves_list) */
+
+ if (!conf_curves) {
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+#if defined(SSL_CTX_set1_curves_list)
+ const char *ecdhe = (ssl_conf && ssl_conf->ecdhe) ? ssl_conf->ecdhe :
+ (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe :
+ NULL);
+
+ if (ecdhe && SSL_CTX_set1_curves_list(ctx, ecdhe) == 0) {
+ memprintf(err, "%sProxy '%s': unable to set elliptic named curve to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ecdhe, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif /* defined(SSL_CTX_set1_curves_list) */
+#else
+#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH)
+ int i;
+ EC_KEY *ecdh;
+
+ const char *ecdhe = (ssl_conf && ssl_conf->ecdhe) ? ssl_conf->ecdhe :
+ (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe :
+ ECDHE_DEFAULT_CURVE);
+
+ i = OBJ_sn2nid(ecdhe);
+ if (!i || ((ecdh = EC_KEY_new_by_curve_name(i)) == NULL)) {
+ memprintf(err, "%sProxy '%s': unable to set elliptic named curve to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ecdhe, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ else {
+ SSL_CTX_set_tmp_ecdh(ctx, ecdh);
+ EC_KEY_free(ecdh);
+ }
+#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */
+#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */
+ }
+
+ return cfgerr;
+}
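+
+/* Configuration sketch (hypothetical names) exercising the settings applied
+ * above on a single bind line:
+ *
+ *   bind :443 ssl crt site.pem ciphers ECDHE+AESGCM curves X25519:P-256 alpn h2,http/1.1 verify optional ca-file ca.pem
+ *
+ * "ciphers"/"ciphersuites", "curves", "alpn"/"npn" and "verify"/"ca-file"
+ * map directly onto the SSL_CTX_set_* calls performed in this function. */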
+
+
+/*
+ * Prepare the SSL_CTX based on the bind line configuration.
+ * Since the CA file loading is made depending on the verify option of the bind
+ * line, the link between the SSL_CTX and the CA file tree entry is made here.
+ * If we want to create a link between the CA file entry and the corresponding
+ * ckch instance (for CA file hot update), it needs to be done after
+ * ssl_sock_prepare_ctx.
+ * Returns 0 in case of success.
+ */
+int ssl_sock_prep_ctx_and_inst(struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf,
+ SSL_CTX *ctx, struct ckch_inst *ckch_inst, char **err)
+{
+ int errcode = 0;
+
+ errcode |= ssl_sock_prepare_ctx(bind_conf, ssl_conf, ctx, err);
+ if (!errcode && ckch_inst)
+ ckch_inst_add_cafile_link(ckch_inst, bind_conf, ssl_conf, NULL);
+
+ return errcode;
+}
+
+static int ssl_sock_srv_hostcheck(const char *pattern, const char *hostname)
+{
+ const char *pattern_wildcard, *pattern_left_label_end, *hostname_left_label_end;
+ size_t prefixlen, suffixlen;
+
+ /* Trivial case */
+ if (strcasecmp(pattern, hostname) == 0)
+ return 1;
+
+ /* The rest of this logic is based on RFC 6125, section 6.4.3
+ * (http://tools.ietf.org/html/rfc6125#section-6.4.3) */
+
+ pattern_wildcard = NULL;
+ pattern_left_label_end = pattern;
+ while (*pattern_left_label_end != '.') {
+ switch (*pattern_left_label_end) {
+ case 0:
+ /* End of label not found */
+ return 0;
+ case '*':
+ /* If there is more than one wildcard */
+ if (pattern_wildcard)
+ return 0;
+ pattern_wildcard = pattern_left_label_end;
+ break;
+ }
+ pattern_left_label_end++;
+ }
+
+ /* If it's not trivial and there is no wildcard, it can't
+ * match */
+ if (!pattern_wildcard)
+ return 0;
+
+ /* Make sure all labels match except the leftmost */
+ hostname_left_label_end = strchr(hostname, '.');
+ if (!hostname_left_label_end
+ || strcasecmp(pattern_left_label_end, hostname_left_label_end) != 0)
+ return 0;
+
+ /* Make sure the leftmost label of the hostname is long enough
+ * that the wildcard can match */
+ if (hostname_left_label_end - hostname < (pattern_left_label_end - pattern) - 1)
+ return 0;
+
+ /* Finally compare the strings on either side of the
+ * wildcard */
+ prefixlen = pattern_wildcard - pattern;
+ suffixlen = pattern_left_label_end - (pattern_wildcard + 1);
+ if ((prefixlen && (strncasecmp(pattern, hostname, prefixlen) != 0))
+ || (suffixlen && (strncasecmp(pattern_wildcard + 1, hostname_left_label_end - suffixlen, suffixlen) != 0)))
+ return 0;
+
+ return 1;
+}
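+
+/* Informal examples of the RFC 6125 rules implemented above, with
+ * hypothetical names:
+ *
+ *   ssl_sock_srv_hostcheck("*.example.com", "www.example.com")   -> 1
+ *   ssl_sock_srv_hostcheck("*.example.com", "a.b.example.com")   -> 0 (extra label)
+ *   ssl_sock_srv_hostcheck("*.example.com", "example.com")       -> 0 (no leftmost label)
+ *   ssl_sock_srv_hostcheck("w*w.example.com", "www.example.com") -> 1
+ */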
+
+static int ssl_sock_srv_verifycbk(int ok, X509_STORE_CTX *ctx)
+{
+ SSL *ssl;
+ struct connection *conn;
+ struct ssl_sock_ctx *ssl_ctx;
+ const char *servername;
+ const char *sni;
+
+ int depth;
+ X509 *cert;
+ STACK_OF(GENERAL_NAME) *alt_names;
+ int i;
+ X509_NAME *cert_subject;
+ char *str;
+
+ if (ok == 0)
+ return ok;
+
+ ssl = X509_STORE_CTX_get_ex_data(ctx, SSL_get_ex_data_X509_STORE_CTX_idx());
+ conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ ssl_ctx = __conn_get_ssl_sock_ctx(conn);
+
+ /* We're checking if the provided hostnames match the desired one. The
+ * desired hostname comes from the SNI we presented if any, or if not
+ * provided then it may have been explicitly stated using a "verifyhost"
+ * directive. If neither is set, we don't care about the name so the
+ * verification is OK.
+ */
+ servername = SSL_get_servername(ssl_ctx->ssl, TLSEXT_NAMETYPE_host_name);
+ sni = servername;
+ if (!servername) {
+ servername = __objt_server(conn->target)->ssl_ctx.verify_host;
+ if (!servername)
+ return ok;
+ }
+
+ /* We only need to verify the CN on the actual server cert,
+ * not the indirect CAs */
+ depth = X509_STORE_CTX_get_error_depth(ctx);
+ if (depth != 0)
+ return ok;
+
+ /* At this point, the cert is *not* OK unless we can find a
+ * hostname match */
+ ok = 0;
+
+ cert = X509_STORE_CTX_get_current_cert(ctx);
+ /* It seems like this might happen if verify peer isn't set */
+ if (!cert)
+ return ok;
+
+ alt_names = X509_get_ext_d2i(cert, NID_subject_alt_name, NULL, NULL);
+ if (alt_names) {
+ for (i = 0; !ok && i < sk_GENERAL_NAME_num(alt_names); i++) {
+ GENERAL_NAME *name = sk_GENERAL_NAME_value(alt_names, i);
+ if (name->type == GEN_DNS) {
+#if HA_OPENSSL_VERSION_NUMBER < 0x00907000L
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.ia5) >= 0) {
+#else
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.dNSName) >= 0) {
+#endif
+ ok = ssl_sock_srv_hostcheck(str, servername);
+ OPENSSL_free(str);
+ }
+ }
+ }
+ sk_GENERAL_NAME_pop_free(alt_names, GENERAL_NAME_free);
+ }
+
+ cert_subject = X509_get_subject_name(cert);
+ i = -1;
+ while (!ok && (i = X509_NAME_get_index_by_NID(cert_subject, NID_commonName, i)) != -1) {
+ X509_NAME_ENTRY *entry = X509_NAME_get_entry(cert_subject, i);
+ ASN1_STRING *value;
+ value = X509_NAME_ENTRY_get_data(entry);
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, value) >= 0) {
+ ok = ssl_sock_srv_hostcheck(str, servername);
+ OPENSSL_free(str);
+ }
+ }
+
+ /* report the mismatch and indicate if SNI was used or not */
+ if (!ok && !conn->err_code)
+ conn->err_code = sni ? CO_ER_SSL_MISMATCH_SNI : CO_ER_SSL_MISMATCH;
+ return ok;
+}
+
+/* prepare the ssl context from the server's options. Returns an error count */
+int ssl_sock_prepare_srv_ctx(struct server *srv)
+{
+ int cfgerr = 0;
+ SSL_CTX *ctx;
+ /* Automatic memory computations need to know we use SSL there
+ * If this is an internal proxy, don't use it for the computation */
+ if (!(srv->proxy && srv->proxy->cap & PR_CAP_INT))
+ global.ssl_used_backend = 1;
+
+ /* Initiate SSL context for current server */
+ if (!srv->ssl_ctx.reused_sess) {
+ if ((srv->ssl_ctx.reused_sess = calloc(1, global.nbthread*sizeof(*srv->ssl_ctx.reused_sess))) == NULL) {
+ ha_alert("out of memory.\n");
+ cfgerr++;
+ return cfgerr;
+ }
+ }
+ if (srv->use_ssl == 1)
+ srv->xprt = &ssl_sock;
+
+ if (srv->ssl_ctx.client_crt) {
+ const int create_if_none = srv->flags & SRV_F_DYNAMIC ? 0 : 1;
+ char *err = NULL;
+ int err_code = 0;
+
+ /* If there is a crt keyword there, the SSL_CTX will be created here. */
+ err_code = ssl_sock_load_srv_cert(srv->ssl_ctx.client_crt, srv, create_if_none, &err);
+ if (err_code != ERR_NONE) {
+ if ((err_code & ERR_WARN) && !(err_code & ERR_ALERT))
+ ha_warning("%s", err);
+ else
+ ha_alert("%s", err);
+
+ if (err_code & (ERR_FATAL|ERR_ABORT))
+ cfgerr++;
+ }
+ ha_free(&err);
+ }
+
+ ctx = srv->ssl_ctx.ctx;
+
+ /* The context will be uninitialized if there wasn't any "cert" option
+ * in the server line. */
+ if (!ctx) {
+ ctx = SSL_CTX_new(SSLv23_client_method());
+ if (!ctx) {
+ ha_alert("unable to allocate ssl context.\n");
+ cfgerr++;
+ return cfgerr;
+ }
+
+ srv->ssl_ctx.ctx = ctx;
+ }
+
+ cfgerr += ssl_sock_prep_srv_ctx_and_inst(srv, srv->ssl_ctx.ctx, srv->ssl_ctx.inst);
+
+ return cfgerr;
+}
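+
+/* Note on reused_sess: it is a calloc'd array of global.nbthread entries, so
+ * each thread owns reused_sess[tid]; this is why ssl_sess_new_srv_cb() above
+ * can update its own slot under a simple read lock. */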
+
+/* Initialize an SSL context that will be used on the backend side.
+ * Returns an error count.
+ */
+static int ssl_sock_prepare_srv_ssl_ctx(const struct server *srv, SSL_CTX *ctx)
+{
+ struct proxy *curproxy = srv->proxy;
+ int cfgerr = 0;
+ long options =
+ SSL_OP_ALL | /* all known workarounds for bugs */
+ SSL_OP_NO_SSLv2 |
+ SSL_OP_NO_COMPRESSION;
+ long mode =
+ SSL_MODE_ENABLE_PARTIAL_WRITE |
+ SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER |
+ SSL_MODE_RELEASE_BUFFERS |
+ SSL_MODE_SMALL_BUFFERS;
+ int verify = SSL_VERIFY_NONE;
+ const struct tls_version_filter *conf_ssl_methods = &srv->ssl_ctx.methods;
+ int i, min, max, hole;
+ int flags = MC_SSL_O_ALL;
+
+ if (conf_ssl_methods->flags && (conf_ssl_methods->min || conf_ssl_methods->max))
+ ha_warning("no-sslv3/no-tlsv1x are ignored for this server. "
+ "Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n");
+ else
+ flags = conf_ssl_methods->flags;
+
+ /* The real min and max are determined from the configuration and openssl's capabilities */
+ if (conf_ssl_methods->min)
+ flags |= (methodVersions[conf_ssl_methods->min].flag - 1);
+ if (conf_ssl_methods->max)
+ flags |= ~((methodVersions[conf_ssl_methods->max].flag << 1) - 1);
+
+ /* find min, max and holes */
+ min = max = CONF_TLSV_NONE;
+ hole = 0;
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ /* version is in openssl && version not disabled in the configuration */
+ if (methodVersions[i].option && !(flags & methodVersions[i].flag)) {
+ if (min) {
+ if (hole) {
+ ha_warning("%s '%s': SSL/TLS versions range not contiguous for server '%s'. "
+ "Hole find for %s. Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n",
+ proxy_type_str(curproxy), curproxy->id, srv->id,
+ methodVersions[hole].name);
+ hole = 0;
+ }
+ max = i;
+ }
+ else {
+ min = max = i;
+ }
+ }
+ else {
+ if (min)
+ hole = i;
+ }
+ if (!min) {
+ ha_alert("%s '%s': all SSL/TLS versions are disabled for server '%s'.\n",
+ proxy_type_str(curproxy), curproxy->id, srv->id);
+ cfgerr += 1;
+ }
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+ /* Keep force-xxx implementation as it is in older haproxy. It's a
+ precautionary measure to avoid any surprise with older openssl versions. */
+ if (min == max)
+ methodVersions[min].ctx_set_version(ctx, SET_CLIENT);
+ else
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (flags & methodVersions[i].flag)
+ options |= methodVersions[i].option;
+#else /* openssl >= 1.1.0 */
+ /* setting the max_version is required to cap the TLS version or to enable newer ones (TLSv1.3) */
+ methodVersions[min].ctx_set_version(ctx, SET_MIN);
+ methodVersions[max].ctx_set_version(ctx, SET_MAX);
+#endif
+
+ if (srv->ssl_ctx.options & SRV_SSL_O_NO_TLS_TICKETS)
+ options |= SSL_OP_NO_TICKET;
+ SSL_CTX_set_options(ctx, options);
+
+#ifdef SSL_MODE_ASYNC
+ if (global_ssl.async)
+ mode |= SSL_MODE_ASYNC;
+#endif
+ SSL_CTX_set_mode(ctx, mode);
+
+ if (global.ssl_server_verify == SSL_SERVER_VERIFY_REQUIRED)
+ verify = SSL_VERIFY_PEER;
+ switch (srv->ssl_ctx.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER;
+ break;
+ }
+ SSL_CTX_set_verify(ctx, verify,
+ (srv->ssl_ctx.verify_host || (verify & SSL_VERIFY_PEER)) ? ssl_sock_srv_verifycbk : NULL);
+ if (verify & SSL_VERIFY_PEER) {
+ if (srv->ssl_ctx.ca_file) {
+ /* set CAfile to verify */
+ if (!ssl_set_verify_locations_file(ctx, srv->ssl_ctx.ca_file)) {
+ ha_alert("unable to set CA file '%s'.\n",
+ srv->ssl_ctx.ca_file);
+ cfgerr++;
+ }
+ }
+ else {
+ if (global.ssl_server_verify == SSL_SERVER_VERIFY_REQUIRED)
+ ha_alert("verify is enabled by default but no CA file specified. If you're running on a LAN where you're certain to trust the server's certificate, please set an explicit 'verify none' statement on the 'server' line, or use 'ssl-server-verify none' in the global section to disable server-side verifications by default.\n");
+ else
+ ha_alert("verify is enabled but no CA file specified.\n");
+ cfgerr++;
+ }
+#ifdef X509_V_FLAG_CRL_CHECK
+ if (srv->ssl_ctx.crl_file) {
+ X509_STORE *store = SSL_CTX_get_cert_store(ctx);
+
+ if (!ssl_set_cert_crl_file(store, srv->ssl_ctx.crl_file)) {
+ ha_alert("unable to configure CRL file '%s'.\n",
+ srv->ssl_ctx.crl_file);
+ cfgerr++;
+ }
+ else {
+ X509_STORE_set_flags(store, X509_V_FLAG_CRL_CHECK|X509_V_FLAG_CRL_CHECK_ALL);
+ }
+ }
+#endif
+ }
+
+ SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_CLIENT | SSL_SESS_CACHE_NO_INTERNAL_STORE);
+ SSL_CTX_sess_set_new_cb(ctx, ssl_sess_new_srv_cb);
+ if (srv->ssl_ctx.ciphers &&
+ !SSL_CTX_set_cipher_list(ctx, srv->ssl_ctx.ciphers)) {
+ ha_alert("unable to set SSL cipher list to '%s'.\n",
+ srv->ssl_ctx.ciphers);
+ cfgerr++;
+ }
+
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (srv->ssl_ctx.ciphersuites &&
+ !SSL_CTX_set_ciphersuites(ctx, srv->ssl_ctx.ciphersuites)) {
+ ha_alert("unable to set TLS 1.3 cipher suites to '%s'.\n",
+ srv->ssl_ctx.ciphersuites);
+ cfgerr++;
+ }
+#endif
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ if (srv->ssl_ctx.npn_str)
+ SSL_CTX_set_next_proto_select_cb(ctx, ssl_sock_srv_select_protos, (struct server*)srv);
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ if (srv->ssl_ctx.alpn_str)
+ SSL_CTX_set_alpn_protos(ctx, (unsigned char *)srv->ssl_ctx.alpn_str, srv->ssl_ctx.alpn_len);
+#endif
+
+
+ return cfgerr;
+}
+
+/*
+ * Prepare the server's SSL_CTX based on the server line configuration.
+ * Since the CA file loading is made depending on the verify option of the
+ * server line, the link between the SSL_CTX and the CA file tree entry is
+ * made here.
+ * If we want to create a link between the CA file entry and the corresponding
+ * ckch instance (for CA file hot update), it needs to be done after
+ * ssl_sock_prepare_srv_ssl_ctx.
+ * Returns an error count.
+ */
+int ssl_sock_prep_srv_ctx_and_inst(const struct server *srv, SSL_CTX *ctx,
+ struct ckch_inst *ckch_inst)
+{
+ int cfgerr = 0;
+
+ cfgerr += ssl_sock_prepare_srv_ssl_ctx(srv, ctx);
+ if (!cfgerr && ckch_inst)
+ ckch_inst_add_cafile_link(ckch_inst, NULL, NULL, srv);
+
+ return cfgerr;
+}
+
+
+/*
+ * Create an initial CTX used to start the SSL connections.
+ * May be used by the QUIC xprt, which makes use of SSL sessions initialized from SSL_CTXs.
+ * Returns 0 on success, or a value > 0 otherwise.
+ */
+#ifdef USE_QUIC
+static int ssl_initial_ctx(struct bind_conf *bind_conf)
+{
+ if (bind_conf->xprt == xprt_get(XPRT_QUIC))
+ return ssl_quic_initial_ctx(bind_conf);
+ else
+ return ssl_sock_initial_ctx(bind_conf);
+}
+#else
+static int ssl_initial_ctx(struct bind_conf *bind_conf)
+{
+ return ssl_sock_initial_ctx(bind_conf);
+}
+#endif
+
+/* Walks down the two trees in bind_conf and prepares all certs. The pointer may
+ * be NULL, in which case nothing is done. Returns the number of errors
+ * encountered.
+ */
+int ssl_sock_prepare_all_ctx(struct bind_conf *bind_conf)
+{
+ struct ebmb_node *node;
+ struct sni_ctx *sni;
+ int err = 0;
+ int errcode = 0;
+ char *errmsg = NULL;
+
+ /* Automatic memory computations need to know we use SSL there */
+ global.ssl_used_frontend = 1;
+
+ /* Create the initial_ctx used to start the ssl connection before doing the switchctx */
+ if (!bind_conf->initial_ctx) {
+ err += ssl_initial_ctx(bind_conf);
+ /* Calling this function here should not be necessary, but before
+ removing the call we first need to check and move all the
+ initial_ctx-related initialisation into ssl_initial_ctx. */
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, NULL, bind_conf->initial_ctx, NULL, &errmsg);
+ }
+ if (bind_conf->default_ctx) {
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, bind_conf->default_ssl_conf, bind_conf->default_ctx, bind_conf->default_inst, &errmsg);
+ }
+
+ node = ebmb_first(&bind_conf->sni_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ if (!sni->order && sni->ctx != bind_conf->default_ctx) {
+ /* only initialize the CTX on its first occurrence and
+ if it is not the default_ctx */
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg);
+ }
+ node = ebmb_next(node);
+ }
+
+ node = ebmb_first(&bind_conf->sni_w_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ if (!sni->order && sni->ctx != bind_conf->default_ctx) {
+ /* only initialize the CTX on its first occurrence and
+ if it is not the default_ctx */
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg);
+ }
+ node = ebmb_next(node);
+ }
+
+ if (errcode & ERR_WARN) {
+ ha_warning("%s", errmsg);
+ } else if (errcode & ERR_CODE) {
+ ha_alert("%s", errmsg);
+ err++;
+ }
+
+ free(errmsg);
+ return err;
+}
+
+/* Prepares all the contexts for a bind_conf and allocates the shared SSL
+ * context if needed. Returns < 0 on error, 0 on success. The warnings and
+ * alerts are directly emitted since the rest of the stack does it below.
+ */
+int ssl_sock_prepare_bind_conf(struct bind_conf *bind_conf)
+{
+ struct proxy *px = bind_conf->frontend;
+ int alloc_ctx;
+ int err;
+
+ if (!(bind_conf->options & BC_O_USE_SSL)) {
+ if (bind_conf->default_ctx) {
+ ha_warning("Proxy '%s': A certificate was specified but SSL was not enabled on bind '%s' at [%s:%d] (use 'ssl').\n",
+ px->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ }
+ return 0;
+ }
+ if (!bind_conf->default_ctx) {
+ if (bind_conf->strict_sni && !(bind_conf->options & BC_O_GENERATE_CERTS)) {
+ ha_warning("Proxy '%s': no SSL certificate specified for bind '%s' at [%s:%d], ssl connections will fail (use 'crt').\n",
+ px->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ }
+ else {
+ ha_alert("Proxy '%s': no SSL certificate specified for bind '%s' at [%s:%d] (use 'crt').\n",
+ px->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ return -1;
+ }
+ }
+ if (!ssl_shctx && global.tune.sslcachesize) {
+ alloc_ctx = shctx_init(&ssl_shctx, global.tune.sslcachesize,
+ sizeof(struct sh_ssl_sess_hdr) + SHSESS_BLOCK_MIN_SIZE, -1,
+ sizeof(*sh_ssl_sess_tree), (global.nbthread > 1));
+ if (alloc_ctx <= 0) {
+ if (alloc_ctx == SHCTX_E_INIT_LOCK)
+ ha_alert("Unable to initialize the lock for the shared SSL session cache. You can retry using the global statement 'tune.ssl.force-private-cache' but it could increase CPU usage due to renegotiations if nbproc > 1.\n");
+ else
+ ha_alert("Unable to allocate SSL session cache.\n");
+ return -1;
+ }
+ /* free block callback */
+ ssl_shctx->free_block = sh_ssl_sess_free_blocks;
+ /* init the root tree within the extra space */
+ sh_ssl_sess_tree = (void *)ssl_shctx + sizeof(struct shared_context);
+ *sh_ssl_sess_tree = EB_ROOT_UNIQUE;
+ }
+ err = 0;
+ /* initialize all certificate contexts */
+ err += ssl_sock_prepare_all_ctx(bind_conf);
+
+ /* initialize CA variables if the certificates generation is enabled */
+ err += ssl_sock_load_ca(bind_conf);
+
+ return -err;
+}
+
+/* Releases the SSL contexts allocated for a server. Most of the fields freed
+ * here must also be allocated in srv_ssl_settings_cpy() */
+void ssl_sock_free_srv_ctx(struct server *srv)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ ha_free(&srv->ssl_ctx.alpn_str);
+#endif
+#ifdef OPENSSL_NPN_NEGOTIATED
+ ha_free(&srv->ssl_ctx.npn_str);
+#endif
+ if (srv->ssl_ctx.reused_sess) {
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ ha_free(&srv->ssl_ctx.reused_sess[i].ptr);
+ ha_free(&srv->ssl_ctx.reused_sess[i].sni);
+ }
+ ha_free(&srv->ssl_ctx.reused_sess);
+ }
+
+ if (srv->ssl_ctx.ctx) {
+ SSL_CTX_free(srv->ssl_ctx.ctx);
+ srv->ssl_ctx.ctx = NULL;
+ }
+
+ ha_free(&srv->ssl_ctx.ca_file);
+ ha_free(&srv->ssl_ctx.crl_file);
+ ha_free(&srv->ssl_ctx.client_crt);
+ ha_free(&srv->ssl_ctx.verify_host);
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ ha_free(&srv->sni_expr);
+ release_sample_expr(srv->ssl_ctx.sni);
+ srv->ssl_ctx.sni = NULL;
+#endif
+ ha_free(&srv->ssl_ctx.ciphers);
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ ha_free(&srv->ssl_ctx.ciphersuites);
+#endif
+ /* If there is a certificate we must unlink the ckch instance */
+ ckch_inst_free(srv->ssl_ctx.inst);
+}
+
+/* Walks down the two SNI trees in <bind_conf> and frees all the certs. The
+ * pointer may be NULL, in which case nothing is done. The default_ctx is
+ * nullified too.
+ */
+void ssl_sock_free_all_ctx(struct bind_conf *bind_conf)
+{
+ struct ebmb_node *node, *back;
+ struct sni_ctx *sni;
+
+ node = ebmb_first(&bind_conf->sni_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ back = ebmb_next(node);
+ ebmb_delete(node);
+ SSL_CTX_free(sni->ctx);
+ LIST_DELETE(&sni->by_ckch_inst);
+ free(sni);
+ node = back;
+ }
+
+ node = ebmb_first(&bind_conf->sni_w_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ back = ebmb_next(node);
+ ebmb_delete(node);
+ SSL_CTX_free(sni->ctx);
+ LIST_DELETE(&sni->by_ckch_inst);
+ free(sni);
+ node = back;
+ }
+
+ SSL_CTX_free(bind_conf->initial_ctx);
+ bind_conf->initial_ctx = NULL;
+ SSL_CTX_free(bind_conf->default_ctx);
+ bind_conf->default_ctx = NULL;
+ bind_conf->default_inst = NULL;
+ bind_conf->default_ssl_conf = NULL;
+}
+
+
+void ssl_sock_deinit()
+{
+ crtlist_deinit(); /* must be free'd before the ckchs */
+ ckch_deinit();
+}
+REGISTER_POST_DEINIT(ssl_sock_deinit);
+
+/* Destroys all the contexts for a bind_conf. This is used during deinit(). */
+void ssl_sock_destroy_bind_conf(struct bind_conf *bind_conf)
+{
+ ssl_sock_free_ca(bind_conf);
+ ssl_sock_free_all_ctx(bind_conf);
+ ssl_sock_free_ssl_conf(&bind_conf->ssl_conf);
+ free(bind_conf->ca_sign_file);
+ free(bind_conf->ca_sign_pass);
+ if (bind_conf->keys_ref && !--bind_conf->keys_ref->refcount) {
+ free(bind_conf->keys_ref->filename);
+ free(bind_conf->keys_ref->tlskeys);
+ LIST_DELETE(&bind_conf->keys_ref->list);
+ free(bind_conf->keys_ref);
+ }
+ bind_conf->keys_ref = NULL;
+ bind_conf->ca_sign_pass = NULL;
+ bind_conf->ca_sign_file = NULL;
+}
+
+/* Load CA cert file and private key used to generate certificates */
+int
+ssl_sock_load_ca(struct bind_conf *bind_conf)
+{
+ struct proxy *px = bind_conf->frontend;
+ struct cert_key_and_chain *ckch = NULL;
+ int ret = 0;
+ char *err = NULL;
+
+ if (!(bind_conf->options & BC_O_GENERATE_CERTS))
+ return ret;
+
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+ if (global_ssl.ctx_cache) {
+ ssl_ctx_lru_tree = lru64_new(global_ssl.ctx_cache);
+ }
+ ssl_ctx_lru_seed = (unsigned int)time(NULL);
+ ssl_ctx_serial = now_ms;
+#endif
+
+ if (!bind_conf->ca_sign_file) {
+ ha_alert("Proxy '%s': cannot enable certificate generation, "
+ "no CA certificate File configured at [%s:%d].\n",
+ px->id, bind_conf->file, bind_conf->line);
+ goto failed;
+ }
+
+ /* Allocate cert structure */
+ ckch = calloc(1, sizeof(*ckch));
+ if (!ckch) {
+ ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain allocation failure\n",
+ px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line);
+ goto failed;
+ }
+
+ /* Try to parse file */
+ if (ssl_sock_load_files_into_ckch(bind_conf->ca_sign_file, ckch, &err)) {
+ ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain loading failed: %s\n",
+ px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line, err);
+ free(err);
+ goto failed;
+ }
+
+ /* Fail if missing cert or pkey */
+ if ((!ckch->cert) || (!ckch->key)) {
+ ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain missing certificate or private key\n",
+ px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line);
+ goto failed;
+ }
+
+ /* Final assignment to bind */
+ bind_conf->ca_sign_ckch = ckch;
+ return ret;
+
+ failed:
+ if (ckch) {
+ ssl_sock_free_cert_key_and_chain_contents(ckch);
+ free(ckch);
+ }
+
+ bind_conf->options &= ~BC_O_GENERATE_CERTS;
+ ret++;
+ return ret;
+}
+
+/* Release the CA cert and private key used to generate certificates */
+void
+ssl_sock_free_ca(struct bind_conf *bind_conf)
+{
+ if (bind_conf->ca_sign_ckch) {
+ ssl_sock_free_cert_key_and_chain_contents(bind_conf->ca_sign_ckch);
+ ha_free(&bind_conf->ca_sign_ckch);
+ }
+}
+
+/*
+ * Tries to allocate the BIO and SSL session objects for the <conn> connection,
+ * storing them at the <bio> and <ssl> addresses, with <bio_meth> as the BIO
+ * method and <ssl_ctx> as the SSL context whose settings are inherited.
+ * Connects the allocated BIO to the allocated SSL session. Also sets <ctx> as
+ * the address of the custom data for the BIO and stores <conn> as the user
+ * data of the SSL session object.
+ * It is the responsibility of the caller to check the validity of all the
+ * pointers passed as parameters to this function.
+ * Returns 0 on success, -1 on failure. On failure, sets the ->err_code member
+ * of <conn> to CO_ER_SSL_NO_MEM.
+ */
+int ssl_bio_and_sess_init(struct connection *conn, SSL_CTX *ssl_ctx,
+ SSL **ssl, BIO **bio, BIO_METHOD *bio_meth, void *ctx)
+{
+ int retry = 1;
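+
+ /* Each allocation below is attempted at most twice: on the first
+ * failure, pool_gc() is called to try to release some memory, and the
+ * allocation is retried once before giving up.
+ */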
+
+ retry:
+ /* Alloc a new SSL session. */
+ *ssl = SSL_new(ssl_ctx);
+ if (!*ssl) {
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ *bio = BIO_new(bio_meth);
+ if (!*bio) {
+ SSL_free(*ssl);
+ *ssl = NULL;
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ BIO_set_data(*bio, ctx);
+ SSL_set_bio(*ssl, *bio, *bio);
+
+ /* set connection pointer. */
+ if (!SSL_set_ex_data(*ssl, ssl_app_data_index, conn)) {
+ SSL_free(*ssl);
+ *ssl = NULL;
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ return 0;
+
+ err:
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ return -1;
+}
+
+/* This function is called when all the XPRT have been initialized. We can
+ * now attempt to start the SSL handshake.
+ */
+static int ssl_sock_start(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt->start) {
+ int ret;
+
+ ret = ctx->xprt->start(conn, ctx->xprt_ctx);
+ if (ret < 0)
+ return ret;
+ }
+ tasklet_wakeup(ctx->wait_event.tasklet);
+
+ return 0;
+}
+
+/*
+ * This function is called when the SSL context is not yet allocated. It is
+ * designed to be called before any other data-layer operation and sets the
+ * handshake flag on the connection. It is safe to call it multiple times.
+ * It returns 0 on success and -1 on error.
+ */
+static int ssl_sock_init(struct connection *conn, void **xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx;
+ /* already initialized */
+ if (*xprt_ctx)
+ return 0;
+
+ ctx = pool_alloc(ssl_sock_ctx_pool);
+ if (!ctx) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ return -1;
+ }
+ ctx->wait_event.tasklet = tasklet_new();
+ if (!ctx->wait_event.tasklet) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ pool_free(ssl_sock_ctx_pool, ctx);
+ return -1;
+ }
+ ctx->wait_event.tasklet->process = ssl_sock_io_cb;
+ ctx->wait_event.tasklet->context = ctx;
+ ctx->wait_event.tasklet->state |= TASK_HEAVY; // assign it to the bulk queue during handshake
+ ctx->wait_event.events = 0;
+ ctx->sent_early_data = 0;
+ ctx->early_buf = BUF_NULL;
+ ctx->conn = conn;
+ ctx->subs = NULL;
+ ctx->xprt_st = 0;
+ ctx->xprt_ctx = NULL;
+ ctx->error_code = 0;
+
+ /* Only work with sockets for now, this should be adapted when we'll
+ * add QUIC support.
+ */
+ ctx->xprt = xprt_get(XPRT_RAW);
+ if (ctx->xprt->init) {
+ if (ctx->xprt->init(conn, &ctx->xprt_ctx) != 0)
+ goto err;
+ }
+
+ if (global.maxsslconn && global.sslconns >= global.maxsslconn) {
+ conn->err_code = CO_ER_SSL_TOO_MANY;
+ goto err;
+ }
+
+ /* If in client mode, initiate the SSL session in connect state,
+ otherwise in accept state */
+ if (objt_server(conn->target)) {
+ if (ssl_bio_and_sess_init(conn, __objt_server(conn->target)->ssl_ctx.ctx,
+ &ctx->ssl, &ctx->bio, ha_meth, ctx) == -1)
+ goto err;
+
+ SSL_set_connect_state(ctx->ssl);
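+
+ /* Attempt TLS session resumption: each thread keeps the last
+ * serialized session for this server in reused_sess[tid]; it is
+ * deserialized here and attached to the new SSL object, and an
+ * entry that can no longer be applied is released.
+ */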
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &(__objt_server(conn->target)->ssl_ctx.lock));
+ if (__objt_server(conn->target)->ssl_ctx.reused_sess[tid].ptr) {
+ const unsigned char *ptr = __objt_server(conn->target)->ssl_ctx.reused_sess[tid].ptr;
+ SSL_SESSION *sess = d2i_SSL_SESSION(NULL, &ptr, __objt_server(conn->target)->ssl_ctx.reused_sess[tid].size);
+ if (sess && !SSL_set_session(ctx->ssl, sess)) {
+ SSL_SESSION_free(sess);
+ ha_free(&__objt_server(conn->target)->ssl_ctx.reused_sess[tid].ptr);
+ } else if (sess) {
+ SSL_SESSION_free(sess);
+ }
+ }
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &(__objt_server(conn->target)->ssl_ctx.lock));
+
+ /* leave init state and start handshake */
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN;
+
+ _HA_ATOMIC_INC(&global.sslconns);
+ _HA_ATOMIC_INC(&global.totalsslconns);
+ *xprt_ctx = ctx;
+ return 0;
+ }
+ else if (objt_listener(conn->target)) {
+ struct bind_conf *bc = __objt_listener(conn->target)->bind_conf;
+
+ if (ssl_bio_and_sess_init(conn, bc->initial_ctx,
+ &ctx->ssl, &ctx->bio, ha_meth, ctx) == -1)
+ goto err;
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ if (bc->ssl_conf.early_data) {
+ b_alloc(&ctx->early_buf);
+ SSL_set_max_early_data(ctx->ssl,
+ /* Only allow early data if we managed to allocate
+ * a buffer.
+ */
+ (!b_is_null(&ctx->early_buf)) ?
+ global.tune.bufsize - global.tune.maxrewrite : 0);
+ }
+#endif
+
+ SSL_set_accept_state(ctx->ssl);
+
+ /* leave init state and start handshake */
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN;
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ if (bc->ssl_conf.early_data)
+ conn->flags |= CO_FL_EARLY_SSL_HS;
+#endif
+
+ _HA_ATOMIC_INC(&global.sslconns);
+ _HA_ATOMIC_INC(&global.totalsslconns);
+ *xprt_ctx = ctx;
+ return 0;
+ }
+ /* don't know how to handle such a target */
+ conn->err_code = CO_ER_SSL_NO_TARGET;
+err:
+ if (ctx && ctx->wait_event.tasklet)
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(ssl_sock_ctx_pool, ctx);
+ return -1;
+}
+
+
+/* This is the callback which is used when an SSL handshake is pending. It
+ * updates the FD status if it wants some polling before being called again.
+ * It returns 0 if it fails in a fatal way or needs to poll to go further,
+ * otherwise it returns non-zero and removes itself from the connection's
+ * flags (the bit is provided in <flag> by the caller).
+ */
+static int ssl_sock_handshake(struct connection *conn, unsigned int flag)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ int ret;
+ struct ssl_counters *counters = NULL;
+ struct ssl_counters *counters_px = NULL;
+ struct listener *li;
+ struct server *srv;
+ socklen_t lskerr;
+ int skerr;
+
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ /* get counters */
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_LISTENER:
+ li = __objt_listener(conn->target);
+ counters = EXTRA_COUNTERS_GET(li->extra_counters, &ssl_stats_module);
+ counters_px = EXTRA_COUNTERS_GET(li->bind_conf->frontend->extra_counters_fe,
+ &ssl_stats_module);
+ break;
+
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ counters = EXTRA_COUNTERS_GET(srv->extra_counters, &ssl_stats_module);
+ counters_px = EXTRA_COUNTERS_GET(srv->proxy->extra_counters_be,
+ &ssl_stats_module);
+ break;
+
+ default:
+ break;
+ }
+
+ if (!ctx)
+ goto out_error;
+
+ /* don't start calculating a handshake on a dead connection */
+ if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH))
+ goto out_error;
+
+ /* FIXME/WT: for now we don't have a clear way to inspect the connection
+ * status from the lower layers, so let's check the FD directly. Ideally
+ * the xprt layers should provide some status indicating their knowledge
+ * of shutdowns or error.
+ */
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ skerr = 0;
+ lskerr = sizeof(skerr);
+ if ((getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) < 0) ||
+ skerr != 0)
+ goto out_error;
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ /*
+ * Check if we have early data. If we do, we have to read it
+ * before SSL_do_handshake() is called, and there's no way to
+ * detect early data except by trying to read it.
+ */
+ if (conn->flags & CO_FL_EARLY_SSL_HS) {
+ size_t read_data = 0;
+
+ while (1) {
+ ret = SSL_read_early_data(ctx->ssl,
+ b_tail(&ctx->early_buf), b_room(&ctx->early_buf),
+ &read_data);
+ if (ret == SSL_READ_EARLY_DATA_ERROR)
+ goto check_error;
+ if (read_data > 0) {
+ conn->flags |= CO_FL_EARLY_DATA;
+ b_add(&ctx->early_buf, read_data);
+ }
+ if (ret == SSL_READ_EARLY_DATA_FINISH) {
+ conn->flags &= ~CO_FL_EARLY_SSL_HS;
+ if (!b_data(&ctx->early_buf))
+ b_free(&ctx->early_buf);
+ break;
+ }
+ }
+ }
+#endif
+ /* If we use SSL_do_handshake to process a renegotiation initiated by
+ * the remote peer, it sometimes returns SSL_ERROR_SSL.
+ * Usually SSL_write and SSL_read are used instead and implicitly
+ * process the reneg handshake.
+ * Here we use SSL_peek as a workaround for reneg.
+ */
+ if (!(conn->flags & CO_FL_WAIT_L6_CONN) && SSL_renegotiate_pending(ctx->ssl)) {
+ char c;
+
+ ret = SSL_peek(ctx->ssl, &c, 1);
+ if (ret <= 0) {
+ /* handshake may have not been completed, let's find why */
+ ret = SSL_get_error(ctx->ssl, ret);
+
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ /* SSL handshake needs to write, L4 connection may not be ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_SEND))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+ return 0;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ /* handshake may have been completed but we have
+ * no more data to read.
+ */
+ if (!SSL_renegotiate_pending(ctx->ssl)) {
+ ret = 1;
+ goto reneg_ok;
+ }
+ /* SSL handshake needs to read, L4 connection is ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_RECV))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV, &ctx->wait_event);
+ return 0;
+ }
+#ifdef SSL_MODE_ASYNC
+ else if (ret == SSL_ERROR_WANT_ASYNC) {
+ ssl_async_process_fds(ctx);
+ return 0;
+ }
+#endif
+ else if (ret == SSL_ERROR_SYSCALL) {
+ /* if errno is null, then connection was successfully established */
+ if (!errno && conn->flags & CO_FL_WAIT_L4_CONN)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ if (!conn->err_code) {
+#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
+ /* do not handle empty handshakes in BoringSSL or LibreSSL */
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+#else
+ int empty_handshake;
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x1010000fL)
+ /* use SSL_get_state() in OpenSSL >= 1.1.0; SSL_state() is broken */
+ OSSL_HANDSHAKE_STATE state = SSL_get_state((SSL *)ctx->ssl);
+ empty_handshake = state == TLS_ST_BEFORE;
+#else
+ /* access packet_length directly in OpenSSL <= 1.0.2; SSL_state() is broken */
+ empty_handshake = !ctx->ssl->packet_length;
+#endif
+ if (empty_handshake) {
+ if (!errno) {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_EMPTY;
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_ABORT;
+ }
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+ }
+#endif /* BoringSSL or LibreSSL */
+ }
+ goto out_error;
+ }
+ else {
+ /* Fail on all other handshake errors */
+ /* Note: OpenSSL may leave unread bytes in the socket's
+ * buffer, causing an RST to be emitted upon close() on
+ * TCP sockets. We first try to drain possibly pending
+ * data to avoid this as much as possible.
+ */
+ conn_ctrl_drain(conn);
+ if (!conn->err_code)
+ conn->err_code = (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT) ?
+ CO_ER_SSL_KILLED_HB : CO_ER_SSL_HANDSHAKE;
+ goto out_error;
+ }
+ }
+ /* read some data: consider handshake completed */
+ goto reneg_ok;
+ }
+ ret = SSL_do_handshake(ctx->ssl);
+check_error:
+ if (ret != 1) {
+ /* handshake did not complete, let's find why */
+ ret = SSL_get_error(ctx->ssl, ret);
+
+ if (!ctx->error_code)
+ ctx->error_code = ERR_peek_error();
+
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ /* SSL handshake needs to write, L4 connection may not be ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_SEND))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+ return 0;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ /* SSL handshake needs to read, L4 connection is ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_RECV))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx,
+ SUB_RETRY_RECV, &ctx->wait_event);
+ return 0;
+ }
+#ifdef SSL_MODE_ASYNC
+ else if (ret == SSL_ERROR_WANT_ASYNC) {
+ ssl_async_process_fds(ctx);
+ return 0;
+ }
+#endif
+ else if (ret == SSL_ERROR_SYSCALL) {
+ /* if errno is null, then connection was successfully established */
+ if (!errno && conn->flags & CO_FL_WAIT_L4_CONN)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ if (!conn->err_code) {
+#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
+ /* do not handle empty handshakes in BoringSSL or LibreSSL */
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+#else
+ int empty_handshake;
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x1010000fL)
+ /* use SSL_get_state() in OpenSSL >= 1.1.0; SSL_state() is broken */
+ OSSL_HANDSHAKE_STATE state = SSL_get_state(ctx->ssl);
+ empty_handshake = state == TLS_ST_BEFORE;
+#else
+ /* access packet_length directly in OpenSSL <= 1.0.2; SSL_state() is broken */
+ empty_handshake = !ctx->ssl->packet_length;
+#endif
+ if (empty_handshake) {
+ if (!errno) {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_EMPTY;
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_ABORT;
+ }
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+ }
+#endif /* BoringSSL or LibreSSL */
+ }
+ goto out_error;
+ }
+ else {
+ /* Fail on all other handshake errors */
+ /* Note: OpenSSL may leave unread bytes in the socket's
+ * buffer, causing an RST to be emitted upon close() on
+ * TCP sockets. We first try to drain possibly pending
+ * data to avoid this as much as possible.
+ */
+ conn_ctrl_drain(conn);
+ if (!conn->err_code)
+ conn->err_code = (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT) ?
+ CO_ER_SSL_KILLED_HB : CO_ER_SSL_HANDSHAKE;
+ goto out_error;
+ }
+ }
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ else {
+ /*
+ * If the server refused the early data, we have to send a
+ * 425 to the client, as we no longer have the data to send
+ * it again.
+ */
+ if ((conn->flags & CO_FL_EARLY_DATA) && (objt_server(conn->target))) {
+ if (SSL_get_early_data_status(ctx->ssl) == SSL_EARLY_DATA_REJECTED) {
+ conn->err_code = CO_ER_SSL_EARLY_FAILED;
+ goto out_error;
+ }
+ }
+ }
+#endif
+
+
+reneg_ok:
+
+#ifdef SSL_MODE_ASYNC
+ /* ASYNC engine API doesn't support moving read/write
+ * buffers. So we disable ASYNC mode right after
+ * the handshake to avoid buffer overflow.
+ */
+ if (global_ssl.async)
+ SSL_clear_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ /* Handshake succeeded */
+ if (!SSL_session_reused(ctx->ssl)) {
+ if (objt_server(conn->target)) {
+ update_freq_ctr(&global.ssl_be_keys_per_sec, 1);
+ if (global.ssl_be_keys_per_sec.curr_ctr > global.ssl_be_keys_max)
+ global.ssl_be_keys_max = global.ssl_be_keys_per_sec.curr_ctr;
+ }
+ else {
+ update_freq_ctr(&global.ssl_fe_keys_per_sec, 1);
+ if (global.ssl_fe_keys_per_sec.curr_ctr > global.ssl_fe_keys_max)
+ global.ssl_fe_keys_max = global.ssl_fe_keys_per_sec.curr_ctr;
+ }
+
+ if (counters) {
+ HA_ATOMIC_INC(&counters->sess);
+ HA_ATOMIC_INC(&counters_px->sess);
+ }
+ }
+ else if (counters) {
+ HA_ATOMIC_INC(&counters->reused_sess);
+ HA_ATOMIC_INC(&counters_px->reused_sess);
+ }
+
+ /* The connection is now established at both layers, it's time to leave */
+ conn->flags &= ~(flag | CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN);
+ return 1;
+
+ out_error:
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+
+ /* free the resumed session if it exists */
+ if (objt_server(conn->target)) {
+ struct server *s = __objt_server(conn->target);
+ /* RWLOCK: only rdlock the SSL cache even when writing to it, because
+ * there is one cache per thread; the lock only prevents it from being
+ * flushed from the CLI by another thread */
+
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ if (s->ssl_ctx.reused_sess[tid].ptr)
+ ha_free(&s->ssl_ctx.reused_sess[tid].ptr);
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ }
+
+ if (counters) {
+ HA_ATOMIC_INC(&counters->failed_handshake);
+ HA_ATOMIC_INC(&counters_px->failed_handshake);
+ }
+
+ /* Fail on all other handshake errors */
+ conn->flags |= CO_FL_ERROR;
+ if (!conn->err_code)
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+ return 0;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0,
+ * unless the transport layer was already released.
+ */
+static int ssl_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (!ctx)
+ return -1;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ ctx->subs = es;
+ es->events |= event_type;
+
+ /* we may have to subscribe to lower layers for new events */
+ event_type &= ~ctx->wait_event.events;
+ if (event_type && !(conn->flags & CO_FL_SSL_WAIT_HS))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, event_type, &ctx->wait_event);
+ return 0;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int ssl_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ ctx->subs = NULL;
+
+ /* If we subscribed and we're not doing the handshake, then we
+ * subscribed because the upper layer asked for it; since the
+ * upper layer is no longer interested, we can unsubscribe too.
+ */
+ event_type &= ctx->wait_event.events;
+ if (event_type && !(ctx->conn->flags & CO_FL_SSL_WAIT_HS))
+ conn_unsubscribe(conn, ctx->xprt_ctx, event_type, &ctx->wait_event);
+
+ return 0;
+}
+
+/* The connection has been taken over, so destroy the old tasklet and create
+ * a new one. The original thread ID must be passed in <orig_tid>.
+ * It should be called with the takeover lock for the old thread held.
+ * Returns 0 on success, and -1 on failure.
+ */
+static int ssl_takeover(struct connection *conn, void *xprt_ctx, int orig_tid)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ struct tasklet *tl = tasklet_new();
+
+ if (!tl)
+ return -1;
+
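+ /* The old tasklet is woken up on the original thread with a NULL
+ * context so that it frees itself there (see the TASK_F_USR1
+ * handling in ssl_sock_io_cb); the new tasklet takes over locally.
+ */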
+ ctx->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(ctx->wait_event.tasklet, orig_tid);
+ ctx->wait_event.tasklet = tl;
+ ctx->wait_event.tasklet->process = ssl_sock_io_cb;
+ ctx->wait_event.tasklet->context = ctx;
+ return 0;
+}
+
+/* notify the next xprt that the connection is about to become idle and that it
+ * may be stolen at any time after the function returns and that any tasklet in
+ * the chain must be careful before dereferencing its context.
+ */
+static void ssl_set_idle(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (!ctx || !ctx->wait_event.tasklet)
+ return;
+
+ HA_ATOMIC_OR(&ctx->wait_event.tasklet->state, TASK_F_USR1);
+ if (ctx->xprt)
+ xprt_set_idle(conn, ctx->xprt, ctx->xprt_ctx);
+}
+
+/* notify the next xprt that the connection is not idle anymore and that it may
+ * not be stolen before the next xprt_set_idle().
+ */
+static void ssl_set_used(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (!ctx || !ctx->wait_event.tasklet)
+ return;
+
+ HA_ATOMIC_OR(&ctx->wait_event.tasklet->state, TASK_F_USR1);
+ if (ctx->xprt)
+ xprt_set_used(conn, ctx->xprt, ctx->xprt_ctx);
+}
+
+/* Use the provided XPRT as an underlying XPRT, and provide the old one.
+ * Returns 0 on success, and non-zero on failure.
+ */
+static int ssl_add_xprt(struct connection *conn, void *xprt_ctx, void *toadd_ctx, const struct xprt_ops *toadd_ops, void **oldxprt_ctx, const struct xprt_ops **oldxprt_ops)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (oldxprt_ops != NULL)
+ *oldxprt_ops = ctx->xprt;
+ if (oldxprt_ctx != NULL)
+ *oldxprt_ctx = ctx->xprt_ctx;
+ ctx->xprt = toadd_ops;
+ ctx->xprt_ctx = toadd_ctx;
+ return 0;
+}
+
+/* Remove the specified xprt. If it is our underlying XPRT, remove it and
+ * return 0, otherwise just call the remove_xprt method from the underlying
+ * XPRT.
+ */
+static int ssl_remove_xprt(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt_ctx == toremove_ctx) {
+ ctx->xprt_ctx = newctx;
+ ctx->xprt = newops;
+ return 0;
+ }
+ return (ctx->xprt->remove_xprt(conn, ctx->xprt_ctx, toremove_ctx, newops, newctx));
+}
+
+struct task *ssl_sock_io_cb(struct task *t, void *context, unsigned int state)
+{
+ struct tasklet *tl = (struct tasklet *)t;
+ struct ssl_sock_ctx *ctx = context;
+ struct connection *conn;
+ int conn_in_list;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (tl->context == NULL) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ return NULL;
+ }
+ conn = ctx->conn;
+ conn_in_list = conn_get_idle_flag(conn);
+ if (conn_in_list)
+ conn_delete_from_tree(&conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ conn = ctx->conn;
+ conn_in_list = 0;
+ }
+
+ /* First, if we're doing a handshake, try that */
+ if (ctx->conn->flags & CO_FL_SSL_WAIT_HS) {
+ ssl_sock_handshake(ctx->conn, CO_FL_SSL_WAIT_HS);
+ if (!(ctx->conn->flags & CO_FL_SSL_WAIT_HS)) {
+ /* handshake completed, leave the bulk queue */
+ _HA_ATOMIC_AND(&tl->state, ~TASK_HEAVY);
+ }
+ }
+ /* If we had an error, or the handshake is done and I/O is available,
+ * let the upper layer know.
+ * If no mux was set up yet, then call conn_create_mux(), since
+ * we can't be sure conn_fd_handler() will be called again.
+ */
+ if ((ctx->conn->flags & CO_FL_ERROR) ||
+ !(ctx->conn->flags & CO_FL_SSL_WAIT_HS)) {
+ int woke = 0;
+
+ /* On error, wake any waiter */
+ if (ctx->subs) {
+ tasklet_wakeup(ctx->subs->tasklet);
+ ctx->subs->events = 0;
+ woke = 1;
+ ctx->subs = NULL;
+ }
+
+ /* If we're the first xprt for the connection, let the
+ * upper layers know. If we have no mux, create it,
+ * and once we have a mux, call its wake method if we didn't
+ * woke a tasklet already.
+ */
+ if (ctx->conn->xprt_ctx == ctx) {
+ if (!ctx->conn->mux)
+ ret = conn_create_mux(ctx->conn);
+ if (ret >= 0 && !woke && ctx->conn->mux && ctx->conn->mux->wake)
+ ret = ctx->conn->mux->wake(ctx->conn);
+ goto leave;
+ }
+ }
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ /* If we have early data and somebody wants to receive, let them */
+ else if (b_data(&ctx->early_buf) && ctx->subs &&
+ ctx->subs->events & SUB_RETRY_RECV) {
+ tasklet_wakeup(ctx->subs->tasklet);
+ ctx->subs->events &= ~SUB_RETRY_RECV;
+ if (!ctx->subs->events)
+ ctx->subs = NULL;
+ }
+#endif
+leave:
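+ /* If the connection was removed from its idle tree at the top of this
+ * function, put it back, unless an error was reported above (in which
+ * case the connection may no longer be usable).
+ */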
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (conn_in_list == CO_FL_SAFE_LIST)
+ eb64_insert(&srv->per_thr[tid].safe_conns, &conn->hash_node->node);
+ else
+ eb64_insert(&srv->per_thr[tid].idle_conns, &conn->hash_node->node);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ return t;
+}
+
+/* Receive up to <count> bytes from connection <conn>'s socket and store them
+ * into buffer <buf>. Only one call to recv() is performed, unless the
+ * buffer wraps, in which case a second call may be performed. The connection's
+ * flags are updated with whatever special event is detected (error, read0,
+ * empty). The caller is responsible for taking care of those events and
+ * avoiding the call if inappropriate. The function does not call the
+ * connection's polling update function, so the caller is responsible for this.
+ */
+static size_t ssl_sock_to_buf(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ ssize_t ret;
+ size_t try, done = 0;
+
+ if (!ctx)
+ goto out_error;
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
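+ /* early data received during the handshake were stored into
+ * early_buf; deliver them first and release the buffer once it is
+ * fully consumed */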
+ if (b_data(&ctx->early_buf)) {
+ try = b_contig_space(buf);
+ if (try > b_data(&ctx->early_buf))
+ try = b_data(&ctx->early_buf);
+ memcpy(b_tail(buf), b_head(&ctx->early_buf), try);
+ b_add(buf, try);
+ b_del(&ctx->early_buf, try);
+ if (b_data(&ctx->early_buf) == 0)
+ b_free(&ctx->early_buf);
+ return try;
+ }
+#endif
+
+ if (conn->flags & (CO_FL_WAIT_XPRT | CO_FL_SSL_WAIT_HS))
+ /* a handshake was requested */
+ return 0;
+
+ /* read the largest possible block. For this, we perform only one call
+ * to recv() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again. A new attempt is made on
+ * EINTR too.
+ */
+ while (count > 0) {
+
+ try = b_contig_space(buf);
+ if (!try)
+ break;
+
+ if (try > count)
+ try = count;
+
+ ret = SSL_read(ctx->ssl, b_tail(buf), try);
+
+ if (conn->flags & CO_FL_ERROR) {
+ /* CO_FL_ERROR may be set by ssl_sock_infocbk */
+ goto out_error;
+ }
+ if (ret > 0) {
+ b_add(buf, ret);
+ done += ret;
+ count -= ret;
+ }
+ else {
+ ret = SSL_get_error(ctx->ssl, ret);
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ /* handshake is running, and it needs to enable write */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state. */
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ if (SSL_renegotiate_pending(ctx->ssl)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx,
+ SUB_RETRY_RECV,
+ &ctx->wait_event);
+ /* handshake is running, and it may need to re-enable read */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state. */
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+ break;
+ } else if (ret == SSL_ERROR_ZERO_RETURN)
+ goto read0;
+ else if (ret == SSL_ERROR_SSL) {
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ if (ctx && !ctx->error_code)
+ ctx->error_code = ERR_peek_error();
+ conn->err_code = CO_ERR_SSL_FATAL;
+ }
+ /* For SSL_ERROR_SYSCALL, make sure to clear the error
+ * stack before shutting down the connection for
+ * reading. */
+ if (ret == SSL_ERROR_SYSCALL && (!errno || errno == EAGAIN || errno == EWOULDBLOCK))
+ goto clear_ssl_error;
+ /* otherwise it's a real error */
+ goto out_error;
+ }
+ }
+ leave:
+ return done;
+
+ clear_ssl_error:
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+ read0:
+ conn_sock_read0(conn);
+ goto leave;
+
+ out_error:
+ conn->flags |= CO_FL_ERROR;
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+ goto leave;
+}
+
+
+/* Send up to <count> pending bytes from buffer <buf> to connection <conn>'s
+ * socket. <flags> may contain some CO_SFL_* flags to hint the system about
+ * other pending data for example, but this flag is ignored at the moment.
+ * Only one call to send() is performed, unless the buffer wraps, in which case
+ * a second call may be performed. The connection's flags are updated with
+ * whatever special event is detected (error, empty). The caller is responsible
+ * for taking care of those events and avoiding the call if inappropriate. The
+ * function does not call the connection's polling update function, so the caller
+ * is responsible for this. The buffer's output is not adjusted, it's up to the
+ * caller to take care of this. It's up to the caller to update the buffer's
+ * contents based on the return value.
+ */
+static size_t ssl_sock_from_buf(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ ssize_t ret;
+ size_t try, done;
+
+ done = 0;
+
+ if (!ctx)
+ goto out_error;
+
+ if (conn->flags & (CO_FL_WAIT_XPRT | CO_FL_SSL_WAIT_HS | CO_FL_EARLY_SSL_HS))
+ /* a handshake was requested */
+ return 0;
+
+ /* send the largest possible block. For this we perform only one call
+ * to send() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again.
+ */
+ while (count) {
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ size_t written_data;
+#endif
+
+ try = b_contig_data(buf, done);
+ if (try > count)
+ try = count;
+
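+ /* never exceed the configured hard limit on the outgoing TLS
+ * record size, regardless of the adjustments below */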
+ if (global_ssl.hard_max_record && try > global_ssl.hard_max_record)
+ try = global_ssl.hard_max_record;
+
+ if (!(flags & CO_SFL_STREAMER) &&
+ !(ctx->xprt_st & SSL_SOCK_SEND_UNLIMITED) &&
+ global_ssl.max_record && try > global_ssl.max_record) {
+ try = global_ssl.max_record;
+ }
+ else {
+ /* we need to keep the information about the fact that
+ * we're not limiting the upcoming send(), because if it
+ * fails, we'll have to retry with at least as many data.
+ */
+ ctx->xprt_st |= SSL_SOCK_SEND_UNLIMITED;
+ }
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ if (!SSL_is_init_finished(ctx->ssl) && conn_is_back(conn)) {
+ unsigned int max_early;
+
+ if (objt_listener(conn->target))
+ max_early = SSL_get_max_early_data(ctx->ssl);
+ else {
+ if (SSL_get0_session(ctx->ssl))
+ max_early = SSL_SESSION_get_max_early_data(SSL_get0_session(ctx->ssl));
+ else
+ max_early = 0;
+ }
+
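+ /* clamp the amount to what the peer still allows as early
+ * data; if nothing may be sent anymore, switch to the
+ * regular handshake instead */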
+ if (try + ctx->sent_early_data > max_early) {
+ try -= (try + ctx->sent_early_data) - max_early;
+ if (try <= 0) {
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN;
+ tasklet_wakeup(ctx->wait_event.tasklet);
+ break;
+ }
+ }
+ ret = SSL_write_early_data(ctx->ssl, b_peek(buf, done), try, &written_data);
+ if (ret == 1) {
+ ret = written_data;
+ ctx->sent_early_data += ret;
+ if (objt_server(conn->target)) {
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN | CO_FL_EARLY_DATA;
+ /* Initiate the handshake, now */
+ tasklet_wakeup(ctx->wait_event.tasklet);
+ }
+
+ }
+
+ } else
+#endif
+ ret = SSL_write(ctx->ssl, b_peek(buf, done), try);
+
+ if (conn->flags & CO_FL_ERROR) {
+ /* CO_FL_ERROR may be set by ssl_sock_infocbk */
+ goto out_error;
+ }
+ if (ret > 0) {
+ /* A send succeeded, so we can consider ourself connected */
+ conn->flags &= ~CO_FL_WAIT_L4L6;
+ ctx->xprt_st &= ~SSL_SOCK_SEND_UNLIMITED;
+ count -= ret;
+ done += ret;
+ }
+ else {
+ ret = SSL_get_error(ctx->ssl, ret);
+
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ if (SSL_renegotiate_pending(ctx->ssl)) {
+ /* handshake is running, and it may need to re-enable write */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state. */
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+
+ break;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ /* handshake is running, and it needs to enable read */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx,
+ SUB_RETRY_RECV,
+ &ctx->wait_event);
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state. */
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+ else if (ret == SSL_ERROR_SSL || ret == SSL_ERROR_SYSCALL) {
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (ctx && !ctx->error_code)
+ ctx->error_code = ERR_peek_error();
+ conn->err_code = CO_ERR_SSL_FATAL;
+ }
+ goto out_error;
+ }
+ }
+ leave:
+ return done;
+
+ out_error:
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+
+ conn->flags |= CO_FL_ERROR;
+ goto leave;
+}
+
+void ssl_sock_close(struct connection *conn, void *xprt_ctx) {
+
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+
+ if (ctx) {
+ if (ctx->wait_event.events != 0)
+ ctx->xprt->unsubscribe(ctx->conn, ctx->xprt_ctx,
+ ctx->wait_event.events,
+ &ctx->wait_event);
+ if (ctx->subs) {
+ ctx->subs->events = 0;
+ tasklet_wakeup(ctx->subs->tasklet);
+ }
+
+ if (ctx->xprt->close)
+ ctx->xprt->close(conn, ctx->xprt_ctx);
+#ifdef SSL_MODE_ASYNC
+ if (global_ssl.async) {
+ OSSL_ASYNC_FD all_fd[32], afd;
+ size_t num_all_fds = 0;
+ int i;
+
+ SSL_get_all_async_fds(ctx->ssl, NULL, &num_all_fds);
+ if (num_all_fds > 32) {
+ send_log(NULL, LOG_EMERG, "haproxy: openssl returned too many async fds. This looks like a bug; the process may crash\n");
+ return;
+ }
+
+ SSL_get_all_async_fds(ctx->ssl, all_fd, &num_all_fds);
+
+ /* If an async job is pending, we must try to
+ catch its end using polling before calling
+ SSL_free */
+ if (num_all_fds && SSL_waiting_for_async(ctx->ssl)) {
+ for (i=0 ; i < num_all_fds ; i++) {
+ /* switch to a handler designed to
+ * handle the SSL_free
+ */
+ afd = all_fd[i];
+ fdtab[afd].iocb = ssl_async_fd_free;
+ fdtab[afd].owner = ctx->ssl;
+ fd_want_recv(afd);
+ /* To ensure that the fd cache won't be used
+ * and we'll catch a real RD event.
+ */
+ fd_cant_recv(afd);
+ }
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(ssl_sock_ctx_pool, ctx);
+ _HA_ATOMIC_INC(&jobs);
+ return;
+ }
+ /* Else we can remove the fds from the fdtab
+ * and call SSL_free.
+ * note: the fds are flagged as disowned before
+ * being deleted because they are owned by the
+ * engine, which is responsible for closing them.
+ */
+ for (i=0 ; i < num_all_fds ; i++) {
+ /* We want to remove the fd from the fdtab
+ * but we flag it to disown because the
+ * close is performed by the engine itself
+ */
+ fdtab[all_fd[i]].state |= FD_DISOWN;
+ fd_delete(all_fd[i]);
+ }
+ }
+#endif
+ SSL_free(ctx->ssl);
+ b_free(&ctx->early_buf);
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(ssl_sock_ctx_pool, ctx);
+ _HA_ATOMIC_DEC(&global.sslconns);
+ }
+}
+
+/* This function tries to perform a clean shutdown on an SSL connection, and in
+ * any case, flags the connection as reusable if no handshake was in progress.
+ */
+static void ssl_sock_shutw(struct connection *conn, void *xprt_ctx, int clean)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (conn->flags & (CO_FL_WAIT_XPRT | CO_FL_SSL_WAIT_HS))
+ return;
+ if (!clean)
+ /* don't send a close_notify alert on SSL_shutdown */
+ SSL_set_quiet_shutdown(ctx->ssl, 1);
+ /* no handshake was in progress, try a clean ssl shutdown */
+ if (SSL_shutdown(ctx->ssl) <= 0) {
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+ }
+}
+
+
+/* used for ppv2 pkey algo (can be used for logging) */
+int ssl_sock_get_pkey_algo(struct connection *conn, struct buffer *out)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ X509 *crt;
+
+ if (!ctx)
+ return 0;
+ crt = SSL_get_certificate(ctx->ssl);
+ if (!crt)
+ return 0;
+
+ return cert_get_pkey_algo(crt, out);
+}
+
+/* used for ppv2 cert signature (can be used for logging) */
+const char *ssl_sock_get_cert_sig(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ __OPENSSL_110_CONST__ ASN1_OBJECT *algorithm;
+ X509 *crt;
+
+ if (!ctx)
+ return NULL;
+ crt = SSL_get_certificate(ctx->ssl);
+ if (!crt)
+ return NULL;
+ X509_ALGOR_get0(&algorithm, NULL, NULL, X509_get0_tbs_sigalg(crt));
+ return OBJ_nid2sn(OBJ_obj2nid(algorithm));
+}
+
+/* used for ppv2 authority */
+const char *ssl_sock_get_sni(struct connection *conn)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return NULL;
+ return SSL_get_servername(ctx->ssl, TLSEXT_NAMETYPE_host_name);
+#else
+ return NULL;
+#endif
+}
+
+/* used for logging/ppv2, may be changed for a sample fetch later */
+const char *ssl_sock_get_cipher_name(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return NULL;
+ return SSL_get_cipher_name(ctx->ssl);
+}
+
+/* used for logging/ppv2, may be changed for a sample fetch later */
+const char *ssl_sock_get_proto_version(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return NULL;
+ return SSL_get_version(ctx->ssl);
+}
+
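+/* Sets the list of ALPN protocols advertised on the connection, when ALPN is
+ * supported by the SSL library */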
+void ssl_sock_set_alpn(struct connection *conn, const unsigned char *alpn, int len)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return;
+ SSL_set_alpn_protos(ctx->ssl, alpn, len);
+#endif
+}
+
+/* Sets the SNI advertised on outgoing connections. Set <hostname> to NULL
+ * to disable SNI.
+ */
+void ssl_sock_set_servername(struct connection *conn, const char *hostname)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ struct server *s;
+ char *prev_name;
+
+ if (!ctx)
+ return;
+
+ BUG_ON(!(conn->flags & CO_FL_WAIT_L6_CONN));
+ BUG_ON(!(conn->flags & CO_FL_SSL_WAIT_HS));
+
+ s = __objt_server(conn->target);
+
+ /* If the SNI changes, we must destroy the reusable context so that a
+ * new connection will present a new SNI. It is compared with the SNI
+ * previously stored in the reused_sess. */
+ /* The RWLOCK ensures that the cache is not being flushed from the
+ * CLI at the same time. */
+
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ prev_name = s->ssl_ctx.reused_sess[tid].sni;
+ if ((!prev_name && hostname) ||
+ (prev_name && (!hostname || strcmp(hostname, prev_name) != 0)))
+ SSL_set_session(ctx->ssl, NULL);
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+
+ SSL_set_tlsext_host_name(ctx->ssl, hostname);
+#endif
+}
+
+/* Extracts the peer certificate's common name into the chunk <dest>.
+ * Returns:
+ * the length of the extracted common name,
+ * or 0 if no CN was found in the DN,
+ * or -1 on error (i.e. no peer certificate)
+ */
+int ssl_sock_get_remote_common_name(struct connection *conn,
+ struct buffer *dest)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ X509 *crt = NULL;
+ X509_NAME *name;
+ const char find_cn[] = "CN";
+ const struct buffer find_cn_chunk = {
+ .area = (char *)&find_cn,
+ .data = sizeof(find_cn)-1
+ };
+ int result = -1;
+
+ if (!ctx)
+ goto out;
+
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ crt = SSL_get_peer_certificate(ctx->ssl);
+ if (!crt)
+ goto out;
+
+ name = X509_get_subject_name(crt);
+ if (!name)
+ goto out;
+
+ result = ssl_sock_get_dn_entry(name, &find_cn_chunk, 1, dest);
+out:
+ if (crt)
+ X509_free(crt);
+
+ return result;
+}
+
+/* returns 1 if client passed a certificate for this session, 0 if not */
+int ssl_sock_get_cert_used_sess(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ X509 *crt = NULL;
+
+ if (!ctx)
+ return 0;
+
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ crt = SSL_get_peer_certificate(ctx->ssl);
+ if (!crt)
+ return 0;
+
+ X509_free(crt);
+ return 1;
+}
+
+/* returns 1 if client passed a certificate for this connection, 0 if not */
+int ssl_sock_get_cert_used_conn(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return 0;
+ return SSL_SOCK_ST_FL_VERIFY_DONE & ctx->xprt_st ? 1 : 0;
+}
+
+/* returns result from SSL verify */
+unsigned int ssl_sock_get_verify_result(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return (unsigned int)X509_V_ERR_APPLICATION_VERIFICATION;
+ return (unsigned int)SSL_get_verify_result(ctx->ssl);
+}
+
+/* Returns the application layer protocol name in <str> and <len> when known.
+ * Zero is returned if the protocol name was not found, otherwise non-zero is
+ * returned. The string is allocated in the SSL context and doesn't have to be
+ * freed by the caller. NPN is also checked if available, since older versions
+ * of OpenSSL (1.0.1), which are more common in the field, only support it.
+ */
+int ssl_sock_get_alpn(const struct connection *conn, void *xprt_ctx, const char **str, int *len)
+{
+#if defined(TLSEXT_TYPE_application_layer_protocol_negotiation) || \
+ defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ if (!ctx)
+ return 0;
+
+ *str = NULL;
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ SSL_get0_alpn_selected(ctx->ssl, (const unsigned char **)str, (unsigned *)len);
+ if (*str)
+ return 1;
+#endif
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ SSL_get0_next_proto_negotiated(ctx->ssl, (const unsigned char **)str, (unsigned *)len);
+ if (*str)
+ return 1;
+#endif
+#endif
+ return 0;
+}
+
+/* "issuers-chain-path" load chain certificate in global */
+int ssl_load_global_issuer_from_BIO(BIO *in, char *fp, char **err)
+{
+ X509 *ca;
+ X509_NAME *name = NULL;
+ ASN1_OCTET_STRING *skid = NULL;
+ STACK_OF(X509) *chain = NULL;
+ struct issuer_chain *issuer;
+ struct eb64_node *node;
+ char *path;
+ u64 key;
+ int ret = 0;
+
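+ /* Read every PEM certificate from <in> into a single chain. The chain
+ * is indexed in cert_issuer_tree by an XXH3 hash of its first
+ * certificate's SubjectKeyIdentifier, and a chain whose first subject
+ * name is already present is rejected as a duplicate.
+ */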
+ while ((ca = PEM_read_bio_X509(in, NULL, NULL, NULL))) {
+ if (chain == NULL) {
+ chain = sk_X509_new_null();
+ skid = X509_get_ext_d2i(ca, NID_subject_key_identifier, NULL, NULL);
+ name = X509_get_subject_name(ca);
+ }
+ if (!sk_X509_push(chain, ca)) {
+ X509_free(ca);
+ goto end;
+ }
+ }
+ if (!chain) {
+ memprintf(err, "unable to load issuers-chain %s : pem certificate not found.\n", fp);
+ goto end;
+ }
+ if (!skid) {
+ memprintf(err, "unable to load issuers-chain %s : SubjectKeyIdentifier not found.\n", fp);
+ goto end;
+ }
+ if (!name) {
+ memprintf(err, "unable to load issuers-chain %s : SubjectName not found.\n", fp);
+ goto end;
+ }
+ key = XXH3(ASN1_STRING_get0_data(skid), ASN1_STRING_length(skid), 0);
+ for (node = eb64_lookup(&cert_issuer_tree, key); node; node = eb64_next(node)) {
+ issuer = container_of(node, typeof(*issuer), node);
+ if (!X509_NAME_cmp(name, X509_get_subject_name(sk_X509_value(issuer->chain, 0)))) {
+ memprintf(err, "duplicate issuers-chain %s: %s already in store\n", fp, issuer->path);
+ goto end;
+ }
+ }
+ issuer = calloc(1, sizeof *issuer);
+ path = strdup(fp);
+ if (!issuer || !path) {
+ free(issuer);
+ free(path);
+ goto end;
+ }
+ issuer->node.key = key;
+ issuer->path = path;
+ issuer->chain = chain;
+ chain = NULL;
+ eb64_insert(&cert_issuer_tree, &issuer->node);
+ ret = 1;
+ end:
+ if (skid)
+ ASN1_OCTET_STRING_free(skid);
+ if (chain)
+ sk_X509_pop_free(chain, X509_free);
+ return ret;
+}
+
+struct issuer_chain *ssl_get0_issuer_chain(X509 *cert)
+{
+ AUTHORITY_KEYID *akid;
+ struct issuer_chain *issuer = NULL;
+
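+ /* Look up candidate chains by the hash of this certificate's
+ * AuthorityKeyIdentifier, then confirm each candidate with
+ * X509_check_issued() to resolve hash collisions.
+ */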
+ akid = X509_get_ext_d2i(cert, NID_authority_key_identifier, NULL, NULL);
+ if (akid && akid->keyid) {
+ struct eb64_node *node;
+ u64 hk;
+ hk = XXH3(ASN1_STRING_get0_data(akid->keyid), ASN1_STRING_length(akid->keyid), 0);
+ for (node = eb64_lookup(&cert_issuer_tree, hk); node; node = eb64_next(node)) {
+ struct issuer_chain *ti = container_of(node, typeof(*issuer), node);
+ if (X509_check_issued(sk_X509_value(ti->chain, 0), cert) == X509_V_OK) {
+ issuer = ti;
+ break;
+ }
+ }
+ }
+ AUTHORITY_KEYID_free(akid);
+ return issuer;
+}
+
+void ssl_free_global_issuers(void)
+{
+ struct eb64_node *node, *back;
+ struct issuer_chain *issuer;
+
+ node = eb64_first(&cert_issuer_tree);
+ while (node) {
+ issuer = container_of(node, typeof(*issuer), node);
+ back = eb64_next(node);
+ eb64_delete(node);
+ free(issuer->path);
+ sk_X509_pop_free(issuer->chain, X509_free);
+ free(issuer);
+ node = back;
+ }
+}
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+static int ssl_check_async_engine_count(void) {
+ int err_code = ERR_NONE;
+
+ if (global_ssl.async && (openssl_engines_initialized > 32)) {
+ ha_alert("ssl-mode-async only supports a maximum of 32 engines.\n");
+ err_code = ERR_ABORT;
+ }
+ return err_code;
+}
+#endif
+
+/* "show fd" helper to dump ssl internals. Warning: the output buffer is often
+ * the common trash! It returns non-zero if the connection entry looks suspicious.
+ */
+static int ssl_sock_show_fd(struct buffer *buf, const struct connection *conn, const void *ctx)
+{
+ const struct ssl_sock_ctx *sctx = ctx;
+ int ret = 0;
+
+ if (!sctx)
+ return ret;
+
+ if (sctx->conn != conn) {
+ chunk_appendf(&trash, " xctx.conn=%p(BOGUS)", sctx->conn);
+ ret = 1;
+ }
+ chunk_appendf(&trash, " xctx.st=%d .err=%ld", sctx->xprt_st, sctx->error_code);
+
+ if (sctx->xprt) {
+ chunk_appendf(&trash, " .xprt=%s", sctx->xprt->name);
+ if (sctx->xprt_ctx)
+ chunk_appendf(&trash, " .xctx=%p", sctx->xprt_ctx);
+ }
+
+ chunk_appendf(&trash, " .wait.ev=%d", sctx->wait_event.events);
+
+ /* as soon as a shutdown is reported the lower layer unregisters its
+ * subscriber, so the situations below are transient and rare enough to
+ * be reported as suspicious. In any case they shouldn't last.
+ */
+ if ((sctx->wait_event.events & 1) && (conn->flags & (CO_FL_SOCK_RD_SH|CO_FL_ERROR)))
+ ret = 1;
+ if ((sctx->wait_event.events & 2) && (conn->flags & (CO_FL_SOCK_WR_SH|CO_FL_ERROR)))
+ ret = 1;
+
+ chunk_appendf(&trash, " .subs=%p", sctx->subs);
+ if (sctx->subs) {
+ chunk_appendf(&trash, "(ev=%d tl=%p", sctx->subs->events, sctx->subs->tasklet);
+ if (sctx->subs->tasklet->calls >= 1000000)
+ ret = 1;
+ chunk_appendf(&trash, " tl.calls=%d tl.ctx=%p tl.fct=",
+ sctx->subs->tasklet->calls,
+ sctx->subs->tasklet->context);
+ resolve_sym_name(&trash, NULL, sctx->subs->tasklet->process);
+ chunk_appendf(&trash, ")");
+ }
+ chunk_appendf(&trash, " .sent_early=%d", sctx->sent_early_data);
+ chunk_appendf(&trash, " .early_in=%d", (int)sctx->early_buf.data);
+ return ret;
+}
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+/* This function is used for TLS ticket keys management. It allows browsing
+ * each reference. The variable <ref> must point to the current node's list
+ * element (which starts at the root), and <end> must point to the root node.
+ */
+static inline
+struct tls_keys_ref *tlskeys_list_get_next(struct list *ref, struct list *end)
+{
+ /* Get next list entry. */
+ ref = ref->n;
+
+ /* If the entry is the last of the list, return NULL. */
+ if (ref == end)
+ return NULL;
+
+ return LIST_ELEM(ref, struct tls_keys_ref *, list);
+}
+
+static inline
+struct tls_keys_ref *tlskeys_ref_lookup_ref(const char *reference)
+{
+ int id;
+ char *error;
+
+ /* If the reference starts with a '#', it is a numeric id. */
+ if (reference[0] == '#') {
+ /* Try to convert the numeric id. If the conversion fails, the lookup fails. */
+ id = strtol(reference + 1, &error, 10);
+ if (*error != '\0')
+ return NULL;
+
+ /* Perform the unique id lookup. */
+ return tlskeys_ref_lookupid(id);
+ }
+
+ /* Perform the string lookup. */
+ return tlskeys_ref_lookup(reference);
+}
+#endif
+
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+
+/* dumps all tls keys. Relies on the show_keys_ctx context from the appctx. */
+static int cli_io_handler_tlskeys_files(struct appctx *appctx)
+{
+ struct show_keys_ctx *ctx = appctx->svcctx;
+
+ switch (ctx->state) {
+ case SHOW_KEYS_INIT:
+ /* Display the column headers. If the message cannot be sent,
+ * quit the function returning 0. The function will be called
+ * again later and will restart at the "SHOW_KEYS_INIT" state.
+ */
+ chunk_reset(&trash);
+
+ if (ctx->dump_entries)
+ chunk_appendf(&trash, "# id secret\n");
+ else
+ chunk_appendf(&trash, "# id (file)\n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ /* Now, we start browsing the reference lists.
+ * Note that the following call to LIST_ELEM returns a bad pointer. The only
+ * usable field of this pointer is <list>. It is used with the function
+ * tlskeys_list_get_next() to return the first available entry
+ */
+ if (ctx->next_ref == NULL)
+ ctx->next_ref = tlskeys_list_get_next(&tlskeys_reference, &tlskeys_reference);
+
+ ctx->state = SHOW_KEYS_LIST;
+ /* fall through */
+
+ case SHOW_KEYS_LIST:
+ while (ctx->next_ref) {
+ struct tls_keys_ref *ref = ctx->next_ref;
+
+ chunk_reset(&trash);
+ if (ctx->dump_entries && ctx->next_index == 0)
+ chunk_appendf(&trash, "# ");
+
+ if (ctx->next_index == 0)
+ chunk_appendf(&trash, "%d (%s)\n", ref->unique_id, ref->filename);
+
+ if (ctx->dump_entries) {
+ int head;
+
+ HA_RWLOCK_RDLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ head = ref->tls_ticket_enc_index;
+ while (ctx->next_index < TLS_TICKETS_NO) {
+ struct buffer *t2 = get_trash_chunk();
+
+ chunk_reset(t2);
+ /* should never fail here because we dump only a key in the t2 buffer */
+ if (ref->key_size_bits == 128) {
+ t2->data = a2base64((char *)(ref->tlskeys + (head + 2 + ctx->next_index) % TLS_TICKETS_NO),
+ sizeof(struct tls_sess_key_128),
+ t2->area, t2->size);
+ chunk_appendf(&trash, "%d.%d %s\n", ref->unique_id, ctx->next_index,
+ t2->area);
+ }
+ else if (ref->key_size_bits == 256) {
+ t2->data = a2base64((char *)(ref->tlskeys + (head + 2 + ctx->next_index) % TLS_TICKETS_NO),
+ sizeof(struct tls_sess_key_256),
+ t2->area, t2->size);
+ chunk_appendf(&trash, "%d.%d %s\n", ref->unique_id, ctx->next_index,
+ t2->area);
+ }
+ else {
+ /* This case should never happen */
+ chunk_appendf(&trash, "%d.%d <unknown>\n", ref->unique_id, ctx->next_index);
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* the output buffer is full, try again later
+ * from the same state */
+ HA_RWLOCK_RDUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ return 0;
+ }
+ ctx->next_index++;
+ }
+ HA_RWLOCK_RDUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ ctx->next_index = 0;
+ }
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* the output buffer is full, try again later
+ * from the same state */
+ return 0;
+ }
+
+ if (ctx->names_only == 0) /* don't display everything if not necessary */
+ break;
+
+ /* get next list entry and check the end of the list */
+ ctx->next_ref = tlskeys_list_get_next(&ref->list, &tlskeys_reference);
+ }
+ ctx->state = SHOW_KEYS_DONE;
+ /* fall through */
+
+ default:
+ return 1;
+ }
+ return 0;
+}
+
+/* Prepares a "show_keys_ctx" and sets the appropriate io_handler if needed */
+static int cli_parse_show_tlskeys(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_keys_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ /* no parameter, shows only file list */
+ if (!*args[2]) {
+ ctx->names_only = 1;
+ return 0;
+ }
+
+ if (args[2][0] == '*') {
+ /* list every TLS ticket keys */
+ ctx->names_only = 1;
+ } else {
+ ctx->next_ref = tlskeys_ref_lookup_ref(args[2]);
+ if (!ctx->next_ref)
+ return cli_err(appctx, "'show tls-keys' unable to locate referenced filename\n");
+ }
+
+ ctx->dump_entries = 1;
+ return 0;
+}
+
+static int cli_parse_set_tlskeys(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct tls_keys_ref *ref;
+ int ret;
+
+ /* Expect two parameters: the filename and the new TLS key in base64 encoding */
+ if (!*args[3] || !*args[4])
+ return cli_err(appctx, "'set ssl tls-key' expects a filename and the new TLS key in base64 encoding.\n");
+
+ ref = tlskeys_ref_lookup_ref(args[3]);
+ if (!ref)
+ return cli_err(appctx, "'set ssl tls-key' unable to locate referenced filename\n");
+
+ ret = base64dec(args[4], strlen(args[4]), trash.area, trash.size);
+ if (ret < 0)
+ return cli_err(appctx, "'set ssl tls-key' received invalid base64 encoded TLS key.\n");
+
+ trash.data = ret;
+ if (ssl_sock_update_tlskey_ref(ref, &trash) < 0)
+ return cli_err(appctx, "'set ssl tls-key' received a key of wrong size.\n");
+
+ return cli_msg(appctx, LOG_INFO, "TLS ticket key updated!\n");
+}
+#endif
+
+static int cli_parse_set_ocspresponse(char **args, char *payload, struct appctx *appctx, void *private)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP)
+ char *err = NULL;
+ int i, j, ret;
+
+ if (!payload)
+ payload = args[3];
+
+ /* Expect one parameter: the new response in base64 encoding */
+ if (!*payload)
+ return cli_err(appctx, "'set ssl ocsp-response' expects response in base64 encoding.\n");
+
+ /* remove \r and \n from the payload */
+ for (i = 0, j = 0; payload[i]; i++) {
+ if (payload[i] == '\r' || payload[i] == '\n')
+ continue;
+ payload[j++] = payload[i];
+ }
+ payload[j] = 0;
+
+ ret = base64dec(payload, j, trash.area, trash.size);
+ if (ret < 0)
+ return cli_err(appctx, "'set ssl ocsp-response' received invalid base64 encoded response.\n");
+
+ trash.data = ret;
+ if (ssl_sock_update_ocsp_response(&trash, &err)) {
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to update OCSP response.\n");
+ }
+
+ return cli_msg(appctx, LOG_INFO, "OCSP Response updated!\n");
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+
+}
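+
+/* Illustrative update of an OCSP response from the CLI (paths are examples
+ * only): the DER response obtained from the responder is base64-encoded and
+ * sent as a payload:
+ *
+ *   $ echo -e "set ssl ocsp-response <<\n$(base64 -w 0 resp.der)\n" | \
+ *       socat /var/run/haproxy.sock -
+ *   OCSP Response updated!
+ */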
+
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+static int cli_io_handler_show_ocspresponse_detail(struct appctx *appctx);
+#endif
+
+/* parsing function for 'show ssl ocsp-response [id]'. If an entry is forced,
+ * it's set into appctx->svcctx.
+ */
+static int cli_parse_show_ocspresponse(char **args, char *payload, struct appctx *appctx, void *private)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ if (*args[3]) {
+ struct certificate_ocsp *ocsp = NULL;
+ char *key = NULL;
+ int key_length = 0;
+
+ if (strlen(args[3]) > OCSP_MAX_CERTID_ASN1_LENGTH*2) {
+ return cli_err(appctx, "'show ssl ocsp-response' received a too big key.\n");
+ }
+
+ if (parse_binary(args[3], &key, &key_length, NULL)) {
+
+ char full_key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ memcpy(full_key, key, key_length);
+
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, full_key, OCSP_MAX_CERTID_ASN1_LENGTH);
+ }
+ if (key)
+ ha_free(&key);
+
+ if (!ocsp) {
+ return cli_err(appctx, "Certificate ID does not match any certificate.\n");
+ }
+
+ appctx->svcctx = ocsp;
+ appctx->io_handler = cli_io_handler_show_ocspresponse_detail;
+ }
+
+ return 0;
+
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+}
+
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+/*
+ * This function dumps the details of an OCSP_CERTID. It is based on
+ * ocsp_certid_print in OpenSSL.
+ */
+static inline int ocsp_certid_print(BIO *bp, OCSP_CERTID *certid, int indent)
+{
+ ASN1_OCTET_STRING *piNameHash = NULL;
+ ASN1_OCTET_STRING *piKeyHash = NULL;
+ ASN1_INTEGER *pSerial = NULL;
+
+ if (OCSP_id_get0_info(&piNameHash, NULL, &piKeyHash, &pSerial, certid)) {
+
+ BIO_printf(bp, "%*sCertificate ID:\n", indent, "");
+ indent += 2;
+ BIO_printf(bp, "%*sIssuer Name Hash: ", indent, "");
+ i2a_ASN1_STRING(bp, piNameHash, 0);
+ BIO_printf(bp, "\n%*sIssuer Key Hash: ", indent, "");
+ i2a_ASN1_STRING(bp, piKeyHash, 0);
+ BIO_printf(bp, "\n%*sSerial Number: ", indent, "");
+ i2a_ASN1_INTEGER(bp, pSerial);
+ }
+ return 1;
+}
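+
+/* The resulting dump looks like this (hash and serial values below are
+ * placeholders, not real data):
+ *
+ *   Certificate ID:
+ *     Issuer Name Hash: 36...
+ *     Issuer Key Hash: 9F...
+ *     Serial Number: 10...
+ */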
+#endif
+
+/*
+ * IO handler of "show ssl ocsp-response". The command taking a specific ID
+ * is managed in cli_io_handler_show_ocspresponse_detail.
+ * The current entry is taken from appctx->svcctx.
+ */
+static int cli_io_handler_show_ocspresponse(struct appctx *appctx)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ struct buffer *trash = alloc_trash_chunk();
+ struct buffer *tmp = NULL;
+ struct ebmb_node *node;
+ struct certificate_ocsp *ocsp = NULL;
+ BIO *bio = NULL;
+ int write = -1;
+
+ if (trash == NULL)
+ return 1;
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto end;
+
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+
+ if (!appctx->svcctx) {
+ chunk_appendf(trash, "# Certificate IDs\n");
+ node = ebmb_first(&cert_ocsp_tree);
+ } else {
+ node = &((struct certificate_ocsp *)appctx->svcctx)->key;
+ }
+
+ while (node) {
+ OCSP_CERTID *certid = NULL;
+ const unsigned char *p = NULL;
+ int i;
+
+ ocsp = ebmb_entry(node, struct certificate_ocsp, key);
+
+ /* Dump the key in hexadecimal */
+ chunk_appendf(trash, "Certificate ID key : ");
+ for (i = 0; i < ocsp->key_length; ++i) {
+ chunk_appendf(trash, "%02x", ocsp->key_data[i]);
+ }
+ chunk_appendf(trash, "\n");
+
+ p = ocsp->key_data;
+
+ /* Decode the certificate ID (serialized into the key). */
+ d2i_OCSP_CERTID(&certid, &p, ocsp->key_length);
+ if (!certid)
+ goto end;
+
+ /* Dump the CERTID info */
+ ocsp_certid_print(bio, certid, 1);
+ OCSP_CERTID_free(certid);
+ write = BIO_read(bio, tmp->area, tmp->size-1);
+ if (write < 0)
+ write = 0; /* nothing was dumped, emit an empty string */
+ /* strip trailing LFs */
+ while (write > 0 && tmp->area[write-1] == '\n')
+ write--;
+ tmp->area[write] = '\0';
+
+ chunk_appendf(trash, "%s\n", tmp->area);
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+end:
+ appctx->svcctx = NULL;
+ if (trash)
+ free_trash_chunk(trash);
+ if (tmp)
+ free_trash_chunk(tmp);
+ if (bio)
+ BIO_free(bio);
+ return 1;
+
+yield:
+
+ if (trash)
+ free_trash_chunk(trash);
+ if (tmp)
+ free_trash_chunk(tmp);
+ if (bio)
+ BIO_free(bio);
+ appctx->svcctx = ocsp;
+ return 0;
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+}
+
+#ifdef HAVE_SSL_PROVIDERS
+struct provider_name {
+ const char *name;
+ struct list list;
+};
+
+
+static int ssl_provider_get_name_cb(OSSL_PROVIDER *provider, void *cbdata)
+{
+ struct list *provider_names = cbdata;
+ struct provider_name *item = NULL;
+ const char *name = OSSL_PROVIDER_get0_name(provider);
+
+ if (!provider_names)
+ return 0;
+
+ item = calloc(1, sizeof(*item));
+
+ if (!item)
+ return 0;
+
+ item->name = name;
+ LIST_APPEND(provider_names, &item->list);
+
+ return 1;
+}
+
+static void ssl_provider_get_name_list(struct list *provider_names)
+{
+ if (!provider_names)
+ return;
+
+ OSSL_PROVIDER_do_all(NULL, ssl_provider_get_name_cb, provider_names);
+}
+
+static void ssl_provider_clear_name_list(struct list *provider_names)
+{
+ struct provider_name *item = NULL, *item_s = NULL;
+
+ if (provider_names) {
+ list_for_each_entry_safe(item, item_s, provider_names, list) {
+ LIST_DELETE(&item->list);
+ free(item);
+ }
+ }
+}
+
+static int cli_io_handler_show_providers(struct appctx *appctx)
+{
+ struct buffer *trash = get_trash_chunk();
+ struct list provider_names;
+ struct provider_name *name;
+
+ LIST_INIT(&provider_names);
+
+ chunk_appendf(trash, "Loaded providers : \n");
+
+ ssl_provider_get_name_list(&provider_names);
+
+ list_for_each_entry(name, &provider_names, list) {
+ chunk_appendf(trash, "\t- %s\n", name->name);
+ }
+
+ ssl_provider_clear_name_list(&provider_names);
+
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+
+ return 1;
+
+yield:
+ return 0;
+}
+#endif
+
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+/*
+ * Dump the details about an OCSP response in DER format stored in
+ * <ocsp_response> into buffer <out>.
+ * Returns 0 in case of success.
+ */
+int ssl_ocsp_response_print(struct buffer *ocsp_response, struct buffer *out)
+{
+ BIO *bio = NULL;
+ int write = -1;
+ OCSP_RESPONSE *resp;
+ const unsigned char *p;
+ int retval = -1;
+
+ if (!ocsp_response)
+ return -1;
+
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ return -1;
+
+ p = (const unsigned char*)ocsp_response->area;
+
+ resp = d2i_OCSP_RESPONSE(NULL, &p, ocsp_response->data);
+ if (!resp) {
+ chunk_appendf(out, "Unable to parse OCSP response");
+ goto end;
+ }
+
+ if (OCSP_RESPONSE_print(bio, resp, 0) != 0) {
+ struct buffer *trash = get_trash_chunk();
+ struct ist ist_block = IST_NULL;
+ struct ist ist_double_lf = IST_NULL;
+ static struct ist double_lf = IST("\n\n");
+
+ write = BIO_read(bio, trash->area, trash->size - 1);
+ if (write <= 0)
+ goto end;
+ trash->data = write;
+
+ /* Look for empty lines in the 'trash' buffer and add a space to
+ * the beginning to avoid having empty lines in the output
+ * (without changing the appearance of the information
+ * displayed).
+ */
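+ /* e.g. "Cert Status: good\n\nThis Update: ..." becomes
+ * "Cert Status: good\n \nThis Update: ...", preserving the line layout
+ * while never emitting a truly empty line.
+ */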
+ ist_block = ist2(b_orig(trash), b_data(trash));
+
+ ist_double_lf = istist(ist_block, double_lf);
+
+ while (istlen(ist_double_lf)) {
+ /* istptr(ist_double_lf) points to the first \n of a
+ * \n\n pattern.
+ */
+ uint empty_line_offset = istptr(ist_double_lf) + 1 - istptr(ist_block);
+
+ /* Write up to the first '\n' of the "\n\n" pattern into
+ * the output buffer.
+ */
+ b_putblk(out, istptr(ist_block), empty_line_offset);
+ /* Add an extra space. */
+ b_putchr(out, ' ');
+
+ /* Keep looking for empty lines in the rest of the data. */
+ ist_block = istadv(ist_block, empty_line_offset);
+
+ ist_double_lf = istist(ist_block, double_lf);
+ }
+
+ retval = (b_istput(out, ist_block) <= 0);
+ }
+
+end:
+ if (bio)
+ BIO_free(bio);
+
+ OCSP_RESPONSE_free(resp);
+
+ return retval;
+}
+
+/*
+ * Dump the details of the OCSP response of ID <ocsp_certid> into buffer <out>.
+ * Returns 0 in case of success.
+ */
+int ssl_get_ocspresponse_detail(unsigned char *ocsp_certid, struct buffer *out)
+{
+ struct certificate_ocsp *ocsp;
+
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, ocsp_certid, OCSP_MAX_CERTID_ASN1_LENGTH);
+ if (!ocsp)
+ return -1;
+
+ return ssl_ocsp_response_print(&ocsp->response, out);
+}
+
+
+/* IO handler of details "show ssl ocsp-response <id>".
+ * The current entry is taken from appctx->svcctx.
+ */
+static int cli_io_handler_show_ocspresponse_detail(struct appctx *appctx)
+{
+ struct buffer *trash = alloc_trash_chunk();
+ struct certificate_ocsp *ocsp = appctx->svcctx;
+
+ if (trash == NULL)
+ return 1;
+
+ if (ssl_ocsp_response_print(&ocsp->response, trash)) {
+ free_trash_chunk(trash);
+ return 1;
+ }
+
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+
+ appctx->svcctx = NULL;
+ if (trash)
+ free_trash_chunk(trash);
+ return 1;
+
+yield:
+ if (trash)
+ free_trash_chunk(trash);
+
+ return 0;
+}
+#endif
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ { { "show", "tls-keys", NULL }, "show tls-keys [id|*] : show tls keys references or dump tls ticket keys when id specified", cli_parse_show_tlskeys, cli_io_handler_tlskeys_files },
+ { { "set", "ssl", "tls-key", NULL }, "set ssl tls-key [id|file] <key> : set the next TLS key for the <id> or <file> listener to <key>", cli_parse_set_tlskeys, NULL },
+#endif
+ { { "set", "ssl", "ocsp-response", NULL }, "set ssl ocsp-response <resp|payload> : update a certificate's OCSP Response from a base64-encode DER", cli_parse_set_ocspresponse, NULL },
+
+ { { "show", "ssl", "ocsp-response", NULL },"show ssl ocsp-response [id] : display the IDs of the OCSP responses used in memory, or the details of a single OCSP response", cli_parse_show_ocspresponse, cli_io_handler_show_ocspresponse, NULL },
+#ifdef HAVE_SSL_PROVIDERS
+ { { "show", "ssl", "providers", NULL }, "show ssl providers : show loaded SSL providers", NULL, cli_io_handler_show_providers },
+#endif
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/* transport-layer operations for SSL sockets */
+struct xprt_ops ssl_sock = {
+ .snd_buf = ssl_sock_from_buf,
+ .rcv_buf = ssl_sock_to_buf,
+ .subscribe = ssl_subscribe,
+ .unsubscribe = ssl_unsubscribe,
+ .remove_xprt = ssl_remove_xprt,
+ .add_xprt = ssl_add_xprt,
+ .rcv_pipe = NULL,
+ .snd_pipe = NULL,
+ .shutr = NULL,
+ .shutw = ssl_sock_shutw,
+ .close = ssl_sock_close,
+ .init = ssl_sock_init,
+ .start = ssl_sock_start,
+ .prepare_bind_conf = ssl_sock_prepare_bind_conf,
+ .destroy_bind_conf = ssl_sock_destroy_bind_conf,
+ .prepare_srv = ssl_sock_prepare_srv_ctx,
+ .destroy_srv = ssl_sock_free_srv_ctx,
+ .get_alpn = ssl_sock_get_alpn,
+ .takeover = ssl_takeover,
+ .set_idle = ssl_set_idle,
+ .set_used = ssl_set_used,
+ .get_ssl_sock_ctx = ssl_sock_get_ctx,
+ .name = "SSL",
+ .show_fd = ssl_sock_show_fd,
+};
+
+enum act_return ssl_action_wait_for_hs(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *conn;
+
+ conn = objt_conn(sess->origin);
+
+ if (conn) {
+ if (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_SSL_WAIT_HS)) {
+ sc_ep_set(s->scf, SE_FL_WAIT_FOR_HS);
+ s->req.flags |= CF_READ_NULL;
+ return ACT_RET_YIELD;
+ }
+ }
+ return (ACT_RET_CONT);
+}
+
+static enum act_parse_ret ssl_parse_wait_for_hs(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
+{
+ rule->action_ptr = ssl_action_wait_for_hs;
+
+ return ACT_RET_PRS_OK;
+}
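+
+/* A typical configuration use of this action is to delay replayable requests
+ * until the handshake completes when early data are enabled (illustrative
+ * sketch; names are examples):
+ *
+ *   frontend fe
+ *       bind :443 ssl crt site.pem allow-0rtt
+ *       http-request wait-for-handshake if { method POST }
+ */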
+
+static struct action_kw_list http_req_actions = {ILH, {
+ { "wait-for-handshake", ssl_parse_wait_for_hs },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+
+static void ssl_sock_sctl_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ if (ptr) {
+ chunk_destroy(ptr);
+ free(ptr);
+ }
+}
+
+#endif
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+static void ssl_sock_ocsp_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ struct ocsp_cbk_arg *ocsp_arg;
+
+ if (ptr) {
+ ocsp_arg = ptr;
+
+ if (ocsp_arg->is_single) {
+ ssl_sock_free_ocsp(ocsp_arg->s_ocsp);
+ ocsp_arg->s_ocsp = NULL;
+ } else {
+ int i;
+
+ for (i = 0; i < SSL_SOCK_NUM_KEYTYPES; i++) {
+ ssl_sock_free_ocsp(ocsp_arg->m_ocsp[i]);
+ ocsp_arg->m_ocsp[i] = NULL;
+ }
+ }
+ free(ocsp_arg);
+ }
+}
+#endif
+
+static void ssl_sock_capture_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ pool_free(pool_head_ssl_capture, ptr);
+}
+
+#ifdef HAVE_SSL_KEYLOG
+static void ssl_sock_keylog_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ struct ssl_keylog *keylog;
+
+ if (!ptr)
+ return;
+
+ keylog = ptr;
+
+ pool_free(pool_head_ssl_keylog_str, keylog->client_random);
+ pool_free(pool_head_ssl_keylog_str, keylog->client_early_traffic_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->client_handshake_traffic_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->server_handshake_traffic_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->client_traffic_secret_0);
+ pool_free(pool_head_ssl_keylog_str, keylog->server_traffic_secret_0);
+ pool_free(pool_head_ssl_keylog_str, keylog->exporter_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->early_exporter_secret);
+
+ pool_free(pool_head_ssl_keylog, ptr);
+}
+#endif
+
+static void ssl_sock_clt_crt_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ if (!ptr)
+ return;
+
+ X509_free((X509*)ptr);
+}
+
+static void ssl_sock_clt_sni_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ pool_free(ssl_sock_client_sni_pool, ptr);
+}
+
+static void __ssl_sock_init(void)
+{
+#if (!defined(OPENSSL_NO_COMP) && !defined(SSL_OP_NO_COMPRESSION))
+ STACK_OF(SSL_COMP)* cm;
+ int n;
+#endif
+
+ if (global_ssl.listen_default_ciphers)
+ global_ssl.listen_default_ciphers = strdup(global_ssl.listen_default_ciphers);
+ if (global_ssl.connect_default_ciphers)
+ global_ssl.connect_default_ciphers = strdup(global_ssl.connect_default_ciphers);
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (global_ssl.listen_default_ciphersuites)
+ global_ssl.listen_default_ciphersuites = strdup(global_ssl.listen_default_ciphersuites);
+ if (global_ssl.connect_default_ciphersuites)
+ global_ssl.connect_default_ciphersuites = strdup(global_ssl.connect_default_ciphersuites);
+#endif
+
+ xprt_register(XPRT_SSL, &ssl_sock);
+#if HA_OPENSSL_VERSION_NUMBER < 0x10100000L
+ SSL_library_init();
+#endif
+#if (!defined(OPENSSL_NO_COMP) && !defined(SSL_OP_NO_COMPRESSION))
+ cm = SSL_COMP_get_compression_methods();
+ n = sk_SSL_COMP_num(cm);
+ while (n--) {
+ (void) sk_SSL_COMP_pop(cm);
+ }
+#endif
+
+#if defined(USE_THREAD) && (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+ ssl_locking_init();
+#endif
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+ sctl_ex_index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_sctl_free_func);
+#endif
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ ocsp_ex_index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_ocsp_free_func);
+#endif
+
+ ssl_app_data_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, NULL);
+ ssl_capture_ptr_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_capture_free_func);
+#ifdef USE_QUIC
+ ssl_qc_app_data_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, NULL);
+#endif /* USE_QUIC */
+#ifdef HAVE_SSL_KEYLOG
+ ssl_keylog_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_keylog_free_func);
+#endif
+ ssl_client_crt_ref_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_clt_crt_free_func);
+ ssl_client_sni_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_clt_sni_free_func);
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+ ENGINE_load_builtin_engines();
+ hap_register_post_check(ssl_check_async_engine_count);
+#endif
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ hap_register_post_check(tlskeys_finalize_config);
+#endif
+
+ global.ssl_session_max_cost = SSL_SESSION_MAX_COST;
+ global.ssl_handshake_max_cost = SSL_HANDSHAKE_MAX_COST;
+
+ hap_register_post_deinit(ssl_free_global_issuers);
+
+#ifndef OPENSSL_NO_DH
+ ssl_dh_ptr_index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, NULL);
+ hap_register_post_deinit(ssl_free_dh);
+#endif
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+ hap_register_post_deinit(ssl_free_engines);
+#endif
+#ifdef HAVE_SSL_PROVIDERS
+ hap_register_post_deinit(ssl_unload_providers);
+#endif
+#if HA_OPENSSL_VERSION_NUMBER < 0x3000000fL
+ /* Load SSL string for the verbose & debug mode. */
+ ERR_load_SSL_strings();
+#endif
+ ha_meth = BIO_meth_new(0x666, "ha methods");
+ BIO_meth_set_write(ha_meth, ha_ssl_write);
+ BIO_meth_set_read(ha_meth, ha_ssl_read);
+ BIO_meth_set_ctrl(ha_meth, ha_ssl_ctrl);
+ BIO_meth_set_create(ha_meth, ha_ssl_new);
+ BIO_meth_set_destroy(ha_meth, ha_ssl_free);
+ BIO_meth_set_puts(ha_meth, ha_ssl_puts);
+ BIO_meth_set_gets(ha_meth, ha_ssl_gets);
+
+ HA_SPIN_INIT(&ckch_lock);
+
+ /* Try to register dedicated SSL/TLS protocol message callbacks for
+ * heartbleed attack (CVE-2014-0160) and clienthello.
+ */
+ hap_register_post_check(ssl_sock_register_msg_callbacks);
+
+ /* Try to free all callbacks that were registered by using
+ * ssl_sock_register_msg_callback().
+ */
+ hap_register_post_deinit(ssl_sock_unregister_msg_callbacks);
+}
+INITCALL0(STG_REGISTER, __ssl_sock_init);
+
+/* Compute and register the version string */
+static void ssl_register_build_options()
+{
+ char *ptr = NULL;
+ int i;
+
+ memprintf(&ptr, "Built with OpenSSL version : "
+#ifdef OPENSSL_IS_BORINGSSL
+ "BoringSSL");
+#else /* OPENSSL_IS_BORINGSSL */
+ OPENSSL_VERSION_TEXT
+ "\nRunning on OpenSSL version : %s%s",
+ OpenSSL_version(OPENSSL_VERSION),
+ ((OPENSSL_VERSION_NUMBER ^ OpenSSL_version_num()) >> 8) ? " (VERSIONS DIFFER!)" : "");
+#endif
+ memprintf(&ptr, "%s\nOpenSSL library supports TLS extensions : "
+#if HA_OPENSSL_VERSION_NUMBER < 0x00907000L
+ "no (library version too old)"
+#elif defined(OPENSSL_NO_TLSEXT)
+ "no (disabled via OPENSSL_NO_TLSEXT)"
+#else
+ "yes"
+#endif
+ "", ptr);
+
+ memprintf(&ptr, "%s\nOpenSSL library supports SNI : "
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ "yes"
+#else
+#ifdef OPENSSL_NO_TLSEXT
+ "no (because of OPENSSL_NO_TLSEXT)"
+#else
+ "no (version might be too old, 0.9.8f min needed)"
+#endif
+#endif
+ "", ptr);
+
+ memprintf(&ptr, "%s\nOpenSSL library supports :", ptr);
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (methodVersions[i].option)
+ memprintf(&ptr, "%s %s", ptr, methodVersions[i].name);
+
+#ifdef HAVE_SSL_PROVIDERS
+ {
+ struct list provider_names;
+ struct provider_name *name;
+ LIST_INIT(&provider_names);
+ ssl_provider_get_name_list(&provider_names);
+
+ memprintf(&ptr, "%s\nOpenSSL providers loaded :", ptr);
+
+ list_for_each_entry(name, &provider_names, list) {
+ memprintf(&ptr, "%s %s", ptr, name->name);
+ }
+
+ ssl_provider_clear_name_list(&provider_names);
+ }
+#endif
+
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, ssl_register_build_options);
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+void ssl_free_engines(void) {
+ struct ssl_engine_list *wl, *wlb;
+ /* free up engine list */
+ list_for_each_entry_safe(wl, wlb, &openssl_engines, list) {
+ ENGINE_finish(wl->e);
+ ENGINE_free(wl->e);
+ LIST_DELETE(&wl->list);
+ free(wl);
+ }
+}
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+void ssl_unload_providers(void) {
+ struct ssl_provider_list *prov, *provb;
+ list_for_each_entry_safe(prov, provb, &openssl_providers, list) {
+ OSSL_PROVIDER_unload(prov->provider);
+ LIST_DELETE(&prov->list);
+ free(prov);
+ }
+}
+#endif
+
+#ifndef OPENSSL_NO_DH
+void ssl_free_dh(void) {
+ if (local_dh_1024) {
+ HASSL_DH_free(local_dh_1024);
+ local_dh_1024 = NULL;
+ }
+ if (local_dh_2048) {
+ HASSL_DH_free(local_dh_2048);
+ local_dh_2048 = NULL;
+ }
+ if (local_dh_4096) {
+ HASSL_DH_free(local_dh_4096);
+ local_dh_4096 = NULL;
+ }
+ if (global_dh) {
+ HASSL_DH_free(global_dh);
+ global_dh = NULL;
+ }
+}
+#endif
+
+static void __ssl_sock_deinit(void)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+ if (ssl_ctx_lru_tree) {
+ lru64_destroy(ssl_ctx_lru_tree);
+ HA_RWLOCK_DESTROY(&ssl_ctx_lru_rwlock);
+ }
+#endif
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+ ERR_remove_state(0);
+ ERR_free_strings();
+
+ EVP_cleanup();
+#endif
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x00907000L) && (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+ CRYPTO_cleanup_all_ex_data();
+#endif
+ BIO_meth_free(ha_meth);
+}
+REGISTER_POST_DEINIT(__ssl_sock_deinit);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ssl_utils.c b/src/ssl_utils.c
new file mode 100644
index 0000000..0f4a859
--- /dev/null
+++ b/src/ssl_utils.c
@@ -0,0 +1,419 @@
+/*
+ * Utility functions for SSL:
+ * Mostly generic functions that retrieve information from certificates
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <haproxy/api.h>
+#include <haproxy/buf-t.h>
+#include <haproxy/chunk.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+
+/* fill a buffer with the algorithm and size of a public key */
+int cert_get_pkey_algo(X509 *crt, struct buffer *out)
+{
+ int bits = 0;
+ int sig = TLSEXT_signature_anonymous;
+ int len = -1;
+ EVP_PKEY *pkey;
+
+ pkey = X509_get_pubkey(crt);
+ if (pkey) {
+ bits = EVP_PKEY_bits(pkey);
+ switch(EVP_PKEY_base_id(pkey)) {
+ case EVP_PKEY_RSA:
+ sig = TLSEXT_signature_rsa;
+ break;
+ case EVP_PKEY_EC:
+ sig = TLSEXT_signature_ecdsa;
+ break;
+ case EVP_PKEY_DSA:
+ sig = TLSEXT_signature_dsa;
+ break;
+ }
+ EVP_PKEY_free(pkey);
+ }
+
+ switch(sig) {
+ case TLSEXT_signature_rsa:
+ len = chunk_printf(out, "RSA%d", bits);
+ break;
+ case TLSEXT_signature_ecdsa:
+ len = chunk_printf(out, "EC%d", bits);
+ break;
+ case TLSEXT_signature_dsa:
+ len = chunk_printf(out, "DSA%d", bits);
+ break;
+ default:
+ return 0;
+ }
+ if (len < 0)
+ return 0;
+ return 1;
+}
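+
+/* For instance, a certificate carrying a 2048-bit RSA public key yields
+ * "RSA2048" in <out>, while a P-256 EC key yields "EC256".
+ */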
+
+/* Extract a serial from a cert, and copy it to a chunk.
+ * Returns 1 if serial is found and copied, 0 if no serial found and
+ * -1 if output is not large enough.
+ */
+int ssl_sock_get_serial(X509 *crt, struct buffer *out)
+{
+ ASN1_INTEGER *serial;
+
+ serial = X509_get_serialNumber(crt);
+ if (!serial)
+ return 0;
+
+ if (out->size < serial->length)
+ return -1;
+
+ memcpy(out->area, serial->data, serial->length);
+ out->data = serial->length;
+ return 1;
+}
+
+/* Extract a cert to der, and copy it to a chunk.
+ * Returns 1 if the cert is found and copied, 0 on der conversion failure
+ * and -1 if the output is not large enough.
+ */
+int ssl_sock_crt2der(X509 *crt, struct buffer *out)
+{
+ int len;
+ unsigned char *p = (unsigned char *) out->area;
+
+ len = i2d_X509(crt, NULL);
+ if (len <= 0)
+ return 0;
+
+ if (out->size < len)
+ return -1;
+
+ i2d_X509(crt, &p);
+ out->data = len;
+ return 1;
+}
+
+
+/* Copy a date in ASN1_UTCTIME format into struct buffer out.
+ * Returns 1 if the date is found and copied, 0 if no valid time found
+ * and -1 if output is not large enough.
+ */
+int ssl_sock_get_time(ASN1_TIME *tm, struct buffer *out)
+{
+ if (tm->type == V_ASN1_GENERALIZEDTIME) {
+ ASN1_GENERALIZEDTIME *gentm = (ASN1_GENERALIZEDTIME *)tm;
+
+ if (gentm->length < 12)
+ return 0;
+ /* only dates in the 20xx range can be reduced to a 2-digit year */
+ if (gentm->data[0] != 0x32 || gentm->data[1] != 0x30)
+ return 0;
+ if (out->size < gentm->length-2)
+ return -1;
+
+ memcpy(out->area, gentm->data+2, gentm->length-2);
+ out->data = gentm->length-2;
+ return 1;
+ }
+ else if (tm->type == V_ASN1_UTCTIME) {
+ ASN1_UTCTIME *utctm = (ASN1_UTCTIME *)tm;
+
+ if (utctm->length < 10)
+ return 0;
+ /* 2-digit years >= 50 denote 19xx dates and are rejected */
+ if (utctm->data[0] >= 0x35)
+ return 0;
+ if (out->size < utctm->length)
+ return -1;
+
+ memcpy(out->area, utctm->data, utctm->length);
+ out->data = utctm->length;
+ return 1;
+ }
+
+ return 0;
+}
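+
+/* For instance the GENERALIZEDTIME "20231231235959Z" is reduced to the
+ * UTCTIME-like "231231235959Z" (the leading "20" is dropped), while the
+ * UTCTIME "231231235959Z" is copied verbatim. GENERALIZEDTIME dates outside
+ * the 20xx range and UTCTIME dates mapping to 19xx are rejected above.
+ */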
+
+/* Extract an entry from an X509_NAME and copy its value to an output chunk.
+ * Returns 1 if the entry was found, 0 if not found, or -1 if output is not large enough.
+ */
+int ssl_sock_get_dn_entry(X509_NAME *a, const struct buffer *entry, int pos,
+ struct buffer *out)
+{
+ X509_NAME_ENTRY *ne;
+ ASN1_OBJECT *obj;
+ ASN1_STRING *data;
+ const unsigned char *data_ptr;
+ int data_len;
+ int i, j, n;
+ int cur = 0;
+ const char *s;
+ char tmp[128];
+ int name_count;
+
+ name_count = X509_NAME_entry_count(a);
+
+ out->data = 0;
+ for (i = 0; i < name_count; i++) {
+ if (pos < 0)
+ j = (name_count-1) - i;
+ else
+ j = i;
+
+ ne = X509_NAME_get_entry(a, j);
+ obj = X509_NAME_ENTRY_get_object(ne);
+ data = X509_NAME_ENTRY_get_data(ne);
+ data_ptr = ASN1_STRING_get0_data(data);
+ data_len = ASN1_STRING_length(data);
+ n = OBJ_obj2nid(obj);
+ if ((n == NID_undef) || ((s = OBJ_nid2sn(n)) == NULL)) {
+ i2t_ASN1_OBJECT(tmp, sizeof(tmp), obj);
+ s = tmp;
+ }
+
+ if (chunk_strcasecmp(entry, s) != 0)
+ continue;
+
+ if (pos < 0)
+ cur--;
+ else
+ cur++;
+
+ if (cur != pos)
+ continue;
+
+ if (data_len > out->size)
+ return -1;
+
+ memcpy(out->area, data_ptr, data_len);
+ out->data = data_len;
+ return 1;
+ }
+
+ return 0;
+
+}
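+
+/* For instance, with the DN "/C=FR/O=haproxy/CN=www", looking up entry "CN"
+ * with pos 1 copies "www" into <out>; a negative <pos> walks the entries
+ * from the last one backwards.
+ */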
+
+/*
+ * Extract the DN in the specified format from the X509_NAME and copy result to a chunk.
+ * Currently supports rfc2253 for returning LDAP V3 DNs.
+ * Returns 1 if dn entries exist, 0 if no dn entry was found.
+ */
+int ssl_sock_get_dn_formatted(X509_NAME *a, const struct buffer *format, struct buffer *out)
+{
+ BIO *bio = NULL;
+ int ret = 0;
+ int data_len = 0;
+
+ if (chunk_strcmp(format, "rfc2253") == 0) {
+ bio = BIO_new(BIO_s_mem());
+ if (bio == NULL)
+ goto out;
+
+ if (X509_NAME_print_ex(bio, a, 0, XN_FLAG_RFC2253) < 0)
+ goto out;
+
+ if ((data_len = BIO_read(bio, out->area, out->size)) <= 0)
+ goto out;
+
+ out->data = data_len;
+
+ ret = 1;
+ }
+out:
+ if (bio)
+ BIO_free(bio);
+ return ret;
+}
+
+/* Extract and format the full DN from an X509_NAME and copy the result into a chunk.
+ * Returns 1 if dn entries exist, 0 if no dn entry was found, or -1 if output is not large enough.
+ */
+int ssl_sock_get_dn_oneline(X509_NAME *a, struct buffer *out)
+{
+ X509_NAME_ENTRY *ne;
+ ASN1_OBJECT *obj;
+ ASN1_STRING *data;
+ const unsigned char *data_ptr;
+ int data_len;
+ int i, n, ln;
+ int l = 0;
+ const char *s;
+ char *p;
+ char tmp[128];
+ int name_count;
+
+
+ name_count = X509_NAME_entry_count(a);
+
+ out->data = 0;
+ p = out->area;
+ for (i = 0; i < name_count; i++) {
+ ne = X509_NAME_get_entry(a, i);
+ obj = X509_NAME_ENTRY_get_object(ne);
+ data = X509_NAME_ENTRY_get_data(ne);
+ data_ptr = ASN1_STRING_get0_data(data);
+ data_len = ASN1_STRING_length(data);
+ n = OBJ_obj2nid(obj);
+ if ((n == NID_undef) || ((s = OBJ_nid2sn(n)) == NULL)) {
+ i2t_ASN1_OBJECT(tmp, sizeof(tmp), obj);
+ s = tmp;
+ }
+ ln = strlen(s);
+
+ l += 1 + ln + 1 + data_len;
+ if (l > out->size)
+ return -1;
+ out->data = l;
+
+ *(p++)='/';
+ memcpy(p, s, ln);
+ p += ln;
+ *(p++)='=';
+ memcpy(p, data_ptr, data_len);
+ p += data_len;
+ }
+
+ if (!out->data)
+ return 0;
+
+ return 1;
+}
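+
+/* For instance a certificate subject with C=FR, O=haproxy and CN=www is
+ * rendered as "/C=FR/O=haproxy/CN=www".
+ */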
+
+
+extern int ssl_client_crt_ref_index;
+
+/*
+ * This function fetches the peer certificate of a connection. We first try
+ * the dedicated SSL_get_peer_certificate() function; if it returns nothing,
+ * we fall back to the client certificate reference that might have been
+ * stored in the SSL structure's ex_data during the verification process.
+ * Returns NULL in case of failure.
+ */
+X509* ssl_sock_get_peer_certificate(SSL *ssl)
+{
+ X509* cert;
+
+ cert = SSL_get_peer_certificate(ssl);
+ /* Get the client certificate reference stored in the SSL
+ * structure's ex_data during the verification process. */
+ if (!cert) {
+ cert = SSL_get_ex_data(ssl, ssl_client_crt_ref_index);
+ if (cert)
+ X509_up_ref(cert);
+ }
+
+ return cert;
+}
+
+/*
+ * Take an OpenSSL version in text format and return a numeric openssl version
+ * Return 0 if it failed to parse the version
+ *
+ * https://www.openssl.org/docs/man1.1.1/man3/OPENSSL_VERSION_NUMBER.html
+ *
+ * MNNFFPPS: major minor fix patch status
+ *
+ * The status nibble has one of the values 0 for development, 1 to e for betas
+ * 1 to 14, and f for release.
+ *
+ * for example
+ *
+ * 0x0090821f 0.9.8zh
+ * 0x1000215f 1.0.2u
+ * 0x30000000 3.0.0-alpha17
+ * 0x30000002 3.0.0-beta2
+ * 0x3000000e 3.0.0-beta14
+ * 0x3000000f 3.0.0
+ */
+unsigned int openssl_version_parser(const char *version)
+{
+ unsigned int numversion;
+ unsigned int major = 0, minor = 0, fix = 0, patch = 0, status = 0;
+ char *p, *end;
+
+ p = (char *)version;
+
+ if (!p || !*p)
+ return 0;
+
+ major = strtol(p, &end, 10);
+ if (*end != '.' || major > 0xf)
+ goto error;
+ p = end + 1;
+
+ minor = strtol(p, &end, 10);
+ if (*end != '.' || minor > 0xff)
+ goto error;
+ p = end + 1;
+
+ fix = strtol(p, &end, 10);
+ if (fix > 0xff)
+ goto error;
+ p = end;
+
+ if (!*p) {
+ /* end of the string, that's a release */
+ status = 0xf;
+ } else if (*p == '-') {
+ /* after the hyphen, only a beta increments the status
+ * counter; all other versions are considered "dev" and
+ * do not increment anything */
+ p++;
+
+ if (!strncmp(p, "beta", 4)) {
+ p += 4;
+ status = strtol(p, &end, 10);
+ if (status > 14)
+ goto error;
+ }
+ } else {
+ /* that's a patch release */
+ patch = 1;
+
+ /* add the value of each letter */
+ while (*p) {
+ patch += (*p & ~0x20) - 'A';
+ p++;
+ }
+ status = 0xf;
+ }
+
+end:
+ numversion = ((major & 0xf) << 28) | ((minor & 0xff) << 20) | ((fix & 0xff) << 12) | ((patch & 0xff) << 4) | (status & 0xf);
+ return numversion;
+
+error:
+ return 0;
+
+}
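+
+/* A few checks derived from the examples above (illustrative only):
+ *
+ *   openssl_version_parser("1.0.2u")      == 0x1000215f
+ *   openssl_version_parser("3.0.0-beta2") == 0x30000002
+ *   openssl_version_parser("3.0.0")       == 0x3000000f
+ *   openssl_version_parser("not.a.ver")   == 0
+ */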
+
+/* Exclude GREASE (RFC8701) values from input buffer */
+void exclude_tls_grease(char *input, int len, struct buffer *output)
+{
+ int ptr = 0;
+
+ while (ptr < len - 1) {
+ if (input[ptr] != input[ptr+1] || (input[ptr] & 0x0f) != 0x0a) {
+ if (output->data <= output->size - 2) {
+ memcpy(output->area + output->data, input + ptr, 2);
+ output->data += 2;
+ } else
+ break;
+ }
+ ptr += 2;
+ }
+ if (output->size - output->data > 0 && len - ptr > 0)
+ output->area[output->data++] = input[ptr];
+}
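+
+/* For instance, with input {0x0a, 0x0a, 0x13, 0x01} (the GREASE value
+ * 0x0a0a followed by TLS_AES_128_GCM_SHA256), only the last two bytes are
+ * copied to <output>: GREASE values are pairs of identical bytes whose low
+ * nibble is 0xa (0x0a0a, 0x1a1a, ... 0xfafa).
+ */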
diff --git a/src/stats.c b/src/stats.c
new file mode 100644
index 0000000..70a1d7e
--- /dev/null
+++ b/src/stats.c
@@ -0,0 +1,5361 @@
+/*
+ * Functions dedicated to statistics output and the stats socket
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/applet.h>
+#include <haproxy/backend.h>
+#include <haproxy/base64.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/compression.h>
+#include <haproxy/debug.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/map-t.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/pipe.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/version.h>
+
+
+/* status codes available for the stats admin page (strictly 4 chars long) */
+const char *stat_status_codes[STAT_STATUS_SIZE] = {
+ [STAT_STATUS_DENY] = "DENY",
+ [STAT_STATUS_DONE] = "DONE",
+ [STAT_STATUS_ERRP] = "ERRP",
+ [STAT_STATUS_EXCD] = "EXCD",
+ [STAT_STATUS_NONE] = "NONE",
+ [STAT_STATUS_PART] = "PART",
+ [STAT_STATUS_UNKN] = "UNKN",
+ [STAT_STATUS_IVAL] = "IVAL",
+};
+
+/* These are the field names for each INF_* field position. Please pay attention
+ * to always use the exact same name except that the strings for new names must
+ * be lower case or CamelCase while the enum entries must be upper case.
+ */
+const struct name_desc info_fields[INF_TOTAL_FIELDS] = {
+ [INF_NAME] = { .name = "Name", .desc = "Product name" },
+ [INF_VERSION] = { .name = "Version", .desc = "Product version" },
+ [INF_RELEASE_DATE] = { .name = "Release_date", .desc = "Date of latest source code update" },
+ [INF_NBTHREAD] = { .name = "Nbthread", .desc = "Number of started threads (global.nbthread)" },
+ [INF_NBPROC] = { .name = "Nbproc", .desc = "Number of started worker processes (historical, always 1)" },
+ [INF_PROCESS_NUM] = { .name = "Process_num", .desc = "Relative worker process number (1)" },
+ [INF_PID] = { .name = "Pid", .desc = "This worker process identifier for the system" },
+ [INF_UPTIME] = { .name = "Uptime", .desc = "How long ago this worker process was started (days+hours+minutes+seconds)" },
+ [INF_UPTIME_SEC] = { .name = "Uptime_sec", .desc = "How long ago this worker process was started (seconds)" },
+ [INF_START_TIME_SEC] = { .name = "Start_time_sec", .desc = "Start time in seconds" },
+ [INF_MEMMAX_MB] = { .name = "Memmax_MB", .desc = "Worker process's hard limit on memory usage in MB (-m on command line)" },
+ [INF_MEMMAX_BYTES] = { .name = "Memmax_bytes", .desc = "Worker process's hard limit on memory usage in bytes (-m on command line)" },
+ [INF_POOL_ALLOC_MB] = { .name = "PoolAlloc_MB", .desc = "Amount of memory allocated in pools (in MB)" },
+ [INF_POOL_ALLOC_BYTES] = { .name = "PoolAlloc_bytes", .desc = "Amount of memory allocated in pools (in bytes)" },
+ [INF_POOL_USED_MB] = { .name = "PoolUsed_MB", .desc = "Amount of pool memory currently used (in MB)" },
+ [INF_POOL_USED_BYTES] = { .name = "PoolUsed_bytes", .desc = "Amount of pool memory currently used (in bytes)" },
+ [INF_POOL_FAILED] = { .name = "PoolFailed", .desc = "Number of failed pool allocations since this worker was started" },
+ [INF_ULIMIT_N] = { .name = "Ulimit-n", .desc = "Hard limit on the number of per-process file descriptors" },
+ [INF_MAXSOCK] = { .name = "Maxsock", .desc = "Hard limit on the number of per-process sockets" },
+ [INF_MAXCONN] = { .name = "Maxconn", .desc = "Hard limit on the number of per-process connections (configured or imposed by Ulimit-n)" },
+ [INF_HARD_MAXCONN] = { .name = "Hard_maxconn", .desc = "Hard limit on the number of per-process connections (imposed by Memmax_MB or Ulimit-n)" },
+ [INF_CURR_CONN] = { .name = "CurrConns", .desc = "Current number of connections on this worker process" },
+ [INF_CUM_CONN] = { .name = "CumConns", .desc = "Total number of connections on this worker process since started" },
+ [INF_CUM_REQ] = { .name = "CumReq", .desc = "Total number of requests on this worker process since started" },
+ [INF_MAX_SSL_CONNS] = { .name = "MaxSslConns", .desc = "Hard limit on the number of per-process SSL endpoints (front+back), 0=unlimited" },
+ [INF_CURR_SSL_CONNS] = { .name = "CurrSslConns", .desc = "Current number of SSL endpoints on this worker process (front+back)" },
+ [INF_CUM_SSL_CONNS] = { .name = "CumSslConns", .desc = "Total number of SSL endpoints on this worker process since started (front+back)" },
+ [INF_MAXPIPES] = { .name = "Maxpipes", .desc = "Hard limit on the number of pipes for splicing, 0=unlimited" },
+ [INF_PIPES_USED] = { .name = "PipesUsed", .desc = "Current number of pipes in use in this worker process" },
+ [INF_PIPES_FREE] = { .name = "PipesFree", .desc = "Current number of allocated and available pipes in this worker process" },
+ [INF_CONN_RATE] = { .name = "ConnRate", .desc = "Number of front connections created on this worker process over the last second" },
+ [INF_CONN_RATE_LIMIT] = { .name = "ConnRateLimit", .desc = "Hard limit for ConnRate (global.maxconnrate)" },
+ [INF_MAX_CONN_RATE] = { .name = "MaxConnRate", .desc = "Highest ConnRate reached on this worker process since started (in connections per second)" },
+ [INF_SESS_RATE] = { .name = "SessRate", .desc = "Number of sessions created on this worker process over the last second" },
+ [INF_SESS_RATE_LIMIT] = { .name = "SessRateLimit", .desc = "Hard limit for SessRate (global.maxsessrate)" },
+ [INF_MAX_SESS_RATE] = { .name = "MaxSessRate", .desc = "Highest SessRate reached on this worker process since started (in sessions per second)" },
+ [INF_SSL_RATE] = { .name = "SslRate", .desc = "Number of SSL connections created on this worker process over the last second" },
+ [INF_SSL_RATE_LIMIT] = { .name = "SslRateLimit", .desc = "Hard limit for SslRate (global.maxsslrate)" },
+ [INF_MAX_SSL_RATE] = { .name = "MaxSslRate", .desc = "Highest SslRate reached on this worker process since started (in connections per second)" },
+ [INF_SSL_FRONTEND_KEY_RATE] = { .name = "SslFrontendKeyRate", .desc = "Number of SSL keys created on frontends in this worker process over the last second" },
+ [INF_SSL_FRONTEND_MAX_KEY_RATE] = { .name = "SslFrontendMaxKeyRate", .desc = "Highest SslFrontendKeyRate reached on this worker process since started (in SSL keys per second)" },
+ [INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .name = "SslFrontendSessionReuse_pct", .desc = "Percent of frontend SSL connections which did not require a new key" },
+ [INF_SSL_BACKEND_KEY_RATE] = { .name = "SslBackendKeyRate", .desc = "Number of SSL keys created on backends in this worker process over the last second" },
+ [INF_SSL_BACKEND_MAX_KEY_RATE] = { .name = "SslBackendMaxKeyRate", .desc = "Highest SslBackendKeyRate reached on this worker process since started (in SSL keys per second)" },
+ [INF_SSL_CACHE_LOOKUPS] = { .name = "SslCacheLookups", .desc = "Total number of SSL session ID lookups in the SSL session cache on this worker since started" },
+ [INF_SSL_CACHE_MISSES] = { .name = "SslCacheMisses", .desc = "Total number of SSL session ID lookups that didn't find a session in the SSL session cache on this worker since started" },
+ [INF_COMPRESS_BPS_IN] = { .name = "CompressBpsIn", .desc = "Number of bytes submitted to the HTTP compressor in this worker process over the last second" },
+ [INF_COMPRESS_BPS_OUT] = { .name = "CompressBpsOut", .desc = "Number of bytes emitted by the HTTP compressor in this worker process over the last second" },
+ [INF_COMPRESS_BPS_RATE_LIM] = { .name = "CompressBpsRateLim", .desc = "Limit of CompressBpsOut beyond which HTTP compression is automatically disabled" },
+ [INF_ZLIB_MEM_USAGE] = { .name = "ZlibMemUsage", .desc = "Amount of memory currently used by HTTP compression on the current worker process (in bytes)" },
+ [INF_MAX_ZLIB_MEM_USAGE] = { .name = "MaxZlibMemUsage", .desc = "Limit on the amount of memory used by HTTP compression above which it is automatically disabled (in bytes, see global.maxzlibmem)" },
+ [INF_TASKS] = { .name = "Tasks", .desc = "Total number of tasks in the current worker process (active + sleeping)" },
+ [INF_RUN_QUEUE] = { .name = "Run_queue", .desc = "Total number of active tasks+tasklets in the current worker process" },
+ [INF_IDLE_PCT] = { .name = "Idle_pct", .desc = "Percentage of last second spent waiting in the current worker thread" },
+ [INF_NODE] = { .name = "node", .desc = "Node name (global.node)" },
+ [INF_DESCRIPTION] = { .name = "description", .desc = "Node description (global.description)" },
+ [INF_STOPPING] = { .name = "Stopping", .desc = "1 if the worker process is currently stopping, otherwise zero" },
+ [INF_JOBS] = { .name = "Jobs", .desc = "Current number of active jobs on the current worker process (frontend connections, master connections, listeners)" },
+ [INF_UNSTOPPABLE_JOBS] = { .name = "Unstoppable Jobs", .desc = "Current number of unstoppable jobs on the current worker process (master connections)" },
+ [INF_LISTENERS] = { .name = "Listeners", .desc = "Current number of active listeners on the current worker process" },
+ [INF_ACTIVE_PEERS] = { .name = "ActivePeers", .desc = "Current number of verified active peers connections on the current worker process" },
+ [INF_CONNECTED_PEERS] = { .name = "ConnectedPeers", .desc = "Current number of peers having passed the connection step on the current worker process" },
+ [INF_DROPPED_LOGS] = { .name = "DroppedLogs", .desc = "Total number of dropped logs for current worker process since started" },
+ [INF_BUSY_POLLING] = { .name = "BusyPolling", .desc = "1 if busy-polling is currently in use on the worker process, otherwise zero (config.busy-polling)" },
+ [INF_FAILED_RESOLUTIONS] = { .name = "FailedResolutions", .desc = "Total number of failed DNS resolutions in current worker process since started" },
+ [INF_TOTAL_BYTES_OUT] = { .name = "TotalBytesOut", .desc = "Total number of bytes emitted by current worker process since started" },
+ [INF_TOTAL_SPLICED_BYTES_OUT] = { .name = "TotalSplicdedBytesOut", .desc = "Total number of bytes emitted by current worker process through a kernel pipe since started" },
+ [INF_BYTES_OUT_RATE] = { .name = "BytesOutRate", .desc = "Number of bytes emitted by current worker process over the last second" },
+ [INF_DEBUG_COMMANDS_ISSUED] = { .name = "DebugCommandsIssued", .desc = "Number of debug commands issued on this process (anything > 0 is unsafe)" },
+ [INF_CUM_LOG_MSGS] = { .name = "CumRecvLogs", .desc = "Total number of log messages received by log-forwarding listeners on this worker process since started" },
+ [INF_BUILD_INFO] = { .name = "Build info", .desc = "Build info" },
+ [INF_TAINTED] = { .name = "Tainted", .desc = "Experimental features used" },
+};
+
+const struct name_desc stat_fields[ST_F_TOTAL_FIELDS] = {
+ [ST_F_PXNAME] = { .name = "pxname", .desc = "Proxy name" },
+ [ST_F_SVNAME] = { .name = "svname", .desc = "Server name" },
+ [ST_F_QCUR] = { .name = "qcur", .desc = "Number of current queued connections" },
+ [ST_F_QMAX] = { .name = "qmax", .desc = "Highest value of queued connections encountered since process started" },
+ [ST_F_SCUR] = { .name = "scur", .desc = "Number of current sessions on the frontend, backend or server" },
+ [ST_F_SMAX] = { .name = "smax", .desc = "Highest value of current sessions encountered since process started" },
+ [ST_F_SLIM] = { .name = "slim", .desc = "Frontend/listener/server's maxconn, backend's fullconn" },
+ [ST_F_STOT] = { .name = "stot", .desc = "Total number of sessions since process started" },
+ [ST_F_BIN] = { .name = "bin", .desc = "Total number of request bytes since process started" },
+ [ST_F_BOUT] = { .name = "bout", .desc = "Total number of response bytes since process started" },
+ [ST_F_DREQ] = { .name = "dreq", .desc = "Total number of denied requests since process started" },
+ [ST_F_DRESP] = { .name = "dresp", .desc = "Total number of denied responses since process started" },
+ [ST_F_EREQ] = { .name = "ereq", .desc = "Total number of invalid requests since process started" },
+ [ST_F_ECON] = { .name = "econ", .desc = "Total number of failed connections to server since the worker process started" },
+ [ST_F_ERESP] = { .name = "eresp", .desc = "Total number of invalid responses since the worker process started" },
+ [ST_F_WRETR] = { .name = "wretr", .desc = "Total number of server connection retries since the worker process started" },
+ [ST_F_WREDIS] = { .name = "wredis", .desc = "Total number of server redispatches due to connection failures since the worker process started" },
+ [ST_F_STATUS] = { .name = "status", .desc = "Frontend/listen status: OPEN/WAITING/FULL/STOP; backend: UP/DOWN; server: last check status" },
+ [ST_F_WEIGHT] = { .name = "weight", .desc = "Server's effective weight, or sum of active servers' effective weights for a backend" },
+ [ST_F_ACT] = { .name = "act", .desc = "Total number of active UP servers with a non-zero weight" },
+ [ST_F_BCK] = { .name = "bck", .desc = "Total number of backup UP servers with a non-zero weight" },
+ [ST_F_CHKFAIL] = { .name = "chkfail", .desc = "Total number of failed individual health checks per server/backend, since the worker process started" },
+ [ST_F_CHKDOWN] = { .name = "chkdown", .desc = "Total number of failed checks causing UP to DOWN server transitions, per server/backend, since the worker process started" },
+ [ST_F_LASTCHG] = { .name = "lastchg", .desc = "How long ago the last server state changed, in seconds" },
+ [ST_F_DOWNTIME] = { .name = "downtime", .desc = "Total time spent in DOWN state, for server or backend" },
+ [ST_F_QLIMIT] = { .name = "qlimit", .desc = "Limit on the number of connections in queue, for servers only (maxqueue argument)" },
+ [ST_F_PID] = { .name = "pid", .desc = "Relative worker process number (1)" },
+ [ST_F_IID] = { .name = "iid", .desc = "Frontend or Backend numeric identifier ('id' setting)" },
+ [ST_F_SID] = { .name = "sid", .desc = "Server numeric identifier ('id' setting)" },
+ [ST_F_THROTTLE] = { .name = "throttle", .desc = "Throttling ratio applied to a server's maxconn and weight during the slowstart period (0 to 100%)" },
+ [ST_F_LBTOT] = { .name = "lbtot", .desc = "Total number of requests routed by load balancing since the worker process started (ignores queue pop and stickiness)" },
+ [ST_F_TRACKED] = { .name = "tracked", .desc = "Name of the other server this server tracks for its state" },
+ [ST_F_TYPE] = { .name = "type", .desc = "Type of the object (Listener, Frontend, Backend, Server)" },
+ [ST_F_RATE] = { .name = "rate", .desc = "Total number of sessions processed by this object over the last second (sessions for listeners/frontends, requests for backends/servers)" },
+ [ST_F_RATE_LIM] = { .name = "rate_lim", .desc = "Limit on the number of sessions accepted in a second (frontend only, 'rate-limit sessions' setting)" },
+ [ST_F_RATE_MAX] = { .name = "rate_max", .desc = "Highest value of sessions per second observed since the worker process started" },
+ [ST_F_CHECK_STATUS] = { .name = "check_status", .desc = "Status report of the server's latest health check, prefixed with '*' if a check is currently in progress" },
+ [ST_F_CHECK_CODE] = { .name = "check_code", .desc = "HTTP/SMTP/LDAP status code reported by the latest server health check" },
+ [ST_F_CHECK_DURATION] = { .name = "check_duration", .desc = "Total duration of the latest server health check, in milliseconds" },
+ [ST_F_HRSP_1XX] = { .name = "hrsp_1xx", .desc = "Total number of HTTP responses with status 100-199 returned by this object since the worker process started" },
+ [ST_F_HRSP_2XX] = { .name = "hrsp_2xx", .desc = "Total number of HTTP responses with status 200-299 returned by this object since the worker process started" },
+ [ST_F_HRSP_3XX] = { .name = "hrsp_3xx", .desc = "Total number of HTTP responses with status 300-399 returned by this object since the worker process started" },
+ [ST_F_HRSP_4XX] = { .name = "hrsp_4xx", .desc = "Total number of HTTP responses with status 400-499 returned by this object since the worker process started" },
+ [ST_F_HRSP_5XX] = { .name = "hrsp_5xx", .desc = "Total number of HTTP responses with status 500-599 returned by this object since the worker process started" },
+ [ST_F_HRSP_OTHER] = { .name = "hrsp_other", .desc = "Total number of HTTP responses with status <100, >599 returned by this object since the worker process started (error -1 included)" },
+ [ST_F_HANAFAIL] = { .name = "hanafail", .desc = "Total number of failed checks caused by an 'on-error' directive after an 'observe' condition matched" },
+ [ST_F_REQ_RATE] = { .name = "req_rate", .desc = "Number of HTTP requests processed over the last second on this object" },
+ [ST_F_REQ_RATE_MAX] = { .name = "req_rate_max", .desc = "Highest value of HTTP requests per second observed since the worker process started" },
+ [ST_F_REQ_TOT] = { .name = "req_tot", .desc = "Total number of HTTP requests processed by this object since the worker process started" },
+ [ST_F_CLI_ABRT] = { .name = "cli_abrt", .desc = "Total number of requests or connections aborted by the client since the worker process started" },
+ [ST_F_SRV_ABRT] = { .name = "srv_abrt", .desc = "Total number of requests or connections aborted by the server since the worker process started" },
+ [ST_F_COMP_IN] = { .name = "comp_in", .desc = "Total number of bytes submitted to the HTTP compressor for this object since the worker process started" },
+ [ST_F_COMP_OUT] = { .name = "comp_out", .desc = "Total number of bytes emitted by the HTTP compressor for this object since the worker process started" },
+ [ST_F_COMP_BYP] = { .name = "comp_byp", .desc = "Total number of bytes that bypassed HTTP compression for this object since the worker process started (CPU/memory/bandwidth limitation)" },
+ [ST_F_COMP_RSP] = { .name = "comp_rsp", .desc = "Total number of HTTP responses that were compressed for this object since the worker process started" },
+ [ST_F_LASTSESS] = { .name = "lastsess", .desc = "How long ago some traffic was seen on this object on this worker process, in seconds" },
+ [ST_F_LAST_CHK] = { .name = "last_chk", .desc = "Short description of the latest health check report for this server (see also check_desc)" },
+ [ST_F_LAST_AGT] = { .name = "last_agt", .desc = "Short description of the latest agent check report for this server (see also agent_desc)" },
+ [ST_F_QTIME] = { .name = "qtime", .desc = "Time spent in the queue, in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_CTIME] = { .name = "ctime", .desc = "Time spent waiting for a connection to complete, in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_RTIME] = { .name = "rtime", .desc = "Time spent waiting for a server response, in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_TTIME] = { .name = "ttime", .desc = "Total request+response time (request+queue+connect+response+processing), in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_AGENT_STATUS] = { .name = "agent_status", .desc = "Status report of the server's latest agent check, prefixed with '*' if a check is currently in progress" },
+ [ST_F_AGENT_CODE] = { .name = "agent_code", .desc = "Status code reported by the latest server agent check" },
+ [ST_F_AGENT_DURATION] = { .name = "agent_duration", .desc = "Total duration of the latest server agent check, in milliseconds" },
+ [ST_F_CHECK_DESC] = { .name = "check_desc", .desc = "Textual description of the latest health check report for this server" },
+ [ST_F_AGENT_DESC] = { .name = "agent_desc", .desc = "Textual description of the latest agent check report for this server" },
+ [ST_F_CHECK_RISE] = { .name = "check_rise", .desc = "Number of successful health checks before declaring a server UP (server 'rise' setting)" },
+ [ST_F_CHECK_FALL] = { .name = "check_fall", .desc = "Number of failed health checks before declaring a server DOWN (server 'fall' setting)" },
+ [ST_F_CHECK_HEALTH] = { .name = "check_health", .desc = "Current server health check level (0..fall-1=DOWN, fall..rise-1=UP)" },
+ [ST_F_AGENT_RISE] = { .name = "agent_rise", .desc = "Number of successful agent checks before declaring a server UP (server 'rise' setting)" },
+ [ST_F_AGENT_FALL] = { .name = "agent_fall", .desc = "Number of failed agent checks before declaring a server DOWN (server 'fall' setting)" },
+ [ST_F_AGENT_HEALTH] = { .name = "agent_health", .desc = "Current server agent check level (0..fall-1=DOWN, fall..rise-1=UP)" },
+ [ST_F_ADDR] = { .name = "addr", .desc = "Server's address:port, shown only if show-legends is set, or at levels oper/admin for the CLI" },
+ [ST_F_COOKIE] = { .name = "cookie", .desc = "Backend's cookie name or Server's cookie value, shown only if show-legends is set, or at levels oper/admin for the CLI" },
+ [ST_F_MODE] = { .name = "mode", .desc = "'mode' setting (tcp/http/health/cli)" },
+ [ST_F_ALGO] = { .name = "algo", .desc = "Backend's load balancing algorithm, shown only if show-legends is set, or at levels oper/admin for the CLI" },
+ [ST_F_CONN_RATE] = { .name = "conn_rate", .desc = "Number of new connections accepted over the last second on the frontend for this worker process" },
+ [ST_F_CONN_RATE_MAX] = { .name = "conn_rate_max", .desc = "Highest value of connections per second observed since the worker process started" },
+ [ST_F_CONN_TOT] = { .name = "conn_tot", .desc = "Total number of new connections accepted on this frontend since the worker process started" },
+ [ST_F_INTERCEPTED] = { .name = "intercepted", .desc = "Total number of HTTP requests intercepted on the frontend (redirects/stats/services) since the worker process started" },
+ [ST_F_DCON] = { .name = "dcon", .desc = "Total number of incoming connections blocked on a listener/frontend by a tcp-request connection rule since the worker process started" },
+ [ST_F_DSES] = { .name = "dses", .desc = "Total number of incoming sessions blocked on a listener/frontend by a tcp-request connection rule since the worker process started" },
+ [ST_F_WREW] = { .name = "wrew", .desc = "Total number of failed HTTP header rewrites since the worker process started" },
+ [ST_F_CONNECT] = { .name = "connect", .desc = "Total number of outgoing connection attempts on this backend/server since the worker process started" },
+ [ST_F_REUSE] = { .name = "reuse", .desc = "Total number of reused connections on this backend/server since the worker process started" },
+ [ST_F_CACHE_LOOKUPS] = { .name = "cache_lookups", .desc = "Total number of HTTP requests looked up in the cache on this frontend/backend since the worker process started" },
+ [ST_F_CACHE_HITS] = { .name = "cache_hits", .desc = "Total number of HTTP requests found in the cache and served from it on this frontend/backend since the worker process started" },
+ [ST_F_SRV_ICUR] = { .name = "srv_icur", .desc = "Current number of idle connections available for reuse on this server" },
+ [ST_F_SRV_ILIM] = { .name = "srv_ilim", .desc = "Limit on the number of available idle connections on this server (server 'pool_max_conn' directive)" },
+ [ST_F_QT_MAX] = { .name = "qtime_max", .desc = "Maximum observed time spent in the queue, in milliseconds (backend/server)" },
+ [ST_F_CT_MAX] = { .name = "ctime_max", .desc = "Maximum observed time spent waiting for a connection to complete, in milliseconds (backend/server)" },
+ [ST_F_RT_MAX] = { .name = "rtime_max", .desc = "Maximum observed time spent waiting for a server response, in milliseconds (backend/server)" },
+ [ST_F_TT_MAX] = { .name = "ttime_max", .desc = "Maximum observed total request+response time (request+queue+connect+response+processing), in milliseconds (backend/server)" },
+ [ST_F_EINT] = { .name = "eint", .desc = "Total number of internal errors since the worker process started" },
+ [ST_F_IDLE_CONN_CUR] = { .name = "idle_conn_cur", .desc = "Current number of unsafe idle connections" },
+ [ST_F_SAFE_CONN_CUR] = { .name = "safe_conn_cur", .desc = "Current number of safe idle connections" },
+ [ST_F_USED_CONN_CUR] = { .name = "used_conn_cur", .desc = "Current number of connections in use" },
+ [ST_F_NEED_CONN_EST] = { .name = "need_conn_est", .desc = "Estimated number of connections needed" },
+ [ST_F_UWEIGHT] = { .name = "uweight", .desc = "Server's user weight, or sum of active servers' user weights for a backend" },
+ [ST_F_AGG_SRV_CHECK_STATUS] = { .name = "agg_server_check_status", .desc = "Backend's aggregated gauge of servers' state check status" },
+ [ST_F_AGG_SRV_STATUS] = { .name = "agg_server_status", .desc = "Backend's aggregated gauge of servers' status" },
+ [ST_F_AGG_CHECK_STATUS] = { .name = "agg_check_status", .desc = "Backend's aggregated gauge of servers' state check status" },
+};
+
+/* one line of info */
+THREAD_LOCAL struct field info[INF_TOTAL_FIELDS];
+
+/* description of statistics (static and dynamic) */
+static struct name_desc *stat_f[STATS_DOMAIN_COUNT];
+static size_t stat_count[STATS_DOMAIN_COUNT];
+
+/* one line for stats */
+THREAD_LOCAL struct field *stat_l[STATS_DOMAIN_COUNT];
+
+/* list of all registered stats modules */
+static struct list stats_module_list[STATS_DOMAIN_COUNT] = {
+ LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_PROXY]),
+ LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_RESOLVERS]),
+};
+
+THREAD_LOCAL void *trash_counters;
+static THREAD_LOCAL struct buffer trash_chunk = BUF_NULL;
+
+
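+/* The stats <domain> word packs several values at fixed bit positions:
+ * STATS_DOMAIN and STATS_PX_CAP are shift amounts and the matching *_MASK
+ * macros isolate the stored value, as the two accessors below illustrate.
+ */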
+static inline uint8_t stats_get_domain(uint32_t domain)
+{
+ return (domain >> STATS_DOMAIN) & STATS_DOMAIN_MASK;
+}
+
+static inline enum stats_domain_px_cap stats_px_get_cap(uint32_t domain)
+{
+ return (domain >> STATS_PX_CAP) & STATS_PX_CAP_MASK;
+}
+
+static void stats_dump_json_schema(struct buffer *out);
+
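+/* Copies the local trash buffer into channel <chn>, as an HTX data block if
+ * <htx> is non-NULL, or as raw data otherwise. Returns 1 on success and 0 if
+ * there is not enough room in the channel, in which case the pending data is
+ * preserved so the caller may retry once some room is available.
+ */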
+int stats_putchk(struct channel *chn, struct htx *htx)
+{
+ struct buffer *chk = &trash_chunk;
+
+ if (htx) {
+ if (chk->data >= channel_htx_recv_max(chn, htx))
+ return 0;
+ if (!htx_add_data_atonce(htx, ist2(chk->area, chk->data)))
+ return 0;
+ channel_add_input(chn, chk->data);
+ chk->data = 0;
+ }
+ else {
+ if (ci_putchk(chn, chk) == -1)
+ return 0;
+ }
+ return 1;
+}
+
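+/* Returns a pointer to the "scope" parameter inside the stats request URI,
+ * assuming <ctx->scope_str> stores that parameter's offset from the start of
+ * the URI found in the request channel's HTX start line.
+ */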
+static const char *stats_scope_ptr(struct appctx *appctx, struct stconn *sc)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *req = sc_oc(sc);
+ struct htx *htx = htxbuf(&req->buf);
+ struct htx_blk *blk;
+ struct ist uri;
+
+ blk = htx_get_head_blk(htx);
+ BUG_ON(!blk || htx_get_blk_type(blk) != HTX_BLK_REQ_SL);
+ ALREADY_CHECKED(blk);
+ uri = htx_sl_req_uri(htx_get_blk_ptr(htx, blk));
+ return uri.ptr + ctx->scope_str;
+}
+
+/*
+ * http_stats_io_handler()
+ * -> stats_dump_stat_to_buffer() // same as above, but used for CSV or HTML
+ * -> stats_dump_csv_header() // emits the CSV headers (same as above)
+ * -> stats_dump_json_header() // emits the JSON headers (same as above)
+ * -> stats_dump_html_head() // emits the HTML headers
+ * -> stats_dump_html_info() // emits the equivalent of "show info" at the top
+ * -> stats_dump_proxy_to_buffer() // same as above, valid for CSV and HTML
+ * -> stats_dump_html_px_hdr()
+ * -> stats_dump_fe_stats()
+ * -> stats_dump_li_stats()
+ * -> stats_dump_sv_stats()
+ * -> stats_dump_be_stats()
+ * -> stats_dump_html_px_end()
+ * -> stats_dump_html_end() // emits HTML trailer
+ * -> stats_dump_json_end() // emits JSON trailer
+ */
+
+
+/* Dumps the stats CSV header to the local trash buffer. The caller is
+ * responsible for clearing it if needed.
+ * NOTE: Some tools happen to rely on the field position instead of its name,
+ * so please only append new fields at the end, never in the middle.
+ */
+static void stats_dump_csv_header(enum stats_domain domain)
+{
+ int field;
+
+ chunk_appendf(&trash_chunk, "# ");
+ if (stat_f[domain]) {
+ for (field = 0; field < stat_count[domain]; ++field) {
+ chunk_appendf(&trash_chunk, "%s,", stat_f[domain][field].name);
+
+ /* print special delimiter on proxy stats to mark end of
+ static fields */
+ if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS)
+ chunk_appendf(&trash_chunk, "-,");
+ }
+ }
+
+ chunk_appendf(&trash_chunk, "\n");
+}
+
+/* Emits a stats field without any surrounding element and properly encoded to
+ * resist CSV output. Returns non-zero on success, 0 if the buffer is full.
+ */
+int stats_emit_raw_data_field(struct buffer *out, const struct field *f)
+{
+ switch (field_format(f, 0)) {
+ case FF_EMPTY: return 1;
+ case FF_S32: return chunk_appendf(out, "%d", f->u.s32);
+ case FF_U32: return chunk_appendf(out, "%u", f->u.u32);
+ case FF_S64: return chunk_appendf(out, "%lld", (long long)f->u.s64);
+ case FF_U64: return chunk_appendf(out, "%llu", (unsigned long long)f->u.u64);
+ case FF_FLT: {
+ size_t prev_data = out->data;
+ out->data = flt_trim(out->area, prev_data, chunk_appendf(out, "%f", f->u.flt));
+ return out->data;
+ }
+ case FF_STR: return csv_enc_append(field_str(f, 0), 1, out) != NULL;
+ default: return chunk_appendf(out, "[INCORRECT_FIELD_TYPE_%08x]", f->type);
+ }
+}
+
+const char *field_to_html_str(const struct field *f)
+{
+ switch (field_format(f, 0)) {
+ case FF_S32: return U2H(f->u.s32);
+ case FF_S64: return U2H(f->u.s64);
+ case FF_U64: return U2H(f->u.u64);
+ case FF_U32: return U2H(f->u.u32);
+ case FF_FLT: return F2H(f->u.flt);
+ case FF_STR: return field_str(f, 0);
+ case FF_EMPTY:
+ default:
+ return "";
+ }
+}
+
+/* Emits a stats field prefixed with its type. No CSV encoding is prepared, the
+ * output is supposed to be used on its own line. Returns non-zero on success, 0
+ * if the buffer is full.
+ */
+int stats_emit_typed_data_field(struct buffer *out, const struct field *f)
+{
+ switch (field_format(f, 0)) {
+ case FF_EMPTY: return 1;
+ case FF_S32: return chunk_appendf(out, "s32:%d", f->u.s32);
+ case FF_U32: return chunk_appendf(out, "u32:%u", f->u.u32);
+ case FF_S64: return chunk_appendf(out, "s64:%lld", (long long)f->u.s64);
+ case FF_U64: return chunk_appendf(out, "u64:%llu", (unsigned long long)f->u.u64);
+ case FF_FLT: {
+ size_t prev_data = out->data;
+ out->data = flt_trim(out->area, prev_data, chunk_appendf(out, "flt:%f", f->u.flt));
+ return out->data;
+ }
+ case FF_STR: return chunk_appendf(out, "str:%s", field_str(f, 0));
+ default: return chunk_appendf(out, "%08x:?", f->type);
+ }
+}
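+/* For example, a 32-bit unsigned gauge renders as "u32:1234" and a string
+ * field as "str:OPEN", while FF_EMPTY fields emit nothing at all.
+ */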
+
+/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per
+ * the recommendation for interoperable integers in section 6 of RFC 7159.
+ */
+#define JSON_INT_MAX ((1ULL << 53) - 1)
+#define JSON_INT_MIN (0 - JSON_INT_MAX)
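+/* i.e. JSON_INT_MAX = 9007199254740991 (2^53 - 1), the largest integer an
+ * IEEE 754 double can represent exactly, hence safe for JSON consumers.
+ */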
+
+/* Emits a stats field value and its type in JSON.
+ * Returns non-zero on success, 0 on error.
+ */
+int stats_emit_json_data_field(struct buffer *out, const struct field *f)
+{
+ int old_len;
+ char buf[20];
+ const char *type, *value = buf, *quote = "";
+
+ switch (field_format(f, 0)) {
+ case FF_EMPTY: return 1;
+ case FF_S32: type = "\"s32\"";
+ snprintf(buf, sizeof(buf), "%d", f->u.s32);
+ break;
+ case FF_U32: type = "\"u32\"";
+ snprintf(buf, sizeof(buf), "%u", f->u.u32);
+ break;
+ case FF_S64: if (f->u.s64 < JSON_INT_MIN || f->u.s64 > JSON_INT_MAX)
+ return 0;
+ type = "\"s64\"";
+ snprintf(buf, sizeof(buf), "%lld", (long long)f->u.s64);
+ break;
+ case FF_U64: if (f->u.u64 > JSON_INT_MAX)
+ return 0;
+ type = "\"u64\"";
+ snprintf(buf, sizeof(buf), "%llu",
+ (unsigned long long) f->u.u64);
+ break;
+ case FF_FLT: type = "\"flt\"";
+ flt_trim(buf, 0, snprintf(buf, sizeof(buf), "%f", f->u.flt));
+ break;
+ case FF_STR: type = "\"str\"";
+ value = field_str(f, 0);
+ quote = "\"";
+ break;
+ default: snprintf(buf, sizeof(buf), "%u", f->type);
+ type = buf;
+ value = "unknown";
+ quote = "\"";
+ break;
+ }
+
+ old_len = out->data;
+ chunk_appendf(out, ",\"value\":{\"type\":%s,\"value\":%s%s%s}",
+ type, quote, value, quote);
+ return !(old_len == out->data);
+}
+
+/* Emits an encoding of the field type on 3 characters followed by a delimiter.
+ * Returns non-zero on success, 0 if the buffer is full.
+ */
+int stats_emit_field_tags(struct buffer *out, const struct field *f,
+ char delim)
+{
+ char origin, nature, scope;
+
+ switch (field_origin(f, 0)) {
+ case FO_METRIC: origin = 'M'; break;
+ case FO_STATUS: origin = 'S'; break;
+ case FO_KEY: origin = 'K'; break;
+ case FO_CONFIG: origin = 'C'; break;
+ case FO_PRODUCT: origin = 'P'; break;
+ default: origin = '?'; break;
+ }
+
+ switch (field_nature(f, 0)) {
+ case FN_GAUGE: nature = 'G'; break;
+ case FN_LIMIT: nature = 'L'; break;
+ case FN_MIN: nature = 'm'; break;
+ case FN_MAX: nature = 'M'; break;
+ case FN_RATE: nature = 'R'; break;
+ case FN_COUNTER: nature = 'C'; break;
+ case FN_DURATION: nature = 'D'; break;
+ case FN_AGE: nature = 'A'; break;
+ case FN_TIME: nature = 'T'; break;
+ case FN_NAME: nature = 'N'; break;
+ case FN_OUTPUT: nature = 'O'; break;
+ case FN_AVG: nature = 'a'; break;
+ default: nature = '?'; break;
+ }
+
+ switch (field_scope(f, 0)) {
+ case FS_PROCESS: scope = 'P'; break;
+ case FS_SERVICE: scope = 'S'; break;
+ case FS_SYSTEM: scope = 's'; break;
+ case FS_CLUSTER: scope = 'C'; break;
+ default: scope = '?'; break;
+ }
+
+ return chunk_appendf(out, "%c%c%c%c", origin, nature, scope, delim);
+}
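+/* For instance, the tag string "MGP" decodes as origin=Metric, nature=Gauge,
+ * scope=Process using the mappings above.
+ */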
+
+/* Emits an encoding of the field type as JSON.
+ * Returns non-zero on success, 0 if the buffer is full.
+ */
+int stats_emit_json_field_tags(struct buffer *out, const struct field *f)
+{
+ const char *origin, *nature, *scope;
+ int old_len;
+
+ switch (field_origin(f, 0)) {
+ case FO_METRIC: origin = "Metric"; break;
+ case FO_STATUS: origin = "Status"; break;
+ case FO_KEY: origin = "Key"; break;
+ case FO_CONFIG: origin = "Config"; break;
+ case FO_PRODUCT: origin = "Product"; break;
+ default: origin = "Unknown"; break;
+ }
+
+ switch (field_nature(f, 0)) {
+ case FN_GAUGE: nature = "Gauge"; break;
+ case FN_LIMIT: nature = "Limit"; break;
+ case FN_MIN: nature = "Min"; break;
+ case FN_MAX: nature = "Max"; break;
+ case FN_RATE: nature = "Rate"; break;
+ case FN_COUNTER: nature = "Counter"; break;
+ case FN_DURATION: nature = "Duration"; break;
+ case FN_AGE: nature = "Age"; break;
+ case FN_TIME: nature = "Time"; break;
+ case FN_NAME: nature = "Name"; break;
+ case FN_OUTPUT: nature = "Output"; break;
+ case FN_AVG: nature = "Avg"; break;
+ default: nature = "Unknown"; break;
+ }
+
+ switch (field_scope(f, 0)) {
+ case FS_PROCESS: scope = "Process"; break;
+ case FS_SERVICE: scope = "Service"; break;
+ case FS_SYSTEM: scope = "System"; break;
+ case FS_CLUSTER: scope = "Cluster"; break;
+ default: scope = "Unknown"; break;
+ }
+
+ old_len = out->data;
+ chunk_appendf(out, "\"tags\":{"
+ "\"origin\":\"%s\","
+ "\"nature\":\"%s\","
+ "\"scope\":\"%s\""
+ "}", origin, nature, scope);
+ return !(old_len == out->data);
+}
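+/* The same "MGP" example from above thus serializes as:
+ * "tags":{"origin":"Metric","nature":"Gauge","scope":"Process"}
+ */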
+
+/* Dump all fields from <stats> into <out> using CSV format */
+static int stats_dump_fields_csv(struct buffer *out,
+ const struct field *stats, size_t stats_count,
+ struct show_stat_ctx *ctx)
+{
+ int domain = ctx->domain;
+ int field;
+
+ for (field = 0; field < stats_count; ++field) {
+ if (!stats_emit_raw_data_field(out, &stats[field]))
+ return 0;
+ if (!chunk_strcat(out, ","))
+ return 0;
+
+ /* print special delimiter on proxy stats to mark end of
+ static fields */
+ if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS) {
+ if (!chunk_strcat(out, "-,"))
+ return 0;
+ }
+ }
+
+ chunk_strcat(out, "\n");
+ return 1;
+}
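+/* Each CSV line carries one value per column announced by
+ * stats_dump_csv_header(), in the same field order, e.g. (illustrative,
+ * abridged values):
+ * fe-http,FRONTEND,,,0,12,2000,345,...,OPEN,...
+ * where FF_EMPTY fields simply leave their column blank.
+ */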
+
+/* Dump all fields from <stats> into <out> using a typed "field:tags:type:value" format */
+static int stats_dump_fields_typed(struct buffer *out,
+ const struct field *stats,
+ size_t stats_count,
+ struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int domain = ctx->domain;
+ int field;
+
+ for (field = 0; field < stats_count; ++field) {
+ if (!stats[field].type)
+ continue;
+
+ switch (domain) {
+ case STATS_DOMAIN_PROXY:
+ chunk_appendf(out, "%c.%u.%u.%d.%s.%u:",
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE ? 'F' :
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE ? 'B' :
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO ? 'L' :
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV ? 'S' :
+ '?',
+ stats[ST_F_IID].u.u32, stats[ST_F_SID].u.u32,
+ field,
+ stat_f[domain][field].name,
+ stats[ST_F_PID].u.u32);
+ break;
+
+ case STATS_DOMAIN_RESOLVERS:
+ chunk_appendf(out, "N.%d.%s:", field,
+ stat_f[domain][field].name);
+ break;
+
+ default:
+ break;
+ }
+
+ if (!stats_emit_field_tags(out, &stats[field], ':'))
+ return 0;
+ if (!stats_emit_typed_data_field(out, &stats[field]))
+ return 0;
+
+ if (flags & STAT_SHOW_FDESC &&
+ !chunk_appendf(out, ":\"%s\"", stat_f[domain][field].desc)) {
+ return 0;
+ }
+
+ if (!chunk_strcat(out, "\n"))
+ return 0;
+ }
+ return 1;
+}
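+/* With the pieces above, a frontend's pxname entry would render as, for
+ * instance:
+ * F.2.0.0.pxname.1:KNS:str:fe-http
+ * i.e. <type>.<iid>.<sid>.<pos>.<name>.<pid>:<tags>:<typed value>, followed
+ * by the quoted description when STAT_SHOW_FDESC is set ("fe-http" and the
+ * ids are illustrative values).
+ */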
+
+/* Dump all fields from <stats> into <out> using the "show info json" format */
+static int stats_dump_json_info_fields(struct buffer *out,
+ const struct field *info,
+ struct show_stat_ctx *ctx)
+{
+ int field;
+ int started = 0;
+
+ if (!chunk_strcat(out, "["))
+ return 0;
+
+ for (field = 0; field < INF_TOTAL_FIELDS; field++) {
+ int old_len;
+
+ if (!field_format(info, field))
+ continue;
+
+ if (started && !chunk_strcat(out, ","))
+ goto err;
+ started = 1;
+
+ old_len = out->data;
+ chunk_appendf(out,
+ "{\"field\":{\"pos\":%d,\"name\":\"%s\"},"
+ "\"processNum\":%u,",
+ field, info_fields[field].name,
+ info[INF_PROCESS_NUM].u.u32);
+ if (old_len == out->data)
+ goto err;
+
+ if (!stats_emit_json_field_tags(out, &info[field]))
+ goto err;
+
+ if (!stats_emit_json_data_field(out, &info[field]))
+ goto err;
+
+ if (!chunk_strcat(out, "}"))
+ goto err;
+ }
+
+ if (!chunk_strcat(out, "]\n"))
+ goto err;
+ return 1;
+
+err:
+ chunk_reset(out);
+ chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}\n");
+ return 0;
+}
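+/* A single emitted element would look like (illustrative values):
+ * {"field":{"pos":0,"name":"Name"},"processNum":1,
+ * "tags":{"origin":"Product","nature":"Output","scope":"Service"},
+ * "value":{"type":"str","value":"HAProxy"}}
+ */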
+
+static void stats_print_proxy_field_json(struct buffer *out,
+ const struct field *stat,
+ const char *name,
+ int pos,
+ uint32_t field_type,
+ uint32_t iid,
+ uint32_t sid,
+ uint32_t pid)
+{
+ const char *obj_type;
+ switch (field_type) {
+ case STATS_TYPE_FE: obj_type = "Frontend"; break;
+ case STATS_TYPE_BE: obj_type = "Backend"; break;
+ case STATS_TYPE_SO: obj_type = "Listener"; break;
+ case STATS_TYPE_SV: obj_type = "Server"; break;
+ default: obj_type = "Unknown"; break;
+ }
+
+ chunk_appendf(out,
+ "{"
+ "\"objType\":\"%s\","
+ "\"proxyId\":%u,"
+ "\"id\":%u,"
+ "\"field\":{\"pos\":%d,\"name\":\"%s\"},"
+ "\"processNum\":%u,",
+ obj_type, iid, sid, pos, name, pid);
+}
+
+static void stats_print_rslv_field_json(struct buffer *out,
+ const struct field *stat,
+ const char *name,
+ int pos)
+{
+ chunk_appendf(out,
+ "{"
+ "\"field\":{\"pos\":%d,\"name\":\"%s\"},",
+ pos, name);
+}
+
+
+/* Dump all fields from <stats> into <out> using the JSON format */
+static int stats_dump_fields_json(struct buffer *out,
+ const struct field *stats, size_t stats_count,
+ struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int domain = ctx->domain;
+ int started = (ctx->field) ? 1 : 0;
+ int ready_data = 0;
+
+ if (!started && (flags & STAT_STARTED) && !chunk_strcat(out, ","))
+ return 0;
+ if (!started && !chunk_strcat(out, "["))
+ return 0;
+
+ for (; ctx->field < stats_count; ctx->field++) {
+ int old_len;
+ int field = ctx->field;
+
+ if (!stats[field].type)
+ continue;
+
+ if (started && !chunk_strcat(out, ","))
+ goto err;
+ started = 1;
+
+ old_len = out->data;
+ if (domain == STATS_DOMAIN_PROXY) {
+ stats_print_proxy_field_json(out, &stats[field],
+ stat_f[domain][field].name,
+ field,
+ stats[ST_F_TYPE].u.u32,
+ stats[ST_F_IID].u.u32,
+ stats[ST_F_SID].u.u32,
+ stats[ST_F_PID].u.u32);
+ } else if (domain == STATS_DOMAIN_RESOLVERS) {
+ stats_print_rslv_field_json(out, &stats[field],
+ stat_f[domain][field].name,
+ field);
+ }
+
+ if (old_len == out->data)
+ goto err;
+
+ if (!stats_emit_json_field_tags(out, &stats[field]))
+ goto err;
+
+ if (!stats_emit_json_data_field(out, &stats[field]))
+ goto err;
+
+ if (!chunk_strcat(out, "}"))
+ goto err;
+ ready_data = out->data;
+ }
+
+ if (!chunk_strcat(out, "]"))
+ goto err;
+
+ ctx->field = 0; /* we're done */
+ return 1;
+
+err:
+ if (!ready_data) {
+ /* not enough buffer space for a single entry */
+ chunk_reset(out);
+ if (ctx->flags & STAT_STARTED)
+ chunk_strcat(out, ",");
+ chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}");
+ return 0; /* hard error */
+ }
+ /* push ready data and wait for a new buffer to complete the dump */
+ out->data = ready_data;
+ return 1;
+}
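+/* Note that <ctx->field> persists between calls: when the buffer fills up
+ * after at least one complete entry, the output is truncated back to
+ * <ready_data> and the function still returns 1, so the next invocation
+ * resumes from the field that did not fit, with a fresh buffer.
+ */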
+
+/* Dump all fields from <stats> into <out> using the HTML format. A column is
+ * reserved for the checkbox if STAT_ADMIN is set in <flags>. Some extra info
+ * is provided if STAT_SHLGNDS is present in <flags>. The statistics from
+ * extra modules are displayed at the end of the lines if STAT_SHMODULES is
+ * present in <flags>.
+ */
+static int stats_dump_fields_html(struct buffer *out,
+ const struct field *stats,
+ struct show_stat_ctx *ctx)
+{
+ struct buffer src;
+ struct stats_module *mod;
+ int flags = ctx->flags;
+ int i = 0, j = 0;
+
+ if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE) {
+ chunk_appendf(out,
+ /* name, queue */
+ "<tr class=\"frontend\">");
+
+ if (flags & STAT_ADMIN) {
+ /* Column sub-heading for Enable or Disable server */
+ chunk_appendf(out, "<td></td>");
+ }
+
+ chunk_appendf(out,
+ "<td class=ac>"
+ "<a name=\"%s/Frontend\"></a>"
+ "<a class=lfsb href=\"#%s/Frontend\">Frontend</a></td>"
+ "<td colspan=3></td>"
+ "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME));
+
+ chunk_appendf(out,
+ /* sessions rate : current */
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Current connection rate:</th><td>%s/s</td></tr>"
+ "<tr><th>Current session rate:</th><td>%s/s</td></tr>"
+ "",
+ U2H(stats[ST_F_RATE].u.u32),
+ U2H(stats[ST_F_CONN_RATE].u.u32),
+ U2H(stats[ST_F_RATE].u.u32));
+
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out,
+ "<tr><th>Current request rate:</th><td>%s/s</td></tr>",
+ U2H(stats[ST_F_REQ_RATE].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions rate : max */
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Max connection rate:</th><td>%s/s</td></tr>"
+ "<tr><th>Max session rate:</th><td>%s/s</td></tr>"
+ "",
+ U2H(stats[ST_F_RATE_MAX].u.u32),
+ U2H(stats[ST_F_CONN_RATE_MAX].u.u32),
+ U2H(stats[ST_F_RATE_MAX].u.u32));
+
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out,
+ "<tr><th>Max request rate:</th><td>%s/s</td></tr>",
+ U2H(stats[ST_F_REQ_RATE_MAX].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions rate : limit */
+ "<td>%s</td>",
+ LIM2A(stats[ST_F_RATE_LIM].u.u32, "-"));
+
+ chunk_appendf(out,
+ /* sessions: current, max, limit, total */
+ "<td>%s</td><td>%s</td><td>%s</td>"
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Cum. connections:</th><td>%s</td></tr>"
+ "<tr><th>Cum. sessions:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32),
+ U2H(stats[ST_F_STOT].u.u64),
+ U2H(stats[ST_F_CONN_TOT].u.u64),
+ U2H(stats[ST_F_STOT].u.u64));
+
+ /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) {
+ chunk_appendf(out,
+ "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>"
+ "<tr><th>&nbsp;&nbsp;Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- other responses:</th><td>%s</td></tr>"
+ "<tr><th>Intercepted requests:</th><td>%s</td></tr>"
+ "<tr><th>Cache lookups:</th><td>%s</td></tr>"
+ "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>"
+ "<tr><th>Internal errors:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_REQ_TOT].u.u64),
+ U2H(stats[ST_F_HRSP_1XX].u.u64),
+ U2H(stats[ST_F_HRSP_2XX].u.u64),
+ U2H(stats[ST_F_COMP_RSP].u.u64),
+ stats[ST_F_HRSP_2XX].u.u64 ?
+ (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_3XX].u.u64),
+ U2H(stats[ST_F_HRSP_4XX].u.u64),
+ U2H(stats[ST_F_HRSP_5XX].u.u64),
+ U2H(stats[ST_F_HRSP_OTHER].u.u64),
+ U2H(stats[ST_F_INTERCEPTED].u.u64),
+ U2H(stats[ST_F_CACHE_LOOKUPS].u.u64),
+ U2H(stats[ST_F_CACHE_HITS].u.u64),
+ stats[ST_F_CACHE_LOOKUPS].u.u64 ?
+ (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0,
+ U2H(stats[ST_F_WREW].u.u64),
+ U2H(stats[ST_F_EINT].u.u64));
+ }
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions: lbtot, lastsess */
+ "<td></td><td></td>"
+ /* bytes : in */
+ "<td>%s</td>"
+ "",
+ U2H(stats[ST_F_BIN].u.u64));
+
+ chunk_appendf(out,
+ /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */
+ "<td>%s%s<div class=tips><table class=det>"
+ "<tr><th>Response bytes in:</th><td>%s</td></tr>"
+ "<tr><th>Compression in:</th><td>%s</td></tr>"
+ "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Compression bypass:</th><td>%s</td></tr>"
+ "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "</table></div>%s</td>",
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"",
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64),
+ U2H(stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_COMP_IN].u.u64 ? (int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0,
+ U2H(stats[ST_F_COMP_BYP].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_BOUT].u.u64 ? (int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0,
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":"");
+
+ chunk_appendf(out,
+ /* denied: req, resp */
+ "<td>%s</td><td>%s</td>"
+ /* errors : request, connect, response */
+ "<td>%s</td><td></td><td></td>"
+ /* warnings: retries, redispatches */
+ "<td></td><td></td>"
+ /* server status : reflect frontend status */
+ "<td class=ac>%s</td>"
+ /* rest of server: nothing */
+ "<td class=ac colspan=8></td>"
+ "",
+ U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_EREQ].u.u64),
+ field_str(stats, ST_F_STATUS));
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>");
+ }
+ else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO) {
+ chunk_appendf(out, "<tr class=socket>");
+ if (flags & STAT_ADMIN) {
+ /* Column sub-heading for Enable or Disable server */
+ chunk_appendf(out, "<td></td>");
+ }
+
+ chunk_appendf(out,
+ /* frontend name, listener name */
+ "<td class=ac><a name=\"%s/+%s\"></a>%s"
+ "<a class=lfsb href=\"#%s/+%s\">%s</a>"
+ "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME),
+ (flags & STAT_SHLGNDS)?"<u>":"",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME));
+
+ if (flags & STAT_SHLGNDS) {
+ chunk_appendf(out, "<div class=tips>");
+
+ if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR)))
+ chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR) == '[')
+ chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR))
+ chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR));
+
+ /* id */
+ chunk_appendf(out, "id: %d</div>", stats[ST_F_SID].u.u32);
+ }
+
+ chunk_appendf(out,
+ /* queue */
+ "%s</td><td colspan=3></td>"
+ /* sessions rate: current, max, limit */
+ "<td colspan=3>&nbsp;</td>"
+ /* sessions: current, max, limit, total, lbtot, lastsess */
+ "<td>%s</td><td>%s</td><td>%s</td>"
+ "<td>%s</td><td>&nbsp;</td><td>&nbsp;</td>"
+ /* bytes: in, out */
+ "<td>%s</td><td>%s</td>"
+ "",
+ (flags & STAT_SHLGNDS)?"</u>":"",
+ U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32),
+ U2H(stats[ST_F_STOT].u.u64), U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64));
+
+ chunk_appendf(out,
+ /* denied: req, resp */
+ "<td>%s</td><td>%s</td>"
+ /* errors: request, connect, response */
+ "<td>%s</td><td></td><td></td>"
+ /* warnings: retries, redispatches */
+ "<td></td><td></td>"
+ /* server status: reflect listener status */
+ "<td class=ac>%s</td>"
+ /* rest of server: nothing */
+ "<td class=ac colspan=8></td>"
+ "",
+ U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_EREQ].u.u64),
+ field_str(stats, ST_F_STATUS));
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>");
+ }
+ else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV) {
+ const char *style;
+
+ /* determine the style to use depending on the server's state,
+ * its health and weight. There isn't a 1-to-1 mapping between
+ * state and styles for the cases where the server is (still)
+ * up. The reason is that we don't want to report nolb and
+ * drain with the same color.
+ */
+
+ if (strcmp(field_str(stats, ST_F_STATUS), "DOWN") == 0 ||
+ strcmp(field_str(stats, ST_F_STATUS), "DOWN (agent)") == 0) {
+ style = "down";
+ }
+ else if (strncmp(field_str(stats, ST_F_STATUS), "DOWN ", strlen("DOWN ")) == 0) {
+ style = "going_up";
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "DRAIN") == 0) {
+ style = "draining";
+ }
+ else if (strncmp(field_str(stats, ST_F_STATUS), "NOLB ", strlen("NOLB ")) == 0) {
+ style = "going_down";
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "NOLB") == 0) {
+ style = "nolb";
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) {
+ style = "no_check";
+ }
+ else if (!stats[ST_F_CHKFAIL].type ||
+ stats[ST_F_CHECK_HEALTH].u.u32 == stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1) {
+ /* no check or max health = UP */
+ if (stats[ST_F_WEIGHT].u.u32)
+ style = "up";
+ else
+ style = "draining";
+ }
+ else {
+ style = "going_down";
+ }
+
+ if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0)
+ chunk_appendf(out, "<tr class=\"maintain\">");
+ else
+ chunk_appendf(out,
+ "<tr class=\"%s_%s\">",
+ (stats[ST_F_BCK].u.u32) ? "backup" : "active", style);
+
+
+ if (flags & STAT_ADMIN)
+ chunk_appendf(out,
+ "<td><input class='%s-checkbox' type=\"checkbox\" name=\"s\" value=\"%s\"></td>",
+ field_str(stats, ST_F_PXNAME),
+ field_str(stats, ST_F_SVNAME));
+
+ chunk_appendf(out,
+ "<td class=ac><a name=\"%s/%s\"></a>%s"
+ "<a class=lfsb href=\"#%s/%s\">%s</a>"
+ "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME),
+ (flags & STAT_SHLGNDS) ? "<u>" : "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME));
+
+ if (flags & STAT_SHLGNDS) {
+ chunk_appendf(out, "<div class=tips>");
+
+ if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR)))
+ chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR) == '[')
+ chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR))
+ chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR));
+
+ /* id */
+ chunk_appendf(out, "id: %d", stats[ST_F_SID].u.u32);
+
+ /* cookie */
+ if (stats[ST_F_COOKIE].type) {
+ chunk_appendf(out, ", cookie: '");
+ chunk_initstr(&src, field_str(stats, ST_F_COOKIE));
+ chunk_htmlencode(out, &src);
+ chunk_appendf(out, "'");
+ }
+
+ chunk_appendf(out, "</div>");
+ }
+
+ chunk_appendf(out,
+ /* queue : current, max, limit */
+ "%s</td><td>%s</td><td>%s</td><td>%s</td>"
+ /* sessions rate : current, max, limit */
+ "<td>%s</td><td>%s</td><td></td>"
+ "",
+ (flags & STAT_SHLGNDS) ? "</u>" : "",
+ U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32), LIM2A(stats[ST_F_QLIMIT].u.u32, "-"),
+ U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32));
+
+ chunk_appendf(out,
+ /* sessions: current, max, limit, total */
+ "<td><u>%s<div class=tips>"
+ "<table class=det>"
+ "<tr><th>Current active connections:</th><td>%s</td></tr>"
+ "<tr><th>Current used connections:</th><td>%s</td></tr>"
+ "<tr><th>Current idle connections:</th><td>%s</td></tr>"
+ "<tr><th>- unsafe:</th><td>%s</td></tr>"
+ "<tr><th>- safe:</th><td>%s</td></tr>"
+ "<tr><th>Estimated need of connections:</th><td>%s</td></tr>"
+ "<tr><th>Active connections limit:</th><td>%s</td></tr>"
+ "<tr><th>Idle connections limit:</th><td>%s</td></tr>"
+ "</table></div></u>"
+ "</td><td>%s</td><td>%s</td>"
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Cum. sessions:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_SCUR].u.u32),
+ U2H(stats[ST_F_SCUR].u.u32),
+ U2H(stats[ST_F_USED_CONN_CUR].u.u32),
+ U2H(stats[ST_F_SRV_ICUR].u.u32),
+ U2H(stats[ST_F_IDLE_CONN_CUR].u.u32),
+ U2H(stats[ST_F_SAFE_CONN_CUR].u.u32),
+ U2H(stats[ST_F_NEED_CONN_EST].u.u32),
+
+ LIM2A(stats[ST_F_SLIM].u.u32, "-"),
+ stats[ST_F_SRV_ILIM].type ? U2H(stats[ST_F_SRV_ILIM].u.u32) : "-",
+ U2H(stats[ST_F_SMAX].u.u32), LIM2A(stats[ST_F_SLIM].u.u32, "-"),
+ U2H(stats[ST_F_STOT].u.u64),
+ U2H(stats[ST_F_STOT].u.u64));
+
+ /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) {
+ chunk_appendf(out,
+ "<tr><th>New connections:</th><td>%s</td></tr>"
+ "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 1xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 2xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 3xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 4xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 5xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- other responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>"
+ "<tr><th>Internal error:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_CONNECT].u.u64),
+ U2H(stats[ST_F_REUSE].u.u64),
+ (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ?
+ (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0,
+ U2H(stats[ST_F_REQ_TOT].u.u64),
+ U2H(stats[ST_F_HRSP_1XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_1XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_2XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_2XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_3XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_3XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_4XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_4XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_5XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_5XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_OTHER].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_OTHER].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_WREW].u.u64),
+ U2H(stats[ST_F_EINT].u.u64));
+ }
+
+ chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. conn.</th></tr>");
+ chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32));
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions: lbtot, last */
+ "<td>%s</td><td>%s</td>",
+ U2H(stats[ST_F_LBTOT].u.u64),
+ human_time(stats[ST_F_LASTSESS].u.s32, 1));
+
+ chunk_appendf(out,
+ /* bytes : in, out */
+ "<td>%s</td><td>%s</td>"
+ /* denied: req, resp */
+ "<td></td><td>%s</td>"
+ /* errors : request, connect */
+ "<td></td><td>%s</td>"
+ /* errors : response */
+ "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>"
+ /* warnings: retries, redispatches */
+ "<td>%lld</td><td>%lld</td>"
+ "",
+ U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_ECON].u.u64),
+ U2H(stats[ST_F_ERESP].u.u64),
+ (long long)stats[ST_F_CLI_ABRT].u.u64,
+ (long long)stats[ST_F_SRV_ABRT].u.u64,
+ (long long)stats[ST_F_WRETR].u.u64,
+ (long long)stats[ST_F_WREDIS].u.u64);
+
+ /* status, last change */
+ chunk_appendf(out, "<td class=ac>");
+
+ /* FIXME!!!!
+ * LASTCHG should contain the last change for *this* server and must be computed
+ * properly above, as was done below, ie: this server if maint, otherwise ref server
+ * if tracking. Note that ref is either local or remote depending on tracking.
+ */
+
+
+ if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0) {
+ chunk_appendf(out, "%s MAINT", human_time(stats[ST_F_LASTCHG].u.u32, 1));
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) {
+ chunk_strcat(out, "<i>no check</i>");
+ }
+ else {
+ chunk_appendf(out, "%s %s", human_time(stats[ST_F_LASTCHG].u.u32, 1), field_str(stats, ST_F_STATUS));
+ if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0) {
+ if (stats[ST_F_CHECK_HEALTH].u.u32)
+ chunk_strcat(out, " &uarr;");
+ }
+ else if (stats[ST_F_CHECK_HEALTH].u.u32 < stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1)
+ chunk_strcat(out, " &darr;");
+ }
+
+ if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0 &&
+ stats[ST_F_AGENT_STATUS].type && !stats[ST_F_AGENT_HEALTH].u.u32) {
+ chunk_appendf(out,
+ "</td><td class=ac><u> %s",
+ field_str(stats, ST_F_AGENT_STATUS));
+
+ if (stats[ST_F_AGENT_CODE].type)
+ chunk_appendf(out, "/%d", stats[ST_F_AGENT_CODE].u.u32);
+
+ if (stats[ST_F_AGENT_DURATION].type)
+ chunk_appendf(out, " in %lums", (long)stats[ST_F_AGENT_DURATION].u.u64);
+
+ chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_AGENT_DESC));
+
+ if (*field_str(stats, ST_F_LAST_AGT)) {
+ chunk_appendf(out, ": ");
+ chunk_initstr(&src, field_str(stats, ST_F_LAST_AGT));
+ chunk_htmlencode(out, &src);
+ }
+ chunk_appendf(out, "</div></u>");
+ }
+ else if (stats[ST_F_CHECK_STATUS].type) {
+ chunk_appendf(out,
+ "</td><td class=ac><u> %s",
+ field_str(stats, ST_F_CHECK_STATUS));
+
+ if (stats[ST_F_CHECK_CODE].type)
+ chunk_appendf(out, "/%d", stats[ST_F_CHECK_CODE].u.u32);
+
+ if (stats[ST_F_CHECK_DURATION].type)
+ chunk_appendf(out, " in %lums", (long)stats[ST_F_CHECK_DURATION].u.u64);
+
+ chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_CHECK_DESC));
+
+ if (*field_str(stats, ST_F_LAST_CHK)) {
+ chunk_appendf(out, ": ");
+ chunk_initstr(&src, field_str(stats, ST_F_LAST_CHK));
+ chunk_htmlencode(out, &src);
+ }
+ chunk_appendf(out, "</div></u>");
+ }
+ else
+ chunk_appendf(out, "</td><td>");
+
+ chunk_appendf(out,
+ /* weight / uweight */
+ "</td><td class=ac>%d/%d</td>"
+ /* act, bck */
+ "<td class=ac>%s</td><td class=ac>%s</td>"
+ "",
+ stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32,
+ stats[ST_F_BCK].u.u32 ? "-" : "Y",
+ stats[ST_F_BCK].u.u32 ? "Y" : "-");
+
+ /* check failures: unique, fatal, down time */
+ if (strcmp(field_str(stats, ST_F_STATUS), "MAINT (resolution)") == 0) {
+ chunk_appendf(out, "<td class=ac colspan=3>resolution</td>");
+ }
+ else if (stats[ST_F_CHKFAIL].type) {
+ chunk_appendf(out, "<td><u>%lld", (long long)stats[ST_F_CHKFAIL].u.u64);
+
+ if (stats[ST_F_HANAFAIL].type)
+ chunk_appendf(out, "/%lld", (long long)stats[ST_F_HANAFAIL].u.u64);
+
+ chunk_appendf(out,
+ "<div class=tips>Failed Health Checks%s</div></u></td>"
+ "<td>%lld</td><td>%s</td>"
+ "",
+ stats[ST_F_HANAFAIL].type ? "/Health Analyses" : "",
+ (long long)stats[ST_F_CHKDOWN].u.u64, human_time(stats[ST_F_DOWNTIME].u.u32, 1));
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "MAINT") != 0 && field_format(stats, ST_F_TRACKED) == FF_STR) {
+ /* tracking a server (hence inherited maint would appear as "MAINT (via...)") */
+ chunk_appendf(out,
+ "<td class=ac colspan=3><a class=lfsb href=\"#%s\">via %s</a></td>",
+ field_str(stats, ST_F_TRACKED), field_str(stats, ST_F_TRACKED));
+ }
+ else
+ chunk_appendf(out, "<td colspan=3></td>");
+
+ /* throttle */
+ if (stats[ST_F_THROTTLE].type)
+ chunk_appendf(out, "<td class=ac>%d %%</td>\n", stats[ST_F_THROTTLE].u.u32);
+ else
+ chunk_appendf(out, "<td class=ac>-</td>");
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>\n");
+ }
+ else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE) {
+ chunk_appendf(out, "<tr class=\"backend\">");
+ if (flags & STAT_ADMIN) {
+ /* Column sub-heading for Enable or Disable server */
+ chunk_appendf(out, "<td></td>");
+ }
+ chunk_appendf(out,
+ "<td class=ac>"
+ /* name */
+ "%s<a name=\"%s/Backend\"></a>"
+ "<a class=lfsb href=\"#%s/Backend\">Backend</a>"
+ "",
+ (flags & STAT_SHLGNDS)?"<u>":"",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME));
+
+ if (flags & STAT_SHLGNDS) {
+ /* balancing */
+ chunk_appendf(out, "<div class=tips>balancing: %s",
+ field_str(stats, ST_F_ALGO));
+
+ /* cookie */
+ if (stats[ST_F_COOKIE].type) {
+ chunk_appendf(out, ", cookie: '");
+ chunk_initstr(&src, field_str(stats, ST_F_COOKIE));
+ chunk_htmlencode(out, &src);
+ chunk_appendf(out, "'");
+ }
+ chunk_appendf(out, "</div>");
+ }
+
+ chunk_appendf(out,
+ "%s</td>"
+ /* queue : current, max */
+ "<td>%s</td><td>%s</td><td></td>"
+ /* sessions rate : current, max, limit */
+ "<td>%s</td><td>%s</td><td></td>"
+ "",
+ (flags & STAT_SHLGNDS)?"</u>":"",
+ U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32),
+ U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32));
+
+ chunk_appendf(out,
+ /* sessions: current, max, limit, total */
+ "<td>%s</td><td>%s</td><td>%s</td>"
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Cum. sessions:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32),
+ U2H(stats[ST_F_STOT].u.u64),
+ U2H(stats[ST_F_STOT].u.u64));
+
+ /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) {
+ chunk_appendf(out,
+ "<tr><th>New connections:</th><td>%s</td></tr>"
+ "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>"
+ "<tr><th>&nbsp;&nbsp;Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- other responses:</th><td>%s</td></tr>"
+ "<tr><th>Cache lookups:</th><td>%s</td></tr>"
+ "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>"
+ "<tr><th>Internal errors:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_CONNECT].u.u64),
+ U2H(stats[ST_F_REUSE].u.u64),
+ (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ?
+ (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0,
+ U2H(stats[ST_F_REQ_TOT].u.u64),
+ U2H(stats[ST_F_HRSP_1XX].u.u64),
+ U2H(stats[ST_F_HRSP_2XX].u.u64),
+ U2H(stats[ST_F_COMP_RSP].u.u64),
+ stats[ST_F_HRSP_2XX].u.u64 ?
+ (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_3XX].u.u64),
+ U2H(stats[ST_F_HRSP_4XX].u.u64),
+ U2H(stats[ST_F_HRSP_5XX].u.u64),
+ U2H(stats[ST_F_HRSP_OTHER].u.u64),
+ U2H(stats[ST_F_CACHE_LOOKUPS].u.u64),
+ U2H(stats[ST_F_CACHE_HITS].u.u64),
+ stats[ST_F_CACHE_LOOKUPS].u.u64 ?
+ (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0,
+ U2H(stats[ST_F_WREW].u.u64),
+ U2H(stats[ST_F_EINT].u.u64));
+ }
+
+ chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. conn.</th></tr>");
+ chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32));
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions: lbtot, last */
+ "<td>%s</td><td>%s</td>"
+ /* bytes: in */
+ "<td>%s</td>"
+ "",
+ U2H(stats[ST_F_LBTOT].u.u64),
+ human_time(stats[ST_F_LASTSESS].u.s32, 1),
+ U2H(stats[ST_F_BIN].u.u64));
+
+ chunk_appendf(out,
+ /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */
+ "<td>%s%s<div class=tips><table class=det>"
+ "<tr><th>Response bytes in:</th><td>%s</td></tr>"
+ "<tr><th>Compression in:</th><td>%s</td></tr>"
+ "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Compression bypass:</th><td>%s</td></tr>"
+ "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "</table></div>%s</td>",
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"",
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64),
+ U2H(stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_COMP_IN].u.u64 ? (int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0,
+ U2H(stats[ST_F_COMP_BYP].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_BOUT].u.u64 ? (int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0,
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":"");
+
+ chunk_appendf(out,
+ /* denied: req, resp */
+ "<td>%s</td><td>%s</td>"
+ /* errors : request, connect */
+ "<td></td><td>%s</td>"
+ /* errors : response */
+ "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>"
+ /* warnings: retries, redispatches */
+ "<td>%lld</td><td>%lld</td>"
+ /* backend status: reflect backend status (up/down): we display UP
+ * if the backend has known working servers or if it has no server at
+ * all (eg: for stats). Then we display the total weight, number of
+ * active and backups. */
+ "<td class=ac>%s %s</td><td class=ac>&nbsp;</td><td class=ac>%d/%d</td>"
+ "<td class=ac>%d</td><td class=ac>%d</td>"
+ "",
+ U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_ECON].u.u64),
+ U2H(stats[ST_F_ERESP].u.u64),
+ (long long)stats[ST_F_CLI_ABRT].u.u64,
+ (long long)stats[ST_F_SRV_ABRT].u.u64,
+ (long long)stats[ST_F_WRETR].u.u64, (long long)stats[ST_F_WREDIS].u.u64,
+ human_time(stats[ST_F_LASTCHG].u.u32, 1),
+ strcmp(field_str(stats, ST_F_STATUS), "DOWN") ? field_str(stats, ST_F_STATUS) : "<font color=\"red\"><b>DOWN</b></font>",
+ stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32,
+ stats[ST_F_ACT].u.u32, stats[ST_F_BCK].u.u32);
+
+ chunk_appendf(out,
+ /* rest of backend: nothing, down transitions, total downtime, throttle */
+ "<td class=ac>&nbsp;</td><td>%d</td>"
+ "<td>%s</td>"
+ "<td></td>",
+ stats[ST_F_CHKDOWN].u.u32,
+ stats[ST_F_DOWNTIME].type ? human_time(stats[ST_F_DOWNTIME].u.u32, 1) : "&nbsp;");
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>");
+ }
+
+ return 1;
+}
+
+int stats_dump_one_line(const struct field *stats, size_t stats_count,
+ struct appctx *appctx)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ int ret;
+
+ if (ctx->flags & STAT_FMT_HTML)
+ ret = stats_dump_fields_html(&trash_chunk, stats, ctx);
+ else if (ctx->flags & STAT_FMT_TYPED)
+ ret = stats_dump_fields_typed(&trash_chunk, stats, stats_count, ctx);
+ else if (ctx->flags & STAT_FMT_JSON)
+ ret = stats_dump_fields_json(&trash_chunk, stats, stats_count, ctx);
+ else
+ ret = stats_dump_fields_csv(&trash_chunk, stats, stats_count, ctx);
+
+ return ret;
+}
+
+/* Fill <stats> with the frontend statistics. <stats> is a preallocated array
+ * of length <len>, which must be at least ST_F_TOTAL_FIELDS. If
+ * <selected_field> is not NULL, only that field is filled. The function
+ * returns 0 if the array is too short or if the selected field is not
+ * implemented for frontends, otherwise it returns 1.
+ */
+int stats_fill_fe_stats(struct proxy *px, struct field *stats, int len,
+ enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "FRONTEND");
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, px->feconn);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.conn_max);
+ break;
+ case ST_F_SLIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->maxconn);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_out);
+ break;
+ case ST_F_DREQ:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_req);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_resp);
+ break;
+ case ST_F_EREQ:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_req);
+ break;
+ case ST_F_DCON:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_conn);
+ break;
+ case ST_F_DSES:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_sess);
+ break;
+ case ST_F_STATUS: {
+ const char *state;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ state = "STOP";
+ else if (px->flags & PR_FL_PAUSED)
+ state = "PAUSED";
+ else
+ state = "OPEN";
+ metric = mkf_str(FO_STATUS, state);
+ break;
+ }
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, 0);
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_FE);
+ break;
+ case ST_F_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_sess_per_sec));
+ break;
+ case ST_F_RATE_LIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fe_sps_lim);
+ break;
+ case ST_F_RATE_MAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.sps_max);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.internal_errors);
+ break;
+ case ST_F_HRSP_1XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[1]);
+ break;
+ case ST_F_HRSP_2XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[2]);
+ break;
+ case ST_F_HRSP_3XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[3]);
+ break;
+ case ST_F_HRSP_4XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[4]);
+ break;
+ case ST_F_HRSP_5XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[5]);
+ break;
+ case ST_F_HRSP_OTHER:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[0]);
+ break;
+ case ST_F_INTERCEPTED:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.intercepted_req);
+ break;
+ case ST_F_CACHE_LOOKUPS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_lookups);
+ break;
+ case ST_F_CACHE_HITS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_hits);
+ break;
+ case ST_F_REQ_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_req_per_sec));
+ break;
+ case ST_F_REQ_RATE_MAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.p.http.rps_max);
+ break;
+ case ST_F_REQ_TOT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req);
+ break;
+ case ST_F_COMP_IN:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_in);
+ break;
+ case ST_F_COMP_OUT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_out);
+ break;
+ case ST_F_COMP_BYP:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_byp);
+ break;
+ case ST_F_COMP_RSP:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.comp_rsp);
+ break;
+ case ST_F_CONN_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_conn_per_sec));
+ break;
+ case ST_F_CONN_RATE_MAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.cps_max);
+ break;
+ case ST_F_CONN_TOT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_conn);
+ break;
+ default:
+ /* not used for frontends. If a specific metric
+ * is requested, return an error. Otherwise continue.
+ */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
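+/* Minimal usage sketch (hypothetical caller), requesting a single field
+ * instead of the whole line:
+ *
+ * struct field stats[ST_F_TOTAL_FIELDS];
+ * enum stat_field f = ST_F_SCUR;
+ *
+ * if (stats_fill_fe_stats(px, stats, ST_F_TOTAL_FIELDS, &f))
+ * printf("scur=%u\n", stats[ST_F_SCUR].u.u32);
+ */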
+
+/* Dumps a frontend's line to the local trash buffer for the current proxy <px>
+ * and uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed. Returns non-zero if it emits
+ * anything, zero otherwise.
+ */
+static int stats_dump_fe_stats(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ struct stats_module *mod;
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ if (!(px->cap & PR_CAP_FE))
+ return 0;
+
+ if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_FE)))
+ return 0;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_fe_stats(px, stats, ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ void *counters;
+
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+/* Fill <stats> with the listener statistics. <stats> is a preallocated array
+ * of length <len>, which must be at least ST_F_TOTAL_FIELDS; if it is
+ * shorter, the function returns 0, otherwise it returns 1. If
+ * <selected_field> is not NULL, only that field is filled. <flags> can
+ * take the value STAT_SHLGNDS.
+ */
+int stats_fill_li_stats(struct proxy *px, struct listener *l, int flags,
+ struct field *stats, int len, enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+ struct buffer *out = get_trash_chunk();
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ if (!l->counters)
+ return 0;
+
+ chunk_reset(out);
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, l->name);
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, l->nbconn);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, l->counters->conn_max);
+ break;
+ case ST_F_SLIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, l->maxconn);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, l->counters->cum_conn);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, l->counters->bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, l->counters->bytes_out);
+ break;
+ case ST_F_DREQ:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_req);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_resp);
+ break;
+ case ST_F_EREQ:
+ metric = mkf_u64(FN_COUNTER, l->counters->failed_req);
+ break;
+ case ST_F_DCON:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_conn);
+ break;
+ case ST_F_DSES:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_sess);
+ break;
+ case ST_F_STATUS:
+ metric = mkf_str(FO_STATUS, li_status_st[get_li_status(l)]);
+ break;
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, l->luid);
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SO);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, l->counters->failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, l->counters->internal_errors);
+ break;
+ case ST_F_ADDR:
+ if (flags & STAT_SHLGNDS) {
+ char str[INET6_ADDRSTRLEN];
+ int port;
+
+ port = get_host_port(&l->rx.addr);
+ switch (addr_to_str(&l->rx.addr, str, sizeof(str))) {
+ case AF_INET:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "%s:%d", str, port);
+ break;
+ case AF_INET6:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "[%s]:%d", str, port);
+ break;
+ case AF_UNIX:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix");
+ break;
+ case -1:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_strcat(out, strerror(errno));
+ break;
+ default: /* address family not supported */
+ break;
+ }
+ }
+ break;
+ default:
+			/* not used for listeners. If a specific metric
+ * is requested, return an error. Otherwise continue.
+ */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
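+
+/* Usage sketch (hypothetical caller, not part of the API): to retrieve a
+ * single listener metric rather than the whole line, pass the field by
+ * pointer; only that entry of <stats> is filled:
+ *
+ *	struct field stats[ST_F_TOTAL_FIELDS] = { 0 };
+ *	enum stat_field f = ST_F_SCUR;
+ *
+ *	if (stats_fill_li_stats(px, l, 0, stats, ST_F_TOTAL_FIELDS, &f))
+ *		printf("scur=%u\n", stats[ST_F_SCUR].u.u32);
+ */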
+
+/* Dumps a line for listener <l> and proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed. Returns non-zero if it emits
+ * anything, zero otherwise.
+ */
+static int stats_dump_li_stats(struct stconn *sc, struct proxy *px, struct listener *l)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ struct stats_module *mod;
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_li_stats(px, l, ctx->flags, stats,
+ ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ void *counters;
+
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(l->extra_counters, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+enum srv_stats_state {
+ SRV_STATS_STATE_DOWN = 0,
+ SRV_STATS_STATE_DOWN_AGENT,
+ SRV_STATS_STATE_GOING_UP,
+ SRV_STATS_STATE_UP_GOING_DOWN,
+ SRV_STATS_STATE_UP,
+ SRV_STATS_STATE_NOLB_GOING_DOWN,
+ SRV_STATS_STATE_NOLB,
+ SRV_STATS_STATE_DRAIN_GOING_DOWN,
+ SRV_STATS_STATE_DRAIN,
+ SRV_STATS_STATE_DRAIN_AGENT,
+ SRV_STATS_STATE_NO_CHECK,
+
+ SRV_STATS_STATE_COUNT, /* Must be last */
+};
+
+static const char *srv_hlt_st[SRV_STATS_STATE_COUNT] = {
+ [SRV_STATS_STATE_DOWN] = "DOWN",
+ [SRV_STATS_STATE_DOWN_AGENT] = "DOWN (agent)",
+ [SRV_STATS_STATE_GOING_UP] = "DOWN %d/%d",
+ [SRV_STATS_STATE_UP_GOING_DOWN] = "UP %d/%d",
+ [SRV_STATS_STATE_UP] = "UP",
+ [SRV_STATS_STATE_NOLB_GOING_DOWN] = "NOLB %d/%d",
+ [SRV_STATS_STATE_NOLB] = "NOLB",
+ [SRV_STATS_STATE_DRAIN_GOING_DOWN] = "DRAIN %d/%d",
+ [SRV_STATS_STATE_DRAIN] = "DRAIN",
+ [SRV_STATS_STATE_DRAIN_AGENT] = "DRAIN (agent)",
+ [SRV_STATS_STATE_NO_CHECK] = "no check"
+};
+
+/* Helper to compute the stats state of server <sv>, using <ref> as the end
+ * of the tracking chain. The result is stored into <state>.
+ */
+static void stats_fill_sv_stats_computestate(struct server *sv, struct server *ref,
+ enum srv_stats_state *state)
+{
+ if (sv->cur_state == SRV_ST_RUNNING || sv->cur_state == SRV_ST_STARTING) {
+ if ((ref->check.state & CHK_ST_ENABLED) &&
+ (ref->check.health < ref->check.rise + ref->check.fall - 1)) {
+ *state = SRV_STATS_STATE_UP_GOING_DOWN;
+ } else {
+ *state = SRV_STATS_STATE_UP;
+ }
+
+ if (sv->cur_admin & SRV_ADMF_DRAIN) {
+ if (ref->agent.state & CHK_ST_ENABLED)
+ *state = SRV_STATS_STATE_DRAIN_AGENT;
+ else if (*state == SRV_STATS_STATE_UP_GOING_DOWN)
+ *state = SRV_STATS_STATE_DRAIN_GOING_DOWN;
+ else
+ *state = SRV_STATS_STATE_DRAIN;
+ }
+
+ if (*state == SRV_STATS_STATE_UP && !(ref->check.state & CHK_ST_ENABLED)) {
+ *state = SRV_STATS_STATE_NO_CHECK;
+ }
+ }
+ else if (sv->cur_state == SRV_ST_STOPPING) {
+ if ((!(sv->check.state & CHK_ST_ENABLED) && !sv->track) ||
+ (ref->check.health == ref->check.rise + ref->check.fall - 1)) {
+ *state = SRV_STATS_STATE_NOLB;
+ } else {
+ *state = SRV_STATS_STATE_NOLB_GOING_DOWN;
+ }
+ }
+ else { /* stopped */
+ if ((ref->agent.state & CHK_ST_ENABLED) && !ref->agent.health) {
+ *state = SRV_STATS_STATE_DOWN_AGENT;
+ } else if ((ref->check.state & CHK_ST_ENABLED) && !ref->check.health) {
+ *state = SRV_STATS_STATE_DOWN; /* DOWN */
+ } else if ((ref->agent.state & CHK_ST_ENABLED) || (ref->check.state & CHK_ST_ENABLED)) {
+ *state = SRV_STATS_STATE_GOING_UP;
+ } else {
+ *state = SRV_STATS_STATE_DOWN; /* DOWN, unchecked */
+ }
+ }
+}
+
+/* Fill <stats> with the server statistics. <stats> is a preallocated array
+ * of length <len>. If <selected_field> is != NULL, only fill this one. The
+ * length of the array must be at least ST_F_TOTAL_FIELDS. If this length is
+ * less than this value, or if the selected field is not implemented for
+ * servers, the function returns 0, otherwise, it returns 1. <flags> can take
+ * the value STAT_SHLGNDS.
+ */
+ */
+int stats_fill_sv_stats(struct proxy *px, struct server *sv, int flags,
+ struct field *stats, int len,
+ enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+ struct server *via = sv->track ? sv->track : sv;
+ struct server *ref = via;
+ enum srv_stats_state state = 0;
+ char str[INET6_ADDRSTRLEN];
+ struct buffer *out = get_trash_chunk();
+ char *fld_status;
+ long long srv_samples_counter;
+ unsigned int srv_samples_window = TIME_STATS_SAMPLES;
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ chunk_reset(out);
+
+ /* compute state for later use */
+ if (selected_field == NULL || *selected_field == ST_F_STATUS ||
+ *selected_field == ST_F_CHECK_RISE || *selected_field == ST_F_CHECK_FALL ||
+ *selected_field == ST_F_CHECK_HEALTH || *selected_field == ST_F_HANAFAIL) {
+ /* we have "via" which is the tracked server as described in the configuration,
+ * and "ref" which is the checked server and the end of the chain.
+ */
+ while (ref->track)
+ ref = ref->track;
+ stats_fill_sv_stats_computestate(sv, ref, &state);
+ }
+
+	/* compute time values for later use */
+ if (selected_field == NULL || *selected_field == ST_F_QTIME ||
+ *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME ||
+ *selected_field == ST_F_TTIME) {
+ srv_samples_counter = (px->mode == PR_MODE_HTTP) ? sv->counters.p.http.cum_req : sv->counters.cum_lbconn;
+ if (srv_samples_counter < TIME_STATS_SAMPLES && srv_samples_counter > 0)
+ srv_samples_window = srv_samples_counter;
+ }
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, sv->id);
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_QCUR:
+ metric = mkf_u32(0, sv->queue.length);
+ break;
+ case ST_F_QMAX:
+ metric = mkf_u32(FN_MAX, sv->counters.nbpend_max);
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, sv->cur_sess);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, sv->counters.cur_sess_max);
+ break;
+ case ST_F_SLIM:
+ if (sv->maxconn)
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->maxconn);
+ break;
+ case ST_F_SRV_ICUR:
+ metric = mkf_u32(0, sv->curr_idle_conns);
+ break;
+ case ST_F_SRV_ILIM:
+ if (sv->max_idle_conns != -1)
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->max_idle_conns);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.cum_sess);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, sv->counters.bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.bytes_out);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, sv->counters.denied_resp);
+ break;
+ case ST_F_ECON:
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_conns);
+ break;
+ case ST_F_ERESP:
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_resp);
+ break;
+ case ST_F_WRETR:
+ metric = mkf_u64(FN_COUNTER, sv->counters.retries);
+ break;
+ case ST_F_WREDIS:
+ metric = mkf_u64(FN_COUNTER, sv->counters.redispatches);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.internal_errors);
+ break;
+ case ST_F_CONNECT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.connect);
+ break;
+ case ST_F_REUSE:
+ metric = mkf_u64(FN_COUNTER, sv->counters.reuse);
+ break;
+ case ST_F_IDLE_CONN_CUR:
+ metric = mkf_u32(0, sv->curr_idle_nb);
+ break;
+ case ST_F_SAFE_CONN_CUR:
+ metric = mkf_u32(0, sv->curr_safe_nb);
+ break;
+ case ST_F_USED_CONN_CUR:
+ metric = mkf_u32(0, sv->curr_used_conns);
+ break;
+ case ST_F_NEED_CONN_EST:
+ metric = mkf_u32(0, sv->est_need_conns);
+ break;
+ case ST_F_STATUS:
+ fld_status = chunk_newstr(out);
+ if (sv->cur_admin & SRV_ADMF_RMAINT)
+ chunk_appendf(out, "MAINT (resolution)");
+ else if (sv->cur_admin & SRV_ADMF_IMAINT)
+ chunk_appendf(out, "MAINT (via %s/%s)", via->proxy->id, via->id);
+ else if (sv->cur_admin & SRV_ADMF_MAINT)
+ chunk_appendf(out, "MAINT");
+ else
+ chunk_appendf(out,
+ srv_hlt_st[state],
+ (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health),
+ (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.fall) : (ref->check.rise));
+
+ metric = mkf_str(FO_STATUS, fld_status);
+ break;
+ case ST_F_LASTCHG:
+ metric = mkf_u32(FN_AGE, now.tv_sec - sv->last_change);
+ break;
+ case ST_F_WEIGHT:
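+			/* scale the effective weight back to the user range;
+			 * the "+ wdiv - 1" makes this a ceiling division
+			 */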
+ metric = mkf_u32(FN_AVG, (sv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv);
+ break;
+ case ST_F_UWEIGHT:
+ metric = mkf_u32(FN_AVG, sv->uweight);
+ break;
+ case ST_F_ACT:
+ metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 0 : 1);
+ break;
+ case ST_F_BCK:
+ metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 1 : 0);
+ break;
+ case ST_F_CHKFAIL:
+ if (sv->check.state & CHK_ST_ENABLED)
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks);
+ break;
+ case ST_F_CHKDOWN:
+ if (sv->check.state & CHK_ST_ENABLED)
+ metric = mkf_u64(FN_COUNTER, sv->counters.down_trans);
+ break;
+ case ST_F_DOWNTIME:
+ if (sv->check.state & CHK_ST_ENABLED)
+ metric = mkf_u32(FN_COUNTER, srv_downtime(sv));
+ break;
+ case ST_F_QLIMIT:
+ if (sv->maxqueue)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->maxqueue);
+ break;
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, sv->puid);
+ break;
+ case ST_F_THROTTLE:
+ if (sv->cur_state == SRV_ST_STARTING && !server_is_draining(sv))
+ metric = mkf_u32(FN_AVG, server_throttle_rate(sv));
+ break;
+ case ST_F_LBTOT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.cum_lbconn);
+ break;
+ case ST_F_TRACKED:
+ if (sv->track) {
+ char *fld_track = chunk_newstr(out);
+ chunk_appendf(out, "%s/%s", sv->track->proxy->id, sv->track->id);
+ metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, fld_track);
+ }
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SV);
+ break;
+ case ST_F_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&sv->sess_per_sec));
+ break;
+ case ST_F_RATE_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.sps_max);
+ break;
+ case ST_F_CHECK_STATUS:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) {
+ const char *fld_chksts;
+
+ fld_chksts = chunk_newstr(out);
+ chunk_strcat(out, "* "); // for check in progress
+ chunk_strcat(out, get_check_status_info(sv->check.status));
+ if (!(sv->check.state & CHK_ST_INPROGRESS))
+ fld_chksts += 2; // skip "* "
+ metric = mkf_str(FN_OUTPUT, fld_chksts);
+ }
+ break;
+ case ST_F_CHECK_CODE:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED &&
+ sv->check.status >= HCHK_STATUS_L57DATA)
+ metric = mkf_u32(FN_OUTPUT, sv->check.code);
+ break;
+ case ST_F_CHECK_DURATION:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED &&
+ sv->check.status >= HCHK_STATUS_CHECKED)
+ metric = mkf_u64(FN_DURATION, sv->check.duration);
+ break;
+ case ST_F_CHECK_DESC:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->check.status));
+ break;
+ case ST_F_LAST_CHK:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, sv->check.desc);
+ break;
+ case ST_F_CHECK_RISE:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.rise);
+ break;
+ case ST_F_CHECK_FALL:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.fall);
+ break;
+ case ST_F_CHECK_HEALTH:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.health);
+ break;
+ case ST_F_AGENT_STATUS:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) {
+ const char *fld_chksts;
+
+ fld_chksts = chunk_newstr(out);
+ chunk_strcat(out, "* "); // for check in progress
+ chunk_strcat(out, get_check_status_info(sv->agent.status));
+ if (!(sv->agent.state & CHK_ST_INPROGRESS))
+ fld_chksts += 2; // skip "* "
+ metric = mkf_str(FN_OUTPUT, fld_chksts);
+ }
+ break;
+ case ST_F_AGENT_CODE:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED &&
+ (sv->agent.status >= HCHK_STATUS_L57DATA))
+ metric = mkf_u32(FN_OUTPUT, sv->agent.code);
+ break;
+ case ST_F_AGENT_DURATION:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u64(FN_DURATION, sv->agent.duration);
+ break;
+ case ST_F_AGENT_DESC:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->agent.status));
+ break;
+ case ST_F_LAST_AGT:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, sv->agent.desc);
+ break;
+ case ST_F_AGENT_RISE:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.rise);
+ break;
+ case ST_F_AGENT_FALL:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.fall);
+ break;
+ case ST_F_AGENT_HEALTH:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.health);
+ break;
+ case ST_F_REQ_TOT:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.cum_req);
+ break;
+ case ST_F_HRSP_1XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[1]);
+ break;
+ case ST_F_HRSP_2XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[2]);
+ break;
+ case ST_F_HRSP_3XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[3]);
+ break;
+ case ST_F_HRSP_4XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[4]);
+ break;
+ case ST_F_HRSP_5XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[5]);
+ break;
+ case ST_F_HRSP_OTHER:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[0]);
+ break;
+ case ST_F_HANAFAIL:
+ if (ref->observe)
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_hana);
+ break;
+ case ST_F_CLI_ABRT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.cli_aborts);
+ break;
+ case ST_F_SRV_ABRT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.srv_aborts);
+ break;
+ case ST_F_LASTSESS:
+ metric = mkf_s32(FN_AGE, srv_lastsession(sv));
+ break;
+ case ST_F_QTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.q_time, srv_samples_window));
+ break;
+ case ST_F_CTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.c_time, srv_samples_window));
+ break;
+ case ST_F_RTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.d_time, srv_samples_window));
+ break;
+ case ST_F_TTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.t_time, srv_samples_window));
+ break;
+ case ST_F_QT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.qtime_max);
+ break;
+ case ST_F_CT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.ctime_max);
+ break;
+ case ST_F_RT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.dtime_max);
+ break;
+ case ST_F_TT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.ttime_max);
+ break;
+ case ST_F_ADDR:
+ if (flags & STAT_SHLGNDS) {
+ switch (addr_to_str(&sv->addr, str, sizeof(str))) {
+ case AF_INET:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "%s:%d", str, sv->svc_port);
+ break;
+ case AF_INET6:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "[%s]:%d", str, sv->svc_port);
+ break;
+ case AF_UNIX:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix");
+ break;
+ case -1:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_strcat(out, strerror(errno));
+ break;
+ default: /* address family not supported */
+ break;
+ }
+ }
+ break;
+ case ST_F_COOKIE:
+ if (flags & STAT_SHLGNDS && sv->cookie)
+ metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, sv->cookie);
+ break;
+ default:
+ /* not used for servers. If a specific metric
+ * is requested, return an error. Otherwise continue.
+ */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
+
+/* Dumps a line for server <sv> and proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed. Returns non-zero if it emits
+ * anything, zero otherwise.
+ */
+static int stats_dump_sv_stats(struct stconn *sc, struct proxy *px, struct server *sv)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct stats_module *mod;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_sv_stats(px, sv, ctx->flags, stats,
+ ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ void *counters;
+
+ if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY)
+ continue;
+
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(sv->extra_counters, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+/* Helper to compute, for a given backend <px>, the number of up servers
+ * <nbup>, the total number of servers <nbsrv> and the total user weight
+ * <totuw>.
+ */
+static void stats_fill_be_stats_computesrv(struct proxy *px, int *nbup, int *nbsrv, int *totuw)
+{
+ int nbup_tmp, nbsrv_tmp, totuw_tmp;
+ const struct server *srv;
+
+ nbup_tmp = nbsrv_tmp = totuw_tmp = 0;
+ for (srv = px->srv; srv; srv = srv->next) {
+ if (srv->cur_state != SRV_ST_STOPPED) {
+ nbup_tmp++;
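+			/* boolean XOR: sum the user weights of active (non
+			 * backup) servers while at least one active server is
+			 * usable, otherwise sum those of the backup servers
+			 */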
+ if (srv_currently_usable(srv) &&
+ (!px->srv_act ^ !(srv->flags & SRV_F_BACKUP)))
+ totuw_tmp += srv->uweight;
+ }
+ nbsrv_tmp++;
+ }
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ if (!px->srv_act && px->lbprm.fbck)
+ totuw_tmp = px->lbprm.fbck->uweight;
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock);
+
+ /* use tmp variable then assign result to make gcc happy */
+ *nbup = nbup_tmp;
+ *nbsrv = nbsrv_tmp;
+ *totuw = totuw_tmp;
+}
+
+/* Fill <stats> with the backend statistics. <stats> is a preallocated array
+ * of length <len>. If <selected_field> is != NULL, only fill this one. The length
+ * of the array must be at least ST_F_TOTAL_FIELDS. If this length is less than
+ * this value, or if the selected field is not implemented for backends, the
+ * function returns 0, otherwise, it returns 1. <flags> can take the value
+ * STAT_SHLGNDS.
+ */
+int stats_fill_be_stats(struct proxy *px, int flags, struct field *stats, int len,
+ enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+ long long be_samples_counter;
+ unsigned int be_samples_window = TIME_STATS_SAMPLES;
+ struct buffer *out = get_trash_chunk();
+ int nbup, nbsrv, totuw;
+ char *fld;
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ nbup = nbsrv = totuw = 0;
+	/* compute some server values for later use, either when all fields
+	 * are dumped or when one of the specific fields below requires them */
+ if (selected_field == NULL || *selected_field == ST_F_STATUS ||
+ *selected_field == ST_F_UWEIGHT)
+ stats_fill_be_stats_computesrv(px, &nbup, &nbsrv, &totuw);
+
+ /* same here but specific to time fields */
+ if (selected_field == NULL || *selected_field == ST_F_QTIME ||
+ *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME ||
+ *selected_field == ST_F_TTIME) {
+ be_samples_counter = (px->mode == PR_MODE_HTTP) ? px->be_counters.p.http.cum_req : px->be_counters.cum_lbconn;
+ if (be_samples_counter < TIME_STATS_SAMPLES && be_samples_counter > 0)
+ be_samples_window = be_samples_counter;
+ }
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "BACKEND");
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_QCUR:
+ metric = mkf_u32(0, px->queue.length);
+ break;
+ case ST_F_QMAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.nbpend_max);
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, px->beconn);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.conn_max);
+ break;
+ case ST_F_SLIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fullconn);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.cum_conn);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_out);
+ break;
+ case ST_F_DREQ:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.denied_req);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.denied_resp);
+ break;
+ case ST_F_ECON:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.failed_conns);
+ break;
+ case ST_F_ERESP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.failed_resp);
+ break;
+ case ST_F_WRETR:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.retries);
+ break;
+ case ST_F_WREDIS:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.redispatches);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.internal_errors);
+ break;
+ case ST_F_CONNECT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.connect);
+ break;
+ case ST_F_REUSE:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.reuse);
+ break;
+ case ST_F_STATUS:
+ fld = chunk_newstr(out);
+ chunk_appendf(out, "%s", (px->lbprm.tot_weight > 0 || !px->srv) ? "UP" : "DOWN");
+ if (flags & (STAT_HIDE_MAINT|STAT_HIDE_DOWN))
+ chunk_appendf(out, " (%d/%d)", nbup, nbsrv);
+ metric = mkf_str(FO_STATUS, fld);
+ break;
+ case ST_F_AGG_SRV_CHECK_STATUS: // DEPRECATED
+ case ST_F_AGG_SRV_STATUS:
+ metric = mkf_u32(FN_GAUGE, 0);
+ break;
+ case ST_F_AGG_CHECK_STATUS:
+ metric = mkf_u32(FN_GAUGE, 0);
+ break;
+ case ST_F_WEIGHT:
+ metric = mkf_u32(FN_AVG, (px->lbprm.tot_weight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv);
+ break;
+ case ST_F_UWEIGHT:
+ metric = mkf_u32(FN_AVG, totuw);
+ break;
+ case ST_F_ACT:
+ metric = mkf_u32(0, px->srv_act);
+ break;
+ case ST_F_BCK:
+ metric = mkf_u32(0, px->srv_bck);
+ break;
+ case ST_F_CHKDOWN:
+ metric = mkf_u64(FN_COUNTER, px->down_trans);
+ break;
+ case ST_F_LASTCHG:
+ metric = mkf_u32(FN_AGE, now.tv_sec - px->last_change);
+ break;
+ case ST_F_DOWNTIME:
+ if (px->srv)
+ metric = mkf_u32(FN_COUNTER, be_downtime(px));
+ break;
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, 0);
+ break;
+ case ST_F_LBTOT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.cum_lbconn);
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_BE);
+ break;
+ case ST_F_RATE:
+ metric = mkf_u32(0, read_freq_ctr(&px->be_sess_per_sec));
+ break;
+ case ST_F_RATE_MAX:
+ metric = mkf_u32(0, px->be_counters.sps_max);
+ break;
+ case ST_F_COOKIE:
+ if (flags & STAT_SHLGNDS && px->cookie_name)
+ metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, px->cookie_name);
+ break;
+ case ST_F_ALGO:
+ if (flags & STAT_SHLGNDS)
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, backend_lb_algo_str(px->lbprm.algo & BE_LB_ALGO));
+ break;
+ case ST_F_REQ_TOT:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cum_req);
+ break;
+ case ST_F_HRSP_1XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[1]);
+ break;
+ case ST_F_HRSP_2XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[2]);
+ break;
+ case ST_F_HRSP_3XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[3]);
+ break;
+ case ST_F_HRSP_4XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[4]);
+ break;
+ case ST_F_HRSP_5XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[5]);
+ break;
+ case ST_F_HRSP_OTHER:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[0]);
+ break;
+ case ST_F_CACHE_LOOKUPS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_lookups);
+ break;
+ case ST_F_CACHE_HITS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_hits);
+ break;
+ case ST_F_CLI_ABRT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.cli_aborts);
+ break;
+ case ST_F_SRV_ABRT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.srv_aborts);
+ break;
+ case ST_F_COMP_IN:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.comp_in);
+ break;
+ case ST_F_COMP_OUT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.comp_out);
+ break;
+ case ST_F_COMP_BYP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.comp_byp);
+ break;
+ case ST_F_COMP_RSP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.comp_rsp);
+ break;
+ case ST_F_LASTSESS:
+ metric = mkf_s32(FN_AGE, be_lastsession(px));
+ break;
+ case ST_F_QTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.q_time, be_samples_window));
+ break;
+ case ST_F_CTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.c_time, be_samples_window));
+ break;
+ case ST_F_RTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.d_time, be_samples_window));
+ break;
+ case ST_F_TTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.t_time, be_samples_window));
+ break;
+ case ST_F_QT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.qtime_max);
+ break;
+ case ST_F_CT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.ctime_max);
+ break;
+ case ST_F_RT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.dtime_max);
+ break;
+ case ST_F_TT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.ttime_max);
+ break;
+ default:
+ /* not used for backends. If a specific metric
+ * is requested, return an error. Otherwise continue.
+ */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
+
+/* Dumps a line for backend <px> to the local trash buffer and uses the state
+ * from stream connector <sc>. The caller is responsible for clearing the
+ * local trash buffer if needed. Returns non-zero if it emits anything, zero
+ * otherwise.
+ */
+static int stats_dump_be_stats(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ struct stats_module *mod;
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ if (!(px->cap & PR_CAP_BE))
+ return 0;
+
+ if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_BE)))
+ return 0;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_be_stats(px, ctx->flags, stats, ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ struct extra_counters *counters;
+
+ if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY)
+ continue;
+
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(px->extra_counters_be, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+/* Dumps the HTML table header for proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_px_hdr(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN];
+ struct stats_module *mod;
+ int stats_module_len = 0;
+
+ if (px->cap & PR_CAP_BE && px->srv && (ctx->flags & STAT_ADMIN)) {
+ /* A form to enable/disable this proxy servers */
+
+ /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ scope_txt[0] = 0;
+ if (ctx->scope_len) {
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+
+ strcpy(scope_txt, STAT_SCOPE_PATTERN);
+ memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len);
+ scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0;
+ }
+
+ chunk_appendf(&trash_chunk,
+ "<form method=\"post\">");
+ }
+
+ /* print a new table */
+ chunk_appendf(&trash_chunk,
+ "<table class=\"tbl\" width=\"100%%\">\n"
+ "<tr class=\"titre\">"
+ "<th class=\"pxname\" width=\"10%%\">");
+
+ chunk_appendf(&trash_chunk,
+ "<a name=\"%s\"></a>%s"
+ "<a class=px href=\"#%s\">%s</a>",
+ px->id,
+ (ctx->flags & STAT_SHLGNDS) ? "<u>":"",
+ px->id, px->id);
+
+ if (ctx->flags & STAT_SHLGNDS) {
+ /* cap, mode, id */
+ chunk_appendf(&trash_chunk, "<div class=tips>cap: %s, mode: %s, id: %d",
+ proxy_cap_str(px->cap), proxy_mode_str(px->mode),
+ px->uuid);
+ chunk_appendf(&trash_chunk, "</div>");
+ }
+
+ chunk_appendf(&trash_chunk,
+ "%s</th>"
+ "<th class=\"%s\" width=\"90%%\">%s</th>"
+ "</tr>\n"
+ "</table>\n"
+ "<table class=\"tbl\" width=\"100%%\">\n"
+ "<tr class=\"titre\">",
+ (ctx->flags & STAT_SHLGNDS) ? "</u>":"",
+ px->desc ? "desc" : "empty", px->desc ? px->desc : "");
+
+ if (ctx->flags & STAT_ADMIN) {
+ /* Column heading for Enable or Disable server */
+ if ((px->cap & PR_CAP_BE) && px->srv)
+ chunk_appendf(&trash_chunk,
+ "<th rowspan=2 width=1><input type=\"checkbox\" "
+ "onclick=\"for(c in document.getElementsByClassName('%s-checkbox')) "
+ "document.getElementsByClassName('%s-checkbox').item(c).checked = this.checked\"></th>",
+ px->id,
+ px->id);
+ else
+ chunk_appendf(&trash_chunk, "<th rowspan=2></th>");
+ }
+
+ chunk_appendf(&trash_chunk,
+ "<th rowspan=2></th>"
+ "<th colspan=3>Queue</th>"
+ "<th colspan=3>Session rate</th><th colspan=6>Sessions</th>"
+ "<th colspan=2>Bytes</th><th colspan=2>Denied</th>"
+ "<th colspan=3>Errors</th><th colspan=2>Warnings</th>"
+ "<th colspan=9>Server</th>");
+
+ if (ctx->flags & STAT_SHMODULES) {
+ // calculate the count of module for colspan attribute
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ ++stats_module_len;
+ }
+ chunk_appendf(&trash_chunk, "<th colspan=%d>Extra modules</th>",
+ stats_module_len);
+ }
+
+ chunk_appendf(&trash_chunk,
+ "</tr>\n"
+ "<tr class=\"titre\">"
+ "<th>Cur</th><th>Max</th><th>Limit</th>"
+ "<th>Cur</th><th>Max</th><th>Limit</th><th>Cur</th><th>Max</th>"
+ "<th>Limit</th><th>Total</th><th>LbTot</th><th>Last</th><th>In</th><th>Out</th>"
+ "<th>Req</th><th>Resp</th><th>Req</th><th>Conn</th>"
+ "<th>Resp</th><th>Retr</th><th>Redis</th>"
+ "<th>Status</th><th>LastChk</th><th>Wght</th><th>Act</th>"
+ "<th>Bck</th><th>Chk</th><th>Dwn</th><th>Dwntme</th>"
+ "<th>Thrtle</th>\n");
+
+ if (ctx->flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(&trash_chunk, "<th>%s</th>", mod->name);
+ }
+ }
+
+ chunk_appendf(&trash_chunk, "</tr>");
+}
+
+/* Dumps the HTML table trailer for proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_px_end(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+
+ chunk_appendf(&trash_chunk, "</table>");
+
+ if ((px->cap & PR_CAP_BE) && px->srv && (ctx->flags & STAT_ADMIN)) {
+ /* close the form used to enable/disable this proxy servers */
+ chunk_appendf(&trash_chunk,
+ "Choose the action to perform on the checked servers : "
+ "<select name=action>"
+ "<option value=\"\"></option>"
+ "<option value=\"ready\">Set state to READY</option>"
+ "<option value=\"drain\">Set state to DRAIN</option>"
+ "<option value=\"maint\">Set state to MAINT</option>"
+ "<option value=\"dhlth\">Health: disable checks</option>"
+ "<option value=\"ehlth\">Health: enable checks</option>"
+ "<option value=\"hrunn\">Health: force UP</option>"
+ "<option value=\"hnolb\">Health: force NOLB</option>"
+ "<option value=\"hdown\">Health: force DOWN</option>"
+ "<option value=\"dagent\">Agent: disable checks</option>"
+ "<option value=\"eagent\">Agent: enable checks</option>"
+ "<option value=\"arunn\">Agent: force UP</option>"
+ "<option value=\"adown\">Agent: force DOWN</option>"
+ "<option value=\"shutdown\">Kill Sessions</option>"
+ "</select>"
+ "<input type=\"hidden\" name=\"b\" value=\"#%d\">"
+ "&nbsp;<input type=\"submit\" value=\"Apply\">"
+ "</form>",
+ px->uuid);
+ }
+
+ chunk_appendf(&trash_chunk, "<p>\n");
+}
+
+/*
+ * Dumps statistics for a proxy. The output is sent to the stream connector's
+ * input buffer. Returns 0 if it had to stop dumping data because of lack of
+ * buffer space, or non-zero if everything completed. This function is used
+ * both by the CLI and the HTTP entry points, and is able to dump the output
+ * in HTML or CSV formats. If the later, <uri> must be NULL.
+ */
+int stats_dump_proxy_to_buffer(struct stconn *sc, struct htx *htx,
+ struct proxy *px, struct uri_auth *uri)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct stream *s = __sc_strm(sc);
+ struct channel *rep = sc_ic(sc);
+ struct server *sv, *svs; /* server and server-state, server-state=server or server->track */
+ struct listener *l;
+ int current_field;
+
+ chunk_reset(&trash_chunk);
+more:
+ current_field = ctx->field;
+
+ switch (ctx->px_st) {
+ case STAT_PX_ST_INIT:
+ /* we are on a new proxy */
+ if (uri && uri->scope) {
+ /* we have a limited scope, we have to check the proxy name */
+ struct stat_scope *scope;
+ int len;
+
+ len = strlen(px->id);
+ scope = uri->scope;
+
+ while (scope) {
+ /* match exact proxy name */
+ if (scope->px_len == len && !memcmp(px->id, scope->px_id, len))
+ break;
+
+ /* match '.' which means 'self' proxy */
+ if (strcmp(scope->px_id, ".") == 0 && px == s->be)
+ break;
+ scope = scope->next;
+ }
+
+ /* proxy name not found : don't dump anything */
+ if (scope == NULL)
+ return 1;
+ }
+
+ /* if the user has requested a limited output and the proxy
+ * name does not match, skip it.
+ */
+ if (ctx->scope_len) {
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+
+ if (strnistr(px->id, strlen(px->id), scope_ptr, ctx->scope_len) == NULL)
+ return 1;
+ }
+
+ if ((ctx->flags & STAT_BOUND) &&
+ (ctx->iid != -1) &&
+ (px->uuid != ctx->iid))
+ return 1;
+
+ ctx->px_st = STAT_PX_ST_TH;
+ /* fall through */
+
+ case STAT_PX_ST_TH:
+ if (ctx->flags & STAT_FMT_HTML) {
+ stats_dump_html_px_hdr(sc, px);
+ if (!stats_putchk(rep, htx))
+ goto full;
+ }
+
+ ctx->px_st = STAT_PX_ST_FE;
+ /* fall through */
+
+ case STAT_PX_ST_FE:
+ /* print the frontend */
+ if (stats_dump_fe_stats(sc, px)) {
+ if (!stats_putchk(rep, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+
+ current_field = 0;
+ ctx->obj2 = px->conf.listeners.n;
+ ctx->px_st = STAT_PX_ST_LI;
+ /* fall through */
+
+ case STAT_PX_ST_LI:
+ /* obj2 points to listeners list as initialized above */
+ for (; ctx->obj2 != &px->conf.listeners; ctx->obj2 = l->by_fe.n) {
+ if (htx) {
+ if (htx_almost_full(htx))
+ goto full;
+ }
+ else {
+ if (buffer_almost_full(&rep->buf))
+ goto full;
+ }
+
+ l = LIST_ELEM(ctx->obj2, struct listener *, by_fe);
+ if (!l->counters)
+ continue;
+
+ if (ctx->flags & STAT_BOUND) {
+ if (!(ctx->type & (1 << STATS_TYPE_SO)))
+ break;
+
+ if (ctx->sid != -1 && l->luid != ctx->sid)
+ continue;
+ }
+
+			/* print the listener */
+ if (stats_dump_li_stats(sc, px, l)) {
+ if (!stats_putchk(rep, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+ current_field = 0;
+ }
+
+ ctx->obj2 = px->srv; /* may be NULL */
+ ctx->px_st = STAT_PX_ST_SV;
+ /* fall through */
+
+ case STAT_PX_ST_SV:
+		/* obj2 points to servers list as initialized above.
+		 *
+		 * A server may be removed during the stats dumping.
+		 * Temporarily increment its refcount to prevent its
+		 * anticipated cleaning. Call srv_drop() to release it.
+		 */
+ for (; ctx->obj2 != NULL;
+ ctx->obj2 = srv_drop(sv)) {
+
+ sv = ctx->obj2;
+ srv_take(sv);
+
+ if (htx) {
+ if (htx_almost_full(htx))
+ goto full;
+ }
+ else {
+ if (buffer_almost_full(&rep->buf))
+ goto full;
+ }
+
+ if (ctx->flags & STAT_BOUND) {
+ if (!(ctx->type & (1 << STATS_TYPE_SV))) {
+ srv_drop(sv);
+ break;
+ }
+
+ if (ctx->sid != -1 && sv->puid != ctx->sid)
+ continue;
+ }
+
+ /* do not report disabled servers */
+ if (ctx->flags & STAT_HIDE_MAINT &&
+ sv->cur_admin & SRV_ADMF_MAINT) {
+ continue;
+ }
+
+ svs = sv;
+ while (svs->track)
+ svs = svs->track;
+
+ /* do not report servers which are DOWN and not changing state */
+ if ((ctx->flags & STAT_HIDE_DOWN) &&
+ ((sv->cur_admin & SRV_ADMF_MAINT) || /* server is in maintenance */
+ (sv->cur_state == SRV_ST_STOPPED && /* server is down */
+ (!((svs->agent.state | svs->check.state) & CHK_ST_ENABLED) ||
+ ((svs->agent.state & CHK_ST_ENABLED) && !svs->agent.health) ||
+ ((svs->check.state & CHK_ST_ENABLED) && !svs->check.health))))) {
+ continue;
+ }
+
+ if (stats_dump_sv_stats(sc, px, sv)) {
+ if (!stats_putchk(rep, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+ current_field = 0;
+ } /* for sv */
+
+ ctx->px_st = STAT_PX_ST_BE;
+ /* fall through */
+
+ case STAT_PX_ST_BE:
+ /* print the backend */
+ if (stats_dump_be_stats(sc, px)) {
+ if (!stats_putchk(rep, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+
+ current_field = 0;
+ ctx->px_st = STAT_PX_ST_END;
+ /* fall through */
+
+ case STAT_PX_ST_END:
+ if (ctx->flags & STAT_FMT_HTML) {
+ stats_dump_html_px_end(sc, px);
+ if (!stats_putchk(rep, htx))
+ goto full;
+ }
+
+ ctx->px_st = STAT_PX_ST_FIN;
+ /* fall through */
+
+ case STAT_PX_ST_FIN:
+ return 1;
+
+ default:
+ /* unknown state, we should put an abort() here ! */
+ return 1;
+ }
+
+ full:
+ sc_need_room(sc);
+ /* restore previous field */
+ ctx->field = current_field;
+ return 0;
+}
+
+/* Dumps the HTTP stats head block to the local trash buffer and uses the
+ * per-uri parameters <uri>. The caller is responsible for clearing the local
+ * trash buffer if needed.
+ */
+static void stats_dump_html_head(struct appctx *appctx, struct uri_auth *uri)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+
+ /* WARNING! This must fit in the first buffer !!! */
+ chunk_appendf(&trash_chunk,
+ "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n"
+ "\"http://www.w3.org/TR/html4/loose.dtd\">\n"
+ "<html><head><title>Statistics Report for " PRODUCT_NAME "%s%s</title>\n"
+ "<link rel=\"icon\" href=\"data:,\">\n"
+ "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">\n"
+ "<style type=\"text/css\"><!--\n"
+ "body {"
+ " font-family: arial, helvetica, sans-serif;"
+ " font-size: 12px;"
+ " font-weight: normal;"
+ " color: black;"
+ " background: white;"
+ "}\n"
+ "th,td {"
+ " font-size: 10px;"
+ "}\n"
+ "h1 {"
+ " font-size: x-large;"
+ " margin-bottom: 0.5em;"
+ "}\n"
+ "h2 {"
+ " font-family: helvetica, arial;"
+ " font-size: x-large;"
+ " font-weight: bold;"
+ " font-style: italic;"
+ " color: #6020a0;"
+ " margin-top: 0em;"
+ " margin-bottom: 0em;"
+ "}\n"
+ "h3 {"
+ " font-family: helvetica, arial;"
+ " font-size: 16px;"
+ " font-weight: bold;"
+ " color: #b00040;"
+ " background: #e8e8d0;"
+ " margin-top: 0em;"
+ " margin-bottom: 0em;"
+ "}\n"
+ "li {"
+ " margin-top: 0.25em;"
+ " margin-right: 2em;"
+ "}\n"
+ ".hr {margin-top: 0.25em;"
+ " border-color: black;"
+ " border-bottom-style: solid;"
+ "}\n"
+ ".titre {background: #20D0D0;color: #000000; font-weight: bold; text-align: center;}\n"
+ ".total {background: #20D0D0;color: #ffff80;}\n"
+ ".frontend {background: #e8e8d0;}\n"
+ ".socket {background: #d0d0d0;}\n"
+ ".backend {background: #e8e8d0;}\n"
+ ".active_down {background: #ff9090;}\n"
+ ".active_going_up {background: #ffd020;}\n"
+ ".active_going_down {background: #ffffa0;}\n"
+ ".active_up {background: #c0ffc0;}\n"
+ ".active_nolb {background: #20a0ff;}\n"
+ ".active_draining {background: #20a0FF;}\n"
+ ".active_no_check {background: #e0e0e0;}\n"
+ ".backup_down {background: #ff9090;}\n"
+ ".backup_going_up {background: #ff80ff;}\n"
+ ".backup_going_down {background: #c060ff;}\n"
+ ".backup_up {background: #b0d0ff;}\n"
+ ".backup_nolb {background: #90b0e0;}\n"
+ ".backup_draining {background: #cc9900;}\n"
+ ".backup_no_check {background: #e0e0e0;}\n"
+ ".maintain {background: #c07820;}\n"
+ ".rls {letter-spacing: 0.2em; margin-right: 1px;}\n" /* right letter spacing (used for grouping digits) */
+ "\n"
+ "a.px:link {color: #ffff40; text-decoration: none;}"
+ "a.px:visited {color: #ffff40; text-decoration: none;}"
+ "a.px:hover {color: #ffffff; text-decoration: none;}"
+ "a.lfsb:link {color: #000000; text-decoration: none;}"
+ "a.lfsb:visited {color: #000000; text-decoration: none;}"
+ "a.lfsb:hover {color: #505050; text-decoration: none;}"
+ "\n"
+ "table.tbl { border-collapse: collapse; border-style: none;}\n"
+ "table.tbl td { text-align: right; border-width: 1px 1px 1px 1px; border-style: solid solid solid solid; padding: 2px 3px; border-color: gray; white-space: nowrap;}\n"
+ "table.tbl td.ac { text-align: center;}\n"
+ "table.tbl th { border-width: 1px; border-style: solid solid solid solid; border-color: gray;}\n"
+ "table.tbl th.pxname { background: #b00040; color: #ffff40; font-weight: bold; border-style: solid solid none solid; padding: 2px 3px; white-space: nowrap;}\n"
+ "table.tbl th.empty { border-style: none; empty-cells: hide; background: white;}\n"
+ "table.tbl th.desc { background: white; border-style: solid solid none solid; text-align: left; padding: 2px 3px;}\n"
+ "\n"
+ "table.lgd { border-collapse: collapse; border-width: 1px; border-style: none none none solid; border-color: black;}\n"
+ "table.lgd td { border-width: 1px; border-style: solid solid solid solid; border-color: gray; padding: 2px;}\n"
+ "table.lgd td.noborder { border-style: none; padding: 2px; white-space: nowrap;}\n"
+ "table.det { border-collapse: collapse; border-style: none; }\n"
+ "table.det th { text-align: left; border-width: 0px; padding: 0px 1px 0px 0px; font-style:normal;font-size:11px;font-weight:bold;font-family: sans-serif;}\n"
+ "table.det td { text-align: right; border-width: 0px; padding: 0px 0px 0px 4px; white-space: nowrap; font-style:normal;font-size:11px;font-weight:normal;}\n"
+ "u {text-decoration:none; border-bottom: 1px dotted black;}\n"
+ "div.tips {\n"
+ " display:block;\n"
+ " visibility:hidden;\n"
+ " z-index:2147483647;\n"
+ " position:absolute;\n"
+ " padding:2px 4px 3px;\n"
+ " background:#f0f060; color:#000000;\n"
+ " border:1px solid #7040c0;\n"
+ " white-space:nowrap;\n"
+ " font-style:normal;font-size:11px;font-weight:normal;\n"
+ " -moz-border-radius:3px;-webkit-border-radius:3px;border-radius:3px;\n"
+ " -moz-box-shadow:gray 2px 2px 3px;-webkit-box-shadow:gray 2px 2px 3px;box-shadow:gray 2px 2px 3px;\n"
+ "}\n"
+ "u:hover div.tips {visibility:visible;}\n"
+ "@media (prefers-color-scheme: dark) {\n"
+ " body { font-family: arial, helvetica, sans-serif; font-size: 12px; font-weight: normal; color: #e8e6e3; background: #131516;}\n"
+ " h1 { color: #a265e0!important; }\n"
+ " h2 { color: #a265e0; }\n"
+ " h3 { color: #ff5190; background-color: #3e3e1f; }\n"
+ " a { color: #3391ff; }\n"
+ " input { background-color: #2f3437; }\n"
+ " .hr { border-color: #8c8273; }\n"
+ " .titre { background-color: #1aa6a6; color: #e8e6e3; }\n"
+ " .frontend {background: #2f3437;}\n"
+ " .socket {background: #2a2d2f;}\n"
+ " .backend {background: #2f3437;}\n"
+ " .active_down {background: #760000;}\n"
+ " .active_going_up {background: #b99200;}\n"
+ " .active_going_down {background: #6c6c00;}\n"
+ " .active_up {background: #165900;}\n"
+ " .active_nolb {background: #006ab9;}\n"
+ " .active_draining {background: #006ab9;}\n"
+ " .active_no_check {background: #2a2d2f;}\n"
+ " .backup_down {background: #760000;}\n"
+ " .backup_going_up {background: #7f007f;}\n"
+ " .backup_going_down {background: #580092;}\n"
+ " .backup_up {background: #2e3234;}\n"
+ " .backup_nolb {background: #1e3c6a;}\n"
+ " .backup_draining {background: #a37a00;}\n"
+ " .backup_no_check {background: #2a2d2f;}\n"
+ " .maintain {background: #9a601a;}\n"
+ " a.px:link {color: #d8d83b; text-decoration: none;}\n"
+ " a.px:visited {color: #d8d83b; text-decoration: none;}\n"
+ " a.px:hover {color: #ffffff; text-decoration: none;}\n"
+ " a.lfsb:link {color: #e8e6e3; text-decoration: none;}\n"
+ " a.lfsb:visited {color: #e8e6e3; text-decoration: none;}\n"
+ " a.lfsb:hover {color: #b5afa6; text-decoration: none;}\n"
+ " table.tbl th.empty { background-color: #181a1b; }\n"
+ " table.tbl th.desc { background: #181a1b; }\n"
+ " table.tbl th.pxname { background-color: #8d0033; color: #ffff46; }\n"
+ " table.tbl th { border-color: #808080; }\n"
+ " table.tbl td { border-color: #808080; }\n"
+ " u {text-decoration:none; border-bottom: 1px dotted #e8e6e3;}\n"
+ " div.tips {\n"
+ " background:#8e8e0d;\n"
+ " color:#e8e6e3;\n"
+ " border-color: #4e2c86;\n"
+ " -moz-box-shadow: #60686c 2px 2px 3px;\n"
+ " -webkit-box-shadow: #60686c 2px 2px 3px;\n"
+ " box-shadow: #60686c 2px 2px 3px;\n"
+ " }\n"
+ "}\n"
+ "-->\n"
+ "</style></head>\n",
+ (ctx->flags & STAT_SHNODE) ? " on " : "",
+ (ctx->flags & STAT_SHNODE) ? (uri && uri->node ? uri->node : global.node) : ""
+ );
+}
+
+/* Dumps the HTML stats information block to the local trash buffer and uses
+ * the state from stream connector <sc> and per-uri parameters <uri>. The caller
+ * is responsible for clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_info(struct stconn *sc, struct uri_auth *uri)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ unsigned int up = (now.tv_sec - start_date.tv_sec);
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN];
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+ unsigned long long bps = (unsigned long long)read_freq_ctr(&global.out_32bps) * 32;
+
+ /* Turn the bytes per second to bits per second and take care of the
+ * usual ethernet overhead in order to help figure how far we are from
+ * interface saturation since it's the only case which usually matters.
+ * For this we count the total size of an Ethernet frame on the wire
+ * including preamble and IFG (1538) for the largest TCP segment it
+ * transports (1448 with TCP timestamps). This is not valid for smaller
+ * packets (under-estimated), but it gives a reasonably accurate
+ * estimation of how far we are from uplink saturation.
+ */
+ bps = bps * 8 * 1538 / 1448;
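+	/* e.g. 100 MB/s of measured payload is 800 Mbps, reported here as
+	 * roughly 850 Mbps of estimated wire bandwidth
+	 */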
+
+	/* WARNING! this has to fit in the first packet too.
+	 * We are around 3.5 kB, and adding entries will
+	 * become tricky if we want to support 4kB buffers !
+	 */
+ chunk_appendf(&trash_chunk,
+ "<body><h1><a href=\"" PRODUCT_URL "\" style=\"text-decoration: none;\">"
+ PRODUCT_NAME "%s</a></h1>\n"
+ "<h2>Statistics Report for pid %d%s%s%s%s</h2>\n"
+ "<hr width=\"100%%\" class=\"hr\">\n"
+ "<h3>&gt; General process information</h3>\n"
+ "<table border=0><tr><td align=\"left\" nowrap width=\"1%%\">\n"
+ "<p><b>pid = </b> %d (process #%d, nbproc = %d, nbthread = %d)<br>\n"
+ "<b>uptime = </b> %dd %dh%02dm%02ds<br>\n"
+ "<b>system limits:</b> memmax = %s%s; ulimit-n = %d<br>\n"
+ "<b>maxsock = </b> %d; <b>maxconn = </b> %d; <b>maxpipes = </b> %d<br>\n"
+ "current conns = %d; current pipes = %d/%d; conn rate = %d/sec; bit rate = %.3f %cbps<br>\n"
+ "Running tasks: %d/%d; idle = %d %%<br>\n"
+ "</td><td align=\"center\" nowrap>\n"
+ "<table class=\"lgd\"><tr>\n"
+ "<td class=\"active_up\">&nbsp;</td><td class=\"noborder\">active UP </td>"
+ "<td class=\"backup_up\">&nbsp;</td><td class=\"noborder\">backup UP </td>"
+ "</tr><tr>\n"
+ "<td class=\"active_going_down\"></td><td class=\"noborder\">active UP, going down </td>"
+ "<td class=\"backup_going_down\"></td><td class=\"noborder\">backup UP, going down </td>"
+ "</tr><tr>\n"
+ "<td class=\"active_going_up\"></td><td class=\"noborder\">active DOWN, going up </td>"
+ "<td class=\"backup_going_up\"></td><td class=\"noborder\">backup DOWN, going up </td>"
+ "</tr><tr>\n"
+ "<td class=\"active_down\"></td><td class=\"noborder\">active or backup DOWN &nbsp;</td>"
+ "<td class=\"active_no_check\"></td><td class=\"noborder\">not checked </td>"
+ "</tr><tr>\n"
+ "<td class=\"maintain\"></td><td class=\"noborder\" colspan=\"3\">active or backup DOWN for maintenance (MAINT) &nbsp;</td>"
+ "</tr><tr>\n"
+ "<td class=\"active_draining\"></td><td class=\"noborder\" colspan=\"3\">active or backup SOFT STOPPED for maintenance &nbsp;</td>"
+ "</tr></table>\n"
+ "Note: \"NOLB\"/\"DRAIN\" = UP with load-balancing disabled."
+ "</td>"
+ "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">"
+ "<b>Display option:</b><ul style=\"margin-top: 0.25em;\">"
+ "",
+ (ctx->flags & STAT_HIDEVER) ? "" : (stats_version_string),
+ pid, (ctx->flags & STAT_SHNODE) ? " on " : "",
+ (ctx->flags & STAT_SHNODE) ? (uri->node ? uri->node : global.node) : "",
+ (ctx->flags & STAT_SHDESC) ? ": " : "",
+ (ctx->flags & STAT_SHDESC) ? (uri->desc ? uri->desc : global.desc) : "",
+ pid, 1, 1, global.nbthread,
+ up / 86400, (up % 86400) / 3600,
+ (up % 3600) / 60, (up % 60),
+ global.rlimit_memmax ? ultoa(global.rlimit_memmax) : "unlimited",
+ global.rlimit_memmax ? " MB" : "",
+ global.rlimit_nofile,
+ global.maxsock, global.maxconn, global.maxpipes,
+ actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec),
+ bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0),
+ bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k',
+ total_run_queues(), total_allocated_tasks(), clock_report_idle()
+ );
+
+ /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ memcpy(scope_txt, scope_ptr, ctx->scope_len);
+ scope_txt[ctx->scope_len] = '\0';
+
+ chunk_appendf(&trash_chunk,
+ "<li><form method=\"GET\">Scope : <input value=\"%s\" name=\"" STAT_SCOPE_INPUT_NAME "\" size=\"8\" maxlength=\"%d\" tabindex=\"1\"/></form>\n",
+ (ctx->scope_len > 0) ? scope_txt : "",
+ STAT_SCOPE_TXT_MAXLEN);
+
+ /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ scope_txt[0] = 0;
+ if (ctx->scope_len) {
+ strcpy(scope_txt, STAT_SCOPE_PATTERN);
+ memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len);
+ scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0;
+ }
+
+ if (ctx->flags & STAT_HIDE_DOWN)
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Show all servers</a><br>\n",
+ uri->uri_prefix,
+ "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ else
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Hide 'DOWN' servers</a><br>\n",
+ uri->uri_prefix,
+ ";up",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+
+ if (uri->refresh > 0) {
+ if (ctx->flags & STAT_NO_REFRESH)
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Enable refresh</a><br>\n",
+ uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ "",
+ scope_txt);
+ else
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Disable refresh</a><br>\n",
+ uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ ";norefresh",
+ scope_txt);
+ }
+
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Refresh now</a><br>\n",
+ uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s;csv%s%s\">CSV export</a><br>\n",
+ uri->uri_prefix,
+ (uri->refresh > 0) ? ";norefresh" : "",
+ scope_txt);
+
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s;json%s%s\">JSON export</a> (<a href=\"%s;json-schema\">schema</a>)<br>\n",
+ uri->uri_prefix,
+ (uri->refresh > 0) ? ";norefresh" : "",
+ scope_txt, uri->uri_prefix);
+
+ chunk_appendf(&trash_chunk,
+ "</ul></td>"
+ "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">"
+ "<b>External resources:</b><ul style=\"margin-top: 0.25em;\">\n"
+ "<li><a href=\"" PRODUCT_URL "\">Primary site</a><br>\n"
+ "<li><a href=\"" PRODUCT_URL_UPD "\">Updates (v" PRODUCT_BRANCH ")</a><br>\n"
+ "<li><a href=\"" PRODUCT_URL_DOC "\">Online manual</a><br>\n"
+ "</ul>"
+ "</td>"
+ "</tr></table>\n"
+ ""
+ );
+
+ if (ctx->st_code) {
+ switch (ctx->st_code) {
+ case STAT_STATUS_DONE:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_up>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Action processed successfully."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_NONE:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_going_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Nothing has changed."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_PART:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_going_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Action partially processed.<br>"
+ "Some server names are probably unknown or ambiguous (duplicated names in the backend)."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_ERRP:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Action not processed because of invalid parameters."
+ "<ul>"
+ "<li>The action is maybe unknown.</li>"
+ "<li>Invalid key parameter (empty or too long).</li>"
+ "<li>The backend name is probably unknown or ambiguous (duplicated names).</li>"
+ "<li>Some server names are probably unknown or ambiguous (duplicated names in the backend).</li>"
+ "</ul>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_EXCD:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+			      "<b>Action not processed: the buffer couldn't store all the data.<br>"
+			      "You should retry with fewer servers at a time.</b>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_DENY:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "<b>Action denied.</b>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_IVAL:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+			      "<b>Invalid request (unsupported method or chunked-encoded request).</b>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ default:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_no_check>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Unexpected result."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ }
+ chunk_appendf(&trash_chunk, "<p>\n");
+ }
+}
+
+/* Dumps the HTML stats trailer block to the local trash buffer. The caller is
+ * responsible for clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_end()
+{
+ chunk_appendf(&trash_chunk, "</body></html>\n");
+}
+
+/* Dumps the stats JSON header to the local trash buffer. The caller is
+ * responsible for clearing it if needed.
+ */
+static void stats_dump_json_header()
+{
+ chunk_strcat(&trash_chunk, "[");
+}
+
+
+/* Dumps the JSON stats trailer block to the local trash buffer. The caller is
+ * responsible for clearing the local trash buffer if needed.
+ */
+static void stats_dump_json_end()
+{
+ chunk_strcat(&trash_chunk, "]\n");
+}
+
+/* Uses <ctx->obj1> as a pointer to the current proxy and <ctx->obj2> as
+ * a pointer to the current server/listener.
+ */
+static int stats_dump_proxies(struct stconn *sc,
+ struct htx *htx,
+ struct uri_auth *uri)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *rep = sc_ic(sc);
+ struct proxy *px;
+
+ /* dump proxies */
+ while (ctx->obj1) {
+ if (htx) {
+ if (htx_almost_full(htx))
+ goto full;
+ }
+ else {
+ if (buffer_almost_full(&rep->buf))
+ goto full;
+ }
+
+ px = ctx->obj1;
+ /* Skip the global frontend proxies and non-networked ones.
+		 * Also skip proxies that were disabled in the configuration.
+		 * This allows retrieving stats from "old" proxies after a reload.
+ */
+ if (!(px->flags & PR_FL_DISABLED) && px->uuid > 0 &&
+ (px->cap & (PR_CAP_FE | PR_CAP_BE)) && !(px->cap & PR_CAP_INT)) {
+ if (stats_dump_proxy_to_buffer(sc, htx, px, uri) == 0)
+ return 0;
+ }
+
+ ctx->obj1 = px->next;
+ ctx->px_st = STAT_PX_ST_INIT;
+ ctx->field = 0;
+ }
+
+ return 1;
+
+ full:
+ sc_need_room(sc);
+ return 0;
+}
+
+/* This function dumps statistics onto the stream connector's read buffer in
+ * either CSV or HTML format. <uri> contains some HTML-specific parameters that
+ * are ignored for CSV format (hence <uri> may be NULL there). It returns 0 if
+ * it had to stop writing data and an I/O is needed, 1 if the dump is finished
+ * and the stream must be closed, or -1 in case of any error. This function is
+ * used by both the CLI and the HTTP handlers.
+ */
+static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx,
+ struct uri_auth *uri)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *rep = sc_ic(sc);
+ enum stats_domain domain = ctx->domain;
+
+ chunk_reset(&trash_chunk);
+
+ switch (ctx->state) {
+ case STAT_STATE_INIT:
+ ctx->state = STAT_STATE_HEAD; /* let's start producing data */
+ /* fall through */
+
+ case STAT_STATE_HEAD:
+ if (ctx->flags & STAT_FMT_HTML)
+ stats_dump_html_head(appctx, uri);
+ else if (ctx->flags & STAT_JSON_SCHM)
+ stats_dump_json_schema(&trash_chunk);
+ else if (ctx->flags & STAT_FMT_JSON)
+ stats_dump_json_header();
+ else if (!(ctx->flags & STAT_FMT_TYPED))
+ stats_dump_csv_header(ctx->domain);
+
+ if (!stats_putchk(rep, htx))
+ goto full;
+
+ if (ctx->flags & STAT_JSON_SCHM) {
+ ctx->state = STAT_STATE_FIN;
+ return 1;
+ }
+ ctx->state = STAT_STATE_INFO;
+ /* fall through */
+
+ case STAT_STATE_INFO:
+ if (ctx->flags & STAT_FMT_HTML) {
+ stats_dump_html_info(sc, uri);
+ if (!stats_putchk(rep, htx))
+ goto full;
+ }
+
+ if (domain == STATS_DOMAIN_PROXY)
+ ctx->obj1 = proxies_list;
+
+ ctx->px_st = STAT_PX_ST_INIT;
+ ctx->field = 0;
+ ctx->state = STAT_STATE_LIST;
+ /* fall through */
+
+ case STAT_STATE_LIST:
+ switch (domain) {
+ case STATS_DOMAIN_RESOLVERS:
+ if (!stats_dump_resolvers(sc, stat_l[domain],
+ stat_count[domain],
+ &stats_module_list[domain])) {
+ return 0;
+ }
+ break;
+
+ case STATS_DOMAIN_PROXY:
+ default:
+ /* dump proxies */
+ if (!stats_dump_proxies(sc, htx, uri))
+ return 0;
+ break;
+ }
+
+ ctx->state = STAT_STATE_END;
+ /* fall through */
+
+ case STAT_STATE_END:
+ if (ctx->flags & (STAT_FMT_HTML|STAT_FMT_JSON)) {
+ if (ctx->flags & STAT_FMT_HTML)
+ stats_dump_html_end();
+ else
+ stats_dump_json_end();
+ if (!stats_putchk(rep, htx))
+ goto full;
+ }
+
+ ctx->state = STAT_STATE_FIN;
+ /* fall through */
+
+ case STAT_STATE_FIN:
+ return 1;
+
+ default:
+ /* unknown state ! */
+ ctx->state = STAT_STATE_FIN;
+ return -1;
+ }
+
+ full:
+ sc_need_room(sc);
+ return 0;
+
+}
+
+/* We reached the stats page through a POST request. The appctx is
+ * expected to have already been allocated by the caller.
+ * Parse the posted data and enable/disable servers if necessary.
+ * Returns 1 if the request was parsed, or zero if more data is needed.
+ */
+static int stats_process_http_post(struct stconn *sc)
+{
+ struct stream *s = __sc_strm(sc);
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+
+ struct proxy *px = NULL;
+ struct server *sv = NULL;
+
+ char key[LINESIZE];
+ int action = ST_ADM_ACTION_NONE;
+ int reprocess = 0;
+
+ int total_servers = 0;
+ int altered_servers = 0;
+
+ char *first_param, *cur_param, *next_param, *end_params;
+ char *st_cur_param = NULL;
+ char *st_next_param = NULL;
+
+ struct buffer *temp = get_trash_chunk();
+
+ struct htx *htx = htxbuf(&s->req.buf);
+ struct htx_blk *blk;
+
+ /* we need more data */
+ if (s->txn->req.msg_state < HTTP_MSG_DONE) {
+ /* check if we can receive more */
+ if (htx_free_data_space(htx) <= global.tune.maxrewrite) {
+ ctx->st_code = STAT_STATUS_EXCD;
+ goto out;
+ }
+ goto wait;
+ }
+
+ /* The request was fully received. Copy data */
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA) {
+ struct ist v = htx_get_blk_value(htx, blk);
+
+ if (!chunk_memcat(temp, v.ptr, v.len)) {
+ ctx->st_code = STAT_STATUS_EXCD;
+ goto out;
+ }
+ }
+ blk = htx_get_next_blk(htx, blk);
+ }
+
+ first_param = temp->area;
+ end_params = temp->area + temp->data;
+ cur_param = next_param = end_params;
+ *end_params = '\0';
+
+ ctx->st_code = STAT_STATUS_NONE;
+
+ /*
+ * Parse the parameters in reverse order to only store the last value.
+	 * In the HTML form, the backend and the action are at the end.
+ */
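+	/* e.g. an illustrative posted body:
+	 *   s=srv1&s=srv2&action=maint&b=bk_app
+	 * Scanning it backwards makes "b" and "action" known before the "s"
+	 * entries are processed (possibly after one reprocessing pass below).
+	 */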
+ while (cur_param > first_param) {
+ char *value;
+ int poffset, plen;
+
+ cur_param--;
+
+ if ((*cur_param == '&') || (cur_param == first_param)) {
+ reprocess_servers:
+ /* Parse the key */
+ poffset = (cur_param != first_param ? 1 : 0);
+ plen = next_param - cur_param + (cur_param == first_param ? 1 : 0);
+ if ((plen > 0) && (plen <= sizeof(key))) {
+ strncpy(key, cur_param + poffset, plen);
+ key[plen - 1] = '\0';
+ } else {
+ ctx->st_code = STAT_STATUS_ERRP;
+ goto out;
+ }
+
+ /* Parse the value */
+ value = key;
+ while (*value != '\0' && *value != '=') {
+ value++;
+ }
+ if (*value == '=') {
+ /* Ok, a value is found, we can mark the end of the key */
+ *value++ = '\0';
+ }
+ if (url_decode(key, 1) < 0 || url_decode(value, 1) < 0)
+ break;
+
+ /* Now we can check the key to see what to do */
+ if (!px && (strcmp(key, "b") == 0)) {
+ if ((px = proxy_be_by_name(value)) == NULL) {
+ /* the backend name is unknown or ambiguous (duplicate names) */
+ ctx->st_code = STAT_STATUS_ERRP;
+ goto out;
+ }
+ }
+ else if (!action && (strcmp(key, "action") == 0)) {
+ if (strcmp(value, "ready") == 0) {
+ action = ST_ADM_ACTION_READY;
+ }
+ else if (strcmp(value, "drain") == 0) {
+ action = ST_ADM_ACTION_DRAIN;
+ }
+ else if (strcmp(value, "maint") == 0) {
+ action = ST_ADM_ACTION_MAINT;
+ }
+ else if (strcmp(value, "shutdown") == 0) {
+ action = ST_ADM_ACTION_SHUTDOWN;
+ }
+ else if (strcmp(value, "dhlth") == 0) {
+ action = ST_ADM_ACTION_DHLTH;
+ }
+ else if (strcmp(value, "ehlth") == 0) {
+ action = ST_ADM_ACTION_EHLTH;
+ }
+ else if (strcmp(value, "hrunn") == 0) {
+ action = ST_ADM_ACTION_HRUNN;
+ }
+ else if (strcmp(value, "hnolb") == 0) {
+ action = ST_ADM_ACTION_HNOLB;
+ }
+ else if (strcmp(value, "hdown") == 0) {
+ action = ST_ADM_ACTION_HDOWN;
+ }
+ else if (strcmp(value, "dagent") == 0) {
+ action = ST_ADM_ACTION_DAGENT;
+ }
+ else if (strcmp(value, "eagent") == 0) {
+ action = ST_ADM_ACTION_EAGENT;
+ }
+ else if (strcmp(value, "arunn") == 0) {
+ action = ST_ADM_ACTION_ARUNN;
+ }
+ else if (strcmp(value, "adown") == 0) {
+ action = ST_ADM_ACTION_ADOWN;
+ }
+ /* else these are the old supported methods */
+ else if (strcmp(value, "disable") == 0) {
+ action = ST_ADM_ACTION_DISABLE;
+ }
+ else if (strcmp(value, "enable") == 0) {
+ action = ST_ADM_ACTION_ENABLE;
+ }
+ else if (strcmp(value, "stop") == 0) {
+ action = ST_ADM_ACTION_STOP;
+ }
+ else if (strcmp(value, "start") == 0) {
+ action = ST_ADM_ACTION_START;
+ }
+ else {
+ ctx->st_code = STAT_STATUS_ERRP;
+ goto out;
+ }
+ }
+ else if (strcmp(key, "s") == 0) {
+ if (!(px && action)) {
+ /*
+ * Indicates that we'll need to reprocess the parameters
+ * as soon as backend and action are known
+ */
+ if (!reprocess) {
+ st_cur_param = cur_param;
+ st_next_param = next_param;
+ }
+ reprocess = 1;
+ }
+ else if ((sv = findserver(px, value)) != NULL) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ switch (action) {
+ case ST_ADM_ACTION_DISABLE:
+ if (!(sv->cur_admin & SRV_ADMF_FMAINT)) {
+ altered_servers++;
+ total_servers++;
+ srv_set_admin_flag(sv, SRV_ADMF_FMAINT, "'disable' on stats page");
+ }
+ break;
+ case ST_ADM_ACTION_ENABLE:
+ if (sv->cur_admin & SRV_ADMF_FMAINT) {
+ altered_servers++;
+ total_servers++;
+ srv_clr_admin_flag(sv, SRV_ADMF_FMAINT);
+ }
+ break;
+ case ST_ADM_ACTION_STOP:
+ if (!(sv->cur_admin & SRV_ADMF_FDRAIN)) {
+ srv_set_admin_flag(sv, SRV_ADMF_FDRAIN, "'stop' on stats page");
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_START:
+ if (sv->cur_admin & SRV_ADMF_FDRAIN) {
+ srv_clr_admin_flag(sv, SRV_ADMF_FDRAIN);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_DHLTH:
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state &= ~CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_EHLTH:
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state |= CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_HRUNN:
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_running(sv, "changed from Web interface", NULL);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_HNOLB:
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_stopping(sv, "changed from Web interface", NULL);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_HDOWN:
+ if (!(sv->track)) {
+ sv->check.health = 0;
+ srv_set_stopped(sv, "changed from Web interface", NULL);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_DAGENT:
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state &= ~CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_EAGENT:
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state |= CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_ARUNN:
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
+ srv_set_running(sv, "changed from Web interface", NULL);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_ADOWN:
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = 0;
+ srv_set_stopped(sv, "changed from Web interface", NULL);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_READY:
+ srv_adm_set_ready(sv);
+ altered_servers++;
+ total_servers++;
+ break;
+ case ST_ADM_ACTION_DRAIN:
+ srv_adm_set_drain(sv);
+ altered_servers++;
+ total_servers++;
+ break;
+ case ST_ADM_ACTION_MAINT:
+ srv_adm_set_maint(sv);
+ altered_servers++;
+ total_servers++;
+ break;
+ case ST_ADM_ACTION_SHUTDOWN:
+ if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ srv_shutdown_streams(sv, SF_ERR_KILLED);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ } else {
+ /* the server name is unknown or ambiguous (duplicate names) */
+ total_servers++;
+ }
+ }
+ if (reprocess && px && action) {
+ /* Now, we know the backend and the action chosen by the user.
+ * We can safely restart from the first server parameter
+ * to reprocess them
+ */
+ cur_param = st_cur_param;
+ next_param = st_next_param;
+ reprocess = 0;
+ goto reprocess_servers;
+ }
+
+ next_param = cur_param;
+ }
+ }
+
+ if (total_servers == 0) {
+ ctx->st_code = STAT_STATUS_NONE;
+ }
+ else if (altered_servers == 0) {
+ ctx->st_code = STAT_STATUS_ERRP;
+ }
+ else if (altered_servers == total_servers) {
+ ctx->st_code = STAT_STATUS_DONE;
+ }
+ else {
+ ctx->st_code = STAT_STATUS_PART;
+ }
+ out:
+ return 1;
+ wait:
+ ctx->st_code = STAT_STATUS_NONE;
+ return 0;
+}
+
+
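+/* Builds the HTTP response status line and headers for the stats page into
+ * <htx>. Returns 1 on success, or 0 and requests more room if the buffer is
+ * full.
+ */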
+static int stats_send_http_headers(struct stconn *sc, struct htx *htx)
+{
+ struct stream *s = __sc_strm(sc);
+ struct uri_auth *uri = s->be->uri_auth;
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct htx_sl *sl;
+ unsigned int flags;
+
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_ENC|HTX_SL_F_XFER_LEN|HTX_SL_F_CHNK);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("200"), ist("OK"));
+ if (!sl)
+ goto full;
+ sl->info.res.status = 200;
+
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")))
+ goto full;
+ if (ctx->flags & STAT_FMT_HTML) {
+ if (!htx_add_header(htx, ist("Content-Type"), ist("text/html")))
+ goto full;
+ }
+ else if (ctx->flags & (STAT_FMT_JSON|STAT_JSON_SCHM)) {
+ if (!htx_add_header(htx, ist("Content-Type"), ist("application/json")))
+ goto full;
+ }
+ else {
+ if (!htx_add_header(htx, ist("Content-Type"), ist("text/plain")))
+ goto full;
+ }
+
+ if (uri->refresh > 0 && !(ctx->flags & STAT_NO_REFRESH)) {
+ const char *refresh = U2A(uri->refresh);
+ if (!htx_add_header(htx, ist("Refresh"), ist(refresh)))
+ goto full;
+ }
+
+ if (ctx->flags & STAT_CHUNKED) {
+ if (!htx_add_header(htx, ist("Transfer-Encoding"), ist("chunked")))
+ goto full;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto full;
+
+ channel_add_input(&s->res, htx->data);
+ return 1;
+
+ full:
+ htx_reset(htx);
+ sc_need_room(sc);
+ return 0;
+}
+
+
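+/* Builds a "303 See Other" redirect to the GET version of the stats page
+ * after a POST, so that a browser refresh does not repost the form data.
+ * Returns 1 on success, or 0 and requests more room if the buffer is full.
+ */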
+static int stats_send_http_redirect(struct stconn *sc, struct htx *htx)
+{
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN];
+ struct stream *s = __sc_strm(sc);
+ struct uri_auth *uri = s->be->uri_auth;
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct htx_sl *sl;
+ unsigned int flags;
+
+ /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ scope_txt[0] = 0;
+ if (ctx->scope_len) {
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+
+ strcpy(scope_txt, STAT_SCOPE_PATTERN);
+ memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len);
+ scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0;
+ }
+
+ /* We don't want to land on the posted stats page because a refresh will
+	 * repost the data. We don't want this to happen by accident, so we
+	 * redirect the browser to the stats page with a GET.
+ */
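+	/* e.g. an illustrative result with the default prefix:
+	 *   "/haproxy?stats;st=DONE;up;norefresh"
+	 */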
+ chunk_printf(&trash, "%s;st=%s%s%s%s",
+ uri->uri_prefix,
+ ((ctx->st_code > STAT_STATUS_INIT) &&
+ (ctx->st_code < STAT_STATUS_SIZE) &&
+ stat_status_codes[ctx->st_code]) ?
+ stat_status_codes[ctx->st_code] :
+ stat_status_codes[STAT_STATUS_UNKN],
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CHNK);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("303"), ist("See Other"));
+ if (!sl)
+ goto full;
+ sl->info.res.status = 303;
+
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) ||
+ !htx_add_header(htx, ist("Content-Type"), ist("text/plain")) ||
+ !htx_add_header(htx, ist("Content-Length"), ist("0")) ||
+ !htx_add_header(htx, ist("Location"), ist2(trash.area, trash.data)))
+ goto full;
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto full;
+
+ channel_add_input(&s->res, htx->data);
+ return 1;
+
+full:
+ htx_reset(htx);
+ sc_need_room(sc);
+ return 0;
+}
+
+
+/* This I/O handler runs as an applet embedded in a stream connector. It is
+ * used to send HTTP stats over a TCP socket. The mechanism is very simple.
+ * appctx->st0 contains the operation in progress (dump, done). The handler
+ * automatically unregisters itself once transfer is complete.
+ */
+static void http_stats_io_handler(struct appctx *appctx)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct htx *req_htx, *res_htx;
+
+ /* only proxy stats are available via http */
+ ctx->domain = STATS_DOMAIN_PROXY;
+
+ res_htx = htx_from_buf(&res->buf);
+
+ if (unlikely(sc->state == SC_ST_DIS || sc->state == SC_ST_CLO))
+ goto out;
+
+	/* Check that the response buffer is available. */
+ if (!b_size(&res->buf)) {
+ sc_need_room(sc);
+ goto out;
+ }
+
+ /* check that the output is not closed */
+ if (res->flags & (CF_SHUTW|CF_SHUTW_NOW|CF_SHUTR))
+ appctx->st0 = STAT_HTTP_END;
+
+ /* all states are processed in sequence */
+ if (appctx->st0 == STAT_HTTP_HEAD) {
+ if (stats_send_http_headers(sc, res_htx)) {
+ if (s->txn->meth == HTTP_METH_HEAD)
+ appctx->st0 = STAT_HTTP_DONE;
+ else
+ appctx->st0 = STAT_HTTP_DUMP;
+ }
+ }
+
+ if (appctx->st0 == STAT_HTTP_DUMP) {
+ trash_chunk = b_make(trash.area, res->buf.size, 0, 0);
+ /* adjust buffer size to take htx overhead into account,
+ * make sure to perform this call on an empty buffer
+ */
+ trash_chunk.size = buf_room_for_htx_data(&trash_chunk);
+ if (stats_dump_stat_to_buffer(sc, res_htx, s->be->uri_auth))
+ appctx->st0 = STAT_HTTP_DONE;
+ }
+
+ if (appctx->st0 == STAT_HTTP_POST) {
+ if (stats_process_http_post(sc))
+ appctx->st0 = STAT_HTTP_LAST;
+ else if (req->flags & CF_SHUTR)
+ appctx->st0 = STAT_HTTP_DONE;
+ }
+
+ if (appctx->st0 == STAT_HTTP_LAST) {
+ if (stats_send_http_redirect(sc, res_htx))
+ appctx->st0 = STAT_HTTP_DONE;
+ }
+
+ if (appctx->st0 == STAT_HTTP_DONE) {
+ /* no more data are expected. If the response buffer is empty,
+ * be sure to add something (EOT block in this case) to have
+		 * something to send. It is important to make sure the EOM flag
+		 * will be handled by the endpoint.
+ */
+ if (htx_is_empty(res_htx)) {
+ if (!htx_add_endof(res_htx, HTX_BLK_EOT)) {
+ sc_need_room(sc);
+ goto out;
+ }
+ channel_add_input(res, 1);
+ }
+ res_htx->flags |= HTX_FL_EOM;
+ res->flags |= CF_EOI;
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ appctx->st0 = STAT_HTTP_END;
+ }
+
+ if (appctx->st0 == STAT_HTTP_END) {
+ if (!(res->flags & CF_SHUTR)) {
+ res->flags |= CF_READ_NULL;
+ sc_shutr(sc);
+ }
+ applet_will_consume(appctx);
+ }
+
+ out:
+ /* we have left the request in the buffer for the case where we
+ * process a POST, and this automatically re-enables activity on
+ * read. It's better to indicate that we want to stop reading when
+ * we're sending, so that we know there's at most one direction
+ * deciding to wake the applet up. It saves it from looping when
+ * emitting large blocks into small TCP windows.
+ */
+ htx_to_buf(res_htx, &res->buf);
+ if (appctx->st0 == STAT_HTTP_END) {
+ /* eat the whole request */
+ if (co_data(req)) {
+ req_htx = htx_from_buf(&req->buf);
+ co_htx_skip(req, req_htx, co_data(req));
+ htx_to_buf(req_htx, &req->buf);
+ }
+ }
+ else if (!channel_is_empty(res))
+ applet_wont_consume(appctx);
+}
+
+/* Dump all fields from <info> into <out> using the "show info" format (name: value) */
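+/* e.g. an illustrative beginning of the output:
+ *   Name: HAProxy
+ *   Version: 2.6.0
+ */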
+static int stats_dump_info_fields(struct buffer *out,
+ const struct field *info,
+ struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int field;
+
+ for (field = 0; field < INF_TOTAL_FIELDS; field++) {
+ if (!field_format(info, field))
+ continue;
+
+ if (!chunk_appendf(out, "%s: ", info_fields[field].name))
+ return 0;
+ if (!stats_emit_raw_data_field(out, &info[field]))
+ return 0;
+ if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc))
+ return 0;
+ if (!chunk_strcat(out, "\n"))
+ return 0;
+ }
+ return 1;
+}
+
+/* Dump all fields from <info> into <out> using the "show info typed" format */
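+/* e.g. an illustrative line, formatted as <pos>.<name>.<process_num>:<tags>:<type>:<value>:
+ *   0.Name.1:POS:str:HAProxy
+ */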
+static int stats_dump_typed_info_fields(struct buffer *out,
+ const struct field *info,
+ struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int field;
+
+ for (field = 0; field < INF_TOTAL_FIELDS; field++) {
+ if (!field_format(info, field))
+ continue;
+
+ if (!chunk_appendf(out, "%d.%s.%u:", field, info_fields[field].name, info[INF_PROCESS_NUM].u.u32))
+ return 0;
+ if (!stats_emit_field_tags(out, &info[field], ':'))
+ return 0;
+ if (!stats_emit_typed_data_field(out, &info[field]))
+ return 0;
+ if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc))
+ return 0;
+ if (!chunk_strcat(out, "\n"))
+ return 0;
+ }
+ return 1;
+}
+
+/* Fill <info> with HAProxy global info. <info> is a preallocated array of
+ * length <len>, which must be at least INF_TOTAL_FIELDS. If the length is
+ * less than this value, the function returns 0, otherwise it returns 1. Some
+ * fields' presence or precision may depend on some of the STAT_* flags present
+ * in <flags>.
+ */
+int stats_fill_info(struct field *info, int len, uint flags)
+{
+ struct timeval up;
+ struct buffer *out = get_trash_chunk();
+
+#ifdef USE_OPENSSL
+ double ssl_sess_rate = read_freq_ctr_flt(&global.ssl_per_sec);
+ double ssl_key_rate = read_freq_ctr_flt(&global.ssl_fe_keys_per_sec);
+ double ssl_reuse = 0;
+
+ if (ssl_key_rate < ssl_sess_rate)
+ ssl_reuse = 100.0 * (1.0 - ssl_key_rate / ssl_sess_rate);
+#endif
+
+ tv_remain(&start_date, &now, &up);
+
+ if (len < INF_TOTAL_FIELDS)
+ return 0;
+
+ chunk_reset(out);
+ memset(info, 0, sizeof(*info) * len);
+
+ info[INF_NAME] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, PRODUCT_NAME);
+ info[INF_VERSION] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version);
+ info[INF_BUILD_INFO] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version);
+ info[INF_RELEASE_DATE] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_date);
+
+ info[INF_NBTHREAD] = mkf_u32(FO_CONFIG|FS_SERVICE, global.nbthread);
+ info[INF_NBPROC] = mkf_u32(FO_CONFIG|FS_SERVICE, 1);
+ info[INF_PROCESS_NUM] = mkf_u32(FO_KEY, 1);
+ info[INF_PID] = mkf_u32(FO_STATUS, pid);
+
+ info[INF_UPTIME] = mkf_str(FN_DURATION, chunk_newstr(out));
+ chunk_appendf(out, "%ud %uh%02um%02us", (uint)up.tv_sec / 86400, ((uint)up.tv_sec % 86400) / 3600, ((uint)up.tv_sec % 3600) / 60, ((uint)up.tv_sec % 60));
+
+ info[INF_UPTIME_SEC] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_DURATION, up.tv_sec + up.tv_usec / 1000000.0) : mkf_u32(FN_DURATION, up.tv_sec);
+ info[INF_START_TIME_SEC] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_DURATION, start_date.tv_sec + start_date.tv_usec / 1000000.0) : mkf_u32(FN_DURATION, start_date.tv_sec);
+ info[INF_MEMMAX_MB] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax);
+ info[INF_MEMMAX_BYTES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax * 1048576L);
+ info[INF_POOL_ALLOC_MB] = mkf_u32(0, (unsigned)(pool_total_allocated() / 1048576L));
+ info[INF_POOL_ALLOC_BYTES] = mkf_u64(0, pool_total_allocated());
+ info[INF_POOL_USED_MB] = mkf_u32(0, (unsigned)(pool_total_used() / 1048576L));
+ info[INF_POOL_USED_BYTES] = mkf_u64(0, pool_total_used());
+ info[INF_POOL_FAILED] = mkf_u32(FN_COUNTER, pool_total_failures());
+ info[INF_ULIMIT_N] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_nofile);
+ info[INF_MAXSOCK] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxsock);
+ info[INF_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxconn);
+ info[INF_HARD_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.hardmaxconn);
+ info[INF_CURR_CONN] = mkf_u32(0, actconn);
+ info[INF_CUM_CONN] = mkf_u32(FN_COUNTER, totalconn);
+ info[INF_CUM_REQ] = mkf_u32(FN_COUNTER, global.req_count);
+#ifdef USE_OPENSSL
+ info[INF_MAX_SSL_CONNS] = mkf_u32(FN_MAX, global.maxsslconn);
+ info[INF_CURR_SSL_CONNS] = mkf_u32(0, global.sslconns);
+ info[INF_CUM_SSL_CONNS] = mkf_u32(FN_COUNTER, global.totalsslconns);
+#endif
+ info[INF_MAXPIPES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxpipes);
+ info[INF_PIPES_USED] = mkf_u32(0, pipes_used);
+ info[INF_PIPES_FREE] = mkf_u32(0, pipes_free);
+ info[INF_CONN_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.conn_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.conn_per_sec));
+ info[INF_CONN_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.cps_lim);
+ info[INF_MAX_CONN_RATE] = mkf_u32(FN_MAX, global.cps_max);
+ info[INF_SESS_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.sess_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.sess_per_sec));
+ info[INF_SESS_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.sps_lim);
+ info[INF_MAX_SESS_RATE] = mkf_u32(FN_RATE, global.sps_max);
+
+#ifdef USE_OPENSSL
+ info[INF_SSL_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_sess_rate) : mkf_u32(FN_RATE, ssl_sess_rate);
+ info[INF_SSL_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.ssl_lim);
+ info[INF_MAX_SSL_RATE] = mkf_u32(FN_MAX, global.ssl_max);
+ info[INF_SSL_FRONTEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_key_rate) : mkf_u32(0, ssl_key_rate);
+ info[INF_SSL_FRONTEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_fe_keys_max);
+ info[INF_SSL_FRONTEND_SESSION_REUSE_PCT] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_reuse) : mkf_u32(0, ssl_reuse);
+ info[INF_SSL_BACKEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.ssl_be_keys_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.ssl_be_keys_per_sec));
+ info[INF_SSL_BACKEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_be_keys_max);
+ info[INF_SSL_CACHE_LOOKUPS] = mkf_u32(FN_COUNTER, global.shctx_lookups);
+ info[INF_SSL_CACHE_MISSES] = mkf_u32(FN_COUNTER, global.shctx_misses);
+#endif
+ info[INF_COMPRESS_BPS_IN] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_in)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_in));
+ info[INF_COMPRESS_BPS_OUT] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_out)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_out));
+ info[INF_COMPRESS_BPS_RATE_LIM] = mkf_u32(FO_CONFIG|FN_LIMIT, global.comp_rate_lim);
+#ifdef USE_ZLIB
+ info[INF_ZLIB_MEM_USAGE] = mkf_u32(0, zlib_used_memory);
+ info[INF_MAX_ZLIB_MEM_USAGE] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxzlibmem);
+#endif
+ info[INF_TASKS] = mkf_u32(0, total_allocated_tasks());
+ info[INF_RUN_QUEUE] = mkf_u32(0, total_run_queues());
+ info[INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle());
+ info[INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node);
+ if (global.desc)
+ info[INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc);
+ info[INF_STOPPING] = mkf_u32(0, stopping);
+ info[INF_JOBS] = mkf_u32(0, jobs);
+ info[INF_UNSTOPPABLE_JOBS] = mkf_u32(0, unstoppable_jobs);
+ info[INF_LISTENERS] = mkf_u32(0, listeners);
+ info[INF_ACTIVE_PEERS] = mkf_u32(0, active_peers);
+ info[INF_CONNECTED_PEERS] = mkf_u32(0, connected_peers);
+ info[INF_DROPPED_LOGS] = mkf_u32(0, dropped_logs);
+ info[INF_BUSY_POLLING] = mkf_u32(0, !!(global.tune.options & GTUNE_BUSY_POLLING));
+ info[INF_FAILED_RESOLUTIONS] = mkf_u32(0, resolv_failed_resolutions);
+ info[INF_TOTAL_BYTES_OUT] = mkf_u64(0, global.out_bytes);
+ info[INF_TOTAL_SPLICED_BYTES_OUT] = mkf_u64(0, global.spliced_out_bytes);
+ info[INF_BYTES_OUT_RATE] = mkf_u64(FN_RATE, (unsigned long long)read_freq_ctr(&global.out_32bps) * 32);
+ info[INF_DEBUG_COMMANDS_ISSUED] = mkf_u32(0, debug_commands_issued);
+ info[INF_CUM_LOG_MSGS] = mkf_u32(FN_COUNTER, cum_log_messages);
+
+ info[INF_TAINTED] = mkf_str(FO_STATUS, chunk_newstr(out));
+ chunk_appendf(out, "%#x", get_tainted());
+
+ return 1;
+}
+
+/* This function dumps information onto the stream connector's read buffer.
+ * It returns 0 as long as it does not complete, non-zero upon completion.
+ * The current field is saved in <ctx->field> so that an interrupted dump can
+ * be resumed from the same point.
+ */
+static int stats_dump_info_to_buffer(struct stconn *sc)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ int ret;
+ int current_field;
+
+ if (!stats_fill_info(info, INF_TOTAL_FIELDS, ctx->flags))
+ return 0;
+
+ chunk_reset(&trash_chunk);
+more:
+ current_field = ctx->field;
+
+ if (ctx->flags & STAT_FMT_TYPED)
+ ret = stats_dump_typed_info_fields(&trash_chunk, info, ctx);
+ else if (ctx->flags & STAT_FMT_JSON)
+ ret = stats_dump_json_info_fields(&trash_chunk, info, ctx);
+ else
+ ret = stats_dump_info_fields(&trash_chunk, info, ctx);
+
+ if (applet_putchk(appctx, &trash_chunk) == -1) {
+ /* restore previous field */
+ ctx->field = current_field;
+ return 0;
+ }
+ if (ret && ctx->field) {
+ /* partial dump */
+ goto more;
+ }
+ ctx->field = 0;
+ return 1;
+}
+
+/* This function dumps the JSON schema into the <out> buffer. If the output
+ * buffer is too short to hold the whole schema, an error object is emitted
+ * instead.
+ *
+ * Integer values are bounded to the range [-(2**53)+1, (2**53)-1] as
+ * per the recommendation for interoperable integers in section 6 of RFC 7159.
+ */
+static void stats_dump_json_schema(struct buffer *out)
+{
+
+ int old_len = out->data;
+
+ chunk_strcat(out,
+ "{"
+ "\"$schema\":\"http://json-schema.org/draft-04/schema#\","
+ "\"oneOf\":["
+ "{"
+ "\"title\":\"Info\","
+ "\"type\":\"array\","
+ "\"items\":{"
+ "\"title\":\"InfoItem\","
+ "\"type\":\"object\","
+ "\"properties\":{"
+ "\"field\":{\"$ref\":\"#/definitions/field\"},"
+ "\"processNum\":{\"$ref\":\"#/definitions/processNum\"},"
+ "\"tags\":{\"$ref\":\"#/definitions/tags\"},"
+ "\"value\":{\"$ref\":\"#/definitions/typedValue\"}"
+ "},"
+ "\"required\":[\"field\",\"processNum\",\"tags\","
+ "\"value\"]"
+ "}"
+ "},"
+ "{"
+ "\"title\":\"Stat\","
+ "\"type\":\"array\","
+ "\"items\":{"
+ "\"title\":\"InfoItem\","
+ "\"type\":\"object\","
+ "\"properties\":{"
+ "\"objType\":{"
+ "\"enum\":[\"Frontend\",\"Backend\",\"Listener\","
+ "\"Server\",\"Unknown\"]"
+ "},"
+ "\"proxyId\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"id\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"field\":{\"$ref\":\"#/definitions/field\"},"
+ "\"processNum\":{\"$ref\":\"#/definitions/processNum\"},"
+ "\"tags\":{\"$ref\":\"#/definitions/tags\"},"
+ "\"typedValue\":{\"$ref\":\"#/definitions/typedValue\"}"
+ "},"
+ "\"required\":[\"objType\",\"proxyId\",\"id\","
+ "\"field\",\"processNum\",\"tags\","
+ "\"value\"]"
+ "}"
+ "},"
+ "{"
+ "\"title\":\"Error\","
+ "\"type\":\"object\","
+ "\"properties\":{"
+ "\"errorStr\":{"
+ "\"type\":\"string\""
+ "}"
+ "},"
+ "\"required\":[\"errorStr\"]"
+ "}"
+ "],"
+ "\"definitions\":{"
+ "\"field\":{"
+ "\"type\":\"object\","
+ "\"pos\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"name\":{"
+ "\"type\":\"string\""
+ "},"
+ "\"required\":[\"pos\",\"name\"]"
+ "},"
+ "\"processNum\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":1"
+ "},"
+ "\"tags\":{"
+ "\"type\":\"object\","
+ "\"origin\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"Metric\",\"Status\",\"Key\","
+ "\"Config\",\"Product\",\"Unknown\"]"
+ "},"
+ "\"nature\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"Gauge\",\"Limit\",\"Min\",\"Max\","
+ "\"Rate\",\"Counter\",\"Duration\","
+ "\"Age\",\"Time\",\"Name\",\"Output\","
+ "\"Avg\", \"Unknown\"]"
+ "},"
+ "\"scope\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"Cluster\",\"Process\",\"Service\","
+ "\"System\",\"Unknown\"]"
+ "},"
+ "\"required\":[\"origin\",\"nature\",\"scope\"]"
+ "},"
+ "\"typedValue\":{"
+ "\"type\":\"object\","
+ "\"oneOf\":["
+ "{\"$ref\":\"#/definitions/typedValue/definitions/s32Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/s64Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/u32Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/u64Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/strValue\"}"
+ "],"
+ "\"definitions\":{"
+ "\"s32Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"s32\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":-2147483648,"
+ "\"maximum\":2147483647"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"s64Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"s64\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":-9007199254740991,"
+ "\"maximum\":9007199254740991"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"u32Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"u32\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0,"
+ "\"maximum\":4294967295"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"u64Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"u64\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0,"
+ "\"maximum\":9007199254740991"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"strValue\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"str\"]"
+ "},"
+ "\"value\":{\"type\":\"string\"}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"unknownValue\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"value\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"unknown\"]"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "}"
+ "}"
+ "}"
+ "}"
+ "}");
+
+ if (old_len == out->data) {
+ chunk_reset(out);
+ chunk_appendf(out,
+ "{\"errorStr\":\"output buffer too short\"}");
+ }
+ chunk_appendf(out, "\n");
+}
+
+/* This function dumps the schema onto the stream connector's read buffer.
+ * It returns 0 as long as it does not complete, non-zero upon completion.
+ * No state is used.
+ */
+static int stats_dump_json_schema_to_buffer(struct appctx *appctx)
+{
+
+ chunk_reset(&trash_chunk);
+
+ stats_dump_json_schema(&trash_chunk);
+
+ if (applet_putchk(appctx, &trash_chunk) == -1)
+ return 0;
+
+ return 1;
+}
+
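+/* Parses the "clear counters [all]" CLI command: clears the max counters of
+ * all proxies, servers and listeners, or all their counters when "all" is
+ * specified (which requires the admin level).
+ */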
+static int cli_parse_clear_counters(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *sv;
+ struct listener *li;
+ struct stats_module *mod;
+ int clrall = 0;
+
+ if (strcmp(args[2], "all") == 0)
+ clrall = 1;
+
+ /* check permissions */
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER) ||
+ (clrall && !cli_has_level(appctx, ACCESS_LVL_ADMIN)))
+ return 1;
+
+ for (px = proxies_list; px; px = px->next) {
+ if (clrall) {
+ memset(&px->be_counters, 0, sizeof(px->be_counters));
+ memset(&px->fe_counters, 0, sizeof(px->fe_counters));
+ }
+ else {
+ px->be_counters.conn_max = 0;
+ px->be_counters.p.http.rps_max = 0;
+ px->be_counters.sps_max = 0;
+ px->be_counters.cps_max = 0;
+ px->be_counters.nbpend_max = 0;
+ px->be_counters.qtime_max = 0;
+ px->be_counters.ctime_max = 0;
+ px->be_counters.dtime_max = 0;
+ px->be_counters.ttime_max = 0;
+
+ px->fe_counters.conn_max = 0;
+ px->fe_counters.p.http.rps_max = 0;
+ px->fe_counters.sps_max = 0;
+ px->fe_counters.cps_max = 0;
+ }
+
+ for (sv = px->srv; sv; sv = sv->next)
+ if (clrall)
+ memset(&sv->counters, 0, sizeof(sv->counters));
+ else {
+ sv->counters.cur_sess_max = 0;
+ sv->counters.nbpend_max = 0;
+ sv->counters.sps_max = 0;
+ sv->counters.qtime_max = 0;
+ sv->counters.ctime_max = 0;
+ sv->counters.dtime_max = 0;
+ sv->counters.ttime_max = 0;
+ }
+
+ list_for_each_entry(li, &px->conf.listeners, by_fe)
+ if (li->counters) {
+ if (clrall)
+ memset(li->counters, 0, sizeof(*li->counters));
+ else
+ li->counters->conn_max = 0;
+ }
+ }
+
+ global.cps_max = 0;
+ global.sps_max = 0;
+ global.ssl_max = 0;
+ global.ssl_fe_keys_max = 0;
+ global.ssl_be_keys_max = 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ if (!mod->clearable && !clrall)
+ continue;
+
+ for (px = proxies_list; px; px = px->next) {
+ enum stats_domain_px_cap mod_cap = stats_px_get_cap(mod->domain_flags);
+
+ if (px->cap & PR_CAP_FE && mod_cap & STATS_PX_CAP_FE) {
+ EXTRA_COUNTERS_INIT(px->extra_counters_fe,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+
+ if (px->cap & PR_CAP_BE && mod_cap & STATS_PX_CAP_BE) {
+ EXTRA_COUNTERS_INIT(px->extra_counters_be,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+
+ if (mod_cap & STATS_PX_CAP_SRV) {
+ for (sv = px->srv; sv; sv = sv->next) {
+ EXTRA_COUNTERS_INIT(sv->extra_counters,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+ }
+
+ if (mod_cap & STATS_PX_CAP_LI) {
+ list_for_each_entry(li, &px->conf.listeners, by_fe) {
+ EXTRA_COUNTERS_INIT(li->extra_counters,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+ }
+ }
+ }
+
+ resolv_stats_clear_counters(clrall, &stats_module_list[STATS_DOMAIN_RESOLVERS]);
+
+ memset(activity, 0, sizeof(activity));
+ return 1;
+}
+
+
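+/* Parses the "show info" CLI command and stores the requested output format
+ * ("typed", "json", "desc", "float") into the applet context.
+ */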
+static int cli_parse_show_info(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg = 2;
+
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ ctx->flags = 0;
+ ctx->field = 0; /* explicit default value */
+
+ while (*args[arg]) {
+ if (strcmp(args[arg], "typed") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED;
+ else if (strcmp(args[arg], "json") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON;
+ else if (strcmp(args[arg], "desc") == 0)
+ ctx->flags |= STAT_SHOW_FDESC;
+ else if (strcmp(args[arg], "float") == 0)
+ ctx->flags |= STAT_USE_FLOAT;
+ arg++;
+ }
+ return 0;
+}
+
+
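+/* Parses the "show stat" CLI command: an optional "domain" selection, an
+ * optional <iid> <type> <sid> filter for the proxy domain, then the output
+ * format flags.
+ */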
+static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg = 2;
+
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ ctx->flags = STAT_SHNODE | STAT_SHDESC;
+
+ if ((strm_li(appctx_strm(appctx))->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER)
+ ctx->flags |= STAT_SHLGNDS;
+
+ /* proxy is the default domain */
+ ctx->domain = STATS_DOMAIN_PROXY;
+ if (strcmp(args[arg], "domain") == 0) {
+ ++args;
+
+ if (strcmp(args[arg], "proxy") == 0) {
+ ++args;
+ } else if (strcmp(args[arg], "resolvers") == 0) {
+ ctx->domain = STATS_DOMAIN_RESOLVERS;
+ ++args;
+ } else {
+ return cli_err(appctx, "Invalid statistics domain.\n");
+ }
+ }
+
+ if (ctx->domain == STATS_DOMAIN_PROXY
+ && *args[arg] && *args[arg+1] && *args[arg+2]) {
+ struct proxy *px;
+
+ px = proxy_find_by_name(args[arg], 0, 0);
+ if (px)
+ ctx->iid = px->uuid;
+ else
+ ctx->iid = atoi(args[arg]);
+
+ if (!ctx->iid)
+ return cli_err(appctx, "No such proxy.\n");
+
+ ctx->flags |= STAT_BOUND;
+ ctx->type = atoi(args[arg+1]);
+ ctx->sid = atoi(args[arg+2]);
+ arg += 3;
+ }
+
+ while (*args[arg]) {
+ if (strcmp(args[arg], "typed") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED;
+ else if (strcmp(args[arg], "json") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON;
+ else if (strcmp(args[arg], "desc") == 0)
+ ctx->flags |= STAT_SHOW_FDESC;
+ else if (strcmp(args[arg], "no-maint") == 0)
+ ctx->flags |= STAT_HIDE_MAINT;
+ else if (strcmp(args[arg], "up") == 0)
+ ctx->flags |= STAT_HIDE_DOWN;
+ arg++;
+ }
+
+ return 0;
+}
+
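+/* This I/O handler runs as an applet embedded in a stream connector. It is
+ * used to send raw "show info" output over a socket.
+ */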
+static int cli_io_handler_dump_info(struct appctx *appctx)
+{
+ trash_chunk = b_make(trash.area, trash.size, 0, 0);
+ return stats_dump_info_to_buffer(appctx_sc(appctx));
+}
+
+/* This I/O handler runs as an applet embedded in a stream connector. It is
+ * used to send raw stats over a socket.
+ */
+static int cli_io_handler_dump_stat(struct appctx *appctx)
+{
+ trash_chunk = b_make(trash.area, trash.size, 0, 0);
+ return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL, NULL);
+}
+
+static int cli_io_handler_dump_json_schema(struct appctx *appctx)
+{
+ trash_chunk = b_make(trash.area, trash.size, 0, 0);
+ return stats_dump_json_schema_to_buffer(appctx);
+}
+
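+/* Allocates and initializes the extra counters of every registered stats
+ * module matching <px_cap>, for the object of type <type> whose counters
+ * list is pointed to by <counters>. Returns 1 on success, 0 on allocation
+ * failure.
+ */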
+int stats_allocate_proxy_counters_internal(struct extra_counters **counters,
+ int type, int px_cap)
+{
+ struct stats_module *mod;
+
+ EXTRA_COUNTERS_REGISTER(counters, type, alloc_failed);
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ if (!(stats_px_get_cap(mod->domain_flags) & px_cap))
+ continue;
+
+ EXTRA_COUNTERS_ADD(mod, *counters, mod->counters, mod->counters_size);
+ }
+
+ EXTRA_COUNTERS_ALLOC(*counters, alloc_failed);
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ if (!(stats_px_get_cap(mod->domain_flags) & px_cap))
+ continue;
+
+ EXTRA_COUNTERS_INIT(*counters, mod, mod->counters, mod->counters_size);
+ }
+
+ return 1;
+
+ alloc_failed:
+ return 0;
+}
+
+/* Initialize and allocate all extra counters for a proxy and its attached
+ * servers/listeners with all already registered stats module
+ */
+int stats_allocate_proxy_counters(struct proxy *px)
+{
+ struct server *sv;
+ struct listener *li;
+
+ if (px->cap & PR_CAP_FE) {
+ if (!stats_allocate_proxy_counters_internal(&px->extra_counters_fe,
+ COUNTERS_FE,
+ STATS_PX_CAP_FE)) {
+ return 0;
+ }
+ }
+
+ if (px->cap & PR_CAP_BE) {
+ if (!stats_allocate_proxy_counters_internal(&px->extra_counters_be,
+ COUNTERS_BE,
+ STATS_PX_CAP_BE)) {
+ return 0;
+ }
+ }
+
+ for (sv = px->srv; sv; sv = sv->next) {
+ if (!stats_allocate_proxy_counters_internal(&sv->extra_counters,
+ COUNTERS_SV,
+ STATS_PX_CAP_SRV)) {
+ return 0;
+ }
+ }
+
+ list_for_each_entry(li, &px->conf.listeners, by_fe) {
+ if (!stats_allocate_proxy_counters_internal(&li->extra_counters,
+ COUNTERS_LI,
+ STATS_PX_CAP_LI)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+void stats_register_module(struct stats_module *m)
+{
+ const uint8_t domain = stats_get_domain(m->domain_flags);
+
+ LIST_APPEND(&stats_module_list[domain], &m->list);
+ stat_count[domain] += m->stats_count;
+}
+
+static int allocate_stats_px_postcheck(void)
+{
+ struct stats_module *mod;
+ size_t i = ST_F_TOTAL_FIELDS;
+ int err_code = 0;
+ struct proxy *px;
+
+ stat_count[STATS_DOMAIN_PROXY] += ST_F_TOTAL_FIELDS;
+
+ stat_f[STATS_DOMAIN_PROXY] = malloc(stat_count[STATS_DOMAIN_PROXY] * sizeof(struct name_desc));
+ if (!stat_f[STATS_DOMAIN_PROXY]) {
+ ha_alert("stats: cannot allocate all fields for proxy statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+
+ memcpy(stat_f[STATS_DOMAIN_PROXY], stat_fields,
+ ST_F_TOTAL_FIELDS * sizeof(struct name_desc));
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ memcpy(stat_f[STATS_DOMAIN_PROXY] + i,
+ mod->stats,
+ mod->stats_count * sizeof(struct name_desc));
+ i += mod->stats_count;
+ }
+
+ for (px = proxies_list; px; px = px->next) {
+ if (!stats_allocate_proxy_counters(px)) {
+ ha_alert("stats: cannot allocate all counters for proxy statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+ }
+
+	/* the per-thread alloc callback will perform the corresponding stat_l allocation */
+
+ return err_code;
+}
+
+REGISTER_CONFIG_POSTPARSER("allocate-stats-px", allocate_stats_px_postcheck);
+
+static int allocate_stats_rslv_postcheck(void)
+{
+ struct stats_module *mod;
+ size_t i = 0;
+ int err_code = 0;
+
+ stat_f[STATS_DOMAIN_RESOLVERS] = malloc(stat_count[STATS_DOMAIN_RESOLVERS] * sizeof(struct name_desc));
+ if (!stat_f[STATS_DOMAIN_RESOLVERS]) {
+ ha_alert("stats: cannot allocate all fields for resolver statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_RESOLVERS], list) {
+ memcpy(stat_f[STATS_DOMAIN_RESOLVERS] + i,
+ mod->stats,
+ mod->stats_count * sizeof(struct name_desc));
+ i += mod->stats_count;
+ }
+
+ if (!resolv_allocate_counters(&stats_module_list[STATS_DOMAIN_RESOLVERS])) {
+ ha_alert("stats: cannot allocate all counters for resolver statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+
+	/* the per-thread alloc callback will perform the corresponding stat_l allocation */
+
+ return err_code;
+}
+
+REGISTER_CONFIG_POSTPARSER("allocate-stats-resolver", allocate_stats_rslv_postcheck);
+
+static int allocate_stat_lines_per_thread(void)
+{
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ const int domain = domains[i];
+
+ stat_l[domain] = malloc(stat_count[domain] * sizeof(struct field));
+ if (!stat_l[domain])
+ return 0;
+ }
+ return 1;
+}
+
+REGISTER_PER_THREAD_ALLOC(allocate_stat_lines_per_thread);
+
+static int allocate_trash_counters(void)
+{
+ struct stats_module *mod;
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+ size_t max_counters_size = 0;
+
+	/* compute the largest counters size used by any stats module */
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ list_for_each_entry(mod, &stats_module_list[domains[i]], list) {
+ max_counters_size = mod->counters_size > max_counters_size ?
+ mod->counters_size : max_counters_size;
+ }
+ }
+
+	/* allocate the trash counters using the largest size found */
+ if (max_counters_size) {
+ trash_counters = malloc(max_counters_size);
+ if (!trash_counters) {
+ ha_alert("stats: cannot allocate trash counters for statistics\n");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+REGISTER_PER_THREAD_ALLOC(allocate_trash_counters);
+
+static void deinit_stat_lines_per_thread(void)
+{
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ const int domain = domains[i];
+
+ ha_free(&stat_l[domain]);
+ }
+}
+
+
+REGISTER_PER_THREAD_FREE(deinit_stat_lines_per_thread);
+
+static void deinit_stats(void)
+{
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ const int domain = domains[i];
+
+		ha_free(&stat_f[domain]);
+ }
+}
+
+REGISTER_POST_DEINIT(deinit_stats);
+
+static void free_trash_counters(void)
+{
+	ha_free(&trash_counters);
+}
+
+REGISTER_PER_THREAD_FREE(free_trash_counters);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "clear", "counters", NULL }, "clear counters [all] : clear max statistics counters (or all counters)", cli_parse_clear_counters, NULL, NULL },
+ { { "show", "info", NULL }, "show info [desc|json|typed|float]* : report information about the running process", cli_parse_show_info, cli_io_handler_dump_info, NULL },
+ { { "show", "stat", NULL }, "show stat [desc|json|no-maint|typed|up]*: report counters for each proxy and server", cli_parse_show_stat, cli_io_handler_dump_stat, NULL },
+ { { "show", "schema", "json", NULL }, "show schema json : report schema used for stats", NULL, cli_io_handler_dump_json_schema, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
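+/* The commands above are issued on the CLI socket, e.g. (illustrative,
+ * assuming a stats socket bound at /var/run/haproxy.sock):
+ *   echo "show stat json" | socat stdio /var/run/haproxy.sock
+ */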
+
+struct applet http_stats_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<STATS>", /* used for logging */
+ .fct = http_stats_io_handler,
+ .release = NULL,
+};
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/stconn.c b/src/stconn.c
new file mode 100644
index 0000000..e6436a9
--- /dev/null
+++ b/src/stconn.c
@@ -0,0 +1,2012 @@
+/*
+ * stream connector management functions
+ *
+ * Copyright 2021 Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/connection.h>
+#include <haproxy/check.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/pipe.h>
+#include <haproxy/pool.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+
+DECLARE_POOL(pool_head_connstream, "stconn", sizeof(struct stconn));
+DECLARE_POOL(pool_head_sedesc, "sedesc", sizeof(struct sedesc));
+
+/* functions used by default on a detached stream connector */
+static void sc_app_shutr(struct stconn *sc);
+static void sc_app_shutw(struct stconn *sc);
+static void sc_app_chk_rcv(struct stconn *sc);
+static void sc_app_chk_snd(struct stconn *sc);
+
+/* functions used on a mux-based stream connector */
+static void sc_app_shutr_conn(struct stconn *sc);
+static void sc_app_shutw_conn(struct stconn *sc);
+static void sc_app_chk_rcv_conn(struct stconn *sc);
+static void sc_app_chk_snd_conn(struct stconn *sc);
+
+/* functions used on an applet-based stream connector */
+static void sc_app_shutr_applet(struct stconn *sc);
+static void sc_app_shutw_applet(struct stconn *sc);
+static void sc_app_chk_rcv_applet(struct stconn *sc);
+static void sc_app_chk_snd_applet(struct stconn *sc);
+
+static int sc_conn_process(struct stconn *sc);
+static int sc_conn_recv(struct stconn *sc);
+static int sc_conn_send(struct stconn *sc);
+static int sc_applet_process(struct stconn *sc);
+
+/* stream connector operations for connections */
+struct sc_app_ops sc_app_conn_ops = {
+ .chk_rcv = sc_app_chk_rcv_conn,
+ .chk_snd = sc_app_chk_snd_conn,
+ .shutr = sc_app_shutr_conn,
+ .shutw = sc_app_shutw_conn,
+ .wake = sc_conn_process,
+ .name = "STRM",
+};
+
+/* stream connector operations for embedded tasks */
+struct sc_app_ops sc_app_embedded_ops = {
+ .chk_rcv = sc_app_chk_rcv,
+ .chk_snd = sc_app_chk_snd,
+ .shutr = sc_app_shutr,
+ .shutw = sc_app_shutw,
+ .wake = NULL, /* may never be used */
+ .name = "NONE", /* may never be used */
+};
+
+/* stream connector operations for applets */
+struct sc_app_ops sc_app_applet_ops = {
+ .chk_rcv = sc_app_chk_rcv_applet,
+ .chk_snd = sc_app_chk_snd_applet,
+ .shutr = sc_app_shutr_applet,
+ .shutw = sc_app_shutw_applet,
+ .wake = sc_applet_process,
+ .name = "STRM",
+};
+
+/* stream connector for health checks on connections */
+struct sc_app_ops sc_app_check_ops = {
+ .chk_rcv = NULL,
+ .chk_snd = NULL,
+ .shutr = NULL,
+ .shutw = NULL,
+ .wake = wake_srv_chk,
+ .name = "CHCK",
+};
+
+/* Initializes an endpoint */
+void sedesc_init(struct sedesc *sedesc)
+{
+ sedesc->se = NULL;
+ sedesc->conn = NULL;
+ sedesc->sc = NULL;
+ se_fl_setall(sedesc, SE_FL_NONE);
+}
+
+/* Tries to alloc an endpoint and initialize it. Returns NULL on failure. */
+struct sedesc *sedesc_new()
+{
+ struct sedesc *sedesc;
+
+ sedesc = pool_alloc(pool_head_sedesc);
+ if (unlikely(!sedesc))
+ return NULL;
+
+ sedesc_init(sedesc);
+ return sedesc;
+}
+
+/* Releases an endpoint. It is the caller's responsibility to make sure it is
+ * safe to do so and that it is not shared with another entity.
+ */
+void sedesc_free(struct sedesc *sedesc)
+{
+ pool_free(pool_head_sedesc, sedesc);
+}
+
+/* Tries to allocate a new stconn and initialize its main fields. On
+ * failure, nothing is allocated and NULL is returned. It is an internal
+ * function. The caller must, at least, set the SE_FL_ORPHAN or SE_FL_DETACHED
+ * flag.
+ */
+static struct stconn *sc_new(struct sedesc *sedesc)
+{
+ struct stconn *sc;
+
+ sc = pool_alloc(pool_head_connstream);
+
+ if (unlikely(!sc))
+ goto alloc_error;
+
+ sc->obj_type = OBJ_TYPE_SC;
+ sc->flags = SC_FL_NONE;
+ sc->state = SC_ST_INI;
+ sc->hcto = TICK_ETERNITY;
+ sc->app = NULL;
+ sc->app_ops = NULL;
+ sc->src = NULL;
+ sc->dst = NULL;
+ sc->wait_event.tasklet = NULL;
+ sc->wait_event.events = 0;
+
+ /* If there is no endpoint, allocate a new one now */
+ if (!sedesc) {
+ sedesc = sedesc_new();
+ if (unlikely(!sedesc))
+ goto alloc_error;
+ }
+ sc->sedesc = sedesc;
+ sedesc->sc = sc;
+
+ return sc;
+
+ alloc_error:
+ pool_free(pool_head_connstream, sc);
+ return NULL;
+}
+
+/* Creates a new stream connector and its associated stream from a mux. <sd> must
+ * be defined. It returns NULL on error. On success, the new stream connector is
+ * returned. In this case, SE_FL_ORPHAN flag is removed.
+ */
+struct stconn *sc_new_from_endp(struct sedesc *sd, struct session *sess, struct buffer *input)
+{
+ struct stconn *sc;
+
+ sc = sc_new(sd);
+ if (unlikely(!sc))
+ return NULL;
+ if (unlikely(!stream_new(sess, sc, input))) {
+ sd->sc = NULL;
+ if (sc->sedesc != sd) {
+ /* none was provided so sc_new() allocated one */
+ sedesc_free(sc->sedesc);
+ }
+ pool_free(pool_head_connstream, sc);
+ se_fl_set(sd, SE_FL_ORPHAN);
+ return NULL;
+ }
+ se_fl_clr(sd, SE_FL_ORPHAN);
+ return sc;
+}
+
+/* Creates a new stream connector from a stream. There is no endpoint here, thus it
+ * will be created by sc_new(). So the SE_FL_DETACHED flag is set. It returns
+ * NULL on error. On success, the new stream connector is returned.
+ */
+struct stconn *sc_new_from_strm(struct stream *strm, unsigned int flags)
+{
+ struct stconn *sc;
+
+ sc = sc_new(NULL);
+ if (unlikely(!sc))
+ return NULL;
+ sc->flags |= flags;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ sc->app = &strm->obj_type;
+ sc->app_ops = &sc_app_embedded_ops;
+ return sc;
+}
+
+/* Creates a new stream connector from a health-check. There is no endpoint here,
+ * thus it will be created by sc_new(). So the SE_FL_DETACHED flag is set. It
+ * returns NULL on error. On success, the new stream connector is returned.
+ */
+struct stconn *sc_new_from_check(struct check *check, unsigned int flags)
+{
+ struct stconn *sc;
+
+ sc = sc_new(NULL);
+ if (unlikely(!sc))
+ return NULL;
+ sc->flags |= flags;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ sc->app = &check->obj_type;
+ sc->app_ops = &sc_app_check_ops;
+ return sc;
+}
+
+/* Releases a stconn previously allocated by sc_new(), as well as its
+ * endpoint, if it exists. This function is called internally or on the error
+ * path.
+ */
+void sc_free(struct stconn *sc)
+{
+ sockaddr_free(&sc->src);
+ sockaddr_free(&sc->dst);
+ if (sc->sedesc) {
+ BUG_ON(!sc_ep_test(sc, SE_FL_DETACHED));
+ sedesc_free(sc->sedesc);
+ }
+ if (sc->wait_event.tasklet)
+ tasklet_free(sc->wait_event.tasklet);
+ pool_free(pool_head_connstream, sc);
+}
+
+/* Conditionally removes a stream connector if it is detached and if there is
+ * no app layer defined. Except on the error path, this one must be used. If
+ * released, the pointer to the SC is set to NULL.
+ */
+static void sc_free_cond(struct stconn **scp)
+{
+ struct stconn *sc = *scp;
+
+ if (!sc->app && (!sc->sedesc || sc_ep_test(sc, SE_FL_DETACHED))) {
+ sc_free(sc);
+ *scp = NULL;
+ }
+}
+
+
+/* Attaches a stconn to a mux endpoint and sets the endpoint ctx. Returns
+ * -1 on error and 0 on success. SE_FL_DETACHED flag is removed. This function is
+ * called from a mux when it is attached to a stream or a health-check.
+ */
+int sc_attach_mux(struct stconn *sc, void *sd, void *ctx)
+{
+ struct connection *conn = ctx;
+ struct sedesc *sedesc = sc->sedesc;
+
+ if (sc_strm(sc)) {
+ if (!sc->wait_event.tasklet) {
+ sc->wait_event.tasklet = tasklet_new();
+ if (!sc->wait_event.tasklet)
+ return -1;
+ sc->wait_event.tasklet->process = sc_conn_io_cb;
+ sc->wait_event.tasklet->context = sc;
+ sc->wait_event.events = 0;
+ }
+
+ sc->app_ops = &sc_app_conn_ops;
+ }
+ else if (sc_check(sc)) {
+ if (!sc->wait_event.tasklet) {
+ sc->wait_event.tasklet = tasklet_new();
+ if (!sc->wait_event.tasklet)
+ return -1;
+ sc->wait_event.tasklet->process = srv_chk_io_cb;
+ sc->wait_event.tasklet->context = sc;
+ sc->wait_event.events = 0;
+ }
+
+ sc->app_ops = &sc_app_check_ops;
+ }
+
+ sedesc->se = sd;
+ sedesc->conn = ctx;
+ se_fl_set(sedesc, SE_FL_T_MUX);
+ se_fl_clr(sedesc, SE_FL_DETACHED);
+ if (!conn->ctx)
+ conn->ctx = sc;
+ return 0;
+}
+
+/* Attaches a stconn to an applet endpoint and sets the endpoint ctx. The
+ * SE_FL_DETACHED flag is removed. This function is called by a stream when
+ * a backend applet is registered.
+ */
+static void sc_attach_applet(struct stconn *sc, void *sd)
+{
+ sc->sedesc->se = sd;
+ sc_ep_set(sc, SE_FL_T_APPLET);
+ sc_ep_clr(sc, SE_FL_DETACHED);
+ if (sc_strm(sc))
+ sc->app_ops = &sc_app_applet_ops;
+}
+
+/* Attaches a stconn to an app layer and sets the relevant
+ * callbacks. Returns -1 on error and 0 on success. The SE_FL_ORPHAN flag is
+ * removed. This function is called by a stream when it is created, to attach it
+ * to the stream connector on the client side.
+ */
+int sc_attach_strm(struct stconn *sc, struct stream *strm)
+{
+ sc->app = &strm->obj_type;
+ sc_ep_clr(sc, SE_FL_ORPHAN);
+ if (sc_ep_test(sc, SE_FL_T_MUX)) {
+ sc->wait_event.tasklet = tasklet_new();
+ if (!sc->wait_event.tasklet)
+ return -1;
+ sc->wait_event.tasklet->process = sc_conn_io_cb;
+ sc->wait_event.tasklet->context = sc;
+ sc->wait_event.events = 0;
+
+ sc->app_ops = &sc_app_conn_ops;
+ }
+ else if (sc_ep_test(sc, SE_FL_T_APPLET)) {
+ sc->app_ops = &sc_app_applet_ops;
+ }
+ else {
+ sc->app_ops = &sc_app_embedded_ops;
+ }
+ return 0;
+}
+
+/* Detaches the stconn from the endpoint, if any. For a connection, if a
+ * mux owns the connection, its ->detach() callback is called. Otherwise, it means
+ * the stream connector owns the connection. In this case the connection is closed
+ * and released. For an applet, the appctx is released. If still allocated, the
+ * endpoint is reset and flagged as detached. If the app layer is also detached,
+ * the stream connector is released.
+ */
+static void sc_detach_endp(struct stconn **scp)
+{
+ struct stconn *sc = *scp;
+
+ if (!sc)
+ return;
+
+ if (!sc->sedesc)
+ goto reset_cs;
+
+ if (sc_ep_test(sc, SE_FL_T_MUX)) {
+ struct connection *conn = __sc_conn(sc);
+ struct sedesc *sedesc = sc->sedesc;
+
+ if (conn->mux) {
+ if (sc->wait_event.events != 0)
+ conn->mux->unsubscribe(sc, sc->wait_event.events, &sc->wait_event);
+ se_fl_set(sedesc, SE_FL_ORPHAN);
+ sedesc->sc = NULL;
+ sc->sedesc = NULL;
+ conn->mux->detach(sedesc);
+ }
+ else {
+ /* It's too early to have a mux, let's just destroy
+ * the connection
+ */
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+ }
+ else if (sc_ep_test(sc, SE_FL_T_APPLET)) {
+ struct appctx *appctx = __sc_appctx(sc);
+
+ sc_ep_set(sc, SE_FL_ORPHAN);
+ sc->sedesc->sc = NULL;
+ sc->sedesc = NULL;
+ appctx_shut(appctx);
+ appctx_free(appctx);
+ }
+
+ if (sc->sedesc) {
+ /* the SD wasn't used and can be recycled */
+ sc->sedesc->se = NULL;
+ sc->sedesc->conn = NULL;
+ sc->sedesc->flags = 0;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ }
+
+ reset_cs:
+ /* FIXME: Reset the SC for now, but this must be reviewed. SC flags are
+ * only connection related for now but this will evolve.
+ */
+ sc->flags &= SC_FL_ISBACK;
+ if (sc_strm(sc))
+ sc->app_ops = &sc_app_embedded_ops;
+ else
+ sc->app_ops = NULL;
+ sc_free_cond(scp);
+}
+
+/* Detaches the stconn from the app layer. If there is no endpoint attached
+ * to the stconn, or if it is detached, the stconn is released.
+ */
+static void sc_detach_app(struct stconn **scp)
+{
+ struct stconn *sc = *scp;
+
+ if (!sc)
+ return;
+
+ sc->app = NULL;
+ sc->app_ops = NULL;
+ sockaddr_free(&sc->src);
+ sockaddr_free(&sc->dst);
+
+ if (sc->wait_event.tasklet)
+ tasklet_free(sc->wait_event.tasklet);
+ sc->wait_event.tasklet = NULL;
+ sc->wait_event.events = 0;
+ sc_free_cond(scp);
+}
+
+/* Destroy the stconn. It is detached from its endpoint and its
+ * application. After this call, the stconn must be considered as released.
+ */
+void sc_destroy(struct stconn *sc)
+{
+ sc_detach_endp(&sc);
+ sc_detach_app(&sc);
+ BUG_ON_HOT(sc);
+}
+
+/* Resets the stream connector endpoint. It happens when the app layer wants to
+ * renew its endpoint, for instance on a connection retry. If a mux or an applet
+ * is attached, a new endpoint is created. Returns -1 on error and 0 on success.
+ *
+ * Only the SE_FL_ERROR flag is removed on the endpoint. Other flags are preserved.
+ * It is the caller's responsibility to remove other flags if needed.
+ */
+int sc_reset_endp(struct stconn *sc)
+{
+ struct sedesc *new_sd;
+
+ BUG_ON(!sc->app);
+
+ sc_ep_clr(sc, SE_FL_ERROR);
+ if (!__sc_endp(sc)) {
+ /* endpoint not attached or attached to a mux with no
+ * target. Thus the endpoint will not be released but just
+ * reset. The app is still attached, so the sc will not be
+ * released.
+ */
+ sc_detach_endp(&sc);
+ return 0;
+ }
+
+ /* allocate the new endpoint first to be able to set error if it
+ * fails */
+ new_sd = sedesc_new();
+ if (unlikely(!new_sd)) {
+ sc_ep_set(sc, SE_FL_ERROR);
+ return -1;
+ }
+
+ /* The app is still attached, the sc will not be released */
+ sc_detach_endp(&sc);
+ BUG_ON(!sc);
+ BUG_ON(sc->sedesc);
+ sc->sedesc = new_sd;
+ sc->sedesc->sc = sc;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ return 0;
+}
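+
+/* Illustrative sketch of a retry path (hypothetical caller code): the app
+ * layer keeps the SC and only renews its endpoint:
+ *
+ *   if (sc_reset_endp(sc) < 0)
+ *       return 0;   ...   SE_FL_ERROR was set, abort the retry
+ *   ...   from here sc->sedesc is a fresh, detached descriptor, ready to
+ *         be attached to a new connection or applet
+ */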
+
+
+/* Create an applet to handle a stream connector as a new appctx. The SC will
+ * wake it up every time it is solicited. The appctx must be deleted by the task
+ * handler using sc_detach_endp(), possibly from within the function itself.
+ * It also pre-initializes the applet's context and returns it (or NULL in case
+ * it could not be allocated).
+ */
+struct appctx *sc_applet_create(struct stconn *sc, struct applet *app)
+{
+ struct appctx *appctx;
+
+ DPRINTF(stderr, "registering handler %p for sc %p (was %p)\n", app, sc, sc_strm_task(sc));
+
+ appctx = appctx_new_here(app, sc->sedesc);
+ if (!appctx)
+ return NULL;
+ sc_attach_applet(sc, appctx);
+ appctx->t->nice = __sc_strm(sc)->task->nice;
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ sc->state = SC_ST_RDY;
+ return appctx;
+}
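+
+/* Typical use (sketch only, <my_applet> being a hypothetical registered
+ * applet):
+ *
+ *   struct appctx *appctx = sc_applet_create(sc, &my_applet);
+ *   if (!appctx)
+ *       return -1;   ...   allocation failure, the SC is left unchanged
+ *   ...   otherwise the appctx was already woken up and the SC is SC_ST_RDY
+ */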
+
+/* Conditionally forwards the close to the write side. It returns 1 if it can
+ * be forwarded; it is then the caller's responsibility to forward the close to
+ * the write side. Otherwise, 0 is returned. In this case, the CF_SHUTW_NOW flag
+ * may be set on the channel if we are only waiting for the outgoing data to be
+ * flushed.
+ */
+static inline int sc_cond_forward_shutw(struct stconn *sc)
+{
+ /* The close must not be forwarded */
+ if (!(sc_ic(sc)->flags & CF_SHUTR) || !(sc->flags & SC_FL_NOHALF))
+ return 0;
+
+ if (!channel_is_empty(sc_ic(sc))) {
+ /* the close to the write side cannot be forwarded now because
+ * we should flush outgoing data first. But instruct the output
+ * channel it should be done ASAP.
+ */
+ channel_shutw_now(sc_oc(sc));
+ return 0;
+ }
+
+ /* the close can be immediately forwarded to the write side */
+ return 1;
+}
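+
+/* Worked example of the decision above (values are illustrative): with
+ * SC_FL_NOHALF set and CF_SHUTR already seen on the input channel:
+ *   - output channel empty     => returns 1, the caller forwards the shutw
+ *   - 4kB still to be flushed  => channel_shutw_now() is called, 0 is
+ *     returned, and the shutw will happen once the data leave the buffer
+ */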
+
+/*
+ * This function performs a shutdown-read on a detached stream connector in a
+ * connected or init state (it does nothing for other states). It either shuts
+ * the read side or marks itself as closed. The buffer flags are updated to
+ * reflect the new state. If the stream connector has SC_FL_NOHALF, we also
+ * forward the close to the write side. The owner task is woken up if it exists.
+ */
+static void sc_app_shutr(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ if (ic->flags & CF_SHUTR)
+ return;
+ ic->flags |= CF_SHUTR;
+ ic->rex = TICK_ETERNITY;
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc_oc(sc)->flags & CF_SHUTW) {
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+ else if (sc_cond_forward_shutw(sc))
+ return sc_app_shutw(sc);
+
+ /* note that if the task exists, it must unregister itself once it runs */
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+}
+
+/*
+ * This function performs a shutdown-write on a detached stream connector in a
+ * connected or init state (it does nothing for other states). It either shuts
+ * the write side or marks itself as closed. The buffer flags are updated to
+ * reflect the new state. It also closes everything if the SC was marked as
+ * being in an error state. The owner task is woken up if it exists.
+ */
+static void sc_app_shutw(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ oc->flags &= ~CF_SHUTW_NOW;
+ if (oc->flags & CF_SHUTW)
+ return;
+ oc->flags |= CF_SHUTW;
+ oc->wex = TICK_ETERNITY;
+
+ if (tick_isset(sc->hcto)) {
+ ic->rto = sc->hcto;
+ ic->rex = tick_add(now_ms, ic->rto);
+ }
+
+ switch (sc->state) {
+ case SC_ST_RDY:
+ case SC_ST_EST:
+ /* we have to shut before closing, otherwise some short messages
+ * may never leave the system, especially when there are remaining
+ * unread data in the socket input buffer, or when nolinger is set.
+ * However, if SC_FL_NOLINGER is explicitly set, we know there is
+ * no risk so we close both sides immediately.
+ */
+ if (!sc_ep_test(sc, SE_FL_ERROR) && !(sc->flags & SC_FL_NOLINGER) &&
+ !(ic->flags & (CF_SHUTR|CF_DONT_READ)))
+ return;
+
+ /* fall through */
+ case SC_ST_CON:
+ case SC_ST_CER:
+ case SC_ST_QUE:
+ case SC_ST_TAR:
+ /* Note that none of these states may happen with applets */
+ sc->state = SC_ST_DIS;
+ /* fall through */
+ default:
+ sc->flags &= ~SC_FL_NOLINGER;
+ ic->flags |= CF_SHUTR;
+ ic->rex = TICK_ETERNITY;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+
+ /* note that if the task exists, it must unregister itself once it runs */
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+}
+
+/* default chk_rcv function for scheduled tasks */
+static void sc_app_chk_rcv(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ DPRINTF(stderr, "%s: sc=%p, sc->state=%d ic->flags=%08x oc->flags=%08x\n",
+ __FUNCTION__,
+ sc, sc->state, ic->flags, sc_oc(sc)->flags);
+
+ if (ic->pipe) {
+ /* stop reading */
+ sc_need_room(sc);
+ }
+ else {
+ /* (re)start reading */
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+ }
+}
+
+/* default chk_snd function for scheduled tasks */
+static void sc_app_chk_snd(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ DPRINTF(stderr, "%s: sc=%p, sc->state=%d ic->flags=%08x oc->flags=%08x\n",
+ __FUNCTION__,
+ sc, sc->state, sc_ic(sc)->flags, oc->flags);
+
+ if (unlikely(sc->state != SC_ST_EST || (oc->flags & CF_SHUTW)))
+ return;
+
+ if (!sc_ep_test(sc, SE_FL_WAIT_DATA) || /* not waiting for data */
+ channel_is_empty(oc)) /* called with nothing to send ! */
+ return;
+
+ /* Otherwise there are remaining data to be sent in the buffer,
+ * so we tell the handler.
+ */
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+ if (!tick_isset(oc->wex))
+ oc->wex = tick_add_ifset(now_ms, oc->wto);
+
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+}
+
+/*
+ * This function performs a shutdown-read on a stream connector attached to
+ * a connection in a connected or init state (it does nothing for other
+ * states). It either shuts the read side or marks itself as closed. The buffer
+ * flags are updated to reflect the new state. If the stream connector has
+ * SC_FL_NOHALF, we also forward the close to the write side. If a control
+ * layer is defined, then it is supposed to be a socket layer and file
+ * descriptors are then shutdown or closed accordingly. The function
+ * automatically disables polling if needed.
+ */
+static void sc_app_shutr_conn(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ if (ic->flags & CF_SHUTR)
+ return;
+ ic->flags |= CF_SHUTR;
+ ic->rex = TICK_ETERNITY;
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc_oc(sc)->flags & CF_SHUTW) {
+ sc_conn_shut(sc);
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+ else if (sc_cond_forward_shutw(sc))
+ return sc_app_shutw_conn(sc);
+}
+
+/*
+ * This function performs a shutdown-write on a stream connector attached to
+ * a connection in a connected or init state (it does nothing for other
+ * states). It either shuts the write side or marks itself as closed. The
+ * buffer flags are updated to reflect the new state. It also closes
+ * everything if the SC was marked as being in an error state. If there is a
+ * data-layer shutdown, it is called.
+ */
+static void sc_app_shutw_conn(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ oc->flags &= ~CF_SHUTW_NOW;
+ if (oc->flags & CF_SHUTW)
+ return;
+ oc->flags |= CF_SHUTW;
+ oc->wex = TICK_ETERNITY;
+
+ if (tick_isset(sc->hcto)) {
+ ic->rto = sc->hcto;
+ ic->rex = tick_add(now_ms, ic->rto);
+ }
+
+ switch (sc->state) {
+ case SC_ST_RDY:
+ case SC_ST_EST:
+ /* we have to shut before closing, otherwise some short messages
+ * may never leave the system, especially when there are remaining
+ * unread data in the socket input buffer, or when nolinger is set.
+ * However, if SC_FL_NOLINGER is explicitly set, we know there is
+ * no risk so we close both sides immediately.
+ */
+
+ if (sc_ep_test(sc, SE_FL_ERROR)) {
+ /* quick close, the socket is already shut anyway */
+ }
+ else if (sc->flags & SC_FL_NOLINGER) {
+ /* unclean data-layer shutdown, typically an aborted request
+ * or a forwarded shutdown from a client to a server due to
+ * option abortonclose. No need for the TLS layer to try to
+ * emit a shutdown message.
+ */
+ sc_conn_shutw(sc, CO_SHW_SILENT);
+ }
+ else {
+ /* clean data-layer shutdown. This only happens on the
+ * frontend side, or on the backend side when forwarding
+ * a client close in TCP mode or in HTTP TUNNEL mode
+ * while option abortonclose is set. We want the TLS
+ * layer to try to signal it to the peer before we close.
+ */
+ sc_conn_shutw(sc, CO_SHW_NORMAL);
+
+ if (!(ic->flags & (CF_SHUTR|CF_DONT_READ)))
+ return;
+ }
+
+ /* fall through */
+ case SC_ST_CON:
+ /* we may have to close a pending connection, and mark the
+ * response buffer as shutr
+ */
+ sc_conn_shut(sc);
+ /* fall through */
+ case SC_ST_CER:
+ case SC_ST_QUE:
+ case SC_ST_TAR:
+ sc->state = SC_ST_DIS;
+ /* fall through */
+ default:
+ sc->flags &= ~SC_FL_NOLINGER;
+ ic->flags |= CF_SHUTR;
+ ic->rex = TICK_ETERNITY;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+}
+
+/* This function is used for inter-stream connector calls. It is called by the
+ * consumer to inform the producer side that it may be interested in checking
+ * for free space in the buffer. Note that it intentionally does not update
+ * timeouts, so that we can still check them later at wake-up. This function is
+ * dedicated to connection-based stream connectors.
+ */
+static void sc_app_chk_rcv_conn(struct stconn *sc)
+{
+ BUG_ON(!sc_conn(sc));
+
+ /* (re)start reading */
+ if (sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ tasklet_wakeup(sc->wait_event.tasklet);
+}
+
+
+/* This function is used for inter-stream connector calls. It is called by the
+ * producer to inform the consumer side that it may be interested in checking
+ * for data in the buffer. Note that it intentionally does not update timeouts,
+ * so that we can still check them later at wake-up.
+ */
+static void sc_app_chk_snd_conn(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ if (unlikely(!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST) ||
+ (oc->flags & CF_SHUTW)))
+ return;
+
+ if (unlikely(channel_is_empty(oc))) /* called with nothing to send ! */
+ return;
+
+ if (!oc->pipe && /* spliced data wants to be forwarded ASAP */
+ !sc_ep_test(sc, SE_FL_WAIT_DATA)) /* not waiting for data */
+ return;
+
+ if (!(sc->wait_event.events & SUB_RETRY_SEND) && !channel_is_empty(sc_oc(sc)))
+ sc_conn_send(sc);
+
+ if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING) || sc_is_conn_error(sc)) {
+ /* Write error on the file descriptor */
+ if (sc->state >= SC_ST_CON)
+ sc_ep_set(sc, SE_FL_ERROR);
+ goto out_wakeup;
+ }
+
+ /* OK, so now we know that some data might have been sent, and that we may
+ * have to poll first. We have to do that too if the buffer is not empty.
+ */
+ if (channel_is_empty(oc)) {
+ /* the connection is established but we can't write. Either the
+ * buffer is empty, or we just refrain from sending because the
+ * ->o limit was reached. Maybe we just wrote the last
+ * chunk and need to close.
+ */
+ if (((oc->flags & (CF_SHUTW|CF_AUTO_CLOSE|CF_SHUTW_NOW)) ==
+ (CF_AUTO_CLOSE|CF_SHUTW_NOW)) &&
+ sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST)) {
+ sc_shutw(sc);
+ goto out_wakeup;
+ }
+
+ if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == 0)
+ sc_ep_set(sc, SE_FL_WAIT_DATA);
+ oc->wex = TICK_ETERNITY;
+ }
+ else {
+ /* Otherwise there are remaining data to be sent in the buffer,
+ * which means we have to poll before doing so.
+ */
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+ if (!tick_isset(oc->wex))
+ oc->wex = tick_add_ifset(now_ms, oc->wto);
+ }
+
+ if (likely(oc->flags & CF_WRITE_ACTIVITY)) {
+ struct channel *ic = sc_ic(sc);
+
+ /* update timeout if we have written something */
+ if ((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL)) == CF_WRITE_PARTIAL &&
+ !channel_is_empty(oc))
+ oc->wex = tick_add_ifset(now_ms, oc->wto);
+
+ if (tick_isset(ic->rex) && !(sc->flags & SC_FL_INDEP_STR)) {
+ /* Note: to prevent the client from expiring read timeouts
+ * during writes, we refresh it. We only do this if the
+ * interface is not configured for "independent streams",
+ * because for some applications it's better not to do this,
+ * for instance when continuously exchanging small amounts
+ * of data which can fill the socket buffers long before a
+ * write timeout is detected.
+ */
+ ic->rex = tick_add_ifset(now_ms, ic->rto);
+ }
+ }
+
+ /* in case of special condition (error, shutdown, end of write...), we
+ * have to notify the task.
+ */
+ if (likely((oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR|CF_SHUTW)) ||
+ ((oc->flags & CF_WAKE_WRITE) &&
+ ((channel_is_empty(oc) && !oc->to_forward) ||
+ !sc_state_in(sc->state, SC_SB_EST))))) {
+ out_wakeup:
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+ }
+}
+
+/*
+ * This function performs a shutdown-read on a stream connector attached to an
+ * applet in a connected or init state (it does nothing for other states). It
+ * either shuts the read side or marks itself as closed. The buffer flags are
+ * updated to reflect the new state. If the stream connector has SC_FL_NOHALF,
+ * we also forward the close to the write side. The owner task is woken up if
+ * it exists.
+ */
+static void sc_app_shutr_applet(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ if (ic->flags & CF_SHUTR)
+ return;
+ ic->flags |= CF_SHUTR;
+ ic->rex = TICK_ETERNITY;
+
+ /* Note: on shutr, we don't call the applet */
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc_oc(sc)->flags & CF_SHUTW) {
+ appctx_shut(__sc_appctx(sc));
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+ else if (sc_cond_forward_shutw(sc))
+ return sc_app_shutw_applet(sc);
+}
+
+/*
+ * This function performs a shutdown-write on a stream connector attached to an
+ * applet in a connected or init state (it does nothing for other states). It
+ * either shuts the write side or marks itself as closed. The buffer flags are
+ * updated to reflect the new state. It also closes everything if the SC
+ * was marked as being in an error state. The owner task is woken up if it exists.
+ */
+static void sc_app_shutw_applet(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ oc->flags &= ~CF_SHUTW_NOW;
+ if (oc->flags & CF_SHUTW)
+ return;
+ oc->flags |= CF_SHUTW;
+ oc->wex = TICK_ETERNITY;
+
+ if (tick_isset(sc->hcto)) {
+ ic->rto = sc->hcto;
+ ic->rex = tick_add(now_ms, ic->rto);
+ }
+
+ /* on shutw we always wake the applet up */
+ appctx_wakeup(__sc_appctx(sc));
+
+ switch (sc->state) {
+ case SC_ST_RDY:
+ case SC_ST_EST:
+ /* we have to shut before closing, otherwise some short messages
+ * may never leave the system, especially when there are remaining
+ * unread data in the socket input buffer, or when nolinger is set.
+ * However, if SC_FL_NOLINGER is explicitly set, we know there is
+ * no risk so we close both sides immediately.
+ */
+ if (!sc_ep_test(sc, SE_FL_ERROR) && !(sc->flags & SC_FL_NOLINGER) &&
+ !(ic->flags & (CF_SHUTR|CF_DONT_READ)))
+ return;
+
+ /* fall through */
+ case SC_ST_CON:
+ case SC_ST_CER:
+ case SC_ST_QUE:
+ case SC_ST_TAR:
+ /* Note that none of these states may happen with applets */
+ appctx_shut(__sc_appctx(sc));
+ sc->state = SC_ST_DIS;
+ /* fall through */
+ default:
+ sc->flags &= ~SC_FL_NOLINGER;
+ ic->flags |= CF_SHUTR;
+ ic->rex = TICK_ETERNITY;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+}
+
+/* chk_rcv function for applets */
+static void sc_app_chk_rcv_applet(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ DPRINTF(stderr, "%s: sc=%p, sc->state=%d ic->flags=%08x oc->flags=%08x\n",
+ __FUNCTION__,
+ sc, sc->state, ic->flags, sc_oc(sc)->flags);
+
+ if (!ic->pipe) {
+ /* (re)start reading */
+ appctx_wakeup(__sc_appctx(sc));
+ }
+}
+
+/* chk_snd function for applets */
+static void sc_app_chk_snd_applet(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ DPRINTF(stderr, "%s: sc=%p, sc->state=%d ic->flags=%08x oc->flags=%08x\n",
+ __FUNCTION__,
+ sc, sc->state, sc_ic(sc)->flags, oc->flags);
+
+ if (unlikely(sc->state != SC_ST_EST || (oc->flags & CF_SHUTW)))
+ return;
+
+ /* we only wake the applet up if it was waiting for some data and is ready to consume it */
+ if (!sc_ep_test(sc, SE_FL_WAIT_DATA) || sc_ep_test(sc, SE_FL_WONT_CONSUME))
+ return;
+
+ if (!tick_isset(oc->wex))
+ oc->wex = tick_add_ifset(now_ms, oc->wto);
+
+ if (!channel_is_empty(oc)) {
+ /* (re)start sending */
+ appctx_wakeup(__sc_appctx(sc));
+ }
+}
+
+
+/* This function is designed to be called from within the stream handler to
+ * update the input channel's expiration timer and the stream connector's
+ * Rx flags based on the channel's flags. It needs to be called only once
+ * after the channel's flags have settled down, and before they are cleared,
+ * though it doesn't harm to call it as often as desired (it just slightly
+ * hurts performance). It must not be called from outside of the stream
+ * handler, as what it does will be used to compute the stream task's
+ * expiration.
+ */
+void sc_update_rx(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ if (ic->flags & CF_SHUTR)
+ return;
+
+ /* Read not closed, update FD status and timeout for reads */
+ if (ic->flags & CF_DONT_READ)
+ sc_wont_read(sc);
+ else
+ sc_will_read(sc);
+
+ if (!channel_is_empty(ic) || !channel_may_recv(ic)) {
+ /* stop reading, imposed by channel's policy or contents */
+ sc_need_room(sc);
+ }
+ else {
+ /* (re)start reading and update timeout. Note: we don't recompute the timeout
+ * every time we get here, otherwise it would risk never to expire. We only
+ * update it if it was not yet set. The stream socket handler will already
+ * have updated it if there has been a completed I/O.
+ */
+ sc_have_room(sc);
+ }
+ if ((ic->flags & CF_EOI) || sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM))
+ ic->rex = TICK_ETERNITY;
+ else if (!(ic->flags & CF_READ_NOEXP) && !tick_isset(ic->rex))
+ ic->rex = tick_add_ifset(now_ms, ic->rto);
+
+ sc_chk_rcv(sc);
+}
+
+/* This function is designed to be called from within the stream handler to
+ * update the output channel's expiration timer and the stream connector's
+ * Tx flags based on the channel's flags. It needs to be called only once
+ * after the channel's flags have settled down, and before they are cleared,
+ * though it doesn't harm to call it as often as desired (it just slightly
+ * hurts performance). It must not be called from outside of the stream
+ * handler, as what it does will be used to compute the stream task's
+ * expiration.
+ */
+void sc_update_tx(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+ struct channel *ic = sc_ic(sc);
+
+ if (oc->flags & CF_SHUTW)
+ return;
+
+ /* Write not closed, update FD status and timeout for writes */
+ if (channel_is_empty(oc)) {
+ /* stop writing */
+ if (!sc_ep_test(sc, SE_FL_WAIT_DATA)) {
+ if ((oc->flags & CF_SHUTW_NOW) == 0)
+ sc_ep_set(sc, SE_FL_WAIT_DATA);
+ oc->wex = TICK_ETERNITY;
+ }
+ return;
+ }
+
+ /* (re)start writing and update timeout. Note: we don't recompute the timeout
+ * every time we get here, otherwise it would risk never to expire. We only
+ * update it if it was not yet set. The stream socket handler will already
+ * have updated it if there has been a completed I/O.
+ */
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+ if (!tick_isset(oc->wex)) {
+ oc->wex = tick_add_ifset(now_ms, oc->wto);
+ if (tick_isset(ic->rex) && !(sc->flags & SC_FL_INDEP_STR)) {
+ /* Note: depending on the protocol, we don't know if we're waiting
+ * for incoming data or not. So in order to prevent the socket from
+ * expiring read timeouts during writes, we refresh the read timeout,
+ * except if it was already infinite or if we have explicitly setup
+ * independent streams.
+ */
+ ic->rex = tick_add_ifset(now_ms, ic->rto);
+ }
+ }
+}
+
+/* This function is the equivalent of sc_update() except that it's
+ * designed to be called from outside the stream handlers, typically the lower
+ * layers (applets, connections) after I/O completion. After updating the stream
+ * connector and timeouts, it will try to forward what can be forwarded, then to
+ * wake the associated task up if an important event requires special handling.
+ * It may update SE_FL_WAIT_DATA and/or SC_FL_NEED_ROOM, which the callers are
+ * encouraged to watch to take appropriate action.
+ * It should not be called from within the stream itself, sc_update()
+ * is designed for this.
+ */
+static void sc_notify(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+ struct stconn *sco = sc_opposite(sc);
+ struct task *task = sc_strm_task(sc);
+
+ /* process consumer side */
+ if (channel_is_empty(oc)) {
+ struct connection *conn = sc_conn(sc);
+
+ if (((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW) &&
+ (sc->state == SC_ST_EST) && (!conn || !(conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS))))
+ sc_shutw(sc);
+ oc->wex = TICK_ETERNITY;
+ }
+
+ /* indicate that we may be waiting for data from the output channel or
+ * we're about to close and can't expect more data if SHUTW_NOW is there.
+ */
+ if (!(oc->flags & (CF_SHUTW|CF_SHUTW_NOW)))
+ sc_ep_set(sc, SE_FL_WAIT_DATA);
+ else if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW)
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+
+ /* update OC timeouts and wake the other side up if it's waiting for room */
+ if (oc->flags & CF_WRITE_ACTIVITY) {
+ if ((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL)) == CF_WRITE_PARTIAL &&
+ !channel_is_empty(oc))
+ if (tick_isset(oc->wex))
+ oc->wex = tick_add_ifset(now_ms, oc->wto);
+
+ if (!(sc->flags & SC_FL_INDEP_STR))
+ if (tick_isset(ic->rex))
+ ic->rex = tick_add_ifset(now_ms, ic->rto);
+ }
+
+ if (oc->flags & CF_DONT_READ)
+ sc_wont_read(sco);
+ else
+ sc_will_read(sco);
+
+ /* Notify the other side when we've injected data into the IC that
+ * needs to be forwarded. We can do fast-forwarding as soon as there
+ * are output data, but we avoid doing this if some of the data are
+ * not yet scheduled for being forwarded, because it is very likely
+ * that it will be done again immediately afterwards once the following
+ * data are parsed (eg: HTTP chunking). We only clear SC_FL_NEED_ROOM
+ * once we've emptied *some* of the output buffer, and not just when
+ * there is available room, because applets are often forced to stop
+ * before the buffer is full. We must not stop based on input data
+ * alone because an HTTP parser might need more data to complete the
+ * parsing.
+ */
+ if (!channel_is_empty(ic) &&
+ sc_ep_test(sco, SE_FL_WAIT_DATA) &&
+ (!(ic->flags & CF_EXPECT_MORE) || c_full(ic) || ci_data(ic) == 0 || ic->pipe)) {
+ int new_len, last_len;
+
+ last_len = co_data(ic);
+ if (ic->pipe)
+ last_len += ic->pipe->data;
+
+ sc_chk_snd(sco);
+
+ new_len = co_data(ic);
+ if (ic->pipe)
+ new_len += ic->pipe->data;
+
+ /* check if the consumer has freed some space either in the
+ * buffer or in the pipe.
+ */
+ if (new_len < last_len)
+ sc_have_room(sc);
+ }
+
+ if (!(ic->flags & CF_DONT_READ))
+ sc_will_read(sc);
+
+ sc_chk_rcv(sc);
+ sc_chk_rcv(sco);
+
+ if (ic->flags & (CF_EOI|CF_SHUTR) || sc_ep_test(sc, SE_FL_APPLET_NEED_CONN) ||
+ (sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM))) {
+ ic->rex = TICK_ETERNITY;
+ }
+ else if ((ic->flags & (CF_SHUTR|CF_READ_PARTIAL)) == CF_READ_PARTIAL) {
+ /* we must re-enable reading if sc_chk_snd() has freed some space */
+ if (!(ic->flags & CF_READ_NOEXP) && tick_isset(ic->rex))
+ ic->rex = tick_add_ifset(now_ms, ic->rto);
+ }
+
+ /* wake the task up only when needed */
+ if (/* changes on the production side */
+ (ic->flags & (CF_READ_NULL|CF_READ_ERROR)) ||
+ !sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST) ||
+ sc_ep_test(sc, SE_FL_ERROR) ||
+ ((ic->flags & CF_READ_PARTIAL) &&
+ ((ic->flags & CF_EOI) || !ic->to_forward || sco->state != SC_ST_EST)) ||
+
+ /* changes on the consumption side */
+ (oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR)) ||
+ ((oc->flags & CF_WRITE_ACTIVITY) &&
+ ((oc->flags & CF_SHUTW) ||
+ (((oc->flags & CF_WAKE_WRITE) ||
+ !(oc->flags & (CF_AUTO_CLOSE|CF_SHUTW_NOW|CF_SHUTW))) &&
+ (sco->state != SC_ST_EST ||
+ (channel_is_empty(oc) && !oc->to_forward)))))) {
+ task_wakeup(task, TASK_WOKEN_IO);
+ }
+ else {
+ /* Update expiration date for the task and requeue it */
+ task->expire = tick_first((tick_is_expired(task->expire, now_ms) ? 0 : task->expire),
+ tick_first(tick_first(ic->rex, ic->wex),
+ tick_first(oc->rex, oc->wex)));
+
+ task->expire = tick_first(task->expire, ic->analyse_exp);
+ task->expire = tick_first(task->expire, oc->analyse_exp);
+ task->expire = tick_first(task->expire, __sc_strm(sc)->conn_exp);
+
+ task_queue(task);
+ }
+ if (ic->flags & CF_READ_ACTIVITY)
+ ic->flags &= ~CF_READ_DONTWAIT;
+}
+
+/*
+ * This function propagates a null read received on a socket-based connection.
+ * It updates the stream connector. If the stream connector has SC_FL_NOHALF,
+ * the close is also forwarded to the write side as an abort.
+ */
+static void sc_conn_read0(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ if (ic->flags & CF_SHUTR)
+ return;
+ ic->flags |= CF_SHUTR;
+ ic->rex = TICK_ETERNITY;
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (oc->flags & CF_SHUTW)
+ goto do_close;
+
+ if (sc_cond_forward_shutw(sc)) {
+ /* we want to immediately forward this close to the write side */
+ /* force flag on ssl to keep stream in cache */
+ sc_conn_shutw(sc, CO_SHW_SILENT);
+ goto do_close;
+ }
+
+ /* otherwise that's just a normal read shutdown */
+ return;
+
+ do_close:
+ /* OK we completely close the socket here just as if we went through sc_shut[rw]() */
+ sc_conn_shut(sc);
+
+ oc->flags &= ~CF_SHUTW_NOW;
+ oc->flags |= CF_SHUTW;
+ oc->wex = TICK_ETERNITY;
+
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ return;
+}
+
+/*
+ * This is the callback which is called by the connection layer to receive data
+ * into the buffer from the connection. It calls the mux layer's rcv_buf
+ * function in a loop.
+ */
+static int sc_conn_recv(struct stconn *sc)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct channel *ic = sc_ic(sc);
+ int ret, max, cur_read = 0;
+ int read_poll = MAX_READ_POLL_LOOPS;
+ int flags = 0;
+
+ /* If not established yet, do nothing. */
+ if (sc->state != SC_ST_EST)
+ return 0;
+
+ /* If another call to sc_conn_recv() failed, and we subscribed to
+ * recv events already, give up now.
+ */
+ if (sc->wait_event.events & SUB_RETRY_RECV)
+ return 0;
+
+ /* maybe we were called immediately after an asynchronous shutr */
+ if (ic->flags & CF_SHUTR)
+ return 1;
+
+ /* we must wait because the mux is not installed yet */
+ if (!conn->mux)
+ return 0;
+
+ /* stop here if we reached the end of data */
+ if (sc_ep_test(sc, SE_FL_EOS))
+ goto end_recv;
+
+ /* stop immediately on errors. Note that we DON'T want to stop on
+ * POLL_ERR, as the poller might report a write error while there
+ * are still data available in the recv buffer. This typically
+ * happens when we send too large a request to a backend server
+ * which rejects it before reading it all.
+ */
+ if (!sc_ep_test(sc, SE_FL_RCV_MORE)) {
+ if (!conn_xprt_ready(conn))
+ return 0;
+ if (sc_ep_test(sc, SE_FL_ERROR))
+ goto end_recv;
+ }
+
+ /* prepare to detect if the mux needs more room */
+ sc_ep_clr(sc, SE_FL_WANT_ROOM);
+
+ if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) && !co_data(ic) &&
+ global.tune.idle_timer &&
+ (unsigned short)(now_ms - ic->last_read) >= global.tune.idle_timer) {
+ /* The buffer was empty and nothing was transferred for more
+ * than one second. This was caused by a pause and not by
+ * congestion. Reset any streaming mode to reduce latency.
+ */
+ ic->xfer_small = 0;
+ ic->xfer_large = 0;
+ ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
+ }
+
+ /* First, let's see if we may splice data across the channel without
+ * using a buffer.
+ */
+ if (sc_ep_test(sc, SE_FL_MAY_SPLICE) &&
+ (ic->pipe || ic->to_forward >= MIN_SPLICE_FORWARD) &&
+ ic->flags & CF_KERN_SPLICING) {
+ if (c_data(ic)) {
+ /* We're embarrassed, there are already data pending in
+ * the buffer and we don't want to have them at two
+ * locations at a time. Let's indicate we need some
+ * place and ask the consumer to hurry.
+ */
+ flags |= CO_RFL_BUF_FLUSH;
+ goto abort_splice;
+ }
+
+ if (unlikely(ic->pipe == NULL)) {
+ if (pipes_used >= global.maxpipes || !(ic->pipe = get_pipe())) {
+ ic->flags &= ~CF_KERN_SPLICING;
+ goto abort_splice;
+ }
+ }
+
+ ret = conn->mux->rcv_pipe(sc, ic->pipe, ic->to_forward);
+ if (ret < 0) {
+ /* splice not supported on this end, let's disable it */
+ ic->flags &= ~CF_KERN_SPLICING;
+ goto abort_splice;
+ }
+
+ if (ret > 0) {
+ if (ic->to_forward != CHN_INFINITE_FORWARD)
+ ic->to_forward -= ret;
+ ic->total += ret;
+ cur_read += ret;
+ ic->flags |= CF_READ_PARTIAL;
+ }
+
+ if (sc_ep_test(sc, SE_FL_EOS | SE_FL_ERROR))
+ goto end_recv;
+
+ if (conn->flags & CO_FL_WAIT_ROOM) {
+ /* the pipe is full or we have read enough data that it
+ * could soon be full. Let's stop before needing to poll.
+ */
+ sc_need_room(sc);
+ goto done_recv;
+ }
+
+ /* splice not possible (anymore), let's go on on standard copy */
+ }
+
+ abort_splice:
+ if (ic->pipe && unlikely(!ic->pipe->data)) {
+ put_pipe(ic->pipe);
+ ic->pipe = NULL;
+ }
+
+ if (ic->pipe && ic->to_forward && !(flags & CO_RFL_BUF_FLUSH) && sc_ep_test(sc, SE_FL_MAY_SPLICE)) {
+ /* don't break splicing by reading, but still call rcv_buf()
+ * to pass the flag.
+ */
+ goto done_recv;
+ }
+
+ /* now we'll need an input buffer for the stream */
+ if (!sc_alloc_ibuf(sc, &(__sc_strm(sc)->buffer_wait)))
+ goto end_recv;
+
+ /* For an HTX stream, if the buffer is stuck (no output data with some
+ * input data) and if the HTX message is fragmented or if its free space
+ * wraps, we force an HTX defragmentation. It is a way to have a
+ * contiguous free space and to let the mux copy as much data as
+ * possible.
+ *
+ * NOTE: A possible optimization may be to let the mux decide if a defrag
+ * is required or not, depending on the amount of data to be transferred.
+ */
+ if (IS_HTX_STRM(__sc_strm(sc)) && !co_data(ic)) {
+ struct htx *htx = htxbuf(&ic->buf);
+
+ if (htx_is_not_empty(htx) && ((htx->flags & HTX_FL_FRAGMENTED) || htx_space_wraps(htx)))
+ htx_defrag(htx, NULL, 0);
+ }
+
+ /* Instruct the mux that it must subscribe for read events */
+ flags |= ((!conn_is_back(conn) && (__sc_strm(sc)->be->options & PR_O_ABRT_CLOSE)) ? CO_RFL_KEEP_RECV : 0);
+
+ /* Important note : if we're called with POLL_IN|POLL_HUP, it means the read polling
+ * was enabled, which implies that the recv buffer was not full. So we have a guarantee
+ * that if such an event is not handled above in splice, it will be handled here by
+ * recv().
+ */
+ while (sc_ep_test(sc, SE_FL_RCV_MORE) ||
+ (!(conn->flags & CO_FL_HANDSHAKE) &&
+ (!sc_ep_test(sc, SE_FL_ERROR | SE_FL_EOS)) && !(ic->flags & CF_SHUTR))) {
+ int cur_flags = flags;
+
+ /* Compute transient CO_RFL_* flags */
+ if (co_data(ic)) {
+ cur_flags |= (CO_RFL_BUF_WET | CO_RFL_BUF_NOT_STUCK);
+ }
+
+ /* <max> may be null. It is the mux's responsibility to set
+ * SE_FL_RCV_MORE on the SC if more space is needed.
+ */
+ max = channel_recv_max(ic);
+ ret = conn->mux->rcv_buf(sc, &ic->buf, max, cur_flags);
+
+ if (sc_ep_test(sc, SE_FL_WANT_ROOM)) {
+ /* SE_FL_WANT_ROOM must not be reported if the channel's
+ * buffer is empty.
+ */
+ BUG_ON(c_empty(ic));
+
+ sc_need_room(sc);
+ /* Add READ_PARTIAL because some data are pending but
+ * cannot be xferred to the channel
+ */
+ ic->flags |= CF_READ_PARTIAL;
+ }
+
+ if (ret <= 0) {
+ /* if we refrained from reading because we asked for a
+ * flush to satisfy rcv_pipe(), we must not subscribe
+ * and instead report that there's not enough room
+ * here to proceed.
+ */
+ if (flags & CO_RFL_BUF_FLUSH)
+ sc_need_room(sc);
+ break;
+ }
+
+ cur_read += ret;
+
+ /* if we're allowed to directly forward data, we must update ->o */
+ if (ic->to_forward && !(ic->flags & (CF_SHUTW|CF_SHUTW_NOW))) {
+ unsigned long fwd = ret;
+ if (ic->to_forward != CHN_INFINITE_FORWARD) {
+ if (fwd > ic->to_forward)
+ fwd = ic->to_forward;
+ ic->to_forward -= fwd;
+ }
+ c_adv(ic, fwd);
+ }
+
+ ic->flags |= CF_READ_PARTIAL;
+ ic->total += ret;
+
+ /* End-of-input reached, we can leave. In this case, it is
+ * important to break the loop so as not to block the SC because of
+ * the channel's policies. This way, we are still able to receive
+ * shutdowns.
+ */
+ if (sc_ep_test(sc, SE_FL_EOI))
+ break;
+
+ if ((ic->flags & CF_READ_DONTWAIT) || --read_poll <= 0) {
+ /* we're stopped by the channel's policy */
+ sc_wont_read(sc);
+ break;
+ }
+
+ /* if too many bytes were missing from last read, it means that
+ * it's pointless trying to read again because the system does
+ * not have them in buffers.
+ */
+ if (ret < max) {
+ /* if a streamer has read few data, it may be because we
+ * have exhausted system buffers. It's not worth trying
+ * again.
+ */
+ if (ic->flags & CF_STREAMER) {
+ /* we're stopped by the channel's policy */
+ sc_wont_read(sc);
+ break;
+ }
+
+ /* if we read a large block smaller than what we requested,
+ * it's almost certain we'll never get anything more.
+ */
+ if (ret >= global.tune.recv_enough) {
+ /* we're stopped by the channel's policy */
+ sc_wont_read(sc);
+ break;
+ }
+ }
+
+ /* if we are waiting for more space, don't try to read more data
+ * right now.
+ */
+ if (sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM))
+ break;
+ } /* while !flags */
+
+ done_recv:
+ if (cur_read) {
+ if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) &&
+ (cur_read <= ic->buf.size / 2)) {
+ ic->xfer_large = 0;
+ ic->xfer_small++;
+ if (ic->xfer_small >= 3) {
+ /* we have read less than half of the buffer in
+ * one pass, and this happened at least 3 times.
+ * This is definitely not a streamer.
+ */
+ ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
+ }
+ else if (ic->xfer_small >= 2) {
+ /* if the buffer has been at least half full twice,
+ * we receive faster than we send, so at least it
+ * is not a "fast streamer".
+ */
+ ic->flags &= ~CF_STREAMER_FAST;
+ }
+ }
+ else if (!(ic->flags & CF_STREAMER_FAST) &&
+ (cur_read >= ic->buf.size - global.tune.maxrewrite)) {
+ /* we read a full buffer at once */
+ ic->xfer_small = 0;
+ ic->xfer_large++;
+ if (ic->xfer_large >= 3) {
+ /* we call this buffer a fast streamer if it manages
+ * to be filled in one call 3 consecutive times.
+ */
+ ic->flags |= (CF_STREAMER | CF_STREAMER_FAST);
+ }
+ }
+ else {
+ ic->xfer_small = 0;
+ ic->xfer_large = 0;
+ }
+ ic->last_read = now_ms;
+ }
+
+ end_recv:
+ ret = (cur_read != 0);
+
+ /* Report EOI on the channel if it was reached from the mux point of
+ * view. */
+ if (sc_ep_test(sc, SE_FL_EOI) && !(ic->flags & CF_EOI)) {
+ ic->flags |= (CF_EOI|CF_READ_PARTIAL);
+ ret = 1;
+ }
+
+ if (sc_ep_test(sc, SE_FL_ERROR))
+ ret = 1;
+ else if (sc_ep_test(sc, SE_FL_EOS)) {
+ /* we received a shutdown */
+ ic->flags |= CF_READ_NULL;
+ if (ic->flags & CF_AUTO_CLOSE)
+ channel_shutw_now(ic);
+ sc_conn_read0(sc);
+ ret = 1;
+ }
+ else if (!(sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM)) &&
+ !(ic->flags & CF_SHUTR)) {
+ /* Subscribe to receive events if we're blocking on I/O */
+ conn->mux->subscribe(sc, SUB_RETRY_RECV, &sc->wait_event);
+ se_have_no_more_data(sc->sedesc);
+ } else {
+ se_have_more_data(sc->sedesc);
+ ret = 1;
+ }
+ return ret;
+}
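+
+/* Worked example of the streamer heuristic above, assuming the default 16kB
+ * buffer and 1kB maxrewrite: three consecutive calls each reading a full
+ * buffer (>= 15kB at once) set CF_STREAMER and CF_STREAMER_FAST; calls
+ * reading at most half the buffer (8kB) increment xfer_small instead,
+ * clearing CF_STREAMER_FAST after two and both flags after three. One
+ * second without a read (tune.idle_timer) also resets the streaming state.
+ */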
+
+/* This tries to perform a synchronous receive on the stream connector to
+ * try to collect the last arrived data. In practice it's only implemented
+ * for mux-based stream connectors. Returns 0 if nothing was done, non-zero
+ * if new data or a shutdown were collected. This may result in some delayed
+ * receive calls being programmed and performed later, though it doesn't
+ * provide any such guarantee.
+ */
+int sc_conn_sync_recv(struct stconn *sc)
+{
+ if (!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST))
+ return 0;
+
+ if (!sc_mux_ops(sc))
+ return 0; // only mux-based stconns are supported
+
+ if (sc->wait_event.events & SUB_RETRY_RECV)
+ return 0; // already subscribed
+
+ if (!sc_is_recv_allowed(sc))
+ return 0; // already failed
+
+ return sc_conn_recv(sc);
+}
+
+/*
+ * This function is called to send buffer data to a stream socket.
+ * It calls the mux layer's snd_buf function. It relies on the
+ * caller to commit polling changes. The caller should check conn->flags
+ * for errors.
+ */
+static int sc_conn_send(struct stconn *sc)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct stream *s = __sc_strm(sc);
+ struct channel *oc = sc_oc(sc);
+ int ret;
+ int did_send = 0;
+
+ if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING) || sc_is_conn_error(sc)) {
+ /* We're probably there because the tasklet was woken up,
+ * but process_stream() ran before, detected there was an
+ * error and put the SC back to SC_ST_TAR. There's still
+ * CO_FL_ERROR on the connection but we don't want to add
+ * SE_FL_ERROR back, so give up.
+ */
+ if (sc->state < SC_ST_CON)
+ return 0;
+ sc_ep_set(sc, SE_FL_ERROR);
+ return 1;
+ }
+
+ /* We're already waiting to be able to send, give up */
+ if (sc->wait_event.events & SUB_RETRY_SEND)
+ return 0;
+
+ /* we might have been called just after an asynchronous shutw */
+ if (oc->flags & CF_SHUTW)
+ return 1;
+
+ /* we must wait because the mux is not installed yet */
+ if (!conn->mux)
+ return 0;
+
+ if (oc->pipe && conn->xprt->snd_pipe && conn->mux->snd_pipe) {
+ ret = conn->mux->snd_pipe(sc, oc->pipe);
+ if (ret > 0)
+ did_send = 1;
+
+ if (!oc->pipe->data) {
+ put_pipe(oc->pipe);
+ oc->pipe = NULL;
+ }
+
+ if (oc->pipe)
+ goto end;
+ }
+
+ /* At this point, the pipe is empty, but we may still have data pending
+ * in the normal buffer.
+ */
+ if (co_data(oc)) {
+ /* when we're here, we already know that there is no spliced
+ * data left, and that there are sendable buffered data.
+ */
+
+ /* check if we want to inform the kernel that we're interested in
+ * sending more data after this call. We want this if :
+ * - we're about to close after this last send and want to merge
+ * the ongoing FIN with the last segment.
+ * - we know we can't send everything at once and must get back
+ * here because of unaligned data
+ * - there is still a finite amount of data to forward
+ * The test is arranged so that the most common case does only 2
+ * tests.
+ */
+ unsigned int send_flag = 0;
+
+ if ((!(oc->flags & (CF_NEVER_WAIT|CF_SEND_DONTWAIT)) &&
+ ((oc->to_forward && oc->to_forward != CHN_INFINITE_FORWARD) ||
+ (oc->flags & CF_EXPECT_MORE) ||
+ (IS_HTX_STRM(s) &&
+ (!(oc->flags & (CF_EOI|CF_SHUTR)) && htx_expect_more(htxbuf(&oc->buf)))))) ||
+ ((oc->flags & CF_ISRESP) &&
+ ((oc->flags & (CF_AUTO_CLOSE|CF_SHUTW_NOW)) == (CF_AUTO_CLOSE|CF_SHUTW_NOW))))
+ send_flag |= CO_SFL_MSG_MORE;
+
+ if (oc->flags & CF_STREAMER)
+ send_flag |= CO_SFL_STREAMER;
+
+ if (s->txn && s->txn->flags & TX_L7_RETRY && !b_data(&s->txn->l7_buffer)) {
+ /* If we want to be able to do L7 retries, copy
+ * the data we're about to send, so that we are able
+ * to resend them if needed
+ */
+ /* Try to allocate a buffer if we had none.
+ * If it fails, the l7 retries are simply
+ * disabled by clearing the TX_L7_RETRY
+ * flag.
+ */
+ if (s->txn->req.msg_state != HTTP_MSG_DONE)
+ s->txn->flags &= ~TX_L7_RETRY;
+ else {
+ if (b_alloc(&s->txn->l7_buffer) == NULL)
+ s->txn->flags &= ~TX_L7_RETRY;
+ else {
+ memcpy(b_orig(&s->txn->l7_buffer),
+ b_orig(&oc->buf),
+ b_size(&oc->buf));
+ s->txn->l7_buffer.head = co_data(oc);
+ b_add(&s->txn->l7_buffer, co_data(oc));
+ }
+
+ }
+ }
+
+ ret = conn->mux->snd_buf(sc, &oc->buf, co_data(oc), send_flag);
+ if (ret > 0) {
+ did_send = 1;
+ c_rew(oc, ret);
+ c_realign_if_empty(oc);
+
+ if (!co_data(oc)) {
+ /* Always clear both flags once everything has been sent, they're one-shot */
+ oc->flags &= ~(CF_EXPECT_MORE | CF_SEND_DONTWAIT);
+ }
+ /* if some data remain in the buffer, it's only because the
+ * system buffers are full, we will try next time.
+ */
+ }
+ }
+
+ end:
+ if (did_send) {
+ oc->flags |= CF_WRITE_PARTIAL | CF_WROTE_DATA;
+ if (sc->state == SC_ST_CON)
+ sc->state = SC_ST_RDY;
+
+ sc_have_room(sc_opposite(sc));
+ }
+
+ if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING)) {
+ sc_ep_set(sc, SE_FL_ERROR);
+ return 1;
+ }
+
+ /* We couldn't send all of our data, let the mux know we'd like to send more */
+ if (!channel_is_empty(oc))
+ conn->mux->subscribe(sc, SUB_RETRY_SEND, &sc->wait_event);
+ return did_send;
+}
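+
+/* Worked example of the CO_SFL_MSG_MORE decision above (illustrative,
+ * assuming CF_NEVER_WAIT and CF_SEND_DONTWAIT are not set): when 10kB out
+ * of a known 50kB body remain to be forwarded, to_forward is finite and
+ * non-null, so MSG_MORE is set and the kernel may merge this segment with
+ * the next one; on the last chunk of a response about to be closed
+ * (CF_AUTO_CLOSE|CF_SHUTW_NOW both set), MSG_MORE is set too so that the
+ * FIN can be merged with the final segment.
+ */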
+
+/* perform a synchronous send() for the stream connector. The CF_WRITE_NULL and
+ * CF_WRITE_PARTIAL flags are cleared prior to the attempt, and will possibly
+ * be updated in case of success.
+ */
+void sc_conn_sync_send(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ oc->flags &= ~(CF_WRITE_NULL|CF_WRITE_PARTIAL);
+
+ if (oc->flags & CF_SHUTW)
+ return;
+
+ if (channel_is_empty(oc))
+ return;
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (!sc_mux_ops(sc))
+ return;
+
+ sc_conn_send(sc);
+}
+
+/* Called by I/O handlers after completion. It propagates
+ * connection flags to the stream connector, updates the stream (which may or
+ * may not take this opportunity to try to forward data), then updates the
+ * connection's polling based on the channels and stream connector's final
+ * states. The function always returns 0.
+ */
+static int sc_conn_process(struct stconn *sc)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!conn);
+
+ /* If we have data to send, try it now */
+ if (!channel_is_empty(oc) && !(sc->wait_event.events & SUB_RETRY_SEND))
+ sc_conn_send(sc);
+
+ /* First step, report to the stream connector what was detected at the
+ * connection layer: errors and connection establishment.
+ * Only add SE_FL_ERROR if we're connected or attempting to
+ * connect. We may get there because we got woken up, but only ran
+ * after process_stream() noticed there was an error and decided
+ * to retry to connect; the connection may still have CO_FL_ERROR,
+ * and we don't want to add SE_FL_ERROR back.
+ *
+ * Note: This test is only required because sc_conn_process is also the SC
+ * wake callback. Otherwise sc_conn_recv()/sc_conn_send() already take
+ * care of it.
+ */
+
+ if (sc->state >= SC_ST_CON) {
+ if (sc_is_conn_error(sc))
+ sc_ep_set(sc, SE_FL_ERROR);
+ }
+
+ /* If we had early data, and the handshake ended, then
+ * we can remove the flag, and attempt to wake the task up,
+ * in the event there's an analyser waiting for the end of
+ * the handshake.
+ */
+ if (!(conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS)) &&
+ sc_ep_test(sc, SE_FL_WAIT_FOR_HS)) {
+ sc_ep_clr(sc, SE_FL_WAIT_FOR_HS);
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_MSG);
+ }
+
+ if (!sc_state_in(sc->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO) &&
+ (conn->flags & CO_FL_WAIT_XPRT) == 0) {
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ oc->flags |= CF_WRITE_NULL;
+ if (sc->state == SC_ST_CON)
+ sc->state = SC_ST_RDY;
+ }
+
+ /* Report EOS on the channel if it was reached from the mux point of
+ * view.
+ *
+ * Note: This test is only required because sc_conn_process is also the SC
+ * wake callback. Otherwise sc_conn_recv()/sc_conn_send() already take
+ * care of it.
+ */
+ if (sc_ep_test(sc, SE_FL_EOS) && !(ic->flags & CF_SHUTR)) {
+ /* we received a shutdown */
+ ic->flags |= CF_READ_NULL;
+ if (ic->flags & CF_AUTO_CLOSE)
+ channel_shutw_now(ic);
+ sc_conn_read0(sc);
+ }
+
+ /* Report EOI on the channel if it was reached from the mux point of
+ * view.
+ *
+ * Note: This test is only required because sc_conn_process is also the SC
+ * wake callback. Otherwise sc_conn_recv()/sc_conn_send() already take
+ * care of it.
+ */
+ if (sc_ep_test(sc, SE_FL_EOI) && !(ic->flags & CF_EOI))
+ ic->flags |= (CF_EOI|CF_READ_PARTIAL);
+
+ /* Second step : update the stream connector and channels, try to forward any
+ * pending data, then possibly wake the stream up based on the new
+ * stream connector status.
+ */
+ sc_notify(sc);
+ stream_release_buffers(__sc_strm(sc));
+ return 0;
+}
+
+/* This is the ->process() function for any stream connector's wait_event task.
+ * It's assigned during the stream connector's initialization, for any type of
+ * stream connector. Thus it is always safe to perform a tasklet_wakeup() on a
+ * stream connector, as the presence of the SC is checked there.
+ */
+struct task *sc_conn_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct stconn *sc = ctx;
+ int ret = 0;
+
+ if (!sc_conn(sc))
+ return t;
+
+ if (!(sc->wait_event.events & SUB_RETRY_SEND) && !channel_is_empty(sc_oc(sc)))
+ ret = sc_conn_send(sc);
+ if (!(sc->wait_event.events & SUB_RETRY_RECV))
+ ret |= sc_conn_recv(sc);
+ if (ret != 0)
+ sc_conn_process(sc);
+
+ stream_release_buffers(__sc_strm(sc));
+ return t;
+}
+
+/* Callback to be used by applet handlers upon completion. It updates the stream
+ * (which may or may not take this opportunity to try to forward data), then
+ * may re-enable the applet based on the channels and stream connector's final
+ * states.
+ */
+static int sc_applet_process(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ /* If the applet wants to write and the channel is closed, it's a
+ * broken pipe and it must be reported.
+ */
+ if (!sc_ep_test(sc, SE_FL_HAVE_NO_DATA) && (ic->flags & CF_SHUTR))
+ sc_ep_set(sc, SE_FL_ERROR);
+
+ /* automatically mark the applet as having data available if it reported
+ * being blocked by the channel.
+ */
+ if ((sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM)) ||
+ sc_ep_test(sc, SE_FL_APPLET_NEED_CONN))
+ applet_have_more_data(__sc_appctx(sc));
+
+ /* update the stream connector, channels, and possibly wake the stream up */
+ sc_notify(sc);
+ stream_release_buffers(__sc_strm(sc));
+
+ /* sc_notify may have passed through chk_snd and released some blocking
+ * flags. Process_stream will consider those flags to wake up the
+ * appctx but in the case the task is not in runqueue we may have to
+ * wakeup the appctx immediately.
+ */
+ if (sc_is_recv_allowed(sc) || sc_is_send_allowed(sc))
+ appctx_wakeup(__sc_appctx(sc));
+ return 0;
+}
+
+
+/* Prepares an endpoint upgrade. We don't know at this stage whether the upgrade
+ * will succeed or whether the stconn will be reused by the new endpoint. Thus,
+ * for now, we only pretend the stconn is detached.
+ */
+void sc_conn_prepare_endp_upgrade(struct stconn *sc)
+{
+ BUG_ON(!sc_conn(sc) || !sc->app);
+ sc_ep_clr(sc, SE_FL_T_MUX);
+ sc_ep_set(sc, SE_FL_DETACHED);
+}
+
+/* Endpoint upgrade failed. Restore the stconn state. */
+void sc_conn_abort_endp_upgrade(struct stconn *sc)
+{
+ sc_ep_set(sc, SE_FL_T_MUX);
+ sc_ep_clr(sc, SE_FL_DETACHED);
+}
+
+/* Commit the endpoint upgrade. If the stconn is attached, it means the new
+ * endpoint uses it, so we do nothing. Otherwise, the stconn will be destroyed
+ * with the overlying stream, so we must commit the detach.
+ */
+void sc_conn_commit_endp_upgrade(struct stconn *sc)
+{
+ if (!sc_ep_test(sc, SE_FL_DETACHED))
+ return;
+ sc_detach_endp(&sc);
+ /* Because it was already set as detached, the sedesc must be preserved */
+ BUG_ON(!sc);
+ BUG_ON(!sc->sedesc);
+}
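+
+/* The three functions above form the upgrade protocol. A sketch of the
+ * expected sequence (upgrade_mux() is a hypothetical caller-side step):
+ *
+ *   sc_conn_prepare_endp_upgrade(sc);
+ *   if (upgrade_mux(sc) < 0)
+ *       sc_conn_abort_endp_upgrade(sc);    ...   restore the mux'ed state
+ *   else
+ *       sc_conn_commit_endp_upgrade(sc);   ...   detach if not reused
+ */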
diff --git a/src/stick_table.c b/src/stick_table.c
new file mode 100644
index 0000000..e0a2c93
--- /dev/null
+++ b/src/stick_table.c
@@ -0,0 +1,5190 @@
+/*
+ * Stick tables management functions.
+ *
+ * Copyright 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <string.h>
+#include <errno.h>
+
+#include <import/ebmbtree.h>
+#include <import/ebsttree.h>
+#include <import/ebistree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/dict.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+
+
+/* structure used to return a table key built from a sample */
+static THREAD_LOCAL struct stktable_key static_table_key;
+static int (*smp_fetch_src)(const struct arg *, struct sample *, const char *, void *);
+
+struct stktable *stktables_list;
+struct eb_root stktable_by_name = EB_ROOT;
+
+#define round_ptr_size(i) (((i) + (sizeof(void *) - 1)) &~ (sizeof(void *) - 1))
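+
+/* Worked example: on a 64-bit target (sizeof(void *) == 8),
+ * round_ptr_size(13) == (13 + 7) & ~7 == 16, so a 13-byte data block is
+ * padded up to the next pointer-aligned boundary.
+ */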
+
+/* This function inserts stktable <t> into the tree of known stick-tables.
+ * The stick-table ID is used as the storing key so it must already have
+ * been initialized.
+ */
+void stktable_store_name(struct stktable *t)
+{
+ t->name.key = t->id;
+ ebis_insert(&stktable_by_name, &t->name);
+}
+
+struct stktable *stktable_find_by_name(const char *name)
+{
+ struct ebpt_node *node;
+ struct stktable *t;
+
+ node = ebis_lookup(&stktable_by_name, name);
+ if (node) {
+ t = container_of(node, struct stktable, name);
+ if (strcmp(t->id, name) == 0)
+ return t;
+ }
+
+ return NULL;
+}
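+
+/* Usage sketch (illustrative):
+ *
+ *   struct stktable *t = stktable_find_by_name("mytable");
+ *   if (!t)
+ *       ...   no stick-table was declared under this id
+ */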
+
+/*
+ * Free an allocated sticky session <ts>, and decrease sticky sessions counter
+ * in table <t>.
+ */
+void __stksess_free(struct stktable *t, struct stksess *ts)
+{
+ t->current--;
+ pool_free(t->pool, (void *)ts - round_ptr_size(t->data_size));
+}
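+
+/* Memory layout behind the pointer arithmetic above: an entry is allocated
+ * as [data block][struct stksess][key], with <ts> pointing at the middle
+ * part, so the allocation base is recovered with:
+ *
+ *   (void *)ts - round_ptr_size(t->data_size)
+ */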
+
+/*
+ * Free an allocated sticky session <ts>, and decrease sticky sessions counter
+ * in table <t>.
+ * This function locks the table
+ */
+void stksess_free(struct stktable *t, struct stksess *ts)
+{
+ void *data;
+ data = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY);
+ if (data) {
+ dict_entry_unref(&server_key_dict, stktable_data_cast(data, std_t_dict));
+ stktable_data_cast(data, std_t_dict) = NULL;
+ }
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ __stksess_free(t, ts);
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+}
+
+/*
+ * Kill an stksess (only if its ref_cnt is zero).
+ */
+int __stksess_kill(struct stktable *t, struct stksess *ts)
+{
+ if (ts->ref_cnt)
+ return 0;
+
+ eb32_delete(&ts->exp);
+ eb32_delete(&ts->upd);
+ ebmb_delete(&ts->key);
+ __stksess_free(t, ts);
+ return 1;
+}
+
+/*
+ * Decrease the refcount if <decrefcnt> is not 0, then try to kill the
+ * stksess. This function locks the table.
+ */
+int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcnt)
+{
+ int ret;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ if (decrefcnt)
+ ts->ref_cnt--;
+ ret = __stksess_kill(t, ts);
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return ret;
+}
+
+/*
+ * Initialize or update the key in the sticky session <ts> present in table <t>
+ * from the value present in <key>.
+ */
+void stksess_setkey(struct stktable *t, struct stksess *ts, struct stktable_key *key)
+{
+ if (t->type != SMP_T_STR)
+ memcpy(ts->key.key, key->key, t->key_size);
+ else {
+ memcpy(ts->key.key, key->key, MIN(t->key_size - 1, key->key_len));
+ ts->key.key[MIN(t->key_size - 1, key->key_len)] = 0;
+ }
+}
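+
+/* Illustrative note on the truncation above: with "type string len 8" the
+ * table's key_size is 9 (8 chars plus '\0'), so setting the key from
+ * "example.com" (11 chars) copies only "example." and terminates it;
+ * string keys longer than the configured length are silently truncated.
+ */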
+
+
+/*
+ * Init sticky session <ts> of table <t>. The data parts are cleared and <ts>
+ * is returned.
+ */
+static struct stksess *__stksess_init(struct stktable *t, struct stksess *ts)
+{
+ memset((void *)ts - t->data_size, 0, t->data_size);
+ ts->ref_cnt = 0;
+ ts->key.node.leaf_p = NULL;
+ ts->exp.node.leaf_p = NULL;
+ ts->upd.node.leaf_p = NULL;
+ ts->expire = tick_add(now_ms, MS_TO_TICKS(t->expire));
+ HA_RWLOCK_INIT(&ts->lock);
+ return ts;
+}
+
+/*
+ * Trash the oldest <to_batch> sticky sessions from table <t>.
+ * Returns the number of trashed sticky sessions. It may actually trash
+ * fewer than expected if finding them requires too long a search time
+ * (e.g. most of them have ts->ref_cnt > 0).
+ */
+int __stktable_trash_oldest(struct stktable *t, int to_batch)
+{
+ struct stksess *ts;
+ struct eb32_node *eb;
+ int max_search = to_batch * 2; // no more than 50% misses
+ int batched = 0;
+ int looped = 0;
+
+ eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
+
+ while (batched < to_batch) {
+
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now if we
+ * have not yet visited it.
+ */
+ if (looped)
+ break;
+ looped = 1;
+ eb = eb32_first(&t->exps);
+ if (likely(!eb))
+ break;
+ }
+
+ if (--max_search < 0)
+ break;
+
+ /* timer looks expired, detach it from the queue */
+ ts = eb32_entry(eb, struct stksess, exp);
+ eb = eb32_next(eb);
+
+ /* don't delete an entry which is currently referenced */
+ if (ts->ref_cnt)
+ continue;
+
+ eb32_delete(&ts->exp);
+
+ if (ts->expire != ts->exp.key) {
+ if (!tick_isset(ts->expire))
+ continue;
+
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->exps, &ts->exp);
+
+ /* the update might have jumped beyond the next element,
+ * possibly causing a wrapping. We need to check whether
+ * the next element should be used instead. If the next
+ * element doesn't exist it means we're on the right
+ * side and have to check the first one then. If it
+ * exists and is closer, we must use it, otherwise we
+ * use the current one.
+ */
+ if (!eb)
+ eb = eb32_first(&t->exps);
+
+ if (!eb || tick_is_lt(ts->exp.key, eb->key))
+ eb = &ts->exp;
+
+ continue;
+ }
+
+ /* session expired, trash it */
+ ebmb_delete(&ts->key);
+ eb32_delete(&ts->upd);
+ __stksess_free(t, ts);
+ batched++;
+ }
+
+ return batched;
+}
+
+/*
+ * Trash the oldest <to_batch> sticky sessions from table <t>.
+ * Returns the number of trashed sticky sessions.
+ * This function locks the table.
+ */
+int stktable_trash_oldest(struct stktable *t, int to_batch)
+{
+ int ret;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ ret = __stktable_trash_oldest(t, to_batch);
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return ret;
+}
+
+/*
+ * Allocate and initialise a new sticky session.
+ * The new sticky session is returned or NULL in case of lack of memory.
+ * Sticky sessions should only be allocated this way, and must be freed using
+ * stksess_free(). Table <t>'s sticky session counter is increased. If <key>
+ * is not NULL, it is assigned to the new session.
+ */
+struct stksess *__stksess_new(struct stktable *t, struct stktable_key *key)
+{
+ struct stksess *ts;
+
+ if (unlikely(t->current == t->size)) {
+ if ( t->nopurge )
+ return NULL;
+
+ if (!__stktable_trash_oldest(t, (t->size >> 8) + 1))
+ return NULL;
+ }
+
+ ts = pool_alloc(t->pool);
+ if (ts) {
+ t->current++;
+ ts = (void *)ts + round_ptr_size(t->data_size);
+ __stksess_init(t, ts);
+ if (key)
+ stksess_setkey(t, ts, key);
+ }
+
+ return ts;
+}
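+
+/* For reference, each pool object allocated above is laid out as below,
+ * which is why the offsets in t->data_ofs[] are negative and why
+ * __stksess_free() rewinds the pointer before releasing it:
+ *
+ *   [ data block: round_ptr_size(t->data_size) ][ struct stksess ][ key ]
+ *                                               ^-- <ts> points here
+ */
+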
+/*
+ * Allocate and initialise a new sticky session.
+ * The new sticky session is returned or NULL in case of lack of memory.
+ * Sticky sessions should only be allocated this way, and must be freed using
+ * stksess_free(). Table <t>'s sticky session counter is increased. If <key>
+ * is not NULL, it is assigned to the new session.
+ * This function locks the table
+ */
+struct stksess *stksess_new(struct stktable *t, struct stktable_key *key)
+{
+ struct stksess *ts;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ ts = __stksess_new(t, key);
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return ts;
+}
+
+/*
+ * Looks in table <t> for a sticky session matching key <key>.
+ * Returns a pointer to the requested sticky session, or NULL if none was found.
+ */
+struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key)
+{
+ struct ebmb_node *eb;
+
+ if (t->type == SMP_T_STR)
+ eb = ebst_lookup_len(&t->keys, key->key, key->key_len+1 < t->key_size ? key->key_len : t->key_size-1);
+ else
+ eb = ebmb_lookup(&t->keys, key->key, t->key_size);
+
+ if (unlikely(!eb)) {
+ /* no session found */
+ return NULL;
+ }
+
+ return ebmb_entry(eb, struct stksess, key);
+}
+
+/*
+ * Looks in table <t> for a sticky session matching key <key>.
+ * Returns a pointer to the requested sticky session, or NULL if none was found.
+ * The refcount of the found entry is increased, and this function is
+ * protected by the table lock.
+ */
+struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key)
+{
+ struct stksess *ts;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ ts = __stktable_lookup_key(t, key);
+ if (ts)
+ ts->ref_cnt++;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return ts;
+}
+
+/*
+ * Looks in table <t> for a sticky session with the same key as <ts>.
+ * Returns a pointer to the requested sticky session, or NULL if none was found.
+ */
+struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts)
+{
+ struct ebmb_node *eb;
+
+ if (t->type == SMP_T_STR)
+ eb = ebst_lookup(&(t->keys), (char *)ts->key.key);
+ else
+ eb = ebmb_lookup(&(t->keys), ts->key.key, t->key_size);
+
+ if (unlikely(!eb))
+ return NULL;
+
+ return ebmb_entry(eb, struct stksess, key);
+}
+
+/*
+ * Looks in table <t> for a sticky session with the same key as <ts>.
+ * Returns a pointer to the requested sticky session, or NULL if none was found.
+ * The refcount of the found entry is increased, and this function is
+ * protected by the table lock.
+ */
+struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts)
+{
+ struct stksess *lts;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ lts = __stktable_lookup(t, ts);
+ if (lts)
+ lts->ref_cnt++;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return lts;
+}
+
+/* Update the expiration timer for <ts> but do not touch its expiration node.
+ * The table's expiration timer is updated if set.
+ * The node will also be inserted into the update tree if needed, at a
+ * position depending on whether the update is local or comes from a
+ * remote node.
+ */
+void __stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire)
+{
+ struct eb32_node * eb;
+ ts->expire = expire;
+ if (t->expire) {
+ t->exp_task->expire = t->exp_next = tick_first(ts->expire, t->exp_next);
+ task_queue(t->exp_task);
+ }
+
+ /* If sync is enabled */
+ if (t->sync_task) {
+ if (local) {
+ /* If this entry is not in the tree
+ or not scheduled for at least one peer */
+ if (!ts->upd.node.leaf_p
+ || (int)(t->commitupdate - ts->upd.key) >= 0
+ || (int)(ts->upd.key - t->localupdate) >= 0) {
+ ts->upd.key = ++t->update;
+ t->localupdate = t->update;
+ eb32_delete(&ts->upd);
+ eb = eb32_insert(&t->updates, &ts->upd);
+ if (eb != &ts->upd) {
+ eb32_delete(eb);
+ eb32_insert(&t->updates, &ts->upd);
+ }
+ }
+ task_wakeup(t->sync_task, TASK_WOKEN_MSG);
+ }
+ else {
+ /* If this entry is not in the tree */
+ if (!ts->upd.node.leaf_p) {
+ ts->upd.key = (++t->update) + (2147483648U);
+ eb = eb32_insert(&t->updates, &ts->upd);
+ if (eb != &ts->upd) {
+ eb32_delete(eb);
+ eb32_insert(&t->updates, &ts->upd);
+ }
+ }
+ }
+ }
+}
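+
+/* Note on the keys used above: local updates draw keys from t->update
+ * (mirrored in t->localupdate), while remote updates add 2147483648U
+ * (2^31) so they land in the opposite half of the 32-bit key space of the
+ * update tree. E.g. with t->update == 5, a local touch inserts key 6
+ * while a remote touch inserts key 2147483654, presumably so that entries
+ * learned from peers are kept apart from those pending local propagation.
+ */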
+
+/* Update the expiration timer for <ts> but do not touch its expiration node.
+ * The table's expiration timer is updated using the entry's own expiration
+ * date, as received from the remote node.
+ * The node will also be inserted into the update tree if needed, at a
+ * position reflecting that the update comes from a remote node.
+ */
+void stktable_touch_remote(struct stktable *t, struct stksess *ts, int decrefcnt)
+{
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ __stktable_touch_with_exp(t, ts, 0, ts->expire);
+ if (decrefcnt)
+ ts->ref_cnt--;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+}
+
+/* Update the expiration timer for <ts> but do not touch its expiration node.
+ * The table's expiration timer is updated using an expiration date computed
+ * from the "expire" setting of stick-table <t>'s configuration.
+ * The node will also be inserted into the update tree if needed, at a
+ * position reflecting that the update was made locally.
+ */
+void stktable_touch_local(struct stktable *t, struct stksess *ts, int decrefcnt)
+{
+ int expire = tick_add(now_ms, MS_TO_TICKS(t->expire));
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ __stktable_touch_with_exp(t, ts, 1, expire);
+ if (decrefcnt)
+ ts->ref_cnt--;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+}
+
+/* Just decrease the ref_cnt of the current session. Does nothing if <ts> is NULL. */
+static void stktable_release(struct stktable *t, struct stksess *ts)
+{
+ if (!ts)
+ return;
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ ts->ref_cnt--;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+}
+
+/* Insert new sticky session <ts> in the table. It is assumed that it does not
+ * yet exist (the caller must check this). The table's timeout is updated if it
+ * is set.
+ */
+void __stktable_store(struct stktable *t, struct stksess *ts)
+{
+ ebmb_insert(&t->keys, &ts->key, t->key_size);
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->exps, &ts->exp);
+ if (t->expire) {
+ t->exp_task->expire = t->exp_next = tick_first(ts->expire, t->exp_next);
+ task_queue(t->exp_task);
+ }
+}
+
+/* Returns a valid or initialized stksess for the specified stktable_key in the
+ * specified table, or NULL if the key was NULL or if no entry could be found
+ * nor created. The entry's expiration is updated.
+ */
+struct stksess *__stktable_get_entry(struct stktable *table, struct stktable_key *key)
+{
+ struct stksess *ts;
+
+ if (!key)
+ return NULL;
+
+ ts = __stktable_lookup_key(table, key);
+ if (ts == NULL) {
+ /* entry does not exist, initialize a new one */
+ ts = __stksess_new(table, key);
+ if (!ts)
+ return NULL;
+ __stktable_store(table, ts);
+ }
+ return ts;
+}
+
+/* Returns a valid or initialized stksess for the specified stktable_key in the
+ * specified table, or NULL if the key was NULL or if no entry could be found
+ * nor created. The entry's expiration is updated.
+ * This function locks the table, and the refcount of the entry is increased.
+ */
+struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *key)
+{
+ struct stksess *ts;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &table->lock);
+ ts = __stktable_get_entry(table, key);
+ if (ts)
+ ts->ref_cnt++;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &table->lock);
+
+ return ts;
+}
+
+/* Look up an entry with the same key, and store the submitted stksess if
+ * none is found.
+ */
+struct stksess *__stktable_set_entry(struct stktable *table, struct stksess *nts)
+{
+ struct stksess *ts;
+
+ ts = __stktable_lookup(table, nts);
+ if (ts == NULL) {
+ ts = nts;
+ __stktable_store(table, ts);
+ }
+ return ts;
+}
+
+/* Look up an entry with the same key, and store the submitted stksess if
+ * none is found.
+ * This function locks the table, and the refcount of the entry is increased.
+ */
+struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts)
+{
+ struct stksess *ts;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &table->lock);
+ ts = __stktable_set_entry(table, nts);
+ ts->ref_cnt++;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &table->lock);
+
+ return ts;
+}
+
+/*
+ * Task processing function to trash expired sticky sessions. A pointer to the
+ * task itself is returned since it never dies.
+ */
+struct task *process_table_expire(struct task *task, void *context, unsigned int state)
+{
+ struct stktable *t = context;
+ struct stksess *ts;
+ struct eb32_node *eb;
+ int looped = 0;
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &t->lock);
+ eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
+
+ while (1) {
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now if we
+ * have not yet visited it.
+ */
+ if (looped)
+ break;
+ looped = 1;
+ eb = eb32_first(&t->exps);
+ if (likely(!eb))
+ break;
+ }
+
+ if (likely(tick_is_lt(now_ms, eb->key))) {
+ /* timer not expired yet, revisit it later */
+ t->exp_next = eb->key;
+ goto out_unlock;
+ }
+
+ /* timer looks expired, detach it from the queue */
+ ts = eb32_entry(eb, struct stksess, exp);
+ eb = eb32_next(eb);
+
+ /* don't delete an entry which is currently referenced */
+ if (ts->ref_cnt)
+ continue;
+
+ eb32_delete(&ts->exp);
+
+ if (!tick_is_expired(ts->expire, now_ms)) {
+ if (!tick_isset(ts->expire))
+ continue;
+
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->exps, &ts->exp);
+
+ /* the update might have jumped beyond the next element,
+ * possibly causing a wrapping. We need to check whether
+ * the next element should be used instead. If the next
+ * element doesn't exist it means we're on the right
+ * side and have to check the first one then. If it
+ * exists and is closer, we must use it, otherwise we
+ * use the current one.
+ */
+ if (!eb)
+ eb = eb32_first(&t->exps);
+
+ if (!eb || tick_is_lt(ts->exp.key, eb->key))
+ eb = &ts->exp;
+ continue;
+ }
+
+ /* session expired, trash it */
+ ebmb_delete(&ts->key);
+ eb32_delete(&ts->upd);
+ __stksess_free(t, ts);
+ }
+
+ /* We have found no task to expire in any tree */
+ t->exp_next = TICK_ETERNITY;
+out_unlock:
+ task->expire = t->exp_next;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &t->lock);
+ return task;
+}
+
+/* Perform minimal stick table initializations. Returns 0 in case of error, 1 if OK. */
+int stktable_init(struct stktable *t)
+{
+ int peers_retval = 0;
+ if (t->size) {
+ t->keys = EB_ROOT_UNIQUE;
+ memset(&t->exps, 0, sizeof(t->exps));
+ t->updates = EB_ROOT_UNIQUE;
+ HA_SPIN_INIT(&t->lock);
+
+ t->pool = create_pool("sticktables", sizeof(struct stksess) + round_ptr_size(t->data_size) + t->key_size, MEM_F_SHARED);
+
+ t->exp_next = TICK_ETERNITY;
+ if ( t->expire ) {
+ t->exp_task = task_new_anywhere();
+ if (!t->exp_task)
+ return 0;
+ t->exp_task->process = process_table_expire;
+ t->exp_task->context = (void *)t;
+ }
+ if (t->peers.p && t->peers.p->peers_fe && !(t->peers.p->peers_fe->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ peers_retval = peers_register_table(t->peers.p, t);
+ }
+
+ return (t->pool != NULL) && !peers_retval;
+ }
+ return 1;
+}
+
+/*
+ * Configuration keywords of known table types
+ */
+struct stktable_type stktable_types[SMP_TYPES] = {
+ [SMP_T_SINT] = { "integer", 0, 4 },
+ [SMP_T_IPV4] = { "ip", 0, 4 },
+ [SMP_T_IPV6] = { "ipv6", 0, 16 },
+ [SMP_T_STR] = { "string", STK_F_CUSTOM_KEYSIZE, 32 },
+ [SMP_T_BIN] = { "binary", STK_F_CUSTOM_KEYSIZE, 32 }
+};
+
+/*
+ * Parse table type configuration.
+ * Returns 0 on successful parsing, else 1.
+ * <myidx> is set at next configuration <args> index.
+ */
+int stktable_parse_type(char **args, int *myidx, unsigned long *type, size_t *key_size)
+{
+ for (*type = 0; *type < SMP_TYPES; (*type)++) {
+ if (!stktable_types[*type].kw)
+ continue;
+ if (strcmp(args[*myidx], stktable_types[*type].kw) != 0)
+ continue;
+
+ *key_size = stktable_types[*type].default_size;
+ (*myidx)++;
+
+ if (stktable_types[*type].flags & STK_F_CUSTOM_KEYSIZE) {
+ if (strcmp("len", args[*myidx]) == 0) {
+ (*myidx)++;
+ *key_size = atol(args[*myidx]);
+ if (!*key_size)
+ break;
+ if (*type == SMP_T_STR) {
+ /* null terminated string needs +1 for '\0'. */
+ (*key_size)++;
+ }
+ (*myidx)++;
+ }
+ }
+ return 0;
+ }
+ return 1;
+}
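+
+/* Illustrative walk-through: for the arguments "string len 32", the loop
+ * above matches "string" (SMP_T_STR), sees STK_F_CUSTOM_KEYSIZE, reads
+ * "len 32", and adds one byte for the trailing '\0', ending with
+ * *key_size == 33 and <myidx> advanced past the "32" argument.
+ */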
+
+/* Reserve some space for data type <type>. There are two optional arguments,
+ * <sa> and <sa2>, used to configure this data type; they may be NULL if
+ * unused for a given type.
+ * Returns PE_NONE (0) if OK or an error code among :
+ *   - PE_ENUM_OOR if <type> does not exist
+ *   - PE_EXIST if <type> is already registered
+ *   - PE_ARG_NOT_USED if <sa>/<sa2> was provided but not expected
+ *   - PE_ARG_MISSING if <sa>/<sa2> was expected but not provided
+ *   - PE_ARG_VALUE_OOR if the type is an array and <sa> is out of the allowed
+ *     array size range
+ *   - PE_ARG_INVC if <sa> could not be parsed as a time for an ARG_T_DELAY
+ *     argument
+ */
+int stktable_alloc_data_type(struct stktable *t, int type, const char *sa, const char *sa2)
+{
+ if (type >= STKTABLE_DATA_TYPES)
+ return PE_ENUM_OOR;
+
+ if (t->data_ofs[type])
+ /* already allocated */
+ return PE_EXIST;
+
+ t->data_nbelem[type] = 1;
+ if (stktable_data_types[type].is_array) {
+ /* arrays take their element count on first argument */
+ if (!sa)
+ return PE_ARG_MISSING;
+ t->data_nbelem[type] = atoi(sa);
+ if (!t->data_nbelem[type] || (t->data_nbelem[type] > STKTABLE_MAX_DT_ARRAY_SIZE))
+ return PE_ARG_VALUE_OOR;
+ sa = sa2;
+ }
+
+ switch (stktable_data_types[type].arg_type) {
+ case ARG_T_NONE:
+ if (sa)
+ return PE_ARG_NOT_USED;
+ break;
+ case ARG_T_INT:
+ if (!sa)
+ return PE_ARG_MISSING;
+ t->data_arg[type].i = atoi(sa);
+ break;
+ case ARG_T_DELAY:
+ if (!sa)
+ return PE_ARG_MISSING;
+ sa = parse_time_err(sa, &t->data_arg[type].u, TIME_UNIT_MS);
+ if (sa)
+ return PE_ARG_INVC; /* invalid char */
+ break;
+ }
+
+ t->data_size += t->data_nbelem[type] * stktable_type_size(stktable_data_types[type].std_type);
+ t->data_ofs[type] = -t->data_size;
+ return PE_NONE;
+}
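+
+/* Illustrative example: the store option "gpc_rate(3,10s)" reaches this
+ * function with type == STKTABLE_DT_GPC_RATE, sa == "3" and sa2 == "10s".
+ * As gpc_rate is an array type, <sa> sets data_nbelem[type] to 3, then
+ * <sa2> is parsed as the rate period (10000 ms) since its argument type
+ * is ARG_T_DELAY.
+ */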
+
+/*
+ * Parse line <linenum> of configuration file <file> to configure the
+ * stick-table at address <t> with <id> as ID.
+ * <peers> provides the "peers" section pointer only if this function is called
+ * from a "peers" section.
+ * <nid> is the stick-table name which is sent over the network. It must be equal
+ * to <id> if this stick-table is parsed from a proxy section, and prefixed by the
+ * "peers" section name followed by a '/' character if parsed from a "peers"
+ * section. It is the caller's responsibility to check this.
+ * Returns an error status with ERR_* flags set if required, 0 if no error was
+ * encountered.
+ */
+int parse_stick_table(const char *file, int linenum, char **args,
+ struct stktable *t, char *id, char *nid, struct peers *peers)
+{
+ int err_code = 0;
+ int idx = 1;
+ unsigned int val;
+
+ if (!id || !*id) {
+ ha_alert("parsing [%s:%d] : %s: ID not provided.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* Store the "peers" section if this function is called from a "peers" section. */
+ if (peers) {
+ t->peers.p = peers;
+ idx++;
+ }
+
+ t->id = id;
+ t->nid = nid;
+ t->type = (unsigned int)-1;
+ t->conf.file = file;
+ t->conf.line = linenum;
+
+ while (*args[idx]) {
+ const char *err;
+
+ if (strcmp(args[idx], "size") == 0) {
+ idx++;
+ if (!*(args[idx])) {
+ ha_alert("parsing [%s:%d] : %s: missing argument after '%s'.\n",
+ file, linenum, args[0], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if ((err = parse_size_err(args[idx], &t->size))) {
+ ha_alert("parsing [%s:%d] : %s: unexpected character '%c' in argument of '%s'.\n",
+ file, linenum, args[0], *err, args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ idx++;
+ }
+ /* This argument does not exist in a "peers" section. */
+ else if (!peers && strcmp(args[idx], "peers") == 0) {
+ idx++;
+ if (!*(args[idx])) {
+ ha_alert("parsing [%s:%d] : %s: missing argument after '%s'.\n",
+ file, linenum, args[0], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ t->peers.name = strdup(args[idx++]);
+ }
+ else if (strcmp(args[idx], "expire") == 0) {
+ idx++;
+ if (!*(args[idx])) {
+ ha_alert("parsing [%s:%d] : %s: missing argument after '%s'.\n",
+ file, linenum, args[0], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ err = parse_time_err(args[idx], &val, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: %s: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[0], args[idx], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: %s: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[0], args[idx], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err) {
+ ha_alert("parsing [%s:%d] : %s: unexpected character '%c' in argument of '%s'.\n",
+ file, linenum, args[0], *err, args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ t->expire = val;
+ idx++;
+ }
+ else if (strcmp(args[idx], "nopurge") == 0) {
+ t->nopurge = 1;
+ idx++;
+ }
+ else if (strcmp(args[idx], "type") == 0) {
+ idx++;
+ if (stktable_parse_type(args, &idx, &t->type, &t->key_size) != 0) {
+ ha_alert("parsing [%s:%d] : %s: unknown type '%s'.\n",
+ file, linenum, args[0], args[idx]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /* idx already points to next arg */
+ }
+ else if (strcmp(args[idx], "store") == 0) {
+ int type, err;
+ char *cw, *nw, *sa, *sa2;
+
+ idx++;
+ nw = args[idx];
+ while (*nw) {
+ /* the "store" keyword supports a comma-separated list */
+ cw = nw;
+ sa = NULL; /* store arg */
+ sa2 = NULL;
+ while (*nw && *nw != ',') {
+ if (*nw == '(') {
+ *nw = 0;
+ sa = ++nw;
+ while (*nw != ')') {
+ if (!*nw) {
+ ha_alert("parsing [%s:%d] : %s: missing closing parenthesis after store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*nw == ',') {
+ *nw = '\0';
+ sa2 = nw + 1;
+ }
+ nw++;
+ }
+ *nw = '\0';
+ }
+ nw++;
+ }
+ if (*nw)
+ *nw++ = '\0';
+ type = stktable_get_data_type(cw);
+ if (type < 0) {
+ ha_alert("parsing [%s:%d] : %s: unknown store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = stktable_alloc_data_type(t, type, sa, sa2);
+ switch (err) {
+ case PE_NONE: break;
+ case PE_EXIST:
+ ha_warning("parsing [%s:%d]: %s: store option '%s' already enabled, ignored.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_WARN;
+ break;
+
+ case PE_ARG_MISSING:
+ ha_alert("parsing [%s:%d] : %s: missing argument to store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+
+ case PE_ARG_NOT_USED:
+ ha_alert("parsing [%s:%d] : %s: unexpected argument to store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ case PE_ARG_VALUE_OOR:
+ ha_alert("parsing [%s:%d] : %s: array size is out of allowed range (1-%d) for store option '%s'.\n",
+ file, linenum, args[0], STKTABLE_MAX_DT_ARRAY_SIZE, cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+
+ default:
+ ha_alert("parsing [%s:%d] : %s: error when processing store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ idx++;
+ if (t->data_ofs[STKTABLE_DT_GPT] && t->data_ofs[STKTABLE_DT_GPT0]) {
+ ha_alert("parsing [%s:%d] : %s: simultaneous usage of 'gpt' and 'gpt0' in a same table is not permitted as 'gpt' overrides 'gpt0'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (t->data_ofs[STKTABLE_DT_GPC] && (t->data_ofs[STKTABLE_DT_GPC0] || t->data_ofs[STKTABLE_DT_GPC1])) {
+ ha_alert("parsing [%s:%d] : %s: simultaneous usage of 'gpc' and 'gpc[0/1]' in a same table is not permitted as 'gpc' overrides 'gpc[0/1]'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (t->data_ofs[STKTABLE_DT_GPC_RATE] && (t->data_ofs[STKTABLE_DT_GPC0_RATE] || t->data_ofs[STKTABLE_DT_GPC1_RATE])) {
+ ha_alert("parsing [%s:%d] : %s: simultaneous usage of 'gpc_rate' and 'gpc[0/1]_rate' in a same table is not permitted as 'gpc_rate' overrides 'gpc[0/1]_rate'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[idx], "srvkey") == 0) {
+ char *keytype;
+ idx++;
+ keytype = args[idx];
+ if (strcmp(keytype, "name") == 0) {
+ t->server_key_type = STKTABLE_SRV_NAME;
+ }
+ else if (strcmp(keytype, "addr") == 0) {
+ t->server_key_type = STKTABLE_SRV_ADDR;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : %s : unknown server key type '%s'.\n",
+ file, linenum, args[0], keytype);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ idx++;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : %s: unknown argument '%s'.\n",
+ file, linenum, args[0], args[idx]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if (!t->size) {
+ ha_alert("parsing [%s:%d] : %s: missing size.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (t->type == (unsigned int)-1) {
+ ha_alert("parsing [%s:%d] : %s: missing type.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ out:
+ return err_code;
+}
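+
+/* Example of a configuration line accepted by the parser above (shown for
+ * illustration; "size" and "type" are mandatory, as enforced by the final
+ * checks):
+ *
+ *   stick-table type ip size 200k expire 30m store conn_rate(10s),gpc0
+ */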
+
+/* Prepares a stktable_key from a sample <smp> to search into table <t>.
+ * Note that the sample *is* modified and that the returned key may point
+ * to it, so the sample must not be modified afterwards before the lookup.
+ * Returns NULL if the sample could not be converted (eg: no matching type),
+ * otherwise a pointer to the static stktable_key filled with what is needed
+ * for the lookup.
+ */
+struct stktable_key *smp_to_stkey(struct sample *smp, struct stktable *t)
+{
+ /* Convert sample. */
+ if (!sample_convert(smp, t->type))
+ return NULL;
+
+ /* Fill static_table_key. */
+ switch (t->type) {
+
+ case SMP_T_IPV4:
+ static_table_key.key = &smp->data.u.ipv4;
+ static_table_key.key_len = 4;
+ break;
+
+ case SMP_T_IPV6:
+ static_table_key.key = &smp->data.u.ipv6;
+ static_table_key.key_len = 16;
+ break;
+
+ case SMP_T_SINT:
+ /* The stick table requires a 32-bit unsigned int; "sint" is a
+ * signed 64-bit int, so we can convert it in place.
+ */
+ smp->data.u.sint = (unsigned int)smp->data.u.sint;
+ static_table_key.key = &smp->data.u.sint;
+ static_table_key.key_len = 4;
+ break;
+
+ case SMP_T_STR:
+ if (!smp_make_safe(smp))
+ return NULL;
+ static_table_key.key = smp->data.u.str.area;
+ static_table_key.key_len = smp->data.u.str.data;
+ break;
+
+ case SMP_T_BIN:
+ if (smp->data.u.str.data < t->key_size) {
+ /* This type needs padding with 0. */
+ if (!smp_make_rw(smp))
+ return NULL;
+
+ if (smp->data.u.str.size < t->key_size)
+ if (!smp_dup(smp))
+ return NULL;
+ if (smp->data.u.str.size < t->key_size)
+ return NULL;
+ memset(smp->data.u.str.area + smp->data.u.str.data, 0,
+ t->key_size - smp->data.u.str.data);
+ smp->data.u.str.data = t->key_size;
+ }
+ static_table_key.key = smp->data.u.str.area;
+ static_table_key.key_len = smp->data.u.str.data;
+ break;
+
+ default: /* impossible case. */
+ return NULL;
+ }
+
+ return &static_table_key;
+}
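+
+/* Note on the SMP_T_SINT case above: only the low 32 bits of the signed
+ * 64-bit sample are kept, so for instance samples of -1 and 4294967295
+ * build the same 4-byte key, which matches how "type integer" tables
+ * store their keys.
+ */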
+
+/*
+ * Process a fetch + format conversion as defined by the sample expression <expr>
+ * on request or response considering the <opt> parameter. Returns either NULL if
+ * no key could be extracted, or a pointer to the converted result stored in
+ * static_table_key in format <table_type>. If <smp> is not NULL, it will be reset
+ * and its flags will be initialized so that the caller gets a copy of the input
+ * sample, and knows why it was not accepted (eg: SMP_F_MAY_CHANGE is present
+ * without SMP_OPT_FINAL). The output will be usable like this :
+ *
+ * return MAY_CHANGE FINAL Meaning for the sample
+ * NULL 0 * Not present and will never be (eg: header)
+ * NULL 1 0 Not present or unstable, could change (eg: req_len)
+ * NULL 1 1 Not present, will not change anymore
+ * smp 0 * Present and will not change (eg: header)
+ * smp 1 0 not possible
+ * smp 1 1 Present, last known value (eg: request length)
+ */
+struct stktable_key *stktable_fetch_key(struct stktable *t, struct proxy *px, struct session *sess, struct stream *strm,
+ unsigned int opt, struct sample_expr *expr, struct sample *smp)
+{
+ if (smp)
+ memset(smp, 0, sizeof(*smp));
+
+ smp = sample_process(px, sess, strm, opt, expr, smp);
+ if (!smp)
+ return NULL;
+
+ if ((smp->flags & SMP_F_MAY_CHANGE) && !(opt & SMP_OPT_FINAL))
+ return NULL; /* we can only use stable samples */
+
+ return smp_to_stkey(smp, t);
+}
+
+/*
+ * Returns 1 if sample expression <expr> result can be converted to table key of
+ * type <table_type>, otherwise zero. Used in configuration check.
+ */
+int stktable_compatible_sample(struct sample_expr *expr, unsigned long table_type)
+{
+ int out_type;
+
+ if (table_type >= SMP_TYPES || !stktable_types[table_type].kw)
+ return 0;
+
+ out_type = smp_expr_output_type(expr);
+
+ /* Convert sample. */
+ if (!sample_casts[out_type][table_type])
+ return 0;
+
+ return 1;
+}
+
+/* Extra data type processing: after the last built-in type, some room may
+ * remain before STKTABLE_DATA_TYPES which may be used to register extra data
+ * types at run time.
+ */
+struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES] = {
+ [STKTABLE_DT_SERVER_ID] = { .name = "server_id", .std_type = STD_T_SINT },
+ [STKTABLE_DT_GPT0] = { .name = "gpt0", .std_type = STD_T_UINT },
+ [STKTABLE_DT_GPC0] = { .name = "gpc0", .std_type = STD_T_UINT },
+ [STKTABLE_DT_GPC0_RATE] = { .name = "gpc0_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_CONN_CNT] = { .name = "conn_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_CONN_RATE] = { .name = "conn_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_CONN_CUR] = { .name = "conn_cur", .std_type = STD_T_UINT, .is_local = 1 },
+ [STKTABLE_DT_SESS_CNT] = { .name = "sess_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_SESS_RATE] = { .name = "sess_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_HTTP_REQ_CNT] = { .name = "http_req_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_HTTP_REQ_RATE] = { .name = "http_req_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_HTTP_ERR_CNT] = { .name = "http_err_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_HTTP_ERR_RATE] = { .name = "http_err_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_BYTES_IN_CNT] = { .name = "bytes_in_cnt", .std_type = STD_T_ULL },
+ [STKTABLE_DT_BYTES_IN_RATE] = { .name = "bytes_in_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_BYTES_OUT_CNT] = { .name = "bytes_out_cnt", .std_type = STD_T_ULL },
+ [STKTABLE_DT_BYTES_OUT_RATE]= { .name = "bytes_out_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_GPC1] = { .name = "gpc1", .std_type = STD_T_UINT },
+ [STKTABLE_DT_GPC1_RATE] = { .name = "gpc1_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_SERVER_KEY] = { .name = "server_key", .std_type = STD_T_DICT },
+ [STKTABLE_DT_HTTP_FAIL_CNT] = { .name = "http_fail_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_HTTP_FAIL_RATE]= { .name = "http_fail_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_GPT] = { .name = "gpt", .std_type = STD_T_UINT, .is_array = 1 },
+ [STKTABLE_DT_GPC] = { .name = "gpc", .std_type = STD_T_UINT, .is_array = 1 },
+ [STKTABLE_DT_GPC_RATE] = { .name = "gpc_rate", .std_type = STD_T_FRQP, .is_array = 1, .arg_type = ARG_T_DELAY },
+};
+
+/* Registers stick-table extra data type with index <idx>, name <name>, type
+ * <std_type> and arg type <arg_type>. If the index is negative, the next free
+ * index is automatically allocated. The allocated index is returned, or -1 if
+ * no free index was found or <name> was already registered. The <name> is used
+ * directly as a pointer, so if it's not stable, the caller must allocate it.
+ */
+int stktable_register_data_store(int idx, const char *name, int std_type, int arg_type)
+{
+ if (idx < 0) {
+ for (idx = 0; idx < STKTABLE_DATA_TYPES; idx++) {
+ if (!stktable_data_types[idx].name)
+ break;
+
+ if (strcmp(stktable_data_types[idx].name, name) == 0)
+ return -1;
+ }
+ }
+
+ if (idx >= STKTABLE_DATA_TYPES)
+ return -1;
+
+ if (stktable_data_types[idx].name != NULL)
+ return -1;
+
+ stktable_data_types[idx].name = name;
+ stktable_data_types[idx].std_type = std_type;
+ stktable_data_types[idx].arg_type = arg_type;
+ return idx;
+}
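+
+/* Illustrative use, e.g. from a module's init code ("my_counter" is a
+ * hypothetical name, and must point to stable storage as noted above):
+ *
+ *   static int my_dt;
+ *   my_dt = stktable_register_data_store(-1, "my_counter",
+ *                                        STD_T_UINT, ARG_T_NONE);
+ *   if (my_dt < 0)
+ *       // no free slot, or the name was already registered
+ */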
+
+/*
+ * Returns the data type number for the stktable_data_type whose name is <name>,
+ * or <0 if not found.
+ */
+int stktable_get_data_type(char *name)
+{
+ int type;
+
+ for (type = 0; type < STKTABLE_DATA_TYPES; type++) {
+ if (!stktable_data_types[type].name)
+ continue;
+ if (strcmp(name, stktable_data_types[type].name) == 0)
+ return type;
+ }
+ /* For backwards compatibility */
+ if (strcmp(name, "server_name") == 0)
+ return STKTABLE_DT_SERVER_KEY;
+ return -1;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns true if found, false otherwise. The input
+ * type is STR so that input samples are converted to string (since all types
+ * can be converted to strings), then the function casts the string again into
+ * the table's type. This is a double conversion, but in the future we might
+ * support automatic input types to perform the cast on the fly.
+ */
+static int sample_conv_in_table(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = !!ts;
+ smp->flags = SMP_F_VOL_TEST;
+ stktable_release(t, ts);
+ return 1;
+}
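+
+/* Illustrative use of this converter in a configuration (assuming the
+ * usual "in_table" converter name; "abusers" is a hypothetical table):
+ *
+ *   acl known_abuser src,in_table(abusers)
+ *   tcp-request connection reject if known_abuser
+ */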
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the data rate received from clients in bytes/s
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_bytes_in_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_IN_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_BYTES_IN_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
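+
+/* Illustrative use (assuming the usual "table_bytes_in_rate" converter
+ * name; "st_src" is a hypothetical table tracking source addresses):
+ *
+ *   http-request deny if { src,table_bytes_in_rate(st_src) gt 1000000 }
+ *
+ * i.e. deny clients whose upload rate accounted in "st_src" exceeds
+ * roughly 1 MB/s.
+ */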
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of connections for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_conn_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the number of concurrent connections for the
+ * key if the key is present in the table, otherwise zero, so that comparisons
+ * can be easily performed. If the inspected parameter is not stored in the
+ * table, <not found> is returned.
+ */
+static int sample_conv_table_conn_cur(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CUR);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the rate of incoming connections from the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_conn_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_CONN_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the expiration delay for the key if the key is
+ * present in the table, otherwise the default value provided as second argument
+ * if any, if not (no default value), <not found> is returned.
+ */
+static int sample_conv_table_expire(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) { /* key not present */
+ if (arg_p[1].type == ARGT_STOP)
+ return 0;
+
+ /* default value */
+ smp->data.u.sint = arg_p[1].data.sint;
+ return 1;
+ }
+
+ smp->data.u.sint = tick_remain(now_ms, ts->expire);
+
+ stktable_release(t, ts);
+ return 1;
+}
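+
+/* Illustrative use (assuming the usual "table_expire" converter name):
+ * "src,table_expire(st_src,0)" returns the remaining lifetime in ms of the
+ * source's entry in the hypothetical "st_src" table, or the default value
+ * 0 when the key is absent, so the expression always yields an integer.
+ */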
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the time the key remains unused if the key is
+ * present in the table, otherwise the default value provided as second argument
+ * if any, if not (no default value), <not found> is returned.
+ */
+static int sample_conv_table_idle(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) { /* key not present */
+ if (arg_p[1].type == ARGT_STOP)
+ return 0;
+
+ /* default value */
+ smp->data.u.sint = arg_p[1].data.sint;
+ return 1;
+ }
+
+ smp->data.u.sint = tick_remain(tick_remain(now_ms, ts->expire), t->expire);
+
+ stktable_release(t, ts);
+ return 1;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the data rate sent to clients in bytes/s
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_bytes_out_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_OUT_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_BYTES_OUT_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
+ * it up into this table. Returns the value of the GPT[arg_p(0)] tag for the key
+ * if the key is present in the table, otherwise false, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+ unsigned int idx;
+
+ idx = arg_p[0].data.sint;
+
+ t = arg_p[1].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPT, idx);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the value of the GPT0 tag for the key
+ * if the key is present in the table, otherwise false, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpt0(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPT0);
+ if (!ptr)
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPT, 0);
+
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
+ * it up into this table. Returns the value of the GPC[arg_p(0)] counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+ unsigned int idx;
+
+ idx = arg_p[0].data.sint;
+
+ t = arg_p[1].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC, idx);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
+ * it up into this table. Returns the event rate of the GPC[arg_p(0)] counter
+ * for the key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not
+ * stored in the table, <not found> is returned.
+ */
+static int sample_conv_table_gpc_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+ unsigned int idx;
+
+ idx = arg_p[0].data.sint;
+
+ t = arg_p[1].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC_RATE, idx);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the value of the GPC0 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc0(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC0);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC, 0);
+ }
+
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the event rate of the GPC0 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc0_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC0_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC0_RATE].u);
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC_RATE, 0);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC_RATE].u);
+ }
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the value of the GPC1 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc1(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC1);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC, 1);
+ }
+
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the event rate of the GPC1 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc1_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC1_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC1_RATE].u);
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC_RATE, 1);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC_RATE].u);
+ }
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of HTTP request errors
+ * for the key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not stored
+ * in the table, <not found> is returned.
+ */
+static int sample_conv_table_http_err_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the HTTP request error rate for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_http_err_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of HTTP response failures
+ * for the key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not stored
+ * in the table, <not found> is returned.
+ */
+static int sample_conv_table_http_fail_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the HTTP response failure rate for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_http_fail_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of HTTP request for the
+ * key if the key is present in the table, otherwise zero, so that comparisons
+ * can be easily performed. If the inspected parameter is not stored in the
+ * table, <not found> is returned.
+ */
+static int sample_conv_table_http_req_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the HTTP request rate for the key if the key
+ * is present in the table, otherwise zero, so that comparisons can be easily
+ * performed. If the inspected parameter is not stored in the table, <not found>
+ * is returned.
+ */
+static int sample_conv_table_http_req_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_REQ_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the volume of data received from clients in
+ * kbytes if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not
+ * stored in the table, <not found> is returned.
+ */
+static int sample_conv_table_kbytes_in(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_IN_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the volume of data sent to clients in kbytes
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_kbytes_out(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_OUT_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the server ID associated with the key if the
+ * key is present in the table, otherwise zero, so that comparisons can be
+ * easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_server_id(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_ID);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_sint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of sessions for the
+ * key if the key is present in the table, otherwise zero, so that comparisons
+ * can be easily performed. If the inspected parameter is not stored in the
+ * table, <not found> is returned.
+ */
+static int sample_conv_table_sess_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_SESS_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the session rate for the key if the key is
+ * present in the table, otherwise zero, so that comparisons can be easily
+ * performed. If the inspected parameter is not stored in the table, <not found>
+ * is returned.
+ */
+static int sample_conv_table_sess_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_SESS_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_SESS_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the number of concurrent connections tracking
+ * the same key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not
+ * stored in the table, <not found> is returned.
+ */
+static int sample_conv_table_trackers(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts)
+ return 1;
+
+ smp->data.u.sint = ts->ref_cnt;
+
+ stktable_release(t, ts);
+ return 1;
+}
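+
+/* Illustrative configuration example for the table_* converters above,
+ * assuming a table named "st_src" declared elsewhere:
+ *
+ *   backend st_src
+ *       stick-table type ip size 1m expire 10m store http_req_rate(10s)
+ *
+ *   frontend fe
+ *       acl abuser src,table_http_req_rate(st_src) gt 100
+ *
+ * The converter casts the input sample (here "src") to the table's key
+ * type, looks the key up, and returns the stored value, or zero when the
+ * key is absent.
+ */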
+
+/* This function increments the gpc counter at index 'rule->arg.gpc.idx' of the
+ * array on the tracked counter of index 'rule->arg.gpc.sc' stored in the
+ * <stream>, or directly in the session <sess> if <stream> is NULL.
+ *
+ * This function always returns ACT_RET_CONT and parameter flags is unused.
+ */
+static enum act_return action_inc_gpc(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stkctr *stkctr;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpc.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpc.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (ts) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc_rate if it's tracked. Second, update its gpc if tracked. */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC_RATE, rule->arg.gpc.idx);
+ ptr2 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC, rule->arg.gpc.idx);
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u, 1);
+
+ if (ptr2)
+ stktable_data_cast(ptr2, std_t_uint)++;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+ }
+ return ACT_RET_CONT;
+}
+
+/* Same as action_inc_gpc() but for gpc0 only */
+static enum act_return action_inc_gpc0(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stkctr *stkctr;
+ unsigned int period = 0;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpc.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpc.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (ts) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc0_rate if it's tracked. Second, update its gpc0 if tracked. */
+ ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC0_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC0_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC_RATE, 0);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC0);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC, 0);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+
+ if (ptr2)
+ stktable_data_cast(ptr2, std_t_uint)++;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+ }
+ return ACT_RET_CONT;
+}
+
+/* Same as action_inc_gpc() but for gpc1 only */
+static enum act_return action_inc_gpc1(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stkctr *stkctr;
+ unsigned int period = 0;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpc.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpc.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (ts) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc1_rate if it's tracked. Second, update its gpc1 if tracked. */
+ ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC1_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC1_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC_RATE, 1);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC1);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC, 1);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+
+ if (ptr2)
+ stktable_data_cast(ptr2, std_t_uint)++;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+ }
+ return ACT_RET_CONT;
+}
+
+/* This function is a common parser for actions incrementing the GPC
+ * (General Purpose Counters). It understands the formats:
+ *
+ * sc-inc-gpc(<gpc IDX>,<track ID>)
+ * sc-inc-gpc0([<track ID>])
+ * sc-inc-gpc1([<track ID>])
+ *
+ * It returns ACT_RET_PRS_ERR if it fails and <err> is filled with an error
+ * message. Otherwise it returns ACT_RET_PRS_OK.
+ */
+static enum act_parse_ret parse_inc_gpc(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ const char *cmd_name = args[*arg-1];
+ char *error;
+
+ cmd_name += strlen("sc-inc-gpc");
+ if (*cmd_name == '(') {
+ cmd_name++; /* skip the '(' */
+ rule->arg.gpc.idx = strtoul(cmd_name, &error, 10); /* Convert the gpc index. */
+ if (*error != ',') {
+ memprintf(err, "Missing gpc ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ else {
+ cmd_name = error + 1; /* skip the ',' */
+ rule->arg.gpc.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpc.sc >= MAX_SESS_STKCTR) {
+ memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d",
+ args[*arg-1], MAX_SESS_STKCTR-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ rule->action_ptr = action_inc_gpc;
+ }
+ else if (*cmd_name == '0' || *cmd_name == '1') {
+ char c = *cmd_name;
+
+ cmd_name++;
+ if (*cmd_name == '\0') {
+ /* default stick table id. */
+ rule->arg.gpc.sc = 0;
+ } else {
+ /* parse the stick table id. */
+ if (*cmd_name != '(') {
+ memprintf(err, "invalid stick table track ID. Expects %s(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ cmd_name++; /* jump the '(' */
+ rule->arg.gpc.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID. Expects %s(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpc.sc >= MAX_SESS_STKCTR) {
+ memprintf(err, "invalid stick table track ID. The max allowed ID is %d",
+ MAX_SESS_STKCTR-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ if (c == '1')
+ rule->action_ptr = action_inc_gpc1;
+ else
+ rule->action_ptr = action_inc_gpc0;
+ }
+ else {
+ /* neither sc-inc-gpc(...) nor sc-inc-gpc0/1 */
+ memprintf(err, "invalid gpc ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action = ACT_CUSTOM;
+ return ACT_RET_PRS_OK;
+}
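+
+/* Illustrative configuration examples for the three formats accepted by
+ * parse_inc_gpc(), assuming the tracked tables store the matching gpc
+ * entries:
+ *
+ *   http-request sc-inc-gpc0(0)    # legacy gpc0 on track-sc0
+ *   http-request sc-inc-gpc1       # legacy gpc1, track ID defaults to 0
+ *   http-request sc-inc-gpc(2,1)   # slot 2 of the gpc array on track-sc1
+ */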
+
+/* This function sets the gpt at index 'rule->arg.gpt.idx' of the array on the
+ * tracked counter of index 'rule->arg.gpt.sc' stored in the <stream>, or
+ * directly in the session <sess> if <stream> is NULL. The gpt is set to the
+ * value computed by the expression 'rule->arg.gpt.expr', or, if
+ * 'rule->arg.gpt.expr' is NULL, directly to the value of 'rule->arg.gpt.value'.
+ *
+ * This function always returns ACT_RET_CONT and parameter flags is unused.
+ */
+static enum act_return action_set_gpt(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ void *ptr;
+ struct stksess *ts;
+ struct stkctr *stkctr;
+ unsigned int value = 0;
+ struct sample *smp;
+ int smp_opt_dir;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpt.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpt.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (!ts)
+ return ACT_RET_CONT;
+
+ /* Store the sample in the required sc, and ignore errors. */
+ ptr = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPT, rule->arg.gpt.idx);
+ if (ptr) {
+
+ if (!rule->arg.gpt.expr)
+ value = (unsigned int)(rule->arg.gpt.value);
+ else {
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_SES: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ default:
+ send_log(px, LOG_ERR, "stick table: internal error while setting gpt%u.", rule->arg.gpt.idx);
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: internal error while executing setting gpt%u.\n", rule->arg.gpt.idx);
+ return ACT_RET_CONT;
+ }
+
+ /* Fetch and cast the expression. */
+ smp = sample_fetch_as_type(px, sess, s, smp_opt_dir|SMP_OPT_FINAL, rule->arg.gpt.expr, SMP_T_SINT);
+ if (!smp) {
+ send_log(px, LOG_WARNING, "stick table: invalid expression or data type while setting gpt%u.", rule->arg.gpt.idx);
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: invalid expression or data type while setting gpt%u.\n", rule->arg.gpt.idx);
+ return ACT_RET_CONT;
+ }
+ value = (unsigned int)(smp->data.u.sint);
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_data_cast(ptr, std_t_uint) = value;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* Same as action_set_gpt() but for gpt0 only. Always returns ACT_RET_CONT. */
+static enum act_return action_set_gpt0(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ void *ptr;
+ struct stksess *ts;
+ struct stkctr *stkctr;
+ unsigned int value = 0;
+ struct sample *smp;
+ int smp_opt_dir;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpt.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpt.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (!ts)
+ return ACT_RET_CONT;
+
+ /* Store the sample in the required sc, and ignore errors. */
+ ptr = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPT0);
+ if (!ptr)
+ ptr = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPT, 0);
+
+ if (ptr) {
+ if (!rule->arg.gpt.expr)
+ value = (unsigned int)(rule->arg.gpt.value);
+ else {
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_SES: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ default:
+ send_log(px, LOG_ERR, "stick table: internal error while setting gpt0.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: internal error while executing setting gpt0.\n");
+ return ACT_RET_CONT;
+ }
+
+ /* Fetch and cast the expression. */
+ smp = sample_fetch_as_type(px, sess, s, smp_opt_dir|SMP_OPT_FINAL, rule->arg.gpt.expr, SMP_T_SINT);
+ if (!smp) {
+ send_log(px, LOG_WARNING, "stick table: invalid expression or data type while setting gpt0.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: invalid expression or data type while setting gpt0.\n");
+ return ACT_RET_CONT;
+ }
+ value = (unsigned int)(smp->data.u.sint);
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_data_cast(ptr, std_t_uint) = value;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* This function is a parser for the "sc-set-gpt" and "sc-set-gpt0" actions.
+ * It understands the formats:
+ *
+ * sc-set-gpt(<gpt IDX>,<track ID>) <expression>
+ * sc-set-gpt0(<track ID>) <expression>
+ *
+ * It returns ACT_RET_PRS_ERR if it fails and <err> is filled with an error message.
+ * Otherwise, it returns ACT_RET_PRS_OK and the variable 'rule->arg.gpt.expr'
+ * is filled with the pointer to the expression to execute or NULL if the arg
+ * is directly an integer stored into 'rule->arg.gpt.value'.
+ */
+static enum act_parse_ret parse_set_gpt(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ const char *cmd_name = args[*arg-1];
+ char *error;
+ int smp_val;
+
+ cmd_name += strlen("sc-set-gpt");
+ if (*cmd_name == '(') {
+ cmd_name++; /* skip the '(' */
+ rule->arg.gpt.idx = strtoul(cmd_name, &error, 10); /* Convert the gpt index. */
+ if (*error != ',') {
+ memprintf(err, "Missing gpt ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ else {
+ cmd_name = error + 1; /* skip the ',' */
+ rule->arg.gpt.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpt.sc >= MAX_SESS_STKCTR) {
+ memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d",
+ args[*arg-1], MAX_SESS_STKCTR-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ rule->action_ptr = action_set_gpt;
+ }
+ else if (*cmd_name == '0') {
+ cmd_name++;
+ if (*cmd_name == '\0') {
+ /* default stick table id. */
+ rule->arg.gpt.sc = 0;
+ } else {
+ /* parse the stick table id. */
+ if (*cmd_name != '(') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt0(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ cmd_name++; /* jump the '(' */
+ rule->arg.gpt.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt0(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpt.sc >= MAX_SESS_STKCTR) {
+ memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d",
+ args[*arg-1], MAX_SESS_STKCTR-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ rule->action_ptr = action_set_gpt0;
+ }
+ else {
+ /* neither sc-set-gpt(...) nor sc-set-gpt0 */
+ memprintf(err, "invalid gpt ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* value may be either an integer or an expression */
+ rule->arg.gpt.expr = NULL;
+ rule->arg.gpt.value = strtol(args[*arg], &error, 10);
+ if (*error == '\0') {
+ /* valid integer, skip it */
+ (*arg)++;
+ } else {
+ rule->arg.gpt.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.gpt.expr)
+ return ACT_RET_PRS_ERR;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_SES: smp_val = SMP_VAL_FE_SES_ACC; break;
+ case ACT_F_TCP_REQ_CNT: smp_val = SMP_VAL_FE_REQ_CNT; break;
+ case ACT_F_TCP_RES_CNT: smp_val = SMP_VAL_BE_RES_CNT; break;
+ case ACT_F_HTTP_REQ: smp_val = SMP_VAL_FE_HRQ_HDR; break;
+ case ACT_F_HTTP_RES: smp_val = SMP_VAL_BE_HRS_HDR; break;
+ default:
+ memprintf(err, "internal error, unexpected rule->from=%d, please report this bug!", rule->from);
+ return ACT_RET_PRS_ERR;
+ }
+ if (!(rule->arg.gpt.expr->fetch->val & smp_val)) {
+ memprintf(err, "fetch method '%s' extracts information from '%s', none of which is available here", args[*arg-1],
+ sample_src_names(rule->arg.gpt.expr->fetch->use));
+ free(rule->arg.gpt.expr);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ rule->action = ACT_CUSTOM;
+
+ return ACT_RET_PRS_OK;
+}
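+
+/* Illustrative configuration examples for parse_set_gpt(); the value may
+ * be an integer constant or any sample expression converging to an
+ * integer ("x-score" is an assumed request header):
+ *
+ *   http-request sc-set-gpt0(0) 1
+ *   http-request sc-set-gpt(3,0) int(42)
+ *   http-request sc-set-gpt0(1) req.hdr(x-score)
+ */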
+
+/* set temp integer to the number of used entries in the table pointed to by expr.
+ * Accepts exactly 1 argument of type table.
+ */
+static int
+smp_fetch_table_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.t->current;
+ return 1;
+}
+
+/* set temp integer to the number of free entries in the table pointed to by expr.
+ * Accepts exactly 1 argument of type table.
+ */
+static int
+smp_fetch_table_avl(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stktable *t;
+
+ t = args->data.t;
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = t->size - t->current;
+ return 1;
+}
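+
+/* Illustrative example for the two fetches above, assuming a table named
+ * "st_src": refuse new work when fewer than 100 slots remain.
+ *
+ *   acl st_full table_avl(st_src) lt 100
+ *   http-request deny if st_full
+ */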
+
+/* Returns a pointer to a stkctr depending on the fetch keyword name.
+ * It is designed to be called as sc[0-9]_*, sc_* or src_* exclusively.
+ * sc[0-9]_* will return a pointer to the respective field in the
+ * stream <l4>. sc_* requires a UINT argument specifying the stick
+ * counter number. src_* will fill a locally allocated structure with
+ * the table and entry corresponding to what is specified with src_*.
+ * NULL may be returned if the designated stkctr is not tracked. For
+ * the sc_* and sc[0-9]_* forms, an optional table argument may be
+ * passed. When present, the currently tracked key is then looked up
+ * in the specified table instead of the current table. The purpose is
+ * to be able to convert multiple values per key (eg: have gpc0 from
+ * multiple tables). <strm> is allowed to be NULL, in which case only
+ * the session will be consulted.
+ */
+struct stkctr *
+smp_fetch_sc_stkctr(struct session *sess, struct stream *strm, const struct arg *args, const char *kw, struct stkctr *stkctr)
+{
+ struct stkctr *stkptr;
+ struct stksess *stksess;
+ unsigned int num = kw[2] - '0';
+ int arg = 0;
+
+ if (num == '_' - '0') {
+ /* sc_* variant, args[0] = ctr# (mandatory) */
+ num = args[arg++].data.sint;
+ }
+ else if (num > 9) { /* src_* variant, args[0] = table */
+ struct stktable_key *key;
+ struct connection *conn = objt_conn(sess->origin);
+ struct sample smp;
+
+ if (!conn)
+ return NULL;
+
+ /* Fetch source address in a sample. */
+ smp.px = NULL;
+ smp.sess = sess;
+ smp.strm = strm;
+ if (!smp_fetch_src || !smp_fetch_src(empty_arg_list, &smp, "src", NULL))
+ return NULL;
+
+ /* Converts into key. */
+ key = smp_to_stkey(&smp, args->data.t);
+ if (!key)
+ return NULL;
+
+ stkctr->table = args->data.t;
+ stkctr_set_entry(stkctr, stktable_lookup_key(stkctr->table, key));
+ return stkctr;
+ }
+
+ /* Here, <num> contains the counter number from 0 to 9 for
+ * the sc[0-9]_ form, or even higher using sc_(num) if needed.
+ * args[arg] is the first optional argument. We first look up the
+ * ctr from the stream, then from the session if it was not there.
+ * But we must be sure the counter does not exceed MAX_SESS_STKCTR.
+ */
+ if (num >= MAX_SESS_STKCTR)
+ return NULL;
+
+ if (strm)
+ stkptr = &strm->stkctr[num];
+ if (!strm || !stkctr_entry(stkptr)) {
+ stkptr = &sess->stkctr[num];
+ if (!stkctr_entry(stkptr))
+ return NULL;
+ }
+
+ stksess = stkctr_entry(stkptr);
+ if (!stksess)
+ return NULL;
+
+ if (unlikely(args[arg].type == ARGT_TAB)) {
+ /* an alternate table was specified, let's look up the same key there */
+ stkctr->table = args[arg].data.t;
+ stkctr_set_entry(stkctr, stktable_lookup(stkctr->table, stksess));
+ return stkctr;
+ }
+ return stkptr;
+}
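+
+/* Illustrative examples of the three keyword families resolved above
+ * ("other" is an assumed second table storing the same key type):
+ *
+ *   sc0_get_gpc0            # counter number embedded in the keyword
+ *   sc_get_gpc0(4)          # counter number passed as first argument
+ *   sc0_get_gpc0(other)     # tracked key looked up in an alternate table
+ *   src_get_gpc0(other)     # source address looked up directly
+ */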
+
+/* same as smp_fetch_sc_stkctr() but dedicated to src_* and can create
+ * the entry if it doesn't exist yet. This is needed for a few fetch
+ * functions which need to create an entry, such as src_inc_gpc* and
+ * src_clr_gpc*.
+ */
+struct stkctr *
+smp_create_src_stkctr(struct session *sess, struct stream *strm, const struct arg *args, const char *kw, struct stkctr *stkctr)
+{
+ struct stktable_key *key;
+ struct connection *conn = objt_conn(sess->origin);
+ struct sample smp;
+
+ if (strncmp(kw, "src_", 4) != 0)
+ return NULL;
+
+ if (!conn)
+ return NULL;
+
+ /* Fetch source address in a sample. */
+ smp.px = NULL;
+ smp.sess = sess;
+ smp.strm = strm;
+ if (!smp_fetch_src || !smp_fetch_src(empty_arg_list, &smp, "src", NULL))
+ return NULL;
+
+ /* Converts into key. */
+ key = smp_to_stkey(&smp, args->data.t);
+ if (!key)
+ return NULL;
+
+ stkctr->table = args->data.t;
+ stkctr_set_entry(stkctr, stktable_get_entry(stkctr->table, key));
+ return stkctr;
+}
+
+/* set <smp> to a boolean indicating whether the requested stream counter
+ * is currently being tracked or not.
+ * Supports being called as "sc[0-9]_tracked" only.
+ */
+static int
+smp_fetch_sc_tracked(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_BOOL;
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ smp->data.u.sint = !!stkctr;
+
+ /* release the ref count */
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+
+ return 1;
+}
+
+/* set <smp> to the value of the General Purpose Tag whose index is passed
+ * as first arg, from the stream's tracked frontend counters or from the src.
+ * Supports being called as "sc_get_gpt(<gpt-idx>,<sc-idx>[,<table>])" or
+ * "src_get_gpt(<gpt-idx>[,<table>])" only. Value zero is returned if
+ * the key is new or the gpt is not stored.
+ */
+static int
+smp_fetch_sc_get_gpt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPT, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
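+
+/* Illustrative example, assuming the tracked table declares
+ * "store gpt(8)":
+ *
+ *   acl flagged sc_get_gpt(2,0) gt 0   # read gpt slot 2 on track-sc0
+ */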
+
+/* set <smp> to the General Purpose Tag 0 value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc[0-9]_get_gpt0" or "src_get_gpt0" only. Value
+ * zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_get_gpt0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPT0);
+ if (!ptr)
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPT, 0);
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the GPC[args(0)]'s value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc_get_gpc(<gpc-idx>,<sc-idx>[,<table>])" or
+ * "src_get_gpc(<gpc-idx>[,<table>])" only. Value
+ * Value zero is returned if the key is new or gpc is not stored.
+ */
+static int
+smp_fetch_sc_get_gpc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the General Purpose Counter 0 value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc[0-9]_get_gpc0" or "src_get_gpc0" only. Value
+ * zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_get_gpc0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 0);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the General Purpose Counter 1 value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc[0-9]_get_gpc1" or "src_get_gpc1" only. Value
+ * zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_get_gpc1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 1);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the GPC[args(0)]'s event rate from the stream's
+ * tracked frontend counters or from the src.
+ * Supports being called as "sc_gpc_rate(<gpc-idx>,<sc-idx>[,<table])"
+ * or "src_gpc_rate(<gpc-idx>[,<table>])" only.
+ * Value zero is returned if the key is new or gpc_rate is not stored.
+ */
+static int
+smp_fetch_sc_gpc_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
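+
+/* Illustrative example, assuming the tracked table declares
+ * "store gpc_rate(4,10s)":
+ *
+ *   acl fast sc_gpc_rate(1,0) gt 50    # events per 10s on gpc slot 1
+ */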
+
+/* set <smp> to the General Purpose Counter 0's event rate from the stream's
+ * tracked frontend counters or from the src.
+ * Supports being called as "sc[0-9]_gpc0_rate" or "src_gpc0_rate" only.
+ * Value zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_gpc0_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0_RATE);
+ if (ptr) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC0_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 0);
+ if (ptr)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), period);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the General Purpose Counter 1's event rate from the stream's
+ * tracked frontend counters or from the src.
+ * Supports being called as "sc[0-9]_gpc1_rate" or "src_gpc1_rate" only.
+ * Value zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_gpc1_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1_RATE);
+ if (ptr) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC1_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 1);
+ if (ptr)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), period);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* Increment the GPC[args(0)] value from the stream's tracked
+ * frontend counters and return it into temp integer.
+ * Supports being called as "sc_inc_gpc(<gpc-idx>,<sc-idx>[,<table>])"
+ * or "src_inc_gpc(<gpc-idx>[,<table>])" only.
+ */
+static int
+smp_fetch_sc_inc_gpc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr1, *ptr2;
+
+ /* First, update the gpc_rate at this index if it's tracked. Second,
+ * update the gpc itself if tracked. Returns the gpc's value, otherwise
+ * the curr_ctr.
+ */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, idx);
+ ptr2 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, idx);
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (ptr1) {
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u, 1);
+ smp->data.u.sint = (&stktable_data_cast(ptr1, std_t_frqp))->curr_ctr;
+ }
+
+ if (ptr2)
+ smp->data.u.sint = ++stktable_data_cast(ptr2, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ else if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
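+
+/* Illustrative example: because this fetch increments as a side effect,
+ * it is typically used inside an ACL so the bump happens only when the
+ * rule is evaluated (assumes a table "st_src" with "store gpc(4)"):
+ *
+ *   acl mark_abuse src_inc_gpc(0,st_src) gt 10
+ *   tcp-request connection reject if mark_abuse
+ */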
+
+/* Increment the General Purpose Counter 0 value from the stream's tracked
+ * frontend counters and return it into temp integer.
+ * Supports being called as "sc[0-9]_inc_gpc0" or "src_inc_gpc0" only.
+ */
+static int
+smp_fetch_sc_inc_gpc0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period = 0;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc0_rate if it's tracked. Second, update its
+ * gpc0 if tracked. Returns gpc0's value, otherwise the curr_ctr.
+ */
+ ptr1 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC0_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 0);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 0);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (ptr1) {
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+ smp->data.u.sint = (&stktable_data_cast(ptr1, std_t_frqp))->curr_ctr;
+ }
+
+ if (ptr2)
+ smp->data.u.sint = ++stktable_data_cast(ptr2, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ else if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* Increment the General Purpose Counter 1 value from the stream's tracked
+ * frontend counters and return it into temp integer.
+ * Supports being called as "sc[0-9]_inc_gpc1" or "src_inc_gpc1" only.
+ */
+static int
+smp_fetch_sc_inc_gpc1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period = 0;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc1_rate if it's tracked. Second, update its
+ * gpc1 if tracked. Returns gpc1's value, otherwise the curr_ctr.
+ */
+ ptr1 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC1_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 1);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 1);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (ptr1) {
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+ smp->data.u.sint = (&stktable_data_cast(ptr1, std_t_frqp))->curr_ctr;
+ }
+
+ if (ptr2)
+ smp->data.u.sint = ++stktable_data_cast(ptr2, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ else if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* Clear the GPC[args(0)] value from the stream's tracked
+ * frontend counters and return its previous value into temp integer.
+ * Supports being called as "sc_clr_gpc(<gpc-idx>,<sc-idx>[,<table>])"
+ * or "src_clr_gpc(<gpc-idx>[,<table>])" only.
+ */
+static int
+smp_fetch_sc_clr_gpc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+ stktable_data_cast(ptr, std_t_uint) = 0;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ return 1;
+}
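+
+/* Illustrative example: read-and-reset in one operation, e.g. to consume
+ * a counter set by an earlier rule (assumes "store gpc(4)" and a
+ * transaction-scoped variable):
+ *
+ *   http-request set-var(txn.bumps) sc_clr_gpc(0,0)
+ */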
+
+/* Clear the General Purpose Counter 0 value from the stream's tracked
+ * frontend counters and return its previous value into temp integer.
+ * Supports being called as "sc[0-9]_clr_gpc0" or "src_clr_gpc0" only.
+ */
+static int
+smp_fetch_sc_clr_gpc0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 0);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+ stktable_data_cast(ptr, std_t_uint) = 0;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ return 1;
+}
+
+/* Clear the General Purpose Counter 1 value from the stream's tracked
+ * frontend counters and return its previous value into temp integer.
+ * Supports being called as "sc[0-9]_clr_gpc1" or "src_clr_gpc1" only.
+ */
+static int
+smp_fetch_sc_clr_gpc1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 1);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+ stktable_data_cast(ptr, std_t_uint) = 0;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of connections from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_conn_cnt" or
+ * "src_conn_cnt" only.
+ */
+static int
+smp_fetch_sc_conn_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_CONN_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the connection rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_conn_rate" or "src_conn_rate"
+ * only.
+ */
+static int
+smp_fetch_sc_conn_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_CONN_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_CONN_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set temp integer to the number of connections from the stream's source address
+ * in the table pointed to by expr, after updating it.
+ * Accepts exactly 1 argument of type table.
+ */
+static int
+smp_fetch_src_updt_conn_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct stksess *ts;
+ struct stktable_key *key;
+ void *ptr;
+ struct stktable *t;
+
+ if (!conn)
+ return 0;
+
+ /* Fetch source address in a sample. */
+ if (!smp_fetch_src || !smp_fetch_src(empty_arg_list, smp, "src", NULL))
+ return 0;
+
+ /* Converts into key. */
+ key = smp_to_stkey(smp, args->data.t);
+ if (!key)
+ return 0;
+
+ t = args->data.t;
+
+ if ((ts = stktable_get_entry(t, key)) == NULL)
+ /* entry does not exist and could not be created */
+ return 0;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CNT);
+ if (!ptr) {
+ return 0; /* parameter not stored in this table */
+ }
+
+ smp->data.type = SMP_T_SINT;
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ smp->data.u.sint = ++stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ smp->flags = SMP_F_VOL_TEST;
+
+ stktable_touch_local(t, ts, 1);
+
+ return 1;
+}
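+
+/* Illustrative example: count and bound connections per source without an
+ * explicit track-sc rule (assumes a table "st_src" storing conn_cnt):
+ *
+ *   tcp-request connection reject if { src_updt_conn_cnt(st_src) gt 1000 }
+ */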
+
+/* set <smp> to the number of concurrent connections from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_conn_cur" or
+ * "src_conn_cur" only.
+ */
+static int
+smp_fetch_sc_conn_cur(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_CONN_CUR);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of streams from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_sess_cnt" or
+ * "src_sess_cnt" only.
+ */
+static int
+smp_fetch_sc_sess_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the stream rate from the stream's tracked frontend counters.
+ * Supports being called as "sc[0-9]_sess_rate" or "src_sess_rate" only.
+ */
+static int
+smp_fetch_sc_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_SESS_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of HTTP requests from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_http_req_cnt" or
+ * "src_http_req_cnt" only.
+ */
+static int
+smp_fetch_sc_http_req_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_REQ_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the HTTP request rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_http_req_rate" or
+ * "src_http_req_rate" only.
+ */
+static int
+smp_fetch_sc_http_req_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_REQ_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_HTTP_REQ_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
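+
+/* Illustrative example: the canonical per-source request rate limit,
+ * assuming a table "st_src" storing http_req_rate(10s):
+ *
+ *   http-request track-sc0 src table st_src
+ *   http-request deny deny_status 429 if { sc0_http_req_rate gt 100 }
+ */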
+
+/* set <smp> to the cumulated number of HTTP request errors from the stream's
+ * tracked frontend counters. Supports being called as "sc[0-9]_http_err_cnt" or
+ * "src_http_err_cnt" only.
+ */
+static int
+smp_fetch_sc_http_err_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_ERR_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the HTTP request error rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_http_err_rate" or
+ * "src_http_err_rate" only.
+ */
+static int
+smp_fetch_sc_http_err_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_ERR_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of HTTP response failures from the stream's
+ * tracked frontend counters. Supports being called as "sc[0-9]_http_fail_cnt" or
+ * "src_http_fail_cnt" only.
+ */
+static int
+smp_fetch_sc_http_fail_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_FAIL_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the HTTP response failure rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_http_fail_rate" or
+ * "src_http_fail_rate" only.
+ */
+static int
+smp_fetch_sc_http_fail_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_FAIL_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the number of kbytes received from clients, as found in the
+ * stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_kbytes_in" or "src_kbytes_in" only.
+ */
+static int
+smp_fetch_sc_kbytes_in(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_IN_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
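+		/* the counter is stored in bytes; shifting right by 10
+		 * converts it to the kbytes advertised by this fetch
+		 */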
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the data rate received from clients in bytes/s, as found
+ * in the stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_bytes_in_rate" or "src_bytes_in_rate" only.
+ */
+static int
+smp_fetch_sc_bytes_in_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_IN_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_BYTES_IN_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the number of kbytes sent to clients, as found in the
+ * stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_kbytes_out" or "src_kbytes_out" only.
+ */
+static int
+smp_fetch_sc_kbytes_out(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_OUT_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
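+		/* as for kbytes_in above: the counter is stored in bytes,
+		 * so shift right by 10 to report kbytes
+		 */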
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the data rate sent to clients in bytes/s, as found in the
+ * stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_bytes_out_rate" or "src_bytes_out_rate" only.
+ */
+static int
+smp_fetch_sc_bytes_out_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_OUT_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_BYTES_OUT_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the number of active trackers on the SC entry in the stream's
+ * tracked frontend counters. Supports being called as "sc[0-9]_trackers" only.
+ */
+static int
+smp_fetch_sc_trackers(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ if (stkctr == &tmpstkctr) {
+ smp->data.u.sint = stkctr_entry(stkctr) ? (stkctr_entry(stkctr)->ref_cnt-1) : 0;
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ else {
+ smp->data.u.sint = stkctr_entry(stkctr) ? stkctr_entry(stkctr)->ref_cnt : 0;
+ }
+
+ return 1;
+}
+
+
+/* The functions below are used to manipulate table contents from the CLI.
+ * There are 3 main actions, "clear", "set" and "show". The code is shared
+ * between all actions, and the action is encoded in the void *private in
+ * the appctx as well as in the keyword registration, among one of the
+ * following values.
+ */
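+
+/* For illustration, these actions map onto CLI commands of the following
+ * shape (the table name and key below are made up; the exact syntax is the
+ * one documented in the keyword registration at the end of this file):
+ *   show  table t1 data.conn_rate gt 100
+ *   clear table t1 key 10.0.0.1
+ *   set   table t1 key 10.0.0.1 data.gpc0 1
+ */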
+
+enum {
+ STK_CLI_ACT_CLR,
+ STK_CLI_ACT_SET,
+ STK_CLI_ACT_SHOW,
+};
+
+/* Dump the status of a table to a stream connector's
+ * read buffer. It returns 0 if the output buffer is full
+ * and the function needs to be called again, otherwise non-zero.
+ */
+static int table_dump_head_to_buffer(struct buffer *msg,
+ struct appctx *appctx,
+ struct stktable *t, struct stktable *target)
+{
+ struct stream *s = __sc_strm(appctx_sc(appctx));
+
+ chunk_appendf(msg, "# table: %s, type: %s, size:%d, used:%d\n",
+ t->id, stktable_types[t->type].kw, t->size, t->current);
+
+ /* any other information should be dumped here */
+
+ if (target && (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) < ACCESS_LVL_OPER)
+ chunk_appendf(msg, "# contents not dumped due to insufficient privileges\n");
+
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
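+
+/* For reference, the header emitted above looks like this (values made up):
+ *   # table: t1, type: ip, size:204800, used:12
+ */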
+
+/* Dump a table entry to a stream connector's
+ * read buffer. It returns 0 if the output buffer is full
+ * and the function needs to be called again, otherwise non-zero.
+ */
+static int table_dump_entry_to_buffer(struct buffer *msg,
+ struct appctx *appctx,
+ struct stktable *t, struct stksess *entry)
+{
+ int dt;
+
+ chunk_appendf(msg, "%p:", entry);
+
+ if (t->type == SMP_T_IPV4) {
+ char addr[INET_ADDRSTRLEN];
+ inet_ntop(AF_INET, (const void *)&entry->key.key, addr, sizeof(addr));
+ chunk_appendf(msg, " key=%s", addr);
+ }
+ else if (t->type == SMP_T_IPV6) {
+ char addr[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, (const void *)&entry->key.key, addr, sizeof(addr));
+ chunk_appendf(msg, " key=%s", addr);
+ }
+ else if (t->type == SMP_T_SINT) {
+ chunk_appendf(msg, " key=%u", read_u32(entry->key.key));
+ }
+ else if (t->type == SMP_T_STR) {
+ chunk_appendf(msg, " key=");
+ dump_text(msg, (const char *)entry->key.key, t->key_size);
+ }
+ else {
+ chunk_appendf(msg, " key=");
+ dump_binary(msg, (const char *)entry->key.key, t->key_size);
+ }
+
+ chunk_appendf(msg, " use=%d exp=%d", entry->ref_cnt - 1, tick_remain(now_ms, entry->expire));
+
+ for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) {
+ void *ptr;
+
+ if (t->data_ofs[dt] == 0)
+ continue;
+ if (stktable_data_types[dt].is_array) {
+ char tmp[16] = {};
+ const char *name_pfx = stktable_data_types[dt].name;
+ const char *name_sfx = NULL;
+ unsigned int idx = 0;
+ int i = 0;
+
+			/* split the name to insert the index before its first '_',
+			 * for example: 'gpc3_rate' if the array name is 'gpc_rate'.
+ */
+ for (i = 0 ; i < (sizeof(tmp) - 1); i++) {
+ if (!name_pfx[i])
+ break;
+ if (name_pfx[i] == '_') {
+ name_pfx = &tmp[0];
+ name_sfx = &stktable_data_types[dt].name[i];
+ break;
+ }
+ tmp[i] = name_pfx[i];
+ }
+
+ ptr = stktable_data_ptr_idx(t, entry, dt, idx);
+ while (ptr) {
+ if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
+ chunk_appendf(msg, " %s%u%s(%u)=", name_pfx, idx, name_sfx ? name_sfx : "", t->data_arg[dt].u);
+ else
+ chunk_appendf(msg, " %s%u%s=", name_pfx, idx, name_sfx ? name_sfx : "");
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ chunk_appendf(msg, "%d", stktable_data_cast(ptr, std_t_sint));
+ break;
+ case STD_T_UINT:
+ chunk_appendf(msg, "%u", stktable_data_cast(ptr, std_t_uint));
+ break;
+ case STD_T_ULL:
+ chunk_appendf(msg, "%llu", stktable_data_cast(ptr, std_t_ull));
+ break;
+ case STD_T_FRQP:
+ chunk_appendf(msg, "%u",
+ read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[dt].u));
+ break;
+ }
+ ptr = stktable_data_ptr_idx(t, entry, dt, ++idx);
+ }
+ continue;
+ }
+ if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
+ chunk_appendf(msg, " %s(%u)=", stktable_data_types[dt].name, t->data_arg[dt].u);
+ else
+ chunk_appendf(msg, " %s=", stktable_data_types[dt].name);
+
+ ptr = stktable_data_ptr(t, entry, dt);
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ chunk_appendf(msg, "%d", stktable_data_cast(ptr, std_t_sint));
+ break;
+ case STD_T_UINT:
+ chunk_appendf(msg, "%u", stktable_data_cast(ptr, std_t_uint));
+ break;
+ case STD_T_ULL:
+ chunk_appendf(msg, "%llu", stktable_data_cast(ptr, std_t_ull));
+ break;
+ case STD_T_FRQP:
+ chunk_appendf(msg, "%u",
+ read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[dt].u));
+ break;
+ case STD_T_DICT: {
+ struct dict_entry *de;
+ de = stktable_data_cast(ptr, std_t_dict);
+ chunk_appendf(msg, "%s", de ? (char *)de->value.key : "-");
+ break;
+ }
+ }
+ }
+ chunk_appendf(msg, "\n");
+
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
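+
+/* For reference, an entry line emitted above for an "ip" table storing
+ * conn_cnt and http_req_rate(10s) looks like this (pointer and values are
+ * made up; the rate's period is reported in milliseconds):
+ *   0x7f3c2c0008a0: key=192.0.2.10 use=0 exp=28000 conn_cnt=3 http_req_rate(10000)=7
+ */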
+
+/* appctx context used by the "show table" command */
+struct show_table_ctx {
+ void *target; /* table we want to dump, or NULL for all */
+ struct stktable *t; /* table being currently dumped (first if NULL) */
+ struct stksess *entry; /* last entry we were trying to dump (or first if NULL) */
+ long long value[STKTABLE_FILTER_LEN]; /* value to compare against */
+ signed char data_type[STKTABLE_FILTER_LEN]; /* type of data to compare, or -1 if none */
+ signed char data_op[STKTABLE_FILTER_LEN]; /* operator (STD_OP_*) when data_type set */
+ enum {
+		STATE_NEXT = 0,    /* t points to next table, entry=NULL */
+		STATE_DUMP,        /* t points to curr table, entry is valid, refcount held */
+ STATE_DONE, /* done dumping */
+ } state;
+ char action; /* action on the table : one of STK_CLI_ACT_* */
+};
+
+/* Processes a single table entry matching a specific key passed as argument.
+ * Returns 0 if it wants to be called again, 1 if it has finished processing.
+ */
+static int table_process_entry_per_key(struct appctx *appctx, char **args)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ struct stktable *t = ctx->target;
+ struct stksess *ts;
+ uint32_t uint32_key;
+ unsigned char ip6_key[sizeof(struct in6_addr)];
+ long long value;
+ int data_type;
+ int cur_arg;
+ void *ptr;
+ struct freq_ctr *frqp;
+
+ if (!*args[4])
+ return cli_err(appctx, "Key value expected\n");
+
+ switch (t->type) {
+ case SMP_T_IPV4:
+ uint32_key = htonl(inetaddr_host(args[4]));
+ static_table_key.key = &uint32_key;
+ break;
+ case SMP_T_IPV6:
+ if (inet_pton(AF_INET6, args[4], ip6_key) <= 0)
+ return cli_err(appctx, "Invalid key\n");
+ static_table_key.key = &ip6_key;
+ break;
+ case SMP_T_SINT:
+ {
+ char *endptr;
+ unsigned long val;
+ errno = 0;
+ val = strtoul(args[4], &endptr, 10);
+ if ((errno == ERANGE && val == ULONG_MAX) ||
+ (errno != 0 && val == 0) || endptr == args[4] ||
+ val > 0xffffffff)
+ return cli_err(appctx, "Invalid key\n");
+ uint32_key = (uint32_t) val;
+ static_table_key.key = &uint32_key;
+ break;
+ }
+ case SMP_T_STR:
+ static_table_key.key = args[4];
+ static_table_key.key_len = strlen(args[4]);
+ break;
+ default:
+ switch (ctx->action) {
+ case STK_CLI_ACT_SHOW:
+ return cli_err(appctx, "Showing keys from tables of type other than ip, ipv6, string and integer is not supported\n");
+ case STK_CLI_ACT_CLR:
+ return cli_err(appctx, "Removing keys from tables of type other than ip, ipv6, string and integer is not supported\n");
+ case STK_CLI_ACT_SET:
+ return cli_err(appctx, "Inserting keys into tables of type other than ip, ipv6, string and integer is not supported\n");
+ default:
+ return cli_err(appctx, "Unknown action\n");
+ }
+ }
+
+ /* check permissions */
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ switch (ctx->action) {
+ case STK_CLI_ACT_SHOW:
+ ts = stktable_lookup_key(t, &static_table_key);
+ if (!ts)
+ return 1;
+ chunk_reset(&trash);
+ if (!table_dump_head_to_buffer(&trash, appctx, t, t)) {
+ stktable_release(t, ts);
+ return 0;
+ }
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ if (!table_dump_entry_to_buffer(&trash, appctx, t, ts)) {
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_release(t, ts);
+ return 0;
+ }
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_release(t, ts);
+ break;
+
+ case STK_CLI_ACT_CLR:
+ ts = stktable_lookup_key(t, &static_table_key);
+ if (!ts)
+ return 1;
+
+ if (!stksess_kill(t, ts, 1)) {
+ /* don't delete an entry which is currently referenced */
+ return cli_err(appctx, "Entry currently in use, cannot remove\n");
+ }
+ break;
+
+ case STK_CLI_ACT_SET:
+ ts = stktable_get_entry(t, &static_table_key);
+ if (!ts) {
+			/* failed to allocate a new entry */
+ return cli_err(appctx, "Unable to allocate a new entry\n");
+ }
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+ for (cur_arg = 5; *args[cur_arg]; cur_arg += 2) {
+ if (strncmp(args[cur_arg], "data.", 5) != 0) {
+ cli_err(appctx, "\"data.<type>\" followed by a value expected\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ data_type = stktable_get_data_type(args[cur_arg] + 5);
+ if (data_type < 0) {
+ cli_err(appctx, "Unknown data type\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ if (!t->data_ofs[data_type]) {
+ cli_err(appctx, "Data type not stored in this table\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ if (!*args[cur_arg+1] || strl2llrc(args[cur_arg+1], strlen(args[cur_arg+1]), &value) != 0) {
+ cli_err(appctx, "Require a valid integer value to store\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ ptr = stktable_data_ptr(t, ts, data_type);
+
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT:
+ stktable_data_cast(ptr, std_t_sint) = value;
+ break;
+ case STD_T_UINT:
+ stktable_data_cast(ptr, std_t_uint) = value;
+ break;
+ case STD_T_ULL:
+ stktable_data_cast(ptr, std_t_ull) = value;
+ break;
+ case STD_T_FRQP:
+ /* We set both the current and previous values. That way
+ * the reported frequency is stable during all the period
+ * then slowly fades out. This allows external tools to
+ * push measures without having to update them too often.
+ */
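+				/* e.g. pushing 50 into a 10s rate reports roughly
+				 * 50 until the period rotates, then the reported
+				 * value fades linearly to 0 over the next period.
+				 */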
+ frqp = &stktable_data_cast(ptr, std_t_frqp);
+				/* The first bit is reserved for the freq_ctr lock.
+				 * Note: here we're still protected by the stksess lock,
+				 * so we don't need to update the freq_ctr using its
+				 * internal lock.
+				 */
+ frqp->curr_tick = now_ms & ~0x1;
+ frqp->prev_ctr = 0;
+ frqp->curr_ctr = value;
+ break;
+ }
+ }
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ break;
+
+ default:
+ return cli_err(appctx, "Unknown action\n");
+ }
+ return 1;
+}
+
+/* Prepares the appctx fields with the data-based filters from the command line.
+ * Returns 0 if the dump can proceed, 1 if it has ended processing.
+ */
+static int table_prepare_data_request(struct appctx *appctx, char **args)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ int i;
+ char *err = NULL;
+
+ if (ctx->action != STK_CLI_ACT_SHOW && ctx->action != STK_CLI_ACT_CLR)
+ return cli_err(appctx, "content-based lookup is only supported with the \"show\" and \"clear\" actions\n");
+
+ for (i = 0; i < STKTABLE_FILTER_LEN; i++) {
+ if (i > 0 && !*args[3+3*i]) // number of filter entries can be less than STKTABLE_FILTER_LEN
+ break;
+ /* condition on stored data value */
+ ctx->data_type[i] = stktable_get_data_type(args[3+3*i] + 5);
+ if (ctx->data_type[i] < 0)
+ return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Unknown data type\n", i + 1));
+
+ if (!((struct stktable *)ctx->target)->data_ofs[ctx->data_type[i]])
+ return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Data type not stored in this table\n", i + 1));
+
+ ctx->data_op[i] = get_std_op(args[4+3*i]);
+ if (ctx->data_op[i] < 0)
+			return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Require an operator among \"eq\", \"ne\", \"le\", \"ge\", \"lt\", \"gt\"\n", i + 1));
+
+ if (!*args[5+3*i] || strl2llrc(args[5+3*i], strlen(args[5+3*i]), &ctx->value[i]) != 0)
+ return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Require a valid integer value to compare against\n", i + 1));
+ }
+
+ if (*args[3+3*i]) {
+ return cli_dynerr(appctx, memprintf(&err, "Detected extra data in filter, %ith word of input, after '%s'\n", 3+3*i + 1, args[2+3*i]));
+ }
+
+ /* OK we're done, all the fields are set */
+ return 0;
+}
+
+/* returns 0 if it wants to be called again, 1 if it has ended processing */
+static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_table_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int i;
+
+ for (i = 0; i < STKTABLE_FILTER_LEN; i++)
+ ctx->data_type[i] = -1;
+ ctx->target = NULL;
+ ctx->entry = NULL;
+ ctx->action = (long)private; // keyword argument, one of STK_CLI_ACT_*
+
+ if (*args[2]) {
+ ctx->t = ctx->target = stktable_find_by_name(args[2]);
+ if (!ctx->target)
+ return cli_err(appctx, "No such table\n");
+ }
+ else {
+ ctx->t = stktables_list;
+ if (ctx->action != STK_CLI_ACT_SHOW)
+ goto err_args;
+ return 0;
+ }
+
+ if (strcmp(args[3], "key") == 0)
+ return table_process_entry_per_key(appctx, args);
+ else if (strncmp(args[3], "data.", 5) == 0)
+ return table_prepare_data_request(appctx, args);
+ else if (*args[3])
+ goto err_args;
+
+ return 0;
+
+err_args:
+ switch (ctx->action) {
+ case STK_CLI_ACT_SHOW:
+ return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> and key <key>\n");
+ case STK_CLI_ACT_CLR:
+ return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key>\n");
+ case STK_CLI_ACT_SET:
+ return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]*\n");
+ default:
+ return cli_err(appctx, "Unknown action\n");
+ }
+}
+
+/* This function is used to deal with table operations (dump or clear depending
+ * on the action stored in the appctx's svcctx). It returns 0 if the output buffer is
+ * full and it needs to be called again, otherwise non-zero.
+ */
+static int cli_io_handler_table(struct appctx *appctx)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct ebmb_node *eb;
+ int skip_entry;
+ int show = ctx->action == STK_CLI_ACT_SHOW;
+
+ /*
+ * We have 3 possible states in ctx->state :
+	 *   - STATE_NEXT : the table pointer points to the next table to
+	 *     dump, the entry pointer is NULL ;
+	 *   - STATE_DUMP : the table pointer points to the current table
+ * and the entry pointer points to the next entry to be dumped,
+ * and the refcount on the next entry is held ;
+ * - STATE_DONE : nothing left to dump, the buffer may contain some
+ * data though.
+ */
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW))) {
+ /* in case of abort, remove any refcount we might have set on an entry */
+ if (ctx->state == STATE_DUMP) {
+ stksess_kill_if_expired(ctx->t, ctx->entry, 1);
+ }
+ return 1;
+ }
+
+ chunk_reset(&trash);
+
+ while (ctx->state != STATE_DONE) {
+ switch (ctx->state) {
+ case STATE_NEXT:
+ if (!ctx->t ||
+ (ctx->target &&
+ ctx->t != ctx->target)) {
+ ctx->state = STATE_DONE;
+ break;
+ }
+
+ if (ctx->t->size) {
+ if (show && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target))
+ return 0;
+
+ if (ctx->target &&
+ (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) {
+ /* dump entries only if table explicitly requested */
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ eb = ebmb_first(&ctx->t->keys);
+ if (eb) {
+ ctx->entry = ebmb_entry(eb, struct stksess, key);
+ ctx->entry->ref_cnt++;
+ ctx->state = STATE_DUMP;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ break;
+ }
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ }
+ }
+ ctx->t = ctx->t->next;
+ break;
+
+ case STATE_DUMP:
+ skip_entry = 0;
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ctx->entry->lock);
+
+ if (ctx->data_type[0] >= 0) {
+ /* we're filtering on some data contents */
+ void *ptr;
+ int dt, i;
+ signed char op;
+ long long data, value;
+
+
+ for (i = 0; i < STKTABLE_FILTER_LEN; i++) {
+ if (ctx->data_type[i] == -1)
+ break;
+ dt = ctx->data_type[i];
+ ptr = stktable_data_ptr(ctx->t,
+ ctx->entry,
+ dt);
+
+ data = 0;
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ data = stktable_data_cast(ptr, std_t_sint);
+ break;
+ case STD_T_UINT:
+ data = stktable_data_cast(ptr, std_t_uint);
+ break;
+ case STD_T_ULL:
+ data = stktable_data_cast(ptr, std_t_ull);
+ break;
+ case STD_T_FRQP:
+ data = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ ctx->t->data_arg[dt].u);
+ break;
+ }
+
+ op = ctx->data_op[i];
+ value = ctx->value[i];
+
+				/* skip the entry if the data does not match the test
+				 * against the value (e.g. with op "ge", entries whose
+				 * data is below the value are skipped) */
+ if ((data < value &&
+ (op == STD_OP_EQ || op == STD_OP_GT || op == STD_OP_GE)) ||
+ (data == value &&
+ (op == STD_OP_NE || op == STD_OP_GT || op == STD_OP_LT)) ||
+ (data > value &&
+ (op == STD_OP_EQ || op == STD_OP_LT || op == STD_OP_LE))) {
+ skip_entry = 1;
+ break;
+ }
+ }
+ }
+
+ if (show && !skip_entry &&
+ !table_dump_entry_to_buffer(&trash, appctx, ctx->t, ctx->entry)) {
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock);
+ return 0;
+ }
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock);
+
+ HA_SPIN_LOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ ctx->entry->ref_cnt--;
+
+ eb = ebmb_next(&ctx->entry->key);
+ if (eb) {
+ struct stksess *old = ctx->entry;
+ ctx->entry = ebmb_entry(eb, struct stksess, key);
+ if (show)
+ __stksess_kill_if_expired(ctx->t, old);
+ else if (!skip_entry && !ctx->entry->ref_cnt)
+ __stksess_kill(ctx->t, old);
+ ctx->entry->ref_cnt++;
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ break;
+ }
+
+
+ if (show)
+ __stksess_kill_if_expired(ctx->t, ctx->entry);
+ else if (!skip_entry && !ctx->entry->ref_cnt)
+ __stksess_kill(ctx->t, ctx->entry);
+
+ HA_SPIN_UNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+
+ ctx->t = ctx->t->next;
+ ctx->state = STATE_NEXT;
+ break;
+
+ default:
+ break;
+ }
+ }
+ return 1;
+}
+
+static void cli_release_show_table(struct appctx *appctx)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+
+ if (ctx->state == STATE_DUMP) {
+ stksess_kill_if_expired(ctx->t, ctx->entry, 1);
+ }
+}
+
+static void stkt_late_init(void)
+{
+ struct sample_fetch *f;
+
+ f = find_sample_fetch("src", strlen("src"));
+ if (f)
+ smp_fetch_src = f->process;
+}
+
+INITCALL0(STG_INIT, stkt_late_init);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "clear", "table", NULL }, "clear table <table> [<filter>]* : remove an entry from a table (filter: data/key)", cli_parse_table_req, cli_io_handler_table, cli_release_show_table, (void *)STK_CLI_ACT_CLR },
+ { { "set", "table", NULL }, "set table <table> key <k> [data.* <v>]* : update or create a table entry's data", cli_parse_table_req, cli_io_handler_table, NULL, (void *)STK_CLI_ACT_SET },
+ { { "show", "table", NULL }, "show table <table> [<filter>]* : report table usage stats or dump this table's contents (filter: data/key)", cli_parse_table_req, cli_io_handler_table, cli_release_show_table, (void *)STK_CLI_ACT_SHOW },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+static struct action_kw_list tcp_conn_kws = { { }, {
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_conn_keywords_register, &tcp_conn_kws);
+
+static struct action_kw_list tcp_sess_kws = { { }, {
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_sess_kws);
+
+static struct action_kw_list tcp_req_kws = { { }, {
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_kws);
+
+static struct action_kw_list tcp_res_kws = { { }, {
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_kws);
+
+static struct action_kw_list http_req_kws = { { }, {
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static struct action_kw_list http_res_kws = { { }, {
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, {
+ { "sc_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_clr_gpc", smp_fetch_sc_clr_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "sc_conn_cnt", smp_fetch_sc_conn_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_conn_cur", smp_fetch_sc_conn_cur, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_conn_rate", smp_fetch_sc_conn_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpt", smp_fetch_sc_get_gpt, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpt0", smp_fetch_sc_get_gpt0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpc", smp_fetch_sc_get_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpc0", smp_fetch_sc_get_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpc1", smp_fetch_sc_get_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "sc_gpc_rate", smp_fetch_sc_gpc_rate, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_err_rate", smp_fetch_sc_http_err_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_req_rate", smp_fetch_sc_http_req_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_inc_gpc", smp_fetch_sc_inc_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_kbytes_in", smp_fetch_sc_kbytes_in, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc_kbytes_out", smp_fetch_sc_kbytes_out, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc_sess_cnt", smp_fetch_sc_sess_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_sess_rate", smp_fetch_sc_sess_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_tracked", smp_fetch_sc_tracked, ARG2(1,SINT,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc_trackers", smp_fetch_sc_trackers, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_conn_cur", smp_fetch_sc_conn_cur, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_conn_rate", smp_fetch_sc_conn_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc0_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc0_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_sess_rate", smp_fetch_sc_sess_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_tracked", smp_fetch_sc_tracked, ARG1(0,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc0_trackers", smp_fetch_sc_trackers, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_clr_gpc", smp_fetch_sc_clr_gpc, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_conn_cur", smp_fetch_sc_conn_cur, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_conn_rate", smp_fetch_sc_conn_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc1_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc1_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_sess_rate", smp_fetch_sc_sess_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_tracked", smp_fetch_sc_tracked, ARG1(0,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc1_trackers", smp_fetch_sc_trackers, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_conn_cur", smp_fetch_sc_conn_cur, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_conn_rate", smp_fetch_sc_conn_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc2_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc2_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_sess_rate", smp_fetch_sc_sess_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_tracked", smp_fetch_sc_tracked, ARG1(0,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc2_trackers", smp_fetch_sc_trackers, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "src_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_clr_gpc", smp_fetch_sc_clr_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_conn_cur", smp_fetch_sc_conn_cur, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_conn_rate", smp_fetch_sc_conn_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpt" , smp_fetch_sc_get_gpt, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpc", smp_fetch_sc_get_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_gpc_rate", smp_fetch_sc_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_inc_gpc", smp_fetch_sc_inc_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_sess_rate", smp_fetch_sc_sess_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_updt_conn_cnt", smp_fetch_src_updt_conn_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "table_avl", smp_fetch_table_avl, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "table_cnt", smp_fetch_table_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_fetch_keywords);
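+
+/* Illustrative use of the generic indexed gpc/gpt variants registered above,
+ * which take the counter index first and the tracker number second (the ACL
+ * and threshold are made up):
+ *   http-request deny if { sc_get_gpc(0,0) gt 100 }
+ */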
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "in_table", sample_conv_in_table, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_BOOL },
+ { "table_bytes_in_rate", sample_conv_table_bytes_in_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_bytes_out_rate", sample_conv_table_bytes_out_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_conn_cnt", sample_conv_table_conn_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_conn_cur", sample_conv_table_conn_cur, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_conn_rate", sample_conv_table_conn_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_expire", sample_conv_table_expire, ARG2(1,TAB,SINT), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpt", sample_conv_table_gpt, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpt0", sample_conv_table_gpt0, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc", sample_conv_table_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc0", sample_conv_table_gpc0, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc1", sample_conv_table_gpc1, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc_rate", sample_conv_table_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc0_rate", sample_conv_table_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc1_rate", sample_conv_table_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_err_cnt", sample_conv_table_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_err_rate", sample_conv_table_http_err_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_fail_cnt", sample_conv_table_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_fail_rate", sample_conv_table_http_fail_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_req_cnt", sample_conv_table_http_req_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_req_rate", sample_conv_table_http_req_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_idle", sample_conv_table_idle, ARG2(1,TAB,SINT), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_kbytes_in", sample_conv_table_kbytes_in, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_kbytes_out", sample_conv_table_kbytes_out, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_server_id", sample_conv_table_server_id, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_sess_cnt", sample_conv_table_sess_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_sess_rate", sample_conv_table_sess_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_trackers", sample_conv_table_trackers, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
diff --git a/src/stream.c b/src/stream.c
new file mode 100644
index 0000000..224b9b8
--- /dev/null
+++ b/src/stream.c
@@ -0,0 +1,3976 @@
+/*
+ * Stream management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <import/ebistree.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/backend.h>
+#include <haproxy/capture.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/connection.h>
+#include <haproxy/dict.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/fd.h>
+#include <haproxy/filters.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/log.h>
+#include <haproxy/pipe.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/session.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/trace.h>
+#include <haproxy/vars.h>
+
+
+DECLARE_POOL(pool_head_stream, "stream", sizeof(struct stream));
+DECLARE_POOL(pool_head_uniqueid, "uniqueid", UNIQUEID_LEN);
+
+/* incremented by each "show sess" to fix a delimiter between streams */
+unsigned stream_epoch = 0;
+
+/* List of all use-service keywords. */
+static struct list service_keywords = LIST_HEAD_INIT(service_keywords);
+
+
+/* trace source and events */
+static void strm_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * strm - stream
+ * sc - stream connector
+ *   http - http analysis
+ *   tcp  - tcp analysis
+ *
+ * STRM_EV_* macros are defined in <proto/stream.h>
+ */
+static const struct trace_event strm_trace_events[] = {
+ { .mask = STRM_EV_STRM_NEW, .name = "strm_new", .desc = "new stream" },
+ { .mask = STRM_EV_STRM_FREE, .name = "strm_free", .desc = "release stream" },
+ { .mask = STRM_EV_STRM_ERR, .name = "strm_err", .desc = "error during stream processing" },
+ { .mask = STRM_EV_STRM_ANA, .name = "strm_ana", .desc = "stream analyzers" },
+ { .mask = STRM_EV_STRM_PROC, .name = "strm_proc", .desc = "stream processing" },
+
+ { .mask = STRM_EV_CS_ST, .name = "sc_state", .desc = "processing connector states" },
+
+ { .mask = STRM_EV_HTTP_ANA, .name = "http_ana", .desc = "HTTP analyzers" },
+	{ .mask = STRM_EV_HTTP_ERR,    .name = "http_err",    .desc = "error during HTTP analysis" },
+
+ { .mask = STRM_EV_TCP_ANA, .name = "tcp_ana", .desc = "TCP analyzers" },
+	{ .mask = STRM_EV_TCP_ERR,     .name = "tcp_err",     .desc = "error during TCP analysis" },
+
+ { .mask = STRM_EV_FLT_ANA, .name = "flt_ana", .desc = "Filter analyzers" },
+	{ .mask = STRM_EV_FLT_ERR,     .name = "flt_err",     .desc = "error during filter analysis" },
+ {}
+};
+
+static const struct name_desc strm_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the stream */ },
+ /* arg2 */ { },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc strm_trace_decoding[] = {
+#define STRM_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define STRM_VERB_MINIMAL 2
+ { .name="minimal", .desc="report info on streams and connectors" },
+#define STRM_VERB_SIMPLE 3
+ { .name="simple", .desc="add info on request and response channels" },
+#define STRM_VERB_ADVANCED 4
+ { .name="advanced", .desc="add info on channel's buffer for data and developer levels only" },
+#define STRM_VERB_COMPLETE 5
+ { .name="complete", .desc="add info on channel's buffer" },
+ { /* end */ }
+};
+
+struct trace_source trace_strm = {
+ .name = IST("stream"),
+ .desc = "Applicative stream",
+ .arg_def = TRC_ARG1_STRM, // TRACE()'s first argument is always a stream
+ .default_cb = strm_trace,
+ .known_events = strm_trace_events,
+ .lockon_args = strm_trace_lockon_args,
+ .decoding = strm_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_strm
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
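+
+/* Once registered, this source can be driven at runtime from the CLI, for
+ * example (illustrative commands):
+ *   trace stream level developer
+ *   trace stream verbosity advanced
+ *   trace stream start now
+ */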
+
+/* the stream traces always expect that arg1, if non-null, is a stream (from
+ * which we can derive everything), that arg2, if non-null, is an http
+ * transaction, that arg3, if non-null, is an http message.
+ */
+static void strm_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct stream *s = a1;
+ const struct http_txn *txn = a2;
+ const struct http_msg *msg = a3;
+ struct task *task;
+ const struct channel *req, *res;
+ struct htx *htx;
+
+ if (!s || src->verbosity < STRM_VERB_CLEAN)
+ return;
+
+ task = s->task;
+ req = &s->req;
+ res = &s->res;
+ htx = (msg ? htxbuf(&msg->chn->buf) : NULL);
+
+ /* General info about the stream (htx/tcp, id...) */
+ chunk_appendf(&trace_buf, " : [%u,%s]",
+ s->uniq_id, ((s->flags & SF_HTX) ? "HTX" : "TCP"));
+ if (isttest(s->unique_id)) {
+ chunk_appendf(&trace_buf, " id=");
+ b_putist(&trace_buf, s->unique_id);
+ }
+
+ /* Front and back stream connector state */
+ chunk_appendf(&trace_buf, " SC=(%s,%s)",
+ sc_state_str(s->scf->state), sc_state_str(s->scb->state));
+
+ /* If txn is defined, HTTP req/rep states */
+ if (txn)
+ chunk_appendf(&trace_buf, " HTTP=(%s,%s)",
+ h1_msg_state_str(txn->req.msg_state), h1_msg_state_str(txn->rsp.msg_state));
+ if (msg)
+ chunk_appendf(&trace_buf, " %s", ((msg->chn->flags & CF_ISRESP) ? "RESPONSE" : "REQUEST"));
+
+ if (src->verbosity == STRM_VERB_CLEAN)
+ return;
+
+ /* If msg defined, display status-line if possible (verbosity > MINIMAL) */
+ if (src->verbosity > STRM_VERB_MINIMAL && htx && htx_nbblks(htx)) {
+ const struct htx_blk *blk = __htx_get_head_blk(htx);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " - \"%.*s %.*s %.*s\"",
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+
+
+	/* If txn is defined, show info about the HTTP messages, otherwise about the stream connectors. */
+ if (txn) {
+ chunk_appendf(&trace_buf, " - t=%p s=(%p,0x%08x,0x%x) txn.flags=0x%08x, http.flags=(0x%08x,0x%08x) status=%d",
+ task, s, s->flags, s->conn_err_type, txn->flags, txn->req.flags, txn->rsp.flags, txn->status);
+ }
+ else {
+ chunk_appendf(&trace_buf, " - t=%p s=(%p,0x%08x,0x%x) scf=(%p,%d,0x%08x) scb=(%p,%d,0x%08x) retries=%d",
+ task, s, s->flags, s->conn_err_type,
+ s->scf, s->scf->state, s->scf->flags,
+ s->scb, s->scb->state, s->scb->flags,
+ s->conn_retries);
+ }
+
+ if (src->verbosity == STRM_VERB_MINIMAL)
+ return;
+
+
+ /* If txn defined, don't display all channel info */
+ if (src->verbosity == STRM_VERB_SIMPLE || txn) {
+ chunk_appendf(&trace_buf, " req=(%p .fl=0x%08x .exp(r,w,a)=(%u,%u,%u))",
+ req, req->flags, req->rex, req->wex, req->analyse_exp);
+ chunk_appendf(&trace_buf, " res=(%p .fl=0x%08x .exp(r,w,a)=(%u,%u,%u))",
+ res, res->flags, res->rex, res->wex, res->analyse_exp);
+ }
+ else {
+ chunk_appendf(&trace_buf, " req=(%p .fl=0x%08x .ana=0x%08x .exp(r,w,a)=(%u,%u,%u) .o=%lu .tot=%llu .to_fwd=%u)",
+ req, req->flags, req->analysers, req->rex, req->wex, req->analyse_exp,
+ (long)req->output, req->total, req->to_forward);
+ chunk_appendf(&trace_buf, " res=(%p .fl=0x%08x .ana=0x%08x .exp(r,w,a)=(%u,%u,%u) .o=%lu .tot=%llu .to_fwd=%u)",
+ res, res->flags, res->analysers, res->rex, res->wex, res->analyse_exp,
+ (long)res->output, res->total, res->to_forward);
+ }
+
+ if (src->verbosity == STRM_VERB_SIMPLE ||
+ (src->verbosity == STRM_VERB_ADVANCED && src->level < TRACE_LEVEL_DATA))
+ return;
+
+ /* channels' buffer info */
+ if (s->flags & SF_HTX) {
+ struct htx *rqhtx = htxbuf(&req->buf);
+ struct htx *rphtx = htxbuf(&res->buf);
+
+ chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)",
+ rqhtx->data, rqhtx->size, htx_nbblks(rqhtx),
+ rphtx->data, rphtx->size, htx_nbblks(rphtx));
+ }
+ else {
+ chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)",
+ (unsigned int)b_data(&req->buf), b_orig(&req->buf),
+ (unsigned int)b_head_ofs(&req->buf), (unsigned int)b_size(&req->buf),
+ (unsigned int)b_data(&res->buf), b_orig(&res->buf),
+ (unsigned int)b_head_ofs(&res->buf), (unsigned int)b_size(&res->buf));
+ }
+
+	/* If msg is defined, display the HTX info if available (level > USER) */
+ if (src->level > TRACE_LEVEL_USER && htx && htx_nbblks(htx)) {
+ int full = 0;
+
+ /* Full htx info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == STRM_VERB_COMPLETE)
+ full = 1;
+ }
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htx, full);
+ }
+}
+
+/* Upgrades an existing stream for stream connector <sc>. Returns < 0 on
+ * error. This is only valid right after a TCP to H1 upgrade. The stream is
+ * "reactivated" by removing the SF_IGNORE flag, and the right mode must be
+ * set. On success, the <input> buffer is transferred to the stream and thus
+ * points to BUF_NULL. On error, it is unchanged and it is the caller's
+ * responsibility to release it (this never happens for now).
+ */
+int stream_upgrade_from_sc(struct stconn *sc, struct buffer *input)
+{
+ struct stream *s = __sc_strm(sc);
+ const struct mux_ops *mux = sc_mux_ops(sc);
+
+ if (mux) {
+ if (mux->flags & MX_FL_HTX)
+ s->flags |= SF_HTX;
+ }
+
+ if (!b_is_null(input)) {
+		/* Xfer the input buffer to the request channel. <input> will
+		 * then point to BUF_NULL. From this point, it is the stream's
+		 * responsibility to release it.
+		 */
+ s->req.buf = *input;
+ *input = BUF_NULL;
+ s->req.total = (IS_HTX_STRM(s) ? htxbuf(&s->req.buf)->data : b_data(&s->req.buf));
+ s->req.flags |= (s->req.total ? CF_READ_PARTIAL : 0);
+ }
+
+ s->flags &= ~SF_IGNORE;
+
+ task_wakeup(s->task, TASK_WOKEN_INIT);
+ return 0;
+}
+
+/* Callback used to wake up a stream when an input buffer is available. The
+ * stream <s>'s stream connectors are checked for a failed buffer allocation,
+ * as indicated by the presence of the SC_FL_NEED_BUFF flag and the lack of a
+ * buffer, and an input buffer is assigned there (at most one). The function
+ * returns 1 and wakes the stream up if a buffer was taken, otherwise zero.
+ * It's designed to be called from __offer_buffer().
+ */
+int stream_buf_available(void *arg)
+{
+ struct stream *s = arg;
+
+ if (!s->req.buf.size && !s->req.pipe && s->scf->flags & SC_FL_NEED_BUFF &&
+ b_alloc(&s->req.buf))
+ sc_have_buff(s->scf);
+ else if (!s->res.buf.size && !s->res.pipe && s->scb->flags & SC_FL_NEED_BUFF &&
+ b_alloc(&s->res.buf))
+ sc_have_buff(s->scb);
+ else
+ return 0;
+
+ task_wakeup(s->task, TASK_WOKEN_RES);
+ return 1;
+
+}
+
+/* This function is called from the session handler which detects the end of
+ * handshake, in order to complete initialization of a valid stream. It must be
+ * called with a completely initialized session. It returns the pointer to
+ * the newly created stream, or NULL in case of fatal error. The client-facing
+ * end point is assigned to <sc>, which must be valid. The stream's task
+ * is configured with a nice value inherited from the listener's nice if any.
+ * The task's context is set to the new stream, and its function is set to
+ * process_stream(). Target and analysers are null. <input> is used as input
+ * buffer for the request channel and may contain data. On success, it is
+ * transferred to the stream and <input> is set to BUF_NULL. On error, the
+ * <input> buffer is unchanged and it is the caller's responsibility to
+ * release it.
+ */
+struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer *input)
+{
+ struct stream *s;
+ struct task *t;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_NEW);
+ if (unlikely((s = pool_alloc(pool_head_stream)) == NULL))
+ goto out_fail_alloc;
+
+ /* minimum stream initialization required for an embryonic stream is
+ * fairly low. We need very little to execute L4 ACLs, then we need a
+ * task to make the client-side connection live on its own.
+ * - flags
+ * - stick-entry tracking
+ */
+ s->flags = 0;
+ s->logs.logwait = sess->fe->to_log;
+ s->logs.level = 0;
+ tv_zero(&s->logs.tv_request);
+ s->logs.t_queue = -1;
+ s->logs.t_connect = -1;
+ s->logs.t_data = -1;
+ s->logs.t_close = 0;
+ s->logs.bytes_in = s->logs.bytes_out = 0;
+ s->logs.prx_queue_pos = 0; /* we get the number of pending conns before us */
+ s->logs.srv_queue_pos = 0; /* we will get this number soon */
+ s->obj_type = OBJ_TYPE_STREAM;
+
+ s->logs.accept_date = sess->accept_date;
+ s->logs.tv_accept = sess->tv_accept;
+ s->logs.t_handshake = sess->t_handshake;
+ s->logs.t_idle = sess->t_idle;
+
+ /* default logging function */
+ s->do_log = strm_log;
+
+ /* default error reporting function, may be changed by analysers */
+ s->srv_error = default_srv_error;
+
+	/* Initialise the current rule list pointer to NULL. We are sure that
+	 * no rule list matches the NULL pointer.
+	 */
+ s->current_rule_list = NULL;
+ s->current_rule = NULL;
+ s->rules_exp = TICK_ETERNITY;
+ s->last_rule_file = NULL;
+ s->last_rule_line = 0;
+
+ /* Copy SC counters for the stream. We don't touch refcounts because
+ * any reference we have is inherited from the session. Since the stream
+ * doesn't exist without the session, the session's existence guarantees
+ * we don't lose the entry. During the store operation, the stream won't
+ * touch these ones.
+ */
+ memcpy(s->stkctr, sess->stkctr, sizeof(s->stkctr));
+
+ s->sess = sess;
+
+ s->stream_epoch = _HA_ATOMIC_LOAD(&stream_epoch);
+ s->uniq_id = _HA_ATOMIC_FETCH_ADD(&global.req_count, 1);
+
+ /* OK, we're keeping the stream, so let's properly initialize the stream */
+ LIST_INIT(&s->back_refs);
+
+ LIST_INIT(&s->buffer_wait.list);
+ s->buffer_wait.target = s;
+ s->buffer_wait.wakeup_cb = stream_buf_available;
+
+ s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
+ s->pcli_next_pid = 0;
+ s->pcli_flags = 0;
+ s->unique_id = IST_NULL;
+
+ if ((t = task_new_here()) == NULL)
+ goto out_fail_alloc;
+
+ s->task = t;
+ s->pending_events = 0;
+ s->conn_retries = 0;
+ s->conn_exp = TICK_ETERNITY;
+ s->conn_err_type = STRM_ET_NONE;
+ s->prev_conn_state = SC_ST_INI;
+ t->process = process_stream;
+ t->context = s;
+ t->expire = TICK_ETERNITY;
+ if (sess->listener)
+ t->nice = sess->listener->nice;
+
+ /* Note: initially, the stream's backend points to the frontend.
+ * This changes later when switching rules are executed or
+ * when the default backend is assigned.
+ */
+ s->be = sess->fe;
+ s->req_cap = NULL;
+ s->res_cap = NULL;
+
+	/* Initialize all the variable contexts even if not used.
+	 * This permits pruning these contexts without errors.
+	 *
+	 * We need to make sure that those lists are not re-initialized
+	 * by stream-dependent underlying code because we could lose
+	 * track of already defined variables, leading to data inconsistency
+	 * and memory leaks...
+	 *
+	 * For reference: we had a very old bug caused by vars_txn and
+	 * vars_reqres being accidentally re-initialized in http_create_txn()
+	 * (https://github.com/haproxy/haproxy/issues/1935)
+	 */
+ vars_init_head(&s->vars_txn, SCOPE_TXN);
+ vars_init_head(&s->vars_reqres, SCOPE_REQ);
+
+ /* Set SF_HTX flag for HTTP frontends. */
+ if (sess->fe->mode == PR_MODE_HTTP)
+ s->flags |= SF_HTX;
+
+ s->scf = sc;
+ if (sc_attach_strm(s->scf, s) < 0)
+ goto out_fail_attach_scf;
+
+ s->scb = sc_new_from_strm(s, SC_FL_ISBACK);
+ if (!s->scb)
+ goto out_fail_alloc_scb;
+
+ sc_set_state(s->scf, SC_ST_EST);
+ s->scf->hcto = sess->fe->timeout.clientfin;
+
+ if (likely(sess->fe->options2 & PR_O2_INDEPSTR))
+ s->scf->flags |= SC_FL_INDEP_STR;
+
+ s->scb->hcto = TICK_ETERNITY;
+ if (likely(sess->fe->options2 & PR_O2_INDEPSTR))
+ s->scb->flags |= SC_FL_INDEP_STR;
+
+ if (sc_ep_test(sc, SE_FL_WEBSOCKET))
+ s->flags |= SF_WEBSOCKET;
+ if (sc_conn(sc)) {
+ const struct mux_ops *mux = sc_mux_ops(sc);
+
+ if (mux && mux->flags & MX_FL_HTX)
+ s->flags |= SF_HTX;
+ }
+
+ stream_init_srv_conn(s);
+ s->target = sess->listener ? sess->listener->default_target : NULL;
+
+ s->pend_pos = NULL;
+ s->priority_class = 0;
+ s->priority_offset = 0;
+
+ /* init store persistence */
+ s->store_count = 0;
+
+ channel_init(&s->req);
+ s->req.flags |= CF_READ_ATTACHED; /* the producer is already connected */
+ s->req.analysers = sess->listener ? sess->listener->analysers : sess->fe->fe_req_ana;
+
+ if (IS_HTX_STRM(s)) {
+		/* Be sure to have HTTP analysers because in case of a
+		 * "destructive" stream upgrade, they may be missing (e.g.
+		 * TCP>H2)
+		 */
+ s->req.analysers |= AN_REQ_WAIT_HTTP|AN_REQ_HTTP_PROCESS_FE;
+ }
+
+ if (!sess->fe->fe_req_ana) {
+ channel_auto_connect(&s->req); /* don't wait to establish connection */
+ channel_auto_close(&s->req); /* let the producer forward close requests */
+ }
+
+ s->req.rto = sess->fe->timeout.client;
+ s->req.wto = TICK_ETERNITY;
+ s->req.rex = TICK_ETERNITY;
+ s->req.wex = TICK_ETERNITY;
+ s->req.analyse_exp = TICK_ETERNITY;
+
+ channel_init(&s->res);
+ s->res.flags |= CF_ISRESP;
+ s->res.analysers = 0;
+
+ if (sess->fe->options2 & PR_O2_NODELAY) {
+ s->req.flags |= CF_NEVER_WAIT;
+ s->res.flags |= CF_NEVER_WAIT;
+ }
+
+ s->res.wto = sess->fe->timeout.client;
+ s->res.rto = TICK_ETERNITY;
+ s->res.rex = TICK_ETERNITY;
+ s->res.wex = TICK_ETERNITY;
+ s->res.analyse_exp = TICK_ETERNITY;
+
+ s->txn = NULL;
+ s->hlua = NULL;
+
+ s->resolv_ctx.requester = NULL;
+ s->resolv_ctx.hostname_dn = NULL;
+ s->resolv_ctx.hostname_dn_len = 0;
+ s->resolv_ctx.parent = NULL;
+
+ s->tunnel_timeout = TICK_ETERNITY;
+
+ LIST_APPEND(&th_ctx->streams, &s->list);
+
+ if (flt_stream_init(s) < 0 || flt_stream_start(s) < 0)
+ goto out_fail_accept;
+
+ /* just in case the caller would have pre-disabled it */
+ se_will_consume(s->scf->sedesc);
+
+ if (sess->fe->accept && sess->fe->accept(s) < 0)
+ goto out_fail_accept;
+
+ if (!b_is_null(input)) {
+		/* Xfer the input buffer to the request channel. <input> will
+		 * then point to BUF_NULL. From this point, it is the stream's
+		 * responsibility to release it.
+		 */
+ s->req.buf = *input;
+ *input = BUF_NULL;
+ s->req.total = (IS_HTX_STRM(s) ? htxbuf(&s->req.buf)->data : b_data(&s->req.buf));
+ s->req.flags |= (s->req.total ? CF_READ_PARTIAL : 0);
+ }
+
+	/* It is important not to call the wakeup function directly but to
+	 * pass through task_wakeup(), because this one knows how to apply
+	 * priorities to tasks. With multiple threads, we must also be sure
+	 * that the stream is fully initialized before waking the task up,
+	 * which is why task_wakeup() is only called at the very end.
+	 */
+ DBG_TRACE_LEAVE(STRM_EV_STRM_NEW, s);
+ task_wakeup(s->task, TASK_WOKEN_INIT);
+ return s;
+
+ /* Error unrolling */
+ out_fail_accept:
+ flt_stream_release(s, 0);
+ LIST_DELETE(&s->list);
+ sc_free(s->scb);
+ out_fail_alloc_scb:
+ out_fail_attach_scf:
+ task_destroy(t);
+ out_fail_alloc:
+ pool_free(pool_head_stream, s);
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_NEW|STRM_EV_STRM_ERR);
+ return NULL;
+}
+
+/*
+ * Frees the context associated with a stream. It must have been removed first.
+ */
+void stream_free(struct stream *s)
+{
+ struct session *sess = strm_sess(s);
+ struct proxy *fe = sess->fe;
+ struct bref *bref, *back;
+ int i;
+
+ DBG_TRACE_POINT(STRM_EV_STRM_FREE, s);
+
+ /* detach the stream from its own task before even releasing it so
+ * that walking over a task list never exhibits a dying stream.
+ */
+ s->task->context = NULL;
+ __ha_barrier_store();
+
+ pendconn_free(s);
+
+ if (objt_server(s->target)) { /* there may be requests left pending in queue */
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+ if (may_dequeue_tasks(__objt_server(s->target), s->be))
+ process_srv_queue(__objt_server(s->target));
+ }
+
+ if (unlikely(s->srv_conn)) {
+		/* the stream still has a reserved slot on a server, but
+		 * it should normally only be the same as the one above,
+		 * so in practice this should never happen.
+		 */
+ sess_change_server(s, NULL);
+ }
+
+ if (s->req.pipe)
+ put_pipe(s->req.pipe);
+
+ if (s->res.pipe)
+ put_pipe(s->res.pipe);
+
+ /* We may still be present in the buffer wait queue */
+ if (LIST_INLIST(&s->buffer_wait.list))
+ LIST_DEL_INIT(&s->buffer_wait.list);
+
+ if (s->req.buf.size || s->res.buf.size) {
+ int count = !!s->req.buf.size + !!s->res.buf.size;
+
+ b_free(&s->req.buf);
+ b_free(&s->res.buf);
+ offer_buffers(NULL, count);
+ }
+
+ pool_free(pool_head_uniqueid, s->unique_id.ptr);
+ s->unique_id = IST_NULL;
+
+ flt_stream_stop(s);
+ flt_stream_release(s, 0);
+
+ hlua_ctx_destroy(s->hlua);
+ s->hlua = NULL;
+ if (s->txn)
+ http_destroy_txn(s);
+
+	/* ensure the client-side transport layer is destroyed; the explicit
+	 * close below is believed to be useless and is kept commented out
+	 * for reference only.
+	 */
+	/* if (cli_cs) */
+	/*	cs_close(cli_cs); */
+
+ for (i = 0; i < s->store_count; i++) {
+ if (!s->store[i].ts)
+ continue;
+ stksess_free(s->store[i].table, s->store[i].ts);
+ s->store[i].ts = NULL;
+ }
+
+ if (s->resolv_ctx.requester) {
+ __decl_thread(struct resolvers *resolvers = s->resolv_ctx.parent->arg.resolv.resolvers);
+
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+ ha_free(&s->resolv_ctx.hostname_dn);
+ s->resolv_ctx.hostname_dn_len = 0;
+ resolv_unlink_resolution(s->resolv_ctx.requester);
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+
+ pool_free(resolv_requester_pool, s->resolv_ctx.requester);
+ s->resolv_ctx.requester = NULL;
+ }
+
+ if (fe) {
+ if (s->req_cap) {
+ struct cap_hdr *h;
+ for (h = fe->req_cap; h; h = h->next)
+ pool_free(h->pool, s->req_cap[h->index]);
+ }
+
+ if (s->res_cap) {
+ struct cap_hdr *h;
+ for (h = fe->rsp_cap; h; h = h->next)
+ pool_free(h->pool, s->res_cap[h->index]);
+ }
+
+ pool_free(fe->rsp_cap_pool, s->res_cap);
+ pool_free(fe->req_cap_pool, s->req_cap);
+ }
+
+ /* Cleanup all variable contexts. */
+ if (!LIST_ISEMPTY(&s->vars_txn.head))
+ vars_prune(&s->vars_txn, s->sess, s);
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+
+ stream_store_counters(s);
+
+ list_for_each_entry_safe(bref, back, &s->back_refs, users) {
+ /* we have to unlink all watchers. We must not relink them if
+ * this stream was the last one in the list. This is safe to do
+ * here because we're touching our thread's list so we know
+ * that other streams are not active, and the watchers will
+ * only touch their node under thread isolation.
+ */
+ LIST_DEL_INIT(&bref->users);
+ if (s->list.n != &th_ctx->streams)
+ LIST_APPEND(&LIST_ELEM(s->list.n, struct stream *, list)->back_refs, &bref->users);
+ bref->ref = s->list.n;
+ __ha_barrier_store();
+ }
+ LIST_DELETE(&s->list);
+
+ sc_destroy(s->scb);
+ sc_destroy(s->scf);
+
+ pool_free(pool_head_stream, s);
+
+ /* We may want to free the maximum amount of pools if the proxy is stopping */
+ if (fe && unlikely(fe->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ pool_flush(pool_head_buffer);
+ pool_flush(pool_head_http_txn);
+ pool_flush(pool_head_requri);
+ pool_flush(pool_head_capture);
+ pool_flush(pool_head_stream);
+ pool_flush(pool_head_session);
+ pool_flush(pool_head_connection);
+ pool_flush(pool_head_pendconn);
+ pool_flush(fe->req_cap_pool);
+ pool_flush(fe->rsp_cap_pool);
+ }
+}
+
+
+/* Allocates a work buffer for stream <s>. It is meant to be called inside
+ * process_stream(). It only allocates the side needed for the function to
+ * work, i.e. the response buffer, so that an error message may be built and
+ * returned. Response buffers may be allocated from the reserve; this is
+ * critical to ensure that a response may always flow and will never block a
+ * server from releasing a connection. Returns 0 in case of failure, non-zero
+ * otherwise.
+ */
+static int stream_alloc_work_buffer(struct stream *s)
+{
+ if (b_alloc(&s->res.buf))
+ return 1;
+ return 0;
+}
+
+/* releases unused buffers after processing. Typically used at the end of the
+ * update() functions. It will try to wake up as many tasks/applets as the
+ * number of buffers that it releases. In practice, most often streams are
+ * blocked on a single buffer, so it makes sense to try to wake two up when two
+ * buffers are released at once.
+ */
+void stream_release_buffers(struct stream *s)
+{
+ int offer = 0;
+
+ if (c_size(&s->req) && c_empty(&s->req)) {
+ offer++;
+ b_free(&s->req.buf);
+ }
+ if (c_size(&s->res) && c_empty(&s->res)) {
+ offer++;
+ b_free(&s->res.buf);
+ }
+
+	/* if we're certain to have at least 1 buffer available, and there is
+	 * someone waiting, we can wake up a waiter and offer the buffers
+	 * to it.
+	 */
+ if (offer)
+ offer_buffers(s, offer);
+}
+
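+/* Accounts for the bytes that flowed on both channels since the last call:
+ * the difference between each channel's total and the byte counts already
+ * recorded in the logs is added to the frontend, backend, server and listener
+ * counters, as well as to the tracked stick counters, then the logs' counts
+ * are updated accordingly.
+ */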
+void stream_process_counters(struct stream *s)
+{
+ struct session *sess = s->sess;
+ unsigned long long bytes;
+ int i;
+
+ bytes = s->req.total - s->logs.bytes_in;
+ s->logs.bytes_in = s->req.total;
+ if (bytes) {
+ _HA_ATOMIC_ADD(&sess->fe->fe_counters.bytes_in, bytes);
+ _HA_ATOMIC_ADD(&s->be->be_counters.bytes_in, bytes);
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.bytes_in, bytes);
+
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_ADD(&sess->listener->counters->bytes_in, bytes);
+
+ for (i = 0; i < MAX_SESS_STKCTR; i++) {
+ if (!stkctr_inc_bytes_in_ctr(&s->stkctr[i], bytes))
+ stkctr_inc_bytes_in_ctr(&sess->stkctr[i], bytes);
+ }
+ }
+
+ bytes = s->res.total - s->logs.bytes_out;
+ s->logs.bytes_out = s->res.total;
+ if (bytes) {
+ _HA_ATOMIC_ADD(&sess->fe->fe_counters.bytes_out, bytes);
+ _HA_ATOMIC_ADD(&s->be->be_counters.bytes_out, bytes);
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.bytes_out, bytes);
+
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_ADD(&sess->listener->counters->bytes_out, bytes);
+
+ for (i = 0; i < MAX_SESS_STKCTR; i++) {
+ if (!stkctr_inc_bytes_out_ctr(&s->stkctr[i], bytes))
+ stkctr_inc_bytes_out_ctr(&sess->stkctr[i], bytes);
+ }
+ }
+}
+
+/*
+ * Returns a message to the client; the connection is shut down for read,
+ * and the request is cleared so that no server connection can be initiated.
+ * The buffer is marked for read shutdown on the other side to protect the
+ * message, and the buffer write is enabled. The message is contained in a
+ * "chunk". If it is null, then an empty message is used. The reply buffer does
+ * not need to be empty before this, and its contents will not be overwritten.
+ * The primary goal of this function is to return error messages to a client.
+ */
+void stream_retnclose(struct stream *s, const struct buffer *msg)
+{
+ struct channel *ic = &s->req;
+ struct channel *oc = &s->res;
+
+ channel_auto_read(ic);
+ channel_abort(ic);
+ channel_auto_close(ic);
+ channel_erase(ic);
+ channel_truncate(oc);
+
+ if (likely(msg && msg->data))
+ co_inject(oc, msg->area, msg->data);
+
+ oc->wex = tick_add_ifset(now_ms, oc->wto);
+ channel_auto_read(oc);
+ channel_auto_close(oc);
+ channel_shutr_now(oc);
+}
+
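+/* Sets the stream timeout designated by <name> to <timeout>, expressed as a
+ * tick value. Returns 1 if the timeout was applied, or 0 if <name> is not
+ * handled. As an illustration only (not part of this patch), an action
+ * implementing "set-timeout server 5s" would convert its argument to ticks
+ * and call:
+ *
+ *   stream_set_timeout(s, ACT_TIMEOUT_SERVER, MS_TO_TICKS(5000));
+ */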
+int stream_set_timeout(struct stream *s, enum act_timeout_name name, int timeout)
+{
+ switch (name) {
+ case ACT_TIMEOUT_SERVER:
+ s->req.wto = timeout;
+ s->res.rto = timeout;
+ return 1;
+
+ case ACT_TIMEOUT_TUNNEL:
+ s->tunnel_timeout = timeout;
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/*
+ * This function handles the transition between the SC_ST_CON state and the
+ * SC_ST_EST state. It must only be called after switching from SC_ST_CON (or
+ * SC_ST_INI or SC_ST_RDY) to SC_ST_EST, but only when a ->proto is defined.
+ * Note that it will switch the interface to SC_ST_DIS if we already have
+ * the CF_SHUTR flag, it means we were able to forward the request, and
+ * receive the response, before process_stream() had the opportunity to
+ * make the switch from SC_ST_CON to SC_ST_EST. When that happens, we want
+ * to go through back_establish() anyway, to make sure the analysers run.
+ * Timeouts are cleared. Errors are reported on the channel so that analysers
+ * can handle them.
+ */
+static void back_establish(struct stream *s)
+{
+ struct connection *conn = sc_conn(s->scb);
+ struct channel *req = &s->req;
+ struct channel *rep = &s->res;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ /* First, centralize the timers information, and clear any irrelevant
+ * timeout.
+ */
+ s->logs.t_connect = tv_ms_elapsed(&s->logs.tv_accept, &now);
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+
+ /* errors faced after sending data need to be reported */
+ if (sc_ep_test(s->scb, SE_FL_ERROR) && req->flags & CF_WROTE_DATA) {
+ /* Don't add CF_WRITE_ERROR if we're here because
+ * early data were rejected by the server, or
+ * http_wait_for_response() will never be called
+ * to send a 425.
+ */
+ if (conn && conn->err_code != CO_ER_SSL_EARLY_FAILED)
+ req->flags |= CF_WRITE_ERROR;
+ rep->flags |= CF_READ_ERROR;
+ s->conn_err_type = STRM_ET_DATA_ERR;
+ DBG_TRACE_STATE("read/write error", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+
+ if (objt_server(s->target))
+ health_adjust(__objt_server(s->target), HANA_STATUS_L4_OK);
+
+ if (!IS_HTX_STRM(s)) { /* let's allow immediate data connection in this case */
+ /* if the user wants to log as soon as possible, without counting
+ * bytes from the server, then this is the right moment. */
+ if (!LIST_ISEMPTY(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES)) {
+ /* note: no pend_pos here, session is established */
+ s->logs.t_close = s->logs.t_connect; /* to get a valid end date */
+ s->do_log(s);
+ }
+ }
+ else {
+ rep->flags |= CF_READ_DONTWAIT; /* a single read is enough to get response headers */
+ }
+
+ rep->analysers |= strm_fe(s)->fe_rsp_ana | s->be->be_rsp_ana;
+
+ se_have_more_data(s->scb->sedesc);
+ rep->flags |= CF_READ_ATTACHED; /* producer is now attached */
+ if (conn) {
+ /* real connections have timeouts
+ * if already defined, it means that a set-timeout rule has
+ * been executed so do not overwrite them
+ */
+ if (!tick_isset(req->wto))
+ req->wto = s->be->timeout.server;
+ if (!tick_isset(rep->rto))
+ rep->rto = s->be->timeout.server;
+ if (!tick_isset(s->tunnel_timeout))
+ s->tunnel_timeout = s->be->timeout.tunnel;
+
+ /* The connection is now established, try to read data from the
+ * underlying layer, and subscribe to recv events. We use a
+ * delayed recv here to give a chance to the data to flow back
+ * by the time we process other tasks.
+ */
+ sc_chk_rcv(s->scb);
+ }
+ req->wex = TICK_ETERNITY;
+ /* If we managed to get the whole response, and we don't have anything
+ * left to send, or can't, switch to SC_ST_DIS now. */
+ if (rep->flags & (CF_SHUTR | CF_SHUTW)) {
+ s->scb->state = SC_ST_DIS;
+		DBG_TRACE_STATE("response channel shutdown for read/write", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* Set correct stream termination flags in case no analyser has done it. It
+ * also counts a failed request if the server state has not reached the request
+ * stage.
+ */
+static void sess_set_term_flags(struct stream *s)
+{
+ if (!(s->flags & SF_FINST_MASK)) {
+ if (s->scb->state == SC_ST_INI) {
+ /* anything before REQ in fact */
+ _HA_ATOMIC_INC(&strm_fe(s)->fe_counters.failed_req);
+ if (strm_li(s) && strm_li(s)->counters)
+ _HA_ATOMIC_INC(&strm_li(s)->counters->failed_req);
+
+ s->flags |= SF_FINST_R;
+ }
+ else if (s->scb->state == SC_ST_QUE)
+ s->flags |= SF_FINST_Q;
+ else if (sc_state_in(s->scb->state, SC_SB_REQ|SC_SB_TAR|SC_SB_ASS|SC_SB_CON|SC_SB_CER|SC_SB_RDY))
+ s->flags |= SF_FINST_C;
+ else if (s->scb->state == SC_ST_EST || s->prev_conn_state == SC_ST_EST)
+ s->flags |= SF_FINST_D;
+ else
+ s->flags |= SF_FINST_L;
+ }
+}
+
+/* This function parses the use-service action ruleset. It executes the
+ * associated ACL and sets an applet as the stream's or txn's final node. It
+ * returns ACT_RET_ERR if an error occurs, with the proxy left in a consistent
+ * state. It returns ACT_RET_STOP on success because use-service must be a
+ * terminal action. It returns ACT_RET_YIELD if the initialisation function
+ * requires more data.
+ */
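+/* For illustration only (not part of this patch), such a rule typically
+ * results from a configuration line like:
+ *   http-request use-service prometheus-exporter if { path /metrics }
+ */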
+enum act_return process_use_service(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+
+{
+ struct appctx *appctx;
+
+ /* Initialises the applet if it is required. */
+ if (flags & ACT_OPT_FIRST) {
+		/* Register the applet. This function schedules the applet. */
+ s->target = &rule->applet.obj_type;
+ appctx = sc_applet_create(s->scb, objt_applet(s->target));
+ if (unlikely(!appctx))
+ return ACT_RET_ERR;
+
+ /* Finish initialisation of the context. */
+ appctx->rule = rule;
+ if (appctx_init(appctx) == -1)
+ return ACT_RET_ERR;
+ }
+ else
+ appctx = __sc_appctx(s->scb);
+
+ if (rule->from != ACT_F_HTTP_REQ) {
+ if (sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.intercepted_req);
+
+		/* The SF_ASSIGNED flag prevents server assignment. */
+ s->flags |= SF_ASSIGNED;
+ }
+
+ /* Now we can schedule the applet. */
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+ return ACT_RET_STOP;
+}
+
+/* This stream analyser checks the switching rules and changes the backend
+ * if appropriate. The default_backend rule is also considered, and the
+ * target backend's forced persistence rules are evaluated last, if any.
+ * It returns 1 if processing can continue on the next analysers, or zero
+ * if it either needs more data or wants to immediately abort the request.
+ */
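+/* As an illustration (not part of this patch), a dynamic rule as handled
+ * below may come from a configuration line such as:
+ *   use_backend bk_%[req.hdr(host),lower] if { req.hdr(host) -m found }
+ */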
+static int process_switching_rules(struct stream *s, struct channel *req, int an_bit)
+{
+ struct persist_rule *prst_rule;
+ struct session *sess = s->sess;
+ struct proxy *fe = sess->fe;
+
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ /* now check whether we have some switching rules for this request */
+ if (!(s->flags & SF_BE_ASSIGNED)) {
+ struct switching_rule *rule;
+
+ list_for_each_entry(rule, &fe->switching_rules, list) {
+ int ret = 1;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, fe, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+				/* If the backend name is dynamic, try to resolve the name.
+				 * If we can't resolve the name, or if any error occurs, break
+				 * the loop and fall back to the default backend.
+				 */
+ struct proxy *backend = NULL;
+
+ if (rule->dynamic) {
+ struct buffer *tmp;
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto sw_failed;
+
+ if (build_logline(s, tmp->area, tmp->size, &rule->be.expr))
+ backend = proxy_be_by_name(tmp->area);
+
+ free_trash_chunk(tmp);
+ tmp = NULL;
+
+ if (!backend)
+ break;
+ }
+ else
+ backend = rule->be.backend;
+
+ if (!stream_set_backend(s, backend))
+ goto sw_failed;
+ break;
+ }
+ }
+
+		/* To ensure correct connection accounting on the backend, we
+		 * have to assign one if it was not set (e.g. a "listen"
+		 * proxy). This measure also takes care of correctly setting
+		 * the default backend if any. Don't do anything if an upgrade
+		 * is already in progress.
+		 */
+ if (!(s->flags & (SF_BE_ASSIGNED|SF_IGNORE)))
+ if (!stream_set_backend(s, fe->defbe.be ? fe->defbe.be : s->be))
+ goto sw_failed;
+
+		/* No backend assigned but no error reported. This happens when
+		 * a TCP stream is upgraded to HTTP/2.
+		 */
+ if ((s->flags & (SF_BE_ASSIGNED|SF_IGNORE)) == SF_IGNORE) {
+ DBG_TRACE_DEVEL("leaving with no backend because of a destructive upgrade", STRM_EV_STRM_ANA, s);
+ return 0;
+ }
+
+ }
+
+ /* we don't want to run the TCP or HTTP filters again if the backend has not changed */
+ if (fe == s->be) {
+ s->req.analysers &= ~AN_REQ_INSPECT_BE;
+ s->req.analysers &= ~AN_REQ_HTTP_PROCESS_BE;
+ s->req.analysers &= ~AN_REQ_FLT_START_BE;
+ }
+
+ /* as soon as we know the backend, we must check if we have a matching forced or ignored
+ * persistence rule, and report that in the stream.
+ */
+ list_for_each_entry(prst_rule, &s->be->persist_rules, list) {
+ int ret = 1;
+
+ if (prst_rule->cond) {
+ ret = acl_exec_cond(prst_rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (prst_rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* no rule, or the rule matches */
+ if (prst_rule->type == PERSIST_TYPE_FORCE) {
+ s->flags |= SF_FORCE_PRST;
+ } else {
+ s->flags |= SF_IGNORE_PRST;
+ }
+ break;
+ }
+ }
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+
+ sw_failed:
+ /* immediately abort this request in case of allocation failure */
+ channel_abort(&s->req);
+ channel_abort(&s->res);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ if (s->txn)
+ s->txn->status = 500;
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->req.analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_ANA|STRM_EV_STRM_ERR, s);
+ return 0;
+}
+
+/* This stream analyser works on a request. It applies all use-server rules
+ * on it. The data must already be present in the buffer, otherwise they
+ * won't match. It always returns 1.
+ */
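+/* Illustrative configuration (not part of this patch) producing such a rule:
+ *   use-server srv1 if { req.hdr(x-pin) -m found }
+ */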
+static int process_server_rules(struct stream *s, struct channel *req, int an_bit)
+{
+ struct proxy *px = s->be;
+ struct session *sess = s->sess;
+ struct server_rule *rule;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ if (!(s->flags & SF_ASSIGNED)) {
+ list_for_each_entry(rule, &px->server_rules, list) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (ret) {
+ struct server *srv;
+
+ if (rule->dynamic) {
+ struct buffer *tmp = get_trash_chunk();
+
+ if (!build_logline(s, tmp->area, tmp->size, &rule->expr))
+ break;
+
+ srv = findserver(s->be, tmp->area);
+ if (!srv)
+ break;
+ }
+ else
+ srv = rule->srv.ptr;
+
+ if ((srv->cur_state != SRV_ST_STOPPED) ||
+ (px->options & PR_O_PERSIST) ||
+ (s->flags & SF_FORCE_PRST)) {
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ break;
+ }
+ /* if the server is not UP, let's go on with next rules
+ * just in case another one is suited.
+ */
+ }
+ }
+ }
+
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+}
+
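+/* Looks up the server a "stick match" entry points to: the server key (name
+ * or address, depending on the table's server_key_type) stored in <ts> is
+ * tried first, then the stored server ID as a fallback. If the designated
+ * server is usable (not stopped, or persistence is forced), the stream is
+ * directed to it.
+ */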
+static inline void sticking_rule_find_target(struct stream *s,
+ struct stktable *t, struct stksess *ts)
+{
+ struct proxy *px = s->be;
+ struct eb32_node *node;
+ struct dict_entry *de;
+ void *ptr;
+ struct server *srv;
+
+ /* Look for the server name previously stored in <t> stick-table */
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY);
+ de = stktable_data_cast(ptr, std_t_dict);
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (de) {
+ struct ebpt_node *node;
+
+ if (t->server_key_type == STKTABLE_SRV_NAME) {
+ node = ebis_lookup(&px->conf.used_server_name, de->value.key);
+ if (node) {
+ srv = container_of(node, struct server, conf.name);
+ goto found;
+ }
+ } else if (t->server_key_type == STKTABLE_SRV_ADDR) {
+ HA_RWLOCK_RDLOCK(PROXY_LOCK, &px->lock);
+ node = ebis_lookup(&px->used_server_addr, de->value.key);
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &px->lock);
+ if (node) {
+ srv = container_of(node, struct server, addr_node);
+ goto found;
+ }
+ }
+ }
+
+ /* Look for the server ID */
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_ID);
+ node = eb32_lookup(&px->conf.used_server_id, stktable_data_cast(ptr, std_t_sint));
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (!node)
+ return;
+
+ srv = container_of(node, struct server, conf.id);
+ found:
+ if ((srv->cur_state != SRV_ST_STOPPED) ||
+ (px->options & PR_O_PERSIST) || (s->flags & SF_FORCE_PRST)) {
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ }
+}
+
+/* This stream analyser works on a request. It applies all sticking rules on
+ * it. The data must already be present in the buffer, otherwise they won't
+ * match. It always returns 1.
+ */
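+/* For illustration only (not part of this patch), the rules processed here
+ * typically come from configuration lines such as:
+ *   stick match src
+ *   stick store-request src
+ * or simply "stick on src", which expands to both.
+ */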
+static int process_sticking_rules(struct stream *s, struct channel *req, int an_bit)
+{
+ struct proxy *px = s->be;
+ struct session *sess = s->sess;
+ struct sticking_rule *rule;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ list_for_each_entry(rule, &px->sticking_rules, list) {
+		int ret = 1;
+ int i;
+
+ /* Only the first stick store-request of each table is applied
+ * and other ones are ignored. The purpose is to allow complex
+ * configurations which look for multiple entries by decreasing
+ * order of precision and to stop at the first which matches.
+ * An example could be a store of the IP address from an HTTP
+ * header first, then from the source if not found.
+ */
+ if (rule->flags & STK_IS_STORE) {
+ for (i = 0; i < s->store_count; i++) {
+ if (rule->table.t == s->store[i].table)
+ break;
+ }
+
+ if (i != s->store_count)
+ continue;
+ }
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ struct stktable_key *key;
+
+ key = stktable_fetch_key(rule->table.t, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->expr, NULL);
+ if (!key)
+ continue;
+
+ if (rule->flags & STK_IS_MATCH) {
+ struct stksess *ts;
+
+ if ((ts = stktable_lookup_key(rule->table.t, key)) != NULL) {
+ if (!(s->flags & SF_ASSIGNED))
+ sticking_rule_find_target(s, rule->table.t, ts);
+ stktable_touch_local(rule->table.t, ts, 1);
+ }
+ }
+ if (rule->flags & STK_IS_STORE) {
+ if (s->store_count < (sizeof(s->store) / sizeof(s->store[0]))) {
+ struct stksess *ts;
+
+ ts = stksess_new(rule->table.t, key);
+ if (ts) {
+ s->store[s->store_count].table = rule->table.t;
+ s->store[s->store_count++].ts = ts;
+ }
+ }
+ }
+ }
+ }
+
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+}
+
+/* This stream analyser works on a response. It applies all store rules on
+ * it. The data must already be present in the buffer, otherwise they won't
+ * match. It always returns 1.
+ */
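+/* Illustrative configuration (not part of this patch) producing such a rule:
+ *   stick store-response res.cook(SRVID)
+ */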
+static int process_store_rules(struct stream *s, struct channel *rep, int an_bit)
+{
+ struct proxy *px = s->be;
+ struct session *sess = s->sess;
+ struct sticking_rule *rule;
+ int i;
+ int nbreq = s->store_count;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ list_for_each_entry(rule, &px->storersp_rules, list) {
+		int ret = 1;
+
+ /* Only the first stick store-response of each table is applied
+ * and other ones are ignored. The purpose is to allow complex
+ * configurations which look for multiple entries by decreasing
+ * order of precision and to stop at the first which matches.
+ * An example could be a store of a set-cookie value, with a
+ * fallback to a parameter found in a 302 redirect.
+ *
+ * The store-response rules are not allowed to override the
+ * store-request rules for the same table, but they may coexist.
+ * Thus we can have up to one store-request entry and one store-
+ * response entry for the same table at any time.
+ */
+ for (i = nbreq; i < s->store_count; i++) {
+ if (rule->table.t == s->store[i].table)
+ break;
+ }
+
+ /* skip existing entries for this table */
+ if (i < s->store_count)
+ continue;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ struct stktable_key *key;
+
+ key = stktable_fetch_key(rule->table.t, px, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, rule->expr, NULL);
+ if (!key)
+ continue;
+
+ if (s->store_count < (sizeof(s->store) / sizeof(s->store[0]))) {
+ struct stksess *ts;
+
+ ts = stksess_new(rule->table.t, key);
+ if (ts) {
+ s->store[s->store_count].table = rule->table.t;
+ s->store[s->store_count++].ts = ts;
+ }
+ }
+ }
+ }
+
+ /* process store request and store response */
+ for (i = 0; i < s->store_count; i++) {
+ struct stksess *ts;
+ void *ptr;
+ char *key;
+ struct dict_entry *de;
+ struct stktable *t = s->store[i].table;
+
+ if (!objt_server(s->target) || (__objt_server(s->target)->flags & SRV_F_NON_STICK)) {
+ stksess_free(s->store[i].table, s->store[i].ts);
+ s->store[i].ts = NULL;
+ continue;
+ }
+
+ ts = stktable_set_entry(t, s->store[i].ts);
+ if (ts != s->store[i].ts) {
+ /* the entry already existed, we can free ours */
+ stksess_free(t, s->store[i].ts);
+ }
+ s->store[i].ts = NULL;
+
+ if (t->server_key_type == STKTABLE_SRV_NAME)
+ key = __objt_server(s->target)->id;
+ else if (t->server_key_type == STKTABLE_SRV_ADDR)
+ key = __objt_server(s->target)->addr_node.key;
+ else
+ key = NULL;
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_ID);
+ stktable_data_cast(ptr, std_t_sint) = __objt_server(s->target)->puid;
+
+ if (key) {
+ de = dict_insert(&server_key_dict, key);
+ if (de) {
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY);
+ stktable_data_cast(ptr, std_t_dict) = de;
+ }
+ }
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_touch_local(t, ts, 1);
+ }
+ s->store_count = 0; /* everything is stored */
+
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+}
+
+/* Sets the stream to HTTP mode, if necessary. The minimal request HTTP
+ * analysers are set and the client mux is upgraded. It returns 1 if the
+ * stream processing may continue or 0 if it should be stopped. This happens
+ * on error or if the upgrade required a new stream. The mux protocol may be
+ * specified.
+ */
+int stream_set_http_mode(struct stream *s, const struct mux_proto_list *mux_proto)
+{
+ struct stconn *sc = s->scf;
+ struct connection *conn;
+
+ /* Already an HTTP stream */
+ if (IS_HTX_STRM(s))
+ return 1;
+
+ s->req.analysers |= AN_REQ_WAIT_HTTP|AN_REQ_HTTP_PROCESS_FE;
+
+ if (unlikely(!s->txn && !http_create_txn(s)))
+ return 0;
+
+ conn = sc_conn(sc);
+ if (conn) {
+ se_have_more_data(s->scf->sedesc);
+		/* Make sure we're unsubscribed, as the new
+		 * mux will probably want to subscribe to
+		 * the underlying XPRT
+		 */
+ if (s->scf->wait_event.events)
+ conn->mux->unsubscribe(sc, s->scf->wait_event.events, &(s->scf->wait_event));
+
+ if (conn->mux->flags & MX_FL_NO_UPG)
+ return 0;
+
+ sc_conn_prepare_endp_upgrade(sc);
+ if (conn_upgrade_mux_fe(conn, sc, &s->req.buf,
+ (mux_proto ? mux_proto->token : ist("")),
+ PROTO_MODE_HTTP) == -1) {
+ sc_conn_abort_endp_upgrade(sc);
+ return 0;
+ }
+ sc_conn_commit_endp_upgrade(sc);
+
+ s->req.flags &= ~(CF_READ_PARTIAL|CF_AUTO_CONNECT);
+ s->req.total = 0;
+ s->flags |= SF_IGNORE;
+ if (sc_ep_test(sc, SE_FL_DETACHED)) {
+			/* If the stream connector is detached, it means it was
+			 * not reused by the new mux. So destroy it, disable
+			 * logging, and abort the stream process. Thus the
+			 * stream will be silently destroyed. The new mux will
+			 * create new streams.
+			 */
+ s->logs.logwait = 0;
+ s->logs.level = 0;
+ channel_abort(&s->req);
+ channel_abort(&s->res);
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->req.analyse_exp = TICK_ETERNITY;
+ }
+ }
+
+ return 1;
+}
+
+
+/* Updates at once the channel flags and the timers of both stream connectors
+ * of a same stream, to complete the work after the analysers, then updates
+ * the data layer below. This will ensure that any synchronous update performed
+ * at the data layer will be reflected in the channel flags and/or stream
+ * connector. Note that this does not change the stream connector's current
+ * state, though it updates the previous state to the current one.
+ */
+static void stream_update_both_sc(struct stream *s)
+{
+ struct stconn *scf = s->scf;
+ struct stconn *scb = s->scb;
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+
+ req->flags &= ~(CF_READ_NULL|CF_READ_PARTIAL|CF_READ_ATTACHED|CF_WRITE_NULL|CF_WRITE_PARTIAL);
+ res->flags &= ~(CF_READ_NULL|CF_READ_PARTIAL|CF_READ_ATTACHED|CF_WRITE_NULL|CF_WRITE_PARTIAL);
+
+ s->prev_conn_state = scb->state;
+
+ /* let's recompute both sides states */
+ if (sc_state_in(scf->state, SC_SB_RDY|SC_SB_EST))
+ sc_update(scf);
+
+ if (sc_state_in(scb->state, SC_SB_RDY|SC_SB_EST))
+ sc_update(scb);
+
+ /* stream connectors are processed outside of process_stream() and must be
+ * handled at the latest moment.
+ */
+ if (sc_appctx(scf)) {
+ if (sc_is_recv_allowed(scf) || sc_is_send_allowed(scf))
+ appctx_wakeup(__sc_appctx(scf));
+ }
+ if (sc_appctx(scb)) {
+ if (sc_is_recv_allowed(scb) || sc_is_send_allowed(scb))
+ appctx_wakeup(__sc_appctx(scb));
+ }
+}
+
+/* if the current task's wake_date was set, it's being profiled, thus we may
+ * report latencies and CPU usages in logs, so it's desirable to do that before
+ * logging in order to report accurate CPU usage. In this case we count that
+ * final part and reset the wake date so that the scheduler doesn't do it a
+ * second time, and by doing so we also avoid an extra call to clock_gettime().
+ * The CPU usage will be off by the little time needed to run over stream_free()
+ * but that's only marginal.
+ */
+static void stream_cond_update_cpu_usage(struct stream *s)
+{
+ uint32_t cpu;
+
+ /* stats are only registered for non-zero wake dates */
+ if (likely(!th_ctx->sched_wake_date))
+ return;
+
+ cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
+ s->task->cpu_time += cpu;
+ HA_ATOMIC_ADD(&th_ctx->sched_profile_entry->cpu_time, cpu);
+ th_ctx->sched_wake_date = 0;
+}
+
+/* This macro is very specific to the function below. See the comments in
+ * process_stream() below to understand the logic and the tests.
+ */
+#define UPDATE_ANALYSERS(real, list, back, flag) { \
+ list = (((list) & ~(flag)) | ~(back)) & (real); \
+ back = real; \
+ if (!(list)) \
+ break; \
+ if (((list) ^ ((list) & ((list) - 1))) < (flag)) \
+ continue; \
+}
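+/* Note on UPDATE_ANALYSERS: ((list) & ((list) - 1)) clears the lowest set
+ * bit of <list>, so XORing it with <list> isolates that lowest bit. When this
+ * bit is lower than <flag>, an analyser located before the current one was
+ * just enabled, and the "continue" restarts the analyser loop from the
+ * beginning; otherwise the loop proceeds, and the unconditional "break" at
+ * the end of the loop body ensures a single pass.
+ */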
+
+/* The two following macros call an analyzer for the specified channel if the
+ * right flag is set. The first one is used for "filterable" analyzers: if a
+ * stream has some registered filters, pre- and post-analyze callbacks are
+ * called. The second one is used for the other analyzers (AN_REQ/RES_FLT_*
+ * and AN_REQ/RES_HTTP_XFER_BODY). */
+#define FLT_ANALYZE(strm, chn, fun, list, back, flag, ...) \
+ { \
+ if ((list) & (flag)) { \
+ if (HAS_FILTERS(strm)) { \
+ if (!flt_pre_analyze((strm), (chn), (flag))) \
+ break; \
+ if (!fun((strm), (chn), (flag), ##__VA_ARGS__)) \
+ break; \
+ if (!flt_post_analyze((strm), (chn), (flag))) \
+ break; \
+ } \
+ else { \
+ if (!fun((strm), (chn), (flag), ##__VA_ARGS__)) \
+ break; \
+ } \
+ UPDATE_ANALYSERS((chn)->analysers, (list), \
+ (back), (flag)); \
+ } \
+ }
+
+#define ANALYZE(strm, chn, fun, list, back, flag, ...) \
+ { \
+ if ((list) & (flag)) { \
+ if (!fun((strm), (chn), (flag), ##__VA_ARGS__)) \
+ break; \
+ UPDATE_ANALYSERS((chn)->analysers, (list), \
+ (back), (flag)); \
+ } \
+ }
+
+/* Processes the client, server, request and response jobs of a stream task,
+ * then puts it back to the wait queue in a clean state, or cleans up its
+ * resources if it must be deleted. Returns in <next> the date the task wants
+ * to be woken up, or TICK_ETERNITY. In order not to call all functions for
+ * nothing too many times, the request and response buffers flags are monitored
+ * and each function is called only if at least another function has changed at
+ * least one flag it is interested in.
+ */
+struct task *process_stream(struct task *t, void *context, unsigned int state)
+{
+ struct server *srv;
+ struct stream *s = context;
+ struct session *sess = s->sess;
+ unsigned int rqf_last, rpf_last;
+ unsigned int rq_prod_last, rq_cons_last;
+ unsigned int rp_cons_last, rp_prod_last;
+ unsigned int req_ana_back;
+ struct channel *req, *res;
+ struct stconn *scf, *scb;
+ unsigned int rate;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC, s);
+
+ activity[tid].stream_calls++;
+
+ req = &s->req;
+ res = &s->res;
+
+ scf = s->scf;
+ scb = s->scb;
+
+ /* First, attempt to receive pending data from I/O layers */
+ sc_conn_sync_recv(scf);
+ sc_conn_sync_recv(scb);
+
+ /* Let's check if we're looping without making any progress, e.g. due
+ * to a bogus analyser or the fact that we're ignoring a read0. The
+ * call_rate counter only counts calls with no progress made.
+ */
+ if (!((req->flags | res->flags) & (CF_READ_PARTIAL|CF_WRITE_PARTIAL))) {
+ rate = update_freq_ctr(&s->call_rate, 1);
+ if (rate >= 100000 && s->call_rate.prev_ctr) // make sure to wait at least a full second
+ stream_dump_and_crash(&s->obj_type, read_freq_ctr(&s->call_rate));
+ }
+
+	/* this data may no longer be valid, clear it */
+ if (s->txn)
+ memset(&s->txn->auth, 0, sizeof(s->txn->auth));
+
+ /* This flag must explicitly be set every time */
+ req->flags &= ~(CF_READ_NOEXP|CF_WAKE_WRITE);
+ res->flags &= ~(CF_READ_NOEXP|CF_WAKE_WRITE);
+
+ /* Keep a copy of req/rep flags so that we can detect shutdowns */
+ rqf_last = req->flags & ~CF_MASK_ANALYSER;
+ rpf_last = res->flags & ~CF_MASK_ANALYSER;
+
+ /* we don't want the stream connector functions to recursively wake us up */
+ scf->flags |= SC_FL_DONT_WAKE;
+ scb->flags |= SC_FL_DONT_WAKE;
+
+ /* update pending events */
+ s->pending_events |= (state & TASK_WOKEN_ANY);
+
+ /* 1a: Check for low level timeouts if needed. We just set a flag on
+ * stream connectors when their timeouts have expired.
+ */
+ if (unlikely(s->pending_events & TASK_WOKEN_TIMER)) {
+ stream_check_conn_timeout(s);
+
+ /* check channel timeouts, and close the corresponding stream connectors
+ * for future reads or writes. Note: this will also concern upper layers
+ * but we do not touch any other flag. We must be careful and correctly
+ * detect state changes when calling them.
+ */
+
+ channel_check_timeouts(req);
+
+ if (unlikely((req->flags & (CF_SHUTW|CF_WRITE_TIMEOUT)) == CF_WRITE_TIMEOUT)) {
+ scb->flags |= SC_FL_NOLINGER;
+ sc_shutw(scb);
+ }
+
+ if (unlikely((req->flags & (CF_SHUTR|CF_READ_TIMEOUT)) == CF_READ_TIMEOUT)) {
+ if (scf->flags & SC_FL_NOHALF)
+ scf->flags |= SC_FL_NOLINGER;
+ sc_shutr(scf);
+ }
+
+ channel_check_timeouts(res);
+
+ if (unlikely((res->flags & (CF_SHUTW|CF_WRITE_TIMEOUT)) == CF_WRITE_TIMEOUT)) {
+ scf->flags |= SC_FL_NOLINGER;
+ sc_shutw(scf);
+ }
+
+ if (unlikely((res->flags & (CF_SHUTR|CF_READ_TIMEOUT)) == CF_READ_TIMEOUT)) {
+ if (scb->flags & SC_FL_NOHALF)
+ scb->flags |= SC_FL_NOLINGER;
+ sc_shutr(scb);
+ }
+
+ if (HAS_FILTERS(s))
+ flt_stream_check_timeouts(s);
+
+ /* Once in a while we're woken up because the task expires. But
+ * this does not necessarily mean that a timeout has been reached.
+ * So let's not run a whole stream processing if only an expiration
+ * timeout needs to be refreshed.
+ */
+ if (!((req->flags | res->flags) &
+ (CF_SHUTR|CF_READ_ACTIVITY|CF_READ_TIMEOUT|CF_SHUTW|
+ CF_WRITE_ACTIVITY|CF_WRITE_TIMEOUT|CF_ANA_TIMEOUT)) &&
+ !(s->flags & SF_CONN_EXP) &&
+ !((sc_ep_get(scf) | scb->flags) & SE_FL_ERROR) &&
+ ((s->pending_events & TASK_WOKEN_ANY) == TASK_WOKEN_TIMER)) {
+ scf->flags &= ~SC_FL_DONT_WAKE;
+ scb->flags &= ~SC_FL_DONT_WAKE;
+ goto update_exp_and_leave;
+ }
+ }
+
+ resync_stconns:
+	/* below we may emit error messages so we have to ensure that we have
+	 * our buffers properly allocated. If the allocation fails, an error is
+	 * triggered.
+	 *
+	 * NOTE: An error is returned because the mechanism to queue entities
+	 *       waiting for a buffer is totally broken for now. However, this
+	 *       part must be refactored. Once that is done, this part
+	 *       must be reviewed too.
+	 */
+ */
+ if (!stream_alloc_work_buffer(s)) {
+ sc_ep_set(s->scf, SE_FL_ERROR);
+ s->conn_err_type = STRM_ET_CONN_RES;
+
+ sc_ep_set(s->scb, SE_FL_ERROR);
+ s->conn_err_type = STRM_ET_CONN_RES;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ sess_set_term_flags(s);
+ }
+
+ /* 1b: check for low-level errors reported at the stream connector.
+ * First we check if it's a retryable error (in which case we don't
+ * want to tell the buffer). Otherwise we report the error one level
+	 * up by setting flags into the buffers. Note that the side towards
+ * the client cannot have connect (hence retryable) errors. Also, the
+ * connection setup code must be able to deal with any type of abort.
+ */
+ srv = objt_server(s->target);
+ if (unlikely(sc_ep_test(scf, SE_FL_ERROR))) {
+ if (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS)) {
+ sc_shutr(scf);
+ sc_shutw(scf);
+ sc_report_error(scf);
+ if (!(req->analysers) && !(res->analysers)) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_D;
+ }
+ }
+ }
+
+ if (unlikely(sc_ep_test(scb, SE_FL_ERROR))) {
+ if (sc_state_in(scb->state, SC_SB_EST|SC_SB_DIS)) {
+ sc_shutr(scb);
+ sc_shutw(scb);
+ sc_report_error(scb);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.failed_resp);
+ if (!(req->analysers) && !(res->analysers)) {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_D;
+ }
+ }
+		/* note: maybe we should process connection errors here? */
+ }
+
+ if (sc_state_in(scb->state, SC_SB_CON|SC_SB_RDY)) {
+ /* we were trying to establish a connection on the server side,
+ * maybe it succeeded, maybe it failed, maybe we timed out, ...
+ */
+ if (scb->state == SC_ST_RDY)
+ back_handle_st_rdy(s);
+ else if (s->scb->state == SC_ST_CON)
+ back_handle_st_con(s);
+
+ if (scb->state == SC_ST_CER)
+ back_handle_st_cer(s);
+ else if (scb->state == SC_ST_EST)
+ back_establish(s);
+
+ /* state is now one of SC_ST_CON (still in progress), SC_ST_EST
+ * (established), SC_ST_DIS (abort), SC_ST_CLO (last error),
+ * SC_ST_ASS/SC_ST_TAR/SC_ST_REQ for retryable errors.
+ */
+ }
+
+ rq_prod_last = scf->state;
+ rq_cons_last = scb->state;
+ rp_cons_last = scf->state;
+ rp_prod_last = scb->state;
+
+ /* Check for connection closure */
+ DBG_TRACE_POINT(STRM_EV_STRM_PROC, s);
+
+ /* nothing special to be done on client side */
+ if (unlikely(scf->state == SC_ST_DIS)) {
+ scf->state = SC_ST_CLO;
+
+ /* This is needed only when debugging is enabled, to indicate
+ * client-side close.
+ */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) ||
+ (global.mode & MODE_VERBOSE)))) {
+ chunk_printf(&trash, "%08x:%s.clicls[%04x:%04x]\n",
+ s->uniq_id, s->be->id,
+ (unsigned short)conn_fd(sc_conn(scf)),
+ (unsigned short)conn_fd(sc_conn(scb)));
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+ }
+
+ /* When a server-side connection is released, we have to count it and
+ * check for pending connections on this server.
+ */
+ if (unlikely(scb->state == SC_ST_DIS)) {
+ scb->state = SC_ST_CLO;
+ srv = objt_server(s->target);
+ if (srv) {
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&srv->cur_sess);
+ }
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(srv, s->be))
+ process_srv_queue(srv);
+ }
+
+ /* This is needed only when debugging is enabled, to indicate
+ * server-side close.
+ */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) ||
+ (global.mode & MODE_VERBOSE)))) {
+ if (s->prev_conn_state == SC_ST_EST) {
+ chunk_printf(&trash, "%08x:%s.srvcls[%04x:%04x]\n",
+ s->uniq_id, s->be->id,
+ (unsigned short)conn_fd(sc_conn(scf)),
+ (unsigned short)conn_fd(sc_conn(scb)));
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+ }
+ }
+
+ /*
+ * Note: of the transient states (REQ, CER, DIS), only REQ may remain
+ * at this point.
+ */
+
+ resync_request:
+ /* Analyse request */
+ if (((req->flags & ~rqf_last) & CF_MASK_ANALYSER) ||
+ ((req->flags ^ rqf_last) & CF_MASK_STATIC) ||
+ (req->analysers && (req->flags & CF_SHUTW)) ||
+ scf->state != rq_prod_last ||
+ scb->state != rq_cons_last ||
+ s->pending_events & TASK_WOKEN_MSG) {
+ unsigned int flags = req->flags;
+
+ if (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO)) {
+ int max_loops = global.tune.maxpollevents;
+ unsigned int ana_list;
+ unsigned int ana_back;
+
+ /* it's up to the analysers to stop new connections,
+ * disable reading or closing. Note: if an analyser
+ * disables any of these bits, it is responsible for
+ * enabling them again when it disables itself, so
+ * that other analysers are called in similar conditions.
+ */
+ channel_auto_read(req);
+ channel_auto_connect(req);
+ channel_auto_close(req);
+
+ /* We will call all analysers for which a bit is set in
+ * req->analysers, following the bit order from LSB
+ * to MSB. The analysers must remove themselves from
+ * the list when not needed. Any analyser may return 0
+ * to break out of the loop, either because of missing
+ * data to take a decision, or because it decides to
+ * kill the stream. We loop at least once through each
+ * analyser, and we may loop again if other analysers
+ * are added in the middle.
+ *
+ * We build a list of analysers to run. We evaluate all
+ * of these analysers in the order of the lower bit to
+ * the higher bit. This ordering is very important.
+ * An analyser will often add/remove other analysers,
+ * including itself. Any changes to itself have no effect
+ * on the loop. If it removes any other analysers, we
+ * want those analysers not to be called anymore during
+ * this loop. If it adds an analyser that is located
+ * after itself, we want it to be scheduled for being
+ * processed during the loop. If it adds an analyser
+ * which is located before it, we want it to switch to
+ * it immediately, even if it has already been called
+ * once but removed since.
+ *
+ * In order to achieve this, we compare the analyser
+ * list after the call with a copy of it before the
+ * call. The work list is fed with analyser bits that
+ * appeared during the call. Then we compare previous
+ * work list with the new one, and check the bits that
+ * appeared. If the lowest of these bits is lower than
+ * the current bit, it means we have enabled a previous
+ * analyser and must immediately loop again.
+ */
+
+ ana_list = ana_back = req->analysers;
+ while (ana_list && max_loops--) {
+ /* Warning! ensure that analysers are always placed in ascending order! */
+ ANALYZE (s, req, flt_start_analyze, ana_list, ana_back, AN_REQ_FLT_START_FE);
+ FLT_ANALYZE(s, req, tcp_inspect_request, ana_list, ana_back, AN_REQ_INSPECT_FE);
+ FLT_ANALYZE(s, req, http_wait_for_request, ana_list, ana_back, AN_REQ_WAIT_HTTP);
+ FLT_ANALYZE(s, req, http_wait_for_request_body, ana_list, ana_back, AN_REQ_HTTP_BODY);
+ FLT_ANALYZE(s, req, http_process_req_common, ana_list, ana_back, AN_REQ_HTTP_PROCESS_FE, sess->fe);
+ FLT_ANALYZE(s, req, process_switching_rules, ana_list, ana_back, AN_REQ_SWITCHING_RULES);
+ ANALYZE (s, req, flt_start_analyze, ana_list, ana_back, AN_REQ_FLT_START_BE);
+ FLT_ANALYZE(s, req, tcp_inspect_request, ana_list, ana_back, AN_REQ_INSPECT_BE);
+ FLT_ANALYZE(s, req, http_process_req_common, ana_list, ana_back, AN_REQ_HTTP_PROCESS_BE, s->be);
+ FLT_ANALYZE(s, req, http_process_tarpit, ana_list, ana_back, AN_REQ_HTTP_TARPIT);
+ FLT_ANALYZE(s, req, process_server_rules, ana_list, ana_back, AN_REQ_SRV_RULES);
+ FLT_ANALYZE(s, req, http_process_request, ana_list, ana_back, AN_REQ_HTTP_INNER);
+ FLT_ANALYZE(s, req, tcp_persist_rdp_cookie, ana_list, ana_back, AN_REQ_PRST_RDP_COOKIE);
+ FLT_ANALYZE(s, req, process_sticking_rules, ana_list, ana_back, AN_REQ_STICKING_RULES);
+ ANALYZE (s, req, flt_analyze_http_headers, ana_list, ana_back, AN_REQ_FLT_HTTP_HDRS);
+ ANALYZE (s, req, http_request_forward_body, ana_list, ana_back, AN_REQ_HTTP_XFER_BODY);
+ ANALYZE (s, req, pcli_wait_for_request, ana_list, ana_back, AN_REQ_WAIT_CLI);
+ ANALYZE (s, req, flt_xfer_data, ana_list, ana_back, AN_REQ_FLT_XFER_DATA);
+ ANALYZE (s, req, flt_end_analyze, ana_list, ana_back, AN_REQ_FLT_END);
+ break;
+ }
+ }
+
+ rq_prod_last = scf->state;
+ rq_cons_last = scb->state;
+ req->flags &= ~CF_WAKE_ONCE;
+ rqf_last = req->flags;
+
+ if ((req->flags ^ flags) & (CF_SHUTR|CF_SHUTW))
+ goto resync_request;
+ }
+
+ /* we'll monitor the request analysers while parsing the response,
+ * because some response analysers may indirectly enable new request
+ * analysers (eg: HTTP keep-alive).
+ */
+ req_ana_back = req->analysers;
+
+ resync_response:
+ /* Analyse response */
+
+ if (((res->flags & ~rpf_last) & CF_MASK_ANALYSER) ||
+ (res->flags ^ rpf_last) & CF_MASK_STATIC ||
+ (res->analysers && (res->flags & CF_SHUTW)) ||
+ scf->state != rp_cons_last ||
+ scb->state != rp_prod_last ||
+ s->pending_events & TASK_WOKEN_MSG) {
+ unsigned int flags = res->flags;
+
+ if (sc_state_in(scb->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO)) {
+ int max_loops = global.tune.maxpollevents;
+ unsigned int ana_list;
+ unsigned int ana_back;
+
+ /* it's up to the analysers to stop or disable reading or
+ * closing. Note: if an analyser disables any of these
+ * bits, it is responsible for enabling them again when
+ * it disables itself, so that other analysers are called
+ * in similar conditions.
+ */
+ channel_auto_read(res);
+ channel_auto_close(res);
+
+ /* We will call all analysers for which a bit is set in
+ * res->analysers, following the bit order from LSB
+ * to MSB. The analysers must remove themselves from
+ * the list when not needed. Any analyser may return 0
+ * to break out of the loop, either because of missing
+ * data to take a decision, or because it decides to
+ * kill the stream. We loop at least once through each
+ * analyser, and we may loop again if other analysers
+ * are added in the middle.
+ */
+
+ ana_list = ana_back = res->analysers;
+ while (ana_list && max_loops--) {
+ /* Warning! ensure that analysers are always placed in ascending order! */
+ ANALYZE (s, res, flt_start_analyze, ana_list, ana_back, AN_RES_FLT_START_FE);
+ ANALYZE (s, res, flt_start_analyze, ana_list, ana_back, AN_RES_FLT_START_BE);
+ FLT_ANALYZE(s, res, tcp_inspect_response, ana_list, ana_back, AN_RES_INSPECT);
+ FLT_ANALYZE(s, res, http_wait_for_response, ana_list, ana_back, AN_RES_WAIT_HTTP);
+ FLT_ANALYZE(s, res, process_store_rules, ana_list, ana_back, AN_RES_STORE_RULES);
+ FLT_ANALYZE(s, res, http_process_res_common, ana_list, ana_back, AN_RES_HTTP_PROCESS_BE, s->be);
+ ANALYZE (s, res, flt_analyze_http_headers, ana_list, ana_back, AN_RES_FLT_HTTP_HDRS);
+ ANALYZE (s, res, http_response_forward_body, ana_list, ana_back, AN_RES_HTTP_XFER_BODY);
+ ANALYZE (s, res, pcli_wait_for_response, ana_list, ana_back, AN_RES_WAIT_CLI);
+ ANALYZE (s, res, flt_xfer_data, ana_list, ana_back, AN_RES_FLT_XFER_DATA);
+ ANALYZE (s, res, flt_end_analyze, ana_list, ana_back, AN_RES_FLT_END);
+ break;
+ }
+ }
+
+ rp_cons_last = scf->state;
+ rp_prod_last = scb->state;
+ res->flags &= ~CF_WAKE_ONCE;
+ rpf_last = res->flags;
+
+ if ((res->flags ^ flags) & (CF_SHUTR|CF_SHUTW))
+ goto resync_response;
+ }
+
+ /* maybe someone has added some request analysers, so we must check and loop */
+ if (req->analysers & ~req_ana_back)
+ goto resync_request;
+
+ if ((req->flags & ~rqf_last) & CF_MASK_ANALYSER)
+ goto resync_request;
+
+ /* FIXME: here we should call protocol handlers which rely on
+ * both buffers.
+ */
+
+
+ /*
+ * Now we propagate unhandled errors to the stream. Normally
+ * we're just in a data phase here since it means we have not
+ * seen any analyser that could set an error status.
+ */
+ srv = objt_server(s->target);
+ if (unlikely(!(s->flags & SF_ERR_MASK))) {
+ if (req->flags & (CF_READ_ERROR|CF_READ_TIMEOUT|CF_WRITE_ERROR|CF_WRITE_TIMEOUT)) {
+ /* Report it if the client got an error or a read timeout expired */
+ req->analysers &= AN_REQ_FLT_END;
+ if (req->flags & CF_READ_ERROR) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ s->flags |= SF_ERR_CLICL;
+ }
+ else if (req->flags & CF_READ_TIMEOUT) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ s->flags |= SF_ERR_CLITO;
+ }
+ else if (req->flags & CF_WRITE_ERROR) {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ s->flags |= SF_ERR_SRVCL;
+ }
+ else {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ s->flags |= SF_ERR_SRVTO;
+ }
+ sess_set_term_flags(s);
+
+ /* Abort the request if a client error occurred while
+ * the backend stream connector is in the SC_ST_INI
+ * state. It is switched into the SC_ST_CLO state and
+ * the request channel is erased. */
+ if (scb->state == SC_ST_INI) {
+ s->scb->state = SC_ST_CLO;
+ channel_abort(req);
+ if (IS_HTX_STRM(s))
+ channel_htx_erase(req, htxbuf(&req->buf));
+ else
+ channel_erase(req);
+ }
+ }
+ else if (res->flags & (CF_READ_ERROR|CF_READ_TIMEOUT|CF_WRITE_ERROR|CF_WRITE_TIMEOUT)) {
+ /* Report it if the server got an error or a read timeout expired */
+ res->analysers &= AN_RES_FLT_END;
+ if (res->flags & CF_READ_ERROR) {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ s->flags |= SF_ERR_SRVCL;
+ }
+ else if (res->flags & CF_READ_TIMEOUT) {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ s->flags |= SF_ERR_SRVTO;
+ }
+ else if (res->flags & CF_WRITE_ERROR) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ s->flags |= SF_ERR_CLICL;
+ }
+ else {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ s->flags |= SF_ERR_CLITO;
+ }
+ sess_set_term_flags(s);
+ }
+ }
+
+ /*
+ * Here we take care of forwarding unhandled data. This also includes
+ * connection establishments and shutdown requests.
+ */
+
+
+ /* If no one is interested in analysing data, it's time to forward
+ * everything. We configure the buffer to forward indefinitely.
+ * Note that we're checking CF_SHUTR_NOW as an indication of a possible
+ * recent call to channel_abort().
+ */
+ if (unlikely((!req->analysers || (req->analysers == AN_REQ_FLT_END && !(req->flags & CF_FLT_ANALYZE))) &&
+ !(req->flags & (CF_SHUTW|CF_SHUTR_NOW)) &&
+ (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO)) &&
+ (req->to_forward != CHN_INFINITE_FORWARD))) {
+ /* This buffer is freewheeling, there's no analyser
+ * attached to it. If any data are left in it, we'll permit them
+ * to move.
+ */
+ channel_auto_read(req);
+ channel_auto_connect(req);
+ channel_auto_close(req);
+
+ if (IS_HTX_STRM(s)) {
+ struct htx *htx = htxbuf(&req->buf);
+
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer.
+ */
+ co_set_data(req, htx->data);
+ if (!(req->flags & (CF_SHUTR|CF_SHUTW_NOW)))
+ channel_htx_forward_forever(req, htx);
+ }
+ else {
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer (which might not be connected yet).
+ */
+ c_adv(req, ci_data(req));
+ if (!(req->flags & (CF_SHUTR|CF_SHUTW_NOW)))
+ channel_forward_forever(req);
+ }
+ }
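+
+ /* Note (hedged summary): channel_forward_forever() sets to_forward
+ * to CHN_INFINITE_FORWARD, so every incoming byte is scheduled for
+ * forwarding without waking the analysers again; this is the
+ * "freewheeling" mode mentioned above.
+ */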
+
+ /* check if it is wise to enable kernel splicing to forward request data */
+ if (!(req->flags & (CF_KERN_SPLICING|CF_SHUTR)) &&
+ req->to_forward &&
+ (global.tune.options & GTUNE_USE_SPLICE) &&
+ (sc_conn(scf) && __sc_conn(scf)->xprt && __sc_conn(scf)->xprt->rcv_pipe &&
+ __sc_conn(scf)->mux && __sc_conn(scf)->mux->rcv_pipe) &&
+ (sc_conn(scb) && __sc_conn(scb)->xprt && __sc_conn(scb)->xprt->snd_pipe &&
+ __sc_conn(scb)->mux && __sc_conn(scb)->mux->snd_pipe) &&
+ (pipes_used < global.maxpipes) &&
+ (((sess->fe->options2|s->be->options2) & PR_O2_SPLIC_REQ) ||
+ (((sess->fe->options2|s->be->options2) & PR_O2_SPLIC_AUT) &&
+ (req->flags & CF_STREAMER_FAST)))) {
+ req->flags |= CF_KERN_SPLICING;
+ }
+
+ /* reflect what the L7 analysers have seen last */
+ rqf_last = req->flags;
+
+ /* it's possible that an upper layer has requested a connection setup or abort.
+ * There are 2 situations where we decide to establish a new connection:
+ * - there are data scheduled for emission in the buffer
+ * - the CF_AUTO_CONNECT flag is set (active connection)
+ */
+ if (scb->state == SC_ST_INI) {
+ if (!(req->flags & CF_SHUTW)) {
+ if ((req->flags & CF_AUTO_CONNECT) || !channel_is_empty(req)) {
+ /* If we have an appctx, there is no connect method, so we
+ * immediately switch to the connected state, otherwise we
+ * perform a connection request.
+ */
+ scb->state = SC_ST_REQ; /* new connection requested */
+ s->conn_retries = 0;
+ if ((s->be->retry_type &~ PR_RE_CONN_FAILED) &&
+ (s->be->mode == PR_MODE_HTTP) &&
+ !(s->txn->flags & TX_D_L7_RETRY))
+ s->txn->flags |= TX_L7_RETRY;
+ }
+ }
+ else {
+ s->scb->state = SC_ST_CLO; /* shutw+ini = abort */
+ channel_shutw_now(req); /* fix buffer flags upon abort */
+ channel_shutr_now(res);
+ }
+ }
+
+
+ /* we may have a pending connection request, or a connection waiting
+ * for completion.
+ */
+ if (sc_state_in(scb->state, SC_SB_REQ|SC_SB_QUE|SC_SB_TAR|SC_SB_ASS)) {
+ /* prune the request variables and swap to the response variables. */
+ if (s->vars_reqres.scope != SCOPE_RES) {
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+ vars_init_head(&s->vars_reqres, SCOPE_RES);
+ }
+
+ do {
+ /* nb: step 1 might switch from QUE to ASS, but we first want
+ * to give a chance to step 2 to perform a redirect if needed.
+ */
+ if (scb->state != SC_ST_REQ)
+ back_try_conn_req(s);
+ if (scb->state == SC_ST_REQ)
+ back_handle_st_req(s);
+
+ /* get a chance to complete an immediate connection setup */
+ if (scb->state == SC_ST_RDY)
+ goto resync_stconns;
+
+ /* applets directly go to the ESTABLISHED state. Similarly,
+ * servers experience the same fate when their connection
+ * is reused.
+ */
+ if (unlikely(scb->state == SC_ST_EST))
+ back_establish(s);
+
+ srv = objt_server(s->target);
+ if (scb->state == SC_ST_ASS && srv && srv->rdr_len && (s->flags & SF_REDIRECTABLE))
+ http_perform_server_redirect(s, scb);
+ } while (scb->state == SC_ST_ASS);
+ }
+
+ /* Let's see if we can send the pending request now */
+ sc_conn_sync_send(scb);
+
+ /*
+ * Now forward all shutdown requests between both sides of the request buffer
+ */
+
+ /* first, let's check if the request buffer needs to shutdown(write), which may
+ * happen either because the input is closed or because we want to force a close
+ * once the server has begun to respond. If a half-closed timeout is set, we adjust
+ * the other side's timeout as well. However this has no effect during the
+ * connection setup unless the backend has abortonclose set.
+ */
+ if (unlikely((req->flags & (CF_SHUTW|CF_SHUTW_NOW|CF_AUTO_CLOSE|CF_SHUTR)) ==
+ (CF_AUTO_CLOSE|CF_SHUTR) &&
+ (scb->state != SC_ST_CON || (s->be->options & PR_O_ABRT_CLOSE)))) {
+ channel_shutw_now(req);
+ }
+
+ /* shutdown(write) pending */
+ if (unlikely((req->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW &&
+ channel_is_empty(req))) {
+ if (req->flags & CF_READ_ERROR)
+ scb->flags |= SC_FL_NOLINGER;
+ sc_shutw(scb);
+ }
+
+ /* shutdown(write) done on server side, we must stop the client too */
+ if (unlikely((req->flags & (CF_SHUTW|CF_SHUTR|CF_SHUTR_NOW)) == CF_SHUTW &&
+ !req->analysers))
+ channel_shutr_now(req);
+
+ /* shutdown(read) pending */
+ if (unlikely((req->flags & (CF_SHUTR|CF_SHUTR_NOW)) == CF_SHUTR_NOW)) {
+ if (scf->flags & SC_FL_NOHALF)
+ scf->flags |= SC_FL_NOLINGER;
+ sc_shutr(scf);
+ }
+
+ /* Benchmarks have shown that it's optimal to do a full resync now */
+ if (scf->state == SC_ST_DIS ||
+ sc_state_in(scb->state, SC_SB_RDY|SC_SB_DIS) ||
+ (sc_ep_test(scf, SE_FL_ERROR) && scf->state != SC_ST_CLO) ||
+ (sc_ep_test(scb, SE_FL_ERROR) && scb->state != SC_ST_CLO))
+ goto resync_stconns;
+
+ /* otherwise we want to check if we need to resync the req buffer or not */
+ if ((req->flags ^ rqf_last) & (CF_SHUTR|CF_SHUTW))
+ goto resync_request;
+
+ /* perform output updates to the response buffer */
+
+ /* If no one is interested in analysing data, it's time to forward
+ * everything. We configure the buffer to forward indefinitely.
+ * Note that we're checking CF_SHUTR_NOW as an indication of a possible
+ * recent call to channel_abort().
+ */
+ if (unlikely((!res->analysers || (res->analysers == AN_RES_FLT_END && !(res->flags & CF_FLT_ANALYZE))) &&
+ !(res->flags & (CF_SHUTW|CF_SHUTR_NOW)) &&
+ sc_state_in(scb->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO) &&
+ (res->to_forward != CHN_INFINITE_FORWARD))) {
+ /* This buffer is freewheeling, there's no analyser
+ * attached to it. If any data are left in it, we'll permit them
+ * to move.
+ */
+ channel_auto_read(res);
+ channel_auto_close(res);
+
+ if (IS_HTX_STRM(s)) {
+ struct htx *htx = htxbuf(&res->buf);
+
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer.
+ */
+ co_set_data(res, htx->data);
+ if (!(res->flags & (CF_SHUTR|CF_SHUTW_NOW)))
+ channel_htx_forward_forever(res, htx);
+ }
+ else {
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer.
+ */
+ c_adv(res, ci_data(res));
+ if (!(res->flags & (CF_SHUTR|CF_SHUTW_NOW)))
+ channel_forward_forever(res);
+ }
+
+ /* if we have no analyser anymore in any direction and have a
+ * tunnel timeout set, use it now. Note that we must respect
+ * the half-closed timeouts as well.
+ */
+ if (!req->analysers && s->tunnel_timeout) {
+ req->rto = req->wto = res->rto = res->wto =
+ s->tunnel_timeout;
+
+ if ((req->flags & CF_SHUTR) && tick_isset(sess->fe->timeout.clientfin))
+ res->wto = sess->fe->timeout.clientfin;
+ if ((req->flags & CF_SHUTW) && tick_isset(s->be->timeout.serverfin))
+ res->rto = s->be->timeout.serverfin;
+ if ((res->flags & CF_SHUTR) && tick_isset(s->be->timeout.serverfin))
+ req->wto = s->be->timeout.serverfin;
+ if ((res->flags & CF_SHUTW) && tick_isset(sess->fe->timeout.clientfin))
+ req->rto = sess->fe->timeout.clientfin;
+
+ req->rex = tick_add(now_ms, req->rto);
+ req->wex = tick_add(now_ms, req->wto);
+ res->rex = tick_add(now_ms, res->rto);
+ res->wex = tick_add(now_ms, res->wto);
+ }
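+
+ /* Worked example (hypothetical timeouts): with "timeout tunnel 1h"
+ * and "timeout client-fin 1s", once no analyser remains all four
+ * rto/wto above become 1h; if the client then half-closes
+ * (req->flags & CF_SHUTR), res->wto drops to 1s so the server
+ * side cannot linger for the whole tunnel timeout.
+ */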
+ }
+
+ /* check if it is wise to enable kernel splicing to forward response data */
+ if (!(res->flags & (CF_KERN_SPLICING|CF_SHUTR)) &&
+ res->to_forward &&
+ (global.tune.options & GTUNE_USE_SPLICE) &&
+ (sc_conn(scf) && __sc_conn(scf)->xprt && __sc_conn(scf)->xprt->snd_pipe &&
+ __sc_conn(scf)->mux && __sc_conn(scf)->mux->snd_pipe) &&
+ (sc_conn(scb) && __sc_conn(scb)->xprt && __sc_conn(scb)->xprt->rcv_pipe &&
+ __sc_conn(scb)->mux && __sc_conn(scb)->mux->rcv_pipe) &&
+ (pipes_used < global.maxpipes) &&
+ (((sess->fe->options2|s->be->options2) & PR_O2_SPLIC_RTR) ||
+ (((sess->fe->options2|s->be->options2) & PR_O2_SPLIC_AUT) &&
+ (res->flags & CF_STREAMER_FAST)))) {
+ res->flags |= CF_KERN_SPLICING;
+ }
+
+ /* reflect what the L7 analysers have seen last */
+ rpf_last = res->flags;
+
+ /* Let's see if we can send the pending response now */
+ sc_conn_sync_send(scf);
+
+ /*
+ * Now forward all shutdown requests between both sides of the buffer
+ */
+
+ /*
+ * FIXME: this is probably where we should produce error responses.
+ */
+
+ /* first, let's check if the response buffer needs to shutdown(write) */
+ if (unlikely((res->flags & (CF_SHUTW|CF_SHUTW_NOW|CF_AUTO_CLOSE|CF_SHUTR)) ==
+ (CF_AUTO_CLOSE|CF_SHUTR))) {
+ channel_shutw_now(res);
+ }
+
+ /* shutdown(write) pending */
+ if (unlikely((res->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW &&
+ channel_is_empty(res))) {
+ sc_shutw(scf);
+ }
+
+ /* shutdown(write) done on the client side, we must stop the server too */
+ if (unlikely((res->flags & (CF_SHUTW|CF_SHUTR|CF_SHUTR_NOW)) == CF_SHUTW) &&
+ !res->analysers)
+ channel_shutr_now(res);
+
+ /* shutdown(read) pending */
+ if (unlikely((res->flags & (CF_SHUTR|CF_SHUTR_NOW)) == CF_SHUTR_NOW)) {
+ if (scb->flags & SC_FL_NOHALF)
+ scb->flags |= SC_FL_NOLINGER;
+ sc_shutr(scb);
+ }
+
+ if (scf->state == SC_ST_DIS ||
+ sc_state_in(scb->state, SC_SB_RDY|SC_SB_DIS) ||
+ (sc_ep_test(scf, SE_FL_ERROR) && scf->state != SC_ST_CLO) ||
+ (sc_ep_test(scb, SE_FL_ERROR) && scb->state != SC_ST_CLO))
+ goto resync_stconns;
+
+ if ((req->flags & ~rqf_last) & CF_MASK_ANALYSER)
+ goto resync_request;
+
+ if ((res->flags ^ rpf_last) & CF_MASK_STATIC)
+ goto resync_response;
+
+ if (((req->flags ^ rqf_last) | (res->flags ^ rpf_last)) & CF_MASK_ANALYSER)
+ goto resync_request;
+
+ /* we're interested in getting wakeups again */
+ scf->flags &= ~SC_FL_DONT_WAKE;
+ scb->flags &= ~SC_FL_DONT_WAKE;
+
+ if (likely((scf->state != SC_ST_CLO) || !sc_state_in(scb->state, SC_SB_INI|SC_SB_CLO) ||
+ (req->analysers & AN_REQ_FLT_END) || (res->analysers & AN_RES_FLT_END))) {
+ if ((sess->fe->options & PR_O_CONTSTATS) && (s->flags & SF_BE_ASSIGNED) && !(s->flags & SF_IGNORE))
+ stream_process_counters(s);
+
+ stream_update_both_sc(s);
+
+ /* Trick: if a request is waiting for the server to respond,
+ * and if we know the server can timeout, we don't want the timeout
+ * to expire on the client side first, but we're still interested
+ * in passing data from the client to the server (eg: POST). Thus,
+ * we can cancel the client's request timeout if the server's
+ * request timeout is set and the server has not yet sent a response.
+ */
+
+ if ((res->flags & (CF_AUTO_CLOSE|CF_SHUTR)) == 0 &&
+ (tick_isset(req->wex) || tick_isset(res->rex))) {
+ req->flags |= CF_READ_NOEXP;
+ req->rex = TICK_ETERNITY;
+ }
+
+ /* Reset pending events now */
+ s->pending_events = 0;
+
+ update_exp_and_leave:
+ /* Note: please ensure that if you branch here you disable SC_FL_DONT_WAKE */
+ t->expire = tick_first((tick_is_expired(t->expire, now_ms) ? 0 : t->expire),
+ tick_first(tick_first(req->rex, req->wex),
+ tick_first(res->rex, res->wex)));
+ if (!req->analysers)
+ req->analyse_exp = TICK_ETERNITY;
+
+ if ((sess->fe->options & PR_O_CONTSTATS) && (s->flags & SF_BE_ASSIGNED) &&
+ (!tick_isset(req->analyse_exp) || tick_is_expired(req->analyse_exp, now_ms)))
+ req->analyse_exp = tick_add(now_ms, 5000);
+
+ t->expire = tick_first(t->expire, req->analyse_exp);
+
+ t->expire = tick_first(t->expire, res->analyse_exp);
+
+ t->expire = tick_first(t->expire, s->conn_exp);
+
+ s->pending_events &= ~(TASK_WOKEN_TIMER | TASK_WOKEN_RES);
+ stream_release_buffers(s);
+
+ DBG_TRACE_DEVEL("queuing", STRM_EV_STRM_PROC, s);
+ return t; /* nothing more to do */
+ }
+
+ DBG_TRACE_DEVEL("releasing", STRM_EV_STRM_PROC, s);
+
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_DEC(&s->be->beconn);
+
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ chunk_printf(&trash, "%08x:%s.closed[%04x:%04x]\n",
+ s->uniq_id, s->be->id,
+ (unsigned short)conn_fd(sc_conn(scf)),
+ (unsigned short)conn_fd(sc_conn(scb)));
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+
+ if (!(s->flags & SF_IGNORE)) {
+ s->logs.t_close = tv_ms_elapsed(&s->logs.tv_accept, &now);
+
+ stream_process_counters(s);
+
+ if (s->txn && s->txn->status) {
+ int n;
+
+ n = s->txn->status / 100;
+ if (n < 1 || n > 5)
+ n = 0;
+
+ if (sess->fe->mode == PR_MODE_HTTP) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[n]);
+ }
+ if ((s->flags & SF_BE_ASSIGNED) &&
+ (s->be->mode == PR_MODE_HTTP)) {
+ _HA_ATOMIC_INC(&s->be->be_counters.p.http.rsp[n]);
+ _HA_ATOMIC_INC(&s->be->be_counters.p.http.cum_req);
+ }
+ }
+
+ /* let's do a final log if we need it */
+ if (!LIST_ISEMPTY(&sess->fe->logformat) && s->logs.logwait &&
+ !(s->flags & SF_MONITOR) &&
+ (!(sess->fe->options & PR_O_NULLNOLOG) || req->total)) {
+ /* we may need to know the position in the queue */
+ pendconn_free(s);
+
+ stream_cond_update_cpu_usage(s);
+ s->do_log(s);
+ }
+
+ /* update time stats for this stream */
+ stream_update_time_stats(s);
+ }
+
+ /* the task MUST not be in the run queue anymore */
+ stream_free(s);
+ task_destroy(t);
+ return NULL;
+}
+
+/* Update the stream's backend and server time stats */
+void stream_update_time_stats(struct stream *s)
+{
+ int t_request;
+ int t_queue;
+ int t_connect;
+ int t_data;
+ int t_close;
+ struct server *srv;
+ unsigned int samples_window;
+
+ t_request = 0;
+ t_queue = s->logs.t_queue;
+ t_connect = s->logs.t_connect;
+ t_close = s->logs.t_close;
+ t_data = s->logs.t_data;
+
+ if (s->be->mode != PR_MODE_HTTP)
+ t_data = t_connect;
+
+ if (t_connect < 0 || t_data < 0)
+ return;
+
+ if (tv_isge(&s->logs.tv_request, &s->logs.tv_accept))
+ t_request = tv_ms_elapsed(&s->logs.tv_accept, &s->logs.tv_request);
+
+ t_data -= t_connect;
+ t_connect -= t_queue;
+ t_queue -= t_request;
+
+ srv = objt_server(s->target);
+ if (srv) {
+ samples_window = (((s->be->mode == PR_MODE_HTTP) ?
+ srv->counters.p.http.cum_req : srv->counters.cum_lbconn) > TIME_STATS_SAMPLES) ? TIME_STATS_SAMPLES : 0;
+ swrate_add_dynamic(&srv->counters.q_time, samples_window, t_queue);
+ swrate_add_dynamic(&srv->counters.c_time, samples_window, t_connect);
+ swrate_add_dynamic(&srv->counters.d_time, samples_window, t_data);
+ swrate_add_dynamic(&srv->counters.t_time, samples_window, t_close);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.qtime_max, t_queue);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.ctime_max, t_connect);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.dtime_max, t_data);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.ttime_max, t_close);
+ }
+ samples_window = (((s->be->mode == PR_MODE_HTTP) ?
+ s->be->be_counters.p.http.cum_req : s->be->be_counters.cum_lbconn) > TIME_STATS_SAMPLES) ? TIME_STATS_SAMPLES : 0;
+ swrate_add_dynamic(&s->be->be_counters.q_time, samples_window, t_queue);
+ swrate_add_dynamic(&s->be->be_counters.c_time, samples_window, t_connect);
+ swrate_add_dynamic(&s->be->be_counters.d_time, samples_window, t_data);
+ swrate_add_dynamic(&s->be->be_counters.t_time, samples_window, t_close);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.qtime_max, t_queue);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.ctime_max, t_connect);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.dtime_max, t_data);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.ttime_max, t_close);
+}
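+
+/* Numeric example (hypothetical values, in ms, all cumulative since
+ * accept): with t_request=5, t_queue=7, t_connect=12, t_data=20 and
+ * t_close=100, the subtractions above yield the per-phase durations
+ * queue=2, connect=5 and data=8, while t_close keeps the total
+ * stream duration fed into t_time/ttime_max.
+ */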
+
+/*
+ * This function adjusts strm->srv_conn and maintains the previous and new
+ * servers' served stream counts. Setting newsrv to NULL is enough to release
+ * the current connection slot. This function also notifies any LB algo which might
+ * expect to be informed about any change in the number of active streams on a
+ * server.
+ */
+void sess_change_server(struct stream *strm, struct server *newsrv)
+{
+ struct server *oldsrv = strm->srv_conn;
+
+ if (oldsrv == newsrv)
+ return;
+
+ if (oldsrv) {
+ _HA_ATOMIC_DEC(&oldsrv->served);
+ _HA_ATOMIC_DEC(&oldsrv->proxy->served);
+ __ha_barrier_atomic_store();
+ if (oldsrv->proxy->lbprm.server_drop_conn)
+ oldsrv->proxy->lbprm.server_drop_conn(oldsrv);
+ stream_del_srv_conn(strm);
+ }
+
+ if (newsrv) {
+ _HA_ATOMIC_INC(&newsrv->served);
+ _HA_ATOMIC_INC(&newsrv->proxy->served);
+ __ha_barrier_atomic_store();
+ if (newsrv->proxy->lbprm.server_take_conn)
+ newsrv->proxy->lbprm.server_take_conn(newsrv);
+ stream_add_srv_conn(strm, newsrv);
+ }
+}
+
+/* Handle server-side errors for default protocols. It is called whenever a
+ * connection setup is aborted or a request is aborted in queue. It sets the
+ * stream termination flags so that the caller does not have to worry about
+ * them. It's installed as ->srv_error for the server-side stream connector.
+ */
+void default_srv_error(struct stream *s, struct stconn *sc)
+{
+ int err_type = s->conn_err_type;
+ int err = 0, fin = 0;
+
+ if (err_type & STRM_ET_QUEUE_ABRT) {
+ err = SF_ERR_CLICL;
+ fin = SF_FINST_Q;
+ }
+ else if (err_type & STRM_ET_CONN_ABRT) {
+ err = SF_ERR_CLICL;
+ fin = SF_FINST_C;
+ }
+ else if (err_type & STRM_ET_QUEUE_TO) {
+ err = SF_ERR_SRVTO;
+ fin = SF_FINST_Q;
+ }
+ else if (err_type & STRM_ET_QUEUE_ERR) {
+ err = SF_ERR_SRVCL;
+ fin = SF_FINST_Q;
+ }
+ else if (err_type & STRM_ET_CONN_TO) {
+ err = SF_ERR_SRVTO;
+ fin = SF_FINST_C;
+ }
+ else if (err_type & STRM_ET_CONN_ERR) {
+ err = SF_ERR_SRVCL;
+ fin = SF_FINST_C;
+ }
+ else if (err_type & STRM_ET_CONN_RES) {
+ err = SF_ERR_RESOURCE;
+ fin = SF_FINST_C;
+ }
+ else /* STRM_ET_CONN_OTHER and others */ {
+ err = SF_ERR_INTERNAL;
+ fin = SF_FINST_C;
+ }
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= err;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= fin;
+}
+
+/* kill a stream and set the termination flags to <why> (one of SF_ERR_*) */
+void stream_shutdown(struct stream *stream, int why)
+{
+ if (stream->req.flags & (CF_SHUTW|CF_SHUTW_NOW))
+ return;
+
+ channel_shutw_now(&stream->req);
+ channel_shutr_now(&stream->res);
+ stream->task->nice = 1024;
+ if (!(stream->flags & SF_ERR_MASK))
+ stream->flags |= why;
+ task_wakeup(stream->task, TASK_WOKEN_OTHER);
+}
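+
+/* Example caller: the "shutdown session" CLI handler below uses
+ * stream_shutdown(strm, SF_ERR_KILLED) to forcefully terminate a
+ * stream found in the per-thread stream lists.
+ */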
+
+/* Appends a dump of the state of stream <s> into buffer <buf> which must have
+ * been prepared beforehand by its caller, with each line prepended by prefix
+ * <pfx>, and each line terminated by character <eol>.
+ */
+void stream_dump(struct buffer *buf, const struct stream *s, const char *pfx, char eol)
+{
+ const struct stconn *scf, *scb;
+ const struct connection *cof, *cob;
+ const struct appctx *acf, *acb;
+ const struct server *srv;
+ const char *src = "unknown";
+ const char *dst = "unknown";
+ char pn[INET6_ADDRSTRLEN];
+ const struct channel *req, *res;
+
+ if (!s) {
+ chunk_appendf(buf, "%sstrm=%p%c", pfx, s, eol);
+ return;
+ }
+
+ if (s->obj_type != OBJ_TYPE_STREAM) {
+ chunk_appendf(buf, "%sstrm=%p [invalid type=%d(%s)]%c",
+ pfx, s, s->obj_type, obj_type_name(&s->obj_type), eol);
+ return;
+ }
+
+ req = &s->req;
+ res = &s->res;
+
+ scf = s->scf;
+ cof = sc_conn(scf);
+ acf = sc_appctx(scf);
+ if (cof && cof->src && addr_to_str(cof->src, pn, sizeof(pn)) >= 0)
+ src = pn;
+ else if (acf)
+ src = acf->applet->name;
+
+ scb = s->scb;
+ cob = sc_conn(scb);
+ acb = sc_appctx(scb);
+ srv = objt_server(s->target);
+ if (srv)
+ dst = srv->id;
+ else if (acb)
+ dst = acb->applet->name;
+
+ chunk_appendf(buf,
+ "%sstrm=%p,%x src=%s fe=%s be=%s dst=%s%c"
+ "%stxn=%p,%x txn.req=%s,%x txn.rsp=%s,%x%c"
+ "%srqf=%x rqa=%x rpf=%x rpa=%x%c"
+ "%sscf=%p,%s,%x scb=%p,%s,%x%c"
+ "%saf=%p,%u sab=%p,%u%c"
+ "%scof=%p,%x:%s(%p)/%s(%p)/%s(%d)%c"
+ "%scob=%p,%x:%s(%p)/%s(%p)/%s(%d)%c"
+ "",
+ pfx, s, s->flags, src, s->sess->fe->id, s->be->id, dst, eol,
+ pfx, s->txn, (s->txn ? s->txn->flags : 0),
+ (s->txn ? h1_msg_state_str(s->txn->req.msg_state): "-"), (s->txn ? s->txn->req.flags : 0),
+ (s->txn ? h1_msg_state_str(s->txn->rsp.msg_state): "-"), (s->txn ? s->txn->rsp.flags : 0), eol,
+ pfx, req->flags, req->analysers, res->flags, res->analysers, eol,
+ pfx, scf, sc_state_str(scf->state), scf->flags, scb, sc_state_str(scb->state), scb->flags, eol,
+ pfx, acf, acf ? acf->st0 : 0, acb, acb ? acb->st0 : 0, eol,
+ pfx, cof, cof ? cof->flags : 0, conn_get_mux_name(cof), cof?cof->ctx:0, conn_get_xprt_name(cof),
+ cof ? cof->xprt_ctx : 0, conn_get_ctrl_name(cof), conn_fd(cof), eol,
+ pfx, cob, cob ? cob->flags : 0, conn_get_mux_name(cob), cob?cob->ctx:0, conn_get_xprt_name(cob),
+ cob ? cob->xprt_ctx : 0, conn_get_ctrl_name(cob), conn_fd(cob), eol);
+}
+
+/* dumps an error message for the stream or appctx designated by <obj>,
+ * which has reached loop rate <rate>, then aborts hoping to retrieve a core.
+ */
+void stream_dump_and_crash(enum obj_type *obj, int rate)
+{
+ const struct stream *s;
+ char *msg = NULL;
+ const void *ptr;
+
+ ptr = s = objt_stream(obj);
+ if (!s) {
+ const struct appctx *appctx = objt_appctx(obj);
+ if (!appctx)
+ return;
+ ptr = appctx;
+ s = appctx_strm(appctx);
+ if (!s)
+ return;
+ }
+
+ chunk_reset(&trash);
+ stream_dump(&trash, s, "", ' ');
+
+ chunk_appendf(&trash, "filters={");
+ if (HAS_FILTERS(s)) {
+ struct filter *filter;
+
+ list_for_each_entry(filter, &s->strm_flt.filters, list) {
+ if (filter->list.p != &s->strm_flt.filters)
+ chunk_appendf(&trash, ", ");
+ chunk_appendf(&trash, "%p=\"%s\"", filter, FLT_ID(filter));
+ }
+ }
+ chunk_appendf(&trash, "}");
+
+ if (ptr != s) { // that's an appctx
+ const struct appctx *appctx = ptr;
+
+ chunk_appendf(&trash, " applet=%p(", appctx->applet);
+ resolve_sym_name(&trash, NULL, appctx->applet);
+ chunk_appendf(&trash, ")");
+
+ chunk_appendf(&trash, " handler=%p(", appctx->applet->fct);
+ resolve_sym_name(&trash, NULL, appctx->applet->fct);
+ chunk_appendf(&trash, ")");
+ }
+
+ memprintf(&msg,
+ "A bogus %s [%p] is spinning at %d calls per second and refuses to die, "
+ "aborting now! Please report this error to developers "
+ "[%s]\n",
+ obj_type_name(obj), ptr, rate, trash.area);
+
+ ha_alert("%s", msg);
+ send_log(NULL, LOG_EMERG, "%s", msg);
+ ABORT_NOW();
+}
+
+/* initialize the required structures */
+static void init_stream()
+{
+ int thr;
+
+ for (thr = 0; thr < MAX_THREADS; thr++)
+ LIST_INIT(&ha_thread_ctx[thr].streams);
+}
+INITCALL0(STG_INIT, init_stream);
+
+/* Generates a unique ID based on the given <format>, stores it in the given <strm> and
+ * returns the unique ID.
+ *
+ * If this function fails to allocate memory, IST_NULL is returned.
+ *
+ * If an ID is already stored within the stream, nothing happens and the
+ * existing unique ID is returned.
+ */
+struct ist stream_generate_unique_id(struct stream *strm, struct list *format)
+{
+ if (isttest(strm->unique_id)) {
+ return strm->unique_id;
+ }
+ else {
+ char *unique_id;
+ int length;
+ if ((unique_id = pool_alloc(pool_head_uniqueid)) == NULL)
+ return IST_NULL;
+
+ length = build_logline(strm, unique_id, UNIQUEID_LEN, format);
+ strm->unique_id = ist2(unique_id, length);
+
+ return strm->unique_id;
+ }
+}
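+
+/* Usage sketch (hypothetical call site): with <format> built from a
+ * "unique-id-format" directive, a caller would typically do:
+ *
+ *   struct ist id = stream_generate_unique_id(s, &sess->fe->format_unique_id);
+ *   if (!isttest(id))
+ *           return 0; // allocation failed, proceed without an ID
+ */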
+
+/************************************************************************/
+/* All supported ACL keywords must be declared here. */
+/************************************************************************/
+static enum act_return stream_action_set_log_level(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ s->logs.level = (uintptr_t)rule->arg.act.p[0];
+ return ACT_RET_CONT;
+}
+
+
+/* Parse a "set-log-level" action. It takes the level value as argument. It
+ * returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret stream_parse_set_log_level(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int level;
+
+ if (!*args[*cur_arg]) {
+ bad_log_level:
+ memprintf(err, "expects exactly 1 argument (log level name or 'silent')");
+ return ACT_RET_PRS_ERR;
+ }
+ if (strcmp(args[*cur_arg], "silent") == 0)
+ level = -1;
+ else if ((level = get_log_level(args[*cur_arg]) + 1) == 0)
+ goto bad_log_level;
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = stream_action_set_log_level;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)level;
+ return ACT_RET_PRS_OK;
+}
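+
+/* Configuration example (illustrative), valid since this parser is
+ * registered for tcp-request/tcp-response content rules below:
+ *
+ *   tcp-request content set-log-level silent if { src 10.0.0.0/8 }
+ */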
+
+static enum act_return stream_action_set_nice(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ s->task->nice = (uintptr_t)rule->arg.act.p[0];
+ return ACT_RET_CONT;
+}
+
+
+/* Parse a "set-nice" action. It takes the nice value as argument. It returns
+ * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret stream_parse_set_nice(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int nice;
+
+ if (!*args[*cur_arg]) {
+ memprintf(err, "expects exactly 1 argument (integer value)");
+ return ACT_RET_PRS_ERR;
+ }
+
+ nice = atoi(args[*cur_arg]);
+ if (nice < -1024)
+ nice = -1024;
+ else if (nice > 1024)
+ nice = 1024;
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = stream_action_set_nice;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)nice;
+ return ACT_RET_PRS_OK;
+}
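+
+/* Configuration example (illustrative); out-of-range values are
+ * clamped to [-1024, 1024] as above:
+ *
+ *   tcp-request content set-nice 1024 if { src 192.168.0.0/16 }
+ */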
+
+
+static enum act_return tcp_action_switch_stream_mode(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ enum pr_mode mode = (uintptr_t)rule->arg.act.p[0];
+ const struct mux_proto_list *mux_proto = rule->arg.act.p[1];
+
+ if (!IS_HTX_STRM(s) && mode == PR_MODE_HTTP) {
+ if (!stream_set_http_mode(s, mux_proto)) {
+ channel_abort(&s->req);
+ channel_abort(&s->res);
+ return ACT_RET_ABRT;
+ }
+ }
+ return ACT_RET_STOP;
+}
+
+
+static int check_tcp_switch_stream_mode(struct act_rule *rule, struct proxy *px, char **err)
+{
+ const struct mux_proto_list *mux_ent;
+ const struct mux_proto_list *mux_proto = rule->arg.act.p[1];
+ enum pr_mode pr_mode = (uintptr_t)rule->arg.act.p[0];
+ enum proto_proxy_mode mode = (1 << (pr_mode == PR_MODE_HTTP));
+
+ if (pr_mode == PR_MODE_HTTP)
+ px->options |= PR_O_HTTP_UPG;
+
+ if (mux_proto) {
+ mux_ent = conn_get_best_mux_entry(mux_proto->token, PROTO_SIDE_FE, mode);
+ if (!mux_ent || !isteq(mux_ent->token, mux_proto->token)) {
+ memprintf(err, "MUX protocol '%.*s' is not compatible with the selected mode",
+ (int)mux_proto->token.len, mux_proto->token.ptr);
+ return 0;
+ }
+ }
+ else {
+ mux_ent = conn_get_best_mux_entry(IST_NULL, PROTO_SIDE_FE, mode);
+ if (!mux_ent) {
+ memprintf(err, "Unable to find compatible MUX protocol with the selected mode");
+ return 0;
+ }
+ }
+
+ /* Update the mux */
+ rule->arg.act.p[1] = (void *)mux_ent;
+ return 1;
+
+}
+
+static enum act_parse_ret stream_parse_switch_mode(const char **args, int *cur_arg,
+ struct proxy *px, struct act_rule *rule,
+ char **err)
+{
+ const struct mux_proto_list *mux_proto = NULL;
+ struct ist proto;
+ enum pr_mode mode;
+
+ /* must have at least the mode */
+ if (*(args[*cur_arg]) == 0) {
+ memprintf(err, "'%s %s' expects a mode as argument.", args[0], args[*cur_arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!(px->cap & PR_CAP_FE)) {
+ memprintf(err, "'%s %s' not allowed because %s '%s' has no frontend capability",
+ args[0], args[*cur_arg-1], proxy_type_str(px), px->id);
+ return ACT_RET_PRS_ERR;
+ }
+ /* Check the mode. For now "tcp" is disabled because downgrade is not
+ * supported and PT is the only TCP mux.
+ */
+ if (strcmp(args[*cur_arg], "http") == 0)
+ mode = PR_MODE_HTTP;
+ else {
+ memprintf(err, "'%s %s' expects a valid mode (got '%s').", args[0], args[*cur_arg-1], args[*cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* check the proto, if specified */
+ if (*(args[*cur_arg+1]) && strcmp(args[*cur_arg+1], "proto") == 0) {
+ if (*(args[*cur_arg+2]) == 0) {
+ memprintf(err, "'%s %s': '%s' expects a protocol as argument.",
+ args[0], args[*cur_arg-1], args[*cur_arg+1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ proto = ist(args[*cur_arg + 2]);
+ mux_proto = get_mux_proto(proto);
+ if (!mux_proto) {
+ memprintf(err, "'%s %s': '%s' expects a valid MUX protocol, if specified (got '%s')",
+ args[0], args[*cur_arg-1], args[*cur_arg+1], args[*cur_arg+2]);
+ return ACT_RET_PRS_ERR;
+ }
+ *cur_arg += 2;
+ }
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = tcp_action_switch_stream_mode;
+ rule->check_ptr = check_tcp_switch_stream_mode;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)mode;
+ rule->arg.act.p[1] = (void *)mux_proto;
+ return ACT_RET_PRS_OK;
+}
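+
+/* Configuration examples (illustrative): upgrade a TCP stream to
+ * HTTP, optionally forcing the mux protocol ("HTTP" being the
+ * predefined ACL matching an HTTP-looking request), e.g. either of:
+ *
+ *   tcp-request content switch-mode http if HTTP
+ *   tcp-request content switch-mode http proto h2
+ */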
+
+/* Parse a "use-service" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret stream_parse_use_service(const char **args, int *cur_arg,
+ struct proxy *px, struct act_rule *rule,
+ char **err)
+{
+ struct action_kw *kw;
+
+ /* Check if the service name exists. */
+ if (*(args[*cur_arg]) == 0) {
+ memprintf(err, "'%s' expects a service name.", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* look up the keyword corresponding to a service. */
+ kw = action_lookup(&service_keywords, args[*cur_arg]);
+ if (!kw) {
+ memprintf(err, "'%s' unknown service name.", args[1]);
+ return ACT_RET_PRS_ERR;
+ }
+ (*cur_arg)++;
+
+ /* execute the specific rule parser. */
+ rule->kw = kw;
+ if (kw->parse((const char **)args, cur_arg, px, rule, err) == ACT_RET_PRS_ERR)
+ return ACT_RET_PRS_ERR;
+
+ /* Register processing function. */
+ rule->action_ptr = process_use_service;
+ rule->action = ACT_CUSTOM;
+
+ return ACT_RET_PRS_OK;
+}
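+
+/* Configuration example (illustrative, with a hypothetical Lua service
+ * named "lua.my_service" registered via the service keywords):
+ *
+ *   tcp-request content use-service lua.my_service
+ */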
+
+void service_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&service_keywords, &kw_list->list);
+}
+
+struct action_kw *service_find(const char *kw)
+{
+ return action_lookup(&service_keywords, kw);
+}
+
+/* Lists the known services on <out>. If <out> is NULL, they are emitted
+ * on stdout, one per line.
+ */
+void list_services(FILE *out)
+{
+ const struct action_kw *akwp, *akwn;
+ struct action_kw_list *kw_list;
+ int found = 0;
+ int i;
+
+ if (out)
+ fprintf(out, "Available services :");
+
+ for (akwn = akwp = NULL;; akwp = akwn) {
+ list_for_each_entry(kw_list, &service_keywords, list) {
+ for (i = 0; kw_list->kw[i].kw != NULL; i++) {
+ if (strordered(akwp ? akwp->kw : NULL,
+ kw_list->kw[i].kw,
+ akwn != akwp ? akwn->kw : NULL))
+ akwn = &kw_list->kw[i];
+ found = 1;
+ }
+ }
+ if (akwn == akwp)
+ break;
+ if (out)
+ fprintf(out, " %s", akwn->kw);
+ else
+ printf("%s\n", akwn->kw);
+ }
+ if (!found && out)
+ fprintf(out, " none\n");
+}
+
+/* appctx context used by the "show sess" command */
+
+struct show_sess_ctx {
+ struct bref bref; /* back-reference from the session being dumped */
+ void *target; /* session we want to dump, or NULL for all */
+ unsigned int thr; /* the thread number being explored (0..MAX_THREADS-1) */
+ unsigned int uid; /* if non-null, the uniq_id of the session being dumped */
+ int section; /* section of the session being dumped */
+ int pos; /* last position of the current session's buffer */
+};
+
+/* This function dumps a complete stream state onto the stream connector's
+ * read buffer. The stream has to be set in strm. It returns 0 if the output
+ * buffer is full and it needs to be called again, otherwise non-zero. It is
+ * designed to be called from cli_io_handler_dump_sess() below.
+ */
+static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_sess_ctx *ctx = appctx->svcctx;
+ struct stconn *scf, *scb;
+ struct tm tm;
+ extern const char *monthname[12];
+ char pn[INET6_ADDRSTRLEN];
+ struct connection *conn;
+ struct appctx *tmpctx;
+
+ chunk_reset(&trash);
+
+ if (ctx->section > 0 && ctx->uid != strm->uniq_id) {
+ /* stream changed, no need to go any further */
+ chunk_appendf(&trash, " *** session terminated while we were watching it ***\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+ goto done;
+ }
+
+ switch (ctx->section) {
+ case 0: /* main status of the stream */
+ ctx->uid = strm->uniq_id;
+ ctx->section = 1;
+ /* fall through */
+
+ case 1:
+ get_localtime(strm->logs.accept_date.tv_sec, &tm);
+ chunk_appendf(&trash,
+ "%p: [%02d/%s/%04d:%02d:%02d:%02d.%06d] id=%u proto=%s",
+ strm,
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, (int)(strm->logs.accept_date.tv_usec),
+ strm->uniq_id,
+ strm_li(strm) ? strm_li(strm)->rx.proto->name : "?");
+
+ conn = objt_conn(strm_orig(strm));
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " source=%s:%d\n",
+ pn, get_host_port(conn->src));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " source=unix:%d\n", strm_li(strm)->luid);
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(&trash, "\n");
+ break;
+ }
+
+ chunk_appendf(&trash,
+ " flags=0x%x, conn_retries=%d, conn_exp=%s conn_et=0x%03x srv_conn=%p, pend_pos=%p waiting=%d epoch=%#x\n",
+ strm->flags, strm->conn_retries,
+ strm->conn_exp ?
+ tick_is_expired(strm->conn_exp, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(strm->conn_exp - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ strm->conn_err_type, strm->srv_conn, strm->pend_pos,
+ LIST_INLIST(&strm->buffer_wait.list), strm->stream_epoch);
+
+ chunk_appendf(&trash,
+ " frontend=%s (id=%u mode=%s), listener=%s (id=%u)",
+ strm_fe(strm)->id, strm_fe(strm)->uuid, proxy_mode_str(strm_fe(strm)->mode),
+ strm_li(strm) ? strm_li(strm)->name ? strm_li(strm)->name : "?" : "?",
+ strm_li(strm) ? strm_li(strm)->luid : 0);
+
+ switch (conn && conn_get_dst(conn) ? addr_to_str(conn->dst, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " addr=%s:%d\n",
+ pn, get_host_port(conn->dst));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " addr=unix:%d\n", strm_li(strm)->luid);
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(&trash, "\n");
+ break;
+ }
+
+ if (strm->be->cap & PR_CAP_BE)
+ chunk_appendf(&trash,
+ " backend=%s (id=%u mode=%s)",
+ strm->be->id,
+ strm->be->uuid, proxy_mode_str(strm->be->mode));
+ else
+ chunk_appendf(&trash, " backend=<NONE> (id=-1 mode=-)");
+
+ conn = sc_conn(strm->scb);
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " addr=%s:%d\n",
+ pn, get_host_port(conn->src));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " addr=unix\n");
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(&trash, "\n");
+ break;
+ }
+
+ if (strm->be->cap & PR_CAP_BE)
+ chunk_appendf(&trash,
+ " server=%s (id=%u)",
+ objt_server(strm->target) ? __objt_server(strm->target)->id : "<none>",
+ objt_server(strm->target) ? __objt_server(strm->target)->puid : 0);
+ else
+ chunk_appendf(&trash, " server=<NONE> (id=-1)");
+
+ switch (conn && conn_get_dst(conn) ? addr_to_str(conn->dst, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " addr=%s:%d\n",
+ pn, get_host_port(conn->dst));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " addr=unix\n");
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(&trash, "\n");
+ break;
+ }
+
+ chunk_appendf(&trash,
+ " task=%p (state=0x%02x nice=%d calls=%u rate=%u exp=%s tmask=0x%lx%s",
+ strm->task,
+ strm->task->state,
+ strm->task->nice, strm->task->calls, read_freq_ctr(&strm->call_rate),
+ strm->task->expire ?
+ tick_is_expired(strm->task->expire, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(strm->task->expire - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ strm->task->thread_mask,
+ task_in_rq(strm->task) ? ", running" : "");
+
+ chunk_appendf(&trash,
+ " age=%s)\n",
+ human_time(now.tv_sec - strm->logs.accept_date.tv_sec, 1));
+
+ if (strm->txn)
+ chunk_appendf(&trash,
+ " txn=%p flags=0x%x meth=%d status=%d req.st=%s rsp.st=%s req.f=0x%02x rsp.f=0x%02x\n",
+ strm->txn, strm->txn->flags, strm->txn->meth, strm->txn->status,
+ h1_msg_state_str(strm->txn->req.msg_state), h1_msg_state_str(strm->txn->rsp.msg_state),
+ strm->txn->req.flags, strm->txn->rsp.flags);
+
+ scf = strm->scf;
+ chunk_appendf(&trash, " scf=%p flags=0x%08x state=%s endp=%s,%p,0x%08x sub=%d\n",
+ scf, scf->flags, sc_state_str(scf->state),
+ (sc_ep_test(scf, SE_FL_T_MUX) ? "CONN" : (sc_ep_test(scf, SE_FL_T_APPLET) ? "APPCTX" : "NONE")),
+ scf->sedesc->se, sc_ep_get(scf), scf->wait_event.events);
+
+ if ((conn = sc_conn(scf)) != NULL) {
+ chunk_appendf(&trash,
+ " co0=%p ctrl=%s xprt=%s mux=%s data=%s target=%s:%p\n",
+ conn,
+ conn_get_ctrl_name(conn),
+ conn_get_xprt_name(conn),
+ conn_get_mux_name(conn),
+ sc_get_data_name(scf),
+ obj_type_name(conn->target),
+ obj_base_ptr(conn->target));
+
+ chunk_appendf(&trash,
+ " flags=0x%08x fd=%d fd.state=%02x updt=%d fd.tmask=0x%lx\n",
+ conn->flags,
+ conn_fd(conn),
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].state : 0,
+ conn_fd(conn) >= 0 ? !!(fdtab[conn->handle.fd].update_mask & tid_bit) : 0,
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].thread_mask: 0);
+
+ }
+ else if ((tmpctx = sc_appctx(scf)) != NULL) {
+ chunk_appendf(&trash,
+ " app0=%p st0=%d st1=%d st2=%d applet=%s tmask=0x%lx nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+ tmpctx,
+ tmpctx->st0,
+ tmpctx->st1,
+ tmpctx->_st2,
+ tmpctx->applet->name,
+ tmpctx->t->thread_mask,
+ tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
+ (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+ }
+
+ scb = strm->scb;
+ chunk_appendf(&trash, " scb=%p flags=0x%08x state=%s endp=%s,%p,0x%08x sub=%d\n",
+ scb, scb->flags, sc_state_str(scb->state),
+ (sc_ep_test(scb, SE_FL_T_MUX) ? "CONN" : (sc_ep_test(scb, SE_FL_T_APPLET) ? "APPCTX" : "NONE")),
+ scb->sedesc->se, sc_ep_get(scb), scb->wait_event.events);
+
+ if ((conn = sc_conn(scb)) != NULL) {
+ chunk_appendf(&trash,
+ " co1=%p ctrl=%s xprt=%s mux=%s data=%s target=%s:%p\n",
+ conn,
+ conn_get_ctrl_name(conn),
+ conn_get_xprt_name(conn),
+ conn_get_mux_name(conn),
+ sc_get_data_name(scb),
+ obj_type_name(conn->target),
+ obj_base_ptr(conn->target));
+
+ chunk_appendf(&trash,
+ " flags=0x%08x fd=%d fd.state=%02x updt=%d fd.tmask=0x%lx\n",
+ conn->flags,
+ conn_fd(conn),
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].state : 0,
+ conn_fd(conn) >= 0 ? !!(fdtab[conn->handle.fd].update_mask & tid_bit) : 0,
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].thread_mask: 0);
+
+ }
+ else if ((tmpctx = sc_appctx(scb)) != NULL) {
+ chunk_appendf(&trash,
+ " app1=%p st0=%d st1=%d st2=%d applet=%s tmask=0x%lx nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+ tmpctx,
+ tmpctx->st0,
+ tmpctx->st1,
+ tmpctx->_st2,
+ tmpctx->applet->name,
+ tmpctx->t->thread_mask,
+ tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
+ (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+ }
+
+ chunk_appendf(&trash,
+ " req=%p (f=0x%06x an=0x%x pipe=%d tofwd=%d total=%lld)\n"
+ " an_exp=%s",
+ &strm->req,
+ strm->req.flags, strm->req.analysers,
+ strm->req.pipe ? strm->req.pipe->data : 0,
+ strm->req.to_forward, strm->req.total,
+ strm->req.analyse_exp ?
+ human_time(TICKS_TO_MS(strm->req.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(&trash,
+ " rex=%s",
+ strm->req.rex ?
+ human_time(TICKS_TO_MS(strm->req.rex - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(&trash,
+ " wex=%s\n"
+ " buf=%p data=%p o=%u p=%u i=%u size=%u\n",
+ strm->req.wex ?
+ human_time(TICKS_TO_MS(strm->req.wex - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ &strm->req.buf,
+ b_orig(&strm->req.buf), (unsigned int)co_data(&strm->req),
+ (unsigned int)ci_head_ofs(&strm->req), (unsigned int)ci_data(&strm->req),
+ (unsigned int)strm->req.buf.size);
+
+ if (IS_HTX_STRM(strm)) {
+ struct htx *htx = htxbuf(&strm->req.buf);
+
+ chunk_appendf(&trash,
+ " htx=%p flags=0x%x size=%u data=%u used=%u wrap=%s extra=%llu\n",
+ htx, htx->flags, htx->size, htx->data, htx_nbblks(htx),
+ (htx->tail >= htx->head) ? "NO" : "YES",
+ (unsigned long long)htx->extra);
+ }
+ if (HAS_FILTERS(strm) && strm_flt(strm)->current[0]) {
+ struct filter *flt = strm_flt(strm)->current[0];
+
+ chunk_appendf(&trash, " current_filter=%p (id=\"%s\" flags=0x%x pre=0x%x post=0x%x) \n",
+ flt, flt->config->id, flt->flags, flt->pre_analyzers, flt->post_analyzers);
+ }
+
+ chunk_appendf(&trash,
+ " res=%p (f=0x%06x an=0x%x pipe=%d tofwd=%d total=%lld)\n"
+ " an_exp=%s",
+ &strm->res,
+ strm->res.flags, strm->res.analysers,
+ strm->res.pipe ? strm->res.pipe->data : 0,
+ strm->res.to_forward, strm->res.total,
+ strm->res.analyse_exp ?
+ human_time(TICKS_TO_MS(strm->res.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(&trash,
+ " rex=%s",
+ strm->res.rex ?
+ human_time(TICKS_TO_MS(strm->res.rex - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(&trash,
+ " wex=%s\n"
+ " buf=%p data=%p o=%u p=%u i=%u size=%u\n",
+ strm->res.wex ?
+ human_time(TICKS_TO_MS(strm->res.wex - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ &strm->res.buf,
+ b_orig(&strm->res.buf), (unsigned int)co_data(&strm->res),
+ (unsigned int)ci_head_ofs(&strm->res), (unsigned int)ci_data(&strm->res),
+ (unsigned int)strm->res.buf.size);
+
+ if (IS_HTX_STRM(strm)) {
+ struct htx *htx = htxbuf(&strm->res.buf);
+
+ chunk_appendf(&trash,
+ " htx=%p flags=0x%x size=%u data=%u used=%u wrap=%s extra=%llu\n",
+ htx, htx->flags, htx->size, htx->data, htx_nbblks(htx),
+ (htx->tail >= htx->head) ? "NO" : "YES",
+ (unsigned long long)htx->extra);
+ }
+ if (HAS_FILTERS(strm) && strm_flt(strm)->current[1]) {
+ struct filter *flt = strm_flt(strm)->current[1];
+
+ chunk_appendf(&trash, " current_filter=%p (id=\"%s\" flags=0x%x pre=0x%x post=0x%x) \n",
+ flt, flt->config->id, flt->flags, flt->pre_analyzers, flt->post_analyzers);
+ }
+
+ if (strm->current_rule_list && strm->current_rule) {
+ const struct act_rule *rule = strm->current_rule;
+ chunk_appendf(&trash, " current_rule=\"%s\" [%s:%d]\n", rule->kw->kw, rule->conf.file, rule->conf.line);
+ }
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+
+ /* use other states to dump the contents */
+ }
+ /* end of dump */
+ done:
+ ctx->uid = 0;
+ ctx->section = 0;
+ return 1;
+ full:
+ return 0;
+}
+
+static int cli_parse_show_sess(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_sess_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (*args[2] && strcmp(args[2], "all") == 0)
+ ctx->target = (void *)-1;
+ else if (*args[2])
+ ctx->target = (void *)strtoul(args[2], NULL, 0);
+ else
+ ctx->target = NULL;
+ ctx->section = 0; /* start with stream status */
+ ctx->pos = 0;
+ ctx->thr = 0;
+
+ /* The back-ref must be reset, it will be detected and set by
+ * the dump code upon first invocation.
+ */
+ LIST_INIT(&ctx->bref.users);
+
+ /* let's set our own stream's epoch to the current one and increment
+ * it so that we know which streams were already there before us.
+ */
+ appctx_strm(appctx)->stream_epoch = _HA_ATOMIC_FETCH_ADD(&stream_epoch, 1);
+ return 0;
+}
+
+/* This function dumps all streams' states onto the stream connector's
+ * read buffer. It returns 0 if the output buffer is full and it needs
+ * to be called again, otherwise non-zero. It proceeds while threads
+ * are isolated, so there is no thread-safety issue here.
+ */
+static int cli_io_handler_dump_sess(struct appctx *appctx)
+{
+ struct show_sess_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct connection *conn;
+
+ thread_isolate();
+
+ if (ctx->thr >= global.nbthread) {
+ /* already terminated */
+ goto done;
+ }
+
+ if (unlikely(sc_ic(sc)->flags & (CF_WRITE_ERROR|CF_SHUTW))) {
+ /* If we're forced to shut down, we might have to remove our
+ * reference to the last stream being dumped.
+ */
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ }
+ goto done;
+ }
+
+ chunk_reset(&trash);
+
+ /* first, let's detach the back-ref from a possible previous stream */
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ } else if (!ctx->bref.ref) {
+ /* first call, start with first stream */
+ ctx->bref.ref = ha_thread_ctx[ctx->thr].streams.n;
+ }
+
+ /* and start from where we stopped */
+ while (1) {
+ char pn[INET6_ADDRSTRLEN];
+ struct stream *curr_strm;
+ int done = 0;
+
+ if (ctx->bref.ref == &ha_thread_ctx[ctx->thr].streams)
+ done = 1;
+ else {
+ /* check if we've found a stream created after issuing the "show sess" */
+ curr_strm = LIST_ELEM(ctx->bref.ref, struct stream *, list);
+ if ((int)(curr_strm->stream_epoch - appctx_strm(appctx)->stream_epoch) > 0)
+ done = 1;
+ }
+
+ if (done) {
+ ctx->thr++;
+ if (ctx->thr >= global.nbthread)
+ break;
+ ctx->bref.ref = ha_thread_ctx[ctx->thr].streams.n;
+ continue;
+ }
+
+ if (ctx->target) {
+ if (ctx->target != (void *)-1 && ctx->target != curr_strm)
+ goto next_sess;
+
+ LIST_APPEND(&curr_strm->back_refs, &ctx->bref.users);
+ /* call the proper dump() function and return if we're missing space */
+ if (!stats_dump_full_strm_to_buffer(sc, curr_strm))
+ goto full;
+
+ /* stream dump complete */
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ if (ctx->target != (void *)-1) {
+ ctx->target = NULL;
+ break;
+ }
+ else
+ goto next_sess;
+ }
+
+ chunk_appendf(&trash,
+ "%p: proto=%s",
+ curr_strm,
+ strm_li(curr_strm) ? strm_li(curr_strm)->rx.proto->name : "?");
+
+ conn = objt_conn(strm_orig(curr_strm));
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash,
+ " src=%s:%d fe=%s be=%s srv=%s",
+ pn,
+ get_host_port(conn->src),
+ strm_fe(curr_strm)->id,
+ (curr_strm->be->cap & PR_CAP_BE) ? curr_strm->be->id : "<NONE>",
+ objt_server(curr_strm->target) ? __objt_server(curr_strm->target)->id : "<none>"
+ );
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash,
+ " src=unix:%d fe=%s be=%s srv=%s",
+ strm_li(curr_strm)->luid,
+ strm_fe(curr_strm)->id,
+ (curr_strm->be->cap & PR_CAP_BE) ? curr_strm->be->id : "<NONE>",
+ objt_server(curr_strm->target) ? __objt_server(curr_strm->target)->id : "<none>"
+ );
+ break;
+ }
+
+ chunk_appendf(&trash,
+ " ts=%02x epoch=%#x age=%s calls=%u rate=%u cpu=%llu lat=%llu",
+ curr_strm->task->state, curr_strm->stream_epoch,
+ human_time(now.tv_sec - curr_strm->logs.tv_accept.tv_sec, 1),
+ curr_strm->task->calls, read_freq_ctr(&curr_strm->call_rate),
+ (unsigned long long)curr_strm->task->cpu_time, (unsigned long long)curr_strm->task->lat_time);
+
+ chunk_appendf(&trash,
+ " rq[f=%06xh,i=%u,an=%02xh,rx=%s",
+ curr_strm->req.flags,
+ (unsigned int)ci_data(&curr_strm->req),
+ curr_strm->req.analysers,
+ curr_strm->req.rex ?
+ human_time(TICKS_TO_MS(curr_strm->req.rex - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ chunk_appendf(&trash,
+ ",wx=%s",
+ curr_strm->req.wex ?
+ human_time(TICKS_TO_MS(curr_strm->req.wex - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ chunk_appendf(&trash,
+ ",ax=%s]",
+ curr_strm->req.analyse_exp ?
+ human_time(TICKS_TO_MS(curr_strm->req.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ chunk_appendf(&trash,
+ " rp[f=%06xh,i=%u,an=%02xh,rx=%s",
+ curr_strm->res.flags,
+ (unsigned int)ci_data(&curr_strm->res),
+ curr_strm->res.analysers,
+ curr_strm->res.rex ?
+ human_time(TICKS_TO_MS(curr_strm->res.rex - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ chunk_appendf(&trash,
+ ",wx=%s",
+ curr_strm->res.wex ?
+ human_time(TICKS_TO_MS(curr_strm->res.wex - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ chunk_appendf(&trash,
+ ",ax=%s]",
+ curr_strm->res.analyse_exp ?
+ human_time(TICKS_TO_MS(curr_strm->res.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ conn = sc_conn(curr_strm->scf);
+ chunk_appendf(&trash,
+ " scf=[%d,%1xh,fd=%d]",
+ curr_strm->scf->state,
+ curr_strm->scf->flags,
+ conn_fd(conn));
+
+ conn = sc_conn(curr_strm->scb);
+ chunk_appendf(&trash,
+ " scb=[%d,%1xh,fd=%d]",
+ curr_strm->scb->state,
+ curr_strm->scb->flags,
+ conn_fd(conn));
+
+ chunk_appendf(&trash,
+ " exp=%s rc=%d c_exp=%s",
+ curr_strm->task->expire ?
+ human_time(TICKS_TO_MS(curr_strm->task->expire - now_ms),
+ TICKS_TO_MS(1000)) : "",
+ curr_strm->conn_retries,
+ curr_strm->conn_exp ?
+ human_time(TICKS_TO_MS(curr_strm->conn_exp - now_ms),
+ TICKS_TO_MS(1000)) : "");
+ if (task_in_rq(curr_strm->task))
+ chunk_appendf(&trash, " run(nice=%d)", curr_strm->task->nice);
+
+ chunk_appendf(&trash, "\n");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ LIST_APPEND(&curr_strm->back_refs, &ctx->bref.users);
+ goto full;
+ }
+
+ next_sess:
+ ctx->bref.ref = curr_strm->list.n;
+ }
+
+ if (ctx->target && ctx->target != (void *)-1) {
+ /* specified stream not found */
+ if (ctx->section > 0)
+ chunk_appendf(&trash, " *** session terminated while we were watching it ***\n");
+ else
+ chunk_appendf(&trash, "Session not found.\n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+
+ ctx->target = NULL;
+ ctx->uid = 0;
+ goto done;
+ }
+
+ done:
+ thread_release();
+ return 1;
+ full:
+ thread_release();
+ return 0;
+}
+
+static void cli_release_show_sess(struct appctx *appctx)
+{
+ struct show_sess_ctx *ctx = appctx->svcctx;
+
+ if (ctx->thr < global.nbthread) {
+ /* a dump was aborted, either in error or timeout. We need to
+ * safely detach from the target stream's list. It's mandatory
+ * to lock because a stream on the target thread could be moving
+ * our node.
+ */
+ thread_isolate();
+ if (!LIST_ISEMPTY(&ctx->bref.users))
+ LIST_DELETE(&ctx->bref.users);
+ thread_release();
+ }
+}
+
+/* Parses the "shutdown session" directive, it always returns 1 */
+static int cli_parse_shutdown_session(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct stream *strm, *ptr;
+ int thr;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ptr = (void *)strtoul(args[2], NULL, 0);
+ if (!ptr)
+ return cli_err(appctx, "Session pointer expected (use 'show sess').\n");
+
+ strm = NULL;
+
+ thread_isolate();
+
+ /* first, look for the requested stream in the stream table */
+ for (thr = 0; strm != ptr && thr < global.nbthread; thr++) {
+ list_for_each_entry(strm, &ha_thread_ctx[thr].streams, list) {
+ if (strm == ptr) {
+ stream_shutdown(strm, SF_ERR_KILLED);
+ break;
+ }
+ }
+ }
+
+ thread_release();
+
+	/* do we have the stream? */
+ if (strm != ptr)
+ return cli_err(appctx, "No such session (use 'show sess').\n");
+
+ return 1;
+}
+
+/* Parses the "shutdown session server" directive, it always returns 1 */
+static int cli_parse_shutdown_sessions_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[3]);
+ if (!sv)
+ return 1;
+
+	/* kill all the streams that are on this server */
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ srv_shutdown_streams(sv, SF_ERR_KILLED);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "sess", NULL }, "show sess [id] : report the list of current sessions or dump this exact session", cli_parse_show_sess, cli_io_handler_dump_sess, cli_release_show_sess },
+ { { "shutdown", "session", NULL }, "shutdown session [id] : kill a specific session", cli_parse_shutdown_session, NULL, NULL },
+ { { "shutdown", "sessions", "server" }, "shutdown sessions server <bk>/<srv> : kill sessions on a server", cli_parse_shutdown_sessions_server, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
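+
+/* Illustrative usage sketch (editor's note, not part of the original source):
+ * these keywords are driven over the stats socket. Assuming a socket bound
+ * with "stats socket /var/run/haproxy.sock level admin", a session dump and
+ * kill could look like:
+ *
+ *   $ echo "show sess" | socat stdio /var/run/haproxy.sock
+ *   0x7f3c2c02a200: proto=tcpv4 src=127.0.0.1:42318 fe=fe_http ...
+ *   $ echo "shutdown session 0x7f3c2c02a200" | socat stdio /var/run/haproxy.sock
+ *
+ * The pointer value is the one printed by "show sess"; the addresses and
+ * proxy names above are made up for the example.
+ */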
+
+/* main configuration keyword registration. */
+static struct action_kw_list stream_tcp_req_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { "switch-mode", stream_parse_switch_mode },
+ { "use-service", stream_parse_use_service },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &stream_tcp_req_keywords);
+
+/* main configuration keyword registration. */
+static struct action_kw_list stream_tcp_res_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &stream_tcp_res_keywords);
+
+static struct action_kw_list stream_http_req_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { "use-service", stream_parse_use_service },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &stream_http_req_keywords);
+
+static struct action_kw_list stream_http_res_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &stream_http_res_keywords);
+
+static int smp_fetch_cur_server_timeout(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm)
+ return 0;
+
+ smp->data.u.sint = TICKS_TO_MS(smp->strm->res.rto);
+ return 1;
+}
+
+static int smp_fetch_cur_tunnel_timeout(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm)
+ return 0;
+
+ smp->data.u.sint = TICKS_TO_MS(smp->strm->tunnel_timeout);
+ return 1;
+}
+
+static int smp_fetch_last_rule_file(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_STR;
+ if (!smp->strm || !smp->strm->last_rule_file)
+ return 0;
+
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = (char *)smp->strm->last_rule_file;
+ smp->data.u.str.data = strlen(smp->strm->last_rule_file);
+ return 1;
+}
+
+static int smp_fetch_last_rule_line(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm || !smp->strm->last_rule_line)
+ return 0;
+
+ smp->data.u.sint = smp->strm->last_rule_line;
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "cur_server_timeout", smp_fetch_cur_server_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "cur_tunnel_timeout", smp_fetch_cur_tunnel_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "last_rule_file", smp_fetch_last_rule_file, 0, NULL, SMP_T_STR, SMP_USE_INTRN, },
+ { "last_rule_line", smp_fetch_last_rule_line, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
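+
+/* Illustrative configuration sketch (editor's note): the fetches above can be
+ * used like any other sample fetch, e.g. to expose which rule last matched in
+ * a debug header. The header name and variable name are hypothetical:
+ *
+ *   http-response set-header X-Last-Rule %[last_rule_file]:%[last_rule_line]
+ *   http-response set-var(txn.cur_sto) cur_server_timeout
+ */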
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/task.c b/src/task.c
new file mode 100644
index 0000000..a926f4c
--- /dev/null
+++ b/src/task.c
@@ -0,0 +1,1044 @@
+/*
+ * Task management functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <string.h>
+
+#include <import/eb32sctree.h>
+#include <import/eb32tree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+extern struct task *process_stream(struct task *t, void *context, unsigned int state);
+
+DECLARE_POOL(pool_head_task, "task", sizeof(struct task));
+DECLARE_POOL(pool_head_tasklet, "tasklet", sizeof(struct tasklet));
+
+/* This is the memory pool containing all the signal structs. These
+ * structs are used to store each required signal between two tasks.
+ */
+DECLARE_POOL(pool_head_notification, "notification", sizeof(struct notification));
+
+volatile unsigned long global_tasks_mask = 0; /* Mask of threads with tasks in the global runqueue */
+unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */
+
+__decl_aligned_spinlock(rq_lock); /* spin lock related to run queue */
+__decl_aligned_rwlock(wq_lock); /* RW lock related to the wait queue */
+
+#ifdef USE_THREAD
+struct eb_root timers; /* sorted timers tree, global, accessed under wq_lock */
+struct eb_root rqueue; /* tree constituting the global run queue, accessed under rq_lock */
+unsigned int grq_total; /* total number of entries in the global run queue, atomic */
+static unsigned int global_rqueue_ticks; /* insertion count in the grq, use rq_lock */
+#endif
+
+
+
+/* Flags the task <t> for immediate destruction and puts it into its first
+ * thread's shared tasklet list if not yet queued/running. This will bypass
+ * the priority scheduling and make the task show up as fast as possible in
+ * the other thread's queue. Note that this operation isn't idempotent and is
+ * not supposed to be run on the same task from multiple threads at once. It's
+ * the caller's responsibility to make sure it is the only one able to kill the
+ * task.
+ */
+void task_kill(struct task *t)
+{
+ unsigned int state = t->state;
+ unsigned int thr;
+
+ BUG_ON(state & TASK_KILLED);
+
+ while (1) {
+ while (state & (TASK_RUNNING | TASK_QUEUED)) {
+ /* task already in the queue and about to be executed,
+ * or even currently running. Just add the flag and be
+ * done with it, the process loop will detect it and kill
+ * it. The CAS will fail if we arrive too late.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_KILLED))
+ return;
+ }
+
+ /* We'll have to wake it up, but we must also secure it so that
+ * it doesn't vanish under us. TASK_QUEUED guarantees nobody will
+ * add past us.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED | TASK_KILLED)) {
+ /* Bypass the tree and go directly into the shared tasklet list.
+ * Note: that's a task so it must be accounted for as such. Pick
+ * the task's first thread for the job.
+ */
+ thr = my_ffsl(t->thread_mask) - 1;
+
+ /* Beware: tasks that have never run don't have their ->list empty yet! */
+ MT_LIST_APPEND(&ha_thread_ctx[thr].shared_tasklet_list,
+ list_to_mt_list(&((struct tasklet *)t)->list));
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].rq_total);
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].tasks_in_list);
+ if (sleeping_thread_mask & (1UL << thr)) {
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
+ wake_thread(thr);
+ }
+ return;
+ }
+ }
+}
+
+/* Equivalent of task_kill for tasklets. Mark the tasklet <t> for destruction.
+ * It will be deleted on the next scheduler invocation. This function is
+ * thread-safe : a thread can kill a tasklet of another thread.
+ */
+void tasklet_kill(struct tasklet *t)
+{
+ unsigned int state = t->state;
+ unsigned int thr;
+
+ BUG_ON(state & TASK_KILLED);
+
+ while (1) {
+ while (state & (TASK_IN_LIST)) {
+ /* Tasklet already in the list ready to be executed. Add
+ * the killed flag and wait for the process loop to
+ * detect it.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_KILLED))
+ return;
+ }
+
+ /* Mark the tasklet as killed and wake the thread to process it
+ * as soon as possible.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_IN_LIST | TASK_KILLED)) {
+ thr = t->tid >= 0 ? t->tid : tid;
+ MT_LIST_APPEND(&ha_thread_ctx[thr].shared_tasklet_list,
+ list_to_mt_list(&t->list));
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].rq_total);
+ if (sleeping_thread_mask & (1UL << thr)) {
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
+ wake_thread(thr);
+ }
+ return;
+ }
+ }
+}
+
+/* Do not call this one, please use tasklet_wakeup_on() instead, as this one is
+ * the slow path of tasklet_wakeup_on() which performs some preliminary checks
+ * and sets TASK_IN_LIST before calling this one. A negative <thr> designates
+ * the current thread.
+ */
+void __tasklet_wakeup_on(struct tasklet *tl, int thr)
+{
+ if (likely(thr < 0)) {
+ /* this tasklet runs on the caller thread */
+ if (tl->state & TASK_HEAVY) {
+ LIST_APPEND(&th_ctx->tasklets[TL_HEAVY], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_HEAVY;
+ }
+ else if (tl->state & TASK_SELF_WAKING) {
+ LIST_APPEND(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if ((struct task *)tl == th_ctx->current) {
+ _HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
+ LIST_APPEND(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if (th_ctx->current_queue < 0) {
+ LIST_APPEND(&th_ctx->tasklets[TL_URGENT], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_URGENT;
+ }
+ else {
+ LIST_APPEND(&th_ctx->tasklets[th_ctx->current_queue], &tl->list);
+ th_ctx->tl_class_mask |= 1 << th_ctx->current_queue;
+ }
+ _HA_ATOMIC_INC(&th_ctx->rq_total);
+ } else {
+ /* this tasklet runs on a specific thread. */
+ MT_LIST_APPEND(&ha_thread_ctx[thr].shared_tasklet_list, list_to_mt_list(&tl->list));
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].rq_total);
+ if (sleeping_thread_mask & (1UL << thr)) {
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
+ wake_thread(thr);
+ }
+ }
+}
+
+/* Do not call this one, please use tasklet_wakeup_after() instead, as this one is
+ * the slow path of tasklet_wakeup_after() which performs some preliminary checks
+ * and sets TASK_IN_LIST before calling this one.
+ */
+struct list *__tasklet_wakeup_after(struct list *head, struct tasklet *tl)
+{
+ BUG_ON(tid != tl->tid);
+ /* this tasklet runs on the caller thread */
+ if (!head) {
+ if (tl->state & TASK_HEAVY) {
+ LIST_INSERT(&th_ctx->tasklets[TL_HEAVY], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_HEAVY;
+ }
+ else if (tl->state & TASK_SELF_WAKING) {
+ LIST_INSERT(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if ((struct task *)tl == th_ctx->current) {
+ _HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
+ LIST_INSERT(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if (th_ctx->current_queue < 0) {
+ LIST_INSERT(&th_ctx->tasklets[TL_URGENT], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_URGENT;
+ }
+ else {
+ LIST_INSERT(&th_ctx->tasklets[th_ctx->current_queue], &tl->list);
+ th_ctx->tl_class_mask |= 1 << th_ctx->current_queue;
+ }
+ }
+ else {
+ LIST_APPEND(head, &tl->list);
+ }
+ _HA_ATOMIC_INC(&th_ctx->rq_total);
+ return &tl->list;
+}
+
+/* Puts the task <t> in run queue at a position depending on t->nice. <t> is
+ * returned. The nice value assigns boosts in 32nds of the run queue size. A
+ * nice value of -1024 sets the task to -tasks_run_queue*32, while a nice value
+ * of 1024 sets the task to tasks_run_queue*32. The state flags are cleared, so
+ * the caller will have to set its flags after this call.
+ * The task must not already be in the run queue. If unsure, use the safer
+ * task_wakeup() function.
+ */
+void __task_wakeup(struct task *t)
+{
+ struct eb_root *root = &th_ctx->rqueue;
+
+#ifdef USE_THREAD
+ if (t->thread_mask != tid_bit && global.nbthread != 1) {
+ root = &rqueue;
+
+ _HA_ATOMIC_INC(&grq_total);
+ HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
+
+ global_tasks_mask |= t->thread_mask;
+ t->rq.key = ++global_rqueue_ticks;
+ __ha_barrier_store();
+ } else
+#endif
+ {
+ _HA_ATOMIC_INC(&th_ctx->rq_total);
+ t->rq.key = ++th_ctx->rqueue_ticks;
+ }
+
+ if (likely(t->nice)) {
+ int offset;
+
+ _HA_ATOMIC_INC(&niced_tasks);
+ offset = t->nice * (int)global.tune.runqueue_depth;
+ t->rq.key += offset;
+ }
+
+ if (task_profiling_mask & tid_bit)
+ t->wake_date = now_mono_time();
+
+ eb32sc_insert(root, &t->rq, t->thread_mask);
+
+#ifdef USE_THREAD
+ if (root == &rqueue) {
+ _HA_ATOMIC_OR(&t->state, TASK_GLOBAL);
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
+
+ /* If all threads that are supposed to handle this task are sleeping,
+ * wake one.
+ */
+ if ((((t->thread_mask & all_threads_mask) & sleeping_thread_mask) ==
+ (t->thread_mask & all_threads_mask))) {
+ unsigned long m = (t->thread_mask & all_threads_mask) &~ tid_bit;
+
+ m = (m & (m - 1)) ^ m; // keep lowest bit set
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~m);
+ wake_thread(my_ffsl(m) - 1);
+ }
+ }
+#endif
+ return;
+}
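+
+/* Worked example (editor's note, values assumed): with the default
+ * tune.runqueue-depth of 200, a task with nice=4 gets its queue key shifted
+ * by 4*200 = 800 insertion ticks, i.e. roughly 800 positions later than a
+ * nice=0 task woken at the same instant, while nice=-4 would place it 800
+ * positions earlier.
+ */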
+
+/*
+ * __task_queue()
+ *
+ * Inserts a task into wait queue <wq> at the position given by its expiration
+ * date. It does not matter if the task was already in the wait queue or not,
+ * as it will be unlinked. The task MUST NOT have an infinite expiration timer.
+ * Last, tasks must not be queued further than the end of the tree, which is
+ * between <now_ms> and <now_ms> + 2^31 ms (now+24days in 32bit).
+ *
+ * This function should not be used directly, it is meant to be called by the
+ * inline version of task_queue() which performs a few cheap preliminary tests
+ * before deciding to call __task_queue(). Moreover this function doesn't care
+ * at all about locking so the caller must be careful when deciding whether to
+ * lock or not around this call.
+ */
+void __task_queue(struct task *task, struct eb_root *wq)
+{
+#ifdef USE_THREAD
+ BUG_ON((wq == &timers && !(task->state & TASK_SHARED_WQ)) ||
+ (wq == &th_ctx->timers && (task->state & TASK_SHARED_WQ)) ||
+ (wq != &timers && wq != &th_ctx->timers));
+#endif
+ /* if this happens the process is doomed anyway, so better catch it now
+ * so that we have the caller in the stack.
+ */
+ BUG_ON(task->expire == TICK_ETERNITY);
+
+ if (likely(task_in_wq(task)))
+ __task_unlink_wq(task);
+
+ /* the task is not in the queue now */
+ task->wq.key = task->expire;
+#ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES
+ if (tick_is_lt(task->wq.key, now_ms))
+ /* we're queuing too far away or in the past (most likely) */
+ return;
+#endif
+
+ eb32_insert(wq, &task->wq);
+}
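+
+/* Typical caller-side sketch (editor's note): a task is normally queued
+ * through the task_queue()/tick_add() pair rather than this function, e.g.:
+ *
+ *   t->expire = tick_add(now_ms, MS_TO_TICKS(5000));  // fire in ~5s
+ *   task_queue(t);
+ *
+ * task_queue() checks the expiration date and the current queue position
+ * before deciding to call __task_queue().
+ */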
+
+/*
+ * Extract all expired timers from the timer queue, and wakes up all
+ * associated tasks.
+ */
+void wake_expired_tasks()
+{
+ struct thread_ctx * const tt = th_ctx; // thread's tasks
+ int max_processed = global.tune.runqueue_depth;
+ struct task *task;
+ struct eb32_node *eb;
+ __decl_thread(int key);
+
+ while (max_processed-- > 0) {
+ lookup_next_local:
+ eb = eb32_lookup_ge(&tt->timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&tt->timers);
+ if (likely(!eb))
+ break;
+ }
+
+ /* It is possible that this task was left at an earlier place in the
+ * tree because a recent call to task_queue() has not moved it. This
+ * happens when the new expiration date is later than the old one.
+ * Since it is very unlikely that we reach a timeout anyway, it's a
+ * lot cheaper to proceed like this because we almost never update
+ * the tree. We may also find disabled expiration dates there. Since
+ * we have detached the task from the tree, we simply call task_queue
+ * to take care of this. Note that we might occasionally requeue it at
+ * the same place, before <eb>, so we have to check if this happens,
+ * and adjust <eb>, otherwise we may skip it which is not what we want.
+ * We may also not requeue the task (and not point eb at it) if its
+ * expiration time is not set. We also make sure we leave the real
+ * expiration date for the next task in the queue so that when calling
+ * next_timer_expiry() we're guaranteed to see the next real date and
+ * not the next apparent date. This is in order to avoid useless
+ * wakeups.
+ */
+
+ task = eb32_entry(eb, struct task, wq);
+ if (tick_is_expired(task->expire, now_ms)) {
+ /* expired task, wake it up */
+ __task_unlink_wq(task);
+ task_wakeup(task, TASK_WOKEN_TIMER);
+ }
+ else if (task->expire != eb->key) {
+ /* task is not expired but its key doesn't match so let's
+ * update it and skip to next apparently expired task.
+ */
+ __task_unlink_wq(task);
+ if (tick_isset(task->expire))
+ __task_queue(task, &tt->timers);
+ }
+ else {
+ /* task not expired and correctly placed. It may not be eternal. */
+ BUG_ON(task->expire == TICK_ETERNITY);
+ break;
+ }
+ }
+
+#ifdef USE_THREAD
+ if (eb_is_empty(&timers))
+ goto leave;
+
+ HA_RWLOCK_RDLOCK(TASK_WQ_LOCK, &wq_lock);
+ eb = eb32_lookup_ge(&timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ eb = eb32_first(&timers);
+ if (likely(!eb)) {
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ goto leave;
+ }
+ }
+ key = eb->key;
+
+ if (tick_is_lt(now_ms, key)) {
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ goto leave;
+ }
+
+ /* There's really something of interest here, let's visit the queue */
+
+ if (HA_RWLOCK_TRYRDTOSK(TASK_WQ_LOCK, &wq_lock)) {
+ /* if we failed to grab the lock it means another thread is
+ * already doing the same here, so let it do the job.
+ */
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ goto leave;
+ }
+
+ while (1) {
+ lookup_next:
+ if (max_processed-- <= 0)
+ break;
+ eb = eb32_lookup_ge(&timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&timers);
+ if (likely(!eb))
+ break;
+ }
+
+ task = eb32_entry(eb, struct task, wq);
+
+ /* Check for any competing run of the task (quite rare but may
+ * involve a dangerous concurrent access on task->expire). In
+ * order to protect against this, we'll take an exclusive access
+ * on TASK_RUNNING before checking/touching task->expire. If the
+ * task is already RUNNING on another thread, it will deal by
+ * itself with the requeuing so we must not do anything and
+ * simply quit the loop for now, because we cannot wait with the
+ * WQ lock held as this would prevent the running thread from
+ * requeuing the task. One annoying effect of holding RUNNING
+ * here is that a concurrent task_wakeup() will refrain from
+ * waking it up. This forces us to check for a wakeup after
+ * releasing the flag.
+ */
+ if (HA_ATOMIC_FETCH_OR(&task->state, TASK_RUNNING) & TASK_RUNNING)
+ break;
+
+ if (tick_is_expired(task->expire, now_ms)) {
+ /* expired task, wake it up */
+ HA_RWLOCK_SKTOWR(TASK_WQ_LOCK, &wq_lock);
+ __task_unlink_wq(task);
+ HA_RWLOCK_WRTOSK(TASK_WQ_LOCK, &wq_lock);
+ task_drop_running(task, TASK_WOKEN_TIMER);
+ }
+ else if (task->expire != eb->key) {
+ /* task is not expired but its key doesn't match so let's
+ * update it and skip to next apparently expired task.
+ */
+ HA_RWLOCK_SKTOWR(TASK_WQ_LOCK, &wq_lock);
+ __task_unlink_wq(task);
+ if (tick_isset(task->expire))
+ __task_queue(task, &timers);
+ HA_RWLOCK_WRTOSK(TASK_WQ_LOCK, &wq_lock);
+ task_drop_running(task, 0);
+ goto lookup_next;
+ }
+ else {
+ /* task not expired and correctly placed. It may not be eternal. */
+ BUG_ON(task->expire == TICK_ETERNITY);
+ task_drop_running(task, 0);
+ break;
+ }
+ }
+
+ HA_RWLOCK_SKUNLOCK(TASK_WQ_LOCK, &wq_lock);
+#endif
+leave:
+ return;
+}
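+
+/* Editor's note on the look-back above: timer keys live on a 32-bit ring, so
+ * "expired" is decided by signed distance to now_ms. For example, with
+ * now_ms = 0x00000010, a timer queued just before the wrap at key 0xFFFFFFF0
+ * is 0x20 ms in the past; looking up keys >= now_ms - TIMER_LOOK_BACK (half
+ * the ring) still finds it, whereas a plain unsigned comparison would treat
+ * it as almost 50 days in the future.
+ */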
+
+/* Checks the next timer for the current thread by looking into its own timer
+ * list and the global one. It may return TICK_ETERNITY if no timer is present.
+ * Note that the next timer might very well be slightly in the past.
+ */
+int next_timer_expiry()
+{
+ struct thread_ctx * const tt = th_ctx; // thread's tasks
+ struct eb32_node *eb;
+ int ret = TICK_ETERNITY;
+ __decl_thread(int key = TICK_ETERNITY);
+
+ /* first check in the thread-local timers */
+ eb = eb32_lookup_ge(&tt->timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&tt->timers);
+ }
+
+ if (eb)
+ ret = eb->key;
+
+#ifdef USE_THREAD
+ if (!eb_is_empty(&timers)) {
+ HA_RWLOCK_RDLOCK(TASK_WQ_LOCK, &wq_lock);
+ eb = eb32_lookup_ge(&timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb)
+ eb = eb32_first(&timers);
+ if (eb)
+ key = eb->key;
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ if (eb)
+ ret = tick_first(ret, key);
+ }
+#endif
+ return ret;
+}
+
+/* Walks over tasklet lists th_ctx->tasklets[0..TL_CLASSES-1] and runs at most
+ * budget[TL_*] of them. Returns the number of entries effectively processed
+ * (tasks and tasklets merged). The count of tasks in the list for the current
+ * thread is adjusted.
+ */
+unsigned int run_tasks_from_lists(unsigned int budgets[])
+{
+ struct task *(*process)(struct task *t, void *ctx, unsigned int state);
+ struct list *tl_queues = th_ctx->tasklets;
+ struct task *t;
+ uint8_t budget_mask = (1 << TL_CLASSES) - 1;
+ struct sched_activity *profile_entry = NULL;
+ unsigned int done = 0;
+ unsigned int queue;
+ unsigned int state;
+ void *ctx;
+
+ for (queue = 0; queue < TL_CLASSES;) {
+ th_ctx->current_queue = queue;
+
+ /* global.tune.sched.low-latency is set */
+ if (global.tune.options & GTUNE_SCHED_LOW_LATENCY) {
+ if (unlikely(th_ctx->tl_class_mask & budget_mask & ((1 << queue) - 1))) {
+ /* a lower queue index has tasks again and still has a
+ * budget to run them. Let's switch to it now.
+ */
+ queue = (th_ctx->tl_class_mask & 1) ? 0 :
+ (th_ctx->tl_class_mask & 2) ? 1 : 2;
+ continue;
+ }
+
+ if (unlikely(queue > TL_URGENT &&
+ budget_mask & (1 << TL_URGENT) &&
+ !MT_LIST_ISEMPTY(&th_ctx->shared_tasklet_list))) {
+ /* an urgent tasklet arrived from another thread */
+ break;
+ }
+
+ if (unlikely(queue > TL_NORMAL &&
+ budget_mask & (1 << TL_NORMAL) &&
+ (!eb_is_empty(&th_ctx->rqueue) ||
+ (global_tasks_mask & tid_bit)))) {
+ /* a task was woken up by a bulk tasklet or another thread */
+ break;
+ }
+ }
+
+ if (LIST_ISEMPTY(&tl_queues[queue])) {
+ th_ctx->tl_class_mask &= ~(1 << queue);
+ queue++;
+ continue;
+ }
+
+ if (!budgets[queue]) {
+ budget_mask &= ~(1 << queue);
+ queue++;
+ continue;
+ }
+
+ budgets[queue]--;
+ activity[tid].ctxsw++;
+
+ t = (struct task *)LIST_ELEM(tl_queues[queue].n, struct tasklet *, list);
+ ctx = t->context;
+ process = t->process;
+ t->calls++;
+ th_ctx->current = t;
+ th_ctx->flags &= ~TH_FL_STUCK; // this thread is still running
+
+ _HA_ATOMIC_DEC(&th_ctx->rq_total);
+
+ if (t->state & TASK_F_TASKLET) {
+ LIST_DEL_INIT(&((struct tasklet *)t)->list);
+ __ha_barrier_store();
+
+ th_ctx->sched_wake_date = ((struct tasklet *)t)->wake_date;
+ if (th_ctx->sched_wake_date) {
+ uint32_t now_ns = now_mono_time();
+ uint32_t lat = now_ns - th_ctx->sched_wake_date;
+
+ ((struct tasklet *)t)->wake_date = 0;
+ th_ctx->sched_call_date = now_ns;
+ profile_entry = sched_activity_entry(sched_activity, t->process);
+ th_ctx->sched_profile_entry = profile_entry;
+ HA_ATOMIC_ADD(&profile_entry->lat_time, lat);
+ HA_ATOMIC_INC(&profile_entry->calls);
+ }
+
+ state = _HA_ATOMIC_FETCH_AND(&t->state, TASK_PERSISTENT);
+ __ha_barrier_atomic_store();
+
+ if (likely(!(state & TASK_KILLED))) {
+ process(t, ctx, state);
+ }
+ else {
+ done++;
+ th_ctx->current = NULL;
+ pool_free(pool_head_tasklet, t);
+ __ha_barrier_store();
+ continue;
+ }
+
+ if (th_ctx->sched_wake_date)
+ HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
+
+ done++;
+ th_ctx->current = NULL;
+ __ha_barrier_store();
+ continue;
+ }
+
+ LIST_DEL_INIT(&((struct tasklet *)t)->list);
+ __ha_barrier_store();
+
+ th_ctx->sched_wake_date = t->wake_date;
+ if (unlikely(t->wake_date)) {
+ uint32_t now_ns = now_mono_time();
+ uint32_t lat = now_ns - t->wake_date;
+
+ t->lat_time += lat;
+ t->wake_date = 0;
+ th_ctx->sched_call_date = now_ns;
+ profile_entry = sched_activity_entry(sched_activity, t->process);
+ th_ctx->sched_profile_entry = profile_entry;
+ HA_ATOMIC_ADD(&profile_entry->lat_time, lat);
+ HA_ATOMIC_INC(&profile_entry->calls);
+ }
+
+ __ha_barrier_store();
+
+ /* We must be the exclusive owner of the TASK_RUNNING bit, and
+ * have to be careful that the task is not being manipulated on
+ * another thread finding it expired in wake_expired_tasks().
+ * The TASK_RUNNING bit will be set during these operations,
+ * they are extremely rare and do not last long so the best to
+ * do here is to wait.
+ */
+ state = _HA_ATOMIC_LOAD(&t->state);
+ do {
+ while (unlikely(state & TASK_RUNNING)) {
+ __ha_cpu_relax();
+ state = _HA_ATOMIC_LOAD(&t->state);
+ }
+ } while (!_HA_ATOMIC_CAS(&t->state, &state, (state & TASK_PERSISTENT) | TASK_RUNNING));
+
+ __ha_barrier_atomic_store();
+
+ /* OK then this is a regular task */
+
+ _HA_ATOMIC_DEC(&ha_thread_ctx[tid].tasks_in_list);
+
+ /* Note for below: if TASK_KILLED arrived before we've read the state, we
+ * directly free the task. Otherwise it will be seen after processing and
+ * it's freed on the exit path.
+ */
+ if (likely(!(state & TASK_KILLED) && process == process_stream))
+ t = process_stream(t, ctx, state);
+ else if (!(state & TASK_KILLED) && process != NULL)
+ t = process(t, ctx, state);
+ else {
+ task_unlink_wq(t);
+ __task_free(t);
+ th_ctx->current = NULL;
+ __ha_barrier_store();
+ /* We don't want max_processed to be decremented if
+ * we're just freeing a destroyed task, we should only
+ * do so if we really ran a task.
+ */
+ continue;
+ }
+ th_ctx->current = NULL;
+ __ha_barrier_store();
+
+ /* stats are only registered for non-zero wake dates */
+ if (unlikely(th_ctx->sched_wake_date)) {
+ uint32_t cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
+
+ if (t)
+ t->cpu_time += cpu;
+ HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
+ }
+
+ /* If there is a pending state we have to wake up the task
+ * immediately, else we defer it into wait queue
+ */
+ if (t != NULL) {
+ state = _HA_ATOMIC_LOAD(&t->state);
+ if (unlikely(state & TASK_KILLED)) {
+ task_unlink_wq(t);
+ __task_free(t);
+ }
+ else {
+ task_queue(t);
+ task_drop_running(t, 0);
+ }
+ }
+ done++;
+ }
+ th_ctx->current_queue = -1;
+
+ return done;
+}
+
+/* The run queue is chronologically sorted in a tree. An insertion counter is
+ * used to assign a position to each task. This counter may be combined with
+ * other variables (eg: nice value) to set the final position in the tree. The
+ * counter may wrap without a problem, of course. We then limit the number of
+ * tasks processed per call to tune.runqueue-depth (200 by default), so that
+ * general latency remains low and so that task positions have a chance to be
+ * considered. The function scans both the global and local run queues and
+ * picks the most urgent task between the two. We need to grab the global
+ * runqueue lock to touch it, so it's taken on the very first access to the
+ * global run queue and is released as soon as it reaches the end.
+ */
+void process_runnable_tasks()
+{
+ struct thread_ctx * const tt = th_ctx;
+ struct eb32sc_node *lrq; // next local run queue entry
+ struct eb32sc_node *grq; // next global run queue entry
+ struct task *t;
+ const unsigned int default_weights[TL_CLASSES] = {
+ [TL_URGENT] = 64, // ~50% of CPU bandwidth for I/O
+ [TL_NORMAL] = 48, // ~37% of CPU bandwidth for tasks
+ [TL_BULK] = 16, // ~13% of CPU bandwidth for self-wakers
+ [TL_HEAVY] = 1, // never more than 1 heavy task at once
+ };
+ unsigned int max[TL_CLASSES]; // max to be run per class
+ unsigned int max_total; // sum of max above
+ struct mt_list *tmp_list;
+ unsigned int queue;
+ int max_processed;
+ int lpicked, gpicked;
+ int heavy_queued = 0;
+ int budget;
+
+ th_ctx->flags &= ~TH_FL_STUCK; // this thread is still running
+
+ if (!thread_has_tasks()) {
+ activity[tid].empty_rq++;
+ return;
+ }
+
+ max_processed = global.tune.runqueue_depth;
+
+ if (likely(niced_tasks))
+ max_processed = (max_processed + 3) / 4;
+
+ if (max_processed < th_ctx->rq_total && th_ctx->rq_total <= 2*max_processed) {
+ /* If the run queue exceeds the budget by up to 50%, let's cut it
+ * into two identical halves to improve latency.
+ */
+ max_processed = th_ctx->rq_total / 2;
+ }
+
+ not_done_yet:
+ max[TL_URGENT] = max[TL_NORMAL] = max[TL_BULK] = 0;
+
+ /* urgent tasklets list gets a default weight of ~50% */
+ if ((tt->tl_class_mask & (1 << TL_URGENT)) ||
+ !MT_LIST_ISEMPTY(&tt->shared_tasklet_list))
+ max[TL_URGENT] = default_weights[TL_URGENT];
+
+ /* normal tasklets list gets a default weight of ~37% */
+ if ((tt->tl_class_mask & (1 << TL_NORMAL)) ||
+ !eb_is_empty(&th_ctx->rqueue) || (global_tasks_mask & tid_bit))
+ max[TL_NORMAL] = default_weights[TL_NORMAL];
+
+ /* bulk tasklets list gets a default weight of ~13% */
+ if ((tt->tl_class_mask & (1 << TL_BULK)))
+ max[TL_BULK] = default_weights[TL_BULK];
+
+ /* heavy tasks are processed only once and never refilled in a
+ * call round. That budget is not lost either as we don't reset
+ * it unless consumed.
+ */
+ if (!heavy_queued) {
+ if ((tt->tl_class_mask & (1 << TL_HEAVY)))
+ max[TL_HEAVY] = default_weights[TL_HEAVY];
+ else
+ max[TL_HEAVY] = 0;
+ heavy_queued = 1;
+ }
+
+ /* Now compute a fair share of the weights. Total may slightly exceed
+ * 100% due to rounding, this is not a problem. Note that while in
+ * theory the sum cannot be zero as we cannot get there without tasklets
+ * to process, in practice it seldom happens when multiple writers
+ * conflict and rollback on MT_LIST_TRY_APPEND(shared_tasklet_list), causing
+ * a first MT_LIST_ISEMPTY() to succeed for thread_has_task() and the
+ * one above to finally fail. This is extremely rare and not a problem.
+ */
+ max_total = max[TL_URGENT] + max[TL_NORMAL] + max[TL_BULK] + max[TL_HEAVY];
+ if (!max_total)
+ goto leave;
+
+ for (queue = 0; queue < TL_CLASSES; queue++)
+ max[queue] = ((unsigned)max_processed * max[queue] + max_total - 1) / max_total;
+
+ /* The heavy queue must never process more than very few tasks at once
+ * anyway. We set the limit to 1 if running on low_latency scheduling,
+ * given that we know that other values can have an impact on latency
+ * (~500us end-to-end connection achieved at 130kcps in SSL), 1 + one
+ * per 1024 tasks if there is at least one non-heavy task while still
+ * respecting the ratios above, or 1 + one per 128 tasks if only heavy
+ * tasks are present. This allows draining excess SSL handshakes more
+ * efficiently if the queue becomes congested.
+ */
+ if (max[TL_HEAVY] > 1) {
+ if (global.tune.options & GTUNE_SCHED_LOW_LATENCY)
+ budget = 1;
+ else if (tt->tl_class_mask & ~(1 << TL_HEAVY))
+ budget = 1 + tt->rq_total / 1024;
+ else
+ budget = 1 + tt->rq_total / 128;
+
+ if (max[TL_HEAVY] > budget)
+ max[TL_HEAVY] = budget;
+ }
+
+ lrq = grq = NULL;
+
+ /* pick up to max[TL_NORMAL] regular tasks from prio-ordered run queues */
+ /* Note: the grq lock is always held when grq is not null */
+ lpicked = gpicked = 0;
+ budget = max[TL_NORMAL] - tt->tasks_in_list;
+ while (lpicked + gpicked < budget) {
+ if ((global_tasks_mask & tid_bit) && !grq) {
+#ifdef USE_THREAD
+ HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
+ grq = eb32sc_lookup_ge(&rqueue, global_rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
+ if (unlikely(!grq)) {
+ grq = eb32sc_first(&rqueue, tid_bit);
+ if (!grq) {
+ global_tasks_mask &= ~tid_bit;
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
+ }
+ }
+#endif
+ }
+
+ /* If a global task is available for this thread, it's in grq
+ * now and the global RQ is locked.
+ */
+
+ if (!lrq) {
+ lrq = eb32sc_lookup_ge(&tt->rqueue, tt->rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
+ if (unlikely(!lrq))
+ lrq = eb32sc_first(&tt->rqueue, tid_bit);
+ }
+
+ if (!lrq && !grq)
+ break;
+
+ if (likely(!grq || (lrq && (int)(lrq->key - grq->key) <= 0))) {
+ t = eb32sc_entry(lrq, struct task, rq);
+ lrq = eb32sc_next(lrq, tid_bit);
+ eb32sc_delete(&t->rq);
+ lpicked++;
+ }
+#ifdef USE_THREAD
+ else {
+ t = eb32sc_entry(grq, struct task, rq);
+ grq = eb32sc_next(grq, tid_bit);
+ _HA_ATOMIC_AND(&t->state, ~TASK_GLOBAL);
+ eb32sc_delete(&t->rq);
+
+ if (unlikely(!grq)) {
+ grq = eb32sc_first(&rqueue, tid_bit);
+ if (!grq) {
+ global_tasks_mask &= ~tid_bit;
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
+ }
+ }
+ gpicked++;
+ }
+#endif
+ if (t->nice)
+ _HA_ATOMIC_DEC(&niced_tasks);
+
+ /* Add it to the local task list */
+ LIST_APPEND(&tt->tasklets[TL_NORMAL], &((struct tasklet *)t)->list);
+ }
+
+ /* release the rqueue lock */
+ if (grq) {
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
+ grq = NULL;
+ }
+
+ if (lpicked + gpicked) {
+ tt->tl_class_mask |= 1 << TL_NORMAL;
+ _HA_ATOMIC_ADD(&tt->tasks_in_list, lpicked + gpicked);
+#ifdef USE_THREAD
+ if (gpicked) {
+ _HA_ATOMIC_SUB(&grq_total, gpicked);
+ _HA_ATOMIC_ADD(&tt->rq_total, gpicked);
+ }
+#endif
+ activity[tid].tasksw += lpicked + gpicked;
+ }
+
+	/* Merge the list of tasklets woken up by other threads into the
+	 * main list.
+ */
+ tmp_list = MT_LIST_BEHEAD(&tt->shared_tasklet_list);
+ if (tmp_list) {
+ LIST_SPLICE_END_DETACHED(&tt->tasklets[TL_URGENT], (struct list *)tmp_list);
+ if (!LIST_ISEMPTY(&tt->tasklets[TL_URGENT]))
+ tt->tl_class_mask |= 1 << TL_URGENT;
+ }
+
+ /* execute tasklets in each queue */
+ max_processed -= run_tasks_from_lists(max);
+
+ /* some tasks may have woken other ones up */
+ if (max_processed > 0 && thread_has_tasks())
+ goto not_done_yet;
+
+ leave:
+ if (tt->tl_class_mask)
+ activity[tid].long_rq++;
+}
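+
+/* Worked example (editor's note, assuming the default runqueue depth of 200
+ * and all four classes active): max_total = 64+48+16+1 = 129, and the
+ * rounded-up shares become max[TL_URGENT] = ceil(200*64/129) = 100,
+ * max[TL_NORMAL] = 75, max[TL_BULK] = 25 and max[TL_HEAVY] = 2 (before the
+ * heavy budget clamp), i.e. 202 slots for a 200-task budget, which is the
+ * slight overshoot the comment above tolerates.
+ */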
+
+/*
+ * Delete every task before running the master polling loop
+ */
+void mworker_cleantasks()
+{
+ struct task *t;
+ int i;
+ struct eb32_node *tmp_wq = NULL;
+ struct eb32sc_node *tmp_rq = NULL;
+
+#ifdef USE_THREAD
+ /* cleanup the global run queue */
+ tmp_rq = eb32sc_first(&rqueue, MAX_THREADS_MASK);
+ while (tmp_rq) {
+ t = eb32sc_entry(tmp_rq, struct task, rq);
+ tmp_rq = eb32sc_next(tmp_rq, MAX_THREADS_MASK);
+ task_destroy(t);
+ }
+ /* cleanup the timers queue */
+ tmp_wq = eb32_first(&timers);
+ while (tmp_wq) {
+ t = eb32_entry(tmp_wq, struct task, wq);
+ tmp_wq = eb32_next(tmp_wq);
+ task_destroy(t);
+ }
+#endif
+ /* clean the per thread run queue */
+ for (i = 0; i < global.nbthread; i++) {
+ tmp_rq = eb32sc_first(&ha_thread_ctx[i].rqueue, MAX_THREADS_MASK);
+ while (tmp_rq) {
+ t = eb32sc_entry(tmp_rq, struct task, rq);
+ tmp_rq = eb32sc_next(tmp_rq, MAX_THREADS_MASK);
+ task_destroy(t);
+ }
+ /* cleanup the per thread timers queue */
+ tmp_wq = eb32_first(&ha_thread_ctx[i].timers);
+ while (tmp_wq) {
+ t = eb32_entry(tmp_wq, struct task, wq);
+ tmp_wq = eb32_next(tmp_wq);
+ task_destroy(t);
+ }
+ }
+}
+
+/* perform minimal initializations */
+static void init_task()
+{
+ int i, q;
+
+#ifdef USE_THREAD
+ memset(&timers, 0, sizeof(timers));
+ memset(&rqueue, 0, sizeof(rqueue));
+#endif
+ for (i = 0; i < MAX_THREADS; i++) {
+ for (q = 0; q < TL_CLASSES; q++)
+ LIST_INIT(&ha_thread_ctx[i].tasklets[q]);
+ MT_LIST_INIT(&ha_thread_ctx[i].shared_tasklet_list);
+ }
+}
+
+/* config parser for global "tune.sched.low-latency", accepts "on" or "off" */
+static int cfg_parse_tune_sched_low_latency(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_SCHED_LOW_LATENCY;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_SCHED_LOW_LATENCY;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.sched.low-latency", cfg_parse_tune_sched_low_latency },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+INITCALL0(STG_PREPARE, init_task);
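+
+/* Illustrative configuration sketch (editor's note): the keyword parsed above
+ * is a global tunable, e.g.:
+ *
+ *   global
+ *       tune.sched.low-latency on
+ *
+ * which makes the scheduler re-check lower-latency classes between tasklets,
+ * as implemented in run_tasks_from_lists() above.
+ */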
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcp_act.c b/src/tcp_act.c
new file mode 100644
index 0000000..f31c9c3
--- /dev/null
+++ b/src/tcp_act.c
@@ -0,0 +1,572 @@
+/*
+ * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/channel.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tools.h>
+
+/*
+ * Execute the "set-src" action. May be called from {tcp,http}request.
+ * It only changes the address and tries to preserve the original port. If the
+ * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
+ */
+static enum act_return tcp_action_req_set_src(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *src;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_src(cli_conn))
+ goto end;
+ src = cli_conn->src;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_src(sess))
+ goto end;
+ src = sess->src;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_src(s->scf))
+ goto end;
+ src = s->scf->src;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
+ if (smp) {
+ int port = get_net_port(src);
+
+ if (smp->data.type == SMP_T_IPV4) {
+ ((struct sockaddr_in *)src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)src)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
+ ((struct sockaddr_in *)src)->sin_port = port;
+ } else if (smp->data.type == SMP_T_IPV6) {
+ ((struct sockaddr_in6 *)src)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)src)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)src)->sin6_port = port;
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
+
+/*
+ * Execute the "set-dst" action. May be called from {tcp,http}request.
+ * It only changes the address and tries to preserve the original port. If the
+ * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
+ */
+static enum act_return tcp_action_req_set_dst(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *dst;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_dst(cli_conn))
+ goto end;
+ dst = cli_conn->dst;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_dst(sess))
+ goto end;
+ dst = sess->dst;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_dst(s->scf))
+ goto end;
+ dst = s->scf->dst;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
+ if (smp) {
+ int port = get_net_port(dst);
+
+ if (smp->data.type == SMP_T_IPV4) {
+ ((struct sockaddr_in *)dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)dst)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
+ ((struct sockaddr_in *)dst)->sin_port = port;
+ } else if (smp->data.type == SMP_T_IPV6) {
+ ((struct sockaddr_in6 *)dst)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)dst)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)dst)->sin6_port = port;
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
+
+/*
+ * Execute the "set-src-port" action. May be called from {tcp,http}request.
+ * We must test the sin_family before setting the port. If the address family
+ * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
+ * and the port is assigned.
+ */
+static enum act_return tcp_action_req_set_src_port(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *src;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_src(cli_conn))
+ goto end;
+ src = cli_conn->src;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_src(sess))
+ goto end;
+ src = sess->src;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_src(s->scf))
+ goto end;
+ src = s->scf->src;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (smp) {
+ if (src->ss_family == AF_INET6) {
+ ((struct sockaddr_in6 *)src)->sin6_port = htons(smp->data.u.sint);
+ } else {
+ if (src->ss_family != AF_INET) {
+ src->ss_family = AF_INET;
+ ((struct sockaddr_in *)src)->sin_addr.s_addr = 0;
+ }
+ ((struct sockaddr_in *)src)->sin_port = htons(smp->data.u.sint);
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
+
+/*
+ * Execute the "set-dst-port" action. May be called from {tcp,http}request.
+ * We must test the sin_family before setting the port. If the address family
+ * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
+ * and the port is assigned.
+ */
+static enum act_return tcp_action_req_set_dst_port(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *dst;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_dst(cli_conn))
+ goto end;
+ dst = cli_conn->dst;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_dst(sess))
+ goto end;
+ dst = sess->dst;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_dst(s->scf))
+ goto end;
+ dst = s->scf->dst;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (smp) {
+ if (dst->ss_family == AF_INET6) {
+ ((struct sockaddr_in6 *)dst)->sin6_port = htons(smp->data.u.sint);
+ } else {
+ if (dst->ss_family != AF_INET) {
+ dst->ss_family = AF_INET;
+ ((struct sockaddr_in *)dst)->sin_addr.s_addr = 0;
+ }
+ ((struct sockaddr_in *)dst)->sin_port = htons(smp->data.u.sint);
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
+
+/* Executes the "silent-drop" action. May be called from {tcp,http}{request,response} */
+static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *strm, int flags)
+{
+ struct connection *conn = objt_conn(sess->origin);
+
+ if (!conn)
+ goto out;
+
+ if (!conn_ctrl_ready(conn))
+ goto out;
+
+#ifdef TCP_QUICKACK
+ /* drain is needed only to send the quick ACK */
+ conn_ctrl_drain(conn);
+
+	/* re-enable quickack if it was disabled, to ACK all pending data and
+	 * avoid retransmits from the client that might trigger a real reset.
+ */
+ setsockopt(conn->handle.fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
+#endif
+ /* lingering must absolutely be disabled so that we don't send a
+ * shutdown(), this is critical to the TCP_REPAIR trick. When no stream
+ * is present, returning with ERR will cause lingering to be disabled.
+ */
+ if (strm)
+ strm->scf->flags |= SC_FL_NOLINGER;
+
+ if (conn->flags & CO_FL_FDLESS)
+ goto out;
+
+ /* We're on the client-facing side, we must force to disable lingering to
+ * ensure we will use an RST exclusively and kill any pending data.
+ */
+ HA_ATOMIC_OR(&fdtab[conn->handle.fd].state, FD_LINGER_RISK);
+
+#ifdef TCP_REPAIR
+ if (setsockopt(conn->handle.fd, IPPROTO_TCP, TCP_REPAIR, &one, sizeof(one)) == 0) {
+ /* socket will be quiet now */
+ goto out;
+ }
+#endif
+ /* either TCP_REPAIR is not defined or it failed (eg: permissions).
+	 * Let's fall back on the TTL trick, though it only works on routed
+	 * networks and has no effect on the local network.
+ */
+#ifdef IP_TTL
+ if (conn->src && conn->src->ss_family == AF_INET)
+ setsockopt(conn->handle.fd, IPPROTO_IP, IP_TTL, &one, sizeof(one));
+#endif
+#ifdef IPV6_UNICAST_HOPS
+ if (conn->src && conn->src->ss_family == AF_INET6)
+ setsockopt(conn->handle.fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
+#endif
+ out:
+ /* kill the stream if any */
+ if (strm) {
+ channel_abort(&strm->req);
+ channel_abort(&strm->res);
+ strm->req.analysers &= AN_REQ_FLT_END;
+ strm->res.analysers &= AN_RES_FLT_END;
+ if (strm->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&strm->be->be_counters.denied_req);
+ if (!(strm->flags & SF_ERR_MASK))
+ strm->flags |= SF_ERR_PRXCOND;
+ if (!(strm->flags & SF_FINST_MASK))
+ strm->flags |= SF_FINST_R;
+ }
+
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+
+ return ACT_RET_ABRT;
+}
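+
+/* Illustrative configuration sketch (editor's note): the action above is
+ * typically used to shed abusive clients without tying up local resources.
+ * The ACL file name is hypothetical:
+ *
+ *   tcp-request content silent-drop if { src -f /etc/haproxy/abusers.lst }
+ */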
+
+
+#if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE)
+static enum act_return tcp_action_set_mark(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ conn_set_mark(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]);
+ return ACT_RET_CONT;
+}
+#endif
+
+#ifdef IP_TOS
+static enum act_return tcp_action_set_tos(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ conn_set_tos(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]);
+ return ACT_RET_CONT;
+}
+#endif
+
+/*
+ * Release the sample expr when releasing a set src/dst action
+ */
+static void release_set_src_dst_action(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.expr);
+}
+
+/* parse "set-{src,dst}[-port]" action */
+static enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ struct sample_expr *expr;
+ unsigned int where;
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ where = 0;
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ free(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.expr = expr;
+ rule->action = ACT_CUSTOM;
+
+ if (strcmp(args[*orig_arg - 1], "set-src") == 0) {
+ rule->action_ptr = tcp_action_req_set_src;
+ } else if (strcmp(args[*orig_arg - 1], "set-src-port") == 0) {
+ rule->action_ptr = tcp_action_req_set_src_port;
+ } else if (strcmp(args[*orig_arg - 1], "set-dst") == 0) {
+ rule->action_ptr = tcp_action_req_set_dst;
+ } else if (strcmp(args[*orig_arg - 1], "set-dst-port") == 0) {
+ rule->action_ptr = tcp_action_req_set_dst_port;
+ } else {
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->release_ptr = release_set_src_dst_action;
+ (*orig_arg)++;
+
+ return ACT_RET_PRS_OK;
+}
+
+
+/* Parse a "set-mark" action. It takes the MARK value as argument. It returns
+ * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret tcp_parse_set_mark(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+#if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE)
+ char *endp;
+ unsigned int mark;
+
+ if (!*args[*cur_arg]) {
+ memprintf(err, "expects exactly 1 argument (integer/hex value)");
+ return ACT_RET_PRS_ERR;
+ }
+ mark = strtoul(args[*cur_arg], &endp, 0);
+ if (endp && *endp != '\0') {
+ memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp);
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = tcp_action_set_mark;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)mark;
+ global.last_checks |= LSTCHK_NETADM;
+ return ACT_RET_PRS_OK;
+#else
+ memprintf(err, "not supported on this platform (SO_MARK|SO_USER_COOKIE|SO_RTABLE undefined)");
+ return ACT_RET_PRS_ERR;
+#endif
+}
+
+
+/* Parse a "set-tos" action. It takes the TOS value as argument. It returns
+ * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret tcp_parse_set_tos(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+#ifdef IP_TOS
+ char *endp;
+ int tos;
+
+ if (!*args[*cur_arg]) {
+ memprintf(err, "expects exactly 1 argument (integer/hex value)");
+ return ACT_RET_PRS_ERR;
+ }
+ tos = strtol(args[*cur_arg], &endp, 0);
+ if (endp && *endp != '\0') {
+ memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp);
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = tcp_action_set_tos;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)tos;
+ return ACT_RET_PRS_OK;
+#else
+ memprintf(err, "not supported on this platform (IP_TOS undefined)");
+ return ACT_RET_PRS_ERR;
+#endif
+}
+
+
+/* Parse a "silent-drop" action. It takes no argument. It returns ACT_RET_PRS_OK on
+ * success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = tcp_exec_action_silent_drop;
+ return ACT_RET_PRS_OK;
+}
+
+
+static struct action_kw_list tcp_req_conn_actions = {ILH, {
+ { "set-dst" , tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_conn_keywords_register, &tcp_req_conn_actions);
+
+static struct action_kw_list tcp_req_sess_actions = {ILH, {
+ { "set-dst" , tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_req_sess_actions);
+
+static struct action_kw_list tcp_req_cont_actions = {ILH, {
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-dst" , tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions);
+
+static struct action_kw_list tcp_res_cont_actions = {ILH, {
+ { "set-mark", tcp_parse_set_mark },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_cont_actions);
+
+static struct action_kw_list http_req_actions = {ILH, {
+ { "set-dst", tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+static struct action_kw_list http_res_actions = {ILH, {
+ { "set-mark", tcp_parse_set_mark },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
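+
+/* Illustrative configuration sketch (editor's note): the set-src/set-dst
+ * family takes any address-typed sample expression, while set-mark and
+ * set-tos take an integer/hex literal. The header and values are examples
+ * only:
+ *
+ *   http-request set-src hdr(x-forwarded-for)
+ *   tcp-request connection set-mark 0x2
+ *   tcp-request connection set-tos 16
+ */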
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcp_rules.c b/src/tcp_rules.c
new file mode 100644
index 0000000..e649794
--- /dev/null
+++ b/src/tcp_rules.c
@@ -0,0 +1,1428 @@
+/*
+ * "tcp" rules processing
+ *
+ * Copyright 2000-2016 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg-t.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+
+#define TRACE_SOURCE &trace_strm
+
+/* List heads of all known action keywords for the "tcp-request connection",
+ * "tcp-request session", "tcp-request content" and "tcp-response content" rules */
+struct list tcp_req_conn_keywords = LIST_HEAD_INIT(tcp_req_conn_keywords);
+struct list tcp_req_sess_keywords = LIST_HEAD_INIT(tcp_req_sess_keywords);
+struct list tcp_req_cont_keywords = LIST_HEAD_INIT(tcp_req_cont_keywords);
+struct list tcp_res_cont_keywords = LIST_HEAD_INIT(tcp_res_cont_keywords);
+
+/*
+ * Register keywords.
+ */
+void tcp_req_conn_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_req_conn_keywords, &kw_list->list);
+}
+
+void tcp_req_sess_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_req_sess_keywords, &kw_list->list);
+}
+
+void tcp_req_cont_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_req_cont_keywords, &kw_list->list);
+}
+
+void tcp_res_cont_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_res_cont_keywords, &kw_list->list);
+}
+
+/*
+ * Return the struct action_kw associated with a keyword.
+ */
+struct action_kw *tcp_req_conn_action(const char *kw)
+{
+ return action_lookup(&tcp_req_conn_keywords, kw);
+}
+
+struct action_kw *tcp_req_sess_action(const char *kw)
+{
+ return action_lookup(&tcp_req_sess_keywords, kw);
+}
+
+struct action_kw *tcp_req_cont_action(const char *kw)
+{
+ return action_lookup(&tcp_req_cont_keywords, kw);
+}
+
+struct action_kw *tcp_res_cont_action(const char *kw)
+{
+ return action_lookup(&tcp_res_cont_keywords, kw);
+}
+
+/* This function performs the TCP request analysis on the current request. It
+ * returns 1 if the processing can continue on next analysers, or zero if it
+ * needs more data, encounters an error, or wants to immediately abort the
+ * request. It relies on buffer flags, and updates req->analysers. The
+ * function may be called for frontend rules and backend rules. It only relies
+ * on the backend pointer so this works for both cases.
+ */
+int tcp_inspect_request(struct stream *s, struct channel *req, int an_bit)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ struct act_rule *rule;
+ int partial;
+ int act_opts = 0;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+
+ def_rules = ((s->be->defpx && (an_bit == AN_REQ_INSPECT_FE || s->be->defpx != sess->fe->defpx)) ? &s->be->defpx->tcp_req.inspect_rules : NULL);
+ rules = &s->be->tcp_req.inspect_rules;
+
+	/* We don't know whether we have enough data, so we must proceed
+	 * this way:
+ * - iterate through all rules in their declaration order
+ * - if one rule returns MISS, it means the inspect delay is
+ * not over yet, then return immediately, otherwise consider
+ * it as a non-match.
+ * - if one rule returns OK, then return OK
+ * - if one rule returns KO, then return KO
+ */
+
+ if ((req->flags & (CF_EOI|CF_SHUTR|CF_READ_ERROR)) || channel_full(req, global.tune.maxrewrite) ||
+ sc_waiting_room(chn_prod(req)) ||
+ !s->be->tcp_req.inspect_delay || tick_is_expired(s->rules_exp, now_ms)) {
+ partial = SMP_OPT_FINAL;
+ /* Action may yield while the inspect_delay is not expired and there is no read error */
+ if ((req->flags & CF_READ_ERROR) || !s->be->tcp_req.inspect_delay || tick_is_expired(s->rules_exp, now_ms))
+ act_opts |= ACT_OPT_FINAL;
+ }
+ else
+ partial = 0;
+
+	/* If the "current_rule_list" matches the executed rule list, we are in
+	 * a resume condition. If a resume is needed, it is always in the action
+	 * and never in the ACL or converters. In this case, we initialise the
+	 * current rule and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if ((def_rules && s->current_rule_list == def_rules) || s->current_rule_list == rules)
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ enum acl_test_res ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ | partial);
+ if (ret == ACL_TEST_MISS)
+ goto missing_data;
+
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ act_opts |= ACT_OPT_FIRST;
+resume_execution:
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, s->be, s->sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ if (act_opts & ACT_OPT_FINAL) {
+ send_log(s->be, LOG_WARNING,
+ "Internal error: yield not allowed if the inspect-delay expired "
+ "for the tcp-request content actions.");
+ goto internal;
+ }
+ goto missing_data;
+ case ACT_RET_DENY:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ case ACT_RET_ABRT:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto abort;
+ case ACT_RET_ERR:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto internal;
+ case ACT_RET_INV:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto invalid;
+ }
+ continue; /* eval the next rule */
+ }
+
+			/* If no action function is defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ }
+ }
+ }
+
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if we get there, it means we have no rule which matches, or
+ * we have an explicit accept, so we apply the default accept.
+ */
+ req->analysers &= ~an_bit;
+ s->current_rule = s->current_rule_list = NULL;
+ req->analyse_exp = s->rules_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 1;
+
+ missing_data:
+ channel_dont_connect(req);
+ /* just set the request timeout once at the beginning of the request */
+ if (!tick_isset(s->rules_exp) && s->be->tcp_req.inspect_delay)
+ s->rules_exp = tick_add(now_ms, s->be->tcp_req.inspect_delay);
+ req->analyse_exp = tick_first((tick_is_expired(req->analyse_exp, now_ms) ? 0 : req->analyse_exp), s->rules_exp);
+ DBG_TRACE_DEVEL("waiting for more data", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 0;
+
+ deny:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+ goto reject;
+
+ internal:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ goto reject;
+
+ invalid:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ reject:
+ sc_must_kill_conn(chn_prod(req));
+ channel_abort(req);
+ channel_abort(&s->res);
+
+ abort:
+ req->analysers &= AN_REQ_FLT_END;
+ s->current_rule = s->current_rule_list = NULL;
+ req->analyse_exp = s->rules_exp = TICK_ETERNITY;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+ DBG_TRACE_DEVEL("leaving on error|deny|abort", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_TCP_ERR, s);
+ return 0;
+}
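+
+/* A hypothetical configuration exercising the analyser above; the backend
+ * name and payload match are examples only, not taken from this patch:
+ *
+ *     backend tcp_in
+ *         tcp-request inspect-delay 5s
+ *         tcp-request content accept if { req.payload(0,4) -m bin 48545450 }
+ *         tcp-request content reject
+ *
+ * While the delay runs, a MISS from the payload fetch sends us through the
+ * missing_data path above (return 0); once it expires, SMP_OPT_FINAL is set
+ * and the final "reject" applies.
+ */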
+
+/* This function performs the TCP response analysis on the current response. It
+ * returns 1 if the processing can continue on next analysers, or zero if it
+ * needs more data, encounters an error, or wants to immediately abort the
+ * response. It relies on buffer flags, and updates rep->analysers. The
+ * function may be called for backend rules.
+ */
+int tcp_inspect_response(struct stream *s, struct channel *rep, int an_bit)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ struct act_rule *rule;
+ int partial;
+ int act_opts = 0;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+
+ def_rules = (s->be->defpx ? &s->be->defpx->tcp_rep.inspect_rules : NULL);
+ rules = &s->be->tcp_rep.inspect_rules;
+
+	/* We don't know whether we have enough data, so we must proceed
+	 * this way:
+ * - iterate through all rules in their declaration order
+ * - if one rule returns MISS, it means the inspect delay is
+ * not over yet, then return immediately, otherwise consider
+ * it as a non-match.
+ * - if one rule returns OK, then return OK
+ * - if one rule returns KO, then return KO
+ */
+ if ((rep->flags & (CF_EOI|CF_SHUTR|CF_READ_ERROR)) || channel_full(rep, global.tune.maxrewrite) ||
+ sc_waiting_room(chn_prod(rep)) ||
+ !s->be->tcp_rep.inspect_delay || tick_is_expired(s->rules_exp, now_ms)) {
+ partial = SMP_OPT_FINAL;
+ /* Action may yield while the inspect_delay is not expired and there is no read error */
+ if ((rep->flags & CF_READ_ERROR) || !s->be->tcp_rep.inspect_delay || tick_is_expired(s->rules_exp, now_ms))
+ act_opts |= ACT_OPT_FINAL;
+ }
+ else
+ partial = 0;
+
+	/* If the "current_rule_list" matches the executed rule list, we are in
+	 * a resume condition. If a resume is needed, it is always in the action
+	 * and never in the ACL or converters. In this case, we initialise the
+	 * current rule and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if ((def_rules && s->current_rule_list == def_rules) || s->current_rule_list == rules)
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ enum acl_test_res ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_RES | partial);
+ if (ret == ACL_TEST_MISS)
+ goto missing_data;
+
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ act_opts |= ACT_OPT_FIRST;
+resume_execution:
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, s->be, s->sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ if (act_opts & ACT_OPT_FINAL) {
+ send_log(s->be, LOG_WARNING,
+ "Internal error: yield not allowed if the inspect-delay expired "
+ "for the tcp-response content actions.");
+ goto internal;
+ }
+ channel_dont_close(rep);
+ goto missing_data;
+ case ACT_RET_DENY:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ case ACT_RET_ABRT:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto abort;
+ case ACT_RET_ERR:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto internal;
+ case ACT_RET_INV:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto invalid;
+ }
+ continue; /* eval the next rule */
+ }
+
+			/* If no action function is defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ }
+ else if (rule->action == ACT_TCP_CLOSE) {
+ chn_prod(rep)->flags |= SC_FL_NOLINGER | SC_FL_NOHALF;
+ sc_must_kill_conn(chn_prod(rep));
+ sc_shutr(chn_prod(rep));
+ sc_shutw(chn_prod(rep));
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ }
+ }
+
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if we get there, it means we have no rule which matches, or
+ * we have an explicit accept, so we apply the default accept.
+ */
+ rep->analysers &= ~an_bit;
+ s->current_rule = s->current_rule_list = NULL;
+ rep->analyse_exp = s->rules_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 1;
+
+ missing_data:
+ /* just set the analyser timeout once at the beginning of the response */
+ if (!tick_isset(s->rules_exp) && s->be->tcp_rep.inspect_delay)
+ s->rules_exp = tick_add(now_ms, s->be->tcp_rep.inspect_delay);
+ rep->analyse_exp = tick_first((tick_is_expired(rep->analyse_exp, now_ms) ? 0 : rep->analyse_exp), s->rules_exp);
+ DBG_TRACE_DEVEL("waiting for more data", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 0;
+
+ deny:
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.denied_resp);
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_resp);
+ if (s->sess->listener && s->sess->listener->counters)
+ _HA_ATOMIC_INC(&s->sess->listener->counters->denied_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.denied_resp);
+ goto reject;
+
+ internal:
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (s->sess->listener && s->sess->listener->counters)
+ _HA_ATOMIC_INC(&s->sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ goto reject;
+
+ invalid:
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+
+ reject:
+ sc_must_kill_conn(chn_prod(rep));
+ channel_abort(rep);
+ channel_abort(&s->req);
+
+ abort:
+ rep->analysers &= AN_RES_FLT_END;
+ s->current_rule = s->current_rule_list = NULL;
+ rep->analyse_exp = s->rules_exp = TICK_ETERNITY;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_D;
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_TCP_ERR, s);
+ return 0;
+}
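+
+/* A hypothetical configuration exercising the response analyser; the backend
+ * name, fetch and pattern are examples only:
+ *
+ *     backend tcp_out
+ *         tcp-response inspect-delay 10s
+ *         tcp-response content close if { res.payload(0,4) -m bin 00000000 }
+ *         tcp-response content accept
+ */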
+
+
+/* This function performs the TCP layer4 analysis on the current request. It
+ * returns 0 if a reject rule matches, otherwise 1 if either an accept rule
+ * matches or if no rule matches. It can only use rules which don't need
+ * any data. This only works on connection-based client-facing stream connectors.
+ */
+int tcp_exec_l4_rules(struct session *sess)
+{
+ struct proxy *px = sess->fe;
+ struct act_rule *rule;
+ struct connection *conn = objt_conn(sess->origin);
+ int result = 1;
+ enum acl_test_res ret;
+
+ if (!conn)
+ return result;
+
+ if (sess->fe->defpx)
+ px = sess->fe->defpx;
+
+ restart:
+ list_for_each_entry(rule, &px->tcp_req.l4_rules, list) {
+ ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, sess->fe, sess, NULL, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, sess->fe, sess, NULL, ACT_OPT_FINAL | ACT_OPT_FIRST)) {
+ case ACT_RET_YIELD:
+ /* yield is not allowed at this point. If this return code is
+					 * used, it is a bug, so we report it and stop processing the rules.
+ */
+ send_log(sess->fe, LOG_WARNING,
+ "Internal error: yield not allowed with tcp-request connection actions.");
+ /* fall through */
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ goto end;
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_DENY:
+ case ACT_RET_ABRT:
+ case ACT_RET_ERR:
+ case ACT_RET_INV:
+ result = 0;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+			/* If no action function is defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_conn);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_conn);
+
+ result = 0;
+ goto end;
+ }
+ else if (rule->action == ACT_TCP_EXPECT_PX) {
+ if (!(conn->flags & CO_FL_HANDSHAKE)) {
+ if (xprt_add_hs(conn) < 0) {
+ result = 0;
+ goto end;
+ }
+ }
+ conn->flags |= CO_FL_ACCEPT_PROXY;
+ }
+ else if (rule->action == ACT_TCP_EXPECT_CIP) {
+ if (!(conn->flags & CO_FL_HANDSHAKE)) {
+ if (xprt_add_hs(conn) < 0) {
+ result = 0;
+ goto end;
+ }
+ }
+ conn->flags |= CO_FL_ACCEPT_CIP;
+ }
+ }
+ }
+
+ if (px != sess->fe) {
+ px = sess->fe;
+ goto restart;
+ }
+ end:
+ return result;
+}
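+
+/* A hypothetical configuration exercising the L4 rules above; the file name
+ * and network are examples only:
+ *
+ *     frontend fe
+ *         tcp-request connection reject if { src -f /etc/haproxy/deny.lst }
+ *         tcp-request connection expect-proxy layer4 if { src 10.0.0.0/8 }
+ *
+ * These run right after accept(), before any byte is read, which is why
+ * yielding actions are forbidden here.
+ */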
+
+/* This function performs the TCP layer5 analysis on the current request. It
+ * returns 0 if a reject rule matches, otherwise 1 if either an accept rule
+ * matches or if no rule matches. It can only use rules which don't need
+ * any data. This only works on session-based client-facing stream connectors.
+ * A valid example use case is tracking a stick-counter on the source
+ * address extracted from the PROXY protocol.
+ */
+int tcp_exec_l5_rules(struct session *sess)
+{
+ struct proxy *px = sess->fe;
+ struct act_rule *rule;
+ int result = 1;
+ enum acl_test_res ret;
+
+ if (sess->fe->defpx)
+ px = sess->fe->defpx;
+
+ restart:
+ list_for_each_entry(rule, &px->tcp_req.l5_rules, list) {
+ ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, sess->fe, sess, NULL, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, sess->fe, sess, NULL, ACT_OPT_FINAL | ACT_OPT_FIRST)) {
+ case ACT_RET_YIELD:
+ /* yield is not allowed at this point. If this return code is
+					 * used, it is a bug, so we report it and stop processing the rules.
+ */
+ send_log(sess->fe, LOG_WARNING,
+ "Internal error: yield not allowed with tcp-request session actions.");
+ /* fall through */
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ goto end;
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_DENY:
+ case ACT_RET_ABRT:
+ case ACT_RET_ERR:
+ case ACT_RET_INV:
+ result = 0;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+			/* If no action function is defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_sess);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_sess);
+
+ result = 0;
+ goto end;
+ }
+ }
+ }
+
+ if (px != sess->fe) {
+ px = sess->fe;
+ goto restart;
+ }
+ end:
+ return result;
+}
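+
+/* A hypothetical configuration for the L5 rules above, matching the use case
+ * described in the comment; "st_src" is assumed to be a stick-table storing
+ * conn_rate:
+ *
+ *     frontend fe
+ *         tcp-request session track-sc0 src table st_src
+ *         tcp-request session reject if { sc0_conn_rate gt 100 }
+ */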
+
+/* Parse a tcp-response rule. Return a negative value in case of failure */
+static int tcp_parse_response_rule(char **args, int arg, int section_type,
+ struct proxy *curpx, const struct proxy *defpx,
+ struct act_rule *rule, char **err,
+ unsigned int where,
+ const char *file, int line)
+{
+ if ((curpx == defpx && strlen(defpx->id) == 0) || !(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "%s %s is only allowed in 'backend' sections or 'defaults' section with a name",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (strcmp(args[arg], "accept") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_ALLOW;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "reject") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_DENY;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "close") == 0) {
+ arg++;
+ rule->action = ACT_TCP_CLOSE;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else {
+ struct action_kw *kw;
+ kw = tcp_res_cont_action(args[arg]);
+ if (kw) {
+ arg++;
+ rule->kw = kw;
+ if (kw->parse((const char **)args, &arg, curpx, rule, err) == ACT_RET_PRS_ERR)
+ return -1;
+ } else {
+ const char *extra[] = { "accept", "reject", "close", NULL };
+ const char *best = action_suggest(args[arg], &tcp_res_cont_keywords, extra);
+
+ action_build_list(&tcp_res_cont_keywords, &trash);
+ memprintf(err,
+ "'%s %s' expects 'accept', 'close', 'reject', %s in %s '%s' (got '%s').%s%s%s",
+ args[0], args[1], trash.area,
+ proxy_type_str(curpx), curpx->id, args[arg],
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ return -1;
+ }
+ }
+
+ if (strcmp(args[arg], "if") == 0 || strcmp(args[arg], "unless") == 0) {
+ if ((rule->cond = build_acl_cond(file, line, &curpx->acl, curpx, (const char **)args+arg, err)) == NULL) {
+ memprintf(err,
+ "'%s %s %s' : error detected in %s '%s' while parsing '%s' condition : %s",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg], *err);
+ return -1;
+ }
+ }
+ else if (*args[arg]) {
+ memprintf(err,
+ "'%s %s %s' only accepts 'if' or 'unless', in %s '%s' (got '%s')",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg]);
+ return -1;
+ }
+ return 0;
+}
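+
+/* Informal sketch of the grammar accepted by the parser above (not
+ * normative):
+ *
+ *     tcp-response content <action> [{if | unless} <condition>]
+ *     <action> ::= accept | reject | close | <registered keyword> [args...]
+ */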
+
+
+/* This function executes a track-sc* action. On success, it returns
+ * ACT_RET_CONT. If it must yield, it returns ACT_RET_YIELD. Otherwise
+ * ACT_RET_ERR is returned.
+ */
+static enum act_return tcp_action_track_sc(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stktable *t;
+ struct stktable_key *key;
+ struct sample smp;
+ int opt;
+
+ opt = SMP_OPT_DIR_REQ;
+ if (flags & ACT_FLAG_FINAL)
+ opt |= SMP_OPT_FINAL;
+
+ t = rule->arg.trk_ctr.table.t;
+ if (rule->from == ACT_F_TCP_REQ_CNT) { /* L7 rules: use the stream */
+ if (stkctr_entry(&s->stkctr[rule->action]))
+ goto end;
+
+ key = stktable_fetch_key(t, s->be, sess, s, opt, rule->arg.trk_ctr.expr, &smp);
+
+ if ((smp.flags & SMP_F_MAY_CHANGE) && !(flags & ACT_FLAG_FINAL))
+ return ACT_RET_YIELD; /* key might appear later */
+
+ if (key && (ts = stktable_get_entry(t, key))) {
+ stream_track_stkctr(&s->stkctr[rule->action], t, ts);
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_CONTENT);
+ if (sess->fe != s->be)
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_BACKEND);
+ }
+ }
+ else { /* L4/L5 rules: use the session */
+ if (stkctr_entry(&sess->stkctr[rule->action]))
+ goto end;
+
+ key = stktable_fetch_key(t, sess->fe, sess, NULL, opt, rule->arg.trk_ctr.expr, NULL);
+ if (key && (ts = stktable_get_entry(t, key)))
+ stream_track_stkctr(&sess->stkctr[rule->action], t, ts);
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
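+
+/* Hypothetical rules ending up in tcp_action_track_sc(); the table names and
+ * keys are examples only:
+ *
+ *     tcp-request connection track-sc0 src table per_ip
+ *     tcp-request content track-sc1 req.payload(0,8),hex table per_key
+ *
+ * The first one takes the session branch above (L4/L5), the second the
+ * stream branch (content), which may yield until the key is available.
+ */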
+
+/* This function executes a capture action. It executes a fetch expression,
+ * turns the result into a string and puts it in a capture slot. On success, it
+ * returns ACT_RET_CONT. If it must yield, it returns ACT_RET_YIELD. Otherwise
+ * ACT_RET_ERR is returned.
+ */
+static enum act_return tcp_action_capture(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h = rule->arg.cap.hdr;
+ char **cap = s->req_cap;
+ int len, opt;
+
+ opt = ((rule->from == ACT_F_TCP_REQ_CNT) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+ if (flags & ACT_FLAG_FINAL)
+ opt |= SMP_OPT_FINAL;
+
+ key = sample_fetch_as_type(s->be, sess, s, opt, rule->arg.cap.expr, SMP_T_STR);
+ if (!key)
+ goto end;
+
+ if ((key->flags & SMP_F_MAY_CHANGE) && !(flags & ACT_FLAG_FINAL))
+ return ACT_RET_YIELD; /* key might appear later */
+
+ if (cap[h->index] == NULL) {
+ cap[h->index] = pool_alloc(h->pool);
+ if (cap[h->index] == NULL) /* no more capture memory, ignore error */
+ goto end;
+ }
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+
+ end:
+ return ACT_RET_CONT;
+}
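+
+/* A hypothetical capture rule handled by tcp_action_capture(); the fetch and
+ * length are examples only:
+ *
+ *     frontend fe
+ *         tcp-request inspect-delay 5s
+ *         tcp-request content capture req.payload(0,16) len 16
+ *
+ * The captured value is stored in a request capture slot and is reported in
+ * the logs like header captures are.
+ */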
+
+static void release_tcp_capture(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.cap.expr);
+}
+
+
+static void release_tcp_track_sc(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.trk_ctr.expr);
+}
+
+/* Parse a tcp-request rule. Return a negative value in case of failure */
+static int tcp_parse_request_rule(char **args, int arg, int section_type,
+ struct proxy *curpx, const struct proxy *defpx,
+ struct act_rule *rule, char **err,
+ unsigned int where, const char *file, int line)
+{
+ if (curpx == defpx && strlen(defpx->id) == 0) {
+ memprintf(err, "%s %s is not allowed in anonymous 'defaults' sections",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (strcmp(args[arg], "accept") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_ALLOW;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "reject") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_DENY;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "capture") == 0) {
+ struct sample_expr *expr;
+ struct cap_hdr *hdr;
+ int kw = arg;
+ int len = 0;
+
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err,
+ "'%s %s %s' : proxy '%s' has no frontend capability",
+ args[0], args[1], args[kw], curpx->id);
+ return -1;
+ }
+
+ if (!(where & SMP_VAL_FE_REQ_CNT)) {
+ memprintf(err,
+ "'%s %s' is not allowed in '%s %s' rules in %s '%s'",
+ args[arg], args[arg+1], args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ arg++;
+
+ curpx->conf.args.ctx = ARGC_CAP;
+ expr = sample_parse_expr(args, &arg, file, line, err, &curpx->conf.args, NULL);
+ if (!expr) {
+ memprintf(err,
+ "'%s %s %s' : %s",
+ args[0], args[1], args[kw], *err);
+ return -1;
+ }
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "'%s %s %s' : fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], args[1], args[kw], args[arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return -1;
+ }
+
+ if (strcmp(args[arg], "len") == 0) {
+ arg++;
+ if (!args[arg]) {
+ memprintf(err,
+ "'%s %s %s' : missing length value",
+ args[0], args[1], args[kw]);
+ release_sample_expr(expr);
+ return -1;
+ }
+			/* parse the mandatory capture length */
+ len = atoi(args[arg]);
+ if (len <= 0) {
+ memprintf(err,
+ "'%s %s %s' : length must be > 0",
+ args[0], args[1], args[kw]);
+ release_sample_expr(expr);
+ return -1;
+ }
+ arg++;
+ }
+
+ if (!len) {
+ memprintf(err,
+ "'%s %s %s' : a positive 'len' argument is mandatory",
+ args[0], args[1], args[kw]);
+			release_sample_expr(expr);
+ return -1;
+ }
+
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(err, "parsing [%s:%d] : out of memory", file, line);
+ release_sample_expr(expr);
+ return -1;
+ }
+ hdr->next = curpx->req_cap;
+ hdr->name = NULL; /* not a header capture */
+ hdr->namelen = 0;
+ hdr->len = len;
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ hdr->index = curpx->nb_req_cap++;
+
+ curpx->req_cap = hdr;
+ curpx->to_log |= LW_REQHDR;
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ rule->arg.cap.expr = expr;
+ rule->arg.cap.hdr = hdr;
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = tcp_action_capture;
+ rule->check_ptr = check_capture;
+ rule->release_ptr = release_tcp_capture;
+ }
+ else if (strncmp(args[arg], "track-sc", 8) == 0) {
+ struct sample_expr *expr;
+ int kw = arg;
+ unsigned int tsc_num;
+ const char *tsc_num_str;
+
+ arg++;
+
+ tsc_num_str = &args[kw][8];
+ if (cfg_parse_track_sc_num(&tsc_num, tsc_num_str, tsc_num_str + strlen(tsc_num_str), err) == -1) {
+ memprintf(err, "'%s %s %s' : %s", args[0], args[1], args[kw], *err);
+ return -1;
+ }
+
+ curpx->conf.args.ctx = ARGC_TRK;
+ expr = sample_parse_expr(args, &arg, file, line, err, &curpx->conf.args, NULL);
+ if (!expr) {
+ memprintf(err,
+ "'%s %s %s' : %s",
+ args[0], args[1], args[kw], *err);
+ return -1;
+ }
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "'%s %s %s' : fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], args[1], args[kw], args[arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return -1;
+ }
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ if (strcmp(args[arg], "table") == 0) {
+ arg++;
+ if (!args[arg]) {
+ memprintf(err,
+ "'%s %s %s' : missing table name",
+ args[0], args[1], args[kw]);
+ release_sample_expr(expr);
+ return -1;
+ }
+ /* we copy the table name for now, it will be resolved later */
+ rule->arg.trk_ctr.table.n = strdup(args[arg]);
+ arg++;
+ }
+ rule->action = tsc_num;
+ rule->arg.trk_ctr.expr = expr;
+ rule->action_ptr = tcp_action_track_sc;
+ rule->check_ptr = check_trk_action;
+ rule->release_ptr = release_tcp_track_sc;
+ }
+ else if (strcmp(args[arg], "expect-proxy") == 0) {
+ if (strcmp(args[arg+1], "layer4") != 0) {
+ memprintf(err,
+ "'%s %s %s' only supports 'layer4' in %s '%s' (got '%s')",
+ args[0], args[1], args[arg], proxy_type_str(curpx), curpx->id, args[arg+1]);
+ return -1;
+ }
+
+ if (!(where & SMP_VAL_FE_CON_ACC)) {
+ memprintf(err,
+ "'%s %s' is not allowed in '%s %s' rules in %s '%s'",
+ args[arg], args[arg+1], args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ arg += 2;
+ rule->action = ACT_TCP_EXPECT_PX;
+ }
+ else if (strcmp(args[arg], "expect-netscaler-cip") == 0) {
+ if (strcmp(args[arg+1], "layer4") != 0) {
+ memprintf(err,
+ "'%s %s %s' only supports 'layer4' in %s '%s' (got '%s')",
+ args[0], args[1], args[arg], proxy_type_str(curpx), curpx->id, args[arg+1]);
+ return -1;
+ }
+
+ if (!(where & SMP_VAL_FE_CON_ACC)) {
+ memprintf(err,
+ "'%s %s' is not allowed in '%s %s' rules in %s '%s'",
+ args[arg], args[arg+1], args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ arg += 2;
+ rule->action = ACT_TCP_EXPECT_CIP;
+ }
+ else {
+ struct action_kw *kw;
+ if (where & SMP_VAL_FE_CON_ACC) {
+ /* L4 */
+ kw = tcp_req_conn_action(args[arg]);
+ rule->kw = kw;
+ } else if (where & SMP_VAL_FE_SES_ACC) {
+ /* L5 */
+ kw = tcp_req_sess_action(args[arg]);
+ rule->kw = kw;
+ } else {
+ /* L6 */
+ kw = tcp_req_cont_action(args[arg]);
+ rule->kw = kw;
+ }
+ if (kw) {
+ arg++;
+ if (kw->parse((const char **)args, &arg, curpx, rule, err) == ACT_RET_PRS_ERR)
+ return -1;
+ } else {
+ const char *extra[] = { "accept", "reject", "capture", "track-sc", "expect-proxy", "expect-netscaler-cip", NULL };
+ const char *best = NULL;
+
+ if (where & SMP_VAL_FE_CON_ACC) {
+ action_build_list(&tcp_req_conn_keywords, &trash);
+ best = action_suggest(args[arg], &tcp_req_conn_keywords, extra);
+ }
+ else if (where & SMP_VAL_FE_SES_ACC) {
+ action_build_list(&tcp_req_sess_keywords, &trash);
+ best = action_suggest(args[arg], &tcp_req_sess_keywords, extra);
+ }
+ else {
+ action_build_list(&tcp_req_cont_keywords, &trash);
+ best = action_suggest(args[arg], &tcp_req_cont_keywords, extra);
+ }
+
+ memprintf(err,
+ "'%s %s' expects 'accept', 'reject', 'capture', 'expect-proxy', 'expect-netscaler-cip', 'track-sc0' ... 'track-sc%d', %s "
+			          "in %s '%s' (got '%s').%s%s%s",
+ args[0], args[1], MAX_SESS_STKCTR-1,
+ trash.area, proxy_type_str(curpx),
+ curpx->id, args[arg],
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ return -1;
+ }
+ }
+
+ if (strcmp(args[arg], "if") == 0 || strcmp(args[arg], "unless") == 0) {
+ if ((rule->cond = build_acl_cond(file, line, &curpx->acl, curpx, (const char **)args+arg, err)) == NULL) {
+ memprintf(err,
+ "'%s %s %s' : error detected in %s '%s' while parsing '%s' condition : %s",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg], *err);
+ return -1;
+ }
+ }
+ else if (*args[arg]) {
+ memprintf(err,
+ "'%s %s %s' only accepts 'if' or 'unless', in %s '%s' (got '%s')",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg]);
+ return -1;
+ }
+ return 0;
+}
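+
+/* Informal sketch of the grammar accepted by the parser above (not
+ * normative; <ruleset> is "connection", "session" or "content", and
+ * capture/expect-* are further restricted by the 'where' checks):
+ *
+ *     tcp-request <ruleset> <action> [{if | unless} <condition>]
+ *     <action> ::= accept | reject
+ *                | capture <expr> len <length>
+ *                | track-sc0..track-scN <expr> [table <name>]
+ *                | expect-proxy layer4 | expect-netscaler-cip layer4
+ *                | <registered keyword> [args...]
+ */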
+
+/* This function should be called to parse a line starting with the "tcp-response"
+ * keyword.
+ */
+static int tcp_parse_tcp_rep(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *ptr = NULL;
+ unsigned int val;
+ int warn = 0;
+ int arg;
+ struct act_rule *rule;
+ unsigned int where;
+ const struct acl *acl;
+ const char *kw;
+
+ if (!*args[1]) {
+ memprintf(err, "missing argument for '%s' in %s '%s'",
+ args[0], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ if (strcmp(args[1], "inspect-delay") == 0) {
+ if ((curpx == defpx && strlen(defpx->id) == 0) || !(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "%s %s is only allowed in 'backend' sections or 'defaults' section with a name",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (!*args[2] || (ptr = parse_time_err(args[2], &val, TIME_UNIT_MS))) {
+ memprintf(err,
+ "'%s %s' expects a positive delay in milliseconds, in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+
+ if (ptr == PARSE_TIME_OVER)
+ memprintf(err, "%s (timer overflow in '%s', maximum value is 2147483647 ms or ~24.8 days)", *err, args[2]);
+ else if (ptr == PARSE_TIME_UNDER)
+ memprintf(err, "%s (timer underflow in '%s', minimum non-null value is 1 ms)", *err, args[2]);
+ else if (ptr)
+ memprintf(err, "%s (unexpected character '%c')", *err, *ptr);
+ return -1;
+ }
+
+ if (curpx->tcp_rep.inspect_delay) {
+ memprintf(err, "ignoring %s %s (was already defined) in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return 1;
+ }
+ curpx->tcp_rep.inspect_delay = val;
+ return 0;
+ }
+
+ rule = new_act_rule(ACT_F_TCP_RES_CNT, file, line);
+ if (!rule) {
+ memprintf(err, "parsing [%s:%d] : out of memory", file, line);
+ return -1;
+ }
+ LIST_INIT(&rule->list);
+ arg = 1;
+ where = 0;
+
+ if (strcmp(args[1], "content") == 0) {
+ arg++;
+
+ if (curpx->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_RES_CNT;
+ if (curpx->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_RES_CNT;
+ if (tcp_parse_response_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ LIST_APPEND(&curpx->tcp_rep.inspect_rules, &rule->list);
+ }
+ else {
+ memprintf(err,
+ "'%s' expects 'inspect-delay' or 'content' in %s '%s' (got '%s')",
+ args[0], proxy_type_str(curpx), curpx->id, args[1]);
+ goto error;
+ }
+
+ return warn;
+ error:
+ free_act_rule(rule);
+ return -1;
+}
+
+
+/* This function should be called to parse a line starting with the "tcp-request"
+ * keyword.
+ */
+static int tcp_parse_tcp_req(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *ptr = NULL;
+ unsigned int val;
+ int warn = 0;
+ int arg;
+ struct act_rule *rule;
+ unsigned int where;
+ const struct acl *acl;
+ const char *kw;
+
+ if (!*args[1]) {
+ if (curpx == defpx)
+ memprintf(err, "missing argument for '%s' in defaults section", args[0]);
+ else
+ memprintf(err, "missing argument for '%s' in %s '%s'",
+ args[0], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ if (strcmp(args[1], "inspect-delay") == 0) {
+ if (curpx == defpx && strlen(defpx->id) == 0) {
+ memprintf(err, "%s %s is not allowed in anonymous 'defaults' sections",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (!*args[2] || (ptr = parse_time_err(args[2], &val, TIME_UNIT_MS))) {
+ memprintf(err,
+ "'%s %s' expects a positive delay in milliseconds, in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+
+ if (ptr == PARSE_TIME_OVER)
+ memprintf(err, "%s (timer overflow in '%s', maximum value is 2147483647 ms or ~24.8 days)", *err, args[2]);
+ else if (ptr == PARSE_TIME_UNDER)
+ memprintf(err, "%s (timer underflow in '%s', minimum non-null value is 1 ms)", *err, args[2]);
+ else if (ptr)
+ memprintf(err, "%s (unexpected character '%c')", *err, *ptr);
+ return -1;
+ }
+
+ if (curpx->tcp_req.inspect_delay) {
+ memprintf(err, "ignoring %s %s (was already defined) in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return 1;
+ }
+ curpx->tcp_req.inspect_delay = val;
+ return 0;
+ }
+
+ rule = new_act_rule(0, file, line);
+ if (!rule) {
+ memprintf(err, "parsing [%s:%d] : out of memory", file, line);
+ return -1;
+ }
+ LIST_INIT(&rule->list);
+ arg = 1;
+ where = 0;
+
+ if (strcmp(args[1], "content") == 0) {
+ arg++;
+
+ if (curpx->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_REQ_CNT;
+ if (curpx->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_REQ_CNT;
+ rule->from = ACT_F_TCP_REQ_CNT;
+ if (tcp_parse_request_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ /* the following function directly emits the warning */
+ warnif_misplaced_tcp_cont(curpx, file, line, args[0]);
+ LIST_APPEND(&curpx->tcp_req.inspect_rules, &rule->list);
+ }
+ else if (strcmp(args[1], "connection") == 0) {
+ arg++;
+
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err, "%s %s is not allowed because %s %s is not a frontend",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ goto error;
+ }
+
+ where |= SMP_VAL_FE_CON_ACC;
+ rule->from = ACT_F_TCP_REQ_CON;
+ if (tcp_parse_request_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ /* the following function directly emits the warning */
+ warnif_misplaced_tcp_conn(curpx, file, line, args[0]);
+ LIST_APPEND(&curpx->tcp_req.l4_rules, &rule->list);
+ }
+ else if (strcmp(args[1], "session") == 0) {
+ arg++;
+
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err, "%s %s is not allowed because %s %s is not a frontend",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ goto error;
+ }
+
+ where |= SMP_VAL_FE_SES_ACC;
+ rule->from = ACT_F_TCP_REQ_SES;
+ if (tcp_parse_request_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ /* the following function directly emits the warning */
+ warnif_misplaced_tcp_sess(curpx, file, line, args[0]);
+ LIST_APPEND(&curpx->tcp_req.l5_rules, &rule->list);
+ }
+ else {
+ if (curpx == defpx)
+ memprintf(err,
+ "'%s' expects 'inspect-delay', 'connection', or 'content' in defaults section (got '%s')",
+ args[0], args[1]);
+ else
+ memprintf(err,
+ "'%s' expects 'inspect-delay', 'connection', or 'content' in %s '%s' (got '%s')",
+ args[0], proxy_type_str(curpx), curpx->id, args[1]);
+ goto error;
+ }
+
+ return warn;
+ error:
+ free_act_rule(rule);
+ return -1;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "tcp-request", tcp_parse_tcp_req },
+ { CFG_LISTEN, "tcp-response", tcp_parse_tcp_rep },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcp_sample.c b/src/tcp_sample.c
new file mode 100644
index 0000000..12eb25c
--- /dev/null
+++ b/src/tcp_sample.c
@@ -0,0 +1,530 @@
+/*
+ * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* this is to have tcp_info defined on systems using the musl
+ * library, such as Alpine Linux.
+ */
+#define _GNU_SOURCE
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/listener-t.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session.h>
+#include <haproxy/tools.h>
+
+/* Fetch the connection's source IPv4/IPv6 address. Depending on the keyword, it
+ * may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = NULL;
+
+ if (kw[0] == 'b') { /* bc_src */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_src */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else /* src */
+ src = (smp->strm ? sc_src(smp->strm->scf) : sess_src(smp->sess));
+
+ if (!src)
+ return 0;
+
+ switch (src->ss_family) {
+ case AF_INET:
+ smp->data.u.ipv4 = ((struct sockaddr_in *)src)->sin_addr;
+ smp->data.type = SMP_T_IPV4;
+ break;
+ case AF_INET6:
+ smp->data.u.ipv6 = ((struct sockaddr_in6 *)src)->sin6_addr;
+ smp->data.type = SMP_T_IPV6;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->flags = 0;
+ return 1;
+}
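+
+/* Hypothetical ACL usage of the keyword variants resolved above; the
+ * network is an example:
+ *
+ *     acl lan_stream src 192.168.0.0/16     # stream/session source ("src")
+ *     acl lan_conn   fc_src 192.168.0.0/16  # raw frontend connection source
+ *
+ * "bc_src" reports the backend-side connection source once that connection
+ * exists, which is why the fetch above falls back to NULL without a stream
+ * or a check.
+ */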
+
+/* set temp integer to the connection's source port. Depending on the
+ * keyword, it may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_sport(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = NULL;
+
+ if (kw[0] == 'b') { /* bc_src_port */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_src_port */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else /* src_port */
+ src = (smp->strm ? sc_src(smp->strm->scf) : sess_src(smp->sess));
+
+ if (!src)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ if (!(smp->data.u.sint = get_host_port(src)))
+ return 0;
+
+ smp->flags = 0;
+ return 1;
+}
+
+/* fetch the connection's destination IPv4/IPv6 address. Depending on the
+ * keyword, it may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_dst(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *dst = NULL;
+
+ if (kw[0] == 'b') { /* bc_dst */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_dst */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else /* dst */
+ dst = (smp->strm ? sc_dst(smp->strm->scf) : sess_dst(smp->sess));
+
+ if (!dst)
+ return 0;
+
+ switch (dst->ss_family) {
+ case AF_INET:
+ smp->data.u.ipv4 = ((struct sockaddr_in *)dst)->sin_addr;
+ smp->data.type = SMP_T_IPV4;
+ break;
+ case AF_INET6:
+ smp->data.u.ipv6 = ((struct sockaddr_in6 *)dst)->sin6_addr;
+ smp->data.type = SMP_T_IPV6;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->flags = 0;
+ return 1;
+}
+
+/* check if the destination address of the front connection is local to the
+ * system or if it was intercepted.
+ */
+int smp_fetch_dst_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct listener *li = smp->sess->listener;
+ const struct sockaddr_storage *dst = NULL;
+
+ if (kw[0] == 'f') { /* fc_dst_is_local */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else /* dst_is_local */
+ dst = (smp->strm ? sc_dst(smp->strm->scf) : sess_dst(smp->sess));
+
+ if (!dst)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->flags = 0;
+ smp->data.u.sint = addr_is_local(li->rx.settings->netns, dst);
+ return smp->data.u.sint >= 0;
+}
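+
+/* A hypothetical use of the fetch above to tell traffic addressed to a local
+ * address apart from transparently intercepted traffic; the backend name is
+ * an example:
+ *
+ *     acl direct dst_is_local
+ *     use_backend local_svc if direct
+ */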
+
+/* check if the source address of the front connection is local to the system
+ * or not.
+ */
+int smp_fetch_src_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct listener *li = smp->sess->listener;
+ const struct sockaddr_storage *src = NULL;
+
+ if (kw[0] == 'f') { /* fc_src_is_local */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else /* src_is_local */
+ src = (smp->strm ? sc_src(smp->strm->scf) : sess_src(smp->sess));
+
+ if (!src)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->flags = 0;
+ smp->data.u.sint = addr_is_local(li->rx.settings->netns, src);
+ return smp->data.u.sint >= 0;
+}
+
+/* set temp integer to the connection's destination port. Depending on the
+ * keyword, it may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_dport(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *dst = NULL;
+
+ if (kw[0] == 'b') { /* bc_dst_port */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_dst_port */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else /* dst_port */
+ dst = (smp->strm ? sc_dst(smp->strm->scf) : sess_dst(smp->sess));
+
+ if (!dst)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ if (!(smp->data.u.sint = get_host_port(dst)))
+ return 0;
+
+ smp->flags = 0;
+ return 1;
+}
+
+#ifdef TCP_INFO
+
+
+/* Validates the arguments passed to "fc_*" fetch keywords returning a time
+ * value. These keywords support an optional string representing the unit of the
+ * result: "us" for microseconds and "ms" for milliseconds. Returns 0 on error
+ * and non-zero if OK.
+ */
+static int val_fc_time_value(struct arg *args, char **err)
+{
+ if (args[0].type == ARGT_STR) {
+ if (strcmp(args[0].data.str.area, "us") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = TIME_UNIT_US;
+ }
+ else if (strcmp(args[0].data.str.area, "ms") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = TIME_UNIT_MS;
+ }
+ else {
+ memprintf(err, "expects 'us' or 'ms', got '%s'",
+ args[0].data.str.area);
+ return 0;
+ }
+ }
+ else {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ return 1;
+}
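+
+/* Hypothetical examples of the optional unit argument validated above:
+ *
+ *     log-format "rtt_ms=%[fc_rtt] rtt_us=%[fc_rtt(us)]"
+ *
+ * Without an argument, or with "ms", the value is converted to milliseconds;
+ * with "us" it is left in microseconds.
+ */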
+
+/* Validates the arguments passed to "fc_*" fetch keywords returning a
+ * counter. These keywords should be used without any argument, but because of a
+ * bug in previous versions, an optional string argument may be passed. In that
+ * case, the argument is ignored and a warning is emitted. Returns 0 on error
+ * and non-zero if OK.
+ */
+static int var_fc_counter(struct arg *args, char **err)
+{
+ if (args[0].type != ARGT_STOP) {
+ ha_warning("no argument supported for 'fc_*' sample expressions returning counters.\n");
+ if (args[0].type == ARGT_STR)
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_STOP;
+ }
+
+ return 1;
+}
+
+/* Returns some tcp_info data if available. "dir" must be set to 0 to use the
+ * client-side connection, or to 1 for the server-side one. "val" selects the
+ * field to retrieve.
+ * If the function fails it returns 0, otherwise it returns 1 and fills "smp".
+ */
+static inline int get_tcp_info(const struct arg *args, struct sample *smp,
+ int dir, int val)
+{
+ struct connection *conn;
+ struct tcp_info info;
+ socklen_t optlen;
+
+ /* strm can be null. */
+ if (!smp->strm)
+ return 0;
+
+	/* get the object associated with the stream connector. The
+	 * object can be something other than a connection. For
+	 * example, it can be an appctx.
+ */
+ conn = (dir == 0 ? sc_conn(smp->strm->scf) : sc_conn(smp->strm->scb));
+ if (!conn)
+ return 0;
+
+	/* The fd may not be available for the tcp_info struct, and the
+	 * syscall can fail.
+	 */
+ optlen = sizeof(info);
+ if ((conn->flags & CO_FL_FDLESS) ||
+ getsockopt(conn->handle.fd, IPPROTO_TCP, TCP_INFO, &info, &optlen) == -1)
+ return 0;
+
+ /* extract the value. */
+ smp->data.type = SMP_T_SINT;
+ switch (val) {
+#if defined(__APPLE__)
+ case 0: smp->data.u.sint = info.tcpi_rttcur; break;
+ case 1: smp->data.u.sint = info.tcpi_rttvar; break;
+ case 2: smp->data.u.sint = info.tcpi_tfo_syn_data_acked; break;
+ case 4: smp->data.u.sint = info.tcpi_tfo_syn_loss; break;
+ case 5: smp->data.u.sint = info.tcpi_rto; break;
+#else
+ /* all other platforms supporting TCP_INFO have these ones */
+ case 0: smp->data.u.sint = info.tcpi_rtt; break;
+ case 1: smp->data.u.sint = info.tcpi_rttvar; break;
+# if defined(__linux__)
+ /* these ones are common to all Linux versions */
+ case 2: smp->data.u.sint = info.tcpi_unacked; break;
+ case 3: smp->data.u.sint = info.tcpi_sacked; break;
+ case 4: smp->data.u.sint = info.tcpi_lost; break;
+ case 5: smp->data.u.sint = info.tcpi_retrans; break;
+ case 6: smp->data.u.sint = info.tcpi_fackets; break;
+ case 7: smp->data.u.sint = info.tcpi_reordering; break;
+# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+	/* these ones are found on FreeBSD, NetBSD and OpenBSD featuring TCP_INFO */
+ case 2: smp->data.u.sint = info.__tcpi_unacked; break;
+ case 3: smp->data.u.sint = info.__tcpi_sacked; break;
+ case 4: smp->data.u.sint = info.__tcpi_lost; break;
+ case 5: smp->data.u.sint = info.__tcpi_retrans; break;
+ case 6: smp->data.u.sint = info.__tcpi_fackets; break;
+ case 7: smp->data.u.sint = info.__tcpi_reordering; break;
+# endif
+#endif // apple
+ default: return 0;
+ }
+
+ return 1;
+}
+
+/* get the mean rtt of a client connection */
+static int
+smp_fetch_fc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 0))
+ return 0;
+
+ /* By default or if explicitly specified, convert rtt to ms */
+ if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
+ smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
+
+ return 1;
+}
+
+/* get the variance of the mean rtt of a client connection */
+static int
+smp_fetch_fc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 1))
+ return 0;
+
+ /* By default or if explicitly specified, convert rttvar to ms */
+ if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
+ smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
+
+ return 1;
+}
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+/* get the unacked counter on a client connection */
+static int
+smp_fetch_fc_unacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 2))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+/* get the sacked counter on a client connection */
+static int
+smp_fetch_fc_sacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 3))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+/* get the lost counter on a client connection */
+static int
+smp_fetch_fc_lost(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 4))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+/* get the retrans counter on a client connection */
+static int
+smp_fetch_fc_retrans(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 5))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+/* get the fackets counter on a client connection */
+static int
+smp_fetch_fc_fackets(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 6))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+/* get the reordering counter on a client connection */
+static int
+smp_fetch_fc_reordering(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 7))
+ return 0;
+ return 1;
+}
+#endif
+#endif // TCP_INFO
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types must be declared as the lowest
+ * common denominator, the type that can be cast to all other ones. For
+ * instance v4/v6 must be declared v4.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+	{ "bc_dst", smp_fetch_dst, 0, NULL, SMP_T_IPV4, SMP_USE_L4SRV },
+ { "bc_dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+	{ "bc_src", smp_fetch_src, 0, NULL, SMP_T_IPV4, SMP_USE_L4SRV },
+ { "bc_src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+
+ { "dst", smp_fetch_dst, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
+ { "dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+
+ { "fc_dst", smp_fetch_dst, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
+ { "fc_dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "fc_dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+
+ { "fc_src", smp_fetch_src, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
+ { "fc_src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "fc_src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+
+ { "src", smp_fetch_src, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
+ { "src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+#ifdef TCP_INFO
+ { "fc_rtt", smp_fetch_fc_rtt, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
+ { "fc_rttvar", smp_fetch_fc_rttvar, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+ { "fc_unacked", smp_fetch_fc_unacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ { "fc_sacked", smp_fetch_fc_sacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+ { "fc_retrans", smp_fetch_fc_retrans, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ { "fc_fackets", smp_fetch_fc_fackets, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+ { "fc_lost", smp_fetch_fc_lost, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ { "fc_reordering", smp_fetch_fc_reordering, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#endif // TCP_INFO
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcpcheck.c b/src/tcpcheck.c
new file mode 100644
index 0000000..6441d34
--- /dev/null
+++ b/src/tcpcheck.c
@@ -0,0 +1,5209 @@
+/*
+ * Health-checks functions.
+ *
+ * Copyright 2000-2009,2020 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2010 Krzysztof Piotr Oledzki <ole@ans.pl>
+ * Copyright 2013 Baptiste Assmann <bedis9@gmail.com>
+ * Copyright 2020 Gaetan Rivet <grive@u256.net>
+ * Copyright 2020 Christopher Faulet <cfaulet@haproxy.com>
+ * Crown Copyright 2022 Defence Science and Technology Laboratory <dstlipgroup@dstl.gov.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+#include <haproxy/vars.h>
+
+
+#define TRACE_SOURCE &trace_check
+
+/* Global tree to share all tcp-checks */
+struct eb_root shared_tcpchecks = EB_ROOT;
+
+
+DECLARE_POOL(pool_head_tcpcheck_rule, "tcpcheck_rule", sizeof(struct tcpcheck_rule));
+
+/**************************************************************************/
+/*************** Init/deinit tcp-check rules and ruleset ******************/
+/**************************************************************************/
+/* Releases memory allocated for a log-format string */
+static void free_tcpcheck_fmt(struct list *fmt)
+{
+ struct logformat_node *lf, *lfb;
+
+ list_for_each_entry_safe(lf, lfb, fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+}
+
+/* Releases memory allocated for an HTTP header used in a tcp-check send rule */
+void free_tcpcheck_http_hdr(struct tcpcheck_http_hdr *hdr)
+{
+ if (!hdr)
+ return;
+
+ free_tcpcheck_fmt(&hdr->value);
+ istfree(&hdr->name);
+ free(hdr);
+}
+
+/* Releases memory allocated for an HTTP header list used in a tcp-check send
+ * rule
+ */
+static void free_tcpcheck_http_hdrs(struct list *hdrs)
+{
+ struct tcpcheck_http_hdr *hdr, *bhdr;
+
+ list_for_each_entry_safe(hdr, bhdr, hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ free_tcpcheck_http_hdr(hdr);
+ }
+}
+
+/* Releases memory allocated for a tcp-check. If in_pool is set, it means the
+ * tcp-check was allocated using a memory pool (it is used to instantiate email
+ * alerts).
+ */
+void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool)
+{
+ if (!rule)
+ return;
+
+ free(rule->comment);
+ switch (rule->action) {
+ case TCPCHK_ACT_SEND:
+ switch (rule->send.type) {
+ case TCPCHK_SEND_STRING:
+ case TCPCHK_SEND_BINARY:
+ istfree(&rule->send.data);
+ break;
+ case TCPCHK_SEND_STRING_LF:
+ case TCPCHK_SEND_BINARY_LF:
+ free_tcpcheck_fmt(&rule->send.fmt);
+ break;
+ case TCPCHK_SEND_HTTP:
+ free(rule->send.http.meth.str.area);
+ if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT))
+ istfree(&rule->send.http.uri);
+ else
+ free_tcpcheck_fmt(&rule->send.http.uri_fmt);
+ istfree(&rule->send.http.vsn);
+ free_tcpcheck_http_hdrs(&rule->send.http.hdrs);
+ if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT))
+ istfree(&rule->send.http.body);
+ else
+ free_tcpcheck_fmt(&rule->send.http.body_fmt);
+ break;
+ case TCPCHK_SEND_UNDEF:
+ break;
+ }
+ break;
+ case TCPCHK_ACT_EXPECT:
+ free_tcpcheck_fmt(&rule->expect.onerror_fmt);
+ free_tcpcheck_fmt(&rule->expect.onsuccess_fmt);
+ release_sample_expr(rule->expect.status_expr);
+ switch (rule->expect.type) {
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ free(rule->expect.codes.codes);
+ break;
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_BINARY:
+ case TCPCHK_EXPECT_HTTP_BODY:
+ istfree(&rule->expect.data);
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ regex_free(rule->expect.regex);
+ break;
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_BINARY_LF:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ free_tcpcheck_fmt(&rule->expect.fmt);
+ break;
+ case TCPCHK_EXPECT_HTTP_HEADER:
+ if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG)
+ regex_free(rule->expect.hdr.name_re);
+ else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT)
+ free_tcpcheck_fmt(&rule->expect.hdr.name_fmt);
+ else
+ istfree(&rule->expect.hdr.name);
+
+ if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG)
+ regex_free(rule->expect.hdr.value_re);
+ else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT)
+ free_tcpcheck_fmt(&rule->expect.hdr.value_fmt);
+ else if (!(rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE))
+ istfree(&rule->expect.hdr.value);
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+ case TCPCHK_EXPECT_UNDEF:
+ break;
+ }
+ break;
+ case TCPCHK_ACT_CONNECT:
+ free(rule->connect.sni);
+ free(rule->connect.alpn);
+ release_sample_expr(rule->connect.port_expr);
+ break;
+ case TCPCHK_ACT_COMMENT:
+ break;
+ case TCPCHK_ACT_ACTION_KW:
+ free(rule->action_kw.rule);
+ break;
+ }
+
+ if (in_pool)
+ pool_free(pool_head_tcpcheck_rule, rule);
+ else
+ free(rule);
+}
+
+/* Creates a tcp-check variable used in preset variables before executing a
+ * tcp-check ruleset.
+ */
+struct tcpcheck_var *create_tcpcheck_var(const struct ist name)
+{
+ struct tcpcheck_var *var = NULL;
+
+ var = calloc(1, sizeof(*var));
+ if (var == NULL)
+ return NULL;
+
+ var->name = istdup(name);
+ if (!isttest(var->name)) {
+ free(var);
+ return NULL;
+ }
+
+ LIST_INIT(&var->list);
+ return var;
+}
+
+/* Releases memory allocated for a preset tcp-check variable */
+void free_tcpcheck_var(struct tcpcheck_var *var)
+{
+ if (!var)
+ return;
+
+ istfree(&var->name);
+ if (var->data.type == SMP_T_STR || var->data.type == SMP_T_BIN)
+ free(var->data.u.str.area);
+ else if (var->data.type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER)
+ free(var->data.u.meth.str.area);
+ free(var);
+}
+
+/* Releases a list of preset tcp-check variables */
+void free_tcpcheck_vars(struct list *vars)
+{
+ struct tcpcheck_var *var, *back;
+
+ list_for_each_entry_safe(var, back, vars, list) {
+ LIST_DELETE(&var->list);
+ free_tcpcheck_var(var);
+ }
+}
+
+/* Duplicates a list of preset tcp-check variables */
+int dup_tcpcheck_vars(struct list *dst, const struct list *src)
+{
+ const struct tcpcheck_var *var;
+ struct tcpcheck_var *new = NULL;
+
+ list_for_each_entry(var, src, list) {
+ new = create_tcpcheck_var(var->name);
+ if (!new)
+ goto error;
+ new->data.type = var->data.type;
+ if (var->data.type == SMP_T_STR || var->data.type == SMP_T_BIN) {
+ if (chunk_dup(&new->data.u.str, &var->data.u.str) == NULL)
+ goto error;
+ if (var->data.type == SMP_T_STR)
+ new->data.u.str.area[new->data.u.str.data] = 0;
+ }
+ else if (var->data.type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) {
+ if (chunk_dup(&new->data.u.str, &var->data.u.str) == NULL)
+ goto error;
+ new->data.u.str.area[new->data.u.str.data] = 0;
+ new->data.u.meth.meth = var->data.u.meth.meth;
+ }
+ else
+ new->data.u = var->data.u;
+ LIST_APPEND(dst, &new->list);
+ }
+ return 1;
+
+ error:
+ free(new);
+ return 0;
+}
+
+/* Looks for a shared tcp-check ruleset given its name. */
+struct tcpcheck_ruleset *find_tcpcheck_ruleset(const char *name)
+{
+ struct tcpcheck_ruleset *rs;
+ struct ebpt_node *node;
+
+ node = ebis_lookup_len(&shared_tcpchecks, name, strlen(name));
+ if (node) {
+ rs = container_of(node, typeof(*rs), node);
+ return rs;
+ }
+ return NULL;
+}
+
+/* Creates a new shared tcp-check ruleset and inserts it in the
+ * shared_tcpchecks tree.
+ */
+struct tcpcheck_ruleset *create_tcpcheck_ruleset(const char *name)
+{
+ struct tcpcheck_ruleset *rs;
+
+ rs = calloc(1, sizeof(*rs));
+ if (rs == NULL)
+ return NULL;
+
+ rs->node.key = strdup(name);
+ if (rs->node.key == NULL) {
+ free(rs);
+ return NULL;
+ }
+
+ LIST_INIT(&rs->rules);
+ ebis_insert(&shared_tcpchecks, &rs->node);
+ return rs;
+}
+
+/* Releases memory allocated by a tcp-check ruleset. */
+void free_tcpcheck_ruleset(struct tcpcheck_ruleset *rs)
+{
+ struct tcpcheck_rule *r, *rb;
+
+ if (!rs)
+ return;
+
+ ebpt_delete(&rs->node);
+ free(rs->node.key);
+ list_for_each_entry_safe(r, rb, &rs->rules, list) {
+ LIST_DELETE(&r->list);
+ free_tcpcheck(r, 0);
+ }
+ free(rs);
+}
+
+
+/**************************************************************************/
+/**************** Everything about tcp-checks execution *******************/
+/**************************************************************************/
+/* Returns the id of a step in a tcp-check ruleset */
+int tcpcheck_get_step_id(const struct check *check, const struct tcpcheck_rule *rule)
+{
+ if (!rule)
+ rule = check->current_step;
+
+ /* no last started step => first step */
+ if (!rule)
+ return 1;
+
+ /* last step is the first implicit connect */
+ if (rule->index == 0 &&
+ rule->action == TCPCHK_ACT_CONNECT &&
+ (rule->connect.options & TCPCHK_OPT_IMPLICIT))
+ return 0;
+
+ return rule->index + 1;
+}
+
+/* Returns the first non COMMENT/ACTION_KW tcp-check rule from list <list> or
+ * NULL if none was found.
+ */
+struct tcpcheck_rule *get_first_tcpcheck_rule(const struct tcpcheck_rules *rules)
+{
+ struct tcpcheck_rule *r;
+
+ list_for_each_entry(r, rules->list, list) {
+ if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
+ return r;
+ }
+ return NULL;
+}
+
+/* Returns the last non COMMENT/ACTION_KW tcp-check rule from list <list> or
+ * NULL if none was found.
+ */
+static struct tcpcheck_rule *get_last_tcpcheck_rule(struct tcpcheck_rules *rules)
+{
+ struct tcpcheck_rule *r;
+
+ list_for_each_entry_rev(r, rules->list, list) {
+ if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
+ return r;
+ }
+ return NULL;
+}
+
+/* Returns the non COMMENT/ACTION_KW tcp-check rule from list <list> following
+ * <start> or NULL if none was found. If <start> is NULL, it relies on
+ * get_first_tcpcheck_rule().
+ */
+static struct tcpcheck_rule *get_next_tcpcheck_rule(struct tcpcheck_rules *rules, struct tcpcheck_rule *start)
+{
+ struct tcpcheck_rule *r;
+
+ if (!start)
+ return get_first_tcpcheck_rule(rules);
+
+ r = LIST_NEXT(&start->list, typeof(r), list);
+ list_for_each_entry_from(r, rules->list, list) {
+ if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
+ return r;
+ }
+ return NULL;
+}
+
+
+/* Creates info message when a tcp-check healthcheck fails on an expect rule */
+static void tcpcheck_expect_onerror_message(struct buffer *msg, struct check *check, struct tcpcheck_rule *rule,
+ int match, struct ist info)
+{
+ struct sample *smp;
+
+	/* Follow these steps to produce the info message:
+	 *   1. if the info field is already provided, copy it
+	 *   2. if the expect rule provides an onerror log-format string,
+	 *      use it to produce the message
+	 *   3. if the expect rule is part of a protocol check (http, redis, mysql...), do nothing
+	 *   4. otherwise produce the generic tcp-check info message
+ */
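+	/* The onerror log-format string, the rule comment and the status
+	 * expression handled below correspond to the optional "on-error <fmt>",
+	 * "comment <msg>" and "status-code <expr>" arguments of "tcp-check
+	 * expect" and "http-check expect" rules.
+	 */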
+ if (istlen(info)) {
+ chunk_istcat(msg, info);
+ goto comment;
+ }
+ else if (!LIST_ISEMPTY(&rule->expect.onerror_fmt)) {
+ msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg), &rule->expect.onerror_fmt);
+ goto comment;
+ }
+
+ if (check->type == PR_O2_TCPCHK_CHK &&
+ (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) != TCPCHK_RULES_TCP_CHK)
+ goto comment;
+
+ chunk_strcat(msg, (match ? "TCPCHK matched unwanted content" : "TCPCHK did not match content"));
+ switch (rule->expect.type) {
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ chunk_appendf(msg, "(status codes) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_HTTP_BODY:
+ chunk_appendf(msg, " '%.*s' at step %d", (unsigned int)istlen(rule->expect.data), istptr(rule->expect.data),
+ tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_BINARY:
+ chunk_appendf(msg, " (binary) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ chunk_appendf(msg, " (regex) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ chunk_appendf(msg, " (binary regex) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ chunk_appendf(msg, " (log-format string) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_BINARY_LF:
+ chunk_appendf(msg, " (log-format binary) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+ chunk_appendf(msg, " (custom function) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_HTTP_HEADER:
+		chunk_appendf(msg, " (header pattern) at step %d", tcpcheck_get_step_id(check, rule));
+		break;
+ case TCPCHK_EXPECT_UNDEF:
+ /* Should never happen. */
+ return;
+ }
+
+ comment:
+ /* If the failing expect rule provides a comment, it is concatenated to
+ * the info message.
+ */
+ if (rule->comment) {
+ chunk_strcat(msg, " comment: ");
+ chunk_strcat(msg, rule->comment);
+ }
+
+ /* Finally, the check status code is set if the failing expect rule
+ * defines a status expression.
+ */
+ if (rule->expect.status_expr) {
+ smp = sample_fetch_as_type(check->proxy, check->sess, NULL, SMP_OPT_DIR_RES | SMP_OPT_FINAL,
+ rule->expect.status_expr, SMP_T_STR);
+
+ if (smp && sample_casts[smp->data.type][SMP_T_SINT] &&
+ sample_casts[smp->data.type][SMP_T_SINT](smp))
+ check->code = smp->data.u.sint;
+ }
+
+ *(b_tail(msg)) = '\0';
+}
+
+/* Creates info message when a tcp-check healthcheck succeeds on an expect rule */
+static void tcpcheck_expect_onsuccess_message(struct buffer *msg, struct check *check, struct tcpcheck_rule *rule,
+ struct ist info)
+{
+ struct sample *smp;
+
+	/* Follow these steps to produce the info message:
+	 *   1. if the info field is already provided, copy it
+	 *   2. if the expect rule provides an onsuccess log-format string,
+	 *      use it to produce the message
+	 *   3. if the expect rule is part of a protocol check (http, redis, mysql...), do nothing
+	 *   4. otherwise produce the generic tcp-check info message
+ */
+ if (istlen(info))
+ chunk_istcat(msg, info);
+ if (!LIST_ISEMPTY(&rule->expect.onsuccess_fmt))
+ msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg),
+ &rule->expect.onsuccess_fmt);
+ else if (check->type == PR_O2_TCPCHK_CHK &&
+ (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK)
+ chunk_strcat(msg, "(tcp-check)");
+
+ /* Finally, the check status code is set if the expect rule defines a
+ * status expression.
+ */
+ if (rule->expect.status_expr) {
+ smp = sample_fetch_as_type(check->proxy, check->sess, NULL, SMP_OPT_DIR_RES | SMP_OPT_FINAL,
+ rule->expect.status_expr, SMP_T_STR);
+
+ if (smp && sample_casts[smp->data.type][SMP_T_SINT] &&
+ sample_casts[smp->data.type][SMP_T_SINT](smp))
+ check->code = smp->data.u.sint;
+ }
+
+ *(b_tail(msg)) = '\0';
+}
+
+/* Internal function to parse and validate a MySQL packet in the context of an
+ * expect rule. It starts parsing the input buffer at offset <offset>. If
+ * <last_read> is set, no more data are expected.
+ */
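+/* For reference, the layout parsed below is the standard MySQL packet
+ * framing:
+ *
+ *   offset+0..2 : 24-bit payload length (little endian)
+ *   offset+3    : sequence id
+ *   offset+4... : payload; an error packet starts with 0xff, followed by a
+ *                 16-bit error code (little endian) and an error message
+ */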
+static enum tcpcheck_eval_ret tcpcheck_mysql_expect_packet(struct check *check, struct tcpcheck_rule *rule,
+ unsigned int offset, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ enum healthcheck_status status;
+ struct buffer *msg = NULL;
+ struct ist desc = IST_NULL;
+ unsigned int err = 0, plen = 0;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ /* 3 Bytes for the packet length and 1 byte for the sequence id */
+ if (b_data(&check->bi) < offset+4) {
+ if (!last_read)
+ goto wait_more_data;
+
+ /* invalid length or truncated response */
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+
+ plen = ((unsigned char) *b_peek(&check->bi, offset)) +
+ (((unsigned char) *(b_peek(&check->bi, offset+1))) << 8) +
+ (((unsigned char) *(b_peek(&check->bi, offset+2))) << 16);
+
+ if (b_data(&check->bi) < offset+plen+4) {
+ if (!last_read)
+ goto wait_more_data;
+
+ /* invalid length or truncated response */
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+
+ if (*b_peek(&check->bi, offset+4) == '\xff') {
+ /* MySQL Error packet always begin with field_count = 0xff */
+ status = HCHK_STATUS_L7STS;
+ err = ((unsigned char) *b_peek(&check->bi, offset+5)) +
+ (((unsigned char) *(b_peek(&check->bi, offset+6))) << 8);
+ desc = ist2(b_peek(&check->bi, offset+7), b_data(&check->bi) - offset - 7);
+ goto error;
+ }
+
+ if (get_next_tcpcheck_rule(check->tcpcheck_rules, rule) != NULL) {
+ /* Not the last rule, continue */
+ goto out;
+ }
+
+	/* We set the MySQL version in the description, for information purposes.
+	 * FIXME: the MySQL version could also be used for other purposes, e.g. to
+	 * mark old MySQL servers as down.
+ */
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, b_peek(&check->bi, 5));
+
+ out:
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ ret = TCPCHK_EVAL_STOP;
+ check->code = err;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Custom tcp-check expect function to parse and validate the MySQL initial
+ * handshake packet. Returns TCPCHK_EVAL_WAIT to wait for more data,
+ * TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if an
+ * error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_mysql_expect_iniths(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ return tcpcheck_mysql_expect_packet(check, rule, 0, last_read);
+}
+
+/* Custom tcp-check expect function to parse and validate the MySQL OK packet
+ * following the initial handshake. Returns TCPCHK_EVAL_WAIT to wait for more
+ * data, TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if
+ * an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_mysql_expect_ok(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ unsigned int hslen = 0;
+
+ hslen = 4 + ((unsigned char) *b_head(&check->bi)) +
+ (((unsigned char) *(b_peek(&check->bi, 1))) << 8) +
+ (((unsigned char) *(b_peek(&check->bi, 2))) << 16);
+
+ return tcpcheck_mysql_expect_packet(check, rule, hslen, last_read);
+}
+
+/* Custom tcp-check expect function to parse and validate the LDAP bind
+ * response packet. Returns TCPCHK_EVAL_WAIT to wait for more data,
+ * TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if an
+ * error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_ldap_expect_bindrsp(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ enum healthcheck_status status;
+ struct buffer *msg = NULL;
+ struct ist desc = IST_NULL;
+ char *ptr;
+ unsigned short nbytes = 0;
+ size_t msglen = 0;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ /* Check if the server speaks LDAP (ASN.1/BER)
+ * http://en.wikipedia.org/wiki/Basic_Encoding_Rules
+ * http://tools.ietf.org/html/rfc4511
+ */
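+	/* A BER length is either short form (a single byte 0x00..0x7f holding
+	 * the length itself) or long form (0x80|n followed by n big-endian
+	 * length bytes). E.g. 0x82 0x01 0x2c encodes a length of 300 on two
+	 * extra bytes, which is the case handled with read_n16() below.
+	 */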
+ ptr = b_head(&check->bi) + 1;
+
+ /* size of LDAPMessage */
+ if (*ptr & 0x80) {
+		/* For a message size encoded on several bytes, we only handle
+		 * sizes encoded on 2 or 4 bytes. There is no reason to make this
+		 * part too complex because only Active Directory is known to
+		 * encode the BindResponse length on 4 bytes.
+ */
+ nbytes = (*ptr & 0x7f);
+ if (b_data(&check->bi) < 1 + nbytes)
+ goto too_short;
+ switch (nbytes) {
+ case 4: msglen = read_n32(ptr+1); break;
+ case 2: msglen = read_n16(ptr+1); break;
+ default:
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Not LDAPv3 protocol");
+ goto error;
+ }
+ }
+ else
+ msglen = *ptr;
+ ptr += 1 + nbytes;
+
+ if (b_data(&check->bi) < 2 + nbytes + msglen)
+ goto too_short;
+
+ /* http://tools.ietf.org/html/rfc4511#section-4.2.2
+ * messageID: 0x02 0x01 0x01: INTEGER 1
+ * protocolOp: 0x61: bindResponse
+ */
+ if (memcmp(ptr, "\x02\x01\x01\x61", 4) != 0) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Not LDAPv3 protocol");
+ goto error;
+ }
+ ptr += 4;
+
+ /* skip size of bindResponse */
+ nbytes = 0;
+ if (*ptr & 0x80)
+ nbytes = (*ptr & 0x7f);
+ ptr += 1 + nbytes;
+
+ /* http://tools.ietf.org/html/rfc4511#section-4.1.9
+ * ldapResult: 0x0a 0x01: ENUMERATION
+ */
+ if (memcmp(ptr, "\x0a\x01", 2) != 0) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Not LDAPv3 protocol");
+ goto error;
+ }
+ ptr += 2;
+
+ /* http://tools.ietf.org/html/rfc4511#section-4.1.9
+ * resultCode
+ */
+ check->code = *ptr;
+ if (check->code) {
+ status = HCHK_STATUS_L7STS;
+ desc = ist("See RFC: http://tools.ietf.org/html/rfc4511#section-4.1.9");
+ goto error;
+ }
+
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, "Success");
+
+ out:
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ too_short:
+ if (!last_read)
+ goto wait_more_data;
+ /* invalid length or truncated response */
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Custom tcp-check expect function to parse and validate the SPOP hello agent
+ * frame. Returns TCPCHK_EVAL_WAIT to wait for more data, TCPCHK_EVAL_CONTINUE
+ * to evaluate the next rule or TCPCHK_EVAL_STOP if an error occurred.
+ */
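+/* SPOP frames are prefixed with their length encoded on 4 bytes in network
+ * byte order, which is why 4 bytes are read and converted with ntohl() before
+ * the frame itself is handed to spoe_handle_healthcheck_response().
+ */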
+enum tcpcheck_eval_ret tcpcheck_spop_expect_agenthello(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ enum healthcheck_status status;
+ struct buffer *msg = NULL;
+ struct ist desc = IST_NULL;
+ unsigned int framesz;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ memcpy(&framesz, b_head(&check->bi), 4);
+ framesz = ntohl(framesz);
+
+ if (!last_read && b_data(&check->bi) < (4+framesz))
+ goto wait_more_data;
+
+ memset(b_orig(&trash), 0, b_size(&trash));
+ if (spoe_handle_healthcheck_response(b_peek(&check->bi, 4), framesz, b_orig(&trash), HCHK_DESC_LEN) == -1) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist2(b_orig(&trash), strlen(b_orig(&trash)));
+ goto error;
+ }
+
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, "SPOA server is ok");
+
+ out:
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Custom tcp-check expect function to parse and validate the agent-check
+ * reply. Returns TCPCHK_EVAL_WAIT to wait for more data, TCPCHK_EVAL_CONTINUE
+ * to evaluate the next rule or TCPCHK_EVAL_STOP if an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_agent_expect_reply(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_STOP;
+ enum healthcheck_status status = HCHK_STATUS_CHECKED;
+ const char *hs = NULL; /* health status */
+ const char *as = NULL; /* admin status */
+ const char *ps = NULL; /* performance status */
+ const char *sc = NULL; /* maxconn */
+ const char *err = NULL; /* first error to report */
+ const char *wrn = NULL; /* first warning to report */
+ char *cmd, *p;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ /* We're getting an agent check response. The agent could
+	 * have been disabled in the meantime with a long check
+ * still pending. It is important that we ignore the whole
+ * response.
+ */
+ if (!(check->state & CHK_ST_ENABLED))
+ goto out;
+
+ /* The agent supports strings made of a single line ended by the
+ * first CR ('\r') or LF ('\n'). This line is composed of words
+ * delimited by spaces (' '), tabs ('\t'), or commas (','). The
+	 * line may optionally contain a description of a state change
+ * after a sharp ('#'), which is only considered if a health state
+ * is announced.
+ *
+ * Words may be composed of :
+ * - a numeric weight suffixed by the percent character ('%').
+ * - a health status among "up", "down", "stopped", and "fail".
+ * - an admin status among "ready", "drain", "maint".
+ *
+ * These words may appear in any order. If multiple words of the
+ * same category appear, the last one wins.
+ */
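+	/* For example, an agent may answer a line such as (illustrative):
+	 *
+	 *   up 75% maxconn:30 # back to nominal
+	 *
+	 * which reports the server UP, sets its weight to 75% and its maxconn
+	 * to 30, with a description after the sharp.
+	 */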
+
+ p = b_head(&check->bi);
+ while (*p && *p != '\n' && *p != '\r')
+ p++;
+
+ if (!*p) {
+ if (!last_read)
+ goto wait_more_data;
+
+ /* at least inform the admin that the agent is mis-behaving */
+ set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
+ goto out;
+ }
+
+ *p = 0;
+ cmd = b_head(&check->bi);
+
+ while (*cmd) {
+ /* look for next word */
+ if (*cmd == ' ' || *cmd == '\t' || *cmd == ',') {
+ cmd++;
+ continue;
+ }
+
+ if (*cmd == '#') {
+ /* this is the beginning of a health status description,
+ * skip the sharp and blanks.
+ */
+ cmd++;
+ while (*cmd == '\t' || *cmd == ' ')
+ cmd++;
+ break;
+ }
+
+ /* find the end of the word so that we have a null-terminated
+ * word between <cmd> and <p>.
+ */
+ p = cmd + 1;
+ while (*p && *p != '\t' && *p != ' ' && *p != '\n' && *p != ',')
+ p++;
+ if (*p)
+ *p++ = 0;
+
+ /* first, health statuses */
+ if (strcasecmp(cmd, "up") == 0) {
+ check->health = check->rise + check->fall - 1;
+ status = HCHK_STATUS_L7OKD;
+ hs = cmd;
+ }
+ else if (strcasecmp(cmd, "down") == 0) {
+ check->health = 0;
+ status = HCHK_STATUS_L7STS;
+ hs = cmd;
+ }
+ else if (strcasecmp(cmd, "stopped") == 0) {
+ check->health = 0;
+ status = HCHK_STATUS_L7STS;
+ hs = cmd;
+ }
+ else if (strcasecmp(cmd, "fail") == 0) {
+ check->health = 0;
+ status = HCHK_STATUS_L7STS;
+ hs = cmd;
+ }
+ /* admin statuses */
+ else if (strcasecmp(cmd, "ready") == 0) {
+ as = cmd;
+ }
+ else if (strcasecmp(cmd, "drain") == 0) {
+ as = cmd;
+ }
+ else if (strcasecmp(cmd, "maint") == 0) {
+ as = cmd;
+ }
+ /* try to parse a weight here and keep the last one */
+ else if (isdigit((unsigned char)*cmd) && strchr(cmd, '%') != NULL) {
+ ps = cmd;
+ }
+ /* try to parse a maxconn here */
+ else if (strncasecmp(cmd, "maxconn:", strlen("maxconn:")) == 0) {
+ sc = cmd;
+ }
+ else {
+ /* keep a copy of the first error */
+ if (!err)
+ err = cmd;
+ }
+ /* skip to next word */
+ cmd = p;
+ }
+ /* here, cmd points either to \0 or to the beginning of a
+ * description. Skip possible leading spaces.
+ */
+ while (*cmd == ' ' || *cmd == '\n')
+ cmd++;
+
+ /* First, update the admin status so that we avoid sending other
+ * possibly useless warnings and can also update the health if
+ * present after going back up.
+ */
+ if (as) {
+ if (strcasecmp(as, "drain") == 0) {
+ TRACE_DEVEL("set server into DRAIN mode", CHK_EV_TCPCHK_EXP, check);
+ srv_adm_set_drain(check->server);
+ }
+ else if (strcasecmp(as, "maint") == 0) {
+ TRACE_DEVEL("set server into MAINT mode", CHK_EV_TCPCHK_EXP, check);
+ srv_adm_set_maint(check->server);
+ }
+ else {
+ TRACE_DEVEL("set server into READY mode", CHK_EV_TCPCHK_EXP, check);
+ srv_adm_set_ready(check->server);
+ }
+ }
+
+ /* now change weights */
+ if (ps) {
+ const char *msg;
+
+ TRACE_DEVEL("change server weight", CHK_EV_TCPCHK_EXP, check);
+ msg = server_parse_weight_change_request(check->server, ps);
+ if (!wrn || !*wrn)
+ wrn = msg;
+ }
+
+ if (sc) {
+ const char *msg;
+
+ sc += strlen("maxconn:");
+
+ TRACE_DEVEL("change server maxconn", CHK_EV_TCPCHK_EXP, check);
+ /* This is safe to call server_parse_maxconn_change_request
+ * because the server lock is held during the check.
+ */
+ msg = server_parse_maxconn_change_request(check->server, sc);
+ if (!wrn || !*wrn)
+ wrn = msg;
+ }
+
+ /* and finally health status */
+ if (hs) {
+ /* We'll report some of the warnings and errors we have
+ * here. Down reports are critical, we leave them untouched.
+		 * Lack of a report, or a report of 'UP', leaves room for
+		 * ERR first, then WARN.
+ */
+ const char *msg = cmd;
+ struct buffer *t;
+
+ if (!*msg || status == HCHK_STATUS_L7OKD) {
+ if (err && *err)
+ msg = err;
+ else if (wrn && *wrn)
+ msg = wrn;
+ }
+
+ t = get_trash_chunk();
+ chunk_printf(t, "via agent : %s%s%s%s",
+ hs, *msg ? " (" : "",
+ msg, *msg ? ")" : "");
+ TRACE_DEVEL("update server health status", CHK_EV_TCPCHK_EXP, check);
+ set_server_check_status(check, status, t->area);
+ }
+ else if (err && *err) {
+ /* No status change but we'd like to report something odd.
+ * Just report the current state and copy the message.
+ */
+ TRACE_DEVEL("agent reports an error", CHK_EV_TCPCHK_EXP, check);
+ chunk_printf(&trash, "agent reports an error : %s", err);
+ set_server_check_status(check, status/*check->status*/, trash.area);
+ }
+ else if (wrn && *wrn) {
+ /* No status change but we'd like to report something odd.
+ * Just report the current state and copy the message.
+ */
+ TRACE_DEVEL("agent reports a warning", CHK_EV_TCPCHK_EXP, check);
+ chunk_printf(&trash, "agent warns : %s", wrn);
+ set_server_check_status(check, status/*check->status*/, trash.area);
+ }
+ else {
+ TRACE_DEVEL("update server health status", CHK_EV_TCPCHK_EXP, check);
+ set_server_check_status(check, status, NULL);
+ }
+
+ out:
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Evaluates a TCPCHK_ACT_CONNECT rule. Returns TCPCHK_EVAL_WAIT to wait for
+ * the connection establishment, TCPCHK_EVAL_CONTINUE to evaluate the next rule
+ * or TCPCHK_EVAL_STOP if an error occurred.
+ */
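+/* A connect rule typically comes from a configuration such as (illustrative
+ * example):
+ *
+ *   backend be_smtp
+ *       option tcp-check
+ *       tcp-check connect port 25
+ *       tcp-check expect rstring "^220"
+ *
+ * where the "tcp-check connect" line yields the TCPCHK_ACT_CONNECT rule
+ * evaluated here.
+ */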
+enum tcpcheck_eval_ret tcpcheck_eval_connect(struct check *check, struct tcpcheck_rule *rule)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_connect *connect = &rule->connect;
+ struct proxy *proxy = check->proxy;
+ struct server *s = check->server;
+ struct task *t = check->task;
+ struct connection *conn = sc_conn(check->sc);
+ struct protocol *proto;
+ struct xprt_ops *xprt;
+ struct tcpcheck_rule *next;
+ int status, port;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_CONN, check);
+
+ next = get_next_tcpcheck_rule(check->tcpcheck_rules, rule);
+
+ /* current connection already created, check if it is established or not */
+ if (conn) {
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ /* We are still waiting for the connection establishment */
+ if (next && next->action == TCPCHK_ACT_SEND) {
+ if (!(check->sc->wait_event.events & SUB_RETRY_SEND))
+ conn->mux->subscribe(check->sc, SUB_RETRY_SEND, &check->sc->wait_event);
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("not connected yet", CHK_EV_TCPCHK_CONN, check);
+ }
+ else
+ ret = tcpcheck_eval_recv(check, rule);
+ }
+ goto out;
+ }
+
+ /* Note: here check->sc = sc = conn = NULL */
+
+ /* Always release input and output buffer when a new connect is evaluated */
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+
+ /* No connection, prepare a new one */
+ conn = conn_new((s ? &s->obj_type : &proxy->obj_type));
+ if (!conn) {
+ chunk_printf(&trash, "TCPCHK error allocating connection at step %d",
+ tcpcheck_get_step_id(check, rule));
+ if (rule->comment)
+ chunk_appendf(&trash, " comment: '%s'", rule->comment);
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ TRACE_ERROR("stconn allocation error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ goto out;
+ }
+ if (sc_attach_mux(check->sc, NULL, conn) < 0) {
+ TRACE_ERROR("mux attach error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ conn_free(conn);
+ conn = NULL;
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+ conn->ctx = check->sc;
+ conn_set_owner(conn, check->sess, NULL);
+
+ /* no client address */
+ if (!sockaddr_alloc(&conn->dst, NULL, 0)) {
+ TRACE_ERROR("sockaddr allocation error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+
+	/* Connect to the connect rule's addr if specified, otherwise to the
+	 * check addr if specified on the server, and otherwise to the server
+	 * addr (which MUST exist at this step).
+ */
+ *conn->dst = (is_addr(&connect->addr)
+ ? connect->addr
+ : (is_addr(&check->addr) ? check->addr : s->addr));
+ proto = protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, 0);
+
+ port = 0;
+ if (connect->port)
+ port = connect->port;
+ if (!port && connect->port_expr) {
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(check->proxy, check->sess, NULL,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ connect->port_expr, SMP_T_SINT);
+ if (smp)
+ port = smp->data.u.sint;
+ }
+ if (!port && is_inet_addr(&connect->addr))
+ port = get_host_port(&connect->addr);
+ if (!port && check->port)
+ port = check->port;
+ if (!port && is_inet_addr(&check->addr))
+ port = get_host_port(&check->addr);
+ if (!port) {
+ /* The server MUST exist here */
+ port = s->svc_port;
+ }
+ set_host_port(conn->dst, port);
+ TRACE_DEVEL("set port", CHK_EV_TCPCHK_CONN, check, 0, 0, (size_t[]){port});
+
+ xprt = ((connect->options & TCPCHK_OPT_SSL)
+ ? xprt_get(XPRT_SSL)
+ : ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) ? check->xprt : xprt_get(XPRT_RAW)));
+
+ if (conn_prepare(conn, proto, xprt) < 0) {
+ TRACE_ERROR("xprt allocation error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+
+ if ((connect->options & TCPCHK_OPT_SOCKS4) && s && (s->flags & SRV_F_SOCKS4_PROXY)) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SOCKS4;
+ TRACE_DEVEL("configure SOCKS4 proxy", CHK_EV_TCPCHK_CONN);
+ }
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.via_socks4 && (s->flags & SRV_F_SOCKS4_PROXY)) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SOCKS4;
+ TRACE_DEVEL("configure SOCKS4 proxy", CHK_EV_TCPCHK_CONN);
+ }
+
+ if (connect->options & TCPCHK_OPT_SEND_PROXY) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SEND_PROXY;
+ TRACE_DEVEL("configure PROXY protocol", CHK_EV_TCPCHK_CONN, check);
+ }
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.send_proxy && !(check->state & CHK_ST_AGENT)) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SEND_PROXY;
+ TRACE_DEVEL("configure PROXY protocol", CHK_EV_TCPCHK_CONN, check);
+ }
+
+ status = SF_ERR_INTERNAL;
+ if (proto && proto->connect) {
+ int flags = 0;
+
+ if (!next)
+ flags |= CONNECT_DELACK_ALWAYS;
+ if (connect->options & TCPCHK_OPT_HAS_DATA)
+ flags |= (CONNECT_HAS_DATA|CONNECT_DELACK_ALWAYS);
+ status = proto->connect(conn, flags);
+ }
+
+ if (status != SF_ERR_NONE)
+ goto fail_check;
+
+ conn_set_private(conn);
+ conn->ctx = check->sc;
+
+#ifdef USE_OPENSSL
+ if (connect->sni)
+ ssl_sock_set_servername(conn, connect->sni);
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.sni)
+ ssl_sock_set_servername(conn, s->check.sni);
+
+ if (connect->alpn)
+ ssl_sock_set_alpn(conn, (unsigned char *)connect->alpn, connect->alpn_len);
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.alpn_str)
+ ssl_sock_set_alpn(conn, (unsigned char *)s->check.alpn_str, s->check.alpn_len);
+#endif
+
+ if (conn_ctrl_ready(conn) && (connect->options & TCPCHK_OPT_LINGER) && !(conn->flags & CO_FL_FDLESS)) {
+ /* Some servers don't like reset on close */
+ HA_ATOMIC_AND(&fdtab[conn->handle.fd].state, ~FD_LINGER_RISK);
+ }
+
+ if (conn_ctrl_ready(conn) && (conn->flags & (CO_FL_SEND_PROXY | CO_FL_SOCKS4))) {
+ if (xprt_add_hs(conn) < 0)
+ status = SF_ERR_RESOURCE;
+ }
+
+ if (conn_xprt_start(conn) < 0) {
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+
+	/* The mux may be initialized now if there is no server attached to the
+	 * check (email alerts), if a mux proto is specified or if there is no
+	 * ALPN.
+ */
+ if (!s || ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && check->mux_proto) ||
+ connect->mux_proto || (!connect->alpn && !check->alpn_str)) {
+ const struct mux_ops *mux_ops;
+
+ TRACE_DEVEL("try to install mux now", CHK_EV_TCPCHK_CONN, check);
+ if (connect->mux_proto)
+ mux_ops = connect->mux_proto->mux;
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && check->mux_proto)
+ mux_ops = check->mux_proto->mux;
+ else {
+ int mode = ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK
+ ? PROTO_MODE_HTTP
+ : PROTO_MODE_TCP);
+
+ mux_ops = conn_get_best_mux(conn, IST_NULL, PROTO_SIDE_BE, mode);
+ }
+ if (mux_ops && conn_install_mux(conn, mux_ops, check->sc, proxy, check->sess) < 0) {
+ TRACE_ERROR("failed to install mux", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ status = SF_ERR_INTERNAL;
+ goto fail_check;
+ }
+ }
+
+ fail_check:
+ /* It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ * Note that we try to prevent the network stack from sending the ACK during the
+ * connect() when a pure TCP check is used (without PROXY protocol).
+ */
+ switch (status) {
+ case SF_ERR_NONE:
+ /* we allow up to min(inter, timeout.connect) for a connection
+ * to establish but only when timeout.check is set as it may be
+		 * too short for a full check otherwise
+ */
+ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
+
+ if (proxy->timeout.check && proxy->timeout.connect) {
+ int t_con = tick_add(now_ms, proxy->timeout.connect);
+ t->expire = tick_first(t->expire, t_con);
+ }
+ break;
+ case SF_ERR_SRVTO: /* ETIMEDOUT */
+ case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
+ case SF_ERR_PRXCOND:
+ case SF_ERR_RESOURCE:
+ case SF_ERR_INTERNAL:
+ TRACE_ERROR("report connection error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check, 0, 0, (size_t[]){status});
+ chk_report_conn_err(check, errno, 0);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ }
+
+ /* don't do anything until the connection is established */
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ if (conn->mux) {
+ if (next && next->action == TCPCHK_ACT_SEND)
+ conn->mux->subscribe(check->sc, SUB_RETRY_SEND, &check->sc->wait_event);
+ else
+ conn->mux->subscribe(check->sc, SUB_RETRY_RECV, &check->sc->wait_event);
+ }
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("not connected yet", CHK_EV_TCPCHK_CONN, check);
+ goto out;
+ }
+
+ out:
+ if (conn && check->result == CHK_RES_FAILED) {
+ conn->flags |= CO_FL_ERROR;
+ TRACE_ERROR("connect failed, report connection error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ }
+
+ if (ret == TCPCHK_EVAL_CONTINUE && check->proxy->timeout.check)
+ check->task->expire = tick_add_ifset(now_ms, check->proxy->timeout.check);
+
+ TRACE_LEAVE(CHK_EV_TCPCHK_CONN, check, 0, 0, (size_t[]){ret});
+ return ret;
+}
+
+/* Evaluates a TCPCHK_ACT_SEND rule. Returns TCPCHK_EVAL_WAIT if outgoing data
+ * were not fully sent, TCPCHK_EVAL_CONTINUE to evaluate the next rule or
+ * TCPCHK_EVAL_STOP if an error occurred.
+ */
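+/* Illustrative configuration lines producing the send types handled below
+ * (the variable name is a made-up example):
+ *
+ *   tcp-check send "PING\r\n"              -> TCPCHK_SEND_STRING
+ *   tcp-check send-binary 50494E47         -> TCPCHK_SEND_BINARY
+ *   tcp-check send-lf "%[var(check.req)]"  -> TCPCHK_SEND_STRING_LF
+ *   option httpchk / http-check send ...   -> TCPCHK_SEND_HTTP
+ */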
+enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_rule *rule)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_send *send = &rule->send;
+ struct stconn *sc = check->sc;
+ struct connection *conn = __sc_conn(sc);
+ struct buffer *tmp = NULL;
+ struct htx *htx = NULL;
+ int connection_hdr = 0;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+
+ if (check->state & CHK_ST_OUT_ALLOC) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_STATE("waiting for output buffer allocation", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TX_BLK, check);
+ goto out;
+ }
+
+ if (!check_get_buf(check, &check->bo)) {
+ check->state |= CHK_ST_OUT_ALLOC;
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_STATE("waiting for output buffer allocation", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TX_BLK, check);
+ goto out;
+ }
+
+ /* Data already pending in the output buffer, send them now */
+ if ((IS_HTX_CONN(conn) && !htx_is_empty(htxbuf(&check->bo))) || (!IS_HTX_CONN(conn) && b_data(&check->bo))) {
+ TRACE_DEVEL("Data still pending, try to send it now", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+ goto do_send;
+ }
+
+ /* Always release input buffer when a new send is evaluated */
+ check_release_buf(check, &check->bi);
+
+ switch (send->type) {
+ case TCPCHK_SEND_STRING:
+ case TCPCHK_SEND_BINARY:
+ if (istlen(send->data) >= b_size(&check->bo)) {
+ chunk_printf(&trash, "tcp-check send : string too large (%u) for buffer size (%u) at step %d",
+ (unsigned int)istlen(send->data), (unsigned int)b_size(&check->bo),
+ tcpcheck_get_step_id(check, rule));
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ }
+ b_putist(&check->bo, send->data);
+ break;
+ case TCPCHK_SEND_STRING_LF:
+ check->bo.data = sess_build_logline(check->sess, NULL, b_orig(&check->bo), b_size(&check->bo), &rule->send.fmt);
+ if (!b_data(&check->bo))
+ goto out;
+ break;
+ case TCPCHK_SEND_BINARY_LF: {
+ int len = b_size(&check->bo);
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto error_lf;
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &rule->send.fmt);
+ if (!b_data(tmp))
+ goto out;
+ tmp->area[tmp->data] = '\0';
+ if (parse_binary(b_orig(tmp), &check->bo.area, &len, NULL) == 0)
+ goto error_lf;
+ check->bo.data = len;
+ break;
+ }
+ case TCPCHK_SEND_HTTP: {
+ struct htx_sl *sl;
+ struct ist meth, uri, vsn, clen, body;
+ unsigned int slflags = 0;
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto error_htx;
+
+ meth = ((send->http.meth.meth == HTTP_METH_OTHER)
+ ? ist2(send->http.meth.str.area, send->http.meth.str.data)
+ : http_known_methods[send->http.meth.meth]);
+ if (send->http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) {
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &send->http.uri_fmt);
+ uri = (b_data(tmp) ? ist2(b_orig(tmp), b_data(tmp)) : ist("/"));
+ }
+ else
+ uri = (isttest(send->http.uri) ? send->http.uri : ist("/"));
+ vsn = (isttest(send->http.vsn) ? send->http.vsn : ist("HTTP/1.0"));
+
+ if ((istlen(vsn) == 6 && *(vsn.ptr+5) == '2') ||
+ (istlen(vsn) == 8 && (*(vsn.ptr+5) > '1' || (*(vsn.ptr+5) == '1' && *(vsn.ptr+7) >= '1'))))
+ slflags |= HTX_SL_F_VER_11;
+ slflags |= (HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN);
+ if (!(send->http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !isttest(send->http.body))
+ slflags |= HTX_SL_F_BODYLESS;
+
+ htx = htx_from_buf(&check->bo);
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, slflags, meth, uri, vsn);
+ if (!sl)
+ goto error_htx;
+ sl->info.req.meth = send->http.meth.meth;
+ if (!http_update_host(htx, sl, uri))
+ goto error_htx;
+
+ if (!LIST_ISEMPTY(&send->http.hdrs)) {
+ struct tcpcheck_http_hdr *hdr;
+ struct ist hdr_value;
+
+ list_for_each_entry(hdr, &send->http.hdrs, list) {
+ chunk_reset(tmp);
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &hdr->value);
+ if (!b_data(tmp))
+ continue;
+ hdr_value = ist2(b_orig(tmp), b_data(tmp));
+ if (!htx_add_header(htx, hdr->name, hdr_value))
+ goto error_htx;
+ if ((sl->flags & HTX_SL_F_HAS_AUTHORITY) && isteqi(hdr->name, ist("host"))) {
+ if (!http_update_authority(htx, sl, hdr_value))
+ goto error_htx;
+ }
+ if (isteqi(hdr->name, ist("connection")))
+ connection_hdr = 1;
+ }
+
+ }
+ if (check->proxy->options2 & PR_O2_CHK_SNDST) {
+ chunk_reset(tmp);
+ httpchk_build_status_header(check->server, tmp);
+ if (!htx_add_header(htx, ist("X-Haproxy-Server-State"), ist2(b_orig(tmp), b_data(tmp))))
+ goto error_htx;
+ }
+
+ if (send->http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) {
+ chunk_reset(tmp);
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &send->http.body_fmt);
+ body = ist2(b_orig(tmp), b_data(tmp));
+ }
+ else
+ body = send->http.body;
+
+ if (!connection_hdr && !htx_add_header(htx, ist("Connection"), ist("close")))
+ goto error_htx;
+
+ if ((send->http.meth.meth != HTTP_METH_OPTIONS &&
+ send->http.meth.meth != HTTP_METH_GET &&
+ send->http.meth.meth != HTTP_METH_HEAD &&
+ send->http.meth.meth != HTTP_METH_DELETE) || istlen(body)) {
+ clen = ist((!istlen(body) ? "0" : ultoa(istlen(body))));
+ if (!htx_add_header(htx, ist("Content-length"), clen))
+ goto error_htx;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH) ||
+ (istlen(body) && !htx_add_data_atonce(htx, body)))
+ goto error_htx;
+
+ /* no more data are expected */
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, &check->bo);
+ break;
+ }
+ case TCPCHK_SEND_UNDEF:
+ /* Should never happen. */
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ };
+
+ do_send:
+ TRACE_DATA("send data", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+ if (conn->mux->snd_buf(sc, &check->bo,
+ (IS_HTX_CONN(conn) ? (htxbuf(&check->bo))->data: b_data(&check->bo)), 0) <= 0) {
+ if ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR)) {
+ ret = TCPCHK_EVAL_STOP;
+ TRACE_DEVEL("connection error during send", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TX_ERR, check);
+ goto out;
+ }
+ }
+ if ((IS_HTX_CONN(conn) && !htx_is_empty(htxbuf(&check->bo))) || (!IS_HTX_CONN(conn) && b_data(&check->bo))) {
+ conn->mux->subscribe(sc, SUB_RETRY_SEND, &sc->wait_event);
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("data not fully sent, wait", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+ goto out;
+ }
+
+ out:
+ free_trash_chunk(tmp);
+ if (!b_data(&check->bo) || ret == TCPCHK_EVAL_STOP)
+ check_release_buf(check, &check->bo);
+
+ TRACE_LEAVE(CHK_EV_TCPCHK_SND, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error_htx:
+ if (htx) {
+ htx_reset(htx);
+ htx_to_buf(htx, &check->bo);
+ }
+ chunk_printf(&trash, "tcp-check send : failed to build HTTP request at step %d",
+ tcpcheck_get_step_id(check, rule));
+ TRACE_ERROR("failed to build HTTP request", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TCPCHK_ERR, check);
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+
+ error_lf:
+ chunk_printf(&trash, "tcp-check send : failed to build log-format string at step %d",
+ tcpcheck_get_step_id(check, rule));
+ TRACE_ERROR("failed to build log-format string", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TCPCHK_ERR, check);
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+
+}
+
+/* Try to receive data before evaluating a tcp-check expect rule. Returns
+ * TCPCHK_EVAL_WAIT if it is already subscribed to receive events or if nothing
+ * was received, TCPCHK_EVAL_CONTINUE to evaluate the expect rule or
+ * TCPCHK_EVAL_STOP if an error occurred.
+ */
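+/* The receive loop below stops when nothing was read, on error or end of
+ * stream, when the mux reports that it needs more room, after
+ * MAX_READ_POLL_LOOPS iterations, or once a partial fill returned at least
+ * global.tune.recv_enough bytes, so that a single check cannot monopolize
+ * the thread.
+ */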
+enum tcpcheck_eval_ret tcpcheck_eval_recv(struct check *check, struct tcpcheck_rule *rule)
+{
+ struct stconn *sc = check->sc;
+ struct connection *conn = __sc_conn(sc);
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ size_t max, read, cur_read = 0;
+ int is_empty;
+ int read_poll = MAX_READ_POLL_LOOPS;
+
+ TRACE_ENTER(CHK_EV_RX_DATA, check);
+
+ if (sc->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("waiting for response", CHK_EV_RX_DATA, check);
+ goto wait_more_data;
+ }
+
+ if (sc_ep_test(sc, SE_FL_EOS))
+ goto end_recv;
+
+ if (check->state & CHK_ST_IN_ALLOC) {
+ TRACE_STATE("waiting for input buffer allocation", CHK_EV_RX_DATA|CHK_EV_RX_BLK, check);
+ goto wait_more_data;
+ }
+
+ if (!check_get_buf(check, &check->bi)) {
+ check->state |= CHK_ST_IN_ALLOC;
+ TRACE_STATE("waiting for input buffer allocation", CHK_EV_RX_DATA|CHK_EV_RX_BLK, check);
+ goto wait_more_data;
+ }
+
+ /* errors on the connection and the stream connector were already checked */
+
+ /* prepare to detect if the mux needs more room */
+ sc_ep_clr(sc, SE_FL_WANT_ROOM);
+
+ while (sc_ep_test(sc, SE_FL_RCV_MORE) ||
+ (!(conn->flags & CO_FL_ERROR) && !sc_ep_test(sc, SE_FL_ERROR | SE_FL_EOS))) {
+ max = (IS_HTX_SC(sc) ? htx_free_space(htxbuf(&check->bi)) : b_room(&check->bi));
+ read = conn->mux->rcv_buf(sc, &check->bi, max, 0);
+ cur_read += read;
+ if (!read ||
+ sc_ep_test(sc, SE_FL_WANT_ROOM) ||
+ (--read_poll <= 0) ||
+ (read < max && read >= global.tune.recv_enough))
+ break;
+ }
+
+ end_recv:
+ is_empty = (IS_HTX_SC(sc) ? htx_is_empty(htxbuf(&check->bi)) : !b_data(&check->bi));
+ if (is_empty && ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR))) {
+ /* Report network errors only if we got no other data. Otherwise
+ * we'll let the upper layers decide whether the response is OK
+ * or not. It is very common that an RST sent by the server is
+ * reported as an error just after the last data chunk.
+ */
+ TRACE_ERROR("connection error during recv", CHK_EV_RX_DATA|CHK_EV_RX_ERR, check);
+ goto stop;
+ }
+ if (!cur_read) {
+ if (sc_ep_test(sc, SE_FL_EOI)) {
+ /* If EOI is set, it means there is a response or an error */
+ goto out;
+ }
+
+ if (!sc_ep_test(sc, SE_FL_WANT_ROOM | SE_FL_ERROR | SE_FL_EOS)) {
+ conn->mux->subscribe(sc, SUB_RETRY_RECV, &sc->wait_event);
+ TRACE_DEVEL("waiting for response", CHK_EV_RX_DATA, check);
+ goto wait_more_data;
+ }
+
+ if (is_empty) {
+ int status;
+
+ chunk_printf(&trash, "TCPCHK got an empty response at step %d",
+ tcpcheck_get_step_id(check, rule));
+ if (rule->comment)
+ chunk_appendf(&trash, " comment: '%s'", rule->comment);
+
+ TRACE_ERROR("empty response", CHK_EV_RX_DATA|CHK_EV_RX_ERR, check);
+ status = ((rule->expect.err_status != HCHK_STATUS_UNKNOWN) ? rule->expect.err_status : HCHK_STATUS_L7RSP);
+ set_server_check_status(check, status, trash.area);
+ goto stop;
+ }
+ }
+ TRACE_DATA("data received", CHK_EV_RX_DATA, check, 0, 0, (size_t[]){cur_read});
+
+ out:
+ if (!b_data(&check->bi) || ret == TCPCHK_EVAL_STOP)
+ check_release_buf(check, &check->bi);
+
+ TRACE_LEAVE(CHK_EV_RX_DATA, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ stop:
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+
+ wait_more_data:
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Evaluates an HTTP TCPCHK_ACT_EXPECT rule. If <last_read> is set, no more data
+ * are expected. Returns TCPCHK_EVAL_WAIT to wait for more data,
+ * TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if an
+ * error occurred.
+ */
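+/* Illustrative configuration lines for the expect types evaluated below:
+ *
+ *   option httpchk GET /health
+ *   http-check expect status 200-399                          (HTTP_STATUS)
+ *   http-check expect rstatus ^2                              (HTTP_STATUS_REGEX)
+ *   http-check expect header name "content-type" value -m sub "json"
+ *                                                             (HTTP_HEADER)
+ */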
+enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ struct htx *htx = htxbuf(&check->bi);
+ struct htx_sl *sl;
+ struct htx_blk *blk;
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_expect *expect = &rule->expect;
+ struct buffer *msg = NULL, *tmp = NULL, *nbuf = NULL, *vbuf = NULL;
+ enum healthcheck_status status = HCHK_STATUS_L7RSP;
+ struct ist desc = IST_NULL;
+ int i, match, inverse;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ last_read |= (!htx_free_data_space(htx) || (htx->flags & HTX_FL_EOM));
+
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("invalid response", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+
+ if (htx_is_empty(htx)) {
+ if (last_read) {
+ TRACE_ERROR("empty response received", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto wait_more_data;
+ }
+
+ sl = http_get_stline(htx);
+ check->code = sl->info.res.status;
+
+ if (check->server &&
+ (check->server->proxy->options & PR_O_DISABLE404) &&
+ (check->server->next_state != SRV_ST_STOPPED) &&
+ (check->code == 404)) {
+ /* 404 may be accepted as "stopping" only if the server was up */
+ TRACE_STATE("404 response & disable-404", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ inverse = !!(expect->flags & TCPCHK_EXPT_FL_INV);
+ /* Make GCC happy ; initialize match to a failure state. */
+ match = inverse;
+ status = expect->err_status;
+
+ switch (expect->type) {
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ match = 0;
+ for (i = 0; i < expect->codes.num; i++) {
+ if (sl->info.res.status >= expect->codes.codes[i][0] &&
+ sl->info.res.status <= expect->codes.codes[i][1]) {
+ match = 1;
+ break;
+ }
+ }
+
+ /* Set status and description in case of error */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = htx_sl_res_reason(sl);
+ break;
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ match = regex_exec2(expect->regex, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl));
+
+ /* Set status and description in case of error */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = htx_sl_res_reason(sl);
+ break;
+
+ case TCPCHK_EXPECT_HTTP_HEADER: {
+ struct http_hdr_ctx ctx;
+ struct ist npat, vpat, value;
+ int full = (expect->flags & (TCPCHK_EXPT_FL_HTTP_HVAL_NONE|TCPCHK_EXPT_FL_HTTP_HVAL_FULL));
+
+ if (expect->flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) {
+ nbuf = alloc_trash_chunk();
+ if (!nbuf) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to allocate buffer to eval log-format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ nbuf->data = sess_build_logline(check->sess, NULL, b_orig(nbuf), b_size(nbuf), &expect->hdr.name_fmt);
+ if (!b_data(nbuf)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string (hdr name)", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ npat = ist2(b_orig(nbuf), b_data(nbuf));
+ }
+ else if (!(expect->flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG))
+ npat = expect->hdr.name;
+
+ if (expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) {
+ vbuf = alloc_trash_chunk();
+ if (!vbuf) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to allocate buffer to eval log-format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ vbuf->data = sess_build_logline(check->sess, NULL, b_orig(vbuf), b_size(vbuf), &expect->hdr.value_fmt);
+ if (!b_data(vbuf)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string (hdr value)", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ vpat = ist2(b_orig(vbuf), b_data(vbuf));
+ }
+ else if (!(expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG))
+ vpat = expect->hdr.value;
+
+ match = 0;
+ ctx.blk = NULL;
+ while (1) {
+ switch (expect->flags & TCPCHK_EXPT_FL_HTTP_HNAME_TYPE) {
+ case TCPCHK_EXPT_FL_HTTP_HNAME_STR:
+ if (!http_find_str_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_BEG:
+ if (!http_find_pfx_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_END:
+ if (!http_find_sfx_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_SUB:
+ if (!http_find_sub_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_REG:
+ if (!http_match_header(htx, expect->hdr.name_re, &ctx, full))
+ goto end_of_match;
+ break;
+ default:
+ /* should never happen */
+ goto end_of_match;
+ }
+
+			/* A header matched the name pattern; now test its value,
+			 * which is always defined at this point. If there is no
+			 * value pattern, the match is already good.
+			 */
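+			/* For example (illustrative; assumed syntax), the rule
+			 *     http-check expect hdr name content-type value -m sub json
+			 * matches any response whose Content-Type header contains
+			 * "json".
+			 */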
+
+ if (expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE) {
+ match = 1;
+ goto end_of_match;
+ }
+
+ value = ctx.value;
+ switch (expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_TYPE) {
+ case TCPCHK_EXPT_FL_HTTP_HVAL_STR:
+ if (isteq(value, vpat)) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_BEG:
+ if (istlen(value) < istlen(vpat))
+ break;
+ value = ist2(istptr(value), istlen(vpat));
+ if (isteq(value, vpat)) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_END:
+ if (istlen(value) < istlen(vpat))
+ break;
+ value = ist2(istend(value) - istlen(vpat), istlen(vpat));
+ if (isteq(value, vpat)) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_SUB:
+ if (isttest(istist(value, vpat))) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_REG:
+ if (regex_exec2(expect->hdr.value_re, istptr(value), istlen(value))) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ }
+ }
+
+ end_of_match:
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = htx_sl_res_reason(sl);
+ break;
+ }
+
+ case TCPCHK_EXPECT_HTTP_BODY:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ match = 0;
+ chunk_reset(&trash);
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA) {
+ if (!chunk_istcat(&trash, htx_get_blk_value(htx, blk)))
+ break;
+ }
+ }
+
+ if (!b_data(&trash)) {
+ if (!last_read) {
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto wait_more_data;
+ }
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = ist("HTTP content check could not find a response body");
+			TRACE_ERROR("no response body found while one was expected", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+
+ if (expect->type == TCPCHK_EXPECT_HTTP_BODY_LF) {
+ tmp = alloc_trash_chunk();
+ if (!tmp) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to allocate buffer to eval log-format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &expect->fmt);
+ if (!b_data(tmp)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ }
+
+ if (!last_read &&
+ ((expect->type == TCPCHK_EXPECT_HTTP_BODY && b_data(&trash) < istlen(expect->data)) ||
+	     (expect->type == TCPCHK_EXPECT_HTTP_BODY_LF && b_data(&trash) < b_data(tmp)) ||
+ (expect->min_recv > 0 && b_data(&trash) < expect->min_recv))) {
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+ }
+
+		if (expect->type == TCPCHK_EXPECT_HTTP_BODY)
+			match = my_memmem(b_orig(&trash), b_data(&trash), istptr(expect->data), istlen(expect->data)) != NULL;
+		else if (expect->type == TCPCHK_EXPECT_HTTP_BODY_LF)
+ match = my_memmem(b_orig(&trash), b_data(&trash), b_orig(tmp), b_data(tmp)) != NULL;
+ else
+ match = regex_exec2(expect->regex, b_orig(&trash), b_data(&trash));
+
+ /* Wait for more data on mismatch only if no minimum is defined (-1),
+ * otherwise the absence of match is already conclusive.
+ */
+ if (!match && !last_read && (expect->min_recv == -1)) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ /* Set status and description in case of error */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = (inverse
+ ? ist("HTTP check matched unwanted content")
+ : ist("HTTP content check did not match"));
+ break;
+
+ default:
+ /* should never happen */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP);
+ goto error;
+ }
+
+ if (!(match ^ inverse)) {
+ TRACE_STATE("expect rule failed", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+
+ TRACE_STATE("expect rule succeeded", CHK_EV_TCPCHK_EXP, check);
+
+ out:
+ free_trash_chunk(tmp);
+ free_trash_chunk(nbuf);
+ free_trash_chunk(vbuf);
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ TRACE_STATE("expect rule failed", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ wait_more_data:
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Evaluates a TCP TCPCHK_ACT_EXPECT rule. Returns TCPCHK_EVAL_WAIT to wait for
+ * more data, TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP
+ * if an error occurred.
+ */
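+/* For example, rules evaluated by this function typically come from
+ * configuration lines such as (illustrative; assumed syntax):
+ *     tcp-check expect string +PONG
+ *     tcp-check expect rbinary ^00
+ */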
+enum tcpcheck_eval_ret tcpcheck_eval_expect(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_expect *expect = &rule->expect;
+ struct buffer *msg = NULL, *tmp = NULL;
+ struct ist desc = IST_NULL;
+ enum healthcheck_status status;
+ int match, inverse;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ last_read |= b_full(&check->bi);
+
+ /* The current expect might need more data than the previous one, check again
+	 * that the minimum amount of data required to match is respected.
+ */
+ if (!last_read) {
+ if ((expect->type == TCPCHK_EXPECT_STRING || expect->type == TCPCHK_EXPECT_BINARY) &&
+ (b_data(&check->bi) < istlen(expect->data))) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+ if (expect->min_recv > 0 && (b_data(&check->bi) < expect->min_recv)) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+ }
+
+ inverse = !!(expect->flags & TCPCHK_EXPT_FL_INV);
+	/* Make GCC happy; initialize match to a failure state. */
+ match = inverse;
+ status = ((expect->err_status != HCHK_STATUS_UNKNOWN) ? expect->err_status : HCHK_STATUS_L7RSP);
+
+ switch (expect->type) {
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_BINARY:
+ match = my_memmem(b_head(&check->bi), b_data(&check->bi), istptr(expect->data), istlen(expect->data)) != NULL;
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ match = regex_exec2(expect->regex, b_head(&check->bi), MIN(b_data(&check->bi), b_size(&check->bi)-1));
+ break;
+
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ chunk_reset(&trash);
+ dump_binary(&trash, b_head(&check->bi), b_data(&check->bi));
+ match = regex_exec2(expect->regex, b_head(&trash), MIN(b_data(&trash), b_size(&trash)-1));
+ break;
+
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_BINARY_LF:
+ match = 0;
+ tmp = alloc_trash_chunk();
+ if (!tmp) {
+ status = HCHK_STATUS_L7RSP;
+			desc = ist("Failed to allocate buffer to eval log-format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &expect->fmt);
+ if (!b_data(tmp)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ if (expect->type == TCPCHK_EXPECT_BINARY_LF) {
+ int len = tmp->data;
+ if (parse_binary(b_orig(tmp), &tmp->area, &len, NULL) == 0) {
+ status = HCHK_STATUS_L7RSP;
+				desc = ist("Failed to parse hex string resulting from eval of a log-format string");
+ TRACE_ERROR("invalid binary log-format string", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ tmp->data = len;
+ }
+ if (b_data(&check->bi) < tmp->data) {
+ if (!last_read) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+ break;
+ }
+ match = my_memmem(b_head(&check->bi), b_data(&check->bi), b_orig(tmp), b_data(tmp)) != NULL;
+ break;
+
+ case TCPCHK_EXPECT_CUSTOM:
+ if (expect->custom)
+ ret = expect->custom(check, rule, last_read);
+ goto out;
+ default:
+ /* Should never happen. */
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ }
+
+ /* Wait for more data on mismatch only if no minimum is defined (-1),
+ * otherwise the absence of match is already conclusive.
+ */
+ if (!match && !last_read && (expect->min_recv == -1)) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ /* Result as expected, next rule. */
+ if (match ^ inverse) {
+ TRACE_STATE("expect rule succeeded", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ error:
+	/* From this point on, we matched something we did not want; this is an error state. */
+ TRACE_STATE("expect rule failed", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, match, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ free_trash_chunk(msg);
+
+ out:
+ free_trash_chunk(tmp);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+}
+
+/* Evaluates a TCPCHK_ACT_ACTION_KW rule. Returns TCPCHK_EVAL_CONTINUE to
+ * evaluate the next rule or TCPCHK_EVAL_STOP if an error occurred. It never
+ * waits.
+ */
+enum tcpcheck_eval_ret tcpcheck_eval_action_kw(struct check *check, struct tcpcheck_rule *rule)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct act_rule *act_rule;
+ enum act_return act_ret;
+
+	act_rule = rule->action_kw.rule;
+ act_ret = act_rule->action_ptr(act_rule, check->proxy, check->sess, NULL, 0);
+ if (act_ret != ACT_RET_CONT) {
+ chunk_printf(&trash, "TCPCHK ACTION unexpected result at step %d\n",
+ tcpcheck_get_step_id(check, rule));
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ }
+
+ return ret;
+}
+
+/* Executes a tcp-check ruleset. Note that this is called both from the
+ * connection's wake() callback and from the check scheduling task. It returns
+ * 0 in normal cases, or <0 if a close() has happened on an existing connection,
+ * presenting the risk of an fd replacement.
+ *
+ * Please do NOT place any return statement in this function and only leave
+ * via the out_end_tcpcheck label after setting retcode.
+ */
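+/* For example (illustrative; assumed syntax), a ruleset such as:
+ *     tcp-check connect port 6379
+ *     tcp-check send "PING\r\n"
+ *     tcp-check expect string +PONG
+ * is walked rule by rule by the loop below, pausing on TCPCHK_EVAL_WAIT
+ * and aborting on TCPCHK_EVAL_STOP.
+ */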
+int tcpcheck_main(struct check *check)
+{
+ struct tcpcheck_rule *rule;
+ struct stconn *sc = check->sc;
+ struct connection *conn = sc_conn(sc);
+ int must_read = 1, last_read = 0;
+ int retcode = 0;
+ enum tcpcheck_eval_ret eval_ret;
+
+ /* here, we know that the check is complete or that it failed */
+ if (check->result != CHK_RES_UNKNOWN)
+ goto out;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EVAL, check);
+
+	/* Note: the stream connector and the connection may only be undefined before
+	 * the first rule evaluation (which is always a connect rule) or when the
+	 * stream connector allocation failed on a connect rule.
+	 */
+
+ /* 1- check for connection error, if any */
+ if ((conn && conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR))
+ goto out_end_tcpcheck;
+
+	/* 2- check if a rule must be resumed. This happens when
+	 * check->current_step is defined. */
+ else if (check->current_step) {
+ rule = check->current_step;
+ TRACE_PROTO("resume rule evaluation", CHK_EV_TCPCHK_EVAL, check, 0, 0, (size_t[]){ tcpcheck_get_step_id(check, rule)});
+ }
+
+ /* 3- It is the first evaluation. We must create a session and preset
+ * tcp-check variables */
+ else {
+ struct tcpcheck_var *var;
+
+ /* First evaluation, create a session */
+ check->sess = session_new(&checks_fe, NULL, &check->obj_type);
+ if (!check->sess) {
+ chunk_printf(&trash, "TCPCHK error allocating check session");
+ TRACE_ERROR("session allocation failure", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_ERR, check);
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.area);
+ goto out_end_tcpcheck;
+ }
+ vars_init_head(&check->vars, SCOPE_CHECK);
+ rule = LIST_NEXT(check->tcpcheck_rules->list, typeof(rule), list);
+
+ /* Preset tcp-check variables */
+ list_for_each_entry(var, &check->tcpcheck_rules->preset_vars, list) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, check->proxy, check->sess, NULL, SMP_OPT_FINAL);
+ smp.data = var->data;
+ vars_set_by_name_ifexist(istptr(var->name), istlen(var->name), &smp);
+ }
+ TRACE_PROTO("start rules evaluation", CHK_EV_TCPCHK_EVAL, check);
+ }
+
+ /* Now evaluate the tcp-check rules */
+
+ list_for_each_entry_from(rule, check->tcpcheck_rules->list, list) {
+ check->code = 0;
+ switch (rule->action) {
+ case TCPCHK_ACT_CONNECT:
+ /* Not the first connection, release it first */
+ if (sc_conn(sc) && check->current_step != rule) {
+ check->state |= CHK_ST_CLOSE_CONN;
+ retcode = -1;
+ }
+
+ check->current_step = rule;
+
+			/* We are still waiting for the connection to be closed */
+ if (check->state & CHK_ST_CLOSE_CONN) {
+ TRACE_DEVEL("wait previous connection closure", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_CONN, check);
+ eval_ret = TCPCHK_EVAL_WAIT;
+ break;
+ }
+
+ TRACE_PROTO("eval connect rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_CONN, check);
+ eval_ret = tcpcheck_eval_connect(check, rule);
+
+ /* Refresh connection */
+ conn = sc_conn(sc);
+ last_read = 0;
+ must_read = (IS_HTX_SC(sc) ? htx_is_empty(htxbuf(&check->bi)) : !b_data(&check->bi));
+ break;
+ case TCPCHK_ACT_SEND:
+ check->current_step = rule;
+ TRACE_PROTO("eval send rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_SND, check);
+ eval_ret = tcpcheck_eval_send(check, rule);
+ must_read = 1;
+ break;
+ case TCPCHK_ACT_EXPECT:
+ check->current_step = rule;
+ TRACE_PROTO("eval expect rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_EXP, check);
+ if (must_read) {
+ eval_ret = tcpcheck_eval_recv(check, rule);
+ if (eval_ret == TCPCHK_EVAL_STOP)
+ goto out_end_tcpcheck;
+ else if (eval_ret == TCPCHK_EVAL_WAIT)
+ goto out;
+ last_read = ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR | SE_FL_EOS));
+ must_read = 0;
+ }
+
+ eval_ret = ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK
+ ? tcpcheck_eval_expect_http(check, rule, last_read)
+ : tcpcheck_eval_expect(check, rule, last_read));
+
+ if (eval_ret == TCPCHK_EVAL_WAIT) {
+ check->current_step = rule->expect.head;
+ if (!(sc->wait_event.events & SUB_RETRY_RECV))
+ conn->mux->subscribe(sc, SUB_RETRY_RECV, &sc->wait_event);
+ }
+ break;
+ case TCPCHK_ACT_ACTION_KW:
+ /* Don't update the current step */
+ TRACE_PROTO("eval action kw rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_ACT, check);
+ eval_ret = tcpcheck_eval_action_kw(check, rule);
+ break;
+ default:
+ /* Otherwise, just go to the next one and don't update
+ * the current step
+ */
+ eval_ret = TCPCHK_EVAL_CONTINUE;
+ break;
+ }
+
+ switch (eval_ret) {
+ case TCPCHK_EVAL_CONTINUE:
+ break;
+ case TCPCHK_EVAL_WAIT:
+ goto out;
+ case TCPCHK_EVAL_STOP:
+ goto out_end_tcpcheck;
+ }
+ }
+
+	/* All rules were evaluated */
+ if (check->current_step) {
+ rule = check->current_step;
+
+ TRACE_DEVEL("eval tcp-check result", CHK_EV_TCPCHK_EVAL, check);
+
+ if (rule->action == TCPCHK_ACT_EXPECT) {
+ struct buffer *msg;
+ enum healthcheck_status status;
+
+ if (check->server &&
+ (check->server->proxy->options & PR_O_DISABLE404) &&
+ (check->server->next_state != SRV_ST_STOPPED) &&
+ (check->code == 404)) {
+ set_server_check_status(check, HCHK_STATUS_L7OKCD, NULL);
+ TRACE_PROTO("tcp-check conditionally passed (disable-404)", CHK_EV_TCPCHK_EVAL, check);
+ goto out_end_tcpcheck;
+ }
+
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onsuccess_message(msg, check, rule, IST_NULL);
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, (msg ? b_head(msg) : "(tcp-check)"));
+ free_trash_chunk(msg);
+ }
+ else if (rule->action == TCPCHK_ACT_CONNECT) {
+ const char *msg = ((rule->connect.options & TCPCHK_OPT_IMPLICIT) ? NULL : "(tcp-check)");
+ enum healthcheck_status status = HCHK_STATUS_L4OK;
+#ifdef USE_OPENSSL
+ if (conn_is_ssl(conn))
+ status = HCHK_STATUS_L6OK;
+#endif
+ set_server_check_status(check, status, msg);
+ }
+ else
+ set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
+ }
+ else {
+ set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
+ }
+ TRACE_PROTO("tcp-check passed", CHK_EV_TCPCHK_EVAL, check);
+
+ out_end_tcpcheck:
+ if ((conn && conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR)) {
+ TRACE_ERROR("report connection error", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_ERR, check);
+ chk_report_conn_err(check, errno, 0);
+ }
+
+	/* the tcpcheck is finished, release in/out buffers now */
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+
+ out:
+ TRACE_LEAVE(CHK_EV_HCHK_RUN, check);
+ return retcode;
+}
+
+void tcp_check_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_check_keywords.list, &kw_list->list);
+}
+
+/**************************************************************************/
+/******************* Internals to parse tcp-check rules *******************/
+/**************************************************************************/
+struct action_kw_list tcp_check_keywords = {
+ .list = LIST_HEAD_INIT(tcp_check_keywords.list),
+};
+
+/* Creates a tcp-check rule resulting from parsing a custom keyword. NULL is
+ * returned on error.
+ */
+struct tcpcheck_rule *parse_tcpcheck_action(char **args, int cur_arg, struct proxy *px,
+ struct list *rules, struct action_kw *kw,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct act_rule *actrule = NULL;
+
+ actrule = new_act_rule(ACT_F_TCP_CHK, file, line);
+ if (!actrule) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ actrule->kw = kw;
+
+ cur_arg++;
+ if (kw->parse((const char **)args, &cur_arg, px, actrule, errmsg) == ACT_RET_PRS_ERR) {
+ memprintf(errmsg, "'%s' : %s", kw->kw, *errmsg);
+ goto error;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_ACTION_KW;
+ chk->action_kw.rule = actrule;
+ return chk;
+
+ error:
+ free(actrule);
+ return NULL;
+}
+
+/* Parses and creates a tcp-check connect or an http-check connect rule. NULL is
+ * returned on error.
+ */
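+/* Illustrative examples of lines handled by this parser (assumed syntax):
+ *     tcp-check connect port 443 ssl sni srv.example.com
+ *     http-check connect proto h2
+ */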
+struct tcpcheck_rule *parse_tcpcheck_connect(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct sockaddr_storage *sk = NULL;
+ char *comment = NULL, *sni = NULL, *alpn = NULL;
+ struct sample_expr *port_expr = NULL;
+ const struct mux_proto_list *mux_proto = NULL;
+ unsigned short conn_opts = 0;
+ long port = 0;
+ int alpn_len = 0;
+
+ list_for_each_entry(chk, rules, list) {
+ if (chk->action == TCPCHK_ACT_CONNECT)
+ break;
+ if (chk->action == TCPCHK_ACT_COMMENT ||
+ chk->action == TCPCHK_ACT_ACTION_KW ||
+ (chk->action == TCPCHK_ACT_SEND && (chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)))
+ continue;
+
+ memprintf(errmsg, "first step MUST also be a 'connect', "
+ "optionally preceded by a 'set-var', an 'unset-var' or a 'comment', "
+ "when there is a 'connect' step in the tcp-check ruleset");
+ goto error;
+ }
+
+ cur_arg++;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "default") == 0)
+ conn_opts |= TCPCHK_OPT_DEFAULT_CONNECT;
+ else if (strcmp(args[cur_arg], "addr") == 0) {
+ int port1, port2;
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects <ipv4|ipv6> as argument.", args[cur_arg]);
+ goto error;
+ }
+
+ sk = str2sa_range(args[cur_arg+1], NULL, &port1, &port2, NULL, NULL,
+ errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(errmsg, "'%s' : %s.", args[cur_arg], *errmsg);
+ goto error;
+ }
+
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "port") == 0) {
+ const char *p, *end;
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a port number or a sample expression as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+
+ port = 0;
+ release_sample_expr(port_expr);
+ p = args[cur_arg]; end = p + strlen(p);
+ port = read_uint(&p, end);
+ if (p != end) {
+ int idx = 0;
+
+ px->conf.args.ctx = ARGC_SRV;
+ port_expr = sample_parse_expr((char *[]){args[cur_arg], NULL}, &idx,
+ file, line, errmsg, &px->conf.args, NULL);
+
+ if (!port_expr) {
+					memprintf(errmsg, "error detected while parsing port expression: %s", *errmsg);
+ goto error;
+ }
+ if (!(port_expr->fetch->val & SMP_VAL_BE_CHK_RUL)) {
+					memprintf(errmsg, "error detected while parsing port expression: "
+						  "fetch method '%s' extracts information from '%s', "
+ "none of which is available here.\n",
+ args[cur_arg], sample_src_names(port_expr->fetch->use));
+ goto error;
+ }
+ px->http_needed |= !!(port_expr->fetch->use & SMP_USE_HTTP_ANY);
+ }
+ else if (port > 65535 || port < 1) {
+ memprintf(errmsg, "expects a valid TCP port (from range 1 to 65535) or a sample expression, got %s.",
+ args[cur_arg]);
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "proto") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a MUX protocol as argument.", args[cur_arg]);
+ goto error;
+ }
+ mux_proto = get_mux_proto(ist(args[cur_arg + 1]));
+ if (!mux_proto) {
+ memprintf(errmsg, "'%s' : unknown MUX protocol '%s'.", args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+
+ if (strcmp(args[0], "tcp-check") == 0 && mux_proto->mode != PROTO_MODE_TCP) {
+ memprintf(errmsg, "'%s' : invalid MUX protocol '%s' for tcp-check", args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ else if (strcmp(args[0], "http-check") == 0 && mux_proto->mode != PROTO_MODE_HTTP) {
+ memprintf(errmsg, "'%s' : invalid MUX protocol '%s' for http-check", args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "comment") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "send-proxy") == 0)
+ conn_opts |= TCPCHK_OPT_SEND_PROXY;
+ else if (strcmp(args[cur_arg], "via-socks4") == 0)
+ conn_opts |= TCPCHK_OPT_SOCKS4;
+ else if (strcmp(args[cur_arg], "linger") == 0)
+ conn_opts |= TCPCHK_OPT_LINGER;
+#ifdef USE_OPENSSL
+ else if (strcmp(args[cur_arg], "ssl") == 0) {
+ px->options |= PR_O_TCPCHK_SSL;
+ conn_opts |= TCPCHK_OPT_SSL;
+ }
+ else if (strcmp(args[cur_arg], "sni") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(sni);
+ sni = strdup(args[cur_arg]);
+ if (!sni) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "alpn") == 0) {
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ free(alpn);
+ if (ssl_sock_parse_alpn(args[cur_arg + 1], &alpn, &alpn_len, errmsg)) {
+ memprintf(errmsg, "'%s' : %s", args[cur_arg], *errmsg);
+ goto error;
+ }
+ cur_arg++;
+#else
+ memprintf(errmsg, "'%s' : library does not support TLS ALPN extension.", args[cur_arg]);
+ goto error;
+#endif
+ }
+#endif /* USE_OPENSSL */
+
+ else {
+ memprintf(errmsg, "expects 'comment', 'port', 'addr', 'send-proxy'"
+#ifdef USE_OPENSSL
+ ", 'ssl', 'sni', 'alpn'"
+#endif /* USE_OPENSSL */
+				  ", 'via-socks4', 'linger' or 'default' but got '%s' as argument.",
+ args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_CONNECT;
+ chk->comment = comment;
+ chk->connect.port = port;
+ chk->connect.options = conn_opts;
+ chk->connect.sni = sni;
+ chk->connect.alpn = alpn;
+	chk->connect.alpn_len = alpn_len;
+	chk->connect.port_expr = port_expr;
+	chk->connect.mux_proto = mux_proto;
+ if (sk)
+ chk->connect.addr = *sk;
+ return chk;
+
+ error:
+ free(alpn);
+ free(sni);
+ free(comment);
+ release_sample_expr(port_expr);
+ return NULL;
+}
+
+/* Parses and creates a tcp-check send rule. NULL is returned on error */
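+/* Illustrative examples (assumed syntax):
+ *     tcp-check send "PING\r\n" comment "redis ping"
+ *     tcp-check send-binary 50494e470d0a
+ */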
+struct tcpcheck_rule *parse_tcpcheck_send(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ char *comment = NULL, *data = NULL;
+ enum tcpcheck_send_type type = TCPCHK_SEND_UNDEF;
+
+ if (strcmp(args[cur_arg], "send-binary-lf") == 0)
+ type = TCPCHK_SEND_BINARY_LF;
+ else if (strcmp(args[cur_arg], "send-binary") == 0)
+ type = TCPCHK_SEND_BINARY;
+ else if (strcmp(args[cur_arg], "send-lf") == 0)
+ type = TCPCHK_SEND_STRING_LF;
+ else if (strcmp(args[cur_arg], "send") == 0)
+ type = TCPCHK_SEND_STRING;
+
+ if (!*(args[cur_arg+1])) {
+		memprintf(errmsg, "'%s' expects a %s as argument",
+			  args[cur_arg], (type == TCPCHK_SEND_BINARY ? "binary string" : "string"));
+ goto error;
+ }
+
+ data = args[cur_arg+1];
+
+ cur_arg += 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "comment") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else {
+ memprintf(errmsg, "expects 'comment' but got '%s' as argument.",
+ args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_SEND;
+	chk->comment = comment; comment = NULL;
+ chk->send.type = type;
+
+ switch (chk->send.type) {
+ case TCPCHK_SEND_STRING:
+ chk->send.data = ist(strdup(data));
+ if (!isttest(chk->send.data)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ break;
+ case TCPCHK_SEND_BINARY: {
+ int len = chk->send.data.len;
+ if (parse_binary(data, &chk->send.data.ptr, &len, errmsg) == 0) {
+ memprintf(errmsg, "'%s' invalid binary string (%s).\n", data, *errmsg);
+ goto error;
+ }
+ chk->send.data.len = len;
+ break;
+ }
+ case TCPCHK_SEND_STRING_LF:
+ case TCPCHK_SEND_BINARY_LF:
+ LIST_INIT(&chk->send.fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(data, px, &chk->send.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", data, *errmsg);
+ goto error;
+ }
+ break;
+ case TCPCHK_SEND_HTTP:
+ case TCPCHK_SEND_UNDEF:
+ goto error;
+ }
+
+ return chk;
+
+ error:
+	free_tcpcheck(chk, 0);
+ free(comment);
+ return NULL;
+}
+
+/* Parses and creates a http-check send rule. NULL is returned on error */
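+/* Illustrative example (assumed syntax):
+ *     http-check send meth GET uri /health ver HTTP/1.1 hdr Host www
+ */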
+struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct tcpcheck_http_hdr *hdr = NULL;
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ char *meth = NULL, *uri = NULL, *vsn = NULL;
+ char *body = NULL, *comment = NULL;
+ unsigned int flags = 0;
+ int i = 0, host_hdr = -1;
+
+ cur_arg++;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "meth") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ meth = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "uri") == 0 || strcmp(args[cur_arg], "uri-lf") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ flags &= ~TCPCHK_SND_HTTP_FL_URI_FMT;
+ if (strcmp(args[cur_arg], "uri-lf") == 0)
+ flags |= TCPCHK_SND_HTTP_FL_URI_FMT;
+ cur_arg++;
+ uri = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "ver") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ vsn = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "hdr") == 0) {
+ if (!*args[cur_arg+1] || !*args[cur_arg+2]) {
+ memprintf(errmsg, "'%s' expects <name> and <value> as arguments", args[cur_arg]);
+ goto error;
+ }
+
+ if (strcasecmp(args[cur_arg+1], "host") == 0) {
+ if (host_hdr >= 0) {
+ memprintf(errmsg, "'%s' header already defined (previous value is '%s')",
+ args[cur_arg+1], istptr(hdrs[host_hdr].v));
+ goto error;
+ }
+ host_hdr = i;
+ }
+ else if (strcasecmp(args[cur_arg+1], "content-length") == 0 ||
+ strcasecmp(args[cur_arg+1], "transfer-encoding") == 0)
+ goto skip_hdr;
+
+ hdrs[i].n = ist(args[cur_arg + 1]);
+ hdrs[i].v = ist(args[cur_arg + 2]);
+ i++;
+ skip_hdr:
+ cur_arg += 2;
+ }
+ else if (strcmp(args[cur_arg], "body") == 0 || strcmp(args[cur_arg], "body-lf") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ flags &= ~TCPCHK_SND_HTTP_FL_BODY_FMT;
+ if (strcmp(args[cur_arg], "body-lf") == 0)
+ flags |= TCPCHK_SND_HTTP_FL_BODY_FMT;
+ cur_arg++;
+ body = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "comment") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else {
+ memprintf(errmsg, "expects 'comment', 'meth', 'uri', 'uri-lf', 'ver', 'hdr', 'body' or 'body-lf'"
+ " but got '%s' as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ hdrs[i].n = hdrs[i].v = IST_NULL;
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_SEND;
+ chk->comment = comment; comment = NULL;
+ chk->send.type = TCPCHK_SEND_HTTP;
+ chk->send.http.flags = flags;
+ LIST_INIT(&chk->send.http.hdrs);
+
+ if (meth) {
+ chk->send.http.meth.meth = find_http_meth(meth, strlen(meth));
+ chk->send.http.meth.str.area = strdup(meth);
+ chk->send.http.meth.str.data = strlen(meth);
+ if (!chk->send.http.meth.str.area) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ if (uri) {
+ if (chk->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) {
+ LIST_INIT(&chk->send.http.uri_fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(uri, px, &chk->send.http.uri_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", uri, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->send.http.uri = ist(strdup(uri));
+ if (!isttest(chk->send.http.uri)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ }
+ if (vsn) {
+ chk->send.http.vsn = ist(strdup(vsn));
+ if (!isttest(chk->send.http.vsn)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ for (i = 0; istlen(hdrs[i].n); i++) {
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ LIST_INIT(&hdr->value);
+ hdr->name = istdup(hdrs[i].n);
+ if (!isttest(hdr->name)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+
+ ist0(hdrs[i].v);
+ if (!parse_logformat_string(istptr(hdrs[i].v), px, &hdr->value, 0, SMP_VAL_BE_CHK_RUL, errmsg))
+ goto error;
+ LIST_APPEND(&chk->send.http.hdrs, &hdr->list);
+ hdr = NULL;
+ }
+
+ if (body) {
+ if (chk->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) {
+ LIST_INIT(&chk->send.http.body_fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(body, px, &chk->send.http.body_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", body, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->send.http.body = ist(strdup(body));
+ if (!isttest(chk->send.http.body)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ }
+
+ return chk;
+
+ error:
+ free_tcpcheck_http_hdr(hdr);
+ free_tcpcheck(chk, 0);
+ free(comment);
+ return NULL;
+}
+
+/* Parses and creates a http-check comment rule. NULL is returned on error */
+struct tcpcheck_rule *parse_tcpcheck_comment(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ char *comment = NULL;
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "expects a string as argument");
+ goto error;
+ }
+ cur_arg++;
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_COMMENT;
+ chk->comment = comment;
+ return chk;
+
+ error:
+ free(comment);
+ return NULL;
+}
+
+/* Parses and creates a tcp-check or an http-check expect rule. NULL is returned
+ * on error. <proto> is set to the right protocol flags (covered by the
+ * TCPCHK_RULES_PROTO_CHK mask).
+ */
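+/* Illustrative examples (assumed syntax):
+ *     tcp-check expect string +OK
+ *     tcp-check expect min-recv 4 !rstring ERR
+ *     http-check expect status 200-399 comment "alive"
+ */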
+struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct proxy *px,
+ struct list *rules, unsigned int proto,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *prev_check, *chk = NULL;
+ struct sample_expr *status_expr = NULL;
+ char *on_success_msg, *on_error_msg, *comment, *pattern, *npat, *vpat;
+ enum tcpcheck_expect_type type = TCPCHK_EXPECT_UNDEF;
+ enum healthcheck_status ok_st = HCHK_STATUS_UNKNOWN;
+ enum healthcheck_status err_st = HCHK_STATUS_UNKNOWN;
+ enum healthcheck_status tout_st = HCHK_STATUS_UNKNOWN;
+ unsigned int flags = 0;
+ long min_recv = -1;
+ int inverse = 0;
+
+ on_success_msg = on_error_msg = comment = pattern = npat = vpat = NULL;
+ if (!*(args[cur_arg+1])) {
+		memprintf(errmsg, "expects at least a matching pattern as argument");
+ goto error;
+ }
+
+ cur_arg++;
+ while (*(args[cur_arg])) {
+ int in_pattern = 0;
+
+ rescan:
+ if (strcmp(args[cur_arg], "min-recv") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+				memprintf(errmsg, "'%s' expects an integer as argument", args[cur_arg]);
+ goto error;
+ }
+			/* Use a signed integer here because of bufsize */
+ cur_arg++;
+ min_recv = atol(args[cur_arg]);
+ if (min_recv < -1 || min_recv > INT_MAX) {
+				memprintf(errmsg, "'%s' expects -1 or an integer from 0 to INT_MAX", args[cur_arg-1]);
+ goto error;
+ }
+ }
+ else if (*(args[cur_arg]) == '!') {
+ in_pattern = 1;
+ while (*(args[cur_arg]) == '!') {
+ inverse = !inverse;
+ args[cur_arg]++;
+ }
+ if (!*(args[cur_arg]))
+ cur_arg++;
+ goto rescan;
+ }
+ else if (strcmp(args[cur_arg], "string") == 0 || strcmp(args[cur_arg], "rstring") == 0) {
+ if (type != TCPCHK_EXPECT_UNDEF) {
+				memprintf(errmsg, "only one pattern expected");
+ goto error;
+ }
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_STRING : TCPCHK_EXPECT_STRING_REGEX);
+ else
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_HTTP_BODY : TCPCHK_EXPECT_HTTP_BODY_REGEX);
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "binary") == 0 || strcmp(args[cur_arg], "rbinary") == 0) {
+ if (proto == TCPCHK_RULES_HTTP_CHK)
+ goto bad_http_kw;
+ if (type != TCPCHK_EXPECT_UNDEF) {
+				memprintf(errmsg, "only one pattern expected");
+ goto error;
+ }
+ type = ((*(args[cur_arg]) == 'b') ? TCPCHK_EXPECT_BINARY : TCPCHK_EXPECT_BINARY_REGEX);
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "string-lf") == 0 || strcmp(args[cur_arg], "binary-lf") == 0) {
+ if (type != TCPCHK_EXPECT_UNDEF) {
+				memprintf(errmsg, "only one pattern expected");
+ goto error;
+ }
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_STRING_LF : TCPCHK_EXPECT_BINARY_LF);
+ else {
+ if (*(args[cur_arg]) != 's')
+ goto bad_http_kw;
+ type = TCPCHK_EXPECT_HTTP_BODY_LF;
+ }
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "status") == 0 || strcmp(args[cur_arg], "rstatus") == 0) {
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ goto bad_tcp_kw;
+ if (type != TCPCHK_EXPECT_UNDEF) {
+				memprintf(errmsg, "only one pattern expected");
+ goto error;
+ }
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_HTTP_STATUS : TCPCHK_EXPECT_HTTP_STATUS_REGEX);
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "custom") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (type != TCPCHK_EXPECT_UNDEF) {
+				memprintf(errmsg, "only one pattern expected");
+ goto error;
+ }
+ type = TCPCHK_EXPECT_CUSTOM;
+ }
+ else if (strcmp(args[cur_arg], "hdr") == 0 || strcmp(args[cur_arg], "fhdr") == 0) {
+ int orig_arg = cur_arg;
+
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ goto bad_tcp_kw;
+ if (type != TCPCHK_EXPECT_UNDEF) {
+				memprintf(errmsg, "only one pattern expected");
+ goto error;
+ }
+ type = TCPCHK_EXPECT_HTTP_HEADER;
+
+ if (strcmp(args[cur_arg], "fhdr") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_FULL;
+
+ /* Parse the name pattern, mandatory */
+ if (!*(args[cur_arg+1]) || !*(args[cur_arg+2]) ||
+ (strcmp(args[cur_arg+1], "name") != 0 && strcmp(args[cur_arg+1], "name-lf") != 0)) {
+				memprintf(errmsg, "'%s' expects the name keyword as first argument followed by a pattern",
+ args[orig_arg]);
+ goto error;
+ }
+
+ if (strcmp(args[cur_arg+1], "name-lf") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_FMT;
+
+ cur_arg += 2;
+ if (strcmp(args[cur_arg], "-m") == 0) {
+ if (!*(args[cur_arg+1])) {
+					memprintf(errmsg, "'%s' : '%s' expects a matching pattern ('str', 'beg', 'end', 'sub' or 'reg')",
+ args[orig_arg], args[cur_arg]);
+ goto error;
+ }
+ if (strcmp(args[cur_arg+1], "str") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_STR;
+ else if (strcmp(args[cur_arg+1], "beg") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_BEG;
+ else if (strcmp(args[cur_arg+1], "end") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_END;
+ else if (strcmp(args[cur_arg+1], "sub") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_SUB;
+ else if (strcmp(args[cur_arg+1], "reg") == 0) {
+ if (flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) {
+ memprintf(errmsg, "'%s': log-format string is not supported with a regex matching method",
+ args[orig_arg]);
+ goto error;
+ }
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_REG;
+ }
+ else {
+ memprintf(errmsg, "'%s' : '%s' only supports 'str', 'beg', 'end', 'sub' or 'reg' (got '%s')",
+ args[orig_arg], args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg += 2;
+ }
+ else
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_STR;
+ npat = args[cur_arg];
+
+ if (!*(args[cur_arg+1]) ||
+ (strcmp(args[cur_arg+1], "value") != 0 && strcmp(args[cur_arg+1], "value-lf") != 0)) {
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_NONE;
+ goto next;
+ }
+ if (strcmp(args[cur_arg+1], "value-lf") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_FMT;
+
+ /* Parse the value pattern, optional */
+ if (strcmp(args[cur_arg+2], "-m") == 0) {
+ cur_arg += 2;
+ if (!*(args[cur_arg+1])) {
+					memprintf(errmsg, "'%s' : '%s' expects a matching pattern ('str', 'beg', 'end', 'sub' or 'reg')",
+ args[orig_arg], args[cur_arg]);
+ goto error;
+ }
+ if (strcmp(args[cur_arg+1], "str") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_STR;
+ else if (strcmp(args[cur_arg+1], "beg") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_BEG;
+ else if (strcmp(args[cur_arg+1], "end") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_END;
+ else if (strcmp(args[cur_arg+1], "sub") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_SUB;
+ else if (strcmp(args[cur_arg+1], "reg") == 0) {
+ if (flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) {
+ memprintf(errmsg, "'%s': log-format string is not supported with a regex matching method",
+ args[orig_arg]);
+ goto error;
+ }
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_REG;
+ }
+ else {
+ memprintf(errmsg, "'%s' : '%s' only supports 'str', 'beg', 'end', 'sub' or 'reg' (got '%s')",
+ args[orig_arg], args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ }
+ else
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_STR;
+
+ if (!*(args[cur_arg+2])) {
+				memprintf(errmsg, "'%s' expects a pattern with the value keyword", args[orig_arg]);
+ goto error;
+ }
+ vpat = args[cur_arg+2];
+ cur_arg += 2;
+ }
+ else if (strcmp(args[cur_arg], "comment") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "on-success") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ on_success_msg = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "on-error") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ on_error_msg = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "ok-status") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg+1], "L7OK") == 0)
+ ok_st = HCHK_STATUS_L7OKD;
+ else if (strcasecmp(args[cur_arg+1], "L7OKC") == 0)
+ ok_st = HCHK_STATUS_L7OKCD;
+ else if (strcasecmp(args[cur_arg+1], "L6OK") == 0)
+ ok_st = HCHK_STATUS_L6OK;
+ else if (strcasecmp(args[cur_arg+1], "L4OK") == 0)
+ ok_st = HCHK_STATUS_L4OK;
+ else {
+ memprintf(errmsg, "'%s' only supports 'L4OK', 'L6OK', 'L7OK' or 'L7OKC' status (got '%s').",
+ args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "error-status") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg+1], "L7RSP") == 0)
+ err_st = HCHK_STATUS_L7RSP;
+ else if (strcasecmp(args[cur_arg+1], "L7STS") == 0)
+ err_st = HCHK_STATUS_L7STS;
+ else if (strcasecmp(args[cur_arg+1], "L7OKC") == 0)
+ err_st = HCHK_STATUS_L7OKCD;
+ else if (strcasecmp(args[cur_arg+1], "L6RSP") == 0)
+ err_st = HCHK_STATUS_L6RSP;
+ else if (strcasecmp(args[cur_arg+1], "L4CON") == 0)
+ err_st = HCHK_STATUS_L4CON;
+ else {
+ memprintf(errmsg, "'%s' only supports 'L4CON', 'L6RSP', 'L7RSP' or 'L7STS' status (got '%s').",
+ args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "status-code") == 0) {
+ int idx = 0;
+
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects an expression as argument", args[cur_arg]);
+ goto error;
+ }
+
+ cur_arg++;
+ release_sample_expr(status_expr);
+ px->conf.args.ctx = ARGC_SRV;
+ status_expr = sample_parse_expr((char *[]){args[cur_arg], NULL}, &idx,
+ file, line, errmsg, &px->conf.args, NULL);
+ if (!status_expr) {
+				memprintf(errmsg, "error detected while parsing status-code expression: %s", *errmsg);
+ goto error;
+ }
+ if (!(status_expr->fetch->val & SMP_VAL_BE_CHK_RUL)) {
+				memprintf(errmsg, "error detected while parsing status-code expression: "
+					  "fetch method '%s' extracts information from '%s', "
+ "none of which is available here.\n",
+ args[cur_arg], sample_src_names(status_expr->fetch->use));
+ goto error;
+ }
+ px->http_needed |= !!(status_expr->fetch->use & SMP_USE_HTTP_ANY);
+ }
+ else if (strcmp(args[cur_arg], "tout-status") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg+1], "L7TOUT") == 0)
+ tout_st = HCHK_STATUS_L7TOUT;
+ else if (strcasecmp(args[cur_arg+1], "L6TOUT") == 0)
+ tout_st = HCHK_STATUS_L6TOUT;
+ else if (strcasecmp(args[cur_arg+1], "L4TOUT") == 0)
+ tout_st = HCHK_STATUS_L4TOUT;
+ else {
+ memprintf(errmsg, "'%s' only supports 'L4TOUT', 'L6TOUT' or 'L7TOUT' status (got '%s').",
+ args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else {
+ if (proto == TCPCHK_RULES_HTTP_CHK) {
+ bad_http_kw:
+			memprintf(errmsg, "only supports 'min-recv', '[!]string', '[!]rstring', '[!]string-lf', '[!]status', "
+				  "'[!]rstatus', '[!]hdr', '[!]fhdr' or 'comment' but got '%s' as argument.", args[cur_arg]);
+ }
+ else {
+ bad_tcp_kw:
+			memprintf(errmsg, "only supports 'min-recv', '[!]binary', '[!]string', '[!]rstring', '[!]string-lf', "
+				  "'[!]rbinary', '[!]binary-lf' or 'comment' but got '%s' as argument.", args[cur_arg]);
+ }
+ goto error;
+ }
+ next:
+ cur_arg++;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_EXPECT;
+ LIST_INIT(&chk->expect.onerror_fmt);
+ LIST_INIT(&chk->expect.onsuccess_fmt);
+ chk->comment = comment; comment = NULL;
+ chk->expect.type = type;
+ chk->expect.min_recv = min_recv;
+ chk->expect.flags = flags | (inverse ? TCPCHK_EXPT_FL_INV : 0);
+ chk->expect.ok_status = ok_st;
+ chk->expect.err_status = err_st;
+ chk->expect.tout_status = tout_st;
+ chk->expect.status_expr = status_expr; status_expr = NULL;
+
+ if (on_success_msg) {
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(on_success_msg, px, &chk->expect.onsuccess_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", on_success_msg, *errmsg);
+ goto error;
+ }
+ }
+ if (on_error_msg) {
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(on_error_msg, px, &chk->expect.onerror_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", on_error_msg, *errmsg);
+ goto error;
+ }
+ }
+
+ switch (chk->expect.type) {
+ case TCPCHK_EXPECT_HTTP_STATUS: {
+ const char *p = pattern;
+		unsigned int c1, c2;
+
+ chk->expect.codes.codes = NULL;
+ chk->expect.codes.num = 0;
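+		/* Parse a comma-separated list of status codes or ranges; e.g.
+		 * (illustrative) "200-204,301" yields the two ranges
+		 * {200,204} and {301,301}.
+		 */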
+ while (1) {
+ c1 = c2 = read_uint(&p, pattern + strlen(pattern));
+ if (*p == '-') {
+ p++;
+ c2 = read_uint(&p, pattern + strlen(pattern));
+ }
+ if (c1 > c2) {
+ memprintf(errmsg, "invalid range of status codes '%s'", pattern);
+ goto error;
+ }
+
+ chk->expect.codes.num++;
+ chk->expect.codes.codes = my_realloc2(chk->expect.codes.codes,
+ chk->expect.codes.num * sizeof(*chk->expect.codes.codes));
+ if (!chk->expect.codes.codes) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->expect.codes.codes[chk->expect.codes.num-1][0] = c1;
+ chk->expect.codes.codes[chk->expect.codes.num-1][1] = c2;
+
+ if (*p == '\0')
+ break;
+ if (*p != ',') {
+ memprintf(errmsg, "invalid character '%c' in the list of status codes", *p);
+ goto error;
+ }
+ p++;
+ }
+ break;
+ }
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_HTTP_BODY:
+ chk->expect.data = ist(strdup(pattern));
+ if (!isttest(chk->expect.data)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ break;
+ case TCPCHK_EXPECT_BINARY: {
+ int len = chk->expect.data.len;
+
+ if (parse_binary(pattern, &chk->expect.data.ptr, &len, errmsg) == 0) {
+ memprintf(errmsg, "invalid binary string (%s)", *errmsg);
+ goto error;
+ }
+ chk->expect.data.len = len;
+ break;
+ }
+ case TCPCHK_EXPECT_STRING_REGEX:
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ chk->expect.regex = regex_comp(pattern, 1, 0, errmsg);
+ if (!chk->expect.regex)
+ goto error;
+ break;
+
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_BINARY_LF:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ LIST_INIT(&chk->expect.fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(pattern, px, &chk->expect.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", pattern, *errmsg);
+ goto error;
+ }
+ break;
+
+ case TCPCHK_EXPECT_HTTP_HEADER:
+ if (!npat) {
+ memprintf(errmsg, "unexpected error, undefined header name pattern");
+ goto error;
+ }
+ if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG) {
+ chk->expect.hdr.name_re = regex_comp(npat, 0, 0, errmsg);
+ if (!chk->expect.hdr.name_re)
+ goto error;
+ }
+ else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) {
+ px->conf.args.ctx = ARGC_SRV;
+ LIST_INIT(&chk->expect.hdr.name_fmt);
+ if (!parse_logformat_string(npat, px, &chk->expect.hdr.name_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", npat, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->expect.hdr.name = ist(strdup(npat));
+ if (!isttest(chk->expect.hdr.name)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+
+ if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE) {
+ chk->expect.hdr.value = IST_NULL;
+ break;
+ }
+
+ if (!vpat) {
+ memprintf(errmsg, "unexpected error, undefined header value pattern");
+ goto error;
+ }
+ else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG) {
+ chk->expect.hdr.value_re = regex_comp(vpat, 1, 0, errmsg);
+ if (!chk->expect.hdr.value_re)
+ goto error;
+ }
+ else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) {
+ px->conf.args.ctx = ARGC_SRV;
+ LIST_INIT(&chk->expect.hdr.value_fmt);
+ if (!parse_logformat_string(vpat, px, &chk->expect.hdr.value_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+				memprintf(errmsg, "'%s' invalid log-format string (%s).\n", vpat, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->expect.hdr.value = ist(strdup(vpat));
+ if (!isttest(chk->expect.hdr.value)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+		chk->expect.custom = NULL; /* Must be defined by the caller! */
+ break;
+ case TCPCHK_EXPECT_UNDEF:
+ memprintf(errmsg, "pattern not found");
+ goto error;
+ }
+
+	/* Each tcp-check expect rule points back to the first inverse expect
+	 * rule in its chain of one or more expect rules, possibly itself.
+	 */
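+	/* For example (illustrative; assumed syntax), with:
+	 *     tcp-check expect !string "busy"
+	 *     tcp-check expect string "ready"
+	 * the second rule's ->head points back to the first (inverse) rule,
+	 * so evaluation can restart from the beginning of the chain when
+	 * more data arrives.
+	 */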
+ chk->expect.head = chk;
+ list_for_each_entry_rev(prev_check, rules, list) {
+ if (prev_check->action == TCPCHK_ACT_EXPECT) {
+ if (prev_check->expect.flags & TCPCHK_EXPT_FL_INV)
+ chk->expect.head = prev_check;
+ continue;
+ }
+ if (prev_check->action != TCPCHK_ACT_COMMENT && prev_check->action != TCPCHK_ACT_ACTION_KW)
+ break;
+ }
+ return chk;
+
+ error:
+ free_tcpcheck(chk, 0);
+ free(comment);
+ release_sample_expr(status_expr);
+ return NULL;
+}
+
+/* Overwrites fields of the old http send rule with those of the new one. When
+ * replaced, old values are freed and replaced by the new ones. New values are
+ * not copied but transferred. At the end <new> should be empty and can be
+ * safely released. This function never fails.
+ */
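+/* For example (illustrative; assumed syntax), with:
+ *     option httpchk GET /health
+ *     http-check send hdr Host www
+ * the implicit send rule built from "option httpchk" is merged into the
+ * explicit one, so the final request uses the GET method, the /health URI
+ * and the Host header.
+ */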
+void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpcheck_rule *new)
+{
+ struct logformat_node *lf, *lfb;
+ struct tcpcheck_http_hdr *hdr, *bhdr;
+
+ if (new->send.http.meth.str.area) {
+ free(old->send.http.meth.str.area);
+ old->send.http.meth.meth = new->send.http.meth.meth;
+ old->send.http.meth.str.area = new->send.http.meth.str.area;
+ old->send.http.meth.str.data = new->send.http.meth.str.data;
+ new->send.http.meth.str = BUF_NULL;
+ }
+
+ if (!(new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && isttest(new->send.http.uri)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT))
+ istfree(&old->send.http.uri);
+ else
+ free_tcpcheck_fmt(&old->send.http.uri_fmt);
+ old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_URI_FMT;
+ old->send.http.uri = new->send.http.uri;
+ new->send.http.uri = IST_NULL;
+ }
+ else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && !LIST_ISEMPTY(&new->send.http.uri_fmt)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT))
+ istfree(&old->send.http.uri);
+ else
+ free_tcpcheck_fmt(&old->send.http.uri_fmt);
+ old->send.http.flags |= TCPCHK_SND_HTTP_FL_URI_FMT;
+ LIST_INIT(&old->send.http.uri_fmt);
+ list_for_each_entry_safe(lf, lfb, &new->send.http.uri_fmt, list) {
+ LIST_DELETE(&lf->list);
+ LIST_APPEND(&old->send.http.uri_fmt, &lf->list);
+ }
+ }
+
+ if (isttest(new->send.http.vsn)) {
+ istfree(&old->send.http.vsn);
+ old->send.http.vsn = new->send.http.vsn;
+ new->send.http.vsn = IST_NULL;
+ }
+
+ if (!LIST_ISEMPTY(&new->send.http.hdrs)) {
+ free_tcpcheck_http_hdrs(&old->send.http.hdrs);
+ list_for_each_entry_safe(hdr, bhdr, &new->send.http.hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ LIST_APPEND(&old->send.http.hdrs, &hdr->list);
+ }
+ }
+
+ if (!(new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && isttest(new->send.http.body)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT))
+ istfree(&old->send.http.body);
+ else
+ free_tcpcheck_fmt(&old->send.http.body_fmt);
+ old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_BODY_FMT;
+ old->send.http.body = new->send.http.body;
+ new->send.http.body = IST_NULL;
+ }
+ else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !LIST_ISEMPTY(&new->send.http.body_fmt)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT))
+ istfree(&old->send.http.body);
+ else
+ free_tcpcheck_fmt(&old->send.http.body_fmt);
+ old->send.http.flags |= TCPCHK_SND_HTTP_FL_BODY_FMT;
+ LIST_INIT(&old->send.http.body_fmt);
+ list_for_each_entry_safe(lf, lfb, &new->send.http.body_fmt, list) {
+ LIST_DELETE(&lf->list);
+ LIST_APPEND(&old->send.http.body_fmt, &lf->list);
+ }
+ }
+}
+
+/* Internal function used to add an http-check rule in a list during the config
+ * parsing step. Depending on its type and the previously inserted rules, a
+ * specific action may be performed or an error may be reported. This function
+ * returns 1 on success and 0 on error, in which case <errmsg> is filled with
+ * the error message.
+ */
+int tcpcheck_add_http_rule(struct tcpcheck_rule *chk, struct tcpcheck_rules *rules, char **errmsg)
+{
+ struct tcpcheck_rule *r;
+
+	/* the implicit send rule coming from an "option httpchk" line must be
+	 * merged with the first explicit http-check send rule, if any.
+	 * Depending on the declaration order, some tests are required.
+ *
+ * Some tests are also required for other kinds of http-check rules to be
+ * sure the ruleset remains valid.
+ */
+
+ if (chk->action == TCPCHK_ACT_SEND && (chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)) {
+		/* Tries to add an implicit http-check send rule from an "option httpchk" line.
+		 * First, the first rule is retrieved, skipping the first CONNECT, if any, and
+		 * the following tests are performed:
+ *
+ * 1- If there is no such rule or if it is not a send rule, the implicit send
+ * rule is pushed in front of the ruleset
+ *
+ * 2- If it is another implicit send rule, it is replaced with the new one.
+ *
+ * 3- Otherwise, it means it is an explicit send rule. In this case we merge
+ * both, overwriting the old send rule (the explicit one) with info of the
+ * new send rule (the implicit one).
+ */
+ r = get_first_tcpcheck_rule(rules);
+ if (r && r->action == TCPCHK_ACT_CONNECT)
+ r = get_next_tcpcheck_rule(rules, r);
+ if (!r || r->action != TCPCHK_ACT_SEND)
+ LIST_INSERT(rules->list, &chk->list);
+ else if (r->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT) {
+ LIST_DELETE(&r->list);
+ free_tcpcheck(r, 0);
+ LIST_INSERT(rules->list, &chk->list);
+ }
+ else {
+ tcpcheck_overwrite_send_http_rule(r, chk);
+ free_tcpcheck(chk, 0);
+ }
+ }
+ else {
+		/* Tries to add an explicit http-check rule. First of all we check the type of
+		 * the last inserted rule to be sure it is valid. Then for a send rule, we try to merge it
+ * with an existing implicit send rule, if any. At the end, if there is no error,
+ * the rule is appended to the list.
+ */
+
+ r = get_last_tcpcheck_rule(rules);
+ if (!r || (r->action == TCPCHK_ACT_SEND && (r->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)))
+ /* no error */;
+ else if (r->action != TCPCHK_ACT_CONNECT && chk->action == TCPCHK_ACT_SEND) {
+ memprintf(errmsg, "unable to add http-check send rule at step %d (missing connect rule).",
+ chk->index+1);
+ return 0;
+ }
+ else if (r->action != TCPCHK_ACT_SEND && r->action != TCPCHK_ACT_EXPECT && chk->action == TCPCHK_ACT_EXPECT) {
+ memprintf(errmsg, "unable to add http-check expect rule at step %d (missing send rule).",
+ chk->index+1);
+ return 0;
+ }
+ else if (r->action != TCPCHK_ACT_EXPECT && chk->action == TCPCHK_ACT_CONNECT) {
+ memprintf(errmsg, "unable to add http-check connect rule at step %d (missing expect rule).",
+ chk->index+1);
+ return 0;
+ }
+
+ if (chk->action == TCPCHK_ACT_SEND) {
+ r = get_first_tcpcheck_rule(rules);
+ if (r && r->action == TCPCHK_ACT_SEND && (r->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)) {
+ tcpcheck_overwrite_send_http_rule(r, chk);
+ free_tcpcheck(chk, 0);
+ LIST_DELETE(&r->list);
+ r->send.http.flags &= ~TCPCHK_SND_HTTP_FROM_OPT;
+ chk = r;
+ }
+ }
+ LIST_APPEND(rules->list, &chk->list);
+ }
+ return 1;
+}
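+
+/* Illustration (hypothetical configuration): with
+ *     backend app
+ *         option httpchk
+ *         http-check send meth GET uri /health hdr Host www
+ * the implicit send rule built from "option httpchk" is merged by the
+ * function above into the explicit "http-check send" rule, whatever the
+ * order of the two lines.
+ */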
+
+/* Check tcp-check health-check configuration for the proxy <px>. */
+static int check_proxy_tcpcheck(struct proxy *px)
+{
+ struct tcpcheck_rule *chk, *back;
+ char *comment = NULL, *errmsg = NULL;
+ enum tcpcheck_rule_type prev_action = TCPCHK_ACT_COMMENT;
+ int ret = ERR_NONE;
+
+ if (!(px->cap & PR_CAP_BE) || (px->options2 & PR_O2_CHK_ANY) != PR_O2_TCPCHK_CHK) {
+ deinit_proxy_tcpcheck(px);
+ goto out;
+ }
+
+ ha_free(&px->check_command);
+ ha_free(&px->check_path);
+
+ if (!px->tcpcheck_rules.list) {
+ ha_alert("proxy '%s' : tcp-check configured but no ruleset defined.\n", px->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* HTTP ruleset only : */
+ if ((px->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
+ struct tcpcheck_rule *next;
+
+ /* move remaining implicit send rule from "option httpchk" line to the right place.
+ * If such rule exists, it must be the first one. In this case, the rule is moved
+ * after the first connect rule, if any. Otherwise, nothing is done.
+ */
+ chk = get_first_tcpcheck_rule(&px->tcpcheck_rules);
+ if (chk && chk->action == TCPCHK_ACT_SEND && (chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)) {
+ next = get_next_tcpcheck_rule(&px->tcpcheck_rules, chk);
+ if (next && next->action == TCPCHK_ACT_CONNECT) {
+ LIST_DELETE(&chk->list);
+ LIST_INSERT(&next->list, &chk->list);
+ chk->index = next->index + 1;
+ }
+ }
+
+ /* add implicit expect rule if the last one is a send. It is inherited from previous
+		 * versions where the http expect rule was optional. Now it is possible to chain
+ * send/expect rules but the last expect may still be implicit.
+ */
+ chk = get_last_tcpcheck_rule(&px->tcpcheck_rules);
+ if (chk && chk->action == TCPCHK_ACT_SEND) {
+ next = parse_tcpcheck_expect((char *[]){"http-check", "expect", "status", "200-399", ""},
+ 1, px, px->tcpcheck_rules.list, TCPCHK_RULES_HTTP_CHK,
+ px->conf.file, px->conf.line, &errmsg);
+ if (!next) {
+ ha_alert("proxy '%s': unable to add implicit http-check expect rule "
+ "(%s).\n", px->id, errmsg);
+ free(errmsg);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ LIST_APPEND(px->tcpcheck_rules.list, &next->list);
+ next->index = chk->index + 1;
+ }
+ }
+
+ /* For all ruleset: */
+
+ /* If there is no connect rule preceding all send / expect rules, an
+ * implicit one is inserted before all others.
+ */
+ chk = get_first_tcpcheck_rule(&px->tcpcheck_rules);
+ if (!chk || chk->action != TCPCHK_ACT_CONNECT) {
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ ha_alert("proxy '%s': unable to add implicit tcp-check connect rule "
+ "(out of memory).\n", px->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chk->action = TCPCHK_ACT_CONNECT;
+ chk->connect.options = (TCPCHK_OPT_DEFAULT_CONNECT|TCPCHK_OPT_IMPLICIT);
+ LIST_INSERT(px->tcpcheck_rules.list, &chk->list);
+ }
+
+	/* Remove all comment rules. To do so, when such a rule is found, its
+	 * comment is assigned to the following rule(s).
+ */
+ list_for_each_entry_safe(chk, back, px->tcpcheck_rules.list, list) {
+ struct tcpcheck_rule *next;
+
+ if (chk->action != prev_action && prev_action != TCPCHK_ACT_COMMENT)
+ ha_free(&comment);
+
+ prev_action = chk->action;
+ switch (chk->action) {
+ case TCPCHK_ACT_COMMENT:
+ free(comment);
+ comment = chk->comment;
+ LIST_DELETE(&chk->list);
+ free(chk);
+ break;
+ case TCPCHK_ACT_CONNECT:
+ if (!chk->comment && comment)
+ chk->comment = strdup(comment);
+ next = get_next_tcpcheck_rule(&px->tcpcheck_rules, chk);
+ if (next && next->action == TCPCHK_ACT_SEND)
+ chk->connect.options |= TCPCHK_OPT_HAS_DATA;
+ /* fall through */
+ case TCPCHK_ACT_ACTION_KW:
+ ha_free(&comment);
+ break;
+ case TCPCHK_ACT_SEND:
+ case TCPCHK_ACT_EXPECT:
+ if (!chk->comment && comment)
+ chk->comment = strdup(comment);
+ break;
+ }
+ }
+ ha_free(&comment);
+
+ out:
+ return ret;
+}
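+
+/* Illustration (hypothetical configuration): after the transformations above,
+ * a minimal ruleset such as
+ *     backend app
+ *         option httpchk GET /health
+ * should behave roughly like the explicit form
+ *     tcp-check connect                        (implicit)
+ *     http-check send meth GET uri /health
+ *     http-check expect status 200-399         (implicit)
+ */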
+
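+/* Detaches the tcp-check ruleset from proxy <px> and releases its preset
+ * variables. The rules themselves belong to the shared ruleset and are
+ * released separately (see deinit_tcpchecks() below).
+ */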
+void deinit_proxy_tcpcheck(struct proxy *px)
+{
+ free_tcpcheck_vars(&px->tcpcheck_rules.preset_vars);
+ px->tcpcheck_rules.flags = 0;
+ px->tcpcheck_rules.list = NULL;
+}
+
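+/* Releases all the shared tcp-check rulesets and their rules at deinit time */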
+static void deinit_tcpchecks()
+{
+ struct tcpcheck_ruleset *rs;
+ struct tcpcheck_rule *r, *rb;
+ struct ebpt_node *node, *next;
+
+ node = ebpt_first(&shared_tcpchecks);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ free(node->key);
+ rs = container_of(node, typeof(*rs), node);
+ list_for_each_entry_safe(r, rb, &rs->rules, list) {
+ LIST_DELETE(&r->list);
+ free_tcpcheck(r, 0);
+ }
+ free(rs);
+ node = next;
+ }
+}
+
+int add_tcpcheck_expect_str(struct tcpcheck_rules *rules, const char *str)
+{
+ struct tcpcheck_rule *tcpcheck, *prev_check;
+ struct tcpcheck_expect *expect;
+
+ if ((tcpcheck = pool_zalloc(pool_head_tcpcheck_rule)) == NULL)
+ return 0;
+ tcpcheck->action = TCPCHK_ACT_EXPECT;
+
+ expect = &tcpcheck->expect;
+ expect->type = TCPCHK_EXPECT_STRING;
+ LIST_INIT(&expect->onerror_fmt);
+ LIST_INIT(&expect->onsuccess_fmt);
+ expect->ok_status = HCHK_STATUS_L7OKD;
+ expect->err_status = HCHK_STATUS_L7RSP;
+ expect->tout_status = HCHK_STATUS_L7TOUT;
+ expect->data = ist(strdup(str));
+ if (!isttest(expect->data)) {
+ pool_free(pool_head_tcpcheck_rule, tcpcheck);
+ return 0;
+ }
+
+	/* Each tcp-check expect rule points back to the first inverse expect
+	 * rule in a chain of one or more expect rules, potentially itself.
+ */
+ tcpcheck->expect.head = tcpcheck;
+ list_for_each_entry_rev(prev_check, rules->list, list) {
+ if (prev_check->action == TCPCHK_ACT_EXPECT) {
+ if (prev_check->expect.flags & TCPCHK_EXPT_FL_INV)
+ tcpcheck->expect.head = prev_check;
+ continue;
+ }
+ if (prev_check->action != TCPCHK_ACT_COMMENT && prev_check->action != TCPCHK_ACT_ACTION_KW)
+ break;
+ }
+ LIST_APPEND(rules->list, &tcpcheck->list);
+ return 1;
+}
+
+int add_tcpcheck_send_strs(struct tcpcheck_rules *rules, const char * const *strs)
+{
+ struct tcpcheck_rule *tcpcheck;
+ struct tcpcheck_send *send;
+ const char *in;
+ char *dst;
+ int i;
+
+ if ((tcpcheck = pool_zalloc(pool_head_tcpcheck_rule)) == NULL)
+ return 0;
+ tcpcheck->action = TCPCHK_ACT_SEND;
+
+ send = &tcpcheck->send;
+ send->type = TCPCHK_SEND_STRING;
+
+ for (i = 0; strs[i]; i++)
+ send->data.len += strlen(strs[i]);
+
+ send->data.ptr = malloc(istlen(send->data) + 1);
+ if (!isttest(send->data)) {
+ pool_free(pool_head_tcpcheck_rule, tcpcheck);
+ return 0;
+ }
+
+ dst = istptr(send->data);
+ for (i = 0; strs[i]; i++)
+ for (in = strs[i]; (*dst = *in++); dst++);
+ *dst = 0;
+
+ LIST_APPEND(rules->list, &tcpcheck->list);
+ return 1;
+}
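+
+/* Usage sketch for the two helpers above (hypothetical caller), building a
+ * minimal send/expect pair on an existing ruleset:
+ *
+ *     const char *const ping[] = { "PING", "\r\n", NULL };
+ *
+ *     if (!add_tcpcheck_send_strs(rules, ping) ||
+ *         !add_tcpcheck_expect_str(rules, "+PONG"))
+ *         return 0;
+ */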
+
+/* Parses the "tcp-check" proxy keyword */
+static int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rule *chk = NULL;
+ int index, cur_arg, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[0], NULL))
+ ret = 1;
+
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*tcp-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ memprintf(errmsg, "out of memory.\n");
+ goto error;
+ }
+ }
+
+ index = 0;
+ if (!LIST_ISEMPTY(&rs->rules)) {
+ chk = LIST_PREV(&rs->rules, typeof(chk), list);
+ index = chk->index + 1;
+ chk = NULL;
+ }
+
+ cur_arg = 1;
+ if (strcmp(args[cur_arg], "connect") == 0)
+ chk = parse_tcpcheck_connect(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "send") == 0 || strcmp(args[cur_arg], "send-binary") == 0 ||
+ strcmp(args[cur_arg], "send-lf") == 0 || strcmp(args[cur_arg], "send-binary-lf") == 0)
+ chk = parse_tcpcheck_send(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "expect") == 0)
+ chk = parse_tcpcheck_expect(args, cur_arg, curpx, &rs->rules, 0, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "comment") == 0)
+ chk = parse_tcpcheck_comment(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else {
+ struct action_kw *kw = action_kw_tcp_check_lookup(args[cur_arg]);
+
+ if (!kw) {
+ action_kw_tcp_check_build_list(&trash);
+ memprintf(errmsg, "'%s' only supports 'comment', 'connect', 'send', 'send-binary', 'expect'"
+				 "%s%s, but got '%s'",
+ args[0], (*trash.area ? ", " : ""), trash.area, args[1]);
+ goto error;
+ }
+ chk = parse_tcpcheck_action(args, cur_arg, curpx, &rs->rules, kw, file, line, errmsg);
+ }
+
+ if (!chk) {
+ memprintf(errmsg, "'%s %s' : %s.", args[0], args[1], *errmsg);
+ goto error;
+ }
+ ret = (ret || (*errmsg != NULL)); /* Handle warning */
+
+ /* No error: add the tcp-check rule in the list */
+ chk->index = index;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ if ((curpx->options2 & PR_O2_CHK_ANY) == PR_O2_TCPCHK_CHK &&
+ (curpx->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
+ /* Use this ruleset if the proxy already has tcp-check enabled */
+ curpx->tcpcheck_rules.list = &rs->rules;
+ curpx->tcpcheck_rules.flags &= ~TCPCHK_RULES_UNUSED_TCP_RS;
+ }
+ else {
+ /* mark this ruleset as unused for now */
+ curpx->tcpcheck_rules.flags |= TCPCHK_RULES_UNUSED_TCP_RS;
+ }
+
+ return ret;
+
+ error:
+ free_tcpcheck(chk, 0);
+ free_tcpcheck_ruleset(rs);
+ return -1;
+}
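+
+/* Example of a ruleset this parser builds (hypothetical configuration):
+ *     backend smtp
+ *         option tcp-check
+ *         tcp-check connect
+ *         tcp-check expect rstring ^220
+ *         tcp-check send QUIT\r\n
+ *         tcp-check expect rstring ^221
+ */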
+
+/* Parses the "http-check" proxy keyword */
+static int proxy_parse_httpcheck(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rule *chk = NULL;
+ int index, cur_arg, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[0], NULL))
+ ret = 1;
+
+ cur_arg = 1;
+ if (strcmp(args[cur_arg], "disable-on-404") == 0) {
+ /* enable a graceful server shutdown on an HTTP 404 response */
+ curpx->options |= PR_O_DISABLE404;
+ if (too_many_args(1, args, errmsg, NULL))
+ goto error;
+ goto out;
+ }
+ else if (strcmp(args[cur_arg], "send-state") == 0) {
+ /* enable emission of the apparent state of a server in HTTP checks */
+ curpx->options2 |= PR_O2_CHK_SNDST;
+ if (too_many_args(1, args, errmsg, NULL))
+ goto error;
+ goto out;
+ }
+
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*http-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ memprintf(errmsg, "out of memory.\n");
+ goto error;
+ }
+ }
+
+ index = 0;
+ if (!LIST_ISEMPTY(&rs->rules)) {
+ chk = LIST_PREV(&rs->rules, typeof(chk), list);
+ if (chk->action != TCPCHK_ACT_SEND || !(chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT))
+ index = chk->index + 1;
+ chk = NULL;
+ }
+
+ if (strcmp(args[cur_arg], "connect") == 0)
+ chk = parse_tcpcheck_connect(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "send") == 0)
+ chk = parse_tcpcheck_send_http(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "expect") == 0)
+ chk = parse_tcpcheck_expect(args, cur_arg, curpx, &rs->rules, TCPCHK_RULES_HTTP_CHK,
+ file, line, errmsg);
+ else if (strcmp(args[cur_arg], "comment") == 0)
+ chk = parse_tcpcheck_comment(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else {
+ struct action_kw *kw = action_kw_tcp_check_lookup(args[cur_arg]);
+
+ if (!kw) {
+ action_kw_tcp_check_build_list(&trash);
+ memprintf(errmsg, "'%s' only supports 'disable-on-404', 'send-state', 'comment', 'connect',"
+				 " 'send', 'expect'%s%s, but got '%s'",
+ args[0], (*trash.area ? ", " : ""), trash.area, args[1]);
+ goto error;
+ }
+ chk = parse_tcpcheck_action(args, cur_arg, curpx, &rs->rules, kw, file, line, errmsg);
+ }
+
+ if (!chk) {
+ memprintf(errmsg, "'%s %s' : %s.", args[0], args[1], *errmsg);
+ goto error;
+ }
+ ret = (*errmsg != NULL); /* Handle warning */
+
+ chk->index = index;
+ if ((curpx->options2 & PR_O2_CHK_ANY) == PR_O2_TCPCHK_CHK &&
+ (curpx->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
+ /* Use this ruleset if the proxy already has http-check enabled */
+ curpx->tcpcheck_rules.list = &rs->rules;
+ curpx->tcpcheck_rules.flags &= ~TCPCHK_RULES_UNUSED_HTTP_RS;
+ if (!tcpcheck_add_http_rule(chk, &curpx->tcpcheck_rules, errmsg)) {
+ memprintf(errmsg, "'%s %s' : %s.", args[0], args[1], *errmsg);
+ curpx->tcpcheck_rules.list = NULL;
+ goto error;
+ }
+ }
+ else {
+ /* mark this ruleset as unused for now */
+ curpx->tcpcheck_rules.flags |= TCPCHK_RULES_UNUSED_HTTP_RS;
+ LIST_APPEND(&rs->rules, &chk->list);
+ }
+
+ out:
+ return ret;
+
+ error:
+ free_tcpcheck(chk, 0);
+ free_tcpcheck_ruleset(rs);
+ return -1;
+}
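+
+/* Example (hypothetical configuration):
+ *     backend app
+ *         option httpchk
+ *         http-check disable-on-404
+ *         http-check send meth GET uri /health
+ *         http-check expect status 200
+ */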
+
+/* Parses the "option redis-check" proxy keyword */
+int proxy_parse_redis_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ static char *redis_req = "*1\r\n$4\r\nPING\r\n";
+ static char *redis_res = "+PONG\r\n";
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*redis-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*redis-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send", redis_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "string", redis_res,
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(0,0),cut_crlf]",
+ "on-success", "Redis server is ok",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_REDIS_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_REDIS_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parses the "option ssl-hello-chk" proxy keyword */
+int proxy_parse_ssl_hello_chk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ /* This is the SSLv3 CLIENT HELLO packet used in conjunction with the
+ * ssl-hello-chk option to ensure that the remote server speaks SSL.
+ *
+ * Check RFC 2246 (TLSv1.0) sections A.3 and A.4 for details.
+ */
+ static char sslv3_client_hello[] = {
+ "16" /* ContentType : 0x16 = Handshake */
+ "0300" /* ProtocolVersion : 0x0300 = SSLv3 */
+ "0079" /* ContentLength : 0x79 bytes after this one */
+		"01"                   /* HandshakeType : 0x01 = CLIENT HELLO */
+ "000075" /* HandshakeLength : 0x75 bytes after this one */
+ "0300" /* Hello Version : 0x0300 = v3 */
+ "%[date(),htonl,hex]" /* Unix GMT Time (s) : filled with <now> (@0x0B) */
+ "%[str(HAPROXYSSLCHK\nHAPROXYSSLCHK\n),hex]" /* Random : must be exactly 28 bytes */
+ "00" /* Session ID length : empty (no session ID) */
+ "004E" /* Cipher Suite Length : 78 bytes after this one */
+ "0001" "0002" "0003" "0004" /* 39 most common ciphers : */
+ "0005" "0006" "0007" "0008" /* 0x01...0x1B, 0x2F...0x3A */
+ "0009" "000A" "000B" "000C" /* This covers RSA/DH, */
+ "000D" "000E" "000F" "0010" /* various bit lengths, */
+ "0011" "0012" "0013" "0014" /* SHA1/MD5, DES/3DES/AES... */
+ "0015" "0016" "0017" "0018"
+ "0019" "001A" "001B" "002F"
+ "0030" "0031" "0032" "0033"
+ "0034" "0035" "0036" "0037"
+ "0038" "0039" "003A"
+ "01" /* Compression Length : 0x01 = 1 byte for types */
+ "00" /* Compression Type : 0x00 = NULL compression */
+ };
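+
+	/* Length sanity check: ContentLength 0x79 = 1 (HandshakeType)
+	 * + 3 (HandshakeLength) + 0x75, and HandshakeLength 0x75 =
+	 * 2 (version) + 4 (time) + 28 (random) + 1 (session ID length)
+	 * + 2 (cipher suite length) + 78 (ciphers) + 1 (compression length)
+	 * + 1 (compression type) = 117 bytes.
+	 */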
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*ssl-hello-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*ssl-hello-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary-lf", sslv3_client_hello, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rbinary", "^1[56]",
+ "min-recv", "5", "ok-status", "L6OK",
+ "error-status", "L6RSP", "tout-status", "L6TOUT",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SSL3_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_SSL3_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parses the "option smtpchk" proxy keyword */
+int proxy_parse_smtpchk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ static char *smtp_req = "%[var(check.smtp_cmd)]\r\n";
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ struct tcpcheck_var *var = NULL;
+ char *cmd = NULL, *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(2, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ cur_arg += 2;
+ if (*args[cur_arg] && *args[cur_arg+1] &&
+ (strcmp(args[cur_arg], "EHLO") == 0 || strcmp(args[cur_arg], "HELO") == 0)) {
+ /* <EHLO|HELO> + space (1) + <host> + null byte (1) */
+ cmd = calloc(strlen(args[cur_arg]) + 1 + strlen(args[cur_arg+1]) + 1, sizeof(*cmd));
+ if (cmd)
+ sprintf(cmd, "%s %s", args[cur_arg], args[cur_arg+1]);
+ }
+ else {
+		/* This is the default for now. It could be extended to support other
+		 * commands, but anything other than an EHLO or HELO is unlikely to
+		 * be useful here.
+		 */
+ cmd = strdup("HELO localhost");
+ }
+
+ var = create_tcpcheck_var(ist("check.smtp_cmd"));
+ if (cmd == NULL || var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = cmd;
+ var->data.u.str.data = strlen(cmd);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ cmd = NULL;
+ var = NULL;
+
+ rs = find_tcpcheck_ruleset("*smtp-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*smtp-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_connect((char *[]){"tcp-check", "connect", "default", "linger", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^[0-9]{3}[ \r]",
+ "min-recv", "4",
+ "error-status", "L7RSP",
+ "on-error", "%[res.payload(0,0),cut_crlf]",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^2[0-9]{2}[ \r]",
+ "min-recv", "4",
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "status-code", "res.payload(0,3)",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 2;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-lf", smtp_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 3;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^(2[0-9]{2}-[^\r]*\r\n)*2[0-9]{2}[ \r]",
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "on-success", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "status-code", "res.payload(0,3)",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 4;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ /* Send an SMTP QUIT to ensure clean disconnect (issue 1812), and expect a 2xx response code */
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send", "QUIT\r\n", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 5;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^2[0-9]{2}[- \r]",
+ "min-recv", "4",
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "on-success", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "status-code", "res.payload(0,3)",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 6;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_SMTP_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free(cmd);
+ free(var);
+ free_tcpcheck_vars(&rules->preset_vars);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parses the "option pgsql-check" proxy keyword */
+int proxy_parse_pgsql_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ static char pgsql_req[] = {
+		"%[var(check.plen),htonl,hex]" /* the packet length */
+ "00030000" /* the version 3.0 */
+ "7573657200" /* "user" key */
+ "%[var(check.username),hex]00" /* the username */
+ "00"
+ };
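+
+	/* The length preset in check.plen below covers the whole
+	 * StartupMessage: 4 (length) + 4 (version) + 5 ("user\0")
+	 * + strlen(<username>) + 1 ('\0') + 1 (final '\0')
+	 * = 15 + strlen(<username>).
+	 */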
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ struct tcpcheck_var *var = NULL;
+ char *user = NULL, *errmsg = NULL;
+ size_t packetlen = 0;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(2, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ cur_arg += 2;
+ if (!*args[cur_arg] || !*args[cur_arg+1]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects 'user <username>' as argument.\n",
+ file, line, args[0], args[1]);
+ goto error;
+ }
+ if (strcmp(args[cur_arg], "user") == 0) {
+ packetlen = 15 + strlen(args[cur_arg+1]);
+ user = strdup(args[cur_arg+1]);
+
+ var = create_tcpcheck_var(ist("check.username"));
+ if (user == NULL || var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = user;
+ var->data.u.str.data = strlen(user);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ user = NULL;
+ var = NULL;
+
+ var = create_tcpcheck_var(ist("check.plen"));
+ if (var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_SINT;
+ var->data.u.sint = packetlen;
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ var = NULL;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'user'.\n",
+ file, line, args[0], args[1]);
+ goto error;
+ }
+
+ rs = find_tcpcheck_ruleset("*pgsql-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*pgsql-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_connect((char *[]){"tcp-check", "connect", "default", "linger", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary-lf", pgsql_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "!rstring", "^E",
+ "min-recv", "5",
+ "error-status", "L7RSP",
+ "on-error", "%[res.payload(6,0)]",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_PGSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 2;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rbinary", "^52000000[A-Z0-9]{2}000000(00|02|03|04|05|06|07|09|0A)",
+ "min-recv", "9",
+ "error-status", "L7STS",
+ "on-success", "PostgreSQL server is ok",
+ "on-error", "PostgreSQL unknown error",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_PGSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 3;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_PGSQL_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free(user);
+ free(var);
+ free_tcpcheck_vars(&rules->preset_vars);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parses the "option mysql-check" proxy keyword */
+int proxy_parse_mysql_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ /* This is an example of a MySQL >=4.0 client Authentication packet kindly provided by Cyril Bonte.
+ * const char mysql40_client_auth_pkt[] = {
+ * "\x0e\x00\x00" // packet length
+ * "\x01" // packet number
+ * "\x00\x00" // client capabilities
+ * "\x00\x00\x01" // max packet
+ * "haproxy\x00" // username (null terminated string)
+ * "\x00" // filler (always 0x00)
+ * "\x01\x00\x00" // packet length
+ * "\x00" // packet number
+ * "\x01" // COM_QUIT command
+ * };
+ */
+ static char mysql40_rsname[] = "*mysql40-check";
+ static char mysql40_req[] = {
+ "%[var(check.header),hex]" /* 3 bytes for the packet length and 1 byte for the sequence ID */
+ "0080" /* client capabilities */
+ "000001" /* max packet */
+ "%[var(check.username),hex]00" /* the username */
+ "00" /* filler (always 0x00) */
+		"010000"                       /* packet length */
+ "00" /* sequence ID */
+ "01" /* COM_QUIT command */
+ };
+
+ /* This is an example of a MySQL >=4.1 client Authentication packet provided by Nenad Merdanovic.
+ * const char mysql41_client_auth_pkt[] = {
+	 *   "\x0e\x00\x00"      // packet length
+ * "\x01" // packet number
+ * "\x00\x00\x00\x00" // client capabilities
+ * "\x00\x00\x00\x01" // max packet
+ * "\x21" // character set (UTF-8)
+ * char[23] // All zeroes
+ * "haproxy\x00" // username (null terminated string)
+ * "\x00" // filler (always 0x00)
+ * "\x01\x00\x00" // packet length
+ * "\x00" // packet number
+ * "\x01" // COM_QUIT command
+ * };
+ */
+ static char mysql41_rsname[] = "*mysql41-check";
+ static char mysql41_req[] = {
+ "%[var(check.header),hex]" /* 3 bytes for the packet length and 1 byte for the sequence ID */
+ "00820000" /* client capabilities */
+ "00800001" /* max packet */
+ "21" /* character set (UTF-8) */
+		"000000000000000000000000"     /* 23 bytes, all zeroes */
+ "0000000000000000000000"
+ "%[var(check.username),hex]00" /* the username */
+ "00" /* filler (always 0x00) */
+		"010000"                       /* packet length */
+ "00" /* sequence ID */
+ "01" /* COM_QUIT command */
+ };
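+
+	/* In both requests, check.header holds the 4-byte MySQL packet header:
+	 * bytes 0-2 are the little-endian payload length and byte 3 the
+	 * sequence ID (1). The payload lengths computed below match the
+	 * packets above: pre-41, 2 (caps) + 3 (max packet) + <userlen>
+	 * + 1 (NUL) + 1 (filler) = <userlen> + 7; post-41, 4 (caps)
+	 * + 4 (max packet) + 1 (charset) + 23 (zeroes) + <userlen>
+	 * + 1 (NUL) + 1 (filler) = <userlen> + 7 + 27.
+	 */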
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ struct tcpcheck_var *var = NULL;
+ char *mysql_rsname = "*mysql-check";
+ char *mysql_req = NULL, *hdr = NULL, *user = NULL, *errmsg = NULL;
+ int index = 0, err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(3, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ cur_arg += 2;
+ if (*args[cur_arg]) {
+ int packetlen, userlen;
+
+ if (strcmp(args[cur_arg], "user") != 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'user' (got '%s').\n",
+ file, line, args[0], args[1], args[cur_arg]);
+ goto error;
+ }
+
+ if (*(args[cur_arg+1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <username> as argument.\n",
+ file, line, args[0], args[1], args[cur_arg]);
+ goto error;
+ }
+
+ hdr = calloc(4, sizeof(*hdr));
+ user = strdup(args[cur_arg+1]);
+ userlen = strlen(args[cur_arg+1]);
+
+ if (hdr == NULL || user == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ if (!*args[cur_arg+2] || strcmp(args[cur_arg+2], "post-41") == 0) {
+ packetlen = userlen + 7 + 27;
+ mysql_req = mysql41_req;
+ mysql_rsname = mysql41_rsname;
+ }
+ else if (strcmp(args[cur_arg+2], "pre-41") == 0) {
+ packetlen = userlen + 7;
+ mysql_req = mysql40_req;
+ mysql_rsname = mysql40_rsname;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : keyword '%s' only supports 'post-41' and 'pre-41' (got '%s').\n",
+ file, line, args[cur_arg], args[cur_arg+2]);
+ goto error;
+ }
+
+ hdr[0] = (unsigned char)(packetlen & 0xff);
+ hdr[1] = (unsigned char)((packetlen >> 8) & 0xff);
+ hdr[2] = (unsigned char)((packetlen >> 16) & 0xff);
+ hdr[3] = 1;
+
+ var = create_tcpcheck_var(ist("check.header"));
+ if (var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = hdr;
+ var->data.u.str.data = 4;
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ hdr = NULL;
+ var = NULL;
+
+ var = create_tcpcheck_var(ist("check.username"));
+ if (var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = user;
+ var->data.u.str.data = strlen(user);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ user = NULL;
+ var = NULL;
+ }
+
+ rs = find_tcpcheck_ruleset(mysql_rsname);
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset(mysql_rsname);
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_connect((char *[]){"tcp-check", "connect", "default", "linger", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ if (mysql_req) {
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary-lf", mysql_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+ }
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_MYSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_mysql_expect_iniths;
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ if (mysql_req) {
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_MYSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_mysql_expect_ok;
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+ }
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_MYSQL_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free(hdr);
+ free(user);
+ free(var);
+ free_tcpcheck_vars(&rules->preset_vars);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
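+/* Parses the "option ldap-check" proxy keyword */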
+int proxy_parse_ldap_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ static char *ldap_req = "300C020101600702010304008000";
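+
+	/* Decoded, this is a BER-encoded anonymous LDAPv3 BindRequest:
+	 *   30 0C      SEQUENCE, length 12      (LDAPMessage)
+	 *   02 01 01   INTEGER 1                (messageID)
+	 *   60 07      [APPLICATION 0], len 7   (BindRequest)
+	 *   02 01 03   INTEGER 3                (version)
+	 *   04 00      OCTET STRING, empty      (name)
+	 *   80 00      [0], empty               (simple authentication)
+	 */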
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*ldap-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*ldap-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary", ldap_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rbinary", "^30",
+ "min-recv", "14",
+ "on-error", "Not LDAPv3 protocol",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_LDAP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_LDAP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_ldap_expect_bindrsp;
+ chk->index = 2;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_LDAP_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
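+/* Parses the "option spop-check" proxy keyword */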
+int proxy_parse_spop_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *spop_req = NULL;
+ char *errmsg = NULL;
+ int spop_len = 0, err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*spop-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*spop-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ if (spoe_prepare_healthcheck_request(&spop_req, &spop_len) == -1) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ chunk_reset(&trash);
+ dump_binary(&trash, spop_req, spop_len);
+ trash.area[trash.data] = '\0';
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary", b_head(&trash), ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", "min-recv", "4", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SPOP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_spop_expect_agenthello;
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_SPOP_CHK;
+
+ out:
+ free(spop_req);
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
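+/* Creates the implicit http send rule from an "option httpchk" line, using
+ * the optional <method>, <uri> and <version> arguments. Returns the rule on
+ * success and NULL on error, in which case <errmsg> is filled.
+ */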
+static struct tcpcheck_rule *proxy_parse_httpchk_req(char **args, int cur_arg, struct proxy *px, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct tcpcheck_http_hdr *hdr = NULL;
+ char *meth = NULL, *uri = NULL, *vsn = NULL;
+ char *hdrs, *body;
+
+ hdrs = (*args[cur_arg+2] ? strstr(args[cur_arg+2], "\r\n") : NULL);
+ body = (*args[cur_arg+2] ? strstr(args[cur_arg+2], "\r\n\r\n") : NULL);
+ if (hdrs == body)
+ hdrs = NULL;
+ if (hdrs) {
+ *hdrs = '\0';
+ hdrs +=2;
+ }
+ if (body) {
+ *body = '\0';
+ body += 4;
+ }
+ if (hdrs || body) {
+ memprintf(errmsg, "hiding headers or body at the end of the version string is deprecated."
+			  " Please consider using the 'http-check send' directive instead.");
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_SEND;
+ chk->send.type = TCPCHK_SEND_HTTP;
+ chk->send.http.flags |= TCPCHK_SND_HTTP_FROM_OPT;
+ chk->send.http.meth.meth = HTTP_METH_OPTIONS;
+ LIST_INIT(&chk->send.http.hdrs);
+
+ /* Copy the method, uri and version */
+ if (*args[cur_arg]) {
+ if (!*args[cur_arg+1])
+ uri = args[cur_arg];
+ else
+ meth = args[cur_arg];
+ }
+ if (*args[cur_arg+1])
+ uri = args[cur_arg+1];
+ if (*args[cur_arg+2])
+ vsn = args[cur_arg+2];
+
+ if (meth) {
+ chk->send.http.meth.meth = find_http_meth(meth, strlen(meth));
+ chk->send.http.meth.str.area = strdup(meth);
+ chk->send.http.meth.str.data = strlen(meth);
+ if (!chk->send.http.meth.str.area) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ if (uri) {
+ chk->send.http.uri = ist(strdup(uri));
+ if (!isttest(chk->send.http.uri)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ if (vsn) {
+ chk->send.http.vsn = ist(strdup(vsn));
+ if (!isttest(chk->send.http.vsn)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+
+ /* Copy the header */
+ if (hdrs) {
+ struct http_hdr tmp_hdrs[global.tune.max_http_hdr];
+ struct h1m h1m;
+ int i, ret;
+
+ /* Build and parse the request */
+ chunk_printf(&trash, "%s\r\n\r\n", hdrs);
+
+ h1m.flags = H1_MF_HDRS_ONLY;
+ ret = h1_headers_to_hdr_list(b_orig(&trash), b_tail(&trash),
+ tmp_hdrs, sizeof(tmp_hdrs)/sizeof(tmp_hdrs[0]),
+ &h1m, NULL);
+ if (ret <= 0) {
+ memprintf(errmsg, "unable to parse the request '%s'.", b_orig(&trash));
+ goto error;
+ }
+
+ for (i = 0; istlen(tmp_hdrs[i].n); i++) {
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ LIST_INIT(&hdr->value);
+ hdr->name = istdup(tmp_hdrs[i].n);
+ if (!isttest(hdr->name)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+
+ ist0(tmp_hdrs[i].v);
+ if (!parse_logformat_string(istptr(tmp_hdrs[i].v), px, &hdr->value, 0, SMP_VAL_BE_CHK_RUL, errmsg))
+ goto error;
+ LIST_APPEND(&chk->send.http.hdrs, &hdr->list);
+ }
+ }
+
+ /* Copy the body */
+ if (body) {
+ chk->send.http.body = ist(strdup(body));
+ if (!isttest(chk->send.http.body)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+
+ return chk;
+
+ error:
+ free_tcpcheck_http_hdr(hdr);
+ free_tcpcheck(chk, 0);
+ return NULL;
+}
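+
+/* Example of the deprecated form handled above (hypothetical configuration):
+ *     option httpchk OPTIONS * HTTP/1.1\r\nHost:\ www
+ * where extra headers (and a body after an empty line) are hidden at the end
+ * of the version string; "http-check send" is the modern replacement.
+ */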
+
+/* Parses the "option httpchk" proxy keyword */
+int proxy_parse_httpchk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(3, 1, file, line, args, &err_code))
+ goto out;
+
+ chk = proxy_parse_httpchk_req(args, cur_arg+2, curpx, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s.\n", file, line, args[0], args[1], errmsg);
+ goto error;
+ }
+ if (errmsg) {
+ ha_warning("parsing [%s:%d]: '%s %s' : %s\n", file, line, args[0], args[1], errmsg);
+ err_code |= ERR_WARN;
+ ha_free(&errmsg);
+ }
+
+ no_request:
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags |= TCPCHK_SND_HTTP_FROM_OPT;
+
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*http-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ }
+
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_HTTP_CHK;
+ if (!tcpcheck_add_http_rule(chk, rules, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s.\n", file, line, args[0], args[1], errmsg);
+ rules->list = NULL;
+ goto error;
+ }
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ free_tcpcheck(chk, 0);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parses the "option tcp-check" proxy keyword */
+int proxy_parse_tcp_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ if ((rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
+		/* If a tcp-check ruleset is already set, do nothing */
+ if (rules->list)
+ goto out;
+
+ /* If a tcp-check ruleset is waiting to be used for the current proxy,
+ * get it.
+ */
+ if (rules->flags & TCPCHK_RULES_UNUSED_TCP_RS)
+ goto curpx_ruleset;
+
+ /* Otherwise, try to get the tcp-check ruleset of the default proxy */
+ chunk_printf(&trash, "*tcp-check-defaults_%s-%d", defpx->conf.file, defpx->conf.line);
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs)
+ goto ruleset_found;
+ }
+
+ curpx_ruleset:
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*tcp-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ }
+
+ ruleset_found:
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_TCP_CHK;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "http-check", proxy_parse_httpcheck },
+ { CFG_LISTEN, "tcp-check", proxy_parse_tcpcheck },
+ { 0, NULL, NULL },
+}};
+
+REGISTER_POST_PROXY_CHECK(check_proxy_tcpcheck);
+REGISTER_PROXY_DEINIT(deinit_proxy_tcpcheck);
+REGISTER_POST_DEINIT(deinit_tcpchecks);
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/thread.c b/src/thread.c
new file mode 100644
index 0000000..369bf8a
--- /dev/null
+++ b/src/thread.c
@@ -0,0 +1,1371 @@
+/*
+ * functions about threads.
+ *
+ * Copyright (C) 2017 Christopher Faulet - cfaulet@haproxy.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <signal.h>
+#include <unistd.h>
+#ifdef _POSIX_PRIORITY_SCHEDULING
+#include <sched.h>
+#endif
+
+#ifdef USE_THREAD
+# include <pthread.h>
+#endif
+
+#ifdef USE_CPU_AFFINITY
+# include <sched.h>
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+# include <sys/param.h>
+# ifdef __FreeBSD__
+# include <sys/cpuset.h>
+# endif
+# include <pthread_np.h>
+# endif
+# ifdef __APPLE__
+# include <mach/mach_types.h>
+# include <mach/thread_act.h>
+# include <mach/thread_policy.h>
+# endif
+# include <haproxy/cpuset.h>
+#endif
+
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+struct tgroup_info ha_tgroup_info[MAX_TGROUPS] = { };
+THREAD_LOCAL const struct tgroup_info *tg = &ha_tgroup_info[0];
+
+struct thread_info ha_thread_info[MAX_THREADS] = { };
+THREAD_LOCAL const struct thread_info *ti = &ha_thread_info[0];
+
+struct thread_ctx ha_thread_ctx[MAX_THREADS] = { };
+THREAD_LOCAL struct thread_ctx *th_ctx = &ha_thread_ctx[0];
+
+#ifdef USE_THREAD
+
+volatile unsigned long threads_want_rdv_mask __read_mostly = 0;
+volatile unsigned long threads_harmless_mask = 0;
+volatile unsigned long threads_idle_mask = 0;
+volatile unsigned long threads_sync_mask = 0;
+volatile unsigned long all_threads_mask __read_mostly = 1; // nbthread 1 assumed by default
+THREAD_LOCAL unsigned int tgid = 1; // thread group ID, starts at 1
+THREAD_LOCAL unsigned int tid = 0;
+THREAD_LOCAL unsigned long tid_bit = (1UL << 0);
+int thread_cpus_enabled_at_boot = 1;
+static pthread_t ha_pthread[MAX_THREADS] = { };
+
+/* Marks the thread as harmless until the last thread using the rendez-vous
+ * point quits, excluding the current one. Thus an isolated thread may be safely
+ * marked as harmless. Given that we can wait for a long time, sched_yield() is
+ * used when available to offer the CPU resources to competing threads if
+ * needed.
+ */
+void thread_harmless_till_end()
+{
+ _HA_ATOMIC_OR(&threads_harmless_mask, tid_bit);
+ while (threads_want_rdv_mask & all_threads_mask & ~tid_bit) {
+ ha_thread_relax();
+ }
+}
+
+/* Isolates the current thread : request the ability to work while all other
+ * threads are harmless, as defined by thread_harmless_now() (i.e. they're not
+ * going to touch any visible memory area). Only returns once all of them are
+ * harmless, with the current thread's bit in threads_harmless_mask cleared.
+ * Needs to be completed using thread_release().
+ */
+void thread_isolate()
+{
+ unsigned long old;
+
+ _HA_ATOMIC_OR(&threads_harmless_mask, tid_bit);
+ __ha_barrier_atomic_store();
+ _HA_ATOMIC_OR(&threads_want_rdv_mask, tid_bit);
+
+ /* wait for all threads to become harmless */
+ old = threads_harmless_mask;
+ while (1) {
+ if (unlikely((old & all_threads_mask) != all_threads_mask))
+ old = threads_harmless_mask;
+ else if (_HA_ATOMIC_CAS(&threads_harmless_mask, &old, old & ~tid_bit))
+ break;
+
+ ha_thread_relax();
+ }
+	/* one thread gets released at a time here, with its harmless bit off.
+	 * The loss of this bit makes the other ones continue to spin while this
+	 * thread is working alone.
+ */
+}
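+
+/* Usage sketch (hypothetical caller):
+ *
+ *     thread_isolate();
+ *     ... safely modify a structure shared by all threads ...
+ *     thread_release();
+ */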
+
+/* Isolates the current thread : request the ability to work while all other
+ * threads are idle, as defined by thread_idle_now(). It only returns once
+ * all of them are both harmless and idle, with the current thread's bit in
+ * threads_harmless_mask and idle_mask cleared. Needs to be completed using
+ * thread_release(). By doing so the thread also engages in being safe against
+ * any actions that other threads might be about to start under the same
+ * conditions. This specifically targets destruction of any internal structure,
+ * which implies that the current thread may not hold references to any object.
+ *
+ * Note that a concurrent thread_isolate() will usually win against
+ * thread_isolate_full() as it doesn't consider the idle_mask, allowing it to
+ * get back to the poller or any other fully idle location, that will
+ * ultimately release this one.
+ */
+void thread_isolate_full()
+{
+ unsigned long old;
+
+ _HA_ATOMIC_OR(&threads_idle_mask, tid_bit);
+ _HA_ATOMIC_OR(&threads_harmless_mask, tid_bit);
+ __ha_barrier_atomic_store();
+ _HA_ATOMIC_OR(&threads_want_rdv_mask, tid_bit);
+
+ /* wait for all threads to become harmless */
+ old = threads_harmless_mask;
+ while (1) {
+ unsigned long idle = _HA_ATOMIC_LOAD(&threads_idle_mask);
+
+ if (unlikely((old & all_threads_mask) != all_threads_mask))
+ old = _HA_ATOMIC_LOAD(&threads_harmless_mask);
+ else if ((idle & all_threads_mask) == all_threads_mask &&
+ _HA_ATOMIC_CAS(&threads_harmless_mask, &old, old & ~tid_bit))
+ break;
+
+ ha_thread_relax();
+ }
+
+ /* we're not idle anymore at this point. Other threads waiting on this
+	 * condition will need to wait until our next pass to the poller, or
+ * our next call to thread_isolate_full().
+ */
+ _HA_ATOMIC_AND(&threads_idle_mask, ~tid_bit);
+}
+
+/* Cancels the effect of thread_isolate() by releasing the current thread's bit
+ * in threads_want_rdv_mask. This immediately allows other threads to expect to be
+ * executed, though they will first have to wait for this thread to become
+ * harmless again (possibly by reaching the poller again).
+ */
+void thread_release()
+{
+ _HA_ATOMIC_AND(&threads_want_rdv_mask, ~tid_bit);
+}
+
+/* Cancels the effect of thread_isolate() by releasing the current thread's bit
+ * in threads_want_rdv_mask and by marking this thread as harmless until the
+ * last worker finishes. The difference with thread_release() is that this one
+ * will not leave the function before others are notified to do the same, so it
+ * guarantees that the current thread will not pass through a subsequent call
+ * to thread_isolate() before others finish.
+ */
+void thread_sync_release()
+{
+ _HA_ATOMIC_OR(&threads_sync_mask, tid_bit);
+ __ha_barrier_atomic_store();
+ _HA_ATOMIC_AND(&threads_want_rdv_mask, ~tid_bit);
+
+ while (threads_want_rdv_mask & all_threads_mask) {
+ _HA_ATOMIC_OR(&threads_harmless_mask, tid_bit);
+ while (threads_want_rdv_mask & all_threads_mask)
+ ha_thread_relax();
+ HA_ATOMIC_AND(&threads_harmless_mask, ~tid_bit);
+ }
+
+ /* the current thread is not harmless anymore, thread_isolate()
+ * is forced to wait till all waiters finish.
+ */
+ _HA_ATOMIC_AND(&threads_sync_mask, ~tid_bit);
+ while (threads_sync_mask & all_threads_mask)
+ ha_thread_relax();
+}
+
+/* Sets up threads, signals and masks, and starts threads 2 and above.
+ * Does nothing when threads are disabled.
+ */
+void setup_extra_threads(void *(*handler)(void *))
+{
+ sigset_t blocked_sig, old_sig;
+ int i;
+
+ /* ensure the signals will be blocked in every thread */
+ sigfillset(&blocked_sig);
+ sigdelset(&blocked_sig, SIGPROF);
+ sigdelset(&blocked_sig, SIGBUS);
+ sigdelset(&blocked_sig, SIGFPE);
+ sigdelset(&blocked_sig, SIGILL);
+ sigdelset(&blocked_sig, SIGSEGV);
+ pthread_sigmask(SIG_SETMASK, &blocked_sig, &old_sig);
+
+	/* Create nbthread-1 threads. The first thread is the current process */
+ ha_pthread[0] = pthread_self();
+ for (i = 1; i < global.nbthread; i++)
+ pthread_create(&ha_pthread[i], NULL, handler, &ha_thread_info[i]);
+}
+
+/* waits for all threads to terminate. Does nothing when threads are
+ * disabled.
+ */
+void wait_for_threads_completion()
+{
+ int i;
+
+ /* Wait for the end of the other threads */
+ for (i = 1; i < global.nbthread; i++)
+ pthread_join(ha_pthread[i], NULL);
+
+#if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+ show_lock_stats();
+#endif
+}
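+
+/* Boot sequence sketch (illustrative; run_thread_poll_loop is assumed to be
+ * the per-thread entry point the caller passes in):
+ *
+ *   setup_extra_threads(run_thread_poll_loop);  // start threads 2..nbthread
+ *   run_thread_poll_loop(&ha_thread_info[0]);   // thread 1 runs in place
+ *   wait_for_threads_completion();              // join threads 2..nbthread
+ */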
+
+/* Tries to set the current thread's CPU affinity according to the cpu_map */
+void set_thread_cpu_affinity()
+{
+#if defined(USE_CPU_AFFINITY)
+ /* no affinity setting for the master process */
+ if (master)
+ return;
+
+ /* Now the CPU affinity for all threads */
+ if (ha_cpuset_count(&cpu_map.proc))
+ ha_cpuset_and(&cpu_map.thread[tid], &cpu_map.proc);
+
+ if (ha_cpuset_count(&cpu_map.thread[tid])) {/* only do this if the thread has a THREAD map */
+# if defined(__APPLE__)
+ /* Note: this API is limited to the first 32/64 CPUs */
+ unsigned long set = cpu_map.thread[tid].cpuset;
+ int j;
+
+ while ((j = ffsl(set)) > 0) {
+ thread_affinity_policy_data_t cpu_set = { j - 1 };
+ thread_port_t mthread;
+
+ mthread = pthread_mach_thread_np(ha_pthread[tid]);
+ thread_policy_set(mthread, THREAD_AFFINITY_POLICY, (thread_policy_t)&cpu_set, 1);
+ set &= ~(1UL << (j - 1));
+ }
+# else
+ struct hap_cpuset *set = &cpu_map.thread[tid];
+
+ pthread_setaffinity_np(ha_pthread[tid], sizeof(set->cpuset), &set->cpuset);
+# endif
+ }
+#endif /* USE_CPU_AFFINITY */
+}
+
+/* Retrieves the opaque pthread_t of thread <thr> cast to an unsigned long long
+ * since POSIX took great care of not specifying its representation, making it
+ * hard to export for post-mortem analysis. For this reason we copy it into a
+ * union and will use the smallest scalar type at least as large as its size,
+ * which will keep endianness and alignment for all regular sizes. As a last
+ * resort we end up with a long long aligned to the first bytes in memory, which
+ * will be endian-dependent if pthread_t is larger than a long long (not seen
+ * yet).
+ */
+unsigned long long ha_get_pthread_id(unsigned int thr)
+{
+ union {
+ pthread_t t;
+ unsigned long long ll;
+ unsigned int i;
+ unsigned short s;
+ unsigned char c;
+ } u = { 0 };
+
+ u.t = ha_pthread[thr];
+
+ if (sizeof(u.t) <= sizeof(u.c))
+ return u.c;
+ else if (sizeof(u.t) <= sizeof(u.s))
+ return u.s;
+ else if (sizeof(u.t) <= sizeof(u.i))
+ return u.i;
+ return u.ll;
+}
+
+/* send signal <sig> to thread <thr> */
+void ha_tkill(unsigned int thr, int sig)
+{
+ pthread_kill(ha_pthread[thr], sig);
+}
+
+/* send signal <sig> to all threads. The calling thread is signaled last in
+ * order to allow all threads to synchronize in the handler.
+ */
+void ha_tkillall(int sig)
+{
+ unsigned int thr;
+
+ for (thr = 0; thr < global.nbthread; thr++) {
+ if (!(all_threads_mask & (1UL << thr)))
+ continue;
+ if (thr == tid)
+ continue;
+ pthread_kill(ha_pthread[thr], sig);
+ }
+ raise(sig);
+}
+
+void ha_thread_relax(void)
+{
+#ifdef _POSIX_PRIORITY_SCHEDULING
+ sched_yield();
+#else
+ pl_cpu_relax();
+#endif
+}
+
+/* these calls are used as callbacks at init time when debugging is on */
+void ha_spin_init(HA_SPINLOCK_T *l)
+{
+ HA_SPIN_INIT(l);
+}
+
+/* these calls are used as callbacks at init time when debugging is on */
+void ha_rwlock_init(HA_RWLOCK_T *l)
+{
+ HA_RWLOCK_INIT(l);
+}
+
+/* returns the number of CPUs the current process is enabled to run on,
+ * regardless of any MAX_THREADS limitation.
+ */
+static int thread_cpus_enabled()
+{
+ int ret = 1;
+
+#ifdef USE_CPU_AFFINITY
+#if defined(__linux__) && defined(CPU_COUNT)
+ cpu_set_t mask;
+
+ if (sched_getaffinity(0, sizeof(mask), &mask) == 0)
+ ret = CPU_COUNT(&mask);
+#elif defined(__FreeBSD__) && defined(USE_CPU_AFFINITY)
+ cpuset_t cpuset;
+ if (cpuset_getaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1,
+ sizeof(cpuset), &cpuset) == 0)
+ ret = CPU_COUNT(&cpuset);
+#elif defined(__APPLE__)
+ ret = (int)sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+#endif
+ ret = MAX(ret, 1);
+ return ret;
+}
+
+/* Returns 1 if the cpu set is currently restricted for the process, else 0.
+ * Currently only implemented for the Linux platform.
+ */
+int thread_cpu_mask_forced()
+{
+#if defined(__linux__)
+ const int cpus_avail = sysconf(_SC_NPROCESSORS_ONLN);
+ return cpus_avail != thread_cpus_enabled();
+#else
+ return 0;
+#endif
+}
+
+/* Below come the lock-debugging functions */
+
+#if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+
+struct lock_stat lock_stats[LOCK_LABELS];
+
+/* this is only used below */
+static const char *lock_label(enum lock_label label)
+{
+ switch (label) {
+ case TASK_RQ_LOCK: return "TASK_RQ";
+ case TASK_WQ_LOCK: return "TASK_WQ";
+ case LISTENER_LOCK: return "LISTENER";
+ case PROXY_LOCK: return "PROXY";
+ case SERVER_LOCK: return "SERVER";
+ case LBPRM_LOCK: return "LBPRM";
+ case SIGNALS_LOCK: return "SIGNALS";
+ case STK_TABLE_LOCK: return "STK_TABLE";
+ case STK_SESS_LOCK: return "STK_SESS";
+ case APPLETS_LOCK: return "APPLETS";
+ case PEER_LOCK: return "PEER";
+ case SHCTX_LOCK: return "SHCTX";
+ case SSL_LOCK: return "SSL";
+ case SSL_GEN_CERTS_LOCK: return "SSL_GEN_CERTS";
+ case PATREF_LOCK: return "PATREF";
+ case PATEXP_LOCK: return "PATEXP";
+ case VARS_LOCK: return "VARS";
+ case COMP_POOL_LOCK: return "COMP_POOL";
+ case LUA_LOCK: return "LUA";
+ case NOTIF_LOCK: return "NOTIF";
+ case SPOE_APPLET_LOCK: return "SPOE_APPLET";
+ case DNS_LOCK: return "DNS";
+ case PID_LIST_LOCK: return "PID_LIST";
+ case EMAIL_ALERTS_LOCK: return "EMAIL_ALERTS";
+ case PIPES_LOCK: return "PIPES";
+ case TLSKEYS_REF_LOCK: return "TLSKEYS_REF";
+ case AUTH_LOCK: return "AUTH";
+ case LOGSRV_LOCK: return "LOGSRV";
+ case DICT_LOCK: return "DICT";
+ case PROTO_LOCK: return "PROTO";
+ case QUEUE_LOCK: return "QUEUE";
+ case CKCH_LOCK: return "CKCH";
+ case SNI_LOCK: return "SNI";
+ case SSL_SERVER_LOCK: return "SSL_SERVER";
+ case SFT_LOCK: return "SFT";
+ case IDLE_CONNS_LOCK: return "IDLE_CONNS";
+ case QUIC_LOCK: return "QUIC";
+ case OTHER_LOCK: return "OTHER";
+ case DEBUG1_LOCK: return "DEBUG1";
+ case DEBUG2_LOCK: return "DEBUG2";
+ case DEBUG3_LOCK: return "DEBUG3";
+ case DEBUG4_LOCK: return "DEBUG4";
+ case DEBUG5_LOCK: return "DEBUG5";
+ case LOCK_LABELS: break; /* keep compiler happy */
+ };
+ /* the only way to get here is following an internal bug */
+ abort();
+}
+
+void show_lock_stats()
+{
+ int lbl;
+
+ for (lbl = 0; lbl < LOCK_LABELS; lbl++) {
+ if (!lock_stats[lbl].num_write_locked &&
+ !lock_stats[lbl].num_seek_locked &&
+ !lock_stats[lbl].num_read_locked) {
+ fprintf(stderr,
+ "Stats about Lock %s: not used\n",
+ lock_label(lbl));
+ continue;
+ }
+
+ fprintf(stderr,
+ "Stats about Lock %s: \n",
+ lock_label(lbl));
+
+ if (lock_stats[lbl].num_write_locked)
+ fprintf(stderr,
+ "\t # write lock : %llu\n"
+ "\t # write unlock: %llu (%lld)\n"
+ "\t # wait time for write : %.3f msec\n"
+ "\t # wait time for write/lock: %.3f nsec\n",
+ (ullong)lock_stats[lbl].num_write_locked,
+ (ullong)lock_stats[lbl].num_write_unlocked,
+ (llong)(lock_stats[lbl].num_write_unlocked - lock_stats[lbl].num_write_locked),
+ (double)lock_stats[lbl].nsec_wait_for_write / 1000000.0,
+ lock_stats[lbl].num_write_locked ? ((double)lock_stats[lbl].nsec_wait_for_write / (double)lock_stats[lbl].num_write_locked) : 0);
+
+ if (lock_stats[lbl].num_seek_locked)
+ fprintf(stderr,
+ "\t # seek lock : %llu\n"
+ "\t # seek unlock : %llu (%lld)\n"
+ "\t # wait time for seek : %.3f msec\n"
+ "\t # wait time for seek/lock : %.3f nsec\n",
+ (ullong)lock_stats[lbl].num_seek_locked,
+ (ullong)lock_stats[lbl].num_seek_unlocked,
+ (llong)(lock_stats[lbl].num_seek_unlocked - lock_stats[lbl].num_seek_locked),
+ (double)lock_stats[lbl].nsec_wait_for_seek / 1000000.0,
+ lock_stats[lbl].num_seek_locked ? ((double)lock_stats[lbl].nsec_wait_for_seek / (double)lock_stats[lbl].num_seek_locked) : 0);
+
+ if (lock_stats[lbl].num_read_locked)
+ fprintf(stderr,
+ "\t # read lock : %llu\n"
+ "\t # read unlock : %llu (%lld)\n"
+ "\t # wait time for read : %.3f msec\n"
+ "\t # wait time for read/lock : %.3f nsec\n",
+ (ullong)lock_stats[lbl].num_read_locked,
+ (ullong)lock_stats[lbl].num_read_unlocked,
+ (llong)(lock_stats[lbl].num_read_unlocked - lock_stats[lbl].num_read_locked),
+ (double)lock_stats[lbl].nsec_wait_for_read / 1000000.0,
+ lock_stats[lbl].num_read_locked ? ((double)lock_stats[lbl].nsec_wait_for_read / (double)lock_stats[lbl].num_read_locked) : 0);
+ }
+}
+
+void __ha_rwlock_init(struct ha_rwlock *l)
+{
+ memset(l, 0, sizeof(struct ha_rwlock));
+ __RWLOCK_INIT(&l->lock);
+}
+
+void __ha_rwlock_destroy(struct ha_rwlock *l)
+{
+ __RWLOCK_DESTROY(&l->lock);
+ memset(l, 0, sizeof(struct ha_rwlock));
+}
+
+
+void __ha_rwlock_wrlock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+
+ if ((l->info.cur_readers | l->info.cur_seeker | l->info.cur_writer) & tid_bit)
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_writers, tid_bit);
+
+ start_time = now_mono_time();
+ __RWLOCK_WRLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ l->info.cur_writer = tid_bit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.wait_writers, ~tid_bit);
+}
+
+int __ha_rwlock_trywrlock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+ int r;
+
+ if ((l->info.cur_readers | l->info.cur_seeker | l->info.cur_writer) & tid_bit)
+ abort();
+
+ /* We set waiting writer because trywrlock could wait for readers to quit */
+ HA_ATOMIC_OR(&l->info.wait_writers, tid_bit);
+
+ start_time = now_mono_time();
+ r = __RWLOCK_TRYWRLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+ if (unlikely(r)) {
+ HA_ATOMIC_AND(&l->info.wait_writers, ~tid_bit);
+ return r;
+ }
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ l->info.cur_writer = tid_bit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.wait_writers, ~tid_bit);
+
+ return 0;
+}
+
+void __ha_rwlock_wrunlock(enum lock_label lbl,struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ if (unlikely(!(l->info.cur_writer & tid_bit))) {
+ /* the thread is not owning the lock for write */
+ abort();
+ }
+
+ l->info.cur_writer = 0;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ __RWLOCK_WRUNLOCK(&l->lock);
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_unlocked);
+}
+
+void __ha_rwlock_rdlock(enum lock_label lbl,struct ha_rwlock *l)
+{
+ uint64_t start_time;
+
+ if ((l->info.cur_readers | l->info.cur_seeker | l->info.cur_writer) & tid_bit)
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_readers, tid_bit);
+
+ start_time = now_mono_time();
+ __RWLOCK_RDLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_read, (now_mono_time() - start_time));
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&l->info.cur_readers, tid_bit);
+
+ HA_ATOMIC_AND(&l->info.wait_readers, ~tid_bit);
+}
+
+int __ha_rwlock_tryrdlock(enum lock_label lbl,struct ha_rwlock *l)
+{
+ int r;
+
+ if ((l->info.cur_readers | l->info.cur_seeker | l->info.cur_writer) & tid_bit)
+ abort();
+
+ /* try read should never wait */
+ r = __RWLOCK_TRYRDLOCK(&l->lock);
+ if (unlikely(r))
+ return r;
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&l->info.cur_readers, tid_bit);
+
+ return 0;
+}
+
+void __ha_rwlock_rdunlock(enum lock_label lbl,struct ha_rwlock *l)
+{
+ if (unlikely(!(l->info.cur_readers & tid_bit))) {
+ /* the thread is not owning the lock for read */
+ abort();
+ }
+
+ HA_ATOMIC_AND(&l->info.cur_readers, ~tid_bit);
+
+ __RWLOCK_RDUNLOCK(&l->lock);
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_unlocked);
+}
+
+void __ha_rwlock_wrtord(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+
+ if ((l->info.cur_readers | l->info.cur_seeker) & tid_bit)
+ abort();
+
+ if (!(l->info.cur_writer & tid_bit))
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_readers, tid_bit);
+
+ start_time = now_mono_time();
+ __RWLOCK_WRTORD(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_read, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&l->info.cur_readers, tid_bit);
+ HA_ATOMIC_AND(&l->info.cur_writer, ~tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.wait_readers, ~tid_bit);
+}
+
+void __ha_rwlock_wrtosk(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+
+ if ((l->info.cur_readers | l->info.cur_seeker) & tid_bit)
+ abort();
+
+ if (!(l->info.cur_writer & tid_bit))
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_seekers, tid_bit);
+
+ start_time = now_mono_time();
+ __RWLOCK_WRTOSK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+
+ HA_ATOMIC_OR(&l->info.cur_seeker, tid_bit);
+ HA_ATOMIC_AND(&l->info.cur_writer, ~tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.wait_seekers, ~tid_bit);
+}
+
+void __ha_rwlock_sklock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+
+ if ((l->info.cur_readers | l->info.cur_seeker | l->info.cur_writer) & tid_bit)
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_seekers, tid_bit);
+
+ start_time = now_mono_time();
+ __RWLOCK_SKLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+
+ HA_ATOMIC_OR(&l->info.cur_seeker, tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.wait_seekers, ~tid_bit);
+}
+
+void __ha_rwlock_sktowr(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+
+ if ((l->info.cur_readers | l->info.cur_writer) & tid_bit)
+ abort();
+
+ if (!(l->info.cur_seeker & tid_bit))
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_writers, tid_bit);
+
+ start_time = now_mono_time();
+ __RWLOCK_SKTOWR(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ HA_ATOMIC_OR(&l->info.cur_writer, tid_bit);
+ HA_ATOMIC_AND(&l->info.cur_seeker, ~tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.wait_writers, ~tid_bit);
+}
+
+void __ha_rwlock_sktord(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+
+ if ((l->info.cur_readers | l->info.cur_writer) & tid_bit)
+ abort();
+
+ if (!(l->info.cur_seeker & tid_bit))
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_readers, tid_bit);
+
+ start_time = now_mono_time();
+ __RWLOCK_SKTORD(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_read, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&l->info.cur_readers, tid_bit);
+ HA_ATOMIC_AND(&l->info.cur_seeker, ~tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.wait_readers, ~tid_bit);
+}
+
+void __ha_rwlock_skunlock(enum lock_label lbl,struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ if (!(l->info.cur_seeker & tid_bit))
+ abort();
+
+ HA_ATOMIC_AND(&l->info.cur_seeker, ~tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ __RWLOCK_SKUNLOCK(&l->lock);
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_unlocked);
+}
+
+int __ha_rwlock_trysklock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+ int r;
+
+ if ((l->info.cur_readers | l->info.cur_seeker | l->info.cur_writer) & tid_bit)
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_seekers, tid_bit);
+
+ start_time = now_mono_time();
+ r = __RWLOCK_TRYSKLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ if (likely(!r)) {
+ /* got the lock ! */
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+ HA_ATOMIC_OR(&l->info.cur_seeker, tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+ }
+
+ HA_ATOMIC_AND(&l->info.wait_seekers, ~tid_bit);
+ return r;
+}
+
+int __ha_rwlock_tryrdtosk(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+ int r;
+
+ if ((l->info.cur_writer | l->info.cur_seeker) & tid_bit)
+ abort();
+
+ if (!(l->info.cur_readers & tid_bit))
+ abort();
+
+ HA_ATOMIC_OR(&l->info.wait_seekers, tid_bit);
+
+ start_time = now_mono_time();
+ r = __RWLOCK_TRYRDTOSK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ if (likely(!r)) {
+ /* got the lock ! */
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+ HA_ATOMIC_OR(&l->info.cur_seeker, tid_bit);
+ HA_ATOMIC_AND(&l->info.cur_readers, ~tid_bit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+ }
+
+ HA_ATOMIC_AND(&l->info.wait_seekers, ~tid_bit);
+ return r;
+}
+
+void __spin_init(struct ha_spinlock *l)
+{
+ memset(l, 0, sizeof(struct ha_spinlock));
+ __SPIN_INIT(&l->lock);
+}
+
+void __spin_destroy(struct ha_spinlock *l)
+{
+ __SPIN_DESTROY(&l->lock);
+ memset(l, 0, sizeof(struct ha_spinlock));
+}
+
+void __spin_lock(enum lock_label lbl, struct ha_spinlock *l,
+ const char *func, const char *file, int line)
+{
+ uint64_t start_time;
+
+ if (unlikely(l->info.owner & tid_bit)) {
+ /* the thread is already owning the lock */
+ abort();
+ }
+
+ HA_ATOMIC_OR(&l->info.waiters, tid_bit);
+
+ start_time = now_mono_time();
+ __SPIN_LOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+
+ l->info.owner = tid_bit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&l->info.waiters, ~tid_bit);
+}
+
+int __spin_trylock(enum lock_label lbl, struct ha_spinlock *l,
+ const char *func, const char *file, int line)
+{
+ int r;
+
+ if (unlikely(l->info.owner & tid_bit)) {
+ /* the thread is already owning the lock */
+ abort();
+ }
+
+ /* try read should never wait */
+ r = __SPIN_TRYLOCK(&l->lock);
+ if (unlikely(r))
+ return r;
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ l->info.owner = tid_bit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ return 0;
+}
+
+void __spin_unlock(enum lock_label lbl, struct ha_spinlock *l,
+ const char *func, const char *file, int line)
+{
+ if (unlikely(!(l->info.owner & tid_bit))) {
+ /* the thread is not owning the lock */
+ abort();
+ }
+
+ l->info.owner = 0;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ __SPIN_UNLOCK(&l->lock);
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_unlocked);
+}
+
+#endif // defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+
+/* Depending on the platform and how libpthread was built, pthread_exit() may
+ * involve some code in libgcc_s that would be loaded on exit for the first
+ * time, causing aborts if the process is chrooted. It's harmless but very
+ * dirty. There isn't much we can do to make sure libgcc_s is loaded only if
+ * needed, so what we do here is that during early boot we create a dummy
+ * thread that immediately exits. This will lead to libgcc_s being loaded
+ * during boot on the platforms where it's required.
+ */
+static void *dummy_thread_function(void *data)
+{
+ pthread_exit(NULL);
+ return NULL;
+}
+
+static inline void preload_libgcc_s(void)
+{
+ pthread_t dummy_thread;
+ pthread_create(&dummy_thread, NULL, dummy_thread_function, NULL);
+ pthread_join(dummy_thread, NULL);
+}
+
+static void __thread_init(void)
+{
+ char *ptr = NULL;
+
+ if (MAX_THREADS < 1 || MAX_THREADS > LONGBITS) {
+ ha_alert("MAX_THREADS value must be between 1 and %d inclusive; "
+ "HAProxy was built with value %d, please fix it and rebuild.\n",
+ LONGBITS, MAX_THREADS);
+ exit(1);
+ }
+
+ preload_libgcc_s();
+
+ thread_cpus_enabled_at_boot = thread_cpus_enabled();
+ thread_cpus_enabled_at_boot = MIN(thread_cpus_enabled_at_boot, MAX_THREADS);
+
+ memprintf(&ptr, "Built with multi-threading support (MAX_THREADS=%d, default=%d).",
+ MAX_THREADS, thread_cpus_enabled_at_boot);
+ hap_register_build_opts(ptr, 1);
+
+#if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+ memset(lock_stats, 0, sizeof(lock_stats));
+#endif
+}
+INITCALL0(STG_PREPARE, __thread_init);
+
+#else
+
+/* send signal <sig> to thread <thr> (send to process in fact) */
+void ha_tkill(unsigned int thr, int sig)
+{
+ raise(sig);
+}
+
+/* send signal <sig> to all threads (send to process in fact) */
+void ha_tkillall(int sig)
+{
+ raise(sig);
+}
+
+void ha_thread_relax(void)
+{
+#ifdef _POSIX_PRIORITY_SCHEDULING
+ sched_yield();
+#endif
+}
+
+REGISTER_BUILD_OPTS("Built without multi-threading support (USE_THREAD not set).");
+
+#endif // USE_THREAD
+
+
+/* scans the configured thread mapping and establishes the final one. Returns <0
+ * on failure, >=0 on success.
+ */
+int thread_map_to_groups()
+{
+ int t, g, ut, ug;
+ int q, r;
+
+ ut = ug = 0; // unassigned threads & groups
+
+ for (t = 0; t < global.nbthread; t++) {
+ if (!ha_thread_info[t].tg)
+ ut++;
+ }
+
+ for (g = 0; g < global.nbtgroups; g++) {
+ if (!ha_tgroup_info[g].count)
+ ug++;
+ ha_tgroup_info[g].tgid = g + 1;
+ }
+
+ if (ug > ut) {
+ ha_alert("More unassigned thread-groups (%d) than threads (%d). Please reduce thread-groups\n", ug, ut);
+ return -1;
+ }
+
+ /* look for first unassigned thread */
+ for (t = 0; t < global.nbthread && ha_thread_info[t].tg; t++)
+ ;
+
+ /* assign threads to empty groups */
+ for (g = 0; ug && ut; ) {
+ /* due to sparse thread assignment we can end up with more threads
+ * per group on last assigned groups than former ones, so we must
+ * always try to pack the maximum remaining ones together first.
+ */
+ q = ut / ug;
+ r = ut % ug;
+ if ((q + !!r) > MAX_THREADS_PER_GROUP) {
+ ha_alert("Too many remaining unassigned threads (%d) for thread groups (%d). Please increase thread-groups or make sure to keep thread numbers contiguous\n", ut, ug);
+ return -1;
+ }
+
+ /* thread <t> is the next unassigned one. Let's look for next
+ * unassigned group, we know there are some left
+ */
+ while (ut >= ug && ha_tgroup_info[g].count)
+ g++;
+
+ /* group g is unassigned, try to fill it with consecutive threads */
+ while (ut && ut >= ug && ha_tgroup_info[g].count < q + !!r &&
+ (!ha_tgroup_info[g].count || t == ha_tgroup_info[g].base + ha_tgroup_info[g].count)) {
+
+ if (!ha_tgroup_info[g].count) {
+ /* assign new group */
+ ha_tgroup_info[g].base = t;
+ ug--;
+ }
+
+ ha_tgroup_info[g].count++;
+ ha_thread_info[t].tg = &ha_tgroup_info[g];
+
+ ut--;
+ /* switch to next unassigned thread */
+ while (++t < global.nbthread && ha_thread_info[t].tg)
+ ;
+ }
+ }
+
+ if (ut) {
+ ha_alert("Remaining unassigned threads found (%d) because all groups are in use. Please increase 'thread-groups', reduce 'nbthreads' or remove or extend 'thread-group' enumerations.\n", ut);
+ return -1;
+ }
+
+ for (t = 0; t < global.nbthread; t++) {
+ ha_thread_info[t].tid = t;
+ ha_thread_info[t].ltid = t - ha_thread_info[t].tg->base;
+
+ ha_thread_info[t].tid_bit = 1UL << ha_thread_info[t].tid;
+ ha_thread_info[t].ltid_bit = 1UL << ha_thread_info[t].ltid;
+ }
+
+ return 0;
+}
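+
+/* Worked example of the packing above (illustrative): with ut=7 unassigned
+ * threads and ug=3 empty groups, the first group takes q+!!r = 7/3+1 = 3
+ * threads (leaving ut=4, ug=2), the second takes 4/2 = 2 (ut=2, ug=1) and
+ * the last takes the remaining 2, i.e. a 3/2/2 split where the largest
+ * shares are always packed first.
+ */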
+
+/* converts a configuration thread group+mask to a global group+mask depending on
+ * the configured thread group id. This is essentially for use with the "thread"
+ * directive on "bind" lines, where "thread 2/1-3" might be turned to "4-6" for
+ * the global ID. It cannot be used before the thread mapping above was completed
+ * and the thread group number configured. Possible options:
+ * - igid == 0: imask represents global IDs. We have to check that all
+ * configured threads in the mask belong to the same group. If imask is zero
+ * it means everything, so for now we only support this with a single group.
+ * - igid > 0, imask = 0: convert local values to global values for this thread
+ * - igid > 0, imask > 0: convert local values to global values
+ *
+ * Returns <0 on failure, >=0 on success.
+ */
+int thread_resolve_group_mask(uint igid, ulong imask, uint *ogid, ulong *omask, char **err)
+{
+ ulong mask;
+ uint t;
+
+ if (igid == 0) {
+ /* unspecified group, IDs are global */
+ if (!imask) {
+ /* all threads of all groups */
+ if (global.nbtgroups > 1) {
+ memprintf(err, "'thread' directive spans multiple groups");
+ return -1;
+ }
+ mask = 0;
+ *ogid = 1; // first and only group
+ *omask = all_threads_mask;
+ return 0;
+ } else {
+ /* some global threads */
+ imask &= all_threads_mask;
+ for (t = 0; t < global.nbthread; t++) {
+ if (imask & (1UL << t)) {
+ if (ha_thread_info[t].tg->tgid != igid) {
+ if (!igid)
+ igid = ha_thread_info[t].tg->tgid;
+ else {
+ memprintf(err, "'thread' directive spans multiple groups (at least %u and %u)", igid, ha_thread_info[t].tg->tgid);
+ return -1;
+ }
+ }
+ }
+ }
+
+ if (!igid) {
+ memprintf(err, "'thread' directive contains threads that belong to no group");
+ return -1;
+ }
+
+ /* we have a valid group, convert this to global thread IDs */
+ *ogid = igid;
+ *omask = imask << ha_tgroup_info[igid - 1].base;
+ return 0;
+ }
+ } else {
+ /* group was specified */
+ if (igid > global.nbtgroups) {
+ memprintf(err, "'thread' directive references non-existing thread group %u", igid);
+ return -1;
+ }
+
+ if (!imask) {
+ /* all threads of this group. Let's make a mask from their count and base. */
+ *ogid = igid;
+ mask = 1UL << (ha_tgroup_info[igid - 1].count - 1);
+ mask |= mask - 1;
+ *omask = mask << ha_tgroup_info[igid - 1].base;
+ return 0;
+ } else {
+ /* some local threads. Keep only existing ones for this group */
+
+ mask = 1UL << (ha_tgroup_info[igid - 1].count - 1);
+ mask |= mask - 1;
+
+ if (!(mask & imask)) {
+ /* no intersection between the thread group's
+ * threads and the bind line's.
+ */
+#ifdef THREAD_AUTO_ADJUST_GROUPS
+ unsigned long new_mask = 0;
+
+ while (imask) {
+ new_mask |= imask & mask;
+ imask >>= ha_tgroup_info[igid - 1].count;
+ }
+ imask = new_mask;
+#else
+ memprintf(err, "'thread' directive only references threads not belonging to the group");
+ return -1;
+#endif
+ }
+
+ mask &= imask;
+ *omask = mask << ha_tgroup_info[igid - 1].base;
+ *ogid = igid;
+ return 0;
+ }
+ }
+}
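+
+/* Worked example (illustrative): with two 3-thread groups, group 1 covering
+ * threads 1-3 (base 0) and group 2 covering threads 4-6 (base 3), a bind
+ * line using "thread 2/1-3" calls this function with igid=2 and imask=0x7,
+ * which returns *ogid=2 and *omask = 0x7 << 3 = 0x38, i.e. global threads
+ * 4-6 as described above.
+ */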
+
+/* Parse the "nbthread" global directive, which takes an integer argument that
+ * contains the desired number of threads.
+ */
+static int cfg_parse_nbthread(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ long nbthread;
+ char *errptr;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ nbthread = strtol(args[1], &errptr, 10);
+ if (!*args[1] || *errptr) {
+ memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]);
+ return -1;
+ }
+
+#ifndef USE_THREAD
+ if (nbthread != 1) {
+ memprintf(err, "'%s' specified with a value other than 1 while HAProxy is not compiled with threads support. Please check build options for USE_THREAD", args[0]);
+ return -1;
+ }
+#else
+ if (nbthread < 1 || nbthread > MAX_THREADS) {
+ memprintf(err, "'%s' value must be between 1 and %d (was %ld)", args[0], MAX_THREADS, nbthread);
+ return -1;
+ }
+
+ all_threads_mask = nbits(nbthread);
+#endif
+
+ HA_DIAG_WARNING_COND(global.nbthread,
+ "parsing [%s:%d] : '%s' is already defined and will be overridden.\n",
+ file, line, args[0]);
+
+ global.nbthread = nbthread;
+ return 0;
+}
+
+/* Parse the "thread-group" global directive, which takes an integer argument
+ * that designates a thread group, and a list of threads to put into that group.
+ */
+static int cfg_parse_thread_group(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *errptr;
+ long tnum, tend, tgroup;
+ int arg, tot;
+
+ tgroup = strtol(args[1], &errptr, 10);
+ if (!*args[1] || *errptr) {
+ memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]);
+ return -1;
+ }
+
+ if (tgroup < 1 || tgroup > MAX_TGROUPS) {
+ memprintf(err, "'%s' thread-group number must be between 1 and %d (was %ld)", args[0], MAX_TGROUPS, tgroup);
+ return -1;
+ }
+
+ /* look for a preliminary definition of any thread pointing to this
+ * group, and remove them.
+ */
+ if (ha_tgroup_info[tgroup-1].count) {
+ ha_warning("parsing [%s:%d] : '%s %ld' was already defined and will be overridden.\n",
+ file, line, args[0], tgroup);
+
+ for (tnum = ha_tgroup_info[tgroup-1].base;
+ tnum < ha_tgroup_info[tgroup-1].base + ha_tgroup_info[tgroup-1].count;
+ tnum++) {
+ if (ha_thread_info[tnum].tg == &ha_tgroup_info[tgroup-1])
+ ha_thread_info[tnum].tg = NULL;
+ }
+ ha_tgroup_info[tgroup-1].count = ha_tgroup_info[tgroup-1].base = 0;
+ }
+
+ tot = 0;
+ for (arg = 2; args[arg] && *args[arg]; arg++) {
+ tend = tnum = strtol(args[arg], &errptr, 10);
+
+ if (*errptr == '-')
+ tend = strtol(errptr + 1, &errptr, 10);
+
+ if (*errptr || tnum < 1 || tend < 1 || tnum > MAX_THREADS || tend > MAX_THREADS) {
+ memprintf(err, "'%s %ld' passed an unparsable or invalid thread number '%s' (valid range is 1 to %d)", args[0], tgroup, args[arg], MAX_THREADS);
+ return -1;
+ }
+
+ for(; tnum <= tend; tnum++) {
+ if (ha_thread_info[tnum-1].tg == &ha_tgroup_info[tgroup-1]) {
+ ha_warning("parsing [%s:%d] : '%s %ld': thread %ld assigned more than once on the same line.\n",
+ file, line, args[0], tgroup, tnum);
+ } else if (ha_thread_info[tnum-1].tg) {
+ ha_warning("parsing [%s:%d] : '%s %ld': thread %ld was previously assigned to thread group %ld and will be overridden.\n",
+ file, line, args[0], tgroup, tnum,
+ (long)(ha_thread_info[tnum-1].tg - &ha_tgroup_info[0] + 1));
+ }
+
+ if (!ha_tgroup_info[tgroup-1].count) {
+ ha_tgroup_info[tgroup-1].base = tnum-1;
+ ha_tgroup_info[tgroup-1].count = 1;
+ }
+ else if (tnum >= ha_tgroup_info[tgroup-1].base + ha_tgroup_info[tgroup-1].count) {
+ ha_tgroup_info[tgroup-1].count = tnum - ha_tgroup_info[tgroup-1].base;
+ }
+ else if (tnum - 1 < ha_tgroup_info[tgroup-1].base) {
+ /* thread lies below the group's base: extend the group downwards */
+ ha_tgroup_info[tgroup-1].count += ha_tgroup_info[tgroup-1].base - (tnum - 1);
+ ha_tgroup_info[tgroup-1].base = tnum - 1;
+ }
+
+ ha_thread_info[tnum-1].tg = &ha_tgroup_info[tgroup-1];
+ tot++;
+ }
+ }
+
+ if (ha_tgroup_info[tgroup-1].count > tot) {
+ memprintf(err, "'%s %ld' assigned sparse threads, only contiguous supported", args[0], tgroup);
+ return -1;
+ }
+
+ if (ha_tgroup_info[tgroup-1].count > MAX_THREADS_PER_GROUP) {
+ memprintf(err, "'%s %ld' assigned too many threads (%d, max=%d)", args[0], tgroup, tot, MAX_THREADS_PER_GROUP);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Parse the "thread-groups" global directive, which takes an integer argument
+ * that contains the desired number of thread groups.
+ */
+static int cfg_parse_thread_groups(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ long nbtgroups;
+ char *errptr;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ nbtgroups = strtol(args[1], &errptr, 10);
+ if (!*args[1] || *errptr) {
+ memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]);
+ return -1;
+ }
+
+#ifndef USE_THREAD
+ if (nbtgroups != 1) {
+ memprintf(err, "'%s' specified with a value other than 1 while HAProxy is not compiled with threads support. Please check build options for USE_THREAD", args[0]);
+ return -1;
+ }
+#else
+ if (nbtgroups < 1 || nbtgroups > MAX_TGROUPS) {
+ memprintf(err, "'%s' value must be between 1 and %d (was %ld)", args[0], MAX_TGROUPS, nbtgroups);
+ return -1;
+ }
+#endif
+
+ HA_DIAG_WARNING_COND(global.nbtgroups,
+ "parsing [%s:%d] : '%s' is already defined and will be overridden.\n",
+ file, line, args[0]);
+
+ global.nbtgroups = nbtgroups;
+ return 0;
+}
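+
+/* Configuration sketch (illustrative): the three directives parsed above are
+ * expected in the "global" section, e.g.:
+ *
+ *   global
+ *       nbthread 8
+ *       thread-groups 2
+ *       thread-group 1 1-4
+ *       thread-group 2 5-8
+ */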
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "nbthread", cfg_parse_nbthread, 0 },
+ { CFG_GLOBAL, "thread-group", cfg_parse_thread_group, 0 },
+ { CFG_GLOBAL, "thread-groups", cfg_parse_thread_groups, 0 },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/time.c b/src/time.c
new file mode 100644
index 0000000..280b522
--- /dev/null
+++ b/src/time.c
@@ -0,0 +1,147 @@
+/*
+ * Time calculation functions.
+ *
+ * Copyright 2000-2011 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/time.h>
+
+
+/*
+ * adds <ms> ms to <from>, sets the result in <tv> and returns a pointer to <tv>
+ */
+struct timeval *_tv_ms_add(struct timeval *tv, const struct timeval *from, int ms)
+{
+ tv->tv_usec = from->tv_usec + (ms % 1000) * 1000;
+ tv->tv_sec = from->tv_sec + (ms / 1000);
+ while (tv->tv_usec >= 1000000) {
+ tv->tv_usec -= 1000000;
+ tv->tv_sec++;
+ }
+ return tv;
+}
+
+/*
+ * compares <tv1> and <tv2> modulo 1ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2
+ * Must not be used when either argument is eternity. Use tv_ms_cmp2() for that.
+ */
+int _tv_ms_cmp(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_cmp(tv1, tv2);
+}
+
+/*
+ * compares <tv1> and <tv2> modulo 1 ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2,
+ * assuming that TV_ETERNITY is greater than everything.
+ */
+int _tv_ms_cmp2(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_cmp2(tv1, tv2);
+}
+
+/*
+ * compares <tv1> and <tv2> modulo 1 ms: returns 1 if tv1 <= tv2, 0 if tv1 > tv2,
+ * assuming that TV_ETERNITY is greater than everything. Returns 0 if tv1 is
+ * TV_ETERNITY, and always assumes that tv2 != TV_ETERNITY. Designed to replace
+ * occurrences of (tv_ms_cmp2(tv,now) <= 0).
+ */
+int _tv_ms_le2(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_le2(tv1, tv2);
+}
+
+/*
+ * returns the remaining time between tv1=now and event=tv2.
+ * if tv2 is already in the past, 0 is returned.
+ * Must not be used when either argument is eternity.
+ */
+unsigned long _tv_ms_remain(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_remain(tv1, tv2);
+}
+
+/*
+ * returns the remaining time between tv1=now and event=tv2.
+ * if tv2 is already in the past, 0 is returned.
+ * Returns TIME_ETERNITY if tv2 is eternity.
+ */
+unsigned long _tv_ms_remain2(const struct timeval *tv1, const struct timeval *tv2)
+{
+ if (tv_iseternity(tv2))
+ return TIME_ETERNITY;
+
+ return __tv_ms_remain(tv1, tv2);
+}
+
+/*
+ * Returns the time in ms elapsed between tv1 and tv2, assuming that tv1<=tv2.
+ * Must not be used when either argument is eternity.
+ */
+unsigned long _tv_ms_elapsed(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_elapsed(tv1, tv2);
+}
+
+/*
+ * adds <inc> to <from>, sets the result in <tv> and returns a pointer to <tv>
+ */
+struct timeval *_tv_add(struct timeval *tv, const struct timeval *from, const struct timeval *inc)
+{
+ return __tv_add(tv, from, inc);
+}
+
+/*
+ * If <inc> is set, then add it to <from> and set the result to <tv>, then
+ * return 1, otherwise return 0. It is meant to be used in if conditions.
+ */
+int _tv_add_ifset(struct timeval *tv, const struct timeval *from, const struct timeval *inc)
+{
+ return __tv_add_ifset(tv, from, inc);
+}
+
+/*
+ * Computes the remaining time between tv1=now and event=tv2. If tv2 is already
+ * in the past, 0 is returned. The result is stored into tv.
+ */
+struct timeval *_tv_remain(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv)
+{
+ return __tv_remain(tv1, tv2, tv);
+}
+
+/*
+ * Computes the remaining time between tv1=now and event=tv2. If tv2 is already
+ * in the past, 0 is returned. The result is stored into tv. Returns ETERNITY if tv2 is
+ * eternity.
+ */
+struct timeval *_tv_remain2(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv)
+{
+ return __tv_remain2(tv1, tv2, tv);
+}
+
+/* tv_isle: compares <tv1> and <tv2> : returns 1 if tv1 <= tv2, otherwise 0 */
+int _tv_isle(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_isle(tv1, tv2);
+}
+
+/* tv_isgt: compares <tv1> and <tv2> : returns 1 if tv1 > tv2, otherwise 0 */
+int _tv_isgt(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_isgt(tv1, tv2);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tools.c b/src/tools.c
new file mode 100644
index 0000000..6a0c078
--- /dev/null
+++ b/src/tools.c
@@ -0,0 +1,5853 @@
+/*
+ * General purpose functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+#define _GNU_SOURCE
+#include <dlfcn.h>
+#include <link.h>
+#endif
+
+#if defined(__FreeBSD__)
+#include <elf.h>
+#include <dlfcn.h>
+extern void *__elf_aux_vector;
+#endif
+
+#if defined(__NetBSD__)
+#include <sys/exec_elf.h>
+#include <dlfcn.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#if defined(__linux__) && defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16))
+#include <sys/auxv.h>
+#endif
+
+#include <import/eb32sctree.h>
+#include <import/eb32tree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/chunk.h>
+#include <haproxy/dgram.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/listener.h>
+#include <haproxy/namespace.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protocol.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/sock.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+/* This macro returns false if the test __x is false. Many
+ * of the following parsing functions must abort the processing
+ * when it returns 0, so this macro is useful for writing light code.
+ */
+#define RET0_UNLESS(__x) do { if (!(__x)) return 0; } while (0)
+
+/* enough to store NB_ITOA_STR integers of :
+ * 2^64-1 = 18446744073709551615 or
+ * -2^63 = -9223372036854775808
+ *
+ * The HTML version needs room for adding the 25 characters
+ * '<span class="rls"></span>' around digits at positions 3N+1 in order
+ * to add spacing at up to 6 positions : 18 446 744 073 709 551 615
+ */
+THREAD_LOCAL char itoa_str[NB_ITOA_STR][171];
+THREAD_LOCAL int itoa_idx = 0; /* index of next itoa_str to use */
+
+/* sometimes we'll need to quote strings (eg: in stats), and we don't expect
+ * to quote strings larger than a max configuration line.
+ */
+THREAD_LOCAL char quoted_str[NB_QSTR][QSTR_SIZE + 1];
+THREAD_LOCAL int quoted_idx = 0;
+
+/* thread-local PRNG state. It's modified to start from a different sequence
+ * on all threads upon startup. It must not be used or anything beyond getting
+ * statistical values as it's 100% predictable.
+ */
+THREAD_LOCAL unsigned int statistical_prng_state = 2463534242U;
+
+/* set to true if this is a static build */
+int build_is_static = 0;
+
+/*
+ * unsigned long long ASCII representation
+ *
+ * returns a pointer to the last char '\0', or NULL if there is not enough
+ * space in dst
+ */
+char *ulltoa(unsigned long long n, char *dst, size_t size)
+{
+ int i = 0;
+ char *res;
+
+ switch(n) {
+ case 1ULL ... 9ULL:
+ i = 0;
+ break;
+
+ case 10ULL ... 99ULL:
+ i = 1;
+ break;
+
+ case 100ULL ... 999ULL:
+ i = 2;
+ break;
+
+ case 1000ULL ... 9999ULL:
+ i = 3;
+ break;
+
+ case 10000ULL ... 99999ULL:
+ i = 4;
+ break;
+
+ case 100000ULL ... 999999ULL:
+ i = 5;
+ break;
+
+ case 1000000ULL ... 9999999ULL:
+ i = 6;
+ break;
+
+ case 10000000ULL ... 99999999ULL:
+ i = 7;
+ break;
+
+ case 100000000ULL ... 999999999ULL:
+ i = 8;
+ break;
+
+ case 1000000000ULL ... 9999999999ULL:
+ i = 9;
+ break;
+
+ case 10000000000ULL ... 99999999999ULL:
+ i = 10;
+ break;
+
+ case 100000000000ULL ... 999999999999ULL:
+ i = 11;
+ break;
+
+ case 1000000000000ULL ... 9999999999999ULL:
+ i = 12;
+ break;
+
+ case 10000000000000ULL ... 99999999999999ULL:
+ i = 13;
+ break;
+
+ case 100000000000000ULL ... 999999999999999ULL:
+ i = 14;
+ break;
+
+ case 1000000000000000ULL ... 9999999999999999ULL:
+ i = 15;
+ break;
+
+ case 10000000000000000ULL ... 99999999999999999ULL:
+ i = 16;
+ break;
+
+ case 100000000000000000ULL ... 999999999999999999ULL:
+ i = 17;
+ break;
+
+ case 1000000000000000000ULL ... 9999999999999999999ULL:
+ i = 18;
+ break;
+
+ case 10000000000000000000ULL ... ULLONG_MAX:
+ i = 19;
+ break;
+ }
+ if (i + 2 > size) // (i + 1) + '\0'
+ return NULL; // too long
+ res = dst + i + 1;
+ *res = '\0';
+ for (; i >= 0; i--) {
+ dst[i] = n % 10ULL + '0';
+ n /= 10ULL;
+ }
+ return res;
+}
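+
+/* Usage sketch (illustrative): the buffer must leave room for the digits
+ * plus the trailing '\0', otherwise NULL is returned and dst is untouched:
+ *
+ *   char buf[21];  // 2^64-1 needs 20 digits + '\0'
+ *   if (ulltoa(18446744073709551615ULL, buf, sizeof(buf)))
+ *       ...;       // buf now contains "18446744073709551615"
+ */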
+
+/*
+ * unsigned long ASCII representation
+ *
+ * returns a pointer to the last char '\0', or NULL if there is not enough
+ * space in dst
+ */
+char *ultoa_o(unsigned long n, char *dst, size_t size)
+{
+ int i = 0;
+ char *res;
+
+ switch (n) {
+ case 0U ... 9UL:
+ i = 0;
+ break;
+
+ case 10U ... 99UL:
+ i = 1;
+ break;
+
+ case 100U ... 999UL:
+ i = 2;
+ break;
+
+ case 1000U ... 9999UL:
+ i = 3;
+ break;
+
+ case 10000U ... 99999UL:
+ i = 4;
+ break;
+
+ case 100000U ... 999999UL:
+ i = 5;
+ break;
+
+ case 1000000U ... 9999999UL:
+ i = 6;
+ break;
+
+ case 10000000U ... 99999999UL:
+ i = 7;
+ break;
+
+ case 100000000U ... 999999999UL:
+ i = 8;
+ break;
+#if __WORDSIZE == 32
+
+ case 1000000000ULL ... ULONG_MAX:
+ i = 9;
+ break;
+
+#elif __WORDSIZE == 64
+
+ case 1000000000ULL ... 9999999999UL:
+ i = 9;
+ break;
+
+ case 10000000000ULL ... 99999999999UL:
+ i = 10;
+ break;
+
+ case 100000000000ULL ... 999999999999UL:
+ i = 11;
+ break;
+
+ case 1000000000000ULL ... 9999999999999UL:
+ i = 12;
+ break;
+
+ case 10000000000000ULL ... 99999999999999UL:
+ i = 13;
+ break;
+
+ case 100000000000000ULL ... 999999999999999UL:
+ i = 14;
+ break;
+
+ case 1000000000000000ULL ... 9999999999999999UL:
+ i = 15;
+ break;
+
+ case 10000000000000000ULL ... 99999999999999999UL:
+ i = 16;
+ break;
+
+ case 100000000000000000ULL ... 999999999999999999UL:
+ i = 17;
+ break;
+
+ case 1000000000000000000ULL ... 9999999999999999999UL:
+ i = 18;
+ break;
+
+ case 10000000000000000000ULL ... ULONG_MAX:
+ i = 19;
+ break;
+
+#endif
+ }
+ if (i + 2 > size) // (i + 1) + '\0'
+ return NULL; // too long
+ res = dst + i + 1;
+ *res = '\0';
+ for (; i >= 0; i--) {
+ dst[i] = n % 10U + '0';
+ n /= 10U;
+ }
+ return res;
+}
+
+/*
+ * signed long ASCII representation
+ *
+ * returns a pointer to the last char '\0', or NULL if there is not enough
+ * space in dst
+ */
+char *ltoa_o(long int n, char *dst, size_t size)
+{
+ char *pos = dst;
+
+ if (n < 0) {
+ if (size < 3)
+ return NULL; // min size is '-' + digit + '\0' but another test in ultoa
+ *pos = '-';
+ pos++;
+ dst = ultoa_o(-n, pos, size - 1);
+ } else {
+ dst = ultoa_o(n, dst, size);
+ }
+ return dst;
+}
+
+/*
+ * signed long long ASCII representation
+ *
+ * returns a pointer to the last char '\0', or NULL if there is not enough
+ * space in dst
+ */
+char *lltoa(long long n, char *dst, size_t size)
+{
+ char *pos = dst;
+
+ if (n < 0) {
+ if (size < 3)
+ return NULL; // min size is '-' + digit + '\0' but another test in ulltoa
+ *pos = '-';
+ pos++;
+ dst = ulltoa(-n, pos, size - 1);
+ } else {
+ dst = ulltoa(n, dst, size);
+ }
+ return dst;
+}
+
+/*
+ * writes an ascii representation of an unsigned int into dst and
+ * returns a pointer to the last character.
+ * Pads the ascii representation with leading '0's according to <size>.
+ */
+char *utoa_pad(unsigned int n, char *dst, size_t size)
+{
+ int i = 0;
+ char *ret;
+
+ switch(n) {
+ case 0U ... 9U:
+ i = 0;
+ break;
+
+ case 10U ... 99U:
+ i = 1;
+ break;
+
+ case 100U ... 999U:
+ i = 2;
+ break;
+
+ case 1000U ... 9999U:
+ i = 3;
+ break;
+
+ case 10000U ... 99999U:
+ i = 4;
+ break;
+
+ case 100000U ... 999999U:
+ i = 5;
+ break;
+
+ case 1000000U ... 9999999U:
+ i = 6;
+ break;
+
+ case 10000000U ... 99999999U:
+ i = 7;
+ break;
+
+ case 100000000U ... 999999999U:
+ i = 8;
+ break;
+
+ case 1000000000U ... 4294967295U:
+ i = 9;
+ break;
+ }
+ if (i + 2 > size) // (i + 1) + '\0'
+ return NULL; // too long
+ if (i < size)
+ i = size - 2; // padding - '\0'
+
+ ret = dst + i + 1;
+ *ret = '\0';
+ for (; i >= 0; i--) {
+ dst[i] = n % 10U + '0';
+ n /= 10U;
+ }
+ return ret;
+}
+
+/*
+ * copies at most <size-1> chars from <src> to <dst>. Last char is always
+ * set to 0, unless <size> is 0. The number of chars copied is returned
+ * (excluding the terminating zero).
+ * This code has been optimized for size and speed : on x86, it's 45 bytes
+ * long, uses only registers, and consumes only 4 cycles per char.
+ */
+int strlcpy2(char *dst, const char *src, int size)
+{
+ char *orig = dst;
+ if (size) {
+ while (--size && (*dst = *src)) {
+ src++; dst++;
+ }
+ *dst = 0;
+ }
+ return dst - orig;
+}
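+
+/* Usage sketch (illustrative): unlike strncpy(), the output is always
+ * terminated (when size is non-zero) and the copied length is returned:
+ *
+ *   char dst[8];
+ *   int n = strlcpy2(dst, "haproxy rules", sizeof(dst));
+ *   // n == 7, dst contains "haproxy": truncated but NUL-terminated
+ */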
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal.
+ */
+char *ultoa_r(unsigned long n, char *buffer, int size)
+{
+ char *pos;
+
+ pos = buffer + size - 1;
+ *pos-- = '\0';
+
+ do {
+ *pos-- = '0' + n % 10;
+ n /= 10;
+ } while (n && pos >= buffer);
+ return pos + 1;
+}
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal.
+ */
+char *lltoa_r(long long int in, char *buffer, int size)
+{
+ char *pos;
+ int neg = 0;
+ unsigned long long int n;
+
+ pos = buffer + size - 1;
+ *pos-- = '\0';
+
+ if (in < 0) {
+ neg = 1;
+ n = -in;
+ }
+ else
+ n = in;
+
+ do {
+ *pos-- = '0' + n % 10;
+ n /= 10;
+ } while (n && pos >= buffer);
+ if (neg && pos > buffer)
+ *pos-- = '-';
+ return pos + 1;
+}
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for signed number 'n' in decimal.
+ */
+char *sltoa_r(long n, char *buffer, int size)
+{
+ char *pos;
+
+ if (n >= 0)
+ return ultoa_r(n, buffer, size);
+
+ pos = ultoa_r(-n, buffer + 1, size - 1) - 1;
+ *pos = '-';
+ return pos;
+}
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal, formatted for
+ * HTML output with tags to create visual grouping by 3 digits. The
+ * output needs to support at least 171 characters.
+ */
+const char *ulltoh_r(unsigned long long n, char *buffer, int size)
+{
+ char *start;
+ int digit = 0;
+
+ start = buffer + size;
+ *--start = '\0';
+
+ do {
+ if (digit == 3 && start >= buffer + 7)
+ memcpy(start -= 7, "</span>", 7);
+
+ if (start >= buffer + 1) {
+ *--start = '0' + n % 10;
+ n /= 10;
+ }
+
+ if (digit == 3 && start >= buffer + 18)
+ memcpy(start -= 18, "<span class=\"rls\">", 18);
+
+ if (digit++ == 3)
+ digit = 1;
+ } while (n && start > buffer);
+ return start;
+}
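+
+/* Output sketch (illustrative): ulltoh_r(1234567, buf, sizeof(buf)) writes
+ *
+ *   <span class="rls">1</span>23<span class="rls">4</span>567
+ *
+ * into <buffer> and returns a pointer to its start: the digit preceding each
+ * group of three is wrapped so that a CSS rule on class "rls" can render the
+ * thousands spacing.
+ */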
+
+/*
+ * This function simply returns a locally allocated string containing the ascii
+ * representation for number 'n' in decimal, unless n is 0 in which case it
+ * returns the alternate string (or an empty string if the alternate string is
+ * NULL). Its use is intended for limits shown in reports, where it's
+ * desirable not to display anything if there is no limit. Warning! it shares
+ * the same vector as ultoa_r().
+ */
+const char *limit_r(unsigned long n, char *buffer, int size, const char *alt)
+{
+ return (n) ? ultoa_r(n, buffer, size) : (alt ? alt : "");
+}
+
+/* Trims the first "%f" float in a string to its minimum number of digits after
+ * the decimal point by trimming trailing zeroes, even dropping the decimal
+ * point if not needed. The string is in <buffer> of length <len>, and the
+ * number is expected to start at or after position <num_start> (the first
+ * point appearing there is considered). A NUL character is always placed at
+ * the end if some trimming occurs. The new buffer length is returned.
+ */
+size_t flt_trim(char *buffer, size_t num_start, size_t len)
+{
+ char *end = buffer + len;
+ char *p = buffer + num_start;
+ char *trim;
+
+ do {
+ if (p >= end)
+ return len;
+ trim = p++;
+ } while (*trim != '.');
+
+ /* For now <trim> is on the decimal point. Let's look for any other
+ * meaningful digit after it.
+ */
+ while (p < end) {
+ if (*p++ != '0')
+ trim = p;
+ }
+
+ if (trim < end)
+ *trim = 0;
+
+ return trim - buffer;
+}
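+
+/* Examples (illustrative): flt_trim("1.500000", 0, 8) rewrites the buffer to
+ * "1.5" and returns 3, while flt_trim("2.000000", 0, 8) also drops the
+ * decimal point, leaving "2" and returning 1.
+ */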
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal with useless trailing
+ * zeroes trimmed.
+ */
+char *ftoa_r(double n, char *buffer, int size)
+{
+ flt_trim(buffer, 0, snprintf(buffer, size, "%f", n));
+ return buffer;
+}
+
+/* returns a locally allocated string containing the quoted encoding of the
+ * input string. The output may be truncated to QSTR_SIZE chars, but it is
+ * guaranteed that the string will always be properly terminated. Quotes are
+ * encoded by doubling them as is commonly done in CSV files. QSTR_SIZE must
+ * always be at least 4 chars.
+ */
+const char *qstr(const char *str)
+{
+ char *ret = quoted_str[quoted_idx];
+ char *p, *end;
+
+ if (++quoted_idx >= NB_QSTR)
+ quoted_idx = 0;
+
+ p = ret;
+ end = ret + QSTR_SIZE;
+
+ *p++ = '"';
+
+ /* always keep 3 chars to support passing "" and the ending " */
+ while (*str && p < end - 3) {
+ if (*str == '"') {
+ *p++ = '"';
+ *p++ = '"';
+ }
+ else
+ *p++ = *str;
+ str++;
+ }
+ *p++ = '"';
+ return ret;
+}
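+
+/* Usage sketch (illustrative): qstr("say \"hi\"") returns the string
+ * "say ""hi""" including the surrounding quotes, i.e. the CSV-style
+ * doubling described above.
+ */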
+
+/*
+ * Returns non-zero if character <s> is a hex digit (0-9, a-f, A-F), else zero.
+ *
+ * It looks like this one would be a good candidate for inlining, but this is
+ * not interesting because it is around 35 bytes long and often called multiple
+ * times within the same function.
+ */
+int ishex(char s)
+{
+ s -= '0';
+ if ((unsigned char)s <= 9)
+ return 1;
+ s -= 'A' - '0';
+ if ((unsigned char)s <= 5)
+ return 1;
+ s -= 'a' - 'A';
+ if ((unsigned char)s <= 5)
+ return 1;
+ return 0;
+}
+
+/* rounds <i> down to the closest value having max 2 digits */
+unsigned int round_2dig(unsigned int i)
+{
+ unsigned int mul = 1;
+
+ while (i >= 100) {
+ i /= 10;
+ mul *= 10;
+ }
+ return i * mul;
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [A-Za-z0-9_:.-]. If an
+ * invalid character is found, a pointer to it is returned. If everything is
+ * fine, NULL is returned.
+ */
+const char *invalid_char(const char *name)
+{
+ if (!*name)
+ return name;
+
+ while (*name) {
+ if (!isalnum((unsigned char)*name) && *name != '.' && *name != ':' &&
+ *name != '_' && *name != '-')
+ return name;
+ name++;
+ }
+ return NULL;
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [_.-] and those
+ * accepted by <f> function.
+ * If an invalid character is found, a pointer to it is returned.
+ * If everything is fine, NULL is returned.
+ */
+static inline const char *__invalid_char(const char *name, int (*f)(int)) {
+
+ if (!*name)
+ return name;
+
+ while (*name) {
+ if (!f((unsigned char)*name) && *name != '.' &&
+ *name != '_' && *name != '-')
+ return name;
+
+ name++;
+ }
+
+ return NULL;
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [A-Za-z0-9_.-].
+ * If an invalid character is found, a pointer to it is returned.
+ * If everything is fine, NULL is returned.
+ */
+const char *invalid_domainchar(const char *name) {
+ return __invalid_char(name, isalnum);
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [A-Za-z0-9_.-].
+ * If an invalid character is found, a pointer to it is returned.
+ * If everything is fine, NULL is returned.
+ */
+const char *invalid_prefix_char(const char *name) {
+ return __invalid_char(name, isalnum);
+}
+
+/*
+ * converts <str> to a struct sockaddr_storage* provided by the caller. The
+ * caller must have zeroed <sa> first, and may have set sa->ss_family to force
+ * parse a specific address format. If the ss_family is 0 or AF_UNSPEC, then
+ * the function tries to guess the address family from the syntax. If the
+ * family is forced and the format doesn't match, an error is returned. The
+ * string is assumed to contain only an address, no port. The address can be a
+ * dotted IPv4 address, an IPv6 address, a host name, or empty or "*" to
+ * indicate INADDR_ANY. NULL is returned if the host part cannot be resolved.
+ * The return address will only have the address family and the address set,
+ * all other fields remain zero. The string is not supposed to be modified.
+ * The IPv6 '::' address is IN6ADDR_ANY. If <resolve> is non-zero, the hostname
+ * is resolved, otherwise only IP addresses are resolved, and anything else
+ * returns NULL. If <sa> already contains a port, it is preserved.
+ */
+struct sockaddr_storage *str2ip2(const char *str, struct sockaddr_storage *sa, int resolve)
+{
+ struct hostent *he;
+ /* max IPv6 length, including brackets and terminating NULL */
+ char tmpip[48];
+ int port = get_host_port(sa);
+
+ /* check IPv6 with square brackets */
+ if (str[0] == '[') {
+ size_t iplength = strlen(str);
+
+ if (iplength < 4) {
+ /* minimal size is 4 when using brackets "[::]" */
+ goto fail;
+ }
+ else if (iplength >= sizeof(tmpip)) {
+ /* IPv6 literal can not be larger than tmpip */
+ goto fail;
+ }
+ else {
+ if (str[iplength - 1] != ']') {
+ /* if address started with bracket, it should end with bracket */
+ goto fail;
+ }
+ else {
+ memcpy(tmpip, str + 1, iplength - 2);
+ tmpip[iplength - 2] = '\0';
+ str = tmpip;
+ }
+ }
+ }
+
+ /* Any IPv6 address */
+ if (str[0] == ':' && str[1] == ':' && !str[2]) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = AF_INET6;
+ else if (sa->ss_family != AF_INET6)
+ goto fail;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ /* Any address for the family, defaults to IPv4 */
+ if (!str[0] || (str[0] == '*' && !str[1])) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = AF_INET;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ /* check for IPv6 first */
+ if ((!sa->ss_family || sa->ss_family == AF_UNSPEC || sa->ss_family == AF_INET6) &&
+ inet_pton(AF_INET6, str, &((struct sockaddr_in6 *)sa)->sin6_addr)) {
+ sa->ss_family = AF_INET6;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ /* then check for IPv4 */
+ if ((!sa->ss_family || sa->ss_family == AF_UNSPEC || sa->ss_family == AF_INET) &&
+ inet_pton(AF_INET, str, &((struct sockaddr_in *)sa)->sin_addr)) {
+ sa->ss_family = AF_INET;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ if (!resolve)
+ return NULL;
+
+ if (!resolv_hostname_validation(str, NULL))
+ return NULL;
+
+#ifdef USE_GETADDRINFO
+ if (global.tune.options & GTUNE_USE_GAI) {
+ struct addrinfo hints, *result;
+ int success = 0;
+
+ memset(&result, 0, sizeof(result));
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = sa->ss_family ? sa->ss_family : AF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_flags = 0;
+ hints.ai_protocol = 0;
+
+ if (getaddrinfo(str, NULL, &hints, &result) == 0) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = result->ai_family;
+ else if (sa->ss_family != result->ai_family) {
+ freeaddrinfo(result);
+ goto fail;
+ }
+
+ switch (result->ai_family) {
+ case AF_INET:
+ memcpy((struct sockaddr_in *)sa, result->ai_addr, result->ai_addrlen);
+ set_host_port(sa, port);
+ success = 1;
+ break;
+ case AF_INET6:
+ memcpy((struct sockaddr_in6 *)sa, result->ai_addr, result->ai_addrlen);
+ set_host_port(sa, port);
+ success = 1;
+ break;
+ }
+ }
+
+ if (result)
+ freeaddrinfo(result);
+
+ if (success)
+ return sa;
+ }
+#endif
+ /* try to resolve an IPv4/IPv6 hostname */
+ he = gethostbyname(str);
+ if (he) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = he->h_addrtype;
+ else if (sa->ss_family != he->h_addrtype)
+ goto fail;
+
+ switch (sa->ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)sa)->sin_addr = *(struct in_addr *) *(he->h_addr_list);
+ set_host_port(sa, port);
+ return sa;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)sa)->sin6_addr = *(struct in6_addr *) *(he->h_addr_list);
+ set_host_port(sa, port);
+ return sa;
+ }
+ }
+
+ /* unsupported address family */
+ fail:
+ return NULL;
+}
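+
+/* Usage sketch (editor's illustration, not part of the build): str2ip2()
+ * fills a caller-provided, pre-zeroed storage and only resolves host names
+ * when <resolve> is set. handle_parse_error() is a hypothetical helper.
+ *
+ *     struct sockaddr_storage ss;
+ *
+ *     memset(&ss, 0, sizeof(ss));
+ *     ss.ss_family = AF_INET6;               // optional: force the family
+ *     if (!str2ip2("2001:db8::1", &ss, 0))   // <resolve>=0: no DNS lookup
+ *         handle_parse_error();
+ */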
+
+/*
+ * Converts <str> to a locally allocated struct sockaddr_storage *, and a port
+ * range or offset consisting in two integers that the caller will have to
+ * check to find the relevant input format. The following formats are supported:
+ *
+ * String format | address | port | low | high
+ * addr | <addr> | 0 | 0 | 0
+ * addr: | <addr> | 0 | 0 | 0
+ * addr:port | <addr> | <port> | <port> | <port>
+ * addr:pl-ph | <addr> | <pl> | <pl> | <ph>
+ * addr:+port | <addr> | <port> | 0 | <port>
+ * addr:-port | <addr> |-<port> | <port> | 0
+ *
+ * The detection of a port range or increment by the caller is made by
+ * comparing <low> and <high>. If both are equal, then port 0 means no port
+ * was specified. The caller may pass NULL for <low> and <high> if it is not
+ * interested in retrieving port ranges.
+ *
+ * Note that <addr> above may also be :
+ * - empty ("") => family will be AF_INET and address will be INADDR_ANY
+ * - "*" => family will be AF_INET and address will be INADDR_ANY
+ * - "::" => family will be AF_INET6 and address will be IN6ADDR_ANY
+ * - a host name => family and address will depend on host name resolving.
+ *
+ * A prefix may be passed in before the address above to force the family:
+ * - "ipv4@" => force address to resolve as IPv4 and fail if not possible.
+ * - "ipv6@" => force address to resolve as IPv6 and fail if not possible.
+ * - "unix@" => force address to be a path to a UNIX socket even if the
+ * path does not start with a '/'
+ * - "abns@" => force address to belong to the abstract namespace (Linux
+ * only). These sockets are just like Unix sockets but without
+ * the need for an underlying file system. The address is a
+ * string. Technically it's like a Unix socket with a zero in
+ * the first byte of the address.
+ * - "fd@" => an integer must follow, and is a file descriptor number.
+ *
+ * IPv6 addresses can be declared with or without square brackets. When using
+ * square brackets for IPv6 addresses, the port separator (colon) is optional.
+ * If not using square brackets, and in order to avoid any ambiguity with
+ * IPv6 addresses, the last colon ':' is mandatory even when no port is specified.
+ * NULL is returned if the address cannot be parsed. The <low> and <high> ports
+ * are always initialized if non-null, even for non-IP families.
+ *
+ * If <pfx> is non-null, it is used as a string prefix before any path-based
+ * address (typically the path to a unix socket).
+ *
+ * if <fqdn> is non-null, it will be filled with :
+ * - a pointer to the FQDN of the server name to resolve if there's one, and
+ * that the caller will have to free(),
+ * - NULL if there was an explicit address that doesn't require resolution.
+ *
+ * Hostnames are only resolved if <opts> has PA_O_RESOLVE. Otherwise <fqdn> is
+ * still honored so it is possible for the caller to know whether a resolution
+ * failed by clearing this flag and checking if <fqdn> was filled, indicating
+ * the need for a resolution.
+ *
+ * When a file descriptor is passed, its value is put into the s_addr part of
+ * the address when cast to sockaddr_in and the address family is
+ * AF_CUST_EXISTING_FD.
+ *
+ * The matching protocol will be set into <proto> if non-null.
+ *
+ * Any known file descriptor is also assigned to <fd> if non-null, otherwise it
+ * is forced to -1.
+ */
+struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int *high, int *fd,
+ struct protocol **proto, char **err,
+ const char *pfx, char **fqdn, unsigned int opts)
+{
+ static THREAD_LOCAL struct sockaddr_storage ss;
+ struct sockaddr_storage *ret = NULL;
+ struct protocol *new_proto = NULL;
+ char *back, *str2;
+ char *port1, *port2;
+ int portl, porth, porta;
+ int abstract = 0;
+ int new_fd = -1;
+ enum proto_type proto_type;
+ int ctrl_type;
+
+ portl = porth = porta = 0;
+ if (fqdn)
+ *fqdn = NULL;
+
+ str2 = back = env_expand(strdup(str));
+ if (str2 == NULL) {
+ memprintf(err, "out of memory in '%s'\n", __FUNCTION__);
+ goto out;
+ }
+
+ if (!*str2) {
+ memprintf(err, "'%s' resolves to an empty address (environment variable missing?)\n", str);
+ goto out;
+ }
+
+ memset(&ss, 0, sizeof(ss));
+
+ /* prepare the default socket types */
+ if ((opts & (PA_O_STREAM|PA_O_DGRAM)) == PA_O_DGRAM ||
+ ((opts & (PA_O_STREAM|PA_O_DGRAM)) == (PA_O_DGRAM|PA_O_STREAM) && (opts & PA_O_DEFAULT_DGRAM))) {
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ } else {
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+
+ if (strncmp(str2, "stream+", 7) == 0) {
+ str2 += 7;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "dgram+", 6) == 0) {
+ str2 += 6;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+
+ if (strncmp(str2, "unix@", 5) == 0) {
+ str2 += 5;
+ abstract = 0;
+ ss.ss_family = AF_UNIX;
+ }
+ else if (strncmp(str2, "uxdg@", 5) == 0) {
+ str2 += 5;
+ abstract = 0;
+ ss.ss_family = AF_UNIX;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "uxst@", 5) == 0) {
+ str2 += 5;
+ abstract = 0;
+ ss.ss_family = AF_UNIX;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "abns@", 5) == 0) {
+ str2 += 5;
+ abstract = 1;
+ ss.ss_family = AF_UNIX;
+ }
+ else if (strncmp(str2, "ip@", 3) == 0) {
+ str2 += 3;
+ ss.ss_family = AF_UNSPEC;
+ }
+ else if (strncmp(str2, "ipv4@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET;
+ }
+ else if (strncmp(str2, "ipv6@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET6;
+ }
+ else if (strncmp(str2, "tcp4@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "udp4@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "tcp6@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET6;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "udp6@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET6;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "tcp@", 4) == 0) {
+ str2 += 4;
+ ss.ss_family = AF_UNSPEC;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "udp@", 4) == 0) {
+ str2 += 4;
+ ss.ss_family = AF_UNSPEC;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "quic4@", 6) == 0) {
+ str2 += 6;
+ ss.ss_family = AF_INET;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "quic6@", 6) == 0) {
+ str2 += 6;
+ ss.ss_family = AF_INET6;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "fd@", 3) == 0) {
+ str2 += 3;
+ ss.ss_family = AF_CUST_EXISTING_FD;
+ }
+ else if (strncmp(str2, "sockpair@", 9) == 0) {
+ str2 += 9;
+ ss.ss_family = AF_CUST_SOCKPAIR;
+ }
+ else if (*str2 == '/') {
+ ss.ss_family = AF_UNIX;
+ }
+ else
+ ss.ss_family = AF_UNSPEC;
+
+ if (ss.ss_family == AF_CUST_SOCKPAIR) {
+ struct sockaddr_storage ss2;
+ socklen_t addr_len;
+ char *endptr;
+
+ new_fd = strtol(str2, &endptr, 10);
+ if (!*str2 || new_fd < 0 || *endptr) {
+ memprintf(err, "file descriptor '%s' is not a valid integer in '%s'\n", str2, str);
+ goto out;
+ }
+
+ /* just verify that it's a socket */
+ addr_len = sizeof(ss2);
+ if (getsockname(new_fd, (struct sockaddr *)&ss2, &addr_len) == -1) {
+ memprintf(err, "cannot use file descriptor '%d' : %s.\n", new_fd, strerror(errno));
+ goto out;
+ }
+
+ ((struct sockaddr_in *)&ss)->sin_addr.s_addr = new_fd;
+ ((struct sockaddr_in *)&ss)->sin_port = 0;
+ }
+ else if (ss.ss_family == AF_CUST_EXISTING_FD) {
+ char *endptr;
+
+ new_fd = strtol(str2, &endptr, 10);
+ if (!*str2 || new_fd < 0 || *endptr) {
+ memprintf(err, "file descriptor '%s' is not a valid integer in '%s'\n", str2, str);
+ goto out;
+ }
+
+ if (opts & PA_O_SOCKET_FD) {
+ socklen_t addr_len;
+ int type;
+
+ addr_len = sizeof(ss);
+ if (getsockname(new_fd, (struct sockaddr *)&ss, &addr_len) == -1) {
+ memprintf(err, "cannot use file descriptor '%d' : %s.\n", new_fd, strerror(errno));
+ goto out;
+ }
+
+ addr_len = sizeof(type);
+ if (getsockopt(new_fd, SOL_SOCKET, SO_TYPE, &type, &addr_len) != 0 ||
+ (type == SOCK_STREAM) != (proto_type == PROTO_TYPE_STREAM)) {
+ memprintf(err, "socket on file descriptor '%d' is of the wrong type.\n", new_fd);
+ goto out;
+ }
+
+ porta = portl = porth = get_host_port(&ss);
+ } else if (opts & PA_O_RAW_FD) {
+ ((struct sockaddr_in *)&ss)->sin_addr.s_addr = new_fd;
+ ((struct sockaddr_in *)&ss)->sin_port = 0;
+ } else {
+ memprintf(err, "a file descriptor is not acceptable here in '%s'\n", str);
+ goto out;
+ }
+ }
+ else if (ss.ss_family == AF_UNIX) {
+ struct sockaddr_un *un = (struct sockaddr_un *)&ss;
+ int prefix_path_len;
+ int max_path_len;
+ int adr_len;
+
+ /* complete unix socket path name during startup or soft-restart is
+ * <unix_bind_prefix><path>.<pid>.<bak|tmp>
+ */
+ prefix_path_len = (pfx && !abstract) ? strlen(pfx) : 0;
+ max_path_len = (sizeof(un->sun_path) - 1) -
+ (abstract ? 0 : prefix_path_len + 1 + 5 + 1 + 3);
+
+ adr_len = strlen(str2);
+ if (adr_len > max_path_len) {
+ memprintf(err, "socket path '%s' too long (max %d)\n", str, max_path_len);
+ goto out;
+ }
+
+ /* when abstract==1, we skip the first zero and copy all bytes except the trailing zero */
+ memset(un->sun_path, 0, sizeof(un->sun_path));
+ if (prefix_path_len)
+ memcpy(un->sun_path, pfx, prefix_path_len);
+ memcpy(un->sun_path + prefix_path_len + abstract, str2, adr_len + 1 - abstract);
+ }
+ else { /* IPv4 and IPv6 */
+ char *end = str2 + strlen(str2);
+ char *chr;
+
+ /* search for : or ] whatever comes first */
+ for (chr = end-1; chr > str2; chr--) {
+ if (*chr == ']' || *chr == ':')
+ break;
+ }
+
+ if (*chr == ':') {
+ /* Found a colon before a closing-bracket, must be a port separator.
+ * This guarantees backward compatibility.
+ */
+ if (!(opts & PA_O_PORT_OK)) {
+ memprintf(err, "port specification not permitted here in '%s'", str);
+ goto out;
+ }
+ *chr++ = '\0';
+ port1 = chr;
+ }
+ else {
+ /* Either no colon was found at all, or the string directly
+ * ends with a closing bracket. In both cases, there is no
+ * port.
+ */
+ if (opts & PA_O_PORT_MAND) {
+ memprintf(err, "missing port specification in '%s'", str);
+ goto out;
+ }
+ port1 = "";
+ }
+
+ if (isdigit((unsigned char)*port1)) { /* single port or range */
+ port2 = strchr(port1, '-');
+ if (port2) {
+ if (!(opts & PA_O_PORT_RANGE)) {
+ memprintf(err, "port range not permitted here in '%s'", str);
+ goto out;
+ }
+ *port2++ = '\0';
+ }
+ else
+ port2 = port1;
+ portl = atoi(port1);
+ porth = atoi(port2);
+
+ if (portl < !!(opts & PA_O_PORT_MAND) || portl > 65535) {
+ memprintf(err, "invalid port '%s'", port1);
+ goto out;
+ }
+
+ if (porth < !!(opts & PA_O_PORT_MAND) || porth > 65535) {
+ memprintf(err, "invalid port '%s'", port2);
+ goto out;
+ }
+
+ if (portl > porth) {
+ memprintf(err, "invalid port range '%d-%d'", portl, porth);
+ goto out;
+ }
+
+ porta = portl;
+ }
+ else if (*port1 == '-') { /* negative offset */
+ if (!(opts & PA_O_PORT_OFS)) {
+ memprintf(err, "port offset not permitted here in '%s'", str);
+ goto out;
+ }
+ portl = atoi(port1 + 1);
+ porta = -portl;
+ }
+ else if (*port1 == '+') { /* positive offset */
+ if (!(opts & PA_O_PORT_OFS)) {
+ memprintf(err, "port offset not permitted here in '%s'", str);
+ goto out;
+ }
+ porth = atoi(port1 + 1);
+ porta = porth;
+ }
+ else if (*port1) { /* any other unexpected char */
+ memprintf(err, "invalid character '%c' in port number '%s' in '%s'\n", *port1, port1, str);
+ goto out;
+ }
+ else if (opts & PA_O_PORT_MAND) {
+ memprintf(err, "missing port specification in '%s'", str);
+ goto out;
+ }
+
+ /* first try to parse the IP without resolving. If it fails, it
+ * tells us we need to keep a copy of the FQDN to resolve later
+ * and to enable DNS. In this case we can proceed if <fqdn> is
+ * set or if PA_O_RESOLVE is set, otherwise it's an error.
+ */
+ if (str2ip2(str2, &ss, 0) == NULL) {
+ if ((!(opts & PA_O_RESOLVE) && !fqdn) ||
+ ((opts & PA_O_RESOLVE) && str2ip2(str2, &ss, 1) == NULL)) {
+ memprintf(err, "invalid address: '%s' in '%s'\n", str2, str);
+ goto out;
+ }
+
+ if (fqdn) {
+ if (str2 != back)
+ memmove(back, str2, strlen(str2) + 1);
+ *fqdn = back;
+ back = NULL;
+ }
+ }
+ set_host_port(&ss, porta);
+ }
+
+ if (ctrl_type == SOCK_STREAM && !(opts & PA_O_STREAM)) {
+ memprintf(err, "stream-type address not acceptable in '%s'\n", str);
+ goto out;
+ }
+ else if (ctrl_type == SOCK_DGRAM && !(opts & PA_O_DGRAM)) {
+ memprintf(err, "dgram-type address not acceptable in '%s'\n", str);
+ goto out;
+ }
+
+ if (proto || (opts & PA_O_CONNECT)) {
+ /* Note: if the caller asks for a proto, we must find one,
+ * except if we inherit from a raw FD (family == AF_CUST_EXISTING_FD)
+ * or if we return with an fqdn that will resolve later,
+ * in which case the address is not known yet (this is only
+ * for servers actually).
+ */
+ new_proto = protocol_lookup(ss.ss_family,
+ proto_type,
+ ctrl_type == SOCK_DGRAM);
+
+ if (!new_proto && (!fqdn || !*fqdn) && (ss.ss_family != AF_CUST_EXISTING_FD)) {
+ memprintf(err, "unsupported %s protocol for %s family %d address '%s'%s",
+ (ctrl_type == SOCK_DGRAM) ? "datagram" : "stream",
+ (proto_type == PROTO_TYPE_DGRAM) ? "datagram" : "stream",
+ ss.ss_family,
+ str,
+#ifndef USE_QUIC
+ (ctrl_type == SOCK_STREAM && proto_type == PROTO_TYPE_DGRAM)
+ ? "; QUIC is not compiled in if this is what you were looking for."
+ : ""
+#else
+ ""
+#endif
+ );
+ goto out;
+ }
+
+ if ((opts & PA_O_CONNECT) && new_proto && !new_proto->connect) {
+ memprintf(err, "connect() not supported for this protocol family %d used by address '%s'", ss.ss_family, str);
+ goto out;
+ }
+ }
+
+ ret = &ss;
+ out:
+ if (port)
+ *port = porta;
+ if (low)
+ *low = portl;
+ if (high)
+ *high = porth;
+ if (fd)
+ *fd = new_fd;
+ if (proto)
+ *proto = new_proto;
+ free(back);
+ return ret;
+}
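+
+/* Usage sketch (editor's illustration, not part of the build): parsing a
+ * bind-style address with a port range, using the PA_O_* flags described
+ * above. report_and_free() is a hypothetical helper; <err> must be freed
+ * by the caller on error.
+ *
+ *     char *err = NULL;
+ *     int port, low, high, fd;
+ *     struct protocol *proto;
+ *     struct sockaddr_storage *ss;
+ *
+ *     ss = str2sa_range("ipv4@192.168.0.1:8080-8090", &port, &low, &high,
+ *                       &fd, &proto, &err, NULL, NULL,
+ *                       PA_O_PORT_OK | PA_O_PORT_RANGE | PA_O_STREAM);
+ *     if (!ss)
+ *         report_and_free(err);
+ *     // on success: port == low == 8080 and high == 8090
+ */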
+
+/* converts <addr> and <port> into a string representation of the address and port. This is sort
+ * of an inverse of str2sa_range, with some restrictions. The supported families are AF_INET,
+ * AF_INET6, AF_UNIX, and AF_CUST_SOCKPAIR. If the family is unsupported, NULL is returned.
+ * If map_ports is true, then the sign of the port is included in the output, to indicate it is
+ * relative to the incoming port. AF_INET and AF_INET6 will be in the form "<addr>:<port>".
+ * AF_UNIX will either be just the path (if using a pathname) or "abns@<path>" if it is abstract.
+ * AF_CUST_SOCKPAIR will be of the form "sockpair@<fd>".
+ *
+ * The returned char* is allocated, and it is the responsibility of the caller to free it.
+ */
+char *sa2str(const struct sockaddr_storage *addr, int port, int map_ports)
+{
+ char buffer[INET6_ADDRSTRLEN];
+ char *out = NULL;
+ const void *ptr;
+ const char *path;
+
+ switch (addr->ss_family) {
+ case AF_INET:
+ ptr = &((struct sockaddr_in *)addr)->sin_addr;
+ break;
+ case AF_INET6:
+ ptr = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ break;
+ case AF_UNIX:
+ path = ((struct sockaddr_un *)addr)->sun_path;
+ if (path[0] == '\0') {
+ const int max_length = sizeof(struct sockaddr_un) - offsetof(struct sockaddr_un, sun_path) - 1;
+ return memprintf(&out, "abns@%.*s", max_length, path+1);
+ } else {
+ return strdup(path);
+ }
+ case AF_CUST_SOCKPAIR:
+ return memprintf(&out, "sockpair@%d", ((struct sockaddr_in *)addr)->sin_addr.s_addr);
+ default:
+ return NULL;
+ }
+ if (inet_ntop(addr->ss_family, ptr, buffer, sizeof(buffer)) == NULL) {
+ BUG_ON(errno == ENOSPC);
+ return NULL;
+ }
+ if (map_ports)
+ return memprintf(&out, "%s:%+d", buffer, port);
+ else
+ return memprintf(&out, "%s:%d", buffer, port);
+}
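+
+/* Usage sketch (editor's illustration, not part of the build): the returned
+ * string is heap-allocated and must be freed by the caller. <ss> is assumed
+ * to hold a valid parsed address.
+ *
+ *     char *txt = sa2str(&ss, get_host_port(&ss), 0);
+ *
+ *     if (txt) {
+ *         // e.g. "192.168.0.1:8080" for an AF_INET address
+ *         free(txt);
+ *     }
+ */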
+
+
+/* converts <str> to a struct in_addr containing a network mask. It can be
+ * passed in dotted form (255.255.255.0) or in CIDR form (24). It returns 1
+ * if the conversion succeeds otherwise zero.
+ */
+int str2mask(const char *str, struct in_addr *mask)
+{
+ if (strchr(str, '.') != NULL) { /* dotted notation */
+ if (!inet_pton(AF_INET, str, mask))
+ return 0;
+ }
+ else { /* mask length */
+ char *err;
+ unsigned long len = strtol(str, &err, 10);
+
+ if (!*str || (err && *err) || (unsigned)len > 32)
+ return 0;
+
+ len2mask4(len, mask);
+ }
+ return 1;
+}
+
+/* converts <str> to a struct in6_addr containing a network mask. It can be
+ * passed in quadruplet form (ffff:ffff::) or in CIDR form (64). It returns 1
+ * if the conversion succeeds otherwise zero.
+ */
+int str2mask6(const char *str, struct in6_addr *mask)
+{
+ if (strchr(str, ':') != NULL) { /* quadruplet notation */
+ if (!inet_pton(AF_INET6, str, mask))
+ return 0;
+ }
+ else { /* mask length */
+ char *err;
+ unsigned long len = strtol(str, &err, 10);
+
+ if (!*str || (err && *err) || (unsigned)len > 128)
+ return 0;
+
+ len2mask6(len, mask);
+ }
+ return 1;
+}
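+
+/* Usage sketch (editor's illustration, not part of the build): both mask
+ * notations accepted by str2mask() yield the same result.
+ *
+ *     struct in_addr m1, m2;
+ *
+ *     str2mask("255.255.255.0", &m1);   // dotted form
+ *     str2mask("24", &m2);              // CIDR form; m1 equals m2
+ */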
+
+/* convert <cidr> to struct in_addr <mask>. It returns 1 if the conversion
+ * succeeds otherwise zero.
+ */
+int cidr2dotted(int cidr, struct in_addr *mask) {
+
+ if (cidr < 0 || cidr > 32)
+ return 0;
+
+ mask->s_addr = cidr ? htonl(~0UL << (32 - cidr)) : 0;
+ return 1;
+}
+
+/* Convert mask from bit length form to in_addr form.
+ * This function never fails.
+ */
+void len2mask4(int len, struct in_addr *addr)
+{
+ if (len >= 32) {
+ addr->s_addr = 0xffffffff;
+ return;
+ }
+ if (len <= 0) {
+ addr->s_addr = 0x00000000;
+ return;
+ }
+ addr->s_addr = 0xffffffff << (32 - len);
+ addr->s_addr = htonl(addr->s_addr);
+}
+
+/* Convert mask from bit length form to in6_addr form.
+ * This function never fails.
+ */
+void len2mask6(int len, struct in6_addr *addr)
+{
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[0]); /* msb */
+ len -= 32;
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[4]);
+ len -= 32;
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[8]);
+ len -= 32;
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[12]); /* lsb */
+}
+
+/*
+ * converts <str> to two struct in_addr* which must be pre-allocated.
+ * The format is "addr[/mask]", where "addr" cannot be empty, and mask
+ * is optional and either in the dotted or CIDR notation.
+ * Note: "addr" can also be a hostname. Returns 1 if OK, 0 if error.
+ */
+int str2net(const char *str, int resolve, struct in_addr *addr, struct in_addr *mask)
+{
+ __label__ out_free, out_err;
+ char *c, *s;
+ int ret_val;
+
+ s = strdup(str);
+ if (!s)
+ return 0;
+
+ memset(mask, 0, sizeof(*mask));
+ memset(addr, 0, sizeof(*addr));
+
+ if ((c = strrchr(s, '/')) != NULL) {
+ *c++ = '\0';
+ /* c points to the mask */
+ if (!str2mask(c, mask))
+ goto out_err;
+ }
+ else {
+ mask->s_addr = ~0U;
+ }
+ if (!inet_pton(AF_INET, s, addr)) {
+ struct hostent *he;
+
+ if (!resolve)
+ goto out_err;
+
+ if ((he = gethostbyname(s)) == NULL) {
+ goto out_err;
+ }
+ else
+ *addr = *(struct in_addr *) *(he->h_addr_list);
+ }
+
+ ret_val = 1;
+ out_free:
+ free(s);
+ return ret_val;
+ out_err:
+ ret_val = 0;
+ goto out_free;
+}
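+
+/* Usage sketch (editor's illustration, not part of the build): parsing an
+ * "addr/mask" pair with resolution disabled. handle_parse_error() is a
+ * hypothetical helper.
+ *
+ *     struct in_addr addr, mask;
+ *
+ *     if (!str2net("10.0.0.0/8", 0, &addr, &mask))
+ *         handle_parse_error();
+ *     // on success: addr == 10.0.0.0 and mask == 255.0.0.0
+ */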
+
+
+/*
+ * converts <str> to two struct in6_addr* which must be pre-allocated.
+ * The format is "addr[/mask]", where "addr" cannot be empty, and mask
+ * is an optional number of bits (128 being the default).
+ * Returns 1 if OK, 0 if error.
+ */
+int str62net(const char *str, struct in6_addr *addr, unsigned char *mask)
+{
+ char *c, *s;
+ int ret_val = 0;
+ char *err;
+ unsigned long len = 128;
+
+ s = strdup(str);
+ if (!s)
+ return 0;
+
+ memset(mask, 0, sizeof(*mask));
+ memset(addr, 0, sizeof(*addr));
+
+ if ((c = strrchr(s, '/')) != NULL) {
+ *c++ = '\0'; /* c points to the mask */
+ if (!*c)
+ goto out_free;
+
+ len = strtoul(c, &err, 10);
+ if ((err && *err) || (unsigned)len > 128)
+ goto out_free;
+ }
+ *mask = len; /* OK we have a valid mask in <len> */
+
+ if (!inet_pton(AF_INET6, s, addr))
+ goto out_free;
+
+ ret_val = 1;
+ out_free:
+ free(s);
+ return ret_val;
+}
+
+
+/*
+ * Parse IPv4 address found in url. Return the number of bytes parsed. It
+ * expects exactly 4 numbers between 0 and 255 delimited by dots, and returns
+ * zero in case of mismatch.
+ */
+int url2ipv4(const char *addr, struct in_addr *dst)
+{
+ int saw_digit, octets, ch;
+ u_char tmp[4], *tp;
+ const char *cp = addr;
+
+ saw_digit = 0;
+ octets = 0;
+ *(tp = tmp) = 0;
+
+ while (*addr) {
+ unsigned char digit = (ch = *addr) - '0';
+ if (digit > 9 && ch != '.')
+ break;
+ addr++;
+ if (digit <= 9) {
+ u_int new = *tp * 10 + digit;
+ if (new > 255)
+ return 0;
+ *tp = new;
+ if (!saw_digit) {
+ if (++octets > 4)
+ return 0;
+ saw_digit = 1;
+ }
+ } else if (ch == '.' && saw_digit) {
+ if (octets == 4)
+ return 0;
+ *++tp = 0;
+ saw_digit = 0;
+ } else
+ return 0;
+ }
+
+ if (octets < 4)
+ return 0;
+
+ memcpy(&dst->s_addr, tmp, 4);
+ return addr - cp;
+}
+
+/*
+ * Resolve destination server from URL. Convert <url> to a sockaddr_storage.
+ * <out> contains the code of the detected scheme, the start and length of
+ * the hostname. Currently only http and https are supported. <out> can be NULL.
+ * This function returns the consumed length. It is useful if you parse complete
+ * url like http://host:port/path, because the consumed length corresponds to
+ * the first character of the path. If the conversion fails, it returns -1.
+ *
+ * This function tries to resolve the DNS name if haproxy is in starting mode.
+ * So, this function may be used during the configuration parsing.
+ */
+int url2sa(const char *url, int ulen, struct sockaddr_storage *addr, struct split_url *out)
+{
+ const char *curr = url, *cp = url;
+ const char *end;
+ int ret, url_code = 0;
+ unsigned long long int http_code = 0;
+ int default_port;
+ struct hostent *he;
+ char *p;
+
+ /* First, try to find the :// pattern */
+ while (curr < url+ulen && url_code != 0x3a2f2f) {
+ url_code = ((url_code & 0xffff) << 8);
+ url_code += (unsigned char)*curr++;
+ }
+
+ /* Second, if the :// pattern was found, check that what precedes
+ * it matches a supported http scheme. If so, parse the ip address
+ * and port found in the uri.
+ *
+ * WARNING: Current code doesn't support dynamic async dns resolver.
+ */
+ if (url_code != 0x3a2f2f)
+ return -1;
+
+ /* Copy the scheme and turn it to lower case. */
+ while (cp < curr - 3)
+ http_code = (http_code << 8) + *cp++;
+ http_code |= 0x2020202020202020ULL; /* Turn everything to lower case */
+
+ /* HTTP or HTTPS url matching */
+ if (http_code == 0x2020202068747470ULL) {
+ default_port = 80;
+ if (out)
+ out->scheme = SCH_HTTP;
+ }
+ else if (http_code == 0x2020206874747073ULL) {
+ default_port = 443;
+ if (out)
+ out->scheme = SCH_HTTPS;
+ }
+ else
+ return -1;
+
+ /* If the next char is '[', the host address is IPv6. */
+ if (*curr == '[') {
+ curr++;
+
+ /* Check trash size */
+ if (trash.size < ulen)
+ return -1;
+
+ /* Look for ']' and copy the address in a trash buffer. */
+ p = trash.area;
+ for (end = curr;
+ end < url + ulen && *end != ']';
+ end++, p++)
+ *p = *end;
+ if (*end != ']')
+ return -1;
+ *p = '\0';
+
+ /* Update out. */
+ if (out) {
+ out->host = curr;
+ out->host_len = end - curr;
+ }
+
+ /* Try IPv6 decoding. */
+ if (!inet_pton(AF_INET6, trash.area, &((struct sockaddr_in6 *)addr)->sin6_addr))
+ return -1;
+ end++;
+
+ /* Decode port. */
+ if (end < url + ulen && *end == ':') {
+ end++;
+ default_port = read_uint(&end, url + ulen);
+ }
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(default_port);
+ ((struct sockaddr_in6 *)addr)->sin6_family = AF_INET6;
+ return end - url;
+ }
+ else {
+ /* we need to copy the string into the trash because url2ipv4
+ * needs a \0 at the end of the string */
+ if (trash.size < ulen)
+ return -1;
+
+ memcpy(trash.area, curr, ulen - (curr - url));
+ trash.area[ulen - (curr - url)] = '\0';
+
+ /* We are looking for an IP address. If you want to parse and
+ * resolve a hostname found in a url, you can use str2sa_range(),
+ * but be warned this can slow down global daemon performance
+ * while handling lagging dns responses.
+ */
+ ret = url2ipv4(trash.area, &((struct sockaddr_in *)addr)->sin_addr);
+ if (ret) {
+ /* Update out. */
+ if (out) {
+ out->host = curr;
+ out->host_len = ret;
+ }
+
+ curr += ret;
+
+ /* Decode port. */
+ if (curr < url + ulen && *curr == ':') {
+ curr++;
+ default_port = read_uint(&curr, url + ulen);
+ }
+ ((struct sockaddr_in *)addr)->sin_port = htons(default_port);
+
+ /* Set family. */
+ ((struct sockaddr_in *)addr)->sin_family = AF_INET;
+ return curr - url;
+ }
+ else if (global.mode & MODE_STARTING) {
+ /* The IPv4 decoding failed, so the url may contain a host
+ * name. Try a synchronous DNS request, but only while HAProxy
+ * is in the starting state.
+ */
+
+ /* look for : or / or end */
+ for (end = curr;
+ end < url + ulen && *end != '/' && *end != ':';
+ end++);
+ memcpy(trash.area, curr, end - curr);
+ trash.area[end - curr] = '\0';
+
+ /* try to resolve an IPv4/IPv6 hostname */
+ he = gethostbyname(trash.area);
+ if (!he)
+ return -1;
+
+ /* Update out. */
+ if (out) {
+ out->host = curr;
+ out->host_len = end - curr;
+ }
+
+ /* Decode port. */
+ if (end < url + ulen && *end == ':') {
+ end++;
+ default_port = read_uint(&end, url + ulen);
+ }
+
+ /* Copy IP address, set port and family. */
+ switch (he->h_addrtype) {
+ case AF_INET:
+ ((struct sockaddr_in *)addr)->sin_addr = *(struct in_addr *) *(he->h_addr_list);
+ ((struct sockaddr_in *)addr)->sin_port = htons(default_port);
+ ((struct sockaddr_in *)addr)->sin_family = AF_INET;
+ return end - url;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *)addr)->sin6_addr = *(struct in6_addr *) *(he->h_addr_list);
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(default_port);
+ ((struct sockaddr_in6 *)addr)->sin6_family = AF_INET6;
+ return end - url;
+ }
+ }
+ }
+ return -1;
+}
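+
+/* Usage sketch (editor's illustration, not part of the build): the return
+ * value is the number of bytes consumed up to the start of the path.
+ *
+ *     struct sockaddr_storage addr;
+ *     struct split_url out;
+ *     const char *url = "http://192.168.0.1:8080/index.html";
+ *     int len = url2sa(url, strlen(url), &addr, &out);
+ *
+ *     // on success: url + len points to "/index.html", out.scheme is
+ *     // SCH_HTTP, and <addr> holds 192.168.0.1:8080
+ */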
+
+/* Tries to convert a sockaddr_storage address to text form. Upon success, the
+ * address family is returned so that it's easy for the caller to adapt to the
+ * output format. Zero is returned if the address family is not supported. -1
+ * is returned upon error, with errno set. AF_INET, AF_INET6 and AF_UNIX are
+ * supported.
+ */
+int addr_to_str(const struct sockaddr_storage *addr, char *str, int size)
+{
+
+ const void *ptr;
+
+ if (size < 5)
+ return 0;
+ *str = '\0';
+
+ switch (addr->ss_family) {
+ case AF_INET:
+ ptr = &((struct sockaddr_in *)addr)->sin_addr;
+ break;
+ case AF_INET6:
+ ptr = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ break;
+ case AF_UNIX:
+ memcpy(str, "unix", 5);
+ return addr->ss_family;
+ default:
+ return 0;
+ }
+
+ if (inet_ntop(addr->ss_family, ptr, str, size))
+ return addr->ss_family;
+
+ /* failed */
+ return -1;
+}
+
+/* Tries to convert a sockaddr_storage port to text form. Upon success, the
+ * address family is returned so that it's easy for the caller to adapt to the
+ * output format. Zero is returned if the address family is not supported. -1
+ * is returned upon error, with errno set. AF_INET, AF_INET6 and AF_UNIX are
+ * supported.
+ */
+int port_to_str(const struct sockaddr_storage *addr, char *str, int size)
+{
+
+ uint16_t port;
+
+ if (size < 6)
+ return 0;
+ *str = '\0';
+
+ switch (addr->ss_family) {
+ case AF_INET:
+ port = ((struct sockaddr_in *)addr)->sin_port;
+ break;
+ case AF_INET6:
+ port = ((struct sockaddr_in6 *)addr)->sin6_port;
+ break;
+ case AF_UNIX:
+ memcpy(str, "unix", 5);
+ return addr->ss_family;
+ default:
+ return 0;
+ }
+
+ snprintf(str, size, "%u", ntohs(port));
+ return addr->ss_family;
+}
+
+/* check if the given address is local to the system or not. It will return
+ * -1 when it's not possible to know, 0 when the address is not local, 1 when
+ * it is. We don't want to iterate over all interfaces for this (and it is not
+ * portable). So instead we try to bind in UDP to this address on a free non
+ * privileged port and to connect to the same address, port 0 (connect doesn't
+ * care). If it succeeds, we own the address. Note that non-inet addresses are
+ * considered local since they're most likely AF_UNIX.
+ */
+int addr_is_local(const struct netns_entry *ns,
+ const struct sockaddr_storage *orig)
+{
+ struct sockaddr_storage addr;
+ int result;
+ int fd;
+
+ if (!is_inet_addr(orig))
+ return 1;
+
+ memcpy(&addr, orig, sizeof(addr));
+ set_host_port(&addr, 0);
+
+ fd = my_socketat(ns, addr.ss_family, SOCK_DGRAM, IPPROTO_UDP);
+ if (fd < 0)
+ return -1;
+
+ result = -1;
+ if (bind(fd, (struct sockaddr *)&addr, get_addr_len(&addr)) == 0) {
+ if (connect(fd, (struct sockaddr *)&addr, get_addr_len(&addr)) == -1)
+ result = 0; // fail, non-local address
+ else
+ result = 1; // success, local address
+ }
+ else {
+ if (errno == EADDRNOTAVAIL)
+ result = 0; // definitely not local :-)
+ }
+ close(fd);
+
+ return result;
+}
+
+/* will try to encode the string <string> replacing all characters tagged in
+ * <map> with the hexadecimal representation of their ASCII-code (2 digits)
+ * prefixed by <escape>, and will store the result between <start> (included)
+ * and <stop> (excluded), and will always terminate the string with a '\0'
+ * before <stop>. The position of the '\0' is returned if the conversion
+ * completes. If bytes are missing between <start> and <stop>, then the
+ * conversion will be incomplete and truncated. If <stop> <= <start>, the '\0'
+ * cannot even be stored so we return <start> without writing the 0.
+ * The input string must also be zero-terminated.
+ */
+const char hextab[16] = "0123456789ABCDEF";
+char *encode_string(char *start, char *stop,
+ const char escape, const long *map,
+ const char *string)
+{
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && *string != '\0') {
+ if (!ha_bit_test((unsigned char)(*string), map))
+ *start++ = *string;
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*string >> 4) & 15];
+ *start++ = hextab[*string & 15];
+ }
+ string++;
+ }
+ *start = '\0';
+ }
+ return start;
+}
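+
+/* Usage sketch (editor's illustration, not part of the build): assuming a
+ * bit map <map> in which only the space character has been tagged (how the
+ * map is built is not shown here):
+ *
+ *     char out[64];
+ *     char *end = encode_string(out, out + sizeof(out), '%', map, "a b");
+ *
+ *     // out now contains "a%20b" and <end> points to its trailing '\0'
+ */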
+
+/*
+ * Same behavior as encode_string() above, except that it encodes chunk
+ * <chunk> instead of a string.
+ */
+char *encode_chunk(char *start, char *stop,
+ const char escape, const long *map,
+ const struct buffer *chunk)
+{
+ char *str = chunk->area;
+ char *end = chunk->area + chunk->data;
+
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && str < end) {
+ if (!ha_bit_test((unsigned char)(*str), map))
+ *start++ = *str;
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*str >> 4) & 15];
+ *start++ = hextab[*str & 15];
+ }
+ str++;
+ }
+ *start = '\0';
+ }
+ return start;
+}
+
+/*
+ * Tries to prefix characters tagged in the <map> with the <escape>
+ * character. The input <string> is processed until <string_stop>
+ * is reached or a NUL byte is encountered. The result will
+ * be stored between <start> (included) and <stop> (excluded). This
+ * function will always try to terminate the resulting string with a '\0'
+ * before <stop>, and will return its position if the conversion
+ * completes.
+ */
+char *escape_string(char *start, char *stop,
+ const char escape, const long *map,
+ const char *string, const char *string_stop)
+{
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && string < string_stop && *string != '\0') {
+ if (!ha_bit_test((unsigned char)(*string), map))
+ *start++ = *string;
+ else {
+ if (start + 2 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = *string;
+ }
+ string++;
+ }
+ *start = '\0';
+ }
+ return start;
+}
+
+/*
+ * Tries to prefix characters tagged in the <map> with the <escape>
+ * character. <chunk> contains the input to be escaped. The result will be
+ * stored between <start> (included) and <stop> (excluded). The function
+ * will always try to terminate the resulting string with a '\0' before
+ * <stop>, and will return its position if the conversion completes.
+ */
+char *escape_chunk(char *start, char *stop,
+ const char escape, const long *map,
+ const struct buffer *chunk)
+{
+ char *str = chunk->area;
+ char *end = chunk->area + chunk->data;
+
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && str < end) {
+ if (!ha_bit_test((unsigned char)(*str), map))
+ *start++ = *str;
+ else {
+ if (start + 2 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = *str;
+ }
+ str++;
+ }
+ *start = '\0';
+ }
+ return start;
+}
+
+/* Check a string for using it in a CSV output format. If the string contains
+ * one of the following four chars <">, <,>, CR or LF, the string is
+ * encapsulated between <"> and each <"> is escaped by a <""> sequence.
+ * <str> is the input string to be escaped. The function assumes that
+ * the input string is null-terminated.
+ *
+ * If <quote> is 0, the result is returned escaped but without double quote.
+ * It is useful if the escaped string is used between double quotes in the
+ * format.
+ *
+ * printf("..., \"%s\", ...\r\n", csv_enc(str, 0, &trash));
+ *
+ * If <quote> is 1, the converter puts the quotes only if any reserved character
+ * is present. If <quote> is 2, the converter always puts the quotes.
+ *
+ * <output> is a struct buffer used for storing the output string.
+ *
+ * The function returns the converted string on its output. If an error
+ * occurs, the function returns an empty string. This type of output is useful
+ * for using the function directly as printf() argument.
+ *
+ * If the output buffer is too short to contain the input string, the result
+ * is truncated.
+ *
+ * This function appends the encoding to the existing output chunk, and it
+ * guarantees that it starts immediately at the first available character of
+ * the chunk. Please use csv_enc() instead if you want to replace the output
+ * chunk.
+ */
+const char *csv_enc_append(const char *str, int quote, struct buffer *output)
+{
+ char *end = output->area + output->size;
+ char *out = output->area + output->data;
+ char *ptr = out;
+
+ if (quote == 1) {
+ /* automatic quoting: first verify if we'll have to quote the string */
+ if (!strpbrk(str, "\n\r,\""))
+ quote = 0;
+ }
+
+ if (quote)
+ *ptr++ = '"';
+
+ while (*str && ptr < end - 2) { /* -2 for reserving space for <"> and \0. */
+ *ptr = *str;
+ if (*str == '"') {
+ ptr++;
+ if (ptr >= end - 2) {
+ ptr--;
+ break;
+ }
+ *ptr = '"';
+ }
+ ptr++;
+ str++;
+ }
+
+ if (quote)
+ *ptr++ = '"';
+
+ *ptr = '\0';
+ output->data = ptr - output->area;
+ return out;
+}
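+
+/* Usage sketch (editor's illustration, not part of the build): appending
+ * into a chunk with automatic quoting (<quote>=1).
+ *
+ *     trash.data = 0;
+ *     printf("%s\n", csv_enc_append("say \"hi\", world", 1, &trash));
+ *     // prints: "say ""hi"", world"  (quoted because of the ',' and '"')
+ */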
+
+/* Decode an URL-encoded string in-place. The resulting string might
+ * be shorter. If some forbidden characters are found, the conversion is
+ * aborted, the string is truncated before the issue and a negative value is
+ * returned, otherwise the operation returns the length of the decoded string.
+ * If the 'in_form' argument is non-zero, the string is assumed to be part of
+ * an "application/x-www-form-urlencoded" encoded string, and the '+' will be
+ * turned to a space. If it's zero, this will only be done after a question
+ * mark ('?').
+ */
+int url_decode(char *string, int in_form)
+{
+ char *in, *out;
+ int ret = -1;
+
+ in = string;
+ out = string;
+ while (*in) {
+ switch (*in) {
+ case '+' :
+ *out++ = in_form ? ' ' : *in;
+ break;
+ case '%' :
+ if (!ishex(in[1]) || !ishex(in[2]))
+ goto end;
+ *out++ = (hex2i(in[1]) << 4) + hex2i(in[2]);
+ in += 2;
+ break;
+ case '?':
+ in_form = 1;
+ /* fall through */
+ default:
+ *out++ = *in;
+ break;
+ }
+ in++;
+ }
+ ret = out - string; /* success */
+ end:
+ *out = 0;
+ return ret;
+}
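+
+/* Usage sketch (editor's illustration, not part of the build): the string
+ * is rewritten in place and may shrink.
+ *
+ *     char q[] = "a=b%20c+d";
+ *     int len = url_decode(q, 1);   // in_form=1: '+' becomes a space
+ *
+ *     // q is now "a=b c d" and len == 7; a negative len means bad input
+ */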
+
+unsigned int str2ui(const char *s)
+{
+ return __str2ui(s);
+}
+
+unsigned int str2uic(const char *s)
+{
+ return __str2uic(s);
+}
+
+unsigned int strl2ui(const char *s, int len)
+{
+ return __strl2ui(s, len);
+}
+
+unsigned int strl2uic(const char *s, int len)
+{
+ return __strl2uic(s, len);
+}
+
+unsigned int read_uint(const char **s, const char *end)
+{
+ return __read_uint(s, end);
+}
+
+/* This function reads an unsigned integer from the string pointed to by <s> and
+ * returns it. The <s> pointer is adjusted to point to the first unread char. The
+ * function automatically stops at <end>. If the number overflows, the 2^64-1
+ * value is returned.
+ */
+unsigned long long int read_uint64(const char **s, const char *end)
+{
+ const char *ptr = *s;
+ unsigned long long int i = 0, tmp;
+ unsigned int j;
+
+ while (ptr < end) {
+
+ /* read next char */
+ j = *ptr - '0';
+ if (j > 9)
+ goto read_uint64_end;
+
+ /* add char to the number and check overflow. */
+ tmp = i * 10;
+ if (tmp / 10 != i) {
+ i = ULLONG_MAX;
+ goto read_uint64_eat;
+ }
+ if (ULLONG_MAX - tmp < j) {
+ i = ULLONG_MAX;
+ goto read_uint64_eat;
+ }
+ i = tmp + j;
+ ptr++;
+ }
+read_uint64_eat:
+ /* eat each numeric char */
+ while (ptr < end) {
+ if ((unsigned int)(*ptr - '0') > 9)
+ break;
+ ptr++;
+ }
+read_uint64_end:
+ *s = ptr;
+ return i;
+}
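+
+/* Usage sketch (editor's illustration, not part of the build): overflowing
+ * input is clamped to ULLONG_MAX and all trailing digits are consumed.
+ *
+ *     const char *s = "99999999999999999999x";
+ *     unsigned long long v = read_uint64(&s, s + strlen(s));
+ *
+ *     // v == ULLONG_MAX and <s> now points to the 'x'
+ */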
+
+/* This function reads an integer from the string pointed to by <s> and returns
+ * it. The <s> pointer is adjusted to point to the first unread char. The function
+ * automatically stops at <end>. If the number is bigger than 2^63-1, the
+ * 2^63-1 value is returned. If the number is lower than -2^63, the -2^63
+ * value is returned.
+ */
+long long int read_int64(const char **s, const char *end)
+{
+ unsigned long long int i = 0;
+ int neg = 0;
+
+ /* Look for minus char. */
+ if (**s == '-') {
+ neg = 1;
+ (*s)++;
+ }
+ else if (**s == '+')
+ (*s)++;
+
+ /* convert as positive number. */
+ i = read_uint64(s, end);
+
+ if (neg) {
+ if (i > 0x8000000000000000ULL)
+ return LLONG_MIN;
+ return -i;
+ }
+ if (i > 0x7fffffffffffffffULL)
+ return LLONG_MAX;
+ return i;
+}
+
+/* This one is 7 times faster than strtol() on athlon with checks.
+ * It returns the value of the number composed of all valid digits read,
+ * and can process negative numbers too.
+ */
+int strl2ic(const char *s, int len)
+{
+ int i = 0;
+ int j, k;
+
+ if (len > 0) {
+ if (*s != '-') {
+ /* positive number */
+ while (len-- > 0) {
+ j = (*s++) - '0';
+ k = i * 10;
+ if (j > 9)
+ break;
+ i = k + j;
+ }
+ } else {
+ /* negative number */
+ s++;
+ while (--len > 0) {
+ j = (*s++) - '0';
+ k = i * 10;
+ if (j > 9)
+ break;
+ i = k - j;
+ }
+ }
+ }
+ return i;
+}
+
+
+/* This function reads exactly <len> chars from <s> and converts them to a
+ * signed integer which it stores into <ret>. It accurately detects any error
+ * (truncated string, invalid chars, overflows). It is meant to be used in
+ * applications designed for hostile environments. It returns zero when the
+ * number has successfully been converted, non-zero otherwise. When an error
+ * is returned, the <ret> value is left untouched. It is yet 5 to 40 times
+ * faster than strtol().
+ */
+int strl2irc(const char *s, int len, int *ret)
+{
+ int i = 0;
+ int j;
+
+ if (!len)
+ return 1;
+
+ if (*s != '-') {
+ /* positive number */
+ while (len-- > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i > INT_MAX / 10) return 1; /* check for multiply overflow */
+ i = i * 10;
+ if (i + j < i) return 1; /* check for addition overflow */
+ i = i + j;
+ }
+ } else {
+ /* negative number */
+ s++;
+ while (--len > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i < INT_MIN / 10) return 1; /* check for multiply overflow */
+ i = i * 10;
+ if (i - j > i) return 1; /* check for subtract overflow */
+ i = i - j;
+ }
+ }
+ *ret = i;
+ return 0;
+}
+
+
+/* This function reads exactly <len> chars from <s> and converts them to a
+ * signed integer which it stores into <ret>. It accurately detects any error
+ * (truncated string, invalid chars, overflows). It is meant to be used in
+ * applications designed for hostile environments. It returns zero when the
+ * number has successfully been converted, non-zero otherwise. When an error
+ * is returned, the <ret> value is left untouched. It is about 3 times slower
+ * than strl2irc().
+ */
+
+int strl2llrc(const char *s, int len, long long *ret)
+{
+ long long i = 0;
+ int j;
+
+ if (!len)
+ return 1;
+
+ if (*s != '-') {
+ /* positive number */
+ while (len-- > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i > LLONG_MAX / 10LL) return 1; /* check for multiply overflow */
+ i = i * 10LL;
+ if (i + j < i) return 1; /* check for addition overflow */
+ i = i + j;
+ }
+ } else {
+ /* negative number */
+ s++;
+ while (--len > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i < LLONG_MIN / 10LL) return 1; /* check for multiply overflow */
+ i = i * 10LL;
+ if (i - j > i) return 1; /* check for subtract overflow */
+ i = i - j;
+ }
+ }
+ *ret = i;
+ return 0;
+}
+
+/* This function is used with pat_parse_dotted_ver(). It converts a string
+ * composed of two numbers separated by a dot. Each part must fit in 16 bits
+ * because internally the pair will be represented as a 32-bit quantity stored
+ * in a 64-bit integer. It returns zero when the number has successfully been
+ * converted, non-zero otherwise. When an error is returned, the <ret> value
+ * is left untouched.
+ *
+ * "1.3" -> 0x0000000000010003
+ * "65535.65535" -> 0x00000000ffffffff
+ */
+int strl2llrc_dotted(const char *text, int len, long long *ret)
+{
+ const char *end = &text[len];
+ const char *p;
+ long long major, minor;
+
+ /* Look for dot. */
+ for (p = text; p < end; p++)
+ if (*p == '.')
+ break;
+
+ /* Convert major. */
+ if (strl2llrc(text, p - text, &major) != 0)
+ return 1;
+
+ /* Check major. */
+ if (major >= 65536)
+ return 1;
+
+ /* Convert minor. */
+ minor = 0;
+ if (p < end)
+ if (strl2llrc(p + 1, end - (p + 1), &minor) != 0)
+ return 1;
+
+ /* Check minor. */
+ if (minor >= 65536)
+ return 1;
+
+ /* Compose value. */
+ *ret = (major << 16) | (minor & 0xffff);
+ return 0;
+}
+
+/* This function parses a time value optionally followed by a unit suffix among
+ * "d", "h", "m", "s", "ms" or "us". It converts the value into the unit
+ * expected by the caller. The computation does its best to avoid overflows.
+ * The value is returned in <ret> if everything is fine, and NULL is returned
+ * by the function. In case of error, a pointer to the error is returned and
+ * <ret> is left untouched. Values are automatically rounded up when needed.
+ * Values that become larger than or equal to 2^31 after conversion are
+ * reported as an overflow by returning PARSE_TIME_OVER. Non-zero values that
+ * round down to zero are reported as an underflow by returning PARSE_TIME_UNDER.
+ */
+const char *parse_time_err(const char *text, unsigned *ret, unsigned unit_flags)
+{
+ unsigned long long imult, idiv;
+ unsigned long long omult, odiv;
+ unsigned long long value, result;
+ const char *str = text;
+
+ if (!isdigit((unsigned char)*text))
+ return text;
+
+ omult = odiv = 1;
+
+ switch (unit_flags & TIME_UNIT_MASK) {
+ case TIME_UNIT_US: omult = 1000000; break;
+ case TIME_UNIT_MS: omult = 1000; break;
+ case TIME_UNIT_S: break;
+ case TIME_UNIT_MIN: odiv = 60; break;
+ case TIME_UNIT_HOUR: odiv = 3600; break;
+ case TIME_UNIT_DAY: odiv = 86400; break;
+ default: break;
+ }
+
+ value = 0;
+
+ while (1) {
+ unsigned int j;
+
+ j = *text - '0';
+ if (j > 9)
+ break;
+ text++;
+ value *= 10;
+ value += j;
+ }
+
+ imult = idiv = 1;
+ switch (*text) {
+ case '\0': /* no unit = default unit */
+ imult = omult = idiv = odiv = 1;
+ goto end;
+ case 's': /* second = unscaled unit */
+ break;
+ case 'u': /* microsecond : "us" */
+ if (text[1] == 's') {
+ idiv = 1000000;
+ text++;
+ break;
+ }
+ return text;
+ case 'm': /* millisecond : "ms" or minute: "m" */
+ if (text[1] == 's') {
+ idiv = 1000;
+ text++;
+ } else
+ imult = 60;
+ break;
+ case 'h': /* hour : "h" */
+ imult = 3600;
+ break;
+ case 'd': /* day : "d" */
+ imult = 86400;
+ break;
+ default:
+ return text;
+ }
+ if (*(++text) != '\0') {
+ ha_warning("unexpected character '%c' after the timer value '%s', only "
+ "(us=microseconds,ms=milliseconds,s=seconds,m=minutes,h=hours,d=days) are supported."
+ " This will be reported as an error in next versions.\n", *text, str);
+ }
+
+ end:
+ if (omult % idiv == 0) { omult /= idiv; idiv = 1; }
+ if (idiv % omult == 0) { idiv /= omult; omult = 1; }
+ if (imult % odiv == 0) { imult /= odiv; odiv = 1; }
+ if (odiv % imult == 0) { odiv /= imult; imult = 1; }
+
+ result = (value * (imult * omult) + (idiv * odiv - 1)) / (idiv * odiv);
+ if (result >= 0x80000000)
+ return PARSE_TIME_OVER;
+ if (!result && value)
+ return PARSE_TIME_UNDER;
+ *ret = result;
+ return NULL;
+}
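+
+/* Usage sketch (editor's illustration, not part of the build): parsing "5s"
+ * for a caller working in milliseconds. handle_parse_error() is a
+ * hypothetical helper.
+ *
+ *     unsigned timeout;
+ *     const char *res = parse_time_err("5s", &timeout, TIME_UNIT_MS);
+ *
+ *     if (res)   // non-NULL includes PARSE_TIME_OVER / PARSE_TIME_UNDER
+ *         handle_parse_error(res);
+ *     // otherwise timeout == 5000
+ */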
+
+/* this function converts the string starting at <text>, optionally followed
+ * by a 'k', 'm' or 'g' suffix (case-insensitive binary multiples), to an
+ * unsigned int stored in <ret>. If an error is detected, the pointer to the
+ * unexpected character is returned. If the conversion is successful, NULL is
+ * returned.
+ */
+const char *parse_size_err(const char *text, unsigned *ret)
+{
+ unsigned value = 0;
+
+ if (!isdigit((unsigned char)*text))
+ return text;
+
+ while (1) {
+ unsigned int j;
+
+ j = *text - '0';
+ if (j > 9)
+ break;
+ if (value > ~0U / 10)
+ return text;
+ value *= 10;
+ if (value > (value + j))
+ return text;
+ value += j;
+ text++;
+ }
+
+ switch (*text) {
+ case '\0':
+ break;
+ case 'K':
+ case 'k':
+ if (value > ~0U >> 10)
+ return text;
+ value = value << 10;
+ break;
+ case 'M':
+ case 'm':
+ if (value > ~0U >> 20)
+ return text;
+ value = value << 20;
+ break;
+ case 'G':
+ case 'g':
+ if (value > ~0U >> 30)
+ return text;
+ value = value << 30;
+ break;
+ default:
+ return text;
+ }
+
+ if (*text != '\0' && *++text != '\0')
+ return text;
+
+ *ret = value;
+ return NULL;
+}
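+
+/* Usage sketch (editor's illustration, not part of the build): suffixes are
+ * binary multiples (K = 1024).
+ *
+ *     unsigned size;
+ *     const char *e = parse_size_err("64k", &size);
+ *
+ *     // e == NULL and size == 65536; on error <e> points to the bad char
+ */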
+
+/*
+ * Parse a binary string written in hexadecimal (source) and store the decoded
+ * result into <binstr>, setting <binstrlen> to its length. Memory for <binstr>
+ * is allocated by the function if it points to a NULL pointer, otherwise the
+ * existing buffer of size <*binstrlen> is reused. On error, 0 is returned with
+ * an error message in <err>. On success, the consumed length is returned.
+ */
+int parse_binary(const char *source, char **binstr, int *binstrlen, char **err)
+{
+ int len;
+ const char *p = source;
+ int i,j;
+ int alloc;
+
+ len = strlen(source);
+ if (len % 2) {
+ memprintf(err, "an even number of hex digit is expected");
+ return 0;
+ }
+
+ len = len >> 1;
+
+ if (!*binstr) {
+ *binstr = calloc(len, sizeof(**binstr));
+ if (!*binstr) {
+ memprintf(err, "out of memory while loading string pattern");
+ return 0;
+ }
+ alloc = 1;
+ }
+ else {
+ if (*binstrlen < len) {
+ memprintf(err, "no space available in the buffer. expect %d, provides %d",
+ len, *binstrlen);
+ return 0;
+ }
+ alloc = 0;
+ }
+ *binstrlen = len;
+
+ i = j = 0;
+ while (j < len) {
+ if (!ishex(p[i++]))
+ goto bad_input;
+ if (!ishex(p[i++]))
+ goto bad_input;
+ (*binstr)[j++] = (hex2i(p[i-2]) << 4) + hex2i(p[i-1]);
+ }
+ return len << 1;
+
+bad_input:
+ memprintf(err, "an hex digit is expected (found '%c')", p[i-1]);
+ if (alloc)
+ ha_free(binstr);
+ return 0;
+}
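+
+/* Usage sketch (editor's illustration, not part of the build): letting the
+ * function allocate the output buffer. report_and_free() is a hypothetical
+ * helper.
+ *
+ *     char *bin = NULL;
+ *     int binlen = 0;
+ *     char *err = NULL;
+ *
+ *     if (!parse_binary("deadbeef", &bin, &binlen, &err))
+ *         report_and_free(err);
+ *     // on success: binlen == 4 and bin holds { 0xde, 0xad, 0xbe, 0xef }
+ */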
+
+/* copies at most <n> characters from <src> and always terminates with '\0' */
+char *my_strndup(const char *src, int n)
+{
+ int len = 0;
+ char *ret;
+
+ while (len < n && src[len])
+ len++;
+
+ ret = malloc(len + 1);
+ if (!ret)
+ return ret;
+ memcpy(ret, src, len);
+ ret[len] = '\0';
+ return ret;
+}
+
+/*
+ * searches for <needle> in <haystack>
+ * returns a pointer to the first occurrence if found, NULL otherwise
+ */
+const void *my_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen)
+{
+ const void *c = NULL;
+ unsigned char f;
+
+ if ((haystack == NULL) || (needle == NULL) || (haystacklen < needlelen))
+ return NULL;
+
+ f = *(char *)needle;
+ c = haystack;
+ while ((c = memchr(c, f, haystacklen - (c - haystack))) != NULL) {
+ if ((haystacklen - (c - haystack)) < needlelen)
+ return NULL;
+
+ if (memcmp(c, needle, needlelen) == 0)
+ return c;
+ ++c;
+ }
+ return NULL;
+}
+
+/* get length of the initial segment consisting entirely of bytes in <accept> */
+size_t my_memspn(const void *str, size_t len, const void *accept, size_t acceptlen)
+{
+ const unsigned char *p = str; /* read one byte at a time */
+ size_t ret = 0;
+
+ while (ret < len && memchr(accept, *p, acceptlen)) {
+ p++;
+ ret++;
+ }
+ return ret;
+}
+
+/* get length of the initial segment consisting entirely of bytes not in <reject> */
+size_t my_memcspn(const void *str, size_t len, const void *reject, size_t rejectlen)
+{
+ const unsigned char *p = str; /* read one byte at a time */
+ size_t ret = 0;
+
+ while (ret < len) {
+ if (memchr(reject, *p, rejectlen))
+ return ret;
+ p++;
+ ret++;
+ }
+ return ret;
+}
+
+/* This function returns the first unused key greater than or equal to <key> in
+ * ID tree <root>. Zero is returned if no place is found.
+ */
+unsigned int get_next_id(struct eb_root *root, unsigned int key)
+{
+ struct eb32_node *used;
+
+ do {
+ used = eb32_lookup_ge(root, key);
+ if (!used || used->key > key)
+ return key; /* key is available */
+ key++;
+ } while (key);
+ return key;
+}
+
+/* dump the full tree to <file> in DOT format for debugging purposes. Will
+ * optionally highlight node <subj> if found, depending on operation <op> :
+ * 0 : nothing
+ * >0 : insertion, node/leaf are surrounded in red
+ * <0 : removal, node/leaf are dashed with no background
+ * Will optionally add "desc" as a label on the graph if set and non-null.
+ */
+void eb32sc_to_file(FILE *file, struct eb_root *root, const struct eb32sc_node *subj, int op, const char *desc)
+{
+ struct eb32sc_node *node;
+ unsigned long scope = -1;
+
+ fprintf(file, "digraph ebtree {\n");
+
+ if (desc && *desc) {
+ fprintf(file,
+ " fontname=\"fixed\";\n"
+ " fontsize=8;\n"
+ " label=\"%s\";\n", desc);
+ }
+
+ fprintf(file,
+ " node [fontname=\"fixed\" fontsize=8 shape=\"box\" style=\"filled\" color=\"black\" fillcolor=\"white\"];\n"
+ " edge [fontname=\"fixed\" fontsize=8 style=\"solid\" color=\"magenta\" dir=\"forward\"];\n"
+ " \"%lx_n\" [label=\"root\\n%lx\"]\n", (long)eb_root_to_node(root), (long)root
+ );
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"L\"];\n",
+ (long)eb_root_to_node(root),
+ (long)eb_root_to_node(eb_clrtag(root->b[0])),
+ eb_gettag(root->b[0]) == EB_LEAF ? 'l' : 'n');
+
+ node = eb32sc_first(root, scope);
+ while (node) {
+ if (node->node.node_p) {
+ /* node part is used */
+ fprintf(file, " \"%lx_n\" [label=\"%lx\\nkey=%u\\nscope=%lx\\nbit=%d\" fillcolor=\"lightskyblue1\" %s];\n",
+ (long)node, (long)node, node->key, node->node_s, node->node.bit,
+ (node == subj) ? (op < 0 ? "color=\"red\" style=\"dashed\"" : op > 0 ? "color=\"red\"" : "") : "");
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_n\" [taillabel=\"%c\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.node_p)),
+ eb_gettag(node->node.node_p) ? 'R' : 'L');
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"L\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.branches.b[0])),
+ eb_gettag(node->node.branches.b[0]) == EB_LEAF ? 'l' : 'n');
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"R\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.branches.b[1])),
+ eb_gettag(node->node.branches.b[1]) == EB_LEAF ? 'l' : 'n');
+ }
+
+ fprintf(file, " \"%lx_l\" [label=\"%lx\\nkey=%u\\nscope=%lx\\npfx=%u\" fillcolor=\"yellow\" %s];\n",
+ (long)node, (long)node, node->key, node->leaf_s, node->node.pfx,
+ (node == subj) ? (op < 0 ? "color=\"red\" style=\"dashed\"" : op > 0 ? "color=\"red\"" : "") : "");
+
+ fprintf(file, " \"%lx_l\" -> \"%lx_n\" [taillabel=\"%c\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.leaf_p)),
+ eb_gettag(node->node.leaf_p) ? 'R' : 'L');
+ node = eb32sc_next(node, scope);
+ }
+ fprintf(file, "}\n");
+}
+
+/* This function compares a sample word possibly followed by blanks to another
+ * clean word. The compare is case-insensitive. 1 is returned if both are equal,
+ * otherwise zero. This intends to be used when checking HTTP headers for some
+ * values. Note that it validates a word followed only by blanks but does not
+ * validate a word followed by blanks then other chars.
+ */
+int word_match(const char *sample, int slen, const char *word, int wlen)
+{
+ if (slen < wlen)
+ return 0;
+
+ while (wlen) {
+ char c = *sample ^ *word;
+ if (c && c != ('A' ^ 'a'))
+ return 0;
+ sample++;
+ word++;
+ slen--;
+ wlen--;
+ }
+
+ while (slen) {
+ if (*sample != ' ' && *sample != '\t')
+ return 0;
+ sample++;
+ slen--;
+ }
+ return 1;
+}
+
+/* Converts any text-formatted IPv4 address to a host-order IPv4 address. It
+ * is particularly fast because it avoids expensive operations such as
+ * multiplies, which are optimized away at the end. It requires a properly
+ * formatted address though (3 dots).
+ */
+unsigned int inetaddr_host(const char *text)
+{
+ const unsigned int ascii_zero = ('0' << 24) | ('0' << 16) | ('0' << 8) | '0';
+ register unsigned int dig100, dig10, dig1;
+ int s;
+ const char *p, *d;
+
+ dig1 = dig10 = dig100 = ascii_zero;
+ s = 24;
+
+ p = text;
+ while (1) {
+ if (((unsigned)(*p - '0')) <= 9) {
+ p++;
+ continue;
+ }
+
+ /* here, we have a complete byte between <text> and <p> (exclusive) */
+ if (p == text)
+ goto end;
+
+ d = p - 1;
+ dig1 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig10 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig100 |= (unsigned int)(*d << s);
+ end:
+ if (!s || *p != '.')
+ break;
+
+ s -= 8;
+ text = ++p;
+ }
+
+ dig100 -= ascii_zero;
+ dig10 -= ascii_zero;
+ dig1 -= ascii_zero;
+ return ((dig100 * 10) + dig10) * 10 + dig1;
+}
+
+/*
+ * Idem except the first unparsed character has to be passed in <stop>.
+ */
+unsigned int inetaddr_host_lim(const char *text, const char *stop)
+{
+ const unsigned int ascii_zero = ('0' << 24) | ('0' << 16) | ('0' << 8) | '0';
+ register unsigned int dig100, dig10, dig1;
+ int s;
+ const char *p, *d;
+
+ dig1 = dig10 = dig100 = ascii_zero;
+ s = 24;
+
+ p = text;
+ while (1) {
+ if (((unsigned)(*p - '0')) <= 9 && p < stop) {
+ p++;
+ continue;
+ }
+
+ /* here, we have a complete byte between <text> and <p> (exclusive) */
+ if (p == text)
+ goto end;
+
+ d = p - 1;
+ dig1 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig10 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig100 |= (unsigned int)(*d << s);
+ end:
+ if (!s || p == stop || *p != '.')
+ break;
+
+ s -= 8;
+ text = ++p;
+ }
+
+ dig100 -= ascii_zero;
+ dig10 -= ascii_zero;
+ dig1 -= ascii_zero;
+ return ((dig100 * 10) + dig10) * 10 + dig1;
+}
+
+/*
+ * Idem except the pointer to first unparsed byte is returned into <ret> which
+ * must not be NULL.
+ */
+unsigned int inetaddr_host_lim_ret(char *text, char *stop, char **ret)
+{
+ const unsigned int ascii_zero = ('0' << 24) | ('0' << 16) | ('0' << 8) | '0';
+ register unsigned int dig100, dig10, dig1;
+ int s;
+ char *p, *d;
+
+ dig1 = dig10 = dig100 = ascii_zero;
+ s = 24;
+
+ p = text;
+ while (1) {
+ if (((unsigned)(*p - '0')) <= 9 && p < stop) {
+ p++;
+ continue;
+ }
+
+ /* here, we have a complete byte between <text> and <p> (exclusive) */
+ if (p == text)
+ goto end;
+
+ d = p - 1;
+ dig1 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig10 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig100 |= (unsigned int)(*d << s);
+ end:
+ if (!s || p == stop || *p != '.')
+ break;
+
+ s -= 8;
+ text = ++p;
+ }
+
+ *ret = p;
+ dig100 -= ascii_zero;
+ dig10 -= ascii_zero;
+ dig1 -= ascii_zero;
+ return ((dig100 * 10) + dig10) * 10 + dig1;
+}
+
+/* Convert a fixed-length string to an IP address. Returns 0 in case of error,
+ * or the number of chars read in case of success. Maybe this could be replaced
+ * by one of the functions above. Note that this function rejects octets above
+ * 255 and requires exactly 4 octets.
+ * The destination is only modified on success.
+ */
+int buf2ip(const char *buf, size_t len, struct in_addr *dst)
+{
+ const char *addr;
+ int saw_digit, octets, ch;
+ u_char tmp[4], *tp;
+ const char *cp = buf;
+
+ saw_digit = 0;
+ octets = 0;
+ *(tp = tmp) = 0;
+
+ for (addr = buf; addr - buf < len; addr++) {
+ unsigned char digit = (ch = *addr) - '0';
+
+ if (digit > 9 && ch != '.')
+ break;
+
+ if (digit <= 9) {
+ u_int new = *tp * 10 + digit;
+
+ if (new > 255)
+ return 0;
+
+ *tp = new;
+
+ if (!saw_digit) {
+ if (++octets > 4)
+ return 0;
+ saw_digit = 1;
+ }
+ } else if (ch == '.' && saw_digit) {
+ if (octets == 4)
+ return 0;
+
+ *++tp = 0;
+ saw_digit = 0;
+ } else
+ return 0;
+ }
+
+ if (octets < 4)
+ return 0;
+
+ memcpy(&dst->s_addr, tmp, 4);
+ return addr - cp;
+}
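+
+/* Usage sketch (editor's illustration, not part of this patch): parsing a
+ * fixed-length buffer which is not null-terminated:
+ *
+ *    struct in_addr a;
+ *    const char buf[8] = "10.0.0.1"; // 8 chars, no trailing NUL
+ *    if (buf2ip(buf, sizeof(buf), &a)) {
+ *        // a.s_addr now holds 10.0.0.1 in network byte order
+ *    }
+ */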
+
+/* This function converts the string in <buf> of length <len> to
+ * struct in6_addr <dst>, which must be allocated by the caller.
+ * It returns 1 on success, otherwise zero.
+ * The destination is only modified on success.
+ */
+int buf2ip6(const char *buf, size_t len, struct in6_addr *dst)
+{
+ char null_term_ip6[INET6_ADDRSTRLEN + 1];
+ struct in6_addr out;
+
+ if (len > INET6_ADDRSTRLEN)
+ return 0;
+
+ memcpy(null_term_ip6, buf, len);
+ null_term_ip6[len] = '\0';
+
+ if (!inet_pton(AF_INET6, null_term_ip6, &out))
+ return 0;
+
+ *dst = out;
+ return 1;
+}
+
+/* To be used to quote config arg positions. Returns the short string at <ptr>
+ * surrounded by single quotes if <ptr> is valid and non-empty, or "end of line"
+ * if ptr is NULL or empty. The string is locally allocated.
+ */
+const char *quote_arg(const char *ptr)
+{
+ static THREAD_LOCAL char val[32];
+ int i;
+
+ if (!ptr || !*ptr)
+ return "end of line";
+ val[0] = '\'';
+ for (i = 1; i < sizeof(val) - 2 && *ptr; i++)
+ val[i] = *ptr++;
+ val[i++] = '\'';
+ val[i] = '\0';
+ return val;
+}
+
+/* returns an operator among STD_OP_* for string <str> or < 0 if unknown */
+int get_std_op(const char *str)
+{
+ int ret = -1;
+
+ if (*str == 'e' && str[1] == 'q')
+ ret = STD_OP_EQ;
+ else if (*str == 'n' && str[1] == 'e')
+ ret = STD_OP_NE;
+ else if (*str == 'l') {
+ if (str[1] == 'e') ret = STD_OP_LE;
+ else if (str[1] == 't') ret = STD_OP_LT;
+ }
+ else if (*str == 'g') {
+ if (str[1] == 'e') ret = STD_OP_GE;
+ else if (str[1] == 't') ret = STD_OP_GT;
+ }
+
+ if (ret == -1 || str[2] != '\0')
+ return -1;
+ return ret;
+}
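+
+/* Usage sketch (editor's illustration, not part of this patch):
+ *
+ *    int op = get_std_op("ge");  // op == STD_OP_GE
+ *    op = get_std_op("gte");     // op == -1, exactly two chars are required
+ */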
+
+/* hash a 32-bit integer to another 32-bit integer */
+unsigned int full_hash(unsigned int a)
+{
+ return __full_hash(a);
+}
+
+/* Return the bit position in mask <m> of the nth bit set of rank <r>, between
+ * 0 and LONGBITS-1 included, starting from the left. For example ranks 0,1,2,3
+ * for mask 0x55 will be 6, 4, 2 and 0 respectively. This algorithm is based on
+ * a popcount variant and is described here :
+ * https://graphics.stanford.edu/~seander/bithacks.html
+ */
+unsigned int mask_find_rank_bit(unsigned int r, unsigned long m)
+{
+ unsigned long a, b, c, d;
+ unsigned int s;
+ unsigned int t;
+
+ a = m - ((m >> 1) & ~0UL/3);
+ b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
+ c = (b + (b >> 4)) & ~0UL/0x11;
+ d = (c + (c >> 8)) & ~0UL/0x101;
+
+ r++; // make r be 1..64
+
+ t = 0;
+ s = LONGBITS;
+ if (s > 32) {
+ unsigned long d2 = (d >> 16) >> 16;
+ t = d2 + (d2 >> 16);
+ s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+ }
+
+ t = (d >> (s - 16)) & 0xff;
+ s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+ t = (c >> (s - 8)) & 0xf;
+ s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+ t = (b >> (s - 4)) & 0x7;
+ s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+ t = (a >> (s - 2)) & 0x3;
+ s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+ t = (m >> (s - 1)) & 0x1;
+ s -= ((t - r) & 256) >> 8;
+
+ return s - 1;
+}
+
+/* Same as mask_find_rank_bit() above but makes use of pre-computed bitmaps
+ * based on <m>, in <a..d>. These ones must be updated whenever <m> changes
+ * using mask_prep_rank_map() below.
+ */
+unsigned int mask_find_rank_bit_fast(unsigned int r, unsigned long m,
+ unsigned long a, unsigned long b,
+ unsigned long c, unsigned long d)
+{
+ unsigned int s;
+ unsigned int t;
+
+ r++; // make r be 1..64
+
+ t = 0;
+ s = LONGBITS;
+ if (s > 32) {
+ unsigned long d2 = (d >> 16) >> 16;
+ t = d2 + (d2 >> 16);
+ s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+ }
+
+ t = (d >> (s - 16)) & 0xff;
+ s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+ t = (c >> (s - 8)) & 0xf;
+ s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+ t = (b >> (s - 4)) & 0x7;
+ s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+ t = (a >> (s - 2)) & 0x3;
+ s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+ t = (m >> (s - 1)) & 0x1;
+ s -= ((t - r) & 256) >> 8;
+
+ return s - 1;
+}
+
+/* Prepare the bitmaps used by the fast implementation of the find_rank_bit()
+ * above.
+ */
+void mask_prep_rank_map(unsigned long m,
+ unsigned long *a, unsigned long *b,
+ unsigned long *c, unsigned long *d)
+{
+ *a = m - ((m >> 1) & ~0UL/3);
+ *b = (*a & ~0UL/5) + ((*a >> 2) & ~0UL/5);
+ *c = (*b + (*b >> 4)) & ~0UL/0x11;
+ *d = (*c + (*c >> 8)) & ~0UL/0x101;
+}
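+
+/* Usage sketch (editor's illustration, not part of this patch): when many
+ * ranks are looked up on the same mask, prepare the bitmaps once and reuse
+ * them with the fast variant:
+ *
+ *    unsigned long a, b, c, d;
+ *    unsigned long m = 0x55;
+ *    mask_prep_rank_map(m, &a, &b, &c, &d);
+ *    unsigned int bit = mask_find_rank_bit_fast(2, m, a, b, c, d);
+ *    // bit == 2: ranks 0..3 of 0x55 map to bits 6, 4, 2 and 0
+ */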
+
+/* Return non-zero if IPv4 address is part of the network,
+ * otherwise zero. Note that <addr> is not necessarily aligned,
+ * while the two others must be.
+ */
+int in_net_ipv4(const void *addr, const struct in_addr *mask, const struct in_addr *net)
+{
+ struct in_addr addr_copy;
+
+ memcpy(&addr_copy, addr, sizeof(addr_copy));
+ return((addr_copy.s_addr & mask->s_addr) == (net->s_addr & mask->s_addr));
+}
+
+/* Return non-zero if IPv6 address is part of the network,
+ * otherwise zero. Note that <addr> is not necessarily aligned,
+ * while the two others must be.
+ */
+int in_net_ipv6(const void *addr, const struct in6_addr *mask, const struct in6_addr *net)
+{
+ int i;
+ struct in6_addr addr_copy;
+
+ memcpy(&addr_copy, addr, sizeof(addr_copy));
+ for (i = 0; i < sizeof(struct in6_addr) / sizeof(int); i++)
+ if (((((int *)&addr_copy)[i] & ((int *)mask)[i])) !=
+ (((int *)net)[i] & ((int *)mask)[i]))
+ return 0;
+ return 1;
+}
+
+/* RFC 4291 prefix */
+const char rfc4291_pfx[] = { 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xFF, 0xFF };
+
+/* Map IPv4 address on IPv6 address, as specified in RFC 3513.
+ * Input and output may overlap.
+ */
+void v4tov6(struct in6_addr *sin6_addr, struct in_addr *sin_addr)
+{
+ struct in_addr tmp_addr;
+
+ tmp_addr.s_addr = sin_addr->s_addr;
+ memcpy(sin6_addr->s6_addr, rfc4291_pfx, sizeof(rfc4291_pfx));
+ memcpy(sin6_addr->s6_addr+12, &tmp_addr.s_addr, 4);
+}
+
+/* Map IPv6 address on IPv4 address, as specified in RFC 3513.
+ * Return true if conversion is possible and false otherwise.
+ */
+int v6tov4(struct in_addr *sin_addr, struct in6_addr *sin6_addr)
+{
+ if (memcmp(sin6_addr->s6_addr, rfc4291_pfx, sizeof(rfc4291_pfx)) == 0) {
+ memcpy(&(sin_addr->s_addr), &(sin6_addr->s6_addr[12]),
+ sizeof(struct in_addr));
+ return 1;
+ }
+
+ return 0;
+}
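+
+/* Usage sketch (editor's illustration, not part of this patch): round trip
+ * between an IPv4 address and its v6-mapped form (::ffff:a.b.c.d):
+ *
+ *    struct in_addr v4 = { .s_addr = htonl(0xc0a80001) }; // 192.168.0.1
+ *    struct in6_addr v6;
+ *    v4tov6(&v6, &v4);     // v6 == ::ffff:192.168.0.1
+ *    if (v6tov4(&v4, &v6)) // succeeds: v4 is 192.168.0.1 again
+ *        ...;
+ */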
+
+/* compare two struct sockaddr_storage and return:
+ * 0 (true) if the addr is the same in both
+ * 1 (false) if the addr is not the same in both
+ * -1 (unable) if one of the addr is not AF_INET*
+ */
+int ipcmp(struct sockaddr_storage *ss1, struct sockaddr_storage *ss2)
+{
+ if ((ss1->ss_family != AF_INET) && (ss1->ss_family != AF_INET6))
+ return -1;
+
+ if ((ss2->ss_family != AF_INET) && (ss2->ss_family != AF_INET6))
+ return -1;
+
+ if (ss1->ss_family != ss2->ss_family)
+ return 1;
+
+ switch (ss1->ss_family) {
+ case AF_INET:
+ return memcmp(&((struct sockaddr_in *)ss1)->sin_addr,
+ &((struct sockaddr_in *)ss2)->sin_addr,
+ sizeof(struct in_addr)) != 0;
+ case AF_INET6:
+ return memcmp(&((struct sockaddr_in6 *)ss1)->sin6_addr,
+ &((struct sockaddr_in6 *)ss2)->sin6_addr,
+ sizeof(struct in6_addr)) != 0;
+ }
+
+ return 1;
+}
+
+/* compare a struct sockaddr_storage to a struct net_addr and return :
+ * 0 (true) if <addr> is matching <net>
+ * 1 (false) if <addr> is not matching <net>
+ * -1 (unable) if <addr> or <net> is not AF_INET*
+ */
+int ipcmp2net(const struct sockaddr_storage *addr, const struct net_addr *net)
+{
+ if ((addr->ss_family != AF_INET) && (addr->ss_family != AF_INET6))
+ return -1;
+
+ if ((net->family != AF_INET) && (net->family != AF_INET6))
+ return -1;
+
+ if (addr->ss_family != net->family)
+ return 1;
+
+ if (addr->ss_family == AF_INET &&
+ (((struct sockaddr_in *)addr)->sin_addr.s_addr & net->addr.v4.mask.s_addr) == net->addr.v4.ip.s_addr)
+ return 0;
+ else {
+ const struct in6_addr *addr6 = &(((const struct sockaddr_in6*)addr)->sin6_addr);
+ const struct in6_addr *nip6 = &net->addr.v6.ip;
+ const struct in6_addr *nmask6 = &net->addr.v6.mask;
+
+ if ((read_u32(&addr6->s6_addr[0]) & read_u32(&nmask6->s6_addr[0])) == read_u32(&nip6->s6_addr[0]) &&
+ (read_u32(&addr6->s6_addr[4]) & read_u32(&nmask6->s6_addr[4])) == read_u32(&nip6->s6_addr[4]) &&
+ (read_u32(&addr6->s6_addr[8]) & read_u32(&nmask6->s6_addr[8])) == read_u32(&nip6->s6_addr[8]) &&
+ (read_u32(&addr6->s6_addr[12]) & read_u32(&nmask6->s6_addr[12])) == read_u32(&nip6->s6_addr[12]))
+ return 0;
+ }
+
+ return 1;
+}
+
+/* copy IP address from <source> into <dest>
+ * The caller must allocate and clear <dest> before calling.
+ * The source must be in either AF_INET or AF_INET6 family, or the destination
+ * address will be undefined. If the destination address used to hold a port,
+ * it is preserved, so that this function can be used to switch to another
+ * address family with no risk. Returns a pointer to the destination.
+ */
+struct sockaddr_storage *ipcpy(struct sockaddr_storage *source, struct sockaddr_storage *dest)
+{
+ int prev_port;
+
+ prev_port = get_net_port(dest);
+ memset(dest, 0, sizeof(*dest));
+ dest->ss_family = source->ss_family;
+
+ /* copy new addr and apply it */
+ switch (source->ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)dest)->sin_addr.s_addr = ((struct sockaddr_in *)source)->sin_addr.s_addr;
+ ((struct sockaddr_in *)dest)->sin_port = prev_port;
+ break;
+ case AF_INET6:
+ memcpy(((struct sockaddr_in6 *)dest)->sin6_addr.s6_addr, ((struct sockaddr_in6 *)source)->sin6_addr.s6_addr, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)dest)->sin6_port = prev_port;
+ break;
+ }
+
+ return dest;
+}
+
+char *human_time(int t, short hz_div) {
+ static char rv[sizeof("24855d23h")+1]; // sized for the longest possible output, eg "24855d23h"
+ char *p = rv;
+ char *end = rv + sizeof(rv);
+ int cnt=2; // print two numbers
+
+ if (unlikely(t < 0 || hz_div <= 0)) {
+ snprintf(p, end - p, "?");
+ return rv;
+ }
+
+ if (unlikely(hz_div > 1))
+ t /= hz_div;
+
+ if (t >= DAY) {
+ p += snprintf(p, end - p, "%dd", t / DAY);
+ cnt--;
+ }
+
+ if (cnt && t % DAY / HOUR) {
+ p += snprintf(p, end - p, "%dh", t % DAY / HOUR);
+ cnt--;
+ }
+
+ if (cnt && t % HOUR / MINUTE) {
+ p += snprintf(p, end - p, "%dm", t % HOUR / MINUTE);
+ cnt--;
+ }
+
+ if ((cnt && t % MINUTE) || !t) // also display '0s'
+ p += snprintf(p, end - p, "%ds", t % MINUTE / SEC);
+
+ return rv;
+}
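+
+/* Usage sketch (editor's illustration, not part of this patch): with
+ * hz_div == 1 the input is plain seconds, and only the two most significant
+ * non-zero units are printed:
+ *
+ *    human_time(90061, 1);  // "1d1h" (90061s = 1d 1h 1m 1s, truncated)
+ *    human_time(59, 1);     // "59s"
+ *    human_time(0, 1);      // "0s"
+ */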
+
+const char *monthname[12] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+
+/* date2str_log: write a date in the format :
+ * sprintf(str, "%02d/%s/%04d:%02d:%02d:%02d.%03d",
+ * tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ * tm.tm_hour, tm.tm_min, tm.tm_sec, (int)date.tv_usec/1000);
+ *
+ * without using sprintf. return a pointer to the last char written (\0) or
+ * NULL if there isn't enough space.
+ */
+char *date2str_log(char *dst, const struct tm *tm, const struct timeval *date, size_t size)
+{
+
+ if (size < 25) /* the size is fixed: 24 chars + \0 */
+ return NULL;
+
+ dst = utoa_pad((unsigned int)tm->tm_mday, dst, 3); // day
+ if (!dst)
+ return NULL;
+ *dst++ = '/';
+
+ memcpy(dst, monthname[tm->tm_mon], 3); // month
+ dst += 3;
+ *dst++ = '/';
+
+ dst = utoa_pad((unsigned int)tm->tm_year+1900, dst, 5); // year
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_hour, dst, 3); // hour
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_min, dst, 3); // minutes
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_sec, dst, 3); // seconds
+ if (!dst)
+ return NULL;
+ *dst++ = '.';
+
+ dst = utoa_pad((unsigned int)(date->tv_usec/1000)%1000, dst, 4); // milliseconds
+ if (!dst)
+ return NULL;
+ *dst = '\0';
+
+ return dst;
+}
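+
+/* Example output (editor's illustration, not part of this patch): for a tm
+ * describing June 2nd 1982 12:34:56 with tv_usec == 789000, a buffer of at
+ * least 25 bytes receives "02/Jun/1982:12:34:56.789".
+ */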
+
+/* Base year used to compute leap years */
+#define TM_YEAR_BASE 1900
+
+/* Return the difference in seconds between two times (leap seconds are ignored).
+ * Retrieved from glibc 2.18 source code.
+ */
+static int my_tm_diff(const struct tm *a, const struct tm *b)
+{
+ /* Compute intervening leap days correctly even if year is negative.
+ * Take care to avoid int overflow in leap day calculations,
+ * but it's OK to assume that A and B are close to each other.
+ */
+ int a4 = (a->tm_year >> 2) + (TM_YEAR_BASE >> 2) - ! (a->tm_year & 3);
+ int b4 = (b->tm_year >> 2) + (TM_YEAR_BASE >> 2) - ! (b->tm_year & 3);
+ int a100 = a4 / 25 - (a4 % 25 < 0);
+ int b100 = b4 / 25 - (b4 % 25 < 0);
+ int a400 = a100 >> 2;
+ int b400 = b100 >> 2;
+ int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
+ int years = a->tm_year - b->tm_year;
+ int days = (365 * years + intervening_leap_days
+ + (a->tm_yday - b->tm_yday));
+ return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
+ + (a->tm_min - b->tm_min))
+ + (a->tm_sec - b->tm_sec));
+}
+
+/* Return the GMT offset for a specific local time.
+ * Both t and tm must represent the same time.
+ * The string returned has the same format as returned by strftime(... "%z", tm).
+ * Offsets are kept in an internal cache for better performance.
+ */
+const char *get_gmt_offset(time_t t, struct tm *tm)
+{
+ /* Cache offsets from GMT (depending on whether DST is active or not) */
+ static THREAD_LOCAL char gmt_offsets[2][5+1] = { "", "" };
+
+ char *gmt_offset;
+ struct tm tm_gmt;
+ int diff;
+ int isdst = tm->tm_isdst;
+
+ /* Pretend DST not active if its status is unknown */
+ if (isdst < 0)
+ isdst = 0;
+
+ /* Fetch the offset and initialize it if needed */
+ gmt_offset = gmt_offsets[isdst & 0x01];
+ if (unlikely(!*gmt_offset)) {
+ get_gmtime(t, &tm_gmt);
+ diff = my_tm_diff(tm, &tm_gmt);
+ if (diff < 0) {
+ diff = -diff;
+ *gmt_offset = '-';
+ } else {
+ *gmt_offset = '+';
+ }
+ diff %= 86400U;
+ diff /= 60; /* Convert to minutes */
+ snprintf(gmt_offset+1, 4+1, "%02d%02d", diff/60, diff%60);
+ }
+
+ return gmt_offset;
+}
+
+/* gmt2str_log: write a date in the format :
+ * "%02d/%s/%04d:%02d:%02d:%02d +0000" without using snprintf
+ * return a pointer to the last char written (\0) or
+ * NULL if there isn't enough space.
+ */
+char *gmt2str_log(char *dst, struct tm *tm, size_t size)
+{
+ if (size < 27) /* the size is fixed: 26 chars + \0 */
+ return NULL;
+
+ dst = utoa_pad((unsigned int)tm->tm_mday, dst, 3); // day
+ if (!dst)
+ return NULL;
+ *dst++ = '/';
+
+ memcpy(dst, monthname[tm->tm_mon], 3); // month
+ dst += 3;
+ *dst++ = '/';
+
+ dst = utoa_pad((unsigned int)tm->tm_year+1900, dst, 5); // year
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_hour, dst, 3); // hour
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_min, dst, 3); // minutes
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_sec, dst, 3); // seconds
+ if (!dst)
+ return NULL;
+ *dst++ = ' ';
+ *dst++ = '+';
+ *dst++ = '0';
+ *dst++ = '0';
+ *dst++ = '0';
+ *dst++ = '0';
+ *dst = '\0';
+
+ return dst;
+}
+
+/* localdate2str_log: write a date in the format :
+ * "%02d/%s/%04d:%02d:%02d:%02d +0000(local timezone)" without using snprintf
+ * Both t and tm must represent the same time.
+ * return a pointer to the last char written (\0) or
+ * NULL if there isn't enough space.
+ */
+char *localdate2str_log(char *dst, time_t t, struct tm *tm, size_t size)
+{
+ const char *gmt_offset;
+ if (size < 27) /* the size is fixed: 26 chars + \0 */
+ return NULL;
+
+ gmt_offset = get_gmt_offset(t, tm);
+
+ dst = utoa_pad((unsigned int)tm->tm_mday, dst, 3); // day
+ if (!dst)
+ return NULL;
+ *dst++ = '/';
+
+ memcpy(dst, monthname[tm->tm_mon], 3); // month
+ dst += 3;
+ *dst++ = '/';
+
+ dst = utoa_pad((unsigned int)tm->tm_year+1900, dst, 5); // year
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_hour, dst, 3); // hour
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_min, dst, 3); // minutes
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_sec, dst, 3); // seconds
+ if (!dst)
+ return NULL;
+ *dst++ = ' ';
+
+ memcpy(dst, gmt_offset, 5); // Offset from local time to GMT
+ dst += 5;
+ *dst = '\0';
+
+ return dst;
+}
+
+/* Returns the number of seconds since 01/01/1970 0:0:0 GMT for GMT date <tm>.
+ * It is meant as a portable replacement for timegm() for use with valid inputs.
+ * Returns undefined results for invalid dates (eg: months out of range 0..11).
+ */
+time_t my_timegm(const struct tm *tm)
+{
+ /* Each month has 28, 29, 30 or 31 days, or 28+N. The date in the year
+ * is thus (current month - 1)*28 + cumulated_N[month] to count the
+ * sum of the extra N days for elapsed months. The sum of all these N
+ * days doesn't exceed 30 for a complete year (366-12*28) so it fits
+ * in a 5-bit word. This means that with 60 bits we can represent a
+ * matrix of all these values at once, which is fast and efficient to
+ * access. The extra February day for leap years is not counted here.
+ *
+ * Jan : none = 0 (0)
+ * Feb : Jan = 3 (3)
+ * Mar : Jan..Feb = 3 (3 + 0)
+ * Apr : Jan..Mar = 6 (3 + 0 + 3)
+ * May : Jan..Apr = 8 (3 + 0 + 3 + 2)
+ * Jun : Jan..May = 11 (3 + 0 + 3 + 2 + 3)
+ * Jul : Jan..Jun = 13 (3 + 0 + 3 + 2 + 3 + 2)
+ * Aug : Jan..Jul = 16 (3 + 0 + 3 + 2 + 3 + 2 + 3)
+ * Sep : Jan..Aug = 19 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3)
+ * Oct : Jan..Sep = 21 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3 + 2)
+ * Nov : Jan..Oct = 24 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3 + 2 + 3)
+ * Dec : Jan..Nov = 26 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3 + 2 + 3 + 2)
+ */
+ uint64_t extra =
+ ( 0ULL << 0*5) + ( 3ULL << 1*5) + ( 3ULL << 2*5) + /* Jan, Feb, Mar, */
+ ( 6ULL << 3*5) + ( 8ULL << 4*5) + (11ULL << 5*5) + /* Apr, May, Jun, */
+ (13ULL << 6*5) + (16ULL << 7*5) + (19ULL << 8*5) + /* Jul, Aug, Sep, */
+ (21ULL << 9*5) + (24ULL << 10*5) + (26ULL << 11*5); /* Oct, Nov, Dec, */
+
+ unsigned int y = tm->tm_year + 1900;
+ unsigned int m = tm->tm_mon;
+ unsigned long days = 0;
+
+ /* days since 1/1/1970 for full years */
+ days += days_since_zero(y) - days_since_zero(1970);
+
+ /* days for full months in the current year */
+ days += 28 * m + ((extra >> (m * 5)) & 0x1f);
+
+ /* count + 1 after March for leap years. A leap year is a year multiple
+ * of 4, unless it's multiple of 100 without being multiple of 400. 2000
+ * is leap, 1900 isn't, 1904 is.
+ */
+ if ((m > 1) && !(y & 3) && ((y % 100) || !(y % 400)))
+ days++;
+
+ days += tm->tm_mday - 1;
+ return days * 86400ULL + tm->tm_hour * 3600 + tm->tm_min * 60 + tm->tm_sec;
+}
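+
+/* Worked example (editor's illustration, not part of this patch): for
+ * 1970-01-01 00:00:00 (tm_year=70, tm_mon=0, tm_mday=1), the year and day
+ * terms all cancel out and the function returns 0, the Unix epoch. For
+ * 1970-03-01 00:00:00 (tm_mon=2), full months count 28*2 + 3 extra days
+ * = 59 days (Jan 31 + Feb 28, 1970 not being leap), hence the result is
+ * 59 * 86400 = 5097600.
+ */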
+
+/* This function checks that the next character is <c>. It returns true and
+ * updates <date> and <len> to the new position if the character matches.
+ */
+static inline int parse_expect_char(const char **date, int *len, char c)
+{
+ if (*len < 1 || **date != c)
+ return 0;
+ (*len)--;
+ (*date)++;
+ return 1;
+}
+
+/* This function expects a string <str> of length <l>. It returns true and
+ * updates <date> and <len> if the string matches, otherwise it returns false.
+ */
+static inline int parse_strcmp(const char **date, int *len, char *str, int l)
+{
+ if (*len < l || strncmp(*date, str, l) != 0)
+ return 0;
+ (*len) -= l;
+ (*date) += l;
+ return 1;
+}
+
+/* This macro converts a 3-char name into an integer. */
+#define STR2I3(__a, __b, __c) ((__a) * 65536 + (__b) * 256 + (__c))
+
+/* day-name = %x4D.6F.6E ; "Mon", case-sensitive
+ * / %x54.75.65 ; "Tue", case-sensitive
+ * / %x57.65.64 ; "Wed", case-sensitive
+ * / %x54.68.75 ; "Thu", case-sensitive
+ * / %x46.72.69 ; "Fri", case-sensitive
+ * / %x53.61.74 ; "Sat", case-sensitive
+ * / %x53.75.6E ; "Sun", case-sensitive
+ *
+ * This array must be alphabetically sorted
+ */
+static inline int parse_http_dayname(const char **date, int *len, struct tm *tm)
+{
+ if (*len < 3)
+ return 0;
+ switch (STR2I3((*date)[0], (*date)[1], (*date)[2])) {
+ case STR2I3('M','o','n'): tm->tm_wday = 1; break;
+ case STR2I3('T','u','e'): tm->tm_wday = 2; break;
+ case STR2I3('W','e','d'): tm->tm_wday = 3; break;
+ case STR2I3('T','h','u'): tm->tm_wday = 4; break;
+ case STR2I3('F','r','i'): tm->tm_wday = 5; break;
+ case STR2I3('S','a','t'): tm->tm_wday = 6; break;
+ case STR2I3('S','u','n'): tm->tm_wday = 7; break;
+ default: return 0;
+ }
+ *len -= 3;
+ *date += 3;
+ return 1;
+}
+
+/* month = %x4A.61.6E ; "Jan", case-sensitive
+ * / %x46.65.62 ; "Feb", case-sensitive
+ * / %x4D.61.72 ; "Mar", case-sensitive
+ * / %x41.70.72 ; "Apr", case-sensitive
+ * / %x4D.61.79 ; "May", case-sensitive
+ * / %x4A.75.6E ; "Jun", case-sensitive
+ * / %x4A.75.6C ; "Jul", case-sensitive
+ * / %x41.75.67 ; "Aug", case-sensitive
+ * / %x53.65.70 ; "Sep", case-sensitive
+ * / %x4F.63.74 ; "Oct", case-sensitive
+ * / %x4E.6F.76 ; "Nov", case-sensitive
+ * / %x44.65.63 ; "Dec", case-sensitive
+ *
+ * This array must be alphabetically sorted
+ */
+static inline int parse_http_monthname(const char **date, int *len, struct tm *tm)
+{
+ if (*len < 3)
+ return 0;
+ switch (STR2I3((*date)[0], (*date)[1], (*date)[2])) {
+ case STR2I3('J','a','n'): tm->tm_mon = 0; break;
+ case STR2I3('F','e','b'): tm->tm_mon = 1; break;
+ case STR2I3('M','a','r'): tm->tm_mon = 2; break;
+ case STR2I3('A','p','r'): tm->tm_mon = 3; break;
+ case STR2I3('M','a','y'): tm->tm_mon = 4; break;
+ case STR2I3('J','u','n'): tm->tm_mon = 5; break;
+ case STR2I3('J','u','l'): tm->tm_mon = 6; break;
+ case STR2I3('A','u','g'): tm->tm_mon = 7; break;
+ case STR2I3('S','e','p'): tm->tm_mon = 8; break;
+ case STR2I3('O','c','t'): tm->tm_mon = 9; break;
+ case STR2I3('N','o','v'): tm->tm_mon = 10; break;
+ case STR2I3('D','e','c'): tm->tm_mon = 11; break;
+ default: return 0;
+ }
+ *len -= 3;
+ *date += 3;
+ return 1;
+}
+
+/* day-name-l = %x4D.6F.6E.64.61.79 ; "Monday", case-sensitive
+ * / %x54.75.65.73.64.61.79 ; "Tuesday", case-sensitive
+ * / %x57.65.64.6E.65.73.64.61.79 ; "Wednesday", case-sensitive
+ * / %x54.68.75.72.73.64.61.79 ; "Thursday", case-sensitive
+ * / %x46.72.69.64.61.79 ; "Friday", case-sensitive
+ * / %x53.61.74.75.72.64.61.79 ; "Saturday", case-sensitive
+ * / %x53.75.6E.64.61.79 ; "Sunday", case-sensitive
+ *
+ * This array must be alphabetically sorted
+ */
+static inline int parse_http_ldayname(const char **date, int *len, struct tm *tm)
+{
+ if (*len < 6) /* Minimum length. */
+ return 0;
+ switch (STR2I3((*date)[0], (*date)[1], (*date)[2])) {
+ case STR2I3('M','o','n'):
+ RET0_UNLESS(parse_strcmp(date, len, "Monday", 6));
+ tm->tm_wday = 1;
+ return 1;
+ case STR2I3('T','u','e'):
+ RET0_UNLESS(parse_strcmp(date, len, "Tuesday", 7));
+ tm->tm_wday = 2;
+ return 1;
+ case STR2I3('W','e','d'):
+ RET0_UNLESS(parse_strcmp(date, len, "Wednesday", 9));
+ tm->tm_wday = 3;
+ return 1;
+ case STR2I3('T','h','u'):
+ RET0_UNLESS(parse_strcmp(date, len, "Thursday", 8));
+ tm->tm_wday = 4;
+ return 1;
+ case STR2I3('F','r','i'):
+ RET0_UNLESS(parse_strcmp(date, len, "Friday", 6));
+ tm->tm_wday = 5;
+ return 1;
+ case STR2I3('S','a','t'):
+ RET0_UNLESS(parse_strcmp(date, len, "Saturday", 8));
+ tm->tm_wday = 6;
+ return 1;
+ case STR2I3('S','u','n'):
+ RET0_UNLESS(parse_strcmp(date, len, "Sunday", 6));
+ tm->tm_wday = 7;
+ return 1;
+ }
+ return 0;
+}
+
+/* This function parses exactly 1 digit and returns the numeric value in "digit". */
+static inline int parse_digit(const char **date, int *len, int *digit)
+{
+ if (*len < 1 || **date < '0' || **date > '9')
+ return 0;
+ *digit = (**date - '0');
+ (*date)++;
+ (*len)--;
+ return 1;
+}
+
+/* This function parses exactly 2 digits and returns the numeric value in "digit". */
+static inline int parse_2digit(const char **date, int *len, int *digit)
+{
+ int value;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) = value * 10;
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value;
+
+ return 1;
+}
+
+/* This function parses exactly 4 digits and returns the numeric value in "digit". */
+static inline int parse_4digit(const char **date, int *len, int *digit)
+{
+ int value;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) = value * 1000;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value * 100;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value * 10;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value;
+
+ return 1;
+}
+
+/* time-of-day = hour ":" minute ":" second
+ * ; 00:00:00 - 23:59:60 (leap second)
+ *
+ * hour = 2DIGIT
+ * minute = 2DIGIT
+ * second = 2DIGIT
+ */
+static inline int parse_http_time(const char **date, int *len, struct tm *tm)
+{
+ RET0_UNLESS(parse_2digit(date, len, &tm->tm_hour)); /* hour 2DIGIT */
+ RET0_UNLESS(parse_expect_char(date, len, ':')); /* expect ":" */
+ RET0_UNLESS(parse_2digit(date, len, &tm->tm_min)); /* min 2DIGIT */
+ RET0_UNLESS(parse_expect_char(date, len, ':')); /* expect ":" */
+ RET0_UNLESS(parse_2digit(date, len, &tm->tm_sec)); /* sec 2DIGIT */
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * IMF-fixdate = day-name "," SP date1 SP time-of-day SP GMT
+ * ; fixed length/zone/capitalization subset of the format
+ * ; see Section 3.3 of [RFC5322]
+ *
+ *
+ * date1 = day SP month SP year
+ * ; e.g., 02 Jun 1982
+ *
+ * day = 2DIGIT
+ * year = 4DIGIT
+ *
+ * GMT = %x47.4D.54 ; "GMT", case-sensitive
+ *
+ * time-of-day = hour ":" minute ":" second
+ * ; 00:00:00 - 23:59:60 (leap second)
+ *
+ * hour = 2DIGIT
+ * minute = 2DIGIT
+ * second = 2DIGIT
+ *
+ * DIGIT = decimal 0-9
+ */
+int parse_imf_date(const char *date, int len, struct tm *tm)
+{
+ /* tm_gmtoff, if present, ought to be zero'ed */
+ memset(tm, 0, sizeof(*tm));
+
+ RET0_UNLESS(parse_http_dayname(&date, &len, tm)); /* day-name */
+ RET0_UNLESS(parse_expect_char(&date, &len, ',')); /* expect "," */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_mday)); /* day 2DIGIT */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_monthname(&date, &len, tm)); /* Month */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_4digit(&date, &len, &tm->tm_year)); /* year = 4DIGIT */
+ tm->tm_year -= 1900;
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_time(&date, &len, tm)); /* Parse time. */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_strcmp(&date, &len, "GMT", 3)); /* GMT = %x47.4D.54 ; "GMT", case-sensitive */
+ tm->tm_isdst = -1;
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * rfc850-date = day-name-l "," SP date2 SP time-of-day SP GMT
+ * date2 = day "-" month "-" 2DIGIT
+ * ; e.g., 02-Jun-82
+ *
+ * day = 2DIGIT
+ */
+int parse_rfc850_date(const char *date, int len, struct tm *tm)
+{
+ int year;
+
+ /* tm_gmtoff, if present, ought to be zero'ed */
+ memset(tm, 0, sizeof(*tm));
+
+ RET0_UNLESS(parse_http_ldayname(&date, &len, tm)); /* Read the day name */
+ RET0_UNLESS(parse_expect_char(&date, &len, ',')); /* expect "," */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_mday)); /* day 2DIGIT */
+ RET0_UNLESS(parse_expect_char(&date, &len, '-')); /* expect "-" */
+ RET0_UNLESS(parse_http_monthname(&date, &len, tm)); /* Month */
+ RET0_UNLESS(parse_expect_char(&date, &len, '-')); /* expect "-" */
+
+ /* year = 2DIGIT
+ *
+ * Recipients of a timestamp value in rfc850-date format, which uses a
+ * two-digit year, MUST interpret a timestamp that appears to be more
+ * than 50 years in the future as representing the most recent year in
+ * the past that had the same last two digits.
+ */
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_year));
+
+ /* expect SP */
+ if (!parse_expect_char(&date, &len, ' ')) {
+ /* Maybe we have the date with 4 digits. */
+ RET0_UNLESS(parse_2digit(&date, &len, &year));
+ tm->tm_year = (tm->tm_year * 100 + year) - 1900;
+ /* expect SP */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' '));
+ } else {
+ /* Use 60 as the pivot: >60: +1900, <=60: +2000. Note that
+ * tm_year is the number of years since 1900, so for +1900 we
+ * do nothing, and for +2000 we add 100.
+ */
+ if (tm->tm_year <= 60)
+ tm->tm_year += 100;
+ }
+
+ RET0_UNLESS(parse_http_time(&date, &len, tm)); /* Parse time. */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_strcmp(&date, &len, "GMT", 3)); /* GMT = %x47.4D.54 ; "GMT", case-sensitive */
+ tm->tm_isdst = -1;
+
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * asctime-date = day-name SP date3 SP time-of-day SP year
+ * date3 = month SP ( 2DIGIT / ( SP 1DIGIT ))
+ * ; e.g., Jun 2
+ *
+ * HTTP-date is case sensitive. A sender MUST NOT generate additional
+ * whitespace in an HTTP-date beyond that specifically included as SP in
+ * the grammar.
+ */
+int parse_asctime_date(const char *date, int len, struct tm *tm)
+{
+ /* tm_gmtoff, if present, ought to be zero'ed */
+ memset(tm, 0, sizeof(*tm));
+
+ RET0_UNLESS(parse_http_dayname(&date, &len, tm)); /* day-name */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_monthname(&date, &len, tm)); /* expect month */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+
+ /* expect SP and 1DIGIT or 2DIGIT */
+ if (parse_expect_char(&date, &len, ' '))
+ RET0_UNLESS(parse_digit(&date, &len, &tm->tm_mday));
+ else
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_mday));
+
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_time(&date, &len, tm)); /* Parse time. */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_4digit(&date, &len, &tm->tm_year)); /* year = 4DIGIT */
+ tm->tm_year -= 1900;
+ tm->tm_isdst = -1;
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * HTTP-date = IMF-fixdate / obs-date
+ * obs-date = rfc850-date / asctime-date
+ *
+ * Parses an HTTP date in the RFC format or one of its accepted
+ * alternatives. <date> is the string containing the date,
+ * <len> is the length of the string. <tm> is filled with the
+ * parsed time and must be considered as GMT.
+ */
+int parse_http_date(const char *date, int len, struct tm *tm)
+{
+ if (parse_imf_date(date, len, tm))
+ return 1;
+
+ if (parse_rfc850_date(date, len, tm))
+ return 1;
+
+ if (parse_asctime_date(date, len, tm))
+ return 1;
+
+ return 0;
+}
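+
+/* Usage sketch (editor's illustration, not part of this patch): converting
+ * an HTTP date header value to a time_t with the helpers above:
+ *
+ *    struct tm tm;
+ *    const char *hdr = "Sun, 06 Nov 1994 08:49:37 GMT";
+ *    if (parse_http_date(hdr, strlen(hdr), &tm)) {
+ *        time_t t = my_timegm(&tm); // 784111777
+ *    }
+ */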
+
+/* print the time <ns> in a short form (exactly 7 chars) at the end of buffer
+ * <out>. "-" is printed if the value is zero, "inf" if larger than 1000 years.
+ * It returns the new buffer length, or 0 if it doesn't fit. The value will be
+ * surrounded by <pfx> and <sfx> respectively if not NULL.
+ */
+int print_time_short(struct buffer *out, const char *pfx, uint64_t ns, const char *sfx)
+{
+ double val = ns; // 52 bits of mantissa keep ns accuracy over 52 days
+ const char *unit;
+
+ if (!pfx)
+ pfx = "";
+ if (!sfx)
+ sfx = "";
+
+ do {
+ unit = " - "; if (val <= 0.0) break;
+ unit = "ns"; if (val < 1000.0) break;
+ unit = "us"; val /= 1000.0; if (val < 1000.0) break;
+ unit = "ms"; val /= 1000.0; if (val < 1000.0) break;
+ unit = "s "; val /= 1000.0; if (val < 60.0) break;
+ unit = "m "; val /= 60.0; if (val < 60.0) break;
+ unit = "h "; val /= 60.0; if (val < 24.0) break;
+ unit = "d "; val /= 24.0; if (val < 365.0) break;
+ unit = "yr"; val /= 365.0; if (val < 1000.0) break;
+ unit = " inf "; val = 0.0; break;
+ } while (0);
+
+ if (val <= 0.0)
+ return chunk_appendf(out, "%s%7s%s", pfx, unit, sfx);
+ else if (val < 10.0)
+ return chunk_appendf(out, "%s%1.3f%s%s", pfx, val, unit, sfx);
+ else if (val < 100.0)
+ return chunk_appendf(out, "%s%2.2f%s%s", pfx, val, unit, sfx);
+ else
+ return chunk_appendf(out, "%s%3.1f%s%s", pfx, val, unit, sfx);
+}
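+
+/* Sample outputs (editor's illustration, not part of this patch):
+ *    ns = 0           -> " - " right-aligned on 7 chars
+ *    ns = 1500        -> "1.500us"
+ *    ns = 2500000     -> "2.500ms"
+ *    ns = 90000000000 -> "1.500m "
+ */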
+
+/* Dynamically allocates a string of the proper length to hold the formatted
+ * output. NULL is returned on error. The caller is responsible for freeing the
+ * memory area using free(). The resulting string is returned in <out> if the
+ * pointer is not NULL. A previous version of <out> might be used to build the
+ * new string, and it will be freed before returning if it is not NULL, which
+ * makes it possible to build complex strings from iterative calls without
+ * having to care about freeing intermediate values, as in the example below :
+ *
+ * memprintf(&err, "invalid argument: '%s'", arg);
+ * ...
+ * memprintf(&err, "parser said : <%s>\n", *err);
+ * ...
+ * free(*err);
+ *
+ * This means that <err> must be initialized to NULL before first invocation.
+ * The return value also holds the allocated string, which eases error checking
+ * and immediate consumption. If the output pointer is not used, NULL must be
+ * passed instead and it will be ignored. The returned message will then also
+ * be NULL so that the caller does not have to bother with freeing anything.
+ *
+ * It is also convenient to use it without any free except the last one :
+ * err = NULL;
+ * if (!fct1(&err)) report(err);
+ * if (!fct2(&err)) report(err);
+ * if (!fct3(&err)) report(err);
+ * free(err);
+ *
+ * memprintf relies on memvprintf. This last version can be called from any
+ * function with variadic arguments.
+ */
+char *memvprintf(char **out, const char *format, va_list orig_args)
+{
+ va_list args;
+ char *ret = NULL;
+ int allocated = 0;
+ int needed = 0;
+
+ if (!out)
+ return NULL;
+
+ do {
+ char buf1;
+
+ /* vsnprintf() will return the required length even when the
+ * target buffer is NULL. We do this in a loop just in case
+ * intermediate evaluations go wrong.
+ */
+ va_copy(args, orig_args);
+ needed = vsnprintf(ret ? ret : &buf1, allocated, format, args);
+ va_end(args);
+ if (needed < allocated) {
+ /* Note: on Solaris 8, the first iteration always
+ * returns -1 if allocated is zero, so we force a
+ * retry.
+ */
+ if (!allocated)
+ needed = 0;
+ else
+ break;
+ }
+
+ allocated = needed + 1;
+ ret = my_realloc2(ret, allocated);
+ } while (ret);
+
+ if (needed < 0) {
+ /* an error was encountered */
+ ha_free(&ret);
+ }
+
+ if (out) {
+ free(*out);
+ *out = ret;
+ }
+
+ return ret;
+}
+
+char *memprintf(char **out, const char *format, ...)
+{
+ va_list args;
+ char *ret = NULL;
+
+ va_start(args, format);
+ ret = memvprintf(out, format, args);
+ va_end(args);
+
+ return ret;
+}
+
+/* Used to add <level> spaces before each line of <out>, unless there is only one line.
+ * The input argument is automatically freed and reassigned. The result will have to be
+ * freed by the caller. It also supports being passed a NULL which results in the same
+ * output.
+ * Example of use :
+ * parse(cmd, &err); (callee: memprintf(&err, ...))
+ * fprintf(stderr, "Parser said: %s\n", indent_error(&err));
+ * free(err);
+ */
+char *indent_msg(char **out, int level)
+{
+ char *ret, *in, *p;
+ int needed = 0;
+ int lf = 0;
+ int lastlf = 0;
+ int len;
+
+ if (!out || !*out)
+ return NULL;
+
+ in = *out - 1;
+ while ((in = strchr(in + 1, '\n')) != NULL) {
+ lastlf = in - *out;
+ lf++;
+ }
+
+ if (!lf) /* single line, no LF, return it as-is */
+ return *out;
+
+ len = strlen(*out);
+
+ if (lf == 1 && lastlf == len - 1) {
+ /* single line, LF at end, strip it and return as-is */
+ (*out)[lastlf] = 0;
+ return *out;
+ }
+
+ /* OK now we have at least one LF, we need to process the whole string
+ * as a multi-line string. What we'll do :
+ * - prefix with an LF if there is none
+ * - add <level> spaces before each line
+ * This means at most ( 1 + level + (len-lf) + lf*(1+level) ) =
+ * 1 + level + len + lf * level = 1 + level * (lf + 1) + len.
+ */
+
+ needed = 1 + level * (lf + 1) + len + 1;
+ p = ret = malloc(needed);
+ if (!ret) /* allocation failed: return the message as-is */
+ return *out;
+ in = *out;
+
+ /* skip initial LFs */
+ while (*in == '\n')
+ in++;
+
+ /* copy each line, prefixed with LF and <level> spaces, and without the trailing LF */
+ while (*in) {
+ *p++ = '\n';
+ memset(p, ' ', level);
+ p += level;
+ do {
+ *p++ = *in++;
+ } while (*in && *in != '\n');
+ if (*in)
+ in++;
+ }
+ *p = 0;
+
+ free(*out);
+ *out = ret;
+
+ return ret;
+}
+
+/* makes a copy of message <in> into <out>, with each line prefixed with <pfx>
+ * and end of lines replaced with <eol> if not 0. The first line to indent has
+ * to be indicated in <first> (starts at zero), so that it is possible to skip
+ * indenting the first line if it has to be appended after an existing message.
+ * Empty strings are never indented, and NULL strings are considered empty both
+ * for <in> and <pfx>. It returns non-zero if an EOL was appended as the last
+ * character, zero otherwise.
+ */
+int append_prefixed_str(struct buffer *out, const char *in, const char *pfx, char eol, int first)
+{
+ int bol, lf;
+ int pfxlen = pfx ? strlen(pfx) : 0;
+
+ if (!in)
+ return 0;
+
+ bol = 1;
+ lf = 0;
+ while (*in) {
+ if (bol && pfxlen) {
+ if (first > 0)
+ first--;
+ else
+ b_putblk(out, pfx, pfxlen);
+ bol = 0;
+ }
+
+ lf = (*in == '\n');
+ bol |= lf;
+ b_putchr(out, (lf && eol) ? eol : *in);
+ in++;
+ }
+ return lf;
+}
+
+/* removes environment variable <name> from the environment as found in
+ * environ. This is only provided as an alternative for systems without
+ * unsetenv() (old Solaris and AIX versions). THIS IS NOT THREAD SAFE.
+ * The principle is to scan environ for each occurrence of variable name
+ * <name> and to replace the matching pointers with the last pointer of
+ * the array (since variables are not ordered).
+ * It always returns 0 (success).
+ */
+int my_unsetenv(const char *name)
+{
+ extern char **environ;
+ char **p = environ;
+ int vars;
+ int next;
+ int len;
+
+ len = strlen(name);
+ for (vars = 0; p[vars]; vars++)
+ ;
+ next = 0;
+ while (next < vars) {
+ if (strncmp(p[next], name, len) != 0 || p[next][len] != '=') {
+ next++;
+ continue;
+ }
+ if (next < vars - 1)
+ p[next] = p[vars - 1];
+ p[--vars] = NULL;
+ }
+ return 0;
+}
+
+/* Convert occurrences of environment variables in the input string to their
+ * corresponding value. A variable is identified as a series of alphanumeric
+ * characters or underscores following a '$' sign. The <in> string must be
+ * free()able. NULL returns NULL. The resulting string might be reallocated if
+ * some expansion is made. Variable names may also be enclosed into braces if
+ * needed (eg: to concatenate alphanum characters).
+ */
+char *env_expand(char *in)
+{
+ char *txt_beg;
+ char *out;
+ char *txt_end;
+ char *var_beg;
+ char *var_end;
+ char *value;
+ char *next;
+ int out_len;
+ int val_len;
+
+ if (!in)
+ return in;
+
+ value = out = NULL;
+ out_len = 0;
+
+ txt_beg = in;
+ do {
+ /* look for next '$' sign in <in> */
+ for (txt_end = txt_beg; *txt_end && *txt_end != '$'; txt_end++);
+
+ if (!*txt_end && !out) /* end and no expansion performed */
+ return in;
+
+ val_len = 0;
+ next = txt_end;
+ if (*txt_end == '$') {
+ char save;
+
+ var_beg = txt_end + 1;
+ if (*var_beg == '{')
+ var_beg++;
+
+ var_end = var_beg;
+ while (isalnum((unsigned char)*var_end) || *var_end == '_') {
+ var_end++;
+ }
+
+ next = var_end;
+ if (*var_end == '}' && (var_beg > txt_end + 1))
+ next++;
+
+ /* get value of the variable name at this location */
+ save = *var_end;
+ *var_end = '\0';
+ value = getenv(var_beg);
+ *var_end = save;
+ val_len = value ? strlen(value) : 0;
+ }
+
+ out = my_realloc2(out, out_len + (txt_end - txt_beg) + val_len + 1);
+ if (txt_end > txt_beg) {
+ memcpy(out + out_len, txt_beg, txt_end - txt_beg);
+ out_len += txt_end - txt_beg;
+ }
+ if (val_len) {
+ memcpy(out + out_len, value, val_len);
+ out_len += val_len;
+ }
+ out[out_len] = 0;
+ txt_beg = next;
+ } while (*txt_beg);
+
+ /* here we know that <out> was allocated and that we don't need <in> anymore */
+ free(in);
+ return out;
+}
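+
+/* Usage sketch (editor's illustration, not part of this patch): the input
+ * must be free()able since it may be replaced:
+ *
+ *    char *s = strdup("pidfile ${HOME}/haproxy.pid");
+ *    s = env_expand(s);
+ *    // with HOME=/root, s now holds "pidfile /root/haproxy.pid"
+ */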
+
+
+/* same as strstr() but case-insensitive and with length limits */
+const char *strnistr(const char *str1, int len_str1, const char *str2, int len_str2)
+{
+ char *pptr, *sptr, *start;
+ unsigned int slen, plen;
+ unsigned int tmp1, tmp2;
+
+ if (str1 == NULL || len_str1 == 0) // searching in an empty string => not found
+ return NULL;
+
+ if (str2 == NULL || len_str2 == 0) // empty pattern => always matches
+ return str1;
+
+ if (len_str1 < len_str2) // pattern is longer than string => search is not found
+ return NULL;
+
+ for (tmp1 = 0, start = (char *)str1, pptr = (char *)str2, slen = len_str1, plen = len_str2; slen >= plen; start++, slen--) {
+ while (toupper((unsigned char)*start) != toupper((unsigned char)*str2)) {
+ start++;
+ slen--;
+ tmp1++;
+
+ if (tmp1 >= len_str1)
+ return NULL;
+
+ /* if pattern longer than string */
+ if (slen < plen)
+ return NULL;
+ }
+
+ sptr = start;
+ pptr = (char *)str2;
+
+ tmp2 = 0;
+ while (toupper((unsigned char)*sptr) == toupper((unsigned char)*pptr)) {
+ sptr++;
+ pptr++;
+ tmp2++;
+
+ if (*pptr == '\0' || tmp2 == len_str2) /* end of pattern found */
+ return start;
+ if (*sptr == '\0' || tmp2 == len_str1) /* end of string found and the pattern is not fully found */
+ return NULL;
+ }
+ }
+ return NULL;
+}
+
+/* Returns true if s1 < s2 < s3 otherwise zero. Both s1 and s3 may be NULL and
+ * in this case only non-null strings are compared. This allows passing
+ * initial values in iterators and in sort functions.
+ */
+int strordered(const char *s1, const char *s2, const char *s3)
+{
+ return (!s1 || strcmp(s1, s2) < 0) && (!s3 || strcmp(s2, s3) < 0);
+}
+
+/* This function reads the next valid UTF-8 char.
+ * <s> is the byte array to be decoded, <len> is its length.
+ * The function returns the decoded char encoded like this:
+ * the 4 MSBs are the return code (UTF8_CODE_*), the 4 LSBs
+ * are the number of bytes read. The decoded character is stored in <c>.
+ */
+unsigned char utf8_next(const char *s, int len, unsigned int *c)
+{
+ const unsigned char *p = (unsigned char *)s;
+ int dec;
+ unsigned char code = UTF8_CODE_OK;
+
+ if (len < 1)
+ return UTF8_CODE_OK;
+
+ /* Check the type of UTF8 sequence
+ *
+ * 0... .... 0x00 <= x <= 0x7f : 1 byte: ascii char
+ * 10.. .... 0x80 <= x <= 0xbf : invalid sequence
+ * 110. .... 0xc0 <= x <= 0xdf : 2 bytes
+ * 1110 .... 0xe0 <= x <= 0xef : 3 bytes
+ * 1111 0... 0xf0 <= x <= 0xf7 : 4 bytes
+ * 1111 10.. 0xf8 <= x <= 0xfb : 5 bytes
+ * 1111 110. 0xfc <= x <= 0xfd : 6 bytes
+ * 1111 111. 0xfe <= x <= 0xff : invalid sequence
+ */
+ switch (*p) {
+ case 0x00 ... 0x7f:
+ *c = *p;
+ return UTF8_CODE_OK | 1;
+
+ case 0x80 ... 0xbf:
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+
+ case 0xc0 ... 0xdf:
+ if (len < 2) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x1f;
+ dec = 1;
+ break;
+
+ case 0xe0 ... 0xef:
+ if (len < 3) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x0f;
+ dec = 2;
+ break;
+
+ case 0xf0 ... 0xf7:
+ if (len < 4) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x07;
+ dec = 3;
+ break;
+
+ case 0xf8 ... 0xfb:
+ if (len < 5) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x03;
+ dec = 4;
+ break;
+
+ case 0xfc ... 0xfd:
+ if (len < 6) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x01;
+ dec = 5;
+ break;
+
+ case 0xfe ... 0xff:
+ default:
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+
+ p++;
+
+ while (dec > 0) {
+
+ /* need 0x10 for the 2 first bits */
+ if ( ( *p & 0xc0 ) != 0x80 )
+ return UTF8_CODE_BADSEQ | ((p-(unsigned char *)s)&0xffff);
+
+ /* add data at char */
+ *c = ( *c << 6 ) | ( *p & 0x3f );
+
+ dec--;
+ p++;
+ }
+
+ /* Check for overlong encoding: each range must use the shortest form.
+ * 2 bytes : 5 + 6 : 11 bits : 0x80 ... 0x7ff
+ * 3 bytes : 4 + 6 + 6 : 16 bits : 0x800 ... 0xffff
+ * 4 bytes : 3 + 6 + 6 + 6 : 21 bits : 0x10000 ... 0x1fffff
+ */
+ if (( *c <= 0x7f && (p-(unsigned char *)s) > 1) ||
+ (*c >= 0x80 && *c <= 0x7ff && (p-(unsigned char *)s) > 2) ||
+ (*c >= 0x800 && *c <= 0xffff && (p-(unsigned char *)s) > 3) ||
+ (*c >= 0x10000 && *c <= 0x1fffff && (p-(unsigned char *)s) > 4))
+ code |= UTF8_CODE_OVERLONG;
+
+ /* Check invalid UTF8 range. */
+ if ((*c >= 0xd800 && *c <= 0xdfff) ||
+ (*c >= 0xfffe && *c <= 0xffff))
+ code |= UTF8_CODE_INVRANGE;
+
+ return code | ((p-(unsigned char *)s)&0x0f);
+}
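+
+/* Usage sketch (editor's illustration, not part of this patch): decoding the
+ * two-byte sequence for 'é' (U+00E9):
+ *
+ *    unsigned int c;
+ *    unsigned char ret = utf8_next("\xc3\xa9", 2, &c);
+ *    // (ret & 0xf0) == UTF8_CODE_OK, (ret & 0x0f) == 2, c == 0xe9
+ */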
+
+/* append a copy of string <str> (in a wordlist) at the end of the list <li>
+ * On failure : return 0 and <err> filled with an error message.
+ * The caller is responsible for freeing the <err> and <str> copy
+ * memory area using free()
+ */
+int list_append_word(struct list *li, const char *str, char **err)
+{
+ struct wordlist *wl;
+
+ wl = calloc(1, sizeof(*wl));
+ if (!wl) {
+ memprintf(err, "out of memory");
+ goto fail_wl;
+ }
+
+ wl->s = strdup(str);
+ if (!wl->s) {
+ memprintf(err, "out of memory");
+ goto fail_wl_s;
+ }
+
+ LIST_APPEND(li, &wl->list);
+
+ return 1;
+
+fail_wl_s:
+ free(wl->s);
+fail_wl:
+ free(wl);
+ return 0;
+}
+
+/* indicates if a memory location may safely be read or not. The trick consists
+ * in performing a harmless syscall using this location as an input and letting
+ * the operating system report whether it's OK or not. For this we have the
+ * stat() syscall, which will return EFAULT when the memory location supposed
+ * to contain the file name is not readable. If it is readable it will then
+ * either return 0 if the area contains an existing file name, or -1 with
+ * another code. This must not be abused, and some audit systems might detect
+ * this as abnormal activity. It's used only for unsafe dumps.
+ */
+int may_access(const void *ptr)
+{
+ struct stat buf;
+
+ if (stat(ptr, &buf) == 0)
+ return 1;
+ if (errno == EFAULT)
+ return 0;
+ return 1;
+}
+
+/* print a string of text buffer to <out>. The format is :
+ * Non-printable chars \t, \n, \r and \e are encoded in C format.
+ * Other non-printable chars are encoded "\xHH". Space, '\', and '=' are also escaped.
+ * Printing stops if a null char or <bsize> is reached, or if there is no more room in the chunk.
+ */
+int dump_text(struct buffer *out, const char *buf, int bsize)
+{
+ unsigned char c;
+ size_t ptr = 0;
+
+ while (ptr < bsize && buf[ptr]) {
+ c = buf[ptr];
+ if (isprint((unsigned char)c) && isascii((unsigned char)c) && c != '\\' && c != ' ' && c != '=') {
+ if (out->data > out->size - 1)
+ break;
+ out->area[out->data++] = c;
+ }
+ else if (c == '\t' || c == '\n' || c == '\r' || c == '\e' || c == '\\' || c == ' ' || c == '=') {
+ if (out->data > out->size - 2)
+ break;
+ out->area[out->data++] = '\\';
+ switch (c) {
+ case ' ': c = ' '; break;
+ case '\t': c = 't'; break;
+ case '\n': c = 'n'; break;
+ case '\r': c = 'r'; break;
+ case '\e': c = 'e'; break;
+ case '\\': c = '\\'; break;
+ case '=': c = '='; break;
+ }
+ out->area[out->data++] = c;
+ }
+ else {
+ if (out->data > out->size - 4)
+ break;
+ out->area[out->data++] = '\\';
+ out->area[out->data++] = 'x';
+ out->area[out->data++] = hextab[(c >> 4) & 0xF];
+ out->area[out->data++] = hextab[c & 0xF];
+ }
+ ptr++;
+ }
+
+ return ptr;
+}
+
+/* print a buffer in hex.
+ * Printing stops if <bsize> is reached, or if there is no more room in the chunk.
+ */
+int dump_binary(struct buffer *out, const char *buf, int bsize)
+{
+ unsigned char c;
+ int ptr = 0;
+
+ while (ptr < bsize) {
+ c = buf[ptr];
+
+ if (out->data > out->size - 2)
+ break;
+ out->area[out->data++] = hextab[(c >> 4) & 0xF];
+ out->area[out->data++] = hextab[c & 0xF];
+
+ ptr++;
+ }
+ return ptr;
+}
+
+/* Appends into buffer <out> a hex dump of memory area <buf> for <len> bytes,
+ * prepending each line with prefix <pfx>. The output is *not* initialized.
+ * The output will not wrap past the buffer's end so it is more optimal if the
+ * caller makes sure the buffer is aligned first. A trailing zero will always
+ * be appended (and not counted) if there is room for it. The caller must make
+ * sure that the area is dumpable first. If <unsafe> is non-null, the memory
+ * locations are checked first for being readable.
+ */
+void dump_hex(struct buffer *out, const char *pfx, const void *buf, int len, int unsafe)
+{
+ const unsigned char *d = buf;
+ int i, j, start;
+
+ d = (const unsigned char *)(((unsigned long)buf) & -16);
+ start = ((unsigned long)buf) & 15;
+
+ for (i = 0; i < start + len; i += 16) {
+ chunk_appendf(out, (sizeof(void *) == 4) ? "%s%8p: " : "%s%16p: ", pfx, d + i);
+
+ // 0: unchecked, 1: checked safe, 2: danger
+ unsafe = !!unsafe;
+ if (unsafe && !may_access(d + i))
+ unsafe = 2;
+
+ for (j = 0; j < 16; j++) {
+ if ((i + j < start) || (i + j >= start + len))
+ chunk_strcat(out, "'' ");
+ else if (unsafe > 1)
+ chunk_strcat(out, "** ");
+ else
+ chunk_appendf(out, "%02x ", d[i + j]);
+
+ if (j == 7)
+ chunk_strcat(out, "- ");
+ }
+ chunk_strcat(out, " ");
+ for (j = 0; j < 16; j++) {
+ if ((i + j < start) || (i + j >= start + len))
+ chunk_strcat(out, "'");
+ else if (unsafe > 1)
+ chunk_strcat(out, "*");
+ else if (isprint((unsigned char)d[i + j]))
+ chunk_appendf(out, "%c", d[i + j]);
+ else
+ chunk_strcat(out, ".");
+ }
+ chunk_strcat(out, "\n");
+ }
+}
+
+/* dumps <pfx> followed by <n> bytes from <addr> in hex form into buffer <buf>
+ * enclosed in brackets after the address itself, formatted on 14 chars
+ * including the "0x" prefix. This is meant to be used as a prefix for code
+ * areas. For example:
+ * "0x7f10b6557690 [48 c7 c0 0f 00 00 00 0f]"
+ * It relies on may_access() to know if the bytes are dumpable, otherwise "--"
+ * is emitted. A NULL <pfx> will be considered empty.
+ */
+void dump_addr_and_bytes(struct buffer *buf, const char *pfx, const void *addr, int n)
+{
+ int ok = 0;
+ int i;
+
+ chunk_appendf(buf, "%s%#14lx [", pfx ? pfx : "", (long)addr);
+
+ for (i = 0; i < n; i++) {
+ if (i == 0 || (((long)(addr + i) ^ (long)(addr)) & 4096))
+ ok = may_access(addr + i);
+ if (ok)
+ chunk_appendf(buf, "%02x%s", ((uint8_t*)addr)[i], (i<n-1) ? " " : "]");
+ else
+ chunk_appendf(buf, "--%s", (i<n-1) ? " " : "]");
+ }
+}
+
+/* print a line of text buffer (limited to 70 bytes) to <out>. The format is :
+ * <2 spaces> <offset=5 digits> <space or plus> <space> <70 chars max> <\n>
+ * which is at most 80 chars per line. Non-printable chars \t, \n, \r and \e are
+ * encoded in C format. Other non-printable chars are encoded "\xHH". Original
+ * lines are respected within the limit of 70 output chars. Lines that are
+ * continuation of a previous truncated line begin with "+" instead of " "
+ * after the offset. The new pointer is returned.
+ */
+int dump_text_line(struct buffer *out, const char *buf, int bsize, int len,
+ int *line, int ptr)
+{
+ int end;
+ unsigned char c;
+
+ end = out->data + 80;
+ if (end > out->size)
+ return ptr;
+
+ chunk_appendf(out, " %05d%c ", ptr, (ptr == *line) ? ' ' : '+');
+
+ while (ptr < len && ptr < bsize) {
+ c = buf[ptr];
+ if (isprint((unsigned char)c) && isascii((unsigned char)c) && c != '\\') {
+ if (out->data > end - 2)
+ break;
+ out->area[out->data++] = c;
+ } else if (c == '\t' || c == '\n' || c == '\r' || c == '\e' || c == '\\') {
+ if (out->data > end - 3)
+ break;
+ out->area[out->data++] = '\\';
+ switch (c) {
+ case '\t': c = 't'; break;
+ case '\n': c = 'n'; break;
+ case '\r': c = 'r'; break;
+ case '\e': c = 'e'; break;
+ case '\\': c = '\\'; break;
+ }
+ out->area[out->data++] = c;
+ } else {
+ if (out->data > end - 5)
+ break;
+ out->area[out->data++] = '\\';
+ out->area[out->data++] = 'x';
+ out->area[out->data++] = hextab[(c >> 4) & 0xF];
+ out->area[out->data++] = hextab[c & 0xF];
+ }
+ if (buf[ptr++] == '\n') {
+ /* we had a line break, let's return now */
+ out->area[out->data++] = '\n';
+ *line = ptr;
+ return ptr;
+ }
+ }
+ /* we have an incomplete line, we return it as-is */
+ out->area[out->data++] = '\n';
+ return ptr;
+}
+
+/* displays a <len> long memory block at <buf>, assuming first byte of <buf>
+ * has address <baseaddr>. String <pfx> may be placed as a prefix in front of
+ * each line. It may be NULL if unused. The output is emitted to file <out>.
+ */
+void debug_hexdump(FILE *out, const char *pfx, const char *buf,
+ unsigned int baseaddr, int len)
+{
+ unsigned int i;
+ int b, j;
+
+ for (i = 0; i < (len + (baseaddr & 15)); i += 16) {
+ b = i - (baseaddr & 15);
+ fprintf(out, "%s%08x: ", pfx ? pfx : "", i + (baseaddr & ~15));
+ for (j = 0; j < 8; j++) {
+ if (b + j >= 0 && b + j < len)
+ fprintf(out, "%02x ", (unsigned char)buf[b + j]);
+ else
+ fprintf(out, " ");
+ }
+
+ if (b + j >= 0 && b + j < len)
+ fputc('-', out);
+ else
+ fputc(' ', out);
+
+ for (j = 8; j < 16; j++) {
+ if (b + j >= 0 && b + j < len)
+ fprintf(out, " %02x", (unsigned char)buf[b + j]);
+ else
+ fprintf(out, " ");
+ }
+
+ fprintf(out, " ");
+ for (j = 0; j < 16; j++) {
+ if (b + j >= 0 && b + j < len) {
+ if (isprint((unsigned char)buf[b + j]))
+ fputc((unsigned char)buf[b + j], out);
+ else
+ fputc('.', out);
+ }
+ else
+ fputc(' ', out);
+ }
+ fputc('\n', out);
+ }
+}
+
+/* Tries to report the executable path name on platforms supporting this. If
+ * not found or not possible, returns NULL.
+ */
+const char *get_exec_path()
+{
+ const char *ret = NULL;
+
+#if defined(__linux__) && defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16))
+ long execfn = getauxval(AT_EXECFN);
+
+ if (execfn && execfn != ENOENT)
+ ret = (const char *)execfn;
+#elif defined(__FreeBSD__)
+ Elf_Auxinfo *auxv;
+ for (auxv = __elf_aux_vector; auxv->a_type != AT_NULL; ++auxv) {
+ if (auxv->a_type == AT_EXECPATH) {
+ ret = (const char *)auxv->a_un.a_ptr;
+ break;
+ }
+ }
+#elif defined(__NetBSD__)
+ AuxInfo *auxv;
+ for (auxv = _dlauxinfo(); auxv->a_type != AT_NULL; ++auxv) {
+ if (auxv->a_type == AT_SUN_EXECNAME) {
+ ret = (const char *)auxv->a_v;
+ break;
+ }
+ }
+#elif defined(__sun)
+ ret = getexecname();
+#endif
+ return ret;
+}
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+/* calls dladdr() or dladdr1() on <addr> and <dli>. If dladdr1 is available,
+ * also returns the symbol size in <size>, otherwise returns 0 there.
+ */
+static int dladdr_and_size(const void *addr, Dl_info *dli, size_t *size)
+{
+ int ret;
+#if defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)) // most detailed one
+ const ElfW(Sym) *sym __attribute__((may_alias));
+
+ ret = dladdr1(addr, dli, (void **)&sym, RTLD_DL_SYMENT);
+ if (ret)
+ *size = sym ? sym->st_size : 0;
+#else
+#if defined(__sun)
+ ret = dladdr((void *)addr, dli);
+#else
+ ret = dladdr(addr, dli);
+#endif
+ *size = 0;
+#endif
+ return ret;
+}
+
+/* Sets build_is_static to true if we detect a static build. Some older glibcs
+ * tend to crash inside dlsym() in static builds, but tests show that at least
+ * dladdr() still works (and will fail to resolve anything of course). Thus we
+ * try to determine if we're on a static build to avoid calling dlsym() in this
+ * case.
+ */
+void check_if_static_build()
+{
+ Dl_info dli = { };
+ size_t size = 0;
+
+ /* Now let's try to be smarter */
+ if (!dladdr_and_size(&main, &dli, &size))
+ build_is_static = 1;
+ else
+ build_is_static = 0;
+}
+
+INITCALL0(STG_PREPARE, check_if_static_build);
+
+/* Tries to retrieve the address of the first occurrence of symbol <name>.
+ * Note that NULL in return is not always an error as a symbol may have that
+ * address in special situations.
+ */
+void *get_sym_curr_addr(const char *name)
+{
+ void *ptr = NULL;
+
+#ifdef RTLD_DEFAULT
+ if (!build_is_static)
+ ptr = dlsym(RTLD_DEFAULT, name);
+#endif
+ return ptr;
+}
+
+
+/* Tries to retrieve the address of the next occurrence of symbol <name>.
+ * Note that NULL in return is not always an error as a symbol may have that
+ * address in special situations.
+ */
+void *get_sym_next_addr(const char *name)
+{
+ void *ptr = NULL;
+
+#ifdef RTLD_NEXT
+ if (!build_is_static)
+ ptr = dlsym(RTLD_NEXT, name);
+#endif
+ return ptr;
+}
+
+#else /* elf & linux & dl */
+
+/* no possible resolving on other platforms at the moment */
+void *get_sym_curr_addr(const char *name)
+{
+ return NULL;
+}
+
+void *get_sym_next_addr(const char *name)
+{
+ return NULL;
+}
+
+#endif /* elf & linux & dl */
+
+/* Tries to append to buffer <buf> some indications about the symbol at address
+ * <addr> using the following form:
+ * lib:+0xoffset (unresolvable address from lib's base)
+ * main+0xoffset (unresolvable address from main (+/-))
+ * lib:main+0xoffset (unresolvable lib address from main (+/-))
+ * name (resolved exact exec address)
+ * lib:name (resolved exact lib address)
+ * name+0xoffset/0xsize (resolved address within exec symbol)
+ * lib:name+0xoffset/0xsize (resolved address within lib symbol)
+ *
+ * The file name (lib or executable) is limited to what lies between the last
+ * '/' and the first following '.'. An optional prefix <pfx> is prepended before
+ * the output if not null. The file is not dumped when it's the same as the one
+ * that contains the "main" symbol, or when __ELF__ && USE_DL are not set.
+ *
+ * The symbol's base address is returned, or NULL when unresolved, in order to
+ * allow the caller to match it against known ones.
+ */
+const void *resolve_sym_name(struct buffer *buf, const char *pfx, const void *addr)
+{
+ const struct {
+ const void *func;
+ const char *name;
+ } fcts[] = {
+ { .func = process_stream, .name = "process_stream" },
+ { .func = task_run_applet, .name = "task_run_applet" },
+ { .func = sc_conn_io_cb, .name = "sc_conn_io_cb" },
+ { .func = sock_conn_iocb, .name = "sock_conn_iocb" },
+ { .func = dgram_fd_handler, .name = "dgram_fd_handler" },
+ { .func = listener_accept, .name = "listener_accept" },
+ { .func = manage_global_listener_queue, .name = "manage_global_listener_queue" },
+ { .func = poller_pipe_io_handler, .name = "poller_pipe_io_handler" },
+ { .func = mworker_accept_wrapper, .name = "mworker_accept_wrapper" },
+ { .func = session_expire_embryonic, .name = "session_expire_embryonic" },
+#ifdef USE_THREAD
+ { .func = accept_queue_process, .name = "accept_queue_process" },
+#endif
+#ifdef USE_LUA
+ { .func = hlua_process_task, .name = "hlua_process_task" },
+#endif
+#ifdef SSL_MODE_ASYNC
+ { .func = ssl_async_fd_free, .name = "ssl_async_fd_free" },
+ { .func = ssl_async_fd_handler, .name = "ssl_async_fd_handler" },
+#endif
+ };
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+ Dl_info dli, dli_main;
+ size_t size;
+ const char *fname, *p;
+#endif
+ int i;
+
+ if (pfx)
+ chunk_appendf(buf, "%s", pfx);
+
+ for (i = 0; i < sizeof(fcts) / sizeof(fcts[0]); i++) {
+ if (addr == fcts[i].func) {
+ chunk_appendf(buf, "%s", fcts[i].name);
+ return addr;
+ }
+ }
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+ /* Now let's try to be smarter */
+ if (!dladdr_and_size(addr, &dli, &size))
+ goto unknown;
+
+ /* 1. prefix the library name if it's not the same object as the one
+ * that contains the main function. The name is picked between last '/'
+ * and first following '.'.
+ */
+ if (!dladdr(main, &dli_main))
+ dli_main.dli_fbase = NULL;
+
+ if (dli_main.dli_fbase != dli.dli_fbase) {
+ fname = dli.dli_fname;
+ p = strrchr(fname, '/');
+		if (p)
+			fname = p + 1;
+ p = strchr(fname, '.');
+ if (!p)
+ p = fname + strlen(fname);
+
+ chunk_appendf(buf, "%.*s:", (int)(long)(p - fname), fname);
+ }
+
+ /* 2. symbol name */
+ if (dli.dli_sname) {
+ /* known, dump it and return symbol's address (exact or relative) */
+ chunk_appendf(buf, "%s", dli.dli_sname);
+ if (addr != dli.dli_saddr) {
+ chunk_appendf(buf, "+%#lx", (long)(addr - dli.dli_saddr));
+ if (size)
+ chunk_appendf(buf, "/%#lx", (long)size);
+ }
+ return dli.dli_saddr;
+ }
+ else if (dli_main.dli_fbase != dli.dli_fbase) {
+ /* unresolved symbol from a known library, report relative offset */
+ chunk_appendf(buf, "+%#lx", (long)(addr - dli.dli_fbase));
+ return NULL;
+ }
+#endif /* __ELF__ && !__linux__ || USE_DL */
+ unknown:
+ /* unresolved symbol from the main file, report relative offset to main */
+ if ((void*)addr < (void*)main)
+ chunk_appendf(buf, "main-%#lx", (long)((void*)main - addr));
+ else
+ chunk_appendf(buf, "main+%#lx", (long)(addr - (void*)main));
+ return NULL;
+}
+
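+/* Usage sketch (illustrative only, not part of the API): a debug handler may
+ * resolve its caller's return address like this, using the usual per-thread
+ * trash chunk as output:
+ *
+ *	chunk_reset(&trash);
+ *	resolve_sym_name(&trash, NULL, __builtin_return_address(0));
+ *	// the chunk now ends with e.g. "mylib:my_func+0x42/0x1b0"
+ */
+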
+/* On systems where this is supported, let's provide a possibility to enumerate
+ * the list of object files. The output is appended to a buffer initialized by
+ * the caller, with one name per line. A trailing zero is always emitted if data
+ * are written. Only real objects are dumped (the executable and .so libs). The
+ * function returns non-zero if it dumps anything. These functions do not make
+ * use of the trash so that the caller may pass the trash as the output buffer.
+ * The output format may be platform-specific but at least one version must
+ * emit raw object file names when <with_addr> is zero.
+ */
+#if defined(HA_HAVE_DUMP_LIBS)
+# if defined(HA_HAVE_DL_ITERATE_PHDR)
+/* the private <data> we pass below is a dump context initialized like this */
+struct dl_dump_ctx {
+ struct buffer *buf;
+ int with_addr;
+};
+
+static int dl_dump_libs_cb(struct dl_phdr_info *info, size_t size, void *data)
+{
+ struct dl_dump_ctx *ctx = data;
+ const char *fname;
+ size_t p1, p2, beg, end;
+ int idx;
+
+ if (!info || !info->dlpi_name)
+ goto leave;
+
+ if (!*info->dlpi_name)
+ fname = get_exec_path();
+ else if (strchr(info->dlpi_name, '/'))
+ fname = info->dlpi_name;
+ else
+ /* else it's a VDSO or similar and we're not interested */
+ goto leave;
+
+ if (!ctx->with_addr)
+ goto dump_name;
+
+ /* virtual addresses are relative to the load address and are per
+ * pseudo-header, so we have to scan them all to find the furthest
+ * one from the beginning. In this case we only dump entries if
+ * they have at least one section.
+ */
+ beg = ~0; end = 0;
+ for (idx = 0; idx < info->dlpi_phnum; idx++) {
+ if (!info->dlpi_phdr[idx].p_memsz)
+ continue;
+ p1 = info->dlpi_phdr[idx].p_vaddr;
+ if (p1 < beg)
+ beg = p1;
+ p2 = p1 + info->dlpi_phdr[idx].p_memsz - 1;
+ if (p2 > end)
+ end = p2;
+ }
+
+ if (!idx)
+ goto leave;
+
+ chunk_appendf(ctx->buf, "0x%012llx-0x%012llx (0x%07llx) ",
+ (ullong)info->dlpi_addr + beg,
+ (ullong)info->dlpi_addr + end,
+ (ullong)(end - beg + 1));
+ dump_name:
+ chunk_appendf(ctx->buf, "%s\n", fname);
+ leave:
+ return 0;
+}
+
+/* dumps lib names and optionally address ranges */
+int dump_libs(struct buffer *output, int with_addr)
+{
+ struct dl_dump_ctx ctx = { .buf = output, .with_addr = with_addr };
+ size_t old_data = output->data;
+
+ dl_iterate_phdr(dl_dump_libs_cb, &ctx);
+ return output->data != old_data;
+}
+# else // no DL_ITERATE_PHDR
+# error "No dump_libs() function for this platform"
+# endif
+#else // no HA_HAVE_DUMP_LIBS
+
+/* unsupported platform: do not dump anything */
+int dump_libs(struct buffer *output, int with_addr)
+{
+ return 0;
+}
+
+#endif // HA_HAVE_DUMP_LIBS
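+
+/* For reference, with <with_addr> set, each output line looks like this on a
+ * dl_iterate_phdr() platform (addresses and paths are illustrative):
+ *
+ *   0x55b9a85ce000-0x55b9a8800fff (0x0233000) /usr/sbin/haproxy
+ *   0x7f7d943ea000-0x7f7d945f2fff (0x0209000) /lib/x86_64-linux-gnu/libc.so.6
+ */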
+
+/*
+ * Allocates an array of unsigned ints from string <str> made of integers
+ * separated by dot characters, and stores its address at <nums>.
+ *
+ * First, the value pointed to by <sz> is set to 0 and the pointer at <nums>
+ * is set to NULL. Then the array is allocated, and the value pointed to by
+ * <sz> is updated to the number of elements in this array.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int parse_dotted_uints(const char *str, unsigned int **nums, size_t *sz)
+{
+ unsigned int *n;
+ const char *s, *end;
+
+ s = str;
+ *sz = 0;
+ end = str + strlen(str);
+ *nums = n = NULL;
+
+ while (1) {
+ unsigned int r;
+
+ if (s >= end)
+ break;
+
+		r = read_uint(&s, end);
+		/* Expected characters after having read a uint: '\0' or '.',
+		 * and if '.', it must not be terminal.
+		 */
+		if (*s != '\0' && (*s++ != '.' || s == end)) {
+ free(n);
+ return 0;
+ }
+
+ n = my_realloc2(n, (*sz + 1) * sizeof *n);
+ if (!n)
+ return 0;
+
+ n[(*sz)++] = r;
+ }
+ *nums = n;
+
+ return 1;
+}
+
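+/* A minimal usage sketch (illustrative):
+ *
+ *	unsigned int *nums = NULL;
+ *	size_t sz = 0;
+ *
+ *	if (parse_dotted_uints("1.2.3", &nums, &sz)) {
+ *		// here sz == 3 and nums[] == { 1, 2, 3 }
+ *		free(nums);
+ *	}
+ */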
+
+/* returns the number of bytes needed to encode <v> as a varint. An inline
+ * version exists for use with constants (__varint_bytes()).
+ */
+int varint_bytes(uint64_t v)
+{
+ int len = 1;
+
+ if (v >= 240) {
+ v = (v - 240) >> 4;
+ while (1) {
+ len++;
+ if (v < 128)
+ break;
+ v = (v - 128) >> 7;
+ }
+ }
+ return len;
+}
+
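+/* For reference, derived from the encoding above: values 0..239 take 1 byte,
+ * 240..2287 take 2 bytes, 2288..264431 take 3 bytes, and each extra byte
+ * extends the range by 7 more bits.
+ */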
+
+/* Random number generator state, see below */
+static uint64_t ha_random_state[2] ALIGNED(2*sizeof(uint64_t));
+
+/* This is a thread-safe implementation of xoroshiro128** described below:
+ * http://prng.di.unimi.it/
+ * It features a period of 2^128-1, returns 64 high-quality bits on each call,
+ * supports fast jumps and passes all common quality tests. It is thread-safe,
+ * uses a double-cas on 64-bit architectures supporting it, and falls back to a
+ * local lock on other ones.
+ */
+uint64_t ha_random64()
+{
+ uint64_t old[2] ALIGNED(2*sizeof(uint64_t));
+ uint64_t new[2] ALIGNED(2*sizeof(uint64_t));
+
+#if defined(USE_THREAD) && (!defined(HA_CAS_IS_8B) || !defined(HA_HAVE_CAS_DW))
+ static HA_SPINLOCK_T rand_lock;
+
+ HA_SPIN_LOCK(OTHER_LOCK, &rand_lock);
+#endif
+
+ old[0] = ha_random_state[0];
+ old[1] = ha_random_state[1];
+
+#if defined(USE_THREAD) && defined(HA_CAS_IS_8B) && defined(HA_HAVE_CAS_DW)
+ do {
+#endif
+ new[1] = old[0] ^ old[1];
+ new[0] = rotl64(old[0], 24) ^ new[1] ^ (new[1] << 16); // a, b
+ new[1] = rotl64(new[1], 37); // c
+
+#if defined(USE_THREAD) && defined(HA_CAS_IS_8B) && defined(HA_HAVE_CAS_DW)
+ } while (unlikely(!_HA_ATOMIC_DWCAS(ha_random_state, old, new)));
+#else
+ ha_random_state[0] = new[0];
+ ha_random_state[1] = new[1];
+#if defined(USE_THREAD)
+ HA_SPIN_UNLOCK(OTHER_LOCK, &rand_lock);
+#endif
+#endif
+ return rotl64(old[0] * 5, 7) * 9;
+}
+
+/* seeds the random state using up to <len> bytes from <seed>, starting with
+ * the first non-zero byte.
+ */
+void ha_random_seed(const unsigned char *seed, size_t len)
+{
+ size_t pos;
+
+ /* the seed must not be all zeroes, so we pre-fill it with alternating
+ * bits and overwrite part of them with the block starting at the first
+ * non-zero byte from the seed.
+ */
+ memset(ha_random_state, 0x55, sizeof(ha_random_state));
+
+ for (pos = 0; pos < len; pos++)
+ if (seed[pos] != 0)
+ break;
+
+ if (pos == len)
+ return;
+
+ seed += pos;
+ len -= pos;
+
+ if (len > sizeof(ha_random_state))
+ len = sizeof(ha_random_state);
+
+ memcpy(ha_random_state, seed, len);
+}
+
+/* This causes a jump to (dist * 2^96) places in the pseudo-random sequence,
+ * and is equivalent to calling ha_random64() as many times. It is used to
+ * provide non-overlapping sequences of 2^96 numbers (~7*10^28) to up to 2^32
+ * different generators (i.e. different processes after a fork). The <dist>
+ * argument is the distance to jump to and is used in a loop, so it should not
+ * be too large if the processing time is a concern.
+ *
+ * BEWARE: this function is NOT thread-safe and must not be called during
+ * concurrent accesses to ha_random64().
+ */
+void ha_random_jump96(uint32_t dist)
+{
+ while (dist--) {
+ uint64_t s0 = 0;
+ uint64_t s1 = 0;
+ int b;
+
+ for (b = 0; b < 64; b++) {
+ if ((0xd2a98b26625eee7bULL >> b) & 1) {
+ s0 ^= ha_random_state[0];
+ s1 ^= ha_random_state[1];
+ }
+ ha_random64();
+ }
+
+ for (b = 0; b < 64; b++) {
+ if ((0xdddf9b1090aa7ac1ULL >> b) & 1) {
+ s0 ^= ha_random_state[0];
+ s1 ^= ha_random_state[1];
+ }
+ ha_random64();
+ }
+ ha_random_state[0] = s0;
+ ha_random_state[1] = s1;
+ }
+}
+
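+/* Usage sketch (illustrative): after a fork, each process can be given its
+ * own non-overlapping sequence; <relative_pid> here stands for any value
+ * unique to the process:
+ *
+ *	ha_random_seed(seed, sizeof(seed));  // same seed in all processes
+ *	ha_random_jump96(relative_pid);      // unique per-process jump
+ */
+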
+/* Generates an RFC 4122 version 4 (fully random) UUID into chunk <output>,
+ * which must be at least 37 bytes large.
+ */
+void ha_generate_uuid(struct buffer *output)
+{
+ uint32_t rnd[4];
+ uint64_t last;
+
+ last = ha_random64();
+ rnd[0] = last;
+ rnd[1] = last >> 32;
+
+ last = ha_random64();
+ rnd[2] = last;
+ rnd[3] = last >> 32;
+
+ chunk_printf(output, "%8.8x-%4.4x-%4.4x-%4.4x-%12.12llx",
+ rnd[0],
+ rnd[1] & 0xFFFF,
+ ((rnd[1] >> 16u) & 0xFFF) | 0x4000, // highest 4 bits indicate the uuid version
+ (rnd[2] & 0x3FFF) | 0x8000, // the highest 2 bits indicate the UUID variant (10),
+ (long long)((rnd[2] >> 14u) | ((uint64_t) rnd[3] << 18u)) & 0xFFFFFFFFFFFFull);
+}
+
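+/* Usage sketch (illustrative):
+ *
+ *	ha_generate_uuid(&trash);
+ *	// trash.area now holds something like "f6f3a8cc-21c5-44b1-8a14-d1d20ad4c2aa"
+ */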
+
+/* only used by parse_line() below. It supports writing in place provided that
+ * <in> is updated to the next location before calling it. In that case, the
+ * char at <in> may be overwritten.
+ */
+#define EMIT_CHAR(x) \
+ do { \
+ char __c = (char)(x); \
+ if ((opts & PARSE_OPT_INPLACE) && out+outpos > in) \
+ err |= PARSE_ERR_OVERLAP; \
+ if (outpos >= outmax) \
+ err |= PARSE_ERR_TOOLARGE; \
+ if (!err) \
+ out[outpos] = __c; \
+ outpos++; \
+ } while (0)
+
+/* Parse <in>, copy it into <out> split into isolated words whose pointers
+ * are put in <args>. If more than <outlen> bytes have to be emitted, the
+ * extraneous ones are not emitted but <outlen> is updated so that the caller
+ * knows how much to realloc. Similarly, <args> are not updated beyond <nbargs>
+ * but the returned <nbargs> indicates how many were found. All trailing args
+ * up to <nbargs> point to the trailing zero, and as long as <nbargs> is > 0,
+ * it is guaranteed that at least one arg will point to the zero. It is safe
+ * to call it with a NULL <args> if <nbargs> is 0.
+ *
+ * <out> may overlap with <in> provided that it never goes further, in which
+ * case the parser will accept to perform in-place parsing and unquoting/
+ * unescaping, but only if environment variables do not lead to expansion that
+ * causes overlapping; otherwise, since the input string is destroyed, the
+ * error is not recoverable. Note that even during out-of-place parsing, <in>
+ * will experience temporary in-place modifications for variable resolution and
+ * must be writable, and will also receive zeroes to delimit words when using
+ * in-place copy. Parsing options <opts> are taken from PARSE_OPT_*. The return
+ * value is zero on success, otherwise a bitwise-or of PARSE_ERR_*. Upon error, the
+ * starting point of the first invalid character sequence or unmatched
+ * quote/brace is reported in <errptr> if not NULL. When using in-place parsing
+ * error reporting might be difficult since zeroes will have been inserted into
+ * the string. One solution for the caller may consist in replacing all args
+ * delimiters with spaces in this case.
+ */
+uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, const char **errptr)
+{
+ char *quote = NULL;
+ char *brace = NULL;
+ char *word_expand = NULL;
+ unsigned char hex1, hex2;
+ size_t outmax = *outlen;
+ int argsmax = *nbargs - 1;
+ size_t outpos = 0;
+ int squote = 0;
+ int dquote = 0;
+ int arg = 0;
+ uint32_t err = 0;
+
+ *nbargs = 0;
+ *outlen = 0;
+
+ /* argsmax may be -1 here, protecting args[] from any write */
+ if (arg < argsmax)
+ args[arg] = out;
+
+ while (1) {
+ if (*in >= '-' && *in != '\\') {
+ /* speedup: directly send all regular chars starting
+ * with '-', '.', '/', alnum etc...
+ */
+ EMIT_CHAR(*in++);
+ continue;
+ }
+ else if (*in == '\0' || *in == '\n' || *in == '\r') {
+ /* end of line */
+ break;
+ }
+ else if (*in == '#' && (opts & PARSE_OPT_SHARP) && !squote && !dquote) {
+ /* comment */
+ break;
+ }
+ else if (*in == '"' && !squote && (opts & PARSE_OPT_DQUOTE)) { /* double quote outside single quotes */
+ if (dquote) {
+ dquote = 0;
+ quote = NULL;
+ }
+ else {
+ dquote = 1;
+ quote = in;
+ }
+ in++;
+ continue;
+ }
+ else if (*in == '\'' && !dquote && (opts & PARSE_OPT_SQUOTE)) { /* single quote outside double quotes */
+ if (squote) {
+ squote = 0;
+ quote = NULL;
+ }
+ else {
+ squote = 1;
+ quote = in;
+ }
+ in++;
+ continue;
+ }
+ else if (*in == '\\' && !squote && (opts & PARSE_OPT_BKSLASH)) {
+ /* first, we'll replace \\, \<space>, \#, \r, \n, \t, \xXX with their
+ * C equivalent value but only when they have a special meaning and within
+ * double quotes for some of them. Other combinations left unchanged (eg: \1).
+ */
+ char tosend = *in;
+
+ switch (in[1]) {
+ case ' ':
+ case '\\':
+ tosend = in[1];
+ in++;
+ break;
+
+ case 't':
+ tosend = '\t';
+ in++;
+ break;
+
+ case 'n':
+ tosend = '\n';
+ in++;
+ break;
+
+ case 'r':
+ tosend = '\r';
+ in++;
+ break;
+
+ case '#':
+ /* escaping of "#" only if comments are supported */
+ if (opts & PARSE_OPT_SHARP)
+ in++;
+ tosend = *in;
+ break;
+
+ case '\'':
+ /* escaping of "'" only outside single quotes and only if single quotes are supported */
+ if (opts & PARSE_OPT_SQUOTE && !squote)
+ in++;
+ tosend = *in;
+ break;
+
+ case '"':
+ /* escaping of '"' only outside single quotes and only if double quotes are supported */
+ if (opts & PARSE_OPT_DQUOTE && !squote)
+ in++;
+ tosend = *in;
+ break;
+
+ case '$':
+ /* escaping of '$' only inside double quotes and only if env supported */
+ if (opts & PARSE_OPT_ENV && dquote)
+ in++;
+ tosend = *in;
+ break;
+
+ case 'x':
+ if (!ishex(in[2]) || !ishex(in[3])) {
+ /* invalid or incomplete hex sequence */
+ err |= PARSE_ERR_HEX;
+ if (errptr)
+ *errptr = in;
+ goto leave;
+ }
+ hex1 = toupper((unsigned char)in[2]) - '0';
+ hex2 = toupper((unsigned char)in[3]) - '0';
+ if (hex1 > 9) hex1 -= 'A' - '9' - 1;
+ if (hex2 > 9) hex2 -= 'A' - '9' - 1;
+ tosend = (hex1 << 4) + hex2;
+ in += 3;
+ break;
+
+ default:
+ /* other combinations are not escape sequences */
+ break;
+ }
+
+ in++;
+ EMIT_CHAR(tosend);
+ }
+ else if (isspace((unsigned char)*in) && !squote && !dquote) {
+ /* a non-escaped space is an argument separator */
+ while (isspace((unsigned char)*in))
+ in++;
+ EMIT_CHAR(0);
+ arg++;
+ if (arg < argsmax)
+ args[arg] = out + outpos;
+ else
+ err |= PARSE_ERR_TOOMANY;
+ }
+ else if (*in == '$' && (opts & PARSE_OPT_ENV) && (dquote || !(opts & PARSE_OPT_DQUOTE))) {
+ /* environment variables are evaluated anywhere, or only
+ * inside double quotes if they are supported.
+ */
+ char *var_name;
+ char save_char;
+ const char *value;
+
+ in++;
+
+ if (*in == '{')
+ brace = in++;
+
+ if (!isalpha((unsigned char)*in) && *in != '_' && *in != '.') {
+ /* unacceptable character in variable name */
+ err |= PARSE_ERR_VARNAME;
+ if (errptr)
+ *errptr = in;
+ goto leave;
+ }
+
+ var_name = in;
+ if (*in == '.')
+ in++;
+ while (isalnum((unsigned char)*in) || *in == '_')
+ in++;
+
+ save_char = *in;
+ *in = '\0';
+ if (unlikely(*var_name == '.')) {
+ /* internal pseudo-variables */
+ if (strcmp(var_name, ".LINE") == 0)
+ value = ultoa(global.cfg_curr_line);
+ else if (strcmp(var_name, ".FILE") == 0)
+ value = global.cfg_curr_file;
+ else if (strcmp(var_name, ".SECTION") == 0)
+ value = global.cfg_curr_section;
+ else {
+ /* unsupported internal variable name */
+ err |= PARSE_ERR_VARNAME;
+ if (errptr)
+ *errptr = var_name;
+ goto leave;
+ }
+ } else {
+ value = getenv(var_name);
+ }
+ *in = save_char;
+
+ /* support for '[*]' sequence to force word expansion,
+ * only available inside braces */
+ if (*in == '[' && brace && (opts & PARSE_OPT_WORD_EXPAND)) {
+ word_expand = in++;
+
+ if (*in++ != '*' || *in++ != ']') {
+ err |= PARSE_ERR_WRONG_EXPAND;
+ if (errptr)
+ *errptr = word_expand;
+ goto leave;
+ }
+ }
+
+ if (brace) {
+ if (*in == '-') {
+ /* default value starts just after the '-' */
+ if (!value)
+ value = in + 1;
+
+ while (*in && *in != '}')
+ in++;
+ if (!*in)
+ goto no_brace;
+ *in = 0; // terminate the default value
+ }
+ else if (*in != '}') {
+ no_brace:
+ /* unmatched brace */
+ err |= PARSE_ERR_BRACE;
+ if (errptr)
+ *errptr = brace;
+ goto leave;
+ }
+
+ /* brace found, skip it */
+ in++;
+ brace = NULL;
+ }
+
+ if (value) {
+ while (*value) {
+ /* expand as individual parameters on a space character */
+ if (word_expand && isspace((unsigned char)*value)) {
+ EMIT_CHAR(0);
+ ++arg;
+ if (arg < argsmax)
+ args[arg] = out + outpos;
+ else
+ err |= PARSE_ERR_TOOMANY;
+
+ /* skip consecutive spaces */
+ while (isspace((unsigned char)*++value))
+ ;
+ } else {
+ EMIT_CHAR(*value++);
+ }
+ }
+ }
+ word_expand = NULL;
+ }
+ else {
+ /* any other regular char */
+ EMIT_CHAR(*in++);
+ }
+ }
+
+ /* end of output string */
+ EMIT_CHAR(0);
+
+ /* Don't add an empty arg after trailing spaces. Note that args[arg]
+ * may contain some distances relative to NULL if <out> was NULL, or
+ * pointers beyond the end of <out> in case <outlen> is too short, thus
+ * we must not dereference it.
+ */
+ if (arg < argsmax && args[arg] != out + outpos - 1)
+ arg++;
+
+ if (quote) {
+ /* unmatched quote */
+ err |= PARSE_ERR_QUOTE;
+ if (errptr)
+ *errptr = quote;
+ goto leave;
+ }
+ leave:
+ *nbargs = arg;
+ *outlen = outpos;
+
+ /* empty all trailing args by making them point to the trailing zero,
+ * at least the last one in any case.
+ */
+ if (arg > argsmax)
+ arg = argsmax;
+
+ while (arg >= 0 && arg <= argsmax)
+ args[arg++] = out + outpos - 1;
+
+ return err;
+}
+#undef EMIT_CHAR
+
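+/* A minimal usage sketch of parse_line() (illustrative, names made up). It
+ * splits a config-style line into words with quoting and environment
+ * expansion enabled; note that <in> must be writable:
+ */
+#if 0
+static void parse_line_example(void)
+{
+	char in[] = "bind \"${ADDR-0.0.0.0}:8080\" ssl";
+	char out[256];
+	char *args[8];
+	int nbargs = 8;
+	size_t outlen = sizeof(out);
+	uint32_t err;
+
+	err = parse_line(in, out, &outlen, args, &nbargs,
+	                 PARSE_OPT_ENV | PARSE_OPT_DQUOTE, NULL);
+	/* on success: err == 0, nbargs == 3, args[0] == "bind",
+	 * args[1] == "0.0.0.0:8080" when ADDR is not set, args[2] == "ssl".
+	 */
+}
+#endif
+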
+/* This is used to sanitize an input line that's about to be used for error reporting.
+ * It will adjust <line> to print approximately <width> chars around <pos>, trying to
+ * preserve the beginning, with leading or trailing "..." when the line is truncated.
+ * Non-printable characters in the output are replaced with '?'. It returns the new
+ * offset of <pos> in the modified line. <width> must be at least 6 to support two
+ * "..." otherwise the result is undefined. The line itself must have at least 7
+ * chars allocated for the same reason.
+ */
+size_t sanitize_for_printing(char *line, size_t pos, size_t width)
+{
+ size_t shift = 0;
+ char *out = line;
+ char *in = line;
+ char *end = line + width;
+
+ if (pos >= width) {
+ /* if we have to shift, we'll be out of context, so let's
+ * try to put <pos> at the center of width.
+ */
+ shift = pos - width / 2;
+ in += shift + 3;
+ end = out + width - 3;
+ out[0] = out[1] = out[2] = '.';
+ out += 3;
+ }
+
+ while (out < end && *in) {
+ if (isspace((unsigned char)*in))
+ *out++ = ' ';
+ else if (isprint((unsigned char)*in))
+ *out++ = *in;
+ else
+ *out++ = '?';
+ in++;
+ }
+
+ if (end < line + width) {
+ out[0] = out[1] = out[2] = '.';
+ out += 3;
+ }
+
+ *out++ = 0;
+ return pos - shift;
+}
+
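+/* Worked example: with line[] = "0123456789abcdefghijklmnopqrstuvwxyz",
+ * pos = 20 (the 'k') and width = 16, the line becomes "...fghijklmno..."
+ * and the function returns 8, the offset of 'k' in the sanitized line.
+ */
+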
+/* Update array <fp> with the fingerprint of word <word> by counting the
+ * transitions between characters. <fp> is a 1024-entries array indexed as
+ * 32*from+to. Positions for 'from' and 'to' are:
+ * 1..26=letter, 27=digit, 28=other/begin/end.
+ * Row "from=0" is used to mark the character's presence. Others unused.
+ */
+void update_word_fingerprint(uint8_t *fp, const char *word)
+{
+ const char *p;
+ int from, to;
+ int c;
+
+ from = 28; // begin
+ for (p = word; *p; p++) {
+ c = tolower(*p);
+ switch(c) {
+		case 'a'...'z': to = c - 'a' + 1; break;
+ case '0'...'9': to = 27; break;
+ default: to = 28; break;
+ }
+ fp[to] = 1;
+ fp[32 * from + to]++;
+ from = to;
+ }
+ to = 28; // end
+ fp[32 * from + to]++;
+}
+
+/* Initialize array <fp> with the fingerprint of word <word> by counting the
+ * transitions between characters. <fp> is a 1024-entries array indexed as
+ * 32*from+to. Positions for 'from' and 'to' are:
+ * 1..26=letter, 27=digit, 28=other/begin/end, as in update_word_fingerprint().
+ */
+void make_word_fingerprint(uint8_t *fp, const char *word)
+{
+ memset(fp, 0, 1024);
+ update_word_fingerprint(fp, word);
+}
+
+/* Return the distance between two word fingerprints created by function
+ * make_word_fingerprint(). It's a positive integer calculated as the sum of
+ * the differences between each location.
+ */
+int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2)
+{
+ int i, k, dist = 0;
+
+ for (i = 0; i < 1024; i++) {
+ k = (int)fp1[i] - (int)fp2[i];
+ dist += abs(k);
+ }
+ return dist;
+}
+
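+/* Sketch (illustrative only): how the two functions above may be combined to
+ * suggest the closest known word to an unknown one, e.g. for "did you mean"
+ * hints; the cutoff value is arbitrary for the example.
+ */
+#if 0
+static const char *suggest_closest(const char *unknown, const char **known)
+{
+	uint8_t fp1[1024], fp2[1024];
+	const char *best = NULL;
+	int best_dist = 0x7fffffff;
+	int dist, i;
+
+	make_word_fingerprint(fp1, unknown);
+	for (i = 0; known[i]; i++) {
+		make_word_fingerprint(fp2, known[i]);
+		dist = word_fingerprint_distance(fp1, fp2);
+		if (dist < best_dist) {
+			best_dist = dist;
+			best = known[i];
+		}
+	}
+	return best_dist <= 8 ? best : NULL;
+}
+#endif
+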
+/*
+ * This function compares the loaded OpenSSL version with the string <version>.
+ * It uses the same return codes as compare_current_version:
+ *
+ * -1 : the version in argument is older than the current openssl version
+ * 0 : the version in argument is the same as the current openssl version
+ * 1 : the version in argument is newer than the current openssl version
+ *
+ * Or some errors:
+ * -2 : openssl is not available on this process
+ * -3 : the version in argument is not parsable
+ */
+int openssl_compare_current_version(const char *version)
+{
+#ifdef USE_OPENSSL
+ int numversion;
+
+ numversion = openssl_version_parser(version);
+ if (numversion == 0)
+ return -3;
+
+ if (numversion < OPENSSL_VERSION_NUMBER)
+ return -1;
+ else if (numversion > OPENSSL_VERSION_NUMBER)
+ return 1;
+ else
+ return 0;
+#else
+ return -2;
+#endif
+}
+
+/*
+ * This function compares the loaded OpenSSL name with the string <name>.
+ * It returns 0 if the OpenSSL name starts like the passed parameter,
+ * 1 otherwise.
+ */
+int openssl_compare_current_name(const char *name)
+{
+#ifdef USE_OPENSSL
+ int name_len = 0;
+ const char *openssl_version = OpenSSL_version(OPENSSL_VERSION);
+
+ if (name) {
+ name_len = strlen(name);
+		if (name_len <= strlen(openssl_version))
+ return strncmp(openssl_version, name, name_len);
+ }
+#endif
+ return 1;
+}
+
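+/* Usage sketch (illustrative): gating a code path on the OpenSSL version the
+ * process was built with. Mind the inverted reading: the argument is compared
+ * against the current version:
+ *
+ *	int cmp = openssl_compare_current_version("3.0.0");
+ *
+ *	if (cmp == -1 || cmp == 0)
+ *		... // current OpenSSL is 3.0.0 or newer
+ */
+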
+static int init_tools_per_thread()
+{
+ /* Let's make each thread start from a different position */
+ statistical_prng_state += tid * MAX_THREADS;
+ if (!statistical_prng_state)
+ statistical_prng_state++;
+ return 1;
+}
+REGISTER_PER_THREAD_INIT(init_tools_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/trace.c b/src/trace.c
new file mode 100644
index 0000000..9e0785c
--- /dev/null
+++ b/src/trace.c
@@ -0,0 +1,761 @@
+/*
+ * Runtime tracing API
+ *
+ * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <import/ist.h>
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/sink.h>
+#include <haproxy/trace.h>
+
+struct list trace_sources = LIST_HEAD_INIT(trace_sources);
+THREAD_LOCAL struct buffer trace_buf = { };
+
+/* allocates the trace buffers. Returns 0 in case of failure. It is safe to
+ * call this function multiple times if the size changes.
+ */
+static int alloc_trace_buffers_per_thread()
+{
+ chunk_init(&trace_buf, my_realloc2(trace_buf.area, global.tune.bufsize), global.tune.bufsize);
+ return !!trace_buf.area;
+}
+
+static void free_trace_buffers_per_thread()
+{
+ chunk_destroy(&trace_buf);
+}
+
+REGISTER_PER_THREAD_ALLOC(alloc_trace_buffers_per_thread);
+REGISTER_PER_THREAD_FREE(free_trace_buffers_per_thread);
+
+/* pick the lowest non-null argument with a non-null arg_def mask */
+static inline const void *trace_pick_arg(uint32_t arg_def, const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ if (arg_def & 0x0000FFFF) {
+ if ((arg_def & 0x000000FF) && a1)
+ return a1;
+ if ((arg_def & 0x0000FF00) && a2)
+ return a2;
+ }
+
+ if (arg_def & 0xFFFF0000) {
+ if ((arg_def & 0x00FF0000) && a3)
+ return a3;
+ if ((arg_def & 0xFF000000) && a4)
+ return a4;
+ }
+
+ return NULL;
+}
+
+/* Reports whether the trace is enabled for the specified arguments, and whether
+ * tracking needs to be enabled or disabled. It gets the same API as __trace()
+ * except for <cb> and <msg> which are not used and were dropped, and <plockptr>
+ * which is an optional pointer to the lockptr to be updated (or NULL) for
+ * tracking. The function returns:
+ *   0 if the trace is not enabled for the module or these values
+ *   <0 if the trace matches some locking criteria but doesn't have the proper
+ *      level. In this case the interested caller might have to consider
+ *      disabling tracking.
+ *   >0 if the trace is enabled for the given criteria.
+ * In all cases, <plockptr> will only be set if non-null and if a locking
+ * criterion matched. It will be up to the caller to enable tracking if desired.
+ * A casual tester not interested in adjusting tracking (i.e. calling the
+ * function before deciding to prepare a buffer to be dumped) will only need to
+ * pass NULL for <plockptr> and check if the result is >0.
+ */
+int __trace_enabled(enum trace_level level, uint64_t mask, struct trace_source *src,
+ const struct ist where, const char *func,
+ const void *a1, const void *a2, const void *a3, const void *a4,
+ const void **plockptr)
+{
+ const struct listener *li = NULL;
+ const struct proxy *fe = NULL;
+ const struct proxy *be = NULL;
+ const struct server *srv = NULL;
+ const struct session *sess = NULL;
+ const struct stream *strm = NULL;
+ const struct connection *conn = NULL;
+ const struct check *check = NULL;
+#ifdef USE_QUIC
+ const struct quic_conn *qc = NULL;
+#endif
+ const void *lockon_ptr = NULL;
+
+ if (likely(src->state == TRACE_STATE_STOPPED))
+ return 0;
+
+ /* check that at least one action is interested by this event */
+ if (((src->report_events | src->start_events | src->pause_events | src->stop_events) & mask) == 0)
+ return 0;
+
+ /* retrieve available information from the caller's arguments */
+ if (src->arg_def & TRC_ARGS_CONN)
+ conn = trace_pick_arg(src->arg_def & TRC_ARGS_CONN, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_SESS)
+ sess = trace_pick_arg(src->arg_def & TRC_ARGS_SESS, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_STRM)
+ strm = trace_pick_arg(src->arg_def & TRC_ARGS_STRM, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_CHK)
+ check = trace_pick_arg(src->arg_def & TRC_ARGS_CHK, a1, a2, a3, a4);
+
+#ifdef USE_QUIC
+ if (src->arg_def & TRC_ARGS_QCON)
+ qc = trace_pick_arg(src->arg_def & TRC_ARGS_QCON, a1, a2, a3, a4);
+#endif
+
+ if (!sess && strm)
+ sess = strm->sess;
+ else if (!sess && conn && LIST_INLIST(&conn->session_list))
+ sess = conn->owner;
+ else if (!sess && check)
+ sess = check->sess;
+
+ if (sess) {
+ fe = sess->fe;
+ li = sess->listener;
+ }
+
+ if (!li && conn)
+ li = objt_listener(conn->target);
+
+ if (li && !fe)
+ fe = li->bind_conf->frontend;
+
+ if (strm) {
+ be = strm->be;
+ srv = strm->srv_conn;
+ }
+ if (check) {
+ srv = check->server;
+ be = (srv ? srv->proxy : NULL);
+ }
+
+ if (!srv && conn)
+ srv = objt_server(conn->target);
+
+ if (srv && !be)
+ be = srv->proxy;
+
+ if (!be && conn)
+ be = objt_proxy(conn->target);
+
+ /* TODO: add handling of filters here, return if no match (not even update states) */
+
+ /* check if we need to start the trace now */
+ if (src->state == TRACE_STATE_WAITING) {
+ if ((src->start_events & mask) == 0)
+ return 0;
+
+ /* TODO: add update of lockon+lockon_ptr here */
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_RUNNING);
+ }
+
+ /* we may want to lock on a particular object */
+ if (src->lockon != TRACE_LOCKON_NOTHING) {
+ switch (src->lockon) {
+ case TRACE_LOCKON_BACKEND: lockon_ptr = be; break;
+ case TRACE_LOCKON_CONNECTION: lockon_ptr = conn; break;
+ case TRACE_LOCKON_FRONTEND: lockon_ptr = fe; break;
+ case TRACE_LOCKON_LISTENER: lockon_ptr = li; break;
+ case TRACE_LOCKON_SERVER: lockon_ptr = srv; break;
+ case TRACE_LOCKON_SESSION: lockon_ptr = sess; break;
+ case TRACE_LOCKON_STREAM: lockon_ptr = strm; break;
+ case TRACE_LOCKON_CHECK: lockon_ptr = check; break;
+ case TRACE_LOCKON_THREAD: lockon_ptr = ti; break;
+#ifdef USE_QUIC
+ case TRACE_LOCKON_QCON: lockon_ptr = qc; break;
+#endif
+ case TRACE_LOCKON_ARG1: lockon_ptr = a1; break;
+ case TRACE_LOCKON_ARG2: lockon_ptr = a2; break;
+ case TRACE_LOCKON_ARG3: lockon_ptr = a3; break;
+ case TRACE_LOCKON_ARG4: lockon_ptr = a4; break;
+ default: break; // silence stupid gcc -Wswitch
+ }
+
+ if (src->lockon_ptr && src->lockon_ptr != lockon_ptr)
+ return 0;
+
+		if (plockptr && !src->lockon_ptr && lockon_ptr && src->state == TRACE_STATE_RUNNING)
+			*plockptr = lockon_ptr;
+ }
+
+ /* here the trace is running and is tracking a desired item */
+ if ((src->report_events & mask) == 0 || level > src->level) {
+ /* tracking did match, and might have to be disabled */
+ return -1;
+ }
+
+ /* OK trace still enabled */
+ return 1;
+}
+
+/* write a message for the given trace source */
+void __trace(enum trace_level level, uint64_t mask, struct trace_source *src,
+ const struct ist where, const char *func,
+ const void *a1, const void *a2, const void *a3, const void *a4,
+ void (*cb)(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4),
+ const struct ist msg)
+{
+ const void *lockon_ptr;
+ struct ist ist_func = ist(func);
+ char tnum[4];
+ struct ist line[12];
+ int words = 0;
+ int ret;
+
+ lockon_ptr = NULL;
+ ret = __trace_enabled(level, mask, src, where, func, a1, a2, a3, a4, &lockon_ptr);
+ if (lockon_ptr)
+ HA_ATOMIC_STORE(&src->lockon_ptr, lockon_ptr);
+
+ if (ret <= 0) {
+ if (ret < 0) // may have to disable tracking
+ goto end;
+ return;
+ }
+
+	/* log the logging location truncated to 13 chars from the right so that
+	 * the line number and the end of the file name are there.
+	 */
+ line[words++] = ist("[");
+ tnum[0] = '0' + tid / 10;
+ tnum[1] = '0' + tid % 10;
+ tnum[2] = '|';
+ tnum[3] = 0;
+ line[words++] = ist(tnum);
+ line[words++] = src->name;
+ line[words++] = ist("|");
+ line[words++] = ist2("012345" + level, 1); // "0" to "5"
+ line[words++] = ist("|");
+ line[words] = where;
+ if (line[words].len > 13) {
+ line[words].ptr += (line[words].len - 13);
+ line[words].len = 13;
+ }
+ words++;
+ line[words++] = ist("] ");
+
+ if (isttest(ist_func)) {
+ line[words++] = ist_func;
+ line[words++] = ist("(): ");
+ }
+
+ if (!cb)
+ cb = src->default_cb;
+
+ if (cb && src->verbosity) {
+ /* decode function passed, we want to pre-fill the
+ * buffer with the message and let the decode function
+ * do its job, possibly even overwriting it.
+ */
+ b_reset(&trace_buf);
+ b_istput(&trace_buf, msg);
+ cb(level, mask, src, where, ist_func, a1, a2, a3, a4);
+ line[words] = ist2(trace_buf.area, trace_buf.data);
+ words++;
+ }
+ else {
+ /* Note that here we could decide to print some args whose type
+ * is known, when verbosity is above the quiet level, and even
+ * to print the name and values of those which are declared for
+ * lock-on.
+ */
+ line[words++] = msg;
+ }
+
+ if (src->sink)
+ sink_write(src->sink, line, words, 0, 0, NULL);
+
+ end:
+ /* check if we need to stop the trace now */
+ if ((src->stop_events & mask) != 0) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_STOPPED);
+ }
+ else if ((src->pause_events & mask) != 0) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_WAITING);
+ }
+}
+
+/* this callback may be used when no output modification is desired */
+void trace_no_cb(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ /* do nothing */
+}
+
+/* registers trace source <source>. Modifies the list element!
+ * The {start,pause,stop,report} events are not changed so the source may
+ * preset them.
+ */
+void trace_register_source(struct trace_source *source)
+{
+ source->lockon = TRACE_LOCKON_NOTHING;
+ source->level = TRACE_LEVEL_USER;
+ source->verbosity = 1;
+ source->sink = NULL;
+ source->state = TRACE_STATE_STOPPED;
+ source->lockon_ptr = NULL;
+ LIST_APPEND(&trace_sources, &source->source_link);
+}
+
+struct trace_source *trace_find_source(const char *name)
+{
+ struct trace_source *src;
+ const struct ist iname = ist(name);
+
+ list_for_each_entry(src, &trace_sources, source_link)
+ if (isteq(src->name, iname))
+ return src;
+ return NULL;
+}
+
+const struct trace_event *trace_find_event(const struct trace_event *ev, const char *name)
+{
+ for (; ev && ev->mask; ev++)
+ if (strcmp(ev->name, name) == 0)
+ return ev;
+ return NULL;
+}
+
+/* parse the command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_trace(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct trace_source *src;
+ uint64_t *ev_ptr = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (!*args[1]) {
+ /* no arg => report the list of supported sources as a warning */
+ chunk_printf(&trash,
+ "Supported trace sources and states (.=stopped, w=waiting, R=running) :\n"
+ " [.] 0 : not a source, will immediately stop all traces\n"
+ );
+
+ list_for_each_entry(src, &trace_sources, source_link)
+ chunk_appendf(&trash, " [%c] %-10s : %s\n", trace_state_char(src->state), src->name.ptr, src->desc);
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+
+ if (strcmp(args[1], "0") == 0) {
+ /* emergency stop of all traces */
+ list_for_each_entry(src, &trace_sources, source_link)
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_STOPPED);
+ return cli_msg(appctx, LOG_NOTICE, "All traces now stopped");
+ }
+
+ src = trace_find_source(args[1]);
+ if (!src)
+ return cli_err(appctx, "No such trace source");
+
+ if (!*args[2]) {
+ return cli_msg(appctx, LOG_WARNING,
+ "Supported commands:\n"
+ " event : list/enable/disable source-specific event reporting\n"
+ //" filter : list/enable/disable generic filters\n"
+ " level : list/set trace reporting level\n"
+ " lock : automatic lock on thread/connection/stream/...\n"
+ " pause : pause and automatically restart after a specific event\n"
+ " sink : list/set event sinks\n"
+ " start : start immediately or after a specific event\n"
+ " stop : stop immediately or after a specific event\n"
+ " verbosity : list/set trace output verbosity\n"
+ );
+ }
+ else if ((strcmp(args[2], "event") == 0 && (ev_ptr = &src->report_events)) ||
+ (strcmp(args[2], "pause") == 0 && (ev_ptr = &src->pause_events)) ||
+ (strcmp(args[2], "start") == 0 && (ev_ptr = &src->start_events)) ||
+ (strcmp(args[2], "stop") == 0 && (ev_ptr = &src->stop_events))) {
+ const struct trace_event *ev;
+ const char *name = args[3];
+ int neg = 0;
+ int i;
+
+ /* skip prefix '!', '-', '+' and remind negation */
+ while (*name) {
+ if (*name == '!' || *name == '-')
+ neg = 1;
+ else if (*name == '+')
+ neg = 0;
+ else
+ break;
+ name++;
+ }
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported events for source %s (+=enabled, -=disabled):\n", src->name.ptr);
+ if (ev_ptr != &src->report_events)
+ chunk_appendf(&trash, " - now : don't wait for events, immediately change the state\n");
+ chunk_appendf(&trash, " - none : disable all event types\n");
+ chunk_appendf(&trash, " - any : enable all event types\n");
+ for (i = 0; src->known_events && src->known_events[i].mask; i++) {
+ chunk_appendf(&trash, " %c %-12s : %s\n",
+ trace_event_char(*ev_ptr, src->known_events[i].mask),
+ src->known_events[i].name, src->known_events[i].desc);
+ }
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+
+ if (strcmp(name, "now") == 0 && ev_ptr != &src->report_events) {
+ HA_ATOMIC_STORE(ev_ptr, 0);
+ if (ev_ptr == &src->pause_events) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_WAITING);
+ }
+ else if (ev_ptr == &src->start_events) {
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_RUNNING);
+ }
+ else if (ev_ptr == &src->stop_events) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_STOPPED);
+ }
+ return 0;
+ }
+
+ if (strcmp(name, "none") == 0)
+ HA_ATOMIC_STORE(ev_ptr, 0);
+ else if (strcmp(name, "any") == 0)
+ HA_ATOMIC_STORE(ev_ptr, ~0);
+ else {
+ ev = trace_find_event(src->known_events, name);
+ if (!ev)
+ return cli_err(appctx, "No such trace event");
+
+ if (!neg)
+ HA_ATOMIC_OR(ev_ptr, ev->mask);
+ else
+ HA_ATOMIC_AND(ev_ptr, ~ev->mask);
+ }
+ }
+ else if (strcmp(args[2], "sink") == 0) {
+ const char *name = args[3];
+ struct sink *sink;
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported sinks for source %s (*=current):\n", src->name.ptr);
+ chunk_appendf(&trash, " %c none : no sink\n", src->sink ? ' ' : '*');
+ list_for_each_entry(sink, &sink_list, sink_list) {
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->sink == sink ? '*' : ' ',
+ sink->name, sink->desc);
+ }
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+
+ if (strcmp(name, "none") == 0)
+ sink = NULL;
+ else {
+ sink = sink_find(name);
+ if (!sink)
+ return cli_err(appctx, "No such sink");
+ }
+
+ HA_ATOMIC_STORE(&src->sink, sink);
+ }
+ else if (strcmp(args[2], "level") == 0) {
+ const char *name = args[3];
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported trace levels for source %s:\n", src->name.ptr);
+ chunk_appendf(&trash, " %c error : report errors\n",
+ src->level == TRACE_LEVEL_ERROR ? '*' : ' ');
+ chunk_appendf(&trash, " %c user : also information useful to the end user\n",
+ src->level == TRACE_LEVEL_USER ? '*' : ' ');
+ chunk_appendf(&trash, " %c proto : also protocol-level updates\n",
+ src->level == TRACE_LEVEL_PROTO ? '*' : ' ');
+ chunk_appendf(&trash, " %c state : also report internal state changes\n",
+ src->level == TRACE_LEVEL_STATE ? '*' : ' ');
+ chunk_appendf(&trash, " %c data : also report data transfers\n",
+ src->level == TRACE_LEVEL_DATA ? '*' : ' ');
+ chunk_appendf(&trash, " %c developer : also report information useful only to the developer\n",
+ src->level == TRACE_LEVEL_DEVELOPER ? '*' : ' ');
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+
+ if (strcmp(name, "error") == 0)
+ HA_ATOMIC_STORE(&src->level, TRACE_LEVEL_ERROR);
+ else if (strcmp(name, "user") == 0)
+ HA_ATOMIC_STORE(&src->level, TRACE_LEVEL_USER);
+ else if (strcmp(name, "proto") == 0)
+ HA_ATOMIC_STORE(&src->level, TRACE_LEVEL_PROTO);
+ else if (strcmp(name, "state") == 0)
+ HA_ATOMIC_STORE(&src->level, TRACE_LEVEL_STATE);
+ else if (strcmp(name, "data") == 0)
+ HA_ATOMIC_STORE(&src->level, TRACE_LEVEL_DATA);
+ else if (strcmp(name, "developer") == 0)
+ HA_ATOMIC_STORE(&src->level, TRACE_LEVEL_DEVELOPER);
+ else
+ return cli_err(appctx, "No such trace level");
+ }
+ else if (strcmp(args[2], "lock") == 0) {
+ const char *name = args[3];
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported lock-on criteria for source %s:\n", src->name.ptr);
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c backend : lock on the backend that started the trace\n",
+ src->lockon == TRACE_LOCKON_BACKEND ? '*' : ' ');
+
+ if (src->arg_def & TRC_ARGS_CHK)
+ chunk_appendf(&trash, " %c check : lock on the check that started the trace\n",
+ src->lockon == TRACE_LOCKON_CHECK ? '*' : ' ');
+
+ if (src->arg_def & TRC_ARGS_CONN)
+ chunk_appendf(&trash, " %c connection : lock on the connection that started the trace\n",
+ src->lockon == TRACE_LOCKON_CONNECTION ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c frontend : lock on the frontend that started the trace\n",
+ src->lockon == TRACE_LOCKON_FRONTEND ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c listener : lock on the listener that started the trace\n",
+ src->lockon == TRACE_LOCKON_LISTENER ? '*' : ' ');
+
+ chunk_appendf(&trash, " %c nothing : do not lock on anything\n",
+ src->lockon == TRACE_LOCKON_NOTHING ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c server : lock on the server that started the trace\n",
+ src->lockon == TRACE_LOCKON_SERVER ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c session : lock on the session that started the trace\n",
+ src->lockon == TRACE_LOCKON_SESSION ? '*' : ' ');
+
+ if (src->arg_def & TRC_ARGS_STRM)
+ chunk_appendf(&trash, " %c stream : lock on the stream that started the trace\n",
+ src->lockon == TRACE_LOCKON_STREAM ? '*' : ' ');
+
+ chunk_appendf(&trash, " %c thread : lock on the thread that started the trace\n",
+ src->lockon == TRACE_LOCKON_THREAD ? '*' : ' ');
+
+ if (src->lockon_args && src->lockon_args[0].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG1 ? '*' : ' ',
+ src->lockon_args[0].name, src->lockon_args[0].desc);
+
+ if (src->lockon_args && src->lockon_args[1].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG2 ? '*' : ' ',
+ src->lockon_args[1].name, src->lockon_args[1].desc);
+
+ if (src->lockon_args && src->lockon_args[2].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG3 ? '*' : ' ',
+ src->lockon_args[2].name, src->lockon_args[2].desc);
+
+ if (src->lockon_args && src->lockon_args[3].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG4 ? '*' : ' ',
+ src->lockon_args[3].name, src->lockon_args[3].desc);
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM)) && strcmp(name, "backend") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_BACKEND);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & TRC_ARGS_CHK) && strcmp(name, "check") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_CHECK);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & TRC_ARGS_CONN) && strcmp(name, "connection") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_CONNECTION);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM)) && strcmp(name, "frontend") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_FRONTEND);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM)) && strcmp(name, "listener") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_LISTENER);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (strcmp(name, "nothing") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_NOTHING);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM)) && strcmp(name, "server") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_SERVER);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM)) && strcmp(name, "session") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_SESSION);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & TRC_ARGS_STRM) && strcmp(name, "stream") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_STREAM);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (strcmp(name, "thread") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_THREAD);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[0].name && strcmp(name, src->lockon_args[0].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG1);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[1].name && strcmp(name, src->lockon_args[1].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG2);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[2].name && strcmp(name, src->lockon_args[2].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG3);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[3].name && strcmp(name, src->lockon_args[3].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG4);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else
+ return cli_err(appctx, "Unsupported lock-on criterion");
+ }
+ else if (strcmp(args[2], "verbosity") == 0) {
+ const char *name = args[3];
+ const struct name_desc *nd;
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported trace verbosities for source %s:\n", src->name.ptr);
+ chunk_appendf(&trash, " %c quiet : only report basic information with no decoding\n",
+ src->verbosity == 0 ? '*' : ' ');
+ if (!src->decoding || !src->decoding[0].name) {
+ chunk_appendf(&trash, " %c default : report extra information when available\n",
+ src->verbosity > 0 ? '*' : ' ');
+ } else {
+ for (nd = src->decoding; nd->name && nd->desc; nd++)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ nd == (src->decoding + src->verbosity - 1) ? '*' : ' ',
+ nd->name, nd->desc);
+ }
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+
+ if (strcmp(name, "quiet") == 0)
+ HA_ATOMIC_STORE(&src->verbosity, 0);
+ else if (!src->decoding || !src->decoding[0].name) {
+ if (strcmp(name, "default") == 0)
+ HA_ATOMIC_STORE(&src->verbosity, 1);
+ else
+ return cli_err(appctx, "No such verbosity level");
+ } else {
+ for (nd = src->decoding; nd->name && nd->desc; nd++)
+ if (strcmp(name, nd->name) == 0)
+ break;
+
+ if (!nd->name || !nd->desc)
+ return cli_err(appctx, "No such verbosity level");
+
+ HA_ATOMIC_STORE(&src->verbosity, (nd - src->decoding) + 1);
+ }
+ }
+ else
+ return cli_err(appctx, "Unknown trace keyword");
+
+ return 0;
+}
+
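+/* Example CLI session (illustrative; "h2" stands for any registered source
+ * and "buf0" for any configured ring sink), matching the keywords parsed
+ * above:
+ *
+ *   trace h2 sink buf0        # send h2 events to the buf0 ring
+ *   trace h2 level developer  # most verbose level
+ *   trace h2 start now        # or: trace h2 start <event>
+ *   trace h2 stop now
+ *   trace 0                   # emergency stop of all traces
+ */
+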
+/* parse the command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_show_trace(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct trace_source *src;
+ const struct sink *sink;
+ int i;
+
+ args++; // make args[1] the 1st arg
+
+ if (!*args[1]) {
+ /* no arg => report the list of supported sources */
+ chunk_printf(&trash,
+ "Supported trace sources and states (.=stopped, w=waiting, R=running) :\n"
+ );
+
+ list_for_each_entry(src, &trace_sources, source_link) {
+ sink = src->sink;
+ chunk_appendf(&trash, " [%c] %-10s -> %s [drp %u] [%s]\n",
+ trace_state_char(src->state), src->name.ptr,
+ sink ? sink->name : "none",
+ sink ? sink->ctx.dropped : 0,
+ src->desc);
+ }
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_INFO, trash.area);
+ }
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ src = trace_find_source(args[1]);
+ if (!src)
+ return cli_err(appctx, "No such trace source");
+
+ sink = src->sink;
+ chunk_printf(&trash, "Trace status for %s:\n", src->name.ptr);
+ chunk_appendf(&trash, " - sink: %s [%u dropped]\n",
+ sink ? sink->name : "none", sink ? sink->ctx.dropped : 0);
+
+ chunk_appendf(&trash, " - event name : report start stop pause\n");
+ for (i = 0; src->known_events && src->known_events[i].mask; i++) {
+ chunk_appendf(&trash, " %-12s : %c %c %c %c\n",
+ src->known_events[i].name,
+ trace_event_char(src->report_events, src->known_events[i].mask),
+ trace_event_char(src->start_events, src->known_events[i].mask),
+ trace_event_char(src->stop_events, src->known_events[i].mask),
+ trace_event_char(src->pause_events, src->known_events[i].mask));
+ }
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+}
+
+static struct cli_kw_list cli_kws = {{ },{
+ { { "trace", NULL }, "trace [<module>|0] [cmd [args...]] : manage live tracing (empty to list, 0 to stop all)", cli_parse_trace, NULL, NULL },
+ { { "show", "trace", NULL }, "show trace [<module>] : show live tracing state", cli_parse_show_trace, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/uri_auth.c b/src/uri_auth.c
new file mode 100644
index 0000000..db7e6c6
--- /dev/null
+++ b/src/uri_auth.c
@@ -0,0 +1,318 @@
+/*
+ * URI-based user authentication using the HTTP basic method.
+ *
+ * Copyright 2006-2007 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/uri_auth.h>
+
+
+/*
+ * Initializes a basic uri_auth structure header and returns a pointer to it.
+ * If <root> points to an existing structure, it is reused, otherwise a new
+ * one is allocated and, when <root> is not NULL, stored back there.
+ */
+struct uri_auth *stats_check_init_uri_auth(struct uri_auth **root)
+{
+ struct uri_auth *u;
+
+ if (!root || !*root) {
+ if ((u = calloc(1, sizeof (*u))) == NULL)
+ goto out_u;
+
+ LIST_INIT(&u->http_req_rules);
+ LIST_INIT(&u->admin_rules);
+ } else
+ u = *root;
+
+ if (!u->uri_prefix) {
+ u->uri_len = strlen(STATS_DEFAULT_URI);
+ if ((u->uri_prefix = strdup(STATS_DEFAULT_URI)) == NULL)
+ goto out_uri;
+ }
+
+ if (root && !*root)
+ *root = u;
+
+ return u;
+
+ out_uri:
+ if (!root || !*root)
+ free(u);
+ out_u:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with <uri> set as the uri_prefix.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_uri(struct uri_auth **root, char *uri)
+{
+ struct uri_auth *u;
+ char *uri_copy;
+ int uri_len;
+
+ uri_len = strlen(uri);
+ if ((uri_copy = strdup(uri)) == NULL)
+ goto out_uri;
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ free(u->uri_prefix);
+ u->uri_prefix = uri_copy;
+ u->uri_len = uri_len;
+ return u;
+
+ out_u:
+ free(uri_copy);
+ out_uri:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with <realm> set as the realm.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_realm(struct uri_auth **root, char *realm)
+{
+ struct uri_auth *u;
+ char *realm_copy;
+
+ if ((realm_copy = strdup(realm)) == NULL)
+ goto out_realm;
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ free(u->auth_realm);
+ u->auth_realm = realm_copy;
+ return u;
+
+ out_u:
+ free(realm_copy);
+ out_realm:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with the STAT_SHNODE flag enabled and
+ * <name> set as the node name if it is not empty.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_node(struct uri_auth **root, char *name)
+{
+ struct uri_auth *u;
+ char *node_copy = NULL;
+
+ if (name && *name) {
+ node_copy = strdup(name);
+ if (node_copy == NULL)
+ goto out_realm;
+ }
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ if (!stats_set_flag(root, STAT_SHNODE))
+ goto out_u;
+
+ if (node_copy) {
+ free(u->node);
+ u->node = node_copy;
+ }
+
+ return u;
+
+ out_u:
+ free(node_copy);
+ out_realm:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with STAT_SHDESC flag enabled and
+ * <description> set as the desc if it is not empty.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_desc(struct uri_auth **root, char *desc)
+{
+ struct uri_auth *u;
+ char *desc_copy = NULL;
+
+ if (desc && *desc) {
+ desc_copy = strdup(desc);
+ if (desc_copy == NULL)
+ goto out_realm;
+ }
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ if (!stats_set_flag(root, STAT_SHDESC))
+ goto out_u;
+
+ if (desc_copy) {
+ free(u->desc);
+ u->desc = desc_copy;
+ }
+
+ return u;
+
+ out_u:
+ free(desc_copy);
+ out_realm:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with the <refresh> refresh interval.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_refresh(struct uri_auth **root, int interval)
+{
+ struct uri_auth *u;
+
+ if ((u = stats_check_init_uri_auth(root)) != NULL)
+ u->refresh = interval;
+ return u;
+}
+
+/*
+ * Returns a default uri_auth with the <flag> set.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_flag(struct uri_auth **root, int flag)
+{
+ struct uri_auth *u;
+
+ if ((u = stats_check_init_uri_auth(root)) != NULL)
+ u->flags |= flag;
+ return u;
+}
+
+/*
+ * Returns a default uri_auth with a <user:passwd> entry added to the list of
+ * authorized users. If a matching entry is found, no update will be performed.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_add_auth(struct uri_auth **root, char *user)
+{
+ struct uri_auth *u;
+ struct auth_users *newuser;
+ char *pass;
+
+ pass = strchr(user, ':');
+ if (pass)
+ *pass++ = '\0';
+ else
+ pass = "";
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ return NULL;
+
+ if (!u->userlist)
+ u->userlist = calloc(1, sizeof(*u->userlist));
+
+ if (!u->userlist)
+ return NULL;
+
+ if (!u->userlist->name)
+ u->userlist->name = strdup(".internal-stats-userlist");
+
+ if (!u->userlist->name)
+ return NULL;
+
+ for (newuser = u->userlist->users; newuser; newuser = newuser->next)
+ if (strcmp(newuser->user, user) == 0) {
+ ha_warning("uri auth: ignoring duplicated user '%s'.\n",
+ user);
+ return u;
+ }
+
+ newuser = calloc(1, sizeof(*newuser));
+ if (!newuser)
+ return NULL;
+
+ newuser->user = strdup(user);
+ if (!newuser->user) {
+ free(newuser);
+ return NULL;
+ }
+
+ newuser->pass = strdup(pass);
+ if (!newuser->pass) {
+ free(newuser->user);
+ free(newuser);
+ return NULL;
+ }
+
+ newuser->flags |= AU_O_INSECURE;
+ newuser->next = u->userlist->users;
+ u->userlist->users = newuser;
+
+ return u;
+}
+
+/*
+ * Returns a default uri_auth with a <scope> entry added to the list of
+ * allowed scopes. If a matching entry is found, no update will be performed.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_add_scope(struct uri_auth **root, char *scope)
+{
+ struct uri_auth *u;
+ char *new_name;
+ struct stat_scope *old_scope, **scope_list;
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out;
+
+ scope_list = &u->scope;
+ while ((old_scope = *scope_list)) {
+ if (strcmp(old_scope->px_id, scope) == 0)
+ break;
+ scope_list = &old_scope->next;
+ }
+
+ if (!old_scope) {
+ if ((new_name = strdup(scope)) == NULL)
+ goto out_u;
+
+ if ((old_scope = calloc(1, sizeof(*old_scope))) == NULL)
+ goto out_name;
+
+ old_scope->px_id = new_name;
+ old_scope->px_len = strlen(new_name);
+ *scope_list = old_scope;
+ }
+ return u;
+
+ out_name:
+ free(new_name);
+ out_u:
+ free(u);
+ out:
+ return NULL;
+}
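+
+/* Illustrative example (not part of the original source): each
+ * "stats scope <px>" configuration line ends up here, so
+ *
+ *   stats scope www
+ *   stats scope backend_api
+ *
+ * builds a two-entry list restricting the stats page to those proxies.
+ */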
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
new file mode 100644
index 0000000..bc793f2
--- /dev/null
+++ b/src/uri_normalizer.c
@@ -0,0 +1,467 @@
+/*
+ * HTTP request URI normalization.
+ *
+ * Copyright 2021 Tim Duesterhus <tim@bastelstu.be>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/ist.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_normalizer.h>
+
+/* Encodes '#' as '%23'. */
+enum uri_normalizer_err uri_normalizer_fragment_encode(const struct ist input, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist output = *dst;
+
+ struct ist scanner = input;
+
+ while (istlen(scanner)) {
+ const struct ist before_hash = istsplit(&scanner, '#');
+
+ if (istcat(&output, before_hash, size) < 0) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ if (istend(before_hash) != istend(scanner)) {
+ if (istcat(&output, ist("%23"), size) < 0) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+ }
+ }
+
+ *dst = output;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
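+
+/* Usage sketch (illustrative only, assuming a trash chunk as the
+ * destination; per istclear() above, <dst> must carry the buffer
+ * capacity as its length, which is saved before the length is reset):
+ *
+ *   struct buffer *chk = get_trash_chunk();
+ *   struct ist out = ist2(b_orig(chk), b_size(chk));
+ *
+ *   if (uri_normalizer_fragment_encode(ist("/app#frag"), &out) ==
+ *       URI_NORMALIZER_ERR_NONE) {
+ *           // out now holds "/app%23frag"
+ *   }
+ */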
+
+/* Returns 1 if the given character is part of the 'unreserved' set in the
+ * RFC 3986 ABNF.
+ * Returns 0 if not.
+ */
+static int is_unreserved_character(unsigned char c)
+{
+ switch (c) {
+ case 'A'...'Z': /* ALPHA */
+ case 'a'...'z': /* ALPHA */
+ case '0'...'9': /* DIGIT */
+ case '-':
+ case '.':
+ case '_':
+ case '~':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* Decodes percent encoded characters that are part of the 'unreserved' set.
+ *
+ * RFC 3986, section 2.3:
+ * > URIs that differ in the replacement of an unreserved character with
+ * > its corresponding percent-encoded US-ASCII octet are equivalent [...]
+ * > when found in a URI, should be decoded to their corresponding unreserved
+ * > characters by URI normalizers.
+ *
+ * If `strict` is set to 0 then percent characters that are not followed by
+ * two hexadecimal digits are returned as-is without performing any decoding.
+ * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
+ * for invalid sequences.
+ */
+enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist output = *dst;
+
+ struct ist scanner = input;
+
+ /* The output will either be shortened or have the same length. */
+ if (size < istlen(input)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner)) {
+ const char current = istshift(&scanner);
+
+ if (current == '%') {
+ if (istlen(scanner) >= 2) {
+ if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
+ char hex1, hex2, c;
+
+ hex1 = istshift(&scanner);
+ hex2 = istshift(&scanner);
+ c = (hex2i(hex1) << 4) + hex2i(hex2);
+
+ if (is_unreserved_character(c)) {
+ output = __istappend(output, c);
+ }
+ else {
+ output = __istappend(output, current);
+ output = __istappend(output, hex1);
+ output = __istappend(output, hex2);
+ }
+
+ continue;
+ }
+ }
+
+ if (strict) {
+ err = URI_NORMALIZER_ERR_INVALID_INPUT;
+ goto fail;
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+
+ *dst = output;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
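+
+/* Example behaviour (illustrative): "%61%2Fb" decodes to "a%2Fb" since
+ * 'a' (0x61) is unreserved while '/' (0x2F) is not. With strict=1 an
+ * input such as "%zz" yields URI_NORMALIZER_ERR_INVALID_INPUT, while
+ * with strict=0 it is copied through unchanged.
+ */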
+
+/* Uppercases letters used in percent encoding.
+ *
+ * If `strict` is set to 0 then percent characters that are not followed by
+ * two hexadecimal digits are returned as-is without modifying the following
+ * letters.
+ * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
+ * for invalid sequences.
+ */
+enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist output = *dst;
+
+ struct ist scanner = input;
+
+ /* The output will have the same length. */
+ if (size < istlen(input)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner)) {
+ const char current = istshift(&scanner);
+
+ if (current == '%') {
+ if (istlen(scanner) >= 2) {
+ if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
+ output = __istappend(output, current);
+ output = __istappend(output, toupper(istshift(&scanner)));
+ output = __istappend(output, toupper(istshift(&scanner)));
+ continue;
+ }
+ }
+
+ if (strict) {
+ err = URI_NORMALIZER_ERR_INVALID_INPUT;
+ goto fail;
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+
+ *dst = output;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
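+
+/* Example behaviour (illustrative): "%2f%3A" becomes "%2F%3A"; only the
+ * hexadecimal digits of valid escape sequences are uppercased, all other
+ * characters are copied through untouched.
+ */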
+
+/* Removes `/./` from the given path. */
+enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist newpath = *dst;
+
+ struct ist scanner = path;
+
+ /* The path will either be shortened or have the same length. */
+ if (size < istlen(path)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner) > 0) {
+ const struct ist segment = istsplit(&scanner, '/');
+
+ if (!isteq(segment, ist("."))) {
+ if (istcat(&newpath, segment, size) < 0) {
+ /* This is impossible, because we checked the size of the destination buffer. */
+ my_unreachable();
+ err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
+ goto fail;
+ }
+
+ if (istend(segment) != istend(scanner))
+ newpath = __istappend(newpath, '/');
+ }
+ }
+
+ *dst = newpath;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
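+
+/* Example behaviour (illustrative): "/a/./b/." becomes "/a/b/", and
+ * "./a" becomes "a" since the leading "." segment is dropped.
+ */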
+
+/* Merges `/../` with preceding path segments.
+ *
+ * If `full` is set to `0` then `/../` will be printed at the start of the resulting
+ * path if the number of `/../` exceeds the number of other segments. If `full` is
+ * set to `1` these will not be printed.
+ */
+enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ char * const tail = istptr(*dst) + size;
+ char *head = tail;
+
+ ssize_t offset = istlen(path) - 1;
+
+ int up = 0;
+
+ /* The path will either be shortened or have the same length. */
+ if (size < istlen(path)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ /* Handle `/..` at the end of the path without a trailing slash. */
+ if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) {
+ up++;
+ offset -= 2;
+ }
+
+ while (offset >= 0) {
+ if (offset >= 3 && istmatch(istadv(path, offset - 3), ist("/../"))) {
+ up++;
+ offset -= 3;
+ continue;
+ }
+
+ if (up > 0) {
+ /* Skip the slash. */
+ offset--;
+
+ /* First check whether we already reached the start of the path,
+ * before popping the current `/../`.
+ */
+ if (offset >= 0) {
+ up--;
+
+ /* Skip the current path segment. */
+ while (offset >= 0 && istptr(path)[offset] != '/')
+ offset--;
+ }
+ }
+ else {
+ /* Prepend the slash. */
+ *(--head) = istptr(path)[offset];
+ offset--;
+
+ /* Prepend the current path segment. */
+ while (offset >= 0 && istptr(path)[offset] != '/') {
+ *(--head) = istptr(path)[offset];
+ offset--;
+ }
+ }
+ }
+
+ if (up > 0) {
+ /* Prepend a trailing slash. */
+ *(--head) = '/';
+
+ if (!full) {
+ /* Prepend unconsumed `/..`. */
+ do {
+ *(--head) = '.';
+ *(--head) = '.';
+ *(--head) = '/';
+ up--;
+ } while (up > 0);
+ }
+ }
+
+ *dst = ist2(head, tail - head);
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
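+
+/* Example behaviour (illustrative): "/a/b/../c" becomes "/a/c". With
+ * full=0, "/a/../../b" becomes "/../b" (the unmatched "/../" is kept),
+ * while full=1 reduces it to "/b".
+ */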
+
+/* Merges adjacent slashes in the given path. */
+enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist newpath = *dst;
+
+ struct ist scanner = path;
+
+ /* The path will either be shortened or have the same length. */
+ if (size < istlen(path)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner) > 0) {
+ const char current = istshift(&scanner);
+
+ if (current == '/') {
+ while (istlen(scanner) > 0 && *istptr(scanner) == '/')
+ scanner = istnext(scanner);
+ }
+
+ newpath = __istappend(newpath, current);
+ }
+
+ *dst = newpath;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
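+
+/* Example behaviour (illustrative): "//foo///bar" becomes "/foo/bar";
+ * single slashes are left untouched.
+ */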
+
+/* Compares two query parameters by name. Query parameters are ordered
+ * as with memcmp. Shorter parameter names are ordered lower. Identical
+ * parameter names are compared by their pointer to maintain a stable
+ * sort.
+ */
+static int query_param_cmp(const void *a, const void *b)
+{
+ const struct ist param_a = *(struct ist*)a;
+ const struct ist param_b = *(struct ist*)b;
+ const struct ist param_a_name = iststop(param_a, '=');
+ const struct ist param_b_name = iststop(param_b, '=');
+
+ int cmp = istdiff(param_a_name, param_b_name);
+
+ if (cmp != 0)
+ return cmp;
+
+ /* The contents are identical: Compare the pointer. */
+ if (istptr(param_a) < istptr(param_b))
+ return -1;
+
+ if (istptr(param_a) > istptr(param_b))
+ return 1;
+
+ return 0;
+}
+
+/* Sorts the parameters within the given query string. */
+enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist newquery = *dst;
+
+ struct ist scanner = query;
+
+ const struct buffer *trash = get_trash_chunk();
+ struct ist *params = (struct ist *)b_orig(trash);
+ const size_t max_param = b_size(trash) / sizeof(*params);
+ size_t param_count = 0;
+
+ size_t i;
+
+ /* The query will have the same length. */
+ if (size < istlen(query)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ /* Handle the leading '?'. */
+ newquery = __istappend(newquery, istshift(&scanner));
+
+ while (istlen(scanner) > 0) {
+ const struct ist param = istsplit(&scanner, delim);
+
+ if (param_count + 1 > max_param) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ params[param_count] = param;
+ param_count++;
+ }
+
+ qsort(params, param_count, sizeof(*params), query_param_cmp);
+
+ for (i = 0; i < param_count; i++) {
+ if (i > 0)
+ newquery = __istappend(newquery, delim);
+
+ if (istcat(&newquery, params[i], size) < 0) {
+ /* This is impossible, because we checked the size of the destination buffer. */
+ my_unreachable();
+ err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
+ goto fail;
+ }
+ }
+
+ *dst = newquery;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
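+
+/* Example behaviour (illustrative): with '&' as delimiter,
+ * "?b=2&a=1&a=3" becomes "?a=1&a=3&b=2"; parameters sharing a name keep
+ * their relative order thanks to the pointer-based tie break in
+ * query_param_cmp().
+ */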
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/vars.c b/src/vars.c
new file mode 100644
index 0000000..e620acf
--- /dev/null
+++ b/src/vars.c
@@ -0,0 +1,1453 @@
+#include <ctype.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/sample.h>
+#include <haproxy/session.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+
+
+/* This contains a pool of struct vars */
+DECLARE_STATIC_POOL(var_pool, "vars", sizeof(struct var));
+
+/* list of variables for the process scope. */
+struct vars proc_vars THREAD_ALIGNED(64);
+
+/* This array of int contains the system limits per context. */
+static unsigned int var_global_limit = 0;
+static unsigned int var_proc_limit = 0;
+static unsigned int var_sess_limit = 0;
+static unsigned int var_txn_limit = 0;
+static unsigned int var_reqres_limit = 0;
+static unsigned int var_check_limit = 0;
+static uint64_t var_name_hash_seed = 0;
+
+/* Structure and array matching set-var conditions to their respective flag
+ * value.
+ */
+struct var_set_condition {
+ const char *cond_str;
+ uint flag;
+};
+
+static struct var_set_condition conditions_array[] = {
+ { "ifexists", VF_COND_IFEXISTS },
+ { "ifnotexists", VF_COND_IFNOTEXISTS },
+ { "ifempty", VF_COND_IFEMPTY },
+ { "ifnotempty", VF_COND_IFNOTEMPTY },
+ { "ifset", VF_COND_IFSET },
+ { "ifnotset", VF_COND_IFNOTSET },
+ { "ifgt", VF_COND_IFGT },
+ { "iflt", VF_COND_IFLT },
+ { NULL, 0 }
+};
+
+/* returns the struct vars pointer for a session, stream and scope, or NULL if
+ * it does not exist.
+ */
+static inline struct vars *get_vars(struct session *sess, struct stream *strm, enum vars_scope scope)
+{
+ switch (scope) {
+ case SCOPE_PROC:
+ return &proc_vars;
+ case SCOPE_SESS:
+ return sess ? &sess->vars : NULL;
+ case SCOPE_CHECK: {
+ struct check *check = sess ? objt_check(sess->origin) : NULL;
+
+ return check ? &check->vars : NULL;
+ }
+ case SCOPE_TXN:
+ return strm ? &strm->vars_txn : NULL;
+ case SCOPE_REQ:
+ case SCOPE_RES:
+ default:
+ return strm ? &strm->vars_reqres : NULL;
+ }
+}
+
+/* This function adds or removes memory size from the accounting. The inner
+ * pointers may be null when setting the outer ones only.
+ */
+void var_accounting_diff(struct vars *vars, struct session *sess, struct stream *strm, int size)
+{
+ switch (vars->scope) {
+ case SCOPE_REQ:
+ case SCOPE_RES:
+ if (var_reqres_limit && strm)
+ _HA_ATOMIC_ADD(&strm->vars_reqres.size, size);
+ /* fall through */
+ case SCOPE_TXN:
+ if (var_txn_limit && strm)
+ _HA_ATOMIC_ADD(&strm->vars_txn.size, size);
+ goto scope_sess;
+ case SCOPE_CHECK:
+ if (var_check_limit) {
+ struct check *check = objt_check(sess->origin);
+
+ if (check)
+ _HA_ATOMIC_ADD(&check->vars.size, size);
+ }
+ /* fall through */
+scope_sess:
+ case SCOPE_SESS:
+ if (var_sess_limit)
+ _HA_ATOMIC_ADD(&sess->vars.size, size);
+ /* fall through */
+ case SCOPE_PROC:
+ if (var_proc_limit || var_global_limit)
+ _HA_ATOMIC_ADD(&proc_vars.size, size);
+ }
+}
+
+/* This function returns 1 if the <size> is available in the var
+ * pool <vars>, otherwise returns 0. If the space is available,
+ * the size is reserved. The inner pointers may be null when setting
+ * the outer ones only. The accounting uses either <sess> or <strm>
+ * depending on the scope. <strm> may be NULL when no stream is known
+ * and only the session exists (eg: tcp-request connection).
+ */
+static int var_accounting_add(struct vars *vars, struct session *sess, struct stream *strm, int size)
+{
+ switch (vars->scope) {
+ case SCOPE_REQ:
+ case SCOPE_RES:
+ if (var_reqres_limit && strm && strm->vars_reqres.size + size > var_reqres_limit)
+ return 0;
+ /* fall through */
+ case SCOPE_TXN:
+ if (var_txn_limit && strm && strm->vars_txn.size + size > var_txn_limit)
+ return 0;
+ goto scope_sess;
+ case SCOPE_CHECK: {
+ struct check *check = objt_check(sess->origin);
+
+ if (var_check_limit && check && check->vars.size + size > var_check_limit)
+ return 0;
+ }
+ /* fall through */
+scope_sess:
+ case SCOPE_SESS:
+ if (var_sess_limit && sess->vars.size + size > var_sess_limit)
+ return 0;
+ /* fall through */
+ case SCOPE_PROC:
+ /* note: scope proc collects all others and is currently identical to the
+ * global limit.
+ */
+ if (var_proc_limit && proc_vars.size + size > var_proc_limit)
+ return 0;
+ if (var_global_limit && proc_vars.size + size > var_global_limit)
+ return 0;
+ }
+ var_accounting_diff(vars, sess, strm, size);
+ return 1;
+}
+
+/* This function removes a variable from the list and frees the memory it was
+ * using. If the variable is marked "VF_PERMANENT", the sample_data is only
+ * reset to SMP_T_ANY unless <force> is non-zero. Returns the freed size.
+ */
+unsigned int var_clear(struct var *var, int force)
+{
+ unsigned int size = 0;
+
+ if (var->data.type == SMP_T_STR || var->data.type == SMP_T_BIN) {
+ ha_free(&var->data.u.str.area);
+ size += var->data.u.str.data;
+ }
+ else if (var->data.type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) {
+ ha_free(&var->data.u.meth.str.area);
+ size += var->data.u.meth.str.data;
+ }
+ /* wipe the sample */
+ var->data.type = SMP_T_ANY;
+
+ if (!(var->flags & VF_PERMANENT) || force) {
+ LIST_DELETE(&var->l);
+ pool_free(var_pool, var);
+ size += sizeof(struct var);
+ }
+ return size;
+}
+
+/* This function frees all the memory used by all the variables
+ * in the list.
+ */
+void vars_prune(struct vars *vars, struct session *sess, struct stream *strm)
+{
+ struct var *var, *tmp;
+ unsigned int size = 0;
+
+ vars_wrlock(vars);
+ list_for_each_entry_safe(var, tmp, &vars->head, l) {
+ size += var_clear(var, 1);
+ }
+ vars_wrunlock(vars);
+ var_accounting_diff(vars, sess, strm, -size);
+}
+
+/* This function frees all the memory used by all the session variables in the
+ * list starting at <vars>.
+ */
+void vars_prune_per_sess(struct vars *vars)
+{
+ struct var *var, *tmp;
+ unsigned int size = 0;
+
+ vars_wrlock(vars);
+ list_for_each_entry_safe(var, tmp, &vars->head, l) {
+ size += var_clear(var, 1);
+ }
+ vars_wrunlock(vars);
+
+ if (var_sess_limit)
+ _HA_ATOMIC_SUB(&vars->size, size);
+ if (var_proc_limit || var_global_limit)
+ _HA_ATOMIC_SUB(&proc_vars.size, size);
+}
+
+/* This function initializes a variables list head */
+void vars_init_head(struct vars *vars, enum vars_scope scope)
+{
+ LIST_INIT(&vars->head);
+ vars->scope = scope;
+ vars->size = 0;
+ HA_RWLOCK_INIT(&vars->rwlock);
+}
+
+/* This function returns a hash value and a scope for a variable name of a
+ * specified length. It makes sure that the scope is valid. It returns non-zero
+ * on success, 0 on failure. Neither hash nor scope may be NULL.
+ */
+static int vars_hash_name(const char *name, int len, enum vars_scope *scope,
+ uint64_t *hash, char **err)
+{
+ const char *tmp;
+
+ /* Check length. */
+ if (len == 0) {
+ memprintf(err, "Empty variable name cannot be accepted");
+ return 0;
+ }
+
+ /* Check scope. */
+ if (len > 5 && strncmp(name, "proc.", 5) == 0) {
+ name += 5;
+ len -= 5;
+ *scope = SCOPE_PROC;
+ }
+ else if (len > 5 && strncmp(name, "sess.", 5) == 0) {
+ name += 5;
+ len -= 5;
+ *scope = SCOPE_SESS;
+ }
+ else if (len > 4 && strncmp(name, "txn.", 4) == 0) {
+ name += 4;
+ len -= 4;
+ *scope = SCOPE_TXN;
+ }
+ else if (len > 4 && strncmp(name, "req.", 4) == 0) {
+ name += 4;
+ len -= 4;
+ *scope = SCOPE_REQ;
+ }
+ else if (len > 4 && strncmp(name, "res.", 4) == 0) {
+ name += 4;
+ len -= 4;
+ *scope = SCOPE_RES;
+ }
+ else if (len > 6 && strncmp(name, "check.", 6) == 0) {
+ name += 6;
+ len -= 6;
+ *scope = SCOPE_CHECK;
+ }
+ else {
+ memprintf(err, "invalid variable name '%.*s'. A variable name must be start by its scope. "
+ "The scope can be 'proc', 'sess', 'txn', 'req', 'res' or 'check'", len, name);
+ return 0;
+ }
+
+ /* Check variable name syntax. */
+ for (tmp = name; tmp < name + len; tmp++) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ memprintf(err, "invalid syntax at char '%s'", tmp);
+ return 0;
+ }
+ }
+
+ *hash = XXH3(name, len, var_name_hash_seed);
+ return 1;
+}
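+
+/* Illustrative example (not part of the original source): for the name
+ * "txn.client_ip", the "txn." prefix selects SCOPE_TXN and only the
+ * remaining "client_ip" part is hashed with XXH3() and the per-process
+ * random seed, so hashes are not comparable across processes.
+ */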
+
+/* This function returns the variable from the given list that matches
+ * <name_hash> or returns NULL if not found. It's only a linked list since it
+ * is not expected to have many variables per scope (a few tens at best).
+ * The caller is responsible for ensuring that <vars> is properly locked.
+ */
+static struct var *var_get(struct vars *vars, uint64_t name_hash)
+{
+ struct var *var;
+
+ list_for_each_entry(var, &vars->head, l)
+ if (var->name_hash == name_hash)
+ return var;
+ return NULL;
+}
+
+/* Returns 0 on failure, otherwise returns 1. */
+static int smp_fetch_var(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct var_desc *var_desc = &args[0].data.var;
+ const struct buffer *def = NULL;
+
+ if (args[1].type == ARGT_STR)
+ def = &args[1].data.str;
+
+ return vars_get_by_desc(var_desc, smp, def);
+}
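+
+/* Illustrative example (not part of the original source): the fetch
+ * "var(txn.bucket,default)" returns the string "default" when
+ * txn.bucket was never set, thanks to the optional second argument
+ * wired to <def> above.
+ */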
+
+/*
+ * Clear the contents of a variable so that it can be reset directly.
+ * This function is used just before a variable is refilled from a
+ * sample's content.
+ */
+static inline void var_clear_buffer(struct sample *smp, struct vars *vars, struct var *var, int var_type)
+{
+ if (var_type == SMP_T_STR || var_type == SMP_T_BIN) {
+ ha_free(&var->data.u.str.area);
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -var->data.u.str.data);
+ }
+ else if (var_type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) {
+ ha_free(&var->data.u.meth.str.area);
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -var->data.u.meth.str.data);
+ }
+}
+
+/* This function tries to create a variable whose name hash is <name_hash> in
+ * scope <scope> and store sample <smp> as its value.
+ *
+ * The stream and session are extracted from <smp>, whose stream may be NULL
+ * when scope is SCOPE_SESS. If there is not enough memory to store the
+ * sample while the variable was already created, the variable is changed
+ * to a bool (which requires no extra memory).
+ *
+ * Flags is a bitfield that may contain one of the following flags:
+ * - VF_CREATEONLY: do nothing if the variable already exists (success).
+ * - VF_PERMANENT: this flag will be passed to the variable upon creation
+ *
+ * - VF_COND_IFEXISTS: only set variable if it already exists
+ * - VF_COND_IFNOTEXISTS: only set variable if it did not exist yet
+ * - VF_COND_IFEMPTY: only set variable if sample is empty
+ * - VF_COND_IFNOTEMPTY: only set variable if sample is not empty
+ * - VF_COND_IFSET: only set variable if its type is not SMP_T_ANY
+ * - VF_COND_IFNOTSET: only set variable if its type is ANY
+ * - VF_COND_IFGT: only set variable if its value is greater than the sample's
+ * - VF_COND_IFLT: only set variable if its value is less than the sample's
+ *
+ * It returns 0 on failure, non-zero on success.
+ */
+static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags)
+{
+ struct vars *vars;
+ struct var *var;
+ int ret = 0;
+ int previous_type = SMP_T_ANY;
+
+ vars = get_vars(smp->sess, smp->strm, scope);
+ if (!vars || vars->scope != scope)
+ return 0;
+
+ vars_wrlock(vars);
+
+ /* Look for existing variable name. */
+ var = var_get(vars, name_hash);
+
+ if (var) {
+ if (flags & VF_CREATEONLY) {
+ ret = 1;
+ goto unlock;
+ }
+
+ if (flags & VF_COND_IFNOTEXISTS)
+ goto unlock;
+ } else {
+ if (flags & VF_COND_IFEXISTS)
+ goto unlock;
+
+ /* Check memory available. */
+ if (!var_accounting_add(vars, smp->sess, smp->strm, sizeof(struct var)))
+ goto unlock;
+
+ /* Create new entry. */
+ var = pool_alloc(var_pool);
+ if (!var)
+ goto unlock;
+ LIST_APPEND(&vars->head, &var->l);
+ var->name_hash = name_hash;
+ var->flags = flags & VF_PERMANENT;
+ var->data.type = SMP_T_ANY;
+ }
+
+ /* A variable of type SMP_T_ANY is considered as unset (either created
+ * and never set or unset-var was called on it).
+ */
+ if ((flags & VF_COND_IFSET && var->data.type == SMP_T_ANY) ||
+ (flags & VF_COND_IFNOTSET && var->data.type != SMP_T_ANY))
+ goto unlock;
+
+ /* Set type. */
+ previous_type = var->data.type;
+ var->data.type = smp->data.type;
+
+ if (flags & VF_COND_IFEMPTY) {
+ switch(smp->data.type) {
+ case SMP_T_ANY:
+ case SMP_T_STR:
+ case SMP_T_BIN:
+ /* The actual test on the contents of the sample will be
+ * performed later.
+ */
+ break;
+ default:
+ /* The sample cannot be empty since it has a scalar type. */
+ var->data.type = previous_type;
+ goto unlock;
+ }
+ }
+
+ /* Copy data. If the data needs memory, the function can fail. */
+ switch (var->data.type) {
+ case SMP_T_BOOL:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.sint = smp->data.u.sint;
+ break;
+ case SMP_T_SINT:
+ if (previous_type == var->data.type) {
+ if (((flags & VF_COND_IFGT) && !(var->data.u.sint > smp->data.u.sint)) ||
+ ((flags & VF_COND_IFLT) && !(var->data.u.sint < smp->data.u.sint)))
+ goto unlock;
+ }
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.sint = smp->data.u.sint;
+ break;
+ case SMP_T_IPV4:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.ipv4 = smp->data.u.ipv4;
+ break;
+ case SMP_T_IPV6:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.ipv6 = smp->data.u.ipv6;
+ break;
+ case SMP_T_STR:
+ case SMP_T_BIN:
+ if ((flags & VF_COND_IFNOTEMPTY && !smp->data.u.str.data) ||
+ (flags & VF_COND_IFEMPTY && smp->data.u.str.data)) {
+ var->data.type = previous_type;
+ goto unlock;
+ }
+ var_clear_buffer(smp, vars, var, previous_type);
+ if (!var_accounting_add(vars, smp->sess, smp->strm, smp->data.u.str.data)) {
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+
+ var->data.u.str.area = malloc(smp->data.u.str.data);
+ if (!var->data.u.str.area) {
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -smp->data.u.str.data);
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+ var->data.u.str.data = smp->data.u.str.data;
+ memcpy(var->data.u.str.area, smp->data.u.str.area,
+ var->data.u.str.data);
+ break;
+ case SMP_T_METH:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.meth.meth = smp->data.u.meth.meth;
+ if (smp->data.u.meth.meth != HTTP_METH_OTHER)
+ break;
+
+ if (!var_accounting_add(vars, smp->sess, smp->strm, smp->data.u.meth.str.data)) {
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+
+ var->data.u.meth.str.area = malloc(smp->data.u.meth.str.data);
+ if (!var->data.u.meth.str.area) {
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -smp->data.u.meth.str.data);
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+ var->data.u.meth.str.data = smp->data.u.meth.str.data;
+ var->data.u.meth.str.size = smp->data.u.meth.str.data;
+ memcpy(var->data.u.meth.str.area, smp->data.u.meth.str.area,
+ var->data.u.meth.str.data);
+ break;
+ }
+
+ /* OK, now done */
+ ret = 1;
+ unlock:
+ vars_wrunlock(vars);
+ return ret;
+}
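+
+/* Illustrative example (not part of the original source): a converter
+ * such as "set-var(txn.rate,ifgt)" only overwrites txn.rate when the
+ * stored integer is greater than the new sample, which is how the
+ * VF_COND_IFGT flag above is typically reached.
+ */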
+
+/* Deletes a variable matching name hash <name_hash> and scope <scope> for the
+ * session and stream found in <smp>. Note that stream may be null for
+ * SCOPE_SESS. Returns 0 if the scope was not found, otherwise 1.
+ */
+static int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp)
+{
+ struct vars *vars;
+ struct var *var;
+ unsigned int size = 0;
+
+ vars = get_vars(smp->sess, smp->strm, scope);
+ if (!vars || vars->scope != scope)
+ return 0;
+
+ /* Look for existing variable name. */
+ vars_wrlock(vars);
+ var = var_get(vars, name_hash);
+ if (var) {
+ size = var_clear(var, 0);
+ var_accounting_diff(vars, smp->sess, smp->strm, -size);
+ }
+ vars_wrunlock(vars);
+ return 1;
+}
+
+
+/*
+ * Convert a string set-var condition into its numerical value.
+ * The corresponding bit is set in the <cond_bitmap> parameter if the
+ * <cond> is known.
+ * Returns 1 in case of success.
+ */
+static int vars_parse_cond_param(const struct buffer *cond, uint *cond_bitmap, char **err)
+{
+ struct var_set_condition *cond_elt = &conditions_array[0];
+
+ /* The conditions array is NULL terminated. */
+ while (cond_elt->cond_str) {
+ if (chunk_strcmp(cond, cond_elt->cond_str) == 0) {
+ *cond_bitmap |= cond_elt->flag;
+ break;
+ }
+ ++cond_elt;
+ }
+
+ if (cond_elt->cond_str == NULL && err)
+ memprintf(err, "unknown condition \"%.*s\"", (int)cond->data, cond->area);
+
+ return cond_elt->cond_str != NULL;
+}
+
+/* Returns 0 on failure, otherwise returns 1. */
+static int smp_conv_store(const struct arg *args, struct sample *smp, void *private)
+{
+ uint conditions = 0;
+ int cond_idx = 1;
+
+ while (args[cond_idx].type == ARGT_STR) {
+ if (vars_parse_cond_param(&args[cond_idx++].data.str, &conditions, NULL) == 0)
+ break;
+ }
+
+ return var_set(args[0].data.var.name_hash, args[0].data.var.scope, smp, conditions);
+}
+
+/* Returns 0 on failure, otherwise returns 1. */
+static int smp_conv_clear(const struct arg *args, struct sample *smp, void *private)
+{
+ return var_unset(args[0].data.var.name_hash, args[0].data.var.scope, smp);
+}
+
+/* This function checks an argument entry and fills it with a variable
+ * type. The argument must be a string. If the variable lookup fails,
+ * the function returns 0 and fills <err>, otherwise it returns 1.
+ */
+int vars_check_arg(struct arg *arg, char **err)
+{
+ enum vars_scope scope;
+ struct sample empty_smp = { };
+ uint64_t hash;
+
+ /* Check arg type. */
+ if (arg->type != ARGT_STR) {
+ memprintf(err, "unexpected argument type");
+ return 0;
+ }
+
+ /* Register new variable name. */
+ if (!vars_hash_name(arg->data.str.area, arg->data.str.data, &scope, &hash, err))
+ return 0;
+
+ if (scope == SCOPE_PROC && !var_set(hash, scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
+ return 0;
+
+ /* properly destroy the chunk */
+ chunk_destroy(&arg->data.str);
+
+ /* Use the global variable name pointer. */
+ arg->type = ARGT_VAR;
+ arg->data.var.name_hash = hash;
+ arg->data.var.scope = scope;
+ return 1;
+}
+
+/* This function stores a sample in a variable unless the variable is of
+ * scope "proc" and not defined yet.
+ * Returns zero on failure and non-zero otherwise. The variable not being
+ * defined is treated as a failure.
+ */
+int vars_set_by_name_ifexist(const char *name, size_t len, struct sample *smp)
+{
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ /* Variable creation is allowed for all scopes apart from the PROC one. */
+ return var_set(hash, scope, smp, (scope == SCOPE_PROC) ? VF_COND_IFEXISTS : 0);
+}
+
+
+/* This function stores a sample in a variable.
+ * Returns zero on failure and non-zero otherwise.
+ */
+int vars_set_by_name(const char *name, size_t len, struct sample *smp)
+{
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ return var_set(hash, scope, smp, 0);
+}
+
+/* This function unsets a variable if it was already defined.
+ * Returns zero on failure and non-zero otherwise.
+ */
+int vars_unset_by_name_ifexist(const char *name, size_t len, struct sample *smp)
+{
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ return var_unset(hash, scope, smp);
+}
+
+
+/* This retrieves the variable whose hash matches <name_hash> from variables
+ * <vars>, and if found and not empty, duplicates the result into sample <smp>.
+ * smp_dup() is used in order to release the variables lock ASAP (so a pre-
+ * allocated chunk is obtained via get_trash_chunk()). The variables' lock is
+ * used for reads.
+ *
+ * The function returns 0 if the variable was not found and no default
+ * value was provided in <def>, otherwise 1 with the sample filled.
+ * Default values are always returned as strings.
+ */
+static int var_to_smp(struct vars *vars, uint64_t name_hash, struct sample *smp, const struct buffer *def)
+{
+ struct var *var;
+
+ /* Get the variable entry. */
+ vars_rdlock(vars);
+ var = var_get(vars, name_hash);
+ if (!var || !var->data.type) {
+ if (!def) {
+ vars_rdunlock(vars);
+ return 0;
+ }
+
+ /* not found but we have a default value */
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *def;
+ }
+ else
+ smp->data = var->data;
+
+ /* Copy sample. */
+ smp_dup(smp);
+
+ vars_rdunlock(vars);
+ return 1;
+}
+
+/* This function fills a sample with the variable content.
+ *
+ * Keep in mind that a sample content is duplicated by using smp_dup()
+ * and it therefore uses a pre-allocated trash chunk as returned by
+ * get_trash_chunk().
+ *
+ * If the variable is not valid in this scope, 0 is always returned.
+ * If the variable is valid but not found, either the default value
+ * <def> is returned if not NULL, or zero is returned.
+ *
+ * Returns 1 if the sample is filled, otherwise it returns 0.
+ */
+int vars_get_by_name(const char *name, size_t len, struct sample *smp, const struct buffer *def)
+{
+ struct vars *vars;
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ /* Select "vars" pool according with the scope. */
+ vars = get_vars(smp->sess, smp->strm, scope);
+ if (!vars || vars->scope != scope)
+ return 0;
+
+ return var_to_smp(vars, hash, smp, def);
+}
+
+/* This function fills a sample with the content of the variable described
+ * by <var_desc>.
+ *
+ * Keep in mind that a sample content is duplicated by using smp_dup()
+ * and it therefore uses a pre-allocated trash chunk as returned by
+ * get_trash_chunk().
+ *
+ * If the variable is not valid in this scope, 0 is always returned.
+ * If the variable is valid but not found, either the default value
+ * <def> is returned if not NULL, or zero is returned.
+ *
+ * Returns 1 if the sample is filled, otherwise it returns 0.
+ */
+int vars_get_by_desc(const struct var_desc *var_desc, struct sample *smp, const struct buffer *def)
+{
+ struct vars *vars;
+
+ /* Select "vars" pool according with the scope. */
+ vars = get_vars(smp->sess, smp->strm, var_desc->scope);
+
+ /* Check if the scope is available at this point of processing. */
+ if (!vars || vars->scope != var_desc->scope)
+ return 0;
+
+ return var_to_smp(vars, var_desc->name_hash, smp, def);
+}
+
+/* Always returns ACT_RET_CONT even if an error occurs. */
+static enum act_return action_store(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct buffer *fmtstr = NULL;
+ struct sample smp;
+ int dir;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_SES: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_TCP_CHK: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_CFG_PARSER: dir = SMP_OPT_DIR_REQ; break; /* not used anyway */
+ case ACT_F_CLI_PARSER: dir = SMP_OPT_DIR_REQ; break; /* not used anyway */
+ default:
+ send_log(px, LOG_ERR, "Vars: internal error while executing the store action.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("Vars: internal error while executing the store action.\n");
+ return ACT_RET_CONT;
+ }
+
+ /* Process the expression. */
+ memset(&smp, 0, sizeof(smp));
+
+ if (!LIST_ISEMPTY(&rule->arg.vars.fmt)) {
+ /* a format-string is used */
+
+ fmtstr = alloc_trash_chunk();
+ if (!fmtstr) {
+ send_log(px, LOG_ERR, "Vars: memory allocation failure while processing store rule.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("Vars: memory allocation failure while processing store rule.\n");
+ return ACT_RET_CONT;
+ }
+
+ /* execute the log-format expression */
+ fmtstr->data = sess_build_logline(sess, s, fmtstr->area, fmtstr->size, &rule->arg.vars.fmt);
+
+ /* convert it to a sample of type string as it's what the vars
+ * API consumes, and store it.
+ */
+ smp_set_owner(&smp, px, sess, s, 0);
+ smp.data.type = SMP_T_STR;
+ smp.data.u.str = *fmtstr;
+ var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, rule->arg.vars.conditions);
+ }
+ else {
+ /* an expression is used */
+ if (!sample_process(px, sess, s, dir|SMP_OPT_FINAL,
+ rule->arg.vars.expr, &smp))
+ return ACT_RET_CONT;
+ }
+
+ /* Store the sample, and ignore errors. */
+ var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, rule->arg.vars.conditions);
+ free_trash_chunk(fmtstr);
+ return ACT_RET_CONT;
+}
+
+/* Always returns ACT_RET_CONT even if an error occurs. */
+static enum act_return action_clear(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, px, sess, s, SMP_OPT_FINAL);
+
+ /* Clear the variable using the sample context, and ignore errors. */
+ var_unset(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp);
+ return ACT_RET_CONT;
+}
+
+static void release_store_rule(struct act_rule *rule)
+{
+ struct logformat_node *lf, *lfb;
+
+ list_for_each_entry_safe(lf, lfb, &rule->arg.vars.fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+
+ release_sample_expr(rule->arg.vars.expr);
+}
+
+/* These two functions check the variable name and replace the
+ * configuration string name with the global string name. It's
+ * the same string, but the global pointer is easier to
+ * compare. They return non-zero on success, zero on failure.
+ *
+ * The first function checks a sample-fetch and the second
+ * checks a converter.
+ */
+static int smp_check_var(struct arg *args, char **err)
+{
+ return vars_check_arg(&args[0], err);
+}
+
+static int conv_check_var(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err_msg)
+{
+ int cond_idx = 1;
+ uint conditions = 0;
+ int retval = vars_check_arg(&args[0], err_msg);
+
+ while (retval && args[cond_idx].type == ARGT_STR)
+ retval = vars_parse_cond_param(&args[cond_idx++].data.str, &conditions, err_msg);
+
+ return retval;
+}
+
+/* This function is a common parser for using variables. It understands
+ * the format:
+ *
+ * set-var-fmt(<variable-name>[,<cond> ...]) <format-string>
+ * set-var(<variable-name>[,<cond> ...]) <expression>
+ * unset-var(<variable-name>)
+ *
+ * It returns ACT_RET_PRS_ERR if it fails and <err> is filled with an error
+ * message. Otherwise, it returns ACT_RET_PRS_OK and the variable <expr>
+ * is filled with the pointer to the expression to execute. The proxy is
+ * only used to retrieve the ->conf entries.
+ */
+static enum act_parse_ret parse_store(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ const char *var_name = args[*arg-1];
+ int var_len;
+ const char *kw_name;
+ int flags = 0, set_var = 0; /* 0=unset-var, 1=set-var, 2=set-var-fmt */
+ struct sample empty_smp = { };
+ struct ist condition = IST_NULL;
+ struct ist var = IST_NULL;
+ struct ist varname_ist = IST_NULL;
+
+ if (strncmp(var_name, "set-var-fmt", 11) == 0) {
+ var_name += 11;
+ set_var = 2;
+ }
+ else if (strncmp(var_name, "set-var", 7) == 0) {
+ var_name += 7;
+ set_var = 1;
+ }
+ else if (strncmp(var_name, "unset-var", 9) == 0) {
+ var_name += 9;
+ set_var = 0;
+ }
+
+ if (*var_name != '(') {
+ memprintf(err, "invalid or incomplete action '%s'. Expects 'set-var(<var-name>)', 'set-var-fmt(<var-name>)' or 'unset-var(<var-name>)'",
+ args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ var_name++; /* jump the '(' */
+ var_len = strlen(var_name);
+ var_len--; /* remove the ')' */
+ if (var_name[var_len] != ')') {
+ memprintf(err, "incomplete argument after action '%s'. Expects 'set-var(<var-name>)', 'set-var-fmt(<var-name>)' or 'unset-var(<var-name>)'",
+ args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Parse the optional conditions. */
+ var = ist2(var_name, var_len);
+ varname_ist = istsplit(&var, ',');
+ var_len = istlen(varname_ist);
+
+ condition = istsplit(&var, ',');
+
+ if (istlen(condition) && set_var == 0) {
+ memprintf(err, "unset-var does not expect parameters after the variable name. Only \"set-var\" and \"set-var-fmt\" manage conditions");
+ return ACT_RET_PRS_ERR;
+ }
+
+ while (istlen(condition)) {
+ struct buffer cond = {};
+
+ chunk_initlen(&cond, istptr(condition), 0, istlen(condition));
+ if (vars_parse_cond_param(&cond, &rule->arg.vars.conditions, err) == 0)
+ return ACT_RET_PRS_ERR;
+
+ condition = istsplit(&var, ',');
+ }
+
+ LIST_INIT(&rule->arg.vars.fmt);
+ if (!vars_hash_name(var_name, var_len, &rule->arg.vars.scope, &rule->arg.vars.name_hash, err))
+ return ACT_RET_PRS_ERR;
+
+ if (rule->arg.vars.scope == SCOPE_PROC &&
+ !var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
+ return ACT_RET_PRS_ERR;
+
+ /* There is no fetch method when the variable is unset. Just set the
+ * right action and return. */
+ if (!set_var) {
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_clear;
+ rule->release_ptr = release_store_rule;
+ return ACT_RET_PRS_OK;
+ }
+
+ kw_name = args[*arg-1];
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ flags = SMP_VAL_FE_CON_ACC;
+ px->conf.args.ctx = ARGC_TCO;
+ break;
+ case ACT_F_TCP_REQ_SES:
+ flags = SMP_VAL_FE_SES_ACC;
+ px->conf.args.ctx = ARGC_TSE;
+ break;
+ case ACT_F_TCP_REQ_CNT:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_REQ_CNT;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_REQ_CNT;
+ px->conf.args.ctx = ARGC_TRQ;
+ break;
+ case ACT_F_TCP_RES_CNT:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_RES_CNT;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_RES_CNT;
+ px->conf.args.ctx = ARGC_TRS;
+ break;
+ case ACT_F_HTTP_REQ:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_HRQ_HDR;
+ px->conf.args.ctx = ARGC_HRQ;
+ break;
+ case ACT_F_HTTP_RES:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_HRS_HDR;
+ px->conf.args.ctx = ARGC_HRS;
+ break;
+ case ACT_F_TCP_CHK:
+ flags = SMP_VAL_BE_CHK_RUL;
+ px->conf.args.ctx = ARGC_TCK;
+ break;
+ case ACT_F_CFG_PARSER:
+ flags = SMP_VAL_CFG_PARSER;
+ px->conf.args.ctx = ARGC_CFG;
+ break;
+ case ACT_F_CLI_PARSER:
+ flags = SMP_VAL_CLI_PARSER;
+ px->conf.args.ctx = ARGC_CLI;
+ break;
+ default:
+ memprintf(err,
+ "internal error, unexpected rule->from=%d, please report this bug!",
+ rule->from);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (set_var == 2) { /* set-var-fmt */
+ if (!parse_logformat_string(args[*arg], px, &rule->arg.vars.fmt, 0, flags, err))
+ return ACT_RET_PRS_ERR;
+
+ (*arg)++;
+
+ /* for late error reporting */
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+ } else {
+ /* set-var */
+ rule->arg.vars.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.vars.expr)
+ return ACT_RET_PRS_ERR;
+
+ if (!(rule->arg.vars.expr->fetch->val & flags)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ kw_name, sample_src_names(rule->arg.vars.expr->fetch->use));
+ free(rule->arg.vars.expr);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_store;
+ rule->release_ptr = release_store_rule;
+ return ACT_RET_PRS_OK;
+}
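+
+/* Illustrative configuration examples (not part of the original
+ * source) showing the syntaxes accepted by parse_store():
+ *
+ *   http-request set-var(txn.host) req.hdr(host)
+ *   http-request set-var-fmt(txn.from) %[src]:%[src_port]
+ *   http-request set-var(txn.score,ifnotset) int(0)
+ *   http-request unset-var(txn.host)
+ */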
+
+
+/* parses a global "set-var" directive. It will create a temporary rule and
+ * expression that are parsed, processed, and released on the fly so that we
+ * respect the real set-var syntax. These directives take the following format:
+ * set-var <name> <expression>
+ * set-var-fmt <name> <fmt>
+ * Note that parse_store() expects "set-var(name) <expression>" so we have to
+ * temporarily replace the keyword here.
+ */
+static int vars_parse_global_set_var(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct proxy px = {
+ .id = "CFG",
+ .conf.args = { .file = file, .line = line, },
+ };
+ struct act_rule rule = {
+ .arg.vars.scope = SCOPE_PROC,
+ .from = ACT_F_CFG_PARSER,
+ .conf = { .file = (char *)file, .line = line, },
+ };
+ enum obj_type objt = OBJ_TYPE_NONE;
+ struct session *sess = NULL;
+ enum act_parse_ret p_ret;
+ char *old_arg1;
+ char *tmp_arg1;
+ int arg = 2; // variable name
+ int ret = -1;
+ int use_fmt = 0;
+
+ LIST_INIT(&px.conf.args.list);
+
+ use_fmt = strcmp(args[0], "set-var-fmt") == 0;
+
+ if (!*args[1] || !*args[2]) {
+ if (use_fmt)
+ memprintf(err, "'%s' requires a process-wide variable name ('proc.<name>') and a format string.", args[0]);
+ else
+ memprintf(err, "'%s' requires a process-wide variable name ('proc.<name>') and a sample expression.", args[0]);
+ goto end;
+ }
+
+ tmp_arg1 = NULL;
+ if (!memprintf(&tmp_arg1, "set-var%s(%s)", use_fmt ? "-fmt" : "", args[1]))
+ goto end;
+
+ /* parse_store() will always return a message in <err> on error */
+ old_arg1 = args[1]; args[1] = tmp_arg1;
+ p_ret = parse_store((const char **)args, &arg, &px, &rule, err);
+ free(args[1]); args[1] = old_arg1;
+
+ if (p_ret != ACT_RET_PRS_OK)
+ goto end;
+
+ if (rule.arg.vars.scope != SCOPE_PROC) {
+ memprintf(err, "'%s': cannot set variable '%s', only scope 'proc' is permitted in the global section.", args[0], args[1]);
+ goto end;
+ }
+
+ if (smp_resolve_args(&px, err) != 0) {
+ release_sample_expr(rule.arg.vars.expr);
+ indent_msg(err, 2);
+ goto end;
+ }
+
+ if (use_fmt && !(sess = session_new(&px, NULL, &objt))) {
+ release_sample_expr(rule.arg.vars.expr);
+ memprintf(err, "'%s': out of memory when trying to set variable '%s' in the global section.", args[0], args[1]);
+ goto end;
+ }
+
+ action_store(&rule, &px, sess, NULL, 0);
+ release_sample_expr(rule.arg.vars.expr);
+ if (sess)
+ session_free(sess);
+
+ ret = 0;
+ end:
+ return ret;
+}
+
+/* parse CLI's "get var <name>" */
+static int vars_parse_cli_get_var(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct vars *vars;
+ struct sample smp = { };
+ int i;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (!*args[2])
+ return cli_err(appctx, "Missing process-wide variable identifier.\n");
+
+ vars = get_vars(NULL, NULL, SCOPE_PROC);
+ if (!vars || vars->scope != SCOPE_PROC)
+ return 0;
+
+ if (!vars_get_by_name(args[2], strlen(args[2]), &smp, NULL))
+ return cli_err(appctx, "Variable not found.\n");
+
+ /* the sample returned by vars_get_by_name() is allocated into a trash
+ * chunk so we have no constraint to manipulate it.
+ */
+ chunk_printf(&trash, "%s: type=%s value=", args[2], smp_to_type[smp.data.type]);
+
+ if (!sample_casts[smp.data.type][SMP_T_STR] ||
+ !sample_casts[smp.data.type][SMP_T_STR](&smp)) {
+ chunk_appendf(&trash, "(undisplayable)");
+ } else {
+ /* Display the displayable chars. */
+ b_putchr(&trash, '<');
+ for (i = 0; i < smp.data.u.str.data; i++) {
+ if (isprint((unsigned char)smp.data.u.str.area[i]))
+ b_putchr(&trash, smp.data.u.str.area[i]);
+ else
+ b_putchr(&trash, '.');
+ }
+ b_putchr(&trash, '>');
+ b_putchr(&trash, 0);
+ }
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse CLI's "set var <name>". It accepts:
+ * - set var <name> <expression>
+ * - set var <name> expr <expression>
+ * - set var <name> fmt <format>
+ */
+static int vars_parse_cli_set_var(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy px = {
+ .id = "CLI",
+ .conf.args = { .file = "CLI", .line = 0, },
+ };
+ struct act_rule rule = {
+ .arg.vars.scope = SCOPE_PROC,
+ .from = ACT_F_CLI_PARSER,
+ .conf = { .file = "CLI", .line = 0, },
+ };
+ enum obj_type objt = OBJ_TYPE_NONE;
+ struct session *sess = NULL;
+ enum act_parse_ret p_ret;
+ const char *tmp_args[3];
+ int tmp_arg;
+ char *tmp_act;
+ char *err = NULL;
+ int nberr;
+ int use_fmt = 0;
+
+ LIST_INIT(&px.conf.args.list);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (!*args[2])
+ return cli_err(appctx, "Missing process-wide variable identifier.\n");
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing either 'expr', 'fmt' or expression.\n");
+
+ if (*args[4]) {
+ /* this is the long format */
+ if (strcmp(args[3], "fmt") == 0)
+ use_fmt = 1;
+ else if (strcmp(args[3], "expr") != 0) {
+ memprintf(&err, "'%s %s': arg type must be either 'expr' or 'fmt' but got '%s'.", args[0], args[1], args[3]);
+ goto fail;
+ }
+ }
+
+ tmp_act = NULL;
+ if (!memprintf(&tmp_act, "set-var%s(%s)", use_fmt ? "-fmt" : "", args[2])) {
+ memprintf(&err, "memory allocation error.");
+ goto fail;
+ }
+
+ /* parse_store() will always return a message in <err> on error */
+ tmp_args[0] = tmp_act;
+ tmp_args[1] = (*args[4]) ? args[4] : args[3];
+ tmp_args[2] = "";
+ tmp_arg = 1; // must point to the first arg after the action
+ p_ret = parse_store(tmp_args, &tmp_arg, &px, &rule, &err);
+ free(tmp_act);
+
+ if (p_ret != ACT_RET_PRS_OK)
+ goto fail;
+
+ if (rule.arg.vars.scope != SCOPE_PROC) {
+ memprintf(&err, "'%s %s': cannot set variable '%s', only scope 'proc' is permitted here.", args[0], args[1], args[2]);
+ goto fail;
+ }
+
+ err = NULL;
+ nberr = smp_resolve_args(&px, &err);
+ if (nberr) {
+ release_sample_expr(rule.arg.vars.expr);
+ indent_msg(&err, 2);
+ goto fail;
+ }
+
+ if (use_fmt && !(sess = session_new(&px, NULL, &objt))) {
+ release_sample_expr(rule.arg.vars.expr);
+ memprintf(&err, "memory allocation error.");
+ goto fail;
+ }
+
+ action_store(&rule, &px, sess, NULL, 0);
+ release_sample_expr(rule.arg.vars.expr);
+ if (sess)
+ session_free(sess);
+
+ appctx->st0 = CLI_ST_PROMPT;
+ return 0;
+ fail:
+ return cli_dynerr(appctx, err);
+}
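+
+/* Illustrative CLI usage (not part of the original source):
+ *
+ *   set var proc.current_rate expr int(42)
+ *   set var proc.motd fmt %[date(),ltime(%c)]
+ *   get var proc.current_rate
+ *
+ * Note that "set var" is registered with ACCESS_EXPERIMENTAL below, so
+ * it first requires "experimental-mode on".
+ */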
+
+static int vars_max_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err, unsigned int *limit)
+{
+ char *error;
+
+ *limit = strtol(args[1], &error, 10);
+ if (*error != 0) {
+ memprintf(err, "%s: '%s' is an invalid size", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
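+
+/* Illustrative configuration example (not part of the original source):
+ * the per-scope accounting above is driven by global keywords such as
+ *
+ *   global
+ *       tune.vars.proc-max-size 1048576
+ *       tune.vars.txn-max-size  256
+ *
+ * where sizes are expressed in bytes.
+ */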
+
+static int vars_max_size_global(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_global_limit);
+}
+
+static int vars_max_size_proc(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_proc_limit);
+}
+
+static int vars_max_size_sess(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_sess_limit);
+}
+
+static int vars_max_size_txn(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_txn_limit);
+}
+
+static int vars_max_size_reqres(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_reqres_limit);
+}
+
+static int vars_max_size_check(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_check_limit);
+}
+
+/* early boot initialization */
+static void vars_init()
+{
+ var_name_hash_seed = ha_random64();
+ /* Initialize process vars */
+ vars_init_head(&proc_vars, SCOPE_PROC);
+}
+
+INITCALL0(STG_PREPARE, vars_init);
+
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+
+ { "var", smp_fetch_var, ARG2(1,STR,STR), smp_check_var, SMP_T_ANY, SMP_USE_CONST },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "set-var", smp_conv_store, ARG5(1,STR,STR,STR,STR,STR), conv_check_var, SMP_T_ANY, SMP_T_ANY },
+ { "unset-var", smp_conv_clear, ARG1(1,STR), conv_check_var, SMP_T_ANY, SMP_T_ANY },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
+
+static struct action_kw_list tcp_req_conn_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_conn_keywords_register, &tcp_req_conn_kws);
+
+static struct action_kw_list tcp_req_sess_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_req_sess_kws);
+
+static struct action_kw_list tcp_req_cont_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_kws);
+
+static struct action_kw_list tcp_res_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_kws);
+
+static struct action_kw_list tcp_check_kws = {ILH, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_check_keywords_register, &tcp_check_kws);
+
+static struct action_kw_list http_req_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static struct action_kw_list http_res_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_kws);
+
+static struct action_kw_list http_after_res_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_after_res_keywords_register, &http_after_res_kws);
+
+static struct cfg_kw_list cfg_kws = {{ },{
+ { CFG_GLOBAL, "set-var", vars_parse_global_set_var },
+ { CFG_GLOBAL, "set-var-fmt", vars_parse_global_set_var },
+ { CFG_GLOBAL, "tune.vars.global-max-size", vars_max_size_global },
+ { CFG_GLOBAL, "tune.vars.proc-max-size", vars_max_size_proc },
+ { CFG_GLOBAL, "tune.vars.sess-max-size", vars_max_size_sess },
+ { CFG_GLOBAL, "tune.vars.txn-max-size", vars_max_size_txn },
+ { CFG_GLOBAL, "tune.vars.reqres-max-size", vars_max_size_reqres },
+ { CFG_GLOBAL, "tune.vars.check-max-size", vars_max_size_check },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "get", "var", NULL }, "get var <name> : retrieve contents of a process-wide variable", vars_parse_cli_get_var, NULL },
+ { { "set", "var", NULL }, "set var <name> [fmt|expr] {<fmt>|<expr>}: set variable from an expression or a format", vars_parse_cli_set_var, NULL, NULL, NULL, ACCESS_EXPERIMENTAL },
+ { { NULL }, NULL, NULL, NULL }
+}};
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/version.c b/src/version.c
new file mode 100644
index 0000000..e7bb748
--- /dev/null
+++ b/src/version.c
@@ -0,0 +1,28 @@
+/*
+ * Version reporting : all user-visible version information should come from
+ * this file so that rebuilding only this one is enough to report the latest
+ * code version.
+ */
+
+#include <haproxy/global.h>
+#include <haproxy/version.h>
+
+/* These ones are made variables and not constants so that they are stored into
+ * the data region and prominently appear in core files.
+ */
+char haproxy_version_here[] = "HAProxy version follows";
+char haproxy_version[] = HAPROXY_VERSION;
+char haproxy_date[] = HAPROXY_DATE;
+char stats_version_string[] = STATS_VERSION_STRING;
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#define SANITIZE_STRING " with address sanitizer"
+#else
+#define SANITIZE_STRING ""
+#endif
+
+#if defined(__clang_version__)
+REGISTER_BUILD_OPTS("Built with clang compiler version " __clang_version__ "" SANITIZE_STRING);
+#elif defined(__VERSION__)
+REGISTER_BUILD_OPTS("Built with gcc compiler version " __VERSION__ "" SANITIZE_STRING);
+#endif
diff --git a/src/wdt.c b/src/wdt.c
new file mode 100644
index 0000000..6bb7d85
--- /dev/null
+++ b/src/wdt.c
@@ -0,0 +1,189 @@
+/*
+ * Thread lockup detection
+ *
+ * Copyright 2000-2019 Willy Tarreau <willy@haproxy.org>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <signal.h>
+#include <time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/debug.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/signal-t.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+/*
+ * The watchdog relies on timer_create() and timer_settime(), which are only
+ * available when the conditions below are met.
+ */
+#if defined(USE_RT) && defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+
+/* define a dummy value to designate "no timer". Use only 32 bits. */
+#ifndef TIMER_INVALID
+#define TIMER_INVALID ((timer_t)(unsigned long)(0xfffffffful))
+#endif
+
+static timer_t per_thread_wd_timer[MAX_THREADS];
+
+/* Set up (or re-arm) the watchdog timer for thread <thr>. Returns non-zero
+ * on success, zero on failure. It interrupts once per second of CPU time.
+ * Since timers based on CPU time are not automatically re-armed, we only
+ * set the value and leave the interval unset.
+ */
+int wdt_ping(int thr)
+{
+ struct itimerspec its;
+
+ its.it_value.tv_sec = 1; its.it_value.tv_nsec = 0;
+ its.it_interval.tv_sec = 0; its.it_interval.tv_nsec = 0;
+ return timer_settime(per_thread_wd_timer[thr], 0, &its, NULL) == 0;
+}
+
+/* This is the WDTSIG signal handler */
+void wdt_handler(int sig, siginfo_t *si, void *arg)
+{
+ unsigned long long n, p;
+ int thr;
+
+ switch (si->si_code) {
+ case SI_TIMER:
+ /* A thread's timer fired, the thread ID is in si_int. We have
+ * no guarantee that the thread handling this signal is in any
+ * way related to the one triggering it, so we need to retrieve
+ * the thread number from there. Note: this thread might
+ * continue to execute in parallel.
+ */
+ thr = si->si_value.sival_int;
+
+ /* cannot happen unless an unknown timer tries to play with our
+ * nerves. Let's die for now if this happens.
+ */
+ if (thr < 0 || thr >= global.nbthread)
+ break;
+
+ p = ha_thread_ctx[thr].prev_cpu_time;
+ n = now_cpu_time_thread(thr);
+
+ /* not yet reached the deadline of 1 sec,
+ * or p wasn't initialized yet
+ */
+ if (!p || n - p < 1000000000UL)
+ goto update_and_leave;
+
+ if ((threads_harmless_mask|sleeping_thread_mask|threads_to_dump) & (1UL << thr)) {
+ /* This thread is currently doing exactly nothing
+ * waiting in the poll loop (unlikely but possible),
+ * waiting for all other threads to join the rendez-vous
+ * point (common), or waiting for another thread to
+ * finish an isolated operation (unlikely but possible).
+ */
+ goto update_and_leave;
+ }
+
+ /* So the thread indeed appears locked up. In order to be
+ * certain that we're not witnessing an exceptional spike of
+ * CPU usage due to a configuration issue (like running tens
+ * of thousands of tasks in a single loop), we'll check if the
+ * scheduler is still alive by setting the TH_FL_STUCK flag
+ * that the scheduler clears when switching to the next task.
+ * If it's already set, then it's our second call with no
+ * progress and the thread is dead.
+ */
+ if (!(ha_thread_ctx[thr].flags & TH_FL_STUCK)) {
+ _HA_ATOMIC_OR(&ha_thread_ctx[thr].flags, TH_FL_STUCK);
+ goto update_and_leave;
+ }
+
+ /* No doubt now, there's no hope of recovering, die loudly! */
+ break;
+
+#if defined(USE_THREAD) && defined(SI_TKILL) /* Linux uses this */
+
+ case SI_TKILL:
+ /* we got a pthread_kill, stop on it */
+ thr = tid;
+ break;
+
+#elif defined(USE_THREAD) && defined(SI_LWP) /* FreeBSD uses this */
+
+ case SI_LWP:
+ /* we got a pthread_kill, stop on it */
+ thr = tid;
+ break;
+
+#endif
+ default:
+ /* unhandled other conditions */
+ return;
+ }
+
+ /* By default we terminate. If we're not on the victim thread, better
+ * bounce the signal there so that we produce a cleaner stack trace
+ * with the other thread interrupted exactly where it was running and
+ * the current one not involved in this.
+ */
+#ifdef USE_THREAD
+ if (thr != tid)
+ ha_tkill(thr, sig);
+ else
+#endif
+ ha_panic();
+ return;
+
+ update_and_leave:
+ wdt_ping(thr);
+}
+
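+/* Create and arm the watchdog timer for the current thread. Always returns
+ * 1 so that startup proceeds even when the timer could not be set up; in
+ * that case a warning is emitted and lockup detection is simply disabled
+ * for this thread.
+ */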
+int init_wdt_per_thread()
+{
+ if (!clock_setup_signal_timer(&per_thread_wd_timer[tid], WDTSIG, tid))
+ goto fail1;
+
+ if (!wdt_ping(tid))
+ goto fail2;
+
+ return 1;
+
+ fail2:
+ timer_delete(per_thread_wd_timer[tid]);
+ fail1:
+ per_thread_wd_timer[tid] = TIMER_INVALID;
+ ha_warning("Failed to set up watchdog timer for thread %u, disabling lockup detection.\n", tid);
+ return 1;
+}
+
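+/* delete the current thread's watchdog timer, if it was created */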
+void deinit_wdt_per_thread()
+{
+ if (per_thread_wd_timer[tid] != TIMER_INVALID)
+ timer_delete(per_thread_wd_timer[tid]);
+}
+
+/* Registers the signal handler for WDTSIG and returns ERR_NONE. Since
+ * signal handlers are process-wide, it must be called only once per
+ * process.
+ */
+int init_wdt()
+{
+ struct sigaction sa;
+
+ sa.sa_handler = NULL;
+ sa.sa_sigaction = wdt_handler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(WDTSIG, &sa, NULL);
+ return ERR_NONE;
+}
+
+REGISTER_POST_CHECK(init_wdt);
+REGISTER_PER_THREAD_INIT(init_wdt_per_thread);
+REGISTER_PER_THREAD_DEINIT(deinit_wdt_per_thread);
+#endif
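
clock_setup_signal_timer(), called from init_wdt_per_thread() above, is implemented in clock.c and is not part of this patch. For context, here is a sketch of what creating such a per-thread CPU-time timer involves with the POSIX API; the demo_ name is hypothetical and this only illustrates the calls, it is not HAProxy's implementation:

    #include <pthread.h>
    #include <signal.h>
    #include <time.h>

    /* Create a timer on the calling thread's CPU clock that delivers
     * signal <sig> with <thr> attached, so a handler like wdt_handler()
     * above can recover the thread number from si->si_value.sival_int.
     * Returns non-zero on success, zero on failure. */
    static int demo_setup_cpu_timer(timer_t *timer, int sig, int thr)
    {
        struct sigevent sev = { 0 };
        clockid_t clk;

        if (pthread_getcpuclockid(pthread_self(), &clk) != 0)
            return 0;

        sev.sigev_notify = SIGEV_SIGNAL;
        sev.sigev_signo = sig;
        sev.sigev_value.sival_int = thr;

        /* one-shot: as the wdt_ping() comment notes, CPU-time timers are
         * not re-armed automatically, so the handler must re-arm the timer
         * with timer_settime() after each successful check */
        return timer_create(clk, &sev, timer) == 0;
    }

On older glibc, linking with -lrt is required for timer_create(), which is what the USE_RT guard above hints at.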
diff --git a/src/xprt_handshake.c b/src/xprt_handshake.c
new file mode 100644
index 0000000..33f7750
--- /dev/null
+++ b/src/xprt_handshake.c
@@ -0,0 +1,299 @@
+/*
+ * Pseudo-xprt to handle any handshake except the SSL handshake
+ *
+ * Copyright 2019 HAProxy Technologies, Olivier Houchard <ohouchard@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/connection.h>
+
+struct xprt_handshake_ctx {
+ struct connection *conn;
+ struct wait_event *subs;
+ struct wait_event wait_event;
+ const struct xprt_ops *xprt;
+ void *xprt_ctx;
+};
+
+DECLARE_STATIC_POOL(xprt_handshake_ctx_pool, "xprt_handshake_ctx", sizeof(struct xprt_handshake_ctx));
+
+/* This XPRT doesn't take care of sending or receiving data; once its
+ * handshake is done, it simply removes itself from the xprt chain.
+ */
+static size_t xprt_handshake_from_buf(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags)
+{
+ return 0;
+}
+
+static size_t xprt_handshake_to_buf(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags)
+{
+ return 0;
+}
+
+/* xprt_handshake_io_cb is exported so that its name can be resolved in "show fd" output */
+struct task *xprt_handshake_io_cb(struct task *t, void *bctx, unsigned int state)
+{
+ struct xprt_handshake_ctx *ctx = bctx;
+ struct connection *conn = ctx->conn;
+
+ if (conn->flags & CO_FL_SOCKS4_SEND)
+ if (!conn_send_socks4_proxy_request(conn)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND,
+ &ctx->wait_event);
+
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_SOCKS4_RECV)
+ if (!conn_recv_socks4_proxy_response(conn)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_ACCEPT_CIP)
+ if (!conn_recv_netscaler_cip(conn, CO_FL_ACCEPT_CIP)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_ACCEPT_PROXY)
+ if (!conn_recv_proxy(conn, CO_FL_ACCEPT_PROXY)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_SEND_PROXY)
+ if (!conn_send_proxy(conn, CO_FL_SEND_PROXY)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND,
+ &ctx->wait_event);
+ goto out;
+ }
+
+out:
+ /* Wake the stream if we're done with the handshake, or we have a
+ * connection error.
+ */
+ if ((conn->flags & CO_FL_ERROR) ||
+ !(conn->flags & CO_FL_HANDSHAKE)) {
+ int ret = 0;
+ int woke = 0;
+ int was_conn_ctx = 0;
+
+ /* Handshake finished or failed: wake any waiter */
+ if (ctx->subs) {
+ tasklet_wakeup(ctx->subs->tasklet);
+ ctx->subs->events = 0;
+ woke = 1;
+ ctx->subs = NULL;
+ }
+
+ /* Remove ourselves from the xprt chain */
+ if (ctx->wait_event.events != 0)
+ ctx->xprt->unsubscribe(ctx->conn,
+ ctx->xprt_ctx,
+ ctx->wait_event.events,
+ &ctx->wait_event);
+ if (conn->xprt_ctx == ctx) {
+ conn->xprt_ctx = ctx->xprt_ctx;
+ conn->xprt = ctx->xprt;
+ was_conn_ctx = 1;
+ } else
+ conn->xprt->remove_xprt(conn, conn->xprt_ctx, ctx,
+ ctx->xprt, ctx->xprt_ctx);
+ /* If we're the first xprt for the connection, let the
+ * upper layers know. If no mux was set up yet, then call
+ * conn_create_mux, and if we have a mux, and it has a wake
+ * method, call it too.
+ */
+ if (was_conn_ctx) {
+ if (!ctx->conn->mux)
+ ret = conn_create_mux(ctx->conn);
+ if (ret >= 0 && !woke && ctx->conn->mux && ctx->conn->mux->wake)
+ ret = ctx->conn->mux->wake(ctx->conn);
+ }
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(xprt_handshake_ctx_pool, ctx);
+ t = NULL;
+ }
+ return t;
+}
+
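+/* Start the underlying xprt if it has a start method, then schedule the
+ * handshake tasklet. Returns 0 on success, a negative value on failure.
+ */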
+static int xprt_handshake_start(struct connection *conn, void *xprt_ctx)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt->start) {
+ int ret;
+
+ ret = ctx->xprt->start(conn, ctx->xprt_ctx);
+ if (ret < 0)
+ return ret;
+ }
+ tasklet_wakeup(ctx->wait_event.tasklet);
+
+ return 0;
+}
+
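+/* Allocate the handshake context and its tasklet. Returns 0 on success
+ * (including when a context already exists), or -1 on allocation failure.
+ */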
+static int xprt_handshake_init(struct connection *conn, void **xprt_ctx)
+{
+ struct xprt_handshake_ctx *ctx;
+ /* already initialized */
+ if (*xprt_ctx)
+ return 0;
+
+ ctx = pool_alloc(xprt_handshake_ctx_pool);
+ if (!ctx) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ return -1;
+ }
+ ctx->conn = conn;
+ ctx->wait_event.tasklet = tasklet_new();
+ if (!ctx->wait_event.tasklet) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ pool_free(xprt_handshake_ctx_pool, ctx);
+ return -1;
+ }
+ ctx->wait_event.tasklet->process = xprt_handshake_io_cb;
+ ctx->wait_event.tasklet->context = ctx;
+ ctx->wait_event.events = 0;
+
+ ctx->xprt = NULL;
+ ctx->xprt_ctx = NULL;
+ ctx->subs = NULL;
+ *xprt_ctx = ctx;
+
+ return 0;
+}
+
+static void xprt_handshake_close(struct connection *conn, void *xprt_ctx)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (ctx) {
+ if (ctx->wait_event.events != 0)
+ ctx->xprt->unsubscribe(ctx->conn, ctx->xprt_ctx,
+ ctx->wait_event.events,
+ &ctx->wait_event);
+ if (ctx->subs) {
+ ctx->subs->events = 0;
+ tasklet_wakeup(ctx->subs->tasklet);
+ }
+
+ if (ctx->xprt && ctx->xprt->close)
+ ctx->xprt->close(conn, ctx->xprt_ctx);
+ /* Remove any handshake flag, and if we were the connection
+ * xprt, get back to XPRT_RAW. If we're here because an
+ * outgoing connection failed, it will be retried using the
+ * same struct connection, and since xprt_handshake is a bit
+ * special in that it requires a call to add_xprt(), it's
+ * better to fall back to the original XPRT to re-initiate
+ * the connection.
+ */
+ conn->flags &= ~CO_FL_HANDSHAKE;
+ if (conn->xprt == xprt_get(XPRT_HANDSHAKE))
+ conn->xprt = xprt_get(XPRT_RAW);
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(xprt_handshake_ctx_pool, ctx);
+ }
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int xprt_handshake_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ ctx->subs = es;
+ es->events |= event_type;
+ return 0;
+
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int xprt_handshake_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ ctx->subs = NULL;
+
+ return 0;
+}
+
+/* Use the provided XPRT as the underlying XPRT and return the old one
+ * through <oldxprt_ops>/<oldxprt_ctx>. Always returns 0.
+ */
+static int xprt_handshake_add_xprt(struct connection *conn, void *xprt_ctx, void *toadd_ctx, const struct xprt_ops *toadd_ops, void **oldxprt_ctx, const struct xprt_ops **oldxprt_ops)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (oldxprt_ops)
+ *oldxprt_ops = ctx->xprt;
+ if (oldxprt_ctx)
+ *oldxprt_ctx = ctx->xprt_ctx;
+ ctx->xprt = toadd_ops;
+ ctx->xprt_ctx = toadd_ctx;
+
+ return 0;
+}
+
+/* Remove the specified xprt. If it is our underlying XPRT, replace it with
+ * <newops>/<newctx> and return 0, otherwise just call the remove_xprt
+ * method of the underlying XPRT.
+ */
+static int xprt_handshake_remove_xprt(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt_ctx == toremove_ctx) {
+ ctx->xprt_ctx = newctx;
+ ctx->xprt = newops;
+ return 0;
+ }
+ return (ctx->xprt->remove_xprt(conn, ctx->xprt_ctx, toremove_ctx, newops, newctx));
+}
+
+struct xprt_ops xprt_handshake = {
+ .snd_buf = xprt_handshake_from_buf,
+ .rcv_buf = xprt_handshake_to_buf,
+ .subscribe = xprt_handshake_subscribe,
+ .unsubscribe = xprt_handshake_unsubscribe,
+ .remove_xprt = xprt_handshake_remove_xprt,
+ .add_xprt = xprt_handshake_add_xprt,
+ .init = xprt_handshake_init,
+ .start = xprt_handshake_start,
+ .close = xprt_handshake_close,
+ .rcv_pipe = NULL,
+ .snd_pipe = NULL,
+ .shutr = NULL,
+ .shutw = NULL,
+ .name = "HS",
+};
+
+static void __xprt_handshake_init(void)
+{
+ xprt_register(XPRT_HANDSHAKE, &xprt_handshake);
+}
+
+INITCALL0(STG_REGISTER, __xprt_handshake_init);
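
The subscribe()/unsubscribe() pair above implements the single-subscriber contract that every xprt exposes. As a rough sketch of the caller side, assuming a hypothetical demo_mux_ctx for the upper layer; only the xprt_ops calls and the wait_event fields match this patch, the rest is illustrative glue:

    #include <haproxy/connection.h>

    struct demo_mux_ctx {
        struct connection *conn;
        struct wait_event wait_event;   /* single subscription slot */
    };

    /* ask to be woken (via wait_event.tasklet) when a recv can be retried */
    static void demo_want_recv(struct demo_mux_ctx *ctx)
    {
        struct connection *conn = ctx->conn;

        if (!(ctx->wait_event.events & SUB_RETRY_RECV))
            conn->xprt->subscribe(conn, conn->xprt_ctx,
                                  SUB_RETRY_RECV, &ctx->wait_event);
    }

    /* stop waiting, e.g. before releasing the upper layer */
    static void demo_stop_recv(struct demo_mux_ctx *ctx)
    {
        struct connection *conn = ctx->conn;

        if (ctx->wait_event.events & SUB_RETRY_RECV)
            conn->xprt->unsubscribe(conn, conn->xprt_ctx,
                                    SUB_RETRY_RECV, &ctx->wait_event);
    }

The BUG_ON() checks in the implementations above enforce exactly this contract: a second wait_event may not subscribe while the first one still has events pending.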
diff --git a/src/xprt_quic.c b/src/xprt_quic.c
new file mode 100644
index 0000000..9e9258f
--- /dev/null
+++ b/src/xprt_quic.c
@@ -0,0 +1,167 @@
+/*
+ * QUIC xprt layer. Act as an abstraction between quic_conn and MUX layers.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_quic
+
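+/* Close callback: mark the mux as released and free the quic_conn if its
+ * timer already expired; otherwise hand over to
+ * qc_check_close_on_released_mux().
+ */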
+static void quic_close(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *conn_ctx = xprt_ctx;
+ struct quic_conn *qc = conn_ctx->qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ /* From now on, any application data received can be dropped. */
+ qc->mux_state = QC_MUX_RELEASED;
+
+ /* If the quic-conn timer has already expired, free the quic-conn. */
+ if (qc->flags & QUIC_FL_CONN_EXP_TIMER) {
+ quic_conn_release(qc);
+ goto leave;
+ }
+
+ qc_check_close_on_released_mux(qc);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int quic_conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct quic_conn *qc = conn->handle.qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_SUB, qc);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(qc->subs && qc->subs != es);
+
+ es->events |= event_type;
+ qc->subs = es;
+
+ /* TODO implement a check_events to detect if subscriber should be
+ * woken up immediately ?
+ */
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", QUIC_EV_CONN_XPRTRECV, qc);
+
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("subscribe(send)", QUIC_EV_CONN_XPRTSEND, qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_SUB, qc);
+
+ return 0;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int quic_conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct quic_conn *qc = conn->handle.qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_SUB, qc);
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("unsubscribe(recv)", QUIC_EV_CONN_XPRTRECV, qc);
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("unsubscribe(send)", QUIC_EV_CONN_XPRTSEND, qc);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ qc->subs = NULL;
+
+ /* TODO implement ignore_events similar to conn_unsubscribe() ? */
+
+ TRACE_LEAVE(QUIC_EV_CONN_SUB, qc);
+
+ return 0;
+}
+
+/* Store in <xprt_ctx> the context attached to <conn>. Always returns 0. */
+static int qc_conn_init(struct connection *conn, void **xprt_ctx)
+{
+ struct quic_conn *qc = conn->handle.qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, conn);
+
+ /* do not store the context if already set */
+ if (*xprt_ctx)
+ goto out;
+
+ *xprt_ctx = conn->handle.qc->xprt_ctx;
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+
+ return 0;
+}
+
+/* Start the QUIC transport layer */
+static int qc_xprt_start(struct connection *conn, void *ctx)
+{
+ int ret = 0;
+ struct quic_conn *qc;
+
+ qc = conn->handle.qc;
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ /* mux-quic can now be considered ready. */
+ qc->mux_state = QC_MUX_READY;
+
+ tasklet_wakeup(qc->wait_event.tasklet);
+
+ ret = 1;
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
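+/* Return the ssl_sock_ctx attached to <conn> if it is an initialized QUIC
+ * connection, or NULL.
+ */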
+static struct ssl_sock_ctx *qc_get_ssl_sock_ctx(struct connection *conn)
+{
+ if (!conn || conn->xprt != xprt_get(XPRT_QUIC) || !conn->handle.qc || !conn->xprt_ctx)
+ return NULL;
+
+ return conn->handle.qc->xprt_ctx;
+}
+
+/* transport-layer operations for QUIC connections. */
+static struct xprt_ops ssl_quic = {
+ .close = quic_close,
+ .subscribe = quic_conn_subscribe,
+ .unsubscribe = quic_conn_unsubscribe,
+ .init = qc_conn_init,
+ .start = qc_xprt_start,
+ .prepare_bind_conf = ssl_sock_prepare_bind_conf,
+ .destroy_bind_conf = ssl_sock_destroy_bind_conf,
+ .get_alpn = ssl_sock_get_alpn,
+ .get_ssl_sock_ctx = qc_get_ssl_sock_ctx,
+ .name = "QUIC",
+};
+
+static void __quic_conn_init(void)
+{
+ xprt_register(XPRT_QUIC, &ssl_quic);
+}
+INITCALL0(STG_REGISTER, __quic_conn_init);
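
Both new files end by registering their xprt_ops under a fixed identifier; this is the counterpart of the xprt_get() calls seen in xprt_handshake_close(). A minimal sketch of what such a registry amounts to, with hypothetical demo_* names, the real table living with the connection code:

    #include <haproxy/connection.h>

    /* Hypothetical miniature of the table behind xprt_register()/xprt_get(). */
    enum demo_xprt_id { DEMO_XPRT_RAW, DEMO_XPRT_SSL, DEMO_XPRT_HANDSHAKE,
                        DEMO_XPRT_QUIC, DEMO_XPRT_MAX };

    static struct xprt_ops *demo_registered_xprt[DEMO_XPRT_MAX];

    static void demo_xprt_register(enum demo_xprt_id id, struct xprt_ops *ops)
    {
        if (id < DEMO_XPRT_MAX)
            demo_registered_xprt[id] = ops;
    }

    static struct xprt_ops *demo_xprt_get(enum demo_xprt_id id)
    {
        return id < DEMO_XPRT_MAX ? demo_registered_xprt[id] : NULL;
    }

Registering during the STG_REGISTER stage is meant to guarantee that the table is populated before any configuration parsing or connection handling takes place, which is why every file in this patch performs its registrations through INITCALL rather than at first use.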